summaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /src/libutil
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/CMakeLists.txt24
-rw-r--r--src/libutil/addr.c2049
-rw-r--r--src/libutil/addr.h356
-rw-r--r--src/libutil/cxx/error.hxx161
-rw-r--r--src/libutil/cxx/file_util.cxx457
-rw-r--r--src/libutil/cxx/file_util.hxx312
-rw-r--r--src/libutil/cxx/hash_util.hxx109
-rw-r--r--src/libutil/cxx/local_shared_ptr.hxx440
-rw-r--r--src/libutil/cxx/utf8_util.cxx421
-rw-r--r--src/libutil/cxx/utf8_util.h85
-rw-r--r--src/libutil/cxx/util.hxx238
-rw-r--r--src/libutil/cxx/util_tests.cxx82
-rw-r--r--src/libutil/expression.c1635
-rw-r--r--src/libutil/expression.h173
-rw-r--r--src/libutil/fstring.c482
-rw-r--r--src/libutil/fstring.h231
-rw-r--r--src/libutil/hash.c716
-rw-r--r--src/libutil/hash.h114
-rw-r--r--src/libutil/heap.c197
-rw-r--r--src/libutil/heap.h97
-rw-r--r--src/libutil/libev_helper.c111
-rw-r--r--src/libutil/libev_helper.h86
-rw-r--r--src/libutil/mem_pool.c1327
-rw-r--r--src/libutil/mem_pool.h470
-rw-r--r--src/libutil/mem_pool_internal.h92
-rw-r--r--src/libutil/multipattern.c821
-rw-r--r--src/libutil/multipattern.h173
-rw-r--r--src/libutil/printf.c1097
-rw-r--r--src/libutil/printf.h96
-rw-r--r--src/libutil/radix.c434
-rw-r--r--src/libutil/radix.h123
-rw-r--r--src/libutil/ref.h91
-rw-r--r--src/libutil/regexp.c1359
-rw-r--r--src/libutil/regexp.h276
-rw-r--r--src/libutil/rrd.c1502
-rw-r--r--src/libutil/rrd.h362
-rw-r--r--src/libutil/shingles.c412
-rw-r--r--src/libutil/shingles.h101
-rw-r--r--src/libutil/sqlite_utils.c620
-rw-r--r--src/libutil/sqlite_utils.h90
-rw-r--r--src/libutil/str_util.c3886
-rw-r--r--src/libutil/str_util.h565
-rw-r--r--src/libutil/unix-std.h79
-rw-r--r--src/libutil/upstream.c1761
-rw-r--r--src/libutil/upstream.h344
-rw-r--r--src/libutil/uthash_strcase.h91
-rw-r--r--src/libutil/util.c2746
-rw-r--r--src/libutil/util.h581
48 files changed, 28075 insertions, 0 deletions
diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
new file mode 100644
index 0000000..67b7e94
--- /dev/null
+++ b/src/libutil/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Sources of the rspamd utility library (librspamd-util)
+SET(LIBRSPAMDUTILSRC
+ ${CMAKE_CURRENT_SOURCE_DIR}/addr.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/libev_helper.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/expression.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/fstring.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/hash.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/mem_pool.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/printf.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/radix.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/regexp.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/rrd.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/shingles.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/sqlite_utils.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/str_util.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/util.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/heap.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/cxx/util_tests.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/cxx/file_util.cxx)
+# Export the source list to the parent scope as RSPAMD_UTIL
+SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libutil/addr.c b/src/libutil/addr.c
new file mode 100644
index 0000000..e011c99
--- /dev/null
+++ b/src/libutil/addr.c
@@ -0,0 +1,2049 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "addr.h"
+#include "util.h"
+#include "logger.h"
+#include "cryptobox.h"
+#include "unix-std.h"
+/* pwd and grp */
+#ifdef HAVE_PWD_H
+#include <pwd.h>
+#endif
+
+#ifdef HAVE_GRP_H
+#include <grp.h>
+#endif
+
+static void *local_addrs;
+
+enum { /* Cached result of the probe performed by rspamd_ip_check_ipv6() */
+ RSPAMD_IPV6_UNDEFINED = 0, /* the probe has not been run yet */
+ RSPAMD_IPV6_SUPPORTED, /* the probe decided that IPv6 may be used */
+ RSPAMD_IPV6_UNSUPPORTED /* the probe decided that IPv6 is not usable */
+} ipv6_status = RSPAMD_IPV6_UNDEFINED;
+
+/**
+ * Union that is used for storing sockaddrs of any family handled in this
+ * file; it serves as the receive buffer for accept() and recvfrom()
+ */
+union sa_union {
+ struct sockaddr sa; /* generic view, used to read sa_family */
+ struct sockaddr_in s4; /* IPv4 view */
+ struct sockaddr_in6 s6; /* IPv6 view */
+ struct sockaddr_un su; /* unix socket view */
+ struct sockaddr_storage ss; /* generic storage member of the union */
+};
+
+union sa_inet { /* sockaddr storage restricted to the IPv4/IPv6 families */
+ struct sockaddr sa; /* generic view, used to read or set sa_family */
+ struct sockaddr_in s4; /* IPv4 view */
+ struct sockaddr_in6 s6; /* IPv6 view */
+};
+
+struct rspamd_addr_unix { /* unix socket address plus the filesystem attributes applied on listen */
+ struct sockaddr_un addr; /* socket path */
+ gint mode; /* permissions passed to chmod() after bind */
+ uid_t owner; /* owner passed to chown(); (uid_t) -1 when not specified */
+ gid_t group; /* group passed to chown(); (gid_t) -1 when not specified */
+};
+
+struct rspamd_addr_inet { /* wrapper around the IPv4/IPv6 sockaddr union */
+ union sa_inet addr;
+};
+
+struct rspamd_inet_addr_s { /* address object; the active member of `u` is selected by `af` */
+ union {
+ struct rspamd_addr_inet in; /* stored inline, used when af is not AF_UNIX */
+ struct rspamd_addr_unix *un; /* separately allocated, used when af is AF_UNIX */
+ } u;
+ gint af; /* address family (AF_INET, AF_INET6 or AF_UNIX) */
+ socklen_t slen; /* sockaddr length handed to connect()/bind()/sendto() */
+};
+
+/**
+ * Make the family stored inside the sockaddr and the cached sockaddr length
+ * agree with addr->af.
+ * @param addr address to normalise; for AF_UNIX, addr->u.un must be allocated
+ */
+static void
+rspamd_ip_validate_af(rspamd_inet_addr_t *addr)
+{
+	switch (addr->af) {
+	case AF_UNIX:
+		addr->u.un->addr.sun_family = AF_UNIX;
+#ifdef SUN_LEN
+		addr->slen = SUN_LEN(&addr->u.un->addr);
+#else
+		addr->slen = sizeof(addr->u.un->addr);
+#endif
+#if defined(FREEBSD) || defined(__APPLE__)
+		addr->u.un->addr.sun_len = addr->slen;
+#endif
+		break;
+	case AF_INET:
+		addr->u.in.addr.sa.sa_family = addr->af;
+		addr->slen = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		addr->u.in.addr.sa.sa_family = addr->af;
+		addr->slen = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		/* Unknown family: only the family field is synchronised */
+		addr->u.in.addr.sa.sa_family = addr->af;
+		break;
+	}
+}
+
+/*
+ * Allocate `sz` bytes from `pool` when it is not NULL and from the glib heap
+ * otherwise; the ALLOC0 variant returns zeroed memory. The whole expansion and
+ * every argument are parenthesised so that the macros can be used safely
+ * inside larger expressions.
+ */
+#define RSPAMD_MAYBE_ALLOC_POOL(pool, sz) \
+	(((pool) != NULL) ? rspamd_mempool_alloc((pool), (sz)) : g_malloc((sz)))
+#define RSPAMD_MAYBE_ALLOC0_POOL(pool, sz) \
+	(((pool) != NULL) ? rspamd_mempool_alloc0((pool), (sz)) : g_malloc0((sz)))
+
+/**
+ * Allocate a zeroed address object for the given family.
+ * @param af address family
+ * @param pool memory pool to allocate from, or NULL to use the glib heap
+ * @return new address; for AF_UNIX the unix-specific part is allocated as well
+ */
+static rspamd_inet_addr_t *
+rspamd_inet_addr_create(gint af, rspamd_mempool_t *pool)
+{
+	rspamd_inet_addr_t *res = RSPAMD_MAYBE_ALLOC0_POOL(pool, sizeof(*res));
+
+	res->af = af;
+
+	if (af != AF_UNIX) {
+		rspamd_ip_validate_af(res);
+
+		return res;
+	}
+
+	/* Unix sockets keep their sockaddr and file attributes out of line */
+	res->u.un = RSPAMD_MAYBE_ALLOC0_POOL(pool, sizeof(*res->u.un));
+	res->slen = sizeof(res->u.un->addr);
+
+	return res;
+}
+
+/**
+ * Release an address with g_free(), including the out-of-line unix part.
+ * @param addr address to free, may be NULL
+ */
+void rspamd_inet_address_free(rspamd_inet_addr_t *addr)
+{
+	if (addr == NULL) {
+		return;
+	}
+
+	if (addr->af == AF_UNIX && addr->u.un != NULL) {
+		g_free(addr->u.un);
+	}
+
+	g_free(addr);
+}
+
+/**
+ * Probe once whether IPv6 can be used and cache the verdict in ipv6_status.
+ * Subsequent calls return immediately.
+ */
+static void
+rspamd_ip_check_ipv6(void)
+{
+	struct stat st;
+	gint probe_fd;
+
+	if (ipv6_status != RSPAMD_IPV6_UNDEFINED) {
+		/* Already probed */
+		return;
+	}
+
+	probe_fd = socket(AF_INET6, SOCK_STREAM, 0);
+
+	if (probe_fd == -1) {
+		ipv6_status = RSPAMD_IPV6_UNSUPPORTED;
+
+		return;
+	}
+
+	close(probe_fd);
+
+	/*
+	 * Try to check /proc if we are on Linux (the common case)
+	 */
+	if (stat("/proc/net/dev", &st) == -1) {
+		/* Not a Linux, so we assume it supports ipv6 somehow... */
+		ipv6_status = RSPAMD_IPV6_SUPPORTED;
+
+		return;
+	}
+
+	if (stat("/proc/net/if_inet6", &st) != -1) {
+		ipv6_status = RSPAMD_IPV6_SUPPORTED;
+	}
+	else {
+		ipv6_status = RSPAMD_IPV6_UNSUPPORTED;
+	}
+}
+
+/**
+ * Tell whether an address holds a usable IP.
+ * @param addr address to check (must not be NULL)
+ * @return TRUE for an IPv4 address other than INADDR_ANY/INADDR_NONE or an
+ * IPv6 address other than the unspecified one; FALSE for everything else
+ */
+gboolean
+rspamd_ip_is_valid(const rspamd_inet_addr_t *addr)
+{
+	const struct in_addr ip4_any = {INADDR_ANY}, ip4_none = {INADDR_NONE};
+	const struct in6_addr ip6_any = IN6ADDR_ANY_INIT;
+
+	if (G_LIKELY(addr->af == AF_INET)) {
+		const struct in_addr *ip4 = &addr->u.in.addr.s4.sin_addr;
+
+		if (memcmp(ip4, &ip4_any, sizeof(struct in_addr)) == 0) {
+			return FALSE;
+		}
+
+		if (memcmp(ip4, &ip4_none, sizeof(struct in_addr)) == 0) {
+			return FALSE;
+		}
+
+		return TRUE;
+	}
+
+	if (G_UNLIKELY(addr->af == AF_INET6)) {
+		const struct in6_addr *ip6 = &addr->u.in.addr.s6.sin6_addr;
+
+		if (memcmp(ip6, &ip6_any, sizeof(struct in6_addr)) != 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/**
+ * Accept a connection and describe the peer as an rspamd address.
+ * The accepted descriptor is switched to non-blocking mode and marked
+ * close-on-exec.
+ * @param sock listening socket
+ * @param target if not NULL, receives the peer address (NULL on failure)
+ * @param hdl optional handler invoked when accept fails with EMFILE/ENFILE
+ * @param hdl_data opaque argument for hdl
+ * @return new descriptor; 0 when nothing could be accepted right now
+ * (EAGAIN/EINTR/EWOULDBLOCK/EMFILE/ENFILE); -1 on other errors
+ */
+gint rspamd_accept_from_socket(gint sock, rspamd_inet_addr_t **target,
+							   rspamd_accept_throttling_handler hdl,
+							   void *hdl_data)
+{
+	union sa_union peer;
+	socklen_t peer_len = sizeof(peer);
+	rspamd_inet_addr_t *addr = NULL;
+	gint nfd, serrno;
+
+	nfd = accept(sock, &peer.sa, &peer_len);
+
+	if (nfd == -1) {
+		if (target) {
+			*target = NULL;
+		}
+
+		if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		if (errno == EMFILE || errno == ENFILE) {
+			/* Temporary disable accept event */
+			if (hdl) {
+				hdl(sock, hdl_data);
+			}
+
+			return 0;
+		}
+
+		return -1;
+	}
+
+	if (peer.sa.sa_family == AF_INET6) {
+		/* Deal with bloody v4 mapped to v6 addresses */
+		static const guint8 zero_prefix[] = {
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+		const guint8 *raw = (const guint8 *) &peer.s6.sin6_addr;
+		gboolean is_mapped;
+
+		is_mapped = memcmp(raw, zero_prefix, sizeof(zero_prefix)) == 0 &&
+					raw[10] == 0xff && raw[11] == 0xff;
+
+		if (is_mapped) {
+			addr = rspamd_inet_addr_create(AF_INET, NULL);
+			memcpy(&addr->u.in.addr.s4.sin_addr, &raw[12],
+				   sizeof(struct in_addr));
+			addr->u.in.addr.s4.sin_port = peer.s6.sin6_port;
+		}
+		else {
+			/* A plain v6 address, or something strange that is not mapped v4 */
+			addr = rspamd_inet_addr_create(AF_INET6, NULL);
+			memcpy(&addr->u.in.addr.s6, &peer.s6,
+				   sizeof(struct sockaddr_in6));
+		}
+	}
+	else {
+		addr = rspamd_inet_addr_create(peer.sa.sa_family, NULL);
+		addr->slen = peer_len;
+
+		if (addr->af != AF_UNIX) {
+			memcpy(&addr->u.in.addr, &peer, MIN(peer_len, sizeof(addr->u.in.addr)));
+		}
+		else {
+			/* For unix sockets the listening socket name is reported */
+			peer_len = sizeof(peer);
+
+			if (getsockname(sock, &peer.sa, &peer_len) != -1) {
+				memcpy(&addr->u.un->addr, &peer.su, MIN(peer_len, sizeof(struct sockaddr_un)));
+			}
+			else {
+				/* Just copy socket address */
+				memcpy(&addr->u.un->addr, &peer.sa, sizeof(struct sockaddr));
+			}
+		}
+	}
+
+	if (rspamd_socket_nonblocking(nfd) < 0) {
+		goto fail;
+	}
+
+	/* Set close on exec */
+	if (fcntl(nfd, F_SETFD, FD_CLOEXEC) == -1) {
+		msg_warn("fcntl failed: %d, '%s'", errno, strerror(errno));
+		goto fail;
+	}
+
+	if (target) {
+		*target = addr;
+	}
+	else {
+		/* Avoid leak */
+		rspamd_inet_address_free(addr);
+	}
+
+	return (nfd);
+
+fail:
+	/* Keep the original error visible to the caller across close() */
+	serrno = errno;
+	close(nfd);
+	errno = serrno;
+	rspamd_inet_address_free(addr);
+
+	return (-1);
+}
+
+/**
+ * Parse a unix socket specification.
+ *
+ * With RSPAMD_INET_ADDRESS_PARSE_REMOTE the whole input is taken as the
+ * socket path. Otherwise the input is split on spaces and commas: the first
+ * token is the path and the following tokens may be `mode=<number>`,
+ * `owner=<user>` and `group=<group>`. An `owner=` token also sets the group to
+ * the user's primary group unless a `group=` token has been seen before it.
+ *
+ * @param target receives the parsed address; if NULL the result is discarded
+ * @param src input string (len bytes)
+ * @param len length of src
+ * @param pool pool used for allocations, or NULL to use the glib heap
+ * @param how parse flags
+ * @return TRUE on success; FALSE on error. When an option is rejected, errno
+ * is EINVAL for a bad mode, the error number reported by the lookup function
+ * for a failed user/group lookup, or ENOENT when the name does not exist.
+ */
+static gboolean
+rspamd_parse_unix_path(rspamd_inet_addr_t **target,
+					   const char *src, gsize len,
+					   rspamd_mempool_t *pool,
+					   enum rspamd_inet_address_parse_flags how)
+{
+	gchar **tokens = NULL, **cur_tok, *p, *endp, *pwbuf = NULL;
+	glong pwlen;
+	gulong mode;
+	gint rc, serrno;
+	struct passwd pw, *ppw;
+	struct group gr, *pgr;
+	rspamd_inet_addr_t *addr;
+	bool has_group = false;
+
+	addr = rspamd_inet_addr_create(AF_UNIX, pool);
+
+	addr->u.un->mode = 00644;
+	addr->u.un->owner = (uid_t) -1;
+	addr->u.un->group = (gid_t) -1;
+
+	if (!(how & RSPAMD_INET_ADDRESS_PARSE_REMOTE)) {
+		tokens = rspamd_string_len_split(src, len, " ,", -1, pool);
+
+		if (tokens[0] == NULL) {
+
+			if (!pool) {
+				rspamd_inet_address_free(addr);
+				g_strfreev(tokens);
+			}
+
+			return FALSE;
+		}
+
+		rspamd_strlcpy(addr->u.un->addr.sun_path, tokens[0],
+					   sizeof(addr->u.un->addr.sun_path));
+#if defined(FREEBSD) || defined(__APPLE__)
+		addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr);
+#endif
+	}
+	else {
+		rspamd_strlcpy(addr->u.un->addr.sun_path, src,
+					   MIN(len + 1, sizeof(addr->u.un->addr.sun_path)));
+#if defined(FREEBSD) || defined(__APPLE__)
+		addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr);
+#endif
+
+		if (target) {
+			rspamd_ip_validate_af(addr);
+			*target = addr;
+		}
+		else {
+			if (!pool) {
+				rspamd_inet_address_free(addr);
+			}
+		}
+
+		return TRUE;
+	}
+
+	/* Skip for remote */
+	cur_tok = &tokens[1];
+#ifdef _SC_GETPW_R_SIZE_MAX
+	pwlen = sysconf(_SC_GETPW_R_SIZE_MAX);
+	if (pwlen <= 0) {
+		pwlen = 8192;
+	}
+#else
+	pwlen = 8192;
+#endif
+
+	pwbuf = g_malloc0(pwlen);
+
+	while (*cur_tok) {
+		if (g_ascii_strncasecmp(*cur_tok, "mode=", sizeof("mode=") - 1) == 0) {
+			p = strchr(*cur_tok, '=');
+			errno = 0;
+			mode = strtoul(p + 1, &endp, 0);
+
+			/*
+			 * Reject empty values, trailing garbage, out-of-range numbers and
+			 * a zero mode (zero was already treated as an error before).
+			 */
+			if (errno != 0 || endp == p + 1 || *endp != '\0' ||
+				mode == 0 || mode > 07777) {
+				msg_err("bad mode: %s", p + 1);
+				errno = EINVAL;
+				goto err;
+			}
+
+			addr->u.un->mode = (gint) mode;
+		}
+		else if (g_ascii_strncasecmp(*cur_tok, "owner=",
+									 sizeof("owner=") - 1) == 0) {
+			p = strchr(*cur_tok, '=');
+			ppw = NULL;
+			/* getpwnam_r reports failures via its return value, not errno */
+			rc = getpwnam_r(p + 1, &pw, pwbuf, pwlen, &ppw);
+
+			if (rc != 0 || ppw == NULL) {
+				msg_err("bad user: %s", p + 1);
+				errno = (rc != 0) ? rc : ENOENT;
+				goto err;
+			}
+			addr->u.un->owner = pw.pw_uid;
+
+			if (!has_group) {
+				addr->u.un->group = pw.pw_gid;
+			}
+		}
+		else if (g_ascii_strncasecmp(*cur_tok, "group=",
+									 sizeof("group=") - 1) == 0) {
+			p = strchr(*cur_tok, '=');
+			pgr = NULL;
+			/* getgrnam_r reports failures via its return value, not errno */
+			rc = getgrnam_r(p + 1, &gr, pwbuf, pwlen, &pgr);
+
+			if (rc != 0 || pgr == NULL) {
+				msg_err("bad group: %s", p + 1);
+				errno = (rc != 0) ? rc : ENOENT;
+				goto err;
+			}
+
+			has_group = true;
+			addr->u.un->group = gr.gr_gid;
+		}
+		cur_tok++;
+	}
+
+	g_free(pwbuf);
+
+	if (!pool) {
+		g_strfreev(tokens);
+	}
+
+	if (target) {
+		rspamd_ip_validate_af(addr);
+		*target = addr;
+	}
+	else {
+		if (!pool) {
+			rspamd_inet_address_free(addr);
+		}
+	}
+
+	return TRUE;
+
+err:
+	/* Cleanup must not hide the error that is reported to the caller */
+	serrno = errno;
+
+	g_free(pwbuf);
+
+	if (!pool) {
+		g_strfreev(tokens);
+		rspamd_inet_address_free(addr);
+	}
+
+	errno = serrno;
+
+	return FALSE;
+}
+
+/**
+ * Parse a dotted-quad IPv4 address ("a.b.c.d", decimal octets 0..255).
+ *
+ * Exactly four non-empty octets are required, so inputs such as "...",
+ * ".1.2.3" or "1.2.3." are rejected.
+ *
+ * @param text input string (must not be NULL)
+ * @param len length of text; 0 means that text is NUL-terminated
+ * @param target receives the 4-byte address in network byte order (must not
+ * be NULL); it is written only on success
+ * @return TRUE if the whole input is a valid IPv4 address
+ */
+gboolean
+rspamd_parse_inet_address_ip4(const guchar *text, gsize len, gpointer target)
+{
+	const guchar *p, *end;
+	guchar c;
+	guint32 addr = 0;
+	guint octet = 0, ndots = 0, ndigits = 0;
+
+	g_assert(text != NULL);
+	g_assert(target != NULL);
+
+	if (len == 0) {
+		len = strlen((const char *) text);
+	}
+
+	end = text + len;
+
+	for (p = text; p < end; p++) {
+		c = *p;
+
+		if (c >= '0' && c <= '9') {
+			octet = octet * 10 + (c - '0');
+
+			if (octet > 255) {
+				return FALSE;
+			}
+
+			ndigits++;
+			continue;
+		}
+
+		if (c == '.') {
+			if (ndigits == 0 || ndots == 3) {
+				/* Empty octet or too many octets */
+				return FALSE;
+			}
+
+			addr = (addr << 8) + octet;
+			octet = 0;
+			ndigits = 0;
+			ndots++;
+			continue;
+		}
+
+		return FALSE;
+	}
+
+	if (ndots != 3 || ndigits == 0) {
+		return FALSE;
+	}
+
+	addr = htonl((addr << 8) + octet);
+	/* memcpy avoids any alignment or aliasing assumption about target */
+	memcpy(target, &addr, sizeof(addr));
+
+	return TRUE;
+}
+
+gboolean
+rspamd_parse_inet_address_ip6(const guchar *text, gsize len, gpointer target)
+{ /*
+ * Parse a textual IPv6 address into the 16 bytes pointed to by target.
+ * text: input string (must not be NULL); len: its length, 0 means that text
+ * is NUL-terminated. A trailing "%scope", a leading "IPv6:" prefix and
+ * surrounding square brackets are stripped before parsing. One empty group
+ * ("::" compression) and a trailing dotted IPv4 part are supported.
+ * Returns TRUE on success; on failure target may have been partially written.
+ */
+ guchar t, *zero = NULL, *s, *d, *addr = target; /* zero: output position of the empty group, if one was seen */
+ const guchar *p, *digit = NULL, *percent; /* digit: start of the text that follows the last ':' */
+ gsize len4 = 0; /* input length that was left when `digit` was recorded */
+ guint n = 8, nibbles = 0, word = 0; /* n: 16-bit groups still available; nibbles: hex digits in the current group */
+
+ g_assert(text != NULL);
+ g_assert(target != NULL);
+
+ p = text;
+ if (len == 0) {
+ len = strlen(text);
+ }
+
+ /* Check IPv6 scope */
+ if ((percent = memchr(p, '%', len)) != NULL && percent > p) {
+ len = percent - p; /* Ignore scope */
+ }
+
+ if (len > sizeof("IPv6:") - 1 &&
+ g_ascii_strncasecmp(p, "IPv6:", sizeof("IPv6:") - 1) == 0) {
+ /* Special case, SMTP conformant IPv6 address */
+ p += sizeof("IPv6:") - 1;
+ len -= sizeof("IPv6:") - 1;
+ }
+
+ if (*p == '[' && len > 1 && p[len - 1] == ']') {
+ /* Strip [] as well */
+ p++;
+ len -= 2;
+ }
+
+ /* Ignore leading colon */
+ if (len > 0 && *p == ':') {
+ p++;
+ len--;
+ }
+
+ for (/* void */; len; len--) {
+ t = *p++;
+
+ if (t == ':') {
+ if (nibbles) { /* a group has just ended: store it */
+ digit = p;
+ len4 = len;
+ *addr++ = (u_char) (word >> 8);
+ *addr++ = (u_char) (word & 0xff);
+
+ if (--n) {
+ nibbles = 0;
+ word = 0;
+ continue;
+ }
+ }
+ else {
+ if (zero == NULL) { /* first empty group: remember where the zero run goes */
+ digit = p;
+ len4 = len;
+ zero = addr;
+ continue;
+ }
+ }
+
+ return FALSE; /* no group slots left, or a second empty group */
+ }
+
+ if (t == '.' && nibbles) {
+ if (n < 2 || digit == NULL) { /* an IPv4 tail needs two free groups and must follow a ':' */
+ return FALSE;
+ }
+
+ /* IPv4 encoded in IPv6 */
+ if (!rspamd_parse_inet_address_ip4(digit, len4 - 1, &word)) {
+ return FALSE;
+ }
+
+ word = ntohl(word); /* back to host order so that the shifts below pick the right bytes */
+ *addr++ = (guchar) ((word >> 24) & 0xff);
+ *addr++ = (guchar) ((word >> 16) & 0xff);
+ n--;
+ break; /* the low 16 bits are stored by the common code after the loop */
+ }
+
+ if (++nibbles > 4) {
+ /* Too many digits */
+ return FALSE;
+ }
+
+ /* Restore from hex */
+ if (t >= '0' && t <= '9') {
+ word = word * 16 + (t - '0');
+ continue;
+ }
+
+ t |= 0x20; /* fold ASCII letters to lower case */
+
+ if (t >= 'a' && t <= 'f') {
+ word = word * 16 + (t - 'a') + 10;
+ continue;
+ }
+
+ return FALSE;
+ }
+
+ if (nibbles == 0 && zero == NULL) { /* input ended without a group and without an empty group */
+ return FALSE;
+ }
+
+ *addr++ = (guchar) (word >> 8); /* store the last group */
+ *addr++ = (guchar) (word & 0xff);
+
+ if (--n) { /* fewer than 8 groups written: valid only with an empty group to expand */
+ if (zero) {
+ n *= 2; /* missing groups -> missing bytes */
+ s = addr - 1;
+ d = s + n;
+ while (s >= zero) { /* shift everything written after the empty group to the end */
+ *d-- = *s--;
+ }
+ memset(zero, 0, n); /* and fill the gap with zeroes */
+
+ return TRUE;
+ }
+ }
+ else {
+ if (zero == NULL) { /* exactly 8 groups and no empty group */
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+/**
+ * Create an address from a parsed IPv6 address, converting IPv4-mapped
+ * addresses (::ffff:a.b.c.d) to plain AF_INET ones. Only the IP is copied.
+ * @param sin6 source IPv6 sockaddr
+ * @param pool pool to allocate from, or NULL to use the glib heap
+ * @return newly created address
+ */
+static rspamd_inet_addr_t *
+rspamd_inet_address_v6_maybe_map(const struct sockaddr_in6 *sin6,
+								 rspamd_mempool_t *pool)
+{
+	/* 10 zero bytes or 80 bits */
+	static const guint8 zero_prefix[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	const guint8 *raw = (const guint8 *) &sin6->sin6_addr;
+	rspamd_inet_addr_t *res;
+
+	if (memcmp(raw, zero_prefix, sizeof(zero_prefix)) == 0 &&
+		raw[10] == 0xff && raw[11] == 0xff) {
+		/* The last four bytes are the IPv4 address */
+		res = rspamd_inet_addr_create(AF_INET, pool);
+		memcpy(&res->u.in.addr.s4.sin_addr, &raw[12],
+			   sizeof(struct in_addr));
+
+		return res;
+	}
+
+	/* A regular v6 address, or something strange that is not mapped v4 */
+	res = rspamd_inet_addr_create(AF_INET6, pool);
+	memcpy(&res->u.in.addr.s6.sin6_addr, &sin6->sin6_addr,
+		   sizeof(struct in6_addr));
+
+	return res;
+}
+
+/**
+ * Store a parsed IPv6 address into an existing address object, converting
+ * IPv4-mapped addresses (::ffff:a.b.c.d) to AF_INET. Sets the IP, `af` and
+ * `slen` fields of addr.
+ * @param sin6 source IPv6 sockaddr
+ * @param addr destination address object
+ */
+static void
+rspamd_inet_address_v6_maybe_map_static(const struct sockaddr_in6 *sin6,
+										rspamd_inet_addr_t *addr)
+{
+	/* 10 zero bytes or 80 bits */
+	static const guint8 zero_prefix[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	const guint8 *raw = (const guint8 *) &sin6->sin6_addr;
+
+	if (memcmp(raw, zero_prefix, sizeof(zero_prefix)) == 0 &&
+		raw[10] == 0xff && raw[11] == 0xff) {
+		/* The last four bytes are the IPv4 address */
+		memcpy(&addr->u.in.addr.s4.sin_addr, &raw[12],
+			   sizeof(struct in_addr));
+		addr->af = AF_INET;
+		addr->slen = sizeof(addr->u.in.addr.s4);
+
+		return;
+	}
+
+	/* A regular v6 address, or something strange that is not mapped v4 */
+	memcpy(&addr->u.in.addr.s6.sin6_addr, &sin6->sin6_addr,
+		   sizeof(struct in6_addr));
+	addr->af = AF_INET6;
+	addr->slen = sizeof(addr->u.in.addr.s6);
+}
+
+/**
+ * Parse a textual address into a newly created address object.
+ *
+ * Accepted forms: a unix socket path starting with '/' or '.' (unless
+ * RSPAMD_INET_ADDRESS_PARSE_NO_UNIX is set), "[v6]" and "[v6]:port", "v4",
+ * "v4:port" and a bare "v6". Ports are parsed only when
+ * RSPAMD_INET_ADDRESS_PARSE_NO_PORT is not set. IPv4-mapped IPv6 addresses
+ * are returned as AF_INET.
+ *
+ * @param target receives the address on success (must not be NULL)
+ * @param src input string (must not be NULL), srclen bytes long
+ * @param srclen input length; an empty input is rejected
+ * @param pool pool to allocate from, or NULL to use the glib heap
+ * @param how parse flags
+ * @return TRUE if src has been parsed
+ */
+static gboolean
+rspamd_parse_inet_address_common(rspamd_inet_addr_t **target,
+								 const char *src,
+								 gsize srclen,
+								 rspamd_mempool_t *pool,
+								 enum rspamd_inet_address_parse_flags how)
+{
+	gboolean ret = FALSE;
+	rspamd_inet_addr_t *addr = NULL;
+	union sa_inet su;
+	const char *end = NULL;
+	char ipbuf[INET6_ADDRSTRLEN + 1];
+	guint iplen;
+	gulong portnum = 0;
+
+	if (srclen == 0) {
+		return FALSE;
+	}
+
+	g_assert(src != NULL);
+	g_assert(target != NULL);
+
+	rspamd_ip_check_ipv6();
+
+	if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_UNIX) &&
+		(src[0] == '/' || src[0] == '.')) {
+		return rspamd_parse_unix_path(target, src, srclen, pool, how);
+	}
+
+	if (src[0] == '[') {
+		const gchar *ip_start;
+		/* Ipv6 address in format [::1]:port or just [::1] */
+		end = memchr(src + 1, ']', srclen - 1);
+
+		if (end == NULL) {
+			return FALSE;
+		}
+
+		iplen = end - src - 1;
+
+		if (iplen == 0 || iplen >= sizeof(ipbuf)) {
+			return FALSE;
+		}
+
+		ip_start = src + 1;
+		rspamd_strlcpy(ipbuf, ip_start, iplen + 1);
+
+		if (rspamd_parse_inet_address_ip6(ipbuf, iplen,
+										  &su.s6.sin6_addr)) {
+			addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool);
+			ret = TRUE;
+		}
+
+		/*
+		 * The input is '[' + ip + ']' + ':' + port, so the port has
+		 * srclen - iplen - 3 characters and starts two bytes after the
+		 * closing bracket. The length check also guarantees that end[1]
+		 * lies inside the input.
+		 */
+		if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_PORT) && ret &&
+			srclen > (gsize) iplen + 3 && end[1] == ':') {
+			/* Port part */
+			rspamd_strtoul(end + 2, srclen - iplen - 3, &portnum);
+			rspamd_inet_address_set_port(addr, portnum);
+		}
+	}
+	else {
+
+		if (!(how & RSPAMD_INET_ADDRESS_PARSE_NO_PORT) &&
+			(end = memchr(src, ':', srclen)) != NULL) {
+			/* This is either port number and ipv4 addr or ipv6 addr */
+			/* Search for another colon */
+			if (memchr(end + 1, ':', srclen - (end - src + 1)) &&
+				rspamd_parse_inet_address_ip6(src, srclen,
+											  &su.s6.sin6_addr)) {
+				addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool);
+				ret = TRUE;
+			}
+			else {
+				/* Not ipv6, so try ip:port */
+				iplen = end - src;
+
+				if (iplen >= sizeof(ipbuf) || iplen <= 1) {
+					return FALSE;
+				}
+				else {
+					rspamd_strlcpy(ipbuf, src, iplen + 1);
+				}
+
+				if (rspamd_parse_inet_address_ip4(ipbuf, iplen,
+												  &su.s4.sin_addr)) {
+					addr = rspamd_inet_addr_create(AF_INET, pool);
+					memcpy(&addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr,
+						   sizeof(struct in_addr));
+					rspamd_strtoul(end + 1, srclen - iplen - 1, &portnum);
+					rspamd_inet_address_set_port(addr, portnum);
+					ret = TRUE;
+				}
+			}
+		}
+		else {
+			if (rspamd_parse_inet_address_ip4(src, srclen, &su.s4.sin_addr)) {
+				addr = rspamd_inet_addr_create(AF_INET, pool);
+				memcpy(&addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr,
+					   sizeof(struct in_addr));
+				ret = TRUE;
+			}
+			else if (rspamd_parse_inet_address_ip6(src, srclen, &su.s6.sin6_addr)) {
+				addr = rspamd_inet_address_v6_maybe_map(&su.s6, pool);
+				ret = TRUE;
+			}
+		}
+	}
+
+	if (ret && target) {
+		*target = addr;
+	}
+
+	return ret;
+}
+
+/**
+ * Parse a textual address into a heap-allocated address object.
+ * @param target receives the address on success
+ * @param src input string
+ * @param srclen length of src
+ * @param how parse flags
+ * @return TRUE if src has been parsed
+ */
+gboolean
+rspamd_parse_inet_address(rspamd_inet_addr_t **target,
+						  const char *src,
+						  gsize srclen,
+						  enum rspamd_inet_address_parse_flags how)
+{
+	/* No pool: the result is allocated with the glib allocator */
+	gboolean parsed = rspamd_parse_inet_address_common(target, src, srclen,
+													   NULL, how);
+
+	return parsed;
+}
+
+/**
+ * Parse a textual address, allocating the result from a memory pool.
+ * @param src input string
+ * @param srclen length of src
+ * @param pool pool passed to the common parser for allocations
+ * @param how parse flags
+ * @return parsed address or NULL if src cannot be parsed
+ */
+rspamd_inet_addr_t *
+rspamd_parse_inet_address_pool(const char *src,
+							   gsize srclen,
+							   rspamd_mempool_t *pool,
+							   enum rspamd_inet_address_parse_flags how)
+{
+	rspamd_inet_addr_t *parsed = NULL;
+
+	if (rspamd_parse_inet_address_common(&parsed, src, srclen, pool, how)) {
+		return parsed;
+	}
+
+	return NULL;
+}
+
+/**
+ * Parse an IP address (optionally with a port) into an existing address
+ * object without allocating memory.
+ *
+ * Accepted forms: "[v6]", "[v6]:port", "v4", "v4:port" and a bare "v6".
+ * IPv4-mapped IPv6 addresses are stored as AF_INET. On success the IP, `af`,
+ * `slen` and, when a port is present, the port of target are set.
+ *
+ * @param src input string (must not be NULL)
+ * @param srclen length of src
+ * @param target address object to fill (must not be NULL)
+ * @return TRUE if src has been parsed
+ */
+gboolean
+rspamd_parse_inet_address_ip(const char *src, gsize srclen,
+							 rspamd_inet_addr_t *target)
+{
+	const char *end;
+	char ipbuf[INET6_ADDRSTRLEN + 1];
+	guint iplen;
+	gulong portnum = 0;
+	gboolean ret = FALSE;
+	union sa_inet su;
+
+	g_assert(target != NULL);
+	g_assert(src != NULL);
+
+	if (src[0] == '[') {
+		/* Ipv6 address in format [::1]:port or just [::1] */
+		end = memchr(src + 1, ']', srclen - 1);
+
+		if (end == NULL) {
+			return FALSE;
+		}
+
+		iplen = end - src - 1;
+
+		if (iplen == 0 || iplen >= sizeof(ipbuf)) {
+			return FALSE;
+		}
+
+		rspamd_strlcpy(ipbuf, src + 1, iplen + 1);
+
+		if (rspamd_parse_inet_address_ip6(ipbuf, iplen,
+										  &su.s6.sin6_addr)) {
+			rspamd_inet_address_v6_maybe_map_static(&su.s6, target);
+			ret = TRUE;
+		}
+
+		/*
+		 * The input is '[' + ip + ']' + ':' + port, so the port has
+		 * srclen - iplen - 3 characters and starts two bytes after the
+		 * closing bracket. The length check also guarantees that end[1]
+		 * lies inside the input.
+		 */
+		if (ret && srclen > (gsize) iplen + 3 && end[1] == ':') {
+			/* Port part */
+			rspamd_strtoul(end + 2, srclen - iplen - 3, &portnum);
+			rspamd_inet_address_set_port(target, portnum);
+		}
+	}
+	else {
+
+		if ((end = memchr(src, ':', srclen)) != NULL) {
+			/* This is either port number and ipv4 addr or ipv6 addr */
+			/* Search for another colon */
+			if (memchr(end + 1, ':', srclen - (end - src + 1)) &&
+				rspamd_parse_inet_address_ip6(src, srclen, &su.s6.sin6_addr)) {
+				rspamd_inet_address_v6_maybe_map_static(&su.s6, target);
+				ret = TRUE;
+			}
+			else {
+				/* Not ipv6, so try ip:port */
+				iplen = end - src;
+
+				if (iplen >= sizeof(ipbuf) || iplen <= 1) {
+					return FALSE;
+				}
+				else {
+					rspamd_strlcpy(ipbuf, src, iplen + 1);
+				}
+
+				if (rspamd_parse_inet_address_ip4(ipbuf, iplen,
+												  &su.s4.sin_addr)) {
+					memcpy(&target->u.in.addr.s4.sin_addr, &su.s4.sin_addr,
+						   sizeof(struct in_addr));
+					target->af = AF_INET;
+					target->slen = sizeof(target->u.in.addr.s4);
+					rspamd_strtoul(end + 1, srclen - iplen - 1, &portnum);
+					rspamd_inet_address_set_port(target, portnum);
+					ret = TRUE;
+				}
+			}
+		}
+		else {
+			if (rspamd_parse_inet_address_ip4(src, srclen, &su.s4.sin_addr)) {
+				memcpy(&target->u.in.addr.s4.sin_addr, &su.s4.sin_addr,
+					   sizeof(struct in_addr));
+				target->af = AF_INET;
+				target->slen = sizeof(target->u.in.addr.s4);
+				ret = TRUE;
+			}
+			else if (rspamd_parse_inet_address_ip6(src, srclen,
+												   &su.s6.sin6_addr)) {
+				rspamd_inet_address_v6_maybe_map_static(&su.s6, target);
+				ret = TRUE;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * This is used to allow rspamd_inet_address_to_string to be used several times
+ * at the same function invocation, like printf("%s -> %s", f(ip1), f(ip2));
+ * Yes, it is bad but it helps to utilise this function without temporary buffers
+ * for up to 5 simultaneous invocations.
+ */
+#define NADDR_BUFS 5
+
+/**
+ * Return the textual form of an address without the port.
+ * IP addresses are rendered into one of NADDR_BUFS static buffers that are
+ * reused in rotation; unix addresses return the stored socket path.
+ * @param addr address to print, may be NULL
+ * @return string that must not be freed by the caller
+ */
+const char *
+rspamd_inet_address_to_string(const rspamd_inet_addr_t *addr)
+{
+	static char addr_str[NADDR_BUFS][INET6_ADDRSTRLEN + 1];
+	static guint cur_addr = 0;
+	char *out;
+
+	if (addr == NULL) {
+		return "<empty inet address>";
+	}
+
+	/* A slot is consumed on every call, whatever the family is */
+	out = addr_str[cur_addr++ % NADDR_BUFS];
+
+	if (addr->af == AF_INET) {
+		return inet_ntop(addr->af, &addr->u.in.addr.s4.sin_addr, out,
+						 INET6_ADDRSTRLEN + 1);
+	}
+
+	if (addr->af == AF_INET6) {
+		return inet_ntop(addr->af, &addr->u.in.addr.s6.sin6_addr, out,
+						 INET6_ADDRSTRLEN + 1);
+	}
+
+	if (addr->af == AF_UNIX) {
+		return addr->u.un->addr.sun_path;
+	}
+
+	return "undefined";
+}
+
+#define PRETTY_IP_BUFSIZE 128
+
+/**
+ * Return a printable form of an address: "ip:port" for IPv4, "[ip]:port"
+ * for IPv6 and "unix:path" for unix sockets. The result lives in one of
+ * NADDR_BUFS static buffers that are reused in rotation.
+ * @param addr address to print, may be NULL
+ * @return string that must not be freed by the caller
+ */
+const char *
+rspamd_inet_address_to_string_pretty(const rspamd_inet_addr_t *addr)
+{
+	static char addr_str[NADDR_BUFS][PRETTY_IP_BUFSIZE];
+	static guint cur_addr = 0;
+	char *out;
+
+	if (addr == NULL) {
+		return "<empty inet address>";
+	}
+
+	out = addr_str[cur_addr++ % NADDR_BUFS];
+
+	if (addr->af == AF_INET) {
+		rspamd_snprintf(out, PRETTY_IP_BUFSIZE, "%s:%d",
+						rspamd_inet_address_to_string(addr),
+						rspamd_inet_address_get_port(addr));
+	}
+	else if (addr->af == AF_INET6) {
+		rspamd_snprintf(out, PRETTY_IP_BUFSIZE, "[%s]:%d",
+						rspamd_inet_address_to_string(addr),
+						rspamd_inet_address_get_port(addr));
+	}
+	else if (addr->af == AF_UNIX) {
+		rspamd_snprintf(out, PRETTY_IP_BUFSIZE, "unix:%s",
+						rspamd_inet_address_to_string(addr));
+	}
+
+	/* For any other family the selected buffer is returned untouched */
+	return out;
+}
+
+/**
+ * Get the port of an address in host byte order.
+ * @param addr address (must not be NULL)
+ * @return port number, or 0 for families other than AF_INET/AF_INET6
+ */
+uint16_t
+rspamd_inet_address_get_port(const rspamd_inet_addr_t *addr)
+{
+	if (addr->af == AF_INET) {
+		return ntohs(addr->u.in.addr.s4.sin_port);
+	}
+
+	if (addr->af == AF_INET6) {
+		return ntohs(addr->u.in.addr.s6.sin6_port);
+	}
+
+	return 0;
+}
+
+/**
+ * Set the port of an IPv4 or IPv6 address; other families are left untouched.
+ * @param addr address to modify (must not be NULL)
+ * @param port port number in host byte order
+ */
+void rspamd_inet_address_set_port(rspamd_inet_addr_t *addr, uint16_t port)
+{
+	if (addr->af == AF_INET) {
+		addr->u.in.addr.s4.sin_port = htons(port);
+	}
+	else if (addr->af == AF_INET6) {
+		addr->u.in.addr.s6.sin6_port = htons(port);
+	}
+}
+
+/**
+ * Create a socket and connect it to the given address.
+ * For unix datagram sockets the client socket is first bound using only the
+ * address family (autobind); a failure of that bind is logged and ignored.
+ * @param addr destination address; NULL yields -1
+ * @param type socket type (SOCK_STREAM, SOCK_DGRAM, ...)
+ * @param async passed to rspamd_socket_create(); when set, a connect() that
+ * fails with EINPROGRESS is not treated as an error
+ * @return connected (or connecting) descriptor, or -1 on error with errno
+ * holding the connect() error
+ */
+int rspamd_inet_address_connect(const rspamd_inet_addr_t *addr, gint type,
+								gboolean async)
+{
+	int fd, r, serrno;
+	const struct sockaddr *sa;
+
+	if (addr == NULL) {
+		return -1;
+	}
+
+	fd = rspamd_socket_create(addr->af, type, 0, async);
+	if (fd == -1) {
+		return -1;
+	}
+
+	if (addr->af == AF_UNIX) {
+		sa = (const struct sockaddr *) &addr->u.un->addr;
+
+		if (type == (int) SOCK_DGRAM) {
+			struct sockaddr ca;
+
+			memset(&ca, 0, sizeof(ca));
+			ca.sa_family = AF_UNIX;
+
+			r = bind(fd, &ca, sizeof(sa_family_t));
+			if (r == -1) {
+				msg_info("unix socket client autobind failed: %s, '%s'",
+						 addr->u.un->addr.sun_path, strerror(errno));
+			}
+		}
+	}
+	else {
+		sa = &addr->u.in.addr.sa;
+	}
+
+	r = connect(fd, sa, addr->slen);
+
+	if (r == -1) {
+		if (!async || errno != EINPROGRESS) {
+			/*
+			 * Save errno first: close() and logging may overwrite it, and
+			 * both the log message and the caller need the connect() error.
+			 */
+			serrno = errno;
+			close(fd);
+			msg_info("connect %s failed: %d, '%s'",
+					 rspamd_inet_address_to_string_pretty(addr),
+					 serrno, strerror(serrno));
+			errno = serrno;
+
+			return -1;
+		}
+	}
+
+	return fd;
+}
+
+/**
+ * Create a socket bound to the given address and, for non-datagram sockets,
+ * put it into the listening state.
+ * For unix sockets a writable stale socket file is unlinked before bind and
+ * the configured owner/group/mode are applied afterwards (failures of
+ * chown/chmod are only logged).
+ * @param addr address to bind; NULL yields -1
+ * @param type socket type
+ * @param opts listen options (async, reuseport, nolisten)
+ * @param listen_queue backlog passed to listen()
+ * @return bound descriptor or -1 on error (errno is preserved across close)
+ */
+int rspamd_inet_address_listen(const rspamd_inet_addr_t *addr, gint type,
+							   enum rspamd_inet_address_listen_opts opts,
+							   gint listen_queue)
+{
+	gint fd, r;
+	gint on = 1, serrno;
+	const struct sockaddr *sa;
+	const char *path;
+	gboolean is_unix;
+
+	if (addr == NULL) {
+		return -1;
+	}
+
+	fd = rspamd_socket_create(addr->af, type, 0,
+							  (opts & RSPAMD_INET_ADDRESS_LISTEN_ASYNC));
+	if (fd == -1) {
+		return -1;
+	}
+
+	is_unix = (addr->af == AF_UNIX);
+
+	if (is_unix && access(addr->u.un->addr.sun_path, W_OK) != -1) {
+		/* Unlink old socket */
+		(void) unlink(addr->u.un->addr.sun_path);
+	}
+
+	sa = is_unix ? (const struct sockaddr *) &addr->u.un->addr
+				 : &addr->u.in.addr.sa;
+
+#if defined(SO_REUSEADDR)
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on, sizeof(gint)) == -1) {
+		msg_err("cannot set SO_REUSEADDR on %s (fd=%d): %s",
+				rspamd_inet_address_to_string_pretty(addr),
+				fd, strerror(errno));
+		goto err;
+	}
+#endif
+
+#if defined(SO_REUSEPORT) && defined(LINUX)
+	if (opts & RSPAMD_INET_ADDRESS_LISTEN_REUSEPORT) {
+		on = 1;
+
+		if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (const void *) &on, sizeof(gint)) == -1) {
+			msg_err("cannot set SO_REUSEPORT on %s (fd=%d): %s",
+					rspamd_inet_address_to_string_pretty(addr),
+					fd, strerror(errno));
+			goto err;
+		}
+	}
+#endif
+
+#ifdef HAVE_IPV6_V6ONLY
+	if (addr->af == AF_INET6) {
+		/* We need to set this flag to avoid errors */
+		on = 1;
+#ifdef SOL_IPV6
+		(void) setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, (const void *) &on, sizeof(gint));
+#elif defined(IPPROTO_IPV6)
+		(void) setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *) &on, sizeof(gint));
+#endif
+	}
+#endif
+
+	r = bind(fd, sa, addr->slen);
+
+	if (r == -1 &&
+		(!(opts & RSPAMD_INET_ADDRESS_LISTEN_ASYNC) || errno != EINPROGRESS)) {
+		msg_warn("bind %s failed: %d, '%s'",
+				 rspamd_inet_address_to_string_pretty(addr),
+				 errno,
+				 strerror(errno));
+
+		goto err;
+	}
+
+	if (is_unix) {
+		path = addr->u.un->addr.sun_path;
+		/* Try to set mode and owner */
+
+		if (addr->u.un->owner != (uid_t) -1 || addr->u.un->group != (gid_t) -1) {
+			if (chown(path, addr->u.un->owner, addr->u.un->group) == -1) {
+				msg_info("cannot change owner for %s to %d:%d: %s",
+						 path, addr->u.un->owner, addr->u.un->group,
+						 strerror(errno));
+			}
+		}
+
+		if (chmod(path, addr->u.un->mode) == -1) {
+			msg_info("cannot change mode for %s to %od %s",
+					 path, addr->u.un->mode, strerror(errno));
+		}
+	}
+
+	/* Datagram sockets are never put into the listening state */
+	if (type != (int) SOCK_DGRAM &&
+		!(opts & RSPAMD_INET_ADDRESS_LISTEN_NOLISTEN)) {
+		r = listen(fd, listen_queue);
+
+		if (r == -1) {
+			msg_warn("listen %s failed: %d, '%s'",
+					 rspamd_inet_address_to_string_pretty(addr),
+					 errno, strerror(errno));
+
+			goto err;
+		}
+	}
+
+	return fd;
+
+err:
+	/* Error path */
+	serrno = errno;
+
+	if (fd != -1) {
+		close(fd);
+	}
+
+	errno = serrno;
+
+	return -1;
+}
+
+/**
+ * recvfrom() wrapper that optionally reports the sender as an rspamd address.
+ * @param fd socket to read from
+ * @param buf destination buffer
+ * @param len size of buf
+ * @param fl flags passed to recvfrom()
+ * @param target if not NULL, receives a newly allocated sender address that
+ * the caller must release with rspamd_inet_address_free(); it is set to NULL
+ * when recvfrom() fails
+ * @return number of bytes received or -1 on error
+ */
+gssize
+rspamd_inet_address_recvfrom(gint fd, void *buf, gsize len, gint fl,
+							 rspamd_inet_addr_t **target)
+{
+	gssize ret;
+	union sa_union su;
+	socklen_t slen = sizeof(su);
+	rspamd_inet_addr_t *addr = NULL;
+
+	if ((ret = recvfrom(fd, buf, len, fl, &su.sa, &slen)) == -1) {
+		if (target) {
+			*target = NULL;
+		}
+
+		return -1;
+	}
+
+	if (target) {
+		addr = rspamd_inet_addr_create(su.sa.sa_family, NULL);
+		addr->slen = slen;
+
+		if (addr->af == AF_UNIX) {
+			/*
+			 * rspamd_inet_addr_create() has already allocated a zeroed
+			 * addr->u.un, so it must not be allocated again (that would leak
+			 * the first block). Only the bytes actually filled in by
+			 * recvfrom() are copied.
+			 */
+			memcpy(&addr->u.un->addr, &su.su,
+				   MIN(slen, sizeof(struct sockaddr_un)));
+		}
+		else {
+			memcpy(&addr->u.in.addr, &su.sa, MIN(slen, sizeof(addr->u.in.addr)));
+		}
+
+		*target = addr;
+	}
+
+	return (ret);
+}
+
+/**
+ * sendto() wrapper that takes an rspamd address as the destination.
+ * @param fd socket to send on
+ * @param buf data to send
+ * @param len size of buf
+ * @param fl flags passed to sendto()
+ * @param addr destination; NULL yields -1 (errno is set to EADDRNOTAVAIL
+ * where that constant is defined)
+ * @return result of sendto()
+ */
+gssize
+rspamd_inet_address_sendto(gint fd, const void *buf, gsize len, gint fl,
+						   const rspamd_inet_addr_t *addr)
+{
+	const struct sockaddr *dest;
+
+	if (addr == NULL) {
+#ifdef EADDRNOTAVAIL
+		errno = EADDRNOTAVAIL;
+#endif
+		return -1;
+	}
+
+	dest = (addr->af == AF_UNIX) ? (struct sockaddr *) &addr->u.un->addr
+								 : &addr->u.in.addr.sa;
+
+	return sendto(fd, buf, len, fl, dest, addr->slen);
+}
+
+/**
+ * Parse an optional ":port[:priority]" suffix.
+ * @param line suffix to parse; when it is NULL or does not start with ':',
+ * the defaults are used
+ * @param default_port port used when line carries none
+ * @param priority if not NULL, receives the parsed priority (0 by default)
+ * @param out buffer that receives the port printed as a decimal string
+ * @param outlen size of out
+ * @param pool memory pool of the caller (not used for allocation here)
+ * @return TRUE on success, FALSE if the port or the priority is followed by
+ * unexpected characters
+ */
+static gboolean
+rspamd_check_port_priority(const char *line, guint default_port,
+						   guint *priority, gchar *out,
+						   gsize outlen, rspamd_mempool_t *pool)
+{
+	guint port = default_port, prio = 0;
+	gchar *port_end, *prio_end;
+
+	if (line != NULL && line[0] == ':') {
+		errno = 0;
+		port = strtoul(line + 1, &port_end, 10);
+
+		if (port_end != NULL) {
+			if (*port_end == ':') {
+				/* We have priority */
+				prio = strtoul(port_end + 1, &prio_end, 10);
+
+				if (prio_end != NULL && *prio_end != '\0') {
+					msg_err_pool_check(
+						"cannot parse priority: %s, at symbol %c, error: %s",
+						line,
+						*prio_end,
+						strerror(errno));
+
+					return FALSE;
+				}
+			}
+			else if (*port_end != '\0') {
+				msg_err_pool_check(
+					"cannot parse port: %s, at symbol %c, error: %s",
+					line,
+					*port_end,
+					strerror(errno));
+
+				return FALSE;
+			}
+		}
+	}
+
+	if (priority != NULL) {
+		*priority = prio;
+	}
+
+	rspamd_snprintf(out, outlen, "%ud", port);
+
+	return TRUE;
+}
+
+/*
+ * Create the result array on first use; its elements are released with
+ * rspamd_inet_address_free and, if `pool` is set, the array itself is
+ * registered for destruction with that pool.
+ */
+static void
+rspamd_resolve_addrs_ensure_array(GPtrArray **addrs, guint reserve,
+                                  rspamd_mempool_t *pool)
+{
+    if (*addrs != NULL) {
+        return;
+    }
+
+    *addrs = g_ptr_array_new_full(reserve,
+                                  (GDestroyNotify) rspamd_inet_address_free);
+
+    if (pool != NULL) {
+        rspamd_mempool_add_destructor(pool,
+                                      rspamd_ptr_array_free_hard, *addrs);
+    }
+}
+
+/**
+ * Turn the host part `begin[0..len)` into one or more addresses appended to
+ * `*addrs`, using `portbuf` (a decimal string) as the port.
+ *
+ * The host is first tried as a literal address; only if that fails it is
+ * passed to getaddrinfo(3). An empty host (`len == 0`) is passed to
+ * getaddrinfo as a NULL node name.
+ *
+ * @param begin start of the host part (not necessarily zero terminated)
+ * @param len length of the host part
+ * @param addrs in/out array of results, created on demand
+ * @param portbuf port as a decimal string
+ * @param flags extra `ai_flags` for getaddrinfo
+ * @param pool optional pool used for the temporary host copy and for the
+ * array destructor
+ * @return RSPAMD_PARSE_ADDR_NUMERIC for a literal address,
+ * RSPAMD_PARSE_ADDR_RESOLVED after a successful lookup and
+ * RSPAMD_PARSE_ADDR_FAIL (after logging) if the lookup failed
+ */
+static enum rspamd_parse_host_port_result
+rspamd_resolve_addrs(const char *begin, size_t len, GPtrArray **addrs,
+                     const gchar *portbuf, gint flags,
+                     rspamd_mempool_t *pool)
+{
+    struct addrinfo hints, *res = NULL, *cur;
+    rspamd_inet_addr_t *cur_addr = NULL;
+    gint r;
+    guint addr_cnt = 0;
+    gchar *addr_cpy = NULL;
+    enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL;
+
+    rspamd_ip_check_ipv6();
+
+    if (rspamd_parse_inet_address(&cur_addr,
+                                  begin, len, RSPAMD_INET_ADDRESS_PARSE_DEFAULT) &&
+        cur_addr != NULL) {
+        /* Literal address: no resolution is needed */
+        rspamd_resolve_addrs_ensure_array(addrs, 1, pool);
+        rspamd_inet_address_set_port(cur_addr, strtoul(portbuf, NULL, 10));
+        g_ptr_array_add(*addrs, cur_addr);
+
+        return RSPAMD_PARSE_ADDR_NUMERIC;
+    }
+
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_socktype = SOCK_STREAM; /* Type of the socket */
+    hints.ai_flags = AI_NUMERICSERV | flags;
+    hints.ai_family = (ipv6_status == RSPAMD_IPV6_SUPPORTED) ? AF_UNSPEC : AF_INET;
+
+    if (len > 0) {
+        /* getaddrinfo needs a zero terminated node name */
+        if (pool) {
+            addr_cpy = rspamd_mempool_alloc(pool, len + 1);
+        }
+        else {
+            addr_cpy = g_malloc(len + 1);
+        }
+
+        rspamd_strlcpy(addr_cpy, begin, len + 1);
+    }
+    /* Otherwise it will be NULL */
+
+    r = getaddrinfo(addr_cpy, portbuf, &hints, &res);
+
+    if (r == 0) {
+        /* Count the results first to size the array in one go */
+        for (cur = res; cur != NULL; cur = cur->ai_next) {
+            addr_cnt++;
+        }
+
+        rspamd_resolve_addrs_ensure_array(addrs, addr_cnt, pool);
+
+        for (cur = res; cur != NULL; cur = cur->ai_next) {
+            cur_addr = rspamd_inet_address_from_sa(cur->ai_addr,
+                                                   cur->ai_addrlen);
+
+            if (cur_addr != NULL) {
+                g_ptr_array_add(*addrs, cur_addr);
+            }
+        }
+
+        freeaddrinfo(res);
+        ret = RSPAMD_PARSE_ADDR_RESOLVED;
+    }
+    else {
+        /*
+         * getaddrinfo may fail even for a NULL node name (e.g. a service or
+         * resource error), so report it as a failure instead of aborting
+         */
+        msg_err_pool_check("address resolution for %s failed: %s",
+                           addr_cpy ? addr_cpy : "(empty host name)",
+                           gai_strerror(r));
+    }
+
+    if (pool == NULL) {
+        g_free(addr_cpy);
+    }
+
+    return ret;
+}
+
+enum rspamd_parse_host_port_result
+rspamd_parse_host_port_priority(const gchar *str,
+ GPtrArray **addrs,
+ guint *priority,
+ gchar **name_ptr,
+ guint default_port,
+ gboolean allow_listen,
+ rspamd_mempool_t *pool)
+{
+ gchar portbuf[8]; /* port as a decimal string, filled by rspamd_check_port_priority */
+ const gchar *p, *name = NULL;
+ gsize namelen;
+ rspamd_inet_addr_t *cur_addr = NULL;
+ enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL;
+ union sa_union su;
+
+ /*
+ * Parse the address definition `str` and append the resulting addresses
+ * to `*addrs`; the array is created on demand and, if `pool` is set,
+ * registered for destruction with that pool.
+ *
+ * The following forms are recognised, checked in this order:
+ * 1) '*', '*v4' or '*v6' (only if `allow_listen` is set): means listening
+ * on any address, optionally restricted to one family
+ * 2) \[ipv6\]: check for '[' at the beginning
+ * 3) Unix socket: check for '/' or '.' at the beginning of the string
+ * 4) ip|host[:port[:priority]]
+ *
+ * Forms 1, 2 and 4 accept an optional `:port[:priority]` suffix handled by
+ * rspamd_check_port_priority, which also stores the priority in `*priority`.
+ *
+ * Returns RSPAMD_PARSE_ADDR_FAIL for a malformed definition,
+ * RSPAMD_PARSE_ADDR_NUMERIC for forms 1 and 3, and whatever
+ * rspamd_resolve_addrs reports for forms 2 and 4.
+ *
+ * If `name_ptr` is not NULL it receives a copy of the name part, allocated
+ * from `pool` or with g_malloc when `pool` is NULL. It is left untouched on
+ * the early error returns, but it is filled even if the resolution failed.
+ */
+
+ if (allow_listen && str[0] == '*') {
+ bool v4_any = true, v6_any = true;
+
+ p = &str[1];
+
+ if (g_ascii_strncasecmp(p, "v4", 2) == 0) {
+ p += 2;
+ name = "*v4";
+ v6_any = false;
+ }
+ else if (g_ascii_strncasecmp(p, "v6", 2) == 0) {
+ p += 2;
+ name = "*v6";
+ v4_any = false;
+ }
+ else {
+ name = "*";
+ }
+
+ if (!rspamd_check_port_priority(p, default_port, priority,
+ portbuf, sizeof(portbuf), pool)) {
+ return ret;
+ }
+
+ if (*addrs == NULL) {
+ /* NOTE(review): unlike the other branches, no element destructor is installed when pool is NULL - confirm this is intended */
+ *addrs = g_ptr_array_new_full(1,
+ pool == NULL ? NULL : (GDestroyNotify) rspamd_inet_address_free);
+
+ if (pool != NULL) {
+ rspamd_mempool_add_destructor(pool,
+ rspamd_ptr_array_free_hard, *addrs);
+ }
+ }
+
+ if (v4_any) {
+ cur_addr = rspamd_inet_addr_create(AF_INET, NULL);
+ rspamd_parse_inet_address_ip4("0.0.0.0",
+ sizeof("0.0.0.0") - 1, &su.s4.sin_addr);
+ memcpy(&cur_addr->u.in.addr.s4.sin_addr, &su.s4.sin_addr,
+ sizeof(struct in_addr));
+ rspamd_inet_address_set_port(cur_addr,
+ strtoul(portbuf, NULL, 10));
+ g_ptr_array_add(*addrs, cur_addr);
+ }
+ if (v6_any) {
+ cur_addr = rspamd_inet_addr_create(AF_INET6, NULL);
+ rspamd_parse_inet_address_ip6("::",
+ sizeof("::") - 1, &su.s6.sin6_addr);
+ memcpy(&cur_addr->u.in.addr.s6.sin6_addr, &su.s6.sin6_addr,
+ sizeof(struct in6_addr));
+ rspamd_inet_address_set_port(cur_addr,
+ strtoul(portbuf, NULL, 10));
+ g_ptr_array_add(*addrs, cur_addr);
+ }
+
+ namelen = strlen(name);
+ ret = RSPAMD_PARSE_ADDR_NUMERIC; /* No resolution here */
+ }
+ else if (str[0] == '[') {
+ /* This is braced IPv6 address */
+ p = strchr(str, ']');
+
+ if (p == NULL) {
+ msg_err_pool_check("cannot parse address definition %s: %s",
+ str,
+ strerror(EINVAL));
+
+ return ret;
+ }
+
+ name = str + 1;
+ namelen = p - str - 1; /* text between the brackets */
+
+ if (!rspamd_check_port_priority(p + 1, default_port, priority, portbuf,
+ sizeof(portbuf), pool)) {
+ return ret;
+ }
+
+ ret = rspamd_resolve_addrs(name, namelen, addrs, portbuf, 0, pool);
+ }
+ else if (str[0] == '/' || str[0] == '.') {
+ /* Special case of unix socket, as getaddrinfo cannot deal with them */
+ if (*addrs == NULL) {
+ *addrs = g_ptr_array_new_full(1,
+ (GDestroyNotify) rspamd_inet_address_free);
+
+ if (pool != NULL) {
+ rspamd_mempool_add_destructor(pool,
+ rspamd_ptr_array_free_hard, *addrs);
+ }
+ }
+
+ if (!rspamd_parse_inet_address(&cur_addr,
+ str, strlen(str), RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ msg_err_pool_check("cannot parse unix socket definition %s: %s",
+ str,
+ strerror(errno));
+
+ return ret;
+ }
+
+ g_ptr_array_add(*addrs, cur_addr);
+ name = str;
+ namelen = strlen(str);
+ ret = RSPAMD_PARSE_ADDR_NUMERIC; /* No resolution here: unix socket */
+ }
+ else {
+ p = strchr(str, ':');
+
+ if (p == NULL) {
+ /* Just address or IP */
+ name = str;
+ namelen = strlen(str);
+ /* An empty suffix only selects the defaults and cannot fail, hence the unchecked result */
+ rspamd_check_port_priority("", default_port, priority, portbuf,
+ sizeof(portbuf), pool);
+
+ ret = rspamd_resolve_addrs(name, namelen, addrs,
+ portbuf, 0, pool);
+ }
+ else {
+ const gchar *second_semicolon = strchr(p + 1, ':'); /* i.e. the second colon: start of the priority part */
+
+ name = str;
+
+ if (second_semicolon) {
+ /* name + port part excluding priority */
+ namelen = second_semicolon - str;
+ }
+ else {
+ /* Full ip/name + port */
+ namelen = strlen(str);
+ }
+
+ if (!rspamd_check_port_priority(p, default_port, priority, portbuf,
+ sizeof(portbuf), pool)) {
+ return ret;
+ }
+
+ /* Only the part before the first colon is resolved */
+ ret = rspamd_resolve_addrs(str, p - str, addrs,
+ portbuf, 0, pool);
+ }
+ }
+
+ if (name_ptr != NULL) {
+ if (pool) {
+ *name_ptr = rspamd_mempool_alloc(pool, namelen + 1);
+ }
+ else {
+ *name_ptr = g_malloc(namelen + 1);
+ }
+
+ rspamd_strlcpy(*name_ptr, name, namelen + 1);
+ }
+
+ return ret;
+}
+
+/**
+ * Return the raw address bytes of `addr` for use as a lookup key.
+ *
+ * For AF_INET and AF_INET6 the returned pointer refers to the address bytes
+ * stored inside `addr`. Unix sockets are represented by the IPv4 loopback
+ * address 127.0.0.1. Any other family yields NULL and a zero length.
+ *
+ * @param addr address to inspect (must not be NULL)
+ * @param klen receives the key length in bytes (must not be NULL)
+ * @return pointer to the key bytes or NULL; the caller must not free it
+ */
+guchar *
+rspamd_inet_address_get_hash_key(const rspamd_inet_addr_t *addr, guint *klen)
+{
+    /*
+     * 127.0.0.1 spelled byte by byte, i.e. in network byte order like the
+     * sin_addr bytes returned for AF_INET. INADDR_LOOPBACK is a host order
+     * constant and would give 1.0.0.127 on little endian machines.
+     */
+    static guchar local[sizeof(struct in_addr)] = {127, 0, 0, 1};
+    guchar *res = NULL;
+
+    g_assert(addr != NULL);
+    g_assert(klen != NULL);
+
+    if (addr->af == AF_INET) {
+        *klen = sizeof(struct in_addr);
+        res = (guchar *) &addr->u.in.addr.s4.sin_addr;
+    }
+    else if (addr->af == AF_INET6) {
+        *klen = sizeof(struct in6_addr);
+        res = (guchar *) &addr->u.in.addr.s6.sin6_addr;
+    }
+    else if (addr->af == AF_UNIX) {
+        *klen = sizeof(local);
+        res = local;
+    }
+    else {
+        *klen = 0;
+        res = NULL;
+    }
+
+    return res;
+}
+
+
+/**
+ * Create an address of family `af`, optionally initialised from `init`.
+ *
+ * `init` is interpreted according to the family: a path string for AF_UNIX,
+ * a struct in_addr for AF_INET and a struct in6_addr for AF_INET6. It is
+ * ignored for any other family.
+ */
+rspamd_inet_addr_t *
+rspamd_inet_address_new(int af, const void *init)
+{
+    rspamd_inet_addr_t *addr = rspamd_inet_addr_create(af, NULL);
+
+    if (init == NULL) {
+        /* Nothing to copy in */
+        return addr;
+    }
+
+    switch (af) {
+    case AF_UNIX:
+        /* Init is a path */
+        rspamd_strlcpy(addr->u.un->addr.sun_path, init,
+                       sizeof(addr->u.un->addr.sun_path));
+#if defined(FREEBSD) || defined(__APPLE__)
+        addr->u.un->addr.sun_len = SUN_LEN(&addr->u.un->addr);
+#endif
+        break;
+    case AF_INET:
+        memcpy(&addr->u.in.addr.s4.sin_addr, init, sizeof(struct in_addr));
+        break;
+    case AF_INET6:
+        memcpy(&addr->u.in.addr.s6.sin6_addr, init, sizeof(struct in6_addr));
+        break;
+    default:
+        break;
+    }
+
+    return addr;
+}
+
+/**
+ * Create an address from a generic socket address.
+ *
+ * Only AF_UNIX, AF_INET and AF_INET6 are handled; the size checks and the
+ * unsupported-family case are enforced with assertions.
+ */
+rspamd_inet_addr_t *
+rspamd_inet_address_from_sa(const struct sockaddr *sa, socklen_t slen)
+{
+    rspamd_inet_addr_t *addr;
+
+    g_assert(sa != NULL);
+    /* Address of an AF_UNIX socket can be tiny */
+    g_assert(slen >= sizeof(sa_family_t) + 1);
+
+    addr = rspamd_inet_addr_create(sa->sa_family, NULL);
+
+    switch (sa->sa_family) {
+    case AF_UNIX: {
+        /* Init is a path */
+        const struct sockaddr_un *un = (const struct sockaddr_un *) sa;
+
+        g_assert(slen >= SUN_LEN(un));
+        g_assert(slen <= sizeof(addr->u.un->addr));
+
+        /* sun_path can legally contain intermittent NULL bytes */
+        memcpy(&addr->u.un->addr, un, slen);
+
+        /* length of AF_UNIX addresses is variable */
+        addr->slen = slen;
+        break;
+    }
+    case AF_INET:
+        g_assert(slen >= sizeof(struct sockaddr_in));
+        memcpy(&addr->u.in.addr.s4, sa, sizeof(struct sockaddr_in));
+        break;
+    case AF_INET6:
+        g_assert(slen >= sizeof(struct sockaddr_in6));
+        memcpy(&addr->u.in.addr.s6, sa, sizeof(struct sockaddr_in6));
+        break;
+    default:
+        /* XXX: currently we cannot deal with other AF */
+        g_assert(0);
+        break;
+    }
+
+    return addr;
+}
+
+/**
+ * Create an address from an rdns reply entry.
+ *
+ * Only A and AAAA entries are converted; the port is left as created.
+ *
+ * @return new address, or NULL for any other entry type
+ */
+rspamd_inet_addr_t *
+rspamd_inet_address_from_rnds(const struct rdns_reply_entry *rep)
+{
+    rspamd_inet_addr_t *addr;
+
+    g_assert(rep != NULL);
+
+    if (rep->type == RDNS_REQUEST_A) {
+        addr = rspamd_inet_addr_create(AF_INET, NULL);
+        memcpy(&addr->u.in.addr.s4.sin_addr, &rep->content.a.addr,
+               sizeof(struct in_addr));
+
+        return addr;
+    }
+
+    if (rep->type == RDNS_REQUEST_AAAA) {
+        addr = rspamd_inet_addr_create(AF_INET6, NULL);
+        memcpy(&addr->u.in.addr.s6.sin6_addr, &rep->content.aaa.addr,
+               sizeof(struct in6_addr));
+
+        return addr;
+    }
+
+    return NULL;
+}
+
+/**
+ * Zero the host bits of `addr`, keeping the leading `mask` bits.
+ *
+ * Nothing happens if `addr` is NULL, `mask` is 0, the family is neither
+ * AF_INET nor AF_INET6, or `mask` exceeds the address width (32 or 128).
+ */
+void rspamd_inet_address_apply_mask(rspamd_inet_addr_t *addr, guint mask)
+{
+    if (mask == 0 || addr == NULL) {
+        return;
+    }
+
+    if (addr->af == AF_INET && mask <= 32) {
+        addr->u.in.addr.s4.sin_addr.s_addr &= htonl(G_MAXUINT32 << (32 - mask));
+    }
+    else if (addr->af == AF_INET6 && mask <= 128) {
+        /* Walk the four 32 bit words from the last one towards the first */
+        guint32 *word = ((uint32_t *) &addr->u.in.addr.s6.sin6_addr) + 3;
+        guint host_bits = 128 - mask;
+
+        while (host_bits >= 32) {
+            /* This word consists of host bits only */
+            host_bits -= 32;
+            *word = 0;
+            word--;
+        }
+
+        /* Partially masked word (a no-op when host_bits is 0) */
+        *word &= htonl(G_MAXUINT32 << host_bits);
+    }
+}
+
+/*
+ * Rank of an address family used to order addresses of different families:
+ * 2 for AF_UNIX, 1 for AF_INET and 0 for everything else.
+ */
+static gint
+rspamd_inet_address_af_order(const rspamd_inet_addr_t *addr)
+{
+    if (addr->af == AF_UNIX) {
+        return 2;
+    }
+
+    if (addr->af == AF_INET) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Three-way comparison of two addresses.
+ *
+ * Addresses of different families are ordered by family rank (see
+ * rspamd_inet_address_af_order). Within AF_INET/AF_INET6 the ports are
+ * compared first when `compare_ports` is set, then the address bytes.
+ * Unix sockets are compared by path.
+ *
+ * @return <0, 0 or >0
+ */
+gint rspamd_inet_address_compare(const rspamd_inet_addr_t *a1,
+                                 const rspamd_inet_addr_t *a2, gboolean compare_ports)
+{
+    g_assert(a1 != NULL);
+    g_assert(a2 != NULL);
+
+    if (a1->af != a2->af) {
+        return (rspamd_inet_address_af_order(a2) -
+                rspamd_inet_address_af_order(a1));
+    }
+
+    if (a1->af == AF_INET) {
+        if (compare_ports &&
+            a1->u.in.addr.s4.sin_port != a2->u.in.addr.s4.sin_port) {
+            return a1->u.in.addr.s4.sin_port - a2->u.in.addr.s4.sin_port;
+        }
+
+        return memcmp(&a1->u.in.addr.s4.sin_addr,
+                      &a2->u.in.addr.s4.sin_addr, sizeof(struct in_addr));
+    }
+
+    if (a1->af == AF_INET6) {
+        if (compare_ports &&
+            a1->u.in.addr.s6.sin6_port != a2->u.in.addr.s6.sin6_port) {
+            return a1->u.in.addr.s6.sin6_port - a2->u.in.addr.s6.sin6_port;
+        }
+
+        return memcmp(&a1->u.in.addr.s6.sin6_addr,
+                      &a2->u.in.addr.s6.sin6_addr, sizeof(struct in6_addr));
+    }
+
+    if (a1->af == AF_UNIX) {
+        return strncmp(a1->u.un->addr.sun_path,
+                       a2->u.un->addr.sun_path, sizeof(a1->u.un->addr.sun_path));
+    }
+
+    /* Unknown family: compare the raw storage */
+    return memcmp(&a1->u.in, &a2->u.in, sizeof(a1->u.in));
+}
+
+/**
+ * Comparator taking pointers to address pointers; ports are ignored.
+ */
+gint rspamd_inet_address_compare_ptr(gconstpointer a1,
+                                     gconstpointer a2)
+{
+    const rspamd_inet_addr_t *const *lhs = a1;
+    const rspamd_inet_addr_t *const *rhs = a2;
+
+    return rspamd_inet_address_compare(*lhs, *rhs, FALSE);
+}
+
+/**
+ * Duplicate `addr` into a new address created with `pool`.
+ *
+ * @return the copy, or NULL if `addr` is NULL
+ */
+rspamd_inet_addr_t *
+rspamd_inet_address_copy(const rspamd_inet_addr_t *addr, rspamd_mempool_t *pool)
+{
+    rspamd_inet_addr_t *clone;
+
+    if (addr == NULL) {
+        return NULL;
+    }
+
+    clone = rspamd_inet_addr_create(addr->af, pool);
+
+    if (clone->af != AF_UNIX) {
+        memcpy(&clone->u.in, &addr->u.in, sizeof(addr->u.in));
+
+        return clone;
+    }
+
+    /* Unix socket data sits behind the u.un pointer, copy the pointee */
+    memcpy(clone->u.un, addr->u.un, sizeof(*addr->u.un));
+
+    return clone;
+}
+
+/**
+ * Return the address family stored in `addr` (which must not be NULL).
+ */
+gint rspamd_inet_address_get_af(const rspamd_inet_addr_t *addr)
+{
+    gint family;
+
+    g_assert(addr != NULL);
+    family = addr->af;
+
+    return family;
+}
+
+/**
+ * Expose the socket address stored in `addr`.
+ *
+ * @param addr address to inspect (must not be NULL)
+ * @param sz receives the stored address length; it is written
+ * unconditionally, so it must not be NULL
+ * @return pointer to the sockaddr held by `addr`
+ */
+struct sockaddr *
+rspamd_inet_address_get_sa(const rspamd_inet_addr_t *addr,
+                           socklen_t *sz)
+{
+    g_assert(addr != NULL);
+
+    /* The length is kept in one place for every family */
+    *sz = addr->slen;
+
+    return (addr->af == AF_UNIX) ? (struct sockaddr *) &addr->u.un->addr
+                                 : (struct sockaddr *) &addr->u.in.addr.sa;
+}
+
+
+/**
+ * Hash an address without taking its port into account.
+ *
+ * Unix sockets hash the family together with the whole unix address
+ * structure; other families hash the family and the address bytes.
+ */
+guint rspamd_inet_address_hash(gconstpointer a)
+{
+    const rspamd_inet_addr_t *addr = a;
+    struct {
+        gchar buf[sizeof(struct in6_addr)]; /* 16 bytes */
+        int af;
+    } key;
+    gint32 hv;
+
+    if (addr->af == AF_UNIX && addr->u.un) {
+        rspamd_cryptobox_fast_hash_state_t st;
+
+        rspamd_cryptobox_fast_hash_init(&st, rspamd_hash_seed());
+        rspamd_cryptobox_fast_hash_update(&st, &addr->af, sizeof(addr->af));
+        rspamd_cryptobox_fast_hash_update(&st, addr->u.un, sizeof(*addr->u.un));
+
+        return rspamd_cryptobox_fast_hash_final(&st);
+    }
+
+    /* Clear everything, padding included, so that equal keys hash equally */
+    memset(&key, 0, sizeof(key));
+    key.af = addr->af;
+
+    /* We ignore port part here */
+    if (addr->af == AF_INET) {
+        memcpy(key.buf, &addr->u.in.addr.s4.sin_addr,
+               sizeof(addr->u.in.addr.s4.sin_addr));
+    }
+    else {
+        memcpy(key.buf, &addr->u.in.addr.s6.sin6_addr,
+               sizeof(addr->u.in.addr.s6.sin6_addr));
+    }
+
+    hv = rspamd_cryptobox_fast_hash(&key, sizeof(key),
+                                    rspamd_hash_seed());
+
+    return hv;
+}
+
+/**
+ * Hash an address together with its port.
+ *
+ * Unix sockets hash the family together with the whole unix address
+ * structure; other families hash the family, the port and the address bytes.
+ */
+guint rspamd_inet_address_port_hash(gconstpointer a)
+{
+    const rspamd_inet_addr_t *addr = a;
+    struct {
+        gchar buf[sizeof(struct in6_addr)]; /* 16 bytes */
+        int port;
+        int af;
+    } key;
+    gint32 hv;
+
+    if (addr->af == AF_UNIX && addr->u.un) {
+        rspamd_cryptobox_fast_hash_state_t st;
+
+        rspamd_cryptobox_fast_hash_init(&st, rspamd_hash_seed());
+        rspamd_cryptobox_fast_hash_update(&st, &addr->af, sizeof(addr->af));
+        rspamd_cryptobox_fast_hash_update(&st, addr->u.un, sizeof(*addr->u.un));
+
+        return rspamd_cryptobox_fast_hash_final(&st);
+    }
+
+    /* Clear everything, padding included, so that equal keys hash equally */
+    memset(&key, 0, sizeof(key));
+    key.af = addr->af;
+
+    /* We consider port part here */
+    if (addr->af == AF_INET) {
+        memcpy(key.buf, &addr->u.in.addr.s4.sin_addr,
+               sizeof(addr->u.in.addr.s4.sin_addr));
+        key.port = addr->u.in.addr.s4.sin_port;
+    }
+    else {
+        memcpy(key.buf, &addr->u.in.addr.s6.sin6_addr,
+               sizeof(addr->u.in.addr.s6.sin6_addr));
+        key.port = addr->u.in.addr.s6.sin6_port;
+    }
+
+    hv = rspamd_cryptobox_fast_hash(&key, sizeof(key),
+                                    rspamd_hash_seed());
+
+    return hv;
+}
+
+/**
+ * Equality predicate for two addresses; ports are ignored.
+ */
+gboolean
+rspamd_inet_address_equal(gconstpointer a, gconstpointer b)
+{
+    const rspamd_inet_addr_t *lhs = a;
+    const rspamd_inet_addr_t *rhs = b;
+    gint cmp = rspamd_inet_address_compare(lhs, rhs, FALSE);
+
+    return cmp == 0;
+}
+
+/**
+ * Equality predicate for two addresses that also compares their ports.
+ */
+gboolean
+rspamd_inet_address_port_equal(gconstpointer a, gconstpointer b)
+{
+    const rspamd_inet_addr_t *lhs = a;
+    const rspamd_inet_addr_t *rhs = b;
+    gint cmp = rspamd_inet_address_compare(lhs, rhs, TRUE);
+
+    return cmp == 0;
+}
+
+#ifndef IN6_IS_ADDR_LOOPBACK /* fallback if the system headers lack it: true for ::1 (first 12 bytes zero, last 32 bit word equal to 1 in network byte order) */
+#define IN6_IS_ADDR_LOOPBACK(a) \
+ ((*(const __uint32_t *) (const void *) (&(a)->s6_addr[0]) == 0) && \
+ (*(const __uint32_t *) (const void *) (&(a)->s6_addr[4]) == 0) && \
+ (*(const __uint32_t *) (const void *) (&(a)->s6_addr[8]) == 0) && \
+ (*(const __uint32_t *) (const void *) (&(a)->s6_addr[12]) == ntohl(1)))
+#endif
+#ifndef IN6_IS_ADDR_LINKLOCAL /* fallback: true for the fe80::/10 prefix */
+#define IN6_IS_ADDR_LINKLOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80))
+#endif
+#ifndef IN6_IS_ADDR_SITELOCAL /* fallback: true for the fec0::/10 prefix */
+#define IN6_IS_ADDR_SITELOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0))
+#endif
+
+/**
+ * Tell whether `addr` is a local one.
+ *
+ * Local means: any unix socket, an IPv4 address in 127.0.0.0/8, or an IPv6
+ * loopback, link-local or site-local address. NULL is not local.
+ */
+gboolean
+rspamd_inet_address_is_local(const rspamd_inet_addr_t *addr)
+{
+    if (addr == NULL) {
+        return FALSE;
+    }
+
+    switch (addr->af) {
+    case AF_UNIX:
+        /* Always true for unix sockets */
+        return TRUE;
+    case AF_INET:
+        if ((ntohl(addr->u.in.addr.s4.sin_addr.s_addr) & 0xff000000) == 0x7f000000) {
+            return TRUE;
+        }
+        break;
+    case AF_INET6: {
+        const struct in6_addr *ip6 = &addr->u.in.addr.s6.sin6_addr;
+
+        if (IN6_IS_ADDR_LOOPBACK(ip6) ||
+            IN6_IS_ADDR_LINKLOCAL(ip6) ||
+            IN6_IS_ADDR_SITELOCAL(ip6)) {
+            return TRUE;
+        }
+        break;
+    }
+    default:
+        break;
+    }
+
+    return FALSE;
+}
+
+/**
+ * Return the address of the `local_addrs` singleton slot so that the caller
+ * can store a value in it.
+ */
+void **
+rspamd_inet_library_init(void)
+{
+    void **slot = &local_addrs;
+
+    return slot;
+}
+
+/**
+ * Return the current value of the `local_addrs` singleton.
+ */
+void *
+rspamd_inet_library_get_lib_ctx(void)
+{
+    void *ctx = local_addrs;
+
+    return ctx;
+}
+
+/**
+ * Library cleanup hook; intentionally empty.
+ */
+void rspamd_inet_library_destroy(void)
+{
+    /* Ugly: local_addrs will actually be freed by config object */
+    return;
+}
+
+/**
+ * Return sizeof(rspamd_inet_addr_t), for callers that only see the opaque
+ * type.
+ */
+gsize rspamd_inet_address_storage_size(void)
+{
+    const gsize storage = sizeof(rspamd_inet_addr_t);
+
+    return storage;
+}
diff --git a/src/libutil/addr.h b/src/libutil/addr.h
new file mode 100644
index 0000000..25a3641
--- /dev/null
+++ b/src/libutil/addr.h
@@ -0,0 +1,356 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ADDR_H_
+#define ADDR_H_
+
+#include "config.h"
+#include "rdns.h"
+
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+#ifdef HAVE_ARPA_INET_H
+#include <arpa/inet.h>
+#endif
+/* unix sockets */
+#ifdef HAVE_SYS_UN_H
+#include <sys/un.h>
+#endif
+
+#include "mem_pool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque address structure: this header only exposes pointers to it, the
+ * layout is private to the implementation
+ */
+typedef struct rspamd_inet_addr_s rspamd_inet_addr_t;
+
+/**
+ * Returns the storage of the global singleton pointer (map for local
+ * addresses), so that the caller can assign it
+ * @return address of the singleton pointer
+ */
+void **rspamd_inet_library_init(void);
+/**
+ * Returns local addresses singleton
+ * @return current value of the singleton pointer
+ */
+void *rspamd_inet_library_get_lib_ctx(void);
+/**
+ * Cleanup library (currently it does nothing)
+ */
+void rspamd_inet_library_destroy(void);
+
+/**
+ * Create new inet address structure based on the address family and opaque init pointer
+ * @param af address family
+ * @param init optional initial value (may be NULL): a path string for AF_UNIX,
+ * a struct in_addr for AF_INET or a struct in6_addr for AF_INET6
+ * @return new inet addr
+ */
+rspamd_inet_addr_t *rspamd_inet_address_new(int af, const void *init);
+
+/**
+ * Create new inet address structure from struct sockaddr
+ * @param sa socket address (must not be NULL); only AF_UNIX, AF_INET and
+ * AF_INET6 are supported, anything else fails an assertion
+ * @param slen length of `sa`, asserted to be large enough for its family
+ * @return new inet addr
+ */
+rspamd_inet_addr_t *rspamd_inet_address_from_sa(const struct sockaddr *sa,
+ socklen_t slen);
+
+/**
+ * Create new inet address from rdns reply
+ * @param rep reply element (must not be NULL)
+ * @return new ipv4 or ipv6 addr (port is NOT set), or NULL if the element is
+ * neither an A nor an AAAA record
+ */
+rspamd_inet_addr_t *rspamd_inet_address_from_rnds(
+ const struct rdns_reply_entry *rep);
+
+/**
+ * Parse string with ipv6 address of length `len` to `target` which should be
+ * at least sizeof (struct in6_addr)
+ * @param text input string
+ * @param len length of `text` (if 0, then `text` must be zero terminated)
+ * @param target target structure
+ * @return TRUE if the address has been parsed, otherwise `target` content is undefined
+ */
+gboolean rspamd_parse_inet_address_ip6(const guchar *text, gsize len,
+ gpointer target);
+
+enum rspamd_inet_address_parse_flags { /* bit flags for the `how` argument of the parsers declared below */
+ RSPAMD_INET_ADDRESS_PARSE_DEFAULT = 0,
+ RSPAMD_INET_ADDRESS_PARSE_REMOTE = 1u << 0u,
+ RSPAMD_INET_ADDRESS_PARSE_NO_UNIX = 1u << 1u,
+ RSPAMD_INET_ADDRESS_PARSE_NO_PORT = 1u << 2u,
+};
+
+/**
+ * Parse string with ipv4 address of length `len` to `target` which should be
+ * at least sizeof (struct in_addr)
+ * @param text input string
+ * @param len length of `text` (if 0, then `text` must be zero terminated)
+ * @param target target structure
+ * @return TRUE if the address has been parsed, otherwise `target` content is undefined
+ */
+gboolean rspamd_parse_inet_address_ip4(const guchar *text, gsize len,
+ gpointer target);
+
+/**
+ * Parse ipv4 or ipv6 address to a static buffer `target`. Does not support Unix sockets
+ * @param src IP string representation
+ * @param srclen length of `src`
+ * @param target caller provided address structure to fill
+ * @return TRUE if the address has been parsed
+ */
+gboolean rspamd_parse_inet_address_ip(const char *src,
+ gsize srclen,
+ rspamd_inet_addr_t *target);
+
+/**
+ * Try to parse address from string
+ * @param target target to fill
+ * @param src IP string representation
+ * @param srclen length of `src`
+ * @param how parse flags (enum rspamd_inet_address_parse_flags)
+ * @return TRUE if addr has been parsed
+ */
+gboolean rspamd_parse_inet_address(rspamd_inet_addr_t **target,
+ const char *src,
+ gsize srclen,
+ enum rspamd_inet_address_parse_flags how);
+
+/**
+ * Use memory pool allocated inet address
+ * @param src IP string representation
+ * @param srclen length of `src`
+ * @param pool memory pool
+ * @param how parse flags (enum rspamd_inet_address_parse_flags)
+ * @return parsed address (presumably NULL if `src` cannot be parsed - confirm in the implementation)
+ */
+rspamd_inet_addr_t *rspamd_parse_inet_address_pool(const char *src,
+ gsize srclen,
+ rspamd_mempool_t *pool,
+ enum rspamd_inet_address_parse_flags how);
+
+/**
+ * Returns string representation of inet address
+ * @param addr
+ * @return statically allocated string pointer (not thread safe)
+ */
+const char *rspamd_inet_address_to_string(const rspamd_inet_addr_t *addr);
+
+/**
+ * Returns pretty string representation of inet address
+ * @param addr
+ * @return statically allocated string pointer (not thread safe)
+ */
+const char *rspamd_inet_address_to_string_pretty(const rspamd_inet_addr_t *addr);
+
+/**
+ * Returns port number for the specified inet address in host byte order
+ * @param addr
+ * @return port number
+ */
+uint16_t rspamd_inet_address_get_port(const rspamd_inet_addr_t *addr);
+
+/**
+ * Returns address family of inet address
+ * @param addr address (must not be NULL)
+ * @return address family (AF_* constant)
+ */
+gint rspamd_inet_address_get_af(const rspamd_inet_addr_t *addr);
+
+/**
+ * Returns sockaddr and size for this address
+ * @param addr address (must not be NULL)
+ * @param sz receives the length of the returned sockaddr (must not be NULL)
+ * @return pointer to the sockaddr stored inside `addr`
+ */
+struct sockaddr *rspamd_inet_address_get_sa(const rspamd_inet_addr_t *addr,
+ socklen_t *sz);
+
+/**
+ * Makes a radix key from inet address: the raw address bytes for AF_INET and
+ * AF_INET6, a loopback IPv4 key for unix sockets
+ * @param addr address (must not be NULL)
+ * @param klen receives the key length in bytes, 0 for unsupported families (must not be NULL)
+ * @return pointer to the key bytes (not to be freed) or NULL for unsupported families
+ */
+guchar *rspamd_inet_address_get_hash_key(const rspamd_inet_addr_t *addr, guint *klen);
+
+/**
+ * Receive data from an unconnected socket and fill the inet_addr structure if needed
+ * @param fd socket
+ * @param buf buffer for the received data
+ * @param len size of `buf`
+ * @param fl flags passed to recvfrom(2)
+ * @param target if not NULL, receives a newly created address of the sender
+ * (NULL is stored on failure)
+ * @return same as recvfrom(2)
+ */
+gssize rspamd_inet_address_recvfrom(gint fd, void *buf, gsize len, gint fl,
+ rspamd_inet_addr_t **target);
+
+/**
+ * Send data via unconnected socket using the specified inet_addr structure
+ * @param fd socket
+ * @param buf data to send
+ * @param len length of `buf`
+ * @param fl flags passed to sendto(2)
+ * @param addr destination address
+ * @return same as sendto(2); -1 if `addr` is NULL
+ */
+gssize rspamd_inet_address_sendto(gint fd, const void *buf, gsize len, gint fl,
+ const rspamd_inet_addr_t *addr);
+
+/**
+ * Set port for inet address
+ * @param addr address to modify
+ * @param port port number (presumably in host byte order, matching
+ * rspamd_inet_address_get_port - confirm in the implementation)
+ */
+void rspamd_inet_address_set_port(rspamd_inet_addr_t *addr, uint16_t port);
+
+/**
+ * Connect to inet_addr address
+ * @param addr address to connect to
+ * @param type presumably the socket type (e.g. SOCK_STREAM) - confirm in the implementation
+ * @param async perform operations asynchronously
+ * @return newly created and connected socket
+ */
+int rspamd_inet_address_connect(const rspamd_inet_addr_t *addr, gint type,
+ gboolean async);
+
+enum rspamd_inet_address_listen_opts { /* bit flags for the `opts` argument of rspamd_inet_address_listen */
+ RSPAMD_INET_ADDRESS_LISTEN_DEFAULT = 0,
+ RSPAMD_INET_ADDRESS_LISTEN_ASYNC = (1u << 0u),
+ RSPAMD_INET_ADDRESS_LISTEN_REUSEPORT = (1u << 1u),
+ RSPAMD_INET_ADDRESS_LISTEN_NOLISTEN = (1u << 2u),
+};
+/**
+ * Listen on a specified inet address
+ * @param addr address to listen on
+ * @param type presumably the socket type (e.g. SOCK_STREAM) - confirm in the implementation
+ * @param opts combination of enum rspamd_inet_address_listen_opts flags
+ * @param listen_queue presumably the backlog passed to listen(2) - confirm in the implementation
+ * @return socket descriptor (failure value is not visible here - confirm in the implementation)
+ */
+int rspamd_inet_address_listen(const rspamd_inet_addr_t *addr, gint type,
+ enum rspamd_inet_address_listen_opts opts,
+ gint listen_queue);
+
+/**
+ * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6
+ * @param addr address to check
+ * @return TRUE if the address is valid
+ */
+gboolean rspamd_ip_is_valid(const rspamd_inet_addr_t *addr);
+
+typedef void (*rspamd_accept_throttling_handler)(gint, void *); /* callback type for the `hdl` argument of rspamd_accept_from_socket */
+
+/**
+ * Accept from listening socket filling addr structure
+ * @param sock listening socket
+ * @param target allocated inet addr structure
+ * @param hdl throttling callback (presumably invoked when accepting has to be
+ * slowed down - confirm in the implementation)
+ * @param hdl_data opaque pointer for `hdl` (presumably passed as its second argument)
+ * @return accepted socket (failure value is not visible here - confirm in the implementation)
+ */
+gint rspamd_accept_from_socket(gint sock,
+ rspamd_inet_addr_t **target,
+ rspamd_accept_throttling_handler hdl,
+ void *hdl_data);
+
+enum rspamd_parse_host_port_result {
+ RSPAMD_PARSE_ADDR_FAIL = 0,
+ RSPAMD_PARSE_ADDR_RESOLVED = 1,
+ RSPAMD_PARSE_ADDR_NUMERIC = 2,
+};
+/**
+ * Parse host[:port[:priority]] line. Besides plain hosts and IP addresses,
+ * braced IPv6 addresses, unix socket paths (starting with '/' or '.') and,
+ * if `allow_listen` is set, '*', '*v4' and '*v6' are accepted
+ * @param str line to parse
+ * @param addrs in/out array of parsed addresses, created if `*addrs` is NULL
+ * @param priority if not NULL, receives the priority (0 if not specified)
+ * @param name if not NULL, receives a copy of the name part (allocated from
+ * `pool`, or with g_malloc if `pool` is NULL)
+ * @param default_port port to use if `str` does not specify one
+ * @param allow_listen accept the '*' forms that mean "any address"
+ * @param pool optional memory pool that owns the results
+ * @return RSPAMD_PARSE_ADDR_FAIL in case of error, RSPAMD_PARSE_ADDR_NUMERIC in case of pure ip/unix socket,
+ * RSPAMD_PARSE_ADDR_RESOLVED if the host name had to be resolved
+ */
+enum rspamd_parse_host_port_result
+rspamd_parse_host_port_priority(const gchar *str,
+ GPtrArray **addrs,
+ guint *priority, gchar **name,
+ guint default_port,
+ gboolean allow_listen,
+ rspamd_mempool_t *pool);
+
+/**
+ * Destroy the specified IP address
+ * @param addr
+ */
+void rspamd_inet_address_free(rspamd_inet_addr_t *addr);
+
+/**
+ * Apply the specified mask to an address (ignored for AF_UNIX)
+ * @param addr address to modify in place (NULL is ignored)
+ * @param mask number of leading bits to keep; 0 or a value larger than the
+ * address width (32 or 128 bits) leaves the address untouched
+ */
+void rspamd_inet_address_apply_mask(rspamd_inet_addr_t *addr, guint mask);
+
+/**
+ * Compare a1 and a2 and return value >0, ==0 and <0 if a1 is more, equal or less than a2 correspondingly.
+ * Addresses of different families are ordered by family: unix sockets first,
+ * then IPv4, then everything else
+ * @param a1 first address (must not be NULL)
+ * @param a2 second address (must not be NULL)
+ * @param compare_ports if TRUE, IPv4 and IPv6 addresses are compared by port
+ * first and by address only when the ports are equal
+ * @return see above
+ */
+gint rspamd_inet_address_compare(const rspamd_inet_addr_t *a1,
+ const rspamd_inet_addr_t *a2, gboolean compare_ports);
+
+/**
+ * Utility function to compare addresses stored in a g_ptr_array (ports are ignored)
+ * @param a1 pointer to the first address pointer
+ * @param a2 pointer to the second address pointer
+ * @return same as rspamd_inet_address_compare
+ */
+gint rspamd_inet_address_compare_ptr(gconstpointer a1,
+ gconstpointer a2);
+
+/**
+ * Performs deep copy of rspamd inet addr
+ * @param addr address to copy
+ * @param pool pool used to create the copy (may be NULL)
+ * @return the copy or NULL if `addr` is NULL
+ */
+rspamd_inet_addr_t *rspamd_inet_address_copy(const rspamd_inet_addr_t *addr, rspamd_mempool_t *pool);
+
+/**
+ * Returns hash for inet address (ignoring port)
+ */
+guint rspamd_inet_address_hash(gconstpointer a);
+/**
+ * Returns hash for inet address taking its port into account
+ */
+guint rspamd_inet_address_port_hash(gconstpointer a);
+
+/**
+ * Returns true if two addresses are equal (ports are ignored)
+ */
+gboolean rspamd_inet_address_equal(gconstpointer a, gconstpointer b);
+/**
+ * Returns true if two addresses and their ports are equal
+ */
+gboolean rspamd_inet_address_port_equal(gconstpointer a, gconstpointer b);
+
+/**
+ * Returns TRUE if an address is a local one: a unix socket, an IPv4 address
+ * in 127.0.0.0/8 or an IPv6 loopback, link-local or site-local address
+ */
+gboolean rspamd_inet_address_is_local(const rspamd_inet_addr_t *addr);
+
+/**
+ * Returns size of storage required to store a complete IP address
+ * @return sizeof (rspamd_inet_addr_t)
+ */
+gsize rspamd_inet_address_storage_size(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ADDR_H_ */
diff --git a/src/libutil/cxx/error.hxx b/src/libutil/cxx/error.hxx
new file mode 100644
index 0000000..4689d42
--- /dev/null
+++ b/src/libutil/cxx/error.hxx
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_ERROR_HXX
+#define RSPAMD_ERROR_HXX
+#pragma once
+
+#include "config.h"
+#include <string>
+#include <string_view>
+#include <cstdint>
+#include <optional>
+
+/***
+ * This unit is used to represent Rspamd C++ errors in a way to interoperate
+ * with C code if needed and avoid allocations for static strings
+ */
+namespace rspamd::util {
+
+enum class error_category : std::uint8_t {// category tag stored in rspamd::util::error
+ INFORMAL,// default for every `error` constructor
+ IMPORTANT,
+ CRITICAL
+};
+
+/**
+ * Error value: a message, a numeric code and a category.
+ *
+ * The message is exposed as a string view. It either refers to a caller
+ * provided static string or to a string owned by this object.
+ */
+struct error {
+public:
+    /**
+     * Construct from a static string; the string must outlive this object
+     * @param msg
+     * @param code
+     * @param category
+     */
+    error(const char *msg, int code, error_category category = error_category::INFORMAL)
+        : error_message(msg), error_code(code), category(category)
+    {
+    }
+    /**
+     * Construct error from a temporary string taking ownership of it
+     * @param msg
+     * @param code
+     * @param category
+     */
+    error(std::string &&msg, int code, error_category category = error_category::INFORMAL)
+        : error_code(code), category(category)
+    {
+        static_storage = std::move(msg);
+        error_message = static_storage.value();
+    }
+    /**
+     * Construct error from another string copying it into own storage
+     * @param msg
+     * @param code
+     * @param category
+     */
+    error(const std::string &msg, int code, error_category category = error_category::INFORMAL)
+        : error_code(code), category(category)
+    {
+        static_storage = msg;
+        error_message = static_storage.value();
+    }
+
+    /**
+     * Copy: an owned message is duplicated, a static one is shared
+     */
+    error(const error &other)
+        : error_code(other.error_code), category(other.category)
+    {
+        if (other.static_storage) {
+            static_storage = other.static_storage;
+            error_message = static_storage.value();
+        }
+        else {
+            error_message = other.error_message;
+        }
+    }
+
+    /**
+     * Move: every member is initialised before delegating to the move
+     * assignment, so no indeterminate value is ever read
+     */
+    error(error &&other) noexcept
+        : error_code(0), category(error_category::INFORMAL)
+    {
+        *this = std::move(other);
+    }
+
+    /**
+     * Move assignment: takes over the message of `other`. If `other` owned
+     * its message, it is left with an empty one; code and category are copied
+     */
+    error &operator=(error &&other) noexcept
+    {
+        if (this == &other) {
+            return *this;
+        }
+
+        if (other.static_storage.has_value()) {
+            /* Take over the owned string and point the view at our storage */
+            static_storage = std::move(other.static_storage);
+            error_message = static_storage.value();
+            /* Do not leave `other` with a view into a moved-from string */
+            other.static_storage.reset();
+            other.error_message = std::string_view{};
+        }
+        else {
+            /*
+             * Static message: drop the string we might own, otherwise a later
+             * copy of this object would bring the stale message back
+             */
+            static_storage.reset();
+            error_message = other.error_message;
+        }
+
+        error_code = other.error_code;
+        category = other.category;
+
+        return *this;
+    }
+
+    /**
+     * Convert into GError using the "rspamd" quark
+     * @return newly allocated GError
+     */
+    auto into_g_error() const -> GError *
+    {
+        return into_g_error(g_quark_from_static_string("rspamd"));
+    }
+
+    /**
+     * Store the result of `into_g_error` in `*err`, unless `err` is null or
+     * `*err` is already set
+     * @param err
+     */
+    auto into_g_error_set(GError **err) const -> void
+    {
+        if (err && *err == nullptr) {
+            *err = into_g_error();
+        }
+    }
+
+    /**
+     * Convert into GError using the given quark
+     * @return newly allocated GError
+     */
+    auto into_g_error(GQuark quark) const -> GError *
+    {
+        /* A string view is not necessarily zero terminated: pass its length */
+        return g_error_new(quark, error_code, "%.*s",
+                           static_cast<int>(error_message.size()),
+                           error_message.data());
+    }
+
+    /**
+     * Store the result of `into_g_error (quark)` in `*err`, unless `err` is
+     * null or `*err` is already set
+     * @param err
+     */
+    auto into_g_error_set(GQuark quark, GError **err) const -> void
+    {
+        if (err && *err == nullptr) {
+            *err = into_g_error(quark);
+        }
+    }
+
+public:
+    std::string_view error_message;
+    int error_code;
+    error_category category;
+
+private:
+    /* Set only when the message is owned by this object */
+    std::optional<std::string> static_storage;
+};
+
+}// namespace rspamd::util
+
+#endif//RSPAMD_ERROR_HXX
diff --git a/src/libutil/cxx/file_util.cxx b/src/libutil/cxx/file_util.cxx
new file mode 100644
index 0000000..2f031f0
--- /dev/null
+++ b/src/libutil/cxx/file_util.cxx
@@ -0,0 +1,457 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "file_util.hxx"
+#include <fmt/core.h>
+#include "libutil/util.h"
+#include "libutil/unix-std.h"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+
+#include "doctest/doctest.h"
+
+namespace rspamd::util {
+
+/*
+ * Opens an existing file (close-on-exec is added when the platform defines
+ * O_CLOEXEC) and caches its stat data.
+ * Returns the wrapped descriptor, or an error carrying the errno of the
+ * failed call (EINVAL for a null name).
+ */
+auto raii_file::open(const char *fname, int flags) -> tl::expected<raii_file, error>
+{
+    int oflags = flags;
+#ifdef O_CLOEXEC
+    oflags |= O_CLOEXEC;
+#endif
+
+    if (fname == nullptr) {
+        return tl::make_unexpected(error{"cannot open file; filename is nullptr", EINVAL, error_category::CRITICAL});
+    }
+
+    auto fd = ::open(fname, oflags);
+
+    if (fd == -1) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot open file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    auto ret = raii_file{fname, fd, false};
+
+    if (fstat(ret.fd, &ret.st) == -1) {
+        const auto serrno = errno;
+        /* `ret` goes out of scope here, which closes the descriptor */
+        return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    return ret;
+}
+
+/*
+ * Opens a file with O_CREAT added to the requested flags (plus O_CLOEXEC
+ * where defined) and caches its stat data.
+ * Returns the wrapped descriptor, or an error carrying the errno of the
+ * failed call (EINVAL for a null name).
+ */
+auto raii_file::create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>
+{
+    int oflags = flags | O_CREAT;
+#ifdef O_CLOEXEC
+    oflags |= O_CLOEXEC;
+#endif
+
+    if (fname == nullptr) {
+        return tl::make_unexpected(error{"cannot create file; filename is nullptr", EINVAL, error_category::CRITICAL});
+    }
+
+    auto fd = ::open(fname, oflags, perms);
+
+    if (fd == -1) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    auto ret = raii_file{fname, fd, false};
+
+    if (fstat(ret.fd, &ret.st) == -1) {
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    return ret;
+}
+
+/*
+ * Exclusively creates a file that is unlinked again when the returned
+ * object is destroyed (the `temp` flag is set).
+ * Returns the wrapped descriptor, or an error carrying the errno of the
+ * failed call (EINVAL for a null name).
+ */
+auto raii_file::create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>
+{
+    /* Exclusive creation must not depend on O_CLOEXEC being available */
+    int oflags = flags | O_CREAT | O_EXCL;
+#ifdef O_CLOEXEC
+    oflags |= O_CLOEXEC;
+#endif
+    if (fname == nullptr) {
+        return tl::make_unexpected(error{"cannot open file; filename is nullptr", EINVAL, error_category::CRITICAL});
+    }
+
+    auto fd = ::open(fname, oflags, perms);
+
+    if (fd == -1) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    auto ret = raii_file{fname, fd, true};
+
+    if (fstat(ret.fd, &ret.st) == -1) {
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}", fname, ::strerror(serrno)), serrno});
+    }
+
+    return ret;
+}
+
+/*
+ * Creates a temporary file from a `g_mkstemp_full` pattern; the file is
+ * unlinked again when the returned object is destroyed.
+ * The caller's pattern is copied, so it is never modified.
+ * Returns the wrapped descriptor, or an error carrying the errno of the
+ * failed call (EINVAL for a null pattern).
+ */
+auto raii_file::mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_file, error>
+{
+    /* Exclusive creation must not depend on O_CLOEXEC being available */
+    int oflags = flags | O_CREAT | O_EXCL;
+#ifdef O_CLOEXEC
+    oflags |= O_CLOEXEC;
+#endif
+    if (pattern == nullptr) {
+        return tl::make_unexpected(error{"cannot open file; pattern is nullptr", EINVAL, error_category::CRITICAL});
+    }
+
+    /* g_mkstemp_full rewrites the template in place, hence the private copy */
+    std::string mutable_pattern = pattern;
+
+    auto fd = g_mkstemp_full(mutable_pattern.data(), oflags, perms);
+
+    if (fd == -1) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot create file {}: {}", pattern, ::strerror(serrno)), serrno});
+    }
+
+    auto ret = raii_file{mutable_pattern.c_str(), fd, true};
+
+    if (fstat(ret.fd, &ret.st) == -1) {
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}",
+                                                     mutable_pattern, ::strerror(serrno)),
+                                         serrno});
+    }
+
+    return ret;
+}
+
+/* Releases the descriptor; a file flagged as temporary is unlinked first */
+raii_file::~raii_file() noexcept
+{
+    if (fd == -1) {
+        /* Nothing is owned (e.g. the object has been moved from) */
+        return;
+    }
+
+    if (temp) {
+        (void) unlink(fname.c_str());
+    }
+
+    close(fd);
+}
+
+/* Re-reads the cached stat data from the descriptor; false when fstat fails */
+auto raii_file::update_stat() noexcept -> bool
+{
+    const auto rc = fstat(fd, &st);
+
+    return rc != -1;
+}
+
+/* Wraps an already opened descriptor and stores the normalised file name */
+raii_file::raii_file(const char *fname, int fd, bool temp)
+    : fd(fd), temp(temp), fname(fname)
+{
+    std::size_t normalized_len;
+
+    /* Normalise the stored copy in place, then cut it to the reported length */
+    rspamd_normalize_path_inplace(this->fname.data(), this->fname.size(), &normalized_len);
+    this->fname.resize(normalized_len);
+}
+
+
+/* Drops the lock while the descriptor is still open; the base class closes it afterwards */
+raii_locked_file::~raii_locked_file() noexcept
+{
+    if (fd == -1) {
+        return;
+    }
+
+    (void) rspamd_file_unlock(fd, FALSE);
+}
+
+/*
+ * Takes a lock on an opened file via `rspamd_file_lock` and, on success,
+ * transfers the descriptor into a `raii_locked_file`.
+ * On failure the error carries the errno observed right after the call.
+ */
+auto raii_locked_file::lock_raii_file(raii_file &&unlocked) -> tl::expected<raii_locked_file, error>
+{
+    if (!rspamd_file_lock(unlocked.get_fd(), TRUE)) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(
+            error{fmt::format("cannot lock file {}: {}", unlocked.get_name(), ::strerror(serrno)), serrno});
+    }
+
+    return raii_locked_file{std::move(unlocked)};
+}
+
+/* Releases the lock and hands the descriptor over to a plain raii_file */
+auto raii_locked_file::unlock() -> raii_file
+{
+    const bool owns_fd = (fd != -1);
+
+    if (owns_fd) {
+        (void) rspamd_file_unlock(fd, FALSE);
+    }
+
+    /* Move out the base subobject only; this object no longer owns the descriptor */
+    raii_file &base = *this;
+
+    return raii_file{std::move(base)};
+}
+
+raii_mmaped_file::raii_mmaped_file(raii_file &&file, void *map, std::size_t sz)
+ : file(std::move(file)), map(map), map_size(sz)
+{
+}
+
+/*
+ * Maps the file with MAP_SHARED starting at `offset` and up to its end.
+ * `flags` is passed to mmap as the protection argument.
+ * Returns the mapping that owns `file`, or an error (EINVAL for an offset
+ * outside of the file, otherwise the errno of the failed call).
+ */
+auto raii_mmaped_file::mmap_shared(raii_file &&file,
+                                   int flags, std::int64_t offset) -> tl::expected<raii_mmaped_file, error>
+{
+    /* Refresh stat first, so the offset is validated against the current size */
+    if (!file.update_stat()) {
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot stat file {}: {}",
+                                                     file.get_name(), ::strerror(serrno)),
+                                         serrno});
+    }
+
+    if (offset < 0 || file.get_stat().st_size < offset) {
+        return tl::make_unexpected(error{
+            fmt::format("cannot mmap file {} due to incorrect offset; offset={}, size={}",
+                        file.get_name(), offset, file.get_size()),
+            EINVAL});
+    }
+
+    const auto map_len = (std::size_t)(file.get_size() - offset);
+    void *map = mmap(nullptr, map_len, flags, MAP_SHARED, file.get_fd(), offset);
+
+    if (map == MAP_FAILED) {
+        /* Save errno before formatting: the formatter may allocate and overwrite it */
+        const auto serrno = errno;
+        return tl::make_unexpected(error{fmt::format("cannot mmap file {}: {}",
+                                                     file.get_name(), ::strerror(serrno)),
+                                         serrno});
+    }
+
+    return raii_mmaped_file{std::move(file), map, map_len};
+}
+
+/* Convenience overload: opens `fname` first and maps the resulting file */
+auto raii_mmaped_file::mmap_shared(const char *fname, int open_flags,
+                                   int mmap_flags, std::int64_t offset) -> tl::expected<raii_mmaped_file, error>
+{
+    auto opened = raii_file::open(fname, open_flags);
+
+    if (opened.has_value()) {
+        return raii_mmaped_file::mmap_shared(std::move(opened.value()), mmap_flags, offset);
+    }
+
+    /* Propagate the open failure unchanged */
+    return tl::make_unexpected(opened.error());
+}
+
+/* Unmaps the region unless ownership of the mapping has been given away */
+raii_mmaped_file::~raii_mmaped_file()
+{
+    if (map == nullptr) {
+        return;
+    }
+
+    munmap(map, map_size);
+}
+
+/*
+ * Move constructor: takes over the file and the mapping of `other`.
+ * `other` is left without a mapping, so its destructor does not unmap.
+ */
+raii_mmaped_file::raii_mmaped_file(raii_mmaped_file &&other) noexcept
+    : file(std::move(other.file)), map(other.map), map_size(other.map_size)
+{
+    /* Assign explicit values instead of swapping with our not yet initialised members */
+    other.map = nullptr;
+    other.map_size = 0;
+}
+
+/*
+ * Creates and locks the temporary file `<fname>.<suffix>`; the sink writes
+ * there until `write_output` renames it to `fname`.
+ */
+auto raii_file_sink::create(const char *fname, int flags, int perms,
+                            const char *suffix) -> tl::expected<raii_file_sink, error>
+{
+    if (fname == nullptr || suffix == nullptr) {
+        return tl::make_unexpected(error{"cannot create file; filename is nullptr", EINVAL, error_category::CRITICAL});
+    }
+
+    auto tmp_fname = fmt::format("{}.{}", fname, suffix);
+    auto file = raii_locked_file::create(tmp_fname.c_str(), flags, perms);
+
+    if (file.has_value()) {
+        return raii_file_sink{std::move(file.value()), fname, std::move(tmp_fname)};
+    }
+
+    /* Propagate the creation/locking failure unchanged */
+    return tl::make_unexpected(file.error());
+}
+
+/*
+ * Renames the temporary file to the final name.
+ * Returns false when the rename fails or when the sink has already been
+ * committed successfully.
+ */
+auto raii_file_sink::write_output() -> bool
+{
+    if (success) {
+        /* The output can be produced only once */
+        return false;
+    }
+
+    success = (rename(tmp_fname.c_str(), output_fname.c_str()) != -1);
+
+    return success;
+}
+
+/* Removes the temporary file unless it has been renamed to the final name */
+raii_file_sink::~raii_file_sink()
+{
+    if (success) {
+        return;
+    }
+
+    /* Unlink sink */
+    unlink(tmp_fname.c_str());
+}
+
+/* Assembles a not yet committed sink from a locked temporary file and both names */
+raii_file_sink::raii_file_sink(raii_locked_file &&_file, const char *_output, std::string &&_tmp_fname)
+    : file(std::move(_file)),
+      output_fname(_output),
+      tmp_fname(std::move(_tmp_fname)),
+      success(false)
+{
+}
+
+/*
+ * Move constructor: takes over the file, both names and the commit state.
+ * The moved-from sink is disarmed so that its destructor never tries to
+ * unlink anything.
+ */
+raii_file_sink::raii_file_sink(raii_file_sink &&other) noexcept
+    : file(std::move(other.file)),
+      output_fname(std::move(other.output_fname)),
+      tmp_fname(std::move(other.tmp_fname)),
+      success(other.success)
+{
+    /*
+     * The content of a moved-from std::string is unspecified, so do not let
+     * the destructor of `other` pass it to unlink().
+     */
+    other.success = true;
+}
+
+namespace tests {
+/*
+ * Test helper: rewinds the descriptor of `f` and reads up to `f.get_size()`
+ * bytes from it; the returned string holds the bytes actually read.
+ */
+template<class T>
+static auto test_read_file(const T &f)
+{
+    auto fd = f.get_fd();
+    (void) ::lseek(fd, 0, SEEK_SET);
+    /* std::string(count, ch): the count goes first, the fill character second */
+    std::string buf((std::size_t) f.get_size(), '\0');
+    const auto nread = ::read(fd, buf.data(), buf.size());
+    /* Drop the part of the buffer that has not been filled (or all of it on error) */
+    buf.resize(nread > 0 ? (std::size_t) nread : 0);
+    return buf;
+}
+/* Test helper: rewinds the descriptor of `f` and writes `buf`; returns the result of write() */
+template<class T>
+static auto test_write_file(const T &f, const std::string_view &buf)
+{
+    const auto out_fd = f.get_fd();
+
+    (void) ::lseek(out_fd, 0, SEEK_SET);
+
+    const auto written = ::write(out_fd, buf.data(), buf.size());
+    return written;
+}
+/*
+ * Test helper: builds `<dir>/<32 random hex characters>[.<extension>]`,
+ * where <dir> is $TMPDIR or the `tmp` directory under the root.
+ */
+auto random_fname(std::string_view extension)
+{
+    const char *env_tmpdir = getenv("TMPDIR");
+    std::string out_fname{env_tmpdir != nullptr ? env_tmpdir : G_DIR_SEPARATOR_S "tmp"};
+    out_fname.append(G_DIR_SEPARATOR_S);
+
+    /* The hex buffer is appended with an explicit length */
+    char hexbuf[32];
+    rspamd_random_hex(hexbuf, sizeof(hexbuf));
+    out_fname.append((const char *) hexbuf, sizeof(hexbuf));
+
+    if (extension.empty()) {
+        return out_fname;
+    }
+
+    out_fname.append(".");
+    out_fname.append(extension);
+
+    return out_fname;
+}
+TEST_SUITE("loked files utils")
+{
+
+ // Creates a temporary locked file twice under the same random name and
+ // checks that it exists inside the scope and is gone once the scope ends
+ TEST_CASE("create and delete file")
+ {
+ auto fname = random_fname("tmp");
+ {
+ auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600);
+ CHECK(raii_locked_file.has_value());
+ CHECK(raii_locked_file.value().get_extension() == "tmp");
+ CHECK(::access(fname.c_str(), R_OK) == 0);
+ }
+ // File must be deleted once the scope above is left
+ auto ret = ::access(fname.c_str(), R_OK);
+ // Capture errno right away, before CHECK gets a chance to change it
+ auto serrno = errno;
+ CHECK(ret == -1);
+ CHECK(serrno == ENOENT);
+ // Create one more time: the name must be free for an exclusive create again
+ {
+ auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600);
+ CHECK(raii_locked_file.has_value());
+ CHECK(::access(fname.c_str(), R_OK) == 0);
+ }
+ ret = ::access(fname.c_str(), R_OK);
+ serrno = errno;
+ CHECK(ret == -1);
+ CHECK(serrno == ENOENT);
+ }
+
+ // Checks that a file which is already held by a raii_locked_file cannot
+ // be opened as a second raii_locked_file, and that the failed attempt
+ // leaves the file in place
+ TEST_CASE("check lock")
+ {
+ auto fname = random_fname("");
+ {
+ auto raii_locked_file = raii_locked_file::create_temp(fname.c_str(), O_RDONLY, 00600);
+ CHECK(raii_locked_file.has_value());
+ // An empty extension was requested, so none must be reported
+ CHECK(raii_locked_file.value().get_extension() == "");
+ CHECK(::access(fname.c_str(), R_OK) == 0);
+ // Second attempt is expected to fail while the first object is alive
+ auto raii_locked_file2 = raii_locked_file::open(fname.c_str(), O_RDONLY);
+ CHECK(!raii_locked_file2.has_value());
+ CHECK(::access(fname.c_str(), R_OK) == 0);
+ }
+ // File must be deleted once the scope above is left
+ auto ret = ::access(fname.c_str(), R_OK);
+ // Capture errno right away, before CHECK gets a chance to change it
+ auto serrno = errno;
+ CHECK(ret == -1);
+ CHECK(serrno == ENOENT);
+ }
+
+ // Test helper: returns the normalised temporary directory ($TMPDIR or the
+ // `tmp` directory under the root), always ending with a directory separator
+ auto get_tmpdir()->std::string
+ {
+     const char *env_tmpdir = getenv("TMPDIR");
+     std::string dir = (env_tmpdir != nullptr) ? env_tmpdir : G_DIR_SEPARATOR_S "tmp";
+
+     // Normalise in place, then cut the string to the reported length
+     std::size_t normalized_len;
+     rspamd_normalize_path_inplace(dir.data(), dir.size(), &normalized_len);
+     dir.resize(normalized_len);
+
+     if (dir.ends_with(G_DIR_SEPARATOR)) {
+         return dir;
+     }
+
+     dir += G_DIR_SEPARATOR;
+
+     return dir;
+ }
+
+ // Creates a locked temporary file from a mkstemp pattern, checks its
+ // directory and that a second locked open fails, then verifies that the
+ // file disappears when the object goes out of scope
+ TEST_CASE("tempfile")
+ {
+ std::string tmpname;
+ const std::string tmpdir{get_tmpdir()};
+ {
+ auto raii_locked_file = raii_locked_file::mkstemp(std::string(tmpdir + G_DIR_SEPARATOR_S + "doctest-XXXXXXXX").c_str(),
+ O_RDONLY, 00600);
+ CHECK(raii_locked_file.has_value());
+ CHECK(raii_locked_file.value().get_dir() == tmpdir);
+ CHECK(access(raii_locked_file.value().get_name().data(), R_OK) == 0);
+ // Second attempt is expected to fail while the first object is alive
+ auto raii_locked_file2 = raii_locked_file::open(raii_locked_file.value().get_name().data(), O_RDONLY);
+ CHECK(!raii_locked_file2.has_value());
+ CHECK(access(raii_locked_file.value().get_name().data(), R_OK) == 0);
+ // Remember the generated name: it is needed after the object is gone
+ tmpname = raii_locked_file.value().get_name();
+ }
+ // File must be deleted once the scope above is left
+ auto ret = ::access(tmpname.c_str(), R_OK);
+ // Capture errno right away, before CHECK gets a chance to change it
+ auto serrno = errno;
+ CHECK(ret == -1);
+ CHECK(serrno == ENOENT);
+ }
+
+ // Writes a small payload to a temporary file, maps it read/write, changes
+ // one byte through the mapping and checks that a second, read-only
+ // mapping of the same file observes the change
+ TEST_CASE("mmap")
+ {
+ std::string tmpname;
+ const std::string tmpdir{get_tmpdir()};
+ {
+ auto raii_file = raii_file::mkstemp(std::string(tmpdir + G_DIR_SEPARATOR_S + "doctest-XXXXXXXX").c_str(),
+ O_RDWR | O_CREAT | O_EXCL, 00600);
+ CHECK(raii_file.has_value());
+ CHECK(raii_file->get_dir() == tmpdir);
+ CHECK(access(raii_file->get_name().data(), R_OK) == 0);
+ // Remember the generated name: the file object is moved away below
+ tmpname = std::string{raii_file->get_name()};
+ char payload[] = {'1', '2', '3'};
+ CHECK(write(raii_file->get_fd(), payload, sizeof(payload)) == sizeof(payload));
+ auto mmapped_file1 = raii_mmaped_file::mmap_shared(std::move(raii_file.value()), PROT_READ | PROT_WRITE);
+ CHECK(mmapped_file1.has_value());
+ // The descriptor has been moved into the mapping object
+ CHECK(!raii_file->is_valid());
+ CHECK(mmapped_file1->get_size() == sizeof(payload));
+ CHECK(memcmp(mmapped_file1->get_map(), payload, sizeof(payload)) == 0);
+ // Modify the first byte through the shared mapping
+ *(char *) mmapped_file1->get_map() = '2';
+ auto mmapped_file2 = raii_mmaped_file::mmap_shared(tmpname.c_str(), O_RDONLY, PROT_READ);
+ CHECK(mmapped_file2.has_value());
+ CHECK(mmapped_file2->get_size() == sizeof(payload));
+ // The second mapping must differ from the original payload and match the first mapping
+ CHECK(memcmp(mmapped_file2->get_map(), payload, sizeof(payload)) != 0);
+ CHECK(memcmp(mmapped_file2->get_map(), mmapped_file1->get_map(), sizeof(payload)) == 0);
+ }
+ // File must be deleted once the scope above is left
+ auto ret = ::access(tmpname.c_str(), R_OK);
+ // Capture errno right away, before CHECK gets a chance to change it
+ auto serrno = errno;
+ CHECK(ret == -1);
+ CHECK(serrno == ENOENT);
+ }
+
+}// TEST_SUITE
+
+}// namespace tests
+
+}// namespace rspamd::util
diff --git a/src/libutil/cxx/file_util.hxx b/src/libutil/cxx/file_util.hxx
new file mode 100644
index 0000000..4528905
--- /dev/null
+++ b/src/libutil/cxx/file_util.hxx
@@ -0,0 +1,312 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_FILE_UTIL_HXX
+#define RSPAMD_FILE_UTIL_HXX
+#pragma once
+
+#include "config.h"
+#include "contrib/expected/expected.hpp"
+#include "libutil/cxx/error.hxx"
+#include <string>
+#include <sys/stat.h>
+
+namespace rspamd::util {
+/**
+ * A simple RAII object that owns a move-only file descriptor.
+ * The descriptor is closed when the object is destroyed; a file created as
+ * temporary is unlinked as well.
+ */
+struct raii_file {
+public:
+    virtual ~raii_file() noexcept;
+
+    /* Factories: each returns either an owning object or an error */
+    static auto open(const char *fname, int flags) -> tl::expected<raii_file, error>;
+    static auto open(const std::string &fname, int flags) -> tl::expected<raii_file, error>
+    {
+        return open(fname.c_str(), flags);
+    }
+    static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>;
+    static auto create(const std::string &fname, int flags, int perms) -> tl::expected<raii_file, error>
+    {
+        return create(fname.c_str(), flags, perms);
+    }
+
+    static auto create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_file, error>;
+    static auto mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_file, error>;
+
+    /** @return the owned descriptor, or -1 when nothing is owned */
+    auto get_fd() const -> int
+    {
+        return fd;
+    }
+
+    /** @return the cached stat data (refreshed only by `update_stat`) */
+    auto get_stat() const -> const struct stat &
+    {
+        return st;
+    }
+
+    /** @return file size taken from the cached stat data */
+    auto get_size() const -> std::size_t
+    {
+        return st.st_size;
+    }
+
+    /** @return view of the stored file name; valid while this object is alive and not moved from */
+    auto get_name() const -> std::string_view
+    {
+        return std::string_view{fname};
+    }
+
+    /**
+     * @return the directory part of the name including one trailing
+     * separator, or the whole name when it has no separator at all
+     */
+    auto get_dir() const -> std::string_view
+    {
+        auto sep_pos = fname.rfind(G_DIR_SEPARATOR);
+
+        if (sep_pos == std::string::npos) {
+            return std::string_view{fname};
+        }
+
+        /* Collapse a run of separators so that exactly one is kept */
+        while (sep_pos >= 1 && fname[sep_pos - 1] == G_DIR_SEPARATOR) {
+            sep_pos--;
+        }
+
+        return std::string_view{fname.c_str(), sep_pos + 1};
+    }
+
+    /**
+     * @return everything after the first dot of the last path component,
+     * or an empty view when that component has no dot
+     */
+    auto get_extension() const -> std::string_view
+    {
+        auto sep_pos = fname.rfind(G_DIR_SEPARATOR);
+
+        if (sep_pos == std::string::npos) {
+            sep_pos = 0;
+        }
+
+        auto filename = std::string_view{fname.c_str() + sep_pos};
+        auto dot_pos = filename.find('.');
+
+        if (dot_pos == std::string::npos) {
+            return std::string_view{};
+        }
+        else {
+            return std::string_view{filename.data() + dot_pos + 1, filename.size() - dot_pos - 1};
+        }
+    }
+
+    /* Move assignment exchanges the whole state with `other` */
+    raii_file &operator=(raii_file &&other) noexcept
+    {
+        std::swap(fd, other.fd);
+        std::swap(temp, other.temp);
+        std::swap(fname, other.fname);
+        std::swap(st, other.st);
+
+        return *this;
+    }
+
+    /*
+     * Move construction starts from the default member values (no
+     * descriptor, not temporary, zeroed stat) and swaps them with `other`,
+     * which is therefore left without a descriptor.
+     */
+    raii_file(raii_file &&other) noexcept
+    {
+        *this = std::move(other);
+    }
+
+    /**
+     * Prevent file from being deleted
+     * @return
+     */
+    auto make_immortal() noexcept
+    {
+        temp = false;
+    }
+
+    /**
+     * Performs fstat on an opened file to refresh internal stat
+     * @return true when fstat succeeded
+     */
+    auto update_stat() noexcept -> bool;
+
+    /** @return true when a descriptor is owned */
+    auto is_valid() const noexcept -> bool
+    {
+        return fd != -1;
+    }
+
+    /* Do not allow copy/default ctor */
+    const raii_file &operator=(const raii_file &other) = delete;
+    raii_file() = delete;
+    raii_file(const raii_file &other) = delete;
+
+protected:
+    int fd = -1;
+    /* Explicit defaults: the move constructor swaps these before assigning them */
+    bool temp = false;
+    std::string fname;
+    struct stat st = {};
+
+    explicit raii_file(const char *fname, int fd, bool temp);
+};
+/**
+ * A raii_file that additionally holds a lock taken via `lock_raii_file`.
+ * The lock is released and the descriptor is closed on destruction.
+ */
+struct raii_locked_file final : public raii_file {
+public:
+    ~raii_locked_file() noexcept override;
+
+    /*
+     * Each factory runs the matching raii_file factory and, when that
+     * succeeds, locks the result; an error from either step is returned.
+     */
+    static auto open(const char *fname, int flags) -> tl::expected<raii_locked_file, error>
+    {
+        return raii_file::open(fname, flags).and_then([]<class T>(T &&file) {
+            return lock_raii_file(std::forward<T>(file));
+        });
+    }
+    static auto create(const char *fname, int flags, int perms) -> tl::expected<raii_locked_file, error>
+    {
+        return raii_file::create(fname, flags, perms).and_then([]<class T>(T &&file) {
+            return lock_raii_file(std::forward<T>(file));
+        });
+    }
+    static auto create_temp(const char *fname, int flags, int perms) -> tl::expected<raii_locked_file, error>
+    {
+        return raii_file::create_temp(fname, flags, perms).and_then([]<class T>(T &&file) {
+            return lock_raii_file(std::forward<T>(file));
+        });
+    }
+    static auto mkstemp(const char *pattern, int flags, int perms) -> tl::expected<raii_locked_file, error>
+    {
+        return raii_file::mkstemp(pattern, flags, perms).and_then([]<class T>(T &&file) {
+            return lock_raii_file(std::forward<T>(file));
+        });
+    }
+
+    /* Move assignment exchanges the whole state with `other` (same swaps as the base class) */
+    raii_locked_file &operator=(raii_locked_file &&other) noexcept
+    {
+        raii_file::operator=(std::move(other));
+
+        return *this;
+    }
+
+    /**
+     * Unlock a locked file and return back unlocked file transferring ownership.
+     * A locked file cannot be used after this method.
+     */
+    auto unlock() -> raii_file;
+
+    raii_locked_file(raii_locked_file &&other) noexcept
+        : raii_file(static_cast<raii_file &&>(std::move(other)))
+    {
+    }
+    /* Do not allow copy/default ctor */
+    const raii_locked_file &operator=(const raii_locked_file &other) = delete;
+    raii_locked_file() = delete;
+    raii_locked_file(const raii_locked_file &other) = delete;
+
+private:
+    /* Locks an opened file and converts it into a raii_locked_file */
+    static auto lock_raii_file(raii_file &&unlocked) -> tl::expected<raii_locked_file, error>;
+    /* Adopts a file without locking it; used only after the lock has been taken */
+    raii_locked_file(raii_file &&other) noexcept
+        : raii_file(std::move(other))
+    {
+    }
+    explicit raii_locked_file(const char *fname, int fd, bool temp)
+        : raii_file(fname, fd, temp)
+    {
+    }
+};
+
+/**
+ * A mmap wrapper that owns both the mapped file and the mapping itself;
+ * the mapping is removed on destruction unless it has been stolen
+ */
+struct raii_mmaped_file final {
+    ~raii_mmaped_file();
+    /* `flags` / `mmap_flags` are passed to mmap as the protection argument */
+    static auto mmap_shared(raii_file &&file, int flags, std::int64_t offset = 0) -> tl::expected<raii_mmaped_file, error>;
+    static auto mmap_shared(const char *fname, int open_flags, int mmap_flags, std::int64_t offset = 0) -> tl::expected<raii_mmaped_file, error>;
+    // Returns a pointer to the underlying map (nullptr once the map has been stolen)
+    auto get_map() const -> void *
+    {
+        return map;
+    }
+    auto get_file() const -> const raii_file &
+    {
+        return file;
+    }
+    // Passes the ownership of the mmaped memory to the caller: the returned
+    // tuple holds the address and the mapped length, and the destructor
+    // will not unmap it anymore
+    auto steal_map() -> std::tuple<void *, std::size_t>
+    {
+        auto ret = std::make_tuple(this->map, map_size);
+        this->map = nullptr;
+        return ret;
+    }
+
+    // Returns the size of the backing file as recorded in its cached stat
+    // data; the mapping offset is not subtracted here
+    auto get_size() const -> std::size_t
+    {
+        return file.get_stat().st_size;
+    }
+
+    // Move assignment exchanges the whole state with `other`
+    raii_mmaped_file &operator=(raii_mmaped_file &&other) noexcept
+    {
+        std::swap(map, other.map);
+        std::swap(map_size, other.map_size);
+        file = std::move(other.file);
+
+        return *this;
+    }
+
+    raii_mmaped_file(raii_mmaped_file &&other) noexcept;
+
+    /* Do not allow copy/default ctor */
+    const raii_mmaped_file &operator=(const raii_mmaped_file &other) = delete;
+    raii_mmaped_file() = delete;
+    raii_mmaped_file(const raii_mmaped_file &other) = delete;
+
+private:
+    /* Is intended to be used with map_shared */
+    explicit raii_mmaped_file(raii_file &&_file, void *_map, std::size_t sz);
+    raii_file file;
+    void *map = nullptr;
+    /* Explicit default: the move constructor must never observe an indeterminate length */
+    std::size_t map_size = 0;
+};
+
+/**
+ * Write-to-temporary-then-rename helper: data goes to a locked temporary
+ * file which `write_output` renames to the target name; if that never
+ * succeeds, the temporary file is removed on destruction
+ */
+struct raii_file_sink final {
+    ~raii_file_sink();
+    raii_file_sink(raii_file_sink &&other) noexcept;
+
+    /* Creates the temporary file `<fname>.<suffix>` */
+    static auto create(const char *fname, int flags, int perms, const char *suffix = "new")
+        -> tl::expected<raii_file_sink, error>;
+
+    /* Renames the temporary file to the target name; works at most once */
+    auto write_output() -> bool;
+
+    /* Descriptor of the temporary file */
+    auto get_fd() const -> int
+    {
+        return file.get_fd();
+    }
+
+    /* Do not allow copy/default ctor */
+    const raii_file_sink &operator=(const raii_file_sink &other) = delete;
+    raii_file_sink() = delete;
+    raii_file_sink(const raii_file_sink &other) = delete;
+
+private:
+    explicit raii_file_sink(raii_locked_file &&_file, const char *_output, std::string &&_tmp_fname);
+    raii_locked_file file;
+    std::string output_fname;
+    std::string tmp_fname;
+    bool success;
+};
+
+}// namespace rspamd::util
+
+#endif//RSPAMD_FILE_UTIL_HXX
diff --git a/src/libutil/cxx/hash_util.hxx b/src/libutil/cxx/hash_util.hxx
new file mode 100644
index 0000000..05f3d97
--- /dev/null
+++ b/src/libutil/cxx/hash_util.hxx
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_HASH_UTIL_HXX
+#define RSPAMD_HASH_UTIL_HXX
+
+#pragma once
+
+#include <string_view>
+#include <string>
+#include "contrib/ankerl/unordered_dense.h"
+
+
+namespace rspamd {
+/*
+ * Transparent equality for smart pointers: operands are compared by the
+ * values they point to, and a plain value may be given on either side
+ */
+template<typename T>
+struct smart_ptr_equal {
+    using is_transparent = void; /* Allows lookups in a container of smart pointers by a plain reference */
+
+    /* std::shared_ptr flavours */
+    auto operator()(const std::shared_ptr<T> &a, const std::shared_ptr<T> &b) const
+    {
+        return *a == *b;
+    }
+    auto operator()(const std::shared_ptr<T> &a, const T &b) const
+    {
+        return *a == b;
+    }
+    auto operator()(const T &a, const std::shared_ptr<T> &b) const
+    {
+        return a == *b;
+    }
+
+    /* std::unique_ptr flavours */
+    auto operator()(const std::unique_ptr<T> &a, const std::unique_ptr<T> &b) const
+    {
+        return *a == *b;
+    }
+    auto operator()(const std::unique_ptr<T> &a, const T &b) const
+    {
+        return *a == b;
+    }
+    auto operator()(const T &a, const std::unique_ptr<T> &b) const
+    {
+        return a == *b;
+    }
+};
+
+/* Transparent hasher: a smart pointer hashes exactly like the value it points to */
+template<typename T>
+struct smart_ptr_hash {
+    using is_transparent = void; /* Allows lookups in a container of smart pointers by a plain reference */
+    using is_avalanching = void;
+
+    auto operator()(const T &a) const
+    {
+        return std::hash<T>{}(a);
+    }
+    auto operator()(const std::shared_ptr<T> &a) const
+    {
+        return std::hash<T>{}(*a);
+    }
+    auto operator()(const std::unique_ptr<T> &a) const
+    {
+        return std::hash<T>{}(*a);
+    }
+};
+
+/* Transparent string equality, so that std::string keys can be looked up by std::string_view */
+struct smart_str_equal {
+    using is_transparent = void;
+
+    auto operator()(const std::string &a, const std::string &b) const -> bool
+    {
+        return a == b;
+    }
+    auto operator()(const std::string_view &a, const std::string &b) const -> bool
+    {
+        return a == b;
+    }
+    auto operator()(const std::string &a, const std::string_view &b) const -> bool
+    {
+        return a == b;
+    }
+};
+
+/* Transparent string hasher accepting both std::string and std::string_view */
+struct smart_str_hash {
+    using is_transparent = void;
+    using is_avalanching = void;
+
+    auto operator()(const std::string_view &a) const
+    {
+        return ankerl::unordered_dense::hash<std::string_view>{}(a);
+    }
+    auto operator()(const std::string &a) const
+    {
+        return ankerl::unordered_dense::hash<std::string>{}(a);
+    }
+};
+
+}// namespace rspamd
+
+#endif//RSPAMD_HASH_UTIL_HXX
diff --git a/src/libutil/cxx/local_shared_ptr.hxx b/src/libutil/cxx/local_shared_ptr.hxx
new file mode 100644
index 0000000..78ed5ba
--- /dev/null
+++ b/src/libutil/cxx/local_shared_ptr.hxx
@@ -0,0 +1,440 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_LOCAL_SHARED_PTR_HXX
+#define RSPAMD_LOCAL_SHARED_PTR_HXX
+
+#pragma once
+
+#include <memory>
+#include <algorithm> // for std::swap
+#include <cstddef> // for std::size_t
+#include <functional>// for std::less
+
+/*
+ * Smart pointers with no atomic refcounts to speed up Rspamd which is
+ * apparently single threaded
+ */
+namespace rspamd {
+
+namespace detail {
+
+/*
+ * Non-atomic control block base: keeps a shared and a weak counter and
+ * leaves the destruction of the managed object to `dispose`.
+ * A new block starts with one shared and no weak references.
+ */
+class ref_cnt {
+public:
+    using refcount_t = int;
+
+    /* Mutators return the counter value after the change */
+    constexpr auto add_shared() -> refcount_t
+    {
+        ref_shared += 1;
+        return ref_shared;
+    }
+    constexpr auto add_weak() -> refcount_t
+    {
+        ref_weak += 1;
+        return ref_weak;
+    }
+    constexpr auto release_shared() -> refcount_t
+    {
+        ref_shared -= 1;
+        return ref_shared;
+    }
+    constexpr auto release_weak() -> refcount_t
+    {
+        ref_weak -= 1;
+        return ref_weak;
+    }
+
+    /* Observers */
+    constexpr auto shared_count() const -> refcount_t
+    {
+        return ref_shared;
+    }
+    constexpr auto weak_count() const -> refcount_t
+    {
+        return ref_weak;
+    }
+
+    virtual ~ref_cnt() = default;
+    /* Destroys the managed object; the control block itself stays alive */
+    virtual void dispose() = 0;
+
+private:
+    refcount_t ref_weak = 0;
+    refcount_t ref_shared = 1;
+};
+
+/*
+ * Control block that stores the managed object inside itself.
+ * The object is destroyed by `dispose` only; the destructor of the block
+ * does not touch the storage.
+ */
+template<class T>
+class obj_and_refcnt : public ref_cnt {
+private:
+    using storage_type = typename std::aligned_storage<sizeof(T), std::alignment_of<T>::value>::type;
+    storage_type storage;
+    bool initialized;
+
+    void dispose() override
+    {
+        if (!initialized) {
+            return;
+        }
+
+        /* Run the destructor by hand: the object was created with placement new */
+        reinterpret_cast<T *>(&storage)->~T();
+        initialized = false;
+    }
+
+public:
+    /* Constructs the object in place from the forwarded arguments */
+    template<typename... Args>
+    explicit obj_and_refcnt(Args &&...args)
+        : initialized(true)
+    {
+        new (&storage) T(std::forward<Args>(args)...);
+    }
+
+    /* Pointer to the stored object, or nullptr once it has been disposed */
+    auto get(void) -> T *
+    {
+        return initialized ? reinterpret_cast<T *>(&storage) : nullptr;
+    }
+
+    ~obj_and_refcnt() override = default;
+};
+
+/* Control block for an externally allocated object released through a deleter */
+template<class T, class D = typename std::default_delete<T>>
+class ptr_and_refcnt : public ref_cnt {
+private:
+    T *ptr;
+    D deleter;
+
+    /* Hands the pointer to the deleter and forgets it */
+    void dispose() override
+    {
+        deleter(ptr);
+        ptr = nullptr;
+    }
+
+public:
+    explicit ptr_and_refcnt(T *_ptr, D &&d = std::default_delete<T>())
+        : ptr(_ptr),
+          deleter(std::move(d))
+    {
+    }
+
+    ~ptr_and_refcnt() override = default;
+};
+
+}// namespace detail
+
+template<class T>
+class local_weak_ptr;
+
+/*
+ * Shared ownership pointer with plain (non-atomic) reference counters.
+ * The managed object is disposed when the last shared owner goes away; the
+ * control block is deleted once no weak references are left either.
+ */
+template<class T>
+class local_shared_ptr {
+public:
+    typedef T element_type;
+    typedef local_weak_ptr<T> weak_type;
+
+    // Simplified comparing to libc++, no custom deleter and no rebind here
+    // constructors:
+    constexpr local_shared_ptr() noexcept
+        : px(nullptr), cnt(nullptr)
+    {
+    }
+
+    // Takes ownership of a raw pointer, released with the default deleter
+    template<class Y, typename std::enable_if<
+                          std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    explicit local_shared_ptr(Y *p)
+        : px(p), cnt(new detail::ptr_and_refcnt(p))
+    {
+    }
+
+    // custom deleter
+    template<class Y, class D, typename std::enable_if<std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    explicit local_shared_ptr(Y *p, D &&d)
+        : px(p), cnt(new detail::ptr_and_refcnt<Y, D>(p, std::forward<D>(d)))
+    {
+    }
+
+    local_shared_ptr(const local_shared_ptr &r) noexcept
+        : px(r.px), cnt(r.cnt)
+    {
+        if (cnt) {
+            cnt->add_shared();
+        }
+    }
+    local_shared_ptr(local_shared_ptr &&r) noexcept
+        : px(r.px), cnt(r.cnt)
+    {
+        r.px = nullptr;
+        r.cnt = nullptr;
+    }
+    // Builds a shared owner from a weak reference. This goes through the
+    // public `lock()` of the weak pointer, as its members are not
+    // accessible from this class.
+    template<class Y>
+    explicit local_shared_ptr(const local_weak_ptr<Y> &r)
+        : local_shared_ptr(r.lock())
+    {
+    }
+    local_shared_ptr(std::nullptr_t)
+        : local_shared_ptr()
+    {
+    }
+
+    ~local_shared_ptr()
+    {
+        if (cnt) {
+            if (cnt->release_shared() <= 0) {
+                // Last shared owner: destroy the object now
+                cnt->dispose();
+
+                // The control block must outlive the object while weak references exist
+                if (cnt->weak_count() == 0) {
+                    delete cnt;
+                }
+            }
+        }
+    }
+
+    // assignment (copy/move and swap):
+    local_shared_ptr &operator=(const local_shared_ptr &r) noexcept
+    {
+        local_shared_ptr(r).swap(*this);
+        return *this;
+    }
+    local_shared_ptr &operator=(local_shared_ptr &&r) noexcept
+    {
+        local_shared_ptr(std::move(r)).swap(*this);
+        return *this;
+    }
+
+    // Mutators
+    void swap(local_shared_ptr &r) noexcept
+    {
+        std::swap(this->cnt, r.cnt);
+        std::swap(this->px, r.px);
+    }
+    void reset() noexcept
+    {
+        local_shared_ptr().swap(*this);
+    }
+
+    // Observers:
+    T *get() const noexcept
+    {
+        return px;
+    }
+
+    T &operator*() const noexcept
+    {
+        return *px;
+    }
+    T *operator->() const noexcept
+    {
+        return px;
+    }
+    long use_count() const noexcept
+    {
+        if (cnt) {
+            return cnt->shared_count();
+        }
+
+        return 0;
+    }
+    bool unique() const noexcept
+    {
+        return use_count() == 1;
+    }
+
+    explicit operator bool() const noexcept
+    {
+        return px != nullptr;
+    }
+
+    // Identity comparison: true when both point to the same object.
+    // `get()` is used because the members of another specialisation are
+    // not accessible from here.
+    template<class Y, typename std::enable_if<
+                          std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    auto operator==(const local_shared_ptr<Y> &other) const -> bool
+    {
+        return px == other.get();
+    }
+
+    // Ordering by the pointed-to values (not by address); both pointers
+    // must be non-null
+    template<class Y, typename std::enable_if<
+                          std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    auto operator<(const local_shared_ptr<Y> &other) const -> auto
+    {
+        return *px < *other.get();
+    }
+
+private:
+    T *px;// contained pointer
+    detail::ref_cnt *cnt;
+
+    template<class _T, class... Args>
+    friend local_shared_ptr<_T> local_make_shared(Args &&...args);
+    friend class local_weak_ptr<T>;
+};
+
+/*
+ * Creates an object and its control block in a single allocation and
+ * returns the first shared owner of it
+ */
+template<class T, class... Args>
+local_shared_ptr<T> local_make_shared(Args &&...args)
+{
+    auto *block = new detail::obj_and_refcnt<T>(std::forward<Args>(args)...);
+
+    local_shared_ptr<T> result;
+    result.px = block->get();
+    result.cnt = block;
+
+    return result;
+}
+
+/*
+ * Non-owning companion of local_shared_ptr: keeps the control block alive
+ * (but not the object) and can be promoted to a shared owner via `lock()`
+ */
+template<class T>
+class local_weak_ptr {
+public:
+    typedef T element_type;
+
+    // constructors
+    constexpr local_weak_ptr() noexcept
+        : px(nullptr), cnt(nullptr)
+    {
+    }
+    template<class Y, typename std::enable_if<
+                          std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    local_weak_ptr(local_shared_ptr<Y> const &r) noexcept
+        : px(r.px), cnt(r.cnt)
+    {
+        if (cnt) {
+            cnt->add_weak();
+        }
+    }
+
+    local_weak_ptr(local_weak_ptr const &r) noexcept
+        : px(r.px), cnt(r.cnt)
+    {
+        if (cnt) {
+            cnt->add_weak();
+        }
+    }
+    local_weak_ptr(local_weak_ptr &&r) noexcept
+        : px(r.px), cnt(r.cnt)
+    {
+        r.px = nullptr;
+        r.cnt = nullptr;
+    }
+
+    ~local_weak_ptr()
+    {
+        if (cnt) {
+            // The last weak reference deletes the control block, but only
+            // after all shared owners are gone
+            if (cnt->release_weak() <= 0 && cnt->shared_count() == 0) {
+                delete cnt;
+            }
+        }
+    }
+
+    // assignment (copy/move and swap)
+    local_weak_ptr &operator=(local_weak_ptr const &r) noexcept
+    {
+        local_weak_ptr(r).swap(*this);
+        return *this;
+    }
+    local_weak_ptr &operator=(local_shared_ptr<T> const &r) noexcept
+    {
+        local_weak_ptr(r).swap(*this);
+        return *this;
+    }
+
+    template<class Y, typename std::enable_if<
+                          std::is_convertible<Y *, element_type *>::value, bool>::type = true>
+    local_weak_ptr &operator=(local_weak_ptr<Y> const &r) noexcept
+    {
+        local_weak_ptr(r).swap(*this);
+        return *this;
+    }
+    local_weak_ptr &operator=(local_weak_ptr &&r) noexcept
+    {
+        local_weak_ptr(std::move(r)).swap(*this);
+        return *this;
+    }
+
+    // modifiers
+    void swap(local_weak_ptr &r) noexcept
+    {
+        std::swap(this->cnt, r.cnt);
+        std::swap(this->px, r.px);
+    }
+    void reset() noexcept
+    {
+        local_weak_ptr().swap(*this);
+    }
+
+    // observers
+    long use_count() const noexcept
+    {
+        if (cnt) {
+            return cnt->shared_count();
+        }
+        return 0;
+    }
+    bool expired() const noexcept
+    {
+        if (cnt) {
+            return cnt->shared_count() == 0;
+        }
+
+        return true;
+    }
+
+    // Promotes to a shared owner; returns an empty pointer when there is no
+    // control block or the object has already been disposed
+    local_shared_ptr<T> lock() const noexcept
+    {
+        local_shared_ptr<T> tmp;
+
+        // Never revive a disposed object: with no shared owners left `px`
+        // is dangling and the counter must stay at zero
+        if (cnt && cnt->shared_count() > 0) {
+            cnt->add_shared();
+            tmp.cnt = cnt;
+            tmp.px = px;
+        }
+
+        return tmp;
+    }
+
+private:
+    element_type *px;
+    detail::ref_cnt *cnt;
+};
+
+
+}// namespace rspamd
+
+/* Hashing stuff */
+namespace std {
+/* Hashes a shared pointer by the value it points to; an empty pointer is rejected */
+template<class T>
+struct hash<rspamd::local_shared_ptr<T>> {
+    inline auto operator()(const rspamd::local_shared_ptr<T> &p) const -> auto
+    {
+        if (p) {
+            return hash<T>()(*p.get());
+        }
+
+        throw std::logic_error("no hash for dangling pointer");
+    }
+};
+/*
+ * Hashes a weak pointer by the value it refers to.
+ * Throws std::logic_error when the pointer cannot be locked.
+ */
+template<class T>
+struct hash<rspamd::local_weak_ptr<T>> {
+    inline auto operator()(const rspamd::local_weak_ptr<T> &p) const -> auto
+    {
+        /* A weak pointer has neither operator bool nor get(): promote it first */
+        auto locked = p.lock();
+
+        if (!locked) {
+            throw std::logic_error("no hash for dangling pointer");
+        }
+        return hash<T>()(*locked.get());
+    }
+};
+
+/* Free swap for shared pointers: exchanges the contents of both arguments via the member swap */
+template<class T>
+inline void swap(rspamd::local_shared_ptr<T> &x, rspamd::local_shared_ptr<T> &y) noexcept
+{
+    y.swap(x);
+}
+
+/* Free swap for weak pointers: exchanges the contents of both arguments via the member swap */
+template<class T>
+inline void swap(rspamd::local_weak_ptr<T> &x, rspamd::local_weak_ptr<T> &y) noexcept
+{
+    y.swap(x);
+}
+
+}// namespace std
+
+#endif//RSPAMD_LOCAL_SHARED_PTR_HXX
diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
new file mode 100644
index 0000000..5fc83ca
--- /dev/null
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -0,0 +1,421 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define U_CHARSET_IS_UTF8 1
+#include <unicode/utypes.h>
+#include <unicode/utf8.h>
+#include <unicode/uchar.h>
+#include <unicode/normalizer2.h>
+#include <unicode/schriter.h>
+#include <unicode/coll.h>
+#include <unicode/translit.h>
+#include <utility>
+#include <tuple>
+#include <string>
+#include <limits>
+#include <memory>
+
+#include "utf8_util.h"
+#include "str_util.h"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+const char *
+rspamd_string_unicode_trim_inplace(const char *str, size_t *len)
+{
+ const auto *p = str, *end = str + *len;
+ auto i = 0;
+
+ while (i < *len) {
+ UChar32 uc;
+ auto prev_i = i;
+
+ U8_NEXT(p, i, *len, uc);
+
+ if (!u_isUWhiteSpace(uc) && !IS_ZERO_WIDTH_SPACE(uc)) {
+ i = prev_i;
+ break;
+ }
+ }
+
+ p += i;
+ (*len) -= i;
+ i = end - p;
+ auto *ret = p;
+
+ if (i > 0) {
+
+ while (i > 0) {
+ UChar32 uc;
+ auto prev_i = i;
+
+ U8_PREV(p, 0, i, uc);
+
+ if (!u_isUWhiteSpace(uc) && !IS_ZERO_WIDTH_SPACE(uc)) {
+ i = prev_i;
+ break;
+ }
+ }
+
+ *len = i;
+ }
+
+ return ret;
+}
+
+enum rspamd_utf8_normalise_result
+rspamd_normalise_unicode_inplace(char *start, size_t *len)
+{
+ UErrorCode uc_err = U_ZERO_ERROR;
+ const auto *nfkc_norm = icu::Normalizer2::getNFKCInstance(uc_err);
+ static icu::UnicodeSet zw_spaces{};
+
+ if (!zw_spaces.isFrozen()) {
+ /* Add zw spaces to the set */
+ zw_spaces.add(0x200B);
+ /* TODO: ZW non joiner, it might be used for ligatures, so it should possibly be excluded as well */
+ zw_spaces.add(0x200C);
+ /* See github issue #4290 for explanation. It seems that the ZWJ has many legit use cases */
+ //zw_spaces.add(0x200D);
+ zw_spaces.add(0xFEF);
+ zw_spaces.add(0x00AD);
+ zw_spaces.freeze();
+ }
+
+ int ret = RSPAMD_UNICODE_NORM_NORMAL;
+
+ g_assert(U_SUCCESS(uc_err));
+
+ auto uc_string = icu::UnicodeString::fromUTF8(icu::StringPiece(start, *len));
+ auto is_normal = nfkc_norm->quickCheck(uc_string, uc_err);
+
+ if (!U_SUCCESS(uc_err)) {
+ return RSPAMD_UNICODE_NORM_ERROR;
+ }
+
+ /* Filter zero width spaces and push resulting string back */
+ const auto filter_zw_spaces_and_push_back = [&](const icu::UnicodeString &input) -> size_t {
+ icu::StringCharacterIterator it{input};
+ size_t i = 0;
+
+ while (it.hasNext()) {
+ /* libicu is very 'special' if it comes to 'safe' macro */
+ if (i >= *len) {
+ ret |= RSPAMD_UNICODE_NORM_ERROR;
+ break;
+ }
+
+ auto uc = it.next32PostInc();
+
+ if (zw_spaces.contains(uc)) {
+ ret |= RSPAMD_UNICODE_NORM_ZERO_SPACES;
+ }
+ else {
+ UBool err = 0;
+
+ if (uc == 0xFFFD) {
+ ret |= RSPAMD_UNICODE_NORM_UNNORMAL;
+ }
+ U8_APPEND((uint8_t *) start, i, *len, uc, err);
+
+ if (err) {
+ ret |= RSPAMD_UNICODE_NORM_ERROR;
+ break;
+ }
+ }
+ }
+
+ return i;
+ };
+
+ if (is_normal != UNORM_YES) {
+ /* Need to normalise */
+ ret |= RSPAMD_UNICODE_NORM_UNNORMAL;
+
+ auto normalised = nfkc_norm->normalize(uc_string, uc_err);
+
+ if (!U_SUCCESS(uc_err)) {
+ return RSPAMD_UNICODE_NORM_ERROR;
+ }
+
+ *len = filter_zw_spaces_and_push_back(normalised);
+ }
+ else {
+ *len = filter_zw_spaces_and_push_back(uc_string);
+ }
+
+ return static_cast<enum rspamd_utf8_normalise_result>(ret);
+}
+
+gchar *
+rspamd_utf8_transliterate(const gchar *start, gsize len, gsize *target_len)
+{
+ UErrorCode uc_err = U_ZERO_ERROR;
+
+ static std::unique_ptr<icu::Transliterator> transliterator;
+
+ if (!transliterator) {
+ UParseError parse_err;
+ static const auto rules = icu::UnicodeString{":: Any-Latin;"
+ ":: [:Nonspacing Mark:] Remove;"
+ ":: [:Punctuation:] Remove;"
+ ":: [:Symbol:] Remove;"
+ ":: [:Format:] Remove;"
+ ":: Latin-ASCII;"
+ ":: Lower();"
+ ":: NULL;"
+ "[:Space Separator:] > ' '"};
+ transliterator = std::unique_ptr<icu::Transliterator>(
+ icu::Transliterator::createFromRules("RspamdTranslit", rules, UTRANS_FORWARD, parse_err, uc_err));
+
+ if (U_FAILURE(uc_err) || !transliterator) {
+ auto context = icu::UnicodeString(parse_err.postContext, sizeof(parse_err.preContext) / sizeof(UChar));
+ g_error("fatal error: cannot init libicu transliteration engine: %s, line: %d, offset: %d",
+ u_errorName(uc_err), parse_err.line, parse_err.offset);
+ abort();
+ }
+ }
+
+ auto uc_string = icu::UnicodeString::fromUTF8(icu::StringPiece(start, len));
+ transliterator->transliterate(uc_string);
+
+ // We assume that all characters are now ascii
+ auto dest_len = uc_string.length();
+ gchar *dest = (gchar *) g_malloc(dest_len + 1);
+ auto sink = icu::CheckedArrayByteSink(dest, dest_len);
+ uc_string.toUTF8(sink);
+
+ *target_len = sink.NumberOfBytesWritten();
+ dest[*target_len] = '\0';
+
+ return dest;
+}
+
+struct rspamd_icu_collate_storage {
+ icu::Collator *collator = nullptr;
+ rspamd_icu_collate_storage()
+ {
+ UErrorCode uc_err = U_ZERO_ERROR;
+ collator = icu::Collator::createInstance(icu::Locale::getEnglish(), uc_err);
+
+ if (U_FAILURE(uc_err) || collator == nullptr) {
+ g_error("fatal error: cannot init libicu collation engine: %s",
+ u_errorName(uc_err));
+ abort();
+ }
+ /* Ignore all difference except functional */
+ collator->setStrength(icu::Collator::PRIMARY);
+ }
+
+ ~rspamd_icu_collate_storage()
+ {
+ if (collator) {
+ delete collator;
+ }
+ }
+};
+
+static rspamd_icu_collate_storage collate_storage;
+
+int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2)
+{
+ if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) {
+ /*
+ * It's hard to say what to do here... But libicu wants int, so we fall
+ * back to g_ascii_strcasecmp which can deal with size_t
+ */
+ if (n1 == n2) {
+ return g_ascii_strncasecmp(s1, s2, n1);
+ }
+ else {
+ return n1 - n2;
+ }
+ }
+
+ UErrorCode success = U_ZERO_ERROR;
+ auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2},
+ success);
+
+ switch (res) {
+ case UCOL_EQUAL:
+ return 0;
+ case UCOL_GREATER:
+ return 1;
+ case UCOL_LESS:
+ default:
+ return -1;
+ }
+}
+
+int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+{
+ return rspamd_utf8_strcmp_sizes(s1, n, s2, n);
+}
+
+TEST_SUITE("utf8 utils")
+{
+ TEST_CASE("utf8 normalise")
+ {
+ std::tuple<const char *, const char *, int> cases[] = {
+ {"abc", "abc", RSPAMD_UNICODE_NORM_NORMAL},
+ {"тест", "тест", RSPAMD_UNICODE_NORM_NORMAL},
+ /* Zero width spaces */
+ {"\xE2\x80\x8B"
+ "те"
+ "\xE2\x80\x8B"
+ "ст",
+ "тест", RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ /* Special case of diacritic */
+ {"13_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ", RSPAMD_UNICODE_NORM_UNNORMAL},
+ // String containing a non-joiner character
+ {"س\u200Cت", "ست", RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ // String containing a soft hyphen
+ {"in\u00ADter\u00ADest\u00ADing", "interesting", RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ // String with ligature
+ {"fish", "fish", RSPAMD_UNICODE_NORM_UNNORMAL},
+ // String with accented characters and zero-width spaces
+ {"café\u200Blatté\u200C", "cafélatté", RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ /* Same with zw spaces */
+ {"13\u200C_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ",
+ RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ /* Buffer overflow case */
+ {"u\xC2\xC2\xC2\xC2\xC2\xC2"
+ "abcdef"
+ "abcdef",
+ "u\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD",
+ RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ERROR},
+ // String with a mix of special characters, ligatures, and zero-width spaces
+ {"fish\u200Bcafé\u200C\u200Dlatté\u200D\u00AD", "fishcafé\u200Dlatté\u200D", RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ZERO_SPACES},
+ // Empty string
+ {"", "", RSPAMD_UNICODE_NORM_NORMAL},
+ };
+
+ for (const auto &c: cases) {
+ std::string cpy{std::get<0>(c)};
+ auto ns = cpy.size();
+ auto res = rspamd_normalise_unicode_inplace(cpy.data(), &ns);
+ cpy.resize(ns);
+ CHECK(cpy == std::string(std::get<1>(c)));
+ CHECK(res == std::get<2>(c));
+ }
+ }
+
+ TEST_CASE("utf8 trim")
+ {
+ std::pair<const char *, const char *> cases[] = {
+ {" \u200B"
+ "abc ",
+ "abc"},
+ {" ", ""},
+ {" a", "a"},
+ {"a ", "a"},
+ {"a a", "a a"},
+ {"abc", "abc"},
+ {"a ", "a"},
+ {" abc ", "abc"},
+ {" abc ", "abc"},
+ {" \xE2\x80\x8B"
+ "a\xE2\x80\x8B"
+ "bc ",
+ "a\xE2\x80\x8B"
+ "bc"},
+ {" \xE2\x80\x8B"
+ "abc\xE2\x80\x8B ",
+ "abc"},
+ {" \xE2\x80\x8B"
+ "abc \xE2\x80\x8B ",
+ "abc"},
+ };
+
+ for (const auto &c: cases) {
+ std::string cpy{c.first};
+ auto ns = cpy.size();
+ auto *nstart = rspamd_string_unicode_trim_inplace(cpy.data(), &ns);
+ std::string res{nstart, ns};
+ CHECK(res == std::string{c.second});
+ }
+ }
+
+
+ TEST_CASE("utf8 strcmp")
+ {
+ std::tuple<const char *, const char *, int, int> cases[] = {
+ {"abc", "abc", -1, 0},
+ {"", "", -1, 0},
+ {"aBc", "AbC", -1, 0},
+ {"abc", "ab", 2, 0},
+ {"теСт", "ТесТ", -1, 0},
+ {"теСт", "Тезт", 4, 0},
+ {"теСт", "Тезт", -1, 1},
+ {"abc", "ABD", -1, -1},
+ {"\0a\0", "\0a\1", 2, 0},
+ {"\0a\0", "\0b\1", 3, -1},
+ };
+
+ for (const auto &c: cases) {
+ auto [s1, s2, n, expected] = c;
+ if (n == -1) {
+ n = MIN(strlen(s1), strlen(s2));
+ }
+ SUBCASE((std::string("test case: ") + s1 + " <=> " + s2).c_str())
+ {
+ auto ret = rspamd_utf8_strcmp(s1, s2, n);
+ CHECK(ret == expected);
+ }
+ }
+ }
+
+ TEST_CASE("transliterate")
+ {
+ using namespace std::literals;
+ std::tuple<std::string_view, const char *> cases[] = {
+ {"abc"sv, "abc"},
+ {""sv, ""},
+ {"тест"sv, "test"},
+ // Diacritic to ascii
+ {"Ύ"sv, "y"},
+ // Chinese to pinyin
+ {"你好"sv, "ni hao"},
+ // Japanese to romaji
+ {"こんにちは"sv, "konnichiha"},
+ // Devanagari to latin
+ {"नमस्ते"sv, "namaste"},
+ // Arabic to latin
+ {"مرحبا"sv, "mrhba"},
+ // Remove of punctuation
+ {"a.b.c"sv, "abc"},
+ // Lowercase
+ {"ABC"sv, "abc"},
+ // Remove zero-width spaces
+ {"\xE2\x80\x8B"
+ "abc\xE2\x80\x8B"
+ "def"sv,
+ "abcdef"},
+ };
+
+ for (const auto &c: cases) {
+ auto [s1, s2] = c;
+ SUBCASE((std::string("test case: ") + std::string(s1) + " => " + s2).c_str())
+ {
+ gsize tlen;
+ auto *ret = rspamd_utf8_transliterate(s1.data(), s1.length(), &tlen);
+ CHECK(tlen == strlen(s2));
+ CHECK(strcmp(s2, ret) == 0);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h
new file mode 100644
index 0000000..044beae
--- /dev/null
+++ b/src/libutil/cxx/utf8_util.h
@@ -0,0 +1,85 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_UTF8_UTIL_H
+#define RSPAMD_UTF8_UTIL_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
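/**
 * Trims leading and trailing unicode whitespace (including zero-width
 * spaces) from a string; the input buffer itself is not modified
 * @param str start of the string
 * @param len in: length of the string in bytes; out: length of the trimmed string
 * @return pointer to the first character of the trimmed string (inside `str`)
 */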
+const char *rspamd_string_unicode_trim_inplace(const char *str, size_t *len);
+
+enum rspamd_utf8_normalise_result {
+ RSPAMD_UNICODE_NORM_NORMAL = 0,
+ RSPAMD_UNICODE_NORM_UNNORMAL = (1 << 0),
+ RSPAMD_UNICODE_NORM_ZERO_SPACES = (1 << 1),
+ RSPAMD_UNICODE_NORM_ERROR = (1 << 2),
+ RSPAMD_UNICODE_NORM_OVERFLOW = (1 << 3)
+};
+
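/**
 * Gets a string in UTF8, normalises it to the NFKC form in place and removes
 * zero-width characters
 * @param start start of the string (modified in place)
 * @param len in: length of the string in bytes; out: length of the resulting string
 * @return bitmask of `rspamd_utf8_normalise_result` flags, RSPAMD_UNICODE_NORM_NORMAL
 * if the string was already normalised and nothing was removed
 */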
+enum rspamd_utf8_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsize *len);
+
+/**
+ * Transliterate a string to ASCII
+ * @param start
+ * @param len
+ * @param target_len
+ * @return a new string that should be freed with g_free
+ */
+gchar *rspamd_utf8_transliterate(const gchar *start, gsize len, gsize *target_len);
+
+/**
+ * Compare two strings using libicu collator
+ * @param s1
+ * @param s2
+ * @param n
+ * @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2.
+ */
+int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n);
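/**
 * Similar to rspamd_utf8_strcmp but accepts two sizes
 * @param s1 first string
 * @param n1 length of the first string in bytes
 * @param s2 second string
 * @param n2 length of the second string in bytes
 * @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2.
 */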
+int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_UTF8_UTIL_H
diff --git a/src/libutil/cxx/util.hxx b/src/libutil/cxx/util.hxx
new file mode 100644
index 0000000..32ec0b5
--- /dev/null
+++ b/src/libutil/cxx/util.hxx
@@ -0,0 +1,238 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_UTIL_HXX
+#define RSPAMD_UTIL_HXX
+
+#pragma once
+
+#include <memory>
+#include <array>
+#include <string_view>
+#include <optional>
+#include <tuple>
+#include <algorithm>
+
+/*
+ * Common C++ utilities
+ */
+
+namespace rspamd {
/*
 * Creates std::array from a list of values with automatic size calculation.
 * The element type is the decayed common type of all arguments.
 *
 * Every argument is forwarded with its own value category and then
 * converted to the element type, so lvalue arguments are copied. The
 * previous `std::forward<T>(t)` cast every argument to an rvalue of the
 * element type and therefore silently moved from lvalues.
 */
template<typename... Ts>
constexpr auto array_of(Ts &&...t) -> std::array<typename std::decay_t<typename std::common_type_t<Ts...>>, sizeof...(Ts)>
{
	using T = typename std::decay_t<typename std::common_type_t<Ts...>>;
	return {{static_cast<T>(std::forward<Ts>(t))...}};
}
+
/**
 * Look up a key in a map-like container
 * @tparam C container type (must provide find(), end(), key_type and mapped_type)
 * @tparam K type of the key passed by the caller
 * @tparam V type of the stored values (defaults to C::mapped_type)
 * @param c container to search
 * @param k key to look for
 * @return constant reference wrapper around the stored value, or std::nullopt
 * if the key is absent
 */
template<class C, class K, class V = typename C::mapped_type, typename std::enable_if_t<std::is_constructible_v<typename C::key_type, K> && std::is_constructible_v<typename C::mapped_type, V>, bool> = false>
constexpr auto find_map(const C &c, const K &k) -> std::optional<std::reference_wrapper<const V>>
{
	const auto found = c.find(k);

	if (found == c.end()) {
		return std::nullopt;
	}

	return std::cref<V>(found->second);
}
+
+
/**
 * Build a string_view over the range [begin, end)
 * An empty or inverted range yields an empty view.
 * @tparam It iterator type; the range must refer to contiguous characters
 */
template<typename It>
inline constexpr auto make_string_view_from_it(It begin, It end)
{
	using result_type = std::string_view;

	return result_type{((begin != end) ? &*begin : nullptr),
					   (typename result_type::size_type) std::max(std::distance(begin, end),
																  (typename result_type::difference_type) 0)};
}

/**
 * Iterate over lines in a string, newline characters are dropped
 *
 * Lines are separated by '\n'; any '\r' characters directly before the
 * separator (or at the very end of the input) are dropped as well. Empty
 * lines are reported as empty views. A trailing '\n' does not produce an
 * extra empty line.
 *
 * @tparam S string type (must provide begin()/end() over contiguous chars)
 * @tparam F functor type, invoked as functor(std::string_view)
 * @param input string to split
 * @param functor callback invoked once per line
 * @return nothing
 */
template<class S, class F, typename std::enable_if_t<std::is_invocable_v<F, std::string_view> && std::is_constructible_v<std::string_view, S>, bool> = true>
inline auto string_foreach_line(const S &input, const F &functor)
{
	auto it = input.begin();
	auto end = input.end();

	while (it != end) {
		/* `eol` is either the separator or `end`; it is never dereferenced */
		auto eol = std::find(it, end, '\n');
		auto line_end = eol;

		/* Strip carriage returns without ever stepping before `it` */
		while (line_end != it && *(line_end - 1) == '\r') {
			--line_end;
		}

		functor(make_string_view_from_it(it, line_end));

		/* Always continue after the separator to guarantee progress */
		it = eol;

		if (it != end) {
			++it;
		}
	}
}
+
/**
 * Split a string on any of the delimiter characters and call a functor for
 * each piece
 * @tparam S string type
 * @tparam D delimiter type (every character of it is a separator)
 * @tparam F functor type, invoked as functor(std::string_view)
 * @param input string to iterate
 * @param delim set of delimiter characters
 * @param functor functor to call for each element
 * @param ignore_empty skip empty elements when true
 * @return nothing
 */
template<class S, class D, class F,
		 typename std::enable_if_t<std::is_invocable_v<F, std::string_view> && std::is_constructible_v<std::string_view, S> && std::is_constructible_v<std::string_view, D>, bool> = true>
inline auto string_foreach_delim(const S &input, const D &delim, const F &functor, const bool ignore_empty = true) -> void
{
	const auto haystack = std::string_view{input};
	const auto separators = std::string_view{delim};

	for (size_t start = 0; start < haystack.size();) {
		const auto stop = haystack.find_first_of(separators, start);
		const bool is_empty = (stop == start);

		if (!(is_empty && ignore_empty)) {
			/* For the last element `stop` is npos and substr clamps the count */
			functor(haystack.substr(start, stop - start));
		}

		if (stop == std::string_view::npos) {
			return;
		}

		start = stop + 1;
	}
}
+
/**
 * Split string on a character
 *
 * The first element is everything before the first occurrence of `chr`; the
 * second one starts after the whole run of consecutive `chr` characters that
 * follows. If `chr` is not found, the result is {input, empty}.
 *
 * @tparam S string type (anything std::string_view can be constructed from)
 * @param input string to split
 * @param chr character to split on
 * @return pair of views into `input`
 */
template<class S, typename std::enable_if_t<std::is_constructible_v<std::string_view, S>, bool> = true>
inline auto string_split_on(const S &input, std::string_view::value_type chr) -> std::pair<std::string_view, std::string_view>
{
	/*
	 * Work on a view and positions: this never reads past the end (the old
	 * loop dereferenced the iterator before comparing it with end()) and
	 * does not depend on the iterator type of S.
	 */
	const std::string_view sv{input};
	const auto split_at = sv.find(chr);

	if (split_at == std::string_view::npos) {
		return {sv, std::string_view{}};
	}

	/* Skip the whole run of separators */
	const auto rest_at = sv.find_first_not_of(chr, split_at);

	return {sv.substr(0, split_at),
			sv.substr(rest_at == std::string_view::npos ? sv.size() : rest_at)};
}
+
/**
 * Enumerate for range loop: yields (index, element) tuples
 * @tparam T iterable type
 * @tparam TIter iterator type
 * @param iterable iterable object
 * @return wrapper object providing begin()/end() for a range-based for loop
 */
template<typename T,
		 typename TIter = decltype(std::begin(std::declval<T>())),
		 typename = decltype(std::end(std::declval<T>()))>
constexpr auto enumerate(T &&iterable)
{
	/* Iterator paired with a running counter */
	struct counted_iterator {
		size_t index;
		TIter pos;

		/* Only the underlying position matters, the counter is ignored */
		bool operator!=(const counted_iterator &other) const
		{
			return pos != other.pos;
		}
		void operator++()
		{
			++index;
			++pos;
		}
		/* Tuple of references to the counter and to the current element */
		auto operator*() const
		{
			return std::tie(index, *pos);
		}
	};

	/* Holds the iterable (or a reference to it, depending on T) */
	struct counted_range {
		T iterable;

		auto begin()
		{
			return counted_iterator{0, std::begin(iterable)};
		}
		auto end()
		{
			return counted_iterator{0, std::end(iterable)};
		}
	};

	return counted_range{std::forward<T>(iterable)};
}
+
/**
 * Allocator that cleans up memory in a secure way on deallocation
 *
 * Behaves like std::allocator<T>, but every deallocated block is wiped with
 * rspamd_explicit_memzero before it is returned to the system.
 * @tparam T element type
 */
template<class T>
class secure_mem_allocator : public std::allocator<T> {
public:
	using value_type = typename std::allocator<T>::value_type;
	using size_type = typename std::allocator<T>::size_type;
	template<class U>
	struct rebind {
		typedef secure_mem_allocator<U> other;
	};
	secure_mem_allocator() noexcept = default;
	secure_mem_allocator(const secure_mem_allocator &_) noexcept
		: std::allocator<T>(_)
	{
	}
	template<class U>
	explicit secure_mem_allocator(const secure_mem_allocator<U> &) noexcept
	{
	}

	/**
	 * Wipe and release a block
	 * @param p block previously obtained from allocate()
	 * @param num number of elements (not bytes) in the block
	 */
	void deallocate(value_type *p, size_type num) noexcept
	{
		/*
		 * `num` counts elements, while the wipe length is presumably in bytes
		 * (as with memset): scale it, otherwise only the first 1/sizeof(T)
		 * of the block is cleared.
		 */
		rspamd_explicit_memzero((void *) p, num * sizeof(value_type));
		std::allocator<T>::deallocate(p, num);
	}
};
+
+
+}// namespace rspamd
+
+#endif//RSPAMD_UTIL_HXX
diff --git a/src/libutil/cxx/util_tests.cxx b/src/libutil/cxx/util_tests.cxx
new file mode 100644
index 0000000..6c3c177
--- /dev/null
+++ b/src/libutil/cxx/util_tests.cxx
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util.hxx"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+#include <vector>
+
+using namespace rspamd;
+using namespace std::literals::string_view_literals;
+
+TEST_SUITE("cxx utils")
+{
+ TEST_CASE("string_split_on")
+ {
+ std::tuple<std::string_view, char, std::pair<std::string_view, std::string_view>> cases[] = {
+ {"test test"sv, ' ', std::pair{"test"sv, "test"sv}},
+ {"test test"sv, ' ', std::pair{"test"sv, "test"sv}},
+ {"test test "sv, ' ', std::pair{"test"sv, "test "sv}},
+ {"testtest "sv, ' ', std::pair{"testtest"sv, ""sv}},
+ {" testtest "sv, ' ', std::pair{""sv, "testtest "sv}},
+ {"testtest"sv, ' ', std::pair{"testtest"sv, ""sv}},
+ {""sv, ' ', std::pair{""sv, ""sv}},
+ };
+
+ for (const auto &c: cases) {
+ auto res = string_split_on(std::get<0>(c), std::get<1>(c));
+ auto expected = std::get<2>(c);
+ CHECK(res.first == expected.first);
+ CHECK(res.second == expected.second);
+ }
+ }
+
+ TEST_CASE("string_foreach_delim")
+ {
+ std::tuple<std::string_view, std::string_view, std::pair<std::vector<std::string_view>, std::vector<std::string_view>>> cases[] = {
+ {"test"sv, ","sv, {{"test"}, {"test"}}},
+ {"test,test"sv, ","sv, {{"test", "test"}, {"test", "test"}}},
+ {"test, test"sv, ", "sv, {{"test", "test"}, {"test", "", "test"}}},
+ {"test, test,,"sv, ", "sv, {{"test", "test"}, {"test", "", "test", ""}}},
+ };
+
+ for (const auto &c: cases) {
+ auto res = std::vector<std::string_view>();
+ string_foreach_delim(std::get<0>(c), std::get<1>(c), [&](const auto &v) {
+ res.push_back(v);
+ });
+
+ auto compare_vec = []<class T>(const std::vector<T> &v1, const std::vector<T> &v2) {
+ CHECK(v1.size() == v2.size());
+ for (size_t i = 0; i < v1.size(); ++i) {
+ CHECK(v1[i] == v2[i]);
+ }
+ };
+
+ compare_vec(res, std::get<2>(c).first);
+
+ res.clear();
+ // Perform the same test but with no skip empty
+ string_foreach_delim(
+ std::get<0>(c), std::get<1>(c), [&](const auto &v) {
+ res.push_back(v);
+ },
+ false);
+ compare_vec(res, std::get<2>(c).second);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/libutil/expression.c b/src/libutil/expression.c
new file mode 100644
index 0000000..957c47f
--- /dev/null
+++ b/src/libutil/expression.c
@@ -0,0 +1,1635 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "expression.h"
+#include "printf.h"
+#include "regexp.h"
+#include "util.h"
+#include "utlist.h"
+#include "ottery.h"
+#include "libserver/logger.h"
+#include "libcryptobox/cryptobox.h"
+#include <math.h>
+
+#define RSPAMD_EXPR_FLAG_NEGATE (1 << 0)
+#define RSPAMD_EXPR_FLAG_PROCESSED (1 << 1)
+
+#define MIN_RESORT_EVALS 50
+#define MAX_RESORT_EVALS 150
+
/*
 * Kind of an expression element; tells which member of the `p` union in
 * struct rspamd_expression_elt is in use.
 */
enum rspamd_expression_elt_type {
	ELT_OP = 0, /* operation, uses p.op */
	ELT_ATOM,   /* atom, uses p.atom */
	ELT_LIMIT   /* presumably a numeric constant stored in p.lim -- TODO confirm */
};
+
/*
 * Bit flags describing an operation, as returned by rspamd_expr_op_flags():
 * one arity flag (unary/binary/n-ary) combined with one class flag
 * (arithmetic/logical/comparison).
 */
enum rspamd_expression_op_flag {
	RSPAMD_EXPRESSION_UNARY = 1u << 0u,
	RSPAMD_EXPRESSION_BINARY = 1u << 1u,
	RSPAMD_EXPRESSION_NARY = 1u << 2u,
	RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
	RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
	RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
};
+
/* Operation together with its cached properties */
struct rspamd_expression_operation {
	enum rspamd_expression_op op;
	/* presumably the value of rspamd_expr_logic_priority(op) -- TODO confirm */
	guint logical_priority;
	/* bitmask of enum rspamd_expression_op_flag values */
	guint op_flags;
};
+
/* Single element of an expression: an operation, an atom or a limit */
struct rspamd_expression_elt {
	/* Selects the active member of `p` */
	enum rspamd_expression_elt_type type;
	union {
		rspamd_expression_atom_t *atom;        /* type == ELT_ATOM */
		struct rspamd_expression_operation op; /* type == ELT_OP */
		gdouble lim;                           /* presumably type == ELT_LIMIT */
	} p;

	/* NOTE(review): looks like a mask of RSPAMD_EXPR_FLAG_* -- confirm */
	gint flags;
	gint priority;
	gdouble value;
};
+
/* Parsed expression and the state attached to it */
struct rspamd_expression {
	/* Atom callbacks; `destroy` (if set) is called for every atom on teardown */
	const struct rspamd_atom_subr *subr;
	/* Array of struct rspamd_expression_elt */
	GArray *expressions;
	/* Stack used through rspamd_expr_stack_push/pop/peek */
	GPtrArray *expression_stack;
	/* Tree of the expression, released with g_node_destroy */
	GNode *ast;
	/* Identifier used by the debug logging macros, released with g_free */
	gchar *log_id;
	/* NOTE(review): presumably drive periodic re-sorting, see MIN/MAX_RESORT_EVALS -- confirm */
	guint next_resort;
	guint evals;
};
+
/* Arguments bundled together for processing an expression */
struct rspamd_expr_process_data {
	/* Opaque user data; presumably handed to process_closure -- TODO confirm */
	gpointer *ud;
	gint flags;
	/* != NULL if trace is collected */
	GPtrArray *trace;
	/* Callback of type rspamd_expression_process_cb */
	rspamd_expression_process_cb process_closure;
};
+
+#define msg_debug_expression(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_expression_log_id, "expression", e->log_id, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+
+#ifdef DEBUG_EXPRESSIONS
+#define msg_debug_expression_verbose(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_expression_log_id, "expression", e->log_id, \
+ RSPAMD_LOG_FUNC, \
+ __VA_ARGS__)
+#else
+#define msg_debug_expression_verbose(...) \
+ do { \
+ } while (0)
+#endif
+
+INIT_LOG_MODULE(expression)
+
+static GQuark
+rspamd_expr_quark(void)
+{
+ return g_quark_from_static_string("rspamd-expression");
+}
+
+static const gchar *RSPAMD_CONST_FUNCTION
+rspamd_expr_op_to_str(enum rspamd_expression_op op);
+static const gchar *
+rspamd_expr_op_to_str(enum rspamd_expression_op op)
+{
+ const gchar *op_str = NULL;
+
+ switch (op) {
+ case OP_AND:
+ op_str = "&";
+ break;
+ case OP_OR:
+ op_str = "|";
+ break;
+ case OP_MULT:
+ op_str = "*";
+ break;
+ case OP_PLUS:
+ op_str = "+";
+ break;
+ case OP_MINUS:
+ op_str = "-";
+ break;
+ case OP_DIVIDE:
+ op_str = "/";
+ break;
+ case OP_NOT:
+ op_str = "!";
+ break;
+ case OP_GE:
+ op_str = ">=";
+ break;
+ case OP_GT:
+ op_str = ">";
+ break;
+ case OP_LE:
+ op_str = "<=";
+ break;
+ case OP_LT:
+ op_str = "<";
+ break;
+ case OP_EQ:
+ op_str = "==";
+ break;
+ case OP_NE:
+ op_str = "!=";
+ break;
+ case OP_OBRACE:
+ op_str = "(";
+ break;
+ case OP_CBRACE:
+ op_str = ")";
+ break;
+ default:
+ op_str = "???";
+ break;
+ }
+
+ return op_str;
+}
+
+#define G_ARRAY_LAST(ar, type) (&g_array_index((ar), type, (ar)->len - 1))
+
/* Pushes `elt` on top of `stack` (appends it to the pointer array) */
static void
rspamd_expr_stack_elt_push(GPtrArray *stack,
						   gpointer elt)
{
	g_ptr_array_add(stack, elt);
}
+
+
+static gpointer
+rspamd_expr_stack_elt_pop(GPtrArray *stack)
+{
+ gpointer e;
+ gint idx;
+
+ if (stack->len == 0) {
+ return NULL;
+ }
+
+ idx = stack->len - 1;
+ e = g_ptr_array_index(stack, idx);
+ g_ptr_array_remove_index_fast(stack, idx);
+
+ return e;
+}
+
+
/* Pushes `elt` on the expression's own stack */
static void
rspamd_expr_stack_push(struct rspamd_expression *expr,
					   gpointer elt)
{
	rspamd_expr_stack_elt_push(expr->expression_stack, elt);
}
+
/* Pops the top element of the expression's own stack; NULL if it is empty */
static gpointer
rspamd_expr_stack_pop(struct rspamd_expression *expr)
{
	return rspamd_expr_stack_elt_pop(expr->expression_stack);
}
+
+static gpointer
+rspamd_expr_stack_peek(struct rspamd_expression *expr)
+{
+ gpointer e;
+ gint idx;
+ GPtrArray *stack = expr->expression_stack;
+
+ if (stack->len == 0) {
+ return NULL;
+ }
+
+ idx = stack->len - 1;
+ e = g_ptr_array_index(stack, idx);
+
+ return e;
+}
+
+/*
+ * Return operation priority
+ */
+static gint RSPAMD_CONST_FUNCTION
+rspamd_expr_logic_priority(enum rspamd_expression_op op);
+static gint
+rspamd_expr_logic_priority(enum rspamd_expression_op op)
+{
+ gint ret = 0;
+
+ switch (op) {
+ case OP_NOT:
+ ret = 7;
+ break;
+ case OP_MULT:
+ case OP_DIVIDE:
+ ret = 6;
+ break;
+ case OP_PLUS:
+ case OP_MINUS:
+ ret = 5;
+ break;
+ case OP_GE:
+ case OP_GT:
+ case OP_LE:
+ case OP_LT:
+ case OP_EQ:
+ case OP_NE:
+ ret = 4;
+ break;
+ case OP_AND:
+ ret = 3;
+ break;
+ case OP_OR:
+ ret = 2;
+ break;
+ case OP_OBRACE:
+ case OP_CBRACE:
+ ret = 1;
+ break;
+ case OP_INVALID:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static guint RSPAMD_CONST_FUNCTION
+rspamd_expr_op_flags(enum rspamd_expression_op op);
+
+static guint
+rspamd_expr_op_flags(enum rspamd_expression_op op)
+{
+ guint ret = 0;
+
+ switch (op) {
+ case OP_NOT:
+ ret |= RSPAMD_EXPRESSION_UNARY | RSPAMD_EXPRESSION_LOGICAL;
+ break;
+ case OP_MULT:
+ ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_DIVIDE:
+ ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_PLUS:
+ ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_MINUS:
+ ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_ARITHMETIC;
+ break;
+ case OP_GE:
+ case OP_GT:
+ case OP_LE:
+ case OP_LT:
+ case OP_EQ:
+ case OP_NE:
+ ret |= RSPAMD_EXPRESSION_BINARY | RSPAMD_EXPRESSION_COMPARISON;
+ break;
+ case OP_AND:
+ case OP_OR:
+ ret |= RSPAMD_EXPRESSION_NARY | RSPAMD_EXPRESSION_LOGICAL;
+ break;
+ case OP_OBRACE:
+ case OP_CBRACE:
+ case OP_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Return FALSE if symbol is not operation symbol (operand)
+ * Return TRUE if symbol is operation symbol
+ */
+static gboolean RSPAMD_CONST_FUNCTION
+rspamd_expr_is_operation_symbol(gchar a);
+static gboolean
+rspamd_expr_is_operation_symbol(gchar a)
+{
+ switch (a) {
+ case '!':
+ case '&':
+ case '|':
+ case '(':
+ case ')':
+ case '>':
+ case '<':
+ case '+':
+ case '*':
+ case '-':
+ case '/':
+ case '=':
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gboolean
+rspamd_expr_is_operation(struct rspamd_expression *e,
+ const gchar *p, const gchar *end, rspamd_regexp_t *num_re)
+{
+ if (rspamd_expr_is_operation_symbol(*p)) {
+ if (p + 1 < end) {
+ gchar t = *(p + 1);
+
+ if (t == ':') {
+ /* Special case, treat it as an atom */
+ }
+ else if (*p == '/') {
+ /* Lookahead for division operation to distinguish from regexp */
+ const gchar *track = p + 1;
+
+ /* Skip spaces */
+ while (track < end && g_ascii_isspace(*track)) {
+ track++;
+ }
+
+ /* Check for a number */
+ if (rspamd_regexp_search(num_re,
+ track,
+ end - track,
+ NULL,
+ NULL,
+ FALSE,
+ NULL)) {
+ msg_debug_expression_verbose("found divide operation");
+ return TRUE;
+ }
+
+ msg_debug_expression_verbose("false divide operation");
+ /* Fallback to PARSE_ATOM state */
+ }
+ else if (*p == '-') {
+ /* - is used in composites, so we need to distinguish - from
+ * 1) unary minus of a limit!
+ * 2) -BLAH in composites
+ * Decision is simple: require a space after binary `-` op
+ */
+ if (g_ascii_isspace(t)) {
+ return TRUE;
+ }
+ /* Fallback to PARSE_ATOM state */
+ msg_debug_expression_verbose("false minus operation");
+ }
+ else {
+ /* Generic operation */
+ return TRUE;
+ }
+ }
+ else {
+ /* Last op */
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+/*
+ * Recognises the operator token that starts at `a` (input ends at `end`).
+ *
+ * Returns the matching operation and stores the position right after the
+ * token in `*next`. When nothing matches, OP_INVALID is returned and `*next`
+ * is left untouched.
+ *
+ * Symbolic operators take one character, or two for `&&`, `||`, `!=`, `==`,
+ * `>=` and `<=`. The words `or`, `and` and `not` are matched without regard
+ * to case and only when at least one more character follows them.
+ */
+static enum rspamd_expression_op
+rspamd_expr_str_to_op(const gchar *a, const gchar *end, const gchar **next)
+{
+	enum rspamd_expression_op op = OP_INVALID;
+	/* Number of input characters that make up the recognised token */
+	gsize consumed = 1;
+	gboolean has_next;
+
+	g_assert(a < end);
+
+	/* TRUE when a[1] can be inspected without leaving the input */
+	has_next = (a < end - 1);
+
+	switch (*a) {
+	case '!':
+		if (has_next && a[1] == '=') {
+			op = OP_NE;
+			consumed = 2;
+		}
+		else {
+			op = OP_NOT;
+		}
+		break;
+	case '&':
+		op = OP_AND;
+		if (has_next && a[1] == '&') {
+			consumed = 2;
+		}
+		break;
+	case '|':
+		op = OP_OR;
+		if (has_next && a[1] == '|') {
+			consumed = 2;
+		}
+		break;
+	case '=':
+		op = OP_EQ;
+		if (has_next && a[1] == '=') {
+			consumed = 2;
+		}
+		break;
+	case '+':
+		op = OP_PLUS;
+		break;
+	case '*':
+		op = OP_MULT;
+		break;
+	case '/':
+		op = OP_DIVIDE;
+		break;
+	case '-':
+		op = OP_MINUS;
+		break;
+	case '(':
+		op = OP_OBRACE;
+		break;
+	case ')':
+		op = OP_CBRACE;
+		break;
+	case '>':
+		if (has_next && a[1] == '=') {
+			op = OP_GE;
+			consumed = 2;
+		}
+		else {
+			op = OP_GT;
+		}
+		break;
+	case '<':
+		if (has_next && a[1] == '=') {
+			op = OP_LE;
+			consumed = 2;
+		}
+		else {
+			op = OP_LT;
+		}
+		break;
+	case 'O':
+	case 'o':
+		/* sizeof() counts the terminator, so one extra character is required */
+		if ((gulong) (end - a) >= sizeof("or") &&
+			g_ascii_strncasecmp(a, "or", sizeof("or") - 1) == 0) {
+			op = OP_OR;
+			consumed = sizeof("or") - 1;
+		}
+		break;
+	case 'A':
+	case 'a':
+		if ((gulong) (end - a) >= sizeof("and") &&
+			g_ascii_strncasecmp(a, "and", sizeof("and") - 1) == 0) {
+			op = OP_AND;
+			consumed = sizeof("and") - 1;
+		}
+		break;
+	case 'N':
+	case 'n':
+		if ((gulong) (end - a) >= sizeof("not") &&
+			g_ascii_strncasecmp(a, "not", sizeof("not") - 1) == 0) {
+			op = OP_NOT;
+			consumed = sizeof("not") - 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (op != OP_INVALID) {
+		*next = a + consumed;
+	}
+
+	return op;
+}
+
+/*
+ * Releases an expression. When the subroutine table provides a `destroy`
+ * hook it is called for every ELT_ATOM element first; afterwards the element
+ * array, the operator stack, the AST, the log id and the structure itself
+ * are freed. A NULL `expr` is ignored.
+ */
+static void
+rspamd_expression_destroy(struct rspamd_expression *expr)
+{
+	if (expr == NULL) {
+		return;
+	}
+
+	if (expr->subr->destroy) {
+		/* Free atoms */
+		for (guint idx = 0; idx < expr->expressions->len; idx++) {
+			struct rspamd_expression_elt *cur = &g_array_index(expr->expressions,
+															   struct rspamd_expression_elt, idx);
+
+			if (cur->type == ELT_ATOM) {
+				expr->subr->destroy(cur->p.atom);
+			}
+		}
+	}
+
+	if (expr->expressions) {
+		g_array_free(expr->expressions, TRUE);
+	}
+
+	if (expr->expression_stack) {
+		g_ptr_array_free(expr->expression_stack, TRUE);
+	}
+
+	if (expr->ast) {
+		g_node_destroy(expr->ast);
+	}
+
+	/* g_free() accepts NULL, so no guard is needed for the log id */
+	g_free(expr->log_id);
+	g_free(expr);
+}
+
+/*
+ * Stores `parent` as the AST parent of `child` when `child` wraps an atom;
+ * nodes of any other element type are left untouched.
+ */
+static void
+rspamd_ast_set_atom_parent(GNode *child, GNode *parent)
+{
+	struct rspamd_expression_elt *child_elt = child->data;
+
+	if (child_elt->type == ELT_ATOM) {
+		child_elt->p.atom->parent = parent;
+	}
+}
+
+/*
+ * Applies operator `op` to the operands on top of the `operands` stack and
+ * pushes the resulting AST node back onto that stack.
+ *
+ * Unary operators consume one operand, all others consume two. For n-ary
+ * operators an operand that is already a node of the same operation absorbs
+ * the other operand as an extra child instead of creating a new level.
+ *
+ * @param e expression being built (referenced by the debug logging macros)
+ * @param operands stack of GNode operands
+ * @param op operator element (must be ELT_OP); becomes the data of a new node
+ * @param err filled with an EINVAL error when operands are missing
+ * @return TRUE on success; FALSE when an operand is missing, in which case
+ * no operand node is lost: anything popped is back on `operands`
+ */
+static gboolean
+rspamd_ast_add_node(struct rspamd_expression *e,
+					GPtrArray *operands,
+					struct rspamd_expression_elt *op,
+					GError **err)
+{
+
+	GNode *res, *a1, *a2, *test;
+
+	g_assert(op->type == ELT_OP);
+
+	if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
+		/* Unary operator */
+		struct rspamd_expression_elt *test_elt;
+
+		res = g_node_new(op);
+		a1 = rspamd_expr_stack_elt_pop(operands);
+
+		if (a1 == NULL) {
+			g_set_error(err, rspamd_expr_quark(), EINVAL, "no operand to "
+														  "unary '%s' operation",
+						rspamd_expr_op_to_str(op->p.op.op));
+			g_node_destroy(res);
+
+			return FALSE;
+		}
+
+		g_node_append(res, a1);
+		test_elt = a1->data;
+
+		if (test_elt->type == ELT_ATOM) {
+			test_elt->p.atom->parent = res;
+			msg_debug_expression("added unary op %s to AST; operand: %*s",
+								 rspamd_expr_op_to_str(op->p.op.op),
+								 (int) test_elt->p.atom->len, test_elt->p.atom->str);
+		}
+		else {
+			msg_debug_expression("added unary op %s to AST; operand type: %d",
+								 rspamd_expr_op_to_str(op->p.op.op),
+								 test_elt->type);
+		}
+	}
+	else {
+		struct rspamd_expression_elt *e1, *e2;
+		/* For binary/nary operators we might want to examine chains */
+		a2 = rspamd_expr_stack_elt_pop(operands);
+		a1 = rspamd_expr_stack_elt_pop(operands);
+
+		if (a2 == NULL) {
+			g_set_error(err, rspamd_expr_quark(), EINVAL, "no left operand to "
+														  "'%s' operation",
+						rspamd_expr_op_to_str(op->p.op.op));
+			return FALSE;
+		}
+
+		if (a1 == NULL) {
+			/*
+			 * a2 has already been popped: hand it back to the stack so that
+			 * its owner can release it, otherwise the node would be leaked.
+			 */
+			rspamd_expr_stack_elt_push(operands, a2);
+			g_set_error(err, rspamd_expr_quark(), EINVAL, "no right operand to "
+														  "'%s' operation",
+						rspamd_expr_op_to_str(op->p.op.op));
+			return FALSE;
+		}
+
+		/* Nary stuff */
+		if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
+			/*
+			 * We convert a set of ops like X + Y + Z to a nary tree like
+			 * X Y Z +
+			 * for the longest possible prefix of atoms/limits
+			 */
+
+			/* First try with a1 */
+			test = a1;
+			e1 = test->data;
+
+			if (e1->type == ELT_OP && e1->p.op.op == op->p.op.op) {
+				/* Add children */
+				g_node_append(test, a2);
+				/* Keep the atom's parent reference in sync with the tree */
+				rspamd_ast_set_atom_parent(a2, test);
+				rspamd_expr_stack_elt_push(operands, a1);
+
+				msg_debug_expression("added nary op %s to AST merged with the first operand",
+									 rspamd_expr_op_to_str(op->p.op.op));
+
+				return TRUE;
+			}
+
+			/* Now test a2 */
+			test = a2;
+			e2 = test->data;
+
+			if (e2->type == ELT_OP && e2->p.op.op == op->p.op.op) {
+				/* Add children */
+				g_node_prepend(test, a1);
+				/* Keep the atom's parent reference in sync with the tree */
+				rspamd_ast_set_atom_parent(a1, test);
+				rspamd_expr_stack_elt_push(operands, a2);
+
+				msg_debug_expression("added nary op %s to AST merged with the second operand",
+									 rspamd_expr_op_to_str(op->p.op.op));
+
+				return TRUE;
+			}
+		}
+
+		/* No optimizations possible, so create a new level */
+		res = g_node_new(op);
+		g_node_append(res, a1);
+		g_node_append(res, a2);
+
+		e1 = a1->data;
+		e2 = a2->data;
+
+		rspamd_ast_set_atom_parent(a1, res);
+		rspamd_ast_set_atom_parent(a2, res);
+
+		if (e1->type == ELT_ATOM && e2->type == ELT_ATOM) {
+			msg_debug_expression("added binary op %s to AST; operands: (%*s; %*s)",
+								 rspamd_expr_op_to_str(op->p.op.op),
+								 (int) e1->p.atom->len, e1->p.atom->str,
+								 (int) e2->p.atom->len, e2->p.atom->str);
+		}
+		else {
+			msg_debug_expression("added binary op %s to AST; operands (types): (%d; %d)",
+								 rspamd_expr_op_to_str(op->p.op.op),
+								 e1->type,
+								 e2->type);
+		}
+	}
+
+	/* Push back resulting node to the stack */
+	rspamd_expr_stack_elt_push(operands, res);
+
+	return TRUE;
+}
+
+/*
+ * GNode traversal callback that assigns `priority` to one AST node.
+ *
+ * A node with children receives the sum of its children's priorities.
+ * A leaf must not be an operation: a limit gets 0, an atom gets
+ * RSPAMD_EXPRESSION_MAX_PRIORITY reduced by the value returned from the
+ * subroutine `priority` hook (when there is one), and its hit counter is
+ * reset. `d` is the owning struct rspamd_expression.
+ *
+ * Always returns FALSE so that the traversal continues.
+ */
+static gboolean
+rspamd_ast_priority_traverse(GNode *node, gpointer d)
+{
+	struct rspamd_expression_elt *self = node->data;
+	struct rspamd_expression *expr = d;
+
+	if (node->children) {
+		gint total = 0;
+
+		for (GNode *child = node->children; child != NULL; child = child->next) {
+			struct rspamd_expression_elt *child_elt = child->data;
+
+			total += child_elt->priority;
+		}
+
+		self->priority = total;
+
+		return FALSE;
+	}
+
+	/* It is atom or limit */
+	g_assert(self->type != ELT_OP);
+
+	if (self->type == ELT_LIMIT) {
+		/* Always push limit first */
+		self->priority = 0;
+
+		return FALSE;
+	}
+
+	if (expr->subr->priority != NULL) {
+		self->priority = RSPAMD_EXPRESSION_MAX_PRIORITY -
+						 expr->subr->priority(self->p.atom);
+	}
+	else {
+		self->priority = RSPAMD_EXPRESSION_MAX_PRIORITY;
+	}
+
+	self->p.atom->hits = 0;
+
+	return FALSE;
+}
+
+/*
+ * Dynamic weight of an atom element: its hit counter divided by the mean
+ * execution time scaled by 10000000, or by 1.0 when no positive mean is known.
+ */
+#define ATOM_PRIORITY(a) ((a)->p.atom->hits / ((a)->p.atom->exec_time.mean > 0 ? (a)->p.atom->exec_time.mean * 10000000 : 1.0))
+
+/*
+ * Ordering callback used when children of an AST node are sorted.
+ *
+ * - if `a` is a limit it sorts after `b`; otherwise if `b` is a limit,
+ *   `a` sorts before it;
+ * - two atoms with the same static priority are ordered by ATOM_PRIORITY;
+ *   as a side effect the hit counter of `a` is reset;
+ * - everything else is ordered by the `priority` field.
+ *
+ * Returns a negative, zero or positive value in the usual comparator sense.
+ */
+static gint
+rspamd_ast_priority_cmp(GNode *a, GNode *b)
+{
+	struct rspamd_expression_elt *ea = a->data, *eb = b->data;
+	gdouble w1, w2;
+
+	if (ea->type == ELT_LIMIT) {
+		return 1;
+	}
+	else if (eb->type == ELT_LIMIT) {
+		return -1;
+	}
+
+	/* Special logic for atoms */
+	if (ea->type == ELT_ATOM && eb->type == ELT_ATOM &&
+		ea->priority == eb->priority) {
+		w1 = ATOM_PRIORITY(ea);
+		w2 = ATOM_PRIORITY(eb);
+
+		ea->p.atom->hits = 0;
+
+		/*
+		 * Three-way comparison: converting `w1 - w2` to an integer would
+		 * turn fractional differences into 0 and is undefined when the
+		 * difference does not fit into gint.
+		 */
+		return (w1 > w2) - (w1 < w2);
+	}
+
+	return ea->priority - eb->priority;
+}
+
+/*
+ * GNode traversal callback that re-orders the children of logical operation
+ * nodes with rspamd_ast_priority_cmp. Nodes of any other kind, and nodes
+ * without children, are left alone because reordering them is dangerous.
+ *
+ * Always returns FALSE so that the traversal continues.
+ */
+static gboolean
+rspamd_ast_resort_traverse(GNode *node, gpointer unused)
+{
+	struct rspamd_expression_elt *elt = (struct rspamd_expression_elt *) node->data;
+
+	if (elt->type != ELT_OP || !(elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL)) {
+		return FALSE;
+	}
+
+	if (node->children == NULL) {
+		return FALSE;
+	}
+
+	/* Needed for utlist compatibility: head->prev has to point to the tail */
+	node->children->prev = g_node_last_sibling(node->children);
+	DL_SORT(node->children, rspamd_ast_priority_cmp);
+	/* Restore GLIB compatibility: the first child has no previous sibling */
+	node->children->prev = NULL;
+
+	return FALSE;
+}
+
+/*
+ * Returns a byte-wise copy of `elt` allocated from `pool`.
+ */
+static struct rspamd_expression_elt *
+rspamd_expr_dup_elt(rspamd_mempool_t *pool, struct rspamd_expression_elt *elt)
+{
+	struct rspamd_expression_elt *copy = rspamd_mempool_alloc(pool, sizeof(*copy));
+
+	memcpy(copy, elt, sizeof(*copy));
+
+	return copy;
+}
+
+/*
+ * Parses `line` into an expression using the shunting-yard algorithm.
+ *
+ * Atoms are recognised by `subr->parse`, numbers following an arithmetic or
+ * comparison operator become limits, and operators are folded into an AST
+ * through rspamd_ast_add_node. After parsing, branch priorities are computed
+ * and children of logical operations are sorted.
+ *
+ * @param line expression text
+ * @param len length of `line`; when 0, strlen(line) is used
+ * @param subr atom subroutines (`parse` is mandatory)
+ * @param subr_data opaque pointer passed to `subr->parse`
+ * @param pool memory pool used for element copies; when `target` is set the
+ * expression destructor is registered in this pool
+ * @param err error sink; codes used here are 500 (atom cannot be parsed or bad
+ * operator), 400 (empty number), 600 (braces mismatch) and 601 (operators
+ * mismatch)
+ * @param target receives the parsed expression; when NULL the expression is
+ * destroyed right after a successful parse
+ * @return TRUE on success, FALSE on error (everything built so far is freed)
+ */
+gboolean
+rspamd_parse_expression(const gchar *line, gsize len,
+						const struct rspamd_atom_subr *subr, gpointer subr_data,
+						rspamd_mempool_t *pool, GError **err,
+						struct rspamd_expression **target)
+{
+	struct rspamd_expression *e;
+	struct rspamd_expression_elt elt;
+	rspamd_expression_atom_t *atom;
+	rspamd_regexp_t *num_re;
+	enum rspamd_expression_op op, op_stack;
+	const gchar *p, *c, *end;
+	GPtrArray *operand_stack;
+	GNode *tmp;
+
+	enum {
+		PARSE_ATOM = 0,
+		PARSE_OP,
+		PARSE_LIM,
+		SKIP_SPACES
+	} state = PARSE_ATOM;
+
+	g_assert(line != NULL);
+	g_assert(subr != NULL && subr->parse != NULL);
+
+	if (len == 0) {
+		len = strlen(line);
+	}
+
+	memset(&elt, 0, sizeof(elt));
+	num_re = rspamd_regexp_cache_create(NULL,
+										"/^(?:[+-]?([0-9]*[.])?[0-9]+)(?:\\s+|[)]|$)/", NULL, NULL);
+
+	p = line;
+	c = line;
+	end = line + len;
+	e = g_malloc0(sizeof(*e));
+	e->expressions = g_array_new(FALSE, FALSE,
+								 sizeof(struct rspamd_expression_elt));
+	operand_stack = g_ptr_array_sized_new(32);
+	e->ast = NULL;
+	e->expression_stack = g_ptr_array_sized_new(32);
+	e->subr = subr;
+	e->evals = 0;
+	e->next_resort = ottery_rand_range(MAX_RESORT_EVALS) + MIN_RESORT_EVALS;
+	e->log_id = g_malloc0(RSPAMD_LOG_ID_LEN + 1);
+	guint64 h = rspamd_cryptobox_fast_hash(line, len, 0xdeadbabe);
+	rspamd_snprintf(e->log_id, RSPAMD_LOG_ID_LEN + 1, "%xL", h);
+	msg_debug_expression("start to parse expression '%*s'", (int) len, line);
+
+	/* Shunting-yard algorithm */
+	while (p < end) {
+		switch (state) {
+		case PARSE_ATOM:
+			if (g_ascii_isspace(*p)) {
+				state = SKIP_SPACES;
+				continue;
+			}
+			else if (rspamd_expr_is_operation(e, p, end, num_re)) {
+				/* Lookahead */
+				state = PARSE_OP;
+				continue;
+			}
+
+			/*
+			 * First of all, we check some pre-conditions:
+			 * 1) if we have 'and ' or 'or ' or 'not ' strings, they are op
+			 * 2) if we have full numeric string, then we check for
+			 * the following expression:
+			 *  ^\d+\s*[><]$
+			 * and check the operation on stack
+			 */
+			if ((gulong) (end - p) > sizeof("and ") &&
+				(g_ascii_strncasecmp(p, "and ", sizeof("and ") - 1) == 0 ||
+				 g_ascii_strncasecmp(p, "not ", sizeof("not ") - 1) == 0)) {
+				state = PARSE_OP;
+			}
+			else if ((gulong) (end - p) > sizeof("or ") &&
+					 g_ascii_strncasecmp(p, "or ", sizeof("or ") - 1) == 0) {
+				state = PARSE_OP;
+			}
+			else {
+				/*
+				 * If we have any comparison or arithmetic operator in the stack, then try
+				 * to parse limit
+				 */
+				op = GPOINTER_TO_INT(rspamd_expr_stack_peek(e));
+
+				if (op == OP_MULT || op == OP_MINUS || op == OP_DIVIDE ||
+					op == OP_PLUS || (op >= OP_LT && op <= OP_NE)) {
+					if (rspamd_regexp_search(num_re,
+											 p,
+											 end - p,
+											 NULL,
+											 NULL,
+											 FALSE,
+											 NULL)) {
+						c = p;
+						state = PARSE_LIM;
+						continue;
+					}
+					/* Fallback to atom parsing */
+				}
+
+				/* Try to parse atom */
+				atom = subr->parse(p, end - p, pool, subr_data, err);
+				if (atom == NULL || atom->len == 0) {
+					/* We couldn't parse the atom, so go out */
+					if (err != NULL && *err == NULL) {
+						g_set_error(err,
+									rspamd_expr_quark(),
+									500,
+									"Cannot parse atom: callback function failed"
+									" to parse '%.*s'",
+									(int) (end - p),
+									p);
+					}
+					goto error_label;
+				}
+
+				if (atom->str == NULL) {
+					atom->str = p;
+				}
+
+				p = p + atom->len;
+
+				/* Push to output */
+				elt.type = ELT_ATOM;
+				elt.p.atom = atom;
+				g_array_append_val(e->expressions, elt);
+				rspamd_expr_stack_elt_push(operand_stack,
+										   g_node_new(rspamd_expr_dup_elt(pool, &elt)));
+				msg_debug_expression("found atom: %*s; pushed onto operand stack (%d size)",
+									 (int) atom->len, atom->str, operand_stack->len);
+			}
+			break;
+		case PARSE_LIM:
+			if ((g_ascii_isdigit(*p) || *p == '-' || *p == '.') && p < end - 1) {
+				p++;
+			}
+			else {
+				if (p == end - 1 && g_ascii_isdigit(*p)) {
+					p++;
+				}
+
+				if (p - c > 0) {
+					elt.type = ELT_LIMIT;
+					elt.p.lim = strtod(c, NULL);
+					g_array_append_val(e->expressions, elt);
+					rspamd_expr_stack_elt_push(operand_stack,
+											   g_node_new(rspamd_expr_dup_elt(pool, &elt)));
+					msg_debug_expression("found limit: %.1f; pushed onto operand stack (%d size)",
+										 elt.p.lim, operand_stack->len);
+					c = p;
+					state = SKIP_SPACES;
+				}
+				else {
+					g_set_error(err, rspamd_expr_quark(), 400, "Empty number");
+					goto error_label;
+				}
+			}
+			break;
+		case PARSE_OP:
+			op = rspamd_expr_str_to_op(p, end, &p);
+			if (op == OP_INVALID) {
+				g_set_error(err, rspamd_expr_quark(), 500, "Bad operator %c",
+							*p);
+				goto error_label;
+			}
+			else if (op == OP_OBRACE) {
+				/*
+				 * If the token is a left parenthesis, then push it onto
+				 * the stack.
+				 */
+				rspamd_expr_stack_push(e, GINT_TO_POINTER(op));
+				msg_debug_expression("found obrace, pushed to operators stack (%d size)",
+									 e->expression_stack->len);
+			}
+			else if (op == OP_CBRACE) {
+				/*
+				 * Until the token at the top of the stack is a left
+				 * parenthesis, pop operators off the stack onto the
+				 * output queue.
+				 *
+				 * Pop the left parenthesis from the stack,
+				 * but not onto the output queue.
+				 *
+				 * If the stack runs out without finding a left parenthesis,
+				 * then there are mismatched parentheses.
+				 */
+				msg_debug_expression("found cbrace, rewind operators stack (%d size)",
+									 e->expression_stack->len);
+
+				do {
+					op = GPOINTER_TO_INT(rspamd_expr_stack_pop(e));
+
+					if (op == OP_INVALID) {
+						g_set_error(err, rspamd_expr_quark(), 600,
+									"Braces mismatch");
+						goto error_label;
+					}
+
+					guint op_priority = rspamd_expr_logic_priority(op);
+					msg_debug_expression("found op: %s; priority = %d",
+										 rspamd_expr_op_to_str(op), op_priority);
+
+					if (op != OP_OBRACE) {
+						elt.type = ELT_OP;
+						elt.p.op.op = op;
+						elt.p.op.op_flags = rspamd_expr_op_flags(op);
+						elt.p.op.logical_priority = op_priority;
+						g_array_append_val(e->expressions, elt);
+
+						if (!rspamd_ast_add_node(e, operand_stack,
+												 rspamd_expr_dup_elt(pool, &elt), err)) {
+							goto error_label;
+						}
+					}
+
+				} while (op != OP_OBRACE);
+			}
+			else {
+				/*
+				 * While there is an operator token, o2, at the top of
+				 * the operator stack, and either:
+				 *
+				 * - o1 is left-associative and its precedence is less than
+				 * or equal to that of o2, or
+				 * - o1 is right associative, and has precedence less than
+				 * that of o2,
+				 *
+				 * then pop o2 off the operator stack, onto the output queue;
+				 *
+				 * push o1 onto the operator stack.
+				 */
+
+				for (;;) {
+					op_stack = GPOINTER_TO_INT(rspamd_expr_stack_pop(e));
+
+					if (op_stack == OP_INVALID) {
+						/* Stack is empty */
+						msg_debug_expression("no operations in operators stack");
+						break;
+					}
+
+					/* We ignore associativity for now */
+					guint op_priority = rspamd_expr_logic_priority(op),
+						  stack_op_priority = rspamd_expr_logic_priority(op_stack);
+
+					msg_debug_expression("operators stack %d; operands stack: %d; "
+										 "process operation '%s'(%d); pop operation '%s'(%d)",
+										 e->expression_stack->len,
+										 operand_stack->len,
+										 rspamd_expr_op_to_str(op), op_priority,
+										 rspamd_expr_op_to_str(op_stack), stack_op_priority);
+
+					if (op_stack != OP_OBRACE &&
+						op_priority < stack_op_priority) {
+						elt.type = ELT_OP;
+						elt.p.op.op = op_stack;
+						elt.p.op.op_flags = rspamd_expr_op_flags(op_stack);
+						/*
+						 * The emitted element describes the popped operator,
+						 * so it carries that operator's own priority, exactly
+						 * as the other two emission sites do.
+						 */
+						elt.p.op.logical_priority = stack_op_priority;
+
+						g_array_append_val(e->expressions, elt);
+
+						if (!rspamd_ast_add_node(e, operand_stack,
+												 rspamd_expr_dup_elt(pool, &elt), err)) {
+							goto error_label;
+						}
+					}
+					else {
+						/* Push op_stack back */
+						msg_debug_expression("operators stack %d; operands stack: %d; "
+											 "process operation '%s'(%d); push back to stack '%s'(%d)",
+											 e->expression_stack->len,
+											 operand_stack->len,
+											 rspamd_expr_op_to_str(op), op_priority,
+											 rspamd_expr_op_to_str(op_stack), stack_op_priority);
+						rspamd_expr_stack_push(e, GINT_TO_POINTER(op_stack));
+						break;
+					}
+				}
+
+				/* Push new operator itself */
+				msg_debug_expression("operators stack %d; operands stack: %d; "
+									 "process operation '%s'; push to stack",
+									 e->expression_stack->len,
+									 operand_stack->len,
+									 rspamd_expr_op_to_str(op));
+				rspamd_expr_stack_push(e, GINT_TO_POINTER(op));
+			}
+
+			state = SKIP_SPACES;
+			break;
+		case SKIP_SPACES:
+			if (g_ascii_isspace(*p)) {
+				p++;
+			}
+			/*
+			 * Look ahead only while input remains: once `p` reaches `end`
+			 * the loop terminates anyway, and a lookahead there would
+			 * inspect memory past the declared length of the line.
+			 */
+			if (p < end && rspamd_expr_is_operation(e, p, end, num_re)) {
+				/* Lookahead */
+				state = PARSE_OP;
+			}
+			else {
+				state = PARSE_ATOM;
+			}
+			break;
+		}
+	}
+
+	/* Now we process the stack and push operators to the output */
+	while ((op_stack = GPOINTER_TO_INT(rspamd_expr_stack_pop(e))) != OP_INVALID) {
+		msg_debug_expression("operators stack %d; operands stack: %d; "
+							 "rewind stack; op: %s",
+							 e->expression_stack->len,
+							 operand_stack->len,
+							 rspamd_expr_op_to_str(op_stack));
+
+		if (op_stack != OP_OBRACE) {
+			elt.type = ELT_OP;
+			elt.p.op.op = op_stack;
+			elt.p.op.op_flags = rspamd_expr_op_flags(op_stack);
+			elt.p.op.logical_priority = rspamd_expr_logic_priority(op_stack);
+
+			g_array_append_val(e->expressions, elt);
+			if (!rspamd_ast_add_node(e, operand_stack,
+									 rspamd_expr_dup_elt(pool, &elt), err)) {
+				goto error_label;
+			}
+		}
+		else {
+			g_set_error(err, rspamd_expr_quark(), 600,
+						"Braces mismatch");
+			goto error_label;
+		}
+	}
+
+	if (operand_stack->len != 1) {
+		g_set_error(err, rspamd_expr_quark(), 601,
+					"Operators mismatch: %d elts in stack", operand_stack->len);
+		goto error_label;
+	}
+
+	e->ast = rspamd_expr_stack_elt_pop(operand_stack);
+	g_ptr_array_free(operand_stack, TRUE);
+
+	/* Set priorities for branches */
+	g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_priority_traverse, e);
+
+	/* Now set less expensive branches to be evaluated first */
+	g_node_traverse(e->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
+					rspamd_ast_resort_traverse, NULL);
+
+	if (target) {
+		*target = e;
+		rspamd_mempool_add_destructor(pool,
+									  (rspamd_mempool_destruct_t) rspamd_expression_destroy, e);
+	}
+	else {
+		rspamd_expression_destroy(e);
+	}
+
+	return TRUE;
+
+error_label:
+	if (err && *err) {
+		msg_debug_expression("fatal expression parse error: %e", *err);
+	}
+
+	/* Partial sub-trees still waiting on the operand stack are owned here */
+	while ((tmp = rspamd_expr_stack_elt_pop(operand_stack)) != NULL) {
+		g_node_destroy(tmp);
+	}
+
+	g_ptr_array_free(operand_stack, TRUE);
+	rspamd_expression_destroy(e);
+
+	return FALSE;
+}
+
+/*
+ * Node optimizer function: tells whether evaluation of the remaining
+ * children of operation `elt` can be skipped given the accumulated value
+ * `acc`: always for NOT, for AND once `acc` equals 0, for OR once `acc`
+ * differs from 0, never for any other operation.
+ */
+static gboolean
+rspamd_ast_node_done(struct rspamd_expression_elt *elt, gdouble acc)
+{
+	g_assert(elt->type == ELT_OP);
+
+	if (elt->p.op.op == OP_NOT) {
+		return TRUE;
+	}
+
+	if (elt->p.op.op == OP_AND) {
+		return acc == 0;
+	}
+
+	if (elt->p.op.op == OP_OR) {
+		return acc != 0;
+	}
+
+	return FALSE;
+}
+
+
+/*
+ * Applies the unary operation described by `elt` to `operand`.
+ *
+ * Only OP_NOT is supported: the result is 1.0 when the magnitude of
+ * `operand` does not exceed DBL_EPSILON and 0.0 otherwise. Any other
+ * operation is a programming error and trips g_assert_not_reached().
+ */
+static gdouble
+rspamd_ast_do_unary_op(struct rspamd_expression_elt *elt, gdouble operand)
+{
+	/* Defined value even if the assertion below is compiled out */
+	gdouble ret = 0.0;
+
+	g_assert(elt->type == ELT_OP);
+
+	switch (elt->p.op.op) {
+	case OP_NOT:
+		ret = fabs(operand) > DBL_EPSILON ? 0.0 : 1.0;
+		break;
+	default:
+		g_assert_not_reached();
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Applies the strictly binary operation described by `elt` to `op1` and
+ * `op2` (in that order): subtraction, division and the six comparisons,
+ * the latter yielding 1 or 0. Unary and n-ary operations are a programming
+ * error here and trip g_assert_not_reached().
+ */
+static gdouble
+rspamd_ast_do_binary_op(struct rspamd_expression_elt *elt, gdouble op1, gdouble op2)
+{
+	/* Defined value even if the assertion below is compiled out */
+	gdouble ret = 0.0;
+
+	g_assert(elt->type == ELT_OP);
+
+	switch (elt->p.op.op) {
+	case OP_MINUS:
+		ret = op1 - op2;
+		break;
+	case OP_DIVIDE:
+		ret = op1 / op2;
+		break;
+	case OP_GE:
+		ret = op1 >= op2;
+		break;
+	case OP_GT:
+		ret = op1 > op2;
+		break;
+	case OP_LE:
+		ret = op1 <= op2;
+		break;
+	case OP_LT:
+		ret = op1 < op2;
+		break;
+	case OP_EQ:
+		ret = op1 == op2;
+		break;
+	case OP_NE:
+		ret = op1 != op2;
+		break;
+
+	case OP_NOT:
+	case OP_PLUS:
+	case OP_MULT:
+	case OP_AND:
+	case OP_OR:
+	default:
+		g_assert_not_reached();
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Folds one more operand `val` into the accumulator `acc` of the n-ary
+ * operation described by `elt`.
+ *
+ * A NaN accumulator means "nothing accumulated yet", in which case `val` is
+ * returned unchanged. Otherwise PLUS and MULT are arithmetic, AND and OR
+ * treat a magnitude above DBL_EPSILON as true and yield 1 or 0. Any other
+ * operation is a programming error and trips g_assert_not_reached().
+ */
+static gdouble
+rspamd_ast_do_nary_op(struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
+{
+	/* Defined value even if the assertion below is compiled out */
+	gdouble ret = 0.0;
+
+	g_assert(elt->type == ELT_OP);
+
+	if (isnan(acc)) {
+		return val;
+	}
+
+	switch (elt->p.op.op) {
+	case OP_PLUS:
+		ret = acc + val;
+		break;
+	case OP_MULT:
+		ret = acc * val;
+		break;
+	case OP_AND:
+		ret = (fabs(acc) > DBL_EPSILON) && (fabs(val) > DBL_EPSILON);
+		break;
+	case OP_OR:
+		ret = (fabs(acc) > DBL_EPSILON) || (fabs(val) > DBL_EPSILON);
+		break;
+	default:
+	case OP_NOT:
+	case OP_MINUS:
+	case OP_DIVIDE:
+	case OP_GE:
+	case OP_GT:
+	case OP_LE:
+	case OP_LT:
+	case OP_EQ:
+	case OP_NE:
+		g_assert_not_reached();
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Recursively evaluates the AST rooted at `node` and returns its value.
+ *
+ * - Atom: evaluated through `process_data->process_closure` once per run
+ *   (the result is cached in the element and marked as processed). A value
+ *   whose magnitude exceeds DBL_EPSILON counts as a hit and the atom is
+ *   appended to `process_data->trace` when tracing is on. Roughly one call
+ *   in 256 is timed to update the atom's execution time average.
+ * - Limit: its numeric value.
+ * - Operation: children are evaluated and combined by the n-ary, binary or
+ *   unary helper according to the operation flags. N-ary evaluation stops
+ *   early when rspamd_ast_node_done allows it, unless
+ *   RSPAMD_EXPRESSION_FLAG_NOOPT is set in `process_data->flags`.
+ *
+ * NaN is returned when nothing has been evaluated.
+ */
+static gdouble
+rspamd_ast_process_node(struct rspamd_expression *e, GNode *node,
+						struct rspamd_expr_process_data *process_data)
+{
+	struct rspamd_expression_elt *elt = node->data;
+	gdouble acc = NAN;
+	__attribute__((unused)) const gchar *op_name = NULL;
+
+	if (elt->type == ELT_ATOM) {
+		if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
+			float t1 = 0, t2;
+			/*
+			 * Check once per 256 evaluations approx
+			 */
+			gboolean calc_ticks = (rspamd_random_uint64_fast() & 0xff) == 0xff;
+
+			if (calc_ticks) {
+				t1 = rspamd_get_ticks(TRUE);
+			}
+
+			elt->value = process_data->process_closure(process_data->ud, elt->p.atom);
+
+			if (fabs(elt->value) > DBL_EPSILON) {
+				elt->p.atom->hits++;
+
+				if (process_data->trace) {
+					g_ptr_array_add(process_data->trace, elt->p.atom);
+				}
+			}
+
+			if (calc_ticks) {
+				t2 = rspamd_get_ticks(TRUE);
+				rspamd_set_counter_ema(&elt->p.atom->exec_time, (t2 - t1), 0.5f);
+			}
+
+			elt->flags |= RSPAMD_EXPR_FLAG_PROCESSED;
+		}
+
+		acc = elt->value;
+		msg_debug_expression_verbose("atom: elt=%s; acc=%.1f", elt->p.atom->str, acc);
+
+		return acc;
+	}
+
+	if (elt->type == ELT_LIMIT) {
+		acc = elt->p.lim;
+		msg_debug_expression_verbose("limit: lim=%.1f; acc=%.1f;", elt->p.lim, acc);
+
+		return acc;
+	}
+
+	if (elt->type != ELT_OP) {
+		return acc;
+	}
+
+	g_assert(node->children != NULL);
+#ifdef DEBUG_EXPRESSIONS
+	op_name = rspamd_expr_op_to_str(elt->p.op.op);
+#endif
+
+	if (elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
+		msg_debug_expression_verbose("proceed nary operation %s", op_name);
+
+		/* Proceed all ops in chain */
+		for (GNode *cld = node->children; cld != NULL; cld = cld->next) {
+			gdouble val = rspamd_ast_process_node(e, cld, process_data);
+
+			msg_debug_expression_verbose("before op: op=%s; acc=%.1f; val = %.2f", op_name,
+										 acc, val);
+			acc = rspamd_ast_do_nary_op(elt, val, acc);
+			msg_debug_expression_verbose("after op: op=%s; acc=%.1f; val = %.2f", op_name,
+										 acc, val);
+
+			/* Check if we need to process further */
+			if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT) &&
+				rspamd_ast_node_done(elt, acc)) {
+				msg_debug_expression_verbose("optimizer: done");
+				return acc;
+			}
+		}
+	}
+	else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_BINARY) {
+		GNode *left = node->children;
+		GNode *right = left->next;
+		gdouble val1, val2;
+
+		/* Exactly two operands are expected */
+		g_assert(right->next == NULL);
+
+		msg_debug_expression_verbose("proceed binary operation %s",
+									 op_name);
+		val1 = rspamd_ast_process_node(e, left, process_data);
+		val2 = rspamd_ast_process_node(e, right, process_data);
+
+		msg_debug_expression_verbose("before op: op=%s; op1 = %.1f, op2 = %.1f",
+									 op_name, val1, val2);
+		acc = rspamd_ast_do_binary_op(elt, val1, val2);
+		msg_debug_expression_verbose("after op: op=%s; res=%.1f",
+									 op_name, acc);
+	}
+	else if (elt->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
+		GNode *only = node->children;
+		gdouble val;
+
+		/* Exactly one operand is expected */
+		g_assert(only->next == NULL);
+
+		msg_debug_expression_verbose("proceed unary operation %s",
+									 op_name);
+		val = rspamd_ast_process_node(e, only, process_data);
+
+		msg_debug_expression_verbose("before op: op=%s; op1 = %.1f",
+									 op_name, val);
+		acc = rspamd_ast_do_unary_op(elt, val);
+		msg_debug_expression_verbose("after op: op=%s; res=%.1f",
+									 op_name, acc);
+	}
+
+	return acc;
+}
+
+/*
+ * GNode traversal callback that clears the cached value and the flags of a
+ * node's element so that the next evaluation starts from scratch.
+ * Always returns FALSE so that the traversal continues.
+ */
+static gboolean
+rspamd_ast_cleanup_traverse(GNode *n, gpointer d)
+{
+	struct rspamd_expression_elt *node_elt = n->data;
+
+	node_elt->flags = 0;
+	node_elt->value = 0;
+
+	return FALSE;
+}
+
+/*
+ * Evaluates `expr`, calling `cb` (with `runtime_ud`) for every atom that has
+ * to be computed, and returns the value of the expression.
+ *
+ * When `track` is not NULL a new pointer array is stored in `*track` and
+ * filled with the atoms that matched during this run. After evaluation the
+ * per-run state of every node is cleared, and each time the evaluation
+ * counter is a multiple of `next_resort` a new resort interval is drawn,
+ * priorities are recomputed and logical branches are re-sorted.
+ */
+gdouble
+rspamd_process_expression_closure(struct rspamd_expression *expr,
+								  rspamd_expression_process_cb cb,
+								  gint flags,
+								  gpointer runtime_ud,
+								  GPtrArray **track)
+{
+	struct rspamd_expr_process_data pd;
+	gdouble result = 0;
+
+	g_assert(expr != NULL);
+	/* Ensure that stack is empty at this point */
+	g_assert(expr->expression_stack->len == 0);
+
+	expr->evals++;
+
+	memset(&pd, 0, sizeof(pd));
+	pd.ud = runtime_ud;
+	pd.flags = flags;
+	pd.process_closure = cb;
+
+	if (track) {
+		pd.trace = g_ptr_array_sized_new(32);
+		*track = pd.trace;
+	}
+
+	result = rspamd_ast_process_node(expr, expr->ast, &pd);
+
+	/* Cleanup */
+	g_node_traverse(expr->ast, G_IN_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_cleanup_traverse, NULL);
+
+	/* Check if we need to resort */
+	if (expr->evals % expr->next_resort != 0) {
+		return result;
+	}
+
+	expr->next_resort = ottery_rand_range(MAX_RESORT_EVALS) +
+						MIN_RESORT_EVALS;
+
+	/* Set priorities for branches */
+	g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_priority_traverse, expr);
+
+	/* Now set less expensive branches to be evaluated first */
+	g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_NON_LEAVES, -1,
+					rspamd_ast_resort_traverse, NULL);
+
+	return result;
+}
+
+/*
+ * Evaluates `expr` with the `process` callback of its own subroutine table,
+ * optionally collecting matched atoms into `*track`.
+ */
+gdouble
+rspamd_process_expression_track(struct rspamd_expression *expr,
+								gint flags,
+								gpointer runtime_ud,
+								GPtrArray **track)
+{
+	rspamd_expression_process_cb atom_cb = expr->subr->process;
+
+	return rspamd_process_expression_closure(expr, atom_cb, flags,
+											 runtime_ud, track);
+}
+
+/*
+ * Evaluates `expr` with the `process` callback of its own subroutine table
+ * without tracking matched atoms.
+ */
+gdouble
+rspamd_process_expression(struct rspamd_expression *expr,
+						  gint flags,
+						  gpointer runtime_ud)
+{
+	rspamd_expression_process_cb atom_cb = expr->subr->process;
+
+	return rspamd_process_expression_closure(expr, atom_cb, flags,
+											 runtime_ud, NULL);
+}
+
+/*
+ * GNode traversal callback that appends the textual form of one node,
+ * followed by a space, to the GString passed in `d`:
+ * atoms as "(name)", limits as an integer when they have no fractional part
+ * and as a floating point number otherwise, operations by their string form
+ * followed by "(N)" when they have more than two children.
+ * Always returns FALSE so that the traversal continues.
+ */
+static gboolean
+rspamd_ast_string_traverse(GNode *n, gpointer d)
+{
+	GString *out = d;
+	struct rspamd_expression_elt *elt = n->data;
+
+	switch (elt->type) {
+	case ELT_ATOM:
+		rspamd_printf_gstring(out, "(%*s)",
+							  (int) elt->p.atom->len, elt->p.atom->str);
+		break;
+	case ELT_LIMIT:
+		if (elt->p.lim == (double) (gint64) elt->p.lim) {
+			rspamd_printf_gstring(out, "%L", (gint64) elt->p.lim);
+		}
+		else {
+			rspamd_printf_gstring(out, "%f", elt->p.lim);
+		}
+		break;
+	default: {
+		const char *op_str = rspamd_expr_op_to_str(elt->p.op.op);
+
+		g_string_append(out, op_str);
+
+		if (n->children) {
+			gint nchildren;
+			GNode *cur;
+
+			LL_COUNT(n->children, cur, nchildren);
+
+			if (nchildren > 2) {
+				/* Print n-ary of the operator */
+				g_string_append_printf(out, "(%d)", nchildren);
+			}
+		}
+		break;
+	}
+	}
+
+	g_string_append_c(out, ' ');
+
+	return FALSE;
+}
+
+/*
+ * Renders the AST of `expr` in post-order into a newly created GString,
+ * elements separated by single spaces, and returns that string.
+ */
+GString *
+rspamd_expression_tostring(struct rspamd_expression *expr)
+{
+	GString *rendered;
+
+	g_assert(expr != NULL);
+
+	rendered = g_string_new(NULL);
+	g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_string_traverse, rendered);
+
+	/* Every element is followed by a space: drop the trailing one */
+	if (rendered->len > 0) {
+		g_string_erase(rendered, rendered->len - 1, 1);
+	}
+
+	return rendered;
+}
+
+struct atom_foreach_cbdata { /* Context handed to rspamd_ast_atom_traverse by rspamd_expression_atom_foreach */
+ rspamd_expression_atom_foreach_cb cb; /* user callback invoked for every atom */
+ gpointer cbdata; /* opaque pointer passed to `cb` unchanged */
+};
+
+/*
+ * GNode traversal callback: for an atom node, wraps the atom's string in a
+ * temporary token and hands it to the user callback stored in `d`
+ * (a struct atom_foreach_cbdata). Other nodes are skipped.
+ * Always returns FALSE so that the traversal continues.
+ */
+static gboolean
+rspamd_ast_atom_traverse(GNode *n, gpointer d)
+{
+	struct atom_foreach_cbdata *ctx = d;
+	struct rspamd_expression_elt *elt = n->data;
+
+	if (elt->type != ELT_ATOM) {
+		return FALSE;
+	}
+
+	rspamd_ftok_t tok;
+
+	tok.begin = elt->p.atom->str;
+	tok.len = elt->p.atom->len;
+	ctx->cb(&tok, ctx->cbdata);
+
+	return FALSE;
+}
+
+/*
+ * Calls `cb(atom_token, cbdata)` for every atom of `expr`, visiting the AST
+ * in post-order.
+ */
+void rspamd_expression_atom_foreach(struct rspamd_expression *expr,
+									rspamd_expression_atom_foreach_cb cb, gpointer cbdata)
+{
+	struct atom_foreach_cbdata ctx;
+
+	g_assert(expr != NULL);
+
+	ctx.cbdata = cbdata;
+	ctx.cb = cb;
+
+	g_node_traverse(expr->ast, G_POST_ORDER, G_TRAVERSE_ALL, -1,
+					rspamd_ast_atom_traverse, &ctx);
+}
+
+/*
+ * Returns TRUE when the AST node `node` holds an operation element whose
+ * operation is exactly `op`, FALSE otherwise. `node` must not be NULL.
+ */
+gboolean
+rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op)
+{
+	const struct rspamd_expression_elt *node_elt;
+
+	g_assert(node != NULL);
+
+	node_elt = node->data;
+
+	return node_elt->type == ELT_OP && node_elt->p.op.op == op;
+}
diff --git a/src/libutil/expression.h b/src/libutil/expression.h
new file mode 100644
index 0000000..ea4e102
--- /dev/null
+++ b/src/libutil/expression.h
@@ -0,0 +1,173 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_EXPRESSION_H_
+#define SRC_LIBUTIL_EXPRESSION_H_
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RSPAMD_EXPRESSION_MAX_PRIORITY 1024 /* base priority of an atom leaf; the value returned by the `priority` subroutine is subtracted from it */
+
+#define RSPAMD_EXPRESSION_FLAG_NOOPT (1 << 0) /* processing flag: evaluate every operand of n-ary operations, no early stop */
+
+enum rspamd_expression_op { /* Operations recognised by the expression parser */
+ OP_INVALID = 0, /* no operation / token is not an operator */
+ OP_PLUS, /* + */
+ OP_MULT, /* * */
+ OP_MINUS, /* - */
+ OP_DIVIDE, /* / */
+ OP_OR, /* || or | or the word `or` (any case) */
+ OP_AND, /* && or & or the word `and` (any case) */
+ OP_NOT, /* ! or the word `not` (any case) */
+ OP_LT, /* < */
+ OP_GT, /* > */
+ OP_LE, /* <= */
+ OP_GE, /* >= */
+ OP_EQ, /* == or = */
+ OP_NE, /* != */
+ OP_OBRACE, /* ( */
+ OP_CBRACE /* ) */
+};
+
+typedef struct rspamd_expression_atom_s {
+ /* Parent node (the AST operation node this atom is attached to) */
+ GNode *parent;
+ /* Opaque userdata */
+ gpointer data;
+ /* String representation of atom */
+ const gchar *str;
+ /* Length of the string representation of atom */
+ guint len;
+ /* Relative priority */
+ gint priority;
+ guint hits; /* incremented each time the atom evaluates to a non-zero value; reset when priorities are recomputed */
+ struct rspamd_counter_data exec_time; /* moving average of sampled evaluation time */
+} rspamd_expression_atom_t;
+
+typedef gdouble (*rspamd_expression_process_cb)(gpointer runtime_data, /* Evaluates one atom; the returned number is the atom's value */
+ rspamd_expression_atom_t *atom);
+
+struct rspamd_atom_subr { /* Callbacks that teach the expression engine how to handle atoms */
+ /* Parses atom from string and returns atom structure (mandatory; NULL or a zero-length atom means failure) */
+ rspamd_expression_atom_t *(*parse)(const gchar *line, gsize len,
+ rspamd_mempool_t *pool, gpointer ud, GError **err);
+
+ /* Process atom via the opaque pointer (e.g. struct rspamd_task *) */
+ rspamd_expression_process_cb process;
+
+ /* Calculates the relative priority of the expression (optional, may be NULL) */
+ gint (*priority)(rspamd_expression_atom_t *atom);
+
+ void (*destroy)(rspamd_expression_atom_t *atom); /* optional: called for every atom when the expression is destroyed */
+};
+
+/* Opaque structure */
+struct rspamd_expression;
+
+/**
+ * Parse symbolic expression and create the expression using the specified subroutines for atoms processing
+ * @param line line to parse
+ * @param len length of the line (if 0 then line should be NUL terminated)
+ * @param subr subroutines for atoms parsing (the `parse` callback is mandatory)
+ * @param subr_data opaque data pointer passed to the `parse` callback
+ * @param pool pool to use for memory allocations
+ * @param err error pointer
+ * @param target the target expression; if NULL, the expression is only parsed and then destroyed
+ * @return TRUE if an expression has been parsed
+ */
+gboolean rspamd_parse_expression(const gchar *line, gsize len,
+ const struct rspamd_atom_subr *subr, gpointer subr_data,
+ rspamd_mempool_t *pool, GError **err,
+ struct rspamd_expression **target);
+
+/**
+ * Process the expression and return its value using atom 'process' functions with the specified data pointer
+ * @param expr expression to process
+ * @param flags processing flags (e.g. RSPAMD_EXPRESSION_FLAG_NOOPT)
+ * @param runtime_ud opaque data pointer for all the atoms
+ * @return the value of expression
+ */
+gdouble rspamd_process_expression(struct rspamd_expression *expr,
+ gint flags,
+ gpointer runtime_ud);
+
+/**
+ * Process the expression and return its value using atom 'process' functions with the specified data pointer.
+ * This function also accepts `track` argument where it writes matched atoms (those whose value is not zero)
+ * @param expr expression to process
+ * @param flags processing flags (e.g. RSPAMD_EXPRESSION_FLAG_NOOPT)
+ * @param runtime_ud opaque data pointer for all the atoms
+ * @param track if not NULL, receives a newly created pointer array with the matched atoms
+ * @return the value of expression
+ */
+gdouble rspamd_process_expression_track(struct rspamd_expression *expr,
+ gint flags,
+ gpointer runtime_ud,
+ GPtrArray **track);
+
+/**
+ * Process the expression with the custom processor
+ * @param expr expression to process
+ * @param cb callback used to evaluate atoms instead of the 'process' function of the expression
+ * @param flags processing flags (e.g. RSPAMD_EXPRESSION_FLAG_NOOPT)
+ * @param runtime_ud opaque data pointer passed to `cb` for all the atoms
+ * @param track if not NULL, receives a newly created pointer array with the matched atoms
+ * @return the value of expression
+ */
+gdouble rspamd_process_expression_closure(struct rspamd_expression *expr,
+ rspamd_expression_process_cb cb,
+ gint flags,
+ gpointer runtime_ud,
+ GPtrArray **track);
+
+/**
+ * Shows string representation of an expression (elements are listed in post-order, separated by spaces)
+ * @param expr expression to show
+ * @return freshly allocated string with expression
+ */
+GString *rspamd_expression_tostring(struct rspamd_expression *expr);
+
+/**
+ * Callback that is called by @see rspamd_expression_atom_foreach; the atom token is ephemeral
+ * and should not be modified within the callback
+ */
+typedef void (*rspamd_expression_atom_foreach_cb)(const rspamd_ftok_t *atom,
+ gpointer ud);
+
+/**
+ * Traverse over all atoms in the expression
+ * @param expr expression
+ * @param cb callback to be called
+ * @param cbdata opaque data passed to `cb`
+ */
+void rspamd_expression_atom_foreach(struct rspamd_expression *expr,
+ rspamd_expression_atom_foreach_cb cb, gpointer cbdata);
+
+/**
+ * Checks if a specified node in AST is the specified operation
+ * @param node AST node packed in GNode container (must not be NULL)
+ * @param op operation to check
+ * @return TRUE if node is an operation node and is exactly the specified operation
+ */
+gboolean rspamd_expression_node_is_op(GNode *node, enum rspamd_expression_op op);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_EXPRESSION_H_ */
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
new file mode 100644
index 0000000..a921f32
--- /dev/null
+++ b/src/libutil/fstring.c
@@ -0,0 +1,482 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "fstring.h"
+#include "str_util.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "contrib/mumhash/mum.h"
+
+
+#ifdef WITH_JEMALLOC
+#include <jemalloc/jemalloc.h>
+#if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3)
+#define HAVE_MALLOC_SIZE 1 /* sys_alloc_size() is available; used by rspamd_fstring_suggest_size() */
+#define sys_alloc_size(sz) nallocx(sz, 0)
+#endif
+#elif defined(__APPLE__)
+#include <malloc/malloc.h>
+#define HAVE_MALLOC_SIZE 1
+#define sys_alloc_size(sz) malloc_good_size(sz)
+#endif
+
+static const gsize default_initial_size = 16; /* minimum payload capacity given to every new string */
+
+#define fstravail(s) ((s)->allocated - (s)->len) /* payload bytes still unused in `s` */
+
+/*
+ * Allocate an empty string whose payload capacity is default_initial_size.
+ * Header and payload live in a single malloc() block.
+ */
+rspamd_fstring_t *
+rspamd_fstring_new(void)
+{
+	const gsize total = default_initial_size + sizeof(rspamd_fstring_t);
+	rspamd_fstring_t *fresh = malloc(total);
+
+	if (fresh == NULL) {
+		g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, total);
+
+		return NULL;
+	}
+
+	fresh->allocated = default_initial_size;
+	fresh->len = 0;
+
+	return fresh;
+}
+
+/*
+ * Allocate an empty string able to hold at least `initial_size` payload
+ * bytes (never less than default_initial_size).
+ */
+rspamd_fstring_t *
+rspamd_fstring_sized_new(gsize initial_size)
+{
+	const gsize capacity = (default_initial_size > initial_size) ? default_initial_size : initial_size;
+	rspamd_fstring_t *fresh = malloc(capacity + sizeof(*fresh));
+
+	if (fresh == NULL) {
+		g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, capacity + sizeof(*fresh));
+
+		return NULL;
+	}
+
+	fresh->allocated = capacity;
+	fresh->len = 0;
+
+	return fresh;
+}
+
+/*
+ * Allocate a string and fill it with the first `len` bytes of `init`.
+ * The payload capacity is at least default_initial_size. `init` may be NULL
+ * when `len` is 0: the copy is skipped in this case, as passing a NULL
+ * pointer to memcpy() is undefined even for a zero length.
+ * Allocation failure is fatal.
+ */
+rspamd_fstring_t *
+rspamd_fstring_new_init(const gchar *init, gsize len)
+{
+	rspamd_fstring_t *s;
+	gsize real_size = MAX(default_initial_size, len);
+
+	if ((s = malloc(real_size + sizeof(*s))) == NULL) {
+		g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, real_size + sizeof(*s));
+
+		abort();
+	}
+
+	s->len = len;
+	s->allocated = real_size;
+
+	if (len > 0) {
+		memcpy(s->str, init, len);
+	}
+
+	return s;
+}
+
+/*
+ * Replace the content of `str` with `len` bytes taken from `init`.
+ * A NULL `str` yields a freshly allocated string. The returned pointer must
+ * be used instead of `str`, since the string may be reallocated.
+ */
+rspamd_fstring_t *
+rspamd_fstring_assign(rspamd_fstring_t *str, const gchar *init, gsize len)
+{
+	if (str == NULL) {
+		return rspamd_fstring_new_init(init, len);
+	}
+
+	/* Growth is decided from the free tail, exactly as for appending */
+	if (fstravail(str) < len) {
+		str = rspamd_fstring_grow(str, len);
+	}
+
+	if (len != 0) {
+		memcpy(str->str, init, len);
+	}
+
+	str->len = len;
+
+	return str;
+}
+
+void rspamd_fstring_free(rspamd_fstring_t *str)
+{
+ free(str); /* header and payload are one malloc() block; free(NULL) is a no-op, so NULL is accepted */
+}
+
+/*
+ * Suggest a new payload capacity for a string that holds `len` bytes in a
+ * buffer of `allocated` bytes and needs room for `needed_len` more.
+ * Buffers below 4096 bytes are doubled, larger ones grow by about 1.5x; the
+ * result is never smaller than `len + needed_len`. When the allocator can
+ * report its real allocation size, a larger reported size is preferred.
+ */
+inline gsize
+rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len)
+{
+	const gsize required = len + needed_len;
+	const gsize geometric = (allocated < 4096) ? allocated * 2 : 1 + allocated * 3 / 2;
+	gsize suggested = (required > geometric) ? required : geometric;
+	gsize rounded = 0;
+
+#ifdef HAVE_MALLOC_SIZE
+	rounded = sys_alloc_size(suggested + sizeof(rspamd_fstring_t));
+#endif
+
+	if (rounded > suggested) {
+		suggested = rounded;
+	}
+
+	return suggested;
+}
+
+/*
+ * Reallocate `str` so that at least `needed_len` more bytes fit after its
+ * current content. Returns the (possibly moved) string; the old pointer must
+ * not be used afterwards. Allocation failure is fatal.
+ */
+rspamd_fstring_t *
+rspamd_fstring_grow(rspamd_fstring_t *str, gsize needed_len)
+{
+	const gsize capacity = rspamd_fstring_suggest_size(str->len, str->allocated, needed_len);
+	rspamd_fstring_t *resized = realloc(str, capacity + sizeof(*str));
+
+	if (resized == NULL) {
+		/* realloc() keeps the old block on failure: release it before bailing out */
+		free(str);
+		g_error("%s: failed to re-allocate %" G_GSIZE_FORMAT " bytes",
+				G_STRLOC, capacity + sizeof(*str));
+		abort();
+	}
+
+	resized->allocated = capacity;
+
+	return resized;
+}
+
+/*
+ * Append `len` bytes from `in` to `str`, growing it when the free tail is
+ * too small. A NULL `str` is treated as an empty string and a new one is
+ * created via rspamd_fstring_new_init(). The returned pointer replaces `str`.
+ * Appending zero bytes to an existing string does not touch `in`, so a NULL
+ * `in` is safe there (memcpy() with a NULL pointer is undefined even for a
+ * zero length).
+ */
+rspamd_fstring_t *
+rspamd_fstring_append(rspamd_fstring_t *str, const char *in, gsize len)
+{
+	if (str == NULL) {
+		str = rspamd_fstring_new_init(in, len);
+	}
+	else {
+		gsize avail = fstravail(str);
+
+		if (avail < len) {
+			str = rspamd_fstring_grow(str, len);
+		}
+
+		if (len > 0) {
+			memcpy(str->str + str->len, in, len);
+			str->len += len;
+		}
+	}
+
+	return str;
+}
+
+/*
+ * Append `len` copies of the byte `c` to `str`. A NULL `str` is replaced by
+ * a new string sized for `len` bytes; an existing one is grown when its free
+ * tail is too small. The returned pointer replaces `str`.
+ */
+rspamd_fstring_t *
+rspamd_fstring_append_chars(rspamd_fstring_t *str,
+							char c, gsize len)
+{
+	if (str == NULL) {
+		str = rspamd_fstring_sized_new(len);
+	}
+	else if (fstravail(str) < len) {
+		str = rspamd_fstring_grow(str, len);
+	}
+
+	/* Both paths end up filling the free tail in the same way */
+	memset(str->str + str->len, c, len);
+	str->len += len;
+
+	return str;
+}
+
+/*
+ * Erase `len` bytes starting at `pos`. A `pos` at or beyond the end is a
+ * no-op; a range that runs past the end is clamped to the end of the string.
+ * Capacity is left untouched.
+ */
+void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len)
+{
+	gsize tail;
+
+	if (pos >= str->len) {
+		/* Do nothing */
+		return;
+	}
+
+	/* Bytes from `pos` up to the end of the string */
+	tail = str->len - pos;
+
+	if (len >= tail) {
+		/*
+		 * Fast path: everything after `pos` goes away. Comparing against
+		 * `tail` also avoids the overflow of `pos + len` for huge `len`.
+		 */
+		str->len = pos;
+		return;
+	}
+
+	/* Only the bytes that follow the erased range are moved down */
+	memmove(str->str + pos, str->str + pos + len, tail - len);
+	str->len -= len;
+}
+
+/*
+ * Compat code: mixes one input word `c` into the running hash state `hval`
+ * by delegating to mum_hash_step(); note that the argument order is swapped
+ * in the call (state first, input second)
+ */
+static guint64
+fstrhash_c(guint64 c, guint64 hval)
+{
+ return mum_hash_step(hval, c);
+}
+
+
+/*
+ * Return hash value for a string
+ *
+ * The input is lowercased on the fly and mixed into a running state that is
+ * seeded with the token length. A NULL token hashes to 0. When `is_utf` is
+ * set, the input is validated first: if rspamd_fast_utf8_validate() returns
+ * non-zero the function falls back to the byte-wise path, otherwise each code
+ * point is lowercased with g_unichar_tolower(). The byte-wise path consumes
+ * the input in guint64-sized words and then byte by byte for the tail.
+ * The 64-bit state is narrowed to 32 bits on return.
+ */
+guint32
+rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf)
+{
+ gsize i;
+ guint64 hval;
+ const gchar *p, *end = NULL;
+ gunichar uc;
+
+ if (str == NULL) {
+ return 0;
+ }
+
+ p = str->begin;
+ hval = str->len;
+ end = p + str->len;
+
+ if (is_utf) {
+ if (rspamd_fast_utf8_validate(p, str->len) != 0) {
+ return rspamd_fstrhash_lc(str, FALSE); /* validation failed: hash the raw bytes instead */
+ }
+ while (p < end) {
+ uc = g_unichar_tolower(g_utf8_get_char(p));
+ hval = fstrhash_c(uc, hval);
+ p = g_utf8_next_char(p);
+ }
+ }
+ else {
+ gsize large_steps = str->len / sizeof(guint64);
+ for (i = 0; i < large_steps; i++, p += sizeof(guint64)) {
+ /* Copy to the uint64 lowercasing each byte; bytes are packed in memory order, so the word value depends on host byte order */
+ union {
+ char c[sizeof(guint64)];
+ guint64 iu64;
+ } t;
+ for (int j = 0; j < sizeof(guint64); j++) {
+ t.c[j] = g_ascii_tolower(p[j]);
+ }
+ hval = fstrhash_c(t.iu64, hval);
+ }
+
+ gsize remain = str->len % sizeof(guint64); /* tail shorter than one word */
+ for (i = 0; i < remain; i++, p++) {
+ hval = fstrhash_c(g_ascii_tolower(*p), hval);
+ }
+ }
+
+ return hval; /* implicitly narrowed to guint32 */
+}
+
+/*
+ * TRUE when both strings have the same length and identical bytes.
+ * Both arguments must be non-NULL.
+ */
+gboolean
+rspamd_fstring_equal(const rspamd_fstring_t *s1,
+					 const rspamd_fstring_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len != s2->len) {
+		return FALSE;
+	}
+
+	return (memcmp(s1->str, s2->str, s1->len) == 0);
+}
+
+/*
+ * Case-insensitive three-way comparison of two strings.
+ * Strings of different length are ordered by length (shorter first);
+ * strings of equal length are compared with rspamd_lc_cmp().
+ * Returns a negative, zero or positive value. Both arguments must be non-NULL.
+ */
+gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1,
+							const rspamd_fstring_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len == s2->len) {
+		return rspamd_lc_cmp(s1->str, s2->str, s1->len);
+	}
+
+	/*
+	 * Lengths are compared explicitly: narrowing the unsigned gsize
+	 * difference to gint can flip the sign or yield 0 for large differences
+	 */
+	return (s1->len > s2->len) ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two strings.
+ * Strings of different length are ordered by length (shorter first);
+ * strings of equal length are compared with memcmp().
+ * Returns a negative, zero or positive value. Both arguments must be non-NULL.
+ */
+gint rspamd_fstring_cmp(const rspamd_fstring_t *s1,
+						const rspamd_fstring_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len == s2->len) {
+		return memcmp(s1->str, s2->str, s1->len);
+	}
+
+	/*
+	 * Lengths are compared explicitly: narrowing the unsigned gsize
+	 * difference to gint can flip the sign or yield 0 for large differences
+	 */
+	return (s1->len > s2->len) ? 1 : -1;
+}
+
+/*
+ * Case-insensitive three-way comparison of two tokens.
+ * Tokens of different length are ordered by length (shorter first);
+ * tokens of equal length are compared with rspamd_lc_cmp().
+ * Returns a negative, zero or positive value. Both arguments must be non-NULL.
+ */
+gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1,
+						 const rspamd_ftok_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len == s2->len) {
+		return rspamd_lc_cmp(s1->begin, s2->begin, s1->len);
+	}
+
+	/*
+	 * Lengths are compared explicitly: narrowing the unsigned gsize
+	 * difference to gint can flip the sign or yield 0 for large differences
+	 */
+	return (s1->len > s2->len) ? 1 : -1;
+}
+
+/*
+ * Three-way comparison of two tokens.
+ * Tokens of different length are ordered by length (shorter first);
+ * tokens of equal length are compared with memcmp().
+ * Returns a negative, zero or positive value. Both arguments must be non-NULL.
+ */
+gint rspamd_ftok_cmp(const rspamd_ftok_t *s1,
+					 const rspamd_ftok_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len == s2->len) {
+		return memcmp(s1->begin, s2->begin, s1->len);
+	}
+
+	/*
+	 * Lengths are compared explicitly: narrowing the unsigned gsize
+	 * difference to gint can flip the sign or yield 0 for large differences
+	 */
+	return (s1->len > s2->len) ? 1 : -1;
+}
+
+/*
+ * TRUE when the first `s2->len` bytes of `s1` are identical to `s2`.
+ * A prefix longer than `s1` never matches. Both arguments must be non-NULL.
+ */
+gboolean
+rspamd_ftok_starts_with(const rspamd_ftok_t *s1,
+						const rspamd_ftok_t *s2)
+{
+	g_assert(s1 != NULL && s2 != NULL);
+
+	if (s1->len < s2->len) {
+		return FALSE;
+	}
+
+	return memcmp(s1->begin, s2->begin, s2->len) == 0 ? TRUE : FALSE;
+}
+
+/*
+ * Destructor for a token that maps a whole rspamd_fstring_t: recovers the
+ * string header that precedes `tok->begin`, frees the string and then the
+ * token itself (which must come from g_malloc).
+ */
+void rspamd_fstring_mapped_ftok_free(gpointer p)
+{
+	rspamd_ftok_t *tok = p;
+	rspamd_fstring_t *storage;
+	/* `begin` points at the payload, which follows the two gsize header fields */
+	const gsize header_size = sizeof(storage->len) + sizeof(storage->allocated);
+
+	storage = (rspamd_fstring_t *) (tok->begin - header_size);
+	rspamd_fstring_free(storage);
+	g_free(tok);
+}
+
+/*
+ * Create a g_malloc'ed token that refers to the payload of `s`.
+ * No bytes are copied: the token points into the string itself.
+ */
+rspamd_ftok_t *
+rspamd_ftok_map(const rspamd_fstring_t *s)
+{
+	rspamd_ftok_t *view;
+
+	g_assert(s != NULL);
+
+	view = g_malloc(sizeof(rspamd_ftok_t));
+	view->len = s->len;
+	view->begin = s->str;
+
+	return view;
+}
+
+/*
+ * Return a g_malloc'ed, NUL-terminated copy of the string content,
+ * or NULL when `s` is NULL.
+ */
+char *
+rspamd_fstring_cstr(const rspamd_fstring_t *s)
+{
+	char *copy = NULL;
+
+	if (s != NULL) {
+		const gsize n = s->len;
+
+		copy = g_malloc(n + 1);
+		memcpy(copy, s->str, n);
+		copy[n] = '\0';
+	}
+
+	return copy;
+}
+
+/*
+ * Return a g_malloc'ed, NUL-terminated copy of the token content,
+ * or NULL when `s` is NULL.
+ * An empty token may carry a NULL `begin` (RSPAMD_FTOK_FROM_STR produces
+ * such tokens), so the copy is skipped for zero length: passing NULL to
+ * memcpy() is undefined even when nothing is copied.
+ */
+char *
+rspamd_ftok_cstr(const rspamd_ftok_t *s)
+{
+	char *result;
+
+	if (s == NULL) {
+		return NULL;
+	}
+
+	result = g_malloc(s->len + 1);
+
+	if (s->len > 0) {
+		memcpy(result, s->begin, s->len);
+	}
+
+	result[s->len] = '\0';
+
+	return result;
+}
+
+/*
+ * TRUE when token `s` has exactly the content of the C string `pat`;
+ * `icase` selects the case-insensitive comparison.
+ * Both `s` and `pat` must be non-NULL.
+ */
+gboolean
+rspamd_ftok_cstr_equal(const rspamd_ftok_t *s, const gchar *pat,
+					   gboolean icase)
+{
+	rspamd_ftok_t needle;
+
+	g_assert(s != NULL);
+	g_assert(pat != NULL);
+
+	/* Wrap the C string into a temporary token to reuse the token comparators */
+	needle.len = strlen(pat);
+	needle.begin = pat;
+
+	return icase ? (rspamd_ftok_casecmp(s, &needle) == 0)
+				 : (rspamd_ftok_cmp(s, &needle) == 0);
+}
+
+/*
+ * Return a g_malloc'ed, NUL-terminated copy of the token content,
+ * or NULL when `src` is NULL.
+ * An empty token may carry a NULL `begin` (RSPAMD_FTOK_FROM_STR produces
+ * such tokens), so the copy is skipped for zero length: passing NULL to
+ * memcpy() is undefined even when nothing is copied.
+ */
+gchar *
+rspamd_ftokdup(const rspamd_ftok_t *src)
+{
+	gchar *newstr;
+
+	if (src == NULL) {
+		return NULL;
+	}
+
+	newstr = g_malloc(src->len + 1);
+
+	if (src->len > 0) {
+		memcpy(newstr, src->begin, src->len);
+	}
+
+	newstr[src->len] = '\0';
+
+	return newstr;
+}
+
+/*
+ * Return a g_malloc'ed, NUL-terminated copy of the string content,
+ * or NULL when `src` is NULL.
+ */
+gchar *
+rspamd_fstringdup(const rspamd_fstring_t *src)
+{
+	gchar *dup = NULL;
+
+	if (src != NULL) {
+		const gsize n = src->len;
+
+		dup = g_malloc(n + 1);
+		memcpy(dup, src->str, n);
+		dup[n] = '\0';
+	}
+
+	return dup;
+}
diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h
new file mode 100644
index 0000000..9eacf21
--- /dev/null
+++ b/src/libutil/fstring.h
@@ -0,0 +1,231 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FSTRING_H
+#define FSTRING_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include <unicode/uchar.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Fixed strings library
+ * These strings are NOT null-terminated for speed
+ *
+ * rspamd_fstring_t is a single heap block: the two length fields are
+ * immediately followed by the payload bytes in the flexible array member
+ */
+
+typedef struct f_str_s {
+ gsize len; /* number of payload bytes currently in use */
+ gsize allocated; /* payload capacity in bytes, header not included */
+ gchar str[]; /* payload, not NUL-terminated */
+} rspamd_fstring_t;
+
+#define RSPAMD_FSTRING_DATA(s) ((s)->str) /* payload pointer of an fstring */
+#define RSPAMD_FSTRING_LEN(s) ((s)->len) /* used length of an fstring */
+#define RSPAMD_FSTRING_LIT(lit) rspamd_fstring_new_init((lit), sizeof(lit) - 1) /* new fstring from a string literal; sizeof() is only right for literals/arrays, not pointers */
+
+typedef struct f_str_tok {
+ gsize len; /* number of bytes referenced */
+ const gchar *begin; /* first referenced byte; NULL for the empty token built by RSPAMD_FTOK_FROM_STR from a NULL string */
+} rspamd_ftok_t;
+
+typedef struct f_str_unicode_tok {
+ gsize len; /* in UChar32 */
+ const UChar32 *begin; /* first referenced code point */
+} rspamd_ftok_unicode_t;
+
+/**
+ * Create new fixed length string
+ */
+rspamd_fstring_t *rspamd_fstring_new(void)
+ G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Create new fixed length string with preallocated size
+ */
+rspamd_fstring_t *rspamd_fstring_sized_new(gsize initial_size)
+ G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Create new fixed length string and initialize it with the initial data
+ */
+rspamd_fstring_t *rspamd_fstring_new_init(const gchar *init, gsize len)
+ G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Assign new value to fixed string
+ */
+rspamd_fstring_t *rspamd_fstring_assign(rspamd_fstring_t *str,
+ const gchar *init, gsize len) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Free fixed length string
+ */
+void rspamd_fstring_free(rspamd_fstring_t *str);
+
+/**
+ * Append data to a fixed length string
+ */
+rspamd_fstring_t *rspamd_fstring_append(rspamd_fstring_t *str,
+ const char *in, gsize len) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Append `len` repeated chars `c` to string `str`
+ */
+rspamd_fstring_t *rspamd_fstring_append_chars(rspamd_fstring_t *str,
+ char c, gsize len) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Erase `len` characters at position `pos`
+ */
+void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len);
+
+/* Drop the whole content of `s`, capacity is kept; note that `s` is evaluated twice */
+#define rspamd_fstring_clear(s) rspamd_fstring_erase((s), 0, (s)->len)
+
+/**
+ * Convert fixed string to a zero terminated string. This string must be
+ * freed by a caller
+ */
+char *rspamd_fstring_cstr(const rspamd_fstring_t *str)
+ G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Convert fixed string using ftok_t to a zero terminated string. This string must be
+ * freed by a caller
+ */
+char *rspamd_ftok_cstr(const rspamd_ftok_t *str)
+ G_GNUC_WARN_UNUSED_RESULT;
+
+/*
+ * Return fast hash value for fixed string converted to lowercase
+ */
+guint32 rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf);
+
+/**
+ * Return true if two strings are equal
+ */
+gboolean rspamd_fstring_equal(const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2);
+
+/**
+ * Compare two fixed strings ignoring case
+ */
+gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2);
+
+/**
+ * Compare two fixed strings
+ */
+gint rspamd_fstring_cmp(const rspamd_fstring_t *s1,
+ const rspamd_fstring_t *s2);
+
+/**
+ * Compare two fixed tokens ignoring case
+ */
+gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1,
+ const rspamd_ftok_t *s2);
+
+/**
+ * Compare two fixed tokens
+ */
+gint rspamd_ftok_cmp(const rspamd_ftok_t *s1,
+ const rspamd_ftok_t *s2);
+
+/**
+ * Returns true if `s1` starts with `s2`
+ * @param s1
+ * @param s2
+ * @return
+ */
+gboolean rspamd_ftok_starts_with(const rspamd_ftok_t *s1,
+ const rspamd_ftok_t *s2);
+
+/**
+ * Return TRUE if ftok is equal to specified C string
+ */
+gboolean rspamd_ftok_cstr_equal(const rspamd_ftok_t *s,
+ const gchar *pat, gboolean icase);
+
+/**
+ * Free fstring_t that is mapped to ftok_t
+ *
+ * | len | allocated | <data> -- fstring_t
+ * <begin> -- tok
+ *
+ * tok is expected to be allocated with g_malloc
+ */
+void rspamd_fstring_mapped_ftok_free(gpointer p);
+
+/**
+ * Map token to a specified string. Token must be freed using g_free
+ */
+rspamd_ftok_t *rspamd_ftok_map(const rspamd_fstring_t *s);
+
+/**
+ * Suggest suitable size to grow fstring
+ * @param len
+ * @param allocated
+ * @param needed_len
+ * @return
+ */
+gsize rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len);
+
+/**
+ * Grow the specified fixed string
+ * @param str
+ * @param needed_len
+ * @return
+ */
+rspamd_fstring_t *rspamd_fstring_grow(rspamd_fstring_t *str,
+ gsize needed_len) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Copies ftok to zero terminated string (must be freed using g_free)
+ * @param src
+ * @return
+ */
+gchar *rspamd_ftokdup(const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Copies fstring to zero terminated string (must be freed using g_free)
+ * @param src
+ * @return
+ */
+gchar *rspamd_fstringdup(const rspamd_fstring_t *src) G_GNUC_WARN_UNUSED_RESULT;
+
+#define RSPAMD_FTOK_ASSIGN(t, lit) /* point token at a string literal; length is sizeof(lit) - 1, so `lit` must be a literal or char array */ \
+ do { \
+ (t)->begin = (lit); \
+ (t)->len = sizeof(lit) - 1; \
+ } while (0)
+#define RSPAMD_FTOK_FROM_STR(t, str) /* point token at a C string (no copy); NULL gives an empty token; `str` is evaluated more than once */ \
+ do { \
+ if (G_LIKELY(str)) { \
+ (t)->begin = (const char *) (str); \
+ (t)->len = strlen(str); \
+ } \
+ else { \
+ (t)->begin = NULL; \
+ (t)->len = 0; \
+ } \
+ } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/libutil/hash.c b/src/libutil/hash.c
new file mode 100644
index 0000000..d2af88c
--- /dev/null
+++ b/src/libutil/hash.c
@@ -0,0 +1,716 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "hash.h"
+#include "util.h"
+#include "khash.h"
+
+/**
+ * LRU hashing
+ *
+ * Elements carry a small probabilistic usage counter (`lg_usages`) and a
+ * coarse last-access timestamp; eviction prefers elements with low counters
+ */
+
+static const guint log_base = 10; /* scales how fast the probability of a counter increment drops as the counter grows */
+static const guint eviction_candidates = 16; /* number of slots in the eviction pool */
+static const gdouble lfu_base_value = 5.0; /* initial usage counter of an inserted element; also the age threshold used when decaying counters */
+
+struct rspamd_lru_volatile_element_s;
+
+struct rspamd_lru_hash_s {
+ guint maxsize; /* element limit; reaching it on insert triggers an eviction pass */
+ guint eviction_min_prio; /* lowest lg_usages recorded for pooled candidates, G_MAXUINT when the pool is empty */
+ guint eviction_used; /* number of occupied slots in eviction_pool */
+ struct rspamd_lru_element_s **eviction_pool; /* eviction_candidates pointers to elements stored in vals[] */
+
+ GDestroyNotify value_destroy; /* optional value destructor */
+ GDestroyNotify key_destroy; /* optional key destructor */
+ GHashFunc hfunc; /* key hash function */
+ GEqualFunc eqfunc; /* key equality function */
+
+ khint_t n_buckets, size, n_occupied, upper_bound; /* khash-style bookkeeping: bucket count, live elements, used buckets (live + deleted), growth threshold */
+ khint32_t *flags; /* per-bucket empty/deleted bits */
+ gpointer *keys; /* bucket keys */
+ struct rspamd_lru_volatile_element_s *vals; /* bucket values, parallel to keys */
+};
+
+enum rspamd_lru_element_flags {
+ RSPAMD_LRU_ELEMENT_NORMAL = 0, /* inserted with ttl == 0: never expires by age */
+ RSPAMD_LRU_ELEMENT_VOLATILE = (1 << 0), /* has creation_time/ttl and is dropped once expired */
+ RSPAMD_LRU_ELEMENT_IMMORTAL = (1 << 1), /* skipped by the full eviction scan; set temporarily on the element being inserted */
+};
+
+struct rspamd_lru_element_s {
+ guint16 last; /* last access time in TIME_TO_TS units */
+ guint8 lg_usages; /* probabilistic usage counter, stops growing at 255 */
+ guint8 eviction_pos; /* index in the eviction pool, (guint8) -1 when not pooled */
+ guint8 flags; /* bits of enum rspamd_lru_element_flags */
+ gpointer data; /* user value */
+};
+
+struct rspamd_lru_volatile_element_s {
+ struct rspamd_lru_element_s e; /* must stay the first member: rspamd_lru_hash_remove_node() casts an element pointer back to this type */
+ time_t creation_time; /* `now` at insertion, only set for elements with a ttl */
+ time_t ttl; /* lifetime in the same units as creation_time */
+};
+typedef struct rspamd_lru_volatile_element_s rspamd_lru_vol_element_t;
+
+#define TIME_TO_TS(t) ((guint16) (((t) / 60) & 0xFFFFU)) /* `t` divided by 60 and reduced to 16 bits, so the stamp wraps around */
+
+static rspamd_lru_vol_element_t *
+rspamd_lru_hash_get(const rspamd_lru_hash_t *h, gconstpointer key)
+{
+ if (h->n_buckets) {
+ khint_t k, i, last, mask, step = 0;
+ mask = h->n_buckets - 1; /* n_buckets is kept a power of two by resize, so this works as a bit mask */
+ k = h->hfunc(key);
+ i = k & mask;
+ last = i; /* probe start, used to detect a full cycle */
+
+ while (!__ac_isempty(h->flags, i) &&
+ (__ac_isdel(h->flags, i) || !h->eqfunc(h->keys[i], key))) { /* skip deleted buckets and buckets with other keys */
+ i = (i + (++step)) & mask; /* probe with a growing step */
+ if (i == last) {
+ return NULL; /* wrapped around: key is absent */
+ }
+ }
+
+ return __ac_iseither(h->flags, i) ? NULL : &h->vals[i]; /* an empty bucket ends the probe: not found */
+ }
+
+ return NULL; /* table has no buckets yet */
+}
+
+static int
+rspamd_lru_hash_resize(rspamd_lru_hash_t *h,
+ khint_t new_n_buckets)
+{
+ /*
+ * Rehash the table in place into `new_n_buckets` buckets (the value is
+ * rounded up by kroundup32() and is never less than 4).
+ * Returns 0 on success or when the requested size is too small for the
+ * current content (nothing is changed then), -1 on allocation failure.
+ * This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets.
+ */
+ khint32_t *new_flags = 0;
+ khint_t j = 1; /* doubles as the "rehash is needed" marker until the loop below reuses it as an index */
+
+ kroundup32(new_n_buckets);
+ if (new_n_buckets < 4) {
+ new_n_buckets = 4;
+ }
+
+ if (h->size >= (khint_t) (new_n_buckets * __ac_HASH_UPPER + 0.5)) {
+ j = 0;
+ /* requested size is too small */
+ }
+ else {
+ /* hash table size to be changed (shrink or expand); rehash */
+ new_flags = (khint32_t *) g_malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t));
+
+ if (!new_flags) {
+ return -1;
+ }
+
+ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); /* 0xaa marks every bucket of the new table as empty */
+ if (h->n_buckets < new_n_buckets) {
+ /* expand */
+ gpointer *new_keys = (gpointer *) g_realloc((void *) h->keys,
+ new_n_buckets * sizeof(gpointer));
+
+ if (!new_keys) {
+ g_free(new_flags);
+ return -1;
+ }
+
+ h->keys = new_keys;
+ rspamd_lru_vol_element_t *new_vals =
+ (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
+ new_n_buckets * sizeof(rspamd_lru_vol_element_t));
+ if (!new_vals) {
+ g_free(new_flags);
+ return -1;
+ }
+
+ h->vals = new_vals;
+ }
+ /* Shrink: the arrays are trimmed only after rehashing, see below */
+ }
+
+ if (j) {
+ /* rehashing is needed; elements move, so the eviction pool is dropped and every moved element is marked as not pooled */
+ h->eviction_used = 0;
+
+ for (j = 0; j != h->n_buckets; ++j) {
+ if (__ac_iseither(h->flags, j) == 0) {
+ gpointer key = h->keys[j];
+ rspamd_lru_vol_element_t val;
+ khint_t new_mask;
+ new_mask = new_n_buckets - 1;
+ val = h->vals[j];
+ val.e.eviction_pos = (guint8) -1;
+ __ac_set_isdel_true(h->flags, j);
+
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */
+ khint_t k, i, step = 0;
+ k = h->hfunc(key);
+ i = k & new_mask;
+
+ while (!__ac_isempty(new_flags, i)) {
+ i = (i + (++step)) & new_mask;
+ }
+
+ __ac_set_isempty_false(new_flags, i);
+
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) {
+ /* kick out the existing element */
+ {
+ gpointer tmp = h->keys[i];
+ h->keys[i] = key;
+ key = tmp;
+ }
+ {
+ rspamd_lru_vol_element_t tmp = h->vals[i];
+ h->vals[i] = val;
+ val = tmp;
+ val.e.eviction_pos = (guint8) -1;
+ }
+ __ac_set_isdel_true(h->flags, i);
+ /* mark it as deleted in the old hash table */
+ }
+ else { /* write the element and jump out of the loop */
+ h->keys[i] = key;
+ h->vals[i] = val;
+ break;
+ }
+ }
+ }
+ }
+
+ if (h->n_buckets > new_n_buckets) {
+ /* shrink the hash table */
+ h->keys = (gpointer *) g_realloc((void *) h->keys,
+ new_n_buckets * sizeof(gpointer));
+ h->vals = (rspamd_lru_vol_element_t *) g_realloc((void *) h->vals,
+ new_n_buckets * sizeof(rspamd_lru_vol_element_t));
+ }
+
+ g_free(h->flags); /* free the working space */
+ h->flags = new_flags;
+ h->n_buckets = new_n_buckets;
+ h->n_occupied = h->size; /* deleted buckets are gone after the rehash */
+ h->upper_bound = (khint_t) (h->n_buckets * __ac_HASH_UPPER + 0.5);
+ }
+
+ return 0;
+}
+
+static rspamd_lru_vol_element_t *
+rspamd_lru_hash_put(rspamd_lru_hash_t *h, gpointer key, int *ret)
+{
+ khint_t x;
+
+ if (h->n_occupied >= h->upper_bound) {
+ /* update the hash table: either rehash to purge deleted buckets or grow */
+ if (h->n_buckets > (h->size << 1)) {
+ if (rspamd_lru_hash_resize(h, h->n_buckets - 1) < 0) {
+ /* clear "deleted" elements (this branch handles the failure of that rehash) */
+ *ret = -1;
+ return NULL;
+ }
+ }
+ else if (rspamd_lru_hash_resize(h, h->n_buckets + 1) < 0) {
+ /* expand the hash table (this branch handles the failure of that expansion) */
+ *ret = -1;
+ return NULL;
+ }
+ }
+
+ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0;
+ x = site = h->n_buckets; /* n_buckets serves as the "not chosen yet" value for both */
+ k = h->hfunc(key);
+ i = k & mask;
+
+ if (__ac_isempty(h->flags, i)) {
+ x = i; /* for speed up */
+ }
+ else {
+ last = i;
+ while (!__ac_isempty(h->flags, i) &&
+ (__ac_isdel(h->flags, i) ||
+ !h->eqfunc(h->keys[i], key))) {
+ if (__ac_isdel(h->flags, i)) {
+ site = i; /* remember a deleted bucket that can be reused */
+ }
+
+ i = (i + (++step)) & mask;
+
+ if (i == last) {
+ x = site;
+ break;
+ }
+ }
+
+ if (x == h->n_buckets) {
+ if (__ac_isempty(h->flags, i) && site != h->n_buckets) {
+ x = site; /* key is absent: prefer the remembered deleted bucket */
+ }
+ else {
+ x = i; /* either the key itself or the first empty bucket */
+ }
+ }
+ }
+
+ if (__ac_isempty(h->flags, x)) { /* not present at all */
+ h->keys[x] = key;
+ __ac_set_isboth_false(h->flags, x);
+ ++h->size;
+ ++h->n_occupied;
+ *ret = 1;
+ }
+ else if (__ac_isdel(h->flags, x)) { /* deleted */
+ h->keys[x] = key;
+ __ac_set_isboth_false(h->flags, x);
+ ++h->size;
+ *ret = 2;
+ }
+ else {
+ /* Don't touch h->keys[x] if present and not deleted */
+ *ret = 0;
+ }
+
+ return &h->vals[x]; /* value slot is returned as is: the caller initialises or replaces its content */
+}
+
+/*
+ * Mark the bucket that holds `elt` as deleted and run the key and value
+ * destructors. Does nothing when the bucket is already empty or deleted.
+ */
+static void
+rspamd_lru_hash_del(rspamd_lru_hash_t *h, rspamd_lru_vol_element_t *elt)
+{
+	/* The bucket index is recovered from the element position in vals[] */
+	khint_t slot = elt - h->vals;
+
+	if (slot == h->n_buckets || __ac_iseither(h->flags, slot)) {
+		return;
+	}
+
+	__ac_set_isdel_true(h->flags, slot);
+	--h->size;
+
+	if (h->key_destroy) {
+		h->key_destroy(h->keys[slot]);
+	}
+
+	if (h->value_destroy) {
+		h->value_destroy(elt->e.data);
+	}
+}
+
+/*
+ * Take `elt` out of the eviction pool: close the gap in the pool array,
+ * renumber the remaining candidates and recompute the minimum priority.
+ * `elt` must currently be pooled.
+ */
+static void
+rspamd_lru_hash_remove_evicted(rspamd_lru_hash_t *hash,
+							   rspamd_lru_element_t *elt)
+{
+	guint idx;
+
+	g_assert(hash->eviction_used > 0);
+	g_assert(elt->eviction_pos < hash->eviction_used);
+
+	/* Shift everything that follows the removed slot one position down */
+	memmove(&hash->eviction_pool[elt->eviction_pos],
+			&hash->eviction_pool[elt->eviction_pos + 1],
+			sizeof(rspamd_lru_element_t *) *
+				(eviction_candidates - elt->eviction_pos - 1));
+
+	hash->eviction_used--;
+
+	/* We also need to update min_prio and renumber eviction list */
+	hash->eviction_min_prio = G_MAXUINT;
+
+	for (idx = 0; idx < hash->eviction_used; idx++) {
+		rspamd_lru_element_t *candidate = hash->eviction_pool[idx];
+
+		if (candidate->lg_usages < hash->eviction_min_prio) {
+			hash->eviction_min_prio = candidate->lg_usages;
+		}
+
+		candidate->eviction_pos = idx;
+	}
+}
+
+/*
+ * Probabilistically bump the usage counter of `elt`: the higher the counter
+ * is above lfu_base_value, the less likely an increment becomes.
+ * A counter of 255 is left untouched.
+ */
+static void
+rspamd_lru_hash_update_counter(rspamd_lru_element_t *elt)
+{
+	double dice, excess;
+
+	if (elt->lg_usages == 255) {
+		/* Saturated */
+		return;
+	}
+
+	dice = rspamd_random_double_fast();
+	excess = elt->lg_usages - lfu_base_value;
+
+	if (excess < 0) {
+		excess = 0;
+	}
+
+	if (dice < 1.0 / (excess * log_base + 1)) {
+		elt->lg_usages++;
+	}
+}
+
+/*
+ * Halve the usage counter of an element that has not been accessed for more
+ * than lfu_base_value timestamp units.
+ *
+ * `elt->last` is stored in TIME_TO_TS units (16 bit), whilst `now` is a plain
+ * time_t, so `now` is converted before comparing. Comparing the raw values
+ * would treat every element as outdated on each scan. The difference is
+ * taken modulo 2^16 to survive the wrap-around of the stamp.
+ */
+static inline void
+rspamd_lru_hash_decrease_counter(rspamd_lru_element_t *elt, time_t now)
+{
+	guint16 elapsed = (guint16) (TIME_TO_TS(now) - elt->last);
+
+	if (elapsed > lfu_base_value) {
+		/* Penalise counters for outdated records */
+		elt->lg_usages /= 2;
+	}
+}
+
+/*
+ * Try to register `elt` as an eviction candidate.
+ * Returns TRUE when the element is in the pool after the call (it was there
+ * already, took a free slot, or displaced a more frequently used candidate)
+ * and FALSE when the pool is full of candidates that are not used more often.
+ */
+static gboolean
+rspamd_lru_hash_maybe_evict(rspamd_lru_hash_t *hash,
+							rspamd_lru_element_t *elt)
+{
+	guint slot;
+
+	if (elt->eviction_pos != (guint8) -1) {
+		/* Already in the eviction list */
+		return TRUE;
+	}
+
+	if (hash->eviction_used < eviction_candidates) {
+		/* There are free places in eviction pool */
+		slot = hash->eviction_used++;
+	}
+	else {
+		/* Find any candidate that has higher usage count */
+		for (slot = 0; slot < hash->eviction_used; slot++) {
+			if (hash->eviction_pool[slot]->lg_usages > elt->lg_usages) {
+				break;
+			}
+		}
+
+		if (slot == hash->eviction_used) {
+			return FALSE;
+		}
+
+		/* The displaced candidate is no longer pooled */
+		hash->eviction_pool[slot]->eviction_pos = -1;
+	}
+
+	hash->eviction_pool[slot] = elt;
+	elt->eviction_pos = slot;
+
+	if (hash->eviction_min_prio > elt->lg_usages) {
+		hash->eviction_min_prio = elt->lg_usages;
+	}
+
+	return TRUE;
+}
+
+static void
+rspamd_lru_hash_remove_node(rspamd_lru_hash_t *hash, rspamd_lru_element_t *elt)
+{
+ if (elt->eviction_pos != (guint8) -1) { /* element is an eviction candidate: unlink it from the pool first */
+ rspamd_lru_hash_remove_evicted(hash, elt);
+ }
+
+ rspamd_lru_hash_del(hash, (rspamd_lru_vol_element_t *) elt); /* cast relies on `e` being the first member of the volatile element */
+}
+
+static void
+rspamd_lru_hash_evict(rspamd_lru_hash_t *hash, time_t now)
+{
+ double r;
+ guint i;
+ rspamd_lru_element_t *elt = NULL;
+ guint nexpired = 0;
+
+ /*
+ * We either evict one node from the eviction list
+ * or, at some probability scan all table and update eviction
+ * list first
+ *
+ * The full scan is chosen with probability eviction_candidates / maxsize.
+ * If the scan removes at least one expired element, nothing else is
+ * evicted in this call.
+ */
+ r = rspamd_random_double_fast();
+
+ if (r < ((double) eviction_candidates) / hash->maxsize) {
+ /* Full hash scan */
+ rspamd_lru_vol_element_t *cur;
+ rspamd_lru_element_t *selected = NULL;
+
+ kh_foreach_value_ptr(hash, cur, {
+ rspamd_lru_element_t *node = &cur->e;
+
+ if (node->flags & RSPAMD_LRU_ELEMENT_IMMORTAL) {
+ continue; /* rspamd_lru_hash_insert() flags the element it has just added so that it is not evicted here */
+ }
+
+ if (node->flags & RSPAMD_LRU_ELEMENT_VOLATILE) {
+ /* If element is expired, just remove it; volatile elements that are still alive are not considered by this scan */
+ if (now - cur->creation_time > cur->ttl) {
+ rspamd_lru_hash_remove_node(hash, node);
+
+ nexpired++;
+ continue;
+ }
+ }
+ else {
+ rspamd_lru_hash_decrease_counter(node, now);
+
+ if (rspamd_lru_hash_maybe_evict(hash, node)) {
+ if (selected && node->lg_usages < selected->lg_usages) {
+ selected = node; /* keep the least used pooled element seen so far */
+ }
+ else if (selected == NULL) {
+ selected = node;
+ }
+ }
+ }
+ });
+
+ if (selected) {
+ elt = selected;
+ }
+ }
+ else {
+ /* Fast random eviction: take the first pooled candidate whose counter does not exceed the recorded minimum; if none matches, the last pooled candidate is left selected */
+ for (i = 0; i < hash->eviction_used; i++) {
+ elt = hash->eviction_pool[i];
+
+ if (elt->lg_usages <= hash->eviction_min_prio) {
+ break;
+ }
+ }
+ }
+
+ /* Evict if nothing else has been cleaned */
+ if (elt && nexpired == 0) {
+ rspamd_lru_hash_remove_node(hash, elt);
+ }
+}
+
+/*
+ * Create an LRU hash with custom hash and equality functions.
+ * `maxsize` is raised to twice the eviction pool size when it is smaller.
+ * Either destructor may be NULL.
+ *
+ * NOTE(review): `maxsize` is signed while eviction_candidates is unsigned,
+ * so the comparison below is performed in unsigned arithmetic and a negative
+ * `maxsize` is not clamped by it.
+ */
+rspamd_lru_hash_t *
+rspamd_lru_hash_new_full(gint maxsize,
+						 GDestroyNotify key_destroy,
+						 GDestroyNotify value_destroy,
+						 GHashFunc hf,
+						 GEqualFunc cmpf)
+{
+	rspamd_lru_hash_t *cache;
+
+	if (maxsize < eviction_candidates * 2) {
+		maxsize = eviction_candidates * 2;
+	}
+
+	cache = g_malloc0(sizeof(*cache));
+	cache->maxsize = maxsize;
+	cache->key_destroy = key_destroy;
+	cache->value_destroy = value_destroy;
+	cache->hfunc = hf;
+	cache->eqfunc = cmpf;
+	cache->eviction_min_prio = G_MAXUINT;
+	cache->eviction_pool = g_malloc0(sizeof(rspamd_lru_element_t *) *
+									 eviction_candidates);
+
+	/* Preallocate some elements */
+	rspamd_lru_hash_resize(cache, MIN(cache->maxsize, 128));
+
+	return cache;
+}
+
+rspamd_lru_hash_t *
+rspamd_lru_hash_new(gint maxsize,
+ GDestroyNotify key_destroy,
+ GDestroyNotify value_destroy)
+{
+ return rspamd_lru_hash_new_full(maxsize,
+ key_destroy, value_destroy,
+ rspamd_strcase_hash, rspamd_strcase_equal); /* default key functions; presumably case-insensitive C-string keys, confirm in str_util */
+}
+
+/*
+ * Find `key` and return its value, or NULL when the key is absent.
+ * An element with a ttl that has expired relative to `now` is removed and
+ * reported as absent. A successful lookup refreshes the last access stamp,
+ * updates the usage counter and offers the element to the eviction pool.
+ */
+gpointer
+rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash, gconstpointer key, time_t now)
+{
+	rspamd_lru_vol_element_t *found = rspamd_lru_hash_get(hash, (gpointer) key);
+	rspamd_lru_element_t *entry;
+	guint16 stamp;
+
+	if (found == NULL) {
+		return NULL;
+	}
+
+	entry = &found->e;
+
+	/* Check ttl */
+	if ((entry->flags & RSPAMD_LRU_ELEMENT_VOLATILE) &&
+		now - found->creation_time > found->ttl) {
+		rspamd_lru_hash_remove_node(hash, entry);
+
+		return NULL;
+	}
+
+	/* The access stamp only ever moves forward */
+	stamp = TIME_TO_TS(now);
+
+	if (stamp > entry->last) {
+		entry->last = stamp;
+	}
+
+	rspamd_lru_hash_update_counter(entry);
+	rspamd_lru_hash_maybe_evict(hash, entry);
+
+	return entry->data;
+}
+
+/*
+ * Remove `key` from the hash, running the destructors.
+ * Returns TRUE when the key was present.
+ */
+gboolean
+rspamd_lru_hash_remove(rspamd_lru_hash_t *hash,
+					   gconstpointer key)
+{
+	rspamd_lru_vol_element_t *found = rspamd_lru_hash_get(hash, key);
+
+	if (found == NULL) {
+		return FALSE;
+	}
+
+	rspamd_lru_hash_remove_node(hash, &found->e);
+
+	return TRUE;
+}
+
+/*
+ * Insert `value` under `key`, replacing (and destroying) any previous value.
+ * `now` is the current time and `ttl` the lifetime of the element in the
+ * same units; a `ttl` of 0 makes the element non-expiring. When a new
+ * element brings the hash to `maxsize`, one eviction pass is run; the new
+ * element is protected from it.
+ */
+void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash,
+							gpointer key,
+							gpointer value,
+							time_t now,
+							guint ttl)
+{
+	rspamd_lru_element_t *node;
+	rspamd_lru_vol_element_t *vnode;
+	gint ret;
+
+	vnode = rspamd_lru_hash_put(hash, key, &ret);
+	node = &vnode->e;
+
+	if (ret == 0) {
+		/* Existing element, be careful about destructors */
+		if (node->eviction_pos != (guint8) -1) {
+			/*
+			 * The element is reset below and then offered to the eviction
+			 * pool again. Unlink it first: otherwise the pool keeps the old
+			 * entry and the same element gets registered twice.
+			 */
+			rspamd_lru_hash_remove_evicted(hash, node);
+		}
+
+		if (hash->value_destroy) {
+			/* Remove old data */
+			hash->value_destroy(vnode->e.data);
+		}
+
+		if (hash->key_destroy) {
+			/* Here are dragons! The old key is destroyed and replaced by the new one */
+			goffset off = vnode - hash->vals;
+
+			hash->key_destroy(hash->keys[off]);
+			hash->keys[off] = key;
+		}
+	}
+
+
+	if (ttl == 0) {
+		node->flags = RSPAMD_LRU_ELEMENT_NORMAL;
+	}
+	else {
+		vnode->creation_time = now;
+		vnode->ttl = ttl;
+		node->flags = RSPAMD_LRU_ELEMENT_VOLATILE;
+	}
+
+	node->data = value;
+	node->lg_usages = (guint8) lfu_base_value;
+	node->last = TIME_TO_TS(now);
+	node->eviction_pos = (guint8) -1;
+
+	if (ret != 0) {
+		/* Also need to check maxsize */
+		if (kh_size(hash) >= hash->maxsize) {
+			/* Shield the new element from the scan whilst evicting */
+			node->flags |= RSPAMD_LRU_ELEMENT_IMMORTAL;
+			rspamd_lru_hash_evict(hash, now);
+			node->flags &= ~RSPAMD_LRU_ELEMENT_IMMORTAL;
+		}
+	}
+
+	rspamd_lru_hash_maybe_evict(hash, node);
+}
+
+/*
+ * Destroy the hash: run the destructors (if any) on every stored key and
+ * value, then release all internal storage. NULL is accepted.
+ */
+void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash)
+{
+	if (hash == NULL) {
+		return;
+	}
+
+	if (hash->key_destroy != NULL || hash->value_destroy != NULL) {
+		gpointer stored_key;
+		rspamd_lru_vol_element_t stored;
+
+		kh_foreach(hash, stored_key, stored, {
+			if (hash->key_destroy) {
+				hash->key_destroy(stored_key);
+			}
+			if (hash->value_destroy) {
+				hash->value_destroy(stored.e.data);
+			}
+		});
+	}
+
+	g_free(hash->keys);
+	g_free(hash->vals);
+	g_free(hash->flags);
+	g_free(hash->eviction_pool);
+	g_free(hash);
+}
+
+gpointer
+rspamd_lru_hash_element_data(rspamd_lru_element_t *elt)
+{
+ return elt->data; /* the user value stored in the element; `elt` must not be NULL */
+}
+
+/*
+ * Iteration step: starting at bucket `it`, find the next occupied bucket and
+ * store its key and value into `*k` and `*v`. Returns the position to pass
+ * as `it` on the next call, or -1 when there are no more elements (`*k` and
+ * `*v` are left untouched then). Iteration starts with `it` equal to 0.
+ */
+int rspamd_lru_hash_foreach(rspamd_lru_hash_t *h, int it, gpointer *k,
+							gpointer *v)
+{
+	gint pos = it;
+
+	g_assert(it >= 0);
+
+	/* Skip empty and deleted buckets */
+	while (pos != kh_end(h) && !kh_exist(h, pos)) {
+		++pos;
+	}
+
+	if (pos == kh_end(h)) {
+		return -1;
+	}
+
+	*k = h->keys[pos];
+	*v = h->vals[pos].e.data;
+
+	return pos + 1;
+}
+
+
+guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash)
+{
+ return kh_size(hash); /* number of elements currently stored */
+}
+
+/**
+ * Returns hash capacity
+ * @param hash hash object
+ * @return the `maxsize` limit of the hash (as adjusted at creation time)
+ */
+guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash)
+{
+ return hash->maxsize;
+} \ No newline at end of file
diff --git a/src/libutil/hash.h b/src/libutil/hash.h
new file mode 100644
index 0000000..3882ce5
--- /dev/null
+++ b/src/libutil/hash.h
@@ -0,0 +1,114 @@
+/**
+ * @file hash.h
+ * Hash table implementation that allows using memory pools for storage as well as using
+ * shared memory for this purpose
+ */
+
+#ifndef RSPAMD_HASH_H
+#define RSPAMD_HASH_H
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_lru_hash_s;
+typedef struct rspamd_lru_hash_s rspamd_lru_hash_t;
+struct rspamd_lru_element_s;
+typedef struct rspamd_lru_element_s rspamd_lru_element_t;
+
+
+/**
+ * Create new lru hash
+ * @param maxsize maximum elements in a hash
+ * @param key_destroy destructor function for keys
+ * @param value_destroy destructor function for values
+ * @return new rspamd_hash object
+ */
+rspamd_lru_hash_t *rspamd_lru_hash_new(gint maxsize,
+ GDestroyNotify key_destroy,
+ GDestroyNotify value_destroy);
+
+
+/**
+ * Create new lru hash with custom hash and equality functions
+ * @param maxsize maximum elements in a hash
+ * @param key_destroy destructor function for keys
+ * @param value_destroy destructor function for values
+ * @param hfunc pointer to hash function
+ * @param eqfunc pointer to function for comparing keys
+ * @return new rspamd_hash object
+ */
+rspamd_lru_hash_t *rspamd_lru_hash_new_full(gint maxsize,
+ GDestroyNotify key_destroy,
+ GDestroyNotify value_destroy,
+ GHashFunc hfunc,
+ GEqualFunc eqfunc);
+
+/**
+ * Lookup item from hash
+ * @param hash hash object
+ * @param key key to find
+ * @return value of key or NULL if key is not found
+ */
+gpointer rspamd_lru_hash_lookup(rspamd_lru_hash_t *hash,
+ gconstpointer key,
+ time_t now);
+
+/**
+ * Removes key from LRU cache
+ * @param hash
+ * @param key
+ * @return TRUE if key has been found and removed
+ */
+gboolean rspamd_lru_hash_remove(rspamd_lru_hash_t *hash,
+ gconstpointer key);
+
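+/**
+ * Insert item in hash
+ * @param hash hash object
+ * @param key key to insert
+ * @param value value of key
+ * @param now current time, recorded as the element's last-use time
+ * @param ttl time to live; 0 inserts a regular element, any other value
+ * marks the element as volatile with this ttl
+ */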
+void rspamd_lru_hash_insert(rspamd_lru_hash_t *hash,
+ gpointer key,
+ gpointer value,
+ time_t now,
+ guint ttl);
+
+/**
+ * Remove lru hash
+ * @param hash hash object
+ */
+
+void rspamd_lru_hash_destroy(rspamd_lru_hash_t *hash);
+
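+/**
+ * Iterate over lru hash. Iteration must start from it=0 and is finished when
+ * -1 is returned
+ * @param hash hash object
+ * @param it iterator value returned by the previous call (0 for the first call)
+ * @param k output: key of the current element
+ * @param v output: value of the current element
+ * @return new it, or -1 when the iteration is over
+ */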
+int rspamd_lru_hash_foreach(rspamd_lru_hash_t *hash, int it, gpointer *k,
+ gpointer *v);
+
+/**
+ * Returns number of elements in a hash
+ * @param hash hash object
+ */
+guint rspamd_lru_hash_size(rspamd_lru_hash_t *hash);
+
+/**
+ * Returns hash capacity
+ * @param hash hash object
+ */
+guint rspamd_lru_hash_capacity(rspamd_lru_hash_t *hash);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libutil/heap.c b/src/libutil/heap.c
new file mode 100644
index 0000000..8ce70cf
--- /dev/null
+++ b/src/libutil/heap.c
@@ -0,0 +1,197 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/heap.h"
+
+/* Binary min-heap; elements are kept as pointers in a glib pointer array */
+struct rspamd_min_heap {
+ GPtrArray *ar; /* heap storage; element with 1-based `idx` sits at ar[idx - 1] */
+};
+
+/* Swaps two lvalues of any type (relies on the __typeof__ extension) */
+#define __SWAP(a, b) \
+ do { \
+ __typeof__(a) _a = (a); \
+ __typeof__(b) _b = (b); \
+ a = _b; \
+ b = _a; \
+ } while (0)
+/*
+ * Swaps two heap elements: first exchanges their slots in the pointer array
+ * (located through the 1-based `idx` of each element), then exchanges the
+ * `idx` fields so that each element again records its own position.
+ */
+#define heap_swap(h, e1, e2) \
+ do { \
+ __SWAP((h)->ar->pdata[(e1)->idx - 1], (h)->ar->pdata[(e2)->idx - 1]); \
+ __SWAP((e1)->idx, (e2)->idx); \
+ } while (0)
+
+/* Yields the element with the smaller priority; e1 wins on ties */
+#define min_elt(e1, e2) ((e1)->pri <= (e2)->pri ? (e1) : (e2))
+
+/*
+ * Moves an element towards the root for as long as its parent has a strictly
+ * greater priority, which restores the heap ordering after an element has
+ * been added or its priority has been lowered.
+ */
+static void
+rspamd_min_heap_swim(struct rspamd_min_heap *heap,
+                     struct rspamd_min_heap_elt *elt)
+{
+    while (elt->idx > 1) {
+        /* idx is 1-based, hence the parent is stored at position idx / 2 - 1 */
+        struct rspamd_min_heap_elt *up = g_ptr_array_index(heap->ar, elt->idx / 2 - 1);
+
+        if (up->pri <= elt->pri) {
+            /* Ordering already holds, nothing more to do */
+            return;
+        }
+
+        heap_swap(heap, elt, up);
+    }
+}
+
+/*
+ * Sinks the element popped (or changed) to preserve heap's invariant
+ *
+ * `idx` is 1-based: the children of an element are stored at array positions
+ * idx * 2 - 1 and idx * 2. The loop handles the case when both children exist;
+ * the trailing check handles an element that has only a left child.
+ */
+static void
+rspamd_min_heap_sink(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt)
+{
+ struct rspamd_min_heap_elt *c1, *c2, *m;
+
+ /* Both children are present while position idx * 2 is inside the array */
+ while (elt->idx * 2 < heap->ar->len) {
+ c1 = g_ptr_array_index(heap->ar, elt->idx * 2 - 1);
+ c2 = g_ptr_array_index(heap->ar, elt->idx * 2);
+ /* Always swap with the smaller child to keep the ordering */
+ m = min_elt(c1, c2);
+
+ if (elt->pri > m->pri) {
+ heap_swap(heap, elt, m);
+ }
+ else {
+ break;
+ }
+ }
+
+ /* At most a single (left) child remains to be compared with */
+ if (elt->idx * 2 - 1 < heap->ar->len) {
+ m = g_ptr_array_index(heap->ar, elt->idx * 2 - 1);
+ if (elt->pri > m->pri) {
+ heap_swap(heap, elt, m);
+ }
+ }
+}
+
+/*
+ * Allocates an empty heap whose backing pointer array has room reserved
+ * for `reserved_size` elements.
+ */
+struct rspamd_min_heap *
+rspamd_min_heap_create(gsize reserved_size)
+{
+    struct rspamd_min_heap *result = g_malloc(sizeof(*result));
+
+    result->ar = g_ptr_array_sized_new(reserved_size);
+
+    return result;
+}
+
+/*
+ * Adds an element to the heap. The element's `idx` is overwritten with its
+ * new 1-based position; `pri` is read while the element is moved up.
+ */
+void rspamd_min_heap_push(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt)
+{
+ g_assert(heap != NULL);
+ g_assert(elt != NULL);
+
+ /* Add to the end */
+ elt->idx = heap->ar->len + 1;
+ g_ptr_array_add(heap->ar, elt);
+ /* Now swim it up */
+ rspamd_min_heap_swim(heap, elt);
+}
+
+/*
+ * Removes and returns the element stored at the root of the heap, or NULL
+ * when the heap is empty. The last element takes the root's place and is
+ * sunk to its proper position.
+ */
+struct rspamd_min_heap_elt *
+rspamd_min_heap_pop(struct rspamd_min_heap *heap)
+{
+    struct rspamd_min_heap_elt *top, *tail;
+    gboolean need_sink;
+
+    g_assert(heap != NULL);
+
+    if (heap->ar->len == 0) {
+        return NULL;
+    }
+
+    top = g_ptr_array_index(heap->ar, 0);
+    tail = g_ptr_array_index(heap->ar, heap->ar->len - 1);
+    need_sink = (top != tail);
+
+    if (need_sink) {
+        /* Put the root into the last slot so that it can be cut off */
+        heap_swap(heap, top, tail);
+    }
+
+    g_ptr_array_remove_index_fast(heap->ar, heap->ar->len - 1);
+
+    if (need_sink) {
+        /* The former last element is now at the root: restore the ordering */
+        rspamd_min_heap_sink(heap, tail);
+    }
+
+    return top;
+}
+
+/*
+ * Changes the priority of an element that is already in the heap and moves
+ * it down (priority increased) or up (priority decreased) as needed.
+ * Nothing is moved when the priority is unchanged.
+ */
+void rspamd_min_heap_update_elt(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt, guint npri)
+{
+ guint oldpri;
+
+ g_assert(heap != NULL);
+ /* idx is 1-based, so valid positions are 1..len */
+ g_assert(elt->idx > 0 && elt->idx <= heap->ar->len);
+
+ oldpri = elt->pri;
+ elt->pri = npri;
+
+ if (npri > oldpri) {
+ /* We might need to sink */
+ rspamd_min_heap_sink(heap, elt);
+ }
+ else if (npri < oldpri) {
+ /* We might need to swim */
+ rspamd_min_heap_swim(heap, elt);
+ }
+}
+
+/*
+ * Removes an arbitrary element from the heap.
+ *
+ * The element is moved to the root by swapping it with each of its ancestors
+ * and is then popped. Swapping unconditionally (instead of assigning a
+ * priority of `first->pri - 1` and swimming) is required for correctness:
+ * priorities are unsigned, so when the minimum priority is 0 the subtraction
+ * wraps to UINT_MAX, the element stays in place and the wrong element would
+ * be popped. Each ancestor only moves one level down along the path, so the
+ * heap ordering still holds everywhere below the root.
+ *
+ * The element's `pri` is left untouched.
+ */
+void rspamd_min_heap_remove_elt(struct rspamd_min_heap *heap,
+                                struct rspamd_min_heap_elt *elt)
+{
+    struct rspamd_min_heap_elt *parent;
+
+    g_assert(heap != NULL);
+    g_assert(elt != NULL);
+    /* idx is 1-based, so valid positions are 1..len */
+    g_assert(elt->idx > 0 && elt->idx <= heap->ar->len);
+
+    while (elt->idx > 1) {
+        parent = g_ptr_array_index(heap->ar, elt->idx / 2 - 1);
+        heap_swap(heap, elt, parent);
+    }
+
+    /* Now the desired element is on the top of queue */
+    (void) rspamd_min_heap_pop(heap);
+}
+
+/*
+ * Releases the heap and its pointer array. A NULL heap is ignored.
+ */
+void rspamd_min_heap_destroy(struct rspamd_min_heap *heap)
+{
+    if (heap == NULL) {
+        return;
+    }
+
+    g_ptr_array_free(heap->ar, TRUE);
+    g_free(heap);
+}
+
+/*
+ * Returns the element stored at array position `idx`.
+ * Note that `idx` here is a 0-based array position (it must be < len),
+ * unlike the 1-based `idx` field kept inside the elements.
+ */
+struct rspamd_min_heap_elt *
+rspamd_min_heap_index(struct rspamd_min_heap *heap, guint idx)
+{
+ g_assert(heap != NULL);
+ g_assert(idx < heap->ar->len);
+
+ return g_ptr_array_index(heap->ar, idx);
+}
diff --git a/src/libutil/heap.h b/src/libutil/heap.h
new file mode 100644
index 0000000..805f817
--- /dev/null
+++ b/src/libutil/heap.h
@@ -0,0 +1,97 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_HEAP_H_
+#define SRC_LIBUTIL_HEAP_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Binary minimal heap interface based on glib
+ */
+
+/* Heap element; the heap stores pointers to these structures */
+struct rspamd_min_heap_elt {
+ gpointer data; /* user payload */
+ guint pri; /* priority, the heap keeps the smallest one on top */
+ guint idx; /* position inside the heap, maintained by the heap itself */
+};
+
+struct rspamd_min_heap;
+
+/**
+ * Creates min heap with the specified reserved size
+ * @param reserved_size reserved size in elements
+ * @return opaque minimal heap
+ */
+struct rspamd_min_heap *rspamd_min_heap_create(gsize reserved_size);
+
+/**
+ * Pushes an element to the heap. `pri` should be initialized to use this function,
+ * `idx` is used internally by heap interface
+ * @param heap heap structure
+ * @param elt element to push
+ */
+void rspamd_min_heap_push(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt);
+
+/**
+ * Pops the minimum element from the heap and reorder the queue
+ * @param heap heap structure
+ * @return minimum element
+ */
+struct rspamd_min_heap_elt *rspamd_min_heap_pop(struct rspamd_min_heap *heap);
+
+/**
+ * Updates priority for the element. It must be in queue (so `idx` should be sane)
+ * @param heap heap structure
+ * @param elt element to update
+ * @param npri new priority
+ */
+void rspamd_min_heap_update_elt(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt, guint npri);
+
+
+/**
+ * Removes element from the heap
+ * @param heap
+ * @param elt
+ */
+void rspamd_min_heap_remove_elt(struct rspamd_min_heap *heap,
+ struct rspamd_min_heap_elt *elt);
+
+/**
+ * Destroys heap (elements are not destroyed themselves)
+ * @param heap
+ */
+void rspamd_min_heap_destroy(struct rspamd_min_heap *heap);
+
+/**
+ * Returns element from the heap with the specified index
+ * @param heap
+ * @param idx
+ * @return
+ */
+struct rspamd_min_heap_elt *rspamd_min_heap_index(struct rspamd_min_heap *heap,
+ guint idx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_HEAP_H_ */
diff --git a/src/libutil/libev_helper.c b/src/libutil/libev_helper.c
new file mode 100644
index 0000000..770964b
--- /dev/null
+++ b/src/libutil/libev_helper.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libev_helper.h"
+
+/*
+ * libev IO callback: recovers the owning rspamd_io_ev from the watcher's
+ * `data` pointer and forwards the received events to the user callback.
+ */
+static void
+rspamd_ev_watcher_io_cb(EV_P_ struct ev_io *w, int revents)
+{
+ struct rspamd_io_ev *ev = (struct rspamd_io_ev *) w->data;
+
+ ev->cb(ev->io.fd, revents, ev->ud);
+}
+
+/*
+ * libev timer callback: reports a timeout to the user callback by passing
+ * EV_TIMER as the event mask (the `revents` argument is not forwarded).
+ */
+static void
+rspamd_ev_watcher_timer_cb(EV_P_ struct ev_timer *w, int revents)
+{
+ struct rspamd_io_ev *ev = (struct rspamd_io_ev *) w->data;
+
+ /*
+ * We now call timeout callback in all the cases, as we assume that all
+ * timeouts are final
+ */
+ ev->cb(ev->io.fd, EV_TIMER, ev->ud);
+}
+
+
+/*
+ * Prepares both embedded watchers (IO and timer) and stores the user callback
+ * and its data. Nothing is started here.
+ *
+ * NOTE(review): `ev->timeout` is not assigned in this function, while the
+ * stop and reschedule helpers read it - confirm that callers zero the
+ * structure before the first use.
+ */
+void rspamd_ev_watcher_init(struct rspamd_io_ev *ev,
+ int fd,
+ short what,
+ rspamd_ev_cb cb,
+ void *ud)
+{
+ ev_io_init(&ev->io, rspamd_ev_watcher_io_cb, fd, what);
+ /* Both watchers point back to the wrapper so callbacks can find it */
+ ev->io.data = ev;
+ ev_init(&ev->tm, rspamd_ev_watcher_timer_cb);
+ ev->tm.data = ev;
+ ev->ud = ud;
+ ev->cb = cb;
+}
+
+/*
+ * Starts the IO watcher and, when `timeout` is positive, a one-shot timer.
+ * The timeout is remembered in `ev->timeout` only in the positive case;
+ * otherwise the field keeps its previous value.
+ */
+void rspamd_ev_watcher_start(struct ev_loop *loop,
+ struct rspamd_io_ev *ev,
+ ev_tstamp timeout)
+{
+ g_assert(ev->cb != NULL);
+
+ ev_io_start(EV_A_ & ev->io);
+
+ if (timeout > 0) {
+ /* Update timestamp to avoid timers running early */
+ ev_now_update_if_cheap(loop);
+
+ ev->timeout = timeout;
+ /* Repeat interval of 0.0: the timer is not periodic */
+ ev_timer_set(&ev->tm, timeout, 0.0);
+ ev_timer_start(EV_A_ & ev->tm);
+ }
+}
+
+/*
+ * Stops the IO watcher if it can be stopped and stops the timer when a
+ * positive timeout has been recorded in `ev->timeout`.
+ */
+void rspamd_ev_watcher_stop(struct ev_loop *loop,
+ struct rspamd_io_ev *ev)
+{
+ if (ev_can_stop(&ev->io)) {
+ ev_io_stop(EV_A_ & ev->io);
+ }
+
+ if (ev->timeout > 0) {
+ ev_timer_stop(EV_A_ & ev->tm);
+ }
+}
+
+/*
+ * Re-arms the IO watcher for a new event mask `what`, keeping the same file
+ * descriptor. If a positive timeout is recorded and the timer is not
+ * currently stoppable (i.e. not running), the timer is started again with
+ * the recorded timeout; a running timer is left as is.
+ */
+void rspamd_ev_watcher_reschedule(struct ev_loop *loop,
+ struct rspamd_io_ev *ev,
+ short what)
+{
+ g_assert(ev->cb != NULL);
+
+ if (ev_can_stop(&ev->io)) {
+ /* Watcher is running: it must be stopped before its mask is changed */
+ ev_io_stop(EV_A_ & ev->io);
+ ev_io_set(&ev->io, ev->io.fd, what);
+ ev_io_start(EV_A_ & ev->io);
+ }
+ else {
+ /* Watcher is not running: initialise it from scratch */
+ ev->io.data = ev;
+ ev_io_init(&ev->io, rspamd_ev_watcher_io_cb, ev->io.fd, what);
+ ev_io_start(EV_A_ & ev->io);
+ }
+
+ if (ev->timeout > 0) {
+ if (!(ev_can_stop(&ev->tm))) {
+ /* Update timestamp to avoid timers running early */
+ ev_now_update_if_cheap(loop);
+
+ ev->tm.data = ev;
+ ev_timer_init(&ev->tm, rspamd_ev_watcher_timer_cb, ev->timeout, 0.0);
+ ev_timer_start(EV_A_ & ev->tm);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/libutil/libev_helper.h b/src/libutil/libev_helper.h
new file mode 100644
index 0000000..44d1604
--- /dev/null
+++ b/src/libutil/libev_helper.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_LIBEV_HELPER_H
+#define RSPAMD_LIBEV_HELPER_H
+
+#include "config.h"
+#include "contrib/libev/ev.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This module is a little helper to simplify libevent->libev transition
+ * It allows to create timed IO watchers utilising both
+ */
+
+typedef void (*rspamd_ev_cb)(int fd, short what, void *ud);
+
+/* An IO watcher paired with a timeout timer and a single user callback */
+struct rspamd_io_ev {
+ ev_io io; /* libev IO watcher */
+ ev_timer tm; /* libev timer used for the timeout */
+ rspamd_ev_cb cb; /* user callback invoked for both IO and timeout events */
+ void *ud; /* opaque user data passed to the callback */
+ ev_tstamp timeout; /* last positive timeout passed on start */
+};
+
+/**
+ * Initialize watcher similar to event_init
+ * @param ev
+ * @param fd
+ * @param what
+ * @param cb
+ * @param ud
+ */
+void rspamd_ev_watcher_init(struct rspamd_io_ev *ev,
+ int fd, short what, rspamd_ev_cb cb, void *ud);
+
+/**
+ * Start watcher with the specific timeout
+ * @param loop
+ * @param ev
+ * @param timeout
+ */
+void rspamd_ev_watcher_start(struct ev_loop *loop,
+ struct rspamd_io_ev *ev,
+ ev_tstamp timeout);
+
+/**
+ * Stops watcher and clean it up
+ * @param loop
+ * @param ev
+ */
+void rspamd_ev_watcher_stop(struct ev_loop *loop,
+ struct rspamd_io_ev *ev);
+
+/**
+ * Convenience function to reschedule watcher with different events
+ * @param loop
+ * @param ev
+ * @param what
+ */
+void rspamd_ev_watcher_reschedule(struct ev_loop *loop,
+ struct rspamd_io_ev *ev,
+ short what);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c
new file mode 100644
index 0000000..119ade3
--- /dev/null
+++ b/src/libutil/mem_pool.c
@@ -0,0 +1,1327 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include "logger.h"
+#include "ottery.h"
+#include "unix-std.h"
+#include "khash.h"
+#include "cryptobox.h"
+#include "contrib/uthash/utlist.h"
+#include "mem_pool_internal.h"
+
+#ifdef WITH_JEMALLOC
+#include <jemalloc/jemalloc.h>
+#if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3)
+#define HAVE_MALLOC_SIZE 1
+#define sys_alloc_size(sz) nallocx(sz, 0)
+#endif
+#elif defined(__APPLE__)
+#include <malloc/malloc.h>
+#define HAVE_MALLOC_SIZE 1
+#define sys_alloc_size(sz) malloc_good_size(sz)
+#endif
+
+#ifdef HAVE_SCHED_YIELD
+#include <sched.h>
+#endif
+
+/* Sleep time for spin lock in nanoseconds */
+#define MUTEX_SLEEP_TIME 10000000L
+#define MUTEX_SPIN_COUNT 100
+
+/* Pool locking hooks; both macros currently expand to an empty statement */
+#define POOL_MTX_LOCK() \
+ do { \
+ } while (0)
+#define POOL_MTX_UNLOCK() \
+ do { \
+ } while (0)
+
+/*
+ * This define specify whether we should check all pools for free space for new object
+ * or just begin scan from current (recently attached) pool
+ * If MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower
+ * but requires less memory). If it is not defined check only current pool and if object is too large
+ * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than
+ * greedy method)
+ */
+#undef MEMORY_GREEDY
+
+
+/*
+ * Hash function for the entry points table: hashes the NUL-terminated
+ * location string with the fast cryptobox hash and the global hash seed,
+ * truncating the result to 32 bits.
+ */
+static inline uint32_t
+rspamd_entry_hash(const char *str)
+{
+ return (guint) rspamd_cryptobox_fast_hash(str, strlen(str), rspamd_hash_seed());
+}
+
+/*
+ * Equality function for the entry points table: non-zero when both
+ * NUL-terminated strings have the same content.
+ */
+static inline int
+rspamd_entry_equal(const char *k1, const char *k2)
+{
+ return strcmp(k1, k2) == 0;
+}
+
+
+KHASH_INIT(mempool_entry, const gchar *, struct rspamd_mempool_entry_point *,
+ 1, rspamd_entry_hash, rspamd_entry_equal)
+
+static khash_t(mempool_entry) *mempool_entries = NULL;
+
+
+/* Internal statistic */
+static rspamd_mempool_stat_t *mem_pool_stat = NULL;
+/* Environment variable */
+static gboolean env_checked = FALSE;
+static gboolean always_malloc = FALSE;
+
+/**
+ * Function that return free space in pool page
+ * @param chain pool page struct
+ * @return number of bytes still available, or 0 when the page is full
+ */
+static gsize
+pool_chain_free(struct _pool_chain *chain)
+{
+ /* MIN_MEM_ALIGNMENT bytes are always counted as occupied */
+ gint64 occupied = chain->pos - chain->begin + MIN_MEM_ALIGNMENT;
+
+ return (occupied < (gint64) chain->slice_size ? chain->slice_size - occupied : 0);
+}
+
+/* By default allocate 4Kb chunks of memory */
+#define FIXED_POOL_SIZE 4096
+
+/*
+ * Registers a new entry point for the allocation site `loc` in the global
+ * table and returns it. The entry is zero-initialised, remembers the location
+ * string and starts with a size suggestion of one page, but not less than
+ * FIXED_POOL_SIZE.
+ *
+ * The `loc` pointer itself is stored as the table key.
+ */
+static inline struct rspamd_mempool_entry_point *
+rspamd_mempool_entry_new(const gchar *loc)
+{
+    struct rspamd_mempool_entry_point *entry = NULL;
+    gint r;
+    khiter_t k;
+
+    k = kh_put(mempool_entry, mempool_entries, loc, &r);
+
+    if (r >= 0) {
+        /* g_malloc0 already returns zeroed memory, no extra memset is needed */
+        entry = g_malloc0(sizeof(*entry));
+        kh_value(mempool_entries, k) = entry;
+        rspamd_strlcpy(entry->src, loc, sizeof(entry->src));
+#ifdef HAVE_GETPAGESIZE
+        entry->cur_suggestion = MAX(getpagesize(), FIXED_POOL_SIZE);
+#else
+        entry->cur_suggestion = MAX(sysconf(_SC_PAGESIZE), FIXED_POOL_SIZE);
+#endif
+    }
+    else {
+        g_assert_not_reached();
+    }
+
+    return entry;
+}
+
+/* Creates the global entry points table if it does not exist yet */
+RSPAMD_CONSTRUCTOR(rspamd_mempool_entries_ctor)
+{
+ if (mempool_entries == NULL) {
+ mempool_entries = kh_init(mempool_entry);
+ }
+}
+
+/*
+ * Frees every entry point and the global table itself, then resets the table
+ * pointer so that it can be created again on demand.
+ */
+RSPAMD_DESTRUCTOR(rspamd_mempool_entries_dtor)
+{
+ struct rspamd_mempool_entry_point *elt;
+
+ kh_foreach_value(mempool_entries, elt, {
+ g_free(elt);
+ });
+
+ kh_destroy(mempool_entry, mempool_entries);
+ mempool_entries = NULL;
+}
+
+/*
+ * Returns the entry point registered for the allocation site `loc`,
+ * registering a new one when the site has not been seen before. The global
+ * table is created first if it is missing.
+ */
+static inline struct rspamd_mempool_entry_point *
+rspamd_mempool_get_entry(const gchar *loc)
+{
+    khiter_t pos;
+
+    if (G_UNLIKELY(mempool_entries == NULL)) {
+        rspamd_mempool_entries_ctor();
+    }
+
+    pos = kh_get(mempool_entry, mempool_entries, loc);
+
+    if (pos == kh_end(mempool_entries)) {
+        /* Unknown location */
+        return rspamd_mempool_entry_new(loc);
+    }
+
+    return kh_value(mempool_entries, pos);
+}
+
+/*
+ * Allocates a new chain element with room for `size` bytes of payload.
+ *
+ * The allocation holds the chain header, the payload and `alignment` spare
+ * bytes so that the first payload address can be aligned. Shared chains are
+ * obtained with mmap(MAP_SHARED), normal chains with posix_memalign().
+ * The global statistics are updated accordingly.
+ *
+ * @param size payload size, must be greater than 0
+ * @param alignment alignment of the first payload byte
+ * @param pool_type normal or shared chain
+ * @return new chain; the process is aborted on allocation failure. In the
+ * /dev/zero variant NULL is returned when the device cannot be opened.
+ */
+static struct _pool_chain *
+rspamd_mempool_chain_new(gsize size, gsize alignment, enum rspamd_mempool_chain_type pool_type)
+{
+    struct _pool_chain *chain;
+    gsize total_size = size + sizeof(struct _pool_chain) + alignment,
+          optimal_size = 0;
+    gpointer map;
+
+    g_assert(size > 0);
+
+    if (pool_type == RSPAMD_MEMPOOL_SHARED) {
+#if defined(HAVE_MMAP_ANON)
+        map = mmap(NULL,
+                   total_size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_ANON | MAP_SHARED,
+                   -1,
+                   0);
+        if (map == MAP_FAILED) {
+            g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+                    G_STRLOC, total_size);
+            abort();
+        }
+        chain = map;
+        chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain);
+#elif defined(HAVE_MMAP_ZERO)
+        gint fd;
+
+        fd = open("/dev/zero", O_RDWR);
+        if (fd == -1) {
+            return NULL;
+        }
+        /*
+         * Map the full size: slice_size below is derived from total_size,
+         * so a shorter mapping would be overrun.
+         */
+        map = mmap(NULL,
+                   total_size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED,
+                   fd,
+                   0);
+        /* The mapping does not depend on the descriptor once established */
+        close(fd);
+        if (map == MAP_FAILED) {
+            msg_err("cannot allocate %z bytes, aborting", total_size);
+            abort();
+        }
+        chain = map;
+        chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain);
+#else
+#error No mmap methods are defined
+#endif
+        g_atomic_int_inc(&mem_pool_stat->shared_chunks_allocated);
+        g_atomic_int_add(&mem_pool_stat->bytes_allocated, total_size);
+    }
+    else {
+#ifdef HAVE_MALLOC_SIZE
+        /* Use the whole block the allocator would hand out anyway */
+        optimal_size = sys_alloc_size(total_size);
+#endif
+        total_size = MAX(total_size, optimal_size);
+        gint ret = posix_memalign(&map, alignment, total_size);
+
+        if (ret != 0 || map == NULL) {
+            /* posix_memalign reports the error in its return value, not in errno */
+            g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes: %d - %s",
+                    G_STRLOC, total_size, ret, strerror(ret));
+            abort();
+        }
+
+        chain = map;
+        chain->begin = ((guint8 *) chain) + sizeof(struct _pool_chain);
+        g_atomic_int_add(&mem_pool_stat->bytes_allocated, total_size);
+        g_atomic_int_inc(&mem_pool_stat->chunks_allocated);
+    }
+
+    chain->pos = align_ptr(chain->begin, alignment);
+    chain->slice_size = total_size - sizeof(struct _pool_chain);
+
+    return chain;
+}
+
+
+/**
+ * Get the current (head) chain of the specified type
+ * @param pool memory pool
+ * @param pool_type chain type, must be below RSPAMD_MEMPOOL_MAX
+ * @return head of the chain list for this type as stored in the pool
+ */
+static struct _pool_chain *
+rspamd_mempool_get_chain(rspamd_mempool_t *pool,
+ enum rspamd_mempool_chain_type pool_type)
+{
+ g_assert(pool_type >= 0 && pool_type < RSPAMD_MEMPOOL_MAX);
+
+ return pool->priv->pools[pool_type];
+}
+
+/*
+ * Attaches a chain to the pool's list for the given type. Despite the name
+ * the chain is put at the head of the list (LL_PREPEND), so it becomes the
+ * chain returned by rspamd_mempool_get_chain.
+ */
+static void
+rspamd_mempool_append_chain(rspamd_mempool_t *pool,
+ struct _pool_chain *chain,
+ enum rspamd_mempool_chain_type pool_type)
+{
+ g_assert(pool_type >= 0 && pool_type < RSPAMD_MEMPOOL_MAX);
+ g_assert(chain != NULL);
+
+ LL_PREPEND(pool->priv->pools[pool_type], chain);
+}
+
+/**
+ * Allocate new memory pool
+ *
+ * The pool object, its private part and the first chain are carved out of a
+ * single allocation. The shared statistics block is created on first use.
+ *
+ * @param size size of pool's page; 0 selects the current suggestion of the
+ * entry point associated with `loc`
+ * @param tag optional tag name copied into the pool (may be NULL)
+ * @param flags pool flags; RSPAMD_MEMPOOL_DEBUG adds a per-location table
+ * @param loc allocation site used to look up the entry point
+ * @return new memory pool object (the process is aborted on failure)
+ */
+rspamd_mempool_t *
+rspamd_mempool_new_(gsize size, const gchar *tag, gint flags, const gchar *loc)
+{
+    rspamd_mempool_t *new_pool;
+    gpointer map;
+
+    /* Allocate statistic structure if it is not allocated before */
+    if (mem_pool_stat == NULL) {
+#if defined(HAVE_MMAP_ANON)
+        map = mmap(NULL,
+                   sizeof(rspamd_mempool_stat_t),
+                   PROT_READ | PROT_WRITE,
+                   MAP_ANON | MAP_SHARED,
+                   -1,
+                   0);
+        if (map == MAP_FAILED) {
+            msg_err("cannot allocate %z bytes, aborting",
+                    sizeof(rspamd_mempool_stat_t));
+            abort();
+        }
+        mem_pool_stat = (rspamd_mempool_stat_t *) map;
+#elif defined(HAVE_MMAP_ZERO)
+        gint fd;
+
+        fd = open("/dev/zero", O_RDWR);
+        g_assert(fd != -1);
+        map = mmap(NULL,
+                   sizeof(rspamd_mempool_stat_t),
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED,
+                   fd,
+                   0);
+        /* The mapping does not depend on the descriptor once established */
+        close(fd);
+        if (map == MAP_FAILED) {
+            msg_err("cannot allocate %z bytes, aborting",
+                    sizeof(rspamd_mempool_stat_t));
+            abort();
+        }
+        mem_pool_stat = (rspamd_mempool_stat_t *) map;
+#else
+#error No mmap methods are defined
+#endif
+        memset(map, 0, sizeof(rspamd_mempool_stat_t));
+    }
+
+    if (!env_checked) {
+        /* The VALGRIND environment variable switches pools to plain malloc */
+        const char *valgrind_env;
+
+        valgrind_env = getenv("VALGRIND");
+        if (valgrind_env != NULL) {
+            always_malloc = TRUE;
+        }
+        env_checked = TRUE;
+    }
+
+    struct rspamd_mempool_entry_point *entry = rspamd_mempool_get_entry(loc);
+    gsize total_size;
+
+    if (size == 0 && entry) {
+        size = entry->cur_suggestion;
+    }
+
+    total_size = sizeof(rspamd_mempool_t) +
+                 sizeof(struct rspamd_mempool_specific) +
+                 MIN_MEM_ALIGNMENT +
+                 sizeof(struct _pool_chain) +
+                 size;
+
+    if (G_UNLIKELY(flags & RSPAMD_MEMPOOL_DEBUG)) {
+        total_size += sizeof(GHashTable *);
+    }
+    /*
+     * Memory layout:
+     * struct rspamd_mempool_t
+     * <optional debug hash table>
+     * struct rspamd_mempool_specific
+     * struct _pool_chain
+     * alignment (if needed)
+     * memory chunk
+     */
+    guchar *mem_chunk;
+    gint ret = posix_memalign((void **) &mem_chunk, MIN_MEM_ALIGNMENT,
+                              total_size);
+    gsize priv_offset;
+
+    if (ret != 0 || mem_chunk == NULL) {
+        /* posix_memalign reports the error in its return value, not in errno */
+        g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes: %d - %s",
+                G_STRLOC, total_size, ret, strerror(ret));
+        abort();
+    }
+
+    /* Set memory layout */
+    new_pool = (rspamd_mempool_t *) mem_chunk;
+    if (G_UNLIKELY(flags & RSPAMD_MEMPOOL_DEBUG)) {
+        /* Allocate debug table */
+        GHashTable *debug_tbl;
+
+        debug_tbl = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+        /* The table pointer is stored right after the pool structure */
+        memcpy(mem_chunk + sizeof(rspamd_mempool_t), &debug_tbl,
+               sizeof(GHashTable *));
+        priv_offset = sizeof(rspamd_mempool_t) + sizeof(GHashTable *);
+    }
+    else {
+        priv_offset = sizeof(rspamd_mempool_t);
+    }
+
+    new_pool->priv = (struct rspamd_mempool_specific *) (mem_chunk +
+                                                         priv_offset);
+    /* Zero memory for specific and for the first chain */
+    memset(new_pool->priv, 0, sizeof(struct rspamd_mempool_specific) + sizeof(struct _pool_chain));
+
+    new_pool->priv->entry = entry;
+    new_pool->priv->elt_len = size;
+    new_pool->priv->flags = flags;
+
+    if (tag) {
+        rspamd_strlcpy(new_pool->tag.tagname, tag, sizeof(new_pool->tag.tagname));
+    }
+    else {
+        new_pool->tag.tagname[0] = '\0';
+    }
+
+    /* Generate new uid */
+    uint64_t uid = rspamd_random_uint64_fast();
+    rspamd_encode_hex_buf((unsigned char *) &uid, sizeof(uid),
+                          new_pool->tag.uid, sizeof(new_pool->tag.uid) - 1);
+    new_pool->tag.uid[sizeof(new_pool->tag.uid) - 1] = '\0';
+
+    mem_pool_stat->pools_allocated++;
+
+    /* Now we can attach one chunk to speed up simple allocations */
+    struct _pool_chain *nchain;
+
+    nchain = (struct _pool_chain *) (mem_chunk +
+                                     priv_offset +
+                                     sizeof(struct rspamd_mempool_specific));
+
+    guchar *unaligned = mem_chunk +
+                        priv_offset +
+                        sizeof(struct rspamd_mempool_specific) +
+                        sizeof(struct _pool_chain);
+
+    nchain->slice_size = size;
+    nchain->begin = unaligned;
+    nchain->pos = align_ptr(unaligned, MIN_MEM_ALIGNMENT);
+    new_pool->priv->pools[RSPAMD_MEMPOOL_NORMAL] = nchain;
+    new_pool->priv->used_memory = size;
+
+    /* Adjust stats */
+    g_atomic_int_add(&mem_pool_stat->bytes_allocated,
+                     (gint) size);
+    g_atomic_int_add(&mem_pool_stat->chunks_allocated, 1);
+
+    return new_pool;
+}
+
+static void *
+memory_pool_alloc_common(rspamd_mempool_t *pool, gsize size, gsize alignment,
+ enum rspamd_mempool_chain_type pool_type,
+ const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+
+
+/*
+ * Accounts `size` bytes to the allocation site `loc` in the pool's debug
+ * table. Does nothing when the pool is NULL or was created without
+ * RSPAMD_MEMPOOL_DEBUG.
+ */
+void rspamd_mempool_notify_alloc_(rspamd_mempool_t *pool, gsize size, const gchar *loc)
+{
+ if (pool && G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) {
+ /* The debug table pointer is stored right after the pool structure */
+ GHashTable *debug_tbl = *(GHashTable **) (((guchar *) pool + sizeof(*pool)));
+ gpointer ptr;
+
+ ptr = g_hash_table_lookup(debug_tbl, loc);
+
+ /* The accumulated byte count is kept directly in the value pointer */
+ if (ptr) {
+ ptr = GSIZE_TO_POINTER(GPOINTER_TO_SIZE(ptr) + size);
+ }
+ else {
+ ptr = GSIZE_TO_POINTER(size);
+ }
+
+ g_hash_table_insert(debug_tbl, (gpointer) loc, ptr);
+ }
+}
+
+/*
+ * Common allocation routine for normal and shared pool memory.
+ *
+ * Takes `size` bytes aligned to `alignment` from the current chain of the
+ * requested type; when the chain has not enough room a new chain is created
+ * and put in front of the list. When `always_malloc` is set, non-shared
+ * requests are served by g_malloc and remembered in the pool's trash stack.
+ *
+ * The process is aborted when `pool` is NULL, so the function never returns
+ * NULL to the caller.
+ */
+static void *
+memory_pool_alloc_common(rspamd_mempool_t *pool, gsize size, gsize alignment,
+ enum rspamd_mempool_chain_type pool_type, const gchar *loc)
+{
+ guint8 *tmp;
+ struct _pool_chain *new, *cur;
+ gsize free = 0;
+
+ if (pool) {
+ POOL_MTX_LOCK();
+ pool->priv->used_memory += size;
+
+ if (G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) {
+ rspamd_mempool_notify_alloc_(pool, size, loc);
+ }
+
+ if (always_malloc && pool_type != RSPAMD_MEMPOOL_SHARED) {
+ void *ptr;
+
+ if (alignment <= G_MEM_ALIGN) {
+ ptr = g_malloc(size);
+ }
+ else {
+ /*
+ * NOTE(review): the aligned pointer, not the one returned by
+ * g_malloc, is what ends up in trash_stack - confirm how the
+ * trash stack entries are released.
+ */
+ ptr = g_malloc(size + alignment);
+ ptr = align_ptr(ptr, alignment);
+ }
+ POOL_MTX_UNLOCK();
+
+ /* The trash stack is created lazily on the first such allocation */
+ if (pool->priv->trash_stack == NULL) {
+ pool->priv->trash_stack = g_ptr_array_sized_new(128);
+ }
+
+ g_ptr_array_add(pool->priv->trash_stack, ptr);
+
+ return ptr;
+ }
+
+ cur = rspamd_mempool_get_chain(pool, pool_type);
+
+ /* Find free space in pool chain */
+ if (cur) {
+ free = pool_chain_free(cur);
+ }
+
+ if (cur == NULL || free < size + alignment) {
+ if (free < size) {
+ pool->priv->wasted_memory += free;
+ }
+
+ /* Allocate new chain element */
+ if (pool->priv->elt_len >= size + alignment) {
+ pool->priv->entry->elts[pool->priv->entry->cur_elts].fragmentation += size;
+ new = rspamd_mempool_chain_new(pool->priv->elt_len, alignment,
+ pool_type);
+ }
+ else {
+ /* Request does not fit a regular page: make an oversized chain */
+ mem_pool_stat->oversized_chunks++;
+ g_atomic_int_add(&mem_pool_stat->fragmented_size,
+ free);
+ pool->priv->entry->elts[pool->priv->entry->cur_elts].fragmentation += free;
+ new = rspamd_mempool_chain_new(size + pool->priv->elt_len, alignment,
+ pool_type);
+ }
+
+ /* Connect to pool subsystem */
+ rspamd_mempool_append_chain(pool, new, pool_type);
+ /* No need to align again, aligned by rspamd_mempool_chain_new */
+ tmp = new->pos;
+ new->pos = tmp + size;
+ POOL_MTX_UNLOCK();
+
+ return tmp;
+ }
+
+ /* No need to allocate page */
+ tmp = align_ptr(cur->pos, alignment);
+ cur->pos = tmp + size;
+ POOL_MTX_UNLOCK();
+
+ return tmp;
+ }
+
+ /* A NULL pool is a programming error */
+ abort();
+}
+
+
+/*
+ * Allocates `size` bytes with the given alignment from the normal
+ * (non-shared) part of the pool; `loc` identifies the allocation site.
+ */
+void *
+rspamd_mempool_alloc_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+{
+ return memory_pool_alloc_common(pool, size, alignment, RSPAMD_MEMPOOL_NORMAL, loc);
+}
+
+/*
+ * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX
+ * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW
+ */
+#define MUL_NO_OVERFLOW (1UL << (sizeof(gsize) * 4))
+
+/*
+ * Allocates an array of `nmemb` elements of `size` bytes each from the
+ * normal part of the pool. The process is terminated when nmemb * size
+ * does not fit into gsize.
+ */
+void *
+rspamd_mempool_alloc_array_(rspamd_mempool_t *pool, gsize nmemb, gsize size, gsize alignment, const gchar *loc)
+{
+ /* The division is done only when one of the factors is large enough to overflow */
+ if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
+ nmemb > 0 && G_MAXSIZE / nmemb < size) {
+
+ g_error("alloc_array: overflow %" G_GSIZE_FORMAT " * %" G_GSIZE_FORMAT "",
+ nmemb, size);
+ g_abort();
+ }
+ return memory_pool_alloc_common(pool, size * nmemb, alignment, RSPAMD_MEMPOOL_NORMAL, loc);
+}
+
+/* Same as rspamd_mempool_alloc_, but the returned memory is zero-filled */
+void *
+rspamd_mempool_alloc0_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+{
+ void *pointer = rspamd_mempool_alloc_(pool, size, alignment, loc);
+ memset(pointer, 0, size);
+
+ return pointer;
+}
+/* Same as rspamd_mempool_alloc_shared_, but the returned memory is zero-filled */
+void *
+rspamd_mempool_alloc0_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+{
+ void *pointer = rspamd_mempool_alloc_shared_(pool, size, alignment, loc);
+
+ memset(pointer, 0, size);
+ return pointer;
+}
+
+/*
+ * Allocates `size` bytes with the given alignment from the shared part of
+ * the pool; `loc` identifies the allocation site.
+ */
+void *
+rspamd_mempool_alloc_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+{
+ return memory_pool_alloc_common(pool, size, alignment, RSPAMD_MEMPOOL_SHARED, loc);
+}
+
+
+/*
+ * Duplicates a NUL-terminated string into the pool.
+ * Returns NULL when `src` is NULL.
+ */
+gchar *
+rspamd_mempool_strdup_(rspamd_mempool_t *pool, const gchar *src, const gchar *loc)
+{
+ if (src == NULL) {
+ return NULL;
+ }
+ return rspamd_mempool_strdup_len_(pool, src, strlen(src), loc);
+}
+
+/*
+ * Copies the first `len` bytes of `src` into pool memory and appends a
+ * terminating NUL byte. Returns NULL when `src` is NULL.
+ */
+gchar *
+rspamd_mempool_strdup_len_(rspamd_mempool_t *pool, const gchar *src, gsize len, const gchar *loc)
+{
+    if (src != NULL) {
+        gchar *copy = rspamd_mempool_alloc_(pool, len + 1, MIN_MEM_ALIGNMENT, loc);
+
+        memcpy(copy, src, len);
+        copy[len] = '\0';
+
+        return copy;
+    }
+
+    return NULL;
+}
+
+/*
+ * Copies the contents of a token (`begin`/`len`) into pool memory as a
+ * NUL-terminated string. Returns NULL when `src` is NULL.
+ */
+gchar *
+rspamd_mempool_ftokdup_(rspamd_mempool_t *pool, const rspamd_ftok_t *src,
+                        const gchar *loc)
+{
+    if (src != NULL) {
+        gchar *copy = rspamd_mempool_alloc_(pool, src->len + 1, MIN_MEM_ALIGNMENT, loc);
+
+        memcpy(copy, src->begin, src->len);
+        copy[src->len] = '\0';
+
+        return copy;
+    }
+
+    return NULL;
+}
+
+/*
+ * Registers a destructor callback with its argument in the pool. The record
+ * itself lives in pool memory and is added to the end of the pool's
+ * destructors list; `function` and `line` are stored for diagnostics.
+ */
+void rspamd_mempool_add_destructor_full(rspamd_mempool_t *pool,
+                                        rspamd_mempool_destruct_t func,
+                                        void *data,
+                                        const gchar *function,
+                                        const gchar *line)
+{
+    struct _pool_destructors *dtor;
+
+    POOL_MTX_LOCK();
+    dtor = rspamd_mempool_alloc_(pool, sizeof(*dtor),
+                                 RSPAMD_ALIGNOF(struct _pool_destructors), line);
+    dtor->func = func;
+    dtor->data = data;
+    dtor->function = function;
+    dtor->loc = line;
+    dtor->next = NULL;
+
+    if (pool->priv->dtors_tail == NULL) {
+        /* First destructor: it is the head as well */
+        pool->priv->dtors_head = dtor;
+    }
+    else {
+        pool->priv->dtors_tail->next = dtor;
+    }
+
+    pool->priv->dtors_tail = dtor;
+
+    POOL_MTX_UNLOCK();
+}
+
+/*
+ * Find the first destructor record matching both `func` and `old_data`
+ * and repoint its data to `new_data`. Does nothing if no record matches.
+ */
+void rspamd_mempool_replace_destructor(rspamd_mempool_t *pool,
+                                       rspamd_mempool_destruct_t func,
+                                       void *old_data,
+                                       void *new_data)
+{
+    struct _pool_destructors *node = pool->priv->dtors_head;
+
+    while (node != NULL) {
+        if (node->func == func && node->data == old_data) {
+            /* func already equals the requested one; only data changes */
+            node->data = new_data;
+            return;
+        }
+
+        node = node->next;
+    }
+}
+
+/*
+ * qsort() comparator for gint values, ascending order.
+ * Uses comparisons instead of subtraction: `i1 - i2` overflows (undefined
+ * behaviour) when the operands have opposite signs and large magnitude.
+ */
+static gint
+cmp_int(gconstpointer a, gconstpointer b)
+{
+    const gint i1 = *(const gint *) a, i2 = *(const gint *) b;
+
+    return (i1 > i2) - (i1 < i2);
+}
+
+/*
+ * Recompute e->cur_suggestion from the samples stored in e->elts and then
+ * clear the samples. The result is clamped to [1024, 10 * 1024 * 1024].
+ */
+static void
+rspamd_mempool_adjust_entry(struct rspamd_mempool_entry_point *e)
+{
+ gint sz[G_N_ELEMENTS(e->elts)], sel_pos, sel_neg;
+ guint i, jitter;
+
+ /* One value per sample: fragmentation minus leftover */
+ for (i = 0; i < G_N_ELEMENTS(sz); i++) {
+ sz[i] = e->elts[i].fragmentation - (gint) e->elts[i].leftover;
+ }
+
+ qsort(sz, G_N_ELEMENTS(sz), sizeof(gint), cmp_int);
+ /* jitter is in [0, 9], so the indices used below are 50..59 and 4..13 */
+ jitter = rspamd_random_uint64_fast() % 10;
+ /*
+ * Take stochastic quantiles
+ */
+ sel_pos = sz[50 + jitter];
+ sel_neg = sz[4 + jitter];
+
+ if (-sel_neg > sel_pos) {
+ /* We need to reduce current suggestion */
+ e->cur_suggestion /= (1 + (((double) -sel_neg) / e->cur_suggestion)) * 1.5;
+ }
+ else {
+ /* We still want to grow */
+ e->cur_suggestion *= (1 + (((double) sel_pos) / e->cur_suggestion)) * 1.5;
+ }
+
+ /* Some sane limits counting mempool architecture */
+ if (e->cur_suggestion < 1024) {
+ e->cur_suggestion = 1024;
+ }
+ else if (e->cur_suggestion > 1024 * 1024 * 10) {
+ e->cur_suggestion = 1024 * 1024 * 10;
+ }
+
+ /* Drop all samples so the next round starts from scratch */
+ memset(e->elts, 0, sizeof(e->elts));
+}
+
+/*
+ * Run the destructor of every stored pool variable, update the entry's
+ * preallocation hint (cur_vars) if this pool held more variables than the
+ * hint, and destroy the variables hash.
+ */
+static void
+rspamd_mempool_variables_cleanup(rspamd_mempool_t *pool)
+{
+ if (pool->priv->variables) {
+ struct rspamd_mempool_variable *var;
+ kh_foreach_value_ptr(pool->priv->variables, var, {
+ if (var->dtor) {
+ var->dtor(var->data);
+ }
+ });
+
+ if (pool->priv->entry && pool->priv->entry->cur_vars <
+ kh_size(pool->priv->variables)) {
+ /*
+ * Update the preallocation hint, always capped by
+ * max_preallocated_vars:
+ * 1) previous guess was zero: use the current variables count
+ * 2) current count is more than twice the previous guess:
+ * use the current variables count
+ * 3) otherwise: use twice the previous guess
+ */
+ static const guint max_preallocated_vars = 512;
+
+ guint cur_size = kh_size(pool->priv->variables);
+ guint old_guess = pool->priv->entry->cur_vars;
+ guint new_guess;
+
+ if (old_guess == 0) {
+ new_guess = MIN(cur_size, max_preallocated_vars);
+ }
+ else {
+ if (old_guess * 2 < cur_size) {
+ new_guess = MIN(cur_size, max_preallocated_vars);
+ }
+ else {
+ /* Too large step */
+ new_guess = MIN(old_guess * 2, max_preallocated_vars);
+ }
+ }
+
+ pool->priv->entry->cur_vars = new_guess;
+ }
+
+ kh_destroy(rspamd_mempool_vars_hash, pool->priv->variables);
+ pool->priv->variables = NULL;
+ }
+}
+
+/*
+ * Invoke every registered destructor whose data pointer is non-NULL,
+ * empty the destructor list and clean up pool variables. The pool itself
+ * is not freed.
+ */
+void rspamd_mempool_destructors_enforce(rspamd_mempool_t *pool)
+{
+    struct _pool_destructors *dtor;
+
+    POOL_MTX_LOCK();
+
+    for (dtor = pool->priv->dtors_head; dtor != NULL; dtor = dtor->next) {
+        /* Records with NULL data are skipped */
+        if (dtor->data == NULL) {
+            continue;
+        }
+
+        dtor->func(dtor->data);
+    }
+
+    pool->priv->dtors_tail = NULL;
+    pool->priv->dtors_head = NULL;
+
+    rspamd_mempool_variables_cleanup(pool);
+
+    POOL_MTX_UNLOCK();
+}
+
+/* One row of the debug report printed by rspamd_mempool_delete() */
+struct mempool_debug_elt {
+ gsize sz; /* value taken from the debug table for this location */
+ const gchar *loc; /* debug table key: allocation location string */
+};
+
+/*
+ * Comparator ordering mempool_debug_elt records by size, largest first.
+ * Compares the sizes directly: the previous subtraction cast only the
+ * first operand to gint and truncated the gssize difference on return,
+ * which could flip the sign for large sizes.
+ */
+static gint
+rspamd_mempool_debug_elt_cmp(const void *a, const void *b)
+{
+    const struct mempool_debug_elt *e1 = a, *e2 = b;
+
+    /* Inverse order */
+    if (e1->sz < e2->sz) {
+        return 1;
+    }
+    if (e1->sz > e2->sz) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Destroy a memory pool: optionally print debug statistics, record the
+ * leftover sample for the pool's entry point, call destructors, clean up
+ * variables, free the trash stack and every chain, then free the pool.
+ */
+void rspamd_mempool_delete(rspamd_mempool_t *pool)
+{
+ struct _pool_chain *cur, *tmp;
+ struct _pool_destructors *destructor;
+ gpointer ptr;
+ guint i;
+ gsize len;
+
+ POOL_MTX_LOCK();
+
+ cur = pool->priv->pools[RSPAMD_MEMPOOL_NORMAL];
+
+ if (G_UNLIKELY(pool->priv->flags & RSPAMD_MEMPOOL_DEBUG)) {
+ /* The debug table pointer is read from the bytes right after *pool */
+ GHashTable *debug_tbl = *(GHashTable **) (((guchar *) pool) + sizeof(*pool));
+ /* Show debug info */
+ gsize ndtor = 0;
+ LL_COUNT(pool->priv->dtors_head, destructor, ndtor);
+ msg_info_pool("destructing of the memory pool %p; elt size = %z; "
+ "used memory = %Hz; wasted memory = %Hd; "
+ "vars = %z; destructors = %z",
+ pool,
+ pool->priv->elt_len,
+ pool->priv->used_memory,
+ pool->priv->wasted_memory,
+ pool->priv->variables ? (gsize) kh_size(pool->priv->variables) : (gsize) 0,
+ ndtor);
+
+ GHashTableIter it;
+ gpointer k, v;
+ GArray *sorted_debug_size = g_array_sized_new(FALSE, FALSE,
+ sizeof(struct mempool_debug_elt),
+ g_hash_table_size(debug_tbl));
+
+ g_hash_table_iter_init(&it, debug_tbl);
+
+ /* Flatten the (location -> size) table into an array for sorting */
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ struct mempool_debug_elt e;
+ e.loc = (const gchar *) k;
+ e.sz = GPOINTER_TO_SIZE(v);
+ g_array_append_val(sorted_debug_size, e);
+ }
+
+ g_array_sort(sorted_debug_size, rspamd_mempool_debug_elt_cmp);
+
+ for (guint _i = 0; _i < sorted_debug_size->len; _i++) {
+ struct mempool_debug_elt *e;
+
+ e = &g_array_index(sorted_debug_size, struct mempool_debug_elt, _i);
+ msg_info_pool("allocated %Hz from %s", e->sz, e->loc);
+ }
+
+ g_array_free(sorted_debug_size, TRUE);
+ g_hash_table_unref(debug_tbl);
+ }
+
+ if (cur && mempool_entries) {
+ /* Store pool_chain_free() of the first normal chain as this sample's leftover */
+ pool->priv->entry->elts[pool->priv->entry->cur_elts].leftover =
+ pool_chain_free(cur);
+
+ pool->priv->entry->cur_elts = (pool->priv->entry->cur_elts + 1) %
+ G_N_ELEMENTS(pool->priv->entry->elts);
+
+ /* Sample index wrapped around: all slots were written, re-tune */
+ if (pool->priv->entry->cur_elts == 0) {
+ rspamd_mempool_adjust_entry(pool->priv->entry);
+ }
+ }
+
+ /* Call all pool destructors */
+ LL_FOREACH(pool->priv->dtors_head, destructor)
+ {
+ /* Avoid calling destructors for NULL pointers */
+ if (destructor->data != NULL) {
+ destructor->func(destructor->data);
+ }
+ }
+
+ rspamd_mempool_variables_cleanup(pool);
+
+ /* Trash stack entries are released with g_free() */
+ if (pool->priv->trash_stack) {
+ for (i = 0; i < pool->priv->trash_stack->len; i++) {
+ ptr = g_ptr_array_index(pool->priv->trash_stack, i);
+ g_free(ptr);
+ }
+
+ g_ptr_array_free(pool->priv->trash_stack, TRUE);
+ }
+
+ /* Release every chain of every chain type and update global counters */
+ for (i = 0; i < G_N_ELEMENTS(pool->priv->pools); i++) {
+ if (pool->priv->pools[i]) {
+ LL_FOREACH_SAFE(pool->priv->pools[i], cur, tmp)
+ {
+ g_atomic_int_add(&mem_pool_stat->bytes_allocated,
+ -((gint) cur->slice_size));
+ g_atomic_int_add(&mem_pool_stat->chunks_allocated, -1);
+
+ len = cur->slice_size + sizeof(struct _pool_chain);
+
+ if (i == RSPAMD_MEMPOOL_SHARED) {
+ munmap((void *) cur, len);
+ }
+ else {
+ /* The last pool is special, it is a part of the initial chunk */
+ if (cur->next != NULL) {
+ free(cur); /* Not g_free as we use system allocator */
+ }
+ }
+ }
+ }
+ }
+
+ g_atomic_int_inc(&mem_pool_stat->pools_freed);
+ POOL_MTX_UNLOCK();
+ free(pool); /* allocated by posix_memalign */
+}
+
+/*
+ * Copy the global allocator counters into `st`.
+ * `st` is left untouched when the global statistics block is not set.
+ */
+void rspamd_mempool_stat(rspamd_mempool_stat_t *st)
+{
+    if (mem_pool_stat != NULL) {
+        st->pools_allocated = mem_pool_stat->pools_allocated;
+        st->pools_freed = mem_pool_stat->pools_freed;
+        st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated;
+        st->bytes_allocated = mem_pool_stat->bytes_allocated;
+        st->chunks_allocated = mem_pool_stat->chunks_allocated;
+        st->chunks_freed = mem_pool_stat->chunks_freed;
+        st->oversized_chunks = mem_pool_stat->oversized_chunks;
+        /* Previously omitted, leaving this field of `st` uninitialised */
+        st->fragmented_size = mem_pool_stat->fragmented_size;
+    }
+}
+
+/* Zero all global allocator counters, if the statistics block is set */
+void rspamd_mempool_stat_reset(void)
+{
+    if (mem_pool_stat == NULL) {
+        return;
+    }
+
+    memset(mem_pool_stat, 0, sizeof(rspamd_mempool_stat_t));
+}
+
+/*
+ * Suggest a pool size for the given source location.
+ * This implementation ignores `loc` and always returns 0.
+ */
+gsize rspamd_mempool_suggest_size_(const char *loc)
+{
+ return 0;
+}
+
+#if !defined(HAVE_PTHREAD_PROCESS_SHARED) || defined(DISABLE_PTHREAD_MUTEX)
+/*
+ * Own emulation
+ */
+/*
+ * One spin iteration on an emulated mutex.
+ * Returns 0 when the caller may stop waiting (the spin budget ran out and
+ * the recorded owner is this process or no longer exists), otherwise
+ * yields/sleeps once and returns 1.
+ */
+static inline gint
+__mutex_spin(rspamd_mempool_mutex_t *mutex)
+{
+ /* check spin count */
+ if (g_atomic_int_dec_and_test(&mutex->spin)) {
+ /* This may be deadlock, so check owner of this lock */
+ if (mutex->owner == getpid()) {
+ /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */
+ g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT);
+ return 0;
+ }
+ else if (kill(mutex->owner, 0) == -1) {
+ /* Owner process was not found, so release lock */
+ g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT);
+ return 0;
+ }
+ /* Spin again */
+ g_atomic_int_set(&mutex->spin, MUTEX_SPIN_COUNT);
+ }
+
+#ifdef HAVE_SCHED_YIELD
+ (void) sched_yield();
+#elif defined(HAVE_NANOSLEEP)
+ struct timespec ts;
+ ts.tv_sec = 0;
+ ts.tv_nsec = MUTEX_SLEEP_TIME;
+ /* Spin */
+ /* Retry with the remaining time whenever the sleep is interrupted */
+ while (nanosleep(&ts, &ts) == -1 && errno == EINTR)
+ ;
+#else
+#error No methods to spin are defined
+#endif
+ return 1;
+}
+
+/*
+ * Wait until mutex->lock can be switched from 0 to 1, or until
+ * __mutex_spin() returns 0. In the latter case the function returns
+ * without having performed the 0 -> 1 exchange itself.
+ */
+static void
+memory_pool_mutex_spin(rspamd_mempool_mutex_t *mutex)
+{
+ while (!g_atomic_int_compare_and_exchange(&mutex->lock, 0, 1)) {
+ if (!__mutex_spin(mutex)) {
+ return;
+ }
+ }
+}
+
+/*
+ * Allocate an emulated mutex with rspamd_mempool_alloc_shared() and
+ * initialise it as unlocked with a full spin budget.
+ * Returns NULL when `pool` is NULL.
+ */
+rspamd_mempool_mutex_t *
+rspamd_mempool_get_mutex(rspamd_mempool_t *pool)
+{
+    rspamd_mempool_mutex_t *mtx;
+
+    if (pool == NULL) {
+        return NULL;
+    }
+
+    mtx = rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_mutex_t));
+    mtx->spin = MUTEX_SPIN_COUNT;
+    mtx->owner = 0;
+    mtx->lock = 0;
+
+    return mtx;
+}
+
+/* Acquire the emulated mutex and record the calling process as its owner */
+void rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex)
+{
+ memory_pool_mutex_spin(mutex);
+ mutex->owner = getpid();
+}
+
+/* Clear the owner and switch the lock word from 1 back to 0 (no-op if it is not 1) */
+void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex)
+{
+ mutex->owner = 0;
+ (void) g_atomic_int_compare_and_exchange(&mutex->lock, 1, 0);
+}
+
+/*
+ * Allocate an emulated rwlock with rspamd_mempool_alloc_shared(); it is
+ * built from two emulated mutexes (readers counter and writer lock).
+ * Returns NULL when `pool` is NULL, matching rspamd_mempool_get_mutex()
+ * and the pthread-based variant of this function.
+ */
+rspamd_mempool_rwlock_t *
+rspamd_mempool_get_rwlock(rspamd_mempool_t *pool)
+{
+    rspamd_mempool_rwlock_t *lock;
+
+    if (pool == NULL) {
+        return NULL;
+    }
+
+    lock = rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_rwlock_t));
+    lock->__r_lock = rspamd_mempool_get_mutex(pool);
+    lock->__w_lock = rspamd_mempool_get_mutex(pool);
+
+    return lock;
+}
+
+/*
+ * Acquire the emulated rwlock for reading: wait while the write lock word
+ * is non-zero (or until __mutex_spin() returns 0), then increment the
+ * readers counter kept in __r_lock->lock.
+ */
+void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ /* Spin on write lock */
+ while (g_atomic_int_get(&lock->__w_lock->lock)) {
+ if (!__mutex_spin(lock->__w_lock)) {
+ break;
+ }
+ }
+
+ g_atomic_int_inc(&lock->__r_lock->lock);
+ lock->__r_lock->owner = getpid();
+}
+
+/*
+ * Acquire the emulated rwlock for writing: take the write mutex, then
+ * wait until the readers counter drops to zero.
+ */
+void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ /* Spin on write lock first */
+ rspamd_mempool_lock_mutex(lock->__w_lock);
+ /* Now we have write lock set up */
+ /* Wait all readers */
+ while (g_atomic_int_get(&lock->__r_lock->lock)) {
+ __mutex_spin(lock->__r_lock);
+ }
+}
+
+/* Release a read lock: decrement the readers counter if it is non-zero */
+void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ if (g_atomic_int_get(&lock->__r_lock->lock)) {
+ (void) g_atomic_int_dec_and_test(&lock->__r_lock->lock);
+ }
+}
+
+/* Release a write lock by unlocking the underlying write mutex */
+void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ rspamd_mempool_unlock_mutex(lock->__w_lock);
+}
+#else
+
+/*
+ * Pthread-based shared mutexes
+ */
+/*
+ * Allocate a pthread mutex with rspamd_mempool_alloc_shared(), initialise
+ * it as process-shared and robust, and register pthread_mutex_destroy()
+ * as a pool destructor for it. Returns NULL when `pool` is NULL.
+ */
+rspamd_mempool_mutex_t *
+rspamd_mempool_get_mutex(rspamd_mempool_t *pool)
+{
+    pthread_mutexattr_t attr;
+    rspamd_mempool_mutex_t *mtx;
+
+    if (pool == NULL) {
+        return NULL;
+    }
+
+    mtx = rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_mutex_t));
+
+    pthread_mutexattr_init(&attr);
+    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
+    pthread_mutex_init(mtx, &attr);
+    rspamd_mempool_add_destructor(pool,
+                                  (rspamd_mempool_destruct_t) pthread_mutex_destroy, mtx);
+    pthread_mutexattr_destroy(&attr);
+
+    return mtx;
+}
+
+/*
+ * Lock a pool mutex.
+ * Mutexes created by rspamd_mempool_get_mutex() are PTHREAD_MUTEX_ROBUST:
+ * if the previous owner died while holding the lock, pthread_mutex_lock()
+ * returns EOWNERDEAD with the lock held by the caller. The mutex must then
+ * be marked consistent, otherwise it becomes permanently unusable after
+ * the next unlock.
+ */
+void rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex)
+{
+    if (pthread_mutex_lock(mutex) == EOWNERDEAD) {
+        (void) pthread_mutex_consistent(mutex);
+    }
+}
+
+/* Unlock a pool mutex; the pthread_mutex_unlock() result is ignored */
+void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex)
+{
+ pthread_mutex_unlock(mutex);
+}
+
+/*
+ * Allocate a pthread rwlock with rspamd_mempool_alloc_shared(), initialise
+ * it as process-shared, and register pthread_rwlock_destroy() as a pool
+ * destructor for it. Returns NULL when `pool` is NULL.
+ */
+rspamd_mempool_rwlock_t *
+rspamd_mempool_get_rwlock(rspamd_mempool_t *pool)
+{
+    pthread_rwlockattr_t attr;
+    rspamd_mempool_rwlock_t *rwl;
+
+    if (pool == NULL) {
+        return NULL;
+    }
+
+    rwl = rspamd_mempool_alloc_shared(pool, sizeof(rspamd_mempool_rwlock_t));
+
+    pthread_rwlockattr_init(&attr);
+    pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+    pthread_rwlock_init(rwl, &attr);
+    rspamd_mempool_add_destructor(pool,
+                                  (rspamd_mempool_destruct_t) pthread_rwlock_destroy, rwl);
+    pthread_rwlockattr_destroy(&attr);
+
+    return rwl;
+}
+
+/* Acquire the rwlock for reading; the pthread result is ignored */
+void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ pthread_rwlock_rdlock(lock);
+}
+
+/* Acquire the rwlock for writing; the pthread result is ignored */
+void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ pthread_rwlock_wrlock(lock);
+}
+
+/* Release a read lock (same pthread call as the write unlock) */
+void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ pthread_rwlock_unlock(lock);
+}
+
+/* Release a write lock (same pthread call as the read unlock) */
+void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock)
+{
+ pthread_rwlock_unlock(lock);
+}
+#endif
+
+#define RSPAMD_MEMPOOL_VARS_HASH_SEED 0xb32ad7c55eb2e647ULL
+/*
+ * Store `value` under `name` in the pool's variables hash, creating the
+ * hash on first use. Variables are keyed by a hash of `name`. If an entry
+ * with the same key already exists, its destructor (if any) is called on
+ * the old value before it is replaced.
+ */
+void rspamd_mempool_set_variable(rspamd_mempool_t *pool,
+                                 const gchar *name,
+                                 gpointer value,
+                                 rspamd_mempool_destruct_t destructor)
+{
+    if (pool->priv->variables == NULL) {
+
+        pool->priv->variables = kh_init(rspamd_mempool_vars_hash);
+
+        /*
+         * The variables cleanup code treats a NULL entry as possible,
+         * so do not dereference it unconditionally here.
+         */
+        if (pool->priv->entry != NULL && pool->priv->entry->cur_vars > 0) {
+            /* Preallocate */
+            kh_resize(rspamd_mempool_vars_hash,
+                      pool->priv->variables,
+                      pool->priv->entry->cur_vars);
+        }
+    }
+
+    gint hv = rspamd_cryptobox_fast_hash(name, strlen(name),
+                                         RSPAMD_MEMPOOL_VARS_HASH_SEED);
+    khiter_t it;
+    gint r;
+
+    it = kh_put(rspamd_mempool_vars_hash, pool->priv->variables, hv, &r);
+
+    if (it == kh_end(pool->priv->variables)) {
+        g_assert_not_reached();
+    }
+    else {
+        struct rspamd_mempool_variable *pvar;
+
+        if (r == 0) {
+            /* Existing entry, maybe need cleanup */
+            pvar = &kh_val(pool->priv->variables, it);
+
+            if (pvar->dtor) {
+                pvar->dtor(pvar->data);
+            }
+        }
+
+        pvar = &kh_val(pool->priv->variables, it);
+        pvar->data = value;
+        pvar->dtor = destructor;
+    }
+}
+
+/*
+ * Look up the variable stored under `name`.
+ * Returns its data pointer, or NULL when the pool has no variables hash
+ * or no entry matches.
+ */
+gpointer
+rspamd_mempool_get_variable(rspamd_mempool_t *pool, const gchar *name)
+{
+    gpointer found = NULL;
+
+    if (pool->priv->variables != NULL) {
+        gint hv = rspamd_cryptobox_fast_hash(name, strlen(name),
+                                             RSPAMD_MEMPOOL_VARS_HASH_SEED);
+        khiter_t pos = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv);
+
+        if (pos != kh_end(pool->priv->variables)) {
+            found = kh_val(pool->priv->variables, pos).data;
+        }
+    }
+
+    return found;
+}
+
+/*
+ * Remove the variable stored under `name` without calling its destructor
+ * and return its data pointer. Returns NULL when the pool has no variables
+ * hash or no entry matches.
+ */
+gpointer
+rspamd_mempool_steal_variable(rspamd_mempool_t *pool, const gchar *name)
+{
+    if (pool->priv->variables == NULL) {
+        return NULL;
+    }
+
+    khiter_t it;
+    gint hv = rspamd_cryptobox_fast_hash(name, strlen(name),
+                                         RSPAMD_MEMPOOL_VARS_HASH_SEED);
+
+    it = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv);
+
+    if (it != kh_end(pool->priv->variables)) {
+        /* Read the value before deleting the slot, not after */
+        gpointer stolen = kh_val(pool->priv->variables, it).data;
+
+        kh_del(rspamd_mempool_vars_hash, pool->priv->variables, it);
+
+        return stolen;
+    }
+
+    return NULL;
+}
+
+/*
+ * Remove the variable stored under `name`, calling its destructor (if
+ * any) on the stored data first. Does nothing when the pool has no
+ * variables hash or no entry matches.
+ */
+void rspamd_mempool_remove_variable(rspamd_mempool_t *pool, const gchar *name)
+{
+    struct rspamd_mempool_variable *victim;
+    khiter_t pos;
+    gint hv;
+
+    if (pool->priv->variables == NULL) {
+        return;
+    }
+
+    hv = rspamd_cryptobox_fast_hash(name, strlen(name),
+                                    RSPAMD_MEMPOOL_VARS_HASH_SEED);
+    pos = kh_get(rspamd_mempool_vars_hash, pool->priv->variables, hv);
+
+    if (pos == kh_end(pool->priv->variables)) {
+        return;
+    }
+
+    victim = &kh_val(pool->priv->variables, pos);
+
+    if (victim->dtor) {
+        victim->dtor(victim->data);
+    }
+
+    kh_del(rspamd_mempool_vars_hash, pool->priv->variables, pos);
+}
+
+/*
+ * Prepend a pool-allocated cell holding `p` to list `l` (which may be
+ * NULL) and return the new head.
+ */
+GList *
+rspamd_mempool_glist_prepend(rspamd_mempool_t *pool, GList *l, gpointer p)
+{
+    GList *head = rspamd_mempool_alloc(pool, sizeof(*head));
+
+    head->data = p;
+    head->prev = NULL;
+    /* `l` is either the old head or NULL; both are valid `next` values */
+    head->next = l;
+
+    if (l != NULL) {
+        l->prev = head;
+    }
+
+    return head;
+}
+
+/*
+ * Append a pool-allocated cell holding `p` to list `l` (which may be
+ * NULL) and return the list head. The list is walked to find its last
+ * cell.
+ */
+GList *
+rspamd_mempool_glist_append(rspamd_mempool_t *pool, GList *l, gpointer p)
+{
+    GList *node = rspamd_mempool_alloc(pool, sizeof(*node));
+    GList *last;
+
+    node->data = p;
+    node->next = NULL;
+
+    if (l == NULL) {
+        /* Empty list: the new cell is the whole list */
+        node->prev = NULL;
+        return node;
+    }
+
+    last = l;
+    while (last->next != NULL) {
+        last = last->next;
+    }
+
+    last->next = node;
+    node->prev = last;
+
+    return l;
+}
+
+/* Return the pool's used_memory counter */
+gsize rspamd_mempool_get_used_size(rspamd_mempool_t *pool)
+{
+ return pool->priv->used_memory;
+}
+
+/* Return the pool's wasted_memory counter */
+gsize rspamd_mempool_get_wasted_size(rspamd_mempool_t *pool)
+{
+ return pool->priv->wasted_memory;
+}
diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h
new file mode 100644
index 0000000..de0fea1
--- /dev/null
+++ b/src/libutil/mem_pool.h
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file mem_pool.h
+ * \brief Memory pools library.
+ *
+ * Memory pools library. The library is designed to provide an efficient way to
+ * store data in memory while avoiding many individual malloc/free calls. It has
+ * some overhead because objects live in a pool for a rather long time and are not
+ * freed immediately after use, but if we know for certain when these objects can
+ * be used, we can use a pool for them
+ */
+
+#ifndef RSPAMD_MEM_POOL_H
+#define RSPAMD_MEM_POOL_H
+
+#include "config.h"
+
+
+#if defined(HAVE_PTHREAD_PROCESS_SHARED) && !defined(DISABLE_PTHREAD_MUTEX)
+#include <pthread.h>
+#endif
+
+#ifdef __cplusplus
+#define MEMPOOL_STR_FUNC __FUNCTION__
+#else
+#define MEMPOOL_STR_FUNC G_STRFUNC
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct f_str_s;
+
+#ifdef __has_attribute
+#if __has_attribute(alloc_size)
+#define RSPAMD_ATTR_ALLOC_SIZE(pos) __attribute__((alloc_size(pos)))
+#else
+#define RSPAMD_ATTR_ALLOC_SIZE(pos)
+#endif
+
+#if __has_attribute(assume_aligned)
+#define RSPAMD_ATTR_ALLOC_ALIGN(al) __attribute__((assume_aligned(al)))
+#else
+#define RSPAMD_ATTR_ALLOC_ALIGN(al)
+#endif
+#if __has_attribute(returns_nonnull)
+#define RSPAMD_ATTR_RETURNS_NONNUL __attribute__((returns_nonnull))
+#else
+#define RSPAMD_ATTR_RETURNS_NONNUL
+#endif
+#else
+#define RSPAMD_ATTR_ALLOC_SIZE(pos)
+#define RSPAMD_ATTR_ALLOC_ALIGN(al)
+#define RSPAMD_ATTR_RETURNS_NONNUL
+#endif
+
+#define MEMPOOL_TAG_LEN 16
+#define MEMPOOL_UID_LEN 16
+/* All pointers are aligned as this variable */
+#define MIN_MEM_ALIGNMENT G_MEM_ALIGN
+
+/**
+ * Destructor type definition
+ */
+typedef void (*rspamd_mempool_destruct_t)(void *ptr);
+
+/**
+ * Pool mutex structure
+ */
+#if !defined(HAVE_PTHREAD_PROCESS_SHARED) || defined(DISABLE_PTHREAD_MUTEX)
+/* Emulated mutex used when process-shared pthread mutexes are unavailable or disabled */
+typedef struct memory_pool_mutex_s {
+ gint lock; /**< lock word: 0 when free, switched to 1 on lock */
+ pid_t owner; /**< pid stored by the locking process, 0 after unlock */
+ guint spin; /**< remaining spin budget, reset to MUTEX_SPIN_COUNT */
+} rspamd_mempool_mutex_t;
+/**
+ * Rwlock for locking shared memory regions
+ */
+typedef struct memory_pool_rwlock_s {
+ rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */
+ rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */
+} rspamd_mempool_rwlock_t;
+#else
+typedef pthread_mutex_t rspamd_mempool_mutex_t;
+typedef pthread_rwlock_t rspamd_mempool_rwlock_t;
+#endif
+
+/**
+ * Tag to use for logging purposes
+ */
+struct rspamd_mempool_tag {
+ gchar tagname[MEMPOOL_TAG_LEN]; /**< readable name */
+ gchar uid[MEMPOOL_UID_LEN]; /**< unique id */
+};
+
+enum rspamd_mempool_flags {
+ RSPAMD_MEMPOOL_DEBUG = (1u << 0u),
+};
+
+/**
+ * Memory pool type
+ */
+struct rspamd_mempool_entry_point;
+struct rspamd_mutex_s;
+struct rspamd_mempool_specific;
+/* Public pool handle; all allocator state lives behind `priv` */
+typedef struct memory_pool_s {
+ struct rspamd_mempool_specific *priv; /**< private pool state (opaque to users of this header) */
+ struct rspamd_mempool_tag tag; /**< memory pool tag */
+} rspamd_mempool_t;
+
+/**
+ * Statistics structure
+ */
+typedef struct memory_pool_stat_s {
+ guint pools_allocated; /**< total number of allocated pools */
+ guint pools_freed; /**< number of freed pools */
+ guint bytes_allocated; /**< bytes that are allocated with pool allocator */
+ guint chunks_allocated; /**< number of chunks that are allocated */
+ guint shared_chunks_allocated; /**< shared chunks allocated */
+ guint chunks_freed; /**< chunks freed */
+ guint oversized_chunks; /**< oversized chunks */
+ guint fragmented_size; /**< fragmentation size */
+} rspamd_mempool_stat_t;
+
+
+/**
+ * Allocate new memory pool
+ * @param size size of pool's page
+ * @return new memory pool object
+ */
+rspamd_mempool_t *rspamd_mempool_new_(gsize size, const gchar *tag, gint flags,
+ const gchar *loc);
+
+#define rspamd_mempool_new(size, tag, flags) \
+ rspamd_mempool_new_((size), (tag), (flags), G_STRLOC)
+#define rspamd_mempool_new_default(tag, flags) \
+ rspamd_mempool_new_(rspamd_mempool_suggest_size_(G_STRLOC), (tag), (flags), G_STRLOC)
+
+/**
+ * Get memory from pool
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void *rspamd_mempool_alloc_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+/**
+ * Allocates array handling potential integer overflow
+ * @param pool
+ * @param nmemb
+ * @param size
+ * @param alignment
+ * @param loc
+ * @return
+ */
+void *rspamd_mempool_alloc_array_(rspamd_mempool_t *pool, gsize nmemb, gsize size, gsize alignment, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+#define rspamd_mempool_alloc(pool, size) \
+ rspamd_mempool_alloc_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc_array(pool, nmemb, size) \
+ rspamd_mempool_alloc_array_((pool), (nmemb), (size), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc_array_type(pool, nmemb, type) \
+ (type *) rspamd_mempool_alloc_array_((pool), (nmemb), sizeof(type), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc_type(pool, type) \
+ (type *) (rspamd_mempool_alloc_((pool), sizeof(type), \
+ MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC)))
+#define rspamd_mempool_alloc_buffer(pool, buflen) \
+ (char *) (rspamd_mempool_alloc_((pool), sizeof(char) * (buflen), MIN_MEM_ALIGNMENT, (G_STRLOC)))
+/**
+ * Notify external memory usage for memory pool
+ * @param pool
+ * @param size
+ * @param loc
+ */
+void rspamd_mempool_notify_alloc_(rspamd_mempool_t *pool, gsize size, const gchar *loc);
+#define rspamd_mempool_notify_alloc(pool, size) \
+ rspamd_mempool_notify_alloc_((pool), (size), (G_STRLOC))
+
+/**
+ * Get memory and set it to zero
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ * @return pointer to allocated object
+ */
+void *rspamd_mempool_alloc0_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+#define rspamd_mempool_alloc0(pool, size) \
+ rspamd_mempool_alloc0_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc0_type(pool, type) \
+ (type *) (rspamd_mempool_alloc0_((pool), sizeof(type), \
+ MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC)))
+
+/**
+ * Make a copy of string in pool
+ * @param pool memory pool object
+ * @param src source string
+ * @return pointer to newly created string that is copy of src
+ */
+gchar *rspamd_mempool_strdup_(rspamd_mempool_t *pool, const gchar *src, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT);
+#define rspamd_mempool_strdup(pool, src) \
+ rspamd_mempool_strdup_((pool), (src), (G_STRLOC))
+gchar *rspamd_mempool_strdup_len_(rspamd_mempool_t *pool, const gchar *src, gsize len, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT);
+#define rspamd_mempool_strdup_len(pool, src, len) \
+ rspamd_mempool_strdup_len_((pool), (src), (len), (G_STRLOC))
+
+struct f_str_tok;
+
+/**
+ * Make a copy of fixed string token in pool as null terminated string
+ * @param pool memory pool object
+ * @param src source string
+ * @return pointer to newly created string that is copy of src
+ */
+gchar *rspamd_mempool_ftokdup_(rspamd_mempool_t *pool,
+ const struct f_str_tok *src,
+ const gchar *loc)
+ RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT);
+#define rspamd_mempool_ftokdup(pool, src) \
+ rspamd_mempool_ftokdup_((pool), (src), (G_STRLOC))
+
+/**
+ * Allocate piece of shared memory
+ * @param pool memory pool object
+ * @param size bytes to allocate
+ */
+void *rspamd_mempool_alloc_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+#define rspamd_mempool_alloc_shared(pool, size) \
+ rspamd_mempool_alloc_shared_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc_shared_type(pool, type) \
+ (type *) (rspamd_mempool_alloc_shared_((pool), sizeof(type), \
+ MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC)))
+
+void *rspamd_mempool_alloc0_shared_(rspamd_mempool_t *pool, gsize size, gsize alignment, const gchar *loc)
+ RSPAMD_ATTR_ALLOC_SIZE(2) RSPAMD_ATTR_ALLOC_ALIGN(MIN_MEM_ALIGNMENT) RSPAMD_ATTR_RETURNS_NONNUL;
+#define rspamd_mempool_alloc0_shared(pool, size) \
+ rspamd_mempool_alloc0_shared_((pool), (size), MIN_MEM_ALIGNMENT, (G_STRLOC))
+#define rspamd_mempool_alloc0_shared_type(pool, type) \
+ (type *) (rspamd_mempool_alloc0_shared_((pool), sizeof(type), \
+ MAX(MIN_MEM_ALIGNMENT, RSPAMD_ALIGNOF(type)), (G_STRLOC)))
+
+/**
+ * Add destructor callback to pool
+ * @param pool memory pool object
+ * @param func pointer to function-destructor
+ * @param data pointer to data that would be passed to destructor
+ */
+void rspamd_mempool_add_destructor_full(rspamd_mempool_t *pool,
+ rspamd_mempool_destruct_t func,
+ void *data,
+ const gchar *function,
+ const gchar *line);
+
+/* Macros for common usage */
+#define rspamd_mempool_add_destructor(pool, func, data) \
+ rspamd_mempool_add_destructor_full(pool, func, data, (MEMPOOL_STR_FUNC), (G_STRLOC))
+
+/**
+ * Replace destructor callback to pool for specified pointer
+ * @param pool memory pool object
+ * @param func pointer to function-destructor
+ * @param old_data pointer to old data
+ * @param new_data pointer to data that would be passed to destructor
+ */
+void rspamd_mempool_replace_destructor(rspamd_mempool_t *pool,
+ rspamd_mempool_destruct_t func,
+ void *old_data, void *new_data);
+
+/**
+ * Calls all destructors associated with the specific memory pool without removing
+ * of the pool itself
+ * @param pool
+ */
+void rspamd_mempool_destructors_enforce(rspamd_mempool_t *pool);
+
+/**
+ * Delete pool, free all its chunks and call destructors chain
+ * @param pool memory pool object
+ */
+void rspamd_mempool_delete(rspamd_mempool_t *pool);
+
+/**
+ * Get new mutex from pool (allocated in shared memory)
+ * @param pool memory pool object
+ * @return mutex object
+ */
+rspamd_mempool_mutex_t *rspamd_mempool_get_mutex(rspamd_mempool_t *pool);
+
+/**
+ * Lock mutex
+ * @param mutex mutex to lock
+ */
+void rspamd_mempool_lock_mutex(rspamd_mempool_mutex_t *mutex);
+
+/**
+ * Unlock mutex
+ * @param mutex mutex to unlock
+ */
+void rspamd_mempool_unlock_mutex(rspamd_mempool_mutex_t *mutex);
+
+/**
+ * Create new rwlock and place it in shared memory
+ * @param pool memory pool object
+ * @return rwlock object
+ */
+rspamd_mempool_rwlock_t *rspamd_mempool_get_rwlock(rspamd_mempool_t *pool);
+
+/**
+ * Acquire read lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_rlock_rwlock(rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Acquire write lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_wlock_rwlock(rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Release read lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_runlock_rwlock(rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Release write lock
+ * @param lock rwlock object
+ */
+void rspamd_mempool_wunlock_rwlock(rspamd_mempool_rwlock_t *lock);
+
+/**
+ * Get pool allocator statistics
+ * @param st stat pool struct
+ */
+void rspamd_mempool_stat(rspamd_mempool_stat_t *st);
+
+/**
+ * Reset memory pool stat
+ */
+void rspamd_mempool_stat_reset(void);
+
+/**
+ * Get suggested pool size for the calling source location
+ * @return suggested pool size; the definition in mem_pool.c currently returns 0
+ */
+#define rspamd_mempool_suggest_size() rspamd_mempool_suggest_size_(G_STRLOC)
+
+gsize rspamd_mempool_suggest_size_(const char *loc);
+
+gsize rspamd_mempool_get_used_size(rspamd_mempool_t *pool);
+gsize rspamd_mempool_get_wasted_size(rspamd_mempool_t *pool);
+
+/**
+ * Set memory pool variable
+ * @param pool memory pool object
+ * @param name name of variable
+ * @param gpointer value of variable
+ * @param destructor pointer to function-destructor
+ */
+void rspamd_mempool_set_variable(rspamd_mempool_t *pool,
+ const gchar *name,
+ gpointer value,
+ rspamd_mempool_destruct_t destructor);
+
+/**
+ * Get memory pool variable
+ * @param pool memory pool object
+ * @param name name of variable
+ * @return NULL or pointer to variable data
+ */
+gpointer rspamd_mempool_get_variable(rspamd_mempool_t *pool,
+ const gchar *name);
+/**
+ * Steal memory pool variable
+ * @param pool
+ * @param name
+ * @return
+ */
+gpointer rspamd_mempool_steal_variable(rspamd_mempool_t *pool,
+ const gchar *name);
+
+/**
+ * Removes variable from memory pool
+ * @param pool memory pool object
+ * @param name name of variable
+ */
+void rspamd_mempool_remove_variable(rspamd_mempool_t *pool,
+ const gchar *name);
+
+/**
+ * Prepend element to a list creating it in the memory pool
+ * @param l
+ * @param p
+ * @return
+ */
+GList *rspamd_mempool_glist_prepend(rspamd_mempool_t *pool,
+ GList *l, gpointer p) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Append element to a list creating it in the memory pool
+ * @param l
+ * @param p
+ * @return
+ */
+GList *rspamd_mempool_glist_append(rspamd_mempool_t *pool,
+ GList *l, gpointer p) G_GNUC_WARN_UNUSED_RESULT;
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+#include <stdexcept> /* For std::runtime_error */
+
+namespace rspamd {
+
+/**
+ * Minimal allocator that takes memory from an rspamd memory pool.
+ * Memory is never returned individually: deallocate() is a no-op.
+ */
+template<class T>
+class mempool_allocator {
+public:
+    typedef T value_type;
+
+    mempool_allocator() = delete;
+    /* Rebinding copy: share the pool of an allocator for another type */
+    template<class U>
+    mempool_allocator(const mempool_allocator<U> &other)
+        : pool(other.pool)
+    {
+    }
+    mempool_allocator(rspamd_mempool_t *_pool)
+        : pool(_pool)
+    {
+    }
+    /**
+     * Allocate storage for `n` objects of type T from the pool
+     * @throws std::runtime_error when `n * sizeof(T)` would be too large
+     */
+    [[nodiscard]] constexpr T *allocate(std::size_t n)
+    {
+        /* Reject only oversized requests (the check used to be inverted) */
+        if (n > G_MAXSIZE / 2 / sizeof(T)) {
+            throw std::runtime_error("integer overflow");
+        }
+        return reinterpret_cast<T *>(rspamd_mempool_alloc(pool, n * sizeof(T)));
+    }
+    constexpr void deallocate(T *p, std::size_t n)
+    {
+        /* Do nothing */
+    }
+
+private:
+    /* The rebinding constructor reads `pool` of other specializations */
+    template<class U>
+    friend class mempool_allocator;
+
+    rspamd_mempool_t *pool;
+};
+
+}// namespace rspamd
+#endif
+
+#endif
diff --git a/src/libutil/mem_pool_internal.h b/src/libutil/mem_pool_internal.h
new file mode 100644
index 0000000..4fea839
--- /dev/null
+++ b/src/libutil/mem_pool_internal.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright 2019 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_MEM_POOL_INTERNAL_H
+#define RSPAMD_MEM_POOL_INTERNAL_H
+
+/*
+ * Internal memory pool stuff
+ */
+
+/*
+ * Rounds pointer `p` up to the next address that is a multiple of `a` and
+ * yields it as `guint8 *`; an already aligned pointer is returned unchanged.
+ * The mask `(a) - 1` makes this valid only when `a` is a power of two.
+ */
+#define align_ptr(p, a) \
+ ((guint8 *) ((uintptr_t) (p) + ((-(intptr_t) (p)) & ((a) -1))))
+
+/*
+ * Kinds of pool chains; the values index the `pools[]` array of
+ * struct rspamd_mempool_specific.
+ */
+enum rspamd_mempool_chain_type {
+ RSPAMD_MEMPOOL_NORMAL = 0,
+ RSPAMD_MEMPOOL_SHARED,
+ RSPAMD_MEMPOOL_MAX /* number of chain types, used as the array size */
+};
+/* Size of the `src` buffer in struct rspamd_mempool_entry_point */
+#define ENTRY_LEN 128
+/* Number of `elts` records kept in struct rspamd_mempool_entry_point */
+#define ENTRY_NELTS 64
+
+/*
+ * One statistics record stored in an entry point.
+ * NOTE(review): field meaning is not visible here; presumably a sample of
+ * wasted and unused bytes observed for one pool - confirm in mem_pool.c
+ */
+struct entry_elt {
+ guint32 fragmentation;
+ guint32 leftover;
+};
+
+/*
+ * Per entry point bookkeeping: a fixed-size `src` label plus ENTRY_NELTS
+ * statistics records.
+ * NOTE(review): `src` presumably identifies the code location that created
+ * the pool and the `cur_*` fields hold current size suggestions/counters -
+ * confirm in mem_pool.c
+ */
+struct rspamd_mempool_entry_point {
+ gchar src[ENTRY_LEN];
+ guint32 cur_suggestion;
+ guint32 cur_elts;
+ guint32 cur_vars;
+ struct entry_elt elts[ENTRY_NELTS];
+};
+
+/**
+ * Destructors list item structure (singly linked through `next`)
+ */
+struct _pool_destructors {
+ rspamd_mempool_destruct_t func; /**< pointer to destructor */
+ void *data; /**< data to free */
+ const gchar *function; /**< function from which this destructor was added */
+ const gchar *loc; /**< source location string (NOTE(review): presumably file:line - confirm) */
+ struct _pool_destructors *next; /**< next item in the list */
+};
+
+
+/*
+ * Value stored in the pool variables hash: the user data pointer together
+ * with a destructor for it (NOTE(review): presumably `dtor` may be NULL when
+ * no cleanup is needed - confirm in mem_pool.c)
+ */
+struct rspamd_mempool_variable {
+ gpointer data;
+ rspamd_mempool_destruct_t dtor;
+};
+
+/*
+ * khash map type `rspamd_mempool_vars_hash`: guint32 key to
+ * struct rspamd_mempool_variable value, using the integer hash/equality
+ * helpers (NOTE(review): the key is presumably a hash of the variable name -
+ * confirm in mem_pool.c)
+ */
+KHASH_INIT(rspamd_mempool_vars_hash,
+ guint32, struct rspamd_mempool_variable, 1,
+ kh_int_hash_func, kh_int_hash_equal);
+
+/*
+ * Private part of a memory pool: chain heads, destructors, variables and
+ * accounting counters.
+ */
+struct rspamd_mempool_specific {
+ struct _pool_chain *pools[RSPAMD_MEMPOOL_MAX]; /**< one chain list per chain type */
+ struct _pool_destructors *dtors_head, *dtors_tail; /**< destructors list ends */
+ GPtrArray *trash_stack;
+ khash_t(rspamd_mempool_vars_hash) * variables; /**< pool variables map */
+ struct rspamd_mempool_entry_point *entry; /**< statistics entry point */
+ gsize elt_len; /**< size of an element */
+ gsize used_memory;
+ guint wasted_memory;
+ gint flags;
+};
+
+/**
+ * Pool page structure (pages are linked through `next`)
+ */
+struct _pool_chain {
+ guint8 *begin; /**< begin of pool chain block */
+ guint8 *pos; /**< current start of free space in block */
+ gsize slice_size; /**< length of block */
+ struct _pool_chain *next; /**< next page in the chain */
+};
+
+
+#endif
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
new file mode 100644
index 0000000..630b1f9
--- /dev/null
+++ b/src/libutil/multipattern.c
@@ -0,0 +1,821 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/multipattern.h"
+#include "libutil/str_util.h"
+#include "libcryptobox/cryptobox.h"
+
+#ifdef WITH_HYPERSCAN
+#include "logger.h"
+#include "unix-std.h"
+#include "hs.h"
+#include "libserver/hyperscan_tools.h"
+#endif
+#include "acism.h"
+#include "libutil/regexp.h"
+#include <stdalign.h>
+
+/* Number of hyperscan scratch slots kept in each multipattern */
+#define MAX_SCRATCH 4
+
+/* Cached outcome of the hyperscan platform check done by rspamd_hs_check() */
+enum rspamd_hs_check_state {
+ RSPAMD_HS_UNCHECKED = 0, /* the check has not been performed yet */
+ RSPAMD_HS_SUPPORTED, /* hs_valid_platform() returned HS_SUCCESS */
+ RSPAMD_HS_UNSUPPORTED /* the check was performed and failed */
+};
+
+/* Cache directory set by rspamd_multipattern_library_init(); the pointer is stored, not copied */
+static const char *hs_cache_dir = NULL;
+/* Stays RSPAMD_HS_UNCHECKED forever when built without WITH_HYPERSCAN */
+static enum rspamd_hs_check_state hs_suitable_cpu = RSPAMD_HS_UNCHECKED;
+
+
+/*
+ * Multipattern matcher state. Depending on rspamd_hs_check() either the
+ * hyperscan members or the acism/regexp members are used.
+ */
+struct RSPAMD_ALIGNED(64) rspamd_multipattern {
+#ifdef WITH_HYPERSCAN
+ rspamd_cryptobox_hash_state_t hash_state; /* hash over added patterns, names the cache file */
+ rspamd_hyperscan_t *hs_db; /* compiled or cached database */
+ hs_scratch_t *scratch[MAX_SCRATCH]; /* scratch spaces allocated at compile time */
+ GArray *hs_pats; /* gchar *: filtered pattern strings */
+ GArray *hs_ids; /* gint: pattern ids (index of insertion) */
+ GArray *hs_flags; /* gint: HS_FLAG_* per pattern */
+ guint scratch_used; /* bitmask of scratch slots currently taken */
+#endif
+ ac_trie_t *t; /* aho-corasick trie for plain patterns */
+ GArray *pats; /* ac_trie_pat_t: patterns for the non-hyperscan path */
+ GArray *res; /* rspamd_regexp_t *: regexp fallback for GLOB/RE flags */
+
+ gboolean compiled; /* set once rspamd_multipattern_compile() succeeded */
+ guint cnt; /* number of patterns added */
+ enum rspamd_multipattern_flags flags; /* flags given at creation */
+};
+
+static GQuark
+rspamd_multipattern_quark(void)
+{
+ return g_quark_from_static_string("multipattern");
+}
+
+/*
+ * Tells whether hyperscan can be used. The platform is probed once and the
+ * answer is cached in `hs_suitable_cpu`; without WITH_HYPERSCAN the state is
+ * never changed and the function always returns FALSE.
+ */
+static inline gboolean
+rspamd_hs_check(void)
+{
+#ifdef WITH_HYPERSCAN
+	if (G_UNLIKELY(hs_suitable_cpu == RSPAMD_HS_UNCHECKED)) {
+		hs_suitable_cpu = (hs_valid_platform() == HS_SUCCESS)
+							  ? RSPAMD_HS_SUPPORTED
+							  : RSPAMD_HS_UNSUPPORTED;
+	}
+#endif
+
+	if (hs_suitable_cpu == RSPAMD_HS_SUPPORTED) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Remembers the cache directory (the pointer itself is kept, so the string
+ * must outlive all later uses) and, with hyperscan compiled in, performs
+ * the platform check eagerly.
+ */
+void rspamd_multipattern_library_init(const gchar *cache_dir)
+{
+#ifdef WITH_HYPERSCAN
+	(void) rspamd_hs_check();
+#endif
+	hs_cache_dir = cache_dir;
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Builds a hyperscan expression for a TLD pattern.
+ *
+ * We understand the following cases
+ * 1) blah -> \.blah followed by the boundary suffix
+ * 2) *.blah -> same as (1), everything up to the first dot is dropped
+ * 3) a '*' pattern without any dot is used verbatim
+ *
+ * The input is treated as a length-delimited buffer: nothing beyond
+ * `pattern[slen - 1]` is read.
+ *
+ * @param pattern source pattern, `slen` bytes long
+ * @param slen length of the pattern
+ * @param dst_len receives the length of the result
+ * @return newly allocated NUL-terminated string, release with g_free()
+ */
+static gchar *
+rspamd_multipattern_escape_tld_hyperscan(const gchar *pattern, gsize slen,
+										 gsize *dst_len)
+{
+	static const gchar prefix[] = "\\.";
+	/*
+	 * NOTE(review): "(:?" looks like a typo for the non-capturing "(?:";
+	 * kept as is because changing it alters what is matched and the cache
+	 * hash - confirm the intent before touching it.
+	 */
+	static const gchar suffix[] = "(:?\\b|$)";
+	const gsize prefix_len = sizeof(prefix) - 1;
+	const gsize suffix_len = sizeof(suffix) - 1;
+	const gchar *p = pattern;
+	gsize plen = slen, len, off;
+	gchar *res;
+
+	if (slen > 0 && pattern[0] == '*') {
+		/* memchr, not strchr: the buffer is bounded by slen */
+		const gchar *dot = memchr(pattern, '.', slen);
+
+		if (dot != NULL) {
+			p = dot + 1;
+			plen = slen - (gsize) (p - pattern);
+		}
+		/* XXX: bad - no dot found, the pattern is taken as is */
+	}
+
+	len = prefix_len + plen + suffix_len;
+	res = g_malloc(len + 1);
+
+	off = rspamd_strlcpy(res, prefix, len + 1);
+	/* Copy at most plen bytes so that the source is never overrun */
+	off += rspamd_strlcpy(res + off, p, plen + 1);
+	off += rspamd_strlcpy(res + off, suffix, len + 1 - off);
+
+	*dst_len = off;
+
+	return res;
+}
+
+#endif
+/*
+ * Builds the plain-text form of a TLD pattern for the trie matcher: a
+ * leading dot followed by the pattern, where for patterns starting with
+ * '*' everything up to and including the first dot is dropped first
+ * (a '*' pattern without a dot is used verbatim).
+ *
+ * @param pattern source pattern
+ * @param len length of the pattern
+ * @param dst_len receives the computed length of the result
+ * @return newly allocated string, release with g_free()
+ */
+static gchar *
+rspamd_multipattern_escape_tld_acism(const gchar *pattern, gsize len,
+									 gsize *dst_len)
+{
+	static const gchar dot_prefix[] = ".";
+	const gsize prefix_len = sizeof(dot_prefix) - 1;
+	const gchar *body = pattern;
+	gsize out_len;
+	gchar *out;
+
+	if (pattern[0] == '*') {
+		const gchar *dot = memchr(pattern, '.', len);
+
+		if (dot != NULL) {
+			/* Skip the wildcard part together with the dot */
+			body = dot + 1;
+		}
+	}
+
+	/* Remaining pattern bytes plus the dot we put in front */
+	out_len = len - (body - pattern) + prefix_len;
+
+	out = g_malloc(out_len + 1);
+	memcpy(out, dot_prefix, prefix_len);
+	rspamd_strlcpy(out + prefix_len, body, out_len - prefix_len + 1);
+
+	*dst_len = out_len;
+
+	return out;
+}
+
+/*
+ * Escapes special characters from specific pattern
+ *
+ * With hyperscan available every pattern goes through
+ * rspamd_str_regexp_escape() (TLD patterns are rewritten first). Without it
+ * TLD patterns are rewritten for the trie, RE/GLOB patterns are escaped for
+ * the regexp fallback and plain patterns are merely copied.
+ *
+ * @param pattern source pattern
+ * @param len length of the pattern
+ * @param flags pattern flags selecting the kind of processing
+ * @param dst_len receives the length of the result
+ * @return newly allocated string that is released with g_free()
+ */
+static gchar *
+rspamd_multipattern_pattern_filter(const gchar *pattern, gsize len,
+								   enum rspamd_multipattern_flags flags,
+								   gsize *dst_len)
+{
+	gchar *ret = NULL;
+	gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
+
+	if (flags & RSPAMD_MULTIPATTERN_UTF8) {
+		gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
+	}
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		if (flags & RSPAMD_MULTIPATTERN_TLD) {
+			gchar *tmp;
+			gsize tlen;
+			tmp = rspamd_multipattern_escape_tld_hyperscan(pattern, len, &tlen);
+
+			ret = rspamd_str_regexp_escape(tmp, tlen, dst_len,
+										   gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+			g_free(tmp);
+		}
+		else if (flags & RSPAMD_MULTIPATTERN_RE) {
+			ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+		}
+		else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+			ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+										   gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+		}
+		else {
+			ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags);
+		}
+
+		return ret;
+	}
+#endif
+
+	if (flags & RSPAMD_MULTIPATTERN_TLD) {
+		ret = rspamd_multipattern_escape_tld_acism(pattern, len, dst_len);
+	}
+	else if (flags & RSPAMD_MULTIPATTERN_RE) {
+		ret = rspamd_str_regexp_escape(pattern, len, dst_len, gl_flags | RSPAMD_REGEXP_ESCAPE_RE);
+	}
+	else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+		ret = rspamd_str_regexp_escape(pattern, len, dst_len,
+									   gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+	}
+	else {
+		/*
+		 * Use the GLib allocator like the TLD branch: the result is
+		 * released with g_free() and g_malloc never returns NULL here.
+		 */
+		ret = g_malloc(len + 1);
+		*dst_len = rspamd_strlcpy(ret, pattern, len + 1);
+	}
+
+	return ret;
+}
+
+/*
+ * Allocates an empty, zeroed multipattern and prepares the pattern arrays
+ * of whichever backend rspamd_hs_check() selects.
+ *
+ * @param flags flags stored in the structure
+ * @return new multipattern; allocation failure aborts via g_assert
+ */
+struct rspamd_multipattern *
+rspamd_multipattern_create(enum rspamd_multipattern_flags flags)
+{
+	struct rspamd_multipattern *mp = NULL;
+
+	/* Align due to blake2b state */
+	if (posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern),
+					   sizeof(*mp)) != 0) {
+		/* The pointer must not be trusted after a failed call */
+		mp = NULL;
+	}
+	g_assert(mp != NULL);
+	memset(mp, 0, sizeof(*mp));
+	mp->flags = flags;
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		mp->hs_pats = g_array_new(FALSE, TRUE, sizeof(gchar *));
+		mp->hs_flags = g_array_new(FALSE, TRUE, sizeof(gint));
+		mp->hs_ids = g_array_new(FALSE, TRUE, sizeof(gint));
+		rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0);
+
+		return mp;
+	}
+#endif
+
+	mp->pats = g_array_new(FALSE, TRUE, sizeof(ac_trie_pat_t));
+
+	return mp;
+}
+
+/*
+ * Same as rspamd_multipattern_create() but the pattern arrays are
+ * preallocated for `npatterns` elements.
+ *
+ * @param npatterns expected number of patterns
+ * @param flags flags stored in the structure
+ * @return new multipattern; allocation failure aborts via g_assert
+ */
+struct rspamd_multipattern *
+rspamd_multipattern_create_sized(guint npatterns,
+								 enum rspamd_multipattern_flags flags)
+{
+	struct rspamd_multipattern *mp = NULL;
+
+	/* Align due to blake2b state */
+	if (posix_memalign((void **) &mp, RSPAMD_ALIGNOF(struct rspamd_multipattern),
+					   sizeof(*mp)) != 0) {
+		/* The pointer must not be trusted after a failed call */
+		mp = NULL;
+	}
+	g_assert(mp != NULL);
+	memset(mp, 0, sizeof(*mp));
+	mp->flags = flags;
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		mp->hs_pats = g_array_sized_new(FALSE, TRUE, sizeof(gchar *), npatterns);
+		mp->hs_flags = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns);
+		mp->hs_ids = g_array_sized_new(FALSE, TRUE, sizeof(gint), npatterns);
+		rspamd_cryptobox_hash_init(&mp->hash_state, NULL, 0);
+
+		return mp;
+	}
+#endif
+
+	mp->pats = g_array_sized_new(FALSE, TRUE, sizeof(ac_trie_pat_t), npatterns);
+
+	return mp;
+}
+
+/*
+ * Convenience wrapper for NUL-terminated patterns: measures the string and
+ * forwards to rspamd_multipattern_add_pattern_len().
+ */
+void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp,
+									 const gchar *pattern, gint flags)
+{
+	gsize patlen;
+
+	g_assert(pattern != NULL);
+
+	patlen = strlen(pattern);
+	rspamd_multipattern_add_pattern_len(mp, pattern, patlen, flags);
+}
+
+/*
+ * Adds one pattern of explicit length to a not yet compiled multipattern.
+ * The pattern is passed through rspamd_multipattern_pattern_filter() and the
+ * resulting copy is stored; `mp->cnt` is incremented.
+ */
+void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp,
+ const gchar *pattern, gsize patlen, gint flags)
+{
+ gsize dlen;
+
+ g_assert(pattern != NULL);
+ g_assert(mp != NULL);
+ g_assert(!mp->compiled);
+
+#ifdef WITH_HYPERSCAN
+ if (rspamd_hs_check()) {
+ gchar *np;
+ /* Start-of-match reporting is on unless a flag below removes it */
+ gint fl = HS_FLAG_SOM_LEFTMOST;
+ /* Hyperscan flags are derived from creation flags and per-pattern flags */
+ gint adjusted_flags = mp->flags | flags;
+
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_ICASE) {
+ fl |= HS_FLAG_CASELESS;
+ }
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_UTF8) {
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_TLD) {
+ fl |= HS_FLAG_UTF8;
+ }
+ else {
+ fl |= HS_FLAG_UTF8 | HS_FLAG_UCP;
+ }
+ }
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_DOTALL) {
+ fl |= HS_FLAG_DOTALL;
+ }
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_SINGLEMATCH) {
+ fl |= HS_FLAG_SINGLEMATCH;
+ fl &= ~HS_FLAG_SOM_LEFTMOST; /* According to hyperscan docs */
+ }
+ if (adjusted_flags & RSPAMD_MULTIPATTERN_NO_START) {
+ fl &= ~HS_FLAG_SOM_LEFTMOST;
+ }
+
+ g_array_append_val(mp->hs_flags, fl);
+ /* NOTE(review): the filter gets only the per-pattern `flags`, not adjusted_flags - confirm this is intended */
+ np = rspamd_multipattern_pattern_filter(pattern, patlen, flags, &dlen);
+ g_array_append_val(mp->hs_pats, np);
+ /* `fl` is reused as a temporary holding the pattern id */
+ fl = mp->cnt;
+ g_array_append_val(mp->hs_ids, fl);
+ /* The filtered pattern contributes to the hash that names the cache file */
+ rspamd_cryptobox_hash_update(&mp->hash_state, np, dlen);
+
+ mp->cnt++;
+
+ return;
+ }
+#endif
+ ac_trie_pat_t pat;
+
+ pat.ptr = rspamd_multipattern_pattern_filter(pattern, patlen, flags, &dlen);
+ pat.len = dlen;
+
+ g_array_append_val(mp->pats, pat);
+
+ mp->cnt++;
+}
+
+struct rspamd_multipattern *
+rspamd_multipattern_create_full(const gchar **patterns,
+ guint npatterns, enum rspamd_multipattern_flags flags)
+{
+ struct rspamd_multipattern *mp;
+ guint i;
+
+ g_assert(npatterns > 0);
+ g_assert(patterns != NULL);
+
+ mp = rspamd_multipattern_create_sized(npatterns, flags);
+
+ for (i = 0; i < npatterns; i++) {
+ rspamd_multipattern_add_pattern(mp, patterns[i], flags);
+ }
+
+ return mp;
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Tries to obtain a cached hyperscan database whose file name is derived
+ * from `hash` inside the cache directory.
+ *
+ * @return TRUE if `mp->hs_db` has been set from the cache, FALSE if no cache
+ * directory is configured or nothing could be loaded
+ */
+static gboolean
+rspamd_multipattern_try_load_hs(struct rspamd_multipattern *mp,
+								const guchar *hash)
+{
+	gchar cache_path[PATH_MAX];
+
+	if (hs_cache_dir != NULL) {
+		rspamd_snprintf(cache_path, sizeof(cache_path), "%s/%*xs.hsmp", hs_cache_dir,
+						(gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+		mp->hs_db = rspamd_hyperscan_maybe_load(cache_path, 0);
+
+		if (mp->hs_db != NULL) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Serializes the compiled hyperscan database into the cache directory.
+ * The data goes to a temporary file inside that directory first and is
+ * renamed to "<hash>.hsmp" only after it has been written completely.
+ * Failures are logged and otherwise ignored (caching is best effort).
+ *
+ * @param mp multipattern with a valid `hs_db`
+ * @param hash hash of the patterns and platform, used for the file name
+ */
+static void
+rspamd_multipattern_try_save_hs(struct rspamd_multipattern *mp,
+								const guchar *hash)
+{
+	gchar fp[PATH_MAX], np[PATH_MAX];
+	char *bytes = NULL;
+	gsize len;
+	gint fd;
+
+	if (hs_cache_dir == NULL) {
+		return;
+	}
+
+	/* Directory first, then the separator: the temp file lives in the cache dir */
+	rspamd_snprintf(fp, sizeof(fp), "%s%shsmp-XXXXXXXXXXXXX", hs_cache_dir,
+					G_DIR_SEPARATOR_S);
+
+	if ((fd = g_mkstemp_full(fp, O_CREAT | O_EXCL | O_WRONLY, 00644)) != -1) {
+		int ret;
+		if ((ret = hs_serialize_database(rspamd_hyperscan_get_database(mp->hs_db), &bytes, &len)) == HS_SUCCESS) {
+			gsize written = 0;
+			gboolean write_ok = TRUE;
+
+			/* write() may be short or interrupted, so loop until done */
+			while (written < len) {
+				gssize r = write(fd, bytes + written, len - written);
+
+				if (r > 0) {
+					written += r;
+				}
+				else if (r == -1 && errno == EINTR) {
+					continue;
+				}
+				else {
+					write_ok = FALSE;
+					break;
+				}
+			}
+
+			if (!write_ok) {
+				msg_warn("cannot write hyperscan cache to %s: %s",
+						 fp, strerror(errno));
+				unlink(fp);
+				free(bytes);
+			}
+			else {
+				free(bytes);
+				fsync(fd);
+
+				rspamd_snprintf(np, sizeof(np), "%s/%*xs.hsmp", hs_cache_dir,
+								(gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+
+				if (rename(fp, np) == -1) {
+					msg_warn("cannot rename hyperscan cache from %s to %s: %s",
+							 fp, np, strerror(errno));
+					unlink(fp);
+				}
+				else {
+					rspamd_hyperscan_notice_known(np);
+				}
+			}
+		}
+		else {
+			msg_warn("cannot serialize hyperscan cache to %s: error code %d",
+					 fp, ret);
+			unlink(fp);
+		}
+
+
+		close(fd);
+	}
+	else {
+		msg_warn("cannot open a temp file %s to write hyperscan cache: %s", fp, strerror(errno));
+	}
+}
+#endif
+
+/*
+ * Compiles all added patterns.
+ *
+ * With hyperscan: the database is taken from the cache when possible,
+ * otherwise compiled and saved; then MAX_SCRATCH scratch spaces are
+ * allocated. Without hyperscan: GLOB/RE multipatterns are compiled into
+ * individual regexps, everything else into an aho-corasick trie.
+ * An empty multipattern compiles successfully into nothing.
+ *
+ * @param mp not yet compiled multipattern
+ * @param err receives the error description on failure
+ * @return TRUE on success, FALSE on failure
+ */
+gboolean
+rspamd_multipattern_compile(struct rspamd_multipattern *mp, GError **err)
+{
+	g_assert(mp != NULL);
+	g_assert(!mp->compiled);
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		guint i;
+		hs_platform_info_t plt;
+		hs_compile_error_t *hs_errors;
+		guchar hash[rspamd_cryptobox_HASHBYTES];
+
+		if (mp->cnt > 0) {
+			/* Keep the call out of g_assert: assertions may be compiled out */
+			if (hs_populate_platform(&plt) != HS_SUCCESS) {
+				g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+							"cannot populate hyperscan platform information");
+
+				return FALSE;
+			}
+
+			rspamd_cryptobox_hash_update(&mp->hash_state, (void *) &plt, sizeof(plt));
+			rspamd_cryptobox_hash_final(&mp->hash_state, hash);
+
+			if (!rspamd_multipattern_try_load_hs(mp, hash)) {
+				hs_database_t *db = NULL;
+
+				if (hs_compile_multi((const char *const *) mp->hs_pats->data,
+									 (const unsigned int *) mp->hs_flags->data,
+									 (const unsigned int *) mp->hs_ids->data,
+									 mp->cnt,
+									 HS_MODE_BLOCK,
+									 &plt,
+									 &db,
+									 &hs_errors) != HS_SUCCESS) {
+					/* The expression index is negative for non-specific errors */
+					const gchar *bad_pat = "<unknown>";
+
+					if (hs_errors->expression >= 0 &&
+						(guint) hs_errors->expression < mp->hs_pats->len) {
+						bad_pat = g_array_index(mp->hs_pats, char *, hs_errors->expression);
+					}
+
+					g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+								"cannot create tree of regexp when processing '%s': %s",
+								bad_pat,
+								hs_errors->message);
+					hs_free_compile_error(hs_errors);
+
+					return FALSE;
+				}
+
+				if (hs_cache_dir != NULL) {
+					char fpath[PATH_MAX];
+					rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmp", hs_cache_dir,
+									(gint) rspamd_cryptobox_HASHBYTES / 2, hash);
+					mp->hs_db = rspamd_hyperscan_from_raw_db(db, fpath);
+				}
+				else {
+					/* Should not happen in the real life */
+					mp->hs_db = rspamd_hyperscan_from_raw_db(db, NULL);
+				}
+
+				rspamd_multipattern_try_save_hs(mp, hash);
+			}
+
+			for (i = 0; i < MAX_SCRATCH; i++) {
+				mp->scratch[i] = NULL;
+			}
+
+			for (i = 0; i < MAX_SCRATCH; i++) {
+				int ret;
+
+				if ((ret = hs_alloc_scratch(rspamd_hyperscan_get_database(mp->hs_db), &mp->scratch[i])) != HS_SUCCESS) {
+					msg_err("cannot allocate scratch space for hyperscan: error code %d", ret);
+
+					/* Clean all scratches that are non-NULL */
+					for (int ii = 0; ii < MAX_SCRATCH; ii++) {
+						if (mp->scratch[ii] != NULL) {
+							hs_free_scratch(mp->scratch[ii]);
+						}
+					}
+					g_set_error(err, rspamd_multipattern_quark(), EINVAL,
+								"cannot allocate scratch space for hyperscan: error code %d", ret);
+
+					rspamd_hyperscan_free(mp->hs_db, true);
+					mp->hs_db = NULL;
+
+					return FALSE;
+				}
+			}
+		}
+
+		mp->compiled = TRUE;
+
+		return TRUE;
+	}
+#endif
+
+	if (mp->cnt > 0) {
+
+		if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) {
+			/* Fallback to pcre... */
+			rspamd_regexp_t *re;
+			mp->res = g_array_sized_new(FALSE, TRUE,
+										sizeof(rspamd_regexp_t *), mp->cnt);
+
+			for (guint i = 0; i < mp->cnt; i++) {
+				const ac_trie_pat_t *pat;
+				const gchar *pat_flags = NULL;
+
+				if (mp->flags & RSPAMD_MULTIPATTERN_UTF8) {
+					pat_flags = "u";
+				}
+
+				pat = &g_array_index(mp->pats, ac_trie_pat_t, i);
+				re = rspamd_regexp_new(pat->ptr, pat_flags, err);
+
+				if (re == NULL) {
+					/* Do not leak the regexps that were already built */
+					for (guint j = 0; j < mp->res->len; j++) {
+						rspamd_regexp_unref(g_array_index(mp->res, rspamd_regexp_t *, j));
+					}
+
+					g_array_free(mp->res, TRUE);
+					mp->res = NULL;
+
+					return FALSE;
+				}
+
+				g_array_append_val(mp->res, re);
+			}
+		}
+		else {
+			mp->t = acism_create((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
+		}
+	}
+
+	mp->compiled = TRUE;
+
+	return TRUE;
+}
+
+/* State shared between rspamd_multipattern_lookup() and the backend callbacks */
+struct rspamd_multipattern_cbdata {
+ struct rspamd_multipattern *mp; /* matcher being used */
+ const gchar *in; /* scanned text */
+ gsize len; /* length of the scanned text */
+ rspamd_multipattern_cb_t cb; /* user callback */
+ gpointer ud; /* user data passed to the callback */
+ guint nfound; /* number of user callback invocations */
+ gint ret; /* value returned by the last user callback invocation */
+};
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Hyperscan match handler: forwards the match to the user callback and
+ * records its result. Matches ending at offset 0 are ignored; an unknown
+ * start offset (HS_OFFSET_PAST_HORIZON) is reported as 0.
+ *
+ * @return value of the user callback (non-zero stops the scan), 0 for
+ * ignored matches
+ */
+static gint
+rspamd_multipattern_hs_cb(unsigned int id,
+						  unsigned long long from,
+						  unsigned long long to,
+						  unsigned int flags,
+						  void *ud)
+{
+	struct rspamd_multipattern_cbdata *state = ud;
+	gint rc;
+
+	if (to == 0) {
+		return 0;
+	}
+
+	if (from == HS_OFFSET_PAST_HORIZON) {
+		from = 0;
+	}
+
+	rc = state->cb(state->mp, id, from, to, state->in, state->len, state->ud);
+
+	state->nfound++;
+	state->ret = rc;
+
+	return rc;
+}
+#endif
+
+/*
+ * Trie match handler (also used by the regexp fallback): calls the user
+ * callback with the match start computed as the end position minus the
+ * stored pattern length, and records the result.
+ *
+ * @return value of the user callback (non-zero stops the search)
+ */
+static gint
+rspamd_multipattern_acism_cb(int strnum, int textpos, void *context)
+{
+	struct rspamd_multipattern_cbdata *state = context;
+	const ac_trie_pat_t *matched = &g_array_index(state->mp->pats, ac_trie_pat_t, strnum);
+	gint rc;
+
+	rc = state->cb(state->mp, strnum, textpos - matched->len,
+				   textpos, state->in, state->len, state->ud);
+
+	state->nfound++;
+	state->ret = rc;
+
+	return rc;
+}
+
+/*
+ * Searches `in` for the compiled patterns and invokes `cb` for every match
+ * until the callback returns non-zero.
+ *
+ * @param mp multipattern (nothing is done unless it is compiled and non-empty)
+ * @param in text to scan
+ * @param len length of the text (nothing is done for 0)
+ * @param cb match callback
+ * @param ud opaque data for the callback
+ * @param pnfound if not NULL, always receives the number of callback
+ * invocations (0 when no scan is performed)
+ * @return 0 if the search ran to the end or was not performed, otherwise the
+ * value that stopped it (or a backend error code)
+ */
+gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
+								const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
+								gpointer ud, guint *pnfound)
+{
+	struct rspamd_multipattern_cbdata cbd;
+	gint ret = 0;
+
+	g_assert(mp != NULL);
+
+	if (mp->cnt == 0 || !mp->compiled || len == 0) {
+		/* Do not leave the output counter unset for the caller */
+		if (pnfound) {
+			*pnfound = 0;
+		}
+
+		return 0;
+	}
+
+	cbd.mp = mp;
+	cbd.in = in;
+	cbd.len = len;
+	cbd.cb = cb;
+	cbd.ud = ud;
+	cbd.nfound = 0;
+	cbd.ret = 0;
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		hs_scratch_t *scr = NULL;
+		guint i;
+
+		/* Take the first scratch slot that is not marked as used */
+		for (i = 0; i < MAX_SCRATCH; i++) {
+			if (!(mp->scratch_used & (1 << i))) {
+				mp->scratch_used |= (1 << i);
+				scr = mp->scratch[i];
+				break;
+			}
+		}
+
+		g_assert(scr != NULL);
+
+		ret = hs_scan(rspamd_hyperscan_get_database(mp->hs_db), in, len, 0, scr,
+					  rspamd_multipattern_hs_cb, &cbd);
+
+		mp->scratch_used &= ~(1 << i);
+
+		if (ret == HS_SUCCESS) {
+			ret = 0;
+		}
+		else if (ret == HS_SCAN_TERMINATED) {
+			/* The scan was stopped by the callback: report its value */
+			ret = cbd.ret;
+		}
+
+		if (pnfound) {
+			*pnfound = cbd.nfound;
+		}
+
+		return ret;
+	}
+#endif
+
+	gint state = 0;
+
+	if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB | RSPAMD_MULTIPATTERN_RE)) {
+		/* Terribly inefficient, but who cares - just use hyperscan */
+		for (guint i = 0; i < mp->cnt; i++) {
+			rspamd_regexp_t *re = g_array_index(mp->res, rspamd_regexp_t *, i);
+			const gchar *start = NULL, *end = NULL;
+
+			while (rspamd_regexp_search(re,
+										in,
+										len,
+										&start,
+										&end,
+										TRUE,
+										NULL)) {
+				/*
+				 * NOTE(review): the shared callback derives the match start
+				 * from the length of the pattern source, not from `start` -
+				 * confirm whether callers rely on the start offset here.
+				 */
+				if (rspamd_multipattern_acism_cb(i, end - in, &cbd)) {
+					goto out;
+				}
+			}
+		}
+	out:
+		ret = cbd.ret;
+
+		if (pnfound) {
+			*pnfound = cbd.nfound;
+		}
+	}
+	else {
+		/* Plain trie */
+		ret = acism_lookup(mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
+						   &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
+
+		if (pnfound) {
+			*pnfound = cbd.nfound;
+		}
+	}
+
+	return ret;
+}
+
+
+/*
+ * Releases a multipattern together with everything it owns: scratch spaces
+ * and database (hyperscan), or trie, fallback regexps and pattern copies
+ * (non-hyperscan). NULL is accepted and ignored.
+ */
+void rspamd_multipattern_destroy(struct rspamd_multipattern *mp)
+{
+	guint i;
+
+	if (mp) {
+#ifdef WITH_HYPERSCAN
+		if (rspamd_hs_check()) {
+			gchar *p;
+
+			if (mp->compiled && mp->cnt > 0) {
+				for (i = 0; i < MAX_SCRATCH; i++) {
+					hs_free_scratch(mp->scratch[i]);
+				}
+
+				if (mp->hs_db) {
+					rspamd_hyperscan_free(mp->hs_db, false);
+				}
+			}
+
+			for (i = 0; i < mp->cnt; i++) {
+				p = g_array_index(mp->hs_pats, gchar *, i);
+				g_free(p);
+			}
+
+			g_array_free(mp->hs_pats, TRUE);
+			g_array_free(mp->hs_ids, TRUE);
+			g_array_free(mp->hs_flags, TRUE);
+			free(mp); /* Due to posix_memalign */
+
+			return;
+		}
+#endif
+		ac_trie_pat_t pat;
+
+		/* The trie does not exist when the regexp fallback was compiled */
+		if (mp->compiled && mp->cnt > 0 && mp->t != NULL) {
+			acism_destroy(mp->t);
+		}
+
+		/* Regexps built for GLOB/RE patterns */
+		if (mp->res != NULL) {
+			for (i = 0; i < mp->res->len; i++) {
+				rspamd_regexp_unref(g_array_index(mp->res, rspamd_regexp_t *, i));
+			}
+
+			g_array_free(mp->res, TRUE);
+		}
+
+		for (i = 0; i < mp->cnt; i++) {
+			pat = g_array_index(mp->pats, ac_trie_pat_t, i);
+			g_free((gchar *) pat.ptr);
+		}
+
+		g_array_free(mp->pats, TRUE);
+
+		free(mp); /* Due to posix_memalign */
+	}
+}
+
+/*
+ * Returns the stored (filtered) form of the pattern number `index`; the
+ * string stays owned by the multipattern. The index must be less than the
+ * number of patterns.
+ */
+const gchar *
+rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp,
+								guint index)
+{
+	const ac_trie_pat_t *stored;
+
+	g_assert(mp != NULL);
+	g_assert(index < mp->cnt);
+
+#ifdef WITH_HYPERSCAN
+	if (rspamd_hs_check()) {
+		return g_array_index(mp->hs_pats, gchar *, index);
+	}
+#endif
+
+	stored = &g_array_index(mp->pats, ac_trie_pat_t, index);
+
+	return stored->ptr;
+}
+
+/* Returns how many patterns have been added to the multipattern */
+guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp)
+{
+	guint npatterns;
+
+	g_assert(mp != NULL);
+	npatterns = mp->cnt;
+
+	return npatterns;
+}
+
+/* Public wrapper around the cached hyperscan availability check */
+gboolean
+rspamd_multipattern_has_hyperscan(void)
+{
+	gboolean supported = rspamd_hs_check();
+
+	return supported;
+}
diff --git a/src/libutil/multipattern.h b/src/libutil/multipattern.h
new file mode 100644
index 0000000..9302766
--- /dev/null
+++ b/src/libutil/multipattern.h
@@ -0,0 +1,173 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_LIBUTIL_MULTIPATTERN_H_
+#define SRC_LIBUTIL_MULTIPATTERN_H_
+
+#include "config.h"
+
+/**
+ * @file multipattern.h
+ *
+ * This file defines structure that acts like a transparent bridge between
+ * hyperscan and ac-trie
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Flags that control how patterns are interpreted and matched
+ */
+enum rspamd_multipattern_flags {
+ RSPAMD_MULTIPATTERN_DEFAULT = 0,
+ RSPAMD_MULTIPATTERN_ICASE = (1 << 0), /**< case insensitive matching */
+ RSPAMD_MULTIPATTERN_UTF8 = (1 << 1), /**< treat patterns as UTF-8 */
+ RSPAMD_MULTIPATTERN_TLD = (1 << 2), /**< patterns are top level domains and are rewritten before use */
+ /* Not supported by acism */
+ RSPAMD_MULTIPATTERN_GLOB = (1 << 3), /**< patterns are globs */
+ RSPAMD_MULTIPATTERN_RE = (1 << 4), /**< patterns are regular expressions */
+ RSPAMD_MULTIPATTERN_DOTALL = (1 << 5), /**< hyperscan only: sets HS_FLAG_DOTALL */
+ RSPAMD_MULTIPATTERN_SINGLEMATCH = (1 << 6), /**< hyperscan only: sets HS_FLAG_SINGLEMATCH */
+ RSPAMD_MULTIPATTERN_NO_START = (1 << 7), /**< hyperscan only: do not track start of match */
+};
+
+struct rspamd_multipattern;
+struct rspamd_cryptobox_library_ctx;
+
+/**
+ * Called on pattern match
+ * @param mp multipattern structure
+ * @param strnum number of pattern matched
+ * @param match_start offset in the text reported as the start of the match
+ * @param match_pos offset in the text reported as the end of the match
+ * @param text input text
+ * @param len length of input text
+ * @param context userdata
+ * @return if 0 then search for another pattern, otherwise return this value to caller
+ */
+typedef gint (*rspamd_multipattern_cb_t)(struct rspamd_multipattern *mp,
+ guint strnum,
+ gint match_start,
+ gint match_pos,
+ const gchar *text,
+ gsize len,
+ void *context);
+
+/**
+ * Init multipattern library and set the appropriate cache dir
+ * @param cache_dir directory for cached hyperscan databases; the pointer is
+ * stored as is, so the string must stay valid while the library is used
+ */
+void rspamd_multipattern_library_init(const gchar *cache_dir);
+
+/**
+ * Creates empty multipattern structure
+ * @param flags
+ * @return
+ */
+struct rspamd_multipattern *rspamd_multipattern_create(
+ enum rspamd_multipattern_flags flags);
+
+/**
+ * Creates multipattern with preallocated number of patterns to speed up loading
+ * @param reserved number of patterns to preallocate space for
+ * @param flags flags stored in the multipattern
+ * @return new empty multipattern structure
+ */
+struct rspamd_multipattern *rspamd_multipattern_create_sized(guint reserved,
+ enum rspamd_multipattern_flags flags);
+
+/**
+ * Creates new multipattern structure
+ * @param patterns vector of null terminated strings
+ * @param npatterns number of patterns
+ * @param flags flags applied to all patterns
+ * @return new multipattern structure
+ */
+struct rspamd_multipattern *rspamd_multipattern_create_full(
+ const gchar **patterns,
+ guint npatterns,
+ enum rspamd_multipattern_flags flags);
+
+/**
+ * Adds new pattern to match engine from zero-terminated string
+ * @param mp multipattern structure that is not compiled yet
+ * @param pattern zero-terminated pattern, must not be NULL
+ * @param flags flags for this pattern (enum rspamd_multipattern_flags values)
+ */
+void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp,
+ const gchar *pattern, gint flags);
+
+/**
+ * Adds new pattern from arbitrary string
+ * @param mp
+ * @param pattern
+ * @param patlen
+ * @param flags
+ */
+void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp,
+ const gchar *pattern, gsize patlen, gint flags);
+
+/**
+ * Compiles multipattern structure
+ * @param mp
+ * @return
+ */
+gboolean rspamd_multipattern_compile(struct rspamd_multipattern *mp,
+ GError **err);
+
+/**
+ * Lookups for patterns in a text using the specified callback function
+ * @param mp multipattern structure
+ * @param in text to scan
+ * @param len length of the text
+ * @param cb if callback returns non-zero, then search is terminated and that value is returned
+ * @param ud callback data
+ * @param pnfound if not NULL, receives the number of callback invocations
+ * when a scan is actually performed
+ * @return 0 if the search was not terminated by the callback
+ * (NOTE(review): backend error codes may also be returned - confirm)
+ */
+gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
+ const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
+ gpointer ud, guint *pnfound);
+
+/**
+ * Get pattern string from multipattern identified by index
+ * @param mp
+ * @param index
+ * @return
+ */
+const gchar *rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp,
+ guint index);
+
+/**
+ * Returns number of patterns in a multipattern matcher
+ * @param mp
+ * @return
+ */
+guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp);
+
+/**
+ * Destroys multipattern structure
+ * @param mp
+ */
+void rspamd_multipattern_destroy(struct rspamd_multipattern *mp);
+
+/**
+ * Returns TRUE if hyperscan is supported
+ * @return
+ */
+gboolean rspamd_multipattern_has_hyperscan(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_MULTIPATTERN_H_ */
diff --git a/src/libutil/printf.c b/src/libutil/printf.c
new file mode 100644
index 0000000..ba53b56
--- /dev/null
+++ b/src/libutil/printf.c
@@ -0,0 +1,1097 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Copyright (C) 2002-2015 Igor Sysoev
+ * Copyright (C) 2011-2015 Nginx, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "printf.h"
+#include "str_util.h"
+#include "contrib/fpconv/fpconv.h"
+
+/**
+ * From FreeBSD libutil code
+ */
+/* Upper bound on the number of scaling steps done by rspamd_humanize_number() */
+static const int maxscale = 6;
+/* Digit alphabets used for lower-case (%x) and upper-case (%X) hex output */
+static const gchar _hex[] = "0123456789abcdef";
+static const gchar _HEX[] = "0123456789ABCDEF";
+
+/*
+ * Renders `num` into [buf, last) in a short form with a unit suffix.
+ *
+ * With `bytes` unset the value is repeatedly divided by 1000 and suffixed
+ * with k/M/G/T/P/E; with `bytes` set it is divided by 1024 and suffixed with
+ * B/KiB/MiB/GiB/TiB/PiB/EiB. At most `maxscale` divisions are performed.
+ *
+ * Returns the position right after the last character produced.
+ */
+static gchar *
+rspamd_humanize_number(gchar *buf, gchar *last, gint64 num, gboolean bytes)
+{
+    /* Every suffix lives in a fixed 4-byte slot: slot N starts at N * 4 */
+    const gchar *suffixes = bytes ? "B\0\0\0KiB\0MiB\0GiB\0TiB\0PiB\0EiB"
+                                  : "\0\0\0\0k\0\0\0M\0\0\0G\0\0\0T\0\0\0P\0\0\0E";
+    const gchar *suffix;
+    gint64 divisor = bytes ? 1024 : 1000;
+    gsize avail = last - buf;
+    int scale = 0, rest = 0, sign = 1, nprinted;
+
+    if (num < 0) {
+        sign = -1;
+        num = -num;
+    }
+
+    /* Scale down, remembering the remainder of the last division only */
+    while (scale < maxscale && num > divisor) {
+        rest = num % divisor;
+        num /= divisor;
+        scale++;
+    }
+
+    suffix = &suffixes[scale * 4];
+
+    if (rest != 0 && num <= divisor / 2) {
+        /* Floating point version */
+        nprinted = rspamd_snprintf(buf, avail, "%.2f%s",
+                                   sign * (num + rest / (gdouble) divisor),
+                                   suffix);
+    }
+    else {
+        /* Integer version */
+        nprinted = rspamd_snprintf(buf, avail, "%L%s",
+                                   sign * (num + (rest + 50) / divisor),
+                                   suffix);
+    }
+
+    return buf + nprinted;
+}
+
+
+/*
+ * Returns the number of decimal digits needed to print `val` (1 for zero).
+ *
+ * The bit length of `val | 1` is multiplied by 1233 / 4096 (close to
+ * log10(2)) to get an estimate, which is then corrected by one comparison
+ * against a table of powers of ten.
+ */
+static inline unsigned
+rspamd_decimal_digits32(guint32 val)
+{
+    static const guint32 pow10_tbl[] = {
+        0,
+        10,
+        100,
+        1000,
+        10000,
+        100000,
+        1000000,
+        10000000,
+        100000000,
+        1000000000};
+    unsigned approx;
+
+#if defined(_MSC_VER)
+    unsigned long msb = 0;
+    _BitScanReverse(&msb, val | 1);
+    approx = (msb + 1) * 1233 >> 12;
+#elif defined(__GNUC__) && (__GNUC__ >= 3)
+    approx = (32 - __builtin_clz(val | 1U)) * 1233 >> 12;
+#else /* Software version */
+    static const unsigned debruijn_tbl[32] = {0, 9, 1, 10, 13, 21, 2, 29,
+                                              11, 14, 16, 18, 22, 25, 3, 30,
+                                              8, 12, 20, 28, 15, 17, 24, 7,
+                                              19, 27, 23, 6, 26, 5, 4, 31};
+    guint32 smeared = val | 1;
+
+    /* Propagate the highest set bit into every lower position */
+    smeared |= smeared >> 1;
+    smeared |= smeared >> 2;
+    smeared |= smeared >> 4;
+    smeared |= smeared >> 8;
+    smeared |= smeared >> 16;
+    approx = (1 + debruijn_tbl[(smeared * 0x07C4ACDDU) >> 27]) * 1233 >> 12;
+#endif
+
+    if (val < pow10_tbl[approx]) {
+        return approx;
+    }
+
+    return approx + 1;
+}
+
+/*
+ * 64-bit counterpart of rspamd_decimal_digits32(): returns the number of
+ * decimal digits needed to print `val` (1 for zero), using the bit length
+ * scaled by 1233 / 4096 and one table comparison for correction.
+ */
+static inline unsigned
+rspamd_decimal_digits64(guint64 val)
+{
+    static const guint64 pow10_tbl[] = {
+        0,
+        10ULL,
+        100ULL,
+        1000ULL,
+        10000ULL,
+        100000ULL,
+        1000000ULL,
+        10000000ULL,
+        100000000ULL,
+        1000000000ULL,
+        10000000000ULL,
+        100000000000ULL,
+        1000000000000ULL,
+        10000000000000ULL,
+        100000000000000ULL,
+        1000000000000000ULL,
+        10000000000000000ULL,
+        100000000000000000ULL,
+        1000000000000000000ULL,
+        10000000000000000000ULL};
+    unsigned approx;
+
+#if defined(_MSC_VER)
+#if _M_IX86
+    /* No 64-bit scan on 32-bit x86: inspect the two halves separately */
+    unsigned long msb = 0;
+    guint64 nonzero = val | 1;
+    if (_BitScanReverse(&msb, nonzero >> 32)) {
+        msb += 32;
+    }
+    else {
+        _BitScanReverse(&msb, nonzero & 0xFFFFFFFF);
+    }
+    approx = (msb + 1) * 1233 >> 12;
+#else
+    unsigned long msb = 0;
+    _BitScanReverse64(&msb, val | 1);
+    approx = (msb + 1) * 1233 >> 12;
+#endif
+#elif defined(__GNUC__) && (__GNUC__ >= 3)
+    approx = (64 - __builtin_clzll(val | 1ULL)) * 1233 >> 12;
+#else /* Software version */
+    static const unsigned debruijn_tbl[32] = {0, 9, 1, 10, 13, 21, 2, 29,
+                                              11, 14, 16, 18, 22, 25, 3, 30,
+                                              8, 12, 20, 28, 15, 17, 24, 7,
+                                              19, 27, 23, 6, 26, 5, 4, 31};
+    guint32 half = val >> 32;
+    unsigned base = 32;
+
+    if (half == 0) {
+        /* Upper half is empty: the highest bit is in the lower half */
+        half = val & 0xFFFFFFFF;
+        base = 0;
+    }
+
+    half |= 1;
+    half |= half >> 1;
+    half |= half >> 2;
+    half |= half >> 4;
+    half |= half >> 8;
+    half |= half >> 16;
+
+    approx = base + debruijn_tbl[(half * 0x07C4ACDDU) >> 27];
+    approx = (approx + 1) * 1233 >> 12;
+#endif
+
+    if (val < pow10_tbl[approx]) {
+        return approx;
+    }
+
+    return approx + 1;
+}
+
+/*
+ * Idea from https://github.com/miloyip/itoa-benchmark:
+ * Uses lookup table (LUT) of digit pairs for division/modulo of 100.
+ *
+ * Mentioned in:
+ * https://www.slideshare.net/andreialexandrescu1/three-optimization-tips-for-c-15708507
+ *
+ * Layout: for every i in 0..99 the characters at [2 * i] and [2 * i + 1]
+ * are the tens digit and the ones digit of i, so two output digits can be
+ * fetched per division by 100.
+ */
+
+static const char int_lookup_table[200] = {
+ '0', '0', '0', '1', '0', '2', '0', '3', '0', '4',
+ '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+ '1', '0', '1', '1', '1', '2', '1', '3', '1', '4',
+ '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+ '2', '0', '2', '1', '2', '2', '2', '3', '2', '4',
+ '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+ '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
+ '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+ '4', '0', '4', '1', '4', '2', '4', '3', '4', '4',
+ '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+ '5', '0', '5', '1', '5', '2', '5', '3', '5', '4',
+ '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+ '6', '0', '6', '1', '6', '2', '6', '3', '6', '4',
+ '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+ '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
+ '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+ '8', '0', '8', '1', '8', '2', '8', '3', '8', '4',
+ '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+ '9', '0', '9', '1', '9', '2', '9', '3', '9', '4',
+ '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'};
+
+/*
+ * Writes the decimal representation of `in` to `out` (no terminator is
+ * added) and returns the number of characters written. Digits are produced
+ * from the least significant end, two at a time via int_lookup_table.
+ */
+static inline guint
+rspamd_uint32_print(guint32 in, gchar *out)
+{
+    const guint ndigits = rspamd_decimal_digits32(in);
+    gchar *cursor = out + ndigits; /* one past the last digit */
+    const char *pair;
+
+    for (; in >= 100; in /= 100) {
+        pair = &int_lookup_table[(in % 100) * 2];
+        *--cursor = pair[1];
+        *--cursor = pair[0];
+    }
+
+    if (in >= 10) {
+        pair = &int_lookup_table[in * 2];
+        *--cursor = pair[1];
+        *--cursor = pair[0];
+    }
+    else {
+        *--cursor = ((char) in) + '0';
+    }
+
+    return ndigits;
+}
+
+/*
+ * Writes the decimal representation of `in` to `out` (no terminator is
+ * added) and returns the number of characters written. While the value has
+ * more than eight digits they are emitted eight at a time; the rest is
+ * handled with 32-bit arithmetic, two digits per step.
+ */
+static inline guint
+rspamd_uint64_print(guint64 in, gchar *out)
+{
+    const guint ndigits = rspamd_decimal_digits64(in);
+    gchar *cursor = out + ndigits; /* one past the last digit */
+    guint32 rest;
+
+    while (in >= 100000000) {
+        guint32 chunk = (guint32) (in % 100000000);
+        guint32 hi = chunk / 10000, lo = chunk % 10000;
+        /* Table offsets of the four digit pairs, least significant first */
+        guint32 pair_idx[4] = {(lo % 100) * 2, (lo / 100) * 2,
+                               (hi % 100) * 2, (hi / 100) * 2};
+        unsigned k;
+
+        for (k = 0; k < 4; k++) {
+            *--cursor = int_lookup_table[pair_idx[k] + 1];
+            *--cursor = int_lookup_table[pair_idx[k]];
+        }
+
+        in /= 100000000;
+    }
+
+    /* Remaining 32 bit */
+    rest = (guint32) in;
+
+    for (; rest >= 100; rest /= 100) {
+        unsigned idx = (rest % 100) << 1;
+
+        *--cursor = int_lookup_table[idx + 1];
+        *--cursor = int_lookup_table[idx];
+    }
+
+    if (rest >= 10) {
+        unsigned idx = rest * 2;
+
+        *--cursor = int_lookup_table[idx + 1];
+        *--cursor = int_lookup_table[idx];
+    }
+    else {
+        *--cursor = ((char) rest) + '0';
+    }
+
+    return ndigits;
+}
+
+/*
+ * Select the implementation once, so that exactly one variant is compiled
+ * into the function body (no unreachable code after a return).
+ */
+#ifdef __has_builtin
+#if __has_builtin(__builtin_ffsll)
+#define RSPAMD_FFSLL_USE_FFSLL 1
+#elif __has_builtin(__builtin_ctzll)
+#define RSPAMD_FFSLL_USE_CTZLL 1
+#endif
+#endif /* __has_builtin */
+
+/**
+ * Finds the first (least significant) set bit of a 64-bit value.
+ *
+ * ffsl() is deliberately not used: it takes a `long`, which would truncate
+ * the argument on platforms where `long` is narrower than `long long`.
+ *
+ * @param n value to scan
+ * @return 1-based index of the lowest set bit, or 0 if `n` is zero
+ */
+static inline int
+rspamd_ffsll(long long n)
+{
+#if defined(RSPAMD_FFSLL_USE_FFSLL)
+    return __builtin_ffsll(n);
+#else
+    if (n == 0) {
+        return 0;
+    }
+
+#if defined(RSPAMD_FFSLL_USE_CTZLL)
+    return __builtin_ctzll(n) + 1;
+#else
+    /* Portable fallback: shift the unsigned value until bit 0 is set */
+    unsigned long long bits = (unsigned long long) n;
+    int bit = 1;
+
+    while (!(bits & 1)) {
+        bits >>= 1;
+        bit++;
+    }
+
+    return bit;
+#endif
+#endif
+}
+
+/**
+ * Formats an unsigned number into [buf, last).
+ *
+ * @param buf destination start
+ * @param last end of the destination (one past the last writable byte)
+ * @param ui64 value to print
+ * @param zero padding character written in front of the number
+ * @param hexadecimal 0 - not hex, 1 - lower-case hex, 2 - upper-case hex
+ * @param binary non-zero to print in base 2 (hexadecimal takes precedence)
+ * @param width minimum field width; shorter numbers are left-padded
+ * @return position right after the last character written; output that does
+ * not fit before `last` is truncated
+ */
+static gchar *
+rspamd_sprintf_num(gchar *buf, gchar *last, guint64 ui64, gchar zero,
+                   guint hexadecimal, guint binary, guint width)
+{
+    /* 64 characters are enough for the longest case: 64 binary digits */
+    gchar *p, temp[64];
+    size_t len = 0;
+
+    if (G_LIKELY(hexadecimal == 0 && binary == 0)) {
+        p = temp;
+
+        if (ui64 <= G_MAXUINT32) {
+            len = rspamd_uint32_print((guint32) ui64, temp);
+        }
+        else {
+            len = rspamd_uint64_print(ui64, temp);
+        }
+    }
+    else if (hexadecimal != 0) {
+        const gchar *digits = (hexadecimal == 2) ? _HEX : _hex;
+
+        /* Digits are generated backwards from the end of temp */
+        p = temp + sizeof(temp);
+        do {
+            *--p = digits[(guint32) (ui64 & 0xf)];
+        } while (ui64 >>= 4);
+
+        len = (temp + sizeof(temp)) - p;
+    }
+    else {
+        /*
+         * Binary: emit every bit up to the most significant set one (a
+         * single '0' for zero). The loop runs at most 64 times, so it can
+         * never step in front of temp.
+         */
+        p = temp + sizeof(temp);
+        do {
+            *--p = '0' + (gchar) (ui64 & 0x1);
+        } while (ui64 >>= 1);
+
+        len = (temp + sizeof(temp)) - p;
+    }
+
+    /* zero or space padding */
+
+    if (len < width) {
+        width -= len;
+
+        while (width-- > 0 && buf < last) {
+            *buf++ = zero;
+        }
+    }
+
+    /* number safe copy: compare sizes, never form a pointer beyond last */
+
+    if (len > (size_t) (last - buf)) {
+        len = last - buf;
+    }
+
+    return ((gchar *) memcpy(buf, p, len)) + len;
+}
+
+/* Destination state used by rspamd_printf_append_char() for fixed buffers */
+struct rspamd_printf_char_buf {
+ char *begin; /* start of the destination buffer */
+ char *pos; /* next position to be written */
+ glong remain; /* number of bytes that may still be written at pos */
+};
+
+/*
+ * Append callback for fixed-size character buffers: copies as much of `buf`
+ * as still fits and returns the number of bytes copied. When no space is
+ * left the (non-positive) remaining counter is returned instead.
+ */
+static glong
+rspamd_printf_append_char(const gchar *buf, glong buflen, gpointer ud)
+{
+    struct rspamd_printf_char_buf *sink = (struct rspamd_printf_char_buf *) ud;
+    glong ncopy;
+
+    if (sink->remain <= 0) {
+        return sink->remain;
+    }
+
+    ncopy = (buflen < sink->remain) ? buflen : sink->remain;
+    memcpy(sink->pos, buf, ncopy);
+    sink->pos += ncopy;
+    sink->remain -= ncopy;
+
+    return ncopy;
+}
+
+/*
+ * Append callback for stdio streams: writes `buflen` bytes with fwrite() and
+ * returns its result; nothing is written for a non-positive length.
+ */
+static glong
+rspamd_printf_append_file(const gchar *buf, glong buflen, gpointer ud)
+{
+    FILE *stream = (FILE *) ud;
+
+    if (buflen <= 0) {
+        return 0;
+    }
+
+    return fwrite(buf, 1, buflen, stream);
+}
+
+/*
+ * Append callback for GString destinations: appends `buflen` bytes when the
+ * length is positive and always reports `buflen` back to the caller.
+ */
+static glong
+rspamd_printf_append_gstring(const gchar *buf, glong buflen, gpointer ud)
+{
+    GString *target = (GString *) ud;
+
+    if (buflen <= 0) {
+        return buflen;
+    }
+
+    g_string_append_len(target, buf, buflen);
+
+    return buflen;
+}
+
+/*
+ * Append callback for rspamd_fstring_t destinations. `ud` points to the
+ * string pointer, which is replaced by the result of rspamd_fstring_append().
+ * Always reports `buflen` back to the caller.
+ */
+static glong
+rspamd_printf_append_fstring(const gchar *buf, glong buflen, gpointer ud)
+{
+    rspamd_fstring_t **target = ud;
+
+    if (buflen <= 0) {
+        return buflen;
+    }
+
+    *target = rspamd_fstring_append(*target, buf, buflen);
+
+    return buflen;
+}
+
+/*
+ * Formats `fmt` with the trailing arguments and writes the result to `f`.
+ * Returns the value reported by rspamd_vprintf_common().
+ */
+glong rspamd_fprintf(FILE *f, const gchar *fmt, ...)
+{
+    glong nwritten;
+    va_list ap;
+
+    va_start(ap, fmt);
+    nwritten = rspamd_vprintf_common(rspamd_printf_append_file, f, fmt, ap);
+    va_end(ap);
+
+    return nwritten;
+}
+
+/*
+ * Formats `fmt` with the trailing arguments and writes the result to stdout.
+ * Returns the value reported by rspamd_vprintf_common().
+ */
+glong rspamd_printf(const gchar *fmt, ...)
+{
+    glong nwritten;
+    va_list ap;
+
+    va_start(ap, fmt);
+    nwritten = rspamd_vprintf_common(rspamd_printf_append_file, stdout, fmt, ap);
+    va_end(ap);
+
+    return nwritten;
+}
+
+/*
+ * Same as rspamd_fprintf(), but the stream is flushed after the formatted
+ * output has been written.
+ */
+glong rspamd_log_fprintf(FILE *f, const gchar *fmt, ...)
+{
+    glong nwritten;
+    va_list ap;
+
+    va_start(ap, fmt);
+    nwritten = rspamd_vprintf_common(rspamd_printf_append_file, f, fmt, ap);
+    va_end(ap);
+
+    fflush(f);
+
+    return nwritten;
+}
+
+
+/*
+ * Formats into the fixed buffer `buf` of size `max` via rspamd_vsnprintf()
+ * and returns the distance between `buf` and the end position it reports.
+ */
+glong rspamd_snprintf(gchar *buf, glong max, const gchar *fmt, ...)
+{
+    gchar *end;
+    va_list ap;
+
+    va_start(ap, fmt);
+    end = rspamd_vsnprintf(buf, max, fmt, ap);
+    va_end(ap);
+
+    return (end - buf);
+}
+
+/**
+ * Formats into a fixed-size buffer.
+ *
+ * @param buf destination buffer
+ * @param max size of the destination in bytes, terminator included
+ * @param fmt rspamd format string
+ * @param args arguments for the format
+ * @return pointer to the terminating '\0' inside `buf`; when `max` is not
+ * positive nothing is written and `buf` itself is returned
+ */
+gchar *
+rspamd_vsnprintf(gchar *buf, glong max, const gchar *fmt, va_list args)
+{
+    struct rspamd_printf_char_buf dst;
+
+    if (max <= 0) {
+        /* There is no room even for the terminator: do not touch buf */
+        return buf;
+    }
+
+    dst.begin = buf;
+    dst.pos = dst.begin;
+    /* Keep one byte in reserve for the terminating zero */
+    dst.remain = max - 1;
+    (void) rspamd_vprintf_common(rspamd_printf_append_char, &dst, fmt, args);
+    *dst.pos = '\0';
+
+    return dst.pos;
+}
+
+/*
+ * Formats `fmt` with the trailing arguments and appends the result to `s`.
+ * Returns the value reported by rspamd_vprintf_gstring().
+ */
+glong rspamd_printf_gstring(GString *s, const gchar *fmt, ...)
+{
+    glong nwritten;
+    va_list ap;
+
+    va_start(ap, fmt);
+    nwritten = rspamd_vprintf_gstring(s, fmt, ap);
+    va_end(ap);
+
+    return nwritten;
+}
+
+/* va_list flavour of rspamd_printf_gstring(): appends formatted output to `s` */
+glong rspamd_vprintf_gstring(GString *s, const gchar *fmt, va_list args)
+{
+    glong nwritten;
+
+    nwritten = rspamd_vprintf_common(rspamd_printf_append_gstring, s, fmt, args);
+
+    return nwritten;
+}
+
+/*
+ * Formats `fmt` with the trailing arguments and appends the result to the
+ * string referenced by `s`. Returns the value reported by
+ * rspamd_vprintf_fstring().
+ */
+glong rspamd_printf_fstring(rspamd_fstring_t **s, const gchar *fmt, ...)
+{
+    glong nwritten;
+    va_list ap;
+
+    va_start(ap, fmt);
+    nwritten = rspamd_vprintf_fstring(s, fmt, ap);
+    va_end(ap);
+
+    return nwritten;
+}
+
+/* va_list flavour of rspamd_printf_fstring(): appends formatted output to `*s` */
+glong rspamd_vprintf_fstring(rspamd_fstring_t **s, const gchar *fmt, va_list args)
+{
+    glong nwritten;
+
+    nwritten = rspamd_vprintf_common(rspamd_printf_append_fstring, s, fmt, args);
+
+    return nwritten;
+}
+
+/*
+ * Helpers for rspamd_vprintf_common(). Both rely on names from the enclosing
+ * function: `func`, `apd`, `wr`, `written`, `fmt`, `buf_start` and the `oob`
+ * label.
+ *
+ * RSPAMD_PRINTF_APPEND emits a converted value, then steps over the
+ * conversion character and restarts the pending literal run right after it.
+ */
+#define RSPAMD_PRINTF_APPEND(buf, len) \
+ do { \
+ RSPAMD_PRINTF_APPEND_BUF(buf, len); \
+ fmt++; \
+ buf_start = fmt; \
+ } while (0)
+
+/*
+ * RSPAMD_PRINTF_APPEND_BUF passes `len` bytes to the append callback; if the
+ * callback accepts fewer than `len` bytes, formatting stops via `goto oob`
+ * and that short write is not added to `written`.
+ */
+#define RSPAMD_PRINTF_APPEND_BUF(buf, len) \
+ do { \
+ wr = func((buf), (len), apd); \
+ if (wr < (__typeof(wr)) (len)) { \
+ goto oob; \
+ } \
+ written += wr; \
+ } while (0)
+
+/**
+ * Core of all rspamd printf flavours: walks the format string and hands every
+ * produced fragment to the append callback.
+ *
+ * Literal text between conversions is passed to `func` in one piece. A
+ * conversion has the form %[0][width][flags][.precision|.*][*]<type>, where
+ * the flags are `u` (unsigned), `x`/`X` (hex), `b`/`B` (binary for numbers,
+ * base32/base64 for strings), `h`/`H` (humanized) and `m` (accepted and
+ * ignored).
+ *
+ * @param func append callback
+ * @param apd opaque data passed to the callback
+ * @param fmt format string
+ * @param args arguments for the format
+ * @return sum of the values returned by the callback for fully accepted
+ * fragments; formatting stops at the first short write. A negative `*` or
+ * `.*` argument makes the function return 0 immediately.
+ */
+glong rspamd_vprintf_common(rspamd_printf_append_func func,
+                            gpointer apd,
+                            const gchar *fmt,
+                            va_list args)
+{
+    gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], dtoabuf[32], *p, *last;
+    guchar c;
+    const gchar *buf_start = fmt;
+    gint d;
+    gdouble f;
+    glong written = 0, wr, slen;
+    gint64 i64;
+    guint64 ui64;
+    guint width, sign, hex, humanize, bytes, frac_width, b32, b64;
+    rspamd_fstring_t *v;
+    rspamd_ftok_t *tok;
+    GString *gs;
+    GError *err;
+
+    while (*fmt) {
+
+        if (*fmt == '%') {
+
+            /* Flush the literal text collected since the last conversion */
+            if (fmt > buf_start) {
+                wr = func(buf_start, fmt - buf_start, apd);
+                if (wr <= 0) {
+                    goto oob;
+                }
+                written += wr;
+            }
+
+            i64 = 0;
+            ui64 = 0;
+
+            zero = (gchar) ((*++fmt == '0') ? '0' : ' ');
+            width = 0;
+            sign = 1;
+            hex = 0;
+            b32 = 0;
+            b64 = 0;
+            bytes = 0;
+            humanize = 0;
+            frac_width = 0;
+            slen = -1;
+
+            while (*fmt >= '0' && *fmt <= '9') {
+                width = width * 10 + *fmt++ - '0';
+            }
+
+            /* Flags; parsing of flags ends after a precision specification */
+            for (;;) {
+                switch (*fmt) {
+
+                case 'u':
+                    sign = 0;
+                    fmt++;
+                    continue;
+
+                case 'm':
+                    fmt++;
+                    continue;
+
+                case 'X':
+                    hex = 2;
+                    sign = 0;
+                    fmt++;
+                    continue;
+
+                case 'x':
+                    hex = 1;
+                    sign = 0;
+                    fmt++;
+                    continue;
+                case 'b':
+                    b32 = 1;
+                    sign = 0;
+                    fmt++;
+                    continue;
+                case 'B':
+                    b64 = 1;
+                    sign = 0;
+                    fmt++;
+                    continue;
+                case 'H':
+                    humanize = 1;
+                    bytes = 1;
+                    sign = 0;
+                    fmt++;
+                    continue;
+                case 'h':
+                    humanize = 1;
+                    sign = 0;
+                    fmt++;
+                    continue;
+                case '.':
+                    fmt++;
+
+                    if (*fmt == '*') {
+                        d = (gint) va_arg(args, gint);
+                        if (G_UNLIKELY(d < 0)) {
+                            return 0;
+                        }
+                        frac_width = (guint) d;
+                        fmt++;
+                    }
+                    else {
+                        while (*fmt >= '0' && *fmt <= '9') {
+                            frac_width = frac_width * 10 + *fmt++ - '0';
+                        }
+                    }
+
+                    break;
+
+                case '*':
+                    d = (gint) va_arg(args, gint);
+                    if (G_UNLIKELY(d < 0)) {
+                        return 0;
+                    }
+                    slen = (glong) d;
+                    fmt++;
+                    continue;
+
+                default:
+                    break;
+                }
+
+                break;
+            }
+
+
+            switch (*fmt) {
+
+            case 'V':
+                v = va_arg(args, rspamd_fstring_t *);
+
+                if (v) {
+                    slen = v->len;
+
+                    if (G_UNLIKELY(width != 0)) {
+                        slen = MIN(v->len, width);
+                    }
+
+                    RSPAMD_PRINTF_APPEND(v->str, slen);
+                }
+                else {
+                    RSPAMD_PRINTF_APPEND("(NULL)", 6);
+                }
+
+                continue;
+
+            case 'T':
+                tok = va_arg(args, rspamd_ftok_t *);
+
+                if (tok) {
+                    slen = tok->len;
+
+                    if (G_UNLIKELY(width != 0)) {
+                        slen = MIN(tok->len, width);
+                    }
+                    RSPAMD_PRINTF_APPEND(tok->begin, slen);
+                }
+                else {
+                    RSPAMD_PRINTF_APPEND("(NULL)", 6);
+                }
+                continue;
+
+            case 'v':
+                gs = va_arg(args, GString *);
+
+                if (gs) {
+                    slen = gs->len;
+
+                    if (G_UNLIKELY(width != 0)) {
+                        slen = MIN(gs->len, width);
+                    }
+
+                    RSPAMD_PRINTF_APPEND(gs->str, slen);
+                }
+                else {
+                    RSPAMD_PRINTF_APPEND("(NULL)", 6);
+                }
+
+                continue;
+
+            case 'e':
+                err = va_arg(args, GError *);
+
+                if (err) {
+                    p = err->message;
+
+                    if (p == NULL) {
+                        p = "(NULL)";
+                    }
+                }
+                else {
+                    p = "unknown error";
+                }
+
+                slen = strlen(p);
+                RSPAMD_PRINTF_APPEND(p, slen);
+
+                continue;
+
+            case 's':
+                p = va_arg(args, gchar *);
+                if (p == NULL) {
+                    p = "(NULL)";
+                    slen = sizeof("(NULL)") - 1;
+                }
+
+                if (G_UNLIKELY(b32)) {
+                    gchar *b32buf;
+
+                    if (G_UNLIKELY(slen == -1)) {
+                        if (G_LIKELY(width != 0)) {
+                            slen = width;
+                        }
+                        else {
+                            /* NULL terminated string */
+                            slen = strlen(p);
+                        }
+                    }
+
+                    b32buf = rspamd_encode_base32(p, slen, RSPAMD_BASE32_DEFAULT);
+
+                    if (b32buf) {
+                        RSPAMD_PRINTF_APPEND(b32buf, strlen(b32buf));
+                        g_free(b32buf);
+                    }
+                    else {
+                        RSPAMD_PRINTF_APPEND("(NULL)", sizeof("(NULL)") - 1);
+                    }
+                }
+                else if (G_UNLIKELY(hex)) {
+                    gchar hexbuf[2];
+
+                    if (G_UNLIKELY(slen == -1)) {
+                        if (G_LIKELY(width != 0)) {
+                            slen = width;
+                        }
+                        else {
+                            /* NULL terminated string */
+                            slen = strlen(p);
+                        }
+                    }
+
+                    /* Two hex digits per input byte, emitted pair by pair */
+                    while (slen) {
+                        hexbuf[0] = hex == 2 ? _HEX[(*p >> 4u) & 0xfu] : _hex[(*p >> 4u) & 0xfu];
+                        hexbuf[1] = hex == 2 ? _HEX[*p & 0xfu] : _hex[*p & 0xfu];
+                        RSPAMD_PRINTF_APPEND_BUF(hexbuf, 2);
+                        p++;
+                        slen--;
+                    }
+
+                    fmt++;
+                    buf_start = fmt;
+                }
+                else if (G_UNLIKELY(b64)) {
+                    gchar *b64buf;
+                    gsize olen = 0;
+
+                    if (G_UNLIKELY(slen == -1)) {
+                        if (G_LIKELY(width != 0)) {
+                            slen = width;
+                        }
+                        else {
+                            /* NULL terminated string */
+                            slen = strlen(p);
+                        }
+                    }
+
+                    b64buf = rspamd_encode_base64(p, slen, 0, &olen);
+
+                    if (b64buf) {
+                        RSPAMD_PRINTF_APPEND(b64buf, olen);
+                        g_free(b64buf);
+                    }
+                    else {
+                        RSPAMD_PRINTF_APPEND("(NULL)", sizeof("(NULL)") - 1);
+                    }
+                }
+                else {
+                    if (slen == -1) {
+                        /* NULL terminated string */
+                        slen = strlen(p);
+                    }
+
+                    /* For plain strings the width acts as an upper limit */
+                    if (G_UNLIKELY(width != 0)) {
+                        slen = MIN(slen, width);
+                    }
+
+                    RSPAMD_PRINTF_APPEND(p, slen);
+                }
+
+                continue;
+
+            case 'O':
+                i64 = (gint64) va_arg(args, off_t);
+                sign = 1;
+                break;
+
+            case 'P':
+                i64 = (gint64) va_arg(args, pid_t);
+                sign = 1;
+                break;
+
+            case 't':
+                i64 = (gint64) va_arg(args, time_t);
+                sign = 1;
+                break;
+
+            case 'z':
+                if (sign) {
+                    i64 = (gint64) va_arg(args, ssize_t);
+                }
+                else {
+                    ui64 = (guint64) va_arg(args, size_t);
+                }
+                break;
+
+            case 'd':
+                if (sign) {
+                    i64 = (gint64) va_arg(args, gint);
+                }
+                else {
+                    ui64 = (guint64) va_arg(args, guint);
+                }
+                break;
+
+            case 'l':
+                if (sign) {
+                    i64 = (gint64) va_arg(args, glong);
+                }
+                else {
+                    ui64 = (guint64) va_arg(args, gulong);
+                }
+                break;
+
+            case 'D':
+                if (sign) {
+                    i64 = (gint64) va_arg(args, gint32);
+                }
+                else {
+                    ui64 = (guint64) va_arg(args, guint32);
+                }
+                break;
+
+            case 'L':
+                if (sign) {
+                    i64 = va_arg(args, gint64);
+                }
+                else {
+                    ui64 = va_arg(args, guint64);
+                }
+                break;
+
+
+            case 'f':
+                f = (gdouble) va_arg(args, double);
+                slen = fpconv_dtoa(f, dtoabuf, frac_width, false);
+
+                RSPAMD_PRINTF_APPEND(dtoabuf, slen);
+
+                continue;
+
+            case 'g':
+                f = (gdouble) va_arg(args, double);
+                slen = fpconv_dtoa(f, dtoabuf, 0, true);
+                RSPAMD_PRINTF_APPEND(dtoabuf, slen);
+
+                continue;
+
+            case 'F':
+                f = (gdouble) va_arg(args, long double);
+                slen = fpconv_dtoa(f, dtoabuf, frac_width, false);
+
+                RSPAMD_PRINTF_APPEND(dtoabuf, slen);
+
+                continue;
+
+            case 'G':
+                f = (gdouble) va_arg(args, long double);
+                slen = fpconv_dtoa(f, dtoabuf, 0, true);
+                RSPAMD_PRINTF_APPEND(dtoabuf, slen);
+
+                continue;
+
+            case 'p':
+                /* Pointers: upper-case hex, zero padded to the pointer size */
+                ui64 = (uintptr_t) va_arg(args, void *);
+                hex = 2;
+                sign = 0;
+                zero = '0';
+                width = sizeof(void *) * 2;
+                break;
+
+            case 'c':
+                c = va_arg(args, gint);
+                c &= 0xffu;
+                if (G_UNLIKELY(hex)) {
+                    gchar hexbuf[2];
+                    hexbuf[0] = hex == 2 ? _HEX[(c >> 4u) & 0xfu] : _hex[(c >> 4u) & 0xfu];
+                    hexbuf[1] = hex == 2 ? _HEX[c & 0xfu] : _hex[c & 0xfu];
+
+                    RSPAMD_PRINTF_APPEND(hexbuf, 2);
+                }
+                else {
+                    RSPAMD_PRINTF_APPEND(&c, 1);
+                }
+
+                continue;
+
+            case 'Z':
+                c = '\0';
+                RSPAMD_PRINTF_APPEND(&c, 1);
+
+                continue;
+
+            case 'N':
+                c = '\n';
+                RSPAMD_PRINTF_APPEND(&c, 1);
+
+                continue;
+
+            case '%':
+                c = '%';
+                RSPAMD_PRINTF_APPEND(&c, 1);
+
+                continue;
+
+            default:
+                if (*fmt == '\0') {
+                    /*
+                     * The format ends in the middle of a conversion (e.g. a
+                     * trailing '%'). Stop here: stepping over the terminator
+                     * would make the outer loop read past the string.
+                     */
+                    goto oob;
+                }
+
+                /* Unknown conversion character: print it as is */
+                c = *fmt;
+                RSPAMD_PRINTF_APPEND(&c, 1);
+
+                continue;
+            }
+
+            /* Print number */
+            p = numbuf;
+            last = p + sizeof(numbuf);
+            if (sign) {
+                if (i64 < 0) {
+                    *p++ = '-';
+                    /* Negate as unsigned: -INT64_MIN overflows a gint64 */
+                    ui64 = (guint64) 0 - (guint64) i64;
+                }
+                else {
+                    ui64 = (guint64) i64;
+                }
+            }
+
+            if (!humanize) {
+                p = rspamd_sprintf_num(p, last, ui64, zero, hex, b64 + b32, width);
+            }
+            else {
+                p = rspamd_humanize_number(p, last, ui64, bytes);
+            }
+            slen = p - numbuf;
+            RSPAMD_PRINTF_APPEND(numbuf, slen);
+        }
+        else {
+            fmt++;
+        }
+    }
+
+    /* Finish buffer */
+    if (fmt > buf_start) {
+        wr = func(buf_start, fmt - buf_start, apd);
+        if (wr <= 0) {
+            goto oob;
+        }
+        written += wr;
+    }
+
+oob:
+    return written;
+}
diff --git a/src/libutil/printf.h b/src/libutil/printf.h
new file mode 100644
index 0000000..a9420b2
--- /dev/null
+++ b/src/libutil/printf.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef PRINTF_H_
+#define PRINTF_H_
+
+#include "config.h"
+#include "fstring.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * supported formats:
+ * %[0][width][x][X]O off_t
+ * %[0][width]t time_t
+ * %[0][width][u][x|X|h|H|b|B]z ssize_t/size_t
+ * %[0][width][u][x|X|h|H|b|B]d gint/guint
+ * %[0][width][u][x|X|h|H|b|B]l long
+ * %[0][width][u][x|X|h|H|b|B]D gint32/guint32
+ * %[0][width][u][x|X|h|H|b|B]L gint64/guint64
+ * %[0][width][.width]f double
+ * %[0][width][.width]F long double
+ * %[0][width][.width]g double
+ * %[0][width][.width]G long double
+ * %P pid_t
+ * %r rlim_t
+ * %p void *
+ * %V rspamd_fstring_t *
+ * %T rspamd_ftok_t *
+ * %v GString *
+ * %s null-terminated string
+ * %xs hex encoded string
+ * %bs base32 encoded string
+ * %Bs base64 encoded string
+ * %*s length and string
+ * %Z '\0'
+ * %N '\n'
+ * %c gchar
+ * %t time_t
+ * %e GError *
+ * %% %
+ *
+ */
+
+/**
+ * Callback used for common printf operations
+ * @param buf buffer to append
+ * @param buflen length of the buffer
+ * @param ud opaque pointer
+ * @return number of characters written
+ */
+typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen,
+ gpointer ud);
+
+/* Formats to the stdio stream `f`; returns the result of rspamd_vprintf_common() */
+glong rspamd_fprintf(FILE *f, const gchar *fmt, ...);
+
+/* Same as rspamd_fprintf(), writing to stdout */
+glong rspamd_printf(const gchar *fmt, ...);
+
+/* Same as rspamd_fprintf(), but flushes `f` after writing */
+glong rspamd_log_fprintf(FILE *f, const gchar *fmt, ...);
+
+/*
+ * Formats into `buf`, storing at most `max - 1` characters followed by '\0';
+ * returns the number of characters stored before the terminator
+ */
+glong rspamd_snprintf(gchar *buf, glong max, const gchar *fmt, ...);
+
+/* va_list flavour of rspamd_snprintf(); returns a pointer to the terminating '\0' */
+gchar *rspamd_vsnprintf(gchar *buf, glong max, const gchar *fmt,
+ va_list args);
+
+/* Appends formatted output to the GString `s` */
+glong rspamd_printf_gstring(GString *s, const gchar *fmt, ...);
+
+/* va_list flavour of rspamd_printf_gstring() */
+glong rspamd_vprintf_gstring(GString *s, const gchar *fmt, va_list args);
+
+/*
+ * Appends formatted output to `*s`; the pointer stored in `*s` is replaced by
+ * the value returned from rspamd_fstring_append()
+ */
+glong rspamd_printf_fstring(rspamd_fstring_t **s, const gchar *fmt, ...);
+
+/* va_list flavour of rspamd_printf_fstring() */
+glong rspamd_vprintf_fstring(rspamd_fstring_t **s, const gchar *fmt, va_list args);
+
+/*
+ * Generic formatter: every produced fragment is passed to `func` together
+ * with the opaque pointer `apd`; returns the accumulated callback results
+ */
+glong rspamd_vprintf_common(rspamd_printf_append_func func,
+ gpointer apd,
+ const gchar *fmt,
+ va_list args);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PRINTF_H_ */
diff --git a/src/libutil/radix.c b/src/libutil/radix.c
new file mode 100644
index 0000000..93c728c
--- /dev/null
+++ b/src/libutil/radix.c
@@ -0,0 +1,434 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "radix.h"
+#include "rspamd.h"
+#include "mem_pool.h"
+#include "btrie.h"
+
+/*
+ * Logging helpers for the "radix" module. All of them expand to an expression
+ * that reads `tree->pool->tag.uid`, so a variable named `tree` must be in
+ * scope at the call site.
+ */
+#define msg_err_radix(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "radix", tree->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_radix(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "radix", tree->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_radix(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "radix", tree->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_radix(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_radix_log_id, "radix", tree->pool->tag.uid, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(radix)
+
+/* Radix trie state: a btrie plus bookkeeping used by the functions below */
+struct radix_tree_compressed {
+ rspamd_mempool_t *pool; /* pool handed to btrie_init() */
+ struct btrie *tree; /* underlying trie */
+ const gchar *name; /* name used in log messages; the pointer is stored, not copied */
+ size_t size; /* number of successful insertions */
+ guint duplicates; /* number of failed insertions */
+ gboolean own_pool; /* TRUE if pool (and this struct) are released by radix_destroy_compressed() */
+};
+
+/*
+ * Looks `key` (of `keylen` bytes) up in the trie and returns the stored
+ * value, or RADIX_NO_VALUE if btrie_lookup() finds nothing.
+ */
+uintptr_t
+radix_find_compressed(radix_compressed_t *tree, const guint8 *key, gsize keylen)
+{
+    gconstpointer found;
+
+    g_assert(tree != NULL);
+
+    found = btrie_lookup(tree->tree, key, keylen * NBBY);
+
+    return (found != NULL) ? (uintptr_t) found : RADIX_NO_VALUE;
+}
+
+
+/*
+ * Adds the prefix made of the first `keylen * NBBY - masklen` bits of `key`
+ * with the given value. Returns whatever a lookup of the full key yielded
+ * before the insertion. Failed insertions are counted and logged until the
+ * limit of reported duplicates is reached.
+ */
+uintptr_t
+radix_insert_compressed(radix_compressed_t *tree,
+                        guint8 *key, gsize keylen,
+                        gsize masklen,
+                        uintptr_t value)
+{
+    static const guint max_duplicates = 32;
+    guint keybits = keylen * NBBY;
+    uintptr_t previous;
+    gchar ip_str[INET6_ADDRSTRLEN + 1];
+    int rc;
+
+    g_assert(tree != NULL);
+    g_assert(keybits >= masklen);
+
+    msg_debug_radix("%s: want insert value %p with mask %z, key: %*xs",
+                    tree->name, (gpointer) value, keybits - masklen, (int) keylen, key);
+
+    previous = radix_find_compressed(tree, key, keylen);
+
+    rc = btrie_add_prefix(tree->tree, key, keybits - masklen,
+                          (gconstpointer) value);
+
+    if (rc == BTRIE_OKAY) {
+        tree->size++;
+
+        return previous;
+    }
+
+    tree->duplicates++;
+
+    if (tree->duplicates > max_duplicates) {
+        /* Already reported that further errors are suppressed */
+        return previous;
+    }
+
+    if (tree->duplicates == max_duplicates) {
+        msg_err_radix("%s: maximum duplicates limit reached: %d, "
+                      "suppress further errors",
+                      tree->name, max_duplicates);
+
+        return previous;
+    }
+
+    memset(ip_str, 0, sizeof(ip_str));
+
+    switch (keybits) {
+    case 32:
+        msg_err_radix("%s: cannot insert %p, key: %s/%d, duplicate value",
+                      tree->name,
+                      (gpointer) value,
+                      inet_ntop(AF_INET, key, ip_str, sizeof(ip_str) - 1),
+                      (gint) (keybits - masklen));
+        break;
+    case 128:
+        msg_err_radix("%s: cannot insert %p, key: [%s]/%d, duplicate value",
+                      tree->name,
+                      (gpointer) value,
+                      inet_ntop(AF_INET6, key, ip_str, sizeof(ip_str) - 1),
+                      (gint) (keybits - masklen));
+        break;
+    default:
+        msg_err_radix("%s: cannot insert %p with mask %z, key: %*xs, duplicate value",
+                      tree->name,
+                      (gpointer) value,
+                      keybits - masklen,
+                      (int) keylen, key);
+        break;
+    }
+
+    return previous;
+}
+
+
+/*
+ * Creates a trie that owns a freshly created memory pool. `tree_name` is
+ * stored by pointer and used in log messages.
+ */
+radix_compressed_t *
+radix_create_compressed(const gchar *tree_name)
+{
+    radix_compressed_t *result = g_malloc(sizeof(*result));
+
+    if (result == NULL) {
+        return NULL;
+    }
+
+    result->name = tree_name;
+    result->own_pool = TRUE;
+    result->size = 0;
+    result->duplicates = 0;
+    result->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), NULL, 0);
+    result->tree = btrie_init(result->pool);
+
+    return result;
+}
+
+/*
+ * Creates a trie inside a caller-provided pool: both the structure and the
+ * btrie are allocated from `pool`, which the trie does not own.
+ */
+radix_compressed_t *
+radix_create_compressed_with_pool(rspamd_mempool_t *pool, const gchar *tree_name)
+{
+    radix_compressed_t *result = rspamd_mempool_alloc(pool, sizeof(*result));
+
+    result->name = tree_name;
+    result->own_pool = FALSE;
+    result->size = 0;
+    result->duplicates = 0;
+    result->pool = pool;
+    result->tree = btrie_init(result->pool);
+
+    return result;
+}
+
+/*
+ * Releases a trie that owns its pool: the pool is deleted and the structure
+ * freed. Nothing is done for NULL or for tries living in a foreign pool.
+ */
+void radix_destroy_compressed(radix_compressed_t *tree)
+{
+    if (tree == NULL || !tree->own_pool) {
+        return;
+    }
+
+    rspamd_mempool_delete(tree->pool);
+    g_free(tree);
+}
+
+/*
+ * Looks an address up in the trie. 4-byte keys are first converted to the
+ * IPv4-mapped IPv6 form (::ffff:a.b.c.d), so they are searched as 16-byte
+ * keys. Returns RADIX_NO_VALUE when `addr` is NULL or yields no key.
+ */
+uintptr_t
+radix_find_compressed_addr(radix_compressed_t *tree,
+                           const rspamd_inet_addr_t *addr)
+{
+    static const guchar v4_mapped_prefix[12] = {0, 0, 0, 0, 0, 0,
+                                                0, 0, 0, 0, 0xffu, 0xffu};
+    const guchar *key;
+    guint klen = 0;
+    guchar mapped[16];
+
+    if (addr == NULL) {
+        return RADIX_NO_VALUE;
+    }
+
+    key = rspamd_inet_address_get_hash_key(addr, &klen);
+
+    if (key == NULL || klen == 0) {
+        return RADIX_NO_VALUE;
+    }
+
+    if (klen == 4) {
+        /* Map to ipv6 */
+        memcpy(mapped, v4_mapped_prefix, sizeof(v4_mapped_prefix));
+        memcpy(mapped + sizeof(v4_mapped_prefix), key, klen);
+
+        key = mapped;
+        klen = sizeof(mapped);
+    }
+
+    return radix_find_compressed(tree, key, klen);
+}
+
+/**
+ * Parses a list of addresses and inserts every entry into the trie.
+ *
+ * Entries have the form `addr`, `addr/mask` or `[ipv6]/mask`. IPv4 addresses
+ * are stored in the IPv4-mapped IPv6 form. If an entry is not a literal
+ * address and `resolve` is set, it is resolved with getaddrinfo() and every
+ * returned IPv4/IPv6 address is inserted with the mask of that entry.
+ *
+ * The netmask is evaluated for each entry separately: an entry without a
+ * mask is inserted as a full-length (host) prefix, no matter what mask the
+ * preceding entries had.
+ *
+ * @param list string with the entries
+ * @param separators set of characters separating the entries
+ * @param tree trie to fill (also used for logging, must not be NULL)
+ * @param value value stored for every inserted prefix
+ * @param resolve whether names may be resolved
+ * @param tree_name name used in the netmask warning
+ * @return number of insertions attempted
+ */
+gint rspamd_radix_add_iplist(const gchar *list, const gchar *separators,
+                             radix_compressed_t *tree, gconstpointer value,
+                             gboolean resolve, const gchar *tree_name)
+{
+    gchar *token, *ipnet, *err_str, **strv, **cur, *brace;
+    union {
+        struct in_addr ina;
+        struct in6_addr ina6;
+        guchar buf[16];
+    } addr_buf;
+    guint k, mask;
+    gint af;
+    gint res = 0, r;
+    struct addrinfo hints, *ai_res, *cur_ai;
+
+    /* Split string if there are multiple items inside a single string */
+    strv = g_strsplit_set(list, separators, 0);
+
+    for (cur = strv; *cur != NULL; cur++) {
+        af = AF_UNSPEC;
+        /* Each entry starts without a mask, i.e. as a host address */
+        k = G_MAXINT;
+
+        if (**cur == '\0') {
+            continue;
+        }
+
+        /* Extract ipnet */
+        ipnet = g_strstrip(*cur);
+        token = strsep(&ipnet, "/");
+
+        if (ipnet != NULL) {
+            errno = 0;
+            /* Get mask */
+            k = strtoul(ipnet, &err_str, 10);
+            if (errno != 0) {
+                msg_warn_radix(
+                    "%s: invalid netmask, error detected on symbol: %s, error: %s",
+                    tree_name,
+                    err_str,
+                    strerror(errno));
+                k = G_MAXINT;
+            }
+        }
+
+        /* Check IP */
+        if (token[0] == '[') {
+            /* Braced IPv6 */
+            brace = strrchr(token, ']');
+
+            if (brace == NULL) {
+                msg_warn_radix("invalid IP address: %s", token);
+                continue;
+            }
+
+            token++;
+            *brace = '\0';
+
+            if (inet_pton(AF_INET6, token, &addr_buf.ina6) != 1) {
+                msg_warn_radix("invalid IP address: %s", token);
+                continue;
+            }
+
+            af = AF_INET6;
+        }
+        else if (inet_pton(AF_INET, token, &addr_buf.ina) == 1) {
+            af = AF_INET;
+        }
+        else if (inet_pton(AF_INET6, token, &addr_buf.ina6) == 1) {
+            af = AF_INET6;
+        }
+        else if (!resolve) {
+            msg_warn_radix("invalid IP address: %s", token);
+            continue;
+        }
+        else {
+            memset(&hints, 0, sizeof(hints));
+            hints.ai_socktype = SOCK_STREAM; /* Type of the socket */
+            hints.ai_flags = AI_NUMERICSERV;
+            hints.ai_family = AF_UNSPEC;
+
+            if ((r = getaddrinfo(token, NULL, &hints, &ai_res)) == 0) {
+                for (cur_ai = ai_res; cur_ai != NULL;
+                     cur_ai = cur_ai->ai_next) {
+                    /* Every resolved address gets the mask of the entry */
+                    mask = k;
+
+                    if (cur_ai->ai_family == AF_INET) {
+                        struct sockaddr_in *sin;
+
+                        sin = (struct sockaddr_in *) cur_ai->ai_addr;
+                        if (mask > 32) {
+                            mask = 32;
+                        }
+
+                        /* Convert to IPv4 mapped IPv6 */
+                        memset(addr_buf.buf, 0, 10);
+                        addr_buf.buf[10] = 0xffu;
+                        addr_buf.buf[11] = 0xffu;
+                        memcpy(addr_buf.buf + 12,
+                               &sin->sin_addr, 4);
+
+                        mask += 96;
+
+                        radix_insert_compressed(tree,
+                                                addr_buf.buf,
+                                                sizeof(addr_buf.buf),
+                                                128 - mask, (uintptr_t) value);
+                        res++;
+                    }
+                    else if (cur_ai->ai_family == AF_INET6) {
+                        struct sockaddr_in6 *sin6;
+
+                        sin6 = (struct sockaddr_in6 *) cur_ai->ai_addr;
+                        if (mask > 128) {
+                            mask = 128;
+                        }
+
+                        memcpy(addr_buf.buf, &sin6->sin6_addr,
+                               sizeof(sin6->sin6_addr));
+                        radix_insert_compressed(tree,
+                                                addr_buf.buf,
+                                                sizeof(addr_buf.buf),
+                                                128 - mask, (uintptr_t) value);
+                        res++;
+                    }
+                }
+
+                freeaddrinfo(ai_res);
+            }
+            else {
+                msg_warn_radix("getaddrinfo failed for %s: %s", token,
+                               gai_strerror(r));
+            }
+
+            continue;
+        }
+
+        mask = k;
+
+        if (af == AF_INET) {
+            if (mask > 32) {
+                mask = 32;
+            }
+
+            /* Move to the last part of the address */
+            memmove(addr_buf.buf + 12, &addr_buf.ina, 4);
+            memset(addr_buf.buf, 0, 10);
+            addr_buf.buf[10] = 0xffu;
+            addr_buf.buf[11] = 0xffu;
+            mask += 96;
+            radix_insert_compressed(tree, addr_buf.buf, sizeof(addr_buf.buf),
+                                    128 - mask, (uintptr_t) value);
+            res++;
+        }
+        else if (af == AF_INET6) {
+            if (mask > 128) {
+                mask = 128;
+            }
+
+            radix_insert_compressed(tree, addr_buf.buf, sizeof(addr_buf),
+                                    128 - mask, (uintptr_t) value);
+            res++;
+        }
+    }
+
+    g_strfreev(strv);
+
+    return res;
+}
+
+/*
+ * Adds a list separated by ',', ';' or ' ' to `*tree`, creating the trie
+ * first when `*tree` is NULL. All entries share one static marker value.
+ * Returns TRUE if at least one insertion was attempted.
+ */
+gboolean
+radix_add_generic_iplist(const gchar *ip_list, radix_compressed_t **tree,
+                         gboolean resolve, const gchar *tree_name)
+{
+    static const char fill_ptr[] = "1";
+    gint nadded;
+
+    if (*tree == NULL) {
+        *tree = radix_create_compressed(tree_name);
+    }
+
+    nadded = rspamd_radix_add_iplist(ip_list, ",; ", *tree,
+                                     fill_ptr, resolve, tree_name);
+
+    return (nadded > 0);
+}
+
+
+/* Returns the number of successful insertions, or 0 for a NULL trie */
+gsize radix_get_size(radix_compressed_t *tree)
+{
+    return (tree != NULL) ? tree->size : 0;
+}
+
+
+/* Returns the memory pool used by the trie, or NULL for a NULL trie */
+rspamd_mempool_t *
+radix_get_pool(radix_compressed_t *tree)
+{
+    return (tree != NULL) ? tree->pool : NULL;
+}
+
+/*
+ * Returns the string produced by btrie_stats() for the trie and its
+ * duplicates counter, or NULL for a NULL trie.
+ */
+const gchar *
+radix_get_info(radix_compressed_t *tree)
+{
+    return (tree != NULL) ? btrie_stats(tree->tree, tree->duplicates) : NULL;
+}
diff --git a/src/libutil/radix.h b/src/libutil/radix.h
new file mode 100644
index 0000000..a85da5b
--- /dev/null
+++ b/src/libutil/radix.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RADIX_H
+#define RADIX_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "util.h"
+
+/* Sentinel meaning "no value"; fully parenthesized so it is safe inside any expression (e.g. sizeof, arithmetic) */
+#define RADIX_NO_VALUE ((uintptr_t) -1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct radix_tree_compressed radix_compressed_t;
+
+/**
+ * Insert new key to the radix trie
+ * @param tree radix trie
+ * @param key key to insert (bitstring)
+ * @param keylen length of the key (in bytes)
+ * @param masklen length of mask that should be applied to the key (in bits)
+ * @param value opaque value pointer
+ * @return previous value of the key or `RADIX_NO_VALUE`
+ */
+uintptr_t
+radix_insert_compressed(radix_compressed_t *tree,
+ guint8 *key, gsize keylen,
+ gsize masklen,
+ uintptr_t value);
+
+/**
+ * Find a key in a radix trie
+ * @param tree radix trie
+ * @param key key to find (bitstring)
+ * @param keylen length of a key
+ * @return opaque pointer or `RADIX_NO_VALUE` if no value has been found
+ */
+uintptr_t radix_find_compressed(radix_compressed_t *tree, const guint8 *key,
+ gsize keylen);
+
+/**
+ * Find specified address in tree (works for IPv4 or IPv6 addresses)
+ * @param tree
+ * @param addr
+ * @return
+ */
+uintptr_t radix_find_compressed_addr(radix_compressed_t *tree,
+ const rspamd_inet_addr_t *addr);
+
+/**
+ * Destroy the complete radix trie
+ * @param tree
+ */
+void radix_destroy_compressed(radix_compressed_t *tree);
+
+/**
+ * Create new radix trie
+ * @return
+ */
+radix_compressed_t *radix_create_compressed(const gchar *tree_name);
+
+radix_compressed_t *radix_create_compressed_with_pool(rspamd_mempool_t *pool, const gchar *tree_name);
+
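+/**
+ * Insert a list of IP addresses (optionally with masks) into the radix tree
+ * @param list string containing the addresses
+ * @param separators string of characters used as separators between addresses
+ * @param tree target tree
+ * @param value opaque value stored for every inserted address
+ * @param resolve whether entries that are not literal IP addresses may be resolved as host names
+ * @param tree_name name of the tree
+ * @return number of elements inserted
+ */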
+gint rspamd_radix_add_iplist(const gchar *list, const gchar *separators,
+ radix_compressed_t *tree, gconstpointer value,
+ gboolean resolve, const gchar *tree_name);
+
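+/**
+ * Generic version of @see rspamd_radix_add_iplist. This function creates the
+ * tree if `*tree` is NULL, splits `ip_list` on any of ",; " and returns TRUE
+ * if at least one element has been inserted.
+ */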
+gboolean
+radix_add_generic_iplist(const gchar *ip_list,
+ radix_compressed_t **tree,
+ gboolean resolve,
+ const gchar *tree_name);
+
+/**
+ * Returns number of elements in the tree
+ * @param tree radix trie (may be NULL)
+ * @return size of the tree or 0 if `tree` is NULL
+ */
+gsize radix_get_size(radix_compressed_t *tree);
+
+/**
+ * Return string that describes this radix tree (memory, nodes, compression etc)
+ * @param tree radix trie (may be NULL)
+ * @return constant string or NULL if `tree` is NULL
+ */
+const gchar *radix_get_info(radix_compressed_t *tree);
+
+/**
+ * Returns memory pool associated with the radix tree or NULL if `tree` is NULL
+ */
+rspamd_mempool_t *radix_get_pool(radix_compressed_t *tree);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libutil/ref.h b/src/libutil/ref.h
new file mode 100644
index 0000000..2a3fd8d
--- /dev/null
+++ b/src/libutil/ref.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef REF_H_
+#define REF_H_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+/**
+ * @file ref.h
+ * A set of macros to handle refcounts
+ */
+
+typedef void (*ref_dtor_cb_t)(void *data);
+
+/* Reference-counting header; the REF_* macros expect it to be embedded in an object as a member named `ref` */
+typedef struct ref_entry_s {
+ unsigned int refcount; /* current number of references */
+ ref_dtor_cb_t dtor; /* called with the object when the count drops to zero (may be NULL) */
+} ref_entry_t;
+
+/* Initialise the refcount of `obj` to 0 and remember `dtor_cb` as its destructor; does nothing for a NULL `obj` */
+#define REF_INIT(obj, dtor_cb) \
+ do { \
+ if ((obj) != NULL) { \
+ (obj)->ref.refcount = 0; \
+ (obj)->ref.dtor = (ref_dtor_cb_t) (dtor_cb); \
+ } \
+ } while (0)
+
+/* Same as REF_INIT, but the object starts with one reference already held */
+#define REF_INIT_RETAIN(obj, dtor_cb) \
+ do { \
+ if ((obj) != NULL) { \
+ (obj)->ref.refcount = 1; \
+ (obj)->ref.dtor = (ref_dtor_cb_t) (dtor_cb); \
+ } \
+ } while (0)
+
+#ifdef HAVE_ATOMIC_BUILTINS
+/* Atomically add one reference to `obj`; does nothing for a NULL `obj` */
+#define REF_RETAIN_ATOMIC(obj) \
+	do { \
+		if ((obj) != NULL) { \
+			__atomic_add_fetch(&(obj)->ref.refcount, 1, __ATOMIC_RELEASE); \
+		} \
+	} while (0)
+
+/*
+ * Atomically drop one reference from `obj` and call its destructor (if any)
+ * when the counter reaches zero; does nothing for a NULL `obj`
+ */
+#define REF_RELEASE_ATOMIC(obj) \
+	do { \
+		if ((obj) != NULL) { \
+			unsigned int _rc_priv = __atomic_sub_fetch(&(obj)->ref.refcount, 1, __ATOMIC_ACQ_REL); \
+			if (_rc_priv == 0 && (obj)->ref.dtor) { \
+				(obj)->ref.dtor(obj); \
+			} \
+		} \
+	} while (0)
+
+#else
+/*
+ * No atomic builtins: fall back to the plain variants.
+ * REF_RELEASE_ATOMIC used to be defined as itself here, which left the name
+ * unexpanded and turned every use into a call to an undeclared function.
+ */
+#define REF_RETAIN_ATOMIC REF_RETAIN
+#define REF_RELEASE_ATOMIC REF_RELEASE
+#endif
+
+/* Add one reference to `obj` (non-atomic increment); does nothing for a NULL `obj` */
+#define REF_RETAIN(obj) \
+ do { \
+ if ((obj) != NULL) { \
+ (obj)->ref.refcount++; \
+ } \
+ } while (0)
+
+/*
+ * Drop one reference from `obj` (non-atomic decrement) and call its destructor,
+ * if one is set, when the counter reaches zero; does nothing for a NULL `obj`
+ */
+#define REF_RELEASE(obj) \
+ do { \
+ if ((obj) != NULL) { \
+ if (--(obj)->ref.refcount == 0 && (obj)->ref.dtor) { \
+ (obj)->ref.dtor(obj); \
+ } \
+ } \
+ } while (0)
+
+#endif /* REF_H_ */
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
new file mode 100644
index 0000000..9f143ac
--- /dev/null
+++ b/src/libutil/regexp.c
@@ -0,0 +1,1359 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "regexp.h"
+#include "cryptobox.h"
+#include "ref.h"
+#include "util.h"
+#include "rspamd.h"
+#include "contrib/fastutf8/fastutf8.h"
+
+#ifndef WITH_PCRE2
+/* Normal pcre path */
+#include <pcre.h>
+#define PCRE_T pcre
+#define PCRE_EXTRA_T pcre_extra
+#define PCRE_JIT_T pcre_jit_stack
+#define PCRE_FREE pcre_free
+#define PCRE_JIT_STACK_FREE pcre_jit_stack_free
+#define PCRE_FLAG(x) G_PASTE(PCRE_, x)
+#else
+/* PCRE 2 path */
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+
+#include <pcre2.h>
+#define PCRE_T pcre2_code
+#define PCRE_JIT_T pcre2_jit_stack
+#define PCRE_FREE pcre2_code_free
+#define PCRE_JIT_STACK_FREE pcre2_jit_stack_free
+
+#define PCRE_FLAG(x) G_PASTE(PCRE2_, x)
+#endif
+
+typedef guchar regexp_id_t[rspamd_cryptobox_HASHBYTES];
+
+#undef DISABLE_JIT_FAST
+
+/* In-memory representation of a compiled regexp */
+struct rspamd_regexp_s {
+ gdouble exec_time;
+ gchar *pattern; /* heap copy of the pattern text handed to the PCRE compiler */
+ PCRE_T *re; /* compiled expression */
+ PCRE_T *raw_re; /* expression used for raw matching: equals `re` for raw regexps, may be NULL */
+#ifndef WITH_PCRE2
+ PCRE_EXTRA_T *extra; /* pcre_study() data for `re` */
+ PCRE_EXTRA_T *raw_extra; /* study data for `raw_re` (shared with `extra` when raw_re == re) */
+#else
+ pcre2_match_context *mcontext; /* match context for `re` */
+ pcre2_match_context *raw_mcontext; /* match context for `raw_re` (shared with `mcontext` when raw_re == re) */
+#endif
+ regexp_id_t id; /* hash of flags and pattern, used as the cache key */
+ ref_entry_t ref; /* reference counter and destructor */
+ gpointer ud; /* opaque user data (rspamd_regexp_set_ud) */
+ gpointer re_class; /* opaque class pointer (rspamd_regexp_set_class) */
+ guint64 cache_id; /* RSPAMD_INVALID_ID until set by rspamd_regexp_set_cache_id */
+ gsize match_limit; /* if non-zero, searches look at no more than this many input bytes */
+ guint max_hits;
+ gint flags; /* RSPAMD_REGEXP_FLAG_* bits */
+ gint pcre_flags; /* options passed to the PCRE compiler */
+ gint ncaptures; /* number of capture groups reported by PCRE */
+};
+
+/* Cache of compiled regexps keyed by regexp id */
+struct rspamd_regexp_cache {
+ GHashTable *tbl; /* regexp id -> rspamd_regexp_t *, values are unreferenced on removal */
+#ifdef HAVE_PCRE_JIT
+ PCRE_JIT_T *jstack; /* JIT stack assigned to the regexps processed by this module */
+#endif
+};
+
+static struct rspamd_regexp_cache *global_re_cache = NULL;
+static gboolean can_jit = FALSE;
+static gboolean check_jit = TRUE;
+static const int max_re_cache_size = 8192;
+
+#ifdef WITH_PCRE2
+static pcre2_compile_context *pcre2_ctx = NULL;
+#endif
+
+/* Error domain used for every GError produced by this module */
+static GQuark
+rspamd_regexp_quark(void)
+{
+	const GQuark domain = g_quark_from_static_string("rspamd-regexp");
+
+	return domain;
+}
+
+/**
+ * Compute the identifier of a regexp: a cryptobox hash over the optional
+ * flags string followed by the pattern string.
+ * @param pattern NUL-terminated pattern
+ * @param flags NUL-terminated flags or NULL
+ * @param out buffer that receives the digest
+ */
+static void
+rspamd_regexp_generate_id(const gchar *pattern, const gchar *flags,
+						  regexp_id_t out)
+{
+	rspamd_cryptobox_hash_state_t hash_st;
+	const gsize pattern_len = strlen(pattern);
+
+	rspamd_cryptobox_hash_init(&hash_st, NULL, 0);
+
+	if (flags != NULL) {
+		rspamd_cryptobox_hash_update(&hash_st, flags, strlen(flags));
+	}
+
+	rspamd_cryptobox_hash_update(&hash_st, pattern, pattern_len);
+	rspamd_cryptobox_hash_final(&hash_st, out);
+}
+
+/**
+ * Reference-count destructor: releases the compiled expressions, their
+ * study data / match contexts, the pattern copy and the structure itself.
+ * A NULL argument is ignored.
+ */
+static void
+rspamd_regexp_dtor(rspamd_regexp_t *re)
+{
+	if (re == NULL) {
+		return;
+	}
+
+	/* The raw variant owns resources only when it is a distinct compilation */
+	if (re->raw_re != NULL && re->raw_re != re->re) {
+#if !defined(WITH_PCRE2)
+#ifdef HAVE_PCRE_JIT
+		if (re->raw_extra != NULL) {
+			pcre_free_study(re->raw_extra);
+		}
+#endif
+#else
+		if (re->raw_mcontext != NULL) {
+			pcre2_match_context_free(re->raw_mcontext);
+		}
+#endif
+		PCRE_FREE(re->raw_re);
+	}
+
+	if (re->re != NULL) {
+#if !defined(WITH_PCRE2)
+#ifdef HAVE_PCRE_JIT
+		if (re->extra != NULL) {
+			pcre_free_study(re->extra);
+		}
+#endif
+#else
+		if (re->mcontext != NULL) {
+			pcre2_match_context_free(re->mcontext);
+		}
+#endif
+		PCRE_FREE(re->re);
+	}
+
+	/* g_free() accepts NULL, so the pattern needs no guard */
+	g_free(re->pattern);
+	g_free(re);
+}
+
+/**
+ * Finish the construction of a freshly compiled regexp.
+ *
+ * PCRE2: creates the match context(s) with recursion/backtrack limits and,
+ * when JIT is available, JIT-compiles the expression(s) and assigns the
+ * cache-wide JIT stack.
+ * PCRE1: studies the expression(s) and, when JIT is available, assigns the
+ * cache-wide JIT stack.
+ *
+ * RSPAMD_REGEXP_FLAG_DISABLE_JIT is set in r->flags whenever JIT cannot be
+ * used for this regexp.
+ *
+ * @param r regexp with `re` (and optionally `raw_re`) already compiled
+ */
+static void
+rspamd_regexp_post_process(rspamd_regexp_t *r)
+{
+	if (global_re_cache == NULL) {
+		rspamd_regexp_library_init(NULL);
+	}
+#if defined(WITH_PCRE2)
+	static const guint max_recursion_depth = 100000, max_backtrack = 1000000;
+
+	/* Create match context */
+	r->mcontext = pcre2_match_context_create(NULL);
+	g_assert(r->mcontext != NULL);
+	pcre2_set_recursion_limit(r->mcontext, max_recursion_depth);
+	pcre2_set_match_limit(r->mcontext, max_backtrack);
+
+	if (r->raw_re && r->re != r->raw_re) {
+		/* Distinct raw compilation gets its own context */
+		r->raw_mcontext = pcre2_match_context_create(NULL);
+		g_assert(r->raw_mcontext != NULL);
+		pcre2_set_recursion_limit(r->raw_mcontext, max_recursion_depth);
+		pcre2_set_match_limit(r->raw_mcontext, max_backtrack);
+	}
+	else if (r->raw_re) {
+		/* Same compiled code: share the context */
+		r->raw_mcontext = r->mcontext;
+	}
+	else {
+		r->raw_mcontext = NULL;
+	}
+
+#ifdef HAVE_PCRE_JIT
+	guint jit_flags = can_jit ? PCRE2_JIT_COMPLETE : 0;
+	gsize jsz;
+	PCRE2_UCHAR errstr[128];
+	int errcode;
+
+	if (can_jit) {
+		if ((errcode = pcre2_jit_compile(r->re, jit_flags)) < 0) {
+			pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr));
+			msg_err("jit compilation is not supported: %s; pattern: \"%s\"", errstr, r->pattern);
+			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+		}
+		else {
+			/* A zero JIT size means that no JIT code has actually been produced */
+			if (!(pcre2_pattern_info(r->re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
+				msg_err("cannot exec pcre2_pattern_info(PCRE2_INFO_JITSIZE) on \"%s\"", r->pattern);
+				r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+			}
+		}
+	}
+	else {
+		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+	}
+
+	if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
+		pcre2_jit_stack_assign(r->mcontext, NULL, global_re_cache->jstack);
+	}
+
+	if (r->raw_re && r->re != r->raw_re && !(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
+		if ((errcode = pcre2_jit_compile(r->raw_re, jit_flags)) < 0) {
+			pcre2_get_error_message(errcode, errstr, G_N_ELEMENTS(errstr));
+			msg_debug("jit compilation is not supported for raw regexp: %s; pattern: \"%s\"", errstr, r->pattern);
+			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+		}
+		else {
+			if (!(pcre2_pattern_info(r->raw_re, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0)) {
+				msg_err("cannot exec pcre2_pattern_info(PCRE2_INFO_JITSIZE) on \"%s\"", r->pattern);
+			}
+			else if (!(r->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT)) {
+				g_assert(r->raw_mcontext != NULL);
+				pcre2_jit_stack_assign(r->raw_mcontext, NULL, global_re_cache->jstack);
+			}
+		}
+	}
+#endif
+
+#else
+	const gchar *err_str = "unknown";
+	gboolean try_jit = TRUE, try_raw_jit = TRUE;
+	gint study_flags = 0;
+
+#if defined(HAVE_PCRE_JIT)
+	study_flags |= PCRE_STUDY_JIT_COMPILE;
+#endif
+
+	/* Pcre 1 needs study */
+	if (r->re) {
+		r->extra = pcre_study(r->re, study_flags, &err_str);
+
+		if (r->extra == NULL) {
+			msg_debug("cannot optimize regexp pattern: '%s': %s",
+					  r->pattern, err_str);
+			try_jit = FALSE;
+			r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+		}
+	}
+	else {
+		g_assert_not_reached();
+	}
+
+	if (r->raw_re && r->raw_re != r->re) {
+		/*
+		 * Study the raw compilation itself: the study data is later passed
+		 * to pcre_exec()/pcre_fullinfo() together with raw_re, so it must
+		 * not be derived from `re`
+		 */
+		r->raw_extra = pcre_study(r->raw_re, study_flags, &err_str);
+	}
+	else if (r->raw_re == r->re) {
+		r->raw_extra = r->extra;
+	}
+
+	if (r->raw_extra == NULL) {
+
+		msg_debug("cannot optimize raw regexp pattern: '%s': %s",
+				  r->pattern, err_str);
+		try_raw_jit = FALSE;
+	}
+	/* JIT path */
+	if (try_jit) {
+#ifdef HAVE_PCRE_JIT
+		gint jit, n;
+
+		if (can_jit) {
+			jit = 0;
+			n = pcre_fullinfo(r->re, r->extra,
+							  PCRE_INFO_JIT, &jit);
+
+			if (n != 0 || jit != 1) {
+				msg_debug("jit compilation of %s is not supported", r->pattern);
+				r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+			}
+			else {
+				pcre_assign_jit_stack(r->extra, NULL, global_re_cache->jstack);
+			}
+		}
+#endif
+	}
+	else {
+		msg_debug("cannot optimize regexp pattern: '%s': %s",
+				  r->pattern, err_str);
+		r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+	}
+
+	if (try_raw_jit) {
+#ifdef HAVE_PCRE_JIT
+		gint jit, n;
+
+		if (can_jit) {
+
+			/* When raw_re == re the stack has already been assigned above */
+			if (r->raw_re != r->re) {
+				jit = 0;
+				n = pcre_fullinfo(r->raw_re, r->raw_extra,
+								  PCRE_INFO_JIT, &jit);
+
+				if (n != 0 || jit != 1) {
+					msg_debug("jit compilation of %s is not supported", r->pattern);
+					r->flags |= RSPAMD_REGEXP_FLAG_DISABLE_JIT;
+				}
+				else {
+					pcre_assign_jit_stack(r->raw_extra, NULL,
+										  global_re_cache->jstack);
+				}
+			}
+		}
+#endif
+	}
+#endif /* WITH_PCRE2 */
+}
+
+/**
+ * Compile a regexp from a pattern of explicit length.
+ *
+ * When `flags` is NULL and the pattern is longer than one character, the
+ * pattern may carry its own delimiters and flags (`/expr/flags` or
+ * `m<sep>expr<sep>flags`, the latter also requesting a full match);
+ * unknown flags found there stop flag parsing with a warning. When `flags`
+ * is given, the whole pattern is the expression and unknown flags are an
+ * error.
+ *
+ * Supported flags: i, m, s, x, u (UTF mode), r (raw mode), O, L.
+ *
+ * @param pattern pattern text (must not be NULL)
+ * @param len length of the pattern in bytes
+ * @param flags optional NUL-terminated flags string
+ * @param err set when NULL is returned because of invalid input
+ * @return new regexp holding one reference, or NULL on error
+ */
+rspamd_regexp_t *
+rspamd_regexp_new_len(const gchar *pattern, gsize len, const gchar *flags,
+					  GError **err)
+{
+	const gchar *start = pattern, *end = NULL, *flags_str = NULL, *flags_end = NULL;
+	gchar *err_str;
+	rspamd_regexp_t *res;
+	gboolean explicit_utf = FALSE;
+	PCRE_T *r;
+	gchar sep = 0, *real_pattern;
+#ifndef WITH_PCRE2
+	gint err_off;
+#else
+	gsize err_off;
+#endif
+	gint regexp_flags = 0, rspamd_flags = 0, err_code, ncaptures;
+	gboolean strict_flags = FALSE;
+
+	rspamd_regexp_library_init(NULL);
+
+	if (pattern == NULL) {
+		g_set_error(err, rspamd_regexp_quark(), EINVAL,
+					"cannot create regexp from a NULL pattern");
+		return NULL;
+	}
+
+	/* Computed only after the NULL check: no pointer arithmetic on NULL */
+	end = start + len;
+
+	if (flags == NULL && start + 1 < end) {
+		/* We need to parse pattern and detect flags set */
+		if (*start == '/') {
+			sep = '/';
+		}
+		else if (*start == 'm' && start[1] != '\\' && g_ascii_ispunct(start[1])) {
+			start++;
+			sep = *start;
+
+			/* Paired braces */
+			if (sep == '{') {
+				sep = '}';
+			}
+
+			rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH;
+		}
+		if (sep == 0) {
+			/* We have no flags, no separators and just use all line as expr */
+			start = pattern;
+			rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH;
+		}
+		else {
+			gchar *last_sep = rspamd_memrchr(pattern, sep, len);
+
+			if (last_sep == NULL || last_sep <= start) {
+				g_set_error(err, rspamd_regexp_quark(), EINVAL,
+							"pattern is not enclosed with %c: %s",
+							sep, pattern);
+				return NULL;
+			}
+			/* Everything after the closing separator is the flags string */
+			flags_str = last_sep + 1;
+			flags_end = end;
+			end = last_sep;
+			start++;
+		}
+	}
+	else {
+		/* Strictly check all flags */
+		strict_flags = TRUE;
+		start = pattern;
+		flags_str = flags;
+		if (flags) {
+			flags_end = flags + strlen(flags);
+		}
+	}
+
+	/* Raw mode is the default; only the 'u' flag switches it off */
+	rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+
+#ifndef WITH_PCRE2
+	regexp_flags &= ~PCRE_FLAG(UTF8);
+	regexp_flags |= PCRE_FLAG(NEWLINE_ANYCRLF);
+#else
+	regexp_flags &= ~PCRE_FLAG(UTF);
+#endif
+
+	if (flags_str != NULL) {
+		while (flags_str < flags_end) {
+			switch (*flags_str) {
+			case 'i':
+				regexp_flags |= PCRE_FLAG(CASELESS);
+				break;
+			case 'm':
+				regexp_flags |= PCRE_FLAG(MULTILINE);
+				break;
+			case 's':
+				regexp_flags |= PCRE_FLAG(DOTALL);
+				break;
+			case 'x':
+				regexp_flags |= PCRE_FLAG(EXTENDED);
+				break;
+			case 'u':
+				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_UTF;
+#ifndef WITH_PCRE2
+				regexp_flags |= PCRE_FLAG(UTF8);
+#else
+				regexp_flags |= PCRE_FLAG(UTF);
+#endif
+				explicit_utf = TRUE;
+				break;
+			case 'O':
+				/* We optimize all regexps by default */
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
+				break;
+			case 'L':
+				/* SOM_LEFTMOST hyperscan flag */
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_LEFTMOST;
+				break;
+			case 'r':
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
+				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
+#ifndef WITH_PCRE2
+				regexp_flags &= ~PCRE_FLAG(UTF8);
+#else
+				regexp_flags &= ~PCRE_FLAG(UTF);
+#endif
+				break;
+			default:
+				if (strict_flags) {
+					g_set_error(err, rspamd_regexp_quark(), EINVAL,
+								"invalid regexp flag: %c in pattern %s",
+								*flags_str, pattern);
+					return NULL;
+				}
+				msg_warn("invalid flag '%c' in pattern %s", *flags_str, pattern);
+				goto fin;
+				break;
+			}
+			flags_str++;
+		}
+	}
+fin:
+
+	/* NUL-terminated copy of the expression without separators and flags */
+	real_pattern = g_malloc(end - start + 1);
+	rspamd_strlcpy(real_pattern, start, end - start + 1);
+
+#ifndef WITH_PCRE2
+	r = pcre_compile(real_pattern, regexp_flags,
+					 (const char **) &err_str, &err_off, NULL);
+	(void) err_code;
+#else
+	r = pcre2_compile(real_pattern, PCRE2_ZERO_TERMINATED,
+					  regexp_flags,
+					  &err_code, &err_off, pcre2_ctx);
+
+	if (r == NULL) {
+		err_str = g_alloca(1024);
+		memset(err_str, 0, 1024);
+		pcre2_get_error_message(err_code, err_str, 1024);
+	}
+#endif
+
+	if (r == NULL) {
+		g_set_error(err, rspamd_regexp_quark(), EINVAL,
+					"regexp parsing error: '%s' at position %d; pattern: %s",
+					err_str, (gint) err_off, real_pattern);
+		g_free(real_pattern);
+
+		return NULL;
+	}
+
+	/* Now allocate the target structure */
+	res = g_malloc0(sizeof(*res));
+	REF_INIT_RETAIN(res, rspamd_regexp_dtor);
+	res->flags = rspamd_flags;
+	res->pattern = real_pattern;
+	res->cache_id = RSPAMD_INVALID_ID;
+	res->pcre_flags = regexp_flags;
+	res->max_hits = 0;
+	res->re = r;
+
+	if (rspamd_flags & RSPAMD_REGEXP_FLAG_RAW) {
+		res->raw_re = r;
+	}
+	else if (!explicit_utf) {
+#ifndef WITH_PCRE2
+		res->raw_re = pcre_compile(real_pattern, regexp_flags & ~PCRE_FLAG(UTF8),
+								   (const char **) &err_str, &err_off, NULL);
+		(void) err_code;
+#else
+		res->raw_re = pcre2_compile(real_pattern, PCRE2_ZERO_TERMINATED,
+									regexp_flags & ~PCRE_FLAG(UTF),
+									&err_code, &err_off, pcre2_ctx);
+		if (res->raw_re == NULL) {
+			err_str = g_alloca(1024);
+			memset(err_str, 0, 1024);
+			pcre2_get_error_message(err_code, err_str, 1024);
+		}
+#endif
+		if (res->raw_re == NULL) {
+			msg_warn("raw regexp parsing error: '%s': '%s' at position %d",
+					 err_str, real_pattern, (gint) err_off);
+		}
+	}
+
+	rspamd_regexp_post_process(res);
+	rspamd_regexp_generate_id(pattern, flags, res->id);
+
+	/*
+	 * Check number of captures. The query goes to `re`, which is always
+	 * compiled at this point: `raw_re` is NULL for regexps created with the
+	 * 'u' flag, so asking it left ncaptures at 0 for every UTF regexp.
+	 * Both compilations come from the same pattern text.
+	 */
+#ifndef WITH_PCRE2
+	if (pcre_fullinfo(res->re, res->extra, PCRE_INFO_CAPTURECOUNT,
+					  &ncaptures) == 0) {
+		res->ncaptures = ncaptures;
+	}
+#else
+	if (pcre2_pattern_info(res->re, PCRE2_INFO_CAPTURECOUNT,
+						   &ncaptures) == 0) {
+		res->ncaptures = ncaptures;
+	}
+#endif
+
+	return res;
+}
+
+/**
+ * Compile a regexp from a NUL-terminated pattern.
+ * @param pattern pattern text; NULL is rejected with `err` set
+ * @param flags optional flags string (may be enclosed inside the pattern instead)
+ * @param err set when NULL is returned because of invalid input
+ * @return new regexp holding one reference, or NULL on error
+ */
+rspamd_regexp_t *
+rspamd_regexp_new(const gchar *pattern, const gchar *flags,
+				  GError **err)
+{
+	/*
+	 * rspamd_regexp_new_len reports a NULL pattern through `err`, so do not
+	 * call strlen() on it here
+	 */
+	const gsize len = (pattern != NULL) ? strlen(pattern) : 0;
+
+	return rspamd_regexp_new_len(pattern, len, flags, err);
+}
+
+#ifndef WITH_PCRE2
+/**
+ * Search `text` for the next match of `re` (PCRE1 version).
+ *
+ * @param re regexp to execute
+ * @param text input buffer
+ * @param len input length; 0 means strlen(text); capped by re->match_limit when that is set
+ * @param start if not NULL, receives the beginning of the match
+ * @param end if not NULL and *end is not NULL, the search resumes from *end
+ * (incremental search); on success receives the end of the match
+ * @param raw use the raw compilation of the regexp
+ * @param captures optional GArray of struct rspamd_re_capture filled with the matched groups
+ * @return TRUE if a match has been found (and, for full-match regexps, it spans the input)
+ */
+gboolean
+rspamd_regexp_search(const rspamd_regexp_t *re, const gchar *text, gsize len,
+ const gchar **start, const gchar **end, gboolean raw,
+ GArray *captures)
+{
+ pcre *r;
+ pcre_extra *ext;
+#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST)
+ pcre_jit_stack *st = NULL;
+#endif
+ const gchar *mt;
+ gsize remain = 0;
+ gint rc, match_flags = 0, *ovec, ncaptures, i;
+ /* Sentinel written to the offsets vector to detect slots PCRE did not fill */
+ const int junk = 0xdeadbabe;
+
+ g_assert(re != NULL);
+ g_assert(text != NULL);
+
+ if (len == 0) {
+ len = strlen(text);
+ }
+
+ if (re->match_limit > 0 && len > re->match_limit) {
+ len = re->match_limit;
+ }
+
+ if (end != NULL && *end != NULL) {
+ /* Incremental search */
+ mt = (*end);
+
+ if ((gint) len > (mt - text)) {
+ remain = len - (mt - text);
+ }
+ }
+ else {
+ mt = text;
+ remain = len;
+ }
+
+ /* Nothing left to scan */
+ if (remain == 0) {
+ return FALSE;
+ }
+
+ match_flags = PCRE_NEWLINE_ANYCRLF;
+
+ /* Select the compiled expression and its study data */
+ if ((re->flags & RSPAMD_REGEXP_FLAG_RAW) || raw) {
+ r = re->raw_re;
+ ext = re->raw_extra;
+#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST)
+ st = global_re_cache->jstack;
+#endif
+ }
+ else {
+ r = re->re;
+ ext = re->extra;
+#if defined(HAVE_PCRE_JIT) && defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST)
+ /* The input is validated as UTF-8 before it is handed to the JIT fast path */
+ if (rspamd_fast_utf8_validate(mt, remain) == 0) {
+ st = global_re_cache->jstack;
+ }
+ else {
+ msg_err("bad utf8 input for JIT re '%s'", re->pattern);
+ return FALSE;
+ }
+#endif
+ }
+
+ if (r == NULL) {
+ /* Invalid regexp type for the specified input */
+ return FALSE;
+ }
+
+ /* Three integers per group, including the whole match as group 0 */
+ ncaptures = (re->ncaptures + 1) * 3;
+ ovec = g_alloca(sizeof(gint) * ncaptures);
+
+
+ for (i = 0; i < ncaptures; i++) {
+ ovec[i] = junk;
+ }
+
+ if (!(re->flags & RSPAMD_REGEXP_FLAG_NOOPT)) {
+#ifdef HAVE_PCRE_JIT
+#if defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST)
+ /* XXX: flags seems to be broken with jit fast path */
+ g_assert(remain > 0);
+ g_assert(mt != NULL);
+
+ if (st != NULL && !(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) {
+ rc = pcre_jit_exec(r, ext, mt, remain, 0, 0, ovec,
+ ncaptures, st);
+ }
+ else {
+ rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec,
+ ncaptures);
+ }
+#else
+ rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec,
+ ncaptures);
+#endif
+#else
+ rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec,
+ ncaptures);
+#endif
+ }
+ else {
+ rc = pcre_exec(r, ext, mt, remain, 0, match_flags, ovec,
+ ncaptures);
+ }
+
+ if (rc >= 0) {
+ if (rc > 0) {
+ if (start) {
+ *start = mt + ovec[0];
+ }
+ if (end) {
+ *end = mt + ovec[1];
+ }
+ }
+ else {
+ /* rc == 0: the whole remaining input is reported as the match */
+ if (start) {
+ *start = mt;
+ }
+ if (end) {
+ *end = mt + remain;
+ }
+ }
+
+ if (captures != NULL && rc >= 1) {
+ struct rspamd_re_capture *elt;
+
+ g_assert(g_array_get_element_size(captures) ==
+ sizeof(struct rspamd_re_capture));
+ g_array_set_size(captures, rc);
+
+ for (i = 0; i < rc; i++) {
+ if (ovec[i * 2] != junk && ovec[i * 2] >= 0) {
+ elt = &g_array_index(captures, struct rspamd_re_capture, i);
+ elt->p = mt + ovec[i * 2];
+ elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+ }
+ else {
+ /* Runtime match returned fewer captures than expected */
+ g_array_set_size(captures, i);
+ break;
+ }
+ }
+ }
+
+ if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) {
+ /* We also ensure that the match is full */
+ if (ovec[0] != 0 || (guint) ovec[1] < len) {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+#else
+/* PCRE 2 version */
+/**
+ * Search `text` for the next match of `re` (PCRE2 version).
+ *
+ * @param re regexp to execute
+ * @param text input buffer
+ * @param len input length; 0 means strlen(text); capped by re->match_limit when that is set
+ * @param start if not NULL, receives the beginning of the match
+ * @param end if not NULL and *end is not NULL, the search resumes from *end
+ * (incremental search); on success receives the end of the match
+ * @param raw use the raw compilation of the regexp
+ * @param captures optional GArray of struct rspamd_re_capture filled with the matched groups
+ * @return TRUE if a match has been found (and, for full-match regexps, it spans the input)
+ */
+gboolean
+rspamd_regexp_search(const rspamd_regexp_t *re, const gchar *text, gsize len,
+					 const gchar **start, const gchar **end, gboolean raw,
+					 GArray *captures)
+{
+	pcre2_match_data *match_data;
+	pcre2_match_context *mcontext;
+	PCRE_T *r;
+	const gchar *mt;
+	PCRE2_SIZE remain = 0, *ovec;
+	/* Sentinel written to the offsets vector to detect slots PCRE2 did not fill */
+	const PCRE2_SIZE junk = 0xdeadbabeeeeeeeeULL;
+	gint rc, match_flags, novec, i;
+	gboolean ret = FALSE;
+
+	g_assert(re != NULL);
+	g_assert(text != NULL);
+
+	if (len == 0) {
+		len = strlen(text);
+	}
+
+	if (re->match_limit > 0 && len > re->match_limit) {
+		len = re->match_limit;
+	}
+
+	if (end != NULL && *end != NULL) {
+		/* Incremental search */
+		mt = (*end);
+
+		if ((gint) len > (mt - text)) {
+			remain = len - (mt - text);
+		}
+	}
+	else {
+		mt = text;
+		remain = len;
+	}
+
+	/* Nothing left to scan */
+	if (remain == 0) {
+		return FALSE;
+	}
+
+	match_flags = 0;
+
+	/* Select the compiled expression and the matching context */
+	if (raw || re->re == re->raw_re) {
+		r = re->raw_re;
+		mcontext = re->raw_mcontext;
+	}
+	else {
+		r = re->re;
+		mcontext = re->mcontext;
+	}
+
+	if (r == NULL) {
+		/* Invalid regexp type for the specified input */
+		return FALSE;
+	}
+
+	match_data = pcre2_match_data_create(re->ncaptures + 1, NULL);
+	novec = pcre2_get_ovector_count(match_data);
+	ovec = pcre2_get_ovector_pointer(match_data);
+
+	/* Fill ovec with crap, so we can stop if actual matches is less than announced */
+	for (i = 0; i < novec; i++) {
+		ovec[i * 2] = junk;
+		ovec[i * 2 + 1] = junk;
+	}
+
+#ifdef HAVE_PCRE_JIT
+	if (!(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) {
+		if (re->re != re->raw_re && rspamd_fast_utf8_validate(mt, remain) != 0) {
+			msg_err("bad utf8 input for JIT re '%s'", re->pattern);
+			/* match_data is already allocated: release it before bailing out */
+			pcre2_match_data_free(match_data);
+
+			return FALSE;
+		}
+
+		rc = pcre2_jit_match(r, mt, remain, 0, match_flags, match_data,
+							 mcontext);
+	}
+	else {
+		rc = pcre2_match(r, mt, remain, 0, match_flags, match_data,
+						 mcontext);
+	}
+#else
+	rc = pcre2_match(r, mt, remain, 0, match_flags, match_data,
+					 mcontext);
+#endif
+
+	if (rc >= 0) {
+		if (novec > 0) {
+			if (start) {
+				*start = mt + ovec[0];
+			}
+			if (end) {
+				*end = mt + ovec[1];
+			}
+		}
+		else {
+			if (start) {
+				*start = mt;
+			}
+			if (end) {
+				*end = mt + remain;
+			}
+		}
+
+		if (captures != NULL && novec >= 1) {
+			struct rspamd_re_capture *elt;
+
+			g_assert(g_array_get_element_size(captures) ==
+					 sizeof(struct rspamd_re_capture));
+			g_array_set_size(captures, novec);
+
+			for (i = 0; i < novec; i++) {
+				if (ovec[i * 2] != junk && ovec[i * 2] != PCRE2_UNSET) {
+					elt = &g_array_index(captures, struct rspamd_re_capture, i);
+					elt->p = mt + ovec[i * 2];
+					elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+				}
+				else {
+					/* First unfilled or unset slot: truncate the array here */
+					g_array_set_size(captures, i);
+					break;
+				}
+			}
+		}
+
+		ret = TRUE;
+
+		if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) {
+			/* We also ensure that the match is full */
+			if (ovec[0] != 0 || (guint) ovec[1] < len) {
+				ret = FALSE;
+			}
+		}
+	}
+
+	pcre2_match_data_free(match_data);
+
+	return ret;
+}
+#endif
+
+/* Return the pattern string stored in the regexp; `re` must not be NULL */
+const char *
+rspamd_regexp_get_pattern(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->pattern;
+}
+
+/* Replace the RSPAMD_REGEXP_FLAG_* set of the regexp and return the previous one */
+guint rspamd_regexp_set_flags(rspamd_regexp_t *re, guint new_flags)
+{
+	g_assert(re != NULL);
+
+	const guint prev = re->flags;
+	re->flags = new_flags;
+
+	return prev;
+}
+
+/* Return the RSPAMD_REGEXP_FLAG_* bits of the regexp; `re` must not be NULL */
+guint rspamd_regexp_get_flags(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->flags;
+}
+
+/* Return the PCRE compile options recorded when the regexp was created; `re` must not be NULL */
+guint rspamd_regexp_get_pcre_flags(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->pcre_flags;
+}
+
+/* Return the max_hits value stored in the regexp; `re` must not be NULL */
+guint rspamd_regexp_get_maxhits(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->max_hits;
+}
+
+/* Store a new max_hits value in the regexp and return the previous one */
+guint rspamd_regexp_set_maxhits(rspamd_regexp_t *re, guint new_maxhits)
+{
+	g_assert(re != NULL);
+
+	const guint prev = re->max_hits;
+	re->max_hits = new_maxhits;
+
+	return prev;
+}
+
+/* Return the cache id of the regexp (RSPAMD_INVALID_ID right after creation); `re` must not be NULL */
+guint64
+rspamd_regexp_get_cache_id(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->cache_id;
+}
+
+/* Store a new cache id in the regexp and return the previous one */
+guint64
+rspamd_regexp_set_cache_id(rspamd_regexp_t *re, guint64 id)
+{
+	g_assert(re != NULL);
+
+	const guint64 prev = re->cache_id;
+	re->cache_id = id;
+
+	return prev;
+}
+
+/* Return the match limit of the regexp (0 means no limit is applied by the search); `re` must not be NULL */
+gsize rspamd_regexp_get_match_limit(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->match_limit;
+}
+
+/* Store a new match limit in the regexp and return the previous one */
+gsize rspamd_regexp_set_match_limit(rspamd_regexp_t *re, gsize lim)
+{
+	g_assert(re != NULL);
+
+	const gsize prev = re->match_limit;
+	re->match_limit = lim;
+
+	return prev;
+}
+
+/**
+ * Check whether the regexp matches the whole input.
+ * @param re regexp to execute
+ * @param text input buffer
+ * @param len input length; 0 means strlen(text)
+ * @param raw use the raw compilation of the regexp
+ * @return TRUE only if the first match found starts at `text` and ends at `text + len`
+ */
+gboolean
+rspamd_regexp_match(const rspamd_regexp_t *re, const gchar *text, gsize len,
+					gboolean raw)
+{
+	const gchar *mstart = NULL, *mend = NULL;
+
+	g_assert(re != NULL);
+	g_assert(text != NULL);
+
+	if (len == 0) {
+		len = strlen(text);
+	}
+
+	if (!rspamd_regexp_search(re, text, len, &mstart, &mend, raw, NULL)) {
+		return FALSE;
+	}
+
+	return (mstart == text && mend == text + len);
+}
+
+/* Drop one reference via REF_RELEASE (the macro ignores NULL and runs the destructor when the count reaches zero) */
+void rspamd_regexp_unref(rspamd_regexp_t *re)
+{
+ REF_RELEASE(re);
+}
+
+/* Take one more reference on the regexp and return the same pointer; `re` must not be NULL */
+rspamd_regexp_t *
+rspamd_regexp_ref(rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ REF_RETAIN(re);
+
+ return re;
+}
+
+/* Attach an opaque user data pointer to the regexp (the previous value is overwritten); `re` must not be NULL */
+void rspamd_regexp_set_ud(rspamd_regexp_t *re, gpointer ud)
+{
+ g_assert(re != NULL);
+
+ re->ud = ud;
+}
+
+/* Return the opaque user data pointer stored by rspamd_regexp_set_ud; `re` must not be NULL */
+gpointer
+rspamd_regexp_get_ud(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->ud;
+}
+
+/* Equality callback for hash tables keyed by regexp id: TRUE when both ids are byte-identical */
+gboolean
+rspamd_regexp_equal(gconstpointer a, gconstpointer b)
+{
+	const gint diff = memcmp(a, b, sizeof(regexp_id_t));
+
+	return diff == 0;
+}
+
+/* Hash callback for hash tables keyed by regexp id: the first four bytes of the id are used as the hash */
+guint32
+rspamd_regexp_hash(gconstpointer a)
+{
+	guint32 h;
+
+	memcpy(&h, a, sizeof(h));
+
+	return h;
+}
+
+/*
+ * Compare two regexp ids with memcmp().
+ * Note: the value returned is the raw memcmp() result, i.e. zero when the ids
+ * are equal and non-zero otherwise, despite the gboolean return type.
+ */
+gboolean
+rspamd_regexp_cmp(gconstpointer a, gconstpointer b)
+{
+ const guchar *ia = a, *ib = b;
+
+ return memcmp(ia, ib, sizeof(regexp_id_t));
+}
+
+/**
+ * Allocate an empty regexp cache. Values stored in the table are released
+ * with rspamd_regexp_unref() on removal; when JIT support is compiled in, a
+ * JIT stack (32K initial, 1M maximum) is created as well.
+ */
+struct rspamd_regexp_cache *
+rspamd_regexp_cache_new(void)
+{
+	struct rspamd_regexp_cache *cache = g_malloc0(sizeof(*cache));
+
+	cache->tbl = g_hash_table_new_full(rspamd_regexp_hash, rspamd_regexp_equal,
+									   NULL, (GDestroyNotify) rspamd_regexp_unref);
+#if defined(HAVE_PCRE_JIT) && defined(WITH_PCRE2)
+	cache->jstack = pcre2_jit_stack_create(32 * 1024, 1024 * 1024, NULL);
+#elif defined(HAVE_PCRE_JIT)
+	cache->jstack = pcre_jit_stack_alloc(32 * 1024, 1024 * 1024);
+#endif
+
+	return cache;
+}
+
+
+/**
+ * Look up a regexp in the cache by pattern and flags.
+ * @param cache cache to query; NULL selects the global cache
+ * @param pattern NUL-terminated pattern
+ * @param flags flags string or NULL
+ * @return the cached regexp (no reference is added) or NULL if it is not cached
+ */
+rspamd_regexp_t *
+rspamd_regexp_cache_query(struct rspamd_regexp_cache *cache,
+						  const gchar *pattern,
+						  const gchar *flags)
+{
+	regexp_id_t key;
+
+	if (cache == NULL) {
+		rspamd_regexp_library_init(NULL);
+		cache = global_re_cache;
+	}
+
+	g_assert(cache != NULL);
+	rspamd_regexp_generate_id(pattern, flags, key);
+
+	return g_hash_table_lookup(cache->tbl, key);
+}
+
+
+/**
+ * Return the cached regexp for pattern/flags, compiling and caching it first
+ * if needed.
+ * @param cache cache to use; NULL selects the global cache
+ * @param pattern NUL-terminated pattern
+ * @param flags flags string or NULL
+ * @param err set when compilation fails
+ * @return regexp or NULL on compilation error; when the cache already holds
+ * max_re_cache_size entries the new regexp is returned without being cached
+ */
+rspamd_regexp_t *
+rspamd_regexp_cache_create(struct rspamd_regexp_cache *cache,
+						   const gchar *pattern,
+						   const gchar *flags, GError **err)
+{
+	rspamd_regexp_t *re;
+
+	if (cache == NULL) {
+		rspamd_regexp_library_init(NULL);
+		cache = global_re_cache;
+	}
+
+	g_assert(cache != NULL);
+
+	/* Fast path: already cached */
+	re = rspamd_regexp_cache_query(cache, pattern, flags);
+	if (re != NULL) {
+		return re;
+	}
+
+	re = rspamd_regexp_new(pattern, flags, err);
+	if (re == NULL) {
+		return NULL;
+	}
+
+	if (g_hash_table_size(cache->tbl) >= max_re_cache_size) {
+		msg_warn("cannot insert regexp to the cache: maximum size is reached (%d expressions); "
+				 "it might be cached regexp misuse; regexp pattern: %s",
+				 max_re_cache_size, pattern);
+
+		return re;
+	}
+
+	/* The table takes over the reference created by rspamd_regexp_new */
+	g_hash_table_insert(cache->tbl, re->id, re);
+
+	return re;
+}
+
+/**
+ * Remove a regexp from the cache by its id.
+ * @param cache cache to use; NULL selects the global cache
+ * @param re regexp to remove (must not be NULL)
+ * @return TRUE if the regexp was found and removed
+ */
+gboolean
+rspamd_regexp_cache_remove(struct rspamd_regexp_cache *cache,
+						   rspamd_regexp_t *re)
+{
+	if (!cache) {
+		cache = global_re_cache;
+	}
+
+	g_assert(cache != NULL);
+	g_assert(re != NULL);
+
+	const gboolean removed = g_hash_table_remove(cache->tbl, re->id);
+
+	return removed;
+}
+
+/* Destroy the cache: the hash table (unreferencing every stored regexp), the JIT stack if any, and the cache itself; NULL is ignored */
+void rspamd_regexp_cache_destroy(struct rspamd_regexp_cache *cache)
+{
+	if (cache == NULL) {
+		return;
+	}
+
+	g_hash_table_destroy(cache->tbl);
+#ifdef HAVE_PCRE_JIT
+	if (cache->jstack) {
+		/* Resolves to pcre_jit_stack_free or pcre2_jit_stack_free */
+		PCRE_JIT_STACK_FREE(cache->jstack);
+	}
+#endif
+	g_free(cache);
+}
+
+/*
+ * Hook declared with RSPAMD_CONSTRUCTOR: creates the global regexp cache and,
+ * for PCRE2, the shared compile context with the NEWLINE_ANY convention
+ */
+RSPAMD_CONSTRUCTOR(rspamd_re_static_pool_ctor)
+{
+ global_re_cache = rspamd_regexp_cache_new();
+#ifdef WITH_PCRE2
+ pcre2_ctx = pcre2_compile_context_create(NULL);
+ pcre2_set_newline(pcre2_ctx, PCRE_FLAG(NEWLINE_ANY));
+#endif
+}
+
+/*
+ * Hook declared with RSPAMD_DESTRUCTOR: destroys the global regexp cache and,
+ * for PCRE2, frees the shared compile context
+ */
+RSPAMD_DESTRUCTOR(rspamd_re_static_pool_dtor)
+{
+ rspamd_regexp_cache_destroy(global_re_cache);
+#ifdef WITH_PCRE2
+ pcre2_compile_context_free(pcre2_ctx);
+#endif
+}
+
+
+/**
+ * Decide whether PCRE JIT may be used and record the result in `can_jit`.
+ * The probe runs only while `check_jit` is set and clears it afterwards, so
+ * repeated calls are cheap.
+ * @param cfg optional config: `disable_pcre_jit` turns JIT off and suppresses
+ * the probe; otherwise a probe is requested again if JIT is not enabled yet
+ */
+void rspamd_regexp_library_init(struct rspamd_config *cfg)
+{
+ if (cfg) {
+ if (cfg->disable_pcre_jit) {
+ can_jit = FALSE;
+ check_jit = FALSE;
+ }
+ else if (!can_jit) {
+ check_jit = TRUE;
+ }
+ }
+
+ if (check_jit) {
+#ifdef HAVE_PCRE_JIT
+ gint jit, rc;
+ gchar *str;
+
+ /* Ask the library whether it has been built with JIT support */
+#ifndef WITH_PCRE2
+ rc = pcre_config(PCRE_CONFIG_JIT, &jit);
+#else
+ rc = pcre2_config(PCRE2_CONFIG_JIT, &jit);
+#endif
+
+ if (rc == 0 && jit == 1) {
+#ifndef WITH_PCRE2
+#ifdef PCRE_CONFIG_JITTARGET
+ pcre_config(PCRE_CONFIG_JITTARGET, &str);
+ msg_info("pcre is compiled with JIT for %s", str);
+#else
+ msg_info("pcre is compiled with JIT for unknown target");
+#endif
+#else
+ /* First call with NULL returns the buffer size needed for the target name */
+ rc = pcre2_config(PCRE2_CONFIG_JITTARGET, NULL);
+
+ if (rc > 0) {
+ str = g_alloca(rc);
+ pcre2_config(PCRE2_CONFIG_JITTARGET, str);
+ msg_info("pcre2 is compiled with JIT for %s", str);
+ }
+ else {
+ msg_info("pcre2 is compiled with JIT for unknown");
+ }
+
+#endif /* WITH_PCRE2 */
+
+ /* JIT is switched off when the VALGRIND environment variable is set */
+ if (getenv("VALGRIND") == NULL) {
+ can_jit = TRUE;
+ }
+ else {
+ msg_info("disabling PCRE jit as it does not play well with valgrind");
+ can_jit = FALSE;
+ }
+ }
+ else {
+ msg_info("pcre is compiled without JIT support, so many optimizations"
+ " are impossible");
+ can_jit = FALSE;
+ }
+#else
+ msg_info("pcre is too old and has no JIT support, so many optimizations"
+ " are impossible");
+ can_jit = FALSE;
+#endif
+ check_jit = FALSE;
+ }
+}
+
+/* Return a pointer to the id stored inside the regexp (not a copy); `re` must not be NULL */
+gpointer
+rspamd_regexp_get_id(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return (gpointer) re->id;
+}
+
+/* Return the opaque class pointer stored by rspamd_regexp_set_class; `re` must not be NULL */
+gpointer
+rspamd_regexp_get_class(const rspamd_regexp_t *re)
+{
+ g_assert(re != NULL);
+
+ return re->re_class;
+}
+
+/* Store a new opaque class pointer in the regexp and return the previous one */
+gpointer
+rspamd_regexp_set_class(rspamd_regexp_t *re, gpointer re_class)
+{
+	g_assert(re != NULL);
+
+	const gpointer prev = re->re_class;
+	re->re_class = re_class;
+
+	return prev;
+}
+
+/**
+ * Convert a glob expression to an anchored, case-insensitive regexp.
+ *
+ * Translation rules: `*` becomes `.*`, `?` becomes `.`, `{a,b}` becomes
+ * `(a|b)`, regexp metacharacters `. ( ) + | ^ $ @ %` are escaped, and a
+ * backslash escapes the glob character that follows it.
+ *
+ * @param gl glob expression (must not be NULL)
+ * @param sz length of the glob; 0 means strlen(gl)
+ * @param err set when the resulting regexp cannot be compiled
+ * @return new regexp or NULL on error
+ */
+rspamd_regexp_t *
+rspamd_regexp_from_glob(const gchar *gl, gsize sz, GError **err)
+{
+	GString *out;
+	rspamd_regexp_t *re;
+	const gchar *end;
+	gboolean escaping = FALSE;
+	gint nbraces = 0;
+
+	g_assert(gl != NULL);
+
+	if (sz == 0) {
+		sz = strlen(gl);
+	}
+
+	end = gl + sz;
+	out = g_string_sized_new(sz + 2);
+	g_string_append_c(out, '^');
+
+	while (gl < end) {
+		switch (*gl) {
+		case '*':
+			if (escaping) {
+				g_string_append(out, "\\*");
+			}
+			else {
+				g_string_append(out, ".*");
+			}
+
+			escaping = FALSE;
+			break;
+		case '?':
+			if (escaping) {
+				g_string_append(out, "\\?");
+			}
+			else {
+				g_string_append(out, ".");
+			}
+
+			escaping = FALSE;
+			break;
+		case '.':
+		case '(':
+		case ')':
+		case '+':
+		case '|':
+		case '^':
+		case '$':
+		case '@':
+		case '%':
+			/* Regexp metacharacters with no glob meaning are always escaped */
+			g_string_append_c(out, '\\');
+			g_string_append_c(out, *gl);
+			escaping = FALSE;
+			break;
+		case '\\':
+			if (escaping) {
+				g_string_append(out, "\\\\");
+				escaping = FALSE;
+			}
+			else {
+				/* Remember the backslash: it applies to the next character */
+				escaping = TRUE;
+			}
+			break;
+		case '{':
+			if (escaping) {
+				g_string_append(out, "\\{");
+			}
+			else {
+				g_string_append_c(out, '(');
+				nbraces++;
+			}
+
+			escaping = FALSE;
+			break;
+		case '}':
+			if (nbraces > 0 && !escaping) {
+				g_string_append_c(out, ')');
+				nbraces--;
+			}
+			else if (escaping) {
+				g_string_append(out, "\\}");
+			}
+			else {
+				g_string_append(out, "}");
+			}
+
+			escaping = FALSE;
+			break;
+		case ',':
+			if (nbraces > 0 && !escaping) {
+				/* Alternative separator inside {...} */
+				g_string_append_c(out, '|');
+			}
+			else if (escaping) {
+				g_string_append(out, "\\,");
+			}
+			else {
+				g_string_append_c(out, ',');
+			}
+
+			/*
+			 * The escape is consumed by the comma; without this reset
+			 * `\,*` would treat the following `*` as escaped as well
+			 */
+			escaping = FALSE;
+			break;
+		default:
+			escaping = FALSE;
+			g_string_append_c(out, *gl);
+			break;
+		}
+
+		gl++;
+	}
+
+	g_string_append_c(out, '$');
+	re = rspamd_regexp_new(out->str, "i", err);
+	g_string_free(out, TRUE);
+
+	return re;
+}
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
new file mode 100644
index 0000000..6222ba6
--- /dev/null
+++ b/src/libutil/regexp.h
@@ -0,0 +1,276 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef REGEXP_H_
+#define REGEXP_H_
+
+#include "config.h"
+
+#ifndef WITH_PCRE2
+#define PCRE_FLAG(x) G_PASTE(PCRE_, x)
+#else
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+#define PCRE_FLAG(x) G_PASTE(PCRE2_, x)
+#endif
+
+#define RSPAMD_INVALID_ID ((guint64) -1LL)
+#define RSPAMD_REGEXP_FLAG_RAW (1 << 1)
+#define RSPAMD_REGEXP_FLAG_NOOPT (1 << 2)
+#define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
+#define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
+#define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
+#define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
+#define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7)
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_config;
+
+typedef struct rspamd_regexp_s rspamd_regexp_t;
+struct rspamd_regexp_cache;
+struct rspamd_re_capture {
+ const char *p;
+ gsize len;
+};
+
+/**
+ * Create new rspamd regexp
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t *rspamd_regexp_new(const gchar *pattern, const gchar *flags,
+ GError **err);
+
+/**
+ * Create new rspamd regexp from a pattern of explicit length
+ * @param pattern regexp pattern
+ * @param len length of the pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t *rspamd_regexp_new_len(const gchar *pattern, gsize len, const gchar *flags,
+ GError **err);
+
+/**
+ * Search the specified regexp in the text
+ * @param re
+ * @param text
+ * @param len
+ * @param start position of start of match
+ * @param end position of end of match
+ * @param raw
+ * @param captures array of captured strings of type rspamd_fstring_capture or NULL
+ * @return
+ */
+gboolean rspamd_regexp_search(const rspamd_regexp_t *re,
+ const gchar *text, gsize len,
+ const gchar **start, const gchar **end, gboolean raw,
+ GArray *captures);
+
+
+/**
+ * Exact match of the specified text against the regexp
+ * @param re
+ * @param text
+ * @param len
+ * @return
+ */
+gboolean rspamd_regexp_match(const rspamd_regexp_t *re,
+ const gchar *text, gsize len, gboolean raw);
+
+/**
+ * Increase refcount for a regexp object
+ */
+rspamd_regexp_t *rspamd_regexp_ref(rspamd_regexp_t *re);
+
+/**
+ * Unref regexp object
+ * @param re
+ */
+void rspamd_regexp_unref(rspamd_regexp_t *re);
+
+/**
+ * Set auxiliary userdata for the specified regexp
+ * @param re regexp object
+ * @param ud opaque pointer
+ */
+void rspamd_regexp_set_ud(rspamd_regexp_t *re, gpointer ud);
+
+/**
+ * Get userdata for a regexp object
+ * @param re regexp object
+ * @return opaque pointer
+ */
+gpointer rspamd_regexp_get_ud(const rspamd_regexp_t *re);
+
+/**
+ * Get regexp ID suitable for hashing
+ * @param re
+ * @return
+ */
+gpointer rspamd_regexp_get_id(const rspamd_regexp_t *re);
+
+/**
+ * Get pattern for the specified regexp object
+ * @param re
+ * @return
+ */
+const char *rspamd_regexp_get_pattern(const rspamd_regexp_t *re);
+
+/**
+ * Get PCRE flags for the regexp
+ */
+guint rspamd_regexp_get_pcre_flags(const rspamd_regexp_t *re);
+
+/**
+ * Get rspamd flags for the regexp
+ */
+guint rspamd_regexp_get_flags(const rspamd_regexp_t *re);
+
+/**
+ * Set rspamd flags for the regexp
+ */
+guint rspamd_regexp_set_flags(rspamd_regexp_t *re, guint new_flags);
+
+/**
+ * Get regexp maximum hits
+ */
+guint rspamd_regexp_get_maxhits(const rspamd_regexp_t *re);
+
+/**
+ * Set regexp maximum hits
+ */
+guint rspamd_regexp_set_maxhits(rspamd_regexp_t *re, guint new_maxhits);
+
+/**
+ * Returns cache id for a regexp
+ */
+guint64 rspamd_regexp_get_cache_id(const rspamd_regexp_t *re);
+
+/**
+ * Sets cache id for a regexp
+ */
+guint64 rspamd_regexp_set_cache_id(rspamd_regexp_t *re, guint64 id);
+
+/**
+ * Returns match limit for a regexp
+ */
+gsize rspamd_regexp_get_match_limit(const rspamd_regexp_t *re);
+
+/**
+ * Sets match limit for a regexp
+ */
+gsize rspamd_regexp_set_match_limit(rspamd_regexp_t *re, gsize lim);
+
+/**
+ * Get regexp class for the re object
+ */
+gpointer rspamd_regexp_get_class(const rspamd_regexp_t *re);
+
+/**
+ * Set regexp class for the re object
+ * @return old re class value
+ */
+gpointer rspamd_regexp_set_class(rspamd_regexp_t *re, gpointer re_class);
+
+/**
+ * Create new regexp cache
+ * @return
+ */
+struct rspamd_regexp_cache *rspamd_regexp_cache_new(void);
+
+/**
+ * Query rspamd cache for a specified regexp
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern
+ * @param flags
+ * @return
+ */
+rspamd_regexp_t *rspamd_regexp_cache_query(struct rspamd_regexp_cache *cache,
+ const gchar *pattern,
+ const gchar *flags);
+
+/**
+ * Create or get cached regexp from the specified cache
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param pattern regexp pattern
+ * @param flags flags (may be enclosed inside pattern)
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */
+rspamd_regexp_t *rspamd_regexp_cache_create(struct rspamd_regexp_cache *cache,
+ const gchar *pattern,
+ const gchar *flags, GError **err);
+
+/**
+ * Remove regexp from the cache
+ * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
+ * @param re re to remove
+ * @return TRUE if a regexp has been removed
+ */
+gboolean rspamd_regexp_cache_remove(struct rspamd_regexp_cache *cache,
+ rspamd_regexp_t *re);
+
+/**
+ * Destroy regexp cache and unref all elements inside it
+ * @param cache
+ */
+void rspamd_regexp_cache_destroy(struct rspamd_regexp_cache *cache);
+
+/**
+ * Return the value for regexp hash based on its ID
+ * @param a
+ * @return
+ */
+guint32 rspamd_regexp_hash(gconstpointer a);
+
+/**
+ * Compare two regexp objects based on their IDs
+ * @param a
+ * @param b
+ * @return
+ */
+gboolean rspamd_regexp_equal(gconstpointer a, gconstpointer b);
+
+/**
+ * Acts like memcmp but for regexp
+ */
+gint rspamd_regexp_cmp(gconstpointer a, gconstpointer b);
+
+/**
+ * Initialize superglobal regexp cache and library
+ */
+void rspamd_regexp_library_init(struct rspamd_config *cfg);
+
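+/**
+ * Create regexp from glob
+ * @param gl glob pattern
+ * @param sz length of gl; if 0, the length is taken with strlen
+ * @param err error pointer set if compilation failed
+ * @return new regexp object
+ */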
+rspamd_regexp_t *rspamd_regexp_from_glob(const gchar *gl, gsize sz, GError **err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* REGEXP_H_ */
diff --git a/src/libutil/rrd.c b/src/libutil/rrd.c
new file mode 100644
index 0000000..451e222
--- /dev/null
+++ b/src/libutil/rrd.c
@@ -0,0 +1,1502 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "rrd.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "logger.h"
+#include "unix-std.h"
+#include "cryptobox.h"
+#include <math.h>
+
+#define RSPAMD_RRD_DS_COUNT METRIC_ACTION_MAX
+#define RSPAMD_RRD_OLD_DS_COUNT 4
+#define RSPAMD_RRD_RRA_COUNT 4
+
+#define msg_err_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "rrd", file->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_warn_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
+ "rrd", file->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_info_rrd(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+ "rrd", file->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+#define msg_debug_rrd(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_rrd_log_id, "rrd", file->id, \
+ G_STRFUNC, \
+ __VA_ARGS__)
+
+INIT_LOG_MODULE(rrd)
+
+/**
+ * Error domain attached to every GError raised by the rrd code
+ */
+static GQuark
+rrd_error_quark(void)
+{
+	GQuark domain = g_quark_from_static_string("rrd-error");
+
+	return domain;
+}
+
+/**
+ * Map a data source type name (compared case-insensitively) to its
+ * enum rrd_dst_type value; unknown names yield RRD_DST_INVALID
+ */
+enum rrd_dst_type
+rrd_dst_from_string(const gchar *str)
+{
+	/* Probed in order, first match wins */
+	static const struct {
+		const gchar *name;
+		enum rrd_dst_type type;
+	} known_dst[] = {
+		{"counter", RRD_DST_COUNTER},
+		{"absolute", RRD_DST_ABSOLUTE},
+		{"gauge", RRD_DST_GAUGE},
+		{"cdef", RRD_DST_CDEF},
+		{"derive", RRD_DST_DERIVE},
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS(known_dst); idx++) {
+		if (g_ascii_strcasecmp(str, known_dst[idx].name) == 0) {
+			return known_dst[idx].type;
+		}
+	}
+
+	return RRD_DST_INVALID;
+}
+
+/**
+ * Return the upper-case name of a data source type, or "U" for any
+ * value that has no name
+ */
+const gchar *
+rrd_dst_to_string(enum rrd_dst_type type)
+{
+	const gchar *name = "U";
+
+	switch (type) {
+	case RRD_DST_COUNTER:
+		name = "COUNTER";
+		break;
+	case RRD_DST_ABSOLUTE:
+		name = "ABSOLUTE";
+		break;
+	case RRD_DST_GAUGE:
+		name = "GAUGE";
+		break;
+	case RRD_DST_CDEF:
+		name = "CDEF";
+		break;
+	case RRD_DST_DERIVE:
+		name = "DERIVE";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * Map a consolidation function name (compared case-insensitively) to its
+ * enum rrd_cf_type value; unknown names yield RRD_CF_INVALID
+ */
+enum rrd_cf_type
+rrd_cf_from_string(const gchar *str)
+{
+	/* XXX: add other CF functions supported by rrd */
+	static const struct {
+		const gchar *name;
+		enum rrd_cf_type type;
+	} known_cf[] = {
+		{"average", RRD_CF_AVERAGE},
+		{"minimum", RRD_CF_MINIMUM},
+		{"maximum", RRD_CF_MAXIMUM},
+		{"last", RRD_CF_LAST},
+	};
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS(known_cf); idx++) {
+		if (g_ascii_strcasecmp(str, known_cf[idx].name) == 0) {
+			return known_cf[idx].type;
+		}
+	}
+
+	return RRD_CF_INVALID;
+}
+
+/**
+ * Return the upper-case name of a consolidation function, or "U" for any
+ * value that has no name
+ */
+const gchar *
+rrd_cf_to_string(enum rrd_cf_type type)
+{
+	const gchar *name = "U";
+
+	/* XXX: add other CF functions supported by rrd */
+	switch (type) {
+	case RRD_CF_AVERAGE:
+		name = "AVERAGE";
+		break;
+	case RRD_CF_MINIMUM:
+		name = "MINIMUM";
+		break;
+	case RRD_CF_MAXIMUM:
+		name = "MAXIMUM";
+		break;
+	case RRD_CF_LAST:
+		name = "LAST";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/**
+ * Fill an RRA definition with defaults
+ * @param cf_name consolidation function name, must be known to rrd_cf_from_string
+ * @param pdp_cnt value stored in rra->pdp_cnt
+ * @param rows value stored in rra->row_cnt
+ * @param rra definition to fill; its parameters are zeroed and XFF is set to 0.5
+ */
+void rrd_make_default_rra(const gchar *cf_name,
+						  gulong pdp_cnt,
+						  gulong rows,
+						  struct rrd_rra_def *rra)
+{
+	g_assert(cf_name != NULL);
+	g_assert(rrd_cf_from_string(cf_name) != RRD_CF_INVALID);
+
+	/* Parameters first: everything zero except the XFF value */
+	memset(rra->par, 0, sizeof(rra->par));
+	rra->par[RRA_cdp_xff_val].dv = 0.5;
+
+	rspamd_strlcpy(rra->cf_nam, cf_name, sizeof(rra->cf_nam));
+	rra->row_cnt = rows;
+	rra->pdp_cnt = pdp_cnt;
+}
+
+/**
+ * Fill a data source definition with defaults
+ * @param name data source name, must not be NULL
+ * @param type data source type name, must be known to rrd_dst_from_string
+ * @param pdp_step PDP step; the heartbeat parameter is set to twice this value
+ * @param ds definition to fill; min and max values are set to NaN
+ */
+void rrd_make_default_ds(const gchar *name,
+						 const gchar *type,
+						 gulong pdp_step,
+						 struct rrd_ds_def *ds)
+{
+	g_assert(name != NULL);
+	g_assert(type != NULL);
+	g_assert(rrd_dst_from_string(type) != RRD_DST_INVALID);
+
+	/* Parameters: zero everything, then set the non-zero defaults */
+	memset(ds->par, 0, sizeof(ds->par));
+	ds->par[RRD_DS_max_val].dv = NAN;
+	ds->par[RRD_DS_min_val].dv = NAN;
+	ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2;
+
+	rspamd_strlcpy(ds->dst, type, sizeof(ds->dst));
+	rspamd_strlcpy(ds->ds_nam, name, sizeof(ds->ds_nam));
+}
+
+/**
+ * Check rrd file for correctness (size, cookies, etc)
+ * @param filename path of the file to check
+ * @param need_data if TRUE, the file must also contain all RRA rows, i.e.
+ * its size must match the header plus row_cnt * ds_cnt doubles per RRA
+ * @param err error pointer set on failure
+ * @return TRUE if the file passed all checks
+ */
+static gboolean
+rspamd_rrd_check_file(const gchar *filename, gboolean need_data, GError **err)
+{
+	gint fd;
+	gulong i;
+	struct stat st;
+	struct rrd_file_head head;
+	struct rrd_rra_def rra;
+	/* 64 bit so that files larger than 2GB are not rejected by truncation */
+	guint64 expected_size;
+	gboolean ret = FALSE;
+
+	fd = open(filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd open error: %s", strerror(errno));
+		return FALSE;
+	}
+
+	if (fstat(fd, &st) == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno));
+		goto done;
+	}
+	if (st.st_size < (goffset) sizeof(struct rrd_file_head)) {
+		/* We have trimmed file */
+		g_set_error(err, rrd_error_quark(), EINVAL,
+					"rrd size is bad: %" G_GINT64_FORMAT,
+					(gint64) st.st_size);
+		goto done;
+	}
+
+	/* Try to read header */
+	if (read(fd, &head, sizeof(head)) != (gssize) sizeof(head)) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd read head error: %s",
+					strerror(errno));
+		goto done;
+	}
+	/* Check magic */
+	if (memcmp(head.version, RRD_VERSION, sizeof(head.version)) != 0) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd head error: bad cookie");
+		goto done;
+	}
+	if (head.float_cookie != RRD_FLOAT_COOKIE) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd head error: another architecture "
+											   "(file cookie %g != our cookie %g)",
+					head.float_cookie, RRD_FLOAT_COOKIE);
+		goto done;
+	}
+	/* Check for other params */
+	if (head.ds_cnt <= 0 || head.rra_cnt <= 0) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd head cookies error: bad rra or ds count");
+		goto done;
+	}
+	/* Now we can calculate the overall size of rrd */
+	expected_size = sizeof(struct rrd_file_head);
+	expected_size += (guint64) sizeof(struct rrd_ds_def) * head.ds_cnt;
+	expected_size += (guint64) sizeof(struct rrd_rra_def) * head.rra_cnt;
+	expected_size += sizeof(struct rrd_live_head);
+	expected_size += (guint64) sizeof(struct rrd_pdp_prep) * head.ds_cnt;
+	expected_size += (guint64) sizeof(struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt;
+	expected_size += (guint64) sizeof(struct rrd_rra_ptr) * head.rra_cnt;
+
+	if ((guint64) st.st_size < expected_size) {
+		/* Not a syscall failure, so errno carries no meaning here */
+		g_set_error(err,
+					rrd_error_quark(), EINVAL,
+					"rrd file seems to have stripped header: %" G_GUINT64_FORMAT,
+					expected_size);
+		goto done;
+	}
+
+	if (need_data) {
+		/* Skip the DS definitions; the RRA definitions follow them */
+		if (lseek(fd, sizeof(struct rrd_ds_def) * head.ds_cnt,
+				  SEEK_CUR) == -1) {
+			g_set_error(err,
+						rrd_error_quark(), errno, "rrd head lseek error: %s",
+						strerror(errno));
+			goto done;
+		}
+		for (i = 0; i < head.rra_cnt; i++) {
+			if (read(fd, &rra, sizeof(rra)) != (gssize) sizeof(rra)) {
+				g_set_error(err,
+							rrd_error_quark(), errno, "rrd read rra error: %s",
+							strerror(errno));
+				goto done;
+			}
+			/* Each RRA stores row_cnt rows of ds_cnt doubles */
+			expected_size += (guint64) rra.row_cnt * head.ds_cnt * sizeof(gdouble);
+		}
+
+		if ((guint64) st.st_size != expected_size) {
+			g_set_error(err,
+						rrd_error_quark(), EINVAL,
+						"rrd file seems to have incorrect size: %" G_GINT64_FORMAT
+						", must be %" G_GUINT64_FORMAT,
+						(gint64) st.st_size, expected_size);
+			goto done;
+		}
+	}
+
+	ret = TRUE;
+
+done:
+	close(fd);
+	return ret;
+}
+
+/**
+ * Point the section pointers of an rrd file structure into its mapping
+ *
+ * Sections are laid out back to back: file head, DS definitions, RRA
+ * definitions, live head, PDP prep, CDP prep, RRA pointers and, for a
+ * completed file, the values.
+ * @param file rrd file whose map is already set
+ * @param completed if FALSE, rrd_value is set to NULL
+ */
+static void
+rspamd_rrd_adjust_pointers(struct rspamd_rrd_file *file, gboolean completed)
+{
+	guint8 *cursor = file->map;
+	gulong n_ds, n_rra;
+
+	file->stat_head = (struct rrd_file_head *) cursor;
+	/* Section sizes depend on the counts stored in the head */
+	n_ds = file->stat_head->ds_cnt;
+	n_rra = file->stat_head->rra_cnt;
+	cursor += sizeof(struct rrd_file_head);
+
+	file->ds_def = (struct rrd_ds_def *) cursor;
+	cursor += sizeof(struct rrd_ds_def) * n_ds;
+
+	file->rra_def = (struct rrd_rra_def *) cursor;
+	cursor += sizeof(struct rrd_rra_def) * n_rra;
+
+	file->live_head = (struct rrd_live_head *) cursor;
+	cursor += sizeof(struct rrd_live_head);
+
+	file->pdp_prep = (struct rrd_pdp_prep *) cursor;
+	cursor += sizeof(struct rrd_pdp_prep) * n_ds;
+
+	file->cdp_prep = (struct rrd_cdp_prep *) cursor;
+	cursor += sizeof(struct rrd_cdp_prep) * n_rra * n_ds;
+
+	file->rra_ptr = (struct rrd_rra_ptr *) cursor;
+
+	if (!completed) {
+		file->rrd_value = NULL;
+		return;
+	}
+
+	cursor += sizeof(struct rrd_rra_ptr) * n_rra;
+	file->rrd_value = (gdouble *) cursor;
+}
+
+/**
+ * Compute file->id for a finalized file: base32 of a hash over the file
+ * name and the name of every data source. Does nothing if the file is
+ * not finalized.
+ */
+static void
+rspamd_rrd_calculate_checksum(struct rspamd_rrd_file *file)
+{
+	guchar digest[rspamd_cryptobox_HASHBYTES];
+	rspamd_cryptobox_hash_state_t hst;
+	guint idx;
+
+	if (!file->finalized) {
+		return;
+	}
+
+	rspamd_cryptobox_hash_init(&hst, NULL, 0);
+	rspamd_cryptobox_hash_update(&hst, file->filename, strlen(file->filename));
+
+	for (idx = 0; idx < file->stat_head->ds_cnt; idx++) {
+		struct rrd_ds_def *cur = &file->ds_def[idx];
+
+		/* The whole fixed-size name field is hashed, not just the string */
+		rspamd_cryptobox_hash_update(&hst, cur->ds_nam, sizeof(cur->ds_nam));
+	}
+
+	rspamd_cryptobox_hash_final(&hst, digest);
+
+	file->id = rspamd_encode_base32(digest, sizeof(digest), RSPAMD_BASE32_DEFAULT);
+}
+
+/**
+ * Open an rrd file read-write and lock it, retrying every millisecond
+ * while the lock attempt fails with EAGAIN/EWOULDBLOCK
+ * @param filename path to open
+ * @return locked file descriptor, or -1 with errno describing the failure
+ */
+static int
+rspamd_rrd_open_exclusive(const gchar *filename)
+{
+	struct timespec sleep_ts = {
+		.tv_sec = 0,
+		.tv_nsec = 1000000};
+	gint fd, saved_errno;
+
+	fd = open(filename, O_RDWR);
+
+	if (fd == -1) {
+		return -1;
+	}
+
+	while (rspamd_file_lock(fd, TRUE) == -1) {
+		if (errno != EAGAIN && errno != EWOULDBLOCK) {
+			/* close() may overwrite errno, but the caller reports it */
+			saved_errno = errno;
+			close(fd);
+			errno = saved_errno;
+
+			return -1;
+		}
+
+		nanosleep(&sleep_ts, NULL);
+	}
+
+	return fd;
+}
+
+/**
+ * Open a complete or incomplete rrd file: validate it, open and lock it,
+ * map it into memory and set up the section pointers
+ * @param filename path to the file
+ * @param completed TRUE if the file must already contain its RRA rows
+ * @param err error pointer set on failure
+ * @return new rrd file structure or NULL on failure; on success the
+ * descriptor stays open and is stored in the structure
+ */
+static struct rspamd_rrd_file *
+rspamd_rrd_open_common(const gchar *filename, gboolean completed, GError **err)
+{
+	struct rspamd_rrd_file *file;
+	gint fd;
+	struct stat st;
+	gpointer map;
+
+	if (!rspamd_rrd_check_file(filename, completed, err)) {
+		return NULL;
+	}
+
+	/* Open file */
+	fd = rspamd_rrd_open_exclusive(filename);
+	if (fd == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd open error: %s", strerror(errno));
+		return NULL;
+	}
+
+	if (fstat(fd, &st) == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno));
+		rspamd_file_unlock(fd, FALSE);
+		close(fd);
+		return NULL;
+	}
+
+	/* Mmap file */
+	map = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		/* Format the message before cleanup can overwrite errno */
+		g_set_error(err,
+					rrd_error_quark(), ENOMEM, "mmap failed: %s", strerror(errno));
+		rspamd_file_unlock(fd, FALSE);
+		close(fd);
+		return NULL;
+	}
+
+	file = g_malloc0(sizeof(struct rspamd_rrd_file));
+	file->size = st.st_size;
+	file->map = map;
+	file->fd = fd;
+
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers(file, completed);
+
+	/* Mark it as finalized */
+	file->finalized = completed;
+
+	file->filename = g_strdup(filename);
+	rspamd_rrd_calculate_checksum(file);
+
+	return file;
+}
+
+/**
+ * Open (and mmap) an existing, completed RRD file
+ * @param filename path
+ * @param err error pointer
+ * @return rrd file structure or NULL on failure
+ */
+struct rspamd_rrd_file *
+rspamd_rrd_open(const gchar *filename, GError **err)
+{
+	/* Must be called `file`: the logging macro reads file->id */
+	struct rspamd_rrd_file *file = rspamd_rrd_open_common(filename, TRUE, err);
+
+	if (file != NULL) {
+		msg_info_rrd("rrd file opened: %s", filename);
+	}
+
+	return file;
+}
+
+/**
+ * Write `count` copies of one fixed-size record to fd
+ * @return TRUE on success; on failure err is set from errno
+ */
+static gboolean
+rspamd_rrd_write_records(gint fd, const void *rec, gsize rec_size,
+						 gulong count, GError **err)
+{
+	gulong i;
+
+	for (i = 0; i < count; i++) {
+		if (write(fd, rec, rec_size) != (gssize) rec_size) {
+			g_set_error(err,
+						rrd_error_quark(), errno, "rrd write error: %s",
+						strerror(errno));
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/**
+ * Create basic header for rrd file
+ *
+ * The file must not exist yet. All header sections are written with
+ * placeholder definitions (every DS is "COUNTER", every RRA is "AVERAGE")
+ * and the result is opened as an incomplete (not finalized) rrd file.
+ * @param filename file path
+ * @param ds_count number of data sources
+ * @param rra_count number of round robin archives
+ * @param pdp_step step of primary data points
+ * @param initial_ticks timestamp in seconds stored as the last update time
+ * @param err error pointer
+ * @return opened incomplete rrd file or NULL on failure
+ */
+struct rspamd_rrd_file *
+rspamd_rrd_create(const gchar *filename,
+				  gulong ds_count,
+				  gulong rra_count,
+				  gulong pdp_step,
+				  gdouble initial_ticks,
+				  GError **err)
+{
+	struct rrd_file_head head;
+	struct rrd_ds_def ds;
+	struct rrd_rra_def rra;
+	struct rrd_live_head lh;
+	struct rrd_pdp_prep pdp;
+	struct rrd_cdp_prep cdp;
+	struct rrd_rra_ptr rra_ptr;
+	gint fd;
+	gulong i;
+
+	/* Open file */
+	fd = open(filename, O_RDWR | O_CREAT | O_EXCL, 0644);
+	if (fd == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd create error: %s",
+					strerror(errno));
+		return NULL;
+	}
+
+	rspamd_file_lock(fd, FALSE);
+
+	/* Fill header */
+	memset(&head, 0, sizeof(head));
+	head.rra_cnt = rra_count;
+	head.ds_cnt = ds_count;
+	head.pdp_step = pdp_step;
+	memcpy(head.cookie, RRD_COOKIE, sizeof(head.cookie));
+	memcpy(head.version, RRD_VERSION, sizeof(head.version));
+	head.float_cookie = RRD_FLOAT_COOKIE;
+
+	if (!rspamd_rrd_write_records(fd, &head, sizeof(head), 1, err)) {
+		goto write_failed;
+	}
+
+	/* Fill DS section */
+	memset(&ds, 0, sizeof(ds));
+	memcpy(ds.dst, "COUNTER", sizeof("COUNTER"));
+
+	if (!rspamd_rrd_write_records(fd, &ds, sizeof(ds), ds_count, err)) {
+		goto write_failed;
+	}
+
+	/* Fill RRA section */
+	memset(&rra, 0, sizeof(rra));
+	memcpy(rra.cf_nam, "AVERAGE", sizeof("AVERAGE"));
+	rra.pdp_cnt = 1;
+
+	if (!rspamd_rrd_write_records(fd, &rra, sizeof(rra), rra_count, err)) {
+		goto write_failed;
+	}
+
+	/* Fill live header: whole seconds plus the fraction in microseconds */
+	memset(&lh, 0, sizeof(lh));
+	lh.last_up = (glong) initial_ticks;
+	lh.last_up_usec = (glong) ((initial_ticks - lh.last_up) * 1e6f);
+
+	if (!rspamd_rrd_write_records(fd, &lh, sizeof(lh), 1, err)) {
+		goto write_failed;
+	}
+
+	/* Fill pdp prep */
+	memset(&pdp, 0, sizeof(pdp));
+	memcpy(pdp.last_ds, "U", sizeof("U"));
+	pdp.scratch[PDP_val].dv = NAN;
+	pdp.scratch[PDP_unkn_sec_cnt].lv = 0;
+
+	if (!rspamd_rrd_write_records(fd, &pdp, sizeof(pdp), ds_count, err)) {
+		goto write_failed;
+	}
+
+	/* Fill cdp prep: one record per (RRA, DS) pair */
+	memset(&cdp, 0, sizeof(cdp));
+	cdp.scratch[CDP_val].dv = NAN;
+	cdp.scratch[CDP_unkn_pdp_cnt].lv = 0;
+
+	for (i = 0; i < rra_count; i++) {
+		if (!rspamd_rrd_write_records(fd, &cdp, sizeof(cdp), ds_count, err)) {
+			goto write_failed;
+		}
+	}
+
+	/* Set row pointers */
+	memset(&rra_ptr, 0, sizeof(rra_ptr));
+
+	if (!rspamd_rrd_write_records(fd, &rra_ptr, sizeof(rra_ptr), rra_count,
+								  err)) {
+		goto write_failed;
+	}
+
+	rspamd_file_unlock(fd, FALSE);
+	close(fd);
+
+	return rspamd_rrd_open_common(filename, FALSE, err);
+
+write_failed:
+	/* err is already set, so cleanup cannot disturb the reported errno */
+	rspamd_file_unlock(fd, FALSE);
+	close(fd);
+
+	return NULL;
+}
+
+/**
+ * Add data sources to rrd file
+ *
+ * The definitions are copied over the DS section of the mapped file.
+ * ds->len is compared against ds_cnt * sizeof(struct rrd_ds_def), so it
+ * must hold the size of ds->data in bytes.
+ * @param file rrd file
+ * @param ds array of struct rrd_ds_def
+ * @param err error pointer
+ * @return TRUE if data sources were added
+ */
+gboolean
+rspamd_rrd_add_ds(struct rspamd_rrd_file *file, GArray *ds, GError **err)
+{
+	if (file == NULL || ds == NULL ||
+		file->stat_head->ds_cnt * sizeof(struct rrd_ds_def) != ds->len) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd add ds failed: wrong arguments");
+		return FALSE;
+	}
+
+	/* Straightforward memcpy */
+	memcpy(file->ds_def, ds->data, ds->len);
+
+	return TRUE;
+}
+
+/**
+ * Add round robin archives to rrd file
+ *
+ * The definitions are copied over the RRA section of the mapped file.
+ * rra->len is compared against rra_cnt * sizeof(struct rrd_rra_def), so it
+ * must hold the size of rra->data in bytes.
+ * @param file rrd file
+ * @param rra array of struct rrd_rra_def
+ * @param err error pointer
+ * @return TRUE if archives were added
+ */
+gboolean
+rspamd_rrd_add_rra(struct rspamd_rrd_file *file, GArray *rra, GError **err)
+{
+	if (file == NULL || rra == NULL ||
+		file->stat_head->rra_cnt * sizeof(struct rrd_rra_def) != rra->len) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd add rra failed: wrong arguments");
+		return FALSE;
+	}
+
+	/* Straightforward memcpy */
+	memcpy(file->rra_def, rra->data, rra->len);
+
+	return TRUE;
+}
+
+/**
+ * Finalize rrd file header and initialize all RRA in the file
+ *
+ * Appends row_cnt * ds_cnt NaN values for every RRA to the file, maps the
+ * grown file again and marks the structure as finalized.
+ * @param file rrd file, must have a file name and an open descriptor
+ * @param err error pointer
+ * @return TRUE if rrd file is ready for use; on failure the descriptor is
+ * closed and file->fd is set to -1
+ */
+gboolean
+rspamd_rrd_finalize(struct rspamd_rrd_file *file, GError **err)
+{
+	gint fd;
+	guint i;
+	gulong j, ds_cnt;
+	gsize remaining = 0, chunk;
+	gssize written;
+	gdouble vbuf[1024];
+	struct stat st;
+	gpointer map;
+
+	if (file == NULL || file->filename == NULL || file->fd == -1) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL, "rrd finalize failed: wrong arguments");
+		return FALSE;
+	}
+
+	fd = file->fd;
+
+	if (lseek(fd, 0, SEEK_END) == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd seek error: %s", strerror(errno));
+		goto fail;
+	}
+
+	/* Adjust CDP; must happen before munmap as it works on the mapping */
+	ds_cnt = file->stat_head->ds_cnt;
+
+	for (i = 0; i < file->stat_head->rra_cnt; i++) {
+		/* CDP entries of RRA i start at index i * ds_cnt */
+		for (j = 0; j < ds_cnt; j++) {
+			file->cdp_prep[i * ds_cnt + j].scratch[CDP_unkn_pdp_cnt].lv = 0;
+		}
+		/* Randomize row pointer (disabled) */
+		/* file->rra_ptr[i].cur_row = g_random_int () % file->rra_def[i].row_cnt; */
+		file->rra_ptr[i].cur_row = file->rra_def[i].row_cnt - 1;
+		/* Calculate values count */
+		remaining += file->rra_def[i].row_cnt * ds_cnt;
+	}
+
+	munmap(file->map, file->size);
+	/* The old mapping is gone; do not keep a dangling pointer around */
+	file->map = NULL;
+
+	/* Write values */
+	for (i = 0; i < G_N_ELEMENTS(vbuf); i++) {
+		vbuf[i] = NAN;
+	}
+
+	while (remaining > 0) {
+		/* Write values in buffered manner */
+		chunk = MIN(remaining, (gsize) G_N_ELEMENTS(vbuf));
+		written = write(fd, vbuf, chunk * sizeof(gdouble));
+
+		if (written != (gssize) (chunk * sizeof(gdouble))) {
+			/* errno is only meaningful when write() itself failed */
+			g_set_error(err,
+						rrd_error_quark(), written == -1 ? errno : EIO,
+						"rrd write error: %s",
+						written == -1 ? strerror(errno) : "short write");
+			goto fail;
+		}
+
+		remaining -= chunk;
+	}
+
+	if (fstat(fd, &st) == -1) {
+		g_set_error(err,
+					rrd_error_quark(), errno, "rrd stat error: %s", strerror(errno));
+		goto fail;
+	}
+
+	/* Mmap again */
+	map = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		g_set_error(err,
+					rrd_error_quark(), ENOMEM, "mmap failed: %s", strerror(errno));
+		goto fail;
+	}
+
+	file->size = st.st_size;
+	file->map = map;
+
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers(file, TRUE);
+
+	file->finalized = TRUE;
+	rspamd_rrd_calculate_checksum(file);
+	msg_info_rrd("rrd file created: %s", file->filename);
+
+	return TRUE;
+
+fail:
+	/* err is already set; mark the descriptor as closed for later users */
+	close(fd);
+	file->fd = -1;
+
+	return FALSE;
+}
+
+/**
+ * Update pdp_prep data
+ *
+ * For every data source computes the new PDP value from the submitted
+ * value according to the DS type and remembers the submitted value in
+ * last_ds ("U" when it is NaN).
+ * @param file rrd file
+ * @param vals new values, one per data source
+ * @param pdp_new new pdp array, one slot per data source
+ * @param interval time elapsed from the last update
+ * @return FALSE if a data source has a type that is not handled here
+ */
+static gboolean
+rspamd_rrd_update_pdp_prep(struct rspamd_rrd_file *file,
+						   gdouble *vals,
+						   gdouble *pdp_new,
+						   gdouble interval)
+{
+	guint i;
+	enum rrd_dst_type type;
+
+	for (i = 0; i < file->stat_head->ds_cnt; i++) {
+		type = rrd_dst_from_string(file->ds_def[i].dst);
+
+		if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) {
+			/* Heartbeat exceeded: the point is unknown */
+			rspamd_strlcpy(file->pdp_prep[i].last_ds, "U",
+						   sizeof(file->pdp_prep[i].last_ds));
+			pdp_new[i] = NAN;
+			msg_debug_rrd("adding unknown point: interval %.3f is greater than heartbeat %l",
+						  interval, file->ds_def[i].par[RRD_DS_mrhb_cnt].lv);
+		}
+		else {
+			switch (type) {
+			case RRD_DST_COUNTER:
+			case RRD_DST_DERIVE:
+				if (file->pdp_prep[i].last_ds[0] == 'U') {
+					/* No previous value to take the difference from */
+					pdp_new[i] = NAN;
+					msg_debug_rrd("last point is NaN for point %ud", i);
+				}
+				else {
+					pdp_new[i] = vals[i] - strtod(file->pdp_prep[i].last_ds,
+												  NULL);
+					msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]);
+				}
+				break;
+			case RRD_DST_GAUGE:
+				pdp_new[i] = vals[i] * interval;
+				msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]);
+				break;
+			case RRD_DST_ABSOLUTE:
+				pdp_new[i] = vals[i];
+				msg_debug_rrd("new PDP %ud, %.3f", i, pdp_new[i]);
+				break;
+			default:
+				return FALSE;
+			}
+		}
+
+		/* Copy value to the last_ds */
+		if (!isnan(vals[i])) {
+			rspamd_snprintf(file->pdp_prep[i].last_ds,
+							sizeof(file->pdp_prep[i].last_ds), "%.4f", vals[i]);
+		}
+		else {
+			file->pdp_prep[i].last_ds[0] = 'U';
+			file->pdp_prep[i].last_ds[1] = '\0';
+		}
+	}
+
+	return TRUE;
+}
+
+/**
+ * Update step for this pdp
+ *
+ * For every data source derives the temporary PDP value from the current
+ * scratch state, then stores the new PDP (divided by interval) in scratch.
+ * @param file rrd file
+ * @param pdp_new new pdp array
+ * @param pdp_temp temp pdp array, filled by this function
+ * @param interval time till last update
+ * @param pdp_diff time till last pdp update
+ */
+static void
+rspamd_rrd_update_pdp_step(struct rspamd_rrd_file *file,
+						   gdouble *pdp_new,
+						   gdouble *pdp_temp,
+						   gdouble interval,
+						   gulong pdp_diff)
+{
+	guint idx;
+
+	for (idx = 0; idx < file->stat_head->ds_cnt; idx++) {
+		rrd_value_t *prep = file->pdp_prep[idx].scratch;
+		gulong hb = file->ds_def[idx].par[RRD_DS_mrhb_cnt].lv;
+		gboolean new_unknown = isnan(pdp_new[idx]) ? TRUE : FALSE;
+
+		/* A known new point turns an unknown accumulated value into zero */
+		if (!new_unknown && isnan(prep[PDP_val].dv)) {
+			prep[PDP_val].dv = 0;
+		}
+
+		/* Check interval value for heartbeat for this DS */
+		if ((interval > hb) ||
+			(file->stat_head->pdp_step / 2.0 < prep[PDP_unkn_sec_cnt].lv)) {
+			pdp_temp[idx] = NAN;
+		}
+		else {
+			pdp_temp[idx] = prep[PDP_val].dv /
+							((double) (pdp_diff - prep[PDP_unkn_sec_cnt].lv));
+		}
+
+		/* Only now may the scratch state be replaced by the new point */
+		if (new_unknown) {
+			prep[PDP_unkn_sec_cnt].lv = interval;
+			prep[PDP_val].dv = NAN;
+		}
+		else {
+			prep[PDP_unkn_sec_cnt].lv = 0;
+			prep[PDP_val].dv = pdp_new[idx] / interval;
+		}
+
+		msg_debug_rrd("new temp PDP %ud, %.3f -> %.3f, scratch: %3f",
+					  idx, pdp_new[idx], pdp_temp[idx],
+					  prep[PDP_val].dv);
+	}
+}
+
+/**
+ * Update CDP for this rra
+ *
+ * For every data source either consolidates a finished CDP (when
+ * rra_steps[rra_index] > 0) or folds the temporary PDP into the pending
+ * CDP value. RRAs with pdp_cnt <= 1 have nothing to consolidate.
+ * @param file rrd file
+ * @param pdp_steps how much pdp steps elapsed from the last update
+ * @param pdp_offset offset from pdp
+ * @param rra_steps how much steps must be updated for each rra
+ * @param rra_index index of desired rra
+ * @param pdp_temp temporary pdp points
+ */
+static void
+rspamd_rrd_update_cdp(struct rspamd_rrd_file *file,
+					  gdouble pdp_steps,
+					  gdouble pdp_offset,
+					  gulong *rra_steps,
+					  gulong rra_index,
+					  gdouble *pdp_temp)
+{
+	guint i;
+	struct rrd_rra_def *rra;
+	rrd_value_t *scratch;
+	enum rrd_cf_type cf;
+	gdouble last_cdp = INFINITY, cur_cdp = INFINITY;
+	gulong pdp_in_cdp;
+
+	rra = &file->rra_def[rra_index];
+	cf = rrd_cf_from_string(rra->cf_nam);
+
+	/* Iterate over all DS for this RRA */
+	for (i = 0; i < file->stat_head->ds_cnt; i++) {
+		/* Get CDP for this RRA and DS */
+		scratch =
+			file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch;
+
+		if (rra->pdp_cnt <= 1) {
+			/* We have nothing to consolidate, but we may miss some pdp */
+			if (pdp_steps > 2) {
+				/* Just write PDP value */
+				scratch[CDP_primary_val].dv = pdp_temp[i];
+				scratch[CDP_secondary_val].dv = pdp_temp[i];
+			}
+
+			continue;
+		}
+
+		/* Do we have any CDP to update for this rra ? */
+		if (rra_steps[rra_index] == 0) {
+			/* In this case we just need to update cdp_prep for this RRA */
+			if (isnan(pdp_temp[i])) {
+				/* Just increase undefined zone */
+				scratch[CDP_unkn_pdp_cnt].lv += pdp_steps;
+			}
+			else {
+				/* Calculate cdp value */
+				last_cdp = scratch[CDP_val].dv;
+				switch (cf) {
+				case RRD_CF_AVERAGE:
+					if (isnan(last_cdp)) {
+						scratch[CDP_val].dv = pdp_temp[i] * pdp_steps;
+					}
+					else {
+						scratch[CDP_val].dv = last_cdp + pdp_temp[i] *
+														   pdp_steps;
+					}
+					break;
+				case RRD_CF_MAXIMUM:
+					scratch[CDP_val].dv = MAX(last_cdp, pdp_temp[i]);
+					break;
+				case RRD_CF_MINIMUM:
+					scratch[CDP_val].dv = MIN(last_cdp, pdp_temp[i]);
+					break;
+				case RRD_CF_LAST:
+					scratch[CDP_val].dv = pdp_temp[i];
+					break;
+				default:
+					scratch[CDP_val].dv = NAN;
+					break;
+				}
+			}
+
+			msg_debug_rrd("aggregate cdp %d with pdp %.3f, "
+						  "stored value: %.3f",
+						  i, pdp_temp[i], scratch[CDP_val].dv);
+
+			continue;
+		}
+
+		/* A CDP is complete for this RRA */
+		if (isnan(pdp_temp[i])) {
+			/* New pdp is nan */
+			/* Increment unknown points count */
+			scratch[CDP_unkn_pdp_cnt].lv += pdp_offset;
+			/* Reset secondary value */
+			scratch[CDP_secondary_val].dv = NAN;
+		}
+		else {
+			scratch[CDP_secondary_val].dv = pdp_temp[i];
+		}
+
+		/*
+		 * Check XFF for this rra. XFF is a fraction stored in the double
+		 * member of the parameter (see rrd_make_default_rra), so it must
+		 * be read through .dv; reading .lv reinterprets the bits as a
+		 * huge integer and the limit is never reached.
+		 */
+		if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt *
+											   rra->par[RRA_cdp_xff_val].dv) {
+			/* XFF is reached */
+			scratch[CDP_primary_val].dv = NAN;
+		}
+		else {
+			/* Need to initialize CDP using specified consolidation */
+			switch (cf) {
+			case RRD_CF_AVERAGE:
+				last_cdp =
+					isnan(scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv;
+				cur_cdp = isnan(pdp_temp[i]) ? 0.0 : pdp_temp[i];
+				scratch[CDP_primary_val].dv =
+					(last_cdp + cur_cdp *
+									pdp_offset) /
+					(rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv);
+				break;
+			case RRD_CF_MAXIMUM:
+				last_cdp =
+					isnan(scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv;
+				cur_cdp = isnan(pdp_temp[i]) ? -INFINITY : pdp_temp[i];
+				scratch[CDP_primary_val].dv = MAX(last_cdp, cur_cdp);
+				break;
+			case RRD_CF_MINIMUM:
+				last_cdp =
+					isnan(scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv;
+				cur_cdp = isnan(pdp_temp[i]) ? INFINITY : pdp_temp[i];
+				scratch[CDP_primary_val].dv = MIN(last_cdp, cur_cdp);
+				break;
+			case RRD_CF_LAST:
+			default:
+				scratch[CDP_primary_val].dv = pdp_temp[i];
+				last_cdp = INFINITY;
+				break;
+			}
+		}
+
+		/* Init carry of this CDP */
+		pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt;
+		if (pdp_in_cdp == 0 || isnan(pdp_temp[i])) {
+			/* Set overflow */
+			switch (cf) {
+			case RRD_CF_AVERAGE:
+				scratch[CDP_val].dv = 0;
+				break;
+			case RRD_CF_MAXIMUM:
+				scratch[CDP_val].dv = -INFINITY;
+				break;
+			case RRD_CF_MINIMUM:
+				scratch[CDP_val].dv = INFINITY;
+				break;
+			default:
+				scratch[CDP_val].dv = NAN;
+				break;
+			}
+		}
+		else {
+			/* Special carry for average */
+			if (cf == RRD_CF_AVERAGE) {
+				scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp;
+			}
+			else {
+				scratch[CDP_val].dv = pdp_temp[i];
+			}
+		}
+
+		scratch[CDP_unkn_pdp_cnt].lv = 0;
+
+		msg_debug_rrd("update cdp for DS %d with value %.3f, "
+					  "stored value: %.3f, carry: %.3f",
+					  i, last_cdp,
+					  scratch[CDP_primary_val].dv, scratch[CDP_val].dv);
+	}
+}
+
+/**
+ * Store the prepared primary CDP values into the next row of every RRA
+ * that has elapsed steps
+ * @param file rrd file
+ * @param rra_steps number of elapsed steps for each RRA, indexed by RRA number;
+ * an RRA whose entry is zero is left untouched
+ */
+void rspamd_rrd_write_rra(struct rspamd_rrd_file *file, gulong *rra_steps)
+{
+	guint rra_idx, ds_idx, ds_cnt;
+	struct rrd_rra_def *rra;
+	struct rrd_rra_ptr *ptr;
+	struct rrd_cdp_prep *cdp;
+	gdouble *rra_base, *dest;
+
+	ds_cnt = file->stat_head->ds_cnt;
+	/* Start of the values area that belongs to the RRA being visited */
+	rra_base = file->rrd_value;
+
+	for (rra_idx = 0; rra_idx < file->stat_head->rra_cnt; rra_idx++) {
+		rra = &file->rra_def[rra_idx];
+		ptr = &file->rra_ptr[rra_idx];
+
+		if (rra_steps[rra_idx] > 0) {
+			/* Advance the ring position, wrapping after the last row */
+			ptr->cur_row++;
+
+			if (ptr->cur_row >= rra->row_cnt) {
+				ptr->cur_row = 0;
+			}
+
+			/* CDP prep area and destination row of this RRA */
+			cdp = &file->cdp_prep[ds_cnt * rra_idx];
+			dest = rra_base + ds_cnt * ptr->cur_row;
+
+			/* One value per data source */
+			for (ds_idx = 0; ds_idx < ds_cnt; ds_idx++) {
+				dest[ds_idx] = cdp[ds_idx].scratch[CDP_primary_val].dv;
+				msg_debug_rrd("write cdp %d: %.3f", ds_idx, dest[ds_idx]);
+			}
+		}
+
+		/* Skip all rows of this RRA */
+		rra_base += rra->row_cnt * ds_cnt;
+	}
+}
+
+/**
+ * Add record to rrd file
+ * @param file rrd file object
+ * @param points array holding one gdouble per data source; its `len` is
+ * compared against ds_cnt * sizeof(gdouble), i.e. it is expected in bytes
+ * @param ticks time of this update in seconds; the fractional part is stored
+ * as microseconds in the live header
+ * @param err error pointer
+ * @return TRUE if the record has been processed, FALSE if arguments are wrong
+ */
+gboolean
+rspamd_rrd_add_record(struct rspamd_rrd_file *file,
+					  GArray *points,
+					  gdouble ticks,
+					  GError **err)
+{
+	gdouble interval, *pdp_new, *pdp_temp;
+	guint i;
+	glong seconds, microseconds;
+	gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step,
+		prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset;
+
+	/* Reject a missing points array as well: it is dereferenced below */
+	if (file == NULL || points == NULL ||
+		file->stat_head->ds_cnt * sizeof(gdouble) != points->len) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL,
+					"rrd add points failed: wrong arguments");
+		return FALSE;
+	}
+
+	/* Get interval */
+	seconds = (glong) ticks;
+	microseconds = (glong) ((ticks - seconds) * 1000000.);
+	interval = ticks - ((gdouble) file->live_head->last_up +
+						file->live_head->last_up_usec / 1000000.);
+
+	msg_debug_rrd("update rrd record after %.3f seconds", interval);
+
+	/* Update PDP preparation values */
+	pdp_new = g_malloc0(sizeof(gdouble) * file->stat_head->ds_cnt);
+	pdp_temp = g_malloc0(sizeof(gdouble) * file->stat_head->ds_cnt);
+	/* How much steps need to be updated in each RRA */
+	rra_steps = g_malloc0(sizeof(gulong) * file->stat_head->rra_cnt);
+
+	if (!rspamd_rrd_update_pdp_prep(file, (gdouble *) points->data, pdp_new,
+									interval)) {
+		g_set_error(err,
+					rrd_error_quark(), EINVAL,
+					"rrd update pdp failed: wrong arguments");
+		g_free(pdp_new);
+		g_free(pdp_temp);
+		g_free(rra_steps);
+		return FALSE;
+	}
+
+	/* Calculate elapsed steps */
+	/* Age in seconds for previous pdp store */
+	prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step;
+	/* Time in seconds for last pdp update */
+	prev_pdp_step = file->live_head->last_up - prev_pdp_age;
+	/* Age in seconds from current time to required pdp time */
+	cur_pdp_age = seconds % file->stat_head->pdp_step;
+	/* Time of desired pdp step */
+	cur_pdp_step = seconds - cur_pdp_age;
+	cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step;
+	pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step;
+
+
+	if (pdp_steps == 0) {
+		/* Simple update of pdp prep */
+		for (i = 0; i < file->stat_head->ds_cnt; i++) {
+			if (isnan(pdp_new[i])) {
+				/* Increment unknown period */
+				file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor(
+					interval);
+			}
+			else {
+				if (isnan(file->pdp_prep[i].scratch[PDP_val].dv)) {
+					/* Reset pdp to the current value */
+					file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i];
+				}
+				else {
+					/* Increment pdp value */
+					file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i];
+				}
+			}
+		}
+	}
+	else {
+		/* Complex update of PDP, CDP and RRA */
+
+		/* Update PDP for this step */
+		rspamd_rrd_update_pdp_step(file,
+								   pdp_new,
+								   pdp_temp,
+								   interval,
+								   pdp_steps * file->stat_head->pdp_step);
+
+
+		/* Update CDP points for each RRA*/
+		for (i = 0; i < file->stat_head->rra_cnt; i++) {
+			/* Calculate pdp offset for this RRA */
+			pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count %
+															file->rra_def[i].pdp_cnt;
+			/* How much steps we got for this RRA */
+			if (pdp_offset <= pdp_steps) {
+				rra_steps[i] =
+					(pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1;
+			}
+			else {
+				/* This rra have not passed enough pdp steps */
+				rra_steps[i] = 0;
+			}
+
+			msg_debug_rrd("cdp: %ud, rra steps: %ul(%ul), pdp steps: %ul",
+						  i, rra_steps[i], pdp_offset, pdp_steps);
+
+			/* Update this specific CDP */
+			rspamd_rrd_update_cdp(file,
+								  pdp_steps,
+								  pdp_offset,
+								  rra_steps,
+								  i,
+								  pdp_temp);
+		}
+
+		/* Write RRA */
+		rspamd_rrd_write_rra(file, rra_steps);
+	}
+	file->live_head->last_up = seconds;
+	file->live_head->last_up_usec = microseconds;
+
+	/* Sync and invalidate */
+	msync(file->map, file->size, MS_ASYNC | MS_INVALIDATE);
+
+	g_free(pdp_new);
+	g_free(pdp_temp);
+	g_free(rra_steps);
+
+	return TRUE;
+}
+
+/**
+ * Close rrd file: unmap it, close its descriptor and free the object
+ * @param file rrd file object (must not be used afterwards)
+ * @return 0 on success, -1 with errno set to EINVAL if file is NULL
+ */
+gint rspamd_rrd_close(struct rspamd_rrd_file *file)
+{
+	gint ret = -1;
+
+	if (file != NULL) {
+		/* Release the mapping and the descriptor first, then owned strings */
+		munmap(file->map, file->size);
+		close(file->fd);
+		g_free(file->filename);
+		g_free(file->id);
+
+		g_free(file);
+		ret = 0;
+	}
+	else {
+		errno = EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * Create an rrd file at `path` with the default rspamd layout: one COUNTER
+ * data source per action and four AVERAGE archives. The file is finalized
+ * only when `finalize` is TRUE. On any failure the partially built file
+ * object is closed and NULL is returned (with `err` filled by the failing call).
+ */
+static struct rspamd_rrd_file *
+rspamd_rrd_create_file(const gchar *path, gboolean finalize, GError **err)
+{
+	/* pdp count and row count of each default archive, in rra[] order */
+	static const struct {
+		gulong pdp_cnt;
+		gulong rows;
+	} layout[] = {
+		{60, 24 * 60},			   /* Once per minute for 1 day */
+		{5 * 60, 7 * 24 * 60 / 5}, /* Once per 5 minutes for 1 week */
+		{60 * 10, 30 * 24 * 6},	   /* Once per 10 mins for 1 month */
+		{60 * 60, 365 * 24},	   /* Once per hour for 1 year */
+	};
+	struct rspamd_rrd_file *file;
+	struct rrd_ds_def ds[RSPAMD_RRD_DS_COUNT];
+	struct rrd_rra_def rra[RSPAMD_RRD_RRA_COUNT];
+	GArray ar;
+	gint i;
+	guint k;
+
+	/* Try to create new rrd file */
+	file = rspamd_rrd_create(path, RSPAMD_RRD_DS_COUNT, RSPAMD_RRD_RRA_COUNT,
+							 1, rspamd_get_calendar_ticks(), err);
+
+	if (file == NULL) {
+		return NULL;
+	}
+
+	/* Data sources: one counter per action */
+	for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i++) {
+		rrd_make_default_ds(rspamd_action_to_str(i),
+							rrd_dst_to_string(RRD_DST_COUNTER), 1, &ds[i]);
+	}
+
+	/* The array length is passed in bytes */
+	ar.data = (gchar *) ds;
+	ar.len = sizeof(ds);
+
+	if (!rspamd_rrd_add_ds(file, &ar, err)) {
+		goto failed;
+	}
+
+	/* Archives */
+	for (k = 0; k < sizeof(layout) / sizeof(layout[0]); k++) {
+		rrd_make_default_rra(rrd_cf_to_string(RRD_CF_AVERAGE),
+							 layout[k].pdp_cnt, layout[k].rows, &rra[k]);
+	}
+
+	ar.data = (gchar *) rra;
+	ar.len = sizeof(rra);
+
+	if (!rspamd_rrd_add_rra(file, &ar, err)) {
+		goto failed;
+	}
+
+	if (finalize && !rspamd_rrd_finalize(file, err)) {
+		goto failed;
+	}
+
+	return file;
+
+failed:
+	rspamd_rrd_close(file);
+
+	return NULL;
+}
+
+/*
+ * Copy everything that belongs to data source `idx_old` of `old` into data
+ * source `idx_new` of `cur`: the PDP prep entry, the CDP prep entry of every
+ * RRA and the stored column of values in every RRA. The number of RRAs and
+ * the row counts are taken from `old`.
+ */
+static void
+rspamd_rrd_convert_ds(struct rspamd_rrd_file *old,
+					  struct rspamd_rrd_file *cur, gint idx_old, gint idx_new)
+{
+	gdouble *src, *dst;
+	gulong n_rra, rra_idx, row, rows, src_width, dst_width;
+
+	n_rra = old->stat_head->rra_cnt;
+
+	/* PDP preparation area holds one entry per data source */
+	memcpy(&cur->pdp_prep[idx_new], &old->pdp_prep[idx_old],
+		   sizeof(struct rrd_pdp_prep));
+
+	src = old->rrd_value;
+	dst = cur->rrd_value;
+	/* Row width (number of data sources) differs between the files */
+	src_width = old->stat_head->ds_cnt;
+	dst_width = cur->stat_head->ds_cnt;
+
+	for (rra_idx = 0; rra_idx < n_rra; rra_idx++) {
+		/* CDP preparation area holds ds_cnt entries per RRA */
+		memcpy(&cur->cdp_prep[rra_idx * dst_width] + idx_new,
+			   &old->cdp_prep[rra_idx * src_width] + idx_old,
+			   sizeof(struct rrd_cdp_prep));
+
+		rows = old->rra_def[rra_idx].row_cnt;
+
+		/* Copy one column of this RRA */
+		for (row = 0; row < rows; row++) {
+			dst[row * dst_width + idx_new] = src[row * src_width + idx_old];
+		}
+
+		/* Move both cursors to the first row of the next RRA */
+		dst += rows * dst_width;
+		src += rows * src_width;
+	}
+}
+
+/*
+ * Convert an rrd file with the old four data sources layout to the current
+ * one. A new file is built at "<path>.new", data of the old data sources is
+ * copied into it and the new file is then renamed over `path`.
+ *
+ * The old file is not unlinked beforehand: rename() replaces the target
+ * atomically, so a failed rename leaves the old file in place instead of
+ * losing both files.
+ *
+ * Returns the new file object or NULL on failure (`err` is set); `old` is
+ * neither closed nor freed here.
+ */
+static struct rspamd_rrd_file *
+rspamd_rrd_convert(const gchar *path, struct rspamd_rrd_file *old,
+				   GError **err)
+{
+	struct rspamd_rrd_file *rrd;
+	gchar tpath[PATH_MAX];
+	gint saved_errno;
+
+	g_assert(old != NULL);
+
+	rspamd_snprintf(tpath, sizeof(tpath), "%s.new", path);
+	rrd = rspamd_rrd_create_file(tpath, TRUE, err);
+
+	if (rrd == NULL) {
+		return NULL;
+	}
+
+	/* Copy old data */
+	memcpy(rrd->live_head, old->live_head, sizeof(*rrd->live_head));
+	memcpy(rrd->rra_ptr, old->rra_ptr,
+		   sizeof(*old->rra_ptr) * rrd->stat_head->rra_cnt);
+
+	/*
+	 * Old DSes:
+	 * 0 - spam -> reject
+	 * 1 - probable spam -> add header
+	 * 2 - greylist -> greylist
+	 * 3 - ham -> ham
+	 */
+	rspamd_rrd_convert_ds(old, rrd, 0, METRIC_ACTION_REJECT);
+	rspamd_rrd_convert_ds(old, rrd, 1, METRIC_ACTION_ADD_HEADER);
+	rspamd_rrd_convert_ds(old, rrd, 2, METRIC_ACTION_GREYLIST);
+	rspamd_rrd_convert_ds(old, rrd, 3, METRIC_ACTION_NOACTION);
+
+	/* Atomically replace the old file with the converted one */
+	if (rename(tpath, path) == -1) {
+		/* Keep errno of rename(): the calls below may overwrite it */
+		saved_errno = errno;
+		g_set_error(err, rrd_error_quark(), saved_errno, "cannot rename old rrd file %s: %s",
+					path, strerror(saved_errno));
+		unlink(tpath);
+		rspamd_rrd_close(rrd);
+
+		return NULL;
+	}
+
+	return rrd;
+}
+
+/*
+ * Open the rspamd rrd file at `path`, creating it with the default layout
+ * when it is not readable. An existing file must have RSPAMD_RRD_RRA_COUNT
+ * archives; a file with RSPAMD_RRD_OLD_DS_COUNT data sources is converted to
+ * the current layout, a file with RSPAMD_RRD_DS_COUNT data sources is returned
+ * as is and any other file is rejected with EINVAL.
+ * Returns NULL on failure with `err` set.
+ */
+struct rspamd_rrd_file *
+rspamd_rrd_file_default(const gchar *path,
+						GError **err)
+{
+	struct rspamd_rrd_file *file, *nf;
+
+	g_assert(path != NULL);
+
+	if (access(path, R_OK) == -1) {
+		/* Nothing readable at this path: create a fresh file */
+		file = rspamd_rrd_create_file(path, TRUE, err);
+
+		return file;
+	}
+
+	/* We can open rrd file */
+	file = rspamd_rrd_open(path, err);
+
+	if (file == NULL) {
+		return NULL;
+	}
+
+	if (file->stat_head->rra_cnt == RSPAMD_RRD_RRA_COUNT) {
+		if (file->stat_head->ds_cnt == RSPAMD_RRD_OLD_DS_COUNT) {
+			/* Old rrd, need to convert */
+			msg_info_rrd("rrd file %s is not suitable for rspamd, convert it",
+						 path);
+
+			nf = rspamd_rrd_convert(path, file, err);
+			rspamd_rrd_close(file);
+
+			return nf;
+		}
+
+		if (file->stat_head->ds_cnt == RSPAMD_RRD_DS_COUNT) {
+			return file;
+		}
+	}
+
+	/* Unexpected number of archives or data sources */
+	msg_err_rrd("rrd file is not suitable for rspamd: it has "
+				"%ul ds and %ul rra",
+				file->stat_head->ds_cnt,
+				file->stat_head->rra_cnt);
+	g_set_error(err, rrd_error_quark(), EINVAL, "bad rrd file");
+	rspamd_rrd_close(file);
+
+	return NULL;
+}
+
+/*
+ * Describe the stored data of one RRA.
+ * `rra_num` is a zero-based index and must be less than the number of RRAs
+ * in the file, otherwise NULL is returned.
+ * The result is allocated with g_malloc0 and owned by the caller; its `data`
+ * member points into file->rrd_value (it is not a copy).
+ */
+struct rspamd_rrd_query_result *
+rspamd_rrd_query(struct rspamd_rrd_file *file,
+				 gulong rra_num)
+{
+	struct rspamd_rrd_query_result *res;
+	struct rrd_rra_def *rra;
+	const gdouble *rra_offset = NULL;
+	guint i;
+
+	g_assert(file != NULL);
+
+
+	/* Valid indices are 0 .. rra_cnt - 1: rra_cnt itself is out of bounds */
+	if (rra_num >= file->stat_head->rra_cnt) {
+		msg_err_rrd("requested unexisting rra: %l", rra_num);
+
+		return NULL;
+	}
+
+	res = g_malloc0(sizeof(*res));
+	res->ds_count = file->stat_head->ds_cnt;
+	res->last_update = (gdouble) file->live_head->last_up +
+					   ((gdouble) file->live_head->last_up_usec / 1e6f);
+	res->pdp_per_cdp = file->rra_def[rra_num].pdp_cnt;
+	res->rra_rows = file->rra_def[rra_num].row_cnt;
+	rra_offset = file->rrd_value;
+
+	/* Skip the rows of all preceding RRAs to find the requested one */
+	for (i = 0; i < file->stat_head->rra_cnt; i++) {
+		rra = &file->rra_def[i];
+
+		if (i == rra_num) {
+			res->cur_row = file->rra_ptr[i].cur_row % rra->row_cnt;
+			break;
+		}
+
+		rra_offset += rra->row_cnt * res->ds_count;
+	}
+
+	res->data = rra_offset;
+
+	return res;
+}
diff --git a/src/libutil/rrd.h b/src/libutil/rrd.h
new file mode 100644
index 0000000..3d81477
--- /dev/null
+++ b/src/libutil/rrd.h
@@ -0,0 +1,362 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RRD_H_
+#define RRD_H_
+
+#include "config.h"
+
+/**
+ * This file contains basic structure and functions to operate with round-robin databases
+ */
+
+/* Identification values matching the first fields of struct rrd_file_head */
+#define RRD_COOKIE "RRD"
+#define RRD_VERSION "0003"
+/* Reference double stored in the header to check the double representation */
+#define RRD_FLOAT_COOKIE ((double) 8.642135E130)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Generic parameter/scratch cell: read either as an integer or as a double */
+typedef union {
+ unsigned long lv; /* integer view */
+ double dv; /* floating point view */
+} rrd_value_t;
+
+/* Static header of an rrd file (see rspamd_rrd_file.stat_head) */
+struct rrd_file_head {
+ /* Data Base Identification Section ** */
+ gchar cookie[4]; /* RRD */
+ gchar version[5]; /* version of the format */
+ gdouble float_cookie; /* is it the correct double representation ? */
+
+ /* Data Base Structure Definition **** */
+ gulong ds_cnt; /* how many different ds provide input to the rrd */
+ gulong rra_cnt; /* how many rras will be maintained in the rrd */
+ gulong pdp_step; /* pdp interval in seconds */
+
+ rrd_value_t par[10]; /* global parameters ... unused
+ at the moment */
+};
+
+/*
+ * Data source types; converted to and from their string names by
+ * rrd_dst_from_string() and rrd_dst_to_string()
+ */
+enum rrd_dst_type {
+ RRD_DST_INVALID = -1,
+ RRD_DST_COUNTER = 0, /* data source types available */
+ RRD_DST_ABSOLUTE,
+ RRD_DST_GAUGE,
+ RRD_DST_DERIVE,
+ RRD_DST_CDEF
+};
+/* Named slots of the per data source parameters (presumably rrd_ds_def.par) */
+enum rrd_ds_param {
+ RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */
+ RRD_DS_min_val, /* the processed input of a ds must */
+ RRD_DS_max_val, /* be between max_val and min_val
+ * both can be set to UNKNOWN if you
+ * do not care. Data outside the limits
+ * set to UNKNOWN */
+ RRD_DS_cdef = RRD_DS_mrhb_cnt /* shares slot 0: pointer to encoded rpn
+ * expression, only applies to DST_CDEF */
+};
+
+
+/* The magic number here is one less than DS_NAM_SIZE */
+#define RRD_DS_NAM_SIZE 20
+
+#define RRD_DST_SIZE 20
+
+/* Definition of a single data source */
+struct rrd_ds_def {
+ gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */
+ gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */
+ rrd_value_t par[10]; /* index of this array see ds_param_en
+ * (presumably enum rrd_ds_param in this header) */
+};
+
+/* RRA definition */
+
+/*
+ * Consolidation functions; converted to and from their string names by
+ * rrd_cf_from_string() and rrd_cf_to_string()
+ */
+enum rrd_cf_type {
+ RRD_CF_INVALID = -1,
+ RRD_CF_AVERAGE = 0, /* data consolidation functions */
+ RRD_CF_MINIMUM,
+ RRD_CF_MAXIMUM,
+ RRD_CF_LAST,
+};
+
+
+/* Number of parameter slots in rrd_rra_def.par */
+#define MAX_RRA_PAR_EN 10
+
+/* Named slots of rrd_rra_def.par */
+enum rrd_rra_param {
+ RRA_cdp_xff_val = 0, /* what part of the consolidated
+ * datapoint must be known, to produce a
+ * valid entry in the rra */
+ /* NOTE(review): the CDP update code in rrd.c reads this slot through .lv
+ * (integer view) - confirm this matches how the value is stored */
+};
+
+
+/* Size of the buffer holding the consolidation function name */
+#define RRD_CF_NAM_SIZE 20
+
+/* Definition of a single round robin archive */
+struct rrd_rra_def {
+ gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */
+ gulong row_cnt; /* number of entries in the store */
+ gulong pdp_cnt; /* how many primary data points are
+ * required for a consolidated data point?*/
+ rrd_value_t par[MAX_RRA_PAR_EN]; /* index see rra_param_en
+ * (presumably enum rrd_rra_param) */
+};
+
+/* Time of the last update; rewritten by rspamd_rrd_add_record() on every call that passes validation */
+struct rrd_live_head {
+ time_t last_up; /* when was rrd last updated */
+ glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */
+};
+
+/* Size of the rrd_pdp_prep.last_ds buffer */
+#define RRD_LAST_DS_LEN 30
+
+/* Named slots of rrd_pdp_prep.scratch */
+enum rrd_pdp_param {
+ PDP_unkn_sec_cnt = 0, /* how many seconds of the current
+ * pdp value is unknown data? */
+ PDP_val /* current value of the pdp.
+ this depends on dst */
+};
+
+/* Primary data point preparation area: one entry per data source */
+struct rrd_pdp_prep {
+ gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data
+ * source. this is stored in ASCII
+ * to cater for very large counters
+ * we might encounter in connection
+ * with SNMP. */
+ rrd_value_t scratch[10]; /* contents according to pdp_par_en
+ * (slots named by enum rrd_pdp_param) */
+};
+
+/* Number of scratch slots in rrd_cdp_prep */
+#define RRD_MAX_CDP_PAR_EN 10
+#define RRD_MAX_CDP_FAILURES_IDX 8
+/* max CDP scratch entries avail to record violations for a FAILURES RRA */
+#define RRD_MAX_FAILURES_WINDOW_LEN 28
+
+/*
+ * Named slots of rrd_cdp_prep.scratch. Several names alias the same slot.
+ * The rrd.c code that updates and writes CDPs uses CDP_val,
+ * CDP_unkn_pdp_cnt, CDP_primary_val and CDP_secondary_val.
+ */
+enum rrd_cdp_param {
+ CDP_val = 0,
+ /* the base_interval is always an
+ * average */
+ CDP_unkn_pdp_cnt,
+ /* how many unknown pdp were
+ * integrated. This and the cdp_xff
+ * will decide if this is going to
+ * be a UNKNOWN or a valid value */
+ CDP_hw_intercept,
+ /* Current intercept coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_last_intercept,
+ /* Last iteration intercept coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_slope,
+ /* Current slope coefficient for the Holt-Winters
+ * prediction algorithm. */
+ CDP_hw_last_slope,
+ /* Last iteration slope coefficient. */
+ CDP_null_count,
+ /* Number of sequential Unknown (DNAN) values + 1 preceding
+ * the current prediction.
+ * */
+ CDP_last_null_count,
+ /* Last iteration count of Unknown (DNAN) values. */
+ CDP_primary_val = 8,
+ /* optimization for bulk updates: the value of the first CDP
+ * value to be written in the bulk update. */
+ CDP_secondary_val = 9,
+ /* optimization for bulk updates: the value of subsequent
+ * CDP values to be written in the bulk update. */
+ CDP_hw_seasonal = CDP_hw_intercept,
+ /* Current seasonal coefficient for the Holt-Winters
+ * prediction algorithm. This is stored in CDP prep to avoid
+ * redundant seek operations. */
+ CDP_hw_last_seasonal = CDP_hw_last_intercept,
+ /* Last iteration seasonal coefficient. */
+ CDP_seasonal_deviation = CDP_hw_intercept,
+ CDP_last_seasonal_deviation = CDP_hw_last_intercept,
+ CDP_init_seasonal = CDP_null_count
+};
+
+/*
+ * Consolidated data point preparation area. The rrd.c code addresses it as
+ * cdp_prep[rra_index * ds_cnt + ds_index], i.e. one entry per (RRA, DS) pair.
+ */
+struct rrd_cdp_prep {
+ rrd_value_t scratch[RRD_MAX_CDP_PAR_EN];
+ /* contents according to cdp_par_en *
+ * init state should be NAN */
+};
+
+/* Per-RRA write position; advanced with wrap-around by rspamd_rrd_write_rra() */
+struct rrd_rra_ptr {
+ gulong cur_row; /* current row in the rra */
+};
+
+/*
+ * Final rrd file structure: pointers to the sections of an rrd file plus
+ * the resources released by rspamd_rrd_close()
+ */
+struct rspamd_rrd_file {
+ struct rrd_file_head *stat_head; /* the static header */
+ struct rrd_ds_def *ds_def; /* list of data source definitions */
+ struct rrd_rra_def *rra_def; /* list of round robin archive def */
+ struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */
+ struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */
+ struct rrd_cdp_prep *cdp_prep; /* cdp prep area */
+ struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */
+ gdouble *rrd_value; /* list of rrd values */
+
+ gchar *filename; /* freed with g_free on close */
+ guint8 *map; /* mmapped area */
+ gsize size; /* its size */
+ gboolean finalized;
+ gchar *id; /* freed with g_free on close */
+ gint fd; /* descriptor closed on close */
+};
+
+
+/* Public API */
+
+/**
+ * Open (and mmap) existing RRD file
+ * @param filename path
+ * @param err error pointer
+ * @return rrd file structure or NULL on failure
+ */
+struct rspamd_rrd_file *rspamd_rrd_open(const gchar *filename, GError **err);
+
+/**
+ * Create basic header for rrd file
+ * @param filename file path
+ * @param ds_count number of data sources
+ * @param rra_count number of round robin archives
+ * @param pdp_step step of primary data points
+ * @param initial_ticks initial timestamp of the file (presumably used as the
+ * time of the last update - confirm against the implementation)
+ * @param err error pointer
+ * @return rrd file structure or NULL on failure
+ */
+struct rspamd_rrd_file *rspamd_rrd_create(const gchar *filename,
+ gulong ds_count,
+ gulong rra_count,
+ gulong pdp_step,
+ gdouble initial_ticks,
+ GError **err);
+
+/**
+ * Add data sources to rrd file
+ * @param file rrd file object
+ * @param ds array of struct rrd_ds_def (the caller in rrd.c sets its len
+ * to the size of the array in bytes)
+ * @param err error pointer
+ * @return TRUE if data sources were added
+ */
+gboolean rspamd_rrd_add_ds(struct rspamd_rrd_file *file,
+ GArray *ds,
+ GError **err);
+
+/**
+ * Add round robin archives to rrd file
+ * @param file rrd file object
+ * @param rra array of struct rrd_rra_def (the caller in rrd.c sets its len
+ * to the size of the array in bytes)
+ * @param err error pointer
+ * @return TRUE if archives were added
+ */
+gboolean rspamd_rrd_add_rra(struct rspamd_rrd_file *file,
+ GArray *rra,
+ GError **err);
+
+/**
+ * Finalize rrd file header and initialize all RRA in the file
+ * @param file rrd file object
+ * @param err error pointer
+ * @return TRUE if rrd file is ready for use
+ */
+gboolean rspamd_rrd_finalize(struct rspamd_rrd_file *file, GError **err);
+
+/**
+ * Add record to rrd file
+ * @param file rrd file object
+ * @param points array of gdouble, one per data source; its len is checked
+ * against ds count * sizeof(gdouble)
+ * @param ticks time of this update in seconds (may have a fractional part)
+ * @param err error pointer
+ * @return TRUE if the record has been processed
+ */
+gboolean rspamd_rrd_add_record(struct rspamd_rrd_file *file,
+ GArray *points,
+ gdouble ticks,
+ GError **err);
+
+/**
+ * Close rrd file: unmap it, close its descriptor and free the object
+ * @param file rrd file object
+ * @return 0 on success, -1 (errno is set to EINVAL) if file is NULL
+ */
+gint rspamd_rrd_close(struct rspamd_rrd_file *file);
+
+/*
+ * Conversion functions
+ */
+
+/**
+ * Convert rrd dst type from string to numeric value
+ */
+enum rrd_dst_type rrd_dst_from_string(const gchar *str);
+
+/**
+ * Convert numeric representation of dst to string
+ */
+const gchar *rrd_dst_to_string(enum rrd_dst_type type);
+
+/**
+ * Convert rrd consolidation function type from string to numeric value
+ */
+enum rrd_cf_type rrd_cf_from_string(const gchar *str);
+
+/**
+ * Convert numeric representation of cf to string
+ */
+const gchar *rrd_cf_to_string(enum rrd_cf_type type);
+
+/* Default RRA and DS */
+
+/**
+ * Create default RRA
+ * @param cf_name name of the consolidation function (see rrd_cf_to_string)
+ * @param pdp_cnt number of primary data points per consolidated data point
+ * @param rows number of rows in the archive
+ * @param rra definition to fill
+ */
+void rrd_make_default_rra(const gchar *cf_name,
+ gulong pdp_cnt,
+ gulong rows,
+ struct rrd_rra_def *rra);
+
+/**
+ * Create default DS
+ * @param name name of the data source
+ * @param type name of the data source type (see rrd_dst_to_string)
+ * @param pdp_step step of primary data points
+ * @param ds definition to fill
+ */
+void rrd_make_default_ds(const gchar *name,
+ const gchar *type,
+ gulong pdp_step,
+ struct rrd_ds_def *ds);
+
+/**
+ * Open or create the default rspamd rrd file; an existing file that uses the
+ * old set of data sources is converted, an unsuitable file is rejected
+ * @param path path to the file (must not be NULL)
+ * @param err error pointer
+ * @return rrd file structure or NULL on failure
+ */
+struct rspamd_rrd_file *rspamd_rrd_file_default(const gchar *path,
+ GError **err);
+
+/**
+ * Returned by querying rrd database
+ */
+struct rspamd_rrd_query_result {
+ gulong rra_rows; /* number of rows in the queried RRA */
+ gulong pdp_per_cdp; /* pdp count of the queried RRA */
+ gulong ds_count; /* number of data sources, i.e. values per row */
+ gdouble last_update; /* last update time in seconds, including microseconds */
+ gulong cur_row; /* current row of the RRA, modulo rra_rows */
+ const gdouble *data; /* first value of the RRA; points into the rrd file
+ * object, it is not a copy */
+};
+
+/**
+ * Return RRA data
+ * @param file rrd file
+ * @param rra_num number of rra to return data for
+ * @return query result structure or NULL if rra_num is rejected as out of
+ * range; the structure is allocated with g_malloc0, so it should be released
+ * with g_free after usage
+ */
+struct rspamd_rrd_query_result *rspamd_rrd_query(struct rspamd_rrd_file *file,
+ gulong rra_num);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RRD_H_ */
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
new file mode 100644
index 0000000..42d5168
--- /dev/null
+++ b/src/libutil/shingles.c
@@ -0,0 +1,412 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "shingles.h"
+#include "fstring.h"
+#include "cryptobox.h"
+#include "images.h"
+#include "libstat/stat_api.h"
+
+/* Number of consecutive tokens combined into a single hash */
+#define SHINGLES_WINDOW 3
+/* Size in bytes of a key in the derived keys cache */
+#define SHINGLES_KEY_SIZE rspamd_cryptobox_SIPKEYBYTES
+
+/* Hash callback of the keys cache: hashes SHINGLES_KEY_SIZE bytes of the key */
+static guint
+rspamd_shingles_keys_hash(gconstpointer k)
+{
+	guint digest;
+
+	digest = rspamd_cryptobox_fast_hash(k, SHINGLES_KEY_SIZE,
+										rspamd_hash_seed());
+
+	return digest;
+}
+
+/* Equality callback of the keys cache: bytewise compare of two keys */
+static gboolean
+rspamd_shingles_keys_equal(gconstpointer k1, gconstpointer k2)
+{
+	if (memcmp(k1, k2, SHINGLES_KEY_SIZE) != 0) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* Value destructor of the keys cache: frees every derived key and the vector */
+static void
+rspamd_shingles_keys_free(gpointer p)
+{
+	guchar **vec = p;
+	guint n = 0;
+
+	while (n < RSPAMD_SHINGLE_SIZE) {
+		g_free(vec[n]);
+		n++;
+	}
+
+	g_free(vec);
+}
+
+/* Allocate a vector of RSPAMD_SHINGLE_SIZE zero-filled keys */
+static guchar **
+rspamd_shingles_keys_new(void)
+{
+	guchar **vec;
+	guint n = 0;
+
+	vec = g_malloc0(sizeof(guchar *) * RSPAMD_SHINGLE_SIZE);
+
+	while (n < RSPAMD_SHINGLE_SIZE) {
+		vec[n] = g_malloc0(sizeof(guchar) * SHINGLES_KEY_SIZE);
+		n++;
+	}
+
+	return vec;
+}
+
+/*
+ * Return the vector of RSPAMD_SHINGLE_SIZE keys derived from `key`.
+ * Derived keys are kept in a function-local static hash table keyed by a
+ * copy of `key`, so each distinct key is expanded only once. The returned
+ * vector is owned by that table.
+ */
+static guchar **
+rspamd_shingles_get_keys_cached(const guchar key[SHINGLES_KEY_SIZE])
+{
+	static GHashTable *ht = NULL;
+	guchar **derived, *stored_key;
+	rspamd_cryptobox_hash_state_t st;
+	const guchar *chain_in;
+	guchar digest[rspamd_cryptobox_HASHBYTES];
+	guint n;
+
+	if (ht != NULL) {
+		derived = g_hash_table_lookup(ht, key);
+
+		if (derived != NULL) {
+			/* Already expanded */
+			return derived;
+		}
+	}
+	else {
+		ht = g_hash_table_new_full(rspamd_shingles_keys_hash,
+								   rspamd_shingles_keys_equal, g_free, rspamd_shingles_keys_free);
+	}
+
+	derived = rspamd_shingles_keys_new();
+	stored_key = g_malloc(SHINGLES_KEY_SIZE);
+	memcpy(stored_key, key, SHINGLES_KEY_SIZE);
+
+	/*
+	 * Generate keys as a chain: every derived key is the first 16 bytes of
+	 * the hash of the previous key, the chain starts with the caller's key.
+	 */
+	rspamd_cryptobox_hash_init(&st, NULL, 0);
+	chain_in = key;
+	n = 0;
+
+	while (n < RSPAMD_SHINGLE_SIZE) {
+		rspamd_cryptobox_hash_update(&st, chain_in, 16);
+		rspamd_cryptobox_hash_final(&st, digest);
+
+		memcpy(derived[n], digest, 16);
+		/* Reset the state for the next link of the chain */
+		rspamd_cryptobox_hash_init(&st, NULL, 0);
+		chain_in = derived[n];
+		n++;
+	}
+
+	g_hash_table_insert(ht, stored_key, derived);
+
+	return derived;
+}
+
+/*
+ * Build shingles from an array of rspamd_stat_token_t.
+ * Tokens flagged RSPAMD_STAT_TOKEN_FLAG_SKIPPED or having an empty stemmed
+ * form are ignored. The remaining tokens are hashed over a window of
+ * SHINGLES_WINDOW with RSPAMD_SHINGLE_SIZE derived keys, producing one vector
+ * of hashes per key; `filter` then reduces each vector to a single value.
+ * The result is allocated from `pool` when it is not NULL, otherwise with
+ * g_malloc. NULL is returned when a usable token cannot be fetched.
+ */
+struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
+ rspamd_shingles_from_text(GArray *input,
+ const guchar key[16],
+ rspamd_mempool_t *pool,
+ rspamd_shingles_filter filter,
+ gpointer filterd,
+ enum rspamd_shingle_alg alg)
+{
+ struct rspamd_shingle *res;
+ guint64 **hashes;
+ guchar **keys;
+ rspamd_fstring_t *row;
+ rspamd_stat_token_t *word;
+ guint64 val;
+ gint i, j, k;
+ gsize hlen, ilen = 0, beg = 0, widx = 0;
+ enum rspamd_cryptobox_fast_hash_type ht;
+
+ if (pool != NULL) {
+ res = rspamd_mempool_alloc(pool, sizeof(*res));
+ }
+ else {
+ res = g_malloc(sizeof(*res));
+ }
+
+ row = rspamd_fstring_sized_new(256);
+
+ /* Count usable tokens */
+ for (i = 0; i < input->len; i++) {
+ word = &g_array_index(input, rspamd_stat_token_t, i);
+
+ if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) {
+ ilen++;
+ }
+ }
+
+ /* Init hashes pipes and keys */
+ hashes = g_malloc(sizeof(*hashes) * RSPAMD_SHINGLE_SIZE);
+ /* Number of window positions, at least one */
+ hlen = ilen > SHINGLES_WINDOW ? (ilen - SHINGLES_WINDOW + 1) : 1;
+ keys = rspamd_shingles_get_keys_cached(key);
+
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ hashes[i] = g_malloc(hlen * sizeof(guint64));
+ }
+
+ /* Now parse input words into a vector of hashes using rolling window */
+ if (alg == RSPAMD_SHINGLES_OLD) {
+ /* Old algorithm: collect stemmed text in `row` and siphash it per key */
+ for (i = 0; i <= (gint) ilen; i++) {
+ if (i - beg >= SHINGLES_WINDOW || i == (gint) ilen) {
+ for (j = beg; j < i; j++) {
+
+ /*
+ * NOTE(review): widx is not advanced inside this loop, so
+ * every j finds the same token - confirm this is intended;
+ * changing it would alter the generated hashes.
+ */
+ word = NULL;
+ while (widx < input->len) {
+ word = &g_array_index(input, rspamd_stat_token_t, widx);
+
+ if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) {
+ widx++;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (word == NULL) {
+ /* Nothing but exceptions */
+ /* i is reused here: the function returns right after */
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ g_free(hashes[i]);
+ }
+
+ g_free(hashes);
+
+ if (pool == NULL) {
+ g_free(res);
+ }
+
+ rspamd_fstring_free(row);
+
+ return NULL;
+ }
+
+ row = rspamd_fstring_append(row, word->stemmed.begin,
+ word->stemmed.len);
+ }
+
+ /* Now we need to create a new row here */
+ for (j = 0; j < RSPAMD_SHINGLE_SIZE; j++) {
+ rspamd_cryptobox_siphash((guchar *) &val, row->str, row->len,
+ keys[j]);
+ g_assert(hlen > beg);
+ hashes[j][beg] = val;
+ }
+
+ beg++;
+ widx++;
+
+ /* Start the next window with an empty row */
+ row = rspamd_fstring_assign(row, "", 0);
+ }
+ }
+ }
+ else {
+ /* One sliding window of SHINGLES_WINDOW token hashes per key */
+ guint64 window[SHINGLES_WINDOW * RSPAMD_SHINGLE_SIZE], seed;
+
+ switch (alg) {
+ case RSPAMD_SHINGLES_XXHASH:
+ ht = RSPAMD_CRYPTOBOX_XXHASH64;
+ break;
+ case RSPAMD_SHINGLES_MUMHASH:
+ ht = RSPAMD_CRYPTOBOX_MUMHASH;
+ break;
+ default:
+ ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT;
+ break;
+ }
+
+ memset(window, 0, sizeof(window));
+ for (i = 0; i <= ilen; i++) {
+ if (i - beg >= SHINGLES_WINDOW || i == ilen) {
+
+ for (j = 0; j < RSPAMD_SHINGLE_SIZE; j++) {
+ /* Shift hashes window to right */
+ for (k = 0; k < SHINGLES_WINDOW - 1; k++) {
+ window[j * SHINGLES_WINDOW + k] =
+ window[j * SHINGLES_WINDOW + k + 1];
+ }
+
+ /* Find the current usable token (the same one for every j) */
+ word = NULL;
+
+ while (widx < input->len) {
+ word = &g_array_index(input, rspamd_stat_token_t, widx);
+
+ if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) {
+ widx++;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (word == NULL) {
+ /* Nothing but exceptions */
+ /* i is reused here: the function returns right after */
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ g_free(hashes[i]);
+ }
+
+ if (pool == NULL) {
+ g_free(res);
+ }
+
+ g_free(hashes);
+ rspamd_fstring_free(row);
+
+ return NULL;
+ }
+
+ /* Insert the last element to the pipe */
+ /* The seed is the leading bytes of the j-th derived key */
+ memcpy(&seed, keys[j], sizeof(seed));
+ window[j * SHINGLES_WINDOW + SHINGLES_WINDOW - 1] =
+ rspamd_cryptobox_fast_hash_specific(ht,
+ word->stemmed.begin, word->stemmed.len,
+ seed);
+ /* Combine the window: older entries are shifted right by more bits */
+ val = 0;
+ for (k = 0; k < SHINGLES_WINDOW; k++) {
+ val ^= window[j * SHINGLES_WINDOW + k] >>
+ (8 * (SHINGLES_WINDOW - k - 1));
+ }
+
+ g_assert(hlen > beg);
+ hashes[j][beg] = val;
+ }
+
+ beg++;
+ widx++;
+ }
+ }
+ }
+
+ /* Now we need to filter all hashes and make a shingles result */
+ for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+ res->hashes[i] = filter(hashes[i], hlen,
+ i, key, filterd);
+ g_free(hashes[i]);
+ }
+
+ g_free(hashes);
+
+ rspamd_fstring_free(row);
+
+ return res;
+}
+
+/*
+ * Build shingles from image DCT data.
+ * The first RSPAMD_DCT_LEN / NBBY bytes of `dct` are each hashed with every
+ * one of the RSPAMD_SHINGLE_SIZE derived keys; `filter` then reduces each
+ * vector of hashes to a single value.
+ * The result is allocated from `pool` when it is not NULL, otherwise with
+ * g_malloc.
+ *
+ * Every hash vector holds exactly as many entries as are filled: the filter
+ * must never see a slot that has not been written.
+ */
+struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
+	rspamd_shingles_from_image(guchar *dct,
+							   const guchar key[16],
+							   rspamd_mempool_t *pool,
+							   rspamd_shingles_filter filter,
+							   gpointer filterd,
+							   enum rspamd_shingle_alg alg)
+{
+	struct rspamd_shingle *shingle;
+	guint64 **hashes;
+	guchar **keys;
+	guint64 d;
+	guint64 val;
+	gint i, j;
+	gsize hlen, beg = 0;
+	enum rspamd_cryptobox_fast_hash_type ht;
+	guint64 seed;
+
+	if (pool != NULL) {
+		shingle = rspamd_mempool_alloc(pool, sizeof(*shingle));
+	}
+	else {
+		shingle = g_malloc(sizeof(*shingle));
+	}
+
+	/* Init hashes pipes and keys */
+	hashes = g_malloc(sizeof(*hashes) * RSPAMD_SHINGLE_SIZE);
+	/* One hash per processed input byte, no spare slot */
+	hlen = RSPAMD_DCT_LEN / NBBY;
+	keys = rspamd_shingles_get_keys_cached(key);
+
+	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+		hashes[i] = g_malloc(hlen * sizeof(guint64));
+	}
+
+	switch (alg) {
+	case RSPAMD_SHINGLES_OLD:
+		ht = RSPAMD_CRYPTOBOX_MUMHASH;
+		break;
+	case RSPAMD_SHINGLES_XXHASH:
+		ht = RSPAMD_CRYPTOBOX_XXHASH64;
+		break;
+	case RSPAMD_SHINGLES_MUMHASH:
+		ht = RSPAMD_CRYPTOBOX_MUMHASH;
+		break;
+	default:
+		ht = RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT;
+		break;
+	}
+
+/* Hash the current input byte with keys [s, e); the seed comes from the key */
+#define INNER_CYCLE_SHINGLES(s, e)                        \
+	for (j = (s); j < (e); j++) {                         \
+		d = dct[beg];                                     \
+		memcpy(&seed, keys[j], sizeof(seed));             \
+		val = rspamd_cryptobox_fast_hash_specific(ht,     \
+												  &d, sizeof(d), \
+												  seed);  \
+		hashes[j][beg] = val;                             \
+	}
+	for (i = 0; i < RSPAMD_DCT_LEN / NBBY; i++) {
+		INNER_CYCLE_SHINGLES(0, RSPAMD_SHINGLE_SIZE / 4);
+		INNER_CYCLE_SHINGLES(RSPAMD_SHINGLE_SIZE / 4, RSPAMD_SHINGLE_SIZE / 2);
+		INNER_CYCLE_SHINGLES(RSPAMD_SHINGLE_SIZE / 2, 3 * RSPAMD_SHINGLE_SIZE / 4);
+		INNER_CYCLE_SHINGLES(3 * RSPAMD_SHINGLE_SIZE / 4, RSPAMD_SHINGLE_SIZE);
+
+		beg++;
+	}
+#undef INNER_CYCLE_SHINGLES
+	/* Now we need to filter all hashes and make a shingles result */
+	for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
+		shingle->hashes[i] = filter(hashes[i], hlen,
+									i, key, filterd);
+		g_free(hashes[i]);
+	}
+
+	g_free(hashes);
+
+	return shingle;
+}
+
+/*
+ * Default shingles filter: returns the smallest of `count` hashes in `input`,
+ * or G_MAXUINT64 when `count` is zero. `shno`, `key` and `ud` are not used.
+ */
+guint64
+rspamd_shingles_default_filter(guint64 *input, gsize count,
+							   gint shno, const guchar *key, gpointer ud)
+{
+	guint64 lowest = G_MAXUINT64;
+	gsize left = count;
+
+	while (left > 0) {
+		if (*input < lowest) {
+			lowest = *input;
+		}
+
+		input++;
+		left--;
+	}
+
+	return lowest;
+}
+
+
+/*
+ * Compare two shingles: returns the share of positions (0.0 .. 1.0) where
+ * both hold the same hash.
+ */
+gdouble rspamd_shingles_compare(const struct rspamd_shingle *a,
+								const struct rspamd_shingle *b)
+{
+	gint pos = 0, matched = 0;
+
+	while (pos < RSPAMD_SHINGLE_SIZE) {
+		/* A comparison yields 1 for equal hashes and 0 otherwise */
+		matched += (a->hashes[pos] == b->hashes[pos]);
+		pos++;
+	}
+
+	return (gdouble) matched / (gdouble) RSPAMD_SHINGLE_SIZE;
+}
diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
new file mode 100644
index 0000000..9a0ca69
--- /dev/null
+++ b/src/libutil/shingles.h
@@ -0,0 +1,101 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SHINGLES_H_
+#define SHINGLES_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+#define RSPAMD_SHINGLE_SIZE 32
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** One shingle signature: a fixed vector of RSPAMD_SHINGLE_SIZE 64-bit hashes */
+struct rspamd_shingle {
+ guint64 hashes[RSPAMD_SHINGLE_SIZE];
+};
+
+/**
+ * Hash function selector for shingles generation.
+ * The image shingles generator maps OLD and MUMHASH to the mum hash,
+ * XXHASH to xxhash64 and any other value (i.e. FAST) to the
+ * platform-independent fast hash.
+ */
+enum rspamd_shingle_alg {
+ RSPAMD_SHINGLES_OLD = 0,
+ RSPAMD_SHINGLES_XXHASH,
+ RSPAMD_SHINGLES_MUMHASH,
+ RSPAMD_SHINGLES_FAST
+};
+
+/**
+ * Shingles filtering function: reduces a vector of hashes to a single
+ * shingle value
+ * @param input input array of hashes
+ * @param count number of hashes in the vector
+ * @param shno index of the shingle being produced (0 .. RSPAMD_SHINGLE_SIZE - 1)
+ * @param key the key that was passed to the shingles generator
+ * @param ud opaque user data (`filterd` argument of the generator)
+ * @return shingle value
+ */
+typedef guint64 (*rspamd_shingles_filter)(guint64 *input, gsize count,
+ gint shno, const guchar *key, gpointer ud);
+
+/**
+ * Generate shingles from the input of fixed size strings using lemmatizer
+ * if needed
+ * @param input array of `rspamd_fstring_t`
+ * @param key secret key used to generate shingles
+ * @param pool pool to allocate shingles array
+ * @param filter hashes filtering function
+ * @param filterd opaque data for filtering function
+ * @param alg hash algorithm selector (see `enum rspamd_shingle_alg`)
+ * @return shingles array
+ */
+struct rspamd_shingle *rspamd_shingles_from_text(GArray *input,
+ const guchar key[16],
+ rspamd_mempool_t *pool,
+ rspamd_shingles_filter filter,
+ gpointer filterd,
+ enum rspamd_shingle_alg alg);
+
+/**
+ * Generate shingles from the DCT matrix of an image
+ * @param dct discrete cosine transform matrix (must be 64x64)
+ * @param key secret key used to generate shingles
+ * @param pool pool to allocate shingles array
+ * @param filter hashes filtering function
+ * @param filterd opaque data for filtering function
+ * @param alg hash algorithm selector (see `enum rspamd_shingle_alg`)
+ * @return shingles array
+ */
+struct rspamd_shingle *rspamd_shingles_from_image(guchar *dct,
+ const guchar key[16],
+ rspamd_mempool_t *pool,
+ rspamd_shingles_filter filter,
+ gpointer filterd,
+ enum rspamd_shingle_alg alg);
+
+/**
+ * Compares two shingles and returns the result as a floating point value - 1.0
+ * for completely similar shingles and 0.0 for completely different ones
+ * @param a first shingle
+ * @param b second shingle
+ * @return number of positions with equal hashes divided by RSPAMD_SHINGLE_SIZE
+ */
+gdouble rspamd_shingles_compare(const struct rspamd_shingle *a,
+ const struct rspamd_shingle *b);
+
+/**
+ * Default filtering function: returns the minimal hash of the input vector
+ * (G_MAXUINT64 for an empty vector); `shno`, `key` and `ud` are ignored
+ */
+guint64 rspamd_shingles_default_filter(guint64 *input, gsize count,
+ gint shno, const guchar *key, gpointer ud);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SHINGLES_H_ */
diff --git a/src/libutil/sqlite_utils.c b/src/libutil/sqlite_utils.c
new file mode 100644
index 0000000..8aeb598
--- /dev/null
+++ b/src/libutil/sqlite_utils.c
@@ -0,0 +1,620 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "libserver/logger.h"
+#include "libutil/sqlite_utils.h"
+#include "unix-std.h"
+
+
+static GQuark
+rspamd_sqlite3_quark(void)
+{
+ return g_quark_from_static_string("rspamd-sqlite3");
+}
+
+/**
+ * Copy `max_idx` statement descriptions from `init_stmt` into a new array and
+ * compile the SQL of each of them against `db`.
+ * On the first compilation failure `err` is set, the statements compiled so
+ * far are finalized, the array is freed and NULL is returned.
+ * @return array of `struct rspamd_sqlite3_prstmt` or NULL on error
+ */
+GArray *
+rspamd_sqlite3_init_prstmt(sqlite3 *db,
+						   struct rspamd_sqlite3_prstmt *init_stmt,
+						   gint max_idx,
+						   GError **err)
+{
+	GArray *prepared = g_array_sized_new(FALSE, TRUE,
+										 sizeof(struct rspamd_sqlite3_prstmt),
+										 max_idx);
+
+	g_array_set_size(prepared, max_idx);
+
+	for (gint n = 0; n < max_idx; n++) {
+		struct rspamd_sqlite3_prstmt *slot =
+			&g_array_index(prepared, struct rspamd_sqlite3_prstmt, n);
+
+		*slot = init_stmt[n];
+
+		if (sqlite3_prepare_v2(db, init_stmt[n].sql, -1,
+							   &slot->stmt, NULL) == SQLITE_OK) {
+			continue;
+		}
+
+		g_set_error(err, rspamd_sqlite3_quark(),
+					-1, "Cannot initialize prepared sql `%s`: %s",
+					slot->sql, sqlite3_errmsg(db));
+		rspamd_sqlite3_close_prstmt(db, prepared);
+
+		return NULL;
+	}
+
+	return prepared;
+}
+
+/**
+ * Bind varargs to prepared statement number `idx`, step it once and, when the
+ * step result equals the statement's expected `result`, copy the requested
+ * columns back through further varargs.
+ *
+ * Input arguments are described by the statement's `args` string:
+ *  'T' - `const char *`, NUL-terminated text
+ *  'V', 'B' - `gint64` length followed by `const char *` data
+ *  'I' - `gint64`
+ *  'S' - `gint`
+ *  '*' - `gint` repeat count applied to the next type letter
+ * Output arguments are described by the statement's `ret` string (may be NULL);
+ * the position of a letter in `ret` is used as the column index:
+ *  'T' - `char **`, receives a g_strdup() copy of the column text
+ *  'I' - `gint64 *`
+ *  'S' - `int *`
+ *  'L' - `gint64 *`, receives sqlite3_last_insert_rowid()
+ *  'B' - `gint64 *` length followed by `gpointer *` to a g_malloc() copy
+ *
+ * All binds use SQLITE_STATIC, so the bound buffers are not copied here.
+ *
+ * @return -1 if `idx` is out of range, SQLITE_OK if the step returned the
+ * expected code, otherwise the raw sqlite3_step() return code
+ */
+int rspamd_sqlite3_run_prstmt(rspamd_mempool_t *pool, sqlite3 *db, GArray *stmts,
+ gint idx, ...)
+{
+ gint retcode;
+ va_list ap;
+ sqlite3_stmt *stmt;
+ gint i, rowid, nargs, j;
+ gint64 len;
+ gpointer p;
+ struct rspamd_sqlite3_prstmt *nst;
+ const char *argtypes;
+
+ if (idx < 0 || idx >= (gint) stmts->len) {
+
+ return -1;
+ }
+
+ nst = &g_array_index(stmts, struct rspamd_sqlite3_prstmt, idx);
+ stmt = nst->stmt;
+
+ /* NOTE(review): this assertion runs after `nst` has already been dereferenced */
+ g_assert(nst != NULL);
+
+ msg_debug_pool("executing `%s`", nst->sql);
+ argtypes = nst->args;
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+ va_start(ap, idx);
+ nargs = 1;
+
+ /* `rowid` is the 1-based bind parameter index; `nargs` is the pending repeat count */
+ for (i = 0, rowid = 1; argtypes[i] != '\0'; i++) {
+ switch (argtypes[i]) {
+ case 'T':
+
+ for (j = 0; j < nargs; j++, rowid++) {
+ sqlite3_bind_text(stmt, rowid, va_arg(ap, const char *), -1,
+ SQLITE_STATIC);
+ }
+
+ nargs = 1;
+ break;
+ case 'V':
+ case 'B':
+
+ /* Length comes first in the varargs, then the data pointer */
+ for (j = 0; j < nargs; j++, rowid++) {
+ len = va_arg(ap, gint64);
+ sqlite3_bind_text(stmt, rowid, va_arg(ap, const char *), len,
+ SQLITE_STATIC);
+ }
+
+ nargs = 1;
+ break;
+ case 'I':
+
+ for (j = 0; j < nargs; j++, rowid++) {
+ sqlite3_bind_int64(stmt, rowid, va_arg(ap, gint64));
+ }
+
+ nargs = 1;
+ break;
+ case 'S':
+
+ for (j = 0; j < nargs; j++, rowid++) {
+ sqlite3_bind_int(stmt, rowid, va_arg(ap, gint));
+ }
+
+ nargs = 1;
+ break;
+ case '*':
+ /* Repeat count for the following type letter */
+ nargs = va_arg(ap, gint);
+ break;
+ }
+ }
+
+ retcode = sqlite3_step(stmt);
+
+ if (retcode == nst->result) {
+ argtypes = nst->ret;
+
+ /* Column index equals the position of the letter in the `ret` string */
+ for (i = 0; argtypes != NULL && argtypes[i] != '\0'; i++) {
+ switch (argtypes[i]) {
+ case 'T':
+ *va_arg(ap, char **) = g_strdup(sqlite3_column_text(stmt, i));
+ break;
+ case 'I':
+ *va_arg(ap, gint64 *) = sqlite3_column_int64(stmt, i);
+ break;
+ case 'S':
+ *va_arg(ap, int *) = sqlite3_column_int(stmt, i);
+ break;
+ case 'L':
+ *va_arg(ap, gint64 *) = sqlite3_last_insert_rowid(db);
+ break;
+ case 'B':
+ /* The blob is copied; the copy is handed over through the gpointer * argument */
+ len = sqlite3_column_bytes(stmt, i);
+ g_assert(len >= 0);
+ p = g_malloc(len);
+ memcpy(p, sqlite3_column_blob(stmt, i), len);
+ *va_arg(ap, gint64 *) = len;
+ *va_arg(ap, gpointer *) = p;
+ break;
+ }
+ }
+
+ /* Statements flagged as MULTIPLE are left un-reset after a successful step */
+ if (!(nst->flags & RSPAMD_SQLITE3_STMT_MULTIPLE)) {
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+ }
+
+ va_end(ap);
+
+ return SQLITE_OK;
+ }
+ else if (retcode != SQLITE_DONE && retcode != SQLITE_OK && retcode != SQLITE_ROW) {
+ /* Only genuine errors are logged; an unexpected DONE/OK/ROW is returned silently */
+ msg_warn_pool("failed to execute query %s: %d, %s", nst->sql,
+ retcode, sqlite3_errmsg(db));
+ }
+
+ if (!(nst->flags & RSPAMD_SQLITE3_STMT_MULTIPLE)) {
+ sqlite3_clear_bindings(stmt);
+ sqlite3_reset(stmt);
+ }
+
+ va_end(ap);
+
+ return retcode;
+}
+
+/**
+ * Finalize every compiled statement stored in `stmts` and free the array
+ * itself. Entries without a compiled statement are skipped.
+ */
+void rspamd_sqlite3_close_prstmt(sqlite3 *db, GArray *stmts)
+{
+	for (guint n = 0; n < stmts->len; n++) {
+		sqlite3_stmt *compiled =
+			g_array_index(stmts, struct rspamd_sqlite3_prstmt, n).stmt;
+
+		if (compiled == NULL) {
+			continue;
+		}
+
+		sqlite3_finalize(compiled);
+	}
+
+	g_array_free(stmts, TRUE);
+}
+
+/**
+ * Wait until the lock file `lock` is released by its owner.
+ * The function polls (sleeping 1ms between attempts) by trying to create the
+ * file exclusively. While the file exists, the pid stored inside is examined:
+ * a short/unreadable pid, our own pid or a pid of a dead process mean that the
+ * lock is stale, so it is removed straight away.
+ * On every TRUE return the lock file has been unlinked, i.e. the caller does
+ * NOT own the lock afterwards.
+ * @return TRUE when the lock is gone, FALSE on an unexpected system error
+ */
+static gboolean
+rspamd_sqlite3_wait(rspamd_mempool_t *pool, const gchar *lock)
+{
+ gint fd;
+ pid_t pid;
+ gssize r;
+ struct timespec sleep_ts = {
+ .tv_sec = 0,
+ .tv_nsec = 1000000};
+
+ while ((fd = open(lock, O_WRONLY | O_CREAT | O_EXCL, 00600)) == -1) {
+ if (errno != EBUSY && errno != EEXIST) {
+ msg_err_pool_check("cannot open lock file %s: %s", lock,
+ strerror(errno));
+
+ return FALSE;
+ }
+
+ /* Lock exists: read the owner's pid from it */
+ fd = open(lock, O_RDONLY);
+
+ if (fd == -1) {
+ msg_err_pool_check("cannot open lock file %s: %s", lock,
+ strerror(errno));
+
+ return FALSE;
+ }
+
+ r = read(fd, &pid, sizeof(pid));
+
+ if (r != sizeof(pid)) {
+ /* No complete pid inside: treat the lock as stale */
+ msg_warn_pool_check("stale lock file %s, removing", lock);
+ unlink(lock);
+ close(fd);
+
+ return TRUE;
+ }
+
+ /* Now check for process existence */
+ if (pid == getpid()) {
+ msg_warn_pool_check("lock file %s, belongs to me, removing", lock);
+ unlink(lock);
+ close(fd);
+
+ return TRUE;
+ }
+ else if (kill(pid, 0) == -1) {
+ /* Signal 0 only probes the pid; other errno values keep us waiting */
+ if (errno == ESRCH) {
+ /* Process is already dead */
+ msg_warn_pool_check("stale lock file %s from pid: %P, removing",
+ lock, pid);
+ unlink(lock);
+ close(fd);
+
+ return TRUE;
+ }
+ }
+
+ close(fd);
+
+ if (nanosleep(&sleep_ts, NULL) == -1 && errno != EINTR) {
+ msg_err_pool_check("cannot sleep open lock file %s: %s", lock,
+ strerror(errno));
+
+ return FALSE;
+ }
+ }
+
+ /* We have just created the lock file ourselves: drop it immediately */
+ unlink(lock);
+ close(fd);
+
+ return TRUE;
+}
+
+#define RSPAMD_SQLITE_MMAP_LIMIT 268435456
+#define RSPAMD_SQLITE_CACHE_SIZE 262144
+
+/**
+ * Open the sqlite database at `path`, creating and initialising it when the
+ * file cannot be accessed yet.
+ *
+ * Creation is serialised between processes with a `<path>.lock` file that is
+ * created with O_EXCL and holds the pid of its owner. The process that manages
+ * to create the lock file runs `create_sql` on a new database (or, for an
+ * existing database with `version` > 0, re-runs `create_sql` and bumps
+ * `PRAGMA user_version` when the stored version is lower than `version`).
+ * A process that finds the lock file already present waits for it in
+ * rspamd_sqlite3_wait() and then just opens the database.
+ * Afterwards WAL mode and the tuning pragmas are applied; failures of those
+ * pragmas are only logged.
+ *
+ * @param pool memory pool, used by the msg_*_pool_check logging macros
+ * @param path path to the database file
+ * @param create_sql schema creation script, may be NULL
+ * @param version desired `user_version`; 0 disables the version check
+ * @param err set when NULL is returned
+ * @return opened database handle or NULL on error
+ */
+sqlite3 *
+rspamd_sqlite3_open_or_create(rspamd_mempool_t *pool, const gchar *path, const gchar *create_sql, guint version, GError **err)
+{
+	sqlite3 *sqlite;
+	gint rc, flags, lock_fd;
+	gchar lock_path[PATH_MAX], dbdir[PATH_MAX], *pdir;
+	static const char sqlite_wal[] =
+						  "PRAGMA journal_mode=\"wal\";"
+						  "PRAGMA wal_autocheckpoint = 16;"
+						  "PRAGMA journal_size_limit = 1536;",
+					  exclusive_lock_sql[] = "PRAGMA locking_mode=\"exclusive\";",
+
+					  fsync_sql[] = "PRAGMA synchronous=\"NORMAL\";",
+
+					  foreign_keys[] = "PRAGMA foreign_keys=\"ON\";",
+
+#if defined(__LP64__) || defined(_LP64)
+					  enable_mmap[] = "PRAGMA mmap_size=" G_STRINGIFY(RSPAMD_SQLITE_MMAP_LIMIT) ";",
+#endif
+
+					  other_pragmas[] = "PRAGMA read_uncommitted=\"ON\";"
+										"PRAGMA cache_size=" G_STRINGIFY(RSPAMD_SQLITE_CACHE_SIZE) ";",
+					  db_version[] = "PRAGMA user_version;";
+	gboolean create = FALSE, has_lock = FALSE;
+
+	flags = SQLITE_OPEN_READWRITE;
+#ifdef SQLITE_OPEN_SHAREDCACHE
+	flags |= SQLITE_OPEN_SHAREDCACHE;
+#endif
+#ifdef SQLITE_OPEN_WAL
+	flags |= SQLITE_OPEN_WAL;
+#endif
+
+	/* dirname() may modify its argument, so work on a copy of the path */
+	rspamd_strlcpy(dbdir, path, sizeof(dbdir));
+	pdir = dirname(dbdir);
+
+	if (access(pdir, W_OK) == -1) {
+		g_set_error(err, rspamd_sqlite3_quark(),
+					errno, "cannot open sqlite directory %s: %s",
+					pdir, strerror(errno));
+
+		return NULL;
+	}
+
+	if (access(path, R_OK) == -1) {
+		flags |= SQLITE_OPEN_CREATE;
+		create = TRUE;
+	}
+
+	rspamd_snprintf(lock_path, sizeof(lock_path), "%s.lock", path);
+	lock_fd = open(lock_path, O_WRONLY | O_CREAT | O_EXCL, 00600);
+
+	if (lock_fd == -1) {
+		if (errno == EEXIST || errno == EBUSY) {
+			msg_debug_pool_check("checking %s to wait for db being initialized", lock_path);
+
+			if (!rspamd_sqlite3_wait(pool, lock_path)) {
+				g_set_error(err, rspamd_sqlite3_quark(),
+							errno, "cannot create sqlite file %s: %s",
+							path, strerror(errno));
+
+				return NULL;
+			}
+
+
+			/* At this point we have database created */
+			create = FALSE;
+			has_lock = FALSE;
+		}
+		else {
+			g_set_error(err, rspamd_sqlite3_quark(),
+						errno, "cannot lock sqlite file %s: %s",
+						path, strerror(errno));
+
+			/*
+			 * An error has been reported to the caller, so stop here:
+			 * going on could return a handle together with a set error
+			 * or set a second error over the first one.
+			 */
+			return NULL;
+		}
+	}
+	else {
+		pid_t myself = getpid();
+		gboolean locked;
+
+		msg_debug_pool_check("locking %s to block other processes", lock_path);
+		(void) write(lock_fd, &myself, sizeof(myself));
+
+		/* Take the lock outside of g_assert so it survives G_DISABLE_ASSERT */
+		locked = rspamd_file_lock(lock_fd, FALSE);
+		g_assert(locked);
+		(void) locked;
+		has_lock = TRUE;
+	}
+
+	if ((rc = sqlite3_open_v2(path, &sqlite,
+							  flags, NULL)) != SQLITE_OK) {
+#if SQLITE_VERSION_NUMBER >= 3008000
+		g_set_error(err, rspamd_sqlite3_quark(),
+					rc, "cannot open sqlite db %s: %s",
+					path, sqlite3_errstr(rc));
+#else
+		g_set_error(err, rspamd_sqlite3_quark(),
+					rc, "cannot open sqlite db %s: %d",
+					path, rc);
+#endif
+
+		if (has_lock && lock_fd != -1) {
+			msg_debug_pool_check("removing lock from %s", lock_path);
+			rspamd_file_unlock(lock_fd, FALSE);
+			unlink(lock_path);
+			close(lock_fd);
+		}
+
+		return NULL;
+	}
+
+	if (create && has_lock) {
+		/* Fresh database owned by us: switch to WAL, retrying while busy */
+		while ((rc = sqlite3_exec(sqlite, sqlite_wal, NULL, NULL, NULL)) != SQLITE_OK) {
+			if (rc == SQLITE_BUSY) {
+				struct timespec sleep_ts = {
+					.tv_sec = 0,
+					.tv_nsec = 1000000};
+
+				nanosleep(&sleep_ts, NULL);
+
+				continue;
+			}
+
+			msg_warn_pool_check("WAL mode is not supported (%s), locking issues might occur",
+								sqlite3_errmsg(sqlite));
+			break;
+		}
+
+		if (sqlite3_exec(sqlite, exclusive_lock_sql, NULL, NULL, NULL) != SQLITE_OK) {
+			msg_warn_pool_check("cannot exclusively lock database to create schema: %s",
+								sqlite3_errmsg(sqlite));
+		}
+
+		if (create_sql) {
+			while ((rc = sqlite3_exec(sqlite, create_sql, NULL, NULL, NULL)) != SQLITE_OK) {
+				if (rc == SQLITE_BUSY) {
+					struct timespec sleep_ts = {
+						.tv_sec = 0,
+						.tv_nsec = 1000000};
+
+					nanosleep(&sleep_ts, NULL);
+
+					continue;
+				}
+
+				g_set_error(err, rspamd_sqlite3_quark(),
+							-1, "cannot execute create sql `%s`: %s",
+							create_sql, sqlite3_errmsg(sqlite));
+				sqlite3_close(sqlite);
+				rspamd_file_unlock(lock_fd, FALSE);
+				unlink(lock_path);
+				if (lock_fd != -1) {
+					close(lock_fd);
+				}
+
+				return NULL;
+			}
+		}
+
+		sqlite3_close(sqlite);
+
+		/* Reopen in normal mode */
+		msg_debug_pool_check("reopening %s in normal mode", path);
+		flags &= ~SQLITE_OPEN_CREATE;
+
+		if ((rc = sqlite3_open_v2(path, &sqlite,
+								  flags, NULL)) != SQLITE_OK) {
+#if SQLITE_VERSION_NUMBER >= 3008000
+			g_set_error(err, rspamd_sqlite3_quark(),
+						rc, "cannot open sqlite db after creation %s: %s",
+						path, sqlite3_errstr(rc));
+#else
+			g_set_error(err, rspamd_sqlite3_quark(),
+						rc, "cannot open sqlite db after creation %s: %d",
+						path, rc);
+#endif
+			rspamd_file_unlock(lock_fd, FALSE);
+			unlink(lock_path);
+
+			if (lock_fd != -1) {
+				close(lock_fd);
+			}
+
+			return NULL;
+		}
+	}
+	else if (has_lock && version > 0) {
+		/* Check user version */
+		sqlite3_stmt *stmt = NULL;
+		guint32 db_ver;
+		GString *new_ver_sql;
+
+		if (sqlite3_prepare(sqlite, db_version, -1, &stmt, NULL) != SQLITE_OK) {
+			msg_warn_pool_check("Cannot get user version pragma: %s",
+								sqlite3_errmsg(sqlite));
+		}
+		else {
+			if (sqlite3_step(stmt) != SQLITE_ROW) {
+				msg_warn_pool_check("Cannot get user version pragma, step failed: %s",
+									sqlite3_errmsg(sqlite));
+				sqlite3_finalize(stmt);
+			}
+			else {
+				db_ver = sqlite3_column_int(stmt, 0);
+				sqlite3_reset(stmt);
+				sqlite3_finalize(stmt);
+
+				if (version > db_ver) {
+					msg_warn_pool_check("Database version %ud is less than "
+										"desired version %ud, run create script",
+										db_ver,
+										version);
+
+					if (create_sql) {
+						if (sqlite3_exec(sqlite, create_sql, NULL, NULL, NULL) != SQLITE_OK) {
+							g_set_error(err, rspamd_sqlite3_quark(),
+										-1, "cannot execute create sql `%s`: %s",
+										create_sql, sqlite3_errmsg(sqlite));
+							sqlite3_close(sqlite);
+							rspamd_file_unlock(lock_fd, FALSE);
+							unlink(lock_path);
+							if (lock_fd != -1) {
+								close(lock_fd);
+							}
+
+							return NULL;
+						}
+					}
+
+					new_ver_sql = g_string_new("PRAGMA user_version=");
+					rspamd_printf_gstring(new_ver_sql, "%ud", version);
+
+					if (sqlite3_exec(sqlite, new_ver_sql->str, NULL, NULL, NULL) != SQLITE_OK) {
+						g_set_error(err, rspamd_sqlite3_quark(),
+									-1, "cannot execute update version sql `%s`: %s",
+									new_ver_sql->str, sqlite3_errmsg(sqlite));
+						sqlite3_close(sqlite);
+						rspamd_file_unlock(lock_fd, FALSE);
+						unlink(lock_path);
+						if (lock_fd != -1) {
+							close(lock_fd);
+						}
+
+						g_string_free(new_ver_sql, TRUE);
+
+						return NULL;
+					}
+
+					g_string_free(new_ver_sql, TRUE);
+				}
+				else if (db_ver > version) {
+					msg_warn_pool_check("Database version %ud is more than "
+										"desired version %ud, this could cause"
+										" unexpected behaviour",
+										db_ver,
+										version);
+				}
+			}
+		}
+	}
+
+	/* Per-connection setup; applied on every open, failures are not fatal */
+	while ((rc = sqlite3_exec(sqlite, sqlite_wal, NULL, NULL, NULL)) != SQLITE_OK) {
+		if (rc == SQLITE_BUSY) {
+			struct timespec sleep_ts = {
+				.tv_sec = 0,
+				.tv_nsec = 1000000};
+
+			nanosleep(&sleep_ts, NULL);
+
+			continue;
+		}
+
+		msg_warn_pool_check("WAL mode is not supported (%s), locking issues might occur",
+							sqlite3_errmsg(sqlite));
+		break;
+	}
+
+	if (sqlite3_exec(sqlite, fsync_sql, NULL, NULL, NULL) != SQLITE_OK) {
+		msg_warn_pool_check("cannot set synchronous: %s",
+							sqlite3_errmsg(sqlite));
+	}
+
+	if ((rc = sqlite3_exec(sqlite, foreign_keys, NULL, NULL, NULL)) !=
+		SQLITE_OK) {
+		msg_warn_pool_check("cannot enable foreign keys: %s",
+							sqlite3_errmsg(sqlite));
+	}
+
+#if defined(__LP64__) || defined(_LP64)
+	if ((rc = sqlite3_exec(sqlite, enable_mmap, NULL, NULL, NULL)) != SQLITE_OK) {
+		msg_warn_pool_check("cannot enable mmap: %s",
+							sqlite3_errmsg(sqlite));
+	}
+#endif
+
+	if ((rc = sqlite3_exec(sqlite, other_pragmas, NULL, NULL, NULL)) !=
+		SQLITE_OK) {
+		msg_warn_pool_check("cannot execute tuning pragmas: %s",
+							sqlite3_errmsg(sqlite));
+	}
+
+	if (has_lock && lock_fd != -1) {
+		msg_debug_pool_check("removing lock from %s", lock_path);
+		rspamd_file_unlock(lock_fd, FALSE);
+		unlink(lock_path);
+		close(lock_fd);
+	}
+
+	return sqlite;
+}
+
+/**
+ * Checkpoint the write-ahead log of `db` (when WAL support is compiled in)
+ * using the strongest checkpoint mode available at build time.
+ * @param wal_frames if not NULL, receives the WAL size in frames
+ * @param wal_checkpoints if not NULL, receives the number of checkpointed frames
+ * @return FALSE if the checkpoint call failed, TRUE otherwise
+ */
+gboolean
+rspamd_sqlite3_sync(sqlite3 *db, gint *wal_frames, gint *wal_checkpoints)
+{
+	gint frames = 0, checkpointed = 0, mode;
+
+#ifdef SQLITE_OPEN_WAL
+#if defined(SQLITE_CHECKPOINT_TRUNCATE)
+	mode = SQLITE_CHECKPOINT_TRUNCATE;
+#elif defined(SQLITE_CHECKPOINT_RESTART)
+	mode = SQLITE_CHECKPOINT_RESTART;
+#elif defined(SQLITE_CHECKPOINT_FULL)
+	mode = SQLITE_CHECKPOINT_FULL;
+#endif
+	/* The checkpoint itself may take a noticeable amount of time */
+	gint rc = sqlite3_wal_checkpoint_v2(db, NULL, mode, &frames, &checkpointed);
+
+	if (rc != SQLITE_OK) {
+		return FALSE;
+	}
+#endif
+
+	if (wal_frames != NULL) {
+		*wal_frames = frames;
+	}
+
+	if (wal_checkpoints != NULL) {
+		*wal_checkpoints = checkpointed;
+	}
+
+	return TRUE;
+}
diff --git a/src/libutil/sqlite_utils.h b/src/libutil/sqlite_utils.h
new file mode 100644
index 0000000..5411a47
--- /dev/null
+++ b/src/libutil/sqlite_utils.h
@@ -0,0 +1,90 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_SQLITE_UTILS_H_
+#define SRC_LIBUTIL_SQLITE_UTILS_H_
+
+#include "config.h"
+#include "mem_pool.h"
+#include "sqlite3.h"
+
+#define RSPAMD_SQLITE3_STMT_MULTIPLE (1 << 0)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Description of a single prepared statement plus its compiled form */
+struct rspamd_sqlite3_prstmt {
+ gint idx; /* NOTE(review): presumably the statement's position in the array - confirm */
+ const gchar *sql; /* SQL text compiled by rspamd_sqlite3_init_prstmt */
+ const gchar *args; /* type letters of the bind arguments read by rspamd_sqlite3_run_prstmt */
+ sqlite3_stmt *stmt; /* compiled statement, filled by rspamd_sqlite3_init_prstmt */
+ gint result; /* sqlite3_step() code that rspamd_sqlite3_run_prstmt treats as success */
+ const gchar *ret; /* type letters of the output arguments, may be NULL */
+ gint flags; /* RSPAMD_SQLITE3_STMT_* bits */
+};
+
+/**
+ * Create prepared statements for specified database from init statements
+ * @param db database to compile the statements against
+ * @param init_stmt array of at least `max_idx` statement descriptions
+ * @param max_idx number of statements to prepare
+ * @param err set if some statement cannot be compiled
+ * @return new prepared statements array or NULL
+ */
+GArray *rspamd_sqlite3_init_prstmt(sqlite3 *db,
+ struct rspamd_sqlite3_prstmt *init_stmt,
+ gint max_idx,
+ GError **err);
+
+/**
+ * Run a prepared statement by its index, getting parameters and setting results
+ * from the varargs: first the bind values described by the statement's `args`
+ * string, then the output pointers described by its `ret` string
+ * @param pool memory pool used for logging
+ * @param db database the statements belong to
+ * @param stmts array created by rspamd_sqlite3_init_prstmt
+ * @param idx index of the statement inside `stmts`
+ * @return -1 for an invalid index, SQLITE_OK when the step returned the
+ * statement's expected result, otherwise the sqlite3_step() code
+ */
+gint rspamd_sqlite3_run_prstmt(rspamd_mempool_t *pool, sqlite3 *db, GArray *stmts,
+ gint idx, ...);
+
+/**
+ * Close and free prepared statements: every compiled statement is finalized
+ * and the array itself is freed, so `stmts` must not be used afterwards
+ * @param db database the statements belong to
+ * @param stmts array created by rspamd_sqlite3_init_prstmt
+ */
+void rspamd_sqlite3_close_prstmt(sqlite3 *db, GArray *stmts);
+
+/**
+ * Creates or opens sqlite database trying to share it between processes
+ * (creation is serialised using a `<path>.lock` file)
+ * @param pool memory pool used for logging
+ * @param path path to the database file
+ * @param create_sql schema creation script, may be NULL
+ * @param version desired `PRAGMA user_version`, 0 disables the version check
+ * @param err set on failure
+ * @return database handle or NULL on error
+ */
+sqlite3 *rspamd_sqlite3_open_or_create(rspamd_mempool_t *pool,
+ const gchar *path, const gchar *create_sql,
+ guint32 version, GError **err);
+
+
+/**
+ * Sync sqlite3 db ensuring that all wal things are done
+ * @param db database to checkpoint
+ * @param wal_frames if not NULL, receives the size of the WAL in frames
+ * @param wal_checkpoints if not NULL, receives the number of checkpointed frames
+ * @return FALSE if the WAL checkpoint has failed
+ */
+gboolean rspamd_sqlite3_sync(sqlite3 *db, gint *wal_frames, gint *wal_checkpoints);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_SQLITE_UTILS_H_ */
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
new file mode 100644
index 0000000..bc99f2a
--- /dev/null
+++ b/src/libutil/str_util.c
@@ -0,0 +1,3886 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "cryptobox.h"
+#include "url.h"
+#include "str_util.h"
+#include "logger.h"
+#include "contrib/t1ha/t1ha.h"
+#include <unicode/uversion.h>
+#include <unicode/ucnv.h>
+#if U_ICU_VERSION_MAJOR_NUM >= 44
+#include <unicode/unorm2.h>
+#endif
+#include <math.h>
+
+#ifdef __x86_64__
+#include <immintrin.h>
+#endif
+
+#include "contrib/fastutf8/fastutf8.h"
+
+/*
+ * Byte-wise lowercase table: 'A'..'Z' (0x41..0x5a) map to 'a'..'z'
+ * (0x61..0x7a), every other byte value maps to itself.
+ */
+const guchar lc_map[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
+
+/**
+ * Lowercase `size` bytes of `str` in place using the ASCII-only lc_map table.
+ * @return `size`, unchanged
+ */
+guint rspamd_str_lc(gchar *str, guint size)
+{
+	guchar *bytes = (guchar *) str;
+
+	for (guint pos = 0; pos < size; pos++) {
+		bytes[pos] = lc_map[bytes[pos]];
+	}
+
+	return size;
+}
+
+/**
+ * Copy `size` bytes from `src` to `dst` lowercasing ASCII letters on the way.
+ * No terminating NUL is appended. On x86_64 the aligned middle part of the
+ * input is processed 16 bytes at a time with SSE2, the rest goes through
+ * lc_map.
+ * @return number of bytes written to `dst`
+ */
+gsize rspamd_str_copy_lc(const gchar *src, gchar *dst, gsize size)
+{
+ gchar *d = dst;
+
+ /* Find aligned start */
+ /* Byte-wise until `src` is 16-byte aligned (required by _mm_load_si128 below) */
+ while ((0xf & (uintptr_t) src) && size > 0) {
+ *d++ = lc_map[(guchar) *src++];
+ size--;
+ }
+
+ /* Aligned start in src */
+#ifdef __x86_64__
+ while (size >= 16) {
+ __m128i sv = _mm_load_si128((const __m128i *) src);
+ /* From A */
+ /* Shifts 'A'..'Z' to the bottom of the signed range: -128 .. -128 + 25 */
+ __m128i rangeshift = _mm_sub_epi8(sv, _mm_set1_epi8((char) ('A' + 128)));
+ /* To Z */
+ /* All-ones for every byte that is NOT an uppercase ASCII letter */
+ __m128i nomodify = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25));
+ /* ^ ' ' */
+ /* 0x20 for uppercase letters only, 0 elsewhere */
+ __m128i flip = _mm_andnot_si128(nomodify, _mm_set1_epi8(0x20));
+ /* Despite its name, `uc` holds the lowercased bytes */
+ __m128i uc = _mm_xor_si128(sv, flip);
+ /* `dst` alignment is unknown, hence the unaligned store */
+ _mm_storeu_si128((__m128i *) d, uc);
+ d += 16;
+ src += 16;
+ size -= 16;
+ }
+#endif
+
+ /* Leftover */
+ while (size > 0) {
+ *d++ = lc_map[(guchar) *src++];
+ size--;
+ }
+
+ return (d - dst);
+}
+
+/**
+ * Compare the first `l` bytes of `s` and `d` ignoring ASCII case (lc_map).
+ * Embedded NUL bytes are compared like any other byte.
+ * @return 0 if the buffers are equal ignoring case; otherwise the difference
+ * between the first mismatching lowercased bytes, taken as unsigned values
+ * (negative when `s` sorts before `d`)
+ */
+gint rspamd_lc_cmp(const gchar *s, const gchar *d, gsize l)
+{
+	const guchar *lhs = (const guchar *) s, *rhs = (const guchar *) d;
+	union {
+		guchar c[4];
+		guint32 n;
+	} cmp1, cmp2;
+	gsize i, fp = l - (l % 4);
+
+	/* Fast path: skip over equal data four bytes at a time */
+	for (i = 0; i != fp; i += 4) {
+		cmp1.c[0] = lc_map[lhs[i]];
+		cmp1.c[1] = lc_map[lhs[i + 1]];
+		cmp1.c[2] = lc_map[lhs[i + 2]];
+		cmp1.c[3] = lc_map[lhs[i + 3]];
+
+		cmp2.c[0] = lc_map[rhs[i]];
+		cmp2.c[1] = lc_map[rhs[i + 1]];
+		cmp2.c[2] = lc_map[rhs[i + 2]];
+		cmp2.c[3] = lc_map[rhs[i + 3]];
+
+		if (cmp1.n != cmp2.n) {
+			/* The byte loop below locates the exact mismatch position */
+			break;
+		}
+	}
+
+	/*
+	 * Byte-wise pass over the mismatching word (if any) and the tail, so the
+	 * sign of the result does not depend on byte order or on `char` signedness
+	 */
+	for (; i < l; i++) {
+		gint a = lc_map[lhs[i]], b = lc_map[rhs[i]];
+
+		if (a != b) {
+			return a - b;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * The purpose of this function is fast and in place conversion of a unicode
+ * string to lower case, so some locale peculiarities are simply ignored.
+ * If the lowercased form of a code point needs more bytes than the original
+ * one, that code point is left as is. The result can be shorter than the
+ * input, never longer; no terminating NUL is written.
+ * Returns the length of the converted string in bytes.
+ */
+guint rspamd_str_lc_utf8(gchar *str, guint size)
+{
+	guchar *d = (guchar *) str, tst[6];
+	gint32 i = 0, prev = 0;
+	UChar32 uc;
+
+	while (i < size) {
+		prev = i;
+
+		U8_NEXT((guint8 *) str, i, size, uc);
+		uc = u_tolower(uc);
+
+		gint32 olen = 0;
+		U8_APPEND_UNSAFE(tst, olen, uc);
+
+		if (olen <= (i - prev)) {
+			memcpy(d, tst, olen);
+			d += olen;
+		}
+		else {
+			/*
+			 * Lowercasing has increased the length, so keep the original
+			 * bytes. The write position may lag behind the read position
+			 * (an earlier code point got shorter), hence the bytes must be
+			 * moved down explicitly; memmove as the ranges may overlap.
+			 */
+			memmove(d, (guchar *) str + prev, i - prev);
+			d += i - prev;
+		}
+	}
+
+	return d - (guchar *) str;
+}
+
+/* Equality callback for NUL-terminated strings, ASCII case is ignored */
+gboolean
+rspamd_strcase_equal(gconstpointer v, gconstpointer v2)
+{
+	const gchar *lhs = v, *rhs = v2;
+
+	return g_ascii_strcasecmp(lhs, rhs) == 0 ? TRUE : FALSE;
+}
+
+/**
+ * Case-insensitive (ASCII, via lc_map) hash of `len` bytes of `in`.
+ * The input is lowercased in 8-byte groups, each group being fed to t1ha with
+ * the previous result used as the seed. One final zero-padded group is always
+ * hashed, even if `len` is a multiple of 8.
+ * @param seed initial seed
+ * @return 64-bit hash value
+ */
+guint64
+rspamd_icase_hash(const gchar *in, gsize len, guint64 seed)
+{
+ guint leftover = len % sizeof(guint64);
+ /* NOTE(review): `fp` and `i` are guint while `len` is gsize - confirm inputs never exceed G_MAXUINT */
+ guint fp, i;
+ const uint8_t *s = (const uint8_t *) in;
+ union {
+ struct {
+ guchar c1, c2, c3, c4, c5, c6, c7, c8;
+ } c;
+ guint64 pp;
+ } u;
+ guint64 h = seed;
+
+ fp = len - leftover;
+
+ /* Full 8-byte groups */
+ for (i = 0; i != fp; i += 8) {
+ u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+ u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
+ u.c.c1 = lc_map[u.c.c1];
+ u.c.c2 = lc_map[u.c.c2];
+ u.c.c3 = lc_map[u.c.c3];
+ u.c.c4 = lc_map[u.c.c4];
+ u.c.c5 = lc_map[u.c.c5];
+ u.c.c6 = lc_map[u.c.c6];
+ u.c.c7 = lc_map[u.c.c7];
+ u.c.c8 = lc_map[u.c.c8];
+ h = t1ha(&u.pp, sizeof(u), h);
+ }
+
+ u.pp = 0;
+
+ /*
+ * Tail: the remaining bytes are stored from the highest used slot downwards,
+ * i.e. in reverse order compared to the full groups above
+ */
+ switch (leftover) {
+ case 7:
+ u.c.c7 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 6:
+ u.c.c6 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 5:
+ u.c.c5 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 4:
+ u.c.c4 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 3:
+ u.c.c3 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 2:
+ u.c.c2 = lc_map[(guchar) s[i++]]; /* FALLTHRU */
+ case 1:
+ u.c.c1 = lc_map[(guchar) s[i]];
+ break;
+ }
+
+ /* Runs unconditionally: with no leftover an all-zero group is mixed in */
+ h = t1ha(&u.pp, sizeof(u), h);
+
+ return h;
+}
+
+/* Hash callback for NUL-terminated strings, ASCII case is ignored */
+guint rspamd_strcase_hash(gconstpointer key)
+{
+	const gchar *text = key;
+	guint64 full = rspamd_icase_hash(text, strlen(text), rspamd_hash_seed());
+
+	return (guint) full;
+}
+
+/* Hash callback for NUL-terminated strings (case sensitive) */
+guint rspamd_str_hash(gconstpointer key)
+{
+	const gchar *text = (const gchar *) key;
+
+	return (guint) rspamd_cryptobox_fast_hash(key, strlen(text),
+											  rspamd_hash_seed());
+}
+
+/* Equality callback for NUL-terminated strings (case sensitive) */
+gboolean
+rspamd_str_equal(gconstpointer v, gconstpointer v2)
+{
+	const gchar *lhs = (const gchar *) v, *rhs = (const gchar *) v2;
+
+	if (strcmp(lhs, rhs) != 0) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* Equality callback for rspamd_ftok_t tokens, ASCII case is ignored */
+gboolean
+rspamd_ftok_icase_equal(gconstpointer v, gconstpointer v2)
+{
+	const rspamd_ftok_t *lhs = v, *rhs = v2;
+
+	if (lhs->len != rhs->len) {
+		return FALSE;
+	}
+
+	return rspamd_lc_cmp(lhs->begin, rhs->begin, lhs->len) == 0 ? TRUE : FALSE;
+}
+
+
+/* Hash callback for rspamd_ftok_t tokens, ASCII case is ignored */
+guint rspamd_ftok_icase_hash(gconstpointer key)
+{
+	const rspamd_ftok_t *tok = key;
+	guint64 full = rspamd_icase_hash(tok->begin, tok->len, rspamd_hash_seed());
+
+	return (guint) full;
+}
+
+/* Equality callback for GString values, ASCII case is ignored */
+gboolean
+rspamd_gstring_icase_equal(gconstpointer v, gconstpointer v2)
+{
+	const GString *lhs = v, *rhs = v2;
+
+	if (lhs->len != rhs->len) {
+		return FALSE;
+	}
+
+	return rspamd_lc_cmp(lhs->str, rhs->str, lhs->len) == 0 ? TRUE : FALSE;
+}
+
+/* Hash callback for GString values, ASCII case is ignored */
+guint rspamd_gstring_icase_hash(gconstpointer key)
+{
+	const GString *gs = key;
+	guint64 full = rspamd_icase_hash(gs->str, gs->len, rspamd_hash_seed());
+
+	return (guint) full;
+}
+
+/* https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+/* MEM_ALIGN: mask of the low address bits used for the alignment checks */
+#define MEM_ALIGN (sizeof(gsize) - 1)
+#if defined(__LP64__) || defined(_LP64)
+#define WORD_TYPE guint64
+#define ZEROMASK 0x7F7F7F7F7F7F7F7FLLU
+#else
+#define WORD_TYPE guint32
+#define ZEROMASK 0x7F7F7F7FU
+#endif
+
+/* Non-zero iff at least one byte of the word `x` is zero */
+#define HASZERO(x) ~(((((x) &ZEROMASK) + ZEROMASK) | (x)) | ZEROMASK)
+
+/**
+ * Copy the NUL-terminated string `src` to `dst`, writing at most `siz` bytes
+ * including the terminating NUL (the result is truncated if needed).
+ * When `src` and `dst` have the same alignment, the middle part is copied
+ * word by word.
+ * @return number of bytes copied, not counting the NUL; 0 if `siz` is 0
+ * (nothing is written in that case)
+ */
+gsize rspamd_strlcpy_fast(gchar *dst, const gchar *src, gsize siz)
+{
+ gchar *d = dst;
+ const gchar *s = src;
+ gsize n = siz;
+ WORD_TYPE *wd;
+ const WORD_TYPE *ws;
+
+ /* Copy as many bytes as will fit */
+ /* After this decrement `n` is the room left for non-NUL bytes */
+ if (n-- != 0) {
+ if (((uintptr_t) s & MEM_ALIGN) == ((uintptr_t) d & MEM_ALIGN)) {
+ /* Init copy byte by byte */
+ for (; ((uintptr_t) s & MEM_ALIGN) && n && (*d = *s); n--, s++, d++)
+ ;
+ if (n && *s) {
+ wd = (void *) d;
+ ws = (const void *) s;
+ /*
+ * Copy by 32 or 64 bits (causes valgrind warnings)
+ */
+ /* Stops at the first word that contains a NUL byte or does not fit */
+ for (; n >= sizeof(WORD_TYPE) && !HASZERO(*ws);
+ n -= sizeof(WORD_TYPE), ws++, wd++) {
+ *wd = *ws;
+ }
+
+ d = (void *) wd;
+ s = (const void *) ws;
+ }
+ }
+
+ /* Copy the rest */
+ for (; n && (*d = *s); n--, s++, d++)
+ ;
+
+ *d = 0;
+ }
+ else {
+ return 0;
+ }
+
+ return (d - dst);
+}
+
+/**
+ * Copy `srclen` bytes of `src` to `dest` dropping all NUL bytes found in the
+ * source. At most `destlen - 1` bytes are stored and the result is always
+ * NUL-terminated (unless `destlen` is 0, when nothing is written).
+ * @return number of bytes stored in `dest`, not counting the terminating NUL
+ */
+gsize rspamd_null_safe_copy(const gchar *src, gsize srclen,
+							gchar *dest, gsize destlen)
+{
+	gsize written = 0;
+
+	if (destlen == 0) {
+		return 0;
+	}
+
+	for (gsize pos = 0; pos < srclen && written + 1 < destlen; pos++) {
+		if (src[pos] == '\0') {
+			continue;
+		}
+
+		dest[written++] = src[pos];
+	}
+
+	dest[written] = '\0';
+
+	return written;
+}
+
+
+/**
+ * Byte-by-byte bounded string copy: stores at most `siz - 1` bytes of `src`
+ * in `dst` and always terminates the result (nothing is written if `siz` is 0).
+ * @return number of bytes copied, not counting the terminating NUL
+ */
+size_t
+rspamd_strlcpy_safe(gchar *dst, const gchar *src, gsize siz)
+{
+	gchar *out = dst;
+
+	if (siz == 0) {
+		return 0;
+	}
+
+	for (gsize room = siz - 1; room > 0 && *src != '\0'; room--) {
+		*out++ = *src++;
+	}
+
+	*out = '\0';
+
+	return (out - dst);
+}
+
+/*
+ * Try to convert a decimal string of the given length to long.
+ * An optional leading '-' is accepted; no whitespace, '+' sign or trailing
+ * characters are allowed. An empty input yields 0.
+ * Returns TRUE and stores the number in `value` on success.
+ * Returns FALSE on a range error (`value` is set to G_MINLONG or G_MAXLONG)
+ * or on a non-digit character (`value` is left untouched).
+ * The magnitude is checked against G_MAXLONG, so G_MINLONG itself is reported
+ * as a range error.
+ */
+gboolean
+rspamd_strtol(const gchar *s, gsize len, glong *value)
+{
+	const gchar *p = s, *end = s + len;
+	gchar c;
+	glong v = 0;
+	const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
+	gboolean neg;
+
+	/* Case negative values; never look at s[0] of an empty input */
+	if (len > 0 && *p == '-') {
+		neg = TRUE;
+		p++;
+	}
+	else {
+		neg = FALSE;
+	}
+	/* Some preparations for range errors */
+
+	while (p < end) {
+		c = *p;
+		if (c >= '0' && c <= '9') {
+			c -= '0';
+			if (v > cutoff || (v == cutoff && c > cutlim)) {
+				/* Range error */
+				*value = neg ? G_MINLONG : G_MAXLONG;
+				return FALSE;
+			}
+			else {
+				v *= 10;
+				v += c;
+			}
+		}
+		else {
+			return FALSE;
+		}
+		p++;
+	}
+
+	*value = neg ? -(v) : v;
+	return TRUE;
+}
+
+/*
+ * Shared body of the unsigned decimal parsers.
+ * Expects `p`, `end`, `c`, `v`, `cutoff`, `cutlim` and `value` to be defined
+ * by the enclosing function and returns from that function on failure:
+ * on overflow `*value` is set to `max_num`, on a non-digit character `*value`
+ * is set to the number parsed so far; FALSE is returned in both cases.
+ */
+#define CONV_STR_LIM_DECIMAL(max_num) \
+ do { \
+ while (p < end) { \
+ c = *p; \
+ if (c >= '0' && c <= '9') { \
+ c -= '0'; \
+ if (v > cutoff || (v == cutoff && (guint8) c > cutlim)) { \
+ *value = (max_num); \
+ return FALSE; \
+ } \
+ else { \
+ v *= 10; \
+ v += c; \
+ } \
+ } \
+ else { \
+ *value = v; \
+ return FALSE; \
+ } \
+ p++; \
+ } \
+ } while (0)
+
+/**
+ * Convert a decimal string of the given length to unsigned long.
+ * @return TRUE if the whole input consists of digits and fits into gulong
+ * (an empty input yields 0); FALSE otherwise, see CONV_STR_LIM_DECIMAL for
+ * the value stored in `value` in that case
+ */
+gboolean
+rspamd_strtoul(const gchar *s, gsize len, gulong *value)
+{
+ const gchar *p = s, *end = s + len;
+ gchar c;
+ gulong v = 0;
+ const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10;
+
+ /* Some preparations for range errors */
+ CONV_STR_LIM_DECIMAL(G_MAXULONG);
+
+ *value = v;
+ return TRUE;
+}
+
+/**
+ * Convert a decimal string of the given length to a 64-bit unsigned integer.
+ * @return TRUE if the whole input consists of digits and fits into guint64
+ * (an empty input yields 0); FALSE on overflow (`value` is set to
+ * G_MAXUINT64) or on a non-digit character (`value` is set to the number
+ * parsed so far)
+ */
+gboolean
+rspamd_strtou64(const gchar *s, gsize len, guint64 *value)
+{
+ const gchar *p = s, *end = s + len;
+ gchar c;
+ guint64 v = 0;
+ const guint64 cutoff = G_MAXUINT64 / 10, cutlim = G_MAXUINT64 % 10;
+
+ /* Some preparations for range errors */
+ /* The macro uses the locals declared above and may return FALSE from here */
+ CONV_STR_LIM_DECIMAL(G_MAXUINT64);
+
+ *value = v;
+ return TRUE;
+}
+
+/**
+ * Parse an unsigned hexadecimal number (no "0x" prefix, either letter case)
+ * from a buffer of explicit length.
+ * @param s input characters; only `len` of them are examined
+ * @param len number of characters to parse
+ * @param value receives the parsed number on success, the value accumulated
+ * so far when a non-hex character is met, or G_MAXULONG on overflow
+ * @return TRUE if all `len` characters were hex digits and the result fits
+ */
+gboolean
+rspamd_xstrtoul(const gchar *s, gsize len, gulong *value)
+{
+    const gchar *p = s, *end = s + len;
+    gchar c;
+    guint digit;
+    gulong v = 0;
+    /* Overflow limits must match the radix used for accumulation */
+    const gulong cutoff = G_MAXULONG / 16, cutlim = G_MAXULONG % 16;
+
+    while (p < end) {
+        c = g_ascii_tolower(*p);
+
+        if (c >= '0' && c <= '9') {
+            digit = c - '0';
+        }
+        else if (c >= 'a' && c <= 'f') {
+            digit = c - 'a' + 10;
+        }
+        else {
+            /* Not a hex digit: report what has been parsed so far */
+            *value = v;
+
+            return FALSE;
+        }
+
+        if (v > cutoff || (v == cutoff && digit > cutlim)) {
+            /* Range error */
+            *value = G_MAXULONG;
+            return FALSE;
+        }
+
+        v = v * 16 + digit;
+        p++;
+    }
+
+    *value = v;
+    return TRUE;
+}
+
+/**
+ * Key/value copy callback for rspamd_hash_table_copy that duplicates a
+ * string into a memory pool.
+ * @param data string to copy, may be NULL
+ * @param ud memory pool (rspamd_mempool_t *) that receives the copy
+ * @return pool-allocated duplicate of `data`, or NULL when `data` is NULL
+ */
+gpointer
+rspamd_str_pool_copy(gconstpointer data, gpointer ud)
+{
+    rspamd_mempool_t *pool = ud;
+
+    if (data == NULL) {
+        return NULL;
+    }
+
+    return rspamd_mempool_strdup(pool, data);
+}
+
+/*
+ * Encode `inlen` bytes from `in` as base32 text into `out`, which has room
+ * for `outlen` characters. No NUL terminator is written.
+ *
+ * For RSPAMD_BASE32_DEFAULT we use here z-base32 encoding described here:
+ * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt
+ * and each input byte is consumed starting from its least significant bits.
+ * The BLEACH and RFC alphabets consume the most significant bits first.
+ *
+ * Input is processed in groups of five bytes (eight output characters);
+ * `remain` carries the bits of the current group that have not been emitted
+ * yet and is -1 when nothing is pending.
+ *
+ * Returns the number of characters written. The loops stop as soon as fewer
+ * than two free characters are left, so a short `out` yields a shorter
+ * result rather than an error.
+ */
+
+gint rspamd_encode_base32_buf(const guchar *in, gsize inlen, gchar *out, gsize outlen,
+ enum rspamd_base32_type type)
+{
+ static const char b32_default[] = "ybndrfg8ejkmcpqxot1uwisza345h769",
+ b32_bleach[] = "qpzry9x8gf2tvdw0s3jn54khce6mua7l",
+ b32_rfc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
+ *b32;
+ gchar *o, *end;
+ gsize i;
+ gint remain = -1, x;
+ bool inverse_order = true;
+
+ end = out + outlen;
+ o = out;
+
+ /* Select the alphabet and the bit order that goes with it */
+ switch (type) {
+ case RSPAMD_BASE32_DEFAULT:
+ b32 = b32_default;
+ break;
+ case RSPAMD_BASE32_BLEACH:
+ b32 = b32_bleach;
+ inverse_order = false;
+ break;
+ case RSPAMD_BASE32_RFC:
+ b32 = b32_rfc;
+ inverse_order = false;
+ break;
+ default:
+ g_assert_not_reached();
+ abort();
+ }
+
+ if (inverse_order) {
+ /* Zbase32 as used in Rspamd */
+ /* `o < end - 1` keeps room for the cases that emit two characters */
+ for (i = 0; i < inlen && o < end - 1; i++) {
+ switch (i % 5) {
+ case 0:
+ /* 8 bits of input and 3 to remain */
+ x = in[i];
+ remain = in[i] >> 5;
+ *o++ = b32[x & 0x1F];
+ break;
+ case 1:
+ /* 11 bits of input, 1 to remain */
+ x = remain | in[i] << 3;
+ *o++ = b32[x & 0x1F];
+ *o++ = b32[x >> 5 & 0x1F];
+ remain = x >> 10;
+ break;
+ case 2:
+ /* 9 bits of input, 4 to remain */
+ x = remain | in[i] << 1;
+ *o++ = b32[x & 0x1F];
+ remain = x >> 5;
+ break;
+ case 3:
+ /* 12 bits of input, 2 to remain */
+ x = remain | in[i] << 4;
+ *o++ = b32[x & 0x1F];
+ *o++ = b32[x >> 5 & 0x1F];
+ remain = x >> 10 & 0x3;
+ break;
+ case 4:
+ /* 10 bits of output, nothing to remain */
+ x = remain | in[i] << 2;
+ *o++ = b32[x & 0x1F];
+ *o++ = b32[x >> 5 & 0x1F];
+ remain = -1;
+ break;
+ default:
+ /* Cannot happen: i % 5 is always in 0..4 */
+ break;
+ }
+ }
+ }
+ else {
+ /* Traditional base32 with no bits inversion */
+ /* Here `remain` is stored pre-shifted into the high bits of a symbol */
+ for (i = 0; i < inlen && o < end - 1; i++) {
+ switch (i % 5) {
+ case 0:
+ /* 8 bits of input and 3 to remain */
+ x = in[i] >> 3;
+ remain = (in[i] & 7) << 2;
+ *o++ = b32[x & 0x1F];
+ break;
+ case 1:
+ /* 11 bits of input, 1 to remain */
+ x = (remain << 6) | in[i];
+ *o++ = b32[(x >> 6) & 0x1F];
+ *o++ = b32[(x >> 1) & 0x1F];
+ remain = (x & 0x1) << 4;
+ break;
+ case 2:
+ /* 9 bits of input, 4 to remain */
+ x = (remain << 4) | in[i];
+ *o++ = b32[(x >> 4) & 0x1F];
+ remain = (x & 15) << 1;
+ break;
+ case 3:
+ /* 12 bits of input, 2 to remain */
+ x = (remain << 7) | in[i];
+ *o++ = b32[(x >> 7) & 0x1F];
+ *o++ = b32[(x >> 2) & 0x1F];
+ remain = (x & 3) << 3;
+ break;
+ case 4:
+ /* 10 bits of output, nothing to remain */
+ x = (remain << 5) | in[i];
+ *o++ = b32[(x >> 5) & 0x1F];
+ *o++ = b32[x & 0x1F];
+ remain = -1;
+ break;
+ default:
+ /* Cannot happen: i % 5 is always in 0..4 */
+ break;
+ }
+ }
+ }
+ /* Flush the bits left over from an incomplete five-byte group */
+ if (remain >= 0 && o < end) {
+ *o++ = b32[remain & 0x1F];
+ }
+
+ if (o <= end) {
+ return (o - out);
+ }
+
+ /* Only reached if the write pointer ran past the end of the buffer */
+ return -1;
+}
+
+/**
+ * Encode a buffer as a newly allocated, NUL-terminated base32 string.
+ * @param in bytes to encode
+ * @param inlen number of bytes in `in`
+ * @param type base32 flavour to use
+ * @return string allocated with g_malloc (release with g_free), or NULL if
+ * rspamd_encode_base32_buf reports a failure
+ */
+gchar *
+rspamd_encode_base32(const guchar *in, gsize inlen, enum rspamd_base32_type type)
+{
+    /* Encoded characters plus room for the terminator */
+    const gsize buf_size = inlen * 8 / 5 + 2;
+    gchar *encoded = g_malloc(buf_size);
+    gint written = rspamd_encode_base32_buf(in, inlen, encoded,
+                                            buf_size - 1, type);
+
+    if (written < 0) {
+        g_free(encoded);
+
+        return NULL;
+    }
+
+    encoded[written] = 0;
+
+    return encoded;
+}
+
+/**
+ * Map a textual base32 flavour name to its enum value.
+ * Recognised names are "default", "zbase", "bleach" and "rfc" (exact,
+ * case-sensitive match).
+ * @param str flavour name, may be NULL
+ * @return RSPAMD_BASE32_DEFAULT for NULL, the matching type for a known
+ * name, RSPAMD_BASE32_INVALID otherwise
+ */
+enum rspamd_base32_type
+rspamd_base32_decode_type_from_str(const gchar *str)
+{
+    if (str == NULL) {
+        return RSPAMD_BASE32_DEFAULT;
+    }
+
+    if (strcmp(str, "default") == 0 || strcmp(str, "zbase") == 0) {
+        return RSPAMD_BASE32_ZBASE;
+    }
+
+    if (strcmp(str, "bleach") == 0) {
+        return RSPAMD_BASE32_BLEACH;
+    }
+
+    if (strcmp(str, "rfc") == 0) {
+        return RSPAMD_BASE32_RFC;
+    }
+
+    return RSPAMD_BASE32_INVALID;
+}
+
+/*
+ * Reverse lookup for the z-base32 alphabet "ybndrfg8ejkmcpqxot1uwisza345h769":
+ * indexed by input byte, gives the 5-bit symbol value, or 0xff for a byte
+ * that is not part of the alphabet.
+ */
+static const guchar b32_dec_zbase[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d,
+ 0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
+ 0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
+ 0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
+ 0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+/*
+ * Reverse lookup for the "bleach" alphabet "qpzry9x8gf2tvdw0s3jn54khce6mua7l":
+ * indexed by input byte, gives the 5-bit symbol value, or 0xff for a byte
+ * that is not part of the alphabet.
+ */
+static const guchar b32_dec_bleach[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x0f, 0xff, 0x0a, 0x11, 0x15, 0x14, 0x1a, 0x1e,
+ 0x07, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x1d, 0xff, 0x18, 0x0d, 0x19, 0x09, 0x08,
+ 0x17, 0xff, 0x12, 0x16, 0x1f, 0x1b, 0x13, 0xff,
+ 0x01, 0x00, 0x03, 0x10, 0x0b, 0x1c, 0x0c, 0x0e,
+ 0x06, 0x04, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+/*
+ * Reverse lookup for the RFC alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567":
+ * indexed by input byte, gives the 5-bit symbol value, or 0xff for a byte
+ * that is not part of the alphabet. Only upper case letters are mapped.
+ */
+static const guchar b32_dec_rfc[] = {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+
+/**
+ * Decode base32 text into raw bytes.
+ * @param in encoded characters
+ * @param inlen number of characters in `in`
+ * @param out destination buffer
+ * @param outlen capacity of `out` in bytes
+ * @param type base32 flavour; selects the lookup table and the bit order
+ * @return number of bytes written, or -1 when a character outside the
+ * alphabet is met or `out` is too small
+ */
+gint rspamd_decode_base32_buf(const gchar *in, gsize inlen, guchar *out, gsize outlen,
+                              enum rspamd_base32_type type)
+{
+    const guchar *table;
+    const gchar *cur = in, *const in_end = in + inlen;
+    guchar *dst = out, *const dst_end = out + outlen;
+    guint bitbuf = 0U, nbits = 0;
+    guchar sym;
+    bool lsb_first = true;
+
+    switch (type) {
+    case RSPAMD_BASE32_DEFAULT:
+        table = b32_dec_zbase;
+        break;
+    case RSPAMD_BASE32_BLEACH:
+        table = b32_dec_bleach;
+        lsb_first = false;
+        break;
+    case RSPAMD_BASE32_RFC:
+        table = b32_dec_rfc;
+        lsb_first = false;
+        break;
+    default:
+        g_assert_not_reached();
+        abort();
+    }
+
+    if (lsb_first) {
+        /* zbase32: new symbols are stacked above the bits already held */
+        for (; cur < in_end; cur++) {
+            if (nbits >= 8) {
+                /* A full byte was collected by earlier symbols: flush it */
+                nbits -= 8;
+                *dst++ = bitbuf & 0xFF;
+                bitbuf >>= 8;
+            }
+
+            sym = table[(guchar) *cur];
+
+            if (sym == 0xff || dst >= dst_end) {
+                return -1;
+            }
+
+            bitbuf |= sym << nbits;
+            nbits += 5;
+        }
+
+        /* Whatever is still buffered goes out as one final byte */
+        if (nbits > 0 && dst < dst_end) {
+            *dst++ = (bitbuf & 0xFF);
+        }
+        else if (dst > dst_end) {
+            return -1;
+        }
+    }
+    else {
+        /* Conventional order: symbols are shifted in from the right */
+        for (; cur < in_end; cur++) {
+            sym = table[(guchar) *cur];
+
+            if (sym == 0xff) {
+                return -1;
+            }
+
+            bitbuf = (bitbuf << 5) | sym;
+            nbits += 5;
+
+            if (nbits < 8) {
+                continue;
+            }
+
+            nbits -= 8;
+
+            /* No room left for the byte that is ready */
+            if (dst >= dst_end) {
+                return -1;
+            }
+
+            *dst++ = (bitbuf >> nbits) & 0xFF;
+            /* Keep only the bits that have not been emitted yet */
+            bitbuf &= (1u << nbits) - 1;
+        }
+
+        /* Trailing bits are emitted only when they are not all zero */
+        if (nbits > 0 && dst < dst_end && bitbuf != 0) {
+            *dst++ = (bitbuf & 0xFF);
+        }
+        else if (dst > dst_end) {
+            return -1;
+        }
+    }
+
+    return (dst - out);
+}
+
+/**
+ * Decode base32 text into a newly allocated buffer.
+ * A NUL byte is appended after the decoded data.
+ * @param in encoded characters
+ * @param inlen number of characters in `in`
+ * @param outlen if not NULL, receives the decoded length (0 on failure)
+ * @param type base32 flavour
+ * @return buffer allocated with g_malloc (release with g_free), or NULL if
+ * decoding failed
+ */
+guchar *
+rspamd_decode_base32(const gchar *in, gsize inlen, gsize *outlen,
+                     enum rspamd_base32_type type)
+{
+    /* Decoded bytes plus room for the trailing NUL */
+    const gsize buf_size = inlen * 5 / 8 + 2;
+    guchar *decoded = g_malloc(buf_size);
+    gssize nbytes = rspamd_decode_base32_buf(in, inlen, decoded, buf_size - 1,
+                                             type);
+
+    if (nbytes < 0) {
+        g_free(decoded);
+        decoded = NULL;
+        nbytes = 0;
+    }
+    else {
+        decoded[nbytes] = '\0';
+    }
+
+    if (outlen) {
+        *outlen = nbytes;
+    }
+
+    return decoded;
+}
+
+
+/**
+ * Encode a buffer as base64, optionally breaking the output into lines.
+ * @param in bytes to encode
+ * @param inlen number of bytes in `in`
+ * @param str_len maximum line length in characters; values <= 0 disable line
+ * breaking, positive values must be greater than 8 (asserted)
+ * @param outlen if not NULL, receives the length of the result without the
+ * terminating NUL
+ * @param fold if TRUE, a tab is emitted after every inserted line break
+ * @param how which line break sequence to insert (CR, LF or CRLF)
+ * @return NUL-terminated string allocated with g_malloc
+ */
+gchar *
+rspamd_encode_base64_common(const guchar *in, gsize inlen, gint str_len,
+                            gsize *outlen, gboolean fold, enum rspamd_newlines_type how)
+{
+#define ADD_SPLIT                                                                            \
+    do {                                                                                     \
+        if (how == RSPAMD_TASK_NEWLINES_CR || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\r'; \
+        if (how == RSPAMD_TASK_NEWLINES_LF || how == RSPAMD_TASK_NEWLINES_CRLF) *o++ = '\n'; \
+        if (fold) *o++ = '\t';                                                               \
+    } while (0)
+#define CHECK_SPLIT                           \
+    do {                                      \
+        if (str_len > 0 && cols >= str_len) { \
+            ADD_SPLIT;                        \
+            cols = 0;                         \
+        }                                     \
+    } while (0)
+
+    gsize allocated_len = (inlen / 3) * 4 + 5;
+    gchar *out, *o;
+    guint64 n;
+    guint32 rem, t, carry;
+    gint cols, shift;
+    static const char b64_enc[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "abcdefghijklmnopqrstuvwxyz"
+        "0123456789+/";
+
+    if (str_len > 0) {
+        g_assert(str_len > 8);
+        /* Reserve room for the line breaks (and fold tabs) to be inserted */
+        if (fold) {
+            switch (how) {
+            case RSPAMD_TASK_NEWLINES_CR:
+            case RSPAMD_TASK_NEWLINES_LF:
+                allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+                break;
+            default:
+                allocated_len += (allocated_len / str_len + 1) * 3 + 1;
+                break;
+            }
+        }
+        else {
+            switch (how) {
+            case RSPAMD_TASK_NEWLINES_CR:
+            case RSPAMD_TASK_NEWLINES_LF:
+                allocated_len += (allocated_len / str_len + 1) * 1 + 1;
+                break;
+            default:
+                allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+                break;
+            }
+        }
+    }
+
+    out = g_malloc(allocated_len);
+    o = out;
+    cols = 0;
+
+    /*
+     * Fast path: load 8 bytes at once and encode the upper 6 of them.
+     * The load needs 8 readable bytes, so it may only run while at least
+     * sizeof(n) bytes remain; the byte-wise loop below encodes the rest.
+     */
+    while (inlen >= sizeof(n)) {
+        memcpy(&n, in, sizeof(n));
+        n = GUINT64_TO_BE(n);
+
+        if (str_len <= 0 || cols <= str_len - 8) {
+            *o++ = b64_enc[(n >> 58) & 0x3F];
+            *o++ = b64_enc[(n >> 52) & 0x3F];
+            *o++ = b64_enc[(n >> 46) & 0x3F];
+            *o++ = b64_enc[(n >> 40) & 0x3F];
+            *o++ = b64_enc[(n >> 34) & 0x3F];
+            *o++ = b64_enc[(n >> 28) & 0x3F];
+            *o++ = b64_enc[(n >> 22) & 0x3F];
+            *o++ = b64_enc[(n >> 16) & 0x3F];
+            cols += 8;
+        }
+        else {
+            /* The line ends inside this group: fill it, break, continue */
+            cols = str_len - cols;
+            shift = 58;
+            while (cols) {
+                *o++ = b64_enc[(n >> shift) & 0x3F];
+                shift -= 6;
+                cols--;
+            }
+
+            ADD_SPLIT;
+
+            /* Remaining bytes */
+            while (shift >= 16) {
+                *o++ = b64_enc[(n >> shift) & 0x3F];
+                shift -= 6;
+                cols++;
+            }
+        }
+
+        in += 6;
+        inlen -= 6;
+    }
+
+    CHECK_SPLIT;
+
+    rem = 0;
+    carry = 0;
+
+    for (;;) {
+        /* Remaining data, three bytes per round; `rem` counts pending bytes */
+        switch (rem) {
+        case 0:
+            if (inlen-- == 0) {
+                goto end;
+            }
+            t = *in++;
+            *o++ = b64_enc[t >> 2];
+            carry = (t << 4) & 0x30;
+            rem = 1;
+            cols++;
+            /* fallthrough */
+        case 1:
+            if (inlen-- == 0) {
+                goto end;
+            }
+            CHECK_SPLIT;
+            t = *in++;
+            *o++ = b64_enc[carry | (t >> 4)];
+            carry = (t << 2) & 0x3C;
+            rem = 2;
+            cols++;
+            /* fallthrough */
+        default:
+            if (inlen-- == 0) {
+                goto end;
+            }
+            CHECK_SPLIT;
+            t = *in++;
+            *o++ = b64_enc[carry | (t >> 6)];
+            cols++;
+            CHECK_SPLIT;
+            *o++ = b64_enc[t & 0x3F];
+            cols++;
+            CHECK_SPLIT;
+            rem = 0;
+        }
+    }
+
+end:
+    /* Emit the carried bits and pad the last group with '=' */
+    if (rem == 1) {
+        *o++ = b64_enc[carry];
+        cols++;
+        CHECK_SPLIT;
+        *o++ = '=';
+        cols++;
+        CHECK_SPLIT;
+        *o++ = '=';
+        cols++;
+        CHECK_SPLIT;
+    }
+    else if (rem == 2) {
+        *o++ = b64_enc[carry];
+        cols++;
+        CHECK_SPLIT;
+        *o++ = '=';
+        cols++;
+    }
+
+    CHECK_SPLIT;
+
+    *o = '\0';
+
+    if (outlen != NULL) {
+        *outlen = o - out;
+    }
+
+    return out;
+}
+
+/**
+ * Encode a buffer as base64 via rspamd_encode_base64_common with folding
+ * disabled and CRLF as the line break type.
+ * @param in bytes to encode
+ * @param inlen number of bytes in `in`
+ * @param str_len maximum line length, <= 0 for no line breaking
+ * @param outlen if not NULL, receives the length of the result
+ * @return newly allocated NUL-terminated string
+ */
+gchar *
+rspamd_encode_base64(const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen)
+{
+ return rspamd_encode_base64_common(in, inlen, str_len, outlen, FALSE,
+ RSPAMD_TASK_NEWLINES_CRLF);
+}
+
+/**
+ * Encode a buffer as base64 via rspamd_encode_base64_common with folding
+ * enabled, i.e. every inserted line break is followed by a tab.
+ * @param in bytes to encode
+ * @param inlen number of bytes in `in`
+ * @param str_len maximum line length, <= 0 for no line breaking
+ * @param outlen if not NULL, receives the length of the result
+ * @param how line break type to insert
+ * @return newly allocated NUL-terminated string
+ */
+gchar *
+rspamd_encode_base64_fold(const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen, enum rspamd_newlines_type how)
+{
+ return rspamd_encode_base64_common(in, inlen, str_len, outlen, TRUE, how);
+}
+
+/* True for bytes that are copied literally: printable ASCII except '=' (61), plus CR, LF, space and tab */
+#define QP_RANGE(x) (((x) >= 33 && (x) <= 60) || ((x) >= 62 && (x) <= 126) || (x) == '\r' || (x) == '\n' || (x) == ' ' || (x) == '\t')
+/* True when line limiting is on and one more literal character reaches the limit */
+#define QP_SPAN_NORMAL(span, str_len) ((str_len) > 0 && \
+ ((span) + 1) >= (str_len))
+/* Same check with 4 characters of headroom, used before emitting an "=XX" escape */
+#define QP_SPAN_SPECIAL(span, str_len) ((str_len) > 0 && \
+ ((span) + 4) >= (str_len))
+
+/* Append a soft line break ("=" followed by the newline of type `how`) at out[pos]; returns the new position */
+static inline gsize
+rspamd_qp_append_soft_break(gchar *out, gsize pos, enum rspamd_newlines_type how)
+{
+    out[pos++] = '=';
+
+    switch (how) {
+    default:
+    case RSPAMD_TASK_NEWLINES_CRLF:
+        out[pos++] = '\r';
+        out[pos++] = '\n';
+        break;
+    case RSPAMD_TASK_NEWLINES_LF:
+        out[pos++] = '\n';
+        break;
+    case RSPAMD_TASK_NEWLINES_CR:
+        out[pos++] = '\r';
+        break;
+    }
+
+    return pos;
+}
+
+/**
+ * Encode a buffer as quoted-printable, inserting soft line breaks.
+ * Bytes accepted by QP_RANGE are copied as is, everything else becomes
+ * "=XX". A space or tab that directly precedes CR/LF, or that ends the
+ * input, is emitted as "=XX" as well.
+ * @param in bytes to encode
+ * @param inlen number of bytes in `in`
+ * @param str_len maximum line length; values <= 0 disable soft line breaks
+ * @param outlen if not NULL, receives the length of the result without the
+ * terminating NUL
+ * @param how newline sequence used for soft line breaks
+ * @return NUL-terminated string allocated with g_malloc
+ */
+gchar *
+rspamd_encode_qp_fold(const guchar *in, gsize inlen, gint str_len,
+                      gsize *outlen, enum rspamd_newlines_type how)
+{
+    gsize olen = 0, span = 0, i = 0, seen_spaces = 0;
+    gchar *out;
+    gint ch, last_sp = 0;
+    const guchar *end = in + inlen, *p = in;
+    static const gchar hexdigests[16] = "0123456789ABCDEF";
+
+    /* Pass 1: compute an upper bound for the output size */
+    while (p < end) {
+        ch = *p;
+
+        if (QP_RANGE(ch)) {
+            olen++;
+            span++;
+
+            if (ch == '\r' || ch == '\n') {
+                if (seen_spaces > 0) {
+                    /* We must encode spaces at the end of line */
+                    olen += 3;
+                    seen_spaces = 0;
+                    /* Special stuff for space character at the end */
+                    if (QP_SPAN_SPECIAL(span, str_len)) {
+                        if (how == RSPAMD_TASK_NEWLINES_CRLF) {
+                            /* =\r\n */
+                            olen += 3;
+                        }
+                        else {
+                            olen += 2;
+                        }
+                    }
+                    /* Continue with the same `ch` but without spaces logic */
+                    continue;
+                }
+
+                span = 0;
+            }
+            else if (ch == ' ' || ch == '\t') {
+                seen_spaces++;
+                last_sp = ch;
+            }
+            else {
+                seen_spaces = 0;
+            }
+        }
+        else {
+            if (QP_SPAN_SPECIAL(span, str_len)) {
+                if (how == RSPAMD_TASK_NEWLINES_CRLF) {
+                    /* =\r\n */
+                    olen += 3;
+                }
+                else {
+                    olen += 2;
+                }
+                span = 0;
+            }
+
+            olen += 3;
+            span += 3;
+        }
+
+        if (QP_SPAN_NORMAL(span, str_len)) {
+            if (how == RSPAMD_TASK_NEWLINES_CRLF) {
+                /* =\r\n */
+                olen += 3;
+            }
+            else {
+                olen += 2;
+            }
+            span = 0;
+        }
+
+        p++;
+    }
+
+    if (seen_spaces > 0) {
+        /* Reserve length for the last space encoded */
+        olen += 3;
+    }
+
+    /* Pass 2: produce the output */
+    out = g_malloc(olen + 1);
+    p = in;
+    i = 0;
+    span = 0;
+    seen_spaces = 0;
+
+    while (p < end) {
+        ch = *p;
+
+        if (QP_RANGE(ch)) {
+            if (ch == '\r' || ch == '\n') {
+                if (seen_spaces > 0) {
+                    /* Take back the literal space written just before */
+                    i--;
+
+                    if (QP_SPAN_SPECIAL(span, str_len)) {
+                        /* Add soft newline */
+                        if (p + 1 < end || span + 3 >= str_len) {
+                            i = rspamd_qp_append_soft_break(out, i, how);
+                        }
+
+                        span = 0;
+                    }
+
+                    /* Now write encoded `last_sp` */
+                    out[i++] = '=';
+                    out[i++] = hexdigests[((last_sp >> 4) & 0xF)];
+                    out[i++] = hexdigests[(last_sp & 0xF)];
+                    /*
+                     * The space is dealt with on both paths; without this
+                     * reset the next round would encode it a second time
+                     * on top of the digits written above.
+                     */
+                    seen_spaces = 0;
+
+                    /* Process the same newline again, now without spaces */
+                    continue;
+                }
+                span = 0;
+            }
+            else if (ch == ' ' || ch == '\t') {
+                seen_spaces++;
+                last_sp = ch;
+                span++;
+            }
+            else {
+                seen_spaces = 0;
+                span++;
+            }
+
+            out[i++] = ch;
+        }
+        else {
+            if (QP_SPAN_SPECIAL(span, str_len)) {
+                /* Add new line and then continue */
+                if (p + 1 < end || span + 3 >= str_len) {
+                    i = rspamd_qp_append_soft_break(out, i, how);
+                    span = 0;
+                }
+            }
+
+            out[i++] = '=';
+            out[i++] = hexdigests[((ch >> 4) & 0xF)];
+            out[i++] = hexdigests[(ch & 0xF)];
+            span += 3;
+            seen_spaces = 0;
+        }
+
+        if (QP_SPAN_NORMAL(span, str_len)) {
+            /* Add new line and then continue */
+            if (p + 1 < end || span > str_len || seen_spaces) {
+                i = rspamd_qp_append_soft_break(out, i, how);
+                span = 0;
+                seen_spaces = 0;
+            }
+        }
+
+        g_assert(i <= olen);
+        p++;
+    }
+
+    /* Deal with the last space character */
+    if (seen_spaces > 0) {
+        i--;
+        out[i++] = '=';
+        out[i++] = hexdigests[((last_sp >> 4) & 0xF)];
+        out[i++] = hexdigests[(last_sp & 0xF)];
+    }
+
+    out[i] = '\0';
+
+    if (outlen) {
+        *outlen = i;
+    }
+
+    return out;
+}
+
+#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
+
+/**
+ * Edit distance between two strings where insertion and removal cost 1,
+ * replacement costs `replace_cost`, and swapped adjacent characters are
+ * taken into account.
+ * The work rows live in function-level statics and are reused by later calls.
+ * @param s1 first string (must not be NULL)
+ * @param s1len length of `s1`, 0 means "use strlen"
+ * @param s2 second string (must not be NULL)
+ * @param s2len length of `s2`, 0 means "use strlen"
+ * @param replace_cost cost of replacing one character by another
+ * @return the distance, or 8192 if either string is longer than 8192 bytes
+ */
+gint rspamd_strings_levenshtein_distance(const gchar *s1, gsize s1len,
+                                         const gchar *s2, gsize s2len,
+                                         guint replace_cost)
+{
+    static GArray *current_row = NULL, *prev_row = NULL, *transp_row = NULL;
+    static const guint max_cmp = 8192;
+    gchar ch_short, ch_long, prev_ch_short, prev_ch_long;
+    gint subst, best;
+    gsize row_len;
+
+    g_assert(s1 != NULL);
+    g_assert(s2 != NULL);
+
+    if (s1len == 0) {
+        s1len = strlen(s1);
+    }
+    if (s2len == 0) {
+        s2len = strlen(s2);
+    }
+
+    if (MAX(s1len, s2len) > max_cmp) {
+        /* Cannot compare too many characters */
+        return max_cmp;
+    }
+
+    if (s1len > s2len) {
+        /* Make s1 the shorter string so that the rows are as small as possible */
+        const gchar *swap_str = s1;
+        gsize swap_len = s1len;
+
+        s1 = s2;
+        s1len = s2len;
+        s2 = swap_str;
+        s2len = swap_len;
+    }
+
+    row_len = s1len + 1;
+
+    /* Create the shared rows on first use, then grow them on demand */
+    if (current_row == NULL) {
+        current_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), row_len);
+        prev_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), row_len);
+        transp_row = g_array_sized_new(FALSE, FALSE, sizeof(gint), row_len);
+    }
+
+    if (current_row->len < row_len) {
+        g_array_set_size(current_row, row_len);
+        g_array_set_size(prev_row, row_len);
+        g_array_set_size(transp_row, row_len);
+    }
+
+    memset(current_row->data, 0, row_len * sizeof(gint));
+    memset(transp_row->data, 0, row_len * sizeof(gint));
+
+    /* Distance from the empty prefix of s2 to each prefix of s1 */
+    for (gint col = 0; col <= s1len; col++) {
+        g_array_index(prev_row, gint, col) = col;
+    }
+
+    prev_ch_long = '\0';
+
+    for (gint row = 1; row <= s2len; row++) {
+        ch_long = s2[row - 1];
+        g_array_index(current_row, gint, 0) = row;
+        prev_ch_short = '\0';
+
+        for (gint col = 1; col <= s1len; col++) {
+            gint ins_cost, del_cost, rep_cost;
+
+            ch_short = s1[col - 1];
+            subst = (ch_short == ch_long) ? 0 : replace_cost;
+
+            ins_cost = g_array_index(current_row, gint, col - 1) + 1;
+            del_cost = g_array_index(prev_row, gint, col) + 1;
+            rep_cost = g_array_index(prev_row, gint, col - 1) + subst;
+            best = MIN3(ins_cost, del_cost, rep_cost);
+
+            /* Two neighbouring characters that were swapped */
+            if (ch_short == prev_ch_long && ch_long == prev_ch_short && col >= 2) {
+                best = MIN(best, g_array_index(transp_row, gint, col - 2) + subst);
+            }
+
+            g_array_index(current_row, gint, col) = best;
+            prev_ch_short = ch_short;
+        }
+
+        prev_ch_long = ch_long;
+
+        /* Rotate rows: prev -> transp, current -> prev, old transp is reused */
+        GArray *recycled = transp_row;
+
+        transp_row = prev_row;
+        prev_row = current_row;
+        current_row = recycled;
+    }
+
+    /* After the final rotation the last computed row is prev_row */
+    best = g_array_index(prev_row, gint, s1len);
+
+    return best;
+}
+
+/**
+ * Fold a header value so that lines stay close to `fold_max` characters.
+ * When `fold_on_chars` is given, a fold is inserted after every occurrence
+ * of one of its characters. Otherwise folds are placed around ',', ';' and
+ * whitespace depending on the current line length, quoted strings are kept
+ * in one piece, and existing line breaks are either kept (when followed by
+ * space or tab) or turned into a fold.
+ * @param name header name (must not be NULL; only its length is used)
+ * @param name_len length of the name, counted into the first line
+ * @param value header value (must not be NULL)
+ * @param value_len number of bytes of `value` to process
+ * @param fold_max desired maximum line length, values below 20 select 76
+ * @param how newline type used in the fold sequence (newline followed by tab)
+ * @param fold_on_chars optional NUL-terminated set of characters to fold after
+ * @return newly allocated GString with the folded value
+ */
+GString *
+rspamd_header_value_fold(const gchar *name, gsize name_len,
+                         const gchar *value,
+                         gsize value_len,
+                         guint fold_max,
+                         enum rspamd_newlines_type how,
+                         const gchar *fold_on_chars)
+{
+    GString *res;
+    const guint default_fold_max = 76;
+    guint cur_len;
+    const gchar *p, *c, *end, *fold_sequence;
+    guint nspaces = 0;
+    gboolean first_token = TRUE;
+    enum {
+        fold_before = 0,
+        fold_after
+    } fold_type = fold_before;
+    enum {
+        read_token = 0,
+        read_quoted,
+        after_quote,
+        fold_token,
+    } state = read_token,
+      next_state = read_token;
+
+    g_assert(name != NULL);
+    g_assert(value != NULL);
+
+    /* Filter insane values */
+    if (fold_max < 20) {
+        fold_max = default_fold_max;
+    }
+
+    switch (how) {
+    case RSPAMD_TASK_NEWLINES_LF:
+        fold_sequence = "\n\t";
+        break;
+    case RSPAMD_TASK_NEWLINES_CR:
+        fold_sequence = "\r\t";
+        break;
+    case RSPAMD_TASK_NEWLINES_CRLF:
+    default:
+        fold_sequence = "\r\n\t";
+        break;
+    }
+
+    res = g_string_sized_new(value_len);
+
+    /* `c` is the start of the text not yet copied to `res`, `p` the cursor */
+    c = value;
+    p = c;
+    end = value + value_len;
+    /* name:<WSP> */
+    cur_len = name_len + 2;
+
+    while (p < end) {
+        switch (state) {
+
+        case read_token:
+            if (fold_on_chars) {
+                if (strchr(fold_on_chars, *p) != NULL) {
+                    fold_type = fold_after;
+                    state = fold_token;
+                    next_state = read_token;
+                }
+
+                p++;
+            }
+            else {
+                if (*p == ',' || *p == ';') {
+                    /* We have something similar to the token's end, so check len */
+                    if (cur_len > fold_max * 0.8 && cur_len < fold_max) {
+                        /* We want fold */
+                        fold_type = fold_after;
+                        state = fold_token;
+                        next_state = read_token;
+                    }
+                    else if (cur_len > fold_max && !first_token) {
+                        fold_type = fold_before;
+                        state = fold_token;
+                        next_state = read_token;
+                    }
+                    else {
+                        g_string_append_len(res, c, p - c + 1);
+                        c = p + 1;
+                        first_token = FALSE;
+                    }
+                    p++;
+                }
+                else if (*p == '"') {
+                    /* Fold before quoted tokens */
+                    g_string_append_len(res, c, p - c);
+                    c = p;
+                    state = read_quoted;
+                }
+                else if (*p == '\r' || *p == '\n') {
+                    if (cur_len > fold_max && !first_token) {
+                        fold_type = fold_before;
+                        state = fold_token;
+                        next_state = read_token;
+                    }
+                    else {
+                        /* We need to ensure that it is a folding and not something else */
+
+                        const char *t = p;
+                        bool seen_fold = false;
+
+                        while (t < end) {
+                            if (*t == ' ' || *t == '\t') {
+                                seen_fold = true;
+                                break;
+                            }
+                            else if (!g_ascii_isspace(*t)) {
+                                break;
+                            }
+
+                            t++;
+                        }
+
+                        if (seen_fold) {
+                            /* Reset line length */
+                            cur_len = 0;
+
+                            /* Bounded: the value is not required to be NUL-terminated */
+                            while (p < end && g_ascii_isspace(*p)) {
+                                p++;
+                            }
+
+                            g_string_append_len(res, c, p - c);
+                            c = p;
+                            first_token = TRUE;
+                        }
+                        else {
+                            /* Not seen folding, inject it */
+                            g_string_append_len(res, c, p - c);
+                            g_string_append(res, fold_sequence);
+                            p = t; /* Adjust p to ensure that we do not append extra stuff */
+                            state = read_token;
+                            first_token = TRUE;
+                            c = p;
+                        }
+                    }
+                }
+                else if (g_ascii_isspace(*p)) {
+                    if (cur_len > fold_max * 0.8 && cur_len < fold_max) {
+                        /* We want fold */
+                        fold_type = fold_after;
+                        state = fold_token;
+                        next_state = read_token;
+                    }
+                    else if (cur_len > fold_max && !first_token) {
+                        fold_type = fold_before;
+                        state = fold_token;
+                        next_state = read_token;
+                    }
+                    else {
+                        g_string_append_len(res, c, p - c);
+                        c = p;
+                        first_token = FALSE;
+                        p++;
+                        cur_len++;
+                    }
+                }
+                else {
+                    p++;
+                    cur_len++;
+                }
+            }
+            break;
+        case fold_token:
+            /* Here, we have token start at 'c' and token end at 'p' */
+            if (fold_type == fold_after) {
+                nspaces = 0;
+                if (p > c) {
+                    g_string_append_len(res, c, p - c);
+
+                    /*
+                     * Strip any spaces that are appended to the result
+                     * before folding; never walk below the start of `res`
+                     */
+                    while (res->len > 0 &&
+                           g_ascii_isspace(res->str[res->len - 1])) {
+                        nspaces++;
+                        res->len--;
+                    }
+                }
+
+                g_string_append(res, fold_sequence);
+
+                /* Skip space if needed */
+                if (g_ascii_isspace(*p)) {
+                    p++;
+                }
+
+                /* Move leftover spaces */
+                while (nspaces) {
+                    g_string_append_c(res, ' ');
+                    nspaces--;
+                }
+
+                cur_len = 0;
+            }
+            else {
+                /* Skip space if needed */
+                if (g_ascii_isspace(*c) && p > c) {
+                    c++;
+                }
+
+                /*
+                 * Avoid double folding: a fold sequence ends with a tab, so
+                 * look at the character before the last one. With fewer
+                 * than two characters in `res` there is no previous fold.
+                 */
+                if (res->len < 2 ||
+                    (res->str[res->len - 2] != '\r' &&
+                     res->str[res->len - 2] != '\n')) {
+                    while (res->len > 0 &&
+                           g_ascii_isspace(res->str[res->len - 1])) {
+                        nspaces++;
+                        res->len--;
+                    }
+
+                    g_string_append(res, fold_sequence);
+                }
+
+                /* Move leftover spaces */
+                cur_len = nspaces;
+
+                while (nspaces) {
+                    g_string_append_c(res, ' ');
+                    nspaces--;
+                }
+
+                if (p > c) {
+                    g_string_append_len(res, c, p - c);
+                    cur_len += p - c;
+                }
+                else {
+                    cur_len = 0;
+                }
+            }
+
+            first_token = TRUE;
+            c = p;
+            state = next_state;
+            break;
+
+        case read_quoted:
+            /* `c` points at the opening quote, so only a later '"' closes it */
+            if (p != c && *p == '"') {
+                state = after_quote;
+            }
+            p++;
+            cur_len++;
+            break;
+
+        case after_quote:
+            state = read_token;
+            /* Skip one more character after the quote */
+            p++;
+            cur_len++;
+            g_string_append_len(res, c, p - c);
+            c = p;
+            first_token = TRUE;
+            break;
+        }
+    }
+
+    /* Last token */
+    switch (state) {
+    case read_token:
+        if (!fold_on_chars && cur_len > fold_max && !first_token) {
+            /* `c` may already be at the end of the value */
+            if (c < end && g_ascii_isspace(*c)) {
+                c++;
+            }
+            g_string_append(res, fold_sequence);
+            g_string_append_len(res, c, p - c);
+        }
+        else {
+            g_string_append_len(res, c, p - c);
+        }
+        break;
+    case read_quoted:
+    case after_quote:
+        g_string_append_len(res, c, p - c);
+        break;
+    case fold_token:
+        /* Here, we have token start at 'c' and token end at 'p' */
+        if (res->len > 0 && g_ascii_isspace(res->str[res->len - 1])) {
+            g_string_append_len(res, c, p - c);
+        }
+        else {
+            if (*c != '\r' && *c != '\n') {
+                /* We need to add folding as well */
+                g_string_append(res, fold_sequence);
+                g_string_append_len(res, c, p - c);
+            }
+            else {
+                g_string_append_len(res, c, p - c);
+            }
+        }
+        break;
+    default:
+        g_assert(p == c);
+        break;
+    }
+
+    return res;
+}
+
+/* Exact byte comparison, used by the case-sensitive substring search */
+static inline bool rspamd_substring_cmp_func(guchar a, guchar b)
+{
+ return a == b;
+}
+
+/* Comparison through the lc_map lookup table, used by the caseless substring search */
+static inline bool rspamd_substring_casecmp_func(guchar a, guchar b)
+{
+ return lc_map[a] == lc_map[b];
+}
+
+/* Character equality predicate that the substring search helpers are parameterised with */
+typedef bool (*rspamd_cmpchar_func_t)(guchar a, guchar b);
+
+/**
+ * Build the KMP shift table for a pattern.
+ * @param pat pattern characters
+ * @param len pattern length
+ * @param fsm output table; entries 0..len are written, so it needs room for
+ * `len + 1` elements
+ * @param f character equality predicate
+ */
+static inline void
+rspamd_substring_preprocess_kmp(const gchar *pat, gsize len, goffset *fsm,
+                                rspamd_cmpchar_func_t f)
+{
+    goffset pos = 0, border = -1;
+
+    fsm[0] = -1;
+
+    while (pos < len) {
+        /* Fall back through shorter borders until one can be extended */
+        while (border > -1 && !f(pat[pos], pat[border])) {
+            border = fsm[border];
+        }
+
+        pos++;
+        border++;
+
+        /*
+         * If the next characters match as well, a mismatch here would
+         * repeat at `border`, so reuse its fallback directly.
+         */
+        fsm[pos] = (pos < len && border < len && f(pat[pos], pat[border]))
+                       ? fsm[border]
+                       : border;
+    }
+}
+
+/**
+ * Find the first occurrence of `srch` in `in` using a table prepared by
+ * rspamd_substring_preprocess_kmp. The scan has the shape of the
+ * Apostolico-Crochemore variant of KMP: `ell` is the length of the leading
+ * run of identical pattern characters, the part after the run is compared
+ * first and the run itself last.
+ * @param in text to search in
+ * @param inlen length of the text (expected to be >= srchlen)
+ * @param srch pattern
+ * @param srchlen pattern length
+ * @param fsm shift table for `srch` with `srchlen + 1` entries
+ * @param f character equality predicate
+ * @return offset of the first match in `in`, or -1 if there is none
+ */
+static inline goffset
+rspamd_substring_search_preprocessed(const gchar *in, gsize inlen,
+                                     const gchar *srch,
+                                     gsize srchlen,
+                                     const goffset *fsm,
+                                     rspamd_cmpchar_func_t f)
+{
+    goffset i, j, k, ell;
+
+    /*
+     * Length of the leading run of equal characters. The pattern is length
+     * delimited, so the scan must stop at srchlen instead of relying on a
+     * terminator that differs from the last character.
+     */
+    for (ell = 1; (gsize) ell < srchlen && f(srch[ell - 1], srch[ell]); ell++) {}
+    if (ell == srchlen) {
+        /* The whole pattern is one repeated character */
+        ell = 0;
+    }
+
+    /* Searching */
+    i = ell;
+    j = k = 0;
+
+    while (j <= inlen - srchlen) {
+        /* Compare the part after the leading run */
+        while (i < srchlen && f(srch[i], in[i + j])) {
+            ++i;
+        }
+
+        if (i >= srchlen) {
+            /* Tail matched, now verify the leading run */
+            while (k < ell && f(srch[k], in[j + k])) {
+                ++k;
+            }
+
+            if (k >= ell) {
+                return j;
+            }
+        }
+
+        /* Shift the window and work out where comparison resumes */
+        j += (i - fsm[i]);
+
+        if (i == ell) {
+            k = MAX(0, k - 1);
+        }
+        else {
+            if (fsm[i] <= ell) {
+                k = MAX(0, fsm[i]);
+                i = ell;
+            }
+            else {
+                k = ell;
+                i = fsm[i];
+            }
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * Build the shift table for `srch` and run the search with predicate `f`.
+ * Patterns shorter than 128 characters use a static table shared between
+ * calls; longer ones get a temporary heap table.
+ * @return offset of the first match in `in`, or -1 if there is none
+ */
+static inline goffset
+rspamd_substring_search_common(const gchar *in, gsize inlen,
+                               const gchar *srch, gsize srchlen, rspamd_cmpchar_func_t f)
+{
+    static goffset st_fsm[128];
+    const gboolean on_heap = srchlen >= G_N_ELEMENTS(st_fsm);
+    goffset *table = st_fsm;
+    goffset found;
+
+    if (on_heap) {
+        /* The table has one entry more than the pattern has characters */
+        table = g_malloc((srchlen + 1) * sizeof(*table));
+    }
+
+    rspamd_substring_preprocess_kmp(srch, srchlen, table, f);
+    found = rspamd_substring_search_preprocessed(in, inlen, srch, srchlen, table, f);
+
+    if (on_heap) {
+        g_free(table);
+    }
+
+    return found;
+}
+
+/**
+ * Case-sensitive search for `srch` inside `in`.
+ * @param in text to search in
+ * @param inlen length of the text
+ * @param srch pattern
+ * @param srchlen pattern length
+ * @return offset of the first match (0 for an empty pattern), or -1 if the
+ * pattern does not occur
+ */
+goffset
+rspamd_substring_search(const gchar *in, gsize inlen,
+                        const gchar *srch, gsize srchlen)
+{
+    if (inlen < srchlen) {
+        return (-1);
+    }
+
+    if (inlen == srchlen) {
+        /* Same length: a byte-exact comparison, as for longer inputs */
+        return (memcmp(srch, in, srchlen) == 0 ? 0 : -1);
+    }
+
+    if (G_UNLIKELY(srchlen == 1)) {
+        const gchar *p;
+
+        p = memchr(in, srch[0], inlen);
+
+        if (p) {
+            return p - in;
+        }
+
+        return (-1);
+    }
+    else if (G_UNLIKELY(srchlen == 0)) {
+        return 0;
+    }
+
+    return rspamd_substring_search_common(in, inlen, srch, srchlen,
+                                          rspamd_substring_cmp_func);
+}
+
+/**
+ * Case-insensitive (via lc_map) search for `srch` inside `in`.
+ * @param in text to search in
+ * @param inlen length of the text
+ * @param srch pattern
+ * @param srchlen pattern length
+ * @return offset of the first match (0 for an empty pattern), or -1 if the
+ * pattern does not occur
+ */
+goffset
+rspamd_substring_search_caseless(const gchar *in, gsize inlen,
+                                 const gchar *srch, gsize srchlen)
+{
+    if (inlen > srchlen) {
+        if (G_UNLIKELY(srchlen == 1)) {
+            goffset i;
+            gchar s = lc_map[(guchar) srch[0]];
+
+            for (i = 0; i < inlen; i++) {
+                if (lc_map[(guchar) in[i]] == s) {
+                    return i;
+                }
+            }
+
+            return (-1);
+        }
+        else if (G_UNLIKELY(srchlen == 0)) {
+            /* Same as the case-sensitive search; srch must not be read here */
+            return 0;
+        }
+
+        return rspamd_substring_search_common(in, inlen, srch, srchlen,
+                                              rspamd_substring_casecmp_func);
+    }
+    else if (inlen == srchlen) {
+        return rspamd_lc_cmp(srch, in, srchlen) == 0 ? 0 : (-1);
+    }
+
+    return (-1);
+}
+
+/**
+ * Find the end of the headers block (an empty line) in a message buffer
+ * using a small state machine that tolerates \r, \n and \r\n line endings
+ * as well as folded ("obs-fws") continuation lines.
+ *
+ * @param input buffer to scan, must not be NULL
+ * @param body_start if not NULL, receives the offset where the body begins
+ * @return offset at which the headers end, or -1 if no end of headers
+ *         has been found
+ */
+goffset
+rspamd_string_find_eoh(GString *input, goffset *body_start)
+{
+	const gchar *p, *c = NULL, *end;
+	enum {
+		skip_char = 0,
+		got_cr,
+		got_lf,
+		got_linebreak,
+		got_linebreak_cr,
+		got_linebreak_lf,
+		obs_fws
+	} state = skip_char;
+
+	g_assert(input != NULL);
+
+	p = input->str;
+	end = p + input->len;
+
+	while (p < end) {
+		switch (state) {
+		case skip_char:
+			if (*p == '\r') {
+				p++;
+				state = got_cr;
+			}
+			else if (*p == '\n') {
+				p++;
+				state = got_lf;
+			}
+			else {
+				p++;
+			}
+			break;
+
+		case got_cr:
+			if (*p == '\r') {
+				/*
+				 * Double \r\r, so need to check the next char:
+				 * if it is '\n', then we have \r\r\n sequence, that is NOT
+				 * double end of line.
+				 * The look-ahead must be guarded by `p + 1 < end`: `p < end`
+				 * always holds inside this loop and guards nothing.
+				 */
+				if (p + 1 < end && p[1] == '\n') {
+					p++;
+					state = got_lf;
+				}
+				else {
+					/* We have \r\r[^\n] */
+					if (body_start) {
+						*body_start = p - input->str + 1;
+					}
+
+					return p - input->str;
+				}
+			}
+			else if (*p == '\n') {
+				p++;
+				state = got_lf;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* We have \r<space>*, allow to stay in this state */
+				c = p;
+				p++;
+				state = obs_fws;
+			}
+			else {
+				p++;
+				state = skip_char;
+			}
+			break;
+		case got_lf:
+			if (*p == '\n') {
+				/* We have \n\n, which is obviously end of headers */
+				if (body_start) {
+					*body_start = p - input->str + 1;
+				}
+				return p - input->str;
+			}
+			else if (*p == '\r') {
+				/* Do not consume: got_linebreak re-examines this \r */
+				state = got_linebreak;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* We have \n<space>*, allow to stay in this state */
+				c = p;
+				p++;
+				state = obs_fws;
+			}
+			else {
+				p++;
+				state = skip_char;
+			}
+			break;
+		case got_linebreak:
+			if (*p == '\r') {
+				c = p;
+				p++;
+				state = got_linebreak_cr;
+			}
+			else if (*p == '\n') {
+				c = p;
+				p++;
+				state = got_linebreak_lf;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* We have <linebreak><space>*, allow to stay in this state */
+				c = p;
+				p++;
+				state = obs_fws;
+			}
+			else {
+				p++;
+				state = skip_char;
+			}
+			break;
+		case got_linebreak_cr:
+			if (*p == '\r') {
+				/* Got double \r\r after \n, so does not treat it as EOH */
+				state = got_linebreak_cr;
+				p++;
+			}
+			else if (*p == '\n') {
+				state = got_linebreak_lf;
+				p++;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* We have \r\n<space>*, allow to keep in this state */
+				c = p;
+				state = obs_fws;
+				p++;
+			}
+			else {
+				p++;
+				state = skip_char;
+			}
+			break;
+		case got_linebreak_lf:
+			/* `c` was recorded when the second line break started */
+			g_assert(c != NULL);
+			if (body_start) {
+				/* \r\n\r\n */
+				*body_start = p - input->str;
+			}
+
+			return c - input->str;
+		case obs_fws:
+			if (*p == ' ' || *p == '\t') {
+				p++;
+			}
+			else if (*p == '\r') {
+				/* Perform lookahead due to #2349 */
+				if (end - p > 2) {
+					if (p[1] == '\n' && g_ascii_isspace(p[2])) {
+						/* Real obs_fws state, switch */
+						c = p;
+						p++;
+						state = got_cr;
+					}
+					else if (g_ascii_isspace(p[1])) {
+						p++;
+						state = obs_fws;
+					}
+					else {
+						/*
+						 * <nline> <wsp>+ \r <nwsp>.
+						 * It is an empty header likely, so we can go further...
+						 * https://tools.ietf.org/html/rfc2822#section-4.2
+						 */
+						c = p;
+						p++;
+						state = got_cr;
+					}
+				}
+				else {
+					/* shortage */
+					if (body_start) {
+						*body_start = p - input->str + 1;
+					}
+
+					return p - input->str;
+				}
+			}
+			else if (*p == '\n') {
+				/* Perform lookahead due to #2349 */
+				if (end - p > 1) {
+					/* Continue folding with an empty line */
+					if (p[1] == ' ' || p[1] == '\t') {
+						c = p;
+						p++;
+						state = obs_fws;
+					}
+					else if (p[1] == '\r') {
+						/* WTF state: we have seen spaces, \n and then it follows \r */
+						c = p;
+						p++;
+						state = got_lf;
+					}
+					else if (p[1] == '\n') {
+						/*
+						 * Switching to got_lf state here will let us to finish
+						 * the cycle.
+						 */
+						c = p;
+						p++;
+						state = got_lf;
+					}
+					else {
+						/*
+						 * <nline> <wsp>+ \n <nwsp>.
+						 * It is an empty header likely, so we can go further...
+						 * https://tools.ietf.org/html/rfc2822#section-4.2
+						 */
+						c = p;
+						p++;
+						state = got_lf;
+					}
+				}
+				else {
+					/* shortage */
+					if (body_start) {
+						*body_start = p - input->str + 1;
+					}
+
+					return p - input->str;
+				}
+			}
+			else {
+				p++;
+				state = skip_char;
+			}
+			break;
+		}
+	}
+
+	/* Input ended right after the second line break */
+	if (state == got_linebreak_lf) {
+		if (body_start) {
+			/* \r\n\r\n */
+			*body_start = p - input->str;
+		}
+
+		return c - input->str;
+	}
+
+	return -1;
+}
+
+/**
+ * Encode bytes as lowercase hexadecimal digits into a caller-supplied buffer.
+ * No terminating NUL is written. If `out` is too small, encoding stops after
+ * the last input byte that fits completely (two digits per byte).
+ *
+ * @param in input bytes
+ * @param inlen number of input bytes
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of characters written to `out`
+ */
+gint rspamd_encode_hex_buf(const guchar *in, gsize inlen, gchar *out,
+						   gsize outlen)
+{
+	static const gchar hexdigests[16] = "0123456789abcdef";
+	const guchar *p = in;
+	gchar *o = out, *end = out + outlen;
+
+	/*
+	 * Compare the remaining room instead of forming `end - 1`, which would
+	 * point before the buffer when outlen == 0.
+	 */
+	while (inlen > 0 && end - o >= 2) {
+		*o++ = hexdigests[(*p >> 4) & 0xF];
+		*o++ = hexdigests[*p & 0xF];
+		p++;
+		inlen--;
+	}
+
+	/* `o` never passes `end`, so there is no failure path here */
+	return (o - out);
+}
+
+/**
+ * Allocate a new NUL-terminated string holding the lowercase hex form of `in`.
+ *
+ * @param in input bytes
+ * @param inlen number of input bytes
+ * @return string to be released with g_free(), or NULL if `in` is NULL or
+ *         encoding failed
+ */
+gchar *
+rspamd_encode_hex(const guchar *in, gsize inlen)
+{
+	/* Two digits per byte plus the terminating NUL */
+	const gsize buflen = inlen * 2 + 1;
+	gchar *encoded;
+	gint written;
+
+	if (in == NULL) {
+		return NULL;
+	}
+
+	encoded = g_malloc(buflen);
+	written = rspamd_encode_hex_buf(in, inlen, encoded, buflen - 1);
+
+	if (written < 0) {
+		g_free(encoded);
+
+		return NULL;
+	}
+
+	encoded[written] = '\0';
+
+	return encoded;
+}
+
+/**
+ * Decode pairs of hexadecimal digits into bytes.
+ * A trailing unpaired input character is ignored; decoding stops when either
+ * the input or the output buffer is exhausted. Characters that are not hex
+ * digits contribute a zero nibble.
+ *
+ * @param in input characters
+ * @param inlen number of input characters
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of bytes written to `out`
+ */
+gssize
+rspamd_decode_hex_buf(const gchar *in, gsize inlen,
+					  guchar *out, gsize outlen)
+{
+	guchar *o, *end, ret;
+	const gchar *p;
+	gchar c;
+
+	end = out + outlen;
+	o = out;
+	p = in;
+
+	/* We ignore trailing chars if we have not even input */
+	inlen = inlen - inlen % 2;
+
+	while (inlen > 1 && o < end) {
+		/*
+		 * Start every byte from zero: otherwise an invalid first digit
+		 * would silently reuse the value of the previously decoded byte.
+		 */
+		ret = 0;
+		c = *p++;
+
+		if (c >= '0' && c <= '9') {
+			ret = c - '0';
+		}
+		else if (c >= 'A' && c <= 'F') {
+			ret = c - 'A' + 10;
+		}
+		else if (c >= 'a' && c <= 'f') {
+			ret = c - 'a' + 10;
+		}
+
+		c = *p++;
+		ret *= 16;
+
+		if (c >= '0' && c <= '9') {
+			ret += c - '0';
+		}
+		else if (c >= 'A' && c <= 'F') {
+			ret += c - 'A' + 10;
+		}
+		else if (c >= 'a' && c <= 'f') {
+			ret += c - 'a' + 10;
+		}
+
+		*o++ = ret;
+
+		inlen -= 2;
+	}
+
+	/* `o` never passes `end`, so there is no failure path here */
+	return (o - out);
+}
+
+/**
+ * Allocate a new buffer with the bytes decoded from a hex string.
+ * The result is followed by a NUL byte for convenience.
+ *
+ * @param in hex characters
+ * @param inlen number of input characters
+ * @return buffer to be released with g_free(), or NULL if `in` is NULL or
+ *         decoding failed
+ */
+guchar *
+rspamd_decode_hex(const gchar *in, gsize inlen)
+{
+	/* One byte per digit pair (rounded up) plus the trailing NUL */
+	const gsize buflen = (inlen / 2 + inlen % 2) + 1;
+	guchar *decoded;
+	gint written;
+
+	if (in == NULL) {
+		return NULL;
+	}
+
+	decoded = g_malloc(buflen);
+	written = rspamd_decode_hex_buf(in, inlen, decoded, buflen - 1);
+
+	if (written < 0) {
+		g_free(decoded);
+
+		return NULL;
+	}
+
+	decoded[written] = '\0';
+
+	return decoded;
+}
+
+/**
+ * Decode quoted-printable data into a caller-supplied buffer.
+ *
+ * "=XX" is decoded to the byte XX, "=\r\n", "=\r" and "=\n" are soft line
+ * breaks and produce nothing, malformed escapes are copied verbatim and a
+ * dangling '=' at the very end of input is kept as is.
+ *
+ * @param in input data
+ * @param inlen input length in bytes
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of bytes written to `out` or -1 if `out` is too small
+ */
+gssize
+rspamd_decode_qp_buf(const gchar *in, gsize inlen,
+					 gchar *out, gsize outlen)
+{
+	gchar *o, *end, *pos, c;
+	const gchar *p;
+	guchar ret;
+	gssize remain, processed;
+
+	p = in;
+	o = out;
+	end = out + outlen;
+	remain = inlen;
+
+	while (remain > 0 && o < end) {
+		if (*p == '=') {
+			remain--;
+
+			if (remain == 0) {
+				/* Last '=' character, bugon */
+				if (end - o > 0) {
+					*o++ = *p;
+				}
+				else {
+					/* Buffer overflow */
+					return (-1);
+				}
+
+				break;
+			}
+
+			p++;
+		decode:
+			/* Decode character after '=' */
+			c = *p++;
+			remain--;
+			ret = 0;
+
+			if (c >= '0' && c <= '9') {
+				ret = c - '0';
+			}
+			else if (c >= 'A' && c <= 'F') {
+				ret = c - 'A' + 10;
+			}
+			else if (c >= 'a' && c <= 'f') {
+				ret = c - 'a' + 10;
+			}
+			else if (c == '\r') {
+				/* Eat one more endline */
+				if (remain > 0 && *p == '\n') {
+					p++;
+					remain--;
+				}
+
+				continue;
+			}
+			else if (c == '\n') {
+				/* Soft line break */
+				continue;
+			}
+			else {
+				/* Hack, hack, hack, treat =<garbage> as =<garbage> */
+				if (end - o > 1) {
+					*o++ = '=';
+					*o++ = *(p - 1);
+				}
+				else {
+					return (-1);
+				}
+
+				continue;
+			}
+
+			if (remain > 0) {
+				c = *p++;
+				ret *= 16;
+				remain--;
+
+				if (c >= '0' && c <= '9') {
+					ret += c - '0';
+				}
+				else if (c >= 'A' && c <= 'F') {
+					ret += c - 'A' + 10;
+				}
+				else if (c >= 'a' && c <= 'f') {
+					ret += c - 'a' + 10;
+				}
+				else {
+					/* Treat =<good><rubbish> as =<good><rubbish> */
+					if (end - o > 2) {
+						*o++ = '=';
+						*o++ = *(p - 2);
+						*o++ = *(p - 1);
+					}
+					else {
+						return (-1);
+					}
+
+					continue;
+				}
+
+				if (end - o > 0) {
+					*o++ = (gchar) ret;
+				}
+				else {
+					return (-1);
+				}
+			}
+		}
+		else {
+			if (end - o >= remain) {
+				/* Bulk copy up to and including the next '=' (if any) */
+				if ((pos = memccpy(o, p, '=', remain)) == NULL) {
+					/* All copied */
+					o += remain;
+					break;
+				}
+				else {
+					processed = pos - o;
+					remain -= processed;
+					p += processed;
+
+					if (remain > 0) {
+						/* Step back over the copied '=': it gets decoded */
+						o = pos - 1;
+						/*
+						 * Skip comparison and jump inside decode branch,
+						 * as we know that we have found match
+						 */
+						goto decode;
+					}
+					else {
+						/*
+						 * '=' is the last input byte and memccpy() has
+						 * already stored it at pos[-1]. Keeping it is all
+						 * that is needed: writing another '=' at `pos`
+						 * would be outside of the returned length and would
+						 * demand a spare output byte that is never used.
+						 */
+						o = pos;
+
+						break;
+					}
+				}
+			}
+			else {
+				/* Buffer overflow */
+				return (-1);
+			}
+		}
+	}
+
+	return (o - out);
+}
+/**
+ * Decode uuencoded data into a caller-supplied buffer.
+ *
+ * The input must start (after optional newlines) with a "begin " or a
+ * "begin-base64 " header line; the rest of that line is skipped. For
+ * "begin-base64 " the payload is passed to rspamd_cryptobox_base64_decode(),
+ * otherwise each line is decoded as classic uuencode: a length character
+ * followed by groups of four characters that yield up to three bytes.
+ *
+ * NOTE: the helper macros SKIP_NEWLINE, DEC, IS_DEC and CHAR_OUT are defined
+ * inside this function and are not #undef'ed afterwards.
+ *
+ * @param in input data
+ * @param inlen input length in bytes
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of decoded bytes, or -1 on truncated/malformed input or
+ *         when `out` is too small
+ */
+gssize
+rspamd_decode_uue_buf(const gchar *in, gsize inlen,
+ gchar *out, gsize outlen)
+{
+ gchar *o, *out_end;
+ const gchar *p;
+ gssize remain;
+ gboolean base64 = FALSE;
+ goffset pos;
+ const gchar *nline = "\r\n";
+
+ p = in;
+ o = out;
+ out_end = out + outlen;
+ remain = inlen;
+
+ /* Skip newlines */
+#define SKIP_NEWLINE \
+ do { \
+ while (remain > 0 && (*p == '\n' || *p == '\r')) { \
+ p++; \
+ remain--; \
+ } \
+ } while (0)
+ SKIP_NEWLINE;
+
+ /* First of all, we need to read the first line (and probably skip it) */
+ if (remain < sizeof("begin-base64 ")) {
+ /* Obviously truncated */
+ return -1;
+ }
+
+ if (memcmp(p, "begin ", sizeof("begin ") - 1) == 0) {
+ p += sizeof("begin ") - 1;
+ remain -= sizeof("begin ") - 1;
+
+ pos = rspamd_memcspn(p, nline, remain); /* length of the rest of the header line */
+ }
+ else if (memcmp(p, "begin-base64 ", sizeof("begin-base64 ") - 1) == 0) {
+ base64 = TRUE;
+ p += sizeof("begin-base64 ") - 1;
+ remain -= sizeof("begin-base64 ") - 1;
+ pos = rspamd_memcspn(p, nline, remain);
+ }
+ else {
+ /* Crap */
+ return (-1);
+ }
+
+ if (pos == -1 || remain == 0) { /* NOTE(review): rspamd_memcspn returns gsize, so pos == -1 looks unreachable */
+ /* Crap */
+ return (-1);
+ }
+
+#define DEC(c) (((c) - ' ') & 077) /* single character decode */
+#define IS_DEC(c) ((((c) - ' ') >= 0) && (((c) - ' ') <= 077 + 1))
+#define CHAR_OUT(c) \
+ do { \
+ if (o < out_end) { *o++ = c; } \
+ else { \
+ return (-1); \
+ } \
+ } while (0)
+
+ remain -= pos;
+ p = p + pos;
+ SKIP_NEWLINE;
+
+ if (base64) { /* the whole remaining payload goes to the base64 decoder */
+ if (!rspamd_cryptobox_base64_decode(p,
+ remain,
+ out, &outlen)) {
+ return (-1);
+ }
+
+ return outlen;
+ }
+
+ while (remain > 0 && o < out_end) {
+ /* Main cycle */
+ const gchar *eol;
+ gint i, ch;
+
+ pos = rspamd_memcspn(p, nline, remain);
+
+ if (pos == 0) {
+ /* Skip empty lines */
+ SKIP_NEWLINE;
+
+ if (remain == 0) {
+ break;
+ }
+ }
+
+ eol = p + pos;
+ remain -= eol - p;
+
+ if ((i = DEC(*p)) <= 0) { /* first character of a line is the decoded byte count */
+ /* Last pos */
+ break;
+ }
+
+ /* i can be less than eol - p, it means uue padding which we ignore */
+ for (++p; i > 0 && p < eol; p += 4, i -= 3) {
+ if (i >= 3 && p + 3 < eol) {
+ /* Process 4 bytes of input */
+ if (!IS_DEC(*p)) {
+ return (-1);
+ }
+ if (!IS_DEC(*(p + 1))) {
+ return (-1);
+ }
+ if (!IS_DEC(*(p + 2))) {
+ return (-1);
+ }
+ if (!IS_DEC(*(p + 3))) {
+ return (-1);
+ }
+ ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
+ CHAR_OUT(ch);
+ ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
+ CHAR_OUT(ch);
+ ch = DEC(p[2]) << 6 | DEC(p[3]);
+ CHAR_OUT(ch);
+ }
+ else { /* short tail: emit only the one or two bytes that are present */
+ if (i >= 1 && p + 1 < eol) {
+ if (!IS_DEC(*p)) {
+ return (-1);
+ }
+ if (!IS_DEC(*(p + 1))) {
+ return (-1);
+ }
+
+ ch = DEC(p[0]) << 2 | DEC(p[1]) >> 4;
+ CHAR_OUT(ch);
+ }
+ if (i >= 2 && p + 2 < eol) {
+ if (!IS_DEC(*(p + 1))) {
+ return (-1);
+ }
+ if (!IS_DEC(*(p + 2))) {
+ return (-1);
+ }
+
+ ch = DEC(p[1]) << 4 | DEC(p[2]) >> 2;
+ CHAR_OUT(ch);
+ }
+ }
+ }
+ /* Skip newline */
+ p = eol;
+ SKIP_NEWLINE;
+ }
+
+ return (o - out);
+}
+/* Apply `op` between the word of bitmap `a` that holds bit `b` and the mask of that bit (`|=` sets the bit, `&` tests it) */
+#define BITOP(a, b, op) \
+ ((a)[(gsize) (b) / (8 * sizeof *(a))] op(gsize) 1 << ((gsize) (b) % (8 * sizeof *(a))))
+
+
+/**
+ * Length-bounded analogue of strcspn(): count the leading bytes of `s`
+ * that are NOT present in the NUL-terminated set `e`.
+ *
+ * @param s data to scan (may contain NUL bytes)
+ * @param e NUL-terminated set of stop characters
+ * @param len number of bytes in `s`
+ * @return length of the initial segment of `s` free of characters from `e`
+ */
+gsize rspamd_memcspn(const gchar *s, const gchar *e, gsize len)
+{
+	gsize byteset[32 / sizeof(gsize)];
+	const gchar *p = s, *end = s + len;
+
+	if (!e[0]) {
+		/*
+		 * Empty stop set: nothing can terminate the scan. Checked first,
+		 * as e[1] must not be read past the terminator.
+		 */
+		return len;
+	}
+
+	if (!e[1]) {
+		/* Single stop character: no need for a bitmap */
+		for (; p < end && *p != *e; p++)
+			;
+		return p - s;
+	}
+
+	memset(byteset, 0, sizeof byteset);
+
+	/* Build a 256 bit membership map from `e`... */
+	for (; *e && BITOP(byteset, *(guchar *) e, |=); e++)
+		;
+	/* ...and advance until a member is met */
+	for (; p < end && !BITOP(byteset, *(guchar *) p, &); p++)
+		;
+
+	return p - s;
+}
+
+/**
+ * Length-bounded analogue of strspn(): count the leading bytes of `s`
+ * that ARE present in the NUL-terminated set `e`.
+ *
+ * @param s data to scan (may contain NUL bytes)
+ * @param e NUL-terminated set of accepted characters
+ * @param len number of bytes in `s`
+ * @return length of the initial segment of `s` made only of characters
+ *         from `e`
+ */
+gsize rspamd_memspn(const gchar *s, const gchar *e, gsize len)
+{
+	gsize byteset[32 / sizeof(gsize)];
+	const gchar *p = s, *end = s + len;
+
+	if (!e[0]) {
+		/*
+		 * Empty accept set: no byte can belong to it. Checked first,
+		 * as e[1] must not be read past the terminator.
+		 */
+		return 0;
+	}
+
+	if (!e[1]) {
+		/* Single accepted character: no need for a bitmap */
+		for (; p < end && *p == *e; p++)
+			;
+		return p - s;
+	}
+
+	memset(byteset, 0, sizeof byteset);
+
+	/* Build a 256 bit membership map from `e`... */
+	for (; *e && BITOP(byteset, *(guchar *) e, |=); e++)
+		;
+	/* ...and advance while bytes are members */
+	for (; p < end && BITOP(byteset, *(guchar *) p, &); p++)
+		;
+
+	return p - s;
+}
+
+/**
+ * Decode the "Q" encoding used in RFC 2047 encoded words: "=XX" becomes the
+ * byte XX, '_' becomes a space, '=' followed by line breaks is skipped.
+ * A dangling '=' at the very end of input is kept verbatim.
+ *
+ * @param in input data
+ * @param inlen input length in bytes
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of bytes written to `out` or -1 if `out` is too small
+ */
+gssize
+rspamd_decode_qp2047_buf(const gchar *in, gsize inlen,
+						 gchar *out, gsize outlen)
+{
+	gchar *o, *end, c;
+	const gchar *p;
+	guchar ret;
+	gsize remain, processed;
+
+	p = in;
+	o = out;
+	end = out + outlen;
+	remain = inlen;
+
+	while (remain > 0 && o < end) {
+		if (*p == '=') {
+			p++;
+			remain--;
+		decode:
+			/*
+			 * Both ways of getting here (the branch above and the jump from
+			 * the bulk copy below) have consumed the '='. If nothing follows
+			 * it, `p` points past the input and `remain` (unsigned) must not
+			 * be decremented again, so emit the '=' itself and stop.
+			 */
+			if (remain == 0) {
+				if (end - o > 0) {
+					*o++ = '=';
+				}
+				else {
+					return (-1);
+				}
+
+				break;
+			}
+
+			/* Decode character after '=' */
+			c = *p++;
+			remain--;
+			ret = 0;
+
+			if (c >= '0' && c <= '9') {
+				ret = c - '0';
+			}
+			else if (c >= 'A' && c <= 'F') {
+				ret = c - 'A' + 10;
+			}
+			else if (c >= 'a' && c <= 'f') {
+				ret = c - 'a' + 10;
+			}
+			else if (c == '\r' || c == '\n') {
+				/* Soft line break */
+				while (remain > 0 && (*p == '\r' || *p == '\n')) {
+					remain--;
+					p++;
+				}
+
+				continue;
+			}
+
+			if (remain > 0) {
+				c = *p++;
+				ret *= 16;
+
+				if (c >= '0' && c <= '9') {
+					ret += c - '0';
+				}
+				else if (c >= 'A' && c <= 'F') {
+					ret += c - 'A' + 10;
+				}
+				else if (c >= 'a' && c <= 'f') {
+					ret += c - 'a' + 10;
+				}
+
+				if (end - o > 0) {
+					*o++ = (gchar) ret;
+				}
+				else {
+					return (-1);
+				}
+
+				remain--;
+			}
+		}
+		else {
+			if (end - o >= remain) {
+				/* Bulk copy everything up to the next '=' or '_' */
+				processed = rspamd_memcspn(p, "=_", remain);
+				memcpy(o, p, processed);
+				o += processed;
+
+				if (processed == remain) {
+					break;
+				}
+				else {
+
+					remain -= processed;
+					p += processed;
+
+					if (G_LIKELY(*p == '=')) {
+						p++;
+						/* Skip comparison, as we know that we have found match */
+						remain--;
+						goto decode;
+					}
+					else {
+						/* '_' stands for a space */
+						*o++ = ' ';
+						p++;
+						remain--;
+					}
+				}
+			}
+			else {
+				/* Buffer overflow */
+				return (-1);
+			}
+		}
+	}
+
+	return (o - out);
+}
+
+/**
+ * Encode data with the RFC 2047 "Q" encoding: ASCII alphanumerics are
+ * copied, a space becomes '_', everything else becomes "=XX" (uppercase hex).
+ *
+ * @param in input data
+ * @param inlen input length in bytes
+ * @param out output buffer
+ * @param outlen size of the output buffer in bytes
+ * @return number of bytes written to `out` or -1 if `out` is too small
+ */
+gssize
+rspamd_encode_qp2047_buf(const gchar *in, gsize inlen,
+						 gchar *out, gsize outlen)
+{
+	static const gchar hexdigests[16] = "0123456789ABCDEF";
+	gchar *o = out, *end = out + outlen;
+
+	for (; inlen > 0 && o < end; in++, inlen--) {
+		const gchar c = *in;
+
+		if (g_ascii_isalnum(c)) {
+			*o++ = c;
+			continue;
+		}
+
+		if (c == ' ') {
+			*o++ = '_';
+			continue;
+		}
+
+		/* An escape needs three output bytes */
+		if (end - o < 3) {
+			return (-1);
+		}
+
+		*o++ = '=';
+		*o++ = hexdigests[((c >> 4) & 0xF)];
+		*o++ = hexdigests[(c & 0xF)];
+	}
+
+	/* Leftover input means that the output has been exhausted first */
+	return inlen == 0 ? (o - out) : (-1);
+}
+
+
+/*
+ * GString ucl emitting functions
+ */
+/*
+ * UCL emitter callback: append `len` copies of character `c` to the GString
+ * passed in `ud`. Always returns 0.
+ */
+static int
+rspamd_gstring_append_character(unsigned char c, size_t len, void *ud)
+{
+	GString *buf = ud;
+
+	if (len == 1) {
+		g_string_append_c(buf, c);
+	}
+	else {
+		const gsize old_len = buf->len;
+
+		/*
+		 * g_string_set_size() grows the storage when needed and keeps the
+		 * string NUL-terminated at the new length; adjusting buf->len by
+		 * hand after memset() left it without a terminator.
+		 */
+		g_string_set_size(buf, old_len + len);
+		memset(buf->str + old_len, c, len);
+	}
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append `len` bytes of `str` to the GString passed
+ * in `ud`. Always returns 0.
+ */
+static int
+rspamd_gstring_append_len(const unsigned char *str, size_t len, void *ud)
+{
+	GString *target = ud;
+
+	g_string_append_len(target, str, len);
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append the decimal form of `val` to the GString
+ * passed in `ud`. Always returns 0.
+ */
+static int
+rspamd_gstring_append_int(int64_t val, void *ud)
+{
+	GString *target = ud;
+
+	rspamd_printf_gstring(target, "%L", (intmax_t) val);
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append a textual form of `val` to the GString passed
+ * in `ud`. Integral values are printed with one decimal, values very close
+ * to an integer with DBL_DIG significant digits, the rest with "%f";
+ * NaN and infinities are emitted as "null". Always returns 0.
+ */
+static int
+rspamd_gstring_append_double(double val, void *ud)
+{
+	GString *buf = ud;
+	const double delta = 0.0000001;
+
+	if (isfinite(val)) {
+		/*
+		 * trunc() gives the integral part without the double -> int
+		 * conversion, which is undefined for values that do not fit an int.
+		 */
+		const double ipart = trunc(val);
+
+		if (val == ipart) {
+			rspamd_printf_gstring(buf, "%.1f", val);
+		}
+		else if (fabs(val - ipart) < delta) {
+			/* Write at maximum precision */
+			rspamd_printf_gstring(buf, "%.*g", DBL_DIG, val);
+		}
+		else {
+			rspamd_printf_gstring(buf, "%f", val);
+		}
+	}
+	else {
+		rspamd_printf_gstring(buf, "null");
+	}
+
+	return 0;
+}
+
+/*
+ * Emit UCL object `obj` (with optional `comments`) in format `emit_type`,
+ * appending the output to the GString `target`.
+ */
+void rspamd_ucl_emit_gstring_comments(const ucl_object_t *obj,
+									  enum ucl_emitter emit_type,
+									  GString *target,
+									  const ucl_object_t *comments)
+{
+	/* Callbacks that redirect the emitter output into a GString */
+	struct ucl_emitter_functions func = {
+		.ucl_emitter_append_character = rspamd_gstring_append_character,
+		.ucl_emitter_append_len = rspamd_gstring_append_len,
+		.ucl_emitter_append_int = rspamd_gstring_append_int,
+		.ucl_emitter_append_double = rspamd_gstring_append_double,
+		.ud = target};
+
+	ucl_object_emit_full(obj, emit_type, &func, comments);
+}
+
+/*
+ * FString ucl emitting functions
+ */
+/*
+ * UCL emitter callback: append `len` copies of `c` to the fstring whose
+ * address is passed in `ud` (the pointer is updated as the append may
+ * return a different string). Always returns 0.
+ */
+static int
+rspamd_fstring_emit_append_character(unsigned char c, size_t len, void *ud)
+{
+	rspamd_fstring_t **target = ud;
+	rspamd_fstring_t *grown = rspamd_fstring_append_chars(*target, c, len);
+
+	*target = grown;
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append `len` bytes of `str` to the fstring whose
+ * address is passed in `ud` (the pointer is updated as the append may
+ * return a different string). Always returns 0.
+ */
+static int
+rspamd_fstring_emit_append_len(const unsigned char *str, size_t len, void *ud)
+{
+	rspamd_fstring_t **target = ud;
+	rspamd_fstring_t *grown = rspamd_fstring_append(*target, str, len);
+
+	*target = grown;
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append the decimal form of `val` to the fstring
+ * whose address is passed in `ud`. Always returns 0.
+ */
+static int
+rspamd_fstring_emit_append_int(int64_t val, void *ud)
+{
+	rspamd_fstring_t **target = ud;
+
+	rspamd_printf_fstring(target, "%L", (intmax_t) val);
+
+	return 0;
+}
+
+/*
+ * UCL emitter callback: append a textual form of `val` to the fstring whose
+ * address is passed in `ud`. Integral values are printed with one decimal,
+ * other finite values with MAX_PRECISION decimals; NaN and infinities are
+ * emitted as "null". Always returns 0.
+ */
+static int
+rspamd_fstring_emit_append_double(double val, void *ud)
+{
+	rspamd_fstring_t **buf = ud;
+#define MAX_PRECISION 6
+
+	if (isfinite(val)) {
+		/*
+		 * Compare against trunc() rather than a double -> gint round trip,
+		 * which is undefined for values that do not fit a gint.
+		 */
+		if (val == trunc(val)) {
+			rspamd_printf_fstring(buf, "%.1f", val);
+		}
+		else {
+			rspamd_printf_fstring(buf, "%." G_STRINGIFY(MAX_PRECISION) "f",
+								  val);
+		}
+	}
+	else {
+		rspamd_printf_fstring(buf, "null");
+	}
+
+	return 0;
+}
+
+/*
+ * Emit UCL object `obj` (with optional `comments`) in format `emit_type`,
+ * appending the output to the fstring referenced by `buf`.
+ */
+void rspamd_ucl_emit_fstring_comments(const ucl_object_t *obj,
+									  enum ucl_emitter emit_type,
+									  rspamd_fstring_t **buf,
+									  const ucl_object_t *comments)
+{
+	/* Callbacks that redirect the emitter output into an fstring */
+	struct ucl_emitter_functions func = {
+		.ucl_emitter_append_character = rspamd_fstring_emit_append_character,
+		.ucl_emitter_append_len = rspamd_fstring_emit_append_len,
+		.ucl_emitter_append_int = rspamd_fstring_emit_append_int,
+		.ucl_emitter_append_double = rspamd_fstring_emit_append_double,
+		.ud = buf};
+
+	ucl_object_emit_full(obj, emit_type, &func, comments);
+}
+
+#ifndef HAVE_MEMRCHR
+/*
+ * Fallback for systems without memrchr(): find the last occurrence of the
+ * byte `c` within the first `len` bytes of `m`.
+ * Returns a pointer to that byte or NULL if it is not present.
+ */
+void *
+rspamd_memrchr(const void *m, gint c, gsize len)
+{
+	const guint8 *p = m;
+	/*
+	 * Compare as an unsigned byte: a plain (possibly negative) `char`
+	 * promoted to int would otherwise never match bytes >= 0x80.
+	 */
+	const guint8 needle = (guint8) c;
+
+	for (gsize i = len; i > 0; i--) {
+		if (p[i - 1] == needle) {
+			return (void *) (p + i - 1);
+		}
+	}
+
+	return NULL;
+}
+#endif
+
+/*
+ * Return the shared ICU converter for UTF-8, creating it on the first call.
+ * Both conversion directions are configured to substitute invalid input.
+ * Failure to open the converter is fatal.
+ */
+struct UConverter *
+rspamd_get_utf8_converter(void)
+{
+	static UConverter *utf8_conv = NULL;
+	UErrorCode uc_err = U_ZERO_ERROR;
+
+	if (utf8_conv != NULL) {
+		/* Already initialised */
+		return utf8_conv;
+	}
+
+	utf8_conv = ucnv_open("UTF-8", &uc_err);
+
+	if (!U_SUCCESS(uc_err)) {
+		msg_err("FATAL error: cannot open converter for utf8: %s",
+				u_errorName(uc_err));
+
+		g_assert_not_reached();
+	}
+
+	ucnv_setFromUCallBack(utf8_conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE,
+						  NULL, NULL, NULL, &uc_err);
+	ucnv_setToUCallBack(utf8_conv, UCNV_TO_U_CALLBACK_SUBSTITUTE,
+						NULL, NULL, NULL, &uc_err);
+
+	return utf8_conv;
+}
+
+
+/*
+ * Return the shared ICU "nfkc" compose normalizer, fetched on the first call.
+ * With ICU older than version 44 no normalizer is available and NULL is
+ * returned.
+ */
+const struct UNormalizer2 *
+rspamd_get_unicode_normalizer(void)
+{
+#if U_ICU_VERSION_MAJOR_NUM >= 44
+	static const UNormalizer2 *norm = NULL;
+
+	if (norm != NULL) {
+		return norm;
+	}
+
+	UErrorCode uc_err = U_ZERO_ERROR;
+
+	norm = unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &uc_err);
+	g_assert(U_SUCCESS(uc_err));
+
+	return norm;
+#else
+	/* Old libicu */
+	return NULL;
+#endif
+}
+
+/**
+ * Escape a string so that it can be used inside a regular expression.
+ *
+ * Without RSPAMD_REGEXP_ESCAPE_RE regexp metacharacters and whitespace are
+ * prefixed with a backslash. With RSPAMD_REGEXP_ESCAPE_GLOB `*`, `?` and `+`
+ * are prefixed with `.` and `-` is escaped unless it looks like a part of a
+ * `[...]` range. Non-printable and 8-bit bytes become `\xHH`, or, with
+ * RSPAMD_REGEXP_ESCAPE_UTF, the input is first made valid UTF-8 and code
+ * points are written as `\x{code}` (when RE or GLOB flag is also set).
+ *
+ * @param pattern input, `slen` bytes long (not required to be NUL-terminated)
+ * @param slen input length in bytes
+ * @param dst_len if not NULL, receives the length of the result
+ * @param flags combination of rspamd_regexp_escape_flags
+ * @return newly allocated NUL-terminated string, release with g_free()
+ */
+gchar *
+rspamd_str_regexp_escape(const gchar *pattern, gsize slen,
+						 gsize *dst_len, enum rspamd_regexp_escape_flags flags)
+{
+	const gchar *p, *end;
+	gchar *res, *d, t, *tmp_utf = NULL, *dend;
+	gsize len;
+	static const gchar hexdigests[16] = "0123456789abcdef";
+
+	/*
+	 * Repair invalid UTF-8 BEFORE anything is measured: the repaired copy
+	 * may be longer than the input, so `slen`, `end` and the size estimation
+	 * below must all describe the buffer that is actually escaped.
+	 */
+	if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
+		if (rspamd_fast_utf8_validate(pattern, slen) != 0) {
+			gsize valid_len = 0;
+
+			tmp_utf = rspamd_str_make_utf_valid(pattern, slen, &valid_len, NULL);
+
+			if (tmp_utf) {
+				pattern = tmp_utf;
+				slen = valid_len;
+			}
+		}
+	}
+
+	end = pattern + slen;
+	len = 0;
+	p = pattern;
+
+	/* [-[\]{}()*+?.,\\^$|#\s] need to be escaped */
+	while (p < end) {
+		t = *p++;
+
+		switch (t) {
+		case '[':
+		case ']':
+		case '-':
+		case '\\':
+		case '{':
+		case '}':
+		case '(':
+		case ')':
+		case '*':
+		case '+':
+		case '?':
+		case '.':
+		case ',':
+		case '^':
+		case '$':
+		case '|':
+		case '#':
+			if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) {
+				len++;
+			}
+			break;
+		default:
+			if (g_ascii_isspace(t)) {
+				len++;
+			}
+			else {
+				if (!g_ascii_isprint(t) || (t & 0x80)) {
+
+					if (flags & RSPAMD_REGEXP_ESCAPE_UTF) {
+						/*
+						 * \x{code}: a single byte code point may need up to
+						 * 6 symbols (e.g. \x{1f}), i.e. 5 extra ones; for
+						 * multibyte sequences the per byte budget is more
+						 * than enough.
+						 */
+						len += 5;
+					}
+					else {
+						/* \\xHH -> 4 symbols */
+						len += 3;
+					}
+				}
+			}
+			break;
+		}
+	}
+
+	if (len == 0) {
+		/* No need to escape anything */
+
+		if (dst_len) {
+			*dst_len = slen;
+		}
+
+		if (tmp_utf) {
+			return tmp_utf;
+		}
+		else {
+			/* Input is length delimited, do not rely on a terminator */
+			return g_strndup(pattern, slen);
+		}
+	}
+
+	/* Escape logic */
+	len = slen + len;
+	res = g_malloc(len + 1);
+	p = pattern;
+	d = res;
+	dend = d + len;
+
+	while (p < end) {
+		g_assert(d < dend);
+		t = *p++;
+
+		switch (t) {
+		case '[':
+		case ']':
+		case '\\':
+		case '{':
+		case '}':
+		case '(':
+		case ')':
+		case '.':
+		case ',':
+		case '^':
+		case '$':
+		case '|':
+		case '#':
+			if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) {
+				*d++ = '\\';
+			}
+			break;
+		case '-':
+			if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) {
+				/*
+				 * For glob patterns, we need to ensure that a previous character is alphanumeric
+				 * and there is `[` symbol somewhere before
+				 * NOTE(review): the scan starts at the character AFTER `-`
+				 * (`p` is already advanced) - confirm this is intended.
+				 */
+				bool seen_brace = false;
+				const char *search = p;
+
+				while (search > pattern) {
+					if (!g_ascii_isalnum(*search) && *search != '-') {
+						break;
+					}
+					if (*search == '[') {
+						seen_brace = true;
+						break;
+					}
+
+					search--;
+				}
+
+				if (!seen_brace) {
+					/* Escape `-` symbol */
+					*d++ = '\\';
+				}
+			}
+			else if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) {
+				*d++ = '\\';
+			}
+			break;
+		case '*':
+		case '?':
+		case '+':
+			if (flags & RSPAMD_REGEXP_ESCAPE_GLOB) {
+				/* Treat * as .* and ? as .? */
+				*d++ = '.';
+			}
+			else if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) {
+				*d++ = '\\';
+			}
+			break;
+		default:
+			if (g_ascii_isspace(t)) {
+				if (!(flags & RSPAMD_REGEXP_ESCAPE_RE)) {
+					*d++ = '\\';
+				}
+			}
+			else if (t & 0x80 || !g_ascii_isprint(t)) {
+				if (!(flags & RSPAMD_REGEXP_ESCAPE_UTF)) {
+					*d++ = '\\';
+					*d++ = 'x';
+					*d++ = hexdigests[((t >> 4) & 0xF)];
+					*d++ = hexdigests[((t) &0xF)];
+					continue; /* To avoid *d++ = t; */
+				}
+				else {
+					if (flags & (RSPAMD_REGEXP_ESCAPE_RE | RSPAMD_REGEXP_ESCAPE_GLOB)) {
+						UChar32 uc;
+						gint32 off = p - pattern - 1;
+						U8_NEXT(pattern, off, slen, uc);
+
+						if (uc > 0) {
+							d += rspamd_snprintf(d, dend - d,
+												 "\\x{%xd}", uc);
+							/* Skip the rest of the multibyte sequence */
+							p = pattern + off;
+						}
+
+						continue; /* To avoid *d++ = t; */
+					}
+				}
+			}
+			break;
+		}
+
+		*d++ = t;
+	}
+
+	*d = '\0';
+
+	if (dst_len) {
+		*dst_len = d - res;
+	}
+
+	if (tmp_utf) {
+		g_free(tmp_utf);
+	}
+
+	return res;
+}
+
+/**
+ * Produce a NUL-terminated copy of `src` in which every invalid UTF-8 unit
+ * (as reported by U8_NEXT) is replaced by U+FFFD (bytes EF BF BD).
+ * Works in two passes: the first one estimates the output size, the second
+ * one copies valid runs and writes the replacements.
+ *
+ * @param src input bytes
+ * @param slen input length in bytes
+ * @param dstlen if not NULL, receives the output length (without the NUL)
+ * @param pool if not NULL the result is allocated from this pool,
+ *             otherwise with g_malloc()
+ * @return the new string, or NULL if `src` is NULL
+ */
+gchar *
+rspamd_str_make_utf_valid(const guchar *src, gsize slen,
+ gsize *dstlen,
+ rspamd_mempool_t *pool)
+{
+ UChar32 uc;
+ goffset err_offset;
+ const guchar *p;
+ gchar *dst, *d;
+ gsize remain = slen, dlen = 0;
+
+ if (src == NULL) {
+ return NULL;
+ }
+
+ if (slen == 0) {
+ if (dstlen) {
+ *dstlen = 0;
+ }
+
+ return pool ? rspamd_mempool_strdup(pool, "") : g_strdup("");
+ }
+
+ p = src;
+ dlen = slen + 1; /* As we add '\0' */
+
+ /* Check space required */
+ while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(p, remain)) > 0) {
+ gint i = 0;
+
+ err_offset--; /* As it returns it 1 indexed */
+ p += err_offset;
+ remain -= err_offset;
+ dlen += err_offset; /* NOTE(review): dlen already starts at slen + 1, so this looks like a harmless over-estimate */
+
+ /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
+ while (i < remain) {
+ U8_NEXT(p, i, remain, uc);
+
+ if (uc < 0) {
+ dlen += 2;
+ }
+ else {
+ break;
+ }
+ }
+
+ p += i;
+ remain -= i;
+ }
+
+ if (pool) {
+ dst = rspamd_mempool_alloc(pool, dlen + 1);
+ }
+ else {
+ dst = g_malloc(dlen + 1);
+ }
+
+ p = src;
+ d = dst;
+ remain = slen;
+
+ while (remain > 0 && (err_offset = rspamd_fast_utf8_validate(p, remain)) > 0) {
+ /* Copy valid */
+ err_offset--; /* As it returns it 1 indexed */
+ memcpy(d, p, err_offset);
+ d += err_offset;
+
+ /* Append 0xFFFD for each bad character */
+ gint i = 0;
+
+ p += err_offset;
+ remain -= err_offset;
+
+ while (i < remain) {
+ gint old_i = i;
+ U8_NEXT(p, i, remain, uc);
+
+ if (uc < 0) {
+ *d++ = '\357'; /* EF BF BD is U+FFFD encoded in UTF-8 */
+ *d++ = '\277';
+ *d++ = '\275';
+ }
+ else {
+ /* Adjust p and remaining stuff and go to the outer cycle */
+ i = old_i;
+ break;
+ }
+ }
+ /*
+ * Now p is the first valid utf8 character and remain is the rest of the string
+ * so we can continue our loop
+ */
+ p += i;
+ remain -= i;
+ }
+
+ if (err_offset == 0 && remain > 0) { /* the validator reported the rest of the input as valid */
+ /* Last piece */
+ memcpy(d, p, remain);
+ d += remain;
+ }
+
+ /* Last '\0' */
+ g_assert(dlen > d - dst);
+ *d = '\0';
+
+ if (dstlen) {
+ *dstlen = d - dst;
+ }
+
+ return dst;
+}
+
+/**
+ * Remove every leading and trailing character that belongs to `strip_chars`
+ * from a GString, in place.
+ *
+ * @param s string to modify
+ * @param strip_chars NUL-terminated set of characters to strip
+ * @return total number of characters removed
+ */
+gsize rspamd_gstring_strip(GString *s, const gchar *strip_chars)
+{
+	gsize strip_len = 0, total = 0;
+
+	/* Tail: count by index, so that an empty string needs no special care */
+	while (strip_len < s->len) {
+		const gchar cur = s->str[s->len - strip_len - 1];
+		const gchar *sc = strip_chars;
+
+		while (*sc != '\0' && *sc != cur) {
+			sc++;
+		}
+
+		if (*sc == '\0') {
+			/* Not a strippable character */
+			break;
+		}
+
+		strip_len++;
+	}
+
+	if (strip_len > 0) {
+		s->len -= strip_len;
+		s->str[s->len] = '\0';
+		total += strip_len;
+	}
+
+	/* Head */
+	if (s->len > 0) {
+		strip_len = rspamd_memspn(s->str, strip_chars, s->len);
+
+		if (strip_len > 0) {
+			memmove(s->str, s->str + strip_len, s->len - strip_len);
+			s->len -= strip_len;
+			/* The terminator is not a part of the moved data: restore it */
+			s->str[s->len] = '\0';
+			total += strip_len;
+		}
+	}
+
+	return total;
+}
+
+/**
+ * Strip leading and trailing characters from a length-delimited string
+ * without modifying or copying it.
+ *
+ * @param in input data
+ * @param len in: input length; out: length of the stripped region
+ * @param strip_chars NUL-terminated set of characters to strip
+ * @return pointer inside `in` where the stripped region starts
+ */
+const gchar *rspamd_string_len_strip(const gchar *in,
+									 gsize *len,
+									 const gchar *strip_chars)
+{
+	gsize strip_len = 0, old_len = *len;
+
+	/*
+	 * Trail: count by index, so that no pointer before `in` is ever formed
+	 * (which is what `in + *len - 1` would be for an empty input).
+	 */
+	while (strip_len < old_len) {
+		const gchar cur = in[old_len - strip_len - 1];
+		const gchar *sc = strip_chars;
+
+		while (*sc != '\0' && *sc != cur) {
+			sc++;
+		}
+
+		if (*sc == '\0') {
+			/* Not a strippable character */
+			break;
+		}
+
+		strip_len++;
+	}
+
+	if (strip_len > 0) {
+		*len -= strip_len;
+	}
+
+	/* Head */
+	old_len = *len;
+
+	if (old_len > 0) {
+		strip_len = rspamd_memspn(in, strip_chars, old_len);
+
+		if (strip_len > 0) {
+			*len -= strip_len;
+
+			return in + strip_len;
+		}
+	}
+
+	return in;
+}
+
+/**
+ * Split a length-delimited string into a NULL-terminated array of
+ * NUL-terminated copies of its fragments. Runs of separators are collapsed,
+ * so empty fragments are never produced.
+ *
+ * @param in input data
+ * @param len input length in bytes
+ * @param spill NUL-terminated set of separator characters
+ * @param max_elts if positive, stop after this many fragments
+ * @param pool if not NULL everything is allocated from this pool,
+ *             otherwise with g_malloc()
+ * @return NULL-terminated array of fragments
+ */
+gchar **
+rspamd_string_len_split(const gchar *in, gsize len, const gchar *spill,
+						gint max_elts, rspamd_mempool_t *pool)
+{
+	const gchar *cur, *end = in + len;
+	gsize nelts = 0, idx = 0, vec_size;
+	gchar **res;
+
+	/* First pass: count fragments to size the vector */
+	for (cur = in; cur < end; cur += rspamd_memspn(cur, spill, end - cur)) {
+		gsize frag = rspamd_memcspn(cur, spill, end - cur);
+
+		if (frag == 0) {
+			/* Something like a,,b produces {'a', 'b'} not {'a', '', 'b'} */
+			continue;
+		}
+
+		nelts++;
+		cur += frag;
+
+		if (max_elts > 0 && nelts >= max_elts) {
+			break;
+		}
+	}
+
+	vec_size = sizeof(gchar *) * (nelts + 1);
+	res = pool ? rspamd_mempool_alloc(pool, vec_size) : g_malloc(vec_size);
+	/* Last one */
+	res[nelts] = NULL;
+
+	/* Second pass: copy fragments out */
+	for (cur = in; cur < end; cur += rspamd_memspn(cur, spill, end - cur)) {
+		gsize frag = rspamd_memcspn(cur, spill, end - cur);
+		gchar *elt;
+
+		if (frag == 0) {
+			continue;
+		}
+
+		elt = pool ? rspamd_mempool_alloc(pool, frag + 1) : g_malloc(frag + 1);
+		memcpy(elt, cur, frag);
+		elt[frag] = '\0';
+
+		res[idx++] = elt;
+		cur += frag;
+
+		if (max_elts > 0 && idx >= max_elts) {
+			break;
+		}
+	}
+
+	return res;
+}
+
+#if defined(__x86_64__)
+#include <x86intrin.h>
+#endif
+
+/*
+ * Portable check for bytes with the high bit set: full 16 byte chunks are
+ * OR-ed together as two 64 bit words, the remaining tail is checked bytewise.
+ * Returns TRUE if any byte in [beg, beg + len) is >= 0x80.
+ */
+static inline gboolean
+rspamd_str_has_8bit_u64(const guchar *beg, gsize len)
+{
+	guint8 orb = 0;
+
+	if (len >= 16) {
+		guint64 acc_lo = 0, acc_hi = 0;
+
+		for (; len >= 16; beg += 16, len -= 16) {
+			guint64 word;
+
+			/* memcpy keeps unaligned loads well defined */
+			memcpy(&word, beg, sizeof(word));
+			acc_lo |= word;
+			memcpy(&word, beg + sizeof(guint64), sizeof(word));
+			acc_hi |= word;
+		}
+
+		/* 0xFF if any high bit has been seen in the chunks, 0x00 otherwise */
+		orb = ((acc_lo | acc_hi) & 0x8080808080808080ULL) ? 0xFF : 0x00;
+	}
+
+	for (; len > 0; len--) {
+		orb |= *beg++;
+	}
+
+	return orb >= 0x80;
+}
+
+/*
+ * Check whether a buffer contains any byte with the high bit set.
+ * On x86_64 full 32 byte chunks are processed with SSE2, the remainder
+ * (or everything, on other platforms) goes to the portable helper.
+ */
+gboolean
+rspamd_str_has_8bit(const guchar *beg, gsize len)
+{
+#if defined(__x86_64__)
+	if (len >= 32) {
+		__m128i acc_lo = _mm_set1_epi8(0);
+		__m128i acc_hi = acc_lo;
+
+		for (; len >= 32; beg += 32, len -= 32) {
+			const __m128i lo = _mm_loadu_si128((const __m128i *) beg);
+			const __m128i hi = _mm_loadu_si128((const __m128i *) (beg + 16));
+
+			acc_lo = _mm_or_si128(acc_lo, lo);
+			acc_hi = _mm_or_si128(acc_hi, hi);
+		}
+
+		/* movemask collects the top bit of every byte */
+		if (_mm_movemask_epi8(_mm_or_si128(acc_lo, acc_hi))) {
+			return TRUE;
+		}
+	}
+#endif
+
+	return rspamd_str_has_8bit_u64(beg, len);
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
new file mode 100644
index 0000000..07560cc
--- /dev/null
+++ b/src/libutil/str_util.h
@@ -0,0 +1,565 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBUTIL_STR_UTIL_H_
+#define SRC_LIBUTIL_STR_UTIL_H_
+
+#include "config.h"
+#include "ucl.h"
+#include "fstring.h"
+
+#include <stdalign.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rspamd_newlines_type {
+ RSPAMD_TASK_NEWLINES_CR = 0,
+ RSPAMD_TASK_NEWLINES_LF,
+ RSPAMD_TASK_NEWLINES_CRLF,
+ RSPAMD_TASK_NEWLINES_MAX
+};
+
+/**
+ * Compare two memory regions of size `l` using case insensitive matching
+ */
+gint rspamd_lc_cmp(const gchar *s, const gchar *d, gsize l);
+
+/**
+ * Convert string to lowercase in-place using ASCII conversion
+ */
+guint rspamd_str_lc(gchar *str, guint size);
+
+/**
+ * Performs ascii copy & lowercase
+ * @param src
+ * @param size
+ * @return
+ */
+gsize rspamd_str_copy_lc(const gchar *src, gchar *dst, gsize size);
+
+/**
+ * Convert string to lowercase in-place using utf (limited) conversion
+ */
+guint rspamd_str_lc_utf8(gchar *str, guint size);
+
+/*
+ * Hash table utility functions for case insensitive hashing
+ */
+guint64 rspamd_icase_hash(const gchar *in, gsize len, guint64 seed);
+
+guint rspamd_strcase_hash(gconstpointer key);
+
+gboolean rspamd_strcase_equal(gconstpointer v, gconstpointer v2);
+
+/*
+ * Hash table utility functions for case sensitive hashing
+ */
+guint rspamd_str_hash(gconstpointer key);
+
+gboolean rspamd_str_equal(gconstpointer v, gconstpointer v2);
+
+
+/*
+ * Hash table utility functions for hashing fixed strings
+ */
+guint rspamd_ftok_icase_hash(gconstpointer key);
+
+gboolean rspamd_ftok_icase_equal(gconstpointer v, gconstpointer v2);
+
+/* Use in khash for speed */
+#define rspamd_ftok_hash(key) _wyhash32((key)->begin, (key)->len, 0)
+#define rspamd_ftok_equal(v1, v2) ((v1)->len == (v2)->len && memcmp((v1)->begin, (v2)->begin, (v1)->len) == 0)
+
+guint rspamd_gstring_icase_hash(gconstpointer key);
+
+gboolean rspamd_gstring_icase_equal(gconstpointer v, gconstpointer v2);
+
+/**
+ * Copy src to dest limited to len, in compare with standard strlcpy(3) rspamd strlcpy does not
+ * traverse the whole string and it is possible to use it for non NULL terminated strings. This is
+ * more like memccpy(dst, src, size, '\0')
+ *
+ * @param dst destination string
+ * @param src source string
+ * @param siz length of destination buffer
+ * @return bytes copied
+ */
+gsize rspamd_strlcpy_fast(gchar *dst, const gchar *src, gsize siz);
+
+gsize rspamd_strlcpy_safe(gchar *dst, const gchar *src, gsize siz);
+
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define rspamd_strlcpy rspamd_strlcpy_safe
+#else
+#ifdef __SANITIZE_ADDRESS__
+#define rspamd_strlcpy rspamd_strlcpy_safe
+#else
+#define rspamd_strlcpy rspamd_strlcpy_fast
+#endif
+#endif
+#else
+#ifdef __SANITIZE_ADDRESS__
+#define rspamd_strlcpy rspamd_strlcpy_safe
+#else
+#define rspamd_strlcpy rspamd_strlcpy_fast
+#endif
+#endif
+
+/**
+ * Copies `srclen` characters from `src` to `dst` ignoring \0
+ * @param src
+ * @param srclen
+ * @param dest
+ * @param destlen
+ * @return number of bytes copied
+ */
+gsize rspamd_null_safe_copy(const gchar *src, gsize srclen,
+ gchar *dest, gsize destlen);
+
+/*
+ * Try to convert string of length to long
+ */
+gboolean rspamd_strtol(const gchar *s, gsize len, glong *value);
+
+/*
+ * Try to convert a string of length to unsigned long
+ */
+gboolean rspamd_strtoul(const gchar *s, gsize len, gulong *value);
+gboolean rspamd_strtou64(const gchar *s, gsize len, guint64 *value);
+
+/*
+ * Try to convert a hex string of length to unsigned long
+ */
+gboolean rspamd_xstrtoul(const gchar *s, gsize len, gulong *value);
+
+/**
+ * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
+ * @param data string to copy
+ * @param ud memory pool to use
+ * @return
+ */
+gpointer rspamd_str_pool_copy(gconstpointer data, gpointer ud);
+
+/**
+ * Encode string using hex encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 encoding of a specified string
+ */
+gchar *rspamd_encode_hex(const guchar *in, gsize inlen);
+
+/**
+ * Decode string using hex encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 decoded value or NULL if input is invalid
+ */
+guchar *rspamd_decode_hex(const gchar *in, gsize inlen);
+
+ enum rspamd_base32_type { /* Alphabet selector for the base32 encode/decode functions */
+ RSPAMD_BASE32_DEFAULT = 0,
+ RSPAMD_BASE32_ZBASE = 0, /* same value as RSPAMD_BASE32_DEFAULT: zbase is the default */
+ RSPAMD_BASE32_BLEACH,
+ RSPAMD_BASE32_RFC,
+ RSPAMD_BASE32_INVALID = -1, /* returned by rspamd_base32_decode_type_from_str for unknown names */
+ };
+
+/**
+ * Returns base32 type from a string or RSPAMD_BASE32_INVALID
+ * @param str
+ * @return
+ */
+enum rspamd_base32_type rspamd_base32_decode_type_from_str(const gchar *str);
+
+/**
+ * Encode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 encoding of a specified string
+ */
+gchar *rspamd_encode_base32(const guchar *in, gsize inlen,
+ enum rspamd_base32_type type);
+
+/**
+ * Decode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 decoded value or NULL if input is invalid
+ */
+guchar *rspamd_decode_base32(const gchar *in, gsize inlen, gsize *outlen, enum rspamd_base32_type type);
+
+/**
+ * Encode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @param out output buf
+ * @param outlen output buf len
+ * @return encoded len if `outlen` is enough to encode `inlen`
+ */
+gint rspamd_encode_base32_buf(const guchar *in, gsize inlen, gchar *out,
+ gsize outlen, enum rspamd_base32_type type);
+
+/**
+ * Decode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @param out output buf (may overlap with `in`)
+ * @param outlen output buf len
+ * @return decoded len if in is valid base32 and `outlen` is enough to encode `inlen`
+ */
+gint rspamd_decode_base32_buf(const gchar *in, gsize inlen, guchar *out,
+ gsize outlen, enum rspamd_base32_type type);
+
+/**
+ * Encode string using hex encoding
+ * @param in input
+ * @param inlen input length
+ * @param out output buf
+ * @param outlen output buf len
+ * @return encoded len if `outlen` is enough to encode `inlen`
+ */
+gint rspamd_encode_hex_buf(const guchar *in, gsize inlen, gchar *out,
+ gsize outlen);
+
+
+/**
+ * Decode string using hex encoding
+ * @param in input
+ * @param inlen input length
+ * @param out output buf (may overlap with `in`)
+ * @param outlen output buf len
+ * @return decoded len if in is valid hex and `outlen` is enough to encode `inlen`
+ */
+gssize rspamd_decode_hex_buf(const gchar *in, gsize inlen,
+ guchar *out, gsize outlen);
+
+/**
+ * Common version of base64 encoder
+ * @param in
+ * @param inlen
+ * @param str_len
+ * @param outlen
+ * @param fold
+ * @param how
+ * @return
+ */
+gchar *
+rspamd_encode_base64_common(const guchar *in,
+ gsize inlen,
+ gint str_len,
+ gsize *outlen,
+ gboolean fold,
+ enum rspamd_newlines_type how);
+
+/**
+ * Encode string using base64 encoding
+ * @param in input
+ * @param inlen input length
+ * @param str_len maximum string length (if <= 0 then no lines are split)
+ * @return freshly allocated base64 encoded value or NULL if input is invalid
+ */
+gchar *rspamd_encode_base64(const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen);
+
+/**
+ * Encode and fold string using base64 encoding
+ * @param in input
+ * @param inlen input length
+ * @param str_len maximum string length (if <= 0 then no lines are split)
+ * @return freshly allocated base64 encoded value or NULL if input is invalid
+ */
+gchar *rspamd_encode_base64_fold(const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen, enum rspamd_newlines_type how);
+
+/**
+ * Encode and fold string using quoted printable encoding
+ * @param in input
+ * @param inlen input length
+ * @param str_len maximum string length (if <= 0 then no lines are split)
+ * @return freshly allocated base64 encoded value or NULL if input is invalid
+ */
+gchar *rspamd_encode_qp_fold(const guchar *in, gsize inlen, gint str_len,
+ gsize *outlen, enum rspamd_newlines_type how);
+
+/**
+ * Decode quoted-printable encoded buffer, input and output must not overlap
+ * @param in input
+ * @param inlen length of input
+ * @param out output
+ * @param outlen length of output
+ * @return real size of decoded output or (-1) if outlen is not enough
+ */
+gssize rspamd_decode_qp_buf(const gchar *in, gsize inlen,
+ gchar *out, gsize outlen);
+
+/**
+ * Decode uuencode encoded buffer, input and output must not overlap
+ * @param in input
+ * @param inlen length of input
+ * @param out output
+ * @param outlen length of output
+ * @return real size of decoded output or (-1) if outlen is not enough
+ */
+gssize rspamd_decode_uue_buf(const gchar *in, gsize inlen,
+ gchar *out, gsize outlen);
+
+/**
+ * Decode quoted-printable encoded buffer using rfc2047 format, input and output must not overlap
+ * @param in input
+ * @param inlen length of input
+ * @param out output
+ * @param outlen length of output
+ * @return real size of decoded output or (-1) if outlen is not enough
+ */
+gssize rspamd_decode_qp2047_buf(const gchar *in, gsize inlen,
+ gchar *out, gsize outlen);
+
+/**
+ * Encode quoted-printable buffer using rfc2047 format, input and output must not overlap
+ * @param in
+ * @param inlen
+ * @param out
+ * @param outlen
+ * @return
+ */
+gssize rspamd_encode_qp2047_buf(const gchar *in, gsize inlen,
+ gchar *out, gsize outlen);
+
+#ifndef g_tolower
+#define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
+#endif
+
+/**
+ * Return levenstein distance between two strings
+ * @param s1
+ * @param s1len
+ * @param s2
+ * @param s2len
+ * @return
+ */
+gint rspamd_strings_levenshtein_distance(const gchar *s1, gsize s1len,
+ const gchar *s2, gsize s2len, guint replace_cost);
+
+/**
+ * Fold header using rfc822 rules, return new GString from the previous one
+ * @param name name of header (used just for folding)
+ * @param value value of header
+ * @param fold_max
+ * @param how
+ * @param fold_on_chars
+ * @return new GString with the folded value
+ */
+GString *rspamd_header_value_fold(const gchar *name,
+ gsize name_len,
+ const gchar *value,
+ gsize value_len,
+ guint fold_max,
+ enum rspamd_newlines_type how,
+ const gchar *fold_on_chars);
+
+/**
+ * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm
+ * http://www-igm.univ-mlv.fr/~lecroq/string/node12.html#SECTION00120
+ * @param in input
+ * @param inlen input len
+ * @param srch search string
+ * @param srchlen length of the search string
+ * @return position of the first substring match or (-1) if not found
+ */
+goffset rspamd_substring_search(const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen);
+
+/**
+ * Search for a substring `srch` in the text `in` using Apostolico-Crochemore algorithm in caseless matter (ASCII only)
+ * http://www-igm.univ-mlv.fr/~lecroq/string/node12.html#SECTION00120
+ * @param in input
+ * @param inlen input len
+ * @param srch search string
+ * @param srchlen length of the search string
+ * @return position of the first substring match or (-1) if not found
+ */
+goffset rspamd_substring_search_caseless(const gchar *in, gsize inlen,
+ const gchar *srch, gsize srchlen);
+
+/**
+ * Search for end-of-headers mark in the input string. Returns position just after
+ * the last header in message (but before the last newline character).
+ * Hence, to obtain the real EOH position, it is also required to skip
+ * space characters
+ */
+goffset rspamd_string_find_eoh(GString *input, goffset *body_start);
+
+
+#define rspamd_ucl_emit_gstring(o, t, target) \
+ rspamd_ucl_emit_gstring_comments((o), (t), (target), NULL)
+
+/**
+ * Emit UCL object to gstring
+ * @param obj object to emit
+ * @param emit_type emitter type
+ * @param comments optional comments object
+ * @param target target string
+ */
+void rspamd_ucl_emit_gstring_comments(const ucl_object_t *obj,
+ enum ucl_emitter emit_type,
+ GString *target,
+ const ucl_object_t *comments);
+
+#define rspamd_ucl_emit_fstring(o, t, target) \
+ rspamd_ucl_emit_fstring_comments((o), (t), (target), NULL)
+
+/**
+ * Emit UCL object to fstring
+ * @param obj object to emit
+ * @param emit_type emitter type
+ * * @param comments optional comments object
+ * @param target target string
+ */
+void rspamd_ucl_emit_fstring_comments(const ucl_object_t *obj,
+ enum ucl_emitter emit_type,
+ rspamd_fstring_t **target,
+ const ucl_object_t *comments);
+
+extern const guchar lc_map[256];
+
+/**
+ * Search for the last occurrence of character `c` in memory block of size `len`
+ * @param m
+ * @param c
+ * @param len
+ * @return pointer to the last occurrence or NULL
+ */
+#ifdef HAVE_MEMRCHR
+#define rspamd_memrchr memrchr
+#else
+void *rspamd_memrchr(const void *m, gint c, gsize len);
+#endif
+
+/**
+ * Return length of memory segment starting in `s` that contains no chars from `e`
+ * @param s any input
+ * @param e zero terminated string of exceptions
+ * @param len length of `s`
+ * @return segment size
+ */
+gsize rspamd_memcspn(const gchar *s, const gchar *e, gsize len);
+
+/**
+ * Return length of memory segment starting in `s` that contains only chars from `e`
+ * @param s any input
+ * @param e zero terminated string of inclusions
+ * @param len length of `s`
+ * @return segment size
+ */
+gsize rspamd_memspn(const gchar *s, const gchar *e, gsize len);
+
+/* https://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord */
+#define rspamd_str_hasmore(x, n) ((((x) + ~0UL / 255 * (127 - (n))) | (x)) & ~0UL / 255 * 128)
+/*
+ * Check if a pointer is aligned; n must be power of two
+ */
+#define rspamd_is_aligned(p, n) (((uintptr_t) (p) & ((uintptr_t) (n) -1)) == 0)
+#define rspamd_is_aligned_as(p, v) rspamd_is_aligned(p, RSPAMD_ALIGNOF(__typeof((v))))
+gboolean rspamd_str_has_8bit(const guchar *beg, gsize len);
+
+struct UConverter;
+
+struct UConverter *rspamd_get_utf8_converter(void);
+
+struct UNormalizer2;
+
+const struct UNormalizer2 *rspamd_get_unicode_normalizer(void);
+
+
+enum rspamd_regexp_escape_flags {
+ RSPAMD_REGEXP_ESCAPE_ASCII = 0,
+ RSPAMD_REGEXP_ESCAPE_UTF = 1u << 0,
+ RSPAMD_REGEXP_ESCAPE_GLOB = 1u << 1,
+ RSPAMD_REGEXP_ESCAPE_RE = 1u << 2,
+};
+
+/**
+ * Escapes special characters when reading plain data to be processed in pcre
+ * @param pattern pattern to process
+ * @param slen source length
+ * @param dst_len destination length pointer (can be NULL)
+ * @param allow_glob allow glob expressions to be translated into pcre
+ * @return newly allocated zero terminated escaped pattern
+ */
+gchar *
+rspamd_str_regexp_escape(const gchar *pattern, gsize slen,
+ gsize *dst_len, enum rspamd_regexp_escape_flags flags) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Returns copy of src (zero terminated) where all unicode is made valid or replaced
+ * to FFFD characters. Caller must free string after usage
+ * @param src
+ * @param slen
+ * @param dstelen
+ * @return
+ */
+gchar *rspamd_str_make_utf_valid(const guchar *src, gsize slen, gsize *dstlen,
+ rspamd_mempool_t *pool) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Strips characters in `strip_chars` from start and end of the GString
+ * @param s
+ * @param strip_chars
+ */
+gsize rspamd_gstring_strip(GString *s, const gchar *strip_chars);
+
+/**
+ * Strips characters in `strip_chars` from start and end of the sized string
+ * @param s
+ * @param strip_chars
+ */
+const gchar *rspamd_string_len_strip(const gchar *in,
+ gsize *len, const gchar *strip_chars) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Returns a NULL terminated list of zero terminated strings based on splitting of
+ * the base string into parts. If pool is not NULL then memory is allocated from
+ * the pool. Otherwise, it is allocated from the heap using `g_malloc` (so
+ * g_strfreev could be used to free stuff)
+ * @param in
+ * @param len
+ * @param spill
+ * @param max_elts
+ * @return
+ */
+gchar **rspamd_string_len_split(const gchar *in, gsize len,
+ const gchar *spill, gint max_elts, rspamd_mempool_t *pool);
+
+#define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200B || \
+ (uc) == 0x200C || \
+ (uc) == 0x200D || \
+ (uc) == 0xFEFF || \
+ (uc) == 0x00AD)
+#define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || \
+ ((uc) >= 0x2028 && (uc) <= 0x202F) || \
+ ((uc) >= 0x205F && (uc) <= 0x206F) || \
+ (uc) == 0xFEFF)
+
+#define RSPAMD_LEN_CHECK_STARTS_WITH(s, len, lit) \
+ ((len) >= sizeof(lit) - 1 && g_ascii_strncasecmp((s), (lit), sizeof(lit) - 1) == 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/libutil/unix-std.h b/src/libutil/unix-std.h
new file mode 100644
index 0000000..0ce2442
--- /dev/null
+++ b/src/libutil/unix-std.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_UNIX_STD_H_H
+#define RSPAMD_UNIX_STD_H_H
+
+#include "config.h"
+
+/*
+ * Default unix system includes
+ */
+
+/* sys/file.h */
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
+
+/* sys/uio.h */
+#ifdef HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#endif
+
+/* sys/mman */
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+
+ /* sys/timeb.h */
+#ifdef HAVE_SYS_TIMEB_H
+#include <sys/timeb.h>
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/* strings */
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+
+/* fcntl */
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
+
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+
+#include <signal.h>
+
+#ifdef HAVE_LIBGEN_H
+#include <libgen.h>
+#endif
+
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+
+#endif
diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c
new file mode 100644
index 0000000..f536a2c
--- /dev/null
+++ b/src/libutil/upstream.c
@@ -0,0 +1,1761 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "upstream.h"
+#include "ottery.h"
+#include "ref.h"
+#include "cfg_file.h"
+#include "rdns.h"
+#include "cryptobox.h"
+#include "utlist.h"
+#include "contrib/libev/ev.h"
+#include "logger.h"
+#include "contrib/librdns/rdns.h"
+#include "contrib/mumhash/mum.h"
+
+#include <math.h>
+
+
+ struct upstream_inet_addr_entry { /* Node of the pending list of freshly resolved addresses (upstream->new_addrs) */
+ rspamd_inet_addr_t *addr; /* address built from an A/AAAA reply entry */
+ guint priority; /* set from the SRV callback data; stays zero for plain A/AAAA lookups */
+ struct upstream_inet_addr_entry *next; /* singly-linked list link used by the LL_* macros */
+ };
+
+ struct upstream_addr_elt { /* Element stored in upstream->addrs.addr */
+ rspamd_inet_addr_t *addr; /* released together with the element by rspamd_upstream_addr_elt_dtor */
+ guint priority; /* sort key: when non-zero on either side of a comparison, higher values sort first */
+ guint errors; /* per-address error counter, carried over (or reset) when addresses are re-resolved */
+ };
+
+ struct upstream_list_watcher { /* Watcher attached to an upstream list (upstream_list->watchers) */
+ rspamd_upstream_watch_func func;
+ GFreeFunc dtor; /* NOTE(review): presumably releases `ud` when the watcher goes away - confirm */
+ gpointer ud; /* opaque user data */
+ enum rspamd_upstreams_watch_event events_mask; /* NOTE(review): presumably selects the events that invoke `func` - confirm */
+ struct upstream_list_watcher *next, *prev; /* list links */
+ };
+
+ struct upstream { /* One upstream entry: its addresses, error accounting and name resolution state */
+ guint weight;
+ guint cur_weight;
+ guint errors;
+ guint checked;
+ guint dns_requests; /* DNS requests still in flight; new addresses are applied once this drops to 0 */
+ gint active_idx; /* index inside ls->alive, assigned by rspamd_upstream_set_active */
+ guint ttl; /* taken from the TTL of an SRV reply entry */
+ gchar *name;
+ ev_timer ev; /* timer driving lazy name resolution */
+ gdouble last_fail;
+ gdouble last_resolve;
+ gpointer ud;
+ enum rspamd_upstream_flag flags;
+ struct upstream_list *ls; /* list this upstream takes its limits (ls->limits) from */
+ GList *ctx_pos; /* reset to NULL when the library context is destroyed */
+ struct upstream_ctx *ctx; /* reset to NULL when the library context is destroyed */
+
+ struct {
+ GPtrArray *addr; /* struct upstream_addr_elt */
+ guint cur; /* reset to 0 whenever the address array is replaced */
+ } addrs;
+
+ struct upstream_inet_addr_entry *new_addrs; /* filled by DNS callbacks, consumed by rspamd_upstream_update_addrs */
+ gpointer data;
+ gchar uid[8]; /* identifier passed to the msg_*_upstream logging macros */
+ ref_entry_t ref; /* reference counter used with REF_RETAIN/REF_RELEASE */
+ #ifdef UPSTREAMS_THREAD_SAFE
+ rspamd_mutex_t *lock;
+ #endif
+ };
+
+ struct upstream_limits { /* Tunables; defaults come from default_limits, overrides from rspamd_upstreams_library_config */
+ gdouble revive_time;
+ gdouble revive_jitter;
+ gdouble error_time; /* together with max_errors: by default 4 errors in 10 seconds */
+ gdouble dns_timeout; /* passed as the timeout to rdns_make_request_full */
+ gdouble lazy_resolve_time; /* base delay of the lazy resolve timer (jittered by 10% when armed) */
+ guint max_errors;
+ guint dns_retransmits; /* passed as the retransmit count to rdns_make_request_full */
+ };
+
+ struct upstream_list { /* A set of upstreams sharing limits, flags and watchers */
+ gchar *ups_line;
+ struct upstream_ctx *ctx;
+ GPtrArray *ups;
+ GPtrArray *alive; /* upstreams appended by rspamd_upstream_set_active; upstream->active_idx indexes into it */
+ struct upstream_list_watcher *watchers;
+ guint64 hash_seed;
+ const struct upstream_limits *limits; /* read for resolve timing and DNS request parameters */
+ enum rspamd_upstream_flag flags;
+ guint cur_elt;
+ enum rspamd_upstream_rotation rot_alg;
+ #ifdef UPSTREAMS_THREAD_SAFE
+ rspamd_mutex_t *lock;
+ #endif
+ };
+
+ struct upstream_ctx { /* Library-wide state created by rspamd_upstreams_library_init */
+ struct rdns_resolver *res; /* set by rspamd_upstreams_library_config */
+ struct ev_loop *event_loop; /* set by rspamd_upstreams_library_config */
+ struct upstream_limits limits; /* starts as a copy of default_limits */
+ GQueue *upstreams; /* elements are struct upstream pointers */
+ gboolean configured; /* TRUE once rspamd_upstreams_library_config has run */
+ rspamd_mempool_t *pool; /* created in init, deleted in the context destructor */
+ ref_entry_t ref; /* reference counter; the destructor is rspamd_upstream_ctx_dtor */
+ };
+
+ /*
+  * Locking helpers: no-ops unless UPSTREAMS_THREAD_SAFE is defined, in which
+  * case they lock/unlock the `lock` member of the object passed in.
+  * The argument is parenthesized so that any pointer expression is safe.
+  */
+ #ifndef UPSTREAMS_THREAD_SAFE
+ #define RSPAMD_UPSTREAM_LOCK(x) \
+     do {                        \
+     } while (0)
+ #define RSPAMD_UPSTREAM_UNLOCK(x) \
+     do {                          \
+     } while (0)
+ #else
+ #define RSPAMD_UPSTREAM_LOCK(x) rspamd_mutex_lock((x)->lock)
+ #define RSPAMD_UPSTREAM_UNLOCK(x) rspamd_mutex_unlock((x)->lock)
+ #endif
+
+ /*
+  * Logging helpers. Each one expects a `struct upstream *upstream` variable in
+  * the calling scope and tags the record with its uid.
+  * Errors are reported at G_LOG_LEVEL_CRITICAL so that they are not filtered
+  * out together with informational messages.
+  */
+ #define msg_debug_upstream(...) rspamd_conditional_debug_fast(NULL, NULL, \
+     rspamd_upstream_log_id, "upstream", upstream->uid, \
+     G_STRFUNC, \
+     __VA_ARGS__)
+ #define msg_info_upstream(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
+     "upstream", upstream->uid, \
+     G_STRFUNC, \
+     __VA_ARGS__)
+ #define msg_err_upstream(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+     "upstream", upstream->uid, \
+     G_STRFUNC, \
+     __VA_ARGS__)
+
+INIT_LOG_MODULE(upstream)
+
+ /* Defaults: 4 errors in 10 seconds (max_errors within error_time) */
+ #define DEFAULT_MAX_ERRORS 4
+ static const guint default_max_errors = DEFAULT_MAX_ERRORS;
+ #define DEFAULT_REVIVE_TIME 60
+ static const gdouble default_revive_time = DEFAULT_REVIVE_TIME;
+ #define DEFAULT_REVIVE_JITTER 0.4
+ static const gdouble default_revive_jitter = DEFAULT_REVIVE_JITTER;
+ #define DEFAULT_ERROR_TIME 10
+ static const gdouble default_error_time = DEFAULT_ERROR_TIME;
+ #define DEFAULT_DNS_TIMEOUT 1.0
+ static const gdouble default_dns_timeout = DEFAULT_DNS_TIMEOUT;
+ #define DEFAULT_DNS_RETRANSMITS 2
+ static const guint default_dns_retransmits = DEFAULT_DNS_RETRANSMITS;
+ /* TODO: make it configurable. NOTE(review): rspamd_upstreams_library_config already overrides it from cfg->upstream_lazy_resolve_time, so this TODO may be stale */
+ #define DEFAULT_LAZY_RESOLVE_TIME 3600.0
+ static const gdouble default_lazy_resolve_time = DEFAULT_LAZY_RESOLVE_TIME;
+
+ static const struct upstream_limits default_limits = { /* copied into every context by rspamd_upstreams_library_init */
+ .revive_time = DEFAULT_REVIVE_TIME,
+ .revive_jitter = DEFAULT_REVIVE_JITTER,
+ .error_time = DEFAULT_ERROR_TIME,
+ .dns_timeout = DEFAULT_DNS_TIMEOUT,
+ .dns_retransmits = DEFAULT_DNS_RETRANSMITS,
+ .max_errors = DEFAULT_MAX_ERRORS,
+ .lazy_resolve_time = DEFAULT_LAZY_RESOLVE_TIME,
+ };
+
+static void rspamd_upstream_lazy_resolve_cb(struct ev_loop *, ev_timer *, int);
+
+ void rspamd_upstreams_library_config(struct rspamd_config *cfg,
+ struct upstream_ctx *ctx,
+ struct ev_loop *event_loop,
+ struct rdns_resolver *resolver)
+ { /* Applies the non-zero upstream/DNS settings of `cfg` to `ctx`, stores the event loop
+  * and resolver, and arms the lazy resolve timer of the already known upstreams */
+ g_assert(ctx != NULL);
+ g_assert(cfg != NULL);
+
+ if (cfg->upstream_error_time) { /* a zero setting leaves the current limit untouched (same for all checks below) */
+ ctx->limits.error_time = cfg->upstream_error_time;
+ }
+ if (cfg->upstream_max_errors) {
+ ctx->limits.max_errors = cfg->upstream_max_errors;
+ }
+ if (cfg->upstream_revive_time) {
+ ctx->limits.revive_time = cfg->upstream_revive_time;
+ }
+ if (cfg->upstream_lazy_resolve_time) {
+ ctx->limits.lazy_resolve_time = cfg->upstream_lazy_resolve_time;
+ }
+ if (cfg->dns_retransmits) {
+ ctx->limits.dns_retransmits = cfg->dns_retransmits;
+ }
+ if (cfg->dns_timeout) {
+ ctx->limits.dns_timeout = cfg->dns_timeout;
+ }
+
+ ctx->event_loop = event_loop;
+ ctx->res = resolver;
+ ctx->configured = TRUE;
+
+ /* Start lazy resolving */
+ if (event_loop && resolver) { /* timers are only armed when both a loop and a resolver are available */
+ GList *cur;
+ struct upstream *upstream;
+
+ cur = ctx->upstreams->head;
+
+ while (cur) {
+ upstream = cur->data;
+ if (!ev_can_stop(&upstream->ev) && upstream->ls && /* skip upstreams whose timer ev_can_stop() reports as stoppable, or that have no list */
+ !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) {
+ gdouble when;
+
+ if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) {
+ /* Resolve them immediately ! */
+ when = 0.0;
+ }
+ else {
+ when = rspamd_time_jitter(upstream->ls->limits->lazy_resolve_time,
+ upstream->ls->limits->lazy_resolve_time * .1); /* jitter argument is 10% of the base delay */
+ }
+
+ ev_timer_init(&upstream->ev, rspamd_upstream_lazy_resolve_cb,
+ when, 0); /* repeat interval 0: one-shot timer */
+ upstream->ev.data = upstream; /* the callback gets the upstream through the watcher's data pointer */
+ ev_timer_start(ctx->event_loop, &upstream->ev);
+ }
+
+ cur = g_list_next(cur);
+ }
+ }
+ }
+
+ /*
+  * Destructor of the library context: detaches every upstream still linked to
+  * the context, then frees the queue, the memory pool and the context itself.
+  */
+ static void
+ rspamd_upstream_ctx_dtor(struct upstream_ctx *ctx)
+ {
+     GList *node;
+
+     for (node = ctx->upstreams->head; node != NULL; node = g_list_next(node)) {
+         struct upstream *up = node->data;
+
+         /* Upstreams may outlive the context, so drop their back references */
+         up->ctx = NULL;
+         up->ctx_pos = NULL;
+     }
+
+     g_queue_free(ctx->upstreams);
+     rspamd_mempool_delete(ctx->pool);
+     g_free(ctx);
+ }
+
+ void rspamd_upstreams_library_unref(struct upstream_ctx *ctx)
+ { /* Drops one reference to the context; its destructor is rspamd_upstream_ctx_dtor (set in rspamd_upstreams_library_init) */
+ REF_RELEASE(ctx);
+ }
+
+ /*
+  * Allocates a new library context holding a copy of the default limits, its
+  * own memory pool and an empty upstream queue. The returned context carries
+  * one reference; release it with rspamd_upstreams_library_unref().
+  */
+ struct upstream_ctx *
+ rspamd_upstreams_library_init(void)
+ {
+     struct upstream_ctx *new_ctx = g_malloc0(sizeof(*new_ctx));
+
+     new_ctx->limits = default_limits;
+     new_ctx->pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+                                        "upstreams", 0);
+     new_ctx->upstreams = g_queue_new();
+
+     REF_INIT_RETAIN(new_ctx, rspamd_upstream_ctx_dtor);
+
+     return new_ctx;
+ }
+
+ /*
+  * Maps the address family of `addr` to a sort weight:
+  * AF_UNIX -> 2, AF_INET -> 1, anything else -> 0.
+  */
+ static gint
+ rspamd_upstream_af_to_weight(const rspamd_inet_addr_t *addr)
+ {
+     const int family = rspamd_inet_address_get_af(addr);
+
+     if (family == AF_UNIX) {
+         return 2;
+     }
+
+     if (family == AF_INET) {
+         return 1;
+     }
+
+     return 0;
+ }
+
+/*
+ * Select IPv4 addresses before IPv6
+ */
+static gint
+rspamd_upstream_addr_sort_func(gconstpointer a, gconstpointer b)
+{
+ const struct upstream_addr_elt *ip1 = *(const struct upstream_addr_elt **) a,
+ *ip2 = *(const struct upstream_addr_elt **) b;
+ gint w1, w2;
+
+ if (ip1->priority == 0 && ip2->priority == 0) {
+ w1 = rspamd_upstream_af_to_weight(ip1->addr);
+ w2 = rspamd_upstream_af_to_weight(ip2->addr);
+ }
+ else {
+ w1 = ip1->priority;
+ w2 = ip2->priority;
+ }
+
+ /* Inverse order */
+ return w2 - w1;
+}
+
+ static void
+ rspamd_upstream_set_active(struct upstream_list *ls, struct upstream *upstream)
+ { /* Appends `upstream` to ls->alive and (re)arms its lazy resolve timer when the
+  * library context is configured and resolving is not disabled for it */
+ RSPAMD_UPSTREAM_LOCK(ls);
+ g_ptr_array_add(ls->alive, upstream);
+ upstream->active_idx = ls->alive->len - 1; /* position of the element just appended */
+
+ if (upstream->ctx && upstream->ctx->configured &&
+ !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) {
+
+ if (ev_can_stop(&upstream->ev)) { /* stop a previously armed timer before re-initialising it */
+ ev_timer_stop(upstream->ctx->event_loop, &upstream->ev);
+ }
+
+ /* Start lazy (or not so lazy) names resolution */
+ gdouble when;
+
+ if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) {
+ /* Resolve them immediately ! */
+ when = 0.0;
+ }
+ else {
+ when = rspamd_time_jitter(upstream->ls->limits->lazy_resolve_time,
+ upstream->ls->limits->lazy_resolve_time * .1); /* limits are read via upstream->ls, not the `ls` argument */
+ }
+ ev_timer_init(&upstream->ev, rspamd_upstream_lazy_resolve_cb,
+ when, 0); /* repeat interval 0: one-shot timer */
+ upstream->ev.data = upstream;
+ msg_debug_upstream("start lazy resolving for %s in %.0f seconds",
+ upstream->name, when);
+ ev_timer_start(upstream->ctx->event_loop, &upstream->ev);
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(ls);
+ }
+
+ /*
+  * GDestroyNotify for elements of the upstream address array: frees the
+  * wrapped inet address and the element itself. NULL is ignored.
+  */
+ static void
+ rspamd_upstream_addr_elt_dtor(gpointer a)
+ {
+     struct upstream_addr_elt *addr_elt = a;
+
+     if (addr_elt == NULL) {
+         return;
+     }
+
+     rspamd_inet_address_free(addr_elt->addr);
+     g_free(addr_elt);
+ }
+
+ /*
+  * Replaces the address array of `upstream` with the addresses collected in
+  * upstream->new_addrs by the DNS callbacks, then empties that pending list.
+  *
+  * - The port of the first current address is applied to every new address,
+  *   since DNS replies carry no port information.
+  * - Error counters of addresses that were already known are preserved,
+  *   except that with ~10% probability all counters are reset.
+  * - The priority of each resolved entry is carried over, so that the sort
+  *   comparator can honour it.
+  * - If there is no current address to take the port from, the new addresses
+  *   are not adopted; they are freed here instead of being leaked.
+  */
+ static void
+ rspamd_upstream_update_addrs(struct upstream *upstream)
+ {
+     guint addr_cnt, i, port;
+     gboolean reset_errors = FALSE, addrs_adopted = FALSE;
+     struct upstream_inet_addr_entry *cur, *tmp;
+     GPtrArray *new_addrs;
+     struct upstream_addr_elt *addr_elt, *naddr;
+
+     /*
+      * We need first of all get the saved port, since DNS gives us no
+      * idea about what port has been used previously
+      */
+     RSPAMD_UPSTREAM_LOCK(upstream);
+
+     if (upstream->addrs.addr->len > 0 && upstream->new_addrs) {
+         addr_elt = g_ptr_array_index(upstream->addrs.addr, 0);
+         port = rspamd_inet_address_get_port(addr_elt->addr);
+
+         /* Now calculate new addrs count */
+         addr_cnt = 0;
+         LL_FOREACH(upstream->new_addrs, cur)
+         {
+             addr_cnt++;
+         }
+
+         /* At 10% probability reset errors on addr elements */
+         if (rspamd_random_double_fast() > 0.9) {
+             reset_errors = TRUE;
+             msg_debug_upstream("reset errors on upstream %s",
+                                upstream->name);
+         }
+
+         new_addrs = g_ptr_array_new_full(addr_cnt, rspamd_upstream_addr_elt_dtor);
+
+         /* Copy addrs back */
+         LL_FOREACH(upstream->new_addrs, cur)
+         {
+             gboolean seen_addr = FALSE;
+             guint prev_errors = 0;
+
+             /* Ports are problematic, set to compare in the next block */
+             rspamd_inet_address_set_port(cur->addr, port);
+
+             PTR_ARRAY_FOREACH(upstream->addrs.addr, i, addr_elt)
+             {
+                 if (rspamd_inet_address_compare(addr_elt->addr, cur->addr, FALSE) == 0) {
+                     prev_errors = addr_elt->errors;
+                     seen_addr = TRUE;
+
+                     break;
+                 }
+             }
+
+             naddr = g_malloc0(sizeof(*naddr));
+             naddr->addr = cur->addr;
+             /* Keep the resolver supplied priority, it is the sort key */
+             naddr->priority = cur->priority;
+             naddr->errors = (seen_addr && !reset_errors) ? prev_errors : 0;
+
+             if (!seen_addr) {
+                 msg_debug_upstream("new address for %s: %s",
+                                    upstream->name,
+                                    rspamd_inet_address_to_string_pretty(naddr->addr));
+             }
+             else {
+                 msg_debug_upstream("existing address for %s: %s",
+                                    upstream->name,
+                                    rspamd_inet_address_to_string_pretty(cur->addr));
+             }
+
+             g_ptr_array_add(new_addrs, naddr);
+         }
+
+         /* Free old addresses */
+         g_ptr_array_free(upstream->addrs.addr, TRUE);
+
+         upstream->addrs.cur = 0;
+         upstream->addrs.addr = new_addrs;
+         g_ptr_array_sort(upstream->addrs.addr, rspamd_upstream_addr_sort_func);
+         addrs_adopted = TRUE;
+     }
+
+     LL_FOREACH_SAFE(upstream->new_addrs, cur, tmp)
+     {
+         if (!addrs_adopted) {
+             /* Ownership was not transferred to the address array */
+             rspamd_inet_address_free(cur->addr);
+         }
+         /* Otherwise the inet address pointer now belongs to the array element */
+         g_free(cur);
+     }
+
+     upstream->new_addrs = NULL;
+     RSPAMD_UPSTREAM_UNLOCK(upstream);
+ }
+
+ static void
+ rspamd_upstream_dns_cb(struct rdns_reply *reply, void *arg)
+ { /* rdns callback for address lookups: queues every A/AAAA entry of a successful
+  * reply on up->new_addrs and applies the queue once no request is outstanding */
+ struct upstream *up = (struct upstream *) arg;
+ struct rdns_reply_entry *entry;
+ struct upstream_inet_addr_entry *up_ent;
+
+ if (reply->code == RDNS_RC_NOERROR) { /* failed replies contribute no addresses but are still counted below */
+ entry = reply->entries;
+
+ RSPAMD_UPSTREAM_LOCK(up);
+ while (entry) {
+
+ if (entry->type == RDNS_REQUEST_A) {
+ up_ent = g_malloc0(sizeof(*up_ent));
+ up_ent->addr = rspamd_inet_address_new(AF_INET,
+ &entry->content.a.addr);
+ LL_PREPEND(up->new_addrs, up_ent);
+ }
+ else if (entry->type == RDNS_REQUEST_AAAA) {
+ up_ent = g_malloc0(sizeof(*up_ent));
+ up_ent->addr = rspamd_inet_address_new(AF_INET6,
+ &entry->content.aaa.addr);
+ LL_PREPEND(up->new_addrs, up_ent);
+ }
+ entry = entry->next;
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(up);
+ }
+
+ up->dns_requests--; /* this request is finished, whatever its result */
+
+ if (up->dns_requests == 0) { /* last outstanding reply: swap in the collected addresses */
+ rspamd_upstream_update_addrs(up);
+ }
+
+ REF_RELEASE(up); /* NOTE(review): presumably balances a REF_RETAIN taken when the request was issued - confirm at the call site */
+ }
+
+ struct rspamd_upstream_srv_dns_cb { /* State passed to rspamd_upstream_dns_srv_phase2_cb for one SRV reply entry */
+ struct upstream *up; /* upstream whose new_addrs list receives the resolved addresses */
+ guint priority; /* copied into every resulting address entry */
+ guint port; /* port set on every resulting address */
+ guint requests_inflight; /* pending follow-up requests; the struct is freed when this reaches 0 */
+ };
+
+ /* Used when we have resolved SRV record and resolved addrs: queues the A/AAAA results with the SRV derived priority and port */
+ static void
+ rspamd_upstream_dns_srv_phase2_cb(struct rdns_reply *reply, void *arg)
+ {
+ struct rspamd_upstream_srv_dns_cb *cbdata =
+ (struct rspamd_upstream_srv_dns_cb *) arg;
+ struct upstream *up;
+ struct rdns_reply_entry *entry;
+ struct upstream_inet_addr_entry *up_ent;
+
+ up = cbdata->up; /* NOTE(review): relies on the issuer of the request having filled cbdata->up - confirm */
+
+ if (reply->code == RDNS_RC_NOERROR) {
+ entry = reply->entries;
+
+ RSPAMD_UPSTREAM_LOCK(up);
+ while (entry) {
+
+ if (entry->type == RDNS_REQUEST_A) {
+ up_ent = g_malloc0(sizeof(*up_ent));
+ up_ent->addr = rspamd_inet_address_new(AF_INET,
+ &entry->content.a.addr);
+ up_ent->priority = cbdata->priority;
+ rspamd_inet_address_set_port(up_ent->addr, cbdata->port);
+ LL_PREPEND(up->new_addrs, up_ent);
+ }
+ else if (entry->type == RDNS_REQUEST_AAAA) {
+ up_ent = g_malloc0(sizeof(*up_ent));
+ up_ent->addr = rspamd_inet_address_new(AF_INET6,
+ &entry->content.aaa.addr);
+ up_ent->priority = cbdata->priority;
+ rspamd_inet_address_set_port(up_ent->addr, cbdata->port);
+ LL_PREPEND(up->new_addrs, up_ent);
+ }
+ entry = entry->next;
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(up);
+ }
+
+ up->dns_requests--; /* per-upstream counter of outstanding requests */
+ cbdata->requests_inflight--; /* per-SRV-entry counter of outstanding requests */
+
+ if (cbdata->requests_inflight == 0) { /* no further callback will use cbdata */
+ g_free(cbdata);
+ }
+
+ if (up->dns_requests == 0) { /* last outstanding reply: swap in the collected addresses */
+ rspamd_upstream_update_addrs(up);
+ }
+
+ REF_RELEASE(up); /* balances the REF_RETAIN taken when the follow-up request was issued */
+ }
+
+/*
+ * First phase of SRV resolution: called with the reply to the SRV query.
+ *
+ * For every SRV record an A and an AAAA lookup of the record's target are
+ * started; both share one rspamd_upstream_srv_dns_cb that carries the
+ * upstream, port and priority to rspamd_upstream_dns_srv_phase2_cb. Each
+ * started lookup bumps upstream->dns_requests and takes a reference on the
+ * upstream. The request counter and reference of the SRV query itself are
+ * dropped at the end.
+ */
+static void
+rspamd_upstream_dns_srv_cb(struct rdns_reply *reply, void *arg)
+{
+    struct upstream *upstream = (struct upstream *) arg;
+    struct rdns_reply_entry *entry;
+    struct rspamd_upstream_srv_dns_cb *ncbdata;
+
+    /*
+     * upstream->ls is reset to NULL when the owning list is destroyed; the
+     * limits needed for follow-up requests are gone then, so do not start any.
+     */
+    if (reply->code == RDNS_RC_NOERROR && upstream->ls != NULL) {
+        entry = reply->entries;
+
+        RSPAMD_UPSTREAM_LOCK(upstream);
+        while (entry) {
+            /* XXX: we ignore weight as it contradicts with upstreams logic */
+            if (entry->type == RDNS_REQUEST_SRV) {
+                msg_debug_upstream("got srv reply for %s: %s "
+                                   "(weight=%d, priority=%d, port=%d)",
+                                   upstream->name, entry->content.srv.target,
+                                   entry->content.srv.weight, entry->content.srv.priority,
+                                   entry->content.srv.port);
+                ncbdata = g_malloc0(sizeof(*ncbdata));
+                /* The phase-2 callback dereferences cbdata->up, so it must be set */
+                ncbdata->up = upstream;
+                /*
+                 * NOTE(review): the SRV weight is stored as the priority although
+                 * the comment above says weight is ignored - confirm intent.
+                 */
+                ncbdata->priority = entry->content.srv.weight;
+                ncbdata->port = entry->content.srv.port;
+                /* XXX: for all entries? */
+                upstream->ttl = entry->ttl;
+
+                if (rdns_make_request_full(upstream->ctx->res,
+                                           rspamd_upstream_dns_srv_phase2_cb, ncbdata,
+                                           upstream->ls->limits->dns_timeout,
+                                           upstream->ls->limits->dns_retransmits,
+                                           1, entry->content.srv.target, RDNS_REQUEST_A) != NULL) {
+                    upstream->dns_requests++;
+                    REF_RETAIN(upstream);
+                    ncbdata->requests_inflight++;
+                }
+
+                if (rdns_make_request_full(upstream->ctx->res,
+                                           rspamd_upstream_dns_srv_phase2_cb, ncbdata,
+                                           upstream->ls->limits->dns_timeout,
+                                           upstream->ls->limits->dns_retransmits,
+                                           1, entry->content.srv.target, RDNS_REQUEST_AAAA) != NULL) {
+                    upstream->dns_requests++;
+                    REF_RETAIN(upstream);
+                    ncbdata->requests_inflight++;
+                }
+
+                if (ncbdata->requests_inflight == 0) {
+                    /* Neither lookup could be started: nobody will free it later */
+                    g_free(ncbdata);
+                }
+            }
+            entry = entry->next;
+        }
+
+        RSPAMD_UPSTREAM_UNLOCK(upstream);
+    }
+
+    upstream->dns_requests--;
+    REF_RELEASE(upstream);
+}
+
+/*
+ * Timer callback armed by rspamd_upstream_set_inactive(): stops the timer,
+ * puts the upstream back into its list's active set (if the list still
+ * exists) and drops the reference that was taken when the timer was set up.
+ */
+static void
+rspamd_upstream_revive_cb(struct ev_loop *loop, ev_timer *w, int revents)
+{
+ struct upstream *upstream = (struct upstream *) w->data;
+
+ RSPAMD_UPSTREAM_LOCK(upstream);
+ ev_timer_stop(loop, w);
+
+ msg_debug_upstream("revive upstream %s", upstream->name);
+
+ /* ls is NULL once the owning list has been destroyed */
+ if (upstream->ls) {
+ rspamd_upstream_set_active(upstream->ls, upstream);
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(upstream);
+ /* The timer's reference must not be the last one */
+ g_assert(upstream->ref.refcount > 1);
+ REF_RELEASE(upstream);
+}
+
+/*
+ * Start asynchronous DNS resolution of an upstream name.
+ *
+ * Nothing is done when there is no list or resolver, the library is not
+ * configured, the upstream is flagged NORESOLVE, a resolution is already in
+ * flight, the previous one happened less than min_resolve_interval seconds
+ * ago, or the name starts with '/'. Otherwise an SRV query (SRV_RESOLVE flag)
+ * or an A plus an AAAA query is issued for the host part of the name. Each
+ * issued request increments upstream->dns_requests and takes a reference on
+ * the upstream; the DNS callbacks undo both.
+ *
+ * @param ls list providing DNS timeout/retransmit limits; may be NULL, in
+ *           which case no request is made
+ * @param upstream upstream to resolve
+ */
+static void
+rspamd_upstream_resolve_addrs(const struct upstream_list *ls,
+                              struct upstream *upstream)
+{
+    /* XXX: maybe make it configurable */
+    static const gdouble min_resolve_interval = 60.0;
+
+    if (ls != NULL &&
+        upstream->ctx->res != NULL &&
+        upstream->ctx->configured &&
+        upstream->dns_requests == 0 &&
+        !(upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE)) {
+
+        gdouble now = ev_now(upstream->ctx->event_loop);
+
+        if (now - upstream->last_resolve < min_resolve_interval) {
+            msg_info_upstream("do not resolve upstream %s as it was checked %.0f "
+                              "seconds ago (%.0f is minimum)",
+                              upstream->name, now - upstream->last_resolve,
+                              min_resolve_interval);
+
+            return;
+        }
+
+        /* Resolve name of the upstream one more time */
+        if (upstream->name[0] != '/') {
+            upstream->last_resolve = now;
+
+            /*
+             * If upstream name has a port, then we definitely need to resolve
+             * merely host part!
+             */
+            char dns_name[253 + 1]; /* 253 == max dns name + \0 */
+            const char *colon_pos = strchr(upstream->name, ':');
+
+            if (colon_pos != NULL && colon_pos > upstream->name) {
+                /* Non-negative here, so the unsigned comparison is well defined */
+                gsize host_len = (gsize) (colon_pos - upstream->name);
+
+                if (sizeof(dns_name) > host_len) {
+                    rspamd_strlcpy(dns_name, upstream->name, host_len + 1);
+                }
+                else {
+                    /* XXX: truncated */
+                    msg_err_upstream("internal error: upstream name is larger than "
+                                     "max DNS name: %s",
+                                     upstream->name);
+                    rspamd_strlcpy(dns_name, upstream->name, sizeof(dns_name));
+                }
+            }
+            else {
+                rspamd_strlcpy(dns_name, upstream->name, sizeof(dns_name));
+            }
+
+            if (upstream->flags & RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE) {
+                if (rdns_make_request_full(upstream->ctx->res,
+                                           rspamd_upstream_dns_srv_cb, upstream,
+                                           ls->limits->dns_timeout, ls->limits->dns_retransmits,
+                                           1, dns_name, RDNS_REQUEST_SRV) != NULL) {
+                    upstream->dns_requests++;
+                    REF_RETAIN(upstream);
+                }
+            }
+            else {
+                if (rdns_make_request_full(upstream->ctx->res,
+                                           rspamd_upstream_dns_cb, upstream,
+                                           ls->limits->dns_timeout, ls->limits->dns_retransmits,
+                                           1, dns_name, RDNS_REQUEST_A) != NULL) {
+                    upstream->dns_requests++;
+                    REF_RETAIN(upstream);
+                }
+
+                if (rdns_make_request_full(upstream->ctx->res,
+                                           rspamd_upstream_dns_cb, upstream,
+                                           ls->limits->dns_timeout, ls->limits->dns_retransmits,
+                                           1, dns_name, RDNS_REQUEST_AAAA) != NULL) {
+                    upstream->dns_requests++;
+                    REF_RETAIN(upstream);
+                }
+            }
+        }
+    }
+    else if (upstream->dns_requests != 0) {
+        msg_info_upstream("do not resolve upstream %s as another request for "
+                          "resolving has been already issued",
+                          upstream->name);
+    }
+}
+
+/*
+ * Periodic timer callback that re-resolves an upstream in the background.
+ * After triggering resolution the timer is re-armed: with the record TTL
+ * when it is non-zero and not larger than lazy_resolve_time, otherwise with
+ * lazy_resolve_time plus up to 10% jitter. Nothing is re-armed once the
+ * owning list is gone.
+ */
+static void
+rspamd_upstream_lazy_resolve_cb(struct ev_loop *loop, ev_timer *w, int revents)
+{
+    struct upstream *up = (struct upstream *) w->data;
+
+    RSPAMD_UPSTREAM_LOCK(up);
+    ev_timer_stop(loop, w);
+
+    if (up->ls != NULL) {
+        gboolean use_default_interval;
+
+        rspamd_upstream_resolve_addrs(up->ls, up);
+
+        /* The TTL is read after the resolve call, as in the original order */
+        use_default_interval = (up->ttl == 0 ||
+                                up->ttl > up->ls->limits->lazy_resolve_time);
+
+        if (!use_default_interval) {
+            w->repeat = up->ttl;
+        }
+        else {
+            w->repeat = rspamd_time_jitter(up->ls->limits->lazy_resolve_time,
+                                           up->ls->limits->lazy_resolve_time * .1);
+        }
+
+        ev_timer_again(loop, w);
+    }
+
+    RSPAMD_UPSTREAM_UNLOCK(up);
+}
+
+/*
+ * Take an upstream out of the alive set of its list.
+ *
+ * The upstream is removed from ls->alive and the active_idx of the remaining
+ * alive upstreams is renumbered. If the upstream has a library context, its
+ * addresses are re-resolved, a reference is taken for a revive timer and the
+ * timer is (re)initialised with a jittered revive_time; the timer is started
+ * only when the context has an event loop and is configured. Finally all
+ * watchers subscribed to RSPAMD_UPSTREAM_WATCH_OFFLINE are notified.
+ */
+static void
+rspamd_upstream_set_inactive(struct upstream_list *ls, struct upstream *upstream)
+{
+ gdouble ntim;
+ guint i;
+ struct upstream *cur;
+ struct upstream_list_watcher *w;
+
+ RSPAMD_UPSTREAM_LOCK(ls);
+ g_ptr_array_remove_index(ls->alive, upstream->active_idx);
+ /* -1 marks the upstream as not being in the alive array */
+ upstream->active_idx = -1;
+
+ /* We need to update all indices */
+ for (i = 0; i < ls->alive->len; i++) {
+ cur = g_ptr_array_index(ls->alive, i);
+ cur->active_idx = i;
+ }
+
+ if (upstream->ctx) {
+ rspamd_upstream_resolve_addrs(ls, upstream);
+
+ /* Reference for the revive timer; released in the revive/restore callbacks */
+ REF_RETAIN(upstream);
+ ntim = rspamd_time_jitter(ls->limits->revive_time,
+ ls->limits->revive_time * ls->limits->revive_jitter);
+
+ if (ev_can_stop(&upstream->ev)) {
+ ev_timer_stop(upstream->ctx->event_loop, &upstream->ev);
+ }
+
+ msg_debug_upstream("mark upstream %s inactive; revive in %.0f seconds",
+ upstream->name, ntim);
+ ev_timer_init(&upstream->ev, rspamd_upstream_revive_cb, ntim, 0);
+ upstream->ev.data = upstream;
+
+ if (upstream->ctx->event_loop != NULL && upstream->ctx->configured) {
+ ev_timer_start(upstream->ctx->event_loop, &upstream->ev);
+ }
+ }
+
+ DL_FOREACH(upstream->ls->watchers, w)
+ {
+ if (w->events_mask & RSPAMD_UPSTREAM_WATCH_OFFLINE) {
+ w->func(upstream, RSPAMD_UPSTREAM_WATCH_OFFLINE, upstream->errors, w->ud);
+ }
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(ls);
+}
+
+/*
+ * Register a failure of an upstream.
+ *
+ * Only upstreams that have a library context, are currently active and still
+ * belong to a list are processed. The first error starts a measurement
+ * interval (last_fail) and triggers an immediate re-resolve. Later errors
+ * increase the counter; once at least error_time seconds have passed since
+ * the first error, the observed error rate is compared with
+ * max_errors / error_time. If it is exceeded the upstream is marked inactive,
+ * unless it is the only upstream in the list, in which case its addresses
+ * are merely re-resolved after revive_time. If the rate is not exceeded but
+ * the interval has elapsed, the interval is restarted. FAILURE watchers are
+ * notified on every counted error.
+ *
+ * @param upstream upstream that failed
+ * @param addr_failure if TRUE, the error counter of the currently selected
+ *                     address is incremented as well
+ * @param reason text used in debug log messages
+ */
+void rspamd_upstream_fail(struct upstream *upstream,
+ gboolean addr_failure,
+ const gchar *reason)
+{
+ gdouble error_rate = 0, max_error_rate = 0;
+ gdouble sec_last, sec_cur;
+ struct upstream_addr_elt *addr_elt;
+ struct upstream_list_watcher *w;
+
+ msg_debug_upstream("upstream %s failed; reason: %s",
+ upstream->name,
+ reason);
+
+ if (upstream->ctx && upstream->active_idx != -1 && upstream->ls) {
+ sec_cur = rspamd_get_ticks(FALSE);
+
+ RSPAMD_UPSTREAM_LOCK(upstream);
+ if (upstream->errors == 0) {
+ /* We have the first error */
+ upstream->last_fail = sec_cur;
+ upstream->errors = 1;
+
+ if (upstream->ls && upstream->dns_requests == 0) {
+ /* Try to re-resolve address immediately */
+ rspamd_upstream_resolve_addrs(upstream->ls, upstream);
+ }
+
+ DL_FOREACH(upstream->ls->watchers, w)
+ {
+ if (w->events_mask & RSPAMD_UPSTREAM_WATCH_FAILURE) {
+ w->func(upstream, RSPAMD_UPSTREAM_WATCH_FAILURE, 1, w->ud);
+ }
+ }
+ }
+ else {
+ sec_last = upstream->last_fail;
+
+ /* Errors stamped earlier than the interval start are not counted */
+ if (sec_cur >= sec_last) {
+ upstream->errors++;
+
+
+ DL_FOREACH(upstream->ls->watchers, w)
+ {
+ if (w->events_mask & RSPAMD_UPSTREAM_WATCH_FAILURE) {
+ w->func(upstream, RSPAMD_UPSTREAM_WATCH_FAILURE,
+ upstream->errors, w->ud);
+ }
+ }
+
+ /* Rates stay 0 (no action) until the interval is at least error_time long */
+ if (sec_cur - sec_last >= upstream->ls->limits->error_time) {
+ error_rate = ((gdouble) upstream->errors) / (sec_cur - sec_last);
+ max_error_rate = ((gdouble) upstream->ls->limits->max_errors) /
+ upstream->ls->limits->error_time;
+ }
+
+ if (error_rate > max_error_rate) {
+ /* Remove upstream from the active list */
+ if (upstream->ls->ups->len > 1) {
+ msg_debug_upstream("mark upstream %s inactive; "
+ "reason: %s; %.2f "
+ "error rate (%d errors), "
+ "%.2f max error rate, "
+ "%.1f first error time, "
+ "%.1f current ts, "
+ "%d upstreams left",
+ upstream->name,
+ reason,
+ error_rate,
+ upstream->errors,
+ max_error_rate,
+ sec_last,
+ sec_cur,
+ upstream->ls->alive->len - 1);
+ rspamd_upstream_set_inactive(upstream->ls, upstream);
+ upstream->errors = 0;
+ }
+ else {
+ /* Single configured upstream: it is never taken out of rotation */
+ msg_debug_upstream("cannot mark last alive upstream %s "
+ "inactive; reason: %s; %.2f "
+ "error rate (%d errors), "
+ "%.2f max error rate, "
+ "%.1f first error time, "
+ "%.1f current ts",
+ upstream->name,
+ reason,
+ error_rate,
+ upstream->errors,
+ max_error_rate,
+ sec_last,
+ sec_cur);
+ /* Just re-resolve addresses */
+ if (sec_cur - sec_last > upstream->ls->limits->revive_time) {
+ upstream->errors = 0;
+ rspamd_upstream_resolve_addrs(upstream->ls, upstream);
+ }
+ }
+ }
+ else if (sec_cur - sec_last >= upstream->ls->limits->error_time) {
+ /* Forget the whole interval */
+ upstream->last_fail = sec_cur;
+ upstream->errors = 1;
+ }
+ }
+ }
+
+ if (addr_failure) {
+ /* Also increase count of errors for this specific address */
+ if (upstream->addrs.addr) {
+ addr_elt = g_ptr_array_index(upstream->addrs.addr,
+ upstream->addrs.cur);
+ addr_elt->errors++;
+ }
+ }
+
+ RSPAMD_UPSTREAM_UNLOCK(upstream);
+ }
+}
+
+/*
+ * Register a successful interaction with an upstream.
+ *
+ * Has an effect only when the upstream is active, belongs to a list and has
+ * a non-zero error counter: the upstream's counter and the counter of its
+ * currently selected address are reset and all SUCCESS watchers are called.
+ */
+void rspamd_upstream_ok(struct upstream *upstream)
+{
+    struct upstream_list_watcher *watcher;
+
+    RSPAMD_UPSTREAM_LOCK(upstream);
+
+    if (!(upstream->errors > 0) || upstream->active_idx == -1 || !upstream->ls) {
+        /* Nothing to reset, or the upstream is not in rotation */
+        RSPAMD_UPSTREAM_UNLOCK(upstream);
+        return;
+    }
+
+    /* We touch upstream if and only if it is active */
+    msg_debug_upstream("reset errors on upstream %s (was %ud)", upstream->name, upstream->errors);
+    upstream->errors = 0;
+
+    if (upstream->addrs.addr) {
+        struct upstream_addr_elt *cur_addr =
+            g_ptr_array_index(upstream->addrs.addr, upstream->addrs.cur);
+
+        cur_addr->errors = 0;
+    }
+
+    DL_FOREACH(upstream->ls->watchers, watcher)
+    {
+        if (watcher->events_mask & RSPAMD_UPSTREAM_WATCH_SUCCESS) {
+            watcher->func(upstream, RSPAMD_UPSTREAM_WATCH_SUCCESS, 0, watcher->ud);
+        }
+    }
+
+    RSPAMD_UPSTREAM_UNLOCK(upstream);
+}
+
+/*
+ * Set the static weight of an upstream (under the upstream lock).
+ * Only up->weight is written here; up->cur_weight is left untouched.
+ */
+void rspamd_upstream_set_weight(struct upstream *up, guint weight)
+{
+ RSPAMD_UPSTREAM_LOCK(up);
+ up->weight = weight;
+ RSPAMD_UPSTREAM_UNLOCK(up);
+}
+
+/* Fixed value stored in hash_seed of every list made by rspamd_upstreams_create() */
+#define SEED_CONSTANT 0xa574de7df64e9b9dULL
+
+/*
+ * Allocate an empty upstream list.
+ *
+ * The list starts with no rotation algorithm (RSPAMD_UPSTREAM_UNDEF), the
+ * fixed hash seed and empty "all" and "alive" arrays. Limits point at the
+ * context's limits when a context is given and at the built-in defaults
+ * otherwise.
+ *
+ * @param ctx upstreams library context, may be NULL
+ * @return newly allocated list
+ */
+struct upstream_list *
+rspamd_upstreams_create(struct upstream_ctx *ctx)
+{
+    struct upstream_list *ls = g_malloc0(sizeof(*ls));
+
+    ls->ctx = ctx;
+    ls->limits = ctx ? &ctx->limits : &default_limits;
+    ls->rot_alg = RSPAMD_UPSTREAM_UNDEF;
+    ls->hash_seed = SEED_CONSTANT;
+    ls->cur_elt = 0;
+
+    ls->ups = g_ptr_array_new();
+    ls->alive = g_ptr_array_new();
+
+#ifdef UPSTREAMS_THREAD_SAFE
+    ls->lock = rspamd_mutex_new();
+#endif
+
+    return ls;
+}
+
+/*
+ * Number of upstreams configured in a list (alive or not); 0 for a NULL list.
+ */
+gsize rspamd_upstreams_count(struct upstream_list *ups)
+{
+    if (ups == NULL) {
+        return 0;
+    }
+
+    return ups->ups->len;
+}
+
+/*
+ * Number of upstreams currently in the alive set of a list; 0 for a NULL list.
+ */
+gsize rspamd_upstreams_alive(struct upstream_list *ups)
+{
+    if (ups == NULL) {
+        return 0;
+    }
+
+    return ups->alive->len;
+}
+
+/*
+ * Destructor invoked when the last reference to an upstream is released.
+ *
+ * Frees addresses still waiting in new_addrs (including the inet address
+ * objects), the current address array, the lock (thread-safe builds), stops
+ * the upstream's timer, unlinks the upstream from its context's queue, drops
+ * the context reference and frees the structure itself.
+ * NOTE(review): up->name is not released here; confirm it is always owned
+ * elsewhere (e.g. by the context pool).
+ */
+static void
+rspamd_upstream_dtor(struct upstream *up)
+{
+    struct upstream_inet_addr_entry *pending, *pending_next;
+
+    /* Iterating a NULL list head performs no iterations */
+    LL_FOREACH_SAFE(up->new_addrs, pending, pending_next)
+    {
+        /* Here we need to free pointer as well */
+        rspamd_inet_address_free(pending->addr);
+        g_free(pending);
+    }
+
+    if (up->addrs.addr != NULL) {
+        g_ptr_array_free(up->addrs.addr, TRUE);
+    }
+
+#ifdef UPSTREAMS_THREAD_SAFE
+    rspamd_mutex_free(up->lock);
+#endif
+
+    if (up->ctx != NULL) {
+        struct upstream_ctx *owner = up->ctx;
+
+        if (ev_can_stop(&up->ev)) {
+            ev_timer_stop(owner->event_loop, &up->ev);
+        }
+
+        g_queue_delete_link(owner->upstreams, up->ctx_pos);
+        REF_RELEASE(up->ctx);
+    }
+
+    g_free(up);
+}
+
+/*
+ * Advance to the next address of an upstream and return it.
+ *
+ * The cursor moves forward cyclically; a candidate is skipped while it has
+ * more errors than the address the cursor just left. The address array must
+ * be non-empty.
+ */
+rspamd_inet_addr_t *
+rspamd_upstream_addr_next(struct upstream *up)
+{
+    for (;;) {
+        guint from_idx = up->addrs.cur;
+        guint to_idx = (from_idx + 1) % up->addrs.addr->len;
+        struct upstream_addr_elt *from = g_ptr_array_index(up->addrs.addr, from_idx);
+        struct upstream_addr_elt *to = g_ptr_array_index(up->addrs.addr, to_idx);
+
+        up->addrs.cur = to_idx;
+
+        if (to->errors <= from->errors) {
+            return to->addr;
+        }
+    }
+}
+
+/*
+ * Return the address the upstream's cursor currently points at, without
+ * moving the cursor.
+ */
+rspamd_inet_addr_t *
+rspamd_upstream_addr_cur(const struct upstream *up)
+{
+    const struct upstream_addr_elt *selected =
+        g_ptr_array_index(up->addrs.addr, up->addrs.cur);
+
+    return selected->addr;
+}
+
+/*
+ * Return the upstream's name string; the pointer refers to the upstream's
+ * own storage (no copy is made).
+ */
+const gchar *
+rspamd_upstream_name(struct upstream *up)
+{
+ return up->name;
+}
+
+/*
+ * Return the port of the address the upstream's cursor currently points at.
+ */
+gint rspamd_upstream_port(struct upstream *up)
+{
+    const struct upstream_addr_elt *selected =
+        g_ptr_array_index(up->addrs.addr, up->addrs.cur);
+
+    return rspamd_inet_address_get_port(selected->addr);
+}
+
+/*
+ * Parse one upstream definition and append it to a list.
+ *
+ * With RSPAMD_UPSTREAM_PARSE_DEFAULT the string is either
+ * "service=<srv>+<host>[:<weight>]" (an SRV name "_<srv>._tcp.<host>" is
+ * built and the upstream is flagged for SRV resolution) or anything accepted
+ * by rspamd_parse_host_port_priority(). With RSPAMD_UPSTREAM_PARSE_NAMESERVER
+ * the string must be a literal inet address; def_port is applied when the
+ * address has no port.
+ *
+ * On success the upstream gets copies of the parsed addresses, is registered
+ * in the list (and in the library context, if any), receives a uid derived
+ * from a hash of its name and is marked active.
+ *
+ * @param ups list to extend
+ * @param str NUL-terminated upstream definition
+ * @param def_port port used when the definition carries none
+ * @param parse_type how to interpret str
+ * @param data opaque user data stored in the upstream (upstream->ud)
+ * @return TRUE if the upstream was added, FALSE if str could not be parsed
+ */
+gboolean
+rspamd_upstreams_add_upstream(struct upstream_list *ups, const gchar *str,
+ guint16 def_port, enum rspamd_upstream_parse_type parse_type,
+ void *data)
+{
+ struct upstream *upstream;
+ GPtrArray *addrs = NULL;
+ guint i, slen;
+ rspamd_inet_addr_t *addr;
+ enum rspamd_parse_host_port_result ret = RSPAMD_PARSE_ADDR_FAIL;
+
+ upstream = g_malloc0(sizeof(*upstream));
+ slen = strlen(str);
+
+ switch (parse_type) {
+ case RSPAMD_UPSTREAM_PARSE_DEFAULT:
+ if (slen > sizeof("service=") &&
+ RSPAMD_LEN_CHECK_STARTS_WITH(str, slen, "service=")) {
+ const gchar *plus_pos, *service_pos, *semicolon_pos;
+
+ /* Accept service=srv_name+hostname[:priority] */
+ service_pos = str + sizeof("service=") - 1;
+ plus_pos = strchr(service_pos, '+');
+
+ /* Without a '+' ret stays RSPAMD_PARSE_ADDR_FAIL and the string is rejected */
+ if (plus_pos != NULL) {
+ semicolon_pos = strchr(plus_pos + 1, ':');
+
+ if (semicolon_pos) {
+ /* The number after ':' is stored as the upstream weight */
+ upstream->weight = strtoul(semicolon_pos + 1, NULL, 10);
+ }
+ else {
+ /* No suffix: the domain runs to the end of the string */
+ semicolon_pos = plus_pos + strlen(plus_pos);
+ }
+
+ /*
+ * Now our name is _service._tcp.<domain>
+ * where <domain> is string between semicolon_pos and plus_pos +1
+ * while service is a string between service_pos and plus_pos
+ */
+ guint namelen = (semicolon_pos - (plus_pos + 1)) +
+ (plus_pos - service_pos) +
+ (sizeof("tcp") - 1) +
+ 4;
+ /*
+ * NOTE(review): this array stays empty on the SRV path and no free
+ * of it is visible in this function - confirm ownership.
+ */
+ addrs = g_ptr_array_sized_new(1);
+ upstream->name = ups->ctx ? rspamd_mempool_alloc(ups->ctx->pool, namelen + 1) : g_malloc(namelen + 1);
+
+ rspamd_snprintf(upstream->name, namelen + 1,
+ "_%*s._tcp.%*s",
+ (gint) (plus_pos - service_pos), service_pos,
+ (gint) (semicolon_pos - (plus_pos + 1)), plus_pos + 1);
+ upstream->flags |= RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE;
+ ret = RSPAMD_PARSE_ADDR_RESOLVED;
+ }
+ }
+ else {
+ ret = rspamd_parse_host_port_priority(str, &addrs,
+ &upstream->weight,
+ &upstream->name, def_port,
+ FALSE,
+ ups->ctx ? ups->ctx->pool : NULL);
+ }
+ break;
+ case RSPAMD_UPSTREAM_PARSE_NAMESERVER:
+ addrs = g_ptr_array_sized_new(1);
+ if (rspamd_parse_inet_address(&addr, str, strlen(str),
+ RSPAMD_INET_ADDRESS_PARSE_DEFAULT)) {
+ if (ups->ctx) {
+ upstream->name = rspamd_mempool_strdup(ups->ctx->pool, str);
+ }
+ else {
+ upstream->name = g_strdup(str);
+ }
+ if (rspamd_inet_address_get_port(addr) == 0) {
+ rspamd_inet_address_set_port(addr, def_port);
+ }
+
+ g_ptr_array_add(addrs, addr);
+ ret = RSPAMD_PARSE_ADDR_NUMERIC;
+
+ /* With a context, the parsed address and the array are released with the pool */
+ if (ups->ctx) {
+ rspamd_mempool_add_destructor(ups->ctx->pool,
+ (rspamd_mempool_destruct_t) rspamd_inet_address_free,
+ addr);
+ rspamd_mempool_add_destructor(ups->ctx->pool,
+ (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
+ addrs);
+ }
+ }
+ else {
+ g_ptr_array_free(addrs, TRUE);
+ }
+
+ break;
+ }
+
+ if (ret == RSPAMD_PARSE_ADDR_FAIL) {
+ g_free(upstream);
+ return FALSE;
+ }
+ else {
+ upstream->flags |= ups->flags;
+
+ if (ret == RSPAMD_PARSE_ADDR_NUMERIC) {
+ /* Add noresolve flag */
+ upstream->flags |= RSPAMD_UPSTREAM_FLAG_NORESOLVE;
+ }
+ /* The upstream keeps its own copies of the parsed addresses */
+ for (i = 0; i < addrs->len; i++) {
+ addr = g_ptr_array_index(addrs, i);
+ rspamd_upstream_add_addr(upstream, rspamd_inet_address_copy(addr, NULL));
+ }
+ }
+
+ if (upstream->weight == 0 && ups->rot_alg == RSPAMD_UPSTREAM_MASTER_SLAVE) {
+ /* Special heuristic for master-slave rotation */
+ if (ups->ups->len == 0) {
+ /* Prioritize the first */
+ upstream->weight = 1;
+ }
+ }
+
+ g_ptr_array_add(ups->ups, upstream);
+ upstream->ud = data;
+ upstream->cur_weight = upstream->weight;
+ upstream->ls = ups;
+ REF_INIT_RETAIN(upstream, rspamd_upstream_dtor);
+#ifdef UPSTREAMS_THREAD_SAFE
+ upstream->lock = rspamd_mutex_new();
+#endif
+ upstream->ctx = ups->ctx;
+
+ if (upstream->ctx) {
+ REF_RETAIN(ups->ctx);
+ g_queue_push_tail(ups->ctx->upstreams, upstream);
+ /* Remember our queue link; the destructor uses it to unlink the upstream */
+ upstream->ctx_pos = g_queue_peek_tail_link(ups->ctx->upstreams);
+ }
+
+ /* uid is the base32 form of a hash of the name */
+ guint h = rspamd_cryptobox_fast_hash(upstream->name,
+ strlen(upstream->name), 0);
+ memset(upstream->uid, 0, sizeof(upstream->uid));
+ rspamd_encode_base32_buf((const guchar *) &h, sizeof(h),
+ upstream->uid, sizeof(upstream->uid) - 1, RSPAMD_BASE32_DEFAULT);
+
+ msg_debug_upstream("added upstream %s (%s)", upstream->name,
+ upstream->flags & RSPAMD_UPSTREAM_FLAG_NORESOLVE ? "numeric ip" : "DNS name");
+ /*
+ * NOTE(review): on the SRV path no address was added above, so
+ * upstream->addrs.addr is still NULL here - confirm this is acceptable.
+ */
+ g_ptr_array_sort(upstream->addrs.addr, rspamd_upstream_addr_sort_func);
+ rspamd_upstream_set_active(ups, upstream);
+
+ return TRUE;
+}
+
+/*
+ * Replace the flags of a list. The list flags are OR-ed into every upstream
+ * added afterwards by rspamd_upstreams_add_upstream().
+ */
+void rspamd_upstreams_set_flags(struct upstream_list *ups,
+ enum rspamd_upstream_flag flags)
+{
+ ups->flags = flags;
+}
+
+/*
+ * Set the rotation algorithm of a list (stored in ups->rot_alg).
+ */
+void rspamd_upstreams_set_rotation(struct upstream_list *ups,
+ enum rspamd_upstream_rotation rot)
+{
+ ups->rot_alg = rot;
+}
+
+/*
+ * Append an address to an upstream and keep the address array sorted.
+ *
+ * The address pointer is stored as is (no copy) in a new element; the array
+ * is created on first use with rspamd_upstream_addr_elt_dtor as its element
+ * destructor. The array is re-sorted after every insertion.
+ *
+ * @return always TRUE
+ */
+gboolean
+rspamd_upstream_add_addr(struct upstream *up, rspamd_inet_addr_t *addr)
+{
+    struct upstream_addr_elt *fresh = g_malloc0(sizeof(*fresh));
+
+    fresh->addr = addr;
+
+    if (!up->addrs.addr) {
+        up->addrs.addr = g_ptr_array_new_full(8, rspamd_upstream_addr_elt_dtor);
+    }
+
+    /*
+     * XXX: slow and inefficient
+     */
+    g_ptr_array_add(up->addrs.addr, fresh);
+    g_ptr_array_sort(up->addrs.addr, rspamd_upstream_addr_sort_func);
+
+    return TRUE;
+}
+
+/*
+ * Parse a line with one or more upstream definitions.
+ *
+ * An optional leading "random:", "master-slave:", "round-robin:" or "hash:"
+ * selects the rotation algorithm of the list. The rest is split on any of
+ * ';', ',', space, newline, carriage return or tab, and every non-empty
+ * token is passed to rspamd_upstreams_add_upstream() with the default parse
+ * type. The first line ever parsed for a list is remembered in ups->ups_line.
+ *
+ * @param ups list to fill
+ * @param str line to parse, need not be NUL-terminated
+ * @param len length of str
+ * @param def_port default port for definitions without a port
+ * @param data opaque user data attached to every created upstream
+ * @return TRUE if at least one upstream was added
+ */
+gboolean
+rspamd_upstreams_parse_line_len(struct upstream_list *ups,
+ const gchar *str, gsize len, guint16 def_port, void *data)
+{
+ const gchar *end = str + len, *p = str;
+ const gchar *separators = ";, \n\r\t";
+ gchar *tmp;
+ guint span_len;
+ gboolean ret = FALSE;
+
+ if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "random:")) {
+ ups->rot_alg = RSPAMD_UPSTREAM_RANDOM;
+ p += sizeof("random:") - 1;
+ }
+ else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "master-slave:")) {
+ ups->rot_alg = RSPAMD_UPSTREAM_MASTER_SLAVE;
+ p += sizeof("master-slave:") - 1;
+ }
+ else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "round-robin:")) {
+ ups->rot_alg = RSPAMD_UPSTREAM_ROUND_ROBIN;
+ p += sizeof("round-robin:") - 1;
+ }
+ else if (RSPAMD_LEN_CHECK_STARTS_WITH(p, len, "hash:")) {
+ ups->rot_alg = RSPAMD_UPSTREAM_HASHED;
+ p += sizeof("hash:") - 1;
+ }
+
+ while (p < end) {
+ /* Length of the next token, i.e. the run of non-separator bytes */
+ span_len = rspamd_memcspn(p, separators, end - p);
+
+ if (span_len > 0) {
+ /* Make a NUL-terminated copy since add_upstream expects a C string */
+ tmp = g_malloc(span_len + 1);
+ rspamd_strlcpy(tmp, p, span_len + 1);
+
+ if (rspamd_upstreams_add_upstream(ups, tmp, def_port,
+ RSPAMD_UPSTREAM_PARSE_DEFAULT,
+ data)) {
+ ret = TRUE;
+ }
+
+ g_free(tmp);
+ }
+
+ p += span_len;
+ /* Skip separators */
+ if (p < end) {
+ p += rspamd_memspn(p, separators, end - p);
+ }
+ }
+
+ if (!ups->ups_line) {
+ /* Stored with any rotation prefix included, since str (not p) is copied */
+ ups->ups_line = g_malloc(len + 1);
+ rspamd_strlcpy(ups->ups_line, str, len + 1);
+ }
+
+ return ret;
+}
+
+
+/*
+ * Convenience wrapper around rspamd_upstreams_parse_line_len() for a
+ * NUL-terminated string; the length is taken with strlen().
+ */
+gboolean
+rspamd_upstreams_parse_line(struct upstream_list *ups,
+ const gchar *str, guint16 def_port, void *data)
+{
+ return rspamd_upstreams_parse_line_len(ups, str, strlen(str),
+ def_port, data);
+}
+
+/*
+ * Add upstreams from a UCL object.
+ *
+ * Every string element reached by iterating `in` is parsed as an upstream
+ * line; elements of other types are skipped.
+ *
+ * @param ups list to fill
+ * @param in UCL object to iterate
+ * @param def_port default port for definitions without a port
+ * @param data opaque user data attached to every created upstream
+ * @return TRUE if at least one element yielded an upstream, matching the
+ *         "any success" result of rspamd_upstreams_parse_line_len()
+ */
+gboolean
+rspamd_upstreams_from_ucl(struct upstream_list *ups,
+                          const ucl_object_t *in, guint16 def_port, void *data)
+{
+    gboolean ret = FALSE;
+    const ucl_object_t *cur;
+    ucl_object_iter_t it = NULL;
+
+    it = ucl_object_iterate_new(in);
+
+    while ((cur = ucl_object_iterate_safe(it, true)) != NULL) {
+        if (ucl_object_type(cur) == UCL_STRING) {
+            /*
+             * Accumulate instead of overwriting: a later element that fails
+             * to parse must not hide upstreams that were already added.
+             */
+            if (rspamd_upstreams_parse_line(ups, ucl_object_tostring(cur),
+                                            def_port, data)) {
+                ret = TRUE;
+            }
+        }
+    }
+
+    ucl_object_iterate_free(it);
+
+    return ret;
+}
+
+/*
+ * Destroy an upstream list (a NULL list is accepted and ignored).
+ *
+ * Every upstream is detached from the list (ls set to NULL) and the list's
+ * reference to it is released; upstreams that are still referenced elsewhere
+ * outlive the list. Watchers are destroyed with their dtor callbacks, then
+ * the stored line, the arrays, the lock and the list itself are freed.
+ */
+void rspamd_upstreams_destroy(struct upstream_list *ups)
+{
+    struct upstream_list_watcher *watcher, *watcher_next;
+    guint idx;
+
+    if (ups == NULL) {
+        return;
+    }
+
+    g_ptr_array_free(ups->alive, TRUE);
+
+    for (idx = 0; idx < ups->ups->len; idx++) {
+        struct upstream *member = g_ptr_array_index(ups->ups, idx);
+
+        member->ls = NULL;
+        REF_RELEASE(member);
+    }
+
+    DL_FOREACH_SAFE(ups->watchers, watcher, watcher_next)
+    {
+        if (watcher->dtor) {
+            watcher->dtor(watcher->ud);
+        }
+        g_free(watcher);
+    }
+
+    g_free(ups->ups_line);
+    g_ptr_array_free(ups->ups, TRUE);
+#ifdef UPSTREAMS_THREAD_SAFE
+    rspamd_mutex_free(ups->lock);
+#endif
+    g_free(ups);
+}
+
+/*
+ * g_ptr_array_foreach() callback used when a list has no alive upstreams:
+ * stops a pending timer of the upstream, appends the upstream to the alive
+ * array, notifies ONLINE watchers and releases one reference ("for revive
+ * event", i.e. presumably the one taken in rspamd_upstream_set_inactive()).
+ *
+ * @param elt the upstream (struct upstream *)
+ * @param ls the list being revived (struct upstream_list *)
+ */
+static void
+rspamd_upstream_restore_cb(gpointer elt, gpointer ls)
+{
+ struct upstream *up = (struct upstream *) elt;
+ struct upstream_list *ups = (struct upstream_list *) ls;
+ struct upstream_list_watcher *w;
+
+ /* Here the upstreams list is already locked */
+ RSPAMD_UPSTREAM_LOCK(up);
+
+ if (ev_can_stop(&up->ev)) {
+ ev_timer_stop(up->ctx->event_loop, &up->ev);
+ }
+
+ g_ptr_array_add(ups->alive, up);
+ /* The upstream was appended, so its index is the last one */
+ up->active_idx = ups->alive->len - 1;
+ RSPAMD_UPSTREAM_UNLOCK(up);
+
+ DL_FOREACH(up->ls->watchers, w)
+ {
+ if (w->events_mask & RSPAMD_UPSTREAM_WATCH_ONLINE) {
+ w->func(up, RSPAMD_UPSTREAM_WATCH_ONLINE, up->errors, w->ud);
+ }
+ }
+
+ /* For revive event */
+ g_assert(up->ref.refcount > 1);
+ REF_RELEASE(up);
+}
+
+/*
+ * Pick a random alive upstream, skipping `except` when possible.
+ *
+ * @param ups list to pick from
+ * @param except upstream to avoid, may be NULL
+ * @return a random alive upstream; the only alive upstream when there is
+ *         exactly one (even if it equals `except`, since nothing else can
+ *         be offered); NULL when no upstream is alive
+ */
+static struct upstream *
+rspamd_upstream_get_random(struct upstream_list *ups,
+                           struct upstream *except)
+{
+    guint nalive = ups->alive->len;
+
+    if (nalive == 0) {
+        /* nalive - 1 would wrap around and index far outside the array */
+        return NULL;
+    }
+
+    if (nalive == 1) {
+        /*
+         * Retrying cannot produce another candidate; without this shortcut
+         * the loop below never terminates when the sole upstream is excluded.
+         */
+        return g_ptr_array_index(ups->alive, 0);
+    }
+
+    for (;;) {
+        /* The upper bound passed to ottery_rand_range() is inclusive */
+        guint idx = ottery_rand_range(nalive - 1);
+        struct upstream *up;
+
+        up = g_ptr_array_index(ups->alive, idx);
+
+        if (except && up == except) {
+            continue;
+        }
+
+        return up;
+    }
+}
+
+/*
+ * Weighted selection among alive upstreams.
+ *
+ * The upstream with the largest weight is chosen: cur_weight when use_cur is
+ * TRUE (round-robin), the static weight otherwise (master-slave). When every
+ * candidate has zero weight, the least used upstream is chosen instead, with
+ * a penalty of two "uses" per recorded error. With use_cur the selected
+ * upstream's cur_weight is decremented, or refilled from weight once it
+ * reaches zero.
+ *
+ * @param ups list to pick from
+ * @param except upstream to skip, may be NULL
+ * @param use_cur TRUE to use and update cur_weight, FALSE to use weight
+ * @return selected upstream or NULL if there is no candidate
+ */
+static struct upstream *
+rspamd_upstream_get_round_robin(struct upstream_list *ups,
+                                struct upstream *except,
+                                gboolean use_cur)
+{
+    guint max_weight = 0, min_checked = G_MAXUINT, min_score = G_MAXUINT;
+    struct upstream *up = NULL, *selected = NULL, *min_checked_sel = NULL;
+    guint i;
+
+    /* Select upstream with the maximum cur_weight */
+    RSPAMD_UPSTREAM_LOCK(ups);
+
+    for (i = 0; i < ups->alive->len; i++) {
+        guint score;
+
+        up = g_ptr_array_index(ups->alive, i);
+
+        if (except != NULL && up == except) {
+            continue;
+        }
+
+        if (use_cur) {
+            if (up->cur_weight > max_weight) {
+                selected = up;
+                max_weight = up->cur_weight;
+            }
+        }
+        else {
+            if (up->weight > max_weight) {
+                selected = up;
+                max_weight = up->weight;
+            }
+        }
+
+        /*
+         * This code is used when all upstreams have zero weight
+         * The logic is to select least currently used upstream and penalise
+         * upstream with errors. The error penalty should not be too high
+         * to avoid sudden traffic drop in this case.
+         */
+        score = up->checked + up->errors * 2;
+
+        if (score < min_score) {
+            /*
+             * Compare penalised score against penalised score: remembering
+             * only the bare usage counter would make the result depend on
+             * the order of the upstreams.
+             */
+            min_checked_sel = up;
+            min_score = score;
+            /* The bare counter is still what the overflow check below needs */
+            min_checked = up->checked;
+        }
+    }
+
+    if (max_weight == 0) {
+        /* All upstreams have zero weight */
+        if (min_checked > G_MAXUINT / 2) {
+            /* Reset all checked counters to avoid overflow */
+            for (i = 0; i < ups->alive->len; i++) {
+                up = g_ptr_array_index(ups->alive, i);
+                up->checked = 0;
+            }
+        }
+
+        selected = min_checked_sel;
+    }
+
+    if (use_cur && selected) {
+        if (selected->cur_weight > 0) {
+            selected->cur_weight--;
+        }
+        else {
+            selected->cur_weight = selected->weight;
+        }
+    }
+
+    RSPAMD_UPSTREAM_UNLOCK(ups);
+
+    return selected;
+}
+
+/*
+ * The key idea of this function is obtained from the following paper:
+ * A Fast, Minimal Memory, Consistent Hash Algorithm
+ * John Lamping, Eric Veach
+ *
+ * http://arxiv.org/abs/1406.2294
+ *
+ * Maps a 64 bit key to a bucket number below nbuckets. For nbuckets == 0 the
+ * loop body never runs and the initial -1 is returned converted to guint32.
+ *
+ * NOTE(review): the paper's recurrence is key = key * 2862933555777941757 + 1,
+ * whereas the compound assignment below multiplies the key by
+ * (2862933555777941757 + 1). Changing it would move keys to other buckets,
+ * so confirm the impact on existing deployments before touching it.
+ */
+static guint32
+rspamd_consistent_hash(guint64 key, guint32 nbuckets)
+{
+ gint64 b = -1, j = 0;
+
+ while (j < nbuckets) {
+ b = j;
+ key *= 2862933555777941757ULL + 1;
+ j = (b + 1) * (double) (1ULL << 31) / (double) ((key >> 33) + 1ULL);
+ }
+
+ return b;
+}
+
+/*
+ * Select an upstream by consistent hashing of a key.
+ *
+ * The key is hashed to 64 bits and mapped onto the full upstream array (not
+ * only the alive ones). When the chosen upstream is inactive or excluded,
+ * the hash is stepped and the lookup repeated, up to max_tries times. If no
+ * acceptable upstream is found that way, a random alive one is used instead.
+ *
+ * @param ups list to pick from
+ * @param except upstream to skip, may be NULL
+ * @param key hashing key
+ * @param keylen length of key
+ * @return selected upstream, or NULL if the list has no usable upstream
+ */
+static struct upstream *
+rspamd_upstream_get_hashed(struct upstream_list *ups,
+                           struct upstream *except,
+                           const guint8 *key, guint keylen)
+{
+    guint64 k;
+    guint32 idx;
+    static const guint max_tries = 20;
+    struct upstream *up = NULL;
+    gboolean found = FALSE;
+
+    if (ups->ups->len == 0) {
+        /* With zero buckets the hash yields no valid index */
+        return NULL;
+    }
+
+    /* Generate 64 bits input key */
+    k = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64,
+                                            key, keylen, ups->hash_seed);
+
+    RSPAMD_UPSTREAM_LOCK(ups);
+    /*
+     * Select new upstream from all upstreams
+     */
+    for (guint i = 0; i < max_tries; i++) {
+        idx = rspamd_consistent_hash(k, ups->ups->len);
+        up = g_ptr_array_index(ups->ups, idx);
+
+        if (up->active_idx < 0 || (except != NULL && up == except)) {
+            /* Found inactive or excluded upstream */
+            k = mum_hash_step(k, ups->hash_seed);
+        }
+        else {
+            found = TRUE;
+            break;
+        }
+    }
+    RSPAMD_UPSTREAM_UNLOCK(ups);
+
+    if (found) {
+        /*
+         * Testing only active_idx here would hand back `except` when the
+         * last try happened to land on it.
+         */
+        return up;
+    }
+
+    /* We failed to find any active upstream */
+    up = rspamd_upstream_get_random(ups, except);
+
+    if (up != NULL) {
+        msg_info("failed to find hashed upstream for %s, fallback to random: %s",
+                 ups->ups_line, up->name);
+    }
+
+    return up;
+}
+
+/*
+ * Common implementation of the upstream selection functions.
+ *
+ * If no upstream is alive, all upstreams of the list are revived first.
+ * A list with exactly one alive upstream returns it directly (unless the
+ * requested type is SEQUENTIAL), without consulting `except`. Otherwise the
+ * effective algorithm is the list's rot_alg with default_type as fallback,
+ * or the other way round when `forced` is set; HASHED without a key degrades
+ * to RANDOM. The `checked` counter of the returned upstream is incremented.
+ *
+ * @param ups list to pick from
+ * @param except upstream to avoid, may be NULL
+ * @param default_type algorithm requested by the caller
+ * @param key key for hashed rotation, may be NULL
+ * @param keylen length of key
+ * @param forced TRUE if default_type takes precedence over the list setting
+ * @return selected upstream; NULL when a sequential walk reached the end of
+ *         the alive array or the chosen algorithm found no candidate
+ */
+static struct upstream *
+rspamd_upstream_get_common(struct upstream_list *ups,
+ struct upstream *except,
+ enum rspamd_upstream_rotation default_type,
+ const guchar *key, gsize keylen,
+ gboolean forced)
+{
+ enum rspamd_upstream_rotation type;
+ struct upstream *up = NULL;
+
+ RSPAMD_UPSTREAM_LOCK(ups);
+ if (ups->alive->len == 0) {
+ /* We have no upstreams alive */
+ msg_warn("there are no alive upstreams left for %s, revive all of them",
+ ups->ups_line);
+ g_ptr_array_foreach(ups->ups, rspamd_upstream_restore_cb, ups);
+ }
+ RSPAMD_UPSTREAM_UNLOCK(ups);
+
+ if (ups->alive->len == 1 && default_type != RSPAMD_UPSTREAM_SEQUENTIAL) {
+ /* Fast path */
+ up = g_ptr_array_index(ups->alive, 0);
+ goto end;
+ }
+
+ if (!forced) {
+ type = ups->rot_alg != RSPAMD_UPSTREAM_UNDEF ? ups->rot_alg : default_type;
+ }
+ else {
+ type = default_type != RSPAMD_UPSTREAM_UNDEF ? default_type : ups->rot_alg;
+ }
+
+ if (type == RSPAMD_UPSTREAM_HASHED && (keylen == 0 || key == NULL)) {
+ /* Cannot use hashed rotation when no key is specified, switch to random */
+ type = RSPAMD_UPSTREAM_RANDOM;
+ }
+
+ switch (type) {
+ default:
+ case RSPAMD_UPSTREAM_RANDOM:
+ up = rspamd_upstream_get_random(ups, except);
+ break;
+ case RSPAMD_UPSTREAM_HASHED:
+ up = rspamd_upstream_get_hashed(ups, except, key, keylen);
+ break;
+ case RSPAMD_UPSTREAM_ROUND_ROBIN:
+ up = rspamd_upstream_get_round_robin(ups, except, TRUE);
+ break;
+ case RSPAMD_UPSTREAM_MASTER_SLAVE:
+ up = rspamd_upstream_get_round_robin(ups, except, FALSE);
+ break;
+ case RSPAMD_UPSTREAM_SEQUENTIAL:
+ /* End of the walk: rewind the cursor and signal it with NULL */
+ if (ups->cur_elt >= ups->alive->len) {
+ ups->cur_elt = 0;
+ return NULL;
+ }
+
+ up = g_ptr_array_index(ups->alive, ups->cur_elt++);
+ break;
+ }
+
+end:
+ if (up) {
+ up->checked++;
+ }
+
+ return up;
+}
+
+/*
+ * Select an upstream from a list. The list's own rotation algorithm is used
+ * when it is set; default_type applies only otherwise (non-forced mode of
+ * rspamd_upstream_get_common()). No upstream is excluded.
+ */
+struct upstream *
+rspamd_upstream_get(struct upstream_list *ups,
+ enum rspamd_upstream_rotation default_type,
+ const guchar *key, gsize keylen)
+{
+ return rspamd_upstream_get_common(ups, NULL, default_type, key, keylen, FALSE);
+}
+
+/*
+ * Select an upstream from a list with forced_type taking precedence over the
+ * list's own rotation algorithm (the latter is used only when forced_type is
+ * RSPAMD_UPSTREAM_UNDEF). No upstream is excluded.
+ */
+struct upstream *
+rspamd_upstream_get_forced(struct upstream_list *ups,
+ enum rspamd_upstream_rotation forced_type,
+ const guchar *key, gsize keylen)
+{
+ return rspamd_upstream_get_common(ups, NULL, forced_type, key, keylen, TRUE);
+}
+
+/*
+ * Same as rspamd_upstream_get(), but asks the selection algorithms to avoid
+ * the upstream `except`.
+ */
+struct upstream *rspamd_upstream_get_except(struct upstream_list *ups,
+ struct upstream *except,
+ enum rspamd_upstream_rotation default_type,
+ const guchar *key, gsize keylen)
+{
+ return rspamd_upstream_get_common(ups, except, default_type, key, keylen, FALSE);
+}
+
+/*
+ * Trigger DNS re-resolution for every upstream registered in a context.
+ *
+ * Upstreams whose list has already been destroyed (ls == NULL) are skipped:
+ * they have no limits to take DNS timeouts from. Each upstream is pinned
+ * with a temporary reference while it is being processed.
+ */
+void rspamd_upstream_reresolve(struct upstream_ctx *ctx)
+{
+    GList *cur, *next;
+    struct upstream *up;
+
+    for (cur = ctx->upstreams->head; cur != NULL; cur = next) {
+        /* Fetch the successor first so the walk never depends on `cur` afterwards */
+        next = g_list_next(cur);
+        up = cur->data;
+
+        if (up->ls == NULL) {
+            continue;
+        }
+
+        REF_RETAIN(up);
+        rspamd_upstream_resolve_addrs(up->ls, up);
+        REF_RELEASE(up);
+    }
+}
+
+/*
+ * Attach an opaque pointer to an upstream (stored in up->data).
+ * @return the pointer that was stored before
+ */
+gpointer
+rspamd_upstream_set_data(struct upstream *up, gpointer data)
+{
+ gpointer prev_data = up->data;
+ up->data = data;
+
+ return prev_data;
+}
+
+/*
+ * Return the opaque pointer stored by rspamd_upstream_set_data().
+ */
+gpointer
+rspamd_upstream_get_data(struct upstream *up)
+{
+ return up->data;
+}
+
+
+/*
+ * Call cb for every upstream of a list (alive or not), in array order.
+ * The callback receives the upstream, its index in the list and ud.
+ */
+void rspamd_upstreams_foreach(struct upstream_list *ups,
+                              rspamd_upstream_traverse_func cb, void *ud)
+{
+    guint pos = 0;
+
+    while (pos < ups->ups->len) {
+        struct upstream *member = g_ptr_array_index(ups->ups, pos);
+
+        cb(member, pos, ud);
+        pos++;
+    }
+}
+
+/*
+ * Override selected limits for one list.
+ *
+ * A private copy of the current limits is allocated from the context's
+ * memory pool, the given values are applied to the copy, and the list is
+ * pointed at it. Floating point arguments that are NaN and integer
+ * arguments that are 0 leave the corresponding limit unchanged.
+ * NOTE(review): ups->ctx is dereferenced unconditionally although
+ * rspamd_upstreams_create() accepts a NULL context - confirm callers never
+ * use this function on a context-less list.
+ */
+void rspamd_upstreams_set_limits(struct upstream_list *ups,
+ gdouble revive_time,
+ gdouble revive_jitter,
+ gdouble error_time,
+ gdouble dns_timeout,
+ guint max_errors,
+ guint dns_retransmits)
+{
+ struct upstream_limits *nlimits;
+ g_assert(ups != NULL);
+
+ nlimits = rspamd_mempool_alloc(ups->ctx->pool, sizeof(*nlimits));
+ /* Start from the limits currently in effect */
+ memcpy(nlimits, ups->limits, sizeof(*nlimits));
+
+ if (!isnan(revive_time)) {
+ nlimits->revive_time = revive_time;
+ }
+
+ if (!isnan(revive_jitter)) {
+ nlimits->revive_jitter = revive_jitter;
+ }
+
+ if (!isnan(error_time)) {
+ nlimits->error_time = error_time;
+ }
+
+ if (!isnan(dns_timeout)) {
+ nlimits->dns_timeout = dns_timeout;
+ }
+
+ if (max_errors > 0) {
+ nlimits->max_errors = max_errors;
+ }
+
+ if (dns_retransmits > 0) {
+ nlimits->dns_retransmits = dns_retransmits;
+ }
+
+ ups->limits = nlimits;
+}
+
+/*
+ * Register a watcher on a list.
+ *
+ * @param ups list to watch
+ * @param events mask of events to deliver; must contain at least one event
+ *               from RSPAMD_UPSTREAM_WATCH_ALL (asserted)
+ * @param func callback invoked for matching events
+ * @param dtor optional destructor, called with ud when the list is destroyed
+ * @param ud opaque user data passed to func and dtor
+ */
+void rspamd_upstreams_add_watch_callback(struct upstream_list *ups,
+                                         enum rspamd_upstreams_watch_event events,
+                                         rspamd_upstream_watch_func func,
+                                         GFreeFunc dtor,
+                                         gpointer ud)
+{
+    struct upstream_list_watcher *watcher;
+
+    g_assert((events & RSPAMD_UPSTREAM_WATCH_ALL) != 0);
+
+    watcher = g_malloc(sizeof(*watcher));
+    watcher->events_mask = events;
+    watcher->func = func;
+    watcher->dtor = dtor;
+    watcher->ud = ud;
+
+    /* Watchers are called in registration order */
+    DL_APPEND(ups->watchers, watcher);
+}
+
+/*
+ * Take an additional reference on an upstream.
+ * @return the same upstream pointer, for call chaining
+ */
+struct upstream *
+rspamd_upstream_ref(struct upstream *up)
+{
+ REF_RETAIN(up);
+ return up;
+}
+
+/*
+ * Release a reference on an upstream (counterpart of rspamd_upstream_ref()).
+ */
+void rspamd_upstream_unref(struct upstream *up)
+{
+ REF_RELEASE(up);
+}
diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h
new file mode 100644
index 0000000..22a020c
--- /dev/null
+++ b/src/libutil/upstream.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UPSTREAM_H
+#define UPSTREAM_H
+
+#include "config.h"
+#include "util.h"
+#include "rdns.h"
+#include "ucl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declaration */
+struct ev_loop;
+
+enum rspamd_upstream_rotation {
+ RSPAMD_UPSTREAM_RANDOM = 0,
+ RSPAMD_UPSTREAM_HASHED,
+ RSPAMD_UPSTREAM_ROUND_ROBIN,
+ RSPAMD_UPSTREAM_MASTER_SLAVE,
+ RSPAMD_UPSTREAM_SEQUENTIAL,
+ RSPAMD_UPSTREAM_UNDEF
+};
+
+enum rspamd_upstream_flag {
+ RSPAMD_UPSTREAM_FLAG_NORESOLVE = (1 << 0),
+ RSPAMD_UPSTREAM_FLAG_SRV_RESOLVE = (1 << 1),
+};
+
+struct rspamd_config;
+/* Opaque upstream structures */
+struct upstream;
+struct upstream_list;
+struct upstream_ctx;
+
+/**
+ * Init upstreams library
+ * @return upstreams library context
+ */
+struct upstream_ctx *rspamd_upstreams_library_init(void);
+
+/**
+ * Remove reference from upstreams library
+ */
+void rspamd_upstreams_library_unref(struct upstream_ctx *ctx);
+
+/**
+ * Configure attributes of upstreams library
+ * @param cfg
+ */
+void rspamd_upstreams_library_config(struct rspamd_config *cfg,
+ struct upstream_ctx *ctx, struct ev_loop *event_loop,
+ struct rdns_resolver *resolver);
+
+/**
+ * Upstream error logic
+ * 1. During error time we count upstream_ok and upstream_fail
+ * 2. If failcount is more than maxerrors then we mark upstream as unavailable for dead time
+ * 3. After dead time we mark upstream as alive and go to the step 1
+ * 4. If all upstreams are dead, marks every upstream as alive
+ */
+
+/**
+ * Add an error to an upstream
+ */
+void rspamd_upstream_fail(struct upstream *upstream, gboolean addr_failure, const gchar *reason);
+
+/**
+ * Increase upstream successes count
+ */
+void rspamd_upstream_ok(struct upstream *up);
+
+/**
+ * Set weight for an upstream
+ * @param up
+ */
+void rspamd_upstream_set_weight(struct upstream *up, guint weight);
+
+/**
+ * Create new list of upstreams
+ * @return
+ */
+struct upstream_list *rspamd_upstreams_create(struct upstream_ctx *ctx);
+
+/**
+ * Sets specific flag to the upstream list
+ * @param ups
+ * @param flags
+ */
+void rspamd_upstreams_set_flags(struct upstream_list *ups,
+ enum rspamd_upstream_flag flags);
+
+/**
+ * Sets custom limits for upstreams
+ * This function allocates memory from the upstreams ctx pool and should
+ * not be called in cycles/constantly as this memory is likely persistent
+ * @param ups
+ * @param revive_time
+ * @param revive_jitter
+ * @param error_time
+ * @param dns_timeout
+ * @param max_errors
+ * @param dns_retransmits
+ */
+void rspamd_upstreams_set_limits(struct upstream_list *ups,
+ gdouble revive_time,
+ gdouble revive_jitter,
+ gdouble error_time,
+ gdouble dns_timeout,
+ guint max_errors,
+ guint dns_retransmits);
+
+/**
+ * Sets rotation policy for upstreams list
+ * @param ups
+ * @param rot
+ */
+void rspamd_upstreams_set_rotation(struct upstream_list *ups,
+ enum rspamd_upstream_rotation rot);
+
+/**
+ * Destroy list of upstreams
+ * @param ups
+ */
+void rspamd_upstreams_destroy(struct upstream_list *ups);
+
+/**
+ * Returns count of upstreams in a list
+ * @param ups
+ * @return
+ */
+gsize rspamd_upstreams_count(struct upstream_list *ups);
+
+/**
+ * Returns the number of alive upstreams in the list
+ * @param ups
+ * @return
+ */
+gsize rspamd_upstreams_alive(struct upstream_list *ups);
+
+enum rspamd_upstream_parse_type {
+ RSPAMD_UPSTREAM_PARSE_DEFAULT = 0,
+ RSPAMD_UPSTREAM_PARSE_NAMESERVER,
+};
+
+/**
+ * Add upstream from the string
+ * @param ups upstream list
+ * @param str string in format "name[:port[:priority]]"
+ * @param def_port default port number
+ * @param parse_type how `str` is parsed (one of `enum rspamd_upstream_parse_type`)
+ * @param data optional userdata
+ * @return TRUE if upstream has been added
+ */
+gboolean rspamd_upstreams_add_upstream(struct upstream_list *ups, const gchar *str,
+ guint16 def_port, enum rspamd_upstream_parse_type parse_type,
+ void *data);
+
+/**
+ * Add multiple upstreams from comma, semicolon or space separated line
+ * @param ups upstream list
+ * @param str string in format "(<ups>([<sep>+]<ups>)*)+"
+ * @param def_port default port number
+ * @param data optional userdata
+ * @return TRUE if **any** of upstreams has been added
+ */
+gboolean rspamd_upstreams_parse_line(struct upstream_list *ups,
+ const gchar *str, guint16 def_port, void *data);
+
+
+gboolean rspamd_upstreams_parse_line_len(struct upstream_list *ups,
+ const gchar *str, gsize len,
+ guint16 def_port,
+ void *data);
+
+/**
+ * Parse upstreams list from the UCL object
+ * @param ups
+ * @param in
+ * @param def_port
+ * @param data
+ * @return
+ */
+gboolean rspamd_upstreams_from_ucl(struct upstream_list *ups,
+ const ucl_object_t *in, guint16 def_port, void *data);
+
+
+typedef void (*rspamd_upstream_traverse_func)(struct upstream *up, guint idx,
+ void *ud);
+
+/**
+ * Traverse upstreams list calling the function specified
+ * @param ups
+ * @param cb
+ * @param ud
+ */
+void rspamd_upstreams_foreach(struct upstream_list *ups,
+ rspamd_upstream_traverse_func cb, void *ud);
+
+enum rspamd_upstreams_watch_event {
+ RSPAMD_UPSTREAM_WATCH_SUCCESS = 1u << 0,
+ RSPAMD_UPSTREAM_WATCH_FAILURE = 1u << 1,
+ RSPAMD_UPSTREAM_WATCH_OFFLINE = 1u << 2,
+ RSPAMD_UPSTREAM_WATCH_ONLINE = 1u << 3,
+ RSPAMD_UPSTREAM_WATCH_ALL = (1u << 0) | (1u << 1) | (1u << 2) | (1u << 3),
+};
+
+typedef void (*rspamd_upstream_watch_func)(struct upstream *up,
+ enum rspamd_upstreams_watch_event event,
+ guint cur_errors,
+ void *ud);
+
+/**
+ * Adds new watcher to the upstreams list
+ * @param ups
+ * @param events
+ * @param func
+ * @param free_func destructor callback stored in the watcher alongside `ud`
+ * @param ud
+ */
+void rspamd_upstreams_add_watch_callback(struct upstream_list *ups,
+ enum rspamd_upstreams_watch_event events,
+ rspamd_upstream_watch_func func,
+ GFreeFunc free_func,
+ gpointer ud);
+
+/**
+ * Returns the next IP address of the upstream (internal rotation)
+ * @param up
+ * @return
+ */
+rspamd_inet_addr_t *rspamd_upstream_addr_next(struct upstream *up);
+
+/**
+ * Returns the current IP address of the upstream
+ * @param up
+ * @return
+ */
+rspamd_inet_addr_t *rspamd_upstream_addr_cur(const struct upstream *up);
+
+/**
+ * Add custom address for an upstream (ownership of addr is transferred to upstream)
+ * @param up
+ * @return
+ */
+gboolean rspamd_upstream_add_addr(struct upstream *up,
+ rspamd_inet_addr_t *addr);
+
+/**
+ * Returns the symbolic name of the upstream
+ * @param up
+ * @return
+ */
+const gchar *rspamd_upstream_name(struct upstream *up);
+
+/**
+ * Returns the port of the current address for the upstream
+ * @param up
+ * @return
+ */
+gint rspamd_upstream_port(struct upstream *up);
+
+/**
+ * Sets opaque user data associated with this upstream
+ * @param up
+ * @param data
+ * @return old data
+ */
+gpointer rspamd_upstream_set_data(struct upstream *up, gpointer data);
+
+/**
+ * Gets opaque user data associated with this upstream
+ * @param up
+ * @return
+ */
+gpointer rspamd_upstream_get_data(struct upstream *up);
+
+/**
+ * Get new upstream from the list
+ * @param ups upstream list
+ * @param default_type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments
+ * @return
+ */
+struct upstream *rspamd_upstream_get(struct upstream_list *ups,
+ enum rspamd_upstream_rotation default_type,
+ const guchar *key, gsize keylen);
+
+/**
+ * Get new upstream from the list
+ * @param ups upstream list
+ * @param forced_type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments
+ * @return
+ */
+struct upstream *rspamd_upstream_get_forced(struct upstream_list *ups,
+ enum rspamd_upstream_rotation forced_type,
+ const guchar *key, gsize keylen);
+
+/**
+ * Get new upstream from the list excepting the upstream specified
+ * @param ups upstream list
+ * @param except upstream that must not be returned
+ * @param default_type type of rotation algorithm, for `RSPAMD_UPSTREAM_HASHED` it is required to specify `key` and `keylen` as arguments
+ * @return
+ */
+struct upstream *rspamd_upstream_get_except(struct upstream_list *ups,
+ struct upstream *except,
+ enum rspamd_upstream_rotation default_type,
+ const guchar *key, gsize keylen);
+
+/**
+ * Re-resolve addresses for all upstreams registered
+ */
+void rspamd_upstream_reresolve(struct upstream_ctx *ctx);
+
+/**
+ * Share ownership on upstream
+ * @param up
+ * @return
+ */
+struct upstream *rspamd_upstream_ref(struct upstream *up);
+/**
+ * Unshare ownership on upstream
+ * @param up
+ */
+void rspamd_upstream_unref(struct upstream *up);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* UPSTREAM_H */
diff --git a/src/libutil/uthash_strcase.h b/src/libutil/uthash_strcase.h
new file mode 100644
index 0000000..86075ee
--- /dev/null
+++ b/src/libutil/uthash_strcase.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef UTHASH_STRCASE_H_
+#define UTHASH_STRCASE_H_
+
+#ifdef UTHASH_H
+#error Invalid include order: uthash is already included
+#endif
+
+#include "libcryptobox/cryptobox.h"
+#include "libutil/util.h"
+
+/* Utils for uthash tuning */
+#ifndef HASH_CASELESS
+/*
+ * Case-sensitive variant: hash the raw key bytes with the fast cryptobox hash
+ * seeded by rspamd_hash_seed(), then mask the value into a bucket index
+ * (the mask form assumes num_bkts is a power of two).
+ */
+#define HASH_FUNCTION(key, keylen, num_bkts, hashv, bkt) \
+ do { \
+ hashv = (__typeof(hashv)) rspamd_cryptobox_fast_hash(key, keylen, rspamd_hash_seed()); \
+ bkt = (hashv) & (num_bkts - 1); \
+ } while (0)
+
+#define HASH_KEYCMP(a, b, len) memcmp(a, b, len)
+#else
+/*
+ * Case-insensitive variant: the key is consumed in 8-byte chunks, every byte
+ * is lowercased through lc_map and the chunk is fed to the incremental fast
+ * hash. The 1..7 trailing bytes are lowercased into a zeroed chunk and hashed
+ * as one final update. The result is masked into a bucket index (the mask
+ * form assumes num_bkts is a power of two).
+ *
+ * All eight bytes of a chunk must be lowercased in place: writing the
+ * lowercased bytes 5..8 over bytes 1..4 makes the hash case-sensitive and
+ * breaks lookups of keys that differ only in case.
+ */
+#define HASH_FUNCTION(key, keylen, num_bkts, hashv, bkt) \
+	do { \
+		unsigned _len = (keylen); \
+		rspamd_cryptobox_fast_hash_state_t _hst; \
+		unsigned _leftover = _len % 8; \
+		unsigned _fp, _i; \
+		const uint8_t *_s = (const uint8_t *) (key); \
+		union { \
+			struct { \
+				unsigned char c1, c2, c3, c4, c5, c6, c7, c8; \
+			} c; \
+			uint64_t pp; \
+		} _u; \
+		_fp = _len - _leftover; \
+		rspamd_cryptobox_fast_hash_init(&_hst, rspamd_hash_seed()); \
+		for (_i = 0; _i != _fp; _i += 8) { \
+			_u.c.c1 = lc_map[_s[_i]]; \
+			_u.c.c2 = lc_map[_s[_i + 1]]; \
+			_u.c.c3 = lc_map[_s[_i + 2]]; \
+			_u.c.c4 = lc_map[_s[_i + 3]]; \
+			_u.c.c5 = lc_map[_s[_i + 4]]; \
+			_u.c.c6 = lc_map[_s[_i + 5]]; \
+			_u.c.c7 = lc_map[_s[_i + 6]]; \
+			_u.c.c8 = lc_map[_s[_i + 7]]; \
+			rspamd_cryptobox_fast_hash_update(&_hst, &_u, sizeof(_u)); \
+		} \
+		_u.pp = 0; \
+		switch (_leftover) { \
+		case 7: \
+			/* fallthrough */ _u.c.c7 = lc_map[(unsigned char) _s[_i++]]; \
+		case 6: \
+			/* fallthrough */ _u.c.c6 = lc_map[(unsigned char) _s[_i++]]; \
+		case 5: \
+			/* fallthrough */ _u.c.c5 = lc_map[(unsigned char) _s[_i++]]; \
+		case 4: \
+			/* fallthrough */ _u.c.c4 = lc_map[(unsigned char) _s[_i++]]; \
+		case 3: \
+			/* fallthrough */ _u.c.c3 = lc_map[(unsigned char) _s[_i++]]; \
+		case 2: \
+			/* fallthrough */ _u.c.c2 = lc_map[(unsigned char) _s[_i++]]; \
+		case 1: \
+			/* fallthrough */ _u.c.c1 = lc_map[(unsigned char) _s[_i]]; \
+			rspamd_cryptobox_fast_hash_update(&_hst, &_u, sizeof(_u)); \
+			break; \
+		} \
+		hashv = (__typeof(hashv)) rspamd_cryptobox_fast_hash_final(&_hst); \
+		bkt = (hashv) & ((num_bkts) - 1); \
+	} while (0)
+#define HASH_KEYCMP(a, b, len) rspamd_lc_cmp(a, b, len)
+#endif
+
+#include "uthash.h"
+
+#endif /* UTHASH_STRCASE_H_ */
diff --git a/src/libutil/util.c b/src/libutil/util.c
new file mode 100644
index 0000000..04200e3
--- /dev/null
+++ b/src/libutil/util.c
@@ -0,0 +1,2746 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "unix-std.h"
+
+#include "ottery.h"
+#include "cryptobox.h"
+#include "contrib/libev/ev.h"
+
+#ifdef HAVE_TERMIOS_H
+#include <termios.h>
+#endif
+#ifdef HAVE_READPASSPHRASE_H
+#include <readpassphrase.h>
+#endif
+/* libutil */
+#ifdef HAVE_LIBUTIL_H
+#include <libutil.h>
+#endif
+#ifdef __APPLE__
+#include <mach/mach_time.h>
+#include <mach/mach_init.h>
+#include <mach/thread_act.h>
+#include <mach/mach_port.h>
+#endif
+/* poll */
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
+
+#ifdef HAVE_SIGINFO_H
+#include <siginfo.h>
+#endif
+/* sys/wait */
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif
+/* sys/resource.h */
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#ifdef HAVE_RDTSC
+#ifdef __x86_64__
+#include <x86intrin.h>
+#endif
+#endif
+
+#include <math.h> /* for pow */
+#include <glob.h> /* in fact, we require this file ultimately */
+
+#include "zlib.h"
+#include "contrib/uthash/utlist.h"
+#include "blas-config.h"
+
+/* Check log messages intensity once per minute */
+#define CHECK_TIME 60
+/* More than 2 log messages per second */
+#define BUF_INTENSITY 2
+/* Default connect timeout for sync sockets */
+#define CONNECT_TIMEOUT 3
+
+/*
+ * Should be defined in a single point
+ */
+/*
+ * Table of the password-hashing (KDF) schemes described by
+ * struct rspamd_controller_pbkdf. Each entry carries a display name, a short
+ * alias, a description, the cryptobox KDF type, a scheme id, the complexity
+ * setting and the salt/key lengths in bytes. Both entries use a 20-byte salt
+ * and a key of half the cryptobox hash size.
+ */
+const struct rspamd_controller_pbkdf pbkdf_list[] = {
+ {.name = "PBKDF2-blake2b",
+ .alias = "pbkdf2",
+ .description = "standard CPU intensive \"slow\" KDF using blake2b hash function",
+ .type = RSPAMD_CRYPTOBOX_PBKDF2,
+ .id = RSPAMD_PBKDF_ID_V1,
+ .complexity = 16000,
+ .salt_len = 20,
+ .key_len = rspamd_cryptobox_HASHBYTES / 2},
+ {.name = "Catena-Butterfly",
+ .alias = "catena",
+ .description = "modern CPU and memory intensive KDF",
+ .type = RSPAMD_CRYPTOBOX_CATENA,
+ .id = RSPAMD_PBKDF_ID_V2,
+ .complexity = 10,
+ .salt_len = 20,
+ .key_len = rspamd_cryptobox_HASHBYTES / 2}};
+
+/*
+ * Add O_NONBLOCK to the file status flags of `fd`.
+ * Returns 0 on success, -1 if the flags could not be set.
+ */
+gint rspamd_socket_nonblocking(gint fd)
+{
+	gint flags = fcntl(fd, F_GETFL, 0);
+	gint rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+
+	return (rc == -1) ? -1 : 0;
+}
+
+/*
+ * Clear O_NONBLOCK from the file status flags of `fd`.
+ * Returns 0 on success, -1 if the flags could not be set.
+ */
+gint rspamd_socket_blocking(gint fd)
+{
+	gint flags = fcntl(fd, F_GETFL, 0);
+	gint rc = fcntl(fd, F_SETFL, flags & (~O_NONBLOCK));
+
+	return (rc == -1) ? -1 : 0;
+}
+
+/*
+ * poll() a single descriptor for `events`, with `timeout` passed to poll()
+ * unchanged. The call is restarted whenever it is interrupted by a signal
+ * (EINTR). Returns poll()'s result: >0 ready, 0 timeout, <0 error.
+ */
+gint rspamd_socket_poll(gint fd, gint timeout, short events)
+{
+	struct pollfd pfd;
+	gint rc;
+
+	pfd.fd = fd;
+	pfd.events = events;
+	pfd.revents = 0;
+
+	for (;;) {
+		rc = poll(&pfd, 1, timeout);
+
+		if (rc >= 0 || errno != EINTR) {
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * Create a socket with close-on-exec set and, when `async` is true,
+ * O_NONBLOCK as well. The descriptor is closed again if either flag cannot
+ * be applied. Returns the descriptor or -1.
+ */
+gint rspamd_socket_create(gint af, gint type, gint protocol, gboolean async)
+{
+	gint sock = socket(af, type, protocol);
+
+	if (sock == -1) {
+		return -1;
+	}
+
+	/* Close-on-exec first, then the optional non-blocking mode */
+	if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1 ||
+		(async && rspamd_socket_nonblocking(sock) == -1)) {
+		close(sock);
+		return -1;
+	}
+
+	return sock;
+}
+
+/*
+ * Walk the `addr` list and, for every entry, create a socket of `type` and
+ * either bind() it (is_server) or connect() it (client).
+ *
+ * With list == NULL the loop stops at the first usable socket. Otherwise each
+ * usable descriptor is prepended to *list and the remaining addresses are
+ * tried as well. An address that fails has its descriptor closed and the loop
+ * moves on to the next entry.
+ *
+ * Returns the value left in `fd` when the loop ends: a descriptor, or -1 if
+ * the list was empty or the last attempted address failed.
+ */
+static gint
+rspamd_inet_socket_create(gint type, struct addrinfo *addr, gboolean is_server,
+ gboolean async, GList **list)
+{
+ gint fd = -1, r, on = 1, s_error;
+ struct addrinfo *cur;
+ gpointer ptr;
+ socklen_t optlen;
+
+ cur = addr;
+ while (cur) {
+ /* Create socket */
+ /* Always created non-blocking here (TRUE), independent of `async` */
+ fd = rspamd_socket_create(cur->ai_family, type, cur->ai_protocol, TRUE);
+ if (fd == -1) {
+ goto out;
+ }
+
+ if (is_server) {
+ /* Results of both setsockopt calls are deliberately not checked */
+ (void) setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on,
+ sizeof(gint));
+#ifdef HAVE_IPV6_V6ONLY
+ if (cur->ai_family == AF_INET6) {
+ setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *) &on,
+ sizeof(gint));
+ }
+#endif
+ r = bind(fd, cur->ai_addr, cur->ai_addrlen);
+ }
+ else {
+ r = connect(fd, cur->ai_addr, cur->ai_addrlen);
+ }
+
+ if (r == -1) {
+ /* Only an in-progress non-blocking connect is tolerated */
+ if (errno != EINPROGRESS) {
+ goto out;
+ }
+ if (!async) {
+ /* Try to poll */
+ /* Wait up to CONNECT_TIMEOUT seconds for the socket to become writable */
+ if (rspamd_socket_poll(fd, CONNECT_TIMEOUT * 1000,
+ POLLOUT) <= 0) {
+ errno = ETIMEDOUT;
+ goto out;
+ }
+ else {
+ /* Make synced again */
+ if (rspamd_socket_blocking(fd) < 0) {
+ goto out;
+ }
+ }
+ }
+ }
+ else {
+ /* Still need to check SO_ERROR on socket */
+ /*
+ * NOTE(review): on this path the socket is left non-blocking even
+ * when async is FALSE; confirm callers expect that.
+ */
+ optlen = sizeof(s_error);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *) &s_error, &optlen) != -1) {
+ if (s_error) {
+ errno = s_error;
+ goto out;
+ }
+ }
+ }
+ if (list == NULL) {
+ /* Go out immediately */
+ break;
+ }
+ else if (fd != -1) {
+ /* Collect the descriptor and keep going over the remaining addresses */
+ ptr = GINT_TO_POINTER(fd);
+ *list = g_list_prepend(*list, ptr);
+ cur = cur->ai_next;
+ continue;
+ }
+ /* Failure path for the current address: drop the socket, try the next one */
+ out:
+ if (fd != -1) {
+ close(fd);
+ }
+ fd = -1;
+ cur = cur->ai_next;
+ }
+
+ return (fd);
+}
+
+/*
+ * Open a SOCK_STREAM socket for the first usable address in `addr`
+ * (bound when is_server, connected otherwise). Returns the descriptor or -1.
+ */
+gint rspamd_socket_tcp(struct addrinfo *addr, gboolean is_server, gboolean async)
+{
+	gint fd = rspamd_inet_socket_create(SOCK_STREAM, addr, is_server, async, NULL);
+
+	return fd;
+}
+
+/*
+ * Open a SOCK_DGRAM socket for the first usable address in `addr`
+ * (bound when is_server, connected otherwise). Returns the descriptor or -1.
+ */
+gint rspamd_socket_udp(struct addrinfo *addr, gboolean is_server, gboolean async)
+{
+	gint fd = rspamd_inet_socket_create(SOCK_DGRAM, addr, is_server, async, NULL);
+
+	return fd;
+}
+
+/*
+ * Create a unix-domain socket of `type` for `path` and bind() it (is_server)
+ * or connect() it (client). `addr` is filled in by this function; the path is
+ * copied with rspamd_strlcpy bounded by sizeof(addr->sun_path).
+ *
+ * In server mode an existing socket file at the path is unlinked first; if
+ * the path exists but is not a socket the call fails.
+ *
+ * Returns the descriptor, or -1 with errno preserved across the cleanup
+ * close().
+ */
+gint rspamd_socket_unix(const gchar *path,
+ struct sockaddr_un *addr,
+ gint type,
+ gboolean is_server,
+ gboolean async)
+{
+
+ socklen_t optlen;
+ gint fd = -1, s_error, r, serrno, on = 1;
+ struct stat st;
+
+ if (path == NULL)
+ return -1;
+
+ addr->sun_family = AF_UNIX;
+
+ rspamd_strlcpy(addr->sun_path, path, sizeof(addr->sun_path));
+#ifdef FREEBSD
+ addr->sun_len = SUN_LEN(addr);
+#endif
+
+ if (is_server) {
+ /* Unlink socket if it exists already */
+ if (lstat(addr->sun_path, &st) != -1) {
+ if (S_ISSOCK(st.st_mode)) {
+ if (unlink(addr->sun_path) == -1) {
+ goto out;
+ }
+ }
+ else {
+ /* Something that is not a socket occupies the path: refuse to touch it */
+ goto out;
+ }
+ }
+ }
+ fd = socket(PF_LOCAL, type, 0);
+
+ if (fd == -1) {
+ return -1;
+ }
+
+ /* The socket is switched to non-blocking regardless of `async` */
+ if (rspamd_socket_nonblocking(fd) < 0) {
+ goto out;
+ }
+
+ /* Set close on exec */
+ if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
+ goto out;
+ }
+ if (is_server) {
+ (void) setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *) &on,
+ sizeof(gint));
+ r = bind(fd, (struct sockaddr *) addr, SUN_LEN(addr));
+ }
+ else {
+ r = connect(fd, (struct sockaddr *) addr, SUN_LEN(addr));
+ }
+
+ if (r == -1) {
+ /* Only an in-progress non-blocking connect is tolerated */
+ if (errno != EINPROGRESS) {
+ goto out;
+ }
+ if (!async) {
+ /* Try to poll */
+ /* Wait up to CONNECT_TIMEOUT seconds for the socket to become writable */
+ if (rspamd_socket_poll(fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) {
+ errno = ETIMEDOUT;
+ goto out;
+ }
+ else {
+ /* Make synced again */
+ if (rspamd_socket_blocking(fd) < 0) {
+ goto out;
+ }
+ }
+ }
+ }
+ else {
+ /* Still need to check SO_ERROR on socket */
+ optlen = sizeof(s_error);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *) &s_error, &optlen) != -1) {
+ if (s_error) {
+ errno = s_error;
+ goto out;
+ }
+ }
+ }
+
+
+ return (fd);
+
+out:
+ /* Keep the errno of the failing call; close() may overwrite it */
+ serrno = errno;
+ if (fd != -1) {
+ close(fd);
+ }
+ errno = serrno;
+ return (-1);
+}
+
+/*
+ * List-sort comparator ordering addrinfo entries by ascending address-family
+ * value (presumably so that AF_INET entries come before AF_INET6 ones;
+ * this depends on the platform's numeric AF_* values).
+ */
+static int
+rspamd_prefer_v4_hack(const struct addrinfo *a1, const struct addrinfo *a2)
+{
+	int lhs_family = a1->ai_addr->sa_family;
+	int rhs_family = a2->ai_addr->sa_family;
+
+	return lhs_family - rhs_family;
+}
+
+/**
+ * Make a universal socket
+ * @param credits host, ip or path to unix socket (a leading '/' selects a unix socket)
+ * @param port port (used for network sockets)
+ * @param type socket type passed to the underlying socket creation (e.g. SOCK_STREAM)
+ * @param async make this socket async
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ * @return socket descriptor or -1 on failure; for unix client sockets errno is
+ * ENOENT when the path cannot be stat()ed and EINVAL when it is not a socket
+ */
+gint rspamd_socket(const gchar *credits, guint16 port,
+				   gint type, gboolean async, gboolean is_server, gboolean try_resolve)
+{
+	struct sockaddr_un un;
+	struct stat st;
+	struct addrinfo hints, *res;
+	gint r;
+	gchar portbuf[8];
+
+	if (*credits == '/') {
+		if (!is_server) {
+			if (stat(credits, &st) == -1) {
+				/* Unix socket doesn't exists it must be created first */
+				errno = ENOENT;
+				return -1;
+			}
+
+			/*
+			 * S_IFSOCK is a multi-bit file type code that shares bits with
+			 * other types (e.g. regular files), so a plain bit test accepts
+			 * non-sockets; S_ISSOCK compares the whole type field.
+			 */
+			if (!S_ISSOCK(st.st_mode)) {
+				/* Path is not valid socket */
+				errno = EINVAL;
+				return -1;
+			}
+		}
+
+		return rspamd_socket_unix(credits, &un, type, is_server, async);
+	}
+
+	/* TCP related part */
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */
+	hints.ai_socktype = type;    /* Type of the socket */
+	hints.ai_flags = is_server ? AI_PASSIVE : 0;
+	hints.ai_protocol = 0; /* Any protocol */
+	hints.ai_canonname = NULL;
+	hints.ai_addr = NULL;
+	hints.ai_next = NULL;
+
+	if (!try_resolve) {
+		/* Numeric host and service only: never hit the resolver */
+		hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
+	}
+
+	rspamd_snprintf(portbuf, sizeof(portbuf), "%d", (int) port);
+
+	if ((r = getaddrinfo(credits, portbuf, &hints, &res)) != 0) {
+		return -1;
+	}
+
+	/* Reorder the results by address family before trying them in turn */
+	LL_SORT2(res, rspamd_prefer_v4_hack, ai_next);
+	r = rspamd_inet_socket_create(type, res, is_server, async, NULL);
+	freeaddrinfo(res);
+
+	return r;
+}
+
+/*
+ * Create a pair of connected AF_LOCAL sockets with close-on-exec set on both
+ * ends.
+ *
+ * `af` is used as the socket *type* (e.g. SOCK_STREAM, SOCK_SEQPACKET). When
+ * SOCK_SEQPACKET is requested and cannot be created, SOCK_DGRAM is tried as
+ * a fallback.
+ *
+ * Returns TRUE on success. On failure returns FALSE with errno set; any
+ * descriptors already created are closed.
+ */
+gboolean
+rspamd_socketpair(gint pair[2], gint af)
+{
+	gint r = -1, serrno;
+
+#ifdef HAVE_SOCK_SEQPACKET
+	if (af == SOCK_SEQPACKET) {
+		r = socketpair(AF_LOCAL, SOCK_SEQPACKET, 0, pair);
+
+		if (r == -1) {
+			r = socketpair(AF_LOCAL, SOCK_DGRAM, 0, pair);
+		}
+	}
+#endif
+	if (r == -1) {
+		r = socketpair(AF_LOCAL, af, 0, pair);
+	}
+
+	if (r == -1) {
+		/* Must be FALSE: -1 is a true value for a gboolean result */
+		return FALSE;
+	}
+
+	/* Set close on exec */
+	if (fcntl(pair[0], F_SETFD, FD_CLOEXEC) == -1) {
+		goto out;
+	}
+	if (fcntl(pair[1], F_SETFD, FD_CLOEXEC) == -1) {
+		goto out;
+	}
+
+	return TRUE;
+
+out:
+	/* Keep the errno of the failing fcntl; close() may overwrite it */
+	serrno = errno;
+	close(pair[0]);
+	close(pair[1]);
+	errno = serrno;
+
+	return FALSE;
+}
+
+/*
+ * Install `sig_handler` for the set of signals the process handles and make
+ * SIGPIPE ignored. `signals` is filled in by this function: its mask blocks
+ * the whole handled set while a handler runs, and the same structure is
+ * passed to sigaction() for each handled signal.
+ */
+#ifdef HAVE_SA_SIGINFO
+void rspamd_signals_init(struct sigaction *signals, void (*sig_handler)(gint,
+																			siginfo_t *,
+																			void *))
+#else
+void rspamd_signals_init(struct sigaction *signals, void (*sig_handler)(gint))
+#endif
+{
+	/* SIGUSR1 - reopen config file, SIGUSR2 - worker is ready for accept */
+	static const int handled[] = {
+		SIGTERM,
+		SIGINT,
+		SIGHUP,
+		SIGCHLD,
+		SIGUSR1,
+		SIGUSR2,
+		SIGALRM,
+#ifdef SIGPOLL
+		SIGPOLL,
+#endif
+#ifdef SIGIO
+		SIGIO,
+#endif
+	};
+	const size_t nhandled = sizeof(handled) / sizeof(handled[0]);
+	struct sigaction ignore_pipe;
+	size_t k;
+
+	/* Build the mask first so every handler is installed with the full set */
+	sigemptyset(&signals->sa_mask);
+	for (k = 0; k < nhandled; k++) {
+		sigaddset(&signals->sa_mask, handled[k]);
+	}
+
+#ifdef HAVE_SA_SIGINFO
+	signals->sa_flags = SA_SIGINFO;
+	signals->sa_handler = NULL;
+	signals->sa_sigaction = sig_handler;
+#else
+	signals->sa_handler = sig_handler;
+	signals->sa_flags = 0;
+#endif
+
+	for (k = 0; k < nhandled; k++) {
+		sigaction(handled[k], signals, NULL);
+	}
+
+	/* Ignore SIGPIPE as we handle write errors manually */
+	sigemptyset(&ignore_pipe.sa_mask);
+	sigaddset(&ignore_pipe.sa_mask, SIGPIPE);
+	ignore_pipe.sa_handler = SIG_IGN;
+	ignore_pipe.sa_flags = 0;
+	sigaction(SIGPIPE, &ignore_pipe, NULL);
+}
+
+#ifndef HAVE_SETPROCTITLE
+
+#ifdef LINUX
+static gchar *title_buffer = NULL;
+static size_t title_buffer_size = 0;
+static gchar *title_progname, *title_progname_full;
+gchar **old_environ = NULL;
+
+/*
+ * Pool destructor for the environment copy made by rspamd_init_title:
+ * point `environ` back at the saved original (if one was saved), then free
+ * every copied string and the array itself. `d` is the NULL-terminated copy.
+ */
+static void
+rspamd_title_dtor(gpointer d)
+{
+	gchar **copied_env = (gchar **) d;
+	gchar **slot;
+
+	/* Restore old environment */
+	if (old_environ != NULL) {
+		environ = old_environ;
+	}
+
+	for (slot = copied_env; *slot != NULL; slot++) {
+		g_free(*slot);
+	}
+
+	g_free(copied_env);
+}
+#endif /* ifdef LINUX */
+
+#endif /* ifndef HAVE_SETPROCTITLE */
+
+/*
+ * Prepare in-place process title rewriting (only on Linux builds without a
+ * native setproctitle; otherwise this is a no-op).
+ *
+ * The function measures the contiguous memory region spanned by the argv and
+ * envp strings, copies the environment to the heap and points `environ` at
+ * the copy, and remembers the region as the title buffer used by
+ * rspamd_setproctitle. A destructor registered on `pool` restores `environ`
+ * and frees the copy.
+ *
+ * Always returns 0.
+ */
+gint rspamd_init_title(rspamd_mempool_t *pool,
+ gint argc, gchar *argv[], gchar *envp[])
+{
+#if defined(LINUX) && !defined(HAVE_SETPROCTITLE)
+ gchar *begin_of_buffer = 0, *end_of_buffer = 0;
+ gint i;
+
+ /* Extend the region only while each next string directly follows the previous one */
+ for (i = 0; i < argc; ++i) {
+ if (!begin_of_buffer) {
+ begin_of_buffer = argv[i];
+ }
+ if (!end_of_buffer || end_of_buffer + 1 == argv[i]) {
+ end_of_buffer = argv[i] + strlen(argv[i]);
+ }
+ }
+
+ for (i = 0; envp[i]; ++i) {
+ if (!begin_of_buffer) {
+ begin_of_buffer = envp[i];
+ }
+ if (!end_of_buffer || end_of_buffer + 1 == envp[i]) {
+ end_of_buffer = envp[i] + strlen(envp[i]);
+ }
+ }
+
+ if (!end_of_buffer) {
+ return 0;
+ }
+
+ /* `i` is the number of envp entries here; one extra slot holds the NULL terminator */
+ gchar **new_environ = g_malloc((i + 1) * sizeof(envp[0]));
+
+ for (i = 0; envp[i]; ++i) {
+ new_environ[i] = g_strdup(envp[i]);
+ }
+
+ new_environ[i] = NULL;
+
+ /* Duplicate the program name and repoint the invocation-name globals at the copy */
+ if (program_invocation_name) {
+ title_progname_full = g_strdup(program_invocation_name);
+
+ gchar *p = strrchr(title_progname_full, '/');
+
+ if (p) {
+ title_progname = p + 1;
+ }
+ else {
+ title_progname = title_progname_full;
+ }
+
+ program_invocation_name = title_progname_full;
+ program_invocation_short_name = title_progname;
+ }
+
+ old_environ = environ;
+ environ = new_environ;
+ title_buffer = begin_of_buffer;
+ title_buffer_size = end_of_buffer - begin_of_buffer;
+
+ rspamd_mempool_add_destructor(pool,
+ rspamd_title_dtor,
+ new_environ);
+#endif
+
+ return 0;
+}
+
+/*
+ * Set the process title from a printf-style format.
+ *
+ * - With a native setproctitle: the formatted text is passed to it (nothing
+ *   is done when fmt is NULL).
+ * - On Linux without it: the title buffer recorded by rspamd_init_title is
+ *   overwritten with "<progname>: <formatted text>", or just "<progname>"
+ *   when fmt is NULL. Fails with -1 (errno = ENOMEM) if no buffer was
+ *   recorded, and with -1 if the program name alone does not fit.
+ * - On Apple platforms: nothing is done.
+ * - Elsewhere: the formatted text is set as the glib program name.
+ *
+ * Returns 0 unless one of the failures above occurs.
+ */
+gint rspamd_setproctitle(const gchar *fmt, ...)
+{
+#ifdef HAVE_SETPROCTITLE
+ if (fmt) {
+ static char titlebuf[4096];
+ va_list ap;
+
+ va_start(ap, fmt);
+ rspamd_vsnprintf(titlebuf, sizeof(titlebuf), fmt, ap);
+ va_end(ap);
+
+ /* Pass the text as an argument, never as the format string */
+ setproctitle("%s", titlebuf);
+ }
+#else
+#if defined(LINUX)
+ if (!title_buffer || !title_buffer_size) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ memset(title_buffer, '\0', title_buffer_size);
+
+ ssize_t written;
+
+ if (fmt) {
+ va_list ap;
+
+ written = rspamd_snprintf(title_buffer,
+ title_buffer_size,
+ "%s: ",
+ title_progname);
+ if (written < 0 || (size_t) written >= title_buffer_size)
+ return -1;
+
+ va_start(ap, fmt);
+ rspamd_vsnprintf(title_buffer + written,
+ title_buffer_size - written,
+ fmt,
+ ap);
+ va_end(ap);
+ }
+ else {
+ written = rspamd_snprintf(title_buffer,
+ title_buffer_size,
+ "%s",
+ title_progname);
+ if (written < 0 || (size_t) written >= title_buffer_size)
+ return -1;
+ }
+
+ /* Zero everything after the final title text */
+ written = strlen(title_buffer);
+ memset(title_buffer + written, '\0', title_buffer_size - written);
+#elif defined(__APPLE__)
+ /* OSX is broken, ignore this brain damaged system */
+#else
+ /* Last resort (usually broken, but eh...) */
+ /* NOTE(review): fmt is not checked for NULL in this branch */
+ GString *dest;
+ va_list ap;
+
+ dest = g_string_new("");
+ va_start(ap, fmt);
+ rspamd_vprintf_gstring(dest, fmt, ap);
+ va_end(ap);
+
+ g_set_prgname(dest->str);
+ g_string_free(dest, TRUE);
+
+#endif /* defined(LINUX) */
+
+#endif /* HAVE_SETPROCTITLE */
+ return 0;
+}
+
+
+#ifndef HAVE_PIDFILE
+static gint _rspamd_pidfile_remove(rspamd_pidfh_t *pfh, gint freeit);
+
+/*
+ * Check that `pfh` still refers to the file that was opened as the pidfile.
+ *
+ * Returns 0 when the handle is usable, otherwise a positive errno value:
+ * EINVAL for a NULL/closed handle or when the descriptor no longer refers to
+ * the remembered device/inode, or the fstat() error. Callers store the result
+ * into errno, so it must always be a valid errno value (never -1).
+ */
+static gint
+rspamd_pidfile_verify(rspamd_pidfh_t *pfh)
+{
+	struct stat sb;
+
+	if (pfh == NULL || pfh->pf_fd == -1) {
+		return EINVAL;
+	}
+
+	/*
+	 * Check remembered descriptor.
+	 */
+	if (fstat(pfh->pf_fd, &sb) == -1) {
+		return (errno);
+	}
+
+	if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) {
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Read a decimal pid from the file at `path` into *pidptr.
+ *
+ * Returns 0 on success, otherwise an errno value: the open()/read() error,
+ * EAGAIN when the file is empty, EINVAL when the content is not entirely a
+ * decimal number (*pidptr is still written in that case).
+ */
+static gint
+rspamd_pidfile_read(const gchar *path, pid_t *pidptr)
+{
+	gchar buf[16], *endptr;
+	gint saved_errno, fd, nread;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
+		return (errno);
+	}
+
+	nread = read(fd, buf, sizeof(buf) - 1);
+	saved_errno = errno; /* Remember errno in case close() wants to change it. */
+	close(fd);
+
+	if (nread == -1) {
+		return saved_errno;
+	}
+	if (nread == 0) {
+		return EAGAIN;
+	}
+
+	buf[nread] = '\0';
+	*pidptr = strtol(buf, &endptr, 10);
+
+	/* The number must consume the whole buffer */
+	return (endptr == &buf[nread]) ? 0 : EINVAL;
+}
+
+/*
+ * Open (create/truncate) the pidfile and return a handle for it.
+ *
+ * @param path pidfile path, or NULL for "/var/run/<prgname>.pid"
+ * @param mode permissions used if the file is created
+ * @param pidptr optional; filled with the pid read from the file when open()
+ * fails with EWOULDBLOCK (errno is then EEXIST)
+ * @return handle to pass to the other rspamd_pidfile_* calls, or NULL with
+ * errno set (ENAMETOOLONG if the path does not fit, otherwise the error of
+ * the failing open()/fstat())
+ */
+rspamd_pidfh_t *
+rspamd_pidfile_open(const gchar *path, mode_t mode, pid_t *pidptr)
+{
+	rspamd_pidfh_t *pfh;
+	struct stat sb;
+	gint error, fd, len, count;
+	struct timespec rqtp;
+
+	pfh = g_malloc(sizeof(*pfh));
+	if (pfh == NULL)
+		return NULL;
+
+	if (path == NULL)
+		len = snprintf(pfh->pf_path,
+					   sizeof(pfh->pf_path),
+					   "/var/run/%s.pid",
+					   g_get_prgname());
+	else
+		len = snprintf(pfh->pf_path, sizeof(pfh->pf_path), "%s", path);
+	if (len >= (gint) sizeof(pfh->pf_path)) {
+		g_free(pfh);
+		errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	/*
+	 * Open the PID file and obtain exclusive lock.
+	 * We truncate PID file here only to remove old PID immediately,
+	 * PID file will be truncated again in pidfile_write(), so
+	 * pidfile_write() can be called multiple times.
+	 */
+	fd = open(pfh->pf_path, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode);
+
+	/*
+	 * Check the descriptor before anything else: locking an invalid
+	 * descriptor would replace the errno of open() with EBADF and hide the
+	 * real reason from the caller.
+	 */
+	if (fd == -1) {
+		count = 0;
+		rqtp.tv_sec = 0;
+		rqtp.tv_nsec = 5000000;
+		if (errno == EWOULDBLOCK && pidptr != NULL) {
+		again:
+			errno = rspamd_pidfile_read(pfh->pf_path, pidptr);
+			if (errno == 0)
+				errno = EEXIST;
+			else if (errno == EAGAIN) {
+				/* File is still empty: give the owner a moment to write its pid */
+				if (++count <= 3) {
+					nanosleep(&rqtp, 0);
+					goto again;
+				}
+			}
+		}
+		g_free(pfh);
+		return NULL;
+	}
+
+	/*
+	 * Best-effort lock, result intentionally ignored as before.
+	 * NOTE(review): a lock held by another process is therefore not detected
+	 * here; confirm whether that is intended.
+	 */
+	(void) rspamd_file_lock(fd, TRUE);
+
+	/*
+	 * Remember file information, so in pidfile_write() we are sure we write
+	 * to the proper descriptor.
+	 */
+	if (fstat(fd, &sb) == -1) {
+		error = errno;
+		unlink(pfh->pf_path);
+		close(fd);
+		g_free(pfh);
+		errno = error;
+		return NULL;
+	}
+
+	pfh->pf_fd = fd;
+	pfh->pf_dev = sb.st_dev;
+	pfh->pf_ino = sb.st_ino;
+
+	return pfh;
+}
+
+/*
+ * Write the current pid into the pidfile behind `pfh`, replacing any previous
+ * content. On a truncate/write failure the pidfile is removed (the handle is
+ * kept) and the original errno is reported. Returns 0 or -1.
+ */
+gint rspamd_pidfile_write(rspamd_pidfh_t *pfh)
+{
+	gchar pidstr[16];
+	gint saved, fd;
+	size_t pidlen;
+
+	/*
+	 * Check remembered descriptor, so we don't overwrite some other
+	 * file if pidfile was closed and descriptor reused.
+	 * On mismatch the descriptor is left alone: it may not be ours.
+	 */
+	errno = rspamd_pidfile_verify(pfh);
+	if (errno != 0) {
+		return -1;
+	}
+
+	fd = pfh->pf_fd;
+
+	/* Truncate first so that repeated calls are allowed */
+	if (ftruncate(fd, 0) == -1) {
+		goto fail;
+	}
+
+	rspamd_snprintf(pidstr, sizeof(pidstr), "%P", getpid());
+	pidlen = strlen(pidstr);
+
+	if (pwrite(fd, pidstr, pidlen, 0) != (ssize_t) pidlen) {
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	/* Removal may change errno; report the error of the failed call */
+	saved = errno;
+	_rspamd_pidfile_remove(pfh, 0);
+	errno = saved;
+
+	return -1;
+}
+
+/*
+ * Close the pidfile descriptor and free the handle without removing the file.
+ * If verification fails the handle is left untouched. Returns 0 or -1 with
+ * errno set.
+ */
+gint rspamd_pidfile_close(rspamd_pidfh_t *pfh)
+{
+	gint rc = rspamd_pidfile_verify(pfh);
+
+	if (rc != 0) {
+		errno = rc;
+		return -1;
+	}
+
+	if (close(pfh->pf_fd) == -1) {
+		rc = errno;
+	}
+
+	/* The handle is released even when close() failed */
+	g_free(pfh);
+
+	if (rc == 0) {
+		return 0;
+	}
+
+	errno = rc;
+	return -1;
+}
+
+/*
+ * Unlink the pidfile, release its lock and close the descriptor. All three
+ * steps are always attempted; the first error encountered is the one
+ * reported. With `freeit` the handle is freed, otherwise its descriptor is
+ * marked closed (-1). Returns 0 or -1 with errno set.
+ */
+static gint
+_rspamd_pidfile_remove(rspamd_pidfh_t *pfh, gint freeit)
+{
+	gint first_err = rspamd_pidfile_verify(pfh);
+
+	if (first_err != 0) {
+		errno = first_err;
+		return -1;
+	}
+
+	if (unlink(pfh->pf_path) == -1) {
+		first_err = errno;
+	}
+
+	if (!rspamd_file_unlock(pfh->pf_fd, FALSE) && first_err == 0) {
+		first_err = errno;
+	}
+
+	if (close(pfh->pf_fd) == -1 && first_err == 0) {
+		first_err = errno;
+	}
+
+	if (freeit) {
+		g_free(pfh);
+	}
+	else {
+		pfh->pf_fd = -1;
+	}
+
+	if (first_err == 0) {
+		return 0;
+	}
+
+	errno = first_err;
+	return -1;
+}
+
+/*
+ * Remove the pidfile and free the handle. Returns 0 or -1 with errno set.
+ */
+gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh)
+{
+	gint rc = _rspamd_pidfile_remove(pfh, 1);
+
+	return rc;
+}
+#endif
+
+/*
+ * Replace %r with rcpt value and %f with from value, new string is allocated in pool.
+ *
+ * A NULL `rcpt` or `from` is substituted as an empty string. Any other
+ * character (including a '%' not followed by 'r' or 'f') is copied verbatim.
+ * When `pattern` contains no placeholder it is returned as is and nothing is
+ * allocated.
+ */
+gchar *
+resolve_stat_filename(rspamd_mempool_t *pool,
+					  gchar *pattern,
+					  gchar *rcpt,
+					  gchar *from)
+{
+	gsize rcptlen = rcpt ? strlen(rcpt) : 0;
+	gsize fromlen = from ? strlen(from) : 0;
+	gsize len = 0;
+	gboolean need_to_format = FALSE;
+	const gchar *c;
+	gchar *new, *s;
+
+	/*
+	 * Calculate length. Each character is examined before the cursor moves;
+	 * c[1] is safe to read after a '%' because the string is NUL terminated.
+	 */
+	for (c = pattern; *c != '\0';) {
+		if (c[0] == '%' && c[1] == 'r') {
+			len += rcptlen;
+			c += 2;
+			need_to_format = TRUE;
+		}
+		else if (c[0] == '%' && c[1] == 'f') {
+			len += fromlen;
+			c += 2;
+			need_to_format = TRUE;
+		}
+		else {
+			len++;
+			c++;
+		}
+	}
+
+	/* Do not allocate extra memory if we do not need to format string */
+	if (!need_to_format) {
+		return pattern;
+	}
+
+	/* Allocate new string, including room for the terminating NUL */
+	new = rspamd_mempool_alloc(pool, len + 1);
+	s = new;
+
+	/* Format string */
+	for (c = pattern; *c != '\0';) {
+		if (c[0] == '%' && c[1] == 'r') {
+			if (rcptlen > 0) {
+				memcpy(s, rcpt, rcptlen);
+				s += rcptlen;
+			}
+			c += 2;
+		}
+		else if (c[0] == '%' && c[1] == 'f') {
+			if (fromlen > 0) {
+				memcpy(s, from, fromlen);
+				s += fromlen;
+			}
+			c += 2;
+		}
+		else {
+			*s++ = *c++;
+		}
+	}
+
+	*s = '\0';
+
+	return new;
+}
+
+/*
+ * Format the interval between start and end (both in seconds) as
+ * milliseconds with `resolution` fractional digits.
+ * The result lives in a static buffer that is overwritten by the next call.
+ */
+const gchar *
+rspamd_log_check_time(gdouble start, gdouble end, gint resolution)
+{
+	static gchar res[64];
+	gchar fmt[32];
+	gdouble elapsed_ms = (end - start) * 1000.0;
+
+	/* Build the format first so that the precision is taken from the caller */
+	rspamd_snprintf(fmt, sizeof(fmt), "%%.%dfms", resolution);
+	rspamd_snprintf(res, sizeof(res), fmt, elapsed_ms);
+
+	return res;
+}
+
+
+#ifdef HAVE_FLOCK
+/* Flock version */
+/*
+ * Take an exclusive flock() on fd; with async set the call does not block.
+ * Returns TRUE when the lock has been acquired.
+ */
+gboolean
+rspamd_file_lock(gint fd, gboolean async)
+{
+	gint op = LOCK_EX;
+
+	if (async) {
+		op |= LOCK_NB;
+	}
+
+	return flock(fd, op) == -1 ? FALSE : TRUE;
+}
+
+/*
+ * Release a flock() held on fd; with async set LOCK_NB is added.
+ * Any flock() failure, whatever errno says, is reported as FALSE.
+ */
+gboolean
+rspamd_file_unlock(gint fd, gboolean async)
+{
+	gint op = LOCK_UN;
+
+	if (async) {
+		op |= LOCK_NB;
+	}
+
+	return flock(fd, op) == -1 ? FALSE : TRUE;
+}
+#else /* HAVE_FLOCK */
+/* Fctnl version */
+/*
+ * Take a whole-file write lock on fd using fcntl(); with async set the
+ * request does not wait (F_SETLK), otherwise it blocks (F_SETLKW).
+ * Returns TRUE when the lock has been acquired.
+ */
+gboolean
+rspamd_file_lock(gint fd, gboolean async)
+{
+	struct flock lock_req = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 0,
+		.l_len = 0};
+	const int cmd = async ? F_SETLK : F_SETLKW;
+
+	/* Every failure, including EAGAIN/EACCES contention, maps to FALSE */
+	return fcntl(fd, cmd, &lock_req) == -1 ? FALSE : TRUE;
+}
+
+/*
+ * Drop the whole-file fcntl() lock held on fd; async selects F_SETLK
+ * instead of F_SETLKW. Returns TRUE on success, FALSE on any failure.
+ */
+gboolean
+rspamd_file_unlock(gint fd, gboolean async)
+{
+	struct flock unlock_req = {
+		.l_type = F_UNLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 0,
+		.l_len = 0};
+	const int cmd = async ? F_SETLK : F_SETLKW;
+
+	return fcntl(fd, cmd, &unlock_req) == -1 ? FALSE : TRUE;
+}
+#endif /* HAVE_FLOCK */
+
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22))
+/*
+ * Compatibility shim for GLib < 2.22: there is no reference counting here,
+ * the array and its element storage are simply freed.
+ */
+void g_ptr_array_unref(GPtrArray *array)
+{
+	g_ptr_array_free(array, TRUE);
+}
+/* Compatibility shim for GLib < 2.22: compares the two pointed-to gint64 values */
+gboolean
+g_int64_equal(gconstpointer v1, gconstpointer v2)
+{
+	return *((const gint64 *) v1) == *((const gint64 *) v2);
+}
+/*
+ * Compatibility shim for GLib < 2.22: hash the 64-bit value that v points
+ * to by folding its upper half into the lower one.
+ */
+guint g_int64_hash(gconstpointer v)
+{
+	guint64 v64 = *(const guint64 *) v;
+
+	/* Mix the loaded value, not the pointer itself */
+	return (guint) (v64 ^ (v64 >> 32));
+}
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14))
+/*
+ * Compatibility shim for GLib < 2.14: drop all links of the queue (element
+ * data is not freed) and reset it to the empty state.
+ */
+void g_queue_clear(GQueue *queue)
+{
+	g_return_if_fail(queue != NULL);
+
+	g_list_free(queue->head);
+
+	queue->length = 0;
+	queue->tail = NULL;
+	queue->head = NULL;
+}
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 30))
+/*
+ * Compatibility shim for GLib < 2.30: create a pointer array with
+ * preallocated room for reserved_size elements and the given element
+ * destructor.
+ */
+GPtrArray *
+g_ptr_array_new_full(guint reserved_size,
+					 GDestroyNotify element_free_func)
+{
+	GPtrArray *created = g_ptr_array_sized_new(reserved_size);
+
+	g_ptr_array_set_free_func(created, element_free_func);
+
+	return created;
+}
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32))
+/*
+ * Compatibility shim for GLib < 2.32: call free_func on the data of every
+ * element, then free the queue itself.
+ */
+void g_queue_free_full(GQueue *queue, GDestroyNotify free_func)
+{
+	for (GList *node = queue->head; node != NULL; node = g_list_next(node)) {
+		free_func(node->data);
+	}
+
+	g_queue_free(queue);
+}
+#endif
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 40))
+/*
+ * Compatibility shim for GLib < 2.40: insert data at position index_,
+ * shifting the following elements up by one. An index_ of -1 appends.
+ */
+void g_ptr_array_insert(GPtrArray *array, gint index_, gpointer data)
+{
+	guint old_len, pos;
+
+	g_return_if_fail(array);
+	g_return_if_fail(index_ >= -1);
+	g_return_if_fail(index_ <= (gint) array->len);
+
+	/*
+	 * Capture the length before growing: g_ptr_array_set_size() already
+	 * bumps array->len, so all offsets must be computed from the old value.
+	 */
+	old_len = array->len;
+	pos = (index_ < 0) ? old_len : (guint) index_;
+
+	g_ptr_array_set_size(array, old_len + 1);
+
+	if (pos < old_len) {
+		memmove(&(array->pdata[pos + 1]), &(array->pdata[pos]),
+				(old_len - pos) * sizeof(gpointer));
+	}
+
+	array->pdata[pos] = data;
+}
+#endif
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32))
+/*
+ * Compatibility shim for GLib < 2.32: look up `variable` in a
+ * NULL-terminated array of "NAME=value" strings.
+ * Returns a pointer to the value inside envp, or NULL if not found.
+ */
+const gchar *
+g_environ_getenv(gchar **envp, const gchar *variable)
+{
+	gsize name_len;
+	gchar **cur;
+
+	if (envp == NULL) {
+		return NULL;
+	}
+
+	name_len = strlen(variable);
+
+	for (cur = envp; *cur != NULL; cur++) {
+		const gchar *entry = *cur;
+
+		/* The name must match fully and be directly followed by '=' */
+		if (strncmp(entry, variable, name_len) != 0) {
+			continue;
+		}
+
+		if (entry[name_len] == '=') {
+			return entry + name_len + 1;
+		}
+	}
+
+	return NULL;
+}
+#endif
+
+/*
+ * Preallocate len bytes at offset for fd using whatever the platform offers:
+ * fallocate(), posix_fallocate(), or nothing at all (always returns 0).
+ * NOTE(review): the return value is passed through unchanged; fallocate()
+ * and posix_fallocate() report errors differently, so callers should treat
+ * any non-zero value as failure - confirm against callers.
+ */
+gint rspamd_fallocate(gint fd, off_t offset, off_t len)
+{
+#if defined(HAVE_FALLOCATE)
+	return fallocate(fd, 0, offset, len);
+#elif defined(HAVE_POSIX_FALLOCATE)
+	return posix_fallocate(fd, offset, len);
+#else
+	/* Return 0 as nothing can be done on this system */
+	return 0;
+#endif
+}
+
+
+/**
+ * Create new mutex: allocates a zeroed rspamd_mutex_t and initialises the
+ * embedded GLib mutex (GMutex on GLib > 2.30, GStaticMutex otherwise)
+ * @return newly allocated mutex, to be released with rspamd_mutex_free()
+ * NOTE(review): allocation uses g_malloc0(), which presumably aborts rather
+ * than returning NULL on failure - confirm before relying on a NULL check
+ */
+inline rspamd_mutex_t *
+rspamd_mutex_new(void)
+{
+	rspamd_mutex_t *new;
+
+	new = g_malloc0(sizeof(rspamd_mutex_t));
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_init(&new->mtx);
+#else
+	g_static_mutex_init(&new->mtx);
+#endif
+
+	return new;
+}
+
+/**
+ * Lock mutex (uses the GMutex or GStaticMutex API depending on GLib version)
+ * @param mtx mutex created by rspamd_mutex_new(); must not be NULL
+ */
+inline void
+rspamd_mutex_lock(rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_lock(&mtx->mtx);
+#else
+	g_static_mutex_lock(&mtx->mtx);
+#endif
+}
+
+/**
+ * Unlock mutex (uses the GMutex or GStaticMutex API depending on GLib version)
+ * @param mtx mutex created by rspamd_mutex_new(); must not be NULL
+ */
+inline void
+rspamd_mutex_unlock(rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_unlock(&mtx->mtx);
+#else
+	g_static_mutex_unlock(&mtx->mtx);
+#endif
+}
+
+/*
+ * Destroy a mutex created by rspamd_mutex_new(): clears the GMutex on
+ * GLib > 2.30 (nothing is cleared for the older static mutex) and frees
+ * the wrapper structure.
+ */
+void rspamd_mutex_free(rspamd_mutex_t *mtx)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+	g_mutex_clear(&mtx->mtx);
+#endif
+	g_free(mtx);
+}
+
+/* Start-up context handed to rspamd_thread_func(), which frees it on exit */
+struct rspamd_thread_data {
+	gchar *name;      /* heap-allocated; released with g_free() by the thread */
+	gint id;          /* not read by rspamd_thread_func() */
+	GThreadFunc func; /* real thread body */
+	gpointer data;    /* argument passed to func */
+};
+
+/*
+ * Thread trampoline: blocks the process-directed signals listed below in
+ * the new thread, runs the user function and releases the start-up context.
+ * Returns whatever the user function returned.
+ */
+static gpointer
+rspamd_thread_func(gpointer ud)
+{
+	static const int blocked_signals[] = {
+		SIGINT, SIGHUP, SIGCHLD, SIGUSR1, SIGUSR2, SIGALRM, SIGPIPE};
+	struct rspamd_thread_data *td = ud;
+	sigset_t mask;
+	gpointer result;
+
+	/* Ignore signals in thread */
+	sigemptyset(&mask);
+
+	for (gsize i = 0; i < sizeof(blocked_signals) / sizeof(blocked_signals[0]); i++) {
+		sigaddset(&mask, blocked_signals[i]);
+	}
+
+	pthread_sigmask(SIG_BLOCK, &mask, NULL);
+
+	result = td->func(td->data);
+
+	g_free(td->name);
+	g_free(td);
+
+	return result;
+}
+
+/* Context passed to copy_foreach_callback() while copying a hash table */
+struct hash_copy_callback_data {
+	gpointer (*key_copy_func)(gconstpointer data, gpointer ud);   /* may be NULL: key is reused as is */
+	gpointer (*value_copy_func)(gconstpointer data, gpointer ud); /* may be NULL: value is reused as is */
+	gpointer ud;     /* opaque user data forwarded to both copy functions */
+	GHashTable *dst; /* table receiving the copied pairs */
+};
+
+/*
+ * g_hash_table_foreach() callback: copy (or reuse, when no copy function is
+ * set) one key/value pair and insert it into the destination table.
+ */
+static void
+copy_foreach_callback(gpointer key, gpointer value, gpointer ud)
+{
+	struct hash_copy_callback_data *ctx = ud;
+	gpointer new_key = key;
+	gpointer new_value = value;
+
+	if (ctx->key_copy_func) {
+		new_key = ctx->key_copy_func(key, ctx->ud);
+	}
+
+	if (ctx->value_copy_func) {
+		new_value = ctx->value_copy_func(value, ctx->ud);
+	}
+
+	g_hash_table_insert(ctx->dst, new_key, new_value);
+}
+/**
+ * Copy all entries of one hash table into another, optionally transforming
+ * keys and values on the way. Nothing is done if either table is NULL.
+ * @param src source hash
+ * @param dst destination hash
+ * @param key_copy_func function called to copy or modify keys (or NULL to reuse the key pointer)
+ * @param value_copy_func function called to copy or modify values (or NULL to reuse the value pointer)
+ * @param ud user data for copy functions
+ */
+void rspamd_hash_table_copy(GHashTable *src, GHashTable *dst,
+							gpointer (*key_copy_func)(gconstpointer data, gpointer ud),
+							gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
+							gpointer ud)
+{
+	struct hash_copy_callback_data ctx;
+
+	if (src == NULL || dst == NULL) {
+		return;
+	}
+
+	ctx.dst = dst;
+	ctx.ud = ud;
+	ctx.key_copy_func = key_copy_func;
+	ctx.value_copy_func = value_copy_func;
+
+	g_hash_table_foreach(src, copy_foreach_callback, &ctx);
+}
+
+/* Per-signal "was delivered" flags, set by the handler below and replayed later */
+static volatile sig_atomic_t saved_signo[NSIG];
+
+/*
+ * Temporary handler installed while a passphrase is being read: it only
+ * records that signal s arrived (s is used as an index without a range check).
+ */
+static void
+read_pass_tmp_sig_handler(int s)
+{
+
+	saved_signo[s] = 1;
+}
+
+#ifndef _PATH_TTY
+#define _PATH_TTY "/dev/tty"
+#endif
+
+/*
+ * Print prompt on the controlling terminal and read one line into buf.
+ *
+ * @param prompt NUL-terminated prompt text
+ * @param buf destination buffer, always NUL-terminated on return from the
+ * fallback implementation
+ * @param size size of buf in bytes; input beyond size - 1 bytes is discarded
+ * @param echo whether typed characters are echoed
+ * @param key unused
+ * @return number of bytes stored in buf, or 0 on failure (errno is set to
+ * ENOTTY when the terminal cannot be used, EINVAL for a non-positive size)
+ *
+ * Without readpassphrase(3) the function talks to _PATH_TTY directly;
+ * signals that arrive while reading are recorded, re-raised once the
+ * terminal has been restored, and a stop signal restarts the whole prompt.
+ */
+gint rspamd_read_passphrase_with_prompt(const gchar *prompt, gchar *buf, gint size, bool echo, gpointer key)
+{
+#ifdef HAVE_READPASSPHRASE_H
+	int flags = echo ? RPP_ECHO_ON : RPP_ECHO_OFF;
+	if (readpassphrase(prompt, buf, size, flags | RPP_REQUIRE_TTY) == NULL) {
+		return 0;
+	}
+
+	return strlen(buf);
+#else
+	struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
+	struct sigaction savetstp, savettin, savettou, savepipe;
+	struct termios term, oterm;
+	gint input, output, i;
+	gchar *end, *p, ch;
+	ssize_t written;
+
+	/* There must be room for at least the terminating NUL */
+	if (size <= 0) {
+		errno = EINVAL;
+		return 0;
+	}
+
+restart:
+	if ((input = output = open(_PATH_TTY, O_RDWR)) == -1) {
+		errno = ENOTTY;
+		return 0;
+	}
+
+	(void) fcntl(input, F_SETFD, FD_CLOEXEC);
+
+	/* Turn echo off */
+	if (tcgetattr(input, &oterm) != 0) {
+		close(input);
+		errno = ENOTTY;
+		return 0;
+	}
+
+	memcpy(&term, &oterm, sizeof(term));
+
+	if (!echo) {
+		term.c_lflag &= ~(ECHO | ECHONL);
+	}
+
+	if (tcsetattr(input, TCSAFLUSH, &term) == -1) {
+		errno = ENOTTY;
+		close(input);
+		return 0;
+	}
+
+	/*
+	 * Write the caller's prompt using its real length; the call is kept
+	 * outside of g_assert so that it still happens when assertions are
+	 * compiled out.
+	 */
+	written = write(output, prompt, strlen(prompt));
+	g_assert(written != -1);
+
+	/* Save the current sighandler */
+	for (i = 0; i < NSIG; i++) {
+		saved_signo[i] = 0;
+	}
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = 0;
+	sa.sa_handler = read_pass_tmp_sig_handler;
+	(void) sigaction(SIGALRM, &sa, &savealrm);
+	(void) sigaction(SIGHUP, &sa, &savehup);
+	(void) sigaction(SIGINT, &sa, &saveint);
+	(void) sigaction(SIGPIPE, &sa, &savepipe);
+	(void) sigaction(SIGQUIT, &sa, &savequit);
+	(void) sigaction(SIGTERM, &sa, &saveterm);
+	(void) sigaction(SIGTSTP, &sa, &savetstp);
+	(void) sigaction(SIGTTIN, &sa, &savettin);
+	(void) sigaction(SIGTTOU, &sa, &savettou);
+
+	/* Now read a passphrase */
+	p = buf;
+	end = p + size - 1;
+	while (read(input, &ch, 1) == 1 && ch != '\n' && ch != '\r') {
+		if (p < end) {
+			*p++ = ch;
+		}
+	}
+	*p = '\0';
+	written = write(output, "\n", 1);
+	g_assert(written == 1);
+
+	/* Restore terminal state */
+	if (memcmp(&term, &oterm, sizeof(term)) != 0) {
+		while (tcsetattr(input, TCSAFLUSH, &oterm) == -1 &&
+			   errno == EINTR && !saved_signo[SIGTTOU])
+			;
+	}
+
+	/* Restore signal handlers */
+	(void) sigaction(SIGALRM, &savealrm, NULL);
+	(void) sigaction(SIGHUP, &savehup, NULL);
+	(void) sigaction(SIGINT, &saveint, NULL);
+	(void) sigaction(SIGQUIT, &savequit, NULL);
+	(void) sigaction(SIGPIPE, &savepipe, NULL);
+	(void) sigaction(SIGTERM, &saveterm, NULL);
+	(void) sigaction(SIGTSTP, &savetstp, NULL);
+	(void) sigaction(SIGTTIN, &savettin, NULL);
+	(void) sigaction(SIGTTOU, &savettou, NULL);
+
+	close(input);
+
+	/* Send signals pending */
+	for (i = 0; i < NSIG; i++) {
+		if (saved_signo[i]) {
+			kill(getpid(), i);
+			switch (i) {
+			case SIGTSTP:
+			case SIGTTIN:
+			case SIGTTOU:
+				goto restart;
+			}
+		}
+	}
+
+	return (p - buf);
+#endif
+}
+
+#ifdef HAVE_CLOCK_GETTIME
+#ifdef CLOCK_MONOTONIC_COARSE
+#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC_COARSE
+#elif defined(CLOCK_MONOTONIC_FAST)
+#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC_FAST
+#else
+#define RSPAMD_FAST_MONOTONIC_CLOCK CLOCK_MONOTONIC
+#endif
+#endif
+
+/*
+ * Return a monotonic timestamp.
+ *
+ * With rdtsc_ok set the result is a high-resolution tick count: the CPU
+ * time-stamp counter on x86_64 builds with HAVE_RDTSC, nanoseconds (or
+ * mach_absolute_time() units on Apple) otherwise.
+ * With rdtsc_ok unset the result is expressed in seconds.
+ */
+gdouble
+rspamd_get_ticks(gboolean rdtsc_ok)
+{
+	gdouble res;
+
+#ifdef HAVE_RDTSC
+#ifdef __x86_64__
+	guint64 r64;
+
+	if (rdtsc_ok) {
+		__builtin_ia32_lfence();
+		r64 = __rdtsc();
+		/* Preserve lower 53 bits: the widest integer a double holds exactly */
+		res = r64 & ((1ULL << 53) - 1);
+		return res;
+	}
+#endif
+#endif
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec ts;
+	gint clk_id = RSPAMD_FAST_MONOTONIC_CLOCK;
+
+	clock_gettime(clk_id, &ts);
+
+	if (rdtsc_ok) {
+		res = (double) ts.tv_sec * 1e9 + ts.tv_nsec;
+	}
+	else {
+		res = (double) ts.tv_sec + ts.tv_nsec / 1000000000.;
+	}
+#elif defined(__APPLE__)
+	if (rdtsc_ok) {
+		res = mach_absolute_time();
+	}
+	else {
+		res = mach_absolute_time() / 1000000000.;
+	}
+#else
+	struct timeval tv;
+
+	(void) gettimeofday(&tv, NULL);
+	if (rdtsc_ok) {
+		/* Nanoseconds, built from the timeval read just above */
+		res = (double) tv.tv_sec * 1e9 + tv.tv_usec * 1e3;
+	}
+	else {
+		res = (double) tv.tv_sec + tv.tv_usec / 1000000.;
+	}
+#endif
+
+	return res;
+}
+
+/*
+ * Return consumed CPU time in seconds, using the best source available at
+ * build time: a CPU-time clock via clock_gettime(), Mach thread info,
+ * getrusage() or clock().
+ * Returns -1 when the Mach or getrusage() query fails.
+ */
+gdouble
+rspamd_get_virtual_ticks(void)
+{
+	gdouble res;
+
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec ts;
+	static clockid_t cid = (clockid_t) -1;
+	if (cid == (clockid_t) -1) {
+#ifdef HAVE_CLOCK_GETCPUCLOCKID
+		/* clock_getcpuclockid() returns an error number, not -1, on failure */
+		if (clock_getcpuclockid(0, &cid) != 0) {
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+			cid = CLOCK_PROCESS_CPUTIME_ID;
+#elif defined(CLOCK_PROF)
+			cid = CLOCK_PROF;
+#else
+			cid = CLOCK_REALTIME;
+#endif
+#ifdef HAVE_CLOCK_GETCPUCLOCKID
+		}
+#endif
+	}
+
+	clock_gettime(cid, &ts);
+	res = (double) ts.tv_sec + ts.tv_nsec / 1000000000.;
+#elif defined(__APPLE__)
+	thread_port_t thread = mach_thread_self();
+
+	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
+	thread_basic_info_data_t info;
+	if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count) != KERN_SUCCESS) {
+		/* Do not leak the port reference taken by mach_thread_self() */
+		mach_port_deallocate(mach_task_self(), thread);
+		return -1;
+	}
+
+	res = info.user_time.seconds + info.system_time.seconds;
+	res += ((gdouble) (info.user_time.microseconds + info.system_time.microseconds)) / 1e6;
+	mach_port_deallocate(mach_task_self(), thread);
+#elif defined(HAVE_RUSAGE_SELF)
+	struct rusage rusage;
+	if (getrusage(RUSAGE_SELF, &rusage) != -1) {
+		res = (double) rusage.ru_utime.tv_sec +
+			  (double) rusage.ru_utime.tv_usec / 1000000.0;
+	}
+	else {
+		/* Never return an uninitialised value */
+		res = -1;
+	}
+#else
+	res = clock() / (double) CLOCKS_PER_SEC;
+#endif
+
+	return res;
+}
+
+/*
+ * Return the current wall-clock time as a floating point number of seconds,
+ * read from CLOCK_REALTIME when available and from gettimeofday() (or, as a
+ * last resort, time()) otherwise.
+ */
+gdouble
+rspamd_get_calendar_ticks(void)
+{
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec now;
+
+	clock_gettime(CLOCK_REALTIME, &now);
+
+	return ts_to_double(&now);
+#else
+	struct timeval now;
+
+	if (gettimeofday(&now, NULL) != 0) {
+		/* Whole seconds are better than nothing */
+		return time(NULL);
+	}
+
+	return tv_to_double(&now);
+#endif
+}
+
+/*
+ * Fill buf with len random lowercase hexadecimal characters.
+ * len must be non-zero; no terminating NUL is written.
+ */
+void rspamd_random_hex(gchar *buf, guint64 len)
+{
+	static const gchar hexdigests[16] = "0123456789abcdef";
+	gint64 i;
+
+	g_assert(len > 0);
+
+	/* Random bytes occupy the first half of buf (rounded up) */
+	ottery_rand_bytes((void *) buf, ceil(len / 2.0));
+
+	/*
+	 * Expand in place from the end towards the start: the byte at i / 2
+	 * yields the characters at i and i - 1, so a source byte is always
+	 * read before the slot holding it is overwritten.
+	 */
+	for (i = (gint64) len - 1; i >= 0; i -= 2) {
+		buf[i] = hexdigests[buf[i / 2] & 0xf];
+
+		if (i > 0) {
+			buf[i - 1] = hexdigests[(buf[i / 2] >> 4) & 0xf];
+		}
+	}
+}
+
+/*
+ * Create a uniquely named shared memory object. Everything in pattern from
+ * the first 'X' to the end is replaced by random hex digits, and the
+ * attempt is repeated for as long as the generated name already exists.
+ * On success pattern is updated with the name used and the descriptor is
+ * returned; on failure -1 is returned (errno is EINVAL if pattern has no 'X').
+ */
+gint rspamd_shmem_mkstemp(gchar *pattern)
+{
+	const gchar *first_x = strchr(pattern, 'X');
+	gchar *candidate, *rand_start;
+	gsize pattern_len, rand_len;
+	gint fd;
+
+	if (first_x == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* Work on a private copy so pattern stays intact until we succeed */
+	pattern_len = strlen(pattern);
+	candidate = g_malloc(pattern_len + 1);
+	rspamd_strlcpy(candidate, pattern, pattern_len + 1);
+	rand_start = candidate + (first_x - pattern);
+	rand_len = pattern_len - (rand_start - candidate);
+
+	do {
+		rspamd_random_hex(rand_start, rand_len);
+		fd = shm_open(candidate, O_RDWR | O_EXCL | O_CREAT, 0600);
+	} while (fd == -1 && errno == EEXIST);
+
+	if (fd != -1) {
+		rspamd_strlcpy(pattern, candidate, pattern_len + 1);
+	}
+
+	g_free(candidate);
+
+	return fd;
+}
+
+/* Destructor-style wrapper: free a GPtrArray together with its element storage */
+void rspamd_ptr_array_free_hard(gpointer p)
+{
+	g_ptr_array_free((GPtrArray *) p, TRUE);
+}
+
+/* Destructor-style wrapper: free a GArray together with its element storage */
+void rspamd_array_free_hard(gpointer p)
+{
+	g_array_free((GArray *) p, TRUE);
+}
+
+/* Destructor-style wrapper: free a GString together with its character data */
+void rspamd_gstring_free_hard(gpointer p)
+{
+	g_string_free((GString *) p, TRUE);
+}
+
+/*
+ * Destructor-style wrapper taking a pointer to a GError pointer: the error
+ * is freed if both p and the pointer it refers to are non-NULL.
+ * The pointed-to slot is not reset afterwards.
+ */
+void rspamd_gerror_free_maybe(gpointer p)
+{
+	GError **err = (GError **) p;
+
+	if (err == NULL || *err == NULL) {
+		return;
+	}
+
+	g_error_free(*err);
+}
+
+/*
+ * OpenBLAS creates its own threads, which are not supported by the
+ * jemalloc allocator. This workaround limits OpenBLAS to a single thread
+ * by default, from a constructor.
+ * FIXME: is it legit to do so in ctor?
+ */
+#ifdef HAVE_OPENBLAS_SET_NUM_THREADS
+extern void openblas_set_num_threads(int num_threads);
+RSPAMD_CONSTRUCTOR(openblas_thread_fix_ctor)
+{
+	openblas_set_num_threads(1);
+}
+#endif
+#ifdef HAVE_BLI_THREAD_SET_NUM_THREADS
+extern void bli_thread_set_num_threads(int num_threads);
+/* Same single-thread workaround for BLIS, also applied from a constructor */
+RSPAMD_CONSTRUCTOR(blis_thread_fix_ctor)
+{
+	bli_thread_set_num_threads(1);
+}
+#endif
+
+/*
+ * Return the seed used for hashing. The value is a fixed compile-time
+ * constant: the per-process random seed below is disabled with #if 0.
+ */
+guint64
+rspamd_hash_seed(void)
+{
+#if 0
+	static guint64 seed;
+
+	if (seed == 0) {
+		seed = ottery_rand_uint64 ();
+	}
+#endif
+
+	/* Proved to be random, I promise! */
+	/*
+	 * TODO: discover if it worth to use random seed on run
+	 * with ordinary hash function or we need to switch to
+	 * siphash1-3 or other slow cooker function...
+	 */
+	return 0xabf9727ba290690bULL;
+}
+
+/*
+ * Map a 64-bit integer to a double by bit manipulation: the top 52 bits of
+ * x become the fraction field and 0x3FF the exponent field, then 1.0 is
+ * subtracted.
+ * NOTE(review): assuming IEEE-754 binary64 doubles this yields a value in
+ * [0, 1) - confirm for exotic targets.
+ */
+static inline gdouble
+rspamd_double_from_int64(guint64 x)
+{
+	const union {
+		guint64 i;
+		double d;
+	} u = {
+		.i = G_GUINT64_CONSTANT(0x3FF) << 52 | x >> 12};
+
+	return u.d - 1.0;
+}
+
+/*
+ * Return a random double produced from 64 bits of ottery output via
+ * rspamd_double_from_int64().
+ */
+gdouble
+rspamd_random_double(void)
+{
+	return rspamd_double_from_int64(ottery_rand_uint64());
+}
+
+
+/*
+ * Return a pointer to the function-local static state of the fast
+ * generator, filling it from ottery while it is still zero.
+ * NOTE(review): the state is a plain static with no locking visible here -
+ * confirm how concurrent callers are expected to behave.
+ */
+static guint64 *
+rspamd_fast_random_seed(void)
+{
+	static guint64 seed;
+
+	if (G_UNLIKELY(seed == 0)) {
+		ottery_rand_bytes((void *) &seed, sizeof(seed));
+	}
+
+	return &seed;
+}
+
+/*
+ * wyrand
+ *
+ * Advance *seed by a fixed odd constant, multiply it by itself xored with a
+ * second constant as a 64x64->128 bit product, and return the xor of the
+ * high and low halves of that product.
+ */
+inline uint64_t
+rspamd_random_uint64_fast_seed(uint64_t *seed)
+{
+	*seed += UINT64_C(0xa0761d6478bd642f);
+#ifdef __SIZEOF_INT128__
+#if defined(__aarch64__)
+	/* Low half by a plain multiply, high half by the umulh instruction */
+	uint64_t lo, hi, p = *seed ^ UINT64_C(0xe7037ed1a0b428db), v = *seed;
+	lo = v * p;
+	__asm__("umulh %0, %1, %2"
+			: "=r"(hi)
+			: "r"(v), "r"(p));
+	return lo ^ hi;
+#else
+	__uint128_t t = (__uint128_t) *seed * (*seed ^ UINT64_C(0xe7037ed1a0b428db));
+	return (t >> 64) ^ t;
+#endif
+#else
+	/* Implementation of 64x64->128-bit multiplication by four 32x32->64
+	 * bit multiplication. */
+	uint64_t lo, hi, p = *seed ^ UINT64_C(0xe7037ed1a0b428db), v = *seed;
+	uint64_t hv = v >> 32, hp = p >> 32;
+	uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+	uint64_t rh = hv * hp;
+	uint64_t rm_0 = hv * lp;
+	uint64_t rm_1 = hp * lv;
+	uint64_t rl = lv * lp;
+	uint64_t t;
+
+	/* We could ignore a carry bit here if we did not care about the
+	   same hash for 32-bit and 64-bit targets. */
+	t = rl + (rm_0 << 32);
+	lo = t + (rm_1 << 32);
+	hi = rh + (rm_0 >> 32) + (rm_1 >> 32);
+	return lo ^ hi;
+#endif
+}
+
+/* Fast random double drawn from the shared static generator state */
+gdouble
+rspamd_random_double_fast(void)
+{
+	return rspamd_random_double_fast_seed(rspamd_fast_random_seed());
+}
+
+/*
+ * Fast random double using caller-provided state: converts the output of
+ * rspamd_random_uint64_fast_seed() with rspamd_double_from_int64().
+ * (This was previously labelled "xoshiro256+", which does not match the
+ * generator it actually calls.)
+ */
+inline gdouble
+rspamd_random_double_fast_seed(guint64 *seed)
+{
+	return rspamd_double_from_int64(rspamd_random_uint64_fast_seed(seed));
+}
+
+/* Fast random 64-bit integer drawn from the shared static generator state */
+guint64
+rspamd_random_uint64_fast(void)
+{
+	return rspamd_random_uint64_fast_seed(rspamd_fast_random_seed());
+}
+
+/* Make sure the shared fast generator state has been seeded; the state pointer is discarded */
+void rspamd_random_seed_fast(void)
+{
+	(void) rspamd_fast_random_seed();
+}
+
+/*
+ * Add a random amount to `in`: the addition is rspamd_random_double()
+ * scaled by `jitter`, or by `in` itself when jitter is zero.
+ */
+gdouble
+rspamd_time_jitter(gdouble in, gdouble jitter)
+{
+	gdouble scale = (jitter == 0) ? in : jitter;
+
+	return in + scale * rspamd_random_double();
+}
+
+/*
+ * Compare two buffers without branching on their contents inside the loop.
+ * @param a first buffer
+ * @param b second buffer
+ * @param len number of bytes to compare; 0 means both arguments are
+ * NUL-terminated strings whose lengths are taken with strlen() (strings of
+ * different length compare unequal immediately)
+ * @return TRUE if the buffers are equal, FALSE otherwise
+ */
+gboolean
+rspamd_constant_memcmp(const void *a, const void *b, gsize len)
+{
+	gsize lena, lenb, i;
+	guint16 d, r = 0, m;
+	guint16 v;
+	const guint8 *aa = (const guint8 *) a,
+				 *bb = (const guint8 *) b;
+
+	if (len == 0) {
+		lena = strlen((const char *) a);
+		lenb = strlen((const char *) b);
+
+		if (lena != lenb) {
+			return FALSE;
+		}
+
+		len = lena;
+	}
+
+	for (i = 0; i < len; i++) {
+		/* m is all ones while the low byte of r is still zero, zero afterwards */
+		v = ((guint16) (guint8) r) + 255;
+		m = v / 256 - 1;
+		/* d is non-zero exactly when the two bytes differ */
+		d = (guint16) ((int) aa[i] - (int) bb[i]);
+		r |= (d & m);
+	}
+
+	/* r is zero only if no difference was recorded */
+	return (((gint32) (guint16) ((guint32) r + 0x8000) - 0x8000) == 0);
+}
+
+/*
+ * Open a file with extra safety checks.
+ *
+ * Unless O_CREAT is requested the path is examined with lstat() first: an
+ * existing path must be a regular file, or a symlink when allow_symlink is
+ * set. The descriptor is opened close-on-exec (via O_CLOEXEC or fcntl())
+ * and, where supported, with O_NOFOLLOW when symlinks are not allowed.
+ *
+ * @return descriptor, or -1 on failure; errno reflects the failing system
+ * call (it is not set when the file type check rejects the path)
+ */
+int rspamd_file_xopen(const char *fname, int oflags, guint mode,
+					  gboolean allow_symlink)
+{
+	struct stat sb;
+	int fd, flags = oflags;
+
+	if (!(oflags & O_CREAT)) {
+		if (lstat(fname, &sb) == -1) {
+
+			if (errno != ENOENT) {
+				return (-1);
+			}
+		}
+		else if (!S_ISREG(sb.st_mode)) {
+			/* Only a symlink may pass, and only when explicitly allowed */
+			if (!S_ISLNK(sb.st_mode) || !allow_symlink) {
+				return -1;
+			}
+		}
+	}
+
+#ifdef HAVE_OCLOEXEC
+	flags |= O_CLOEXEC;
+#endif
+
+#ifdef HAVE_ONOFOLLOW
+	if (!allow_symlink) {
+		flags |= O_NOFOLLOW;
+	}
+#endif
+
+	fd = open(fname, flags, mode);
+
+#ifndef HAVE_OCLOEXEC
+	/* Only touch a valid descriptor so that open()'s errno is preserved */
+	if (fd != -1 && fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
+		int serrno = errno;
+
+		close(fd);
+		errno = serrno;
+
+		return -1;
+	}
+#endif
+
+	return (fd);
+}
+
+/*
+ * Map a regular file into memory with MAP_SHARED.
+ * @param fname path, opened through rspamd_file_xopen()
+ * @param mode mmap protection flags; PROT_WRITE selects O_RDWR for the open
+ * @param size receives the mapping size on success, (gsize) -1 if the file
+ * cannot be stat'ed or is not regular, and 0 for an empty file; it is left
+ * untouched when the open or the mmap itself fails
+ * @param allow_symlink forwarded to rspamd_file_xopen()
+ * @return mapped address or NULL
+ */
+gpointer
+rspamd_file_xmap(const char *fname, guint mode, gsize *size,
+				 gboolean allow_symlink)
+{
+	struct stat st;
+	gpointer mapped;
+	gint fd, open_flags;
+
+	g_assert(fname != NULL);
+	g_assert(size != NULL);
+
+	open_flags = (mode & PROT_WRITE) ? O_RDWR : O_RDONLY;
+	fd = rspamd_file_xopen(fname, open_flags, 0, allow_symlink);
+
+	if (fd == -1) {
+		return NULL;
+	}
+
+	if (fstat(fd, &st) == -1 || !S_ISREG(st.st_mode)) {
+		close(fd);
+		*size = (gsize) -1;
+
+		return NULL;
+	}
+
+	if (st.st_size == 0) {
+		/* Nothing to map */
+		close(fd);
+		*size = (gsize) 0;
+
+		return NULL;
+	}
+
+	/* The mapping stays valid after the descriptor is closed */
+	mapped = mmap(NULL, st.st_size, mode, MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (mapped == MAP_FAILED) {
+		return NULL;
+	}
+
+	*size = st.st_size;
+
+	return mapped;
+}
+
+
+/*
+ * Map a shared memory object (or a plain file when shm_open() is not
+ * usable on this platform) into memory with MAP_SHARED.
+ * @param fname object name
+ * @param mode mmap protection flags; PROT_WRITE selects a read-write open
+ * @param size receives the mapping size on success, untouched otherwise
+ * @return mapped address or NULL
+ */
+gpointer
+rspamd_shmem_xmap(const char *fname, guint mode,
+				  gsize *size)
+{
+	struct stat st;
+	gpointer mapped;
+	gint fd, open_flags;
+
+	g_assert(fname != NULL);
+	g_assert(size != NULL);
+
+	open_flags = (mode & PROT_WRITE) ? O_RDWR : O_RDONLY;
+
+#ifdef HAVE_SANE_SHMEM
+	fd = shm_open(fname, open_flags, 0);
+#else
+	fd = open(fname, open_flags, 0);
+#endif
+
+	if (fd == -1) {
+		return NULL;
+	}
+
+	if (fstat(fd, &st) == -1) {
+		close(fd);
+
+		return NULL;
+	}
+
+	/* The mapping stays valid after the descriptor is closed */
+	mapped = mmap(NULL, st.st_size, mode, MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (mapped == MAP_FAILED) {
+		return NULL;
+	}
+
+	*size = st.st_size;
+
+	return mapped;
+}
+
+/*
+ * Historical approach (no longer used):
+ * A(x - 0.5)^4 + B(x - 0.5)^3 + C(x - 0.5)^2 + D(x - 0.5)
+ * A = 32,
+ * B = -6
+ * C = -7
+ * D = 3
+ * y = 32(x - 0.5)^4 - 6(x - 0.5)^3 - 7(x - 0.5)^2 + 3(x - 0.5)
+ *
+ * Current approach:
+ * y = ((x - bias)*2)^8
+ */
+gdouble
+rspamd_normalize_probability(gdouble x, gdouble bias)
+{
+	const gdouble centered = (x - bias) * 2.0;
+
+	return pow(centered, 8);
+}
+
+/*
+ * Convert broken-down time to seconds since the Unix epoch without using
+ * the C library timezone machinery.
+ * Calculations from musl libc
+ *
+ * @param tm broken-down time; tm_year, tm_mon, tm_mday, tm_hour, tm_min and
+ * tm_sec are read (tm_mon indexes a 12-element table without a range check)
+ * @param tz timezone offset in +-HHMM decimal form (e.g. 130 means one hour
+ * and thirty minutes); it is subtracted from the result
+ * @return seconds since the epoch
+ */
+guint64
+rspamd_tm_to_time(const struct tm *tm, glong tz)
+{
+	guint64 result;
+	gboolean is_leap = FALSE;
+	gint leaps, y = tm->tm_year, cycles, rem, centuries;
+	/* Split HHMM into hours and minutes and convert both to seconds */
+	glong offset = (tz / 100) * 3600 + (tz % 100) * 60;
+
+	/* How many seconds in each month from the beginning of the year */
+	static const gint secs_through_month[] = {
+		0, 31 * 86400, 59 * 86400, 90 * 86400,
+		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400};
+
+	/* Convert year */
+	if (tm->tm_year - 2ULL <= 136) {
+		/* Fast path for tm_year 2..138: every 4th year is a leap year */
+		leaps = (y - 68) / 4;
+
+		if (!((y - 68) & 3)) {
+			leaps--;
+			is_leap = 1;
+		}
+
+		result = 31536000 * (y - 70) + 86400 * leaps;
+	}
+	else {
+		/* General path: count 400-year cycles relative to tm_year 100 */
+		cycles = (y - 100) / 400;
+		rem = (y - 100) % 400;
+		if (rem < 0) {
+			cycles--;
+			rem += 400;
+		}
+
+		if (!rem) {
+			is_leap = 1;
+			centuries = 0;
+			leaps = 0;
+		}
+		else {
+			if (rem >= 200) {
+				if (rem >= 300) {
+					centuries = 3;
+					rem -= 300;
+				}
+				else {
+					centuries = 2;
+					rem -= 200;
+				}
+			}
+			else {
+				if (rem >= 100) {
+					centuries = 1;
+					rem -= 100;
+				}
+				else {
+					centuries = 0;
+				}
+			}
+
+			if (!rem) {
+				is_leap = 1;
+				leaps = 0;
+			}
+			else {
+				leaps = rem / 4U;
+				rem %= 4U;
+				is_leap = !rem;
+			}
+		}
+
+		leaps += 97 * cycles + 24 * centuries - (gint) is_leap;
+		result = (y - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400;
+	}
+
+	/* Now convert months to seconds */
+	result += secs_through_month[tm->tm_mon];
+	/* One more day */
+	if (is_leap && tm->tm_mon >= 2) {
+		result += 86400;
+	}
+
+	result += 86400LL * (tm->tm_mday - 1);
+	result += 3600LL * tm->tm_hour;
+	result += 60LL * tm->tm_min;
+	result += tm->tm_sec;
+
+	/* Now apply tz offset */
+	result -= offset;
+
+	return result;
+}
+
+
+/*
+ * Convert seconds since the Unix epoch to broken-down UTC time.
+ *
+ * The calculation is anchored at 2000-03-01 so that the leap day is the
+ * last day of the (shifted) year. All intermediate day/second counters are
+ * signed, which keeps timestamps before that anchor (including negative
+ * ones) correct.
+ *
+ * @param ts seconds since the epoch
+ * @param dest filled with year, month, day, weekday, day of year and time
+ * of day; tm_gmtoff and tm_zone are set to UTC except on Solaris
+ */
+void rspamd_gmtime(gint64 ts, struct tm *dest)
+{
+	gint64 days, secs, years;
+	int remdays, remsecs, remyears;
+	int leap_400_cycles, leap_100_cycles, leap_4_cycles;
+	int months;
+	int wday, yday, leap;
+	/* From March */
+	static const uint8_t days_in_month[] = {31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29};
+	static const gint64 leap_epoch = 946684800LL + 86400 * (31 + 29);
+	static const gint64 days_per_400y = 365 * 400 + 97;
+	static const gint64 days_per_100y = 365 * 100 + 24;
+	static const gint64 days_per_4y = 365 * 4 + 1;
+
+	secs = ts - leap_epoch;
+	days = secs / 86400;
+	remsecs = secs % 86400;
+
+	/* Signed division truncates towards zero: fix up times before the anchor */
+	if (remsecs < 0) {
+		remsecs += 86400;
+		days--;
+	}
+
+	wday = (3 + days) % 7;
+	if (wday < 0) {
+		wday += 7;
+	}
+
+	/* Deal with gregorian adjustments */
+	leap_400_cycles = days / days_per_400y;
+	remdays = days % days_per_400y;
+
+	if (remdays < 0) {
+		remdays += days_per_400y;
+		leap_400_cycles--;
+	}
+
+	leap_100_cycles = remdays / days_per_100y;
+	if (leap_100_cycles == 4) {
+		/* 400 years */
+		leap_100_cycles--;
+	}
+
+	remdays -= leap_100_cycles * days_per_100y;
+
+	leap_4_cycles = remdays / days_per_4y;
+	if (leap_4_cycles == 25) {
+		/* 100 years */
+		leap_4_cycles--;
+	}
+	remdays -= leap_4_cycles * days_per_4y;
+
+	remyears = remdays / 365;
+	if (remyears == 4) {
+		/* Ordinary leap year */
+		remyears--;
+	}
+	remdays -= remyears * 365;
+
+	leap = !remyears && (leap_4_cycles || !leap_100_cycles);
+	yday = remdays + 31 + 28 + leap;
+
+	if (yday >= 365 + leap) {
+		yday -= 365 + leap;
+	}
+
+	years = remyears + 4 * leap_4_cycles + 100 * leap_100_cycles +
+			400LL * leap_400_cycles;
+
+	for (months = 0; days_in_month[months] <= remdays; months++) {
+		remdays -= days_in_month[months];
+	}
+
+	/* Months are counted from March: wrap January and February into the next year */
+	if (months >= 10) {
+		months -= 12;
+		years++;
+	}
+
+	dest->tm_year = years + 100;
+	dest->tm_mon = months + 2;
+	dest->tm_mday = remdays + 1;
+	dest->tm_wday = wday;
+	dest->tm_yday = yday;
+
+	dest->tm_hour = remsecs / 3600;
+	dest->tm_min = remsecs / 60 % 60;
+	dest->tm_sec = remsecs % 60;
+#if !defined(__sun)
+	dest->tm_gmtoff = 0;
+	dest->tm_zone = "GMT";
+#endif
+}
+
+/*
+ * Convert seconds since the epoch to local broken-down time using
+ * localtime_r(); ts is narrowed to time_t first.
+ */
+void rspamd_localtime(gint64 ts, struct tm *dest)
+{
+	time_t t = ts;
+	localtime_r(&t, dest);
+}
+
+/*
+ * Compress the contents of *in with gzip framing, reusing the string's own
+ * storage for the output.
+ *
+ * The string is first grown to deflateBound() bytes if needed (so *in may
+ * be replaced). The first chunk of output is produced into a stack buffer;
+ * compression then continues in place for as long as the write position
+ * stays behind the read position. If the output catches up with the input,
+ * the unread input is moved to a temporary heap buffer and compression is
+ * finished from there.
+ *
+ * @param in string to compress; updated on return
+ * @return TRUE if the whole stream was written (deflate() reported
+ * Z_STREAM_END), FALSE otherwise
+ */
+gboolean
+rspamd_fstring_gzip(rspamd_fstring_t **in)
+{
+	z_stream strm;
+	rspamd_fstring_t *buf = *in;
+	int ret;
+	unsigned tmp_remain;
+	unsigned char temp[BUFSIZ];
+
+	memset(&strm, 0, sizeof(strm));
+	/* MAX_WBITS + 16 asks zlib for a gzip header and trailer */
+	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+					   MAX_WBITS + 16, MAX_MEM_LEVEL - 1, Z_DEFAULT_STRATEGY);
+
+	if (ret != Z_OK) {
+		return FALSE;
+	}
+
+	if (buf->allocated < deflateBound(&strm, buf->len)) {
+		buf = rspamd_fstring_grow(buf, deflateBound(&strm, buf->len));
+		*in = buf;
+	}
+
+	strm.next_in = buf->str;
+	strm.avail_in = buf->len;
+
+	/* First pass goes to the stack buffer so the input is not overwritten */
+	strm.next_out = temp;
+	strm.avail_out = sizeof(temp) > buf->allocated ? buf->allocated : sizeof(temp);
+	ret = deflate(&strm, Z_FINISH);
+	if (ret == Z_STREAM_ERROR) {
+		deflateEnd(&strm);
+		return FALSE;
+	}
+
+	/* Try to compress in-place */
+	tmp_remain = strm.next_out - temp;
+	if (tmp_remain <= (strm.avail_in ? buf->len - strm.avail_in : buf->allocated)) {
+		/* The consumed part of the input is large enough to hold the output so far */
+		memcpy(buf->str, temp, tmp_remain);
+		strm.next_out = (unsigned char *) buf->str + tmp_remain;
+		tmp_remain = 0;
+		while (ret == Z_OK) {
+			/* Output may use only the gap up to the unread input (or the rest of the buffer) */
+			strm.avail_out = strm.avail_in ? strm.next_in - strm.next_out : ((unsigned char *) buf->str + buf->allocated) - strm.next_out;
+			ret = deflate(&strm, Z_FINISH);
+		}
+		if (ret != Z_BUF_ERROR || strm.avail_in == 0) {
+			buf->len = strm.next_out - (unsigned char *) buf->str;
+			*in = buf;
+			deflateEnd(&strm);
+
+			return ret == Z_STREAM_END;
+		}
+	}
+
+	/*
+	 * The case when input and output has caught each other, hold the remaining
+	 * in a temporary buffer and compress it separately
+	 */
+	unsigned char *hold = g_malloc(strm.avail_in);
+	memcpy(hold, strm.next_in, strm.avail_in);
+	strm.next_in = hold;
+	if (tmp_remain) {
+		memcpy(buf->str, temp, tmp_remain);
+		strm.next_out = (unsigned char *) buf->str + tmp_remain;
+	}
+	strm.avail_out = ((unsigned char *) buf->str + buf->allocated) - strm.next_out;
+	ret = deflate(&strm, Z_FINISH);
+	g_free(hold);
+	buf->len = strm.next_out - (unsigned char *) buf->str;
+	*in = buf;
+	deflateEnd(&strm);
+
+	return ret == Z_STREAM_END;
+}
+
+/*
+ * Decompress a gzip stream held in *in.
+ *
+ * On success *in is replaced by a new string with the decompressed data and
+ * the original string is freed. On any failure (zlib initialisation error,
+ * corrupt data, truncated input) *in is left untouched and the partial
+ * output is discarded.
+ *
+ * @param in string holding gzip data; updated on success
+ * @return TRUE if the complete stream was decompressed
+ */
+gboolean
+rspamd_fstring_gunzip(rspamd_fstring_t **in)
+{
+	z_stream strm;
+	rspamd_fstring_t *buf = *in, *out;
+	gsize total_out = 0;
+	int ret;
+
+	memset(&strm, 0, sizeof(strm));
+	/* MAX_WBITS + 16 asks zlib to expect a gzip header and trailer */
+	ret = inflateInit2(&strm, MAX_WBITS + 16);
+
+	if (ret != Z_OK) {
+		return FALSE;
+	}
+
+	/* Allocate the output only once zlib is ready, so nothing leaks above */
+	out = rspamd_fstring_sized_new(buf->len);
+
+	strm.next_in = (unsigned char *) buf->str;
+	strm.avail_in = buf->len;
+
+	do {
+		strm.next_out = (unsigned char *) out->str + total_out;
+		strm.avail_out = out->allocated - total_out;
+
+		ret = inflate(&strm, Z_NO_FLUSH);
+		if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) {
+			break;
+		}
+
+		gsize out_remain = strm.avail_out;
+		total_out = out->allocated - out_remain;
+
+		if (ret != Z_STREAM_END) {
+			if (out_remain == 0) {
+				/* Output buffer is full: enlarge it and continue */
+				out = rspamd_fstring_grow(out, out->allocated * 2);
+			}
+			else if (strm.avail_in == 0) {
+				/*
+				 * There is output space left but no more input and the
+				 * stream has not ended: the data is truncated, and calling
+				 * inflate() again would never make progress.
+				 */
+				break;
+			}
+		}
+
+	} while (ret != Z_STREAM_END);
+
+	if (ret == Z_STREAM_END) {
+		*in = out;
+		out->len = total_out;
+		rspamd_fstring_free(buf);
+	}
+	else {
+		/* Revert */
+		*in = buf;
+		rspamd_fstring_free(out);
+	}
+
+	inflateEnd(&strm);
+
+	return ret == Z_STREAM_END;
+}
+
+/*
+ * Expand the glob expression full_path and append every regular file that
+ * matches to res (as g_strdup'ed strings).
+ *
+ * With recursive set, each matched directory is descended into by globbing
+ * "<directory>/<pattern>"; nesting deeper than 16 levels is an error.
+ * Entries that cannot be stat'ed because of EPERM, EACCES or ELOOP are
+ * skipped silently.
+ *
+ * @return TRUE on success (including "no matches"), FALSE with err set
+ */
+static gboolean
+rspamd_glob_dir(const gchar *full_path, const gchar *pattern,
+				gboolean recursive, guint rec_len,
+				GPtrArray *res, GError **err)
+{
+	glob_t globbuf;
+	const gchar *path;
+	static gchar pathbuf[PATH_MAX]; /* Static to help recursion */
+	guint i;
+	gint rc;
+	static const guint rec_lim = 16;
+	struct stat st;
+
+	if (rec_len > rec_lim) {
+		g_set_error(err, g_quark_from_static_string("glob"), EOVERFLOW,
+					"maximum nesting is reached: %u", rec_lim);
+
+		return FALSE;
+	}
+
+	memset(&globbuf, 0, sizeof(globbuf));
+
+	if ((rc = glob(full_path, 0, NULL, &globbuf)) != 0) {
+
+		if (rc != GLOB_NOMATCH) {
+			g_set_error(err, g_quark_from_static_string("glob"), errno,
+						"glob %s failed: %s", full_path, strerror(errno));
+			globfree(&globbuf);
+
+			return FALSE;
+		}
+		else {
+			globfree(&globbuf);
+
+			return TRUE;
+		}
+	}
+
+	for (i = 0; i < globbuf.gl_pathc; i++) {
+		path = globbuf.gl_pathv[i];
+
+		if (stat(path, &st) == -1) {
+			if (errno == EPERM || errno == EACCES || errno == ELOOP) {
+				/* Silently ignore */
+				continue;
+			}
+
+			g_set_error(err, g_quark_from_static_string("glob"), errno,
+						"stat %s failed: %s", path, strerror(errno));
+			globfree(&globbuf);
+
+			return FALSE;
+		}
+
+		if (S_ISREG(st.st_mode)) {
+			g_ptr_array_add(res, g_strdup(path));
+		}
+		else if (recursive && S_ISDIR(st.st_mode)) {
+			rspamd_snprintf(pathbuf, sizeof(pathbuf), "%s%c%s",
+							path, G_DIR_SEPARATOR, pattern);
+
+			/*
+			 * Descend into the subdirectory expression just built. The
+			 * callee passes it to glob() before it may overwrite the
+			 * shared buffer, and this level only uses its own globbuf
+			 * afterwards.
+			 */
+			if (!rspamd_glob_dir(pathbuf, pattern, recursive, rec_len + 1,
+								 res, err)) {
+				globfree(&globbuf);
+
+				return FALSE;
+			}
+		}
+	}
+
+	globfree(&globbuf);
+
+	return TRUE;
+}
+
+/*
+ * Collect the regular files matching "<dir>/<pattern>", optionally
+ * descending into matched directories.
+ * @return array of g_malloc'ed path strings (with g_free as the element
+ * destructor), or NULL with err set on failure
+ */
+GPtrArray *
+rspamd_glob_path(const gchar *dir,
+				 const gchar *pattern,
+				 gboolean recursive,
+				 GError **err)
+{
+	gchar full_pattern[PATH_MAX];
+	GPtrArray *matches = g_ptr_array_new_full(32, (GDestroyNotify) g_free);
+
+	rspamd_snprintf(full_pattern, sizeof(full_pattern), "%s%c%s", dir, G_DIR_SEPARATOR, pattern);
+
+	if (rspamd_glob_dir(full_pattern, pattern, recursive, 0, matches, err)) {
+		return matches;
+	}
+
+	g_ptr_array_free(matches, TRUE);
+
+	return NULL;
+}
+
+/*
+ * Feed one sample into a cumulative moving average. The counter is reset
+ * when it holds no samples; the mean and the running average of squared
+ * deviations (stored in stddev) are then updated.
+ * @return the updated mean
+ */
+double
+rspamd_set_counter(struct rspamd_counter_data *cd, gdouble value)
+{
+	gdouble deviation, sq_deviation;
+
+	if (cd->number == 0) {
+		cd->mean = 0;
+		cd->stddev = 0;
+	}
+
+	cd->number++;
+	cd->mean += (value - cd->mean) / (gdouble) (cd->number);
+
+	/* The deviation is measured against the already updated mean */
+	deviation = value - cd->mean;
+	sq_deviation = deviation * deviation;
+	cd->stddev += (sq_deviation - cd->stddev) / (gdouble) (cd->number);
+
+	return cd->mean;
+}
+
+/*
+ * Feed one sample into an exponential moving average with smoothing factor
+ * alpha. The counter is reset when it holds no samples; mean and the
+ * exponentially weighted spread (stored in stddev) are then updated.
+ * @return the updated mean
+ */
+float rspamd_set_counter_ema(struct rspamd_counter_data *cd,
+							 float value,
+							 float alpha)
+{
+	float delta, step;
+
+	if (cd->number == 0) {
+		cd->mean = 0;
+		cd->stddev = 0;
+	}
+
+	delta = value - cd->mean;
+	step = delta * alpha;
+
+	cd->mean += step;
+	cd->stddev = (1.0f - alpha) * (cd->stddev + delta * step);
+	cd->number++;
+
+	return cd->mean;
+}
+
+/*
+ * Shuffle a pointer array in place: each position i is swapped with a
+ * position chosen from [i, len) using the fast random generator.
+ * Arrays with fewer than two elements are left alone.
+ */
+void rspamd_ptr_array_shuffle(GPtrArray *ar)
+{
+	guint n = ar->len;
+
+	if (n < 2) {
+		return;
+	}
+
+	for (guint i = 0; i + 1 < n; i++) {
+		guint pick = i + rspamd_random_uint64_fast() % (n - i);
+		gpointer picked = g_ptr_array_index(ar, pick);
+
+		g_ptr_array_index(ar, pick) = g_ptr_array_index(ar, i);
+		g_ptr_array_index(ar, i) = picked;
+	}
+}
+
+/*
+ * Sum the non-NaN elements of an array using compensated summation.
+ * @param ar input values
+ * @param nelts in: number of elements in ar; out: number of elements that
+ * were not NaN and therefore took part in the sum
+ * @return the sum
+ */
+float rspamd_sum_floats(float *ar, gsize *nelts)
+{
+	float total = 0.0f;
+	volatile float comp = 0.0f; /* We don't want any optimisations around comp */
+	gsize used = 0;
+
+	for (gsize i = 0; i < *nelts; i++) {
+		float elt = ar[i];
+
+		if (isnan(elt)) {
+			continue;
+		}
+
+		used++;
+
+		/* Carry the rounding error of the previous addition into this one */
+		float adjusted = elt - comp;
+		float next = total + adjusted;
+		comp = (next - total) - adjusted;
+		total = next;
+	}
+
+	*nelts = used;
+	return total;
+}
+
+/*
+ * Normalise a path in place with a single left-to-right pass: runs of
+ * slashes are collapsed, "." components are dropped and ".." components
+ * remove the previously emitted component where there is one.
+ *
+ * @param path buffer holding the path; rewritten in place (the output is
+ * never longer than the input and no terminating NUL is written)
+ * @param len number of bytes of path to process
+ * @param nlen if not NULL, receives the length of the normalised path
+ */
+void rspamd_normalize_path_inplace(gchar *path, guint len, gsize *nlen)
+{
+	/*
+	 * p is the read cursor and o the write cursor; slash and dot remember
+	 * where a pending '/' or '.' was seen so it can be emitted later.
+	 */
+	const gchar *p, *end, *slash = NULL, *dot = NULL;
+	gchar *o;
+	enum {
+		st_normal = 0,
+		st_got_dot,
+		st_got_dot_dot,
+		st_got_slash,
+		st_got_slash_slash,
+	} state = st_normal;
+
+	p = path;
+	end = path + len;
+	o = path;
+
+	while (p < end) {
+		switch (state) {
+		case st_normal:
+			/* '/' and '.' are held back until the following character is known */
+			if (G_UNLIKELY(*p == '/')) {
+				state = st_got_slash;
+				slash = p;
+			}
+			else if (G_UNLIKELY(*p == '.')) {
+				state = st_got_dot;
+				dot = p;
+			}
+			else {
+				*o++ = *p;
+			}
+			p++;
+			break;
+		case st_got_slash:
+			if (G_UNLIKELY(*p == '/')) {
+				/* Ignore double slash */
+				*o++ = *p;
+				state = st_got_slash_slash;
+			}
+			else if (G_UNLIKELY(*p == '.')) {
+				dot = p;
+				state = st_got_dot;
+			}
+			else {
+				*o++ = '/';
+				*o++ = *p;
+				slash = NULL;
+				dot = NULL;
+				state = st_normal;
+			}
+			p++;
+			break;
+		case st_got_slash_slash:
+			/* Skip the rest of the run; the first non-slash is reprocessed in st_normal */
+			if (G_LIKELY(*p != '/')) {
+				slash = p - 1;
+				dot = NULL;
+				state = st_normal;
+				continue;
+			}
+			p++;
+			break;
+		case st_got_dot:
+			if (G_UNLIKELY(*p == '/')) {
+				/* Remove any /./ or ./ paths */
+				if (((o > path && *(o - 1) != '/') || (o == path)) && slash) {
+					/* Preserve one slash */
+					*o++ = '/';
+				}
+
+				slash = p;
+				dot = NULL;
+				/* Ignore last slash */
+				state = st_normal;
+			}
+			else if (*p == '.') {
+				/* Double dot character */
+				state = st_got_dot_dot;
+			}
+			else {
+				/* We have something like .some or /.some */
+				if (dot && p > dot) {
+					if (slash == dot - 1 && (o > path && *(o - 1) != '/')) {
+						/* /.blah */
+						memmove(o, slash, p - slash);
+						o += p - slash;
+					}
+					else {
+						memmove(o, dot, p - dot);
+						o += p - dot;
+					}
+				}
+
+				slash = NULL;
+				dot = NULL;
+				state = st_normal;
+				continue;
+			}
+
+			p++;
+			break;
+		case st_got_dot_dot:
+			if (*p == '/') {
+				/* We have something like /../ or ../ */
+				if (slash) {
+					/* We need to remove the last component from o if it is there */
+					if (o > path + 2 && *(o - 1) == '/') {
+						slash = rspamd_memrchr(path, '/', o - path - 2);
+					}
+					else if (o > path + 1) {
+						slash = rspamd_memrchr(path, '/', o - path - 1);
+					}
+					else {
+						slash = NULL;
+					}
+
+					if (slash) {
+						o = (gchar *) slash;
+					}
+					/* Otherwise we keep these dots */
+					slash = p;
+					state = st_got_slash;
+				}
+				else {
+					/* We have something like bla../, so we need to copy it as is */
+					if (o > path && dot && p > dot) {
+						memmove(o, dot, p - dot);
+						o += p - dot;
+					}
+
+					slash = NULL;
+					dot = NULL;
+					state = st_normal;
+					continue;
+				}
+			}
+			else {
+				/* We have something like ..bla or ... */
+				if (slash) {
+					*o++ = '/';
+				}
+
+				if (dot && p > dot) {
+					memmove(o, dot, p - dot);
+					o += p - dot;
+				}
+
+				slash = NULL;
+				dot = NULL;
+				state = st_normal;
+				continue;
+			}
+
+			p++;
+			break;
+		}
+	}
+
+	/* Leftover: flush whatever was still pending when the input ended */
+	switch (state) {
+	case st_got_dot_dot:
+		/* Trailing .. */
+		if (slash) {
+			/* We need to remove the last component from o if it is there */
+			if (o > path + 2 && *(o - 1) == '/') {
+				slash = rspamd_memrchr(path, '/', o - path - 2);
+			}
+			else if (o > path + 1) {
+				slash = rspamd_memrchr(path, '/', o - path - 1);
+			}
+			else {
+				if (o == path) {
+					/* Corner case */
+					*o++ = '/';
+				}
+
+				slash = NULL;
+			}
+
+			if (slash) {
+				/* Remove last / */
+				o = (gchar *) slash;
+			}
+		}
+		else {
+			/* Corner case */
+			if (o == path) {
+				*o++ = '/';
+			}
+			else {
+				if (dot && p > dot) {
+					memmove(o, dot, p - dot);
+					o += p - dot;
+				}
+			}
+		}
+		break;
+	case st_got_dot:
+		if (slash) {
+			/* /. -> must be / */
+			*o++ = '/';
+		}
+		else {
+			if (o > path) {
+				*o++ = '.';
+			}
+		}
+		break;
+	case st_got_slash:
+		*o++ = '/';
+		break;
+	default:
+#if 0
+		if (o > path + 1 && *(o - 1) == '/') {
+			o --;
+		}
+#endif
+		break;
+	}
+
+	if (nlen) {
+		*nlen = (o - path);
+	}
+}
diff --git a/src/libutil/util.h b/src/libutil/util.h
new file mode 100644
index 0000000..7111a07
--- /dev/null
+++ b/src/libutil/util.h
@@ -0,0 +1,581 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_UTIL_H
+#define RSPAMD_UTIL_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "printf.h"
+#include "fstring.h"
+#include "addr.h"
+#include "str_util.h"
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_config;
+
+enum rspamd_exception_type { /* kind tag stored in rspamd_process_exception.type */
+ RSPAMD_EXCEPTION_NEWLINE = 0,
+ RSPAMD_EXCEPTION_URL,
+ RSPAMD_EXCEPTION_GENERIC,
+};
+/**
+ * Describes a region of text that is treated as an exception during processing
+ */
+struct rspamd_process_exception {
+ goffset pos; /* presumably the offset of the region within the text -- confirm */
+ guint len; /* presumably the length of the region -- confirm */
+ gpointer ptr; /* opaque pointer; NOTE(review): its meaning is not visible in this header */
+ enum rspamd_exception_type type; /* which kind of exception this is */
+};
+
+/**
+ * Create a generic socket of the given family, type and protocol
+ * @param af address family
+ * @param type socket type
+ * @param protocol socket protocol
+ * @param async if set, the socket is made non-blocking
+ * @return socket FD or -1 in case of error
+ */
+gint rspamd_socket_create(gint af, gint type, gint protocol, gboolean async);
+
+/*
+ * Create a TCP socket and bind or connect it (presumably bind when is_server is set) to the given addrinfo
+ */
+gint rspamd_socket_tcp(struct addrinfo *, gboolean is_server, gboolean async);
+
+/*
+ * Create a UDP socket and bind or connect it (presumably bind when is_server is set) to the given addrinfo
+ */
+gint rspamd_socket_udp(struct addrinfo *, gboolean is_server, gboolean async);
+
+/*
+ * Create a unix socket of the given type and bind or connect it
+ */
+gint rspamd_socket_unix(const gchar *, /* unnamed; presumably the socket path -- confirm */
+ struct sockaddr_un *, /* unnamed; NOTE(review): in/out role is not visible here */
+ gint type,
+ gboolean is_server,
+ gboolean async);
+
+/**
+ * Make a universal socket (network or unix, depending on credits)
+ * @param credits host, ip or path to unix socket
+ * @param port port (used for network sockets)
+ * @param type type of socket (SOCK_STREAM or SOCK_DGRAM)
+ * @param async make this socket async
+ * @param is_server make this socket as server socket
+ * @param try_resolve try name resolution for a socket (BLOCKING)
+ */
+gint rspamd_socket(const gchar *credits, guint16 port, gint type,
+ gboolean async, gboolean is_server, gboolean try_resolve); /* NOTE(review): return value undocumented; presumably fd or -1 like rspamd_socket_create */
+
+
+/*
+ * Create a socketpair into pair[0]/pair[1] (NOTE(review): exact meaning of af and of the result is not visible here)
+ */
+gboolean rspamd_socketpair(gint pair[2], gint af);
+
+/*
+ * Make the specified socket (the gint argument) non-blocking; NOTE(review): return convention is undocumented
+ */
+gint rspamd_socket_nonblocking(gint);
+
+/*
+ * Make the specified socket (the gint argument) blocking; NOTE(review): return convention is undocumented
+ */
+gint rspamd_socket_blocking(gint);
+
+/*
+ * Poll a sync socket fd for the specified events (NOTE(review): timeout units are not documented; confirm)
+ */
+gint rspamd_socket_poll(gint fd, gint timeout, short events);
+
+/*
+ * Init signals; the handler uses the three-argument (siginfo_t) form when HAVE_SA_SIGINFO is defined
+ */
+#ifdef HAVE_SA_SIGINFO
+
+void rspamd_signals_init(struct sigaction *sa, void (*sig_handler)(gint,
+ siginfo_t *,
+ void *));
+
+#else /* !HAVE_SA_SIGINFO: plain one-argument handler */
+void rspamd_signals_init(struct sigaction *sa, void (*sig_handler)(gint));
+#endif /* HAVE_SA_SIGINFO */
+
+/*
+ * Process title utility functions (rspamd_setproctitle takes a format string plus variadic arguments)
+ */
+gint rspamd_init_title(rspamd_mempool_t *pool, gint argc, gchar *argv[], gchar *envp[]);
+gint rspamd_setproctitle(const gchar *fmt, ...);
+
+#ifndef HAVE_PIDFILE
+/*
+ * Pidfile functions from FreeBSD libutil code, declared here when HAVE_PIDFILE is not defined
+ */
+typedef struct rspamd_pidfh_s {
+ gint pf_fd; /* presumably the descriptor of the pidfile -- confirm */
+#ifdef HAVE_PATH_MAX
+ gchar pf_path[PATH_MAX + 1];
+#elif defined(HAVE_MAXPATHLEN)
+ gchar pf_path[MAXPATHLEN + 1];
+#else
+ gchar pf_path[1024 + 1]; /* fallback size when neither PATH_MAX nor MAXPATHLEN is advertised */
+#endif
+ dev_t pf_dev;
+ ino_t pf_ino;
+} rspamd_pidfh_t;
+
+rspamd_pidfh_t *rspamd_pidfile_open(const gchar *path,
+ mode_t mode,
+ pid_t *pidptr);
+
+gint rspamd_pidfile_write(rspamd_pidfh_t *pfh);
+
+gint rspamd_pidfile_close(rspamd_pidfh_t *pfh);
+
+gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh);
+
+#else /* HAVE_PIDFILE: alias the rspamd_pidfile_* names to the pidfile_* API */
+typedef struct pidfh rspamd_pidfh_t;
+#define rspamd_pidfile_open pidfile_open
+#define rspamd_pidfile_write pidfile_write
+#define rspamd_pidfile_close pidfile_close
+#define rspamd_pidfile_remove pidfile_remove
+#endif /* HAVE_PIDFILE */
+
+/*
+ * Expand pattern: %r is replaced with the rcpt value and %f with the from value; the new string is allocated in pool
+ */
+gchar *resolve_stat_filename(rspamd_mempool_t *pool,
+ gchar *pattern,
+ gchar *rcpt,
+ gchar *from);
+
+const gchar *
+rspamd_log_check_time(gdouble start, gdouble end, gint resolution); /* NOTE(review): undocumented; presumably formats the time elapsed between start and end -- confirm in util.c */
+
+/*
+ * File locking functions for descriptor fd (NOTE(review): async presumably selects a non-blocking attempt; confirm)
+ */
+gboolean rspamd_file_lock(gint fd, gboolean async);
+
+gboolean rspamd_file_unlock(gint fd, gboolean async);
+
+/*
+ * Workarounds for older versions of glib: each declaration is enabled only for glib 2.x below the given minor version
+ */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22))
+void g_ptr_array_unref(GPtrArray *array);
+gboolean g_int64_equal(gconstpointer v1, gconstpointer v2);
+guint g_int64_hash(gconstpointer v);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14))
+void g_queue_clear(GQueue *queue);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32))
+void g_queue_free_full(GQueue *queue, GDestroyNotify free_func);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 40))
+void g_ptr_array_insert(GPtrArray *array, gint index_, gpointer data);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 30))
+GPtrArray *g_ptr_array_new_full(guint reserved_size,
+ GDestroyNotify element_free_func);
+#endif
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 32))
+const gchar *g_environ_getenv(gchar **envp, const gchar *variable);
+#endif
+
+/*
+ * Conversions between msec / double seconds and timeval/timespec fields; macro arguments are evaluated more than once
+ */
+#define msec_to_tv(msec, tv) \
+ do { \
+ (tv)->tv_sec = (msec) / 1000; \
+ (tv)->tv_usec = \
+ ((msec) - (tv)->tv_sec * 1000) * 1000; \
+ } while (0)
+#define double_to_tv(dbl, tv) \
+ do { \
+ (tv)->tv_sec = (int) (dbl); \
+ (tv)->tv_usec = \
+ ((dbl) - (int) (dbl)) * 1000 * 1000; \
+ } while (0)
+#define double_to_ts(dbl, ts) \
+ do { \
+ (ts)->tv_sec = (int) (dbl); \
+ (ts)->tv_nsec = \
+ ((dbl) - (int) (dbl)) * 1e9; \
+ } while (0)
+#define tv_to_msec(tv) ((tv)->tv_sec * 1000LLU + (tv)->tv_usec / 1000LLU)
+#define tv_to_double(tv) ((double) (tv)->tv_sec + (tv)->tv_usec / 1.0e6)
+#define ts_to_usec(ts) ((ts)->tv_sec * 1000000LLU + \
+ (ts)->tv_nsec / 1000LLU)
+#define ts_to_double(tv) ((double) (tv)->tv_sec + (tv)->tv_nsec / 1.0e9)
+
+/**
+ * Try to preallocate space for a file on the filesystem (using fallocate or posix_fallocate)
+ * @param fd descriptor
+ * @param offset offset within the file
+ * @param len length to allocate
+ * @return -1 in case of failure
+ */
+gint rspamd_fallocate(gint fd, off_t offset, off_t len);
+
+/**
+ * Mutex wrapper usable with all glib versions: GMutex on glib 2.x with minor > 30, GStaticMutex otherwise
+ */
+typedef struct rspamd_mutex_s {
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+ GMutex mtx;
+#else
+ GStaticMutex mtx;
+#endif
+} rspamd_mutex_t;
+
+
+/**
+ * Create a new rspamd mutex (release it with rspamd_mutex_free)
+ * @return mutex or NULL
+ */
+rspamd_mutex_t *rspamd_mutex_new(void);
+
+/**
+ * Lock mutex
+ * @param mtx mutex to lock
+ */
+void rspamd_mutex_lock(rspamd_mutex_t *mtx);
+
+/**
+ * Unlock mutex
+ * @param mtx mutex to unlock
+ */
+void rspamd_mutex_unlock(rspamd_mutex_t *mtx);
+
+/**
+ * Clear rspamd mutex (NOTE(review): presumably also releases the wrapper itself -- confirm)
+ * @param mtx mutex to clear
+ */
+void rspamd_mutex_free(rspamd_mutex_t *mtx);
+
+/**
+ * Deep copy of one hash table to another, passing keys and values through optional copy functions
+ * @param src source hash
+ * @param dst destination hash
+ * @param key_copy_func function called to copy or modify keys (or NULL)
+ * @param value_copy_func function called to copy or modify values (or NULL)
+ * @param ud user data passed as the second argument of both copy functions
+ */
+void rspamd_hash_table_copy(GHashTable *src, GHashTable *dst,
+ gpointer (*key_copy_func)(gconstpointer data, gpointer ud),
+ gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
+ gpointer ud);
+
+
+/**
+ * Read passphrase from tty with the fixed prompt "Enter passphrase: " (wraps rspamd_read_passphrase_with_prompt)
+ * @param buf buffer to fill with a password
+ * @param size size of the buffer
+ * @param echo turn echo on or off
+ * @param key unused key
+ * @return size of password read
+ */
+#define rspamd_read_passphrase(buf, size, echo, key) (rspamd_read_passphrase_with_prompt("Enter passphrase: ", (buf), (size), (echo), (key)))
+
+/**
+ * Read passphrase from tty with prompt
+ * @param prompt prompt to use
+ * @param buf buffer to fill with a password
+ * @param size size of the buffer
+ * @param echo turn echo on or off
+ * @param key unused key
+ * @return size of password read (as documented for the rspamd_read_passphrase wrapper macro)
+ */
+gint rspamd_read_passphrase_with_prompt(const gchar *prompt, gchar *buf, gint size, bool echo, gpointer key);
+
+/**
+ * Portably return the current clock ticks as seconds (rdtsc_ok presumably permits using the CPU timestamp counter -- confirm)
+ * @return ticks as seconds
+ */
+gdouble rspamd_get_ticks(gboolean rdtsc_ok);
+
+/**
+ * Portably return the current virtual clock ticks as seconds
+ * @return virtual ticks as seconds
+ */
+gdouble rspamd_get_virtual_ticks(void);
+
+
+/**
+ * Return the real (calendar) timestamp as unixtime, expressed as a double
+ */
+gdouble rspamd_get_calendar_ticks(void);
+
+/**
+ * Special utility to help pointer array freeing in rspamd_mempool
+ * @param p array to free (presumably a GPtrArray passed as gpointer -- confirm)
+ */
+void rspamd_ptr_array_free_hard(gpointer p);
+
+/**
+ * Special utility to help array freeing in rspamd_mempool
+ * @param p array to free (presumably a GArray passed as gpointer -- confirm)
+ */
+void rspamd_array_free_hard(gpointer p);
+
+/**
+ * Special utility to help GString freeing in rspamd_mempool (see rspamd_gstring_free_soft for the variant that keeps the memory segment)
+ * @param p GString to free, passed as gpointer
+ */
+void rspamd_gstring_free_hard(gpointer p);
+
+/**
+ * Special utility to help GError freeing in rspamd_mempool
+ * @param p NOTE(review): "maybe" suggests the error may be absent; exact pointer level is not visible here -- confirm
+ */
+void rspamd_gerror_free_maybe(gpointer p);
+
+/**
+ * Special utility to help GString freeing (without freeing the memory segment) in rspamd_mempool
+ * @param p GString to free, passed as gpointer
+ */
+void rspamd_gstring_free_soft(gpointer p);
+
+
+/**
+ * Returns a random hash seed that is initialized statically (i.e. not re-generated on every call)
+ * @return hash seed
+ */
+guint64 rspamd_hash_seed(void);
+
+/**
+ * Fills buf with a random hex string of the specified length (nothing is returned)
+ * @param buf destination buffer (NOTE(review): whether a terminating NUL is written is not visible here)
+ * @param len length of the hex string
+ */
+void rspamd_random_hex(gchar *buf, guint64 len);
+
+/**
+ * NOTE(review): original summary was unfinished; presumably creates a uniquely named shared memory segment -- confirm
+ * @param pattern pattern to create (should end with some number of X symbols), modified by this function
+ * @return gint (presumably a descriptor, or -1 on error -- confirm)
+ */
+gint rspamd_shmem_mkstemp(gchar *pattern);
+
+/**
+ * Return jittered time value for in (NOTE(review): how jitter is applied is not visible in this header)
+ */
+gdouble rspamd_time_jitter(gdouble in, gdouble jitter);
+
+/**
+ * Return random double in range [0..1)
+ * @return random value, 0 inclusive and 1 exclusive
+ */
+gdouble rspamd_random_double(void);
+
+/**
+ * Return random double in range [0..1) using xoroshiro128+ algorithm (not crypto secure)
+ * @return random value; the *_seed variants take a caller-supplied seed pointer
+ */
+gdouble rspamd_random_double_fast(void);
+gdouble rspamd_random_double_fast_seed(guint64 *seed); /* NOTE(review): seed is passed by pointer, presumably updated in place */
+uint64_t rspamd_random_uint64_fast_seed(uint64_t *seed); /* 64-bit integer variant with caller-supplied seed */
+guint64 rspamd_random_uint64_fast(void); /* 64-bit integer variant */
+
+/**
+ * Seed the fast (non crypto secure) rng used by the rspamd_random_*_fast functions
+ */
+void rspamd_random_seed_fast(void);
+
+/**
+ * Constant time version of memcmp; returns gboolean, not an ordering (presumably TRUE when equal -- confirm)
+ */
+gboolean rspamd_constant_memcmp(const void *a, const void *b, gsize len);
+
+/**
+ * Open file without following symlinks or special stuff (see allow_symlink)
+ * @param fname filename
+ * @param oflags open flags
+ * @param mode mode to open
+ * @return fd or -1 in case of error
+ */
+int rspamd_file_xopen(const char *fname, int oflags, guint mode,
+ gboolean allow_symlink); /* allow_symlink was undocumented; presumably lifts the symlink restriction -- confirm */
+
+/**
+ * Map file without following symlinks or special stuff (see allow_symlink)
+ * @param fname filename
+ * @param mode mode to open
+ * @param size target size (must NOT be NULL)
+ * @return pointer to memory (should be freed using munmap) or NULL in case of error
+ */
+gpointer rspamd_file_xmap(const char *fname, guint mode, gsize *size,
+ gboolean allow_symlink); /* allow_symlink was undocumented; presumably lifts the symlink restriction -- confirm */
+
+/**
+ * Map named shared memory segment
+ * @param fname name of the shared memory segment
+ * @param mode mode to open
+ * @param size target size (must NOT be NULL)
+ * @return pointer to memory (should be freed using munmap) or NULL in case of error
+ */
+gpointer rspamd_shmem_xmap(const char *fname, guint mode,
+ gsize *size);
+
+/**
+ * Normalize probabilities using polynomial function; returns the normalized value
+ * @param x probability (bias .. 1)
+ * @param bias lower bound of the expected range of x (see above)
+ */
+gdouble rspamd_normalize_probability(gdouble x, gdouble bias);
+
+/**
+ * Converts struct tm to a timestamp (returned as guint64 rather than time_t)
+ * @param tm broken-down time to convert
+ * @param tz timezone in format (hours * 100) + minutes
+ * @return the resulting timestamp
+ */
+guint64 rspamd_tm_to_time(const struct tm *tm, glong tz);
+
+/**
+ * Splits unix timestamp into struct tm using GMT timezone
+ * @param ts unix timestamp
+ * @param dest structure to fill
+ */
+void rspamd_gmtime(gint64 ts, struct tm *dest);
+
+/**
+ * Split unix timestamp into struct tm using local timezone
+ * @param ts unix timestamp
+ * @param dest structure to fill
+ */
+void rspamd_localtime(gint64 ts, struct tm *dest);
+
+#define PTR_ARRAY_FOREACH(ar, i, cur) for ((i) = 0; (ar) != NULL && (i) < (ar)->len && (((cur) = (__typeof__(cur)) g_ptr_array_index((ar), (i))) || 1); ++(i)) /* loop header: iterates a GPtrArray, skipping a NULL array; i is the index, cur gets each element; "|| 1" keeps NULL elements from ending the loop */
+
+/**
+ * Compresses the input string using gzip+zlib. Old string is replaced and freed
+ * if compressed.
+ * @param in pointer to the string; updated to the compressed string on success
+ * @return TRUE if a string has been compressed
+ */
+gboolean rspamd_fstring_gzip(rspamd_fstring_t **in);
+
+/**
+ * Decompresses the gzip+zlib compressed input string. Old string is replaced and freed
+ * if decompressed. If not decompressed it is untouched.
+ * @param in pointer to the string; updated to the decompressed string on success
+ * @return TRUE if a string has been decompressed
+ */
+gboolean rspamd_fstring_gunzip(rspamd_fstring_t **in);
+
+/**
+ * Perform globbing searching for the specified path. Allow recursion,
+ * returns an error if maximum nesting is reached.
+ * @param dir directory argument, @param pattern glob pattern (NOTE(review): how they are combined is not visible here)
+ * @param recursive allow recursion
+ * @param err location for the error
+ * @return GPtrArray of gchar *, elements are freed when array is freed
+ */
+GPtrArray *rspamd_glob_path(const gchar *dir,
+ const gchar *pattern,
+ gboolean recursive,
+ GError **err);
+
+struct rspamd_counter_data { /* state updated by rspamd_set_counter and rspamd_set_counter_ema */
+ float mean; /* presumably the running mean -- confirm */
+ float stddev; /* presumably the running standard deviation -- confirm */
+ guint64 number; /* presumably the number of samples seen -- confirm */
+};
+
+/**
+ * Updates counter's data using exponential moving average
+ * @param cd counter to update
+ * @param value new counter value
+ * @param alpha decay coefficient (0..1)
+ * @return new counter value
+ */
+float rspamd_set_counter_ema(struct rspamd_counter_data *cd,
+ float value,
+ float alpha);
+
+/**
+ * Updates counter's data using flat moving average
+ * @param cd counter to update
+ * @param value new counter value
+ * @return new counter value (as double, unlike the float returned by rspamd_set_counter_ema)
+ */
+double rspamd_set_counter(struct rspamd_counter_data *cd,
+ gdouble value);
+
+/**
+ * Shuffle elements in an array inplace
+ * @param ar pointer array whose elements are reordered
+ */
+void rspamd_ptr_array_shuffle(GPtrArray *ar);
+
+enum rspamd_pbkdf_version_id { /* NOTE(review): presumably the values stored in rspamd_controller_pbkdf.id -- confirm */
+ RSPAMD_PBKDF_ID_V1 = 1,
+ RSPAMD_PBKDF_ID_V2 = 2,
+ RSPAMD_PBKDF_ID_MAX /* one past the last defined version id */
+};
+
+struct rspamd_controller_pbkdf { /* describes one PBKDF configuration; element type of pbkdf_list */
+ const char *name;
+ const char *alias;
+ const char *description;
+ int type; /* enum rspamd_cryptobox_pbkdf_type */
+ gint id; /* presumably an enum rspamd_pbkdf_version_id value -- confirm */
+ guint complexity; /* NOTE(review): units (e.g. iteration count) are not visible here */
+ gsize salt_len; /* salt length */
+ gsize key_len; /* key length */
+};
+
+extern const struct rspamd_controller_pbkdf pbkdf_list[]; /* defined elsewhere; NOTE(review): length/terminator convention is not visible in this header */
+
+/**
+ * Sum array of floats using Kahan sum algorithm
+ * @param ar array of floats to sum
+ * @param nelts number of elements, passed by pointer (NOTE(review): may be updated by the callee -- confirm)
+ * @return the sum
+ */
+float rspamd_sum_floats(float *ar, gsize *nelts);
+
+/**
+ * Normalize file path removing dot sequences and repeating '/' symbols as
+ * per rfc3986#section-5.2
+ * @param path path buffer, rewritten in place
+ * @param len length of the input path
+ * @param nlen if not NULL, receives the length of the normalized path
+ */
+void rspamd_normalize_path_inplace(gchar *path, guint len, gsize *nlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif