diff options
Diffstat (limited to 'src/hs_helper.c')
-rw-r--r-- | src/hs_helper.c | 426 |
1 files changed, 426 insertions, 0 deletions
diff --git a/src/hs_helper.c b/src/hs_helper.c new file mode 100644 index 0000000..438035e --- /dev/null +++ b/src/hs_helper.c @@ -0,0 +1,426 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "libutil/util.h" +#include "libserver/cfg_file.h" +#include "libserver/cfg_rcl.h" +#include "libserver/worker_util.h" +#include "libserver/rspamd_control.h" +#include "unix-std.h" + +#ifdef HAVE_GLOB_H +#include <glob.h> +#endif + +static gpointer init_hs_helper(struct rspamd_config *cfg); +__attribute__((noreturn)) static void start_hs_helper(struct rspamd_worker *worker); + +worker_t hs_helper_worker = { + "hs_helper", /* Name */ + init_hs_helper, /* Init function */ + start_hs_helper, /* Start function */ + RSPAMD_WORKER_UNIQUE | RSPAMD_WORKER_KILLABLE | RSPAMD_WORKER_ALWAYS_START | RSPAMD_WORKER_NO_TERMINATE_DELAY, + RSPAMD_WORKER_SOCKET_NONE, + RSPAMD_WORKER_VER /* Version info */ +}; + +static const gdouble default_max_time = 1.0; +static const gdouble default_recompile_time = 60.0; +static const guint64 rspamd_hs_helper_magic = 0x22d310157a2288a0ULL; + +/* + * Worker's context + */ +struct hs_helper_ctx { + guint64 magic; + /* Events base */ + struct ev_loop *event_loop; + /* DNS resolver */ + struct rspamd_dns_resolver *resolver; + /* Config */ + struct rspamd_config *cfg; + /* END OF COMMON PART */ + gchar *hs_dir; + gboolean loaded; + gdouble max_time; + gdouble recompile_time; + ev_timer recompile_timer; +}; + +static gpointer +init_hs_helper(struct rspamd_config *cfg) +{ + struct hs_helper_ctx *ctx; + GQuark type; + + type = g_quark_try_string("hs_helper"); + ctx = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*ctx)); + + ctx->magic = rspamd_hs_helper_magic; + ctx->cfg = cfg; + ctx->hs_dir = NULL; + ctx->max_time = default_max_time; + ctx->recompile_time = default_recompile_time; + + rspamd_rcl_register_worker_option(cfg, + type, + "cache_dir", + rspamd_rcl_parse_struct_string, + ctx, + G_STRUCT_OFFSET(struct hs_helper_ctx, hs_dir), + 0, + "Directory where to save hyperscan compiled expressions"); + rspamd_rcl_register_worker_option(cfg, + type, + "max_time", + rspamd_rcl_parse_struct_time, + ctx, + G_STRUCT_OFFSET(struct hs_helper_ctx, max_time), + RSPAMD_CL_FLAG_TIME_FLOAT, + "Maximum time to wait for compilation of a single expression"); + rspamd_rcl_register_worker_option(cfg, + type, + "recompile", + rspamd_rcl_parse_struct_time, + ctx, + G_STRUCT_OFFSET(struct hs_helper_ctx, recompile_time), + RSPAMD_CL_FLAG_TIME_FLOAT, + "Time between recompilation checks"); + rspamd_rcl_register_worker_option(cfg, + type, + "timeout", + rspamd_rcl_parse_struct_time, + ctx, + G_STRUCT_OFFSET(struct hs_helper_ctx, max_time), + RSPAMD_CL_FLAG_TIME_FLOAT, + "Maximum time to wait for compilation of a single expression"); + + return ctx; +} + +/** + * Clean + */ +static gboolean +rspamd_hs_helper_cleanup_dir(struct hs_helper_ctx *ctx, gboolean forced) +{ + struct stat st; + glob_t globbuf; + guint len, i; + gint rc; + gchar *pattern; + gboolean ret = TRUE; + pid_t our_pid = getpid(); + + if (getenv("RSPAMD_NO_CLEANUP")) { + /* Skip all cleanup */ + return TRUE; + } + + if (stat(ctx->hs_dir, &st) == -1) { + msg_err("cannot stat path %s, %s", + ctx->hs_dir, + strerror(errno)); + return FALSE; + } + + globbuf.gl_offs = 0; + /* + * We reuse this buffer for .new patterns as well, so allocate with some + * margin + */ + len = strlen(ctx->hs_dir) + 1 + sizeof("*.hs") + sizeof(G_DIR_SEPARATOR); + pattern = g_malloc(len); + rspamd_snprintf(pattern, len, "%s%c%s", ctx->hs_dir, G_DIR_SEPARATOR, "*.hs"); + + if ((rc = glob(pattern, 0, NULL, &globbuf)) == 0) { + for (i = 0; i < globbuf.gl_pathc; i++) { + GError *err = NULL; + + if (forced) { + g_set_error(&err, g_quark_from_static_string("re_cache"), + 0, "forced removal"); + } + + if (forced || + !rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache, + globbuf.gl_pathv[i], TRUE, TRUE, &err)) { + if (unlink(globbuf.gl_pathv[i]) == -1) { + msg_err("cannot unlink %s: %s; reason for expiration: %e", globbuf.gl_pathv[i], + strerror(errno), err); + ret = FALSE; + } + else { + msg_notice("successfully removed outdated hyperscan file: %s; reason for expiration: %e", + globbuf.gl_pathv[i], err); + } + } + + if (err) { + g_error_free(err); + } + } + } + else if (rc != GLOB_NOMATCH) { + msg_err("glob %s failed: %s", pattern, strerror(errno)); + ret = FALSE; + } + + globfree(&globbuf); + + memset(&globbuf, 0, sizeof(globbuf)); + rspamd_snprintf(pattern, len, "%s%c%s", ctx->hs_dir, G_DIR_SEPARATOR, "*.hs.new"); + if ((rc = glob(pattern, 0, NULL, &globbuf)) == 0) { + for (i = 0; i < globbuf.gl_pathc; i++) { + /* Check if we have a pid in the filename */ + const gchar *end_num = globbuf.gl_pathv[i] + + strlen(globbuf.gl_pathv[i]) - (sizeof(".hs.new") - 1); + const gchar *p = end_num - 1; + pid_t foreign_pid = -1; + + while (p > globbuf.gl_pathv[i]) { + if (g_ascii_isdigit(*p)) { + p--; + } + else { + p++; + break; + } + } + + gulong ul; + if (p < end_num && rspamd_strtoul(p, end_num - p, &ul)) { + foreign_pid = ul; + } + + /* + * Remove only files that was left by us or some non-existing process + * There could be another race condition but it would just leave + * extra files which is relatively innocent? + */ + if (foreign_pid == -1 || foreign_pid == our_pid || kill(foreign_pid, 0) == -1) { + if (unlink(globbuf.gl_pathv[i]) == -1) { + msg_err("cannot unlink %s: %s", globbuf.gl_pathv[i], + strerror(errno)); + ret = FALSE; + } + else { + msg_notice("successfully removed outdated hyperscan temporary file: %s; " + "pid of the file creator process: %P", + globbuf.gl_pathv[i], + foreign_pid); + } + } + else { + msg_notice("skip removal of the hyperscan temporary file: %s; " + "pid of the file creator process: %P", + globbuf.gl_pathv[i], + foreign_pid); + } + } + } + else if (rc != GLOB_NOMATCH) { + msg_err("glob %s failed: %s", pattern, strerror(errno)); + ret = FALSE; + } + + globfree(&globbuf); + g_free(pattern); + + return ret; +} + +/* Bad hack, but who cares */ +static gboolean hack_global_forced; + +static void +rspamd_rs_delayed_cb(EV_P_ ev_timer *w, int revents) +{ + struct rspamd_worker *worker = (struct rspamd_worker *) w->data; + static struct rspamd_srv_command srv_cmd; + struct hs_helper_ctx *ctx; + + ctx = (struct hs_helper_ctx *) worker->ctx; + memset(&srv_cmd, 0, sizeof(srv_cmd)); + srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED; + rspamd_strlcpy(srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir, + sizeof(srv_cmd.cmd.hs_loaded.cache_dir)); + srv_cmd.cmd.hs_loaded.forced = hack_global_forced; + hack_global_forced = FALSE; + + rspamd_srv_send_command(worker, + ctx->event_loop, &srv_cmd, -1, NULL, NULL); + ev_timer_stop(EV_A_ w); + g_free(w); + + ev_timer_again(EV_A_ & ctx->recompile_timer); +} + +static void +rspamd_rs_compile_cb(guint ncompiled, GError *err, void *cbd) +{ + struct rspamd_worker *worker = (struct rspamd_worker *) cbd; + ev_timer *tm; + ev_tstamp when = 0.0; + struct hs_helper_ctx *ctx; + + ctx = (struct hs_helper_ctx *) worker->ctx; + + if (err != NULL) { + /* Failed to compile: log and go out */ + msg_err("cannot compile Hyperscan database: %e", err); + + return; + } + + if (ncompiled > 0) { + /* Enforce update for other workers */ + hack_global_forced = TRUE; + } + + /* + * Do not send notification unless all other workers are started + * XXX: now we just sleep for 1 seconds to ensure that + */ + if (!ctx->loaded) { + when = 1.0; /* Postpone */ + ctx->loaded = TRUE; + msg_info("compiled %d regular expressions to the hyperscan tree, " + "postpone loaded notification for %.0f seconds to avoid races", + ncompiled, + when); + } + else { + msg_info("compiled %d regular expressions to the hyperscan tree, " + "send loaded notification", + ncompiled); + } + + tm = g_malloc0(sizeof(*tm)); + tm->data = (void *) worker; + ev_timer_init(tm, rspamd_rs_delayed_cb, when, 0); + ev_timer_start(ctx->event_loop, tm); +} + +static gboolean +rspamd_rs_compile(struct hs_helper_ctx *ctx, struct rspamd_worker *worker, + gboolean forced) +{ +#if !defined(__aarch64__) && !defined(__powerpc64__) + if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) { + msg_warn("CPU doesn't have SSSE3 instructions set " + "required for hyperscan, disable hyperscan compilation"); + return FALSE; + } +#endif + + if (!rspamd_hs_helper_cleanup_dir(ctx, forced)) { + msg_warn("cannot cleanup cache dir '%s'", ctx->hs_dir); + } + + hack_global_forced = forced; /* killmeplease */ + rspamd_re_cache_compile_hyperscan(ctx->cfg->re_cache, + ctx->hs_dir, ctx->max_time, !forced, + ctx->event_loop, + rspamd_rs_compile_cb, + (void *) worker); + + return TRUE; +} + +static gboolean +rspamd_hs_helper_reload(struct rspamd_main *rspamd_main, + struct rspamd_worker *worker, gint fd, + gint attached_fd, + struct rspamd_control_command *cmd, + gpointer ud) +{ + struct rspamd_control_reply rep; + struct hs_helper_ctx *ctx = ud; + + msg_info("recompiling hyperscan expressions after receiving reload command"); + memset(&rep, 0, sizeof(rep)); + rep.type = RSPAMD_CONTROL_RECOMPILE; + rep.reply.recompile.status = 0; + + /* We write reply before actual recompilation as it takes a lot of time */ + if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) { + msg_err("cannot write reply to the control socket: %s", + strerror(errno)); + } + + /* Stop recompile */ + ev_timer_stop(ctx->event_loop, &ctx->recompile_timer); + rspamd_rs_compile(ctx, worker, TRUE); + + return TRUE; +} + +static void +rspamd_hs_helper_timer(EV_P_ ev_timer *w, int revents) +{ + struct rspamd_worker *worker = (struct rspamd_worker *) w->data; + struct hs_helper_ctx *ctx; + double tim; + + ctx = worker->ctx; + tim = rspamd_time_jitter(ctx->recompile_time, 0); + w->repeat = tim; + rspamd_rs_compile(ctx, worker, FALSE); +} + +static void +start_hs_helper(struct rspamd_worker *worker) +{ + struct hs_helper_ctx *ctx = worker->ctx; + double tim; + + g_assert(rspamd_worker_check_context(worker->ctx, rspamd_hs_helper_magic)); + ctx->cfg = worker->srv->cfg; + + if (ctx->hs_dir == NULL) { + ctx->hs_dir = ctx->cfg->hs_cache_dir; + } + if (ctx->hs_dir == NULL) { + ctx->hs_dir = RSPAMD_DBDIR "/"; + } + + ctx->event_loop = rspamd_prepare_worker(worker, + "hs_helper", + NULL); + + if (!rspamd_rs_compile(ctx, worker, FALSE)) { + /* Tell main not to respawn more workers */ + exit(EXIT_SUCCESS); + } + + rspamd_control_worker_add_cmd_handler(worker, RSPAMD_CONTROL_RECOMPILE, + rspamd_hs_helper_reload, ctx); + + ctx->recompile_timer.data = worker; + tim = rspamd_time_jitter(ctx->recompile_time, 0); + ev_timer_init(&ctx->recompile_timer, rspamd_hs_helper_timer, tim, 0.0); + ev_timer_start(ctx->event_loop, &ctx->recompile_timer); + + ev_loop(ctx->event_loop, 0); + rspamd_worker_block_signals(); + + rspamd_log_close(worker->srv->logger); + REF_RELEASE(ctx->cfg); + rspamd_unset_crash_handler(worker->srv); + + exit(EXIT_SUCCESS); +} |