diff options
Diffstat (limited to '')
-rw-r--r-- | src/plugins/regexp.c | 564 |
1 files changed, 564 insertions, 0 deletions
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c new file mode 100644 index 0000000..59a84c5 --- /dev/null +++ b/src/plugins/regexp.c @@ -0,0 +1,564 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/***MODULE:regexp + * rspamd module that implements different regexp rules + */ + + +#include "config.h" +#include "libmime/message.h" +#include "expression.h" +#include "mime_expressions.h" +#include "libserver/maps/map.h" +#include "lua/lua_common.h" + +static const guint64 rspamd_regexp_cb_magic = 0xca9d9649fc3e2659ULL; + +struct regexp_module_item { + guint64 magic; + struct rspamd_expression *expr; + const gchar *symbol; + struct ucl_lua_funcdata *lua_function; +}; + +struct regexp_ctx { + struct module_ctx ctx; + gsize max_size; +}; + +static void process_regexp_item(struct rspamd_task *task, + struct rspamd_symcache_dynamic_item *item, + void *user_data); + + +/* Initialization */ +gint regexp_module_init(struct rspamd_config *cfg, struct module_ctx **ctx); +gint regexp_module_config(struct rspamd_config *cfg, bool validate); +gint regexp_module_reconfig(struct rspamd_config *cfg); + +module_t regexp_module = { + "regexp", + regexp_module_init, + regexp_module_config, + regexp_module_reconfig, + NULL, + RSPAMD_MODULE_VER, + (guint) -1, +}; + + +static inline struct regexp_ctx * +regexp_get_context(struct rspamd_config *cfg) +{ + return (struct regexp_ctx *) g_ptr_array_index(cfg->c_modules, + regexp_module.ctx_offset); +} + +/* Process regexp expression */ +static gboolean +read_regexp_expression(rspamd_mempool_t *pool, + struct regexp_module_item *chain, + const gchar *symbol, + const gchar *line, + struct rspamd_mime_expr_ud *ud) +{ + struct rspamd_expression *e = NULL; + GError *err = NULL; + + if (!rspamd_parse_expression(line, 0, &mime_expr_subr, ud, pool, &err, + &e)) { + msg_warn_pool("%s = \"%s\" is invalid regexp expression: %e", symbol, + line, + err); + g_error_free(err); + + return FALSE; + } + + g_assert(e != NULL); + chain->expr = e; + + return TRUE; +} + + +/* Init function */ +gint regexp_module_init(struct rspamd_config *cfg, struct module_ctx **ctx) +{ + struct regexp_ctx *regexp_module_ctx; + + regexp_module_ctx = rspamd_mempool_alloc0(cfg->cfg_pool, + sizeof(*regexp_module_ctx)); + + *ctx = (struct module_ctx *) regexp_module_ctx; + + rspamd_rcl_add_doc_by_path(cfg, + NULL, + "Regular expressions rules plugin", + "regexp", + UCL_OBJECT, + NULL, + 0, + NULL, + 0); + + rspamd_rcl_add_doc_by_path(cfg, + "regexp", + "Maximum size of data chunk scanned with any regexp (further data is truncated)", + "max_size", + UCL_INT, + NULL, + 0, + NULL, + 0); + + return 0; +} + +gint regexp_module_config(struct rspamd_config *cfg, bool validate) +{ + struct regexp_ctx *regexp_module_ctx = regexp_get_context(cfg); + struct regexp_module_item *cur_item = NULL; + const ucl_object_t *sec, *value, *elt; + ucl_object_iter_t it = NULL; + gint res = TRUE, nre = 0, nlua = 0, nshots = cfg->default_max_shots; + + if (!rspamd_config_is_module_enabled(cfg, "regexp")) { + return TRUE; + } + + sec = ucl_object_lookup(cfg->cfg_ucl_obj, "regexp"); + if (sec == NULL) { + msg_err_config("regexp module enabled, but no rules are defined"); + return TRUE; + } + + regexp_module_ctx->max_size = 0; + + while ((value = ucl_object_iterate(sec, &it, true)) != NULL) { + if (g_ascii_strncasecmp(ucl_object_key(value), "max_size", + sizeof("max_size") - 1) == 0) { + regexp_module_ctx->max_size = ucl_obj_toint(value); + rspamd_re_cache_set_limit(cfg->re_cache, regexp_module_ctx->max_size); + } + else if (g_ascii_strncasecmp(ucl_object_key(value), "max_threads", + sizeof("max_threads") - 1) == 0) { + msg_warn_config("regexp module is now single threaded, max_threads is ignored"); + } + else if (value->type == UCL_STRING) { + struct rspamd_mime_expr_ud ud; + + cur_item = rspamd_mempool_alloc0(cfg->cfg_pool, + sizeof(struct regexp_module_item)); + cur_item->symbol = ucl_object_key(value); + cur_item->magic = rspamd_regexp_cb_magic; + + ud.conf_obj = NULL; + ud.cfg = cfg; + + if (!read_regexp_expression(cfg->cfg_pool, + cur_item, ucl_object_key(value), + ucl_obj_tostring(value), &ud)) { + if (validate) { + return FALSE; + } + } + else { + rspamd_symcache_add_symbol(cfg->cache, + cur_item->symbol, + 0, + process_regexp_item, + cur_item, + SYMBOL_TYPE_NORMAL, -1); + nre++; + } + } + else if (value->type == UCL_USERDATA) { + /* Just a lua function */ + cur_item = rspamd_mempool_alloc0(cfg->cfg_pool, + sizeof(struct regexp_module_item)); + cur_item->magic = rspamd_regexp_cb_magic; + cur_item->symbol = ucl_object_key(value); + cur_item->lua_function = ucl_object_toclosure(value); + + rspamd_symcache_add_symbol(cfg->cache, + cur_item->symbol, + 0, + process_regexp_item, + cur_item, + SYMBOL_TYPE_NORMAL, -1); + nlua++; + } + else if (value->type == UCL_OBJECT) { + const gchar *description = NULL, *group = NULL; + gdouble score = 0.0; + guint flags = 0, priority = 0; + gboolean is_lua = FALSE, valid_expression = TRUE; + struct rspamd_mime_expr_ud ud; + + /* We have some lua table, extract its arguments */ + elt = ucl_object_lookup(value, "callback"); + + if (elt == NULL || elt->type != UCL_USERDATA) { + + /* Try plain regexp expression */ + elt = ucl_object_lookup_any(value, "regexp", "re", NULL); + + if (elt != NULL && ucl_object_type(elt) == UCL_STRING) { + cur_item = rspamd_mempool_alloc0(cfg->cfg_pool, + sizeof(struct regexp_module_item)); + cur_item->symbol = ucl_object_key(value); + cur_item->magic = rspamd_regexp_cb_magic; + ud.cfg = cfg; + ud.conf_obj = value; + + if (!read_regexp_expression(cfg->cfg_pool, + cur_item, ucl_object_key(value), + ucl_obj_tostring(elt), &ud)) { + if (validate) { + return FALSE; + } + } + else { + valid_expression = TRUE; + nre++; + } + } + else { + msg_err_config( + "no callback/expression defined for regexp symbol: " + "%s", + ucl_object_key(value)); + } + } + else { + is_lua = TRUE; + nlua++; + cur_item = rspamd_mempool_alloc0( + cfg->cfg_pool, + sizeof(struct regexp_module_item)); + cur_item->magic = rspamd_regexp_cb_magic; + cur_item->symbol = ucl_object_key(value); + cur_item->lua_function = ucl_object_toclosure(value); + } + + if (cur_item && (is_lua || valid_expression)) { + + flags = SYMBOL_TYPE_NORMAL; + elt = ucl_object_lookup(value, "mime_only"); + + if (elt) { + if (ucl_object_type(elt) != UCL_BOOLEAN) { + msg_err_config( + "mime_only attribute is not boolean for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + if (ucl_object_toboolean(elt)) { + flags |= SYMBOL_TYPE_MIME_ONLY; + } + } + } + + rspamd_symcache_add_symbol(cfg->cache, + cur_item->symbol, + 0, + process_regexp_item, + cur_item, + flags, -1); + + /* Reset flags */ + flags = 0; + + elt = ucl_object_lookup(value, "condition"); + + if (elt != NULL && ucl_object_type(elt) == UCL_USERDATA) { + struct ucl_lua_funcdata *conddata; + + g_assert(cur_item->symbol != NULL); + conddata = ucl_object_toclosure(elt); + rspamd_symcache_add_condition_delayed(cfg->cache, + cur_item->symbol, + conddata->L, conddata->idx); + } + + elt = ucl_object_lookup(value, "description"); + + if (elt) { + description = ucl_object_tostring(elt); + } + + elt = ucl_object_lookup(value, "group"); + + if (elt) { + group = ucl_object_tostring(elt); + } + + elt = ucl_object_lookup(value, "score"); + + if (elt) { + if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) { + msg_err_config( + "score attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + score = ucl_object_todouble(elt); + } + } + + elt = ucl_object_lookup(value, "one_shot"); + + if (elt) { + if (ucl_object_type(elt) != UCL_BOOLEAN) { + msg_err_config( + "one_shot attribute is not boolean for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + if (ucl_object_toboolean(elt)) { + nshots = 1; + } + } + } + + if ((elt = ucl_object_lookup(value, "any_shot")) != NULL) { + if (ucl_object_type(elt) != UCL_BOOLEAN) { + msg_err_config( + "any_shot attribute is not boolean for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + if (ucl_object_toboolean(elt)) { + nshots = -1; + } + } + } + + if ((elt = ucl_object_lookup(value, "nshots")) != NULL) { + if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) { + msg_err_config( + "nshots attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + nshots = ucl_object_toint(elt); + } + } + + elt = ucl_object_lookup(value, "one_param"); + + if (elt) { + if (ucl_object_type(elt) != UCL_BOOLEAN) { + msg_err_config( + "one_param attribute is not boolean for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + if (ucl_object_toboolean(elt)) { + flags |= RSPAMD_SYMBOL_FLAG_ONEPARAM; + } + } + } + + elt = ucl_object_lookup(value, "priority"); + + if (elt) { + if (ucl_object_type(elt) != UCL_FLOAT && ucl_object_type(elt) != UCL_INT) { + msg_err_config( + "priority attribute is not numeric for symbol: '%s'", + cur_item->symbol); + + if (validate) { + return FALSE; + } + } + else { + priority = ucl_object_toint(elt); + } + } + else { + priority = 0; + } + + rspamd_config_add_symbol(cfg, cur_item->symbol, + score, description, group, flags, priority, nshots); + + elt = ucl_object_lookup(value, "groups"); + + if (elt) { + ucl_object_iter_t gr_it; + const ucl_object_t *cur_gr; + + gr_it = ucl_object_iterate_new(elt); + + while ((cur_gr = ucl_object_iterate_safe(gr_it, true)) != NULL) { + rspamd_config_add_symbol_group(cfg, cur_item->symbol, + ucl_object_tostring(cur_gr)); + } + + ucl_object_iterate_free(gr_it); + } + } + } + else { + msg_warn_config("unknown type of attribute %s for regexp module", + ucl_object_key(value)); + } + } + + if (res) { + msg_info_config("init internal regexp module, %d regexp rules and %d " + "lua rules are loaded", + nre, nlua); + } + else { + msg_err_config("fatal regexp module error"); + } + + return res; +} + +gint regexp_module_reconfig(struct rspamd_config *cfg) +{ + return regexp_module_config(cfg, false); +} + +static gboolean +rspamd_lua_call_expression_func(struct ucl_lua_funcdata *lua_data, + struct rspamd_task *task, + GArray *args, gdouble *res, + const gchar *symbol) +{ + lua_State *L = lua_data->L; + struct rspamd_task **ptask; + struct expression_argument *arg; + gint pop = 0, i, nargs = 0; + + lua_rawgeti(L, LUA_REGISTRYINDEX, lua_data->idx); + /* Now we got function in top of stack */ + ptask = lua_newuserdata(L, sizeof(struct rspamd_task *)); + rspamd_lua_setclass(L, "rspamd{task}", -1); + *ptask = task; + + /* Now push all arguments */ + if (args) { + for (i = 0; i < (gint) args->len; i++) { + arg = &g_array_index(args, struct expression_argument, i); + if (arg) { + switch (arg->type) { + case EXPRESSION_ARGUMENT_NORMAL: + lua_pushstring(L, (const gchar *) arg->data); + break; + case EXPRESSION_ARGUMENT_BOOL: + lua_pushboolean(L, (gboolean) GPOINTER_TO_SIZE(arg->data)); + break; + default: + msg_err_task("%s: cannot pass custom params to lua function", + symbol); + return FALSE; + } + } + } + nargs = args->len; + } + + if (lua_pcall(L, nargs + 1, 1, 0) != 0) { + msg_info_task("%s: call to lua function failed: %s", symbol, + lua_tostring(L, -1)); + lua_pop(L, 1); + + return FALSE; + } + + pop++; + + if (lua_type(L, -1) == LUA_TNUMBER) { + *res = lua_tonumber(L, -1); + } + else if (lua_type(L, -1) == LUA_TBOOLEAN) { + *res = lua_toboolean(L, -1); + } + else { + msg_info_task("%s: lua function must return a boolean", symbol); + *res = FALSE; + } + + lua_pop(L, pop); + + return TRUE; +} + + +static void +process_regexp_item(struct rspamd_task *task, + struct rspamd_symcache_dynamic_item *symcache_item, + void *user_data) +{ + struct regexp_module_item *item = user_data; + gdouble res = FALSE; + + /* Non-threaded version */ + if (item->lua_function) { + /* Just call function */ + res = FALSE; + if (!rspamd_lua_call_expression_func(item->lua_function, task, NULL, + &res, item->symbol)) { + msg_err_task("error occurred when checking symbol %s", + item->symbol); + } + } + else { + /* Process expression */ + if (item->expr) { + res = rspamd_process_expression(item->expr, 0, task); + } + else { + msg_warn_task("FIXME: %s symbol is broken with new expressions", + item->symbol); + } + } + + if (res != 0) { + rspamd_task_insert_result(task, item->symbol, res, NULL); + } + + rspamd_symcache_finalize_item(task, symcache_item); +} |