diff options
Diffstat (limited to 'src/libserver/composites/composites.cxx')
-rw-r--r-- | src/libserver/composites/composites.cxx | 989 |
1 files changed, 989 insertions, 0 deletions
diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx new file mode 100644 index 0000000..aa231a3 --- /dev/null +++ b/src/libserver/composites/composites.cxx @@ -0,0 +1,989 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "config.h" +#include "logger.h" +#include "expression.h" +#include "task.h" +#include "utlist.h" +#include "scan_result.h" +#include "composites.h" + +#include <cmath> +#include <vector> +#include <variant> +#include "libutil/cxx/util.hxx" +#include "contrib/ankerl/unordered_dense.h" + +#include "composites_internal.hxx" + +#define msg_err_composites(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "composites", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_warn_composites(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \ + "composites", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) +#define msg_info_composites(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \ + "composites", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) + +#define msg_debug_composites(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \ + rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \ + RSPAMD_LOG_FUNC, \ + __VA_ARGS__) + +INIT_LOG_MODULE(composites) + + +namespace rspamd::composites { +static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, gsize len, + rspamd_mempool_t *pool, + gpointer ud, GError **err); +static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom); +static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom); +static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom); +static void composites_foreach_callback(gpointer key, gpointer value, void *data); + +const struct rspamd_atom_subr composite_expr_subr = { + .parse = rspamd::composites::rspamd_composite_expr_parse, + .process = rspamd::composites::rspamd_composite_expr_process, + .priority = rspamd::composites::rspamd_composite_expr_priority, + .destroy = rspamd::composites::rspamd_composite_expr_destroy}; +}// namespace rspamd::composites + +namespace rspamd::composites { + +static constexpr const double epsilon = 0.00001; + +struct symbol_remove_data { + const char *sym; + struct rspamd_composite *comp; + GNode *parent; + std::uint8_t action; +}; + +struct composites_data { + struct rspamd_task *task; + struct rspamd_composite *composite; + struct rspamd_scan_result *metric_res; + ankerl::unordered_dense::map<std::string_view, + std::vector<symbol_remove_data>> + symbols_to_remove; + std::vector<bool> checked; + + explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres) + : task(task), composite(nullptr), metric_res(mres) + { + checked.resize(rspamd_composites_manager_nelts(task->cfg->composites_manager) * 2, + false); + } +}; + +struct rspamd_composite_option_match { + rspamd_regexp_t *re; + std::string match; + + explicit rspamd_composite_option_match(const char *start, std::size_t len) noexcept + : re(nullptr), match(start, len) + { + } + + explicit rspamd_composite_option_match(rspamd_regexp_t *re) noexcept + : re(rspamd_regexp_ref(re)) + { + } + + rspamd_composite_option_match(const rspamd_composite_option_match &other) noexcept + { + if (other.re) { + re = rspamd_regexp_ref(other.re); + } + else { + match = other.match; + re = nullptr; + } + } + rspamd_composite_option_match &operator=(const rspamd_composite_option_match &other) noexcept + { + if (other.re) { + if (re) { + rspamd_regexp_unref(re); + } + re = rspamd_regexp_ref(other.re); + } + else { + if (re) { + rspamd_regexp_unref(re); + } + re = nullptr; + match = other.match; + } + + return *this; + } + + rspamd_composite_option_match(rspamd_composite_option_match &&other) noexcept + { + if (other.re) { + re = other.re; + other.re = nullptr; + } + else { + re = nullptr; + match = std::move(other.match); + } + } + rspamd_composite_option_match &operator=(rspamd_composite_option_match &&other) noexcept + { + if (other.re) { + if (re) { + rspamd_regexp_unref(re); + } + re = other.re; + other.re = nullptr; + } + else { + if (re) { + rspamd_regexp_unref(re); + } + re = nullptr; + match = std::move(other.match); + } + + return *this; + } + + ~rspamd_composite_option_match() + { + if (re) { + rspamd_regexp_unref(re); + } + } + + auto match_opt(const std::string_view &data) const -> bool + { + if (re) { + return rspamd_regexp_search(re, + data.data(), data.size(), + nullptr, nullptr, false, nullptr); + } + else { + return data == match; + } + } + + auto get_pat() const -> std::string_view + { + if (re) { + return std::string_view(rspamd_regexp_get_pattern(re)); + } + else { + return match; + } + } +}; + +enum class rspamd_composite_atom_type { + ATOM_UNKNOWN, + ATOM_COMPOSITE, + ATOM_PLAIN +}; + +struct rspamd_composite_atom { + std::string symbol; + std::string_view norm_symbol; + rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN; + const struct rspamd_composite *ncomp; /* underlying composite */ + std::vector<rspamd_composite_option_match> opts; +}; + +enum rspamd_composite_action : std::uint8_t { + RSPAMD_COMPOSITE_UNTOUCH = 0, + RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0), + RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1), + RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2) +}; + +static GQuark +rspamd_composites_quark(void) +{ + return g_quark_from_static_string("composites"); +} + +static auto +rspamd_composite_atom_dtor(void *ptr) +{ + auto *atom = reinterpret_cast<rspamd_composite_atom *>(ptr); + + delete atom; +} + +static rspamd_expression_atom_t * +rspamd_composite_expr_parse(const gchar *line, gsize len, + rspamd_mempool_t *pool, + gpointer ud, GError **err) +{ + gsize clen = 0; + const gchar *p, *end; + enum composite_expr_state { + comp_state_read_symbol = 0, + comp_state_read_obrace, + comp_state_read_option, + comp_state_read_regexp, + comp_state_read_regexp_end, + comp_state_read_comma, + comp_state_read_ebrace, + comp_state_read_end + } state = comp_state_read_symbol; + + end = line + len; + p = line; + + /* Find length of the atom using a reduced state machine */ + while (p < end) { + if (state == comp_state_read_end) { + break; + } + + switch (state) { + case comp_state_read_symbol: + clen = rspamd_memcspn(p, "[; \t()><!|&\n", len); + p += clen; + + if (*p == '[') { + state = comp_state_read_obrace; + } + else { + state = comp_state_read_end; + } + break; + case comp_state_read_obrace: + p++; + + if (*p == '/') { + p++; + state = comp_state_read_regexp; + } + else { + state = comp_state_read_option; + } + break; + case comp_state_read_regexp: + if (*p == '\\' && p + 1 < end) { + /* Escaping */ + p++; + } + else if (*p == '/') { + /* End of regexp, possible flags */ + state = comp_state_read_regexp_end; + } + p++; + break; + case comp_state_read_option: + case comp_state_read_regexp_end: + if (*p == ',') { + p++; + state = comp_state_read_comma; + } + else if (*p == ']') { + state = comp_state_read_ebrace; + } + else { + p++; + } + break; + case comp_state_read_comma: + if (!g_ascii_isspace(*p)) { + if (*p == '/') { + state = comp_state_read_regexp; + } + else if (*p == ']') { + state = comp_state_read_ebrace; + } + else { + state = comp_state_read_option; + } + } + else { + /* Skip spaces after comma */ + p++; + } + break; + case comp_state_read_ebrace: + p++; + state = comp_state_read_end; + break; + case comp_state_read_end: + g_assert_not_reached(); + } + } + + if (state != comp_state_read_end) { + g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;" + "parser stopped in state %d", + line, state); + return NULL; + } + + clen = p - line; + p = line; + state = comp_state_read_symbol; + + auto *atom = new rspamd_composite_atom; + auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t); + res->len = clen; + res->str = line; + + /* Full state machine to fill a composite atom */ + const gchar *opt_start = nullptr; + + while (p < end) { + if (state == comp_state_read_end) { + break; + } + + switch (state) { + case comp_state_read_symbol: { + clen = rspamd_memcspn(p, "[; \t()><!|&\n", len); + p += clen; + + if (*p == '[') { + state = comp_state_read_obrace; + } + else { + state = comp_state_read_end; + } + + atom->symbol = std::string{line, clen}; + auto norm_start = std::find_if(atom->symbol.begin(), atom->symbol.end(), + [](char c) { return g_ascii_isalnum(c); }); + if (norm_start == atom->symbol.end()) { + msg_err_pool("invalid composite atom: %s", atom->symbol.c_str()); + } + atom->norm_symbol = make_string_view_from_it(norm_start, atom->symbol.end()); + break; + } + case comp_state_read_obrace: + p++; + + if (*p == '/') { + opt_start = p; + p++; /* Starting slash */ + state = comp_state_read_regexp; + } + else { + state = comp_state_read_option; + opt_start = p; + } + + break; + case comp_state_read_regexp: + if (*p == '\\' && p + 1 < end) { + /* Escaping */ + p++; + } + else if (*p == '/') { + /* End of regexp, possible flags */ + state = comp_state_read_regexp_end; + } + p++; + break; + case comp_state_read_option: + if (*p == ',' || *p == ']') { + /* Plain match, copy option to ensure string_view validity */ + gint opt_len = p - opt_start; + auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1); + rspamd_strlcpy(opt_buf, opt_start, opt_len + 1); + opt_buf = g_strstrip(opt_buf); + atom->opts.emplace_back(opt_buf, strlen(opt_buf)); + + if (*p == ',') { + p++; + state = comp_state_read_comma; + } + else { + state = comp_state_read_ebrace; + } + } + else { + p++; + } + break; + case comp_state_read_regexp_end: + if (*p == ',' || *p == ']') { + auto opt_len = p - opt_start; + rspamd_regexp_t *re; + GError *re_err = nullptr; + + re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err); + + if (re == nullptr) { + msg_err_pool("cannot create regexp from string %*s: %e", + opt_len, opt_start, re_err); + + g_error_free(re_err); + } + else { + atom->opts.emplace_back(re); + rspamd_regexp_unref(re); + } + + if (*p == ',') { + p++; + state = comp_state_read_comma; + } + else { + state = comp_state_read_ebrace; + } + } + else { + p++; + } + break; + case comp_state_read_comma: + if (!g_ascii_isspace(*p)) { + if (*p == '/') { + state = comp_state_read_regexp; + opt_start = p; + } + else if (*p == ']') { + state = comp_state_read_ebrace; + } + else { + opt_start = p; + state = comp_state_read_option; + } + } + else { + /* Skip spaces after comma */ + p++; + } + break; + case comp_state_read_ebrace: + p++; + state = comp_state_read_end; + break; + case comp_state_read_end: + g_assert_not_reached(); + } + } + + res->data = atom; + + return res; +} + +static auto +process_symbol_removal(rspamd_expression_atom_t *atom, + struct composites_data *cd, + struct rspamd_symbol_result *ms, + const std::string &beg) -> void +{ + struct rspamd_task *task = cd->task; + + if (ms == nullptr) { + return; + } + + /* + * At this point we know that we need to do something about this symbol, + * however, we don't know whether we need to delete it unfortunately, + * that depends on the later decisions when the complete expression is + * evaluated. + */ + auto rd_it = cd->symbols_to_remove.find(ms->name); + + auto fill_removal_structure = [&](symbol_remove_data &nrd) { + nrd.sym = ms->name; + + /* By default remove symbols */ + switch (cd->composite->policy) { + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL: + default: + nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT); + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL: + nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL; + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT: + nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT; + break; + case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE: + nrd.action = 0; + break; + } + + for (auto t: beg) { + if (t == '~') { + nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL; + } + else if (t == '-') { + nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT | + RSPAMD_COMPOSITE_REMOVE_SYMBOL); + } + else if (t == '^') { + nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED; + } + else { + break; + } + } + + nrd.comp = cd->composite; + nrd.parent = atom->parent; + }; + + if (rd_it != cd->symbols_to_remove.end()) { + fill_removal_structure(rd_it->second.emplace_back()); + msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, rd_it->second.back().action, + cd->composite->sym.c_str()); + } + else { + std::vector<symbol_remove_data> nrd; + fill_removal_structure(nrd.emplace_back()); + msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s", + cd->metric_res->name, + ms->name, nrd.front().action, + cd->composite->sym.c_str()); + cd->symbols_to_remove[ms->name] = std::move(nrd); + } +} + +static auto +process_single_symbol(struct composites_data *cd, + std::string_view sym, + struct rspamd_symbol_result **pms, + struct rspamd_composite_atom *atom) -> double +{ + struct rspamd_symbol_result *ms = nullptr; + gdouble rc = 0; + struct rspamd_task *task = cd->task; + + if ((ms = rspamd_task_find_symbol_result(cd->task, sym.data(), cd->metric_res)) == nullptr) { + msg_debug_composites("not found symbol %s in composite %s", sym.data(), + cd->composite->sym.c_str()); + + if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) { + const struct rspamd_composite *ncomp; + + if ((ncomp = COMPOSITE_MANAGER_FROM_PTR(task->cfg->composites_manager)->find(sym)) != NULL) { + atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE; + atom->ncomp = ncomp; + } + else { + atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN; + } + } + + if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) { + msg_debug_composites("symbol %s for composite %s is another composite", + sym.data(), cd->composite->sym.c_str()); + + if (!cd->checked[atom->ncomp->id * 2]) { + msg_debug_composites("composite dependency %s for %s is not checked", + sym.data(), cd->composite->sym.c_str()); + /* Set checked for this symbol to avoid cyclic references */ + cd->checked[cd->composite->id * 2] = true; + auto *saved = cd->composite; /* Save the current composite */ + composites_foreach_callback((gpointer) atom->ncomp->sym.c_str(), + (gpointer) atom->ncomp, (gpointer) cd); + /* Restore state */ + cd->composite = saved; + cd->checked[cd->composite->id * 2] = false; + + ms = rspamd_task_find_symbol_result(cd->task, sym.data(), + cd->metric_res); + } + else { + /* + * XXX: in case of cyclic references this would return 0 + */ + if (cd->checked[atom->ncomp->id * 2 + 1]) { + ms = rspamd_task_find_symbol_result(cd->task, sym.data(), + cd->metric_res); + } + } + } + } + + if (ms) { + msg_debug_composites("found symbol %s in composite %s, weight: %.3f", + sym.data(), cd->composite->sym.c_str(), ms->score); + + /* Now check options */ + for (const auto &cur_opt: atom->opts) { + struct rspamd_symbol_option *opt; + auto found = false; + + DL_FOREACH(ms->opts_head, opt) + { + if (cur_opt.match_opt({opt->option, opt->optlen})) { + found = true; + break; + } + } + + if (!found) { + auto pat = cur_opt.get_pat(); + msg_debug_composites("symbol %s in composite %s misses required option %*s", + sym.data(), + cd->composite->sym.c_str(), + (int) pat.size(), pat.data()); + ms = nullptr; + + break; + } + } + + if (ms) { + if (ms->score == 0) { + rc = epsilon * 16.0; /* Distinguish from 0 */ + } + else { + rc = ms->score; + } + } + } + + *pms = ms; + return rc; +} + +static auto +rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double +{ + struct composites_data *cd = (struct composites_data *) ud; + struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data; + + struct rspamd_symbol_result *ms = NULL; + struct rspamd_task *task = cd->task; + gdouble rc = 0; + + if (cd->checked[cd->composite->id * 2]) { + /* We have already checked this composite, so just return its value */ + if (cd->checked[cd->composite->id * 2 + 1]) { + ms = rspamd_task_find_symbol_result(cd->task, + comp_atom->norm_symbol.data(), + cd->metric_res); + } + + if (ms) { + if (ms->score == 0) { + rc = epsilon; /* Distinguish from 0 */ + } + else { + /* Treat negative and positive scores equally... */ + rc = fabs(ms->score); + } + } + + msg_debug_composites("composite %s is already checked, result: %.2f", + cd->composite->sym.c_str(), rc); + + return rc; + } + + /* Note: sym is zero terminated as it is a view on std::string */ + auto sym = comp_atom->norm_symbol; + auto group_process_functor = [&](auto cond, int sub_start) -> double { + auto max = 0.; + GHashTableIter it; + gpointer k, v; + struct rspamd_symbols_group *gr; + + gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups, + sym.substr(sub_start).data()); + + if (gr != nullptr) { + g_hash_table_iter_init(&it, gr->symbols); + + while (g_hash_table_iter_next(&it, &k, &v)) { + auto *sdef = (rspamd_symbol *) v; + + if (cond(sdef->score)) { + rc = process_single_symbol(cd, + std::string_view(sdef->name), + &ms, + comp_atom); + + if (fabs(rc) > epsilon) { + process_symbol_removal(atom, + cd, + ms, + comp_atom->symbol); + + if (fabs(rc) > max) { + max = fabs(rc); + } + } + } + } + } + + return max; + }; + + if (sym.size() > 2) { + if (sym.substr(0, 2) == "g:") { + rc = group_process_functor([](auto _) { return true; }, 2); + } + else if (sym.substr(0, 3) == "g+:") { + /* Group, positive symbols only */ + rc = group_process_functor([](auto sc) { return sc > 0.; }, 3); + } + else if (sym.substr(0, 3) == "g-:") { + rc = group_process_functor([](auto sc) { return sc < 0.; }, 3); + } + else { + rc = process_single_symbol(cd, sym, &ms, comp_atom); + + if (fabs(rc) > epsilon) { + process_symbol_removal(atom, + cd, + ms, + comp_atom->symbol); + } + } + } + else { + rc = process_single_symbol(cd, sym, &ms, comp_atom); + + if (fabs(rc) > epsilon) { + process_symbol_removal(atom, + cd, + ms, + comp_atom->symbol); + } + } + + msg_debug_composites("%s: result for atom %s in composite %s is %.4f", + cd->metric_res->name, + comp_atom->norm_symbol.data(), + cd->composite->sym.c_str(), rc); + + return rc; +} + +/* + * We don't have preferences for composites + */ +static gint +rspamd_composite_expr_priority(rspamd_expression_atom_t *atom) +{ + return 0; +} + +static void +rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom) +{ + rspamd_composite_atom_dtor(atom->data); +} + +static void +composites_foreach_callback(gpointer key, gpointer value, void *data) +{ + auto *cd = (struct composites_data *) data; + auto *comp = (struct rspamd_composite *) value; + auto *str_key = (const gchar *) key; + struct rspamd_task *task; + gdouble rc; + + cd->composite = comp; + task = cd->task; + + msg_debug_composites("process composite %s", str_key); + + if (!cd->checked[cd->composite->id * 2]) { + if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache, + str_key)) { + msg_debug_composites("composite %s is checked in symcache but not " + "in composites bitfield", + cd->composite->sym.c_str()); + cd->checked[comp->id * 2] = true; + cd->checked[comp->id * 2 + 1] = false; + } + else { + if (rspamd_task_find_symbol_result(cd->task, str_key, + cd->metric_res) != nullptr) { + /* Already set, no need to check */ + msg_debug_composites("composite %s is already in metric " + "in composites bitfield", + cd->composite->sym.c_str()); + cd->checked[comp->id * 2] = true; + cd->checked[comp->id * 2 + 1] = true; + + return; + } + + msg_debug_composites("%s: start processing composite %s", + cd->metric_res->name, + cd->composite->sym.c_str()); + + rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, + cd); + + /* Checked bit */ + cd->checked[comp->id * 2] = true; + + msg_debug_composites("%s: final result for composite %s is %.4f", + cd->metric_res->name, + cd->composite->sym.c_str(), rc); + + /* Result bit */ + if (fabs(rc) > epsilon) { + cd->checked[comp->id * 2 + 1] = true; + rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL, + RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res); + } + else { + cd->checked[comp->id * 2 + 1] = false; + } + } + } +} + + +static auto +remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void +{ + struct rspamd_task *task = cd.task; + gboolean skip = FALSE, + has_valid_op = FALSE, + want_remove_score = TRUE, + want_remove_symbol = TRUE, + want_forced = FALSE; + const gchar *disable_score_reason = "no policy", + *disable_symbol_reason = "no policy"; + + task = cd.task; + + for (const auto &cur: rd) { + if (!cd.checked[cur.comp->id * 2 + 1]) { + continue; + } + /* + * First of all exclude all elements with any parent that is negation: + * !A || B -> here we can have both !A and B matched, but we do *NOT* + * want to remove symbol in that case + */ + auto *par = cur.parent; + skip = FALSE; + + while (par) { + if (rspamd_expression_node_is_op(par, OP_NOT)) { + skip = TRUE; + break; + } + + par = par->parent; + } + + if (skip) { + continue; + } + + has_valid_op = TRUE; + /* + * Now we can try to remove symbols/scores + * + * We apply the following logic here: + * - if no composites would like to save score then we remove score + * - if no composites would like to save symbol then we remove symbol + */ + if (!want_forced) { + if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) { + want_remove_symbol = FALSE; + disable_symbol_reason = cur.comp->sym.c_str(); + } + + if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) { + want_remove_score = FALSE; + disable_score_reason = cur.comp->sym.c_str(); + } + + if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) { + want_forced = TRUE; + disable_symbol_reason = cur.comp->sym.c_str(); + disable_score_reason = cur.comp->sym.c_str(); + } + } + } + + auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res); + + if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) { + + if (want_remove_score || want_forced) { + msg_debug_composites("%s: %s remove symbol weight for %s (was %.2f), " + "score removal affected by %s, symbol removal affected by %s", + cd.metric_res->name, + (want_forced ? "forced" : "normal"), rd.front().sym, ms->score, + disable_score_reason, disable_symbol_reason); + cd.metric_res->score -= ms->score; + ms->score = 0.0; + } + + if (want_remove_symbol || want_forced) { + ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED; + msg_debug_composites("%s: %s remove symbol %s (score %.2f), " + "score removal affected by %s, symbol removal affected by %s", + cd.metric_res->name, + (want_forced ? "forced" : "normal"), rd.front().sym, ms->score, + disable_score_reason, disable_symbol_reason); + } + } +} + +static void +composites_metric_callback(struct rspamd_task *task) +{ + std::vector<composites_data> comp_data_vec; + struct rspamd_scan_result *mres; + + comp_data_vec.reserve(1); + + DL_FOREACH(task->result, mres) + { + auto &cd = comp_data_vec.emplace_back(task, mres); + + /* Process metric result */ + rspamd_symcache_composites_foreach(task, + task->cfg->cache, + composites_foreach_callback, + &cd); + } + + for (const auto &cd: comp_data_vec) { + /* Remove symbols that are in composites */ + for (const auto &srd_it: cd.symbols_to_remove) { + remove_symbols(cd, srd_it.second); + } + } +} + +}// namespace rspamd::composites + + +void rspamd_composites_process_task(struct rspamd_task *task) +{ + if (task->result && !RSPAMD_TASK_IS_SKIPPED(task)) { + rspamd::composites::composites_metric_callback(task); + } +} |