diff options
Diffstat (limited to 'lualib/lua_cfg_transform.lua')
-rw-r--r-- | lualib/lua_cfg_transform.lua | 634 |
1 files changed, 634 insertions, 0 deletions
diff --git a/lualib/lua_cfg_transform.lua b/lualib/lua_cfg_transform.lua new file mode 100644 index 0000000..d6243ad --- /dev/null +++ b/lualib/lua_cfg_transform.lua @@ -0,0 +1,634 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +local logger = require "rspamd_logger" +local lua_util = require "lua_util" +local rspamd_util = require "rspamd_util" +local fun = require "fun" + +local function is_implicit(t) + local mt = getmetatable(t) + + return mt and mt.class and mt.class == 'ucl.type.impl_array' +end + +local function metric_pairs(t) + -- collect the keys + local keys = {} + local implicit_array = is_implicit(t) + + local function gen_keys(tbl) + if implicit_array then + for _, v in ipairs(tbl) do + if v.name then + table.insert(keys, { v.name, v }) + v.name = nil + else + -- Very tricky to distinguish: + -- group {name = "foo" ... } + group "blah" { ... } + for gr_name, gr in pairs(v) do + if type(gr_name) ~= 'number' then + -- We can also have implicit arrays here + local gr_implicit = is_implicit(gr) + + if gr_implicit then + for _, gr_elt in ipairs(gr) do + table.insert(keys, { gr_name, gr_elt }) + end + else + table.insert(keys, { gr_name, gr }) + end + end + end + end + end + else + if tbl.name then + table.insert(keys, { tbl.name, tbl }) + tbl.name = nil + else + for k, v in pairs(tbl) do + if type(k) ~= 'number' then + -- We can also have implicit arrays here + local sym_implicit = is_implicit(v) + + if sym_implicit then + for _, elt in ipairs(v) do + table.insert(keys, { k, elt }) + end + else + table.insert(keys, { k, v }) + end + end + end + end + end + end + + gen_keys(t) + + -- return the iterator function + local i = 0 + return function() + i = i + 1 + if keys[i] then + return keys[i][1], keys[i][2] + end + end +end + +local function group_transform(cfg, k, v) + if v.name then + k = v.name + end + + local new_group = { + symbols = {} + } + + if v.enabled then + new_group.enabled = v.enabled + end + if v.disabled then + new_group.disabled = v.disabled + end + if v.max_score then + new_group.max_score = v.max_score + end + + if v.symbol then + for sk, sv in metric_pairs(v.symbol) do + if sv.name then + sk = sv.name + sv.name = nil -- Remove field + end + + new_group.symbols[sk] = sv + end + end + + if not cfg.group then + cfg.group = {} + end + + if cfg.group[k] then + cfg.group[k] = lua_util.override_defaults(cfg.group[k], new_group) + else + cfg.group[k] = new_group + end + + logger.infox("overriding group %s from the legacy metric settings", k) +end + +local function symbol_transform(cfg, k, v) + -- first try to find any group where there is a definition of this symbol + for gr_n, gr in pairs(cfg.group) do + if gr.symbols and gr.symbols[k] then + -- We override group symbol with ungrouped symbol + logger.infox("overriding group symbol %s in the group %s", k, gr_n) + gr.symbols[k] = lua_util.override_defaults(gr.symbols[k], v) + return + end + end + -- Now check what Rspamd knows about this symbol + local sym = rspamd_config:get_symbol(k) + + if not sym or not sym.group then + -- Otherwise we just use group 'ungrouped' + if not cfg.group.ungrouped then + cfg.group.ungrouped = { + symbols = {} + } + end + + cfg.group.ungrouped.symbols[k] = v + logger.debugx("adding symbol %s to the group 'ungrouped'", k) + end +end + +local function test_groups(groups) + for gr_name, gr in pairs(groups) do + if not gr.symbols then + local cnt = 0 + for _, _ in pairs(gr) do + cnt = cnt + 1 + end + + if cnt == 0 then + logger.debugx('group %s is empty', gr_name) + else + logger.infox('group %s has no symbols', gr_name) + end + end + end +end + +local function convert_metric(cfg, metric) + if metric.actions then + cfg.actions = lua_util.override_defaults(cfg.actions, metric.actions) + logger.infox("overriding actions from the legacy metric settings") + end + if metric.unknown_weight then + cfg.actions.unknown_weight = metric.unknown_weight + end + + if metric.subject then + logger.infox("overriding subject from the legacy metric settings") + cfg.actions.subject = metric.subject + end + + if metric.group then + for k, v in metric_pairs(metric.group) do + group_transform(cfg, k, v) + end + else + if not cfg.group then + cfg.group = { + ungrouped = { + symbols = {} + } + } + end + end + + if metric.symbol then + for k, v in metric_pairs(metric.symbol) do + symbol_transform(cfg, k, v) + end + end + + return cfg +end + +-- Converts a table of groups indexed by number (implicit array) to a +-- merged group definition +local function merge_groups(groups) + local ret = {} + for k, gr in pairs(groups) do + if type(k) == 'number' then + for key, sec in pairs(gr) do + ret[key] = sec + end + else + ret[k] = gr + end + end + + return ret +end + +-- Checks configuration files for statistics +local function check_statistics_sanity() + local local_conf = rspamd_paths['LOCAL_CONFDIR'] + local local_stat = string.format('%s/local.d/%s', local_conf, + 'statistic.conf') + local local_bayes = string.format('%s/local.d/%s', local_conf, + 'classifier-bayes.conf') + + if rspamd_util.file_exists(local_stat) and + rspamd_util.file_exists(local_bayes) then + logger.warnx(rspamd_config, 'conflicting files %s and %s are found: ' .. + 'Rspamd classifier configuration might be broken!', local_stat, local_bayes) + end +end + +-- Converts surbl module config to rbl module +local function surbl_section_convert(cfg, section) + local rbl_section = cfg.rbl.rbls + local wl = section.whitelist + for name, value in pairs(section.rules or {}) do + if rbl_section[name] then + logger.warnx(rspamd_config, 'conflicting names in surbl and rbl rules: %s, prefer surbl rule!', + name) + end + local converted = { + urls = true, + ignore_defaults = true, + } + + if wl then + converted.whitelist = wl + end + + for k, v in pairs(value) do + local skip = false + -- Rename + if k == 'suffix' then + k = 'rbl' + end + if k == 'ips' then + k = 'returncodes' + end + if k == 'bits' then + k = 'returnbits' + end + if k == 'noip' then + k = 'no_ip' + end + -- Crappy legacy + if k == 'options' then + if v == 'noip' or v == 'no_ip' then + converted.no_ip = true + skip = true + end + end + if k:match('check_') then + local n = k:match('check_(.*)') + k = n + end + + if k == 'dkim' and v then + converted.dkim_domainonly = false + converted.dkim_match_from = true + end + + if k == 'emails' and v then + -- To match surbl behaviour + converted.emails_domainonly = true + end + + if not skip then + converted[k] = lua_util.deepcopy(v) + end + end + rbl_section[name] = lua_util.override_defaults(rbl_section[name], converted) + end +end + +-- Converts surbl module config to rbl module +local function emails_section_convert(cfg, section) + local rbl_section = cfg.rbl.rbls + local wl = section.whitelist + for name, value in pairs(section.rules or {}) do + if rbl_section[name] then + logger.warnx(rspamd_config, 'conflicting names in emails and rbl rules: %s, prefer emails rule!', + name) + end + local converted = { + emails = true, + ignore_defaults = true, + } + + if wl then + converted.whitelist = wl + end + + for k, v in pairs(value) do + local skip = false + -- Rename + if k == 'dnsbl' then + k = 'rbl' + end + if k == 'check_replyto' then + k = 'replyto' + end + if k == 'hashlen' then + k = 'hash_len' + end + if k == 'encoding' then + k = 'hash_format' + end + if k == 'domain_only' then + k = 'emails_domainonly' + end + if k == 'delimiter' then + k = 'emails_delimiter' + end + if k == 'skip_body' then + skip = true + if v then + -- Hack + converted.emails = false + converted.replyto = true + else + converted.emails = true + end + end + if k == 'expect_ip' then + -- Another stupid hack + if not converted.return_codes then + converted.returncodes = {} + end + local symbol = value.symbol or name + converted.returncodes[symbol] = { v } + skip = true + end + + if not skip then + converted[k] = lua_util.deepcopy(v) + end + end + rbl_section[name] = lua_util.override_defaults(rbl_section[name], converted) + end +end + +return function(cfg) + local ret = false + + if cfg['metric'] then + for _, v in metric_pairs(cfg.metric) do + cfg = convert_metric(cfg, v) + end + ret = true + end + + if cfg.symbols then + for k, v in metric_pairs(cfg.symbols) do + symbol_transform(cfg, k, v) + end + end + + check_statistics_sanity() + + if not cfg.actions then + logger.errx('no actions defined') + else + -- Perform sanity check for actions + local actions_defs = { 'no action', 'no_action', -- In case if that's added + 'greylist', 'add header', 'add_header', + 'rewrite subject', 'rewrite_subject', 'quarantine', + 'reject', 'discard' } + + if not cfg.actions['no action'] and not cfg.actions['no_action'] and + not cfg.actions['accept'] then + for _, d in ipairs(actions_defs) do + if cfg.actions[d] then + + local action_score = nil + if type(cfg.actions[d]) == 'number' then + action_score = cfg.actions[d] + elseif type(cfg.actions[d]) == 'table' and cfg.actions[d]['score'] then + action_score = cfg.actions[d]['score'] + end + + if type(cfg.actions[d]) ~= 'table' and not action_score then + cfg.actions[d] = nil + elseif type(action_score) == 'number' and action_score < 0 then + cfg.actions['no_action'] = cfg.actions[d] - 0.001 + logger.infox(rspamd_config, 'set no_action score to: %s, as action %s has negative score', + cfg.actions['no_action'], d) + break + end + end + end + end + + local actions_set = lua_util.list_to_hash(actions_defs) + + -- Now check actions section for garbage + actions_set['unknown_weight'] = true + actions_set['grow_factor'] = true + actions_set['subject'] = true + + for k, _ in pairs(cfg.actions) do + if not actions_set[k] then + logger.warnx(rspamd_config, 'unknown element in actions section: %s', k) + end + end + + -- Performs thresholds sanity + -- We exclude greylist here as it can be set to whatever threshold in practice + local actions_order = { + 'no_action', + 'add_header', + 'rewrite_subject', + 'quarantine', + 'reject', + 'discard' + } + for i = 1, (#actions_order - 1) do + local act = actions_order[i] + + if cfg.actions[act] and type(cfg.actions[act]) == 'number' then + local score = cfg.actions[act] + + for j = i + 1, #actions_order do + local next_act = actions_order[j] + if cfg.actions[next_act] and type(cfg.actions[next_act]) == 'number' then + local next_score = cfg.actions[next_act] + if next_score <= score then + logger.errx(rspamd_config, 'invalid actions thresholds order: action %s (%s) must have lower ' .. + 'score than action %s (%s)', act, score, next_act, next_score) + ret = false + end + end + end + end + end + end + + if not cfg.group then + logger.errx('no symbol groups defined') + else + if cfg.group[1] then + -- We need to merge groups + cfg.group = merge_groups(cfg.group) + ret = true + end + test_groups(cfg.group) + end + + -- Deal with dkim settings + if not cfg.dkim then + cfg.dkim = {} + else + if cfg.dkim.sign_condition then + -- We have an obsoleted sign condition, so we need to either add dkim_signing and move it + -- there or just move sign condition there... + if not cfg.dkim_signing then + logger.warnx('obsoleted DKIM signing method used, converting it to "dkim_signing" module') + cfg.dkim_signing = { + sign_condition = cfg.dkim.sign_condition + } + else + if not cfg.dkim_signing.sign_condition then + logger.warnx('obsoleted DKIM signing method used, move it to "dkim_signing" module') + cfg.dkim_signing.sign_condition = cfg.dkim.sign_condition + else + logger.warnx('obsoleted DKIM signing method used, ignore it as "dkim_signing" also defines condition!') + end + end + end + end + + -- Again: legacy stuff :( + if not cfg.dkim.sign_headers then + local sec = cfg.dkim_signing + if sec and sec[1] then + sec = cfg.dkim_signing[1] + end + + if sec and sec.sign_headers then + cfg.dkim.sign_headers = sec.sign_headers + end + end + + -- DKIM signing/ARC legacy + for _, mod in ipairs({ 'dkim_signing', 'arc' }) do + if cfg[mod] then + if cfg[mod].auth_only ~= nil then + if cfg[mod].sign_authenticated ~= nil then + logger.warnx(rspamd_config, + 'both auth_only (%s) and sign_authenticated (%s) for %s are specified, prefer auth_only', + cfg[mod].auth_only, cfg[mod].sign_authenticated, mod) + end + cfg[mod].sign_authenticated = cfg[mod].auth_only + end + end + end + + if cfg.dkim and cfg.dkim.sign_headers and type(cfg.dkim.sign_headers) == 'table' then + -- Flatten + cfg.dkim.sign_headers = table.concat(cfg.dkim.sign_headers, ':') + end + + -- Try to find some obvious issues with configuration + for k, v in pairs(cfg) do + if type(v) == 'table' and v[k] and type(v[k]) == 'table' then + logger.errx('nested section: %s { %s { ... } }, it is likely a configuration error', + k, k) + end + end + + -- If neural network is enabled we MUST have `check_all_filters` flag + if cfg.neural then + if not cfg.options then + cfg.options = {} + end + + if not cfg.options.check_all_filters then + logger.infox(rspamd_config, 'enable `options.check_all_filters` for neural network') + cfg.options.check_all_filters = true + end + end + + -- Deal with IP_SCORE + if cfg.ip_score and (cfg.ip_score.servers or cfg.redis.servers) then + logger.warnx(rspamd_config, 'ip_score module is deprecated in honor of reputation module!') + + if not cfg.reputation then + cfg.reputation = { + rules = {} + } + end + + if not cfg.reputation.rules then + cfg.reputation.rules = {} + end + + if not fun.any(function(_, v) + return v.selector and v.selector.ip + end, + cfg.reputation.rules) then + logger.infox(rspamd_config, 'attach ip reputation element to use it') + + cfg.reputation.rules.ip_score = { + selector = { + ip = {}, + }, + backend = { + redis = {}, + } + } + + if cfg.ip_score.servers then + cfg.reputation.rules.ip_score.backend.redis.servers = cfg.ip_score.servers + end + + if cfg.symbols and cfg.symbols['IP_SCORE'] then + local t = cfg.symbols['IP_SCORE'] + + if not cfg.symbols['SENDER_REP_SPAM'] then + cfg.symbols['SENDER_REP_SPAM'] = t + cfg.symbols['SENDER_REP_HAM'] = t + cfg.symbols['SENDER_REP_HAM'].weight = -(t.weight or 0) + end + end + else + logger.infox(rspamd_config, 'ip reputation already exists, do not do any IP_SCORE transforms') + end + end + + if cfg.surbl then + if not cfg.rbl then + cfg.rbl = { + rbls = {} + } + end + if not cfg.rbl.rbls then + cfg.rbl.rbls = {} + end + surbl_section_convert(cfg, cfg.surbl) + logger.infox(rspamd_config, 'converted surbl rules to rbl rules') + cfg.surbl = {} + end + + if cfg.emails then + if not cfg.rbl then + cfg.rbl = { + rbls = {} + } + end + if not cfg.rbl.rbls then + cfg.rbl.rbls = {} + end + emails_section_convert(cfg, cfg.emails) + logger.infox(rspamd_config, 'converted emails rules to rbl rules') + cfg.emails = {} + end + + return ret, cfg +end |