summaryrefslogtreecommitdiffstats
path: root/src/plugins/lua/spamassassin.lua
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/lua/spamassassin.lua')
-rw-r--r--src/plugins/lua/spamassassin.lua1774
1 files changed, 1774 insertions, 0 deletions
diff --git a/src/plugins/lua/spamassassin.lua b/src/plugins/lua/spamassassin.lua
new file mode 100644
index 0000000..3ea7944
--- /dev/null
+++ b/src/plugins/lua/spamassassin.lua
@@ -0,0 +1,1774 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+if confighelp then
+ return
+end
+
+-- This plugin is intended to read and parse spamassassin rules with regexp
+-- rules. SA plugins or statistics are not supported
+
+local E = {}
+local N = 'spamassassin'
+
+local rspamd_logger = require "rspamd_logger"
+local rspamd_regexp = require "rspamd_regexp"
+local rspamd_expression = require "rspamd_expression"
+local rspamd_trie = require "rspamd_trie"
+local util = require "rspamd_util"
+local lua_util = require "lua_util"
+local fun = require "fun"
+
+-- Known plugins
+local known_plugins = {
+ 'Mail::SpamAssassin::Plugin::FreeMail',
+ 'Mail::SpamAssassin::Plugin::HeaderEval',
+ 'Mail::SpamAssassin::Plugin::ReplaceTags',
+ 'Mail::SpamAssassin::Plugin::RelayEval',
+ 'Mail::SpamAssassin::Plugin::MIMEEval',
+ 'Mail::SpamAssassin::Plugin::BodyEval',
+ 'Mail::SpamAssassin::Plugin::MIMEHeader',
+ 'Mail::SpamAssassin::Plugin::WLBLEval',
+ 'Mail::SpamAssassin::Plugin::HTMLEval',
+}
+
+-- Table that replaces SA symbol with rspamd equivalent
+-- Used for dependency resolution
+local symbols_replacements = {
+ -- SPF replacements
+ USER_IN_SPF_WHITELIST = 'WHITELIST_SPF',
+ USER_IN_DEF_SPF_WL = 'WHITELIST_SPF',
+ SPF_PASS = 'R_SPF_ALLOW',
+ SPF_FAIL = 'R_SPF_FAIL',
+ SPF_SOFTFAIL = 'R_SPF_SOFTFAIL',
+ SPF_HELO_PASS = 'R_SPF_ALLOW',
+ SPF_HELLO_FAIL = 'R_SPF_FAIL',
+ SPF_HELLO_SOFTFAIL = 'R_SPF_SOFTFAIL',
+ -- DKIM replacements
+ USER_IN_DKIM_WHITELIST = 'WHITELIST_DKIM',
+ USER_IN_DEF_DKIM_WL = 'WHITELIST_DKIM',
+ DKIM_VALID = 'R_DKIM_ALLOW',
+ -- SURBL replacements
+ URIBL_SBL_A = 'URIBL_SBL',
+ URIBL_DBL_SPAM = 'DBL_SPAM',
+ URIBL_DBL_PHISH = 'DBL_PHISH',
+ URIBL_DBL_MALWARE = 'DBL_MALWARE',
+ URIBL_DBL_BOTNETCC = 'DBL_BOTNET',
+ URIBL_DBL_ABUSE_SPAM = 'DBL_ABUSE',
+ URIBL_DBL_ABUSE_REDIR = 'DBL_ABUSE_REDIR',
+ URIBL_DBL_ABUSE_MALW = 'DBL_ABUSE_MALWARE',
+ URIBL_DBL_ABUSE_BOTCC = 'DBL_ABUSE_BOTNET',
+ URIBL_WS_SURBL = 'WS_SURBL_MULTI',
+ URIBL_PH_SURBL = 'PH_SURBL_MULTI',
+ URIBL_MW_SURBL = 'MW_SURBL_MULTI',
+ URIBL_CR_SURBL = 'CRACKED_SURBL',
+ URIBL_ABUSE_SURBL = 'ABUSE_SURBL',
+ -- Misc rules
+ BODY_URI_ONLY = 'R_EMPTY_IMAGE',
+ HTML_IMAGE_ONLY_04 = 'HTML_SHORT_LINK_IMG_1',
+ HTML_IMAGE_ONLY_08 = 'HTML_SHORT_LINK_IMG_1',
+ HTML_IMAGE_ONLY_12 = 'HTML_SHORT_LINK_IMG_1',
+ HTML_IMAGE_ONLY_16 = 'HTML_SHORT_LINK_IMG_2',
+ HTML_IMAGE_ONLY_20 = 'HTML_SHORT_LINK_IMG_2',
+ HTML_IMAGE_ONLY_24 = 'HTML_SHORT_LINK_IMG_3',
+ HTML_IMAGE_ONLY_28 = 'HTML_SHORT_LINK_IMG_3',
+ HTML_IMAGE_ONLY_32 = 'HTML_SHORT_LINK_IMG_3',
+}
+
+-- Internal variables
+local rules = {}
+local atoms = {}
+local scores = {}
+local scores_added = {}
+local external_deps = {}
+local freemail_domains = {}
+local pcre_only_regexps = {}
+local freemail_trie
+local replace = {
+ tags = {},
+ pre = {},
+ inter = {},
+ post = {},
+ rules = {},
+}
+local internal_regexp = {
+ date_shift = rspamd_regexp.create("^\\(\\s*'((?:-?\\d+)|(?:undef))'\\s*,\\s*'((?:-?\\d+)|(?:undef))'\\s*\\)$")
+}
+
+-- Mail::SpamAssassin::Plugin::WLBLEval plugin
+local sa_lists = {
+ from_blacklist = {},
+ from_whitelist = {},
+ from_def_whitelist = {},
+ to_blacklist = {},
+ to_whitelist = {},
+ elts = 0,
+}
+
+local func_cache = {}
+local section = rspamd_config:get_all_opt("spamassassin")
+if not (section and type(section) == 'table') then
+ rspamd_logger.infox(rspamd_config, 'Module is unconfigured')
+end
+
+-- Minimum score to treat symbols as meta
+local meta_score_alpha = 0.5
+
+-- Maximum size of regexp checked
+local match_limit = 0
+
+-- Default priority of the scores registered in the metric
+-- Historically this is set to 2 allowing SA scores to override Rspamd scores
+local scores_priority = 2
+
+local function split(str, delim)
+ local result = {}
+
+ if not delim then
+ delim = '[^%s]+'
+ end
+
+ for token in string.gmatch(str, delim) do
+ table.insert(result, token)
+ end
+
+ return result
+end
+
+local function replace_symbol(s)
+ local rspamd_symbol = symbols_replacements[s]
+ if not rspamd_symbol then
+ return s, false
+ end
+ return rspamd_symbol, true
+end
+
+local ffi
+if type(jit) == 'table' then
+ ffi = require("ffi")
+ ffi.cdef [[
+ int rspamd_re_cache_type_from_string (const char *str);
+ int rspamd_re_cache_process_ffi (void *ptask,
+ void *pre,
+ int type,
+ const char *type_data,
+ int is_strong);
+]]
+end
+
+local function process_regexp_opt(re, task, re_type, header, strong)
+ --[[
+ -- This is now broken with lua regexp conditions!
+ if type(jit) == 'table' then
+ -- Use ffi call
+ local itype = ffi.C.rspamd_re_cache_type_from_string(re_type)
+
+ if not strong then
+ strong = 0
+ else
+ strong = 1
+ end
+ local iret = ffi.C.rspamd_re_cache_process_ffi (task, re, itype, header, strong)
+
+ return tonumber(iret)
+ else
+ return task:process_regexp(re, re_type, header, strong)
+ end
+ --]]
+ return task:process_regexp(re, re_type, header, strong)
+end
+
+local function is_pcre_only(name)
+ if pcre_only_regexps[name] then
+ rspamd_logger.infox(rspamd_config, 'mark re %s as PCRE only', name)
+ return true
+ end
+ return false
+end
+
+local function handle_header_def(hline, cur_rule)
+ --Now check for modifiers inside header's name
+ local hdrs = split(hline, '[^|]+')
+ local hdr_params = {}
+ local cur_param = {}
+ -- Check if an re is an ordinary re
+ local ordinary = true
+
+ for _, h in ipairs(hdrs) do
+ if h == 'ALL' or h == 'ALL:raw' then
+ ordinary = false
+ cur_rule['type'] = 'function'
+ -- Pack closure
+ local re = cur_rule['re']
+ -- Rule to match all headers
+ rspamd_config:register_regexp({
+ re = re,
+ type = 'allheader',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ cur_rule['function'] = function(task)
+ if not re then
+ rspamd_logger.errx(task, 're is missing for rule %1', h)
+ return 0
+ end
+
+ return process_regexp_opt(re, task, 'allheader')
+ end
+ else
+ local args = split(h, '[^:]+')
+ cur_param['strong'] = false
+ cur_param['raw'] = false
+ cur_param['header'] = args[1]
+
+ if args[2] then
+ -- We have some ops that are required for the header, so it's not ordinary
+ ordinary = false
+ end
+
+ fun.each(function(func)
+ if func == 'addr' then
+ cur_param['function'] = function(str)
+ local addr_parsed = util.parse_mail_address(str)
+ local ret = {}
+ if addr_parsed then
+ for _, elt in ipairs(addr_parsed) do
+ if elt['addr'] then
+ table.insert(ret, elt['addr'])
+ end
+ end
+ end
+
+ return ret
+ end
+ elseif func == 'name' then
+ cur_param['function'] = function(str)
+ local addr_parsed = util.parse_mail_address(str)
+ local ret = {}
+ if addr_parsed then
+ for _, elt in ipairs(addr_parsed) do
+ if elt['name'] then
+ table.insert(ret, elt['name'])
+ end
+ end
+ end
+
+ return ret
+ end
+ elseif func == 'raw' then
+ cur_param['raw'] = true
+ elseif func == 'case' then
+ cur_param['strong'] = true
+ else
+ rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
+ func, cur_rule['symbol'])
+ end
+ end, fun.tail(args))
+
+ local function split_hdr_param(param, headers)
+ for _, hh in ipairs(headers) do
+ local nparam = {}
+ for k, v in pairs(param) do
+ if k ~= 'header' then
+ nparam[k] = v
+ end
+ end
+
+ nparam['header'] = hh
+ table.insert(hdr_params, nparam)
+ end
+ end
+ -- Some header rules require splitting to check of multiple headers
+ if cur_param['header'] == 'MESSAGEID' then
+ -- Special case for spamassassin
+ ordinary = false
+ split_hdr_param(cur_param, {
+ 'Message-ID',
+ 'X-Message-ID',
+ 'Resent-Message-ID' })
+ elseif cur_param['header'] == 'ToCc' then
+ ordinary = false
+ split_hdr_param(cur_param, { 'To', 'Cc', 'Bcc' })
+ else
+ table.insert(hdr_params, cur_param)
+ end
+ end
+
+ cur_rule['ordinary'] = ordinary
+ cur_rule['header'] = hdr_params
+ end
+end
+
+local function freemail_search(input)
+ local res = 0
+ local function trie_callback(number, pos)
+ lua_util.debugm(N, rspamd_config, 'Matched pattern %1 at pos %2', freemail_domains[number], pos)
+ res = res + 1
+ end
+
+ if input then
+ freemail_trie:match(input, trie_callback, true)
+ end
+
+ return res
+end
+
+local function gen_eval_rule(arg)
+ local eval_funcs = {
+ { 'check_freemail_from', function(task)
+ local from = task:get_from('mime')
+ if from and from[1] then
+ return freemail_search(string.lower(from[1]['addr']))
+ end
+ return 0
+ end },
+ { 'check_freemail_replyto',
+ function(task)
+ return freemail_search(task:get_header('Reply-To'))
+ end
+ },
+ { 'check_freemail_header',
+ function(task, remain)
+ -- Remain here contains one or two args: header and regexp to match
+ local larg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$")
+ local re = nil
+ if not larg then
+ larg, re = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*,%s*['\"]([^%s]+)['\"]%s*%)$")
+ end
+
+ if larg then
+ local h
+ if larg == 'EnvelopeFrom' then
+ h = task:get_from('smtp')
+ if h then
+ h = h[1]['addr']
+ end
+ else
+ h = task:get_header(larg)
+ end
+ if h then
+ local hdr_freemail = freemail_search(string.lower(h))
+
+ if hdr_freemail > 0 and re then
+ local r = rspamd_regexp.create_cached(re)
+ if r then
+ if r:match(h) then
+ return 1
+ end
+ return 0
+ else
+ rspamd_logger.infox(rspamd_config, 'cannot create regexp %1', re)
+ return 0
+ end
+ end
+
+ return hdr_freemail
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_for_missing_to_header',
+ function(task)
+ local th = task:get_recipients('mime')
+ if not th or #th == 0 then
+ return 1
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_relays_unparseable',
+ function(task)
+ local rh_mime = task:get_header_full('Received')
+ local rh_parsed = task:get_received_headers()
+
+ local rh_cnt = 0
+ if rh_mime then
+ rh_cnt = #rh_mime
+ end
+ local parsed_cnt = 0
+ if rh_parsed then
+ parsed_cnt = #rh_parsed
+ end
+
+ return rh_cnt - parsed_cnt
+ end
+ },
+ {
+ 'check_for_shifted_date',
+ function(task, remain)
+ -- Remain here contains two args: start and end hours shift
+ local matches = internal_regexp['date_shift']:search(remain, true, true)
+ if matches and matches[1] then
+ local min_diff = matches[1][2]
+ local max_diff = matches[1][3]
+
+ if min_diff == 'undef' then
+ min_diff = 0
+ else
+ min_diff = tonumber(min_diff) * 3600
+ end
+ if max_diff == 'undef' then
+ max_diff = 0
+ else
+ max_diff = tonumber(max_diff) * 3600
+ end
+
+ -- Now get the difference between Date and message received date
+ local dm = task:get_date { format = 'message', gmt = true }
+ local dt = task:get_date { format = 'connect', gmt = true }
+ local diff = dm - dt
+
+ if (max_diff == 0 and diff >= min_diff) or
+ (min_diff == 0 and diff <= max_diff) or
+ (diff >= min_diff and diff <= max_diff) then
+ return 1
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_for_mime',
+ function(task, remain)
+ local larg = string.match(remain, "^%(%s*['\"]([^%s]+)['\"]%s*%)$")
+
+ if larg then
+ if larg == 'mime_attachment' then
+ local parts = task:get_parts()
+ if parts then
+ for _, p in ipairs(parts) do
+ if p:get_filename() then
+ return 1
+ end
+ end
+ end
+ else
+ rspamd_logger.infox(task, 'unimplemented mime check %1', arg)
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_from_in_blacklist',
+ function(task)
+ local from = task:get_from('mime')
+ if ((from or E)[1] or E).addr then
+ if sa_lists['from_blacklist'][string.lower(from[1]['addr'])] then
+ return 1
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_from_in_whitelist',
+ function(task)
+ local from = task:get_from('mime')
+ if ((from or E)[1] or E).addr then
+ if sa_lists['from_whitelist'][string.lower(from[1]['addr'])] then
+ return 1
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_from_in_default_whitelist',
+ function(task)
+ local from = task:get_from('mime')
+ if ((from or E)[1] or E).addr then
+ if sa_lists['from_def_whitelist'][string.lower(from[1]['addr'])] then
+ return 1
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_to_in_blacklist',
+ function(task)
+ local rcpt = task:get_recipients('mime')
+ if rcpt then
+ for _, r in ipairs(rcpt) do
+ if sa_lists['to_blacklist'][string.lower(r['addr'])] then
+ return 1
+ end
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'check_to_in_whitelist',
+ function(task)
+ local rcpt = task:get_recipients('mime')
+ if rcpt then
+ for _, r in ipairs(rcpt) do
+ if sa_lists['to_whitelist'][string.lower(r['addr'])] then
+ return 1
+ end
+ end
+ end
+
+ return 0
+ end
+ },
+ {
+ 'html_tag_exists',
+ function(task, remain)
+ local tp = task:get_text_parts()
+
+ for _, p in ipairs(tp) do
+ if p:is_html() then
+ local hc = p:get_html()
+
+ if hc:has_tag(remain) then
+ return 1
+ end
+ end
+ end
+
+ return 0
+ end
+ }
+ }
+
+ for _, f in ipairs(eval_funcs) do
+ local pat = string.format('^%s', f[1])
+ local first, last = string.find(arg, pat)
+
+ if first then
+ local func_arg = string.sub(arg, last + 1)
+ return function(task)
+ return f[2](task, func_arg)
+ end
+ end
+ end
+end
+
+-- Returns parser function or nil
+local function maybe_parse_sa_function(line)
+ local arg
+ local elts = split(line, '[^:]+')
+ arg = elts[2]
+
+ lua_util.debugm(N, rspamd_config, 'trying to parse SA function %1 with args %2',
+ elts[1], elts[2])
+ local substitutions = {
+ { '^exists:',
+ function(task)
+ -- filter
+ local hdrs_check
+ if arg == 'MESSAGEID' then
+ hdrs_check = {
+ 'Message-ID',
+ 'X-Message-ID',
+ 'Resent-Message-ID'
+ }
+ elseif arg == 'ToCc' then
+ hdrs_check = { 'To', 'Cc', 'Bcc' }
+ else
+ hdrs_check = { arg }
+ end
+
+ for _, h in ipairs(hdrs_check) do
+ if task:has_header(h) then
+ return 1
+ end
+ end
+ return 0
+ end,
+ },
+ { '^eval:',
+ function(task)
+ local func = func_cache[arg]
+ if not func then
+ func = gen_eval_rule(arg)
+ func_cache[arg] = func
+ end
+
+ if not func then
+ rspamd_logger.errx(task, 'cannot find appropriate eval rule for function %1',
+ arg)
+ else
+ return func(task)
+ end
+
+ return 0
+ end
+ },
+ }
+
+ for _, s in ipairs(substitutions) do
+ if string.find(line, s[1]) then
+ return s[2]
+ end
+ end
+
+ return nil
+end
+
+local function words_to_re(words, start)
+ return table.concat(fun.totable(fun.drop_n(start, words)), " ");
+end
+
+local function process_tflags(rule, flags)
+ fun.each(function(flag)
+ if flag == 'publish' then
+ rule['publish'] = true
+ elseif flag == 'multiple' then
+ rule['multiple'] = true
+ elseif string.match(flag, '^maxhits=(%d+)$') then
+ rule['maxhits'] = tonumber(string.match(flag, '^maxhits=(%d+)$'))
+ elseif flag == 'nice' then
+ rule['nice'] = true
+ end
+ end, fun.drop_n(1, flags))
+
+ if rule['re'] then
+ if rule['maxhits'] then
+ rule['re']:set_max_hits(rule['maxhits'])
+ elseif rule['multiple'] then
+ rule['re']:set_max_hits(0)
+ else
+ rule['re']:set_max_hits(1)
+ end
+ end
+end
+
+local function process_replace(words, tbl)
+ local re = words_to_re(words, 2)
+ tbl[words[2]] = re
+end
+
+local function process_sa_conf(f)
+ local cur_rule = {}
+ local valid_rule = false
+
+ local function insert_cur_rule()
+ if cur_rule['type'] ~= 'meta' and cur_rule['publish'] then
+ -- Create meta rule from this rule
+ local nsym = '__fake' .. cur_rule['symbol']
+ local nrule = {
+ type = 'meta',
+ symbol = cur_rule['symbol'],
+ score = cur_rule['score'],
+ meta = nsym,
+ description = cur_rule['description'],
+ }
+ rules[nrule['symbol']] = nrule
+ cur_rule['symbol'] = nsym
+ end
+ -- We have previous rule valid
+ if not cur_rule['symbol'] then
+ rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
+ end
+ rules[cur_rule['symbol']] = cur_rule
+ cur_rule = {}
+ valid_rule = false
+ end
+
+ local function parse_score(words)
+ if #words == 3 then
+ -- score rule <x>
+ lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[3])
+ return tonumber(words[3])
+ elseif #words == 6 then
+ -- score rule <x1> <x2> <x3> <x4>
+ -- we assume here that bayes and network are enabled and select <x4>
+ lua_util.debugm(N, rspamd_config, 'found score for %1: %2', words[2], words[6])
+ return tonumber(words[6])
+ else
+ rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
+ end
+
+ return 0
+ end
+
+ local skip_to_endif = false
+ local if_nested = 0
+ for l in f:lines() do
+ (function()
+ l = lua_util.rspamd_str_trim(l)
+ -- Replace bla=~/re/ with bla =~ /re/ (#2372)
+ l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')
+
+ if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
+ return
+ end
+
+ -- Unbalanced if/endif
+ if if_nested < 0 then
+ if_nested = 0
+ end
+ if skip_to_endif then
+ if string.match(l, '^endif') then
+ if_nested = if_nested - 1
+
+ if if_nested == 0 then
+ skip_to_endif = false
+ end
+ elseif string.match(l, '^if') then
+ if_nested = if_nested + 1
+ elseif string.match(l, '^else') then
+ -- Else counterpart for if
+ skip_to_endif = false
+ end
+ return
+ else
+ if string.match(l, '^ifplugin') then
+ local ls = split(l)
+
+ if not fun.any(function(pl)
+ if pl == ls[2] then
+ return true
+ end
+ return false
+ end, known_plugins) then
+ skip_to_endif = true
+ end
+ if_nested = if_nested + 1
+ elseif string.match(l, '^if !plugin%(') then
+ local pname = string.match(l, '^if !plugin%(([A-Za-z:]+)%)')
+ if fun.any(function(pl)
+ if pl == pname then
+ return true
+ end
+ return false
+ end, known_plugins) then
+ skip_to_endif = true
+ end
+ if_nested = if_nested + 1
+ elseif string.match(l, '^if') then
+ -- Unknown if
+ skip_to_endif = true
+ if_nested = if_nested + 1
+ elseif string.match(l, '^else') then
+ -- Else counterpart for if
+ skip_to_endif = true
+ elseif string.match(l, '^endif') then
+ if_nested = if_nested - 1
+ end
+ end
+
+ -- Skip comments
+ local words = fun.totable(fun.take_while(
+ function(w)
+ return string.sub(w, 1, 1) ~= '#'
+ end,
+ fun.filter(function(w)
+ return w ~= ""
+ end,
+ fun.iter(split(l)))))
+
+ if words[1] == "header" or words[1] == 'mimeheader' then
+ -- header SYMBOL Header ~= /regexp/
+ if valid_rule then
+ insert_cur_rule()
+ end
+ if words[4] and (words[4] == '=~' or words[4] == '!~') then
+ cur_rule['type'] = 'header'
+ cur_rule['symbol'] = words[2]
+
+ if words[4] == '!~' then
+ cur_rule['not'] = true
+ end
+
+ cur_rule['re_expr'] = words_to_re(words, 4)
+ local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:')
+ if unset_comp then
+ -- We have optional part that needs to be processed
+ local unset = string.match(string.sub(cur_rule['re_expr'], unset_comp),
+ '%[if%-unset:%s*([^%]%s]+)]')
+ cur_rule['unset'] = unset
+ -- Cut it down
+ cur_rule['re_expr'] = string.sub(cur_rule['re_expr'], 1, unset_comp - 1)
+ end
+
+ cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
+
+ if not cur_rule['re'] then
+ rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
+ cur_rule['re_expr'], cur_rule['symbol'])
+ else
+ cur_rule['re']:set_max_hits(1)
+ handle_header_def(words[3], cur_rule)
+ end
+
+ if cur_rule['unset'] then
+ cur_rule['ordinary'] = false
+ end
+
+ if words[1] == 'mimeheader' then
+ cur_rule['mime'] = true
+ else
+ cur_rule['mime'] = false
+ end
+
+ if cur_rule['re'] and cur_rule['symbol'] and
+ (cur_rule['header'] or cur_rule['function']) then
+ valid_rule = true
+ cur_rule['re']:set_max_hits(1)
+ if cur_rule['header'] and cur_rule['ordinary'] then
+ for _, h in ipairs(cur_rule['header']) do
+ if type(h) == 'string' then
+ if cur_rule['mime'] then
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'mimeheader',
+ header = h,
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ else
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'header',
+ header = h,
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ end
+ else
+ h['mime'] = cur_rule['mime']
+ if cur_rule['mime'] then
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'mimeheader',
+ header = h['header'],
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ else
+ if h['raw'] then
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'rawheader',
+ header = h['header'],
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ else
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'header',
+ header = h['header'],
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ end
+ end
+ end
+ end
+ cur_rule['re']:set_limit(match_limit)
+ cur_rule['re']:set_max_hits(1)
+ end
+ end
+ else
+ -- Maybe we know the function and can convert it
+ local args = words_to_re(words, 2)
+ local func = maybe_parse_sa_function(args)
+
+ if func then
+ cur_rule['type'] = 'function'
+ cur_rule['symbol'] = words[2]
+ cur_rule['function'] = func
+ valid_rule = true
+ else
+ rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ end
+ end
+ elseif words[1] == "body" then
+ -- body SYMBOL /regexp/
+ if valid_rule then
+ insert_cur_rule()
+ end
+
+ cur_rule['symbol'] = words[2]
+ if words[3] and (string.sub(words[3], 1, 1) == '/'
+ or string.sub(words[3], 1, 1) == 'm') then
+ cur_rule['type'] = 'sabody'
+ cur_rule['re_expr'] = words_to_re(words, 2)
+ cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
+ if cur_rule['re'] then
+
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'sabody',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ valid_rule = true
+ cur_rule['re']:set_limit(match_limit)
+ cur_rule['re']:set_max_hits(1)
+ end
+ else
+ -- might be function
+ local args = words_to_re(words, 2)
+ local func = maybe_parse_sa_function(args)
+
+ if func then
+ cur_rule['type'] = 'function'
+ cur_rule['symbol'] = words[2]
+ cur_rule['function'] = func
+ valid_rule = true
+ else
+ rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ end
+ end
+ elseif words[1] == "rawbody" then
+ -- body SYMBOL /regexp/
+ if valid_rule then
+ insert_cur_rule()
+ end
+
+ cur_rule['symbol'] = words[2]
+ if words[3] and (string.sub(words[3], 1, 1) == '/'
+ or string.sub(words[3], 1, 1) == 'm') then
+ cur_rule['type'] = 'sarawbody'
+ cur_rule['re_expr'] = words_to_re(words, 2)
+ cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
+ if cur_rule['re'] then
+
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'sarawbody',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ valid_rule = true
+ cur_rule['re']:set_limit(match_limit)
+ cur_rule['re']:set_max_hits(1)
+ end
+ else
+ -- might be function
+ local args = words_to_re(words, 2)
+ local func = maybe_parse_sa_function(args)
+
+ if func then
+ cur_rule['type'] = 'function'
+ cur_rule['symbol'] = words[2]
+ cur_rule['function'] = func
+ valid_rule = true
+ else
+ rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ end
+ end
+ elseif words[1] == "full" then
+ -- body SYMBOL /regexp/
+ if valid_rule then
+ insert_cur_rule()
+ end
+
+ cur_rule['symbol'] = words[2]
+
+ if words[3] and (string.sub(words[3], 1, 1) == '/'
+ or string.sub(words[3], 1, 1) == 'm') then
+ cur_rule['type'] = 'message'
+ cur_rule['re_expr'] = words_to_re(words, 2)
+ cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
+ cur_rule['raw'] = true
+ if cur_rule['re'] then
+ valid_rule = true
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'body',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ cur_rule['re']:set_limit(match_limit)
+ cur_rule['re']:set_max_hits(1)
+ end
+ else
+ -- might be function
+ local args = words_to_re(words, 2)
+ local func = maybe_parse_sa_function(args)
+
+ if func then
+ cur_rule['type'] = 'function'
+ cur_rule['symbol'] = words[2]
+ cur_rule['function'] = func
+ valid_rule = true
+ else
+ rspamd_logger.infox(rspamd_config, 'unknown function %1', args)
+ end
+ end
+ elseif words[1] == "uri" then
+ -- uri SYMBOL /regexp/
+ if valid_rule then
+ insert_cur_rule()
+ end
+ cur_rule['type'] = 'uri'
+ cur_rule['symbol'] = words[2]
+ cur_rule['re_expr'] = words_to_re(words, 2)
+ cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
+ if cur_rule['re'] and cur_rule['symbol'] then
+ valid_rule = true
+ rspamd_config:register_regexp({
+ re = cur_rule['re'],
+ type = 'url',
+ pcre_only = is_pcre_only(cur_rule['symbol']),
+ })
+ cur_rule['re']:set_limit(match_limit)
+ cur_rule['re']:set_max_hits(1)
+ end
+ elseif words[1] == "meta" then
+ -- meta SYMBOL expression
+ if valid_rule then
+ insert_cur_rule()
+ end
+ cur_rule['type'] = 'meta'
+ cur_rule['symbol'] = words[2]
+ cur_rule['meta'] = words_to_re(words, 2)
+ if cur_rule['meta'] and cur_rule['symbol']
+ and cur_rule['meta'] ~= '0' then
+ valid_rule = true
+ end
+ elseif words[1] == "describe" and valid_rule then
+ cur_rule['description'] = words_to_re(words, 2)
+ elseif words[1] == "score" then
+ scores[words[2]] = parse_score(words)
+ elseif words[1] == 'freemail_domains' then
+ fun.each(function(dom)
+ table.insert(freemail_domains, '@' .. dom)
+ end, fun.drop_n(1, words))
+ elseif words[1] == 'blacklist_from' then
+ sa_lists['from_blacklist'][words[2]] = 1
+ sa_lists['elts'] = sa_lists['elts'] + 1
+ elseif words[1] == 'whitelist_from' then
+ sa_lists['from_whitelist'][words[2]] = 1
+ sa_lists['elts'] = sa_lists['elts'] + 1
+ elseif words[1] == 'whitelist_to' then
+ sa_lists['to_whitelist'][words[2]] = 1
+ sa_lists['elts'] = sa_lists['elts'] + 1
+ elseif words[1] == 'blacklist_to' then
+ sa_lists['to_blacklist'][words[2]] = 1
+ sa_lists['elts'] = sa_lists['elts'] + 1
+ elseif words[1] == 'tflags' then
+ process_tflags(cur_rule, words)
+ elseif words[1] == 'replace_tag' then
+ process_replace(words, replace['tags'])
+ elseif words[1] == 'replace_pre' then
+ process_replace(words, replace['pre'])
+ elseif words[1] == 'replace_inter' then
+ process_replace(words, replace['inter'])
+ elseif words[1] == 'replace_post' then
+ process_replace(words, replace['post'])
+ elseif words[1] == 'replace_rules' then
+ fun.each(function(r)
+ table.insert(replace['rules'], r)
+ end,
+ fun.drop_n(1, words))
+ end
+ end)()
+ end
+ if valid_rule then
+ insert_cur_rule()
+ end
+end
+
+-- Now check all valid rules and add the according rspamd rules
+
+local function calculate_score(sym, rule)
+ if fun.all(function(c)
+ return c == '_'
+ end, fun.take_n(2, fun.iter(sym))) then
+ return 0.0
+ end
+
+ if rule['nice'] or (rule['score'] and rule['score'] < 0.0) then
+ return -1.0
+ end
+
+ return 1.0
+end
+
+local function add_sole_meta(sym, rule)
+ local r = {
+ type = 'meta',
+ meta = rule['symbol'],
+ score = rule['score'],
+ description = rule['description']
+ }
+ rules[sym] = r
+end
+
+local function sa_regexp_match(data, re, raw, rule)
+ local res = 0
+ if not re then
+ return 0
+ end
+ if rule['multiple'] then
+ local lim = -1
+ if rule['maxhits'] then
+ lim = rule['maxhits']
+ end
+ res = res + re:matchn(data, lim, raw)
+ else
+ if re:match(data, raw) then
+ res = 1
+ end
+ end
+
+ return res
+end
+
+local function apply_replacements(str)
+ local pre = ""
+ local post = ""
+ local inter = ""
+
+ local function check_specific_tag(prefix, s, tbl)
+ local replacement = nil
+ local ret = s
+ fun.each(function(n, t)
+ local ns, matches = string.gsub(s, string.format("<%s%s>", prefix, n), "")
+ if matches > 0 then
+ replacement = t
+ ret = ns
+ end
+ end, tbl)
+
+ return ret, replacement
+ end
+
+ local repl
+ str, repl = check_specific_tag("pre ", str, replace['pre'])
+ if repl then
+ pre = repl
+ end
+ str, repl = check_specific_tag("inter ", str, replace['inter'])
+ if repl then
+ inter = repl
+ end
+ str, repl = check_specific_tag("post ", str, replace['post'])
+ if repl then
+ post = repl
+ end
+
+ -- XXX: ugly hack
+ if inter then
+ str = string.gsub(str, "><", string.format(">%s<", inter))
+ end
+
+ local function replace_all_tags(s)
+ local sstr
+ sstr = s
+ fun.each(function(n, t)
+ local rep = string.format("%s%s%s", pre, t, post)
+ rep = string.gsub(rep, '%%', '%%%%')
+ sstr = string.gsub(sstr, string.format("<%s>", n), rep)
+ end, replace['tags'])
+
+ return sstr
+ end
+
+ local s = replace_all_tags(str)
+
+ if str ~= s then
+ return true, s
+ end
+
+ return false, str
+end
+
+local function parse_atom(str)
+ local atom = table.concat(fun.totable(fun.take_while(function(c)
+ if string.find(', \t()><+!|&\n', c, 1, true) then
+ return false
+ end
+ return true
+ end, fun.iter(str))), '')
+
+ return atom
+end
+
+local function gen_process_atom_cb(result_name, task)
+ return function(atom)
+ local atom_cb = atoms[atom]
+
+ if atom_cb then
+ local res = atom_cb(task, result_name)
+
+ if not res then
+ lua_util.debugm(N, task, 'metric: %s, atom: %s, NULL result', result_name, atom)
+ elseif res > 0 then
+ lua_util.debugm(N, task, 'metric: %s, atom: %s, result: %s', result_name, atom, res)
+ end
+ return res
+ else
+ -- This is likely external atom
+ local real_sym = atom
+ if symbols_replacements[atom] then
+ real_sym = symbols_replacements[atom]
+ end
+ if task:has_symbol(real_sym, result_name) then
+ lua_util.debugm(N, task, 'external atom: %s, result: 1, named_result: %s', real_sym, result_name)
+ return 1
+ end
+ lua_util.debugm(N, task, 'external atom: %s, result: 0, , named_result: %s', real_sym, result_name)
+ end
+ return 0
+ end
+end
+
+local function post_process()
+ -- Replace rule tags
+ local ntags = {}
+ local function rec_replace_tags(tag, tagv)
+ if ntags[tag] then
+ return ntags[tag]
+ end
+ fun.each(function(n, t)
+ if n ~= tag then
+ local s, matches = string.gsub(tagv, string.format("<%s>", n), t)
+ if matches > 0 then
+ ntags[tag] = rec_replace_tags(tag, s)
+ end
+ end
+ end, replace['tags'])
+
+ if not ntags[tag] then
+ ntags[tag] = tagv
+ end
+ return ntags[tag]
+ end
+
+ fun.each(function(n, t)
+ rec_replace_tags(n, t)
+ end, replace['tags'])
+ fun.each(function(n, t)
+ replace['tags'][n] = t
+ end, ntags)
+
+ fun.each(function(r)
+ local rule = rules[r]
+
+ if rule['re_expr'] and rule['re'] then
+ local res, nexpr = apply_replacements(rule['re_expr'])
+ if res then
+ local nre = rspamd_regexp.create(nexpr)
+ if not nre then
+ rspamd_logger.errx(rspamd_config, 'cannot apply replacement for rule %1', r)
+ --rule['re'] = nil
+ else
+ local old_max_hits = rule['re']:get_max_hits()
+ lua_util.debugm(N, rspamd_config, 'replace %1 -> %2', r, nexpr)
+ rspamd_config:replace_regexp({
+ old_re = rule['re'],
+ new_re = nre,
+ pcre_only = is_pcre_only(rule['symbol']),
+ })
+ rule['re'] = nre
+ rule['re_expr'] = nexpr
+ nre:set_limit(match_limit)
+ nre:set_max_hits(old_max_hits)
+ end
+ end
+ end
+ end, replace['rules'])
+
+ fun.each(function(key, score)
+ if rules[key] then
+ rules[key]['score'] = score
+ end
+ end, scores)
+
+ -- Header rules
+ fun.each(function(k, r)
+ local f = function(task)
+
+ local raw = false
+ local check = {}
+ -- Cached path for ordinary expressions
+ if r['ordinary'] then
+ local h = r['header'][1]
+ local t = 'header'
+
+ if h['raw'] then
+ t = 'rawheader'
+ end
+
+ if not r['re'] then
+ rspamd_logger.errx(task, 're is missing for rule %1 (%2 header)', k,
+ h['header'])
+ return 0
+ end
+
+ local ret = process_regexp_opt(r.re, task, t, h.header, h.strong)
+
+ if r['not'] then
+ if ret ~= 0 then
+ ret = 0
+ else
+ ret = 1
+ end
+ end
+
+ return ret
+ end
+
+ -- Slow path
+ fun.each(function(h)
+ local hname = h['header']
+
+ local hdr
+ if h['mime'] then
+ local parts = task:get_parts()
+ for _, p in ipairs(parts) do
+ local m_hdr = p:get_header_full(hname, h['strong'])
+
+ if m_hdr then
+ if not hdr then
+ hdr = {}
+ end
+ for _, mh in ipairs(m_hdr) do
+ table.insert(hdr, mh)
+ end
+ end
+ end
+ else
+ hdr = task:get_header_full(hname, h['strong'])
+ end
+
+ if hdr then
+ for _, rh in ipairs(hdr) do
+ -- Subject for optimization
+ local str
+ if h['raw'] then
+ str = rh['value']
+ raw = true
+ else
+ str = rh['decoded']
+ end
+ if not str then
+ return 0
+ end
+
+ if h['function'] then
+ str = h['function'](str)
+ end
+
+ if type(str) == 'string' then
+ table.insert(check, str)
+ else
+ for _, c in ipairs(str) do
+ table.insert(check, c)
+ end
+ end
+ end
+ elseif r['unset'] then
+ table.insert(check, r['unset'])
+ end
+ end, r['header'])
+
+ if #check == 0 then
+ if r['not'] then
+ return 1
+ end
+ return 0
+ end
+
+ local ret = 0
+ for _, c in ipairs(check) do
+ local match = sa_regexp_match(c, r['re'], raw, r)
+ if (match > 0 and not r['not']) or (match == 0 and r['not']) then
+ ret = 1
+ end
+ end
+
+ return ret
+ end
+ if r['score'] then
+ local real_score = r['score'] * calculate_score(k, r)
+ if math.abs(real_score) > meta_score_alpha then
+ add_sole_meta(k, r)
+ end
+ end
+ atoms[k] = f
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'header' and r['header']
+ end,
+ rules))
+
+ -- Custom function rules
+ fun.each(function(k, r)
+ local f = function(task)
+ local res = r['function'](task)
+ if res and res > 0 then
+ return res
+ end
+ return 0
+ end
+ if r['score'] then
+ local real_score = r['score'] * calculate_score(k, r)
+ if math.abs(real_score) > meta_score_alpha then
+ add_sole_meta(k, r)
+ end
+ end
+ atoms[k] = f
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'function' and r['function']
+ end,
+ rules))
+
+ -- Parts rules
+ fun.each(function(k, r)
+ local f = function(task)
+ if not r['re'] then
+ rspamd_logger.errx(task, 're is missing for rule %1', k)
+ return 0
+ end
+
+ local t = 'mime'
+ if r['raw'] then
+ t = 'rawmime'
+ end
+
+ return process_regexp_opt(r.re, task, t)
+ end
+ if r['score'] then
+ local real_score = r['score'] * calculate_score(k, r)
+ if math.abs(real_score) > meta_score_alpha then
+ add_sole_meta(k, r)
+ end
+ end
+ atoms[k] = f
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'part'
+ end, rules))
+
+ -- SA body rules
+ fun.each(function(k, r)
+ local f = function(task)
+ if not r['re'] then
+ rspamd_logger.errx(task, 're is missing for rule %1', k)
+ return 0
+ end
+
+ local t = r['type']
+
+ local ret = process_regexp_opt(r.re, task, t)
+ return ret
+ end
+ if r['score'] then
+ local real_score = r['score'] * calculate_score(k, r)
+ if math.abs(real_score) > meta_score_alpha then
+ add_sole_meta(k, r)
+ end
+ end
+ atoms[k] = f
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'sabody' or r['type'] == 'message' or r['type'] == 'sarawbody'
+ end, rules))
+
+ -- URL rules
+ fun.each(function(k, r)
+ local f = function(task)
+ if not r['re'] then
+ rspamd_logger.errx(task, 're is missing for rule %1', k)
+ return 0
+ end
+
+ return process_regexp_opt(r.re, task, 'url')
+ end
+ if r['score'] then
+ local real_score = r['score'] * calculate_score(k, r)
+ if math.abs(real_score) > meta_score_alpha then
+ add_sole_meta(k, r)
+ end
+ end
+ atoms[k] = f
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'uri'
+ end,
+ rules))
+ -- Meta rules
+ fun.each(function(k, r)
+ local expression = nil
+ -- Meta function callback
+ -- Here are dragons!
+ -- This function can be called from 2 DIFFERENT type of invocations:
+ -- 1) Invocation from Rspamd itself where `res_name` will be nil
+ -- 2) Invocation from other meta during expression:process_traced call
+ -- So we need to distinguish that and return different stuff to be able to deal with atoms
+ local meta_cb = function(task, res_name)
+ lua_util.debugm(N, task, 'meta callback for %s; result name: %s', k, res_name)
+ local cached = task:cache_get('sa_metas_processed')
+
+ -- We avoid many task methods invocations here (likely)
+ if not cached then
+ cached = {}
+ task:cache_set('sa_metas_processed', cached)
+ end
+
+ local already_processed = cached[k]
+
+ -- Exclude elements that are named in the same way as the symbol itself
+ local function exclude_sym_filter(sopt)
+ return sopt ~= k
+ end
+
+ if not (already_processed and already_processed[res_name or 'default']) then
+ -- Execute symbol
+ local function exec_symbol(cur_res)
+ local res, trace = expression:process_traced(gen_process_atom_cb(cur_res, task))
+ lua_util.debugm(N, task, 'meta result for %s: %s; result name: %s', k, res, cur_res)
+ if res > 0 then
+ -- Symbol should be one shot to make it working properly
+ task:insert_result_named(cur_res, k, res, fun.totable(fun.filter(exclude_sym_filter, trace)))
+ end
+
+ if not cached[k] then
+ cached[k] = {}
+ end
+
+ cached[k][cur_res] = res
+ end
+
+ if not res_name then
+ -- Invoke for all named results
+ local named_results = task:get_all_named_results()
+ for _, cur_res in ipairs(named_results) do
+ exec_symbol(cur_res)
+ end
+ else
+ -- Invoked from another meta
+ exec_symbol(res_name)
+ return cached[k][res_name] or 0
+ end
+ else
+ -- We have cached the result
+ local res = already_processed[res_name or 'default'] or 0
+ lua_util.debugm(N, task, 'cached meta result for %s: %s; result name: %s',
+ k, res, res_name)
+
+ if res_name then
+ return res
+ end
+ end
+
+ -- No return if invoked directly from Rspamd as we use task:insert_result_named directly
+ end
+
+ expression = rspamd_expression.create(r['meta'], parse_atom, rspamd_config:get_mempool())
+ if not expression then
+ rspamd_logger.errx(rspamd_config, 'Cannot parse expression ' .. r['meta'])
+ else
+
+ if r['score'] then
+ rspamd_config:set_metric_symbol {
+ name = k, score = r['score'],
+ description = r['description'],
+ priority = scores_priority,
+ one_shot = true
+ }
+ scores_added[k] = 1
+ rspamd_config:register_symbol {
+ name = k,
+ weight = calculate_score(k, r),
+ callback = meta_cb
+ }
+ else
+ -- Add 0 score to avoid issues
+ rspamd_config:register_symbol {
+ name = k,
+ weight = calculate_score(k, r),
+ callback = meta_cb,
+ score = 0,
+ }
+ end
+
+ r['expression'] = expression
+
+ if not atoms[k] then
+ atoms[k] = meta_cb
+ end
+ end
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'meta'
+ end,
+ rules))
+
+ -- Check meta rules for foreign symbols and register dependencies
+ -- First direct dependencies:
+ fun.each(function(k, r)
+ if r['expression'] then
+ local expr_atoms = r['expression']:atoms()
+
+ for _, a in ipairs(expr_atoms) do
+ if not atoms[a] then
+ local rspamd_symbol = replace_symbol(a)
+ if not external_deps[k] then
+ external_deps[k] = {}
+ end
+
+ if not external_deps[k][rspamd_symbol] then
+ rspamd_config:register_dependency(k, rspamd_symbol)
+ external_deps[k][rspamd_symbol] = true
+ lua_util.debugm(N, rspamd_config,
+ 'atom %1 is a direct foreign dependency, ' ..
+ 'register dependency for %2 on %3',
+ a, k, rspamd_symbol)
+ end
+ end
+ end
+ end
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'meta'
+ end,
+ rules))
+
+ -- ... And then indirect ones ...
+ local nchanges
+ repeat
+ nchanges = 0
+ fun.each(function(k, r)
+ if r['expression'] then
+ local expr_atoms = r['expression']:atoms()
+ for _, a in ipairs(expr_atoms) do
+ if type(external_deps[a]) == 'table' then
+ for dep in pairs(external_deps[a]) do
+ if not external_deps[k] then
+ external_deps[k] = {}
+ end
+ if not external_deps[k][dep] then
+ rspamd_config:register_dependency(k, dep)
+ external_deps[k][dep] = true
+ lua_util.debugm(N, rspamd_config,
+ 'atom %1 is an indirect foreign dependency, ' ..
+ 'register dependency for %2 on %3',
+ a, k, dep)
+ nchanges = nchanges + 1
+ end
+ end
+ else
+ local rspamd_symbol, replaced_symbol = replace_symbol(a)
+ if replaced_symbol then
+ external_deps[a] = { [rspamd_symbol] = true }
+ else
+ external_deps[a] = {}
+ end
+ end
+ end
+ end
+ end,
+ fun.filter(function(_, r)
+ return r['type'] == 'meta'
+ end,
+ rules))
+ until nchanges == 0
+
+ -- Set missing symbols
+ fun.each(function(key, score)
+ if not scores_added[key] then
+ rspamd_config:set_metric_symbol({
+ name = key, score = score,
+ priority = 2, flags = 'ignore' })
+ end
+ end, scores)
+
+ -- Logging output
+ if freemail_domains then
+ freemail_trie = rspamd_trie.create(freemail_domains)
+ rspamd_logger.infox(rspamd_config, 'loaded %1 freemail domains definitions',
+ #freemail_domains)
+ end
+ rspamd_logger.infox(rspamd_config, 'loaded %1 blacklist/whitelist elements',
+ sa_lists['elts'])
+end
+
+local has_rules = false
+
+if type(section) == "table" then
+ local keywords = {
+ pcre_only = { 'table', function(v)
+ pcre_only_regexps = lua_util.list_to_hash(v)
+ end },
+ alpha = { 'number', function(v)
+ meta_score_alpha = tonumber(v)
+ end },
+ match_limit = { 'number', function(v)
+ match_limit = tonumber(v)
+ end },
+ scores_priority = { 'number', function(v)
+ scores_priority = tonumber(v)
+ end },
+ }
+
+ for k, fn in pairs(section) do
+ local kw = keywords[k]
+ if kw and type(fn) == kw[1] then
+ kw[2](fn)
+ else
+ -- SA rule file
+ if type(fn) == 'table' then
+ for _, elt in ipairs(fn) do
+ local files = util.glob(elt)
+
+ if not files or #files == 0 then
+ rspamd_logger.errx(rspamd_config, "cannot find any files matching pattern %s", elt)
+ else
+ for _, matched in ipairs(files) do
+ local f = io.open(matched, "r")
+ if f then
+ rspamd_logger.infox(rspamd_config, 'loading SA rules from %s', matched)
+ process_sa_conf(f)
+ has_rules = true
+ else
+ rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ end
+ end
+ end
+ end
+ else
+ -- assume string
+ local files = util.glob(fn)
+
+ if not files or #files == 0 then
+ rspamd_logger.errx(rspamd_config, "cannot find any files matching pattern %s", fn)
+ else
+ for _, matched in ipairs(files) do
+ local f = io.open(matched, "r")
+ if f then
+ rspamd_logger.infox(rspamd_config, 'loading SA rules from %s', matched)
+ process_sa_conf(f)
+ has_rules = true
+ else
+ rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
+ end
+ end
+ end
+ end
+ end
+ end
+end
+
+if has_rules then
+ post_process()
+else
+ lua_util.disable_module(N, "config")
+end