summaryrefslogtreecommitdiffstats
path: root/lualib/lua_scanners/common.lua
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /lualib/lua_scanners/common.lua
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lualib/lua_scanners/common.lua')
-rw-r--r--lualib/lua_scanners/common.lua539
1 files changed, 539 insertions, 0 deletions
diff --git a/lualib/lua_scanners/common.lua b/lualib/lua_scanners/common.lua
new file mode 100644
index 0000000..11f5e1f
--- /dev/null
+++ b/lualib/lua_scanners/common.lua
@@ -0,0 +1,539 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+Copyright (c) 2019, Carsten Rosenberg <c.rosenberg@heinlein-support.de>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module lua_scanners_common
+-- This module contains common external scanners functions
+--]]
+
+local rspamd_logger = require "rspamd_logger"
+local rspamd_regexp = require "rspamd_regexp"
+local lua_util = require "lua_util"
+local lua_redis = require "lua_redis"
+local lua_magic_types = require "lua_magic/types"
+local fun = require "fun"
+
+local exports = {}
+
+local function log_clean(task, rule, msg)
+
+ msg = msg or 'message or mime_part is clean'
+
+ if rule.log_clean then
+ rspamd_logger.infox(task, '%s: %s', rule.log_prefix, msg)
+ else
+ lua_util.debugm(rule.name, task, '%s: %s', rule.log_prefix, msg)
+ end
+
+end
+
+local function match_patterns(default_sym, found, patterns, dyn_weight)
+ if type(patterns) ~= 'table' then
+ return default_sym, dyn_weight
+ end
+ if not patterns[1] then
+ for sym, pat in pairs(patterns) do
+ if pat:match(found) then
+ return sym, '1'
+ end
+ end
+ return default_sym, dyn_weight
+ else
+ for _, p in ipairs(patterns) do
+ for sym, pat in pairs(p) do
+ if pat:match(found) then
+ return sym, '1'
+ end
+ end
+ end
+ return default_sym, dyn_weight
+ end
+end
+
+local function yield_result(task, rule, vname, dyn_weight, is_fail, maybe_part)
+ local all_whitelisted = true
+ local patterns
+ local symbol
+ local threat_table
+ local threat_info
+ local flags
+
+ if type(vname) == 'string' then
+ threat_table = { vname }
+ elseif type(vname) == 'table' then
+ threat_table = vname
+ end
+
+
+ -- This should be more generic
+ if not is_fail then
+ patterns = rule.patterns
+ symbol = rule.symbol
+ threat_info = rule.detection_category .. 'found'
+ if not dyn_weight then
+ dyn_weight = 1.0
+ end
+ elseif is_fail == 'fail' then
+ patterns = rule.patterns_fail
+ symbol = rule.symbol_fail
+ threat_info = "FAILED with error"
+ dyn_weight = 0.0
+ elseif is_fail == 'encrypted' then
+ patterns = rule.patterns
+ symbol = rule.symbol_encrypted
+ threat_info = "Scan has returned that input was encrypted"
+ dyn_weight = 1.0
+ elseif is_fail == 'macro' then
+ patterns = rule.patterns
+ symbol = rule.symbol_macro
+ threat_info = "Scan has returned that input contains macros"
+ dyn_weight = 1.0
+ end
+
+ for _, tm in ipairs(threat_table) do
+ local symname, symscore = match_patterns(symbol, tm, patterns, dyn_weight)
+ if rule.whitelist and rule.whitelist:get_key(tm) then
+ rspamd_logger.infox(task, '%s: "%s" is in whitelist', rule.log_prefix, tm)
+ else
+ all_whitelisted = false
+ rspamd_logger.infox(task, '%s: result - %s: "%s - score: %s"',
+ rule.log_prefix, threat_info, tm, symscore)
+
+ if maybe_part and rule.show_attachments and maybe_part:get_filename() then
+ local fname = maybe_part:get_filename()
+ task:insert_result(symname, symscore, string.format("%s|%s",
+ tm, fname))
+ else
+ task:insert_result(symname, symscore, tm)
+ end
+
+ end
+ end
+
+ if rule.action and is_fail ~= 'fail' and not all_whitelisted then
+ threat_table = table.concat(threat_table, '; ')
+ if rule.action ~= 'reject' then
+ flags = 'least'
+ end
+ task:set_pre_result(rule.action,
+ lua_util.template(rule.message or 'Rejected', {
+ SCANNER = rule.name,
+ VIRUS = threat_table,
+ }), rule.name, nil, nil, flags)
+ end
+end
+
+local function message_not_too_large(task, content, rule)
+ local max_size = tonumber(rule.max_size)
+ if not max_size then
+ return true
+ end
+ if #content > max_size then
+ rspamd_logger.infox(task, "skip %s check as it is too large: %s (%s is allowed)",
+ rule.log_prefix, #content, max_size)
+ return false
+ end
+ return true
+end
+
+local function message_not_too_small(task, content, rule)
+ local min_size = tonumber(rule.min_size)
+ if not min_size then
+ return true
+ end
+ if #content < min_size then
+ rspamd_logger.infox(task, "skip %s check as it is too small: %s (%s is allowed)",
+ rule.log_prefix, #content, min_size)
+ return false
+ end
+ return true
+end
+
+local function message_min_words(task, rule)
+ if rule.text_part_min_words and tonumber(rule.text_part_min_words) > 0 then
+ local text_part_above_limit = false
+ local text_parts = task:get_text_parts()
+
+ local filter_func = function(p)
+ return p:get_words_count() >= tonumber(rule.text_part_min_words)
+ end
+
+ fun.each(function(p)
+ text_part_above_limit = true
+ end, fun.filter(filter_func, text_parts))
+
+ if not text_part_above_limit then
+ rspamd_logger.infox(task, '%s: #words in all text parts is below text_part_min_words limit: %s',
+ rule.log_prefix, rule.text_part_min_words)
+ end
+
+ return text_part_above_limit
+ else
+ return true
+ end
+end
+
+local function dynamic_scan(task, rule)
+ if rule.dynamic_scan then
+ if rule.action ~= 'reject' then
+ local metric_result = task:get_metric_score()
+ local metric_action = task:get_metric_action()
+ local has_pre_result = task:has_pre_result()
+ -- ToDo: needed?
+ -- Sometimes leads to FPs
+ --if rule.symbol_type == 'postfilter' and metric_action == 'reject' then
+ -- rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, "result is already reject")
+ -- return false
+ --elseif metric_result[1] > metric_result[2]*2 then
+ if metric_result[1] > metric_result[2] * 2 then
+ rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, 'score > 2 * reject_level: ' .. metric_result[1])
+ return false
+ elseif has_pre_result and metric_action == 'reject' then
+ rspamd_logger.infox(task, '%s: aborting: %s', rule.log_prefix, 'pre_result reject is set')
+ return false
+ else
+ return true, 'undecided'
+ end
+ else
+ return true, 'dynamic_scan is not possible with config `action=reject;`'
+ end
+ else
+ return true
+ end
+end
+
+local function need_check(task, content, rule, digest, fn, maybe_part)
+
+ local uncached = true
+ local key = digest
+
+ local function redis_av_cb(err, data)
+ if data and type(data) == 'string' then
+ -- Cached
+ data = lua_util.str_split(data, '\t')
+ local threat_string = lua_util.str_split(data[1], '\v')
+ local score = data[2] or rule.default_score
+
+ if threat_string[1] ~= 'OK' then
+ if threat_string[1] == 'MACRO' then
+ yield_result(task, rule, 'File contains macros',
+ 0.0, 'macro', maybe_part)
+ elseif threat_string[1] == 'ENCRYPTED' then
+ yield_result(task, rule, 'File is encrypted',
+ 0.0, 'encrypted', maybe_part)
+ else
+ lua_util.debugm(rule.name, task, '%s: got cached threat result for %s: %s - score: %s',
+ rule.log_prefix, key, threat_string[1], score)
+ yield_result(task, rule, threat_string, score, false, maybe_part)
+ end
+
+ else
+ lua_util.debugm(rule.name, task, '%s: got cached negative result for %s: %s',
+ rule.log_prefix, key, threat_string[1])
+ end
+ uncached = false
+ else
+ if err then
+ rspamd_logger.errx(task, 'got error checking cache: %s', err)
+ end
+ end
+
+ local f_message_not_too_large = message_not_too_large(task, content, rule)
+ local f_message_not_too_small = message_not_too_small(task, content, rule)
+ local f_message_min_words = message_min_words(task, rule)
+ local f_dynamic_scan = dynamic_scan(task, rule)
+
+ if uncached and
+ f_message_not_too_large and
+ f_message_not_too_small and
+ f_message_min_words and
+ f_dynamic_scan then
+
+ fn()
+
+ end
+
+ end
+
+ if rule.redis_params and not rule.no_cache then
+
+ key = rule.prefix .. key
+
+ if lua_redis.redis_make_request(task,
+ rule.redis_params, -- connect params
+ key, -- hash key
+ false, -- is write
+ redis_av_cb, --callback
+ 'GET', -- command
+ { key } -- arguments)
+ ) then
+ return true
+ end
+ end
+
+ return false
+
+end
+
+local function save_cache(task, digest, rule, to_save, dyn_weight, maybe_part)
+ local key = digest
+ if not dyn_weight then
+ dyn_weight = 1.0
+ end
+
+ local function redis_set_cb(err)
+ -- Do nothing
+ if err then
+ rspamd_logger.errx(task, 'failed to save %s cache for %s -> "%s": %s',
+ rule.detection_category, to_save, key, err)
+ else
+ lua_util.debugm(rule.name, task, '%s: saved cached result for %s: %s - score %s - ttl %s',
+ rule.log_prefix, key, to_save, dyn_weight, rule.cache_expire)
+ end
+ end
+
+ if type(to_save) == 'table' then
+ to_save = table.concat(to_save, '\v')
+ end
+
+ local value_tbl = { to_save, dyn_weight }
+ if maybe_part and rule.show_attachments and maybe_part:get_filename() then
+ local fname = maybe_part:get_filename()
+ table.insert(value_tbl, fname)
+ end
+ local value = table.concat(value_tbl, '\t')
+
+ if rule.redis_params and rule.prefix then
+ key = rule.prefix .. key
+
+ lua_redis.redis_make_request(task,
+ rule.redis_params, -- connect params
+ key, -- hash key
+ true, -- is write
+ redis_set_cb, --callback
+ 'SETEX', -- command
+ { key, rule.cache_expire or 0, value }
+ )
+ end
+
+ return false
+end
+
+local function create_regex_table(patterns)
+ local regex_table = {}
+ if patterns[1] then
+ for i, p in ipairs(patterns) do
+ if type(p) == 'table' then
+ local new_set = {}
+ for k, v in pairs(p) do
+ new_set[k] = rspamd_regexp.create_cached(v)
+ end
+ regex_table[i] = new_set
+ else
+ regex_table[i] = {}
+ end
+ end
+ else
+ for k, v in pairs(patterns) do
+ regex_table[k] = rspamd_regexp.create_cached(v)
+ end
+ end
+ return regex_table
+end
+
+local function match_filter(task, rule, found, patterns, pat_type)
+ if type(patterns) ~= 'table' or not found then
+ return false
+ end
+ if not patterns[1] then
+ for _, pat in pairs(patterns) do
+ if pat_type == 'ext' and tostring(pat) == tostring(found) then
+ return true
+ elseif pat_type == 'regex' and pat:match(found) then
+ return true
+ end
+ end
+ return false
+ else
+ for _, p in ipairs(patterns) do
+ for _, pat in ipairs(p) do
+ if pat_type == 'ext' and tostring(pat) == tostring(found) then
+ return true
+ elseif pat_type == 'regex' and pat:match(found) then
+ return true
+ end
+ end
+ end
+ return false
+ end
+end
+
+-- borrowed from mime_types.lua
+-- ext is the last extension, LOWERCASED
+-- ext2 is the one before last extension LOWERCASED
+local function gen_extension(fname)
+ local filename_parts = lua_util.str_split(fname, '.')
+
+ local ext = {}
+ for n = 1, 2 do
+ ext[n] = #filename_parts > n and string.lower(filename_parts[#filename_parts + 1 - n]) or nil
+ end
+ return ext[1], ext[2], filename_parts
+end
+
+local function check_parts_match(task, rule)
+
+ local filter_func = function(p)
+ local mtype, msubtype = p:get_type()
+ local detected_ext = p:get_detected_ext()
+ local fname = p:get_filename()
+ local ext, ext2
+
+ if rule.scan_all_mime_parts == false then
+ -- check file extension and filename regex matching
+ --lua_util.debugm(rule.name, task, '%s: filename: |%s|%s|', rule.log_prefix, fname)
+ if fname ~= nil then
+ ext, ext2 = gen_extension(fname)
+ --lua_util.debugm(rule.name, task, '%s: extension, fname: |%s|%s|%s|', rule.log_prefix, ext, ext2, fname)
+ if match_filter(task, rule, ext, rule.mime_parts_filter_ext, 'ext')
+ or match_filter(task, rule, ext2, rule.mime_parts_filter_ext, 'ext') then
+ lua_util.debugm(rule.name, task, '%s: extension matched: |%s|%s|', rule.log_prefix, ext, ext2)
+ return true
+ elseif match_filter(task, rule, fname, rule.mime_parts_filter_regex, 'regex') then
+ lua_util.debugm(rule.name, task, '%s: filename regex matched', rule.log_prefix)
+ return true
+ end
+ end
+ -- check content type string regex matching
+ if mtype ~= nil and msubtype ~= nil then
+ local ct = string.format('%s/%s', mtype, msubtype):lower()
+ if match_filter(task, rule, ct, rule.mime_parts_filter_regex, 'regex') then
+ lua_util.debugm(rule.name, task, '%s: regex content-type: %s', rule.log_prefix, ct)
+ return true
+ end
+ end
+ -- check detected content type (libmagic) regex matching
+ if detected_ext then
+ local magic = lua_magic_types[detected_ext] or {}
+ if match_filter(task, rule, detected_ext, rule.mime_parts_filter_ext, 'ext') then
+ lua_util.debugm(rule.name, task, '%s: detected extension matched: |%s|', rule.log_prefix, detected_ext)
+ return true
+ elseif magic.ct and match_filter(task, rule, magic.ct, rule.mime_parts_filter_regex, 'regex') then
+ lua_util.debugm(rule.name, task, '%s: regex detected libmagic content-type: %s',
+ rule.log_prefix, magic.ct)
+ return true
+ end
+ end
+ -- check filenames in archives
+ if p:is_archive() then
+ local arch = p:get_archive()
+ local filelist = arch:get_files_full(1000)
+ for _, f in ipairs(filelist) do
+ ext, ext2 = gen_extension(f.name)
+ if match_filter(task, rule, ext, rule.mime_parts_filter_ext, 'ext')
+ or match_filter(task, rule, ext2, rule.mime_parts_filter_ext, 'ext') then
+ lua_util.debugm(rule.name, task, '%s: extension matched in archive: |%s|%s|', rule.log_prefix, ext, ext2)
+ --lua_util.debugm(rule.name, task, '%s: extension matched in archive: %s', rule.log_prefix, ext)
+ return true
+ elseif match_filter(task, rule, f.name, rule.mime_parts_filter_regex, 'regex') then
+ lua_util.debugm(rule.name, task, '%s: filename regex matched in archive', rule.log_prefix)
+ return true
+ end
+ end
+ end
+ end
+
+ -- check text_part has more words than text_part_min_words_check
+ if rule.scan_text_mime and rule.text_part_min_words and p:is_text() and
+ p:get_words_count() >= tonumber(rule.text_part_min_words) then
+ return true
+ end
+
+ if rule.scan_image_mime and p:is_image() then
+ return true
+ end
+
+ if rule.scan_all_mime_parts ~= false then
+ local is_part_checkable = (p:is_attachment() and (not p:is_image() or rule.scan_image_mime))
+ if detected_ext then
+ -- We know what to scan!
+ local magic = lua_magic_types[detected_ext] or {}
+
+ if magic.av_check ~= false or is_part_checkable then
+ return true
+ end
+ elseif is_part_checkable then
+ -- Just rely on attachment property
+ return true
+ end
+ end
+
+ return false
+ end
+
+ return fun.filter(filter_func, task:get_parts())
+end
+
+local function check_metric_results(task, rule)
+
+ if rule.action ~= 'reject' then
+ local metric_result = task:get_metric_score()
+ local metric_action = task:get_metric_action()
+ local has_pre_result = task:has_pre_result()
+
+ if rule.symbol_type == 'postfilter' and metric_action == 'reject' then
+ return true, 'result is already reject'
+ elseif metric_result[1] > metric_result[2] * 2 then
+ return true, 'score > 2 * reject_level: ' .. metric_result[1]
+ elseif has_pre_result and metric_action == 'reject' then
+ return true, 'pre_result reject is set'
+ else
+ return false, 'undecided'
+ end
+ else
+ return false, 'dynamic_scan is not possible with config `action=reject;`'
+ end
+end
+
+exports.log_clean = log_clean
+exports.yield_result = yield_result
+exports.match_patterns = match_patterns
+exports.condition_check_and_continue = need_check
+exports.save_cache = save_cache
+exports.create_regex_table = create_regex_table
+exports.check_parts_match = check_parts_match
+exports.check_metric_results = check_metric_results
+
+setmetatable(exports, {
+ __call = function(t, override)
+ for k, v in pairs(t) do
+ if _G[k] ~= nil then
+ local msg = 'function ' .. k .. ' already exists in global scope.'
+ if override then
+ _G[k] = v
+ print('WARNING: ' .. msg .. ' Overwritten.')
+ else
+ print('NOTICE: ' .. msg .. ' Skipped.')
+ end
+ else
+ _G[k] = v
+ end
+ end
+ end,
+})
+
+return exports