diff options
Diffstat (limited to 'rules/misc.lua')
-rw-r--r-- | rules/misc.lua | 864 |
1 files changed, 864 insertions, 0 deletions
diff --git a/rules/misc.lua b/rules/misc.lua new file mode 100644 index 0000000..faf4a8f --- /dev/null +++ b/rules/misc.lua @@ -0,0 +1,864 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +-- Misc rules + +local E = {} +local fun = require "fun" +local rspamd_util = require "rspamd_util" +local rspamd_parsers = require "rspamd_parsers" +local rspamd_regexp = require "rspamd_regexp" +local lua_util = require "lua_util" +local bit = require "bit" +local rspamd_url = require "rspamd_url" +local url_flags_tab = rspamd_url.flags + +-- Different text parts +rspamd_config.R_PARTS_DIFFER = { + callback = function(task) + local distance = task:get_mempool():get_variable('parts_distance', 'double') + + if distance then + local nd = tonumber(distance) + -- ND is relation of different words to total words + if nd >= 0.5 then + local tw = task:get_mempool():get_variable('total_words', 'int') + + if tw then + local score + if tw > 30 then + -- We are confident about difference + score = (nd - 0.5) * 2.0 + else + -- We are not so confident about difference + score = (nd - 0.5) + end + task:insert_result('R_PARTS_DIFFER', score, + string.format('%.1f%%', tostring(100.0 * nd))) + end + end + end + return false + end, + score = 1.0, + description = 'Text and HTML parts differ', + group = 'body' +} + +-- Date issues +local date_id = rspamd_config:register_symbol({ + name = 'DATE_CB', + type = 'callback,mime', + callback = function(task) + local date_time = task:get_header('Date') + if date_time == nil or date_time == '' then + task:insert_result('MISSING_DATE', 1.0) + return + end + + local dm, err = rspamd_parsers.parse_smtp_date(date_time) + if err then + task:insert_result('INVALID_DATE', 1.0) + return + end + + local dt = task:get_date({ format = 'connect', gmt = true }) + local date_diff = dt - dm + + if date_diff > 86400 then + -- Older than a day + task:insert_result('DATE_IN_PAST', 1.0, tostring(math.floor(date_diff / 3600))) + elseif -date_diff > 7200 then + -- More than 2 hours in the future + task:insert_result('DATE_IN_FUTURE', 1.0, tostring(math.floor(-date_diff / 3600))) + end + end +}) + +rspamd_config:register_symbol({ + name = 'MISSING_DATE', + score = 1.0, + description = 'Date header is missing', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'INVALID_DATE', + score = 1.5, + description = 'Malformed Date header', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'DATE_IN_FUTURE', + score = 4.0, + description = 'Message date is in the future', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'DATE_IN_PAST', + score = 1.0, + description = 'Message date is in the past', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +local obscured_id = rspamd_config:register_symbol { + callback = function(task) + local susp_urls = task:get_urls_filtered({ 'obscured', 'zw_spaces' }) + + if susp_urls and susp_urls[1] then + local obs_flag = url_flags_tab.obscured + local zw_flag = url_flags_tab.zw_spaces + + for _, u in ipairs(susp_urls) do + local fl = u:get_flags_num() + if bit.band(fl, obs_flag) ~= 0 then + task:insert_result('R_SUSPICIOUS_URL', 1.0, u:get_host()) + end + if bit.band(fl, zw_flag) ~= 0 then + task:insert_result('ZERO_WIDTH_SPACE_URL', 1.0, u:get_host()) + end + end + end + + return false + end, + name = 'R_SUSPICIOUS_URL', + score = 5.0, + one_shot = true, + description = 'A message has been identified to contain an obfuscated or suspicious URL', + group = 'url' +} + +rspamd_config:register_symbol { + type = 'virtual', + name = 'ZERO_WIDTH_SPACE_URL', + score = 7.0, + one_shot = true, + description = 'Zero width space in URL', + group = 'url', + parent = obscured_id, +} + +rspamd_config.ENVFROM_PRVS = { + callback = function(task) + --[[ + Detect PRVS/BATV addresses to avoid FORGED_SENDER + https://en.wikipedia.org/wiki/Bounce_Address_Tag_Validation + + Signature syntax: + + prvs=TAG=USER@example.com BATV draft (https://tools.ietf.org/html/draft-levine-smtp-batv-01) + prvs=USER=TAG@example.com + btv1==TAG==USER@example.com Barracuda appliance + msprvs1=TAG=USER@example.com Sparkpost email delivery service + ]]-- + if not (task:has_from(1) and task:has_from(2)) then + return false + end + local envfrom = task:get_from(1) + local re_text = '^(?:(prvs|msprvs1)=([^=]+)=|btv1==[^=]+==)(.+@(.+))$' + local re = rspamd_regexp.create_cached(re_text) + local c = re:search(envfrom[1].addr:lower(), false, true) + if not c then + return false + end + local ef = c[1][4] + -- See if it matches the From header + local from = task:get_from(2) + if ef == from[1].addr:lower() then + return true + end + -- Check for prvs=USER=TAG@example.com + local t = c[1][2] + if t == 'prvs' then + local efr = c[1][3] .. '@' .. c[1][5] + if efr == from[1].addr:lower() then + return true + end + end + return false + end, + score = 0.0, + description = "Envelope From is a PRVS address that matches the From address", + group = 'headers', + type = 'mime', +} + +rspamd_config.ENVFROM_VERP = { + callback = function(task) + if not (task:has_from(1) and task:has_recipients(1)) then + return false + end + local envfrom = task:get_from(1) + local envrcpts = task:get_recipients(1) + -- VERP only works for single recipient messages + if #envrcpts > 1 then + return false + end + -- Get recipient and compute VERP address + local rcpt = envrcpts[1].addr:lower() + local verp = rcpt:gsub('@', '=') + -- Get the user portion of the envfrom + local ef_user = envfrom[1].user:lower() + -- See if the VERP representation of the recipient appears in it + if ef_user:find(verp, 1, true) + and not ef_user:find('+caf_=' .. verp, 1, true) -- Google Forwarding + and not ef_user:find('^srs[01]=') -- SRS + then + return true + end + return false + end, + score = 0.0, + description = "Envelope From is a VERP address", + group = "headers", + type = 'mime', +} + +local check_rcvd = rspamd_config:register_symbol { + name = 'CHECK_RCVD', + group = 'headers', + callback = function(task) + local rcvds = task:get_received_headers() + if not rcvds or #rcvds == 0 then + return false + end + + local all_tls = fun.all(function(rc) + return rc.flags and rc.flags['ssl'] + end, fun.filter(function(rc) + return rc.by_hostname and rc.by_hostname ~= 'localhost' + end, rcvds)) + + -- See if only the last hop was encrypted + if all_tls then + task:insert_result('RCVD_TLS_ALL', 1.0) + else + local rcvd = rcvds[1] + if rcvd.by_hostname and rcvd.by_hostname == 'localhost' then + -- Ignore artificial header from Rmilter + rcvd = rcvds[2] or {} + end + if rcvd.flags and rcvd.flags['ssl'] then + task:insert_result('RCVD_TLS_LAST', 1.0) + else + task:insert_result('RCVD_NO_TLS_LAST', 1.0) + end + end + + local auth = fun.any(function(rc) + return rc.flags and rc.flags['authenticated'] + end, rcvds) + + if auth then + task:insert_result('RCVD_VIA_SMTP_AUTH', 1.0) + end + end, + type = 'callback,mime', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_TLS_ALL', + description = 'All hops used encrypted transports', + score = 0.0, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_TLS_LAST', + description = 'Last hop used encrypted transports', + score = 0.0, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_NO_TLS_LAST', + description = 'Last hop did not use encrypted transports', + score = 0.1, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_VIA_SMTP_AUTH', + -- NB This does not mean sender was authenticated; see task:get_user() + description = 'Authenticated hand-off was seen in Received headers', + score = 0.0, + group = 'headers' +} + +rspamd_config.RCVD_HELO_USER = { + callback = function(task) + -- Check HELO argument from MTA + local helo = task:get_helo() + if (helo and helo:lower():find('^user$')) then + return true + end + -- Check Received headers + local rcvds = task:get_header_full('Received') + if not rcvds then + return false + end + for _, rcvd in ipairs(rcvds) do + local r = rcvd['decoded']:lower() + if (r:find("^%s*from%suser%s")) then + return true + end + if (r:find("helo[%s=]user[%s%)]")) then + return true + end + end + end, + description = 'HELO User spam pattern', + group = 'headers', + type = 'mime', + score = 3.0 +} + +rspamd_config.URI_COUNT_ODD = { + callback = function(task) + local ct = task:get_header('Content-Type') + if (ct and ct:lower():find('^multipart/alternative')) then + local urls = task:get_urls_filtered(nil, { 'subject', 'html_displayed', 'special' }) or {} + local nurls = fun.foldl(function(acc, val) + return acc + val:get_count() + end, 0, urls) + + if nurls % 2 == 1 then + return true, 1.0, tostring(nurls) + end + end + end, + description = 'Odd number of URIs in multipart/alternative message', + score = 1.0, + group = 'url', +} + +rspamd_config.HAS_ATTACHMENT = { + callback = function(task) + local parts = task:get_parts() + if parts and #parts > 1 then + for _, p in ipairs(parts) do + local cd = p:get_header('Content-Disposition') + if (cd and cd:lower():match('^attachment')) then + return true + end + end + end + end, + description = 'Message contains attachments', + group = 'body', +} + +-- Requires freemail maps loaded in multimap +local function freemail_reply_neq_from(task) + if not task:has_symbol('FREEMAIL_REPLYTO') or not task:has_symbol('FREEMAIL_FROM') then + return false + end + local frt = task:get_symbol('FREEMAIL_REPLYTO') + local ff = task:get_symbol('FREEMAIL_FROM') + local frt_opts = frt[1]['options'] + local ff_opts = ff[1]['options'] + return (frt_opts and ff_opts and frt_opts[1] ~= ff_opts[1]) +end + +rspamd_config:register_symbol({ + name = 'FREEMAIL_REPLYTO_NEQ_FROM_DOM', + callback = freemail_reply_neq_from, + description = 'The From and Reply-To addresses in the email are from different freemail services', + score = 3.0, + group = 'headers', +}) +rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', 'FREEMAIL_REPLYTO') +rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', 'FREEMAIL_FROM') + +rspamd_config.OMOGRAPH_URL = { + callback = function(task) + local urls = task:get_urls() + + if urls then + local bad_omographs = 0 + local single_bad_omograps = 0 + local bad_urls = {} + local seen = {} + + fun.each(function(u) + if u:is_phished() then + + local h1 = u:get_host() + local h2 = u:get_phished() + if h2 then + -- Due to changes of the phished flag in 2.8 + h2 = h2:get_host() + end + if h1 and h2 then + local selt = string.format('%s->%s', h1, h2) + if not seen[selt] and rspamd_util.is_utf_spoofed(h1, h2) then + bad_urls[#bad_urls + 1] = selt + bad_omographs = bad_omographs + 1 + end + seen[selt] = true + end + end + if not u:is_html_displayed() then + local h = u:get_tld() + + if h then + if not seen[h] and rspamd_util.is_utf_spoofed(h) then + bad_urls[#bad_urls + 1] = h + single_bad_omograps = single_bad_omograps + 1 + end + seen[h] = true + end + end + end, urls) + + if bad_omographs > 0 then + return true, 1.0, bad_urls + elseif single_bad_omograps > 0 then + return true, 0.5, bad_urls + end + end + + return false + end, + score = 5.0, + group = 'url', + description = 'URL contains both latin and non-latin characters' +} + +rspamd_config.URL_IN_SUBJECT = { + callback = function(task) + local urls = task:get_urls() + + if urls then + for _, u in ipairs(urls) do + local flags = u:get_flags() + if flags.subject then + if flags.schemaless then + return true, 0.1, u:get_host() + end + local subject = task:get_subject() + + if subject then + if tostring(u) == subject then + return true, 1.0, u:get_host() + end + end + return true, 0.25, u:get_host() + end + end + end + + return false + end, + score = 4.0, + group = 'subject', + type = 'mime', + description = 'Subject contains URL' +} + +local aliases_id = rspamd_config:register_symbol { + type = 'prefilter', + name = 'EMAIL_PLUS_ALIASES', + callback = function(task) + local function check_from(type) + if task:has_from(type) then + local addr = task:get_from(type)[1] + local na, tags = lua_util.remove_email_aliases(addr) + if na then + task:set_from(type, addr, 'alias') + task:insert_result('TAGGED_FROM', 1.0, fun.totable( + fun.filter(function(t) + return t and #t > 0 + end, tags))) + end + end + end + + check_from('smtp') + check_from('mime') + + local function check_rcpt(type) + if task:has_recipients(type) then + local modified = false + local all_tags = {} + local addrs = task:get_recipients(type) + + for _, addr in ipairs(addrs) do + local na, tags = lua_util.remove_email_aliases(addr) + if na then + modified = true + fun.each(function(t) + table.insert(all_tags, t) + end, + fun.filter(function(t) + return t and #t > 0 + end, tags)) + end + end + + if modified then + task:set_recipients(type, addrs, 'alias') + task:insert_result('TAGGED_RCPT', 1.0, all_tags) + end + end + end + + check_rcpt('smtp') + check_rcpt('mime') + end, + priority = lua_util.symbols_priorities.top + 1, + description = 'Removes plus aliases from the email', + group = 'headers', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = aliases_id, + name = 'TAGGED_RCPT', + description = 'SMTP recipients have plus tags', + group = 'headers', + score = 0.0, +} +rspamd_config:register_symbol { + type = 'virtual', + parent = aliases_id, + name = 'TAGGED_FROM', + description = 'SMTP from has plus tags', + group = 'headers', + score = 0.0, +} + +local check_from_display_name = rspamd_config:register_symbol { + type = 'callback,mime', + name = 'FROM_DISPLAY_CALLBACK', + callback = function(task) + local from = task:get_from(2) + if not (from and from[1] and from[1].name) then + return false + end + -- See if we can parse an email address from the name + local parsed = rspamd_parsers.parse_mail_address(from[1].name, task:get_mempool()) + if not parsed then + return false + end + if not (parsed[1] and parsed[1]['addr']) then + return false + end + -- Make sure we did not mistake e.g. <something>@<name> for an email address + if not parsed[1]['domain'] or not parsed[1]['domain']:find('%.') then + return false + end + -- See if the parsed domains differ + if not rspamd_util.strequal_caseless(from[1]['domain'], parsed[1]['domain']) then + -- See if the destination domain is the same as the spoof + local mto = task:get_recipients(2) + local sto = task:get_recipients(1) + if mto then + for _, to in ipairs(mto) do + if to['domain'] ~= '' and rspamd_util.strequal_caseless(to['domain'], parsed[1]['domain']) then + task:insert_result('SPOOF_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + return false + end + end + end + if sto then + for _, to in ipairs(sto) do + if to['domain'] ~= '' and rspamd_util.strequal_caseless(to['domain'], parsed[1]['domain']) then + task:insert_result('SPOOF_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + return false + end + end + end + task:insert_result('FROM_NEQ_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + end + return false + end, + group = 'headers', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_from_display_name, + name = 'SPOOF_DISPLAY_NAME', + description = 'Display name is being used to spoof and trick the recipient', + group = 'headers', + score = 8.0, +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_from_display_name, + name = 'FROM_NEQ_DISPLAY_NAME', + group = 'headers', + description = 'Display name contains an email address different to the From address', + score = 4.0, +} + +rspamd_config.SPOOF_REPLYTO = { + callback = function(task) + -- First check for a Reply-To header + local rt = task:get_header_full('Reply-To') + if not rt or not rt[1] then + return false + end + -- Get From and To headers + rt = rt[1]['value'] + local from = task:get_from(2) + local to = task:get_recipients(2) + if not (from and from[1] and from[1].addr) then + return false + end + if (to and to[1] and to[1].addr) then + -- Handle common case for Web Contact forms of From = To + if rspamd_util.strequal_caseless(from[1].addr, to[1].addr) then + return false + end + end + -- SMTP recipients must contain From domain + to = task:get_recipients(1) + if not to then + return false + end + -- Try mitigate some possible FPs on mailing list posts + if #to == 1 and rspamd_util.strequal_caseless(to[1].addr, from[1].addr) then + return false + end + local found_fromdom = false + for _, t in ipairs(to) do + if rspamd_util.strequal_caseless(t.domain, from[1].domain) then + found_fromdom = true + break + end + end + if not found_fromdom then + return false + end + -- Parse Reply-To header + local parsed = ((rspamd_parsers.parse_mail_address(rt, task:get_mempool()) or E)[1] or E).domain + if not parsed then + return false + end + -- Reply-To domain must be different to From domain + if not rspamd_util.strequal_caseless(parsed, from[1].domain) then + return true, from[1].domain, parsed + end + return false + end, + group = 'headers', + type = 'mime', + description = 'Reply-To is being used to spoof and trick the recipient to send an off-domain reply', + score = 6.0 +} + +rspamd_config.INFO_TO_INFO_LU = { + callback = function(task) + if not task:has_header('List-Unsubscribe') then + return false + end + local from = task:get_from('mime') + if not (from and from[1] and rspamd_util.strequal_caseless(from[1].user, 'info')) then + return false + end + local to = task:get_recipients('smtp') + if not to then + return false + end + local found = false + for _, r in ipairs(to) do + if rspamd_util.strequal_caseless(r['user'], 'info') then + found = true + end + end + if found then + return true + end + return false + end, + description = 'info@ From/To address with List-Unsubscribe headers', + group = 'headers', + score = 2.0, + type = 'mime', +} + +-- Detects bad content-transfer-encoding for text parts + +rspamd_config.R_BAD_CTE_7BIT = { + callback = function(task) + local tp = task:get_text_parts() or {} + + for _, p in ipairs(tp) do + local cte = p:get_mimepart():get_cte() or '' + if cte ~= '8bit' and p:has_8bit_raw() then + local _, _, attrs = p:get_mimepart():get_type_full() + local mul = 1.0 + local params = { cte } + if attrs then + if attrs.charset and attrs.charset:lower() == "utf-8" then + -- Penalise rule as people don't know that utf8 is surprisingly + -- eight bit encoding + mul = 0.3 + table.insert(params, "utf8") + end + end + + return true, mul, params + end + end + + return false + end, + score = 3.5, + description = 'Detects bad Content-Transfer-Encoding for text parts', + group = 'headers', + type = 'mime', +} + +local check_encrypted_name = rspamd_config:register_symbol { + name = 'BOGUS_ENCRYPTED_AND_TEXT', + callback = function(task) + local parts = task:get_parts() or {} + local seen_encrypted, seen_text + local opts = {} + + local function check_part(part) + if part:is_multipart() then + local children = part:get_children() or {} + local text_kids = {} + + for _, cld in ipairs(children) do + if cld:is_multipart() then + check_part(cld) + elseif cld:is_text() then + seen_text = true + text_kids[#text_kids + 1] = cld + else + local type, subtype, _ = cld:get_type_full() + + if type:lower() == 'application' then + if string.find(subtype:lower(), 'pkcs7%-mime') then + -- S/MIME encrypted part + seen_encrypted = true + table.insert(opts, 'smime part') + task:insert_result('ENCRYPTED_SMIME', 1.0) + elseif string.find(subtype:lower(), 'pkcs7%-signature') then + task:insert_result('SIGNED_SMIME', 1.0) + elseif string.find(subtype:lower(), 'pgp%-encrypted') then + -- PGP/GnuPG encrypted part + seen_encrypted = true + table.insert(opts, 'pgp part') + task:insert_result('ENCRYPTED_PGP', 1.0) + elseif string.find(subtype:lower(), 'pgp%-signature') then + task:insert_result('SIGNED_PGP', 1.0) + end + end + end + if seen_text and seen_encrypted then + -- Ensure that our seen text is not really part of pgp #3205 + for _, tp in ipairs(text_kids) do + local t, _ = tp:get_type() + seen_text = false -- reset temporary + if t and t == 'text' then + seen_text = true + break + end + end + end + end + end + end + + for _, part in ipairs(parts) do + check_part(part) + end + + if seen_text and seen_encrypted then + return true, 1.0, opts + end + + return false + end, + score = 10.0, + description = 'Bogus mix of encrypted and text/html payloads', + group = 'mime_types', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'ENCRYPTED_PGP', + description = 'Message is encrypted with PGP', + group = 'mime_types', + score = -0.5, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'ENCRYPTED_SMIME', + description = 'Message is encrypted with S/MIME', + group = 'mime_types', + score = -0.5, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'SIGNED_PGP', + description = 'Message is signed with PGP', + group = 'mime_types', + score = -2.0, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'SIGNED_SMIME', + description = 'Message is signed with S/MIME', + group = 'mime_types', + score = -2.0, + one_shot = true +} + +rspamd_config.COMPLETELY_EMPTY = { + callback = function(task) + return (task:get_size() == 0) + end, + flags = 'empty', + group = 'blankspam', + score = 15 +} |