diff options
Diffstat (limited to 'rules')
-rw-r--r-- | rules/bitcoin.lua | 237 | ||||
-rw-r--r-- | rules/bounce.lua | 117 | ||||
-rw-r--r-- | rules/content.lua | 118 | ||||
-rw-r--r-- | rules/controller/fuzzy.lua | 46 | ||||
-rw-r--r-- | rules/controller/init.lua | 67 | ||||
-rw-r--r-- | rules/controller/maps.lua | 220 | ||||
-rw-r--r-- | rules/controller/neural.lua | 70 | ||||
-rw-r--r-- | rules/controller/selectors.lua | 73 | ||||
-rw-r--r-- | rules/forwarding.lua | 163 | ||||
-rw-r--r-- | rules/headers_checks.lua | 1174 | ||||
-rw-r--r-- | rules/html.lua | 462 | ||||
-rw-r--r-- | rules/mid.lua | 131 | ||||
-rw-r--r-- | rules/misc.lua | 864 | ||||
-rw-r--r-- | rules/parts.lua | 11 | ||||
-rw-r--r-- | rules/regexp/compromised_hosts.lua | 223 | ||||
-rw-r--r-- | rules/regexp/headers.lua | 1046 | ||||
-rw-r--r-- | rules/regexp/misc.lua | 117 | ||||
-rw-r--r-- | rules/regexp/upstream_spam_filters.lua | 60 | ||||
-rw-r--r-- | rules/rspamd.lua | 71 | ||||
-rw-r--r-- | rules/subject_checks.lua | 70 |
20 files changed, 5340 insertions, 0 deletions
diff --git a/rules/bitcoin.lua b/rules/bitcoin.lua new file mode 100644 index 0000000..6a70721 --- /dev/null +++ b/rules/bitcoin.lua @@ -0,0 +1,237 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +-- Bitcoin filter rules + +local fun = require "fun" +local bit = require "bit" +local lua_util = require "lua_util" +local rspamd_util = require "rspamd_util" +local N = "bitcoin" + +local off = 0 +local base58_dec = fun.tomap(fun.map( + function(c) + off = off + 1 + return c, (off - 1) + end, + "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz")) + +local function is_traditional_btc_address(word) + local hash = require "rspamd_cryptobox_hash" + + local bytes = {} + for i = 1, 25 do + bytes[i] = 0 + end + -- Base58 decode loop + fun.each(function(ch) + local acc = base58_dec[ch] or 0 + for i = 25, 1, -1 do + acc = acc + (58 * bytes[i]); + bytes[i] = acc % 256 + acc = math.floor(acc / 256); + end + end, word) + -- Now create a validation tag + local sha256 = hash.create_specific('sha256') + for i = 1, 21 do + sha256:update(string.char(bytes[i])) + end + sha256 = hash.create_specific('sha256', sha256:bin()):bin() + + -- Compare tags + local valid = true + for i = 1, 4 do + if string.sub(sha256, i, i) ~= string.char(bytes[21 + i]) then + valid = false + end + end + + return valid +end + +-- Beach32 checksum combiner +local function polymod(...) 
+ local chk = 1; + local gen = { 0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3 }; + for _, t in ipairs({ ... }) do + for _, v in ipairs(t) do + local top = bit.rshift(chk, 25) + + chk = bit.bxor(bit.lshift(bit.band(chk, 0x1ffffff), 5), v) + for i = 1, 5 do + if bit.band(bit.rshift(top, i - 1), 0x1) ~= 0 then + chk = bit.bxor(chk, gen[i]) + end + end + end + end + + return chk +end + +-- Beach32 expansion function +local function hrpExpand(hrp) + local ret = {} + fun.each(function(byte) + ret[#ret + 1] = bit.rshift(byte, 5) + end, fun.map(string.byte, fun.iter(hrp))) + ret[#ret + 1] = 0 + fun.each(function(byte) + ret[#ret + 1] = bit.band(byte, 0x1f) + end, fun.map(string.byte, fun.iter(hrp))) + + return ret +end + +local function verify_beach32_cksum(hrp, elts) + return polymod(hrpExpand(hrp), elts) == 1 +end + +local function gen_bleach32_table(input) + local d = {} + local i = 1 + local res = true + local charset = 'qpzry9x8gf2tvdw0s3jn54khce6mua7l' + + fun.each(function(byte) + if res then + local pos = charset:find(byte, 1, true) + if not pos then + res = false + else + d[i] = pos - 1 + i = i + 1 + end + end + end, fun.iter(input)) + + return res and d or nil +end + +local function is_segwit_bech32_address(task, word) + local semicolon_pos = string.find(word, ':') + local address_part = word + if semicolon_pos then + address_part = string.sub(word, semicolon_pos + 1) + end + + local prefix = address_part:sub(1, 3) + + if prefix == 'bc1' or prefix:sub(1, 1) == '1' or prefix:sub(1, 1) == '3' then + -- Strip beach32 prefix in bitcoin + address_part = address_part:lower() + local last_one_pos = address_part:find('1[^1]*$') + if not last_one_pos or (last_one_pos < 1 or last_one_pos + 7 > #address_part) then + return false + end + local hrp = address_part:sub(1, last_one_pos - 1) + local addr = address_part:sub(last_one_pos + 1, -1) + local decoded = gen_bleach32_table(addr) + + if decoded then + return verify_beach32_cksum(hrp, decoded) + end + else + 
-- Bitcoin cash address + -- https://www.bitcoincash.org/spec/cashaddr.html + local decoded = gen_bleach32_table(address_part) + lua_util.debugm(N, task, 'check %s, %s decoded', word, decoded) + + if decoded and #decoded > 8 then + if semicolon_pos then + prefix = word:sub(1, semicolon_pos - 1) + else + prefix = 'bitcoincash' + end + + local polymod_tbl = {} + fun.each(function(byte) + local b = bit.band(string.byte(byte), 0x1f) + table.insert(polymod_tbl, b) + end, fun.iter(prefix)) + + -- For semicolon + table.insert(polymod_tbl, 0) + + fun.each(function(byte) + table.insert(polymod_tbl, byte) + end, decoded) + lua_util.debugm(N, task, 'final polymod table: %s', polymod_tbl) + + return rspamd_util.btc_polymod(polymod_tbl) + end + end +end + +local normal_wallet_re = [[/\b[13LM][1-9A-Za-z]{25,34}\b/AL{sa_body}]] +local btc_bleach_re = [[/\b(?:(?:[a-zA-Z]\w+:)|(?:bc1))?[qpzry9x8gf2tvdw0s3jn54khce6mua7l]{14,}\b/AL{sa_body}]] + +config.regexp['BITCOIN_ADDR'] = { + description = 'Message has a valid bitcoin wallet address', + -- Use + operator to ensure that each expression is always evaluated + re = string.format('(%s) + (%s) > 0', normal_wallet_re, btc_bleach_re), + re_conditions = { + [normal_wallet_re] = function(task, txt, s, e) + local len = e - s + if len <= 2 or len > 1024 then + return false + end + + local word = lua_util.str_trim(txt:sub(s + 1, e)) + local valid = is_traditional_btc_address(word) + + if valid then + -- To save option + task:insert_result('BITCOIN_ADDR', 1.0, word) + lua_util.debugm(N, task, 'found valid traditional bitcoin addr in the word: %s', + word) + return true + else + lua_util.debugm(N, task, 'found invalid bitcoin addr in the word: %s', + word) + + return false + end + end, + [btc_bleach_re] = function(task, txt, s, e) + local len = e - s + if len <= 2 or len > 1024 then + return false + end + + local word = tostring(lua_util.str_trim(txt:sub(s + 1, e))) + local valid = is_segwit_bech32_address(task, word) + + if valid then + -- To 
save option + task:insert_result('BITCOIN_ADDR', 1.0, word) + lua_util.debugm(N, task, 'found valid bleach bitcoin addr in the word: %s', + word) + return true + else + lua_util.debugm(N, task, 'found invalid bitcoin addr in the word: %s', + word) + + return false + end + end, + }, + score = 0.0, + one_shot = true, + group = 'scams', +} diff --git a/rules/bounce.lua b/rules/bounce.lua new file mode 100644 index 0000000..fb74b97 --- /dev/null +++ b/rules/bounce.lua @@ -0,0 +1,117 @@ +--[[ +Copyright (c) 2020, Anton Yuzhaninov <citrin@citrin.ru> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--

-- Rule to detect bounces:
-- RFC 3464 Delivery status notifications and most common non-standard ones

--- Builds the rspamd regexp expression that matches bounce-like Subject headers.
-- @return string expression of the form `Subject=/\b(a|b|...)\b/i{header}`
local function make_subj_bounce_keywords_re()
  -- Words and phrases commonly used in Subjects for bounces
  -- We cannot practically test all localized Subjects, but luckily English is by far the most common here
  local keywords = {
    'could not send message',
    "couldn't be delivered",
    'delivery failed',
    'delivery failure',
    'delivery report',
    'delivery status',
    'delivery warning',
    'failure delivery',
    'failure notice',
    "hasn't been delivered",
    'mail failure',
    'returned mail',
    'undeliverable',
    'undelivered',
  }
  return string.format([[Subject=/\b(%s)\b/i{header}]], table.concat(keywords, '|'))
end

config.regexp.SUBJ_BOUNCE_WORDS = {
  re = make_subj_bounce_keywords_re(),
  group = 'headers',
  score = 0.0,
  description = 'Words/phrases typical for DSN'
}

rspamd_config.BOUNCE = {
  --- Detects (non-)delivery status notifications.
  -- Returns `true, weight, option` on a match and `false` otherwise.
  callback = function(task)
    local from = task:get_from('smtp')
    if from and from[1] and from[1].addr ~= '' then
      -- RFC 3464:
      -- Whenever an SMTP transaction is used to send a DSN, the MAIL FROM
      -- command MUST use a NULL return address, i.e., "MAIL FROM:<>"
      -- In practice it is almost always the case for DSN
      return false
    end

    -- A message may have no parts at all: never index parts[1] blindly
    local parts = task:get_parts() or {}
    if parts[1] then
      local top_type, top_subtype, params = parts[1]:get_type_full()
      -- RFC 3464, RFC 8098
      if top_type == 'multipart' and top_subtype == 'report' and params and
          (params['report-type'] == 'delivery-status' or params['report-type'] == 'disposition-notification') then
        -- Assume that inner parts are OK, don't check them to save time
        return true, 1.0, 'DSN'
      end
    end

    -- Apply heuristics for non-standard bounces
    local bounce_sender
    local mime_from = task:get_from('mime')
    if mime_from and mime_from[1] then
      -- The local part may be missing in a malformed From header
      local from_user = (mime_from[1].user or ''):lower()
      -- Check common bounce senders
      if (from_user == 'postmaster' or from_user == 'mailer-daemon') then
        bounce_sender = from_user
        -- MDaemon >= 14.5 sends multipart/report (RFC 3464) DSN covered above,
        -- but older versions send non-standard bounces with localized subjects and they
        -- are still around
      elseif from_user == 'mdaemon' and task:has_header('X-MDDSN-Message') then
        return true, 1.0, 'MDaemon'
      end
    end

    local subj_keywords = task:has_symbol('SUBJ_BOUNCE_WORDS')

    if not (bounce_sender or subj_keywords) then
      return false
    end

    if bounce_sender and subj_keywords then
      return true, 0.5, bounce_sender .. '+subj'
    end

    -- Look for a message/rfc822(-headers) part inside
    local rfc822_part
    parts[10] = nil -- limit number of parts to check: ipairs stops at the hole
    for _, p in ipairs(parts) do
      local mime_type, mime_subtype = p:get_type()
      if (mime_subtype == 'rfc822' or mime_subtype == 'rfc822-headers') and
          (mime_type == 'message' or mime_type == 'text') then
        rfc822_part = mime_type .. '/' .. mime_subtype
        break
      end
    end

    if rfc822_part and bounce_sender then
      return true, 0.5, bounce_sender .. '+' .. rfc822_part
    elseif rfc822_part and subj_keywords then
      return true, 0.2, rfc822_part .. '+subj'
    end

    -- Only one weak signal and no embedded original message: not a bounce
    return false
  end,
  description = '(Non) Delivery Status Notification',
  group = 'headers',
}

-- BOUNCE reads the SUBJ_BOUNCE_WORDS result via task:has_symbol(), so it must run after it
rspamd_config:register_dependency('BOUNCE', 'SUBJ_BOUNCE_WORDS')
diff --git a/rules/content.lua b/rules/content.lua new file mode 100644 index 0000000..667b7ec --- /dev/null +++ b/rules/content.lua @@ -0,0 +1,118 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +]]-- + +local function process_pdf_specific(task, part, specific) + local suspicious_factor = 0 + if specific.encrypted then + task:insert_result('PDF_ENCRYPTED', 1.0, part:get_filename() or 'unknown') + suspicious_factor = suspicious_factor + 0.1 + if specific.openaction then + suspicious_factor = suspicious_factor + 0.5 + end + end + + if specific.scripts then + task:insert_result('PDF_JAVASCRIPT', 1.0, part:get_filename() or 'unknown') + suspicious_factor = suspicious_factor + 0.1 + end + + if specific.suspicious then + suspicious_factor = suspicious_factor + specific.suspicious + end + + if suspicious_factor > 0.5 then + if suspicious_factor > 1.0 then + suspicious_factor = 1.0 + end + task:insert_result('PDF_SUSPICIOUS', suspicious_factor, part:get_filename() or 'unknown') + end + + if specific.long_trailer then + task:insert_result('PDF_LONG_TRAILER', 1.0, string.format('%s:%d', + part:get_filename() or 'unknown', specific.long_trailer)) + end + if specific.many_objects then + task:insert_result('PDF_MANY_OBJECTS', 1.0, string.format('%s:%d', + part:get_filename() or 'unknown', specific.many_objects)) + end + if specific.timeout_processing then + task:insert_result('PDF_TIMEOUT', 1.0, string.format('%s:%.3f', + part:get_filename() or 'unknown', specific.timeout_processing)) + end +end + +local tags_processors = { + pdf = process_pdf_specific +} + +local function process_specific_cb(task) + local parts = task:get_parts() or {} + + for _, p in ipairs(parts) do + if p:is_specific() then + local data = p:get_specific() + + if data and type(data) == 'table' and data.tag then + if tags_processors[data.tag] then + tags_processors[data.tag](task, p, data) + end + end + end + end +end + +local id = rspamd_config:register_symbol { + type = 'callback', + name = 'SPECIFIC_CONTENT_CHECK', + callback = process_specific_cb +} + +rspamd_config:register_symbol { + type = 
'virtual', + name = 'PDF_ENCRYPTED', + parent = id, + groups = { "content", "pdf" }, +} +rspamd_config:register_symbol { + type = 'virtual', + name = 'PDF_JAVASCRIPT', + parent = id, + groups = { "content", "pdf" }, +} +rspamd_config:register_symbol { + type = 'virtual', + name = 'PDF_SUSPICIOUS', + parent = id, + groups = { "content", "pdf" }, +} +rspamd_config:register_symbol { + type = 'virtual', + name = 'PDF_LONG_TRAILER', + parent = id, + groups = { "content", "pdf" }, +} +rspamd_config:register_symbol { + type = 'virtual', + name = 'PDF_MANY_OBJECTS', + parent = id, + groups = { "content", "pdf" }, +} +rspamd_config:register_symbol { + type = 'virtual', + name = 'PDF_TIMEOUT', + parent = id, + groups = { "content", "pdf" }, +} diff --git a/rules/controller/fuzzy.lua b/rules/controller/fuzzy.lua new file mode 100644 index 0000000..193e6fd --- /dev/null +++ b/rules/controller/fuzzy.lua @@ -0,0 +1,46 @@ +--[[ +Copyright (c) 2023, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--

--- Controller handler replying with the fuzzy hashes of the uploaded message.
-- The hashes come from the `hex_hashes` function exported by the fuzzy_check
-- plugin. The fuzzy rule is selected by `req_params.rule` or, when that is
-- absent, by `req_params.flag` converted with tonumber().
-- @param task task holding the message
-- @param conn controller connection the reply is written to
-- @param req_params table of request parameters
local function handle_gen_fuzzy(task, conn, req_params)
  local fuzzy_check = rspamd_plugins.fuzzy_check
  if type(fuzzy_check) ~= 'table' then
    conn:send_error(404, 'fuzzy_check is not enabled')
    return
  end

  -- The message is processed before the parameters are looked at
  task:process_message()

  local rule_or_flag
  if req_params.rule then
    rule_or_flag = req_params.rule
  elseif req_params.flag then
    rule_or_flag = tonumber(req_params.flag)
  else
    conn:send_error(404, 'missing rule or flag')
    return
  end

  -- Protected call: any error raised while hashing becomes a 500 reply
  local ok, hashes = pcall(fuzzy_check.hex_hashes, task, rule_or_flag)
  if not ok then
    conn:send_error(500, 'cannot generate hashes')
    return
  end

  conn:send_ucl({ success = true, hashes = hashes })
end

return {
  hashes = {
    handler = handle_gen_fuzzy,
    need_task = true,
    enable = false
  },
}
\ No newline at end of file diff --git a/rules/controller/init.lua b/rules/controller/init.lua new file mode 100644 index 0000000..17fbbfc --- /dev/null +++ b/rules/controller/init.lua @@ -0,0 +1,67 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +-- Controller endpoints + +local local_conf = rspamd_paths['LOCAL_CONFDIR'] +local local_rules = rspamd_paths['RULESDIR'] +local rspamd_util = require "rspamd_util" +local lua_util = require "lua_util" +local rspamd_logger = require "rspamd_logger" + +-- Define default controller paths, could be overridden in local.d/controller.lua + +local controller_plugin_paths = { + maps = dofile(local_rules .. "/controller/maps.lua"), + neural = dofile(local_rules .. "/controller/neural.lua"), + selectors = dofile(local_rules .. "/controller/selectors.lua"), + fuzzy = dofile(local_rules .. "/controller/fuzzy.lua"), +} + +if rspamd_util.file_exists(local_conf .. '/controller.lua') then + local controller_overrides = dofile(local_conf .. 
'/controller.lua') + + if controller_overrides and type(controller_overrides) == 'table' then + controller_plugin_paths = lua_util.override_defaults(controller_plugin_paths, controller_overrides) + end +end + +for plug, paths in pairs(controller_plugin_paths) do + if not rspamd_plugins[plug] then + rspamd_plugins[plug] = {} + end + if not rspamd_plugins[plug].webui then + rspamd_plugins[plug].webui = {} + end + + local webui = rspamd_plugins[plug].webui + + for path, attrs in pairs(paths) do + if type(attrs) == 'table' then + if type(attrs.handler) ~= 'function' then + rspamd_logger.infox(rspamd_config, 'controller plugin %s; webui path %s has invalid handler: %s; ignore it', + plug, path, type(attrs.handler)) + else + webui[path] = lua_util.shallowcopy(attrs) + rspamd_logger.infox(rspamd_config, 'controller plugin %s; register webui path %s', + plug, path) + end + else + rspamd_logger.infox(rspamd_config, 'controller plugin %s; webui path %s has invalid type: %s; ignore it', + plug, path, type(attrs)) + end + end +end diff --git a/rules/controller/maps.lua b/rules/controller/maps.lua new file mode 100644 index 0000000..718e292 --- /dev/null +++ b/rules/controller/maps.lua @@ -0,0 +1,220 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--

-- Controller maps plugin
local maps_cache
local maps_aliases
local lua_util = require "lua_util"
local ts = require("tableshape").types
local ucl = require "ucl"

--- Returns the last path component of a map URI, or nil when the URI has no '/'.
local function uri_alias(uri)
  return uri:match('/([^/]+)$')
end

--- Lazily builds `maps_cache` (uri -> map) and `maps_aliases` (alias -> uri).
-- Runs only once; later calls are no-ops.
local function maybe_fill_maps_cache()
  if maps_cache then
    return
  end

  maps_cache = {}
  maps_aliases = {}
  local maps = rspamd_config:get_maps()
  for _, m in ipairs(maps) do
    -- We get the first url here and that's it
    local url = m:get_uri()
    -- 'static' maps are skipped. Duplicates are not overridden, as we don't
    -- care about duplicate maps that come from different sources.
    if url and url ~= 'static' and not maps_cache[url] then
      maps_cache[url] = m
      -- A URI without '/' yields no alias; using nil as a table key would raise
      local alias = uri_alias(url)
      if alias and not maps_aliases[alias] then
        maps_aliases[alias] = url
      end
    end
  end
end

--- Looks `input` up in map `m` and appends the outcome to `results`.
-- Misses are recorded only when `report_misses` is truthy.
local function check_specific_map(input, uri, m, results, report_misses)
  local value = m:get_key(input)

  if value then
    table.insert(results, {
      map = uri,
      alias = uri_alias(uri),
      value = value,
      key = input,
      hit = true,
    })
  elseif report_misses then
    table.insert(results, {
      map = uri,
      alias = uri_alias(uri),
      key = input,
      hit = false,
    })
  end
end

--- Extracts the keys to look up from query parameters:
-- a non-empty `value` wins, otherwise `values` is split on commas.
local function keys_from_params(req_params)
  if req_params.value and req_params.value ~= '' then
    return { req_params.value }
  elseif req_params.values then
    return lua_util.str_split(req_params.values, ',') or {}
  end
  return {}
end

--- Resolves map names (full URIs or aliases) to a uri -> map table.
-- @return table on success; nil plus the first unknown name otherwise
local function resolve_maps(names)
  local resolved = {}
  for _, mn in ipairs(names) do
    if maps_cache[mn] then
      resolved[mn] = maps_cache[mn]
    else
      local uri = maps_aliases[mn]
      if not uri then
        return nil, mn
      end
      resolved[uri] = maps_cache[uri]
    end
  end
  return resolved
end

--- Checks every key against every map and sends the combined result.
local function send_query_results(conn, keys, maps, report_misses)
  local results = {}
  for _, key in ipairs(keys) do
    for uri, m in pairs(maps) do
      check_specific_map(key, uri, m, results, report_misses)
    end
  end
  conn:send_ucl {
    success = (#results > 0),
    results = results
  }
end

--- Handler: query all known maps for `value` / `values`.
local function handle_query_map(_, conn, req_params)
  maybe_fill_maps_cache()
  send_query_results(conn, keys_from_params(req_params), maps_cache,
      req_params.report_misses)
end

--- Handler: query only the maps listed (comma separated) in `req_params.maps`;
-- all maps are queried when the parameter is absent.
local function handle_query_specific_map(_, conn, req_params)
  maybe_fill_maps_cache()
  local maps_to_check = maps_cache

  if req_params.maps then
    local missing
    maps_to_check, missing = resolve_maps(lua_util.str_split(req_params.maps, ',') or {})
    if not maps_to_check then
      conn:send_error(404, 'no such map: ' .. missing)
      -- Stop here: a second reply must not follow the error
      return
    end
  end

  send_query_results(conn, keys_from_params(req_params), maps_to_check,
      req_params.report_misses)
end

--- Handler: list all known map URIs and their aliases.
local function handle_list_maps(_, conn, _)
  maybe_fill_maps_cache()
  conn:send_ucl {
    maps = lua_util.keys(maps_cache),
    aliases = maps_aliases
  }
end

local query_json_schema = ts.shape {
  maps = ts.array_of(ts.string):is_optional(),
  report_misses = ts.boolean:is_optional(),
  values = ts.array_of(ts.string),
}

--- Handler: same as the specific query, but the request is a JSON/UCL body
-- validated against `query_json_schema`.
local function handle_query_json(task, conn)
  maybe_fill_maps_cache()

  local parser = ucl.parser()
  local ok, err = parser:parse_text(task:get_rawbody())
  if not ok then
    conn:send_error(400, err)
    return
  end
  local obj = parser:get_object()

  ok, err = query_json_schema:transform(obj)
  if not ok then
    conn:send_error(400, err)
    return
  end

  local maps_to_check = maps_cache
  if obj.maps then
    local missing
    maps_to_check, missing = resolve_maps(obj.maps)
    if not maps_to_check then
      conn:send_error(400, 'no such map: ' .. missing)
      return
    end
  end

  send_query_results(conn, obj.values, maps_to_check, obj.report_misses)
end

return {
  query = {
    handler = handle_query_map,
    enable = false,
  },
  query_json = {
    handler = handle_query_json,
    enable = false,
    need_task = true,
  },
  query_specific = {
    handler = handle_query_specific_map,
    enable = false,
  },
  list = {
    handler = handle_list_maps,
    enable = false,
  },
}
diff --git a/rules/controller/neural.lua b/rules/controller/neural.lua new file mode 100644 index 0000000..aef1042 --- /dev/null +++ b/rules/controller/neural.lua @@ -0,0 +1,70 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--

local neural_common = require "plugins/neural"
local ts = require("tableshape").types
local ucl = require "ucl"

local E = {}

-- Controller neural plugin

local learn_request_schema = ts.shape {
  ham_vec = ts.array_of(ts.array_of(ts.number)),
  rule = ts.string:is_optional(),
  spam_vec = ts.array_of(ts.array_of(ts.number)),
}

--- Controller handler that starts training of a neural rule from vectors
-- supplied in the request body.
-- The body is parsed with UCL and validated against `learn_request_schema`;
-- `rule` defaults to 'default'. Replies 400 on malformed input or an unknown
-- rule name instead of failing with a Lua error.
-- @param task task carrying the raw request body
-- @param conn controller connection the reply is written to
local function handle_learn(task, conn)
  local parser = ucl.parser()
  local ok, err = parser:parse_text(task:get_rawbody())
  if not ok then
    conn:send_error(400, err)
    return
  end
  local req_params = parser:get_object()

  ok, err = learn_request_schema:transform(req_params)
  if not ok then
    conn:send_error(400, err)
    return
  end

  local rule_name = req_params.rule or 'default'
  local rule = ((neural_common.settings or E).rules or E)[rule_name]
  if not rule then
    -- A nil rule would otherwise be passed on to get_rule_settings / new_ann_key
    conn:send_error(400, 'unknown rule: ' .. rule_name)
    return
  end

  local set = neural_common.get_rule_settings(task, rule)
  if not set then
    -- NOTE(review): presumably get_rule_settings can return nil when the rule
    -- has no settings applicable to this task - confirm against plugins/neural
    conn:send_error(500, 'no settings for rule: ' .. rule_name)
    return
  end

  -- Train the next version after the one currently stored (0 when there is none)
  local version = ((set.ann or E).version or 0) + 1

  neural_common.spawn_train {
    ev_base = task:get_ev_base(),
    ann_key = neural_common.new_ann_key(rule, set, version),
    set = set,
    rule = rule,
    ham_vec = req_params.ham_vec,
    spam_vec = req_params.spam_vec,
    worker = task:get_worker(),
  }

  conn:send_string('{"success" : true}')
end

return {
  learn = {
    handler = handle_learn,
    enable = true,
    need_task = true,
  },
}
diff --git a/rules/controller/selectors.lua b/rules/controller/selectors.lua new file mode 100644 index 0000000..7fc2894 --- /dev/null +++ b/rules/controller/selectors.lua @@ -0,0 +1,73 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +]]--

local lua_selectors = require "lua_selectors"

-- Controller selectors plugin

--- Handler: reply with the list returned by lua_selectors.list_transforms().
local function handle_list_transforms(_, conn)
  conn:send_ucl(lua_selectors.list_transforms())
end

--- Handler: reply with the list returned by lua_selectors.list_extractors().
local function handle_list_extractors(_, conn)
  conn:send_ucl(lua_selectors.list_extractors())
end

--- Handler: check whether `req_params.selector` can be turned into a selector closure.
-- Replies `{ success = true }` or `{ success = false }`; 404 if the parameter
-- is missing or empty.
local function handle_check_selector(_, conn, req_params)
  if req_params.selector and req_params.selector ~= '' then
    local selector = lua_selectors.create_selector_closure(rspamd_config,
        req_params.selector, '', true)
    -- `selector and true` would be nil for a failed selector, which drops the
    -- key from the reply; always send a real boolean
    conn:send_ucl({ success = selector and true or false })
  else
    conn:send_error(404, 'missing selector')
  end
end

--- Handler: apply `req_params.selector` to the uploaded message and reply
-- with the extracted elements.
local function handle_check_message(task, conn, req_params)
  if req_params.selector and req_params.selector ~= '' then
    local selector = lua_selectors.create_selector_closure(rspamd_config,
        req_params.selector, '', true)
    if not selector then
      conn:send_error(500, 'invalid selector')
    else
      task:process_message()
      local elts = selector(task)
      conn:send_ucl({ success = true, data = elts })
    end
  else
    conn:send_error(404, 'missing selector')
  end
end

return {
  list_extractors = {
    handler = handle_list_extractors,
    enable = true,
  },
  list_transforms = {
    handler = handle_list_transforms,
    enable = true,
  },
  check_selector = {
    handler = handle_check_selector,
    enable = true,
  },
  check_message = {
    handler = handle_check_message,
    enable = true,
    need_task = true,
  }
}
diff --git a/rules/forwarding.lua b/rules/forwarding.lua new file mode 100644 index 0000000..a008c58 --- /dev/null +++ b/rules/forwarding.lua @@ -0,0 +1,163 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]--

-- Rules to detect forwarding

local rspamd_util = require "rspamd_util"

rspamd_config.FWD_GOOGLE = {
  -- Matches an envelope sender of the form `<user>+caf_=<rcpt with '@' replaced by '='>`
  -- and returns the reconstructed `<user>@<sender domain>` as the option
  callback = function(task)
    if not (task:has_from(1) and task:has_recipients(1)) then
      return false
    end
    local envfrom = task:get_from { 'smtp', 'orig' }
    local envrcpts = task:get_recipients(1)
    -- Forwarding will only be to a single recipient
    if #envrcpts > 1 then
      return false
    end
    -- Get recipient and compute VERP address
    local rcpt = envrcpts[1].addr:lower()
    local verp = rcpt:gsub('@', '=')
    -- Get the user portion of the envfrom
    local ef_user = envfrom[1].user:lower()
    -- Check for a match (plain find: the address is not a pattern)
    if ef_user:find('+caf_=' .. verp, 1, true) then
      local _, _, user = ef_user:find('^(.+)+caf_=')
      if user then
        user = user .. '@' .. envfrom[1].domain
        return true, user
      end
    end
    return false
  end,
  score = 0.0,
  description = "Message was forwarded by Google",
  group = "forwarding"
}

rspamd_config.FWD_YANDEX = {
  -- Sending hostname under *.yandex.<tld> plus an X-Yandex-Forward header
  callback = function(task)
    if not (task:has_from(1) and task:has_recipients(1)) then
      return false
    end
    local hostname = task:get_hostname()
    if hostname and hostname:lower():find('%.yandex%.[a-z]+$') then
      return task:has_header('X-Yandex-Forward')
    end
    return false
  end,
  score = 0.0,
  description = "Message was forwarded by Yandex",
  group = "forwarding"
}

rspamd_config.FWD_MAILRU = {
  -- Sending hostname under *.mail.ru plus an X-MailRu-Forward header
  callback = function(task)
    if not (task:has_from(1) and task:has_recipients(1)) then
      return false
    end
    local hostname = task:get_hostname()
    if hostname and hostname:lower():find('%.mail%.ru$') then
      return task:has_header('X-MailRu-Forward')
    end
    return false
  end,
  score = 0.0,
  description = "Message was forwarded by Mail.ru",
  group = "forwarding"
}

rspamd_config.FWD_SRS = {
  -- Envelope sender local part starts with srs0=/srs1= and contains
  -- `=<rcpt domain>=<rcpt user>`
  callback = function(task)
    if not (task:has_from(1) and task:has_recipients(1)) then
      return false
    end
    local envfrom = task:get_from(1)
    local envrcpts = task:get_recipients(1)
    -- Forwarding is only to a single recipient
    if #envrcpts > 1 then
      return false
    end
    -- Get recipient and compute rewritten SRS address
    local srs = '=' .. envrcpts[1].domain:lower() ..
        '=' .. envrcpts[1].user:lower()
    local ef_user = envfrom[1].user:lower()
    -- `srs` is built from address parts, where '.', '-', '+' and '%' are
    -- common; it must be searched as plain text, not as a Lua pattern
    if ef_user:find('^srs[01]=') and ef_user:find(srs, 1, true) then
      return true
    end
    return false
  end,
  score = 0.0,
  description = "Message was forwarded using Sender Rewriting Scheme (SRS)",
  group = "forwarding"
}

rspamd_config.FORWARDED = {
  -- Compares the `for` address of the first Received header that has one
  -- with the single envelope recipient; a mismatch means forwarding
  callback = function(task)
    -- Strips surrounding <> and a `+tag` suffix of the local part
    local function normalize_addr(addr)
      addr = string.match(addr, '^<?([^>]*)>?$') or addr
      local cap, _, domain = string.match(addr, '^([^%+][^%+]*)(%+[^@]*)@(.*)$')
      if cap then
        addr = string.format('%s@%s', cap, domain)
      end

      return addr
    end

    if not task:has_recipients(1) or not task:has_recipients(2) then
      return false
    end
    local envrcpts = task:get_recipients(1)
    -- Forwarding will only be for single recipient messages
    if #envrcpts > 1 then
      return false
    end
    -- Get any other headers we might need
    local has_list_unsub = task:has_header('List-Unsubscribe')
    local to = task:get_recipients(2)
    local matches = 0
    -- Retrieve and loop through all Received headers
    local rcvds = task:get_received_headers()

    if rcvds then
      for _, rcvd in ipairs(rcvds) do
        local addr = rcvd['for']
        if addr then
          addr = normalize_addr(addr)
          matches = matches + 1
          -- Check that it doesn't match the envrcpt
          if not rspamd_util.strequal_caseless(addr, envrcpts[1].addr) then
            -- Check for mailing-lists as they will have the same signature
            if matches < 2 and has_list_unsub and to and rspamd_util.strequal_caseless(to[1].addr, addr) then
              return false
            else
              return true, 1.0, addr
            end
          end
          -- Prevent any other iterations as we only want
          -- process the first matching Received header
          return false
        end
      end
    end
    return false
  end,
  score = 0.0,
  description = "Message was forwarded",
  group = "forwarding"
}
diff --git a/rules/headers_checks.lua b/rules/headers_checks.lua new file mode 100644 index 0000000..92ebb0c --- /dev/null +++ b/rules/headers_checks.lua @@ -0,0 +1,1174 @@ +--[[ 
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

local util = require "rspamd_util"
local ipairs = ipairs
local pairs = pairs
local table = table
local tostring = tostring
local tonumber = tonumber
local fun = require "fun"
local E = {}

-- CHECK_RECEIVED: counts the Received headers that are not flagged as
-- artificial and inserts the matching RCVD_COUNT_<bucket> virtual symbol,
-- with the exact count as its option.
local rcvd_cb_id = rspamd_config:register_symbol {
  name = 'CHECK_RECEIVED',
  type = 'callback',
  score = 0.0,
  group = 'headers',
  callback = function(task)
    -- Lower bound of each bucket -> symbol suffix
    local cnts = {
      [1] = 'ONE',
      [2] = 'TWO',
      [3] = 'THREE',
      [5] = 'FIVE',
      [7] = 'SEVEN',
      [12] = 'TWELVE'
    }
    local def = 'ZERO'
    local received = task:get_received_headers() or {}
    local nreceived = fun.reduce(function(acc, _)
      return acc + 1
    end, 0, fun.filter(function(h)
      return not h['flags']['artificial']
    end, received))

    -- pairs() visits keys in unspecified order, so "last match wins" is not
    -- reliable: explicitly keep the largest threshold that is <= nreceived.
    local best = 0
    for k, v in pairs(cnts) do
      if nreceived >= k and k > best then
        best = k
        def = v
      end
    end

    task:insert_result('RCVD_COUNT_' .. def, 1.0, tostring(nreceived))
  end
}

rspamd_config:register_symbol {
  name = 'RCVD_COUNT_ZERO',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has no Received headers',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_ONE',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has one Received header',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_TWO',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has two Received headers',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_THREE',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has 3-5 Received headers',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_FIVE',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has 5-7 Received headers',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_SEVEN',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has 7-11 Received headers',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCVD_COUNT_TWELVE',
  score = 0.0,
  parent = rcvd_cb_id,
  type = 'virtual',
  description = 'Message has 12 or more Received headers',
  group = 'headers',
}

-- HAS_X_PRIO: parses the leading number of the X-Priority header and inserts
-- the matching HAS_X_PRIO_<bucket> virtual symbol.
local prio_cb_id = rspamd_config:register_symbol {
  name = 'HAS_X_PRIO',
  type = 'callback',
  description = 'X-Priority check callback rule',
  score = 0.0,
  group = 'headers',
  callback = function(task)
    -- Lower bound of each bucket -> symbol suffix
    local cnts = {
      [1] = 'ONE',
      [2] = 'TWO',
      [3] = 'THREE',
      [5] = 'FIVE',
    }
    local def = 'ZERO'
    local xprio = task:get_header('X-Priority');
    if not xprio then
      return false
    end
    local _, _, x = xprio:find('^%s?(%d+)');
    if (x) then
      x = tonumber(x)
      -- Same as above: choose the largest threshold <= x regardless of the
      -- order in which pairs() yields the keys.
      local best = 0
      for k, v in pairs(cnts) do
        if x >= k and k > best then
          best = k
          def = v
        end
      end
task:insert_result('HAS_X_PRIO_' .. def, 1.0, tostring(x))
    end
  end
}
rspamd_config:register_symbol {
  name = 'HAS_X_PRIO_ZERO',
  score = 0.0,
  parent = prio_cb_id,
  type = 'virtual',
  description = 'Message has X-Priority header set to 0',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'HAS_X_PRIO_ONE',
  score = 0.0,
  parent = prio_cb_id,
  type = 'virtual',
  description = 'Message has X-Priority header set to 1',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'HAS_X_PRIO_TWO',
  score = 0.0,
  parent = prio_cb_id,
  type = 'virtual',
  description = 'Message has X-Priority header set to 2',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'HAS_X_PRIO_THREE',
  score = 0.0,
  parent = prio_cb_id,
  type = 'virtual',
  description = 'Message has X-Priority header set to 3 or 4',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'HAS_X_PRIO_FIVE',
  score = 0.0,
  parent = prio_cb_id,
  type = 'virtual',
  description = 'Message has X-Priority header set to 5 or higher',
  group = 'headers',
}

-- Returns the `value` field of the first occurrence of header `name`,
-- or nil when the header is absent.
local function get_raw_header(task, name)
  return ((task:get_header_full(name) or {})[1] or {})['value']
end

-- CHECK_REPLYTO: parses Reply-To and compares it with From / To, inserting
-- the REPLYTO_* and HAS_REPLYTO virtual symbols registered below.
local check_replyto_id = rspamd_config:register_symbol({
  type = 'callback',
  name = 'CHECK_REPLYTO',
  score = 0.0,
  group = 'headers',
  callback = function(task)
    local replyto = get_raw_header(task, 'Reply-To')
    if not replyto then
      return false
    end
    local rt = util.parse_mail_address(replyto, task:get_mempool())
    if not (rt and rt[1] and (string.len(rt[1].addr) > 0)) then
      task:insert_result('REPLYTO_UNPARSEABLE', 1.0)
      return false
    else
      local rta = rt[1].addr
      task:insert_result('HAS_REPLYTO', 1.0, rta)
      -- Check if Reply-To address starts with title seen in display name
      local sym = task:get_symbol('FROM_NAME_HAS_TITLE')
      local title = (((sym or E)[1] or E).options or E)[1]
      if title then
        rta = rta:lower()
        -- Plain prefix comparison: the option text is data, not a pattern
        if rta:sub(1, #title) == title then
          task:insert_result('REPLYTO_EMAIL_HAS_TITLE', 1.0)
        end
      end
    end

    -- See if Reply-To matches From in some way
    local from = task:get_from { 'mime', 'orig' }
    local from_h = get_raw_header(task, 'From')
    if not (from and from[1]) then
      return false
    end
    if (from_h and from_h == replyto) then
      -- From and Reply-To are identical
      task:insert_result('REPLYTO_EQ_FROM', 1.0)
    else
      -- `from[1]` is known to exist here (checked just above)
      -- See if From and Reply-To addresses match
      if (util.strequal_caseless(from[1].addr, rt[1].addr)) then
        task:insert_result('REPLYTO_ADDR_EQ_FROM', 1.0)
      elseif from[1].domain and rt[1].domain then
        if (util.strequal_caseless(from[1].domain, rt[1].domain)) then
          task:insert_result('REPLYTO_DOM_EQ_FROM_DOM', 1.0)
        else
          -- See if Reply-To matches the To address
          local to = task:get_recipients(2)
          if (to and to[1] and to[1].addr:lower() == rt[1].addr:lower()) then
            -- Ignore this for mailing-lists and automatic submissions
            if (not (task:get_header('List-Unsubscribe') or
                task:get_header('X-To-Get-Off-This-List') or
                task:get_header('X-List') or
                task:get_header('Auto-Submitted')))
            then
              task:insert_result('REPLYTO_EQ_TO_ADDR', 1.0)
            end
          else
            task:insert_result('REPLYTO_DOM_NEQ_FROM_DOM', 1.0)
          end
        end
      end
      -- See if the Display Names match
      if (from[1].name and rt[1].name and
          util.strequal_caseless(from[1].name, rt[1].name)) then
        task:insert_result('REPLYTO_DN_EQ_FROM_DN', 1.0)
      end
    end
  end
})

rspamd_config:register_symbol {
  name = 'REPLYTO_UNPARSEABLE',
  score = 1.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To header could not be parsed',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'HAS_REPLYTO',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Has Reply-To header',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_EQ_FROM',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To header is identical to From header',
  group = 'headers',
}
-- NOTE(review): the callback compares against task:get_from{'mime','orig'},
-- while this description mentions SMTP From -- confirm which is intended.
rspamd_config:register_symbol {
  name = 'REPLYTO_ADDR_EQ_FROM',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To header is identical to SMTP From',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_DOM_EQ_FROM_DOM',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To domain matches the From domain',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_DOM_NEQ_FROM_DOM',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To domain does not match the From domain',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_DN_EQ_FROM_DN',
  score = 0.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To display name matches From',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_EMAIL_HAS_TITLE',
  score = 2.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To header has title',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'REPLYTO_EQ_TO_ADDR',
  score = 5.0,
  parent = check_replyto_id,
  type = 'virtual',
  description = 'Reply-To is the same as the To address',
  group = 'headers',
}

-- CHECK_REPLYTO reads the FROM_NAME_HAS_TITLE result produced by CHECK_FROM
rspamd_config:register_dependency('CHECK_REPLYTO', 'CHECK_FROM')

-- CHECK_MIME: MIME-Version presence and multipart/alternative sanity checks
local check_mime_id = rspamd_config:register_symbol {
  name = 'CHECK_MIME',
  type = 'callback',
  group = 'headers',
  score = 0.0,
  callback = function(task)
    -- Check if there is a MIME-Version header
    local missing_mime = false
    if not task:has_header('MIME-Version') then
      missing_mime = true
    end

    -- Check presence of MIME specific headers
    local has_ct_header = task:has_header('Content-Type')
    local has_cte_header = task:has_header('Content-Transfer-Encoding')

    -- Add the symbol if we have MIME headers, but no MIME-Version
-- (do not add the symbol for RFC822 messages)
    if (has_ct_header or has_cte_header) and missing_mime then
      task:insert_result('MISSING_MIME_VERSION', 1.0)
    end

    local found_ma = false
    local found_plain = false
    local found_html = false

    for _, p in ipairs(task:get_parts()) do
      local mtype, subtype = p:get_type()
      -- Skip parts without a usable type instead of indexing nil
      if mtype and subtype then
        local ctype = mtype:lower() .. '/' .. subtype:lower()
        if (ctype == 'multipart/alternative') then
          found_ma = true
        end
        if (ctype == 'text/plain') then
          found_plain = true
        end
        if (ctype == 'text/html') then
          found_html = true
        end
      end
    end

    if (found_ma) then
      if (not found_plain) then
        task:insert_result('MIME_MA_MISSING_TEXT', 1.0)
      end
      if (not found_html) then
        task:insert_result('MIME_MA_MISSING_HTML', 1.0)
      end
    end
  end
}

rspamd_config:register_symbol {
  name = 'MISSING_MIME_VERSION',
  score = 2.0,
  parent = check_mime_id,
  type = 'virtual',
  description = 'MIME-Version header is missing in MIME message',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'MIME_MA_MISSING_TEXT',
  score = 2.0,
  parent = check_mime_id,
  type = 'virtual',
  description = 'MIME multipart/alternative missing text/plain part',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'MIME_MA_MISSING_HTML',
  score = 1.0,
  parent = check_mime_id,
  type = 'virtual',
  description = 'MIME multipart/alternative missing text/html part',
  group = 'headers',
}

-- Used to be called IS_LIST
-- Fires when the first Received header carrying "for <addr>" names one of
-- the MIME recipients; only that first such header is considered.
rspamd_config.PREVIOUSLY_DELIVERED = {
  callback = function(task)
    if not task:has_recipients(2) then
      return false
    end
    local to = task:get_recipients(2)
    local rcvds = task:get_header_full('Received')
    if not rcvds then
      return false
    end
    for _, rcvd in ipairs(rcvds) do
      local _, _, addr = rcvd['decoded']:lower():find("%sfor%s<(.-)>")
      if addr then
        for _, toa in ipairs(to) do
          if toa and toa.addr:lower() == addr then
            return true, addr
          end
        end
        return false
      end
    end
    -- No Received header had a "for <addr>" clause
    return false
  end,
  description = 'Message either to a list or was forwarded',
  group = 'headers',
  score = 0.0
}
rspamd_config.BROKEN_HEADERS = {
  callback = function(task)
    return task:has_flag('broken_headers')
  end,
  score = 10.0,
  group = 'headers',
  description = 'Headers structure is likely broken'
}

rspamd_config.BROKEN_CONTENT_TYPE = {
  callback = function(task)
    return fun.any(function(p)
      return p:is_broken()
    end,
        task:get_parts())
  end,
  score = 1.5,
  group = 'headers',
  description = 'Message has part with broken content type'
}

-- Fires when the first X-Confirm-Reading-To address does not occur inside
-- the MIME From address.
rspamd_config.HEADER_RCONFIRM_MISMATCH = {
  callback = function(task)
    local header_from = nil
    local cread = task:get_header('X-Confirm-Reading-To')

    if task:has_from('mime') then
      header_from = task:get_from('mime')[1]
    end

    local header_cread = nil
    if cread then
      local headers_cread = util.parse_mail_address(cread, task:get_mempool())
      if headers_cread then
        header_cread = headers_cread[1]
      end
    end

    if header_from and header_cread then
      -- Plain substring search: an address is data, and '.', '-', '+' would
      -- otherwise be interpreted as Lua pattern operators.
      if not string.find(header_from['addr'], header_cread['addr'], 1, true) then
        return true
      end
    end

    return false
  end,

  score = 2.0,
  group = 'headers',
  description = 'Read confirmation address is different to from address'
}

-- Fires when none of the Disposition-Notification-To addresses equals the
-- SMTP sender, or when such addresses exist but there is no SMTP sender.
rspamd_config.HEADER_FORGED_MDN = {
  callback = function(task)
    local mdn = task:get_header('Disposition-Notification-To')
    if not mdn then
      return false
    end
    local header_rp = nil

    if task:has_from('smtp') then
      header_rp = task:get_from('smtp')[1]
    end

    -- Parse mail addr
    local headers_mdn = util.parse_mail_address(mdn, task:get_mempool())

    if headers_mdn and not header_rp then
      return true
    end
    if header_rp and not headers_mdn then
      return false
    end
    if not headers_mdn and not header_rp then
      return false
    end

    local found_match = false
    for _, h in ipairs(headers_mdn) do
      if util.strequal_caseless(h['addr'], header_rp['addr']) then
        found_match = true
        break
      end
    end

    return (not found_match)
  end,
score = 2.0,
  group = 'headers',
  description = 'Read confirmation address is different to return path'
}

-- Headers expected to appear at most once, mapped to the weight passed to
-- insert_result when that header is repeated.
local headers_unique = {
  ['Content-Type'] = 1.0,
  ['Content-Transfer-Encoding'] = 1.0,
  -- https://tools.ietf.org/html/rfc5322#section-3.6
  ['Date'] = 0.1,
  ['From'] = 1.0,
  ['Sender'] = 1.0,
  ['Reply-To'] = 1.0,
  ['To'] = 0.2,
  ['Cc'] = 0.1,
  ['Bcc'] = 0.1,
  ['Message-ID'] = 0.7,
  ['In-Reply-To'] = 0.7,
  ['References'] = 0.3,
  ['Subject'] = 0.7
}

-- MULTIPLE_UNIQUE_HEADERS: reports repeated headers from `headers_unique`
-- (weight = largest weight among the repeated ones), or
-- MISSING_ESSENTIAL_HEADERS when none of them is present at all.
local multiple_unique_headers_id = rspamd_config:register_symbol {
  name = 'MULTIPLE_UNIQUE_HEADERS',
  callback = function(task)
    local res = 0
    local max_mult = 0.0
    local res_tbl = {}
    local found = 0

    for hdr, mult in pairs(headers_unique) do
      local hc = task:get_header_count(hdr)
      found = found + hc

      if hc > 1 then
        res = res + 1
        table.insert(res_tbl, hdr)
        if max_mult < mult then
          max_mult = mult
        end
      end
    end

    if res > 0 then
      -- pairs() order is unspecified; sort so the option string is stable
      table.sort(res_tbl)
      task:insert_result('MULTIPLE_UNIQUE_HEADERS', max_mult, table.concat(res_tbl, ','))
    elseif found == 0 then
      task:insert_result('MISSING_ESSENTIAL_HEADERS', 1.0)
    end
  end,

  score = 7.0,
  group = 'headers',
  one_shot = true,
  description = 'Repeated unique headers'
}

rspamd_config:register_symbol {
  name = 'MISSING_ESSENTIAL_HEADERS',
  score = 7.0,
  group = 'blankspam',
  parent = multiple_unique_headers_id,
  type = 'virtual',
  description = 'Common headers were entirely absent',
}

rspamd_config.MISSING_FROM = {
  callback = function(task)
    local from = task:get_header('From')
    if from == nil or from == '' then
      return true
    end
    return false
  end,
  score = 2.0,
  group = 'headers',
  description = 'Missing From header'
}

-- Fires when the MIME From header holds more than one address; the raw
-- addresses are returned as options.
rspamd_config.MULTIPLE_FROM = {
  callback = function(task)
    local from = task:get_from('mime')
    if from and from[2] then
      return true, 1.0, fun.totable(fun.map(function(a)
        return a.raw
      end, from))
    end
    return false
  end,
  score = 8.0,
  group = 'headers',
  description = 'Multiple addresses in From header'
}

rspamd_config.MV_CASE = {
  callback = function(task)
    return task:has_header('Mime-Version', true)
  end,
  description = 'Mime-Version .vs. MIME-Version',
  score = 0.5,
  group = 'headers'
}

-- CHECK_FROM: validity and display-name checks on the From address, plus
-- comparison with the envelope sender and with a single To recipient.
local check_from_id = rspamd_config:register_symbol {
  name = 'CHECK_FROM',
  type = 'callback',
  score = 0.0,
  group = 'headers',
  callback = function(task)
    local envfrom = task:get_from(1)
    local from = task:get_from(2)
    if (envfrom and envfrom[1] and not envfrom[1]["flags"]["valid"]) then
      task:insert_result('ENVFROM_INVALID', 1.0)
    end
    if (from and from[1]) then
      if not (from[1]["flags"]["valid"]) then
        task:insert_result('FROM_INVALID', 1.0)
      end
      if (from[1].name == nil or from[1].name == '') then
        task:insert_result('FROM_NO_DN', 1.0)
      elseif (from[1].name and
          util.strequal_caseless(from[1].name, from[1].addr)) then
        task:insert_result('FROM_DN_EQ_ADDR', 1.0)
      elseif (from[1].name and from[1].name ~= '') then
        task:insert_result('FROM_HAS_DN', 1.0)
        -- Look for Mr/Mrs/Dr titles
        local n = from[1].name:lower()
        local match, match_end
        match, match_end = n:find('^mrs?[%.%s]')
        if match then
          task:insert_result('FROM_NAME_HAS_TITLE', 1.0, n:sub(match, match_end - 1))
        end
        match, match_end = n:find('^dr[%.%s]')
        if match then
          task:insert_result('FROM_NAME_HAS_TITLE', 1.0, n:sub(match, match_end - 1))
        end
        -- Check for excess spaces
        if n:find('%s%s') then
          task:insert_result('FROM_NAME_EXCESS_SPACE', 1.0)
        end
      end

      -- Guard the first element too, as the ENVFROM_INVALID check above does
      if envfrom and envfrom[1] then
        if util.strequal_caseless(envfrom[1].addr, from[1].addr) then
          task:insert_result('FROM_EQ_ENVFROM', 1.0)
        elseif envfrom[1].addr ~= '' then
          task:insert_result('FROM_NEQ_ENVFROM', 1.0, from[1].addr, envfrom[1].addr)
        end
      end
    end

    local to = task:get_recipients(2)
    if not (to and to[1] and #to == 1 and from and from[1]) then
      return false
    end
    -- Check if FROM == TO
    if (util.strequal_caseless(to[1].addr, from[1].addr)) then
task:insert_result('TO_EQ_FROM', 1.0)
    elseif (to[1].domain and from[1].domain and
        util.strequal_caseless(to[1].domain, from[1].domain))
    then
      task:insert_result('TO_DOM_EQ_FROM_DOM', 1.0)
    end
  end
}

rspamd_config:register_symbol {
  name = 'ENVFROM_INVALID',
  score = 2.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'Envelope from does not have a valid format',
}
rspamd_config:register_symbol {
  name = 'FROM_INVALID',
  score = 2.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header does not have a valid format',
}
rspamd_config:register_symbol {
  name = 'FROM_NO_DN',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header does not have a display name',
}
rspamd_config:register_symbol {
  name = 'FROM_DN_EQ_ADDR',
  score = 1.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header display name is the same as the address',
}
rspamd_config:register_symbol {
  name = 'FROM_HAS_DN',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header has a display name',
}
rspamd_config:register_symbol {
  name = 'FROM_NAME_EXCESS_SPACE',
  score = 1.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header display name contains excess whitespace',
}
rspamd_config:register_symbol {
  name = 'FROM_NAME_HAS_TITLE',
  score = 1.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From header display name has a title (Mr/Mrs/Dr)',
}
rspamd_config:register_symbol {
  name = 'FROM_EQ_ENVFROM',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From address is the same as the envelope',
}
rspamd_config:register_symbol {
  name = 'FROM_NEQ_ENVFROM',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'From address is different to the envelope',
}
rspamd_config:register_symbol {
  name = 'TO_EQ_FROM',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'To address matches the From address',
}
rspamd_config:register_symbol {
  name = 'TO_DOM_EQ_FROM_DOM',
  score = 0.0,
  group = 'headers',
  parent = check_from_id,
  type = 'virtual',
  description = 'To domain is the same as the From domain',
}

-- CHECK_TO_CC: recipient-count bucket, display-name statistics and
-- header-vs-envelope recipient matching for the MIME recipients.
local check_to_cc_id = rspamd_config:register_symbol {
  name = 'CHECK_TO_CC',
  type = 'callback',
  score = 0.0,
  group = 'headers,mime',
  callback = function(task)
    local rcpts = task:get_recipients(1)
    local to = task:get_recipients(2)
    local to_match_envrcpt = 0
    -- Lower bound of each bucket -> symbol suffix
    local cnts = {
      [1] = 'ONE',
      [2] = 'TWO',
      [3] = 'THREE',
      [5] = 'FIVE',
      [7] = 'SEVEN',
      [12] = 'TWELVE',
      [50] = 'GT_50'
    }
    local def = 'ZERO'
    if (not to) then
      return false
    end
    -- Add symbol for recipient count
    local nrcpt = #to
    -- pairs() order is unspecified: keep the largest threshold <= nrcpt
    local best = 0
    for k, v in pairs(cnts) do
      if nrcpt >= k and k > best then
        best = k
        def = v
      end
    end
    task:insert_result('RCPT_COUNT_' .. def, 1.0, tostring(nrcpt))
    -- Check for display names
    local to_dn_count = 0
    local to_dn_eq_addr_count = 0
    for _, toa in ipairs(to) do
      -- To: Recipients <noreply@dropbox.com>
      if (toa['name'] and (toa['name']:lower() == 'recipient'
          or toa['name']:lower() == 'recipients')) then
        task:insert_result('TO_DN_RECIPIENTS', 1.0)
      end
      if (toa['name'] and util.strequal_caseless(toa['name'], toa['addr'])) then
        to_dn_eq_addr_count = to_dn_eq_addr_count + 1
      elseif (toa['name'] and toa['name'] ~= '') then
        to_dn_count = to_dn_count + 1
      end
      -- See if header recipients match envrcpts
      if (rcpts) then
        for _, rcpt in ipairs(rcpts) do
          if (toa and toa['addr'] and rcpt and rcpt['addr'] and
              util.strequal_caseless(rcpt['addr'], toa['addr']))
          then
            to_match_envrcpt = to_match_envrcpt + 1
          end
        end
      end
    end
    if (to_dn_count == 0 and to_dn_eq_addr_count == 0) then
      task:insert_result('TO_DN_NONE', 1.0)
    elseif (to_dn_count == nrcpt) then
      task:insert_result('TO_DN_ALL', 1.0)
    elseif (to_dn_count > 0) then
      task:insert_result('TO_DN_SOME', 1.0)
    end
    -- `nrcpt > 0` keeps the "ALL" symbols from firing vacuously (0 == 0)
    -- when the recipient list is empty
    if (nrcpt > 0 and to_dn_eq_addr_count == nrcpt) then
      task:insert_result('TO_DN_EQ_ADDR_ALL', 1.0)
    elseif (to_dn_eq_addr_count > 0) then
      task:insert_result('TO_DN_EQ_ADDR_SOME', 1.0)
    end

    -- See if header recipients match envelope recipients
    if (nrcpt > 0 and to_match_envrcpt == nrcpt) then
      task:insert_result('TO_MATCH_ENVRCPT_ALL', 1.0)
    elseif (to_match_envrcpt > 0) then
      task:insert_result('TO_MATCH_ENVRCPT_SOME', 1.0)
    end
  end
}

rspamd_config:register_symbol {
  name = 'RCPT_COUNT_ZERO',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'No recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_ONE',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'One recipient',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_TWO',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
description = 'Two recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_THREE',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = '3-5 recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_FIVE',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = '5-7 recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_SEVEN',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = '7-11 recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_TWELVE',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = '12-50 recipients',
  group = 'headers',
}
rspamd_config:register_symbol {
  name = 'RCPT_COUNT_GT_50',
  score = 0.0,
  parent = check_to_cc_id,
  type = 'virtual',
  description = '50+ recipients',
  group = 'headers',
}

rspamd_config:register_symbol {
  name = 'TO_DN_RECIPIENTS',
  score = 2.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'To header display name is "Recipients"',
}
rspamd_config:register_symbol {
  name = 'TO_DN_NONE',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'None of the recipients have display names',
}
rspamd_config:register_symbol {
  name = 'TO_DN_ALL',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'All the recipients have display names',
}
rspamd_config:register_symbol {
  name = 'TO_DN_SOME',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'Some of the recipients have display names',
}
rspamd_config:register_symbol {
  name = 'TO_DN_EQ_ADDR_ALL',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'All of the recipients have display names that are the same as their address',
}
rspamd_config:register_symbol {
  name = 'TO_DN_EQ_ADDR_SOME',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'Some of the recipients have display names that are the same as their address',
}
rspamd_config:register_symbol {
  name = 'TO_MATCH_ENVRCPT_ALL',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'All of the recipients match the envelope',
}
rspamd_config:register_symbol {
  name = 'TO_MATCH_ENVRCPT_SOME',
  score = 0.0,
  group = 'headers',
  parent = check_to_cc_id,
  type = 'virtual',
  description = 'Some of the recipients match the envelope',
}

-- TODO: rewrite this rule, it should not touch headers directly
-- Fires for an application/octet-stream part that is not declared as an
-- attachment, has no Content-ID, is not an ".asc" file in a multi-part
-- message, and does not sit under multipart/encrypted.
rspamd_config.CTYPE_MISSING_DISPOSITION = {
  callback = function(task)
    local parts = task:get_parts()
    if not parts or #parts < 1 then
      return false
    end
    for _, p in ipairs(parts) do
      local ct = p:get_header('Content-Type')
      if ct and ct:lower():match('^application/octet%-stream') then
        local cd = p:get_header('Content-Disposition')
        if not cd or not cd:lower():find('^attachment') then
          local ci = p:get_header('Content-ID')
          if ci or (#parts > 1 and cd and cd:find('filename=.+%.asc')) then
            return false
          end

          local parent = p:get_parent()
          if parent then
            local t, st = parent:get_type()
            -- Special case
            if t == 'multipart' and st == 'encrypted' then
              return false
            end
          end

          return true
        end
      end
    end
    return false
  end,
  description = 'Binary content-type not specified as an attachment',
  score = 4.0,
  group = 'mime'
}

-- Fires for a multipart/mixed message whose parts are only multipart
-- containers and at most two non-attachment text parts.
rspamd_config.CTYPE_MIXED_BOGUS = {
  callback = function(task)
    local ct = task:get_header('Content-Type')
    if not ct then
      return false
    end
    local parts = task:get_parts()
    if not parts then
      return false
    end
    if not ct:lower():match('^multipart/mixed') then
      return false
    end
    -- Check each part and look for a part that isn't multipart/* or text/plain or text/html
    local ntext_parts = 0
    for _, p in ipairs(parts) do
      local mtype = p:get_type()
      if mtype then
        if mtype == 'text' and not p:is_attachment() then
          ntext_parts = ntext_parts + 1
          if ntext_parts > 2 then
            -- A third inline text part counts as real mixed content
            return false
          end
        elseif mtype ~= 'multipart' then
          -- Found a non-textual (or attached) part
          return false
        end
      end
    end
    return true
  end,
  description = 'multipart/mixed without non-textual part',
  score = 1.0,
  group = 'mime'
}

-- True when `part` has a Content-Type starting with "text" and a
-- Content-Transfer-Encoding starting with "base64" (both case-insensitive).
local function check_for_base64_text(part)
  local ct = part:get_header('Content-Type')
  if not ct then
    return false
  end
  if not ct:lower():match('^text') then
    return false
  end
  -- Check encoding
  local cte = part:get_header('Content-Transfer-Encoding')
  if cte and cte:lower():match('^base64') then
    return true
  end
  return false
end

rspamd_config.MIME_BASE64_TEXT = {
  callback = function(task)
    -- Check outer part
    if check_for_base64_text(task) then
      return true
    end
    local parts = task:get_parts()
    if not parts then
      return false
    end
    -- Check each part and look for base64 encoded text parts
    for _, part in ipairs(parts) do
      if check_for_base64_text(part) then
        return true
      end
    end
    return false
  end,
  description = 'Has text part encoded in base64',
  score = 0.1,
  group = 'mime'
}

rspamd_config.MIME_BASE64_TEXT_BOGUS = {
  callback = function(task)
    local parts = task:get_text_parts()
    if not parts then
      return false
    end
    -- Check each part and look for base64 encoded text parts
    -- where the part does not have any 8bit characters within it
    for _, part in ipairs(parts) do
      local mimepart = part:get_mimepart()
      if check_for_base64_text(mimepart) and not part:has_8bit() then
        return true
      end
    end
    return false
  end,
  description = 'Has text part encoded in base64 that does not contain any 8bit characters',
score = 1.0,
  group = 'mime'
}

-- True when the address table carries a truthy '8bit' entry in its flags
local function is_8bit_addr(addr)
  local flags = addr.flags
  return (flags and flags['8bit']) and true or false
end

rspamd_config.INVALID_FROM_8BIT = {
  callback = function(task)
    -- First MIME From address, or an empty table when there is none
    local from = (task:get_from('mime') or {})[1] or {}
    return is_8bit_addr(from)
  end,
  description = 'Invalid 8bit character in From header',
  score = 6.0,
  group = 'headers'
}

rspamd_config.INVALID_RCPT_8BIT = {
  callback = function(task)
    local rcpts = task:get_recipients('mime') or {}
    return fun.any(function(rcpt)
      return is_8bit_addr(rcpt)
    end, rcpts)
  end,
  description = 'Invalid 8bit character in recipients headers',
  score = 6.0,
  group = 'headers'
}

rspamd_config.XM_CASE = {
  callback = function(task)
    return task:has_header('X-mailer', true)
  end,
  description = 'X-mailer .vs. X-Mailer',
  score = 0.5,
  group = 'headers'
}
-- diff --git a/rules/html.lua b/rules/html.lua
-- new file mode 100644
-- index 0000000..7c352c2
-- --- /dev/null
-- +++ b/rules/html.lua
-- @@ -0,0 +1,462 @@
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to you under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at:
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
local reconf = config['regexp']

local rspamd_regexp = require "rspamd_regexp"

-- Messages that have only HTML part
reconf['MIME_HTML_ONLY'] = {
  re = 'has_only_html_part()',
  score = 0.2,
  description = 'Message has only an HTML part',
  group = 'headers'
}

-- Walks up the parent chain of `tag`; true when any ancestor is an <a> tag.
local function has_anchor_parent(tag)
  local parent = tag
  repeat
    parent = parent:get_parent()
    if parent then
      if parent:get_type() == 'a' then
        return true
      end
    end
  until not parent

  return false
end

-- True when some HTML text part with length in [min, max) contains an
-- embedded image inside an anchor whose height + width is at least 210.
local function check_html_image(task, min, max)
  local tp = task:get_text_parts()

  for _, p in ipairs(tp) do
    if p:is_html() then
      local hc = p:get_html()
      local len = p:get_length()

      if hc and len >= min and len < max then
        local images = hc:get_images()
        if images then
          for _, i in ipairs(images) do
            local tag = i['tag']
            if tag then
              if has_anchor_parent(tag) then
                -- do not trigger on small and unknown size images
                if i['height'] + i['width'] >= 210 and i['embedded'] then
                  return true
                end
              end
            end
          end
        end
      end
    end
  end

  -- Explicit negative result instead of falling off the end (nil)
  return false
end

rspamd_config.HTML_SHORT_LINK_IMG_1 = {
  callback = function(task)
    return check_html_image(task, 0, 1024)
  end,
  score = 2.0,
  group = 'html',
  description = 'Short HTML part (0..1K) with a link to an image'
}

rspamd_config.HTML_SHORT_LINK_IMG_2 = {
  callback = function(task)
    return check_html_image(task, 1024, 1536)
  end,
  score = 1.0,
  group = 'html',
  description = 'Short HTML part (1K..1.5K) with a link to an image'
}

rspamd_config.HTML_SHORT_LINK_IMG_3 = {
  callback = function(task)
    return check_html_image(task, 1536, 2048)
  end,
  score = 0.5,
  group = 'html',
  description = 'Short HTML part (1.5K..2K) with a link to an image'
}

rspamd_config.R_EMPTY_IMAGE = {
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message

    for _, p in ipairs(tp) do
      -- iterate over text parts array using `ipairs`
      if p:is_html() then
        -- if the current part is html part
        local hc = p:get_html() -- we get HTML context
        local len = p:get_length() -- and part's length
        if hc and len < 50 then
          -- if we have a part that has less than 50 bytes of text
          local images = hc:get_images() -- then we check for HTML images

          if images then
            -- if there are images
            for _, i in ipairs(images) do
              -- then iterate over images in the part
              if i['height'] + i['width'] >= 400 then
                -- if we have a large image
                local tag = i['tag']
                if tag then
                  if not has_anchor_parent(tag) then
                    return true
                  end
                end
              end
            end
          end
        end
      end
    end
  end,

  score = 2.0,
  group = 'html',
  description = 'Message contains empty parts and image'
}

-- Estimates how much of an HTML part is "picture text": linked images with
-- 100 < width + height < 3000 count as (width + height) / 100 words. Fires
-- when that share exceeds one half, with weight (share - 0.5) * 2.
rspamd_config.R_SUSPICIOUS_IMAGES = {
  callback = function(task)
    local tp = task:get_text_parts() -- get text parts in a message

    for _, p in ipairs(tp) do
      local h = p:get_html()

      if h then
        -- Treat a missing word count as zero rather than failing on nil
        local l = p:get_words_count() or 0
        local img = h:get_images()
        local pic_words = 0

        if img then
          for _, i in ipairs(img) do
            local dim = i['width'] + i['height']
            local tag = i['tag']

            if tag then
              if has_anchor_parent(tag) then
                if dim > 100 and dim < 3000 then
                  -- We assume that a single picture 100x200 contains approx 3 words of text
                  pic_words = pic_words + dim / 100
                end
              end
            end
          end
        end

        if l + pic_words > 0 then
          local rel = pic_words / (l + pic_words)

          if rel > 0.5 then
            return true, (rel - 0.5) * 2
          end
        end
      end
    end

    return false
  end,

  score = 5.0,
  group = 'html',
  description = 'Message contains many suspicious images'
}

local vis_check_id = rspamd_config:register_symbol {
  name = 'HTML_VISIBLE_CHECKS',
  type = 'callback',
  group = 'html',
  callback = function(task)
    --local logger = require "rspamd_logger"
    local tp = task:get_text_parts() -- get text parts in a message
    local ret = false
    local transp_rate = 0
    local invisible_blocks = 0
    local zero_size_blocks = 0
    local arg

    local normal_len = 0
    local
transp_len = 0 + + for _, p in ipairs(tp) do + -- iterate over text parts array using `ipairs` + normal_len = normal_len + p:get_length() + if p:is_html() and p:get_html() then + -- if the current part is html part + local hc = p:get_html() -- we get HTML context + + hc:foreach_tag({ 'font', 'span', 'div', 'p', 'td' }, function(tag, clen, is_leaf) + local bl = tag:get_style() + if bl then + if not bl.visible and clen > 0 and is_leaf then + invisible_blocks = invisible_blocks + 1 + end + + if (bl.font_size or 12) == 0 and clen > 0 and is_leaf then + zero_size_blocks = zero_size_blocks + 1 + end + + if bl.transparent and is_leaf then + ret = true + invisible_blocks = invisible_blocks + 1 -- This block is invisible + transp_len = transp_len + clen + normal_len = normal_len - clen + local tr = transp_len / (normal_len + transp_len) + if tr > transp_rate then + transp_rate = tr + if not bl.color then + bl.color = { 0, 0, 0 } + end + if not bl.bgcolor then + bl.bgcolor = { 0, 0, 0 } + end + arg = string.format('%s color #%x%x%x bgcolor #%x%x%x', + tag:get_type(), + bl.color[1], bl.color[2], bl.color[3], + bl.bgcolor[1], bl.bgcolor[2], bl.bgcolor[3]) + end + end + end + + return false -- Continue search + end) + + end + end + + if ret then + transp_rate = transp_len / (normal_len + transp_len) + + if transp_rate > 0.1 then + if transp_rate > 0.5 or transp_rate ~= transp_rate then + transp_rate = 0.5 + end + + task:insert_result('R_WHITE_ON_WHITE', (transp_rate * 2.0), arg) + end + end + + if invisible_blocks > 0 then + if invisible_blocks > 10 then + invisible_blocks = 10 + end + local rates = { -- From 1 to 10 + 0.05, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 1.0, + } + task:insert_result('MANY_INVISIBLE_PARTS', rates[invisible_blocks], + tostring(invisible_blocks)) + end + + if zero_size_blocks > 0 then + if zero_size_blocks > 5 then + if zero_size_blocks > 10 then + -- Full score + task:insert_result('ZERO_FONT', 1.0, + tostring(zero_size_blocks)) + 
else + zero_size_blocks = 5 + end + end + + if zero_size_blocks <= 5 then + local rates = { -- From 1 to 5 + 0.1, + 0.2, + 0.2, + 0.3, + 0.5, + } + task:insert_result('ZERO_FONT', rates[zero_size_blocks], + tostring(zero_size_blocks)) + end + end + end, +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = vis_check_id, + name = 'R_WHITE_ON_WHITE', + description = 'Message contains low contrast text', + score = 4.0, + group = 'html', + one_shot = true, +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = vis_check_id, + name = 'ZERO_FONT', + description = 'Zero sized font used', + score = 1.0, -- Reached if more than 5 elements have zero size + one_shot = true, + group = 'html' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = vis_check_id, + name = 'MANY_INVISIBLE_PARTS', + description = 'Many parts are visually hidden', + score = 1.0, -- Reached if more than 10 elements are hidden + one_shot = true, + group = 'html' +} + +rspamd_config.EXT_CSS = { + callback = function(task) + local regexp_lib = require "rspamd_regexp" + local re = regexp_lib.create_cached('/^.*\\.css(?:[?#].*)?$/i') + local tp = task:get_text_parts() -- get text parts in a message + local ret = false + for _, p in ipairs(tp) do + -- iterate over text parts array using `ipairs` + if p:is_html() and p:get_html() then + -- if the current part is html part + local hc = p:get_html() -- we get HTML context + hc:foreach_tag({ 'link' }, function(tag) + local bl = tag:get_extra() + if bl then + local s = tostring(bl) + if s and re:match(s) then + ret = true + end + end + + return ret -- Continue search + end) + + end + end + + return ret + end, + + score = 1.0, + group = 'html', + description = 'Message contains external CSS reference' +} + +local https_re = rspamd_regexp.create_cached('/^https:/i') + +rspamd_config.HTTP_TO_HTTPS = { + callback = function(task) + local found_opts + local tp = task:get_text_parts() or {} + + for _, p in ipairs(tp) do + if 
p:is_html() then + local hc = p:get_html() + if (not hc) then + return false + end + + local found = false + + hc:foreach_tag('a', function(tag, _) + -- Skip this loop if we already have a match + if (found) then + return true + end + + local c = tag:get_content() + if (c) then + if (not https_re:match(c)) then + return false + end + + local u = tag:get_extra() + if (not u) then + return false + end + local url_proto = u:get_protocol() + + if url_proto ~= 'http' then + return false + end + -- Capture matches for http in href to https in visible part only + found = true + found_opts = u:get_host() + return true + end + + return false + end) + + if (found) then + return true, 1.0, found_opts + end + + return false + end + end + return false + end, + description = 'The anchor text contains a distinct scheme compared to the target URL', + score = 0.5, + group = 'html' +} + +rspamd_config.HTTP_TO_IP = { + callback = function(task) + local tp = task:get_text_parts() + if (not tp) then + return false + end + for _, p in ipairs(tp) do + if p:is_html() then + local hc = p:get_html() + if (not hc) then + return false + end + local found = false + hc:foreach_tag('a', function(tag, length) + if (found) then + return true + end + local u = tag:get_extra() + if (u) then + u = tostring(u):lower() + if (u:match('^https?://%d+%.%d+%.%d+%.%d+')) then + found = true + end + end + return false + end) + if found then + return true + end + return false + end + end + end, + description = 'HTML anchor points to an IP address', + score = 1.0, + group = 'html' +} diff --git a/rules/mid.lua b/rules/mid.lua new file mode 100644 index 0000000..1bac26c --- /dev/null +++ b/rules/mid.lua @@ -0,0 +1,131 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> +Copyright (c) 2016, Steve Freegard <steve@freegard.name> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- +local rspamd_util = require "rspamd_util" +local function mid_check_func(task) + local mid = task:get_header('Message-ID') + if not mid then + return false + end + -- Check for 'bare' IP addresses in RHS + if mid:find("@%d+%.%d+%.%d+%.%d+>$") then + task:insert_result('MID_BARE_IP', 1.0) + end + -- Check for non-FQDN RHS + if mid:find("@[^%.]+>?$") then + task:insert_result('MID_RHS_NOT_FQDN', 1.0) + end + -- Check for missing <>'s + if not mid:find('^<[^>]+>$') then + task:insert_result('MID_MISSING_BRACKETS', 1.0) + end + -- Check for IP literal in RHS + if mid:find("@%[%d+%.%d+%.%d+%.%d+%]") then + task:insert_result('MID_RHS_IP_LITERAL', 1.0) + end + -- Check From address attributes against MID + local from = task:get_from(2) + local fd + if (from and from[1] and from[1].domain and from[1].domain ~= '') then + fd = from[1].domain:lower() + local _, _, md = mid:find("@([^>]+)>?$") + -- See if all or part of the From address + -- can be found in the Message-ID + -- extract tld + local fdtld = nil + local mdtld = nil + if md then + fdtld = rspamd_util.get_tld(fd) + mdtld = rspamd_util.get_tld(md) + end + if (mid:lower():find(from[1].addr:lower(), 1, true)) then + task:insert_result('MID_CONTAINS_FROM', 1.0) + elseif (md and fd == md:lower()) then + task:insert_result('MID_RHS_MATCH_FROM', 1.0) + elseif (mdtld ~= nil and fdtld ~= nil and mdtld:lower() == fdtld) then + task:insert_result('MID_RHS_MATCH_FROMTLD', 1.0) + end + end + -- Check To address attributes against MID + local to = task:get_recipients(2) + if (to and to[1] and to[1].domain and to[1].domain ~= 
'') then + local td = to[1].domain:lower() + local _, _, md = mid:find("@([^>]+)>?$") + -- Skip if from domain == to domain + if ((fd and fd ~= td) or not fd) then + -- See if all or part of the To address + -- can be found in the Message-ID + if (mid:lower():find(to[1].addr:lower(), 1, true)) then + task:insert_result('MID_CONTAINS_TO', 1.0) + elseif (md and td == md:lower()) then + task:insert_result('MID_RHS_MATCH_TO', 1.0) + end + end + end +end + +-- MID checks from Steve Freegard +local check_mid_id = rspamd_config:register_symbol({ + name = 'CHECK_MID', + score = 0.0, + group = 'mid', + type = 'callback,mime', + callback = mid_check_func +}) +rspamd_config:register_virtual_symbol('MID_BARE_IP', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_BARE_IP', 2.0, 'Message-ID RHS is a bare IP address', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_RHS_NOT_FQDN', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_RHS_NOT_FQDN', 0.5, + 'Message-ID RHS is not a fully-qualified domain name', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_MISSING_BRACKETS', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_MISSING_BRACKETS', 0.5, 'Message-ID is missing <>\'s', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_RHS_IP_LITERAL', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_RHS_IP_LITERAL', 0.5, 'Message-ID RHS is an IP-literal', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_CONTAINS_FROM', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_CONTAINS_FROM', 1.0, 'Message-ID contains From address', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_RHS_MATCH_FROM', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_RHS_MATCH_FROM', 0.0, + 'Message-ID RHS matches From domain', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_RHS_MATCH_FROMTLD', 1.0, check_mid_id) 
+rspamd_config:set_metric_symbol('MID_RHS_MATCH_FROMTLD', 0.0, + 'Message-ID RHS matches From domain tld', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_CONTAINS_TO', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_CONTAINS_TO', 1.0, 'Message-ID contains To address', 'default', 'Message ID') +rspamd_config:register_virtual_symbol('MID_RHS_MATCH_TO', 1.0, check_mid_id) +rspamd_config:set_metric_symbol('MID_RHS_MATCH_TO', 1.0, 'Message-ID RHS matches To domain', 'default', 'Message ID') + +-- Another check from https://github.com/rspamd/rspamd/issues/4299 +rspamd_config:register_symbol { + type = 'normal,mime', + group = 'mid', + name = 'MID_END_EQ_FROM_USER_PART', + description = 'Message-ID RHS (after @) and MIME from local part are the same', + score = 4.0, + + callback = function(task) + local mid = task:get_header('Message-ID') + if not mid then + return + end + local mime_from = task:get_from('mime') + local _, _, mid_realm = mid:find("@([a-z]+)>?$") + if mid_realm and mime_from and mime_from[1] and mime_from[1].user then + if (mid_realm == mime_from[1].user) then + return true + end + end + end +} diff --git a/rules/misc.lua b/rules/misc.lua new file mode 100644 index 0000000..faf4a8f --- /dev/null +++ b/rules/misc.lua @@ -0,0 +1,864 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]-- + +-- Misc rules + +local E = {} +local fun = require "fun" +local rspamd_util = require "rspamd_util" +local rspamd_parsers = require "rspamd_parsers" +local rspamd_regexp = require "rspamd_regexp" +local lua_util = require "lua_util" +local bit = require "bit" +local rspamd_url = require "rspamd_url" +local url_flags_tab = rspamd_url.flags + +-- Different text parts +rspamd_config.R_PARTS_DIFFER = { + callback = function(task) + local distance = task:get_mempool():get_variable('parts_distance', 'double') + + if distance then + local nd = tonumber(distance) + -- ND is relation of different words to total words + if nd >= 0.5 then + local tw = task:get_mempool():get_variable('total_words', 'int') + + if tw then + local score + if tw > 30 then + -- We are confident about difference + score = (nd - 0.5) * 2.0 + else + -- We are not so confident about difference + score = (nd - 0.5) + end + task:insert_result('R_PARTS_DIFFER', score, + string.format('%.1f%%', tostring(100.0 * nd))) + end + end + end + return false + end, + score = 1.0, + description = 'Text and HTML parts differ', + group = 'body' +} + +-- Date issues +local date_id = rspamd_config:register_symbol({ + name = 'DATE_CB', + type = 'callback,mime', + callback = function(task) + local date_time = task:get_header('Date') + if date_time == nil or date_time == '' then + task:insert_result('MISSING_DATE', 1.0) + return + end + + local dm, err = rspamd_parsers.parse_smtp_date(date_time) + if err then + task:insert_result('INVALID_DATE', 1.0) + return + end + + local dt = task:get_date({ format = 'connect', gmt = true }) + local date_diff = dt - dm + + if date_diff > 86400 then + -- Older than a day + task:insert_result('DATE_IN_PAST', 1.0, tostring(math.floor(date_diff / 3600))) + elseif -date_diff > 7200 then + -- More than 2 hours in the future + task:insert_result('DATE_IN_FUTURE', 1.0, tostring(math.floor(-date_diff / 3600))) + end + end +}) + +rspamd_config:register_symbol({ + name = 'MISSING_DATE', 
+ score = 1.0, + description = 'Date header is missing', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'INVALID_DATE', + score = 1.5, + description = 'Malformed Date header', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'DATE_IN_FUTURE', + score = 4.0, + description = 'Message date is in the future', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +rspamd_config:register_symbol({ + name = 'DATE_IN_PAST', + score = 1.0, + description = 'Message date is in the past', + group = 'headers', + type = 'virtual', + parent = date_id, +}) + +local obscured_id = rspamd_config:register_symbol { + callback = function(task) + local susp_urls = task:get_urls_filtered({ 'obscured', 'zw_spaces' }) + + if susp_urls and susp_urls[1] then + local obs_flag = url_flags_tab.obscured + local zw_flag = url_flags_tab.zw_spaces + + for _, u in ipairs(susp_urls) do + local fl = u:get_flags_num() + if bit.band(fl, obs_flag) ~= 0 then + task:insert_result('R_SUSPICIOUS_URL', 1.0, u:get_host()) + end + if bit.band(fl, zw_flag) ~= 0 then + task:insert_result('ZERO_WIDTH_SPACE_URL', 1.0, u:get_host()) + end + end + end + + return false + end, + name = 'R_SUSPICIOUS_URL', + score = 5.0, + one_shot = true, + description = 'A message has been identified to contain an obfuscated or suspicious URL', + group = 'url' +} + +rspamd_config:register_symbol { + type = 'virtual', + name = 'ZERO_WIDTH_SPACE_URL', + score = 7.0, + one_shot = true, + description = 'Zero width space in URL', + group = 'url', + parent = obscured_id, +} + +rspamd_config.ENVFROM_PRVS = { + callback = function(task) + --[[ + Detect PRVS/BATV addresses to avoid FORGED_SENDER + https://en.wikipedia.org/wiki/Bounce_Address_Tag_Validation + + Signature syntax: + + prvs=TAG=USER@example.com BATV draft (https://tools.ietf.org/html/draft-levine-smtp-batv-01) + prvs=USER=TAG@example.com + 
btv1==TAG==USER@example.com Barracuda appliance + msprvs1=TAG=USER@example.com Sparkpost email delivery service + ]]-- + if not (task:has_from(1) and task:has_from(2)) then + return false + end + local envfrom = task:get_from(1) + local re_text = '^(?:(prvs|msprvs1)=([^=]+)=|btv1==[^=]+==)(.+@(.+))$' + local re = rspamd_regexp.create_cached(re_text) + local c = re:search(envfrom[1].addr:lower(), false, true) + if not c then + return false + end + local ef = c[1][4] + -- See if it matches the From header + local from = task:get_from(2) + if ef == from[1].addr:lower() then + return true + end + -- Check for prvs=USER=TAG@example.com + local t = c[1][2] + if t == 'prvs' then + local efr = c[1][3] .. '@' .. c[1][5] + if efr == from[1].addr:lower() then + return true + end + end + return false + end, + score = 0.0, + description = "Envelope From is a PRVS address that matches the From address", + group = 'headers', + type = 'mime', +} + +rspamd_config.ENVFROM_VERP = { + callback = function(task) + if not (task:has_from(1) and task:has_recipients(1)) then + return false + end + local envfrom = task:get_from(1) + local envrcpts = task:get_recipients(1) + -- VERP only works for single recipient messages + if #envrcpts > 1 then + return false + end + -- Get recipient and compute VERP address + local rcpt = envrcpts[1].addr:lower() + local verp = rcpt:gsub('@', '=') + -- Get the user portion of the envfrom + local ef_user = envfrom[1].user:lower() + -- See if the VERP representation of the recipient appears in it + if ef_user:find(verp, 1, true) + and not ef_user:find('+caf_=' .. 
verp, 1, true) -- Google Forwarding + and not ef_user:find('^srs[01]=') -- SRS + then + return true + end + return false + end, + score = 0.0, + description = "Envelope From is a VERP address", + group = "headers", + type = 'mime', +} + +local check_rcvd = rspamd_config:register_symbol { + name = 'CHECK_RCVD', + group = 'headers', + callback = function(task) + local rcvds = task:get_received_headers() + if not rcvds or #rcvds == 0 then + return false + end + + local all_tls = fun.all(function(rc) + return rc.flags and rc.flags['ssl'] + end, fun.filter(function(rc) + return rc.by_hostname and rc.by_hostname ~= 'localhost' + end, rcvds)) + + -- See if only the last hop was encrypted + if all_tls then + task:insert_result('RCVD_TLS_ALL', 1.0) + else + local rcvd = rcvds[1] + if rcvd.by_hostname and rcvd.by_hostname == 'localhost' then + -- Ignore artificial header from Rmilter + rcvd = rcvds[2] or {} + end + if rcvd.flags and rcvd.flags['ssl'] then + task:insert_result('RCVD_TLS_LAST', 1.0) + else + task:insert_result('RCVD_NO_TLS_LAST', 1.0) + end + end + + local auth = fun.any(function(rc) + return rc.flags and rc.flags['authenticated'] + end, rcvds) + + if auth then + task:insert_result('RCVD_VIA_SMTP_AUTH', 1.0) + end + end, + type = 'callback,mime', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_TLS_ALL', + description = 'All hops used encrypted transports', + score = 0.0, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_TLS_LAST', + description = 'Last hop used encrypted transports', + score = 0.0, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_NO_TLS_LAST', + description = 'Last hop did not use encrypted transports', + score = 0.1, + group = 'headers' +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_rcvd, + name = 'RCVD_VIA_SMTP_AUTH', + -- NB This does 
not mean sender was authenticated; see task:get_user() + description = 'Authenticated hand-off was seen in Received headers', + score = 0.0, + group = 'headers' +} + +rspamd_config.RCVD_HELO_USER = { + callback = function(task) + -- Check HELO argument from MTA + local helo = task:get_helo() + if (helo and helo:lower():find('^user$')) then + return true + end + -- Check Received headers + local rcvds = task:get_header_full('Received') + if not rcvds then + return false + end + for _, rcvd in ipairs(rcvds) do + local r = rcvd['decoded']:lower() + if (r:find("^%s*from%suser%s")) then + return true + end + if (r:find("helo[%s=]user[%s%)]")) then + return true + end + end + end, + description = 'HELO User spam pattern', + group = 'headers', + type = 'mime', + score = 3.0 +} + +rspamd_config.URI_COUNT_ODD = { + callback = function(task) + local ct = task:get_header('Content-Type') + if (ct and ct:lower():find('^multipart/alternative')) then + local urls = task:get_urls_filtered(nil, { 'subject', 'html_displayed', 'special' }) or {} + local nurls = fun.foldl(function(acc, val) + return acc + val:get_count() + end, 0, urls) + + if nurls % 2 == 1 then + return true, 1.0, tostring(nurls) + end + end + end, + description = 'Odd number of URIs in multipart/alternative message', + score = 1.0, + group = 'url', +} + +rspamd_config.HAS_ATTACHMENT = { + callback = function(task) + local parts = task:get_parts() + if parts and #parts > 1 then + for _, p in ipairs(parts) do + local cd = p:get_header('Content-Disposition') + if (cd and cd:lower():match('^attachment')) then + return true + end + end + end + end, + description = 'Message contains attachments', + group = 'body', +} + +-- Requires freemail maps loaded in multimap +local function freemail_reply_neq_from(task) + if not task:has_symbol('FREEMAIL_REPLYTO') or not task:has_symbol('FREEMAIL_FROM') then + return false + end + local frt = task:get_symbol('FREEMAIL_REPLYTO') + local ff = task:get_symbol('FREEMAIL_FROM') + local 
frt_opts = frt[1]['options'] + local ff_opts = ff[1]['options'] + return (frt_opts and ff_opts and frt_opts[1] ~= ff_opts[1]) +end + +rspamd_config:register_symbol({ + name = 'FREEMAIL_REPLYTO_NEQ_FROM_DOM', + callback = freemail_reply_neq_from, + description = 'The From and Reply-To addresses in the email are from different freemail services', + score = 3.0, + group = 'headers', +}) +rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', 'FREEMAIL_REPLYTO') +rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', 'FREEMAIL_FROM') + +rspamd_config.OMOGRAPH_URL = { + callback = function(task) + local urls = task:get_urls() + + if urls then + local bad_omographs = 0 + local single_bad_omograps = 0 + local bad_urls = {} + local seen = {} + + fun.each(function(u) + if u:is_phished() then + + local h1 = u:get_host() + local h2 = u:get_phished() + if h2 then + -- Due to changes of the phished flag in 2.8 + h2 = h2:get_host() + end + if h1 and h2 then + local selt = string.format('%s->%s', h1, h2) + if not seen[selt] and rspamd_util.is_utf_spoofed(h1, h2) then + bad_urls[#bad_urls + 1] = selt + bad_omographs = bad_omographs + 1 + end + seen[selt] = true + end + end + if not u:is_html_displayed() then + local h = u:get_tld() + + if h then + if not seen[h] and rspamd_util.is_utf_spoofed(h) then + bad_urls[#bad_urls + 1] = h + single_bad_omograps = single_bad_omograps + 1 + end + seen[h] = true + end + end + end, urls) + + if bad_omographs > 0 then + return true, 1.0, bad_urls + elseif single_bad_omograps > 0 then + return true, 0.5, bad_urls + end + end + + return false + end, + score = 5.0, + group = 'url', + description = 'URL contains both latin and non-latin characters' +} + +rspamd_config.URL_IN_SUBJECT = { + callback = function(task) + local urls = task:get_urls() + + if urls then + for _, u in ipairs(urls) do + local flags = u:get_flags() + if flags.subject then + if flags.schemaless then + return true, 0.1, u:get_host() + end + local subject = 
task:get_subject() + + if subject then + if tostring(u) == subject then + return true, 1.0, u:get_host() + end + end + return true, 0.25, u:get_host() + end + end + end + + return false + end, + score = 4.0, + group = 'subject', + type = 'mime', + description = 'Subject contains URL' +} + +local aliases_id = rspamd_config:register_symbol { + type = 'prefilter', + name = 'EMAIL_PLUS_ALIASES', + callback = function(task) + local function check_from(type) + if task:has_from(type) then + local addr = task:get_from(type)[1] + local na, tags = lua_util.remove_email_aliases(addr) + if na then + task:set_from(type, addr, 'alias') + task:insert_result('TAGGED_FROM', 1.0, fun.totable( + fun.filter(function(t) + return t and #t > 0 + end, tags))) + end + end + end + + check_from('smtp') + check_from('mime') + + local function check_rcpt(type) + if task:has_recipients(type) then + local modified = false + local all_tags = {} + local addrs = task:get_recipients(type) + + for _, addr in ipairs(addrs) do + local na, tags = lua_util.remove_email_aliases(addr) + if na then + modified = true + fun.each(function(t) + table.insert(all_tags, t) + end, + fun.filter(function(t) + return t and #t > 0 + end, tags)) + end + end + + if modified then + task:set_recipients(type, addrs, 'alias') + task:insert_result('TAGGED_RCPT', 1.0, all_tags) + end + end + end + + check_rcpt('smtp') + check_rcpt('mime') + end, + priority = lua_util.symbols_priorities.top + 1, + description = 'Removes plus aliases from the email', + group = 'headers', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = aliases_id, + name = 'TAGGED_RCPT', + description = 'SMTP recipients have plus tags', + group = 'headers', + score = 0.0, +} +rspamd_config:register_symbol { + type = 'virtual', + parent = aliases_id, + name = 'TAGGED_FROM', + description = 'SMTP from has plus tags', + group = 'headers', + score = 0.0, +} + +local check_from_display_name = rspamd_config:register_symbol { + type = 
'callback,mime', + name = 'FROM_DISPLAY_CALLBACK', + callback = function(task) + local from = task:get_from(2) + if not (from and from[1] and from[1].name) then + return false + end + -- See if we can parse an email address from the name + local parsed = rspamd_parsers.parse_mail_address(from[1].name, task:get_mempool()) + if not parsed then + return false + end + if not (parsed[1] and parsed[1]['addr']) then + return false + end + -- Make sure we did not mistake e.g. <something>@<name> for an email address + if not parsed[1]['domain'] or not parsed[1]['domain']:find('%.') then + return false + end + -- See if the parsed domains differ + if not rspamd_util.strequal_caseless(from[1]['domain'], parsed[1]['domain']) then + -- See if the destination domain is the same as the spoof + local mto = task:get_recipients(2) + local sto = task:get_recipients(1) + if mto then + for _, to in ipairs(mto) do + if to['domain'] ~= '' and rspamd_util.strequal_caseless(to['domain'], parsed[1]['domain']) then + task:insert_result('SPOOF_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + return false + end + end + end + if sto then + for _, to in ipairs(sto) do + if to['domain'] ~= '' and rspamd_util.strequal_caseless(to['domain'], parsed[1]['domain']) then + task:insert_result('SPOOF_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + return false + end + end + end + task:insert_result('FROM_NEQ_DISPLAY_NAME', 1.0, from[1]['domain'], parsed[1]['domain']) + end + return false + end, + group = 'headers', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_from_display_name, + name = 'SPOOF_DISPLAY_NAME', + description = 'Display name is being used to spoof and trick the recipient', + group = 'headers', + score = 8.0, +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_from_display_name, + name = 'FROM_NEQ_DISPLAY_NAME', + group = 'headers', + description = 'Display name contains an email address different to the From 
address', + score = 4.0, +} + +rspamd_config.SPOOF_REPLYTO = { + callback = function(task) + -- First check for a Reply-To header + local rt = task:get_header_full('Reply-To') + if not rt or not rt[1] then + return false + end + -- Get From and To headers + rt = rt[1]['value'] + local from = task:get_from(2) + local to = task:get_recipients(2) + if not (from and from[1] and from[1].addr) then + return false + end + if (to and to[1] and to[1].addr) then + -- Handle common case for Web Contact forms of From = To + if rspamd_util.strequal_caseless(from[1].addr, to[1].addr) then + return false + end + end + -- SMTP recipients must contain From domain + to = task:get_recipients(1) + if not to then + return false + end + -- Try mitigate some possible FPs on mailing list posts + if #to == 1 and rspamd_util.strequal_caseless(to[1].addr, from[1].addr) then + return false + end + local found_fromdom = false + for _, t in ipairs(to) do + if rspamd_util.strequal_caseless(t.domain, from[1].domain) then + found_fromdom = true + break + end + end + if not found_fromdom then + return false + end + -- Parse Reply-To header + local parsed = ((rspamd_parsers.parse_mail_address(rt, task:get_mempool()) or E)[1] or E).domain + if not parsed then + return false + end + -- Reply-To domain must be different to From domain + if not rspamd_util.strequal_caseless(parsed, from[1].domain) then + return true, from[1].domain, parsed + end + return false + end, + group = 'headers', + type = 'mime', + description = 'Reply-To is being used to spoof and trick the recipient to send an off-domain reply', + score = 6.0 +} + +rspamd_config.INFO_TO_INFO_LU = { + callback = function(task) + if not task:has_header('List-Unsubscribe') then + return false + end + local from = task:get_from('mime') + if not (from and from[1] and rspamd_util.strequal_caseless(from[1].user, 'info')) then + return false + end + local to = task:get_recipients('smtp') + if not to then + return false + end + local found = false + 
for _, r in ipairs(to) do + if rspamd_util.strequal_caseless(r['user'], 'info') then + found = true + end + end + if found then + return true + end + return false + end, + description = 'info@ From/To address with List-Unsubscribe headers', + group = 'headers', + score = 2.0, + type = 'mime', +} + +-- Detects bad content-transfer-encoding for text parts + +rspamd_config.R_BAD_CTE_7BIT = { + callback = function(task) + local tp = task:get_text_parts() or {} + + for _, p in ipairs(tp) do + local cte = p:get_mimepart():get_cte() or '' + if cte ~= '8bit' and p:has_8bit_raw() then + local _, _, attrs = p:get_mimepart():get_type_full() + local mul = 1.0 + local params = { cte } + if attrs then + if attrs.charset and attrs.charset:lower() == "utf-8" then + -- Penalise rule as people don't know that utf8 is surprisingly + -- eight bit encoding + mul = 0.3 + table.insert(params, "utf8") + end + end + + return true, mul, params + end + end + + return false + end, + score = 3.5, + description = 'Detects bad Content-Transfer-Encoding for text parts', + group = 'headers', + type = 'mime', +} + +local check_encrypted_name = rspamd_config:register_symbol { + name = 'BOGUS_ENCRYPTED_AND_TEXT', + callback = function(task) + local parts = task:get_parts() or {} + local seen_encrypted, seen_text + local opts = {} + + local function check_part(part) + if part:is_multipart() then + local children = part:get_children() or {} + local text_kids = {} + + for _, cld in ipairs(children) do + if cld:is_multipart() then + check_part(cld) + elseif cld:is_text() then + seen_text = true + text_kids[#text_kids + 1] = cld + else + local type, subtype, _ = cld:get_type_full() + + if type:lower() == 'application' then + if string.find(subtype:lower(), 'pkcs7%-mime') then + -- S/MIME encrypted part + seen_encrypted = true + table.insert(opts, 'smime part') + task:insert_result('ENCRYPTED_SMIME', 1.0) + elseif string.find(subtype:lower(), 'pkcs7%-signature') then + task:insert_result('SIGNED_SMIME', 
1.0) + elseif string.find(subtype:lower(), 'pgp%-encrypted') then + -- PGP/GnuPG encrypted part + seen_encrypted = true + table.insert(opts, 'pgp part') + task:insert_result('ENCRYPTED_PGP', 1.0) + elseif string.find(subtype:lower(), 'pgp%-signature') then + task:insert_result('SIGNED_PGP', 1.0) + end + end + end + if seen_text and seen_encrypted then + -- Ensure that our seen text is not really part of pgp #3205 + for _, tp in ipairs(text_kids) do + local t, _ = tp:get_type() + seen_text = false -- reset temporary + if t and t == 'text' then + seen_text = true + break + end + end + end + end + end + end + + for _, part in ipairs(parts) do + check_part(part) + end + + if seen_text and seen_encrypted then + return true, 1.0, opts + end + + return false + end, + score = 10.0, + description = 'Bogus mix of encrypted and text/html payloads', + group = 'mime_types', +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'ENCRYPTED_PGP', + description = 'Message is encrypted with PGP', + group = 'mime_types', + score = -0.5, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'ENCRYPTED_SMIME', + description = 'Message is encrypted with S/MIME', + group = 'mime_types', + score = -0.5, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'SIGNED_PGP', + description = 'Message is signed with PGP', + group = 'mime_types', + score = -2.0, + one_shot = true +} + +rspamd_config:register_symbol { + type = 'virtual', + parent = check_encrypted_name, + name = 'SIGNED_SMIME', + description = 'Message is signed with S/MIME', + group = 'mime_types', + score = -2.0, + one_shot = true +} + +rspamd_config.COMPLETELY_EMPTY = { + callback = function(task) + return (task:get_size() == 0) + end, + flags = 'empty', + group = 'blankspam', + score = 15 +} diff --git a/rules/parts.lua b/rules/parts.lua new file mode 
100644 index 0000000..2be9ff8 --- /dev/null +++ b/rules/parts.lua @@ -0,0 +1,11 @@ +rspamd_config.SINGLE_SHORT_PART = { + callback = function(task) + local parts = task:get_parts() + if #parts ~= 1 then return end + local text = parts[1]:get_text() + if not text then return end + if text:get_length() >= 64 then return end + return true + end, + score = 0.0, +} diff --git a/rules/regexp/compromised_hosts.lua b/rules/regexp/compromised_hosts.lua new file mode 100644 index 0000000..e120b18 --- /dev/null +++ b/rules/regexp/compromised_hosts.lua @@ -0,0 +1,223 @@ +local reconf = config['regexp'] +local rspamd_regexp = require 'rspamd_regexp' +local util = require 'rspamd_util' + +reconf['HAS_PHPMAILER_SIG'] = { + -- PHPMailer 6.0.0 and older used hex hash in boundary: + -- boundary="b1_2a45d5e29f78d3408e318878b049f474" + -- Since 6.0.1 it uses base64 (without =+/): + -- boundary="b1_uBN0UPD3n6RU04VPxI54tENiDgaCGoh15l9s73oFnlM" + -- boundary="b1_Ez5tmpb4bSqknyUZ1B1hIvLAfR1MlspDEKGioCOXc" + -- https://github.com/PHPMailer/PHPMailer/blob/v6.4.0/src/PHPMailer.php#L2660 + re = [[X-Mailer=/^PHPMailer /H || Content-Type=/boundary="b1_[0-9a-zA-Z]+"/H]], + description = "PHPMailer signature", + group = "compromised_hosts" +} + +reconf['PHP_SCRIPT_ROOT'] = { + re = "X-PHP-Originating-Script=/^0:/Hi", + description = "PHP Script executed by root UID", + score = 1.0, + group = "compromised_hosts" +} + +reconf['HAS_X_POS'] = { + re = "header_exists('X-PHP-Originating-Script')", + description = "Has X-PHP-Originating-Script header", + group = "compromised_hosts" +} + +reconf['HAS_X_PHP_SCRIPT'] = { + re = "header_exists('X-PHP-Script')", + description = "Has X-PHP-Script header", + group = "compromised_hosts" +} + +-- X-Source: +-- X-Source-Args: /usr/sbin/proxyexec -q -d -s /var/run/proxyexec/cagefs.sock/socket /bin/cagefs.server +-- X-Source-Dir: silvianimberg.com:/public_html/wp-content/themes/ultimatum +reconf['HAS_X_SOURCE'] = { + re = "header_exists('X-Source') || 
header_exists('X-Source-Args') || header_exists('X-Source-Dir')", + description = "Has X-Source headers", + group = "compromised_hosts" +} + +-- X-Authenticated-Sender: accord.host-care.com: sales@cortaflex.si +rspamd_config.HAS_X_AS = { + callback = function(task) + local xas = task:get_header('X-Authenticated-Sender') + if not xas then + return false + end + local _, _, auth = xas:find('[^:]+:%s(.+)$') + if auth then + -- TODO: see if we can parse an e-mail address from auth + -- and see if it matches the from address or not + return true, auth + else + return true + end + end, + description = 'Has X-Authenticated-Sender header', + group = "compromised_hosts", + score = 0.0 +} + +-- X-Get-Message-Sender-Via: accord.host-care.com: authenticated_id: sales@cortaflex.si +rspamd_config.HAS_X_GMSV = { + callback = function(task) + local xgmsv = task:get_header('X-Get-Message-Sender-Via') + if not xgmsv then + return false + end + local _, _, auth = xgmsv:find('authenticated_id: (.+)$') + if auth then + -- TODO: see if we can parse an e-mail address from auth + -- and see if it matches the from address or not. 
+ return true, auth + else + return true + end + end, + description = 'Has X-Get-Message-Sender-Via: header', + group = "compromised_hosts", + score = 0.0, +} + +-- X-AntiAbuse: This header was added to track abuse, please include it with any abuse report +-- X-AntiAbuse: Primary Hostname - accord.host-care.com +-- X-AntiAbuse: Original Domain - swaney.com +-- X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12] +-- X-AntiAbuse: Sender Address Domain - dropbox.com +reconf['HAS_X_ANTIABUSE'] = { + re = "header_exists('X-AntiAbuse')", + description = "Has X-AntiAbuse headers", + group = "compromised_hosts" +} + +reconf['X_PHP_EVAL'] = { + re = [[X-PHP-Script=/eval\(\)'d code/H || X-PHP-Originating-Script=/eval\(\)'d code/H]], + description = "Message sent using eval'd PHP", + score = 4.0, + group = "compromised_hosts" +} + +reconf['HAS_WP_URI'] = { + re = '/\\/wp-[^\\/]+\\//Ui', + description = "Contains WordPress URIs", + one_shot = true, + group = "compromised_hosts" +} + +reconf['WP_COMPROMISED'] = { + re = '/\\/wp-(?:content|includes)[^\\/]+\\//Ui', + description = "URL that is pointing to a compromised WordPress installation", + one_shot = true, + group = "compromised_hosts" +} + +reconf['PHP_XPS_PATTERN'] = { + re = 'X-PHP-Script=/^[^\\. 
]+\\.[^\\.\\/ ]+\\/sendmail\\.php\\b/Hi', + description = "Message contains X-PHP-Script pattern", + group = "compromised_hosts" +} + +reconf['HAS_XAW'] = { + re = "header_exists('X-Authentication-Warning')", + description = "Has X-Authentication-Warning header", + group = "compromised_hosts" +} + +-- X-Authentication-Warning: localhost.localdomain: www-data set sender to info@globalstock.lv using -f +reconf['XAW_SERVICE_ACCT'] = { + re = "X-Authentication-Warning=/\\b(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www) set sender to\\b/Hi", + description = "Message originally from a service account", + score = 1.0, + group = "compromised_hosts" +} + +reconf['ENVFROM_SERVICE_ACCT'] = { + re = "check_smtp_data('from',/^(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www)@/i)", + description = "Envelope from is a service account", + score = 1.0, + group = "compromised_hosts" +} + +reconf['HIDDEN_SOURCE_OBJ'] = { + re = "X-PHP-Script=/\\/\\..+/Hi || X-PHP-Originating-Script=/(?:^\\d+:|\\/)\\..+/Hi || X-Source-Args=/\\/\\..+/Hi", + description = "UNIX hidden file/directory in path", + score = 2.0, + group = "compromised_hosts" +} + +local hidden_uri_re = rspamd_regexp.create_cached('/(?!\\/\\.well[-_]known\\/)(?:^\\.[A-Za-z0-9]|\\/' .. 
+ '\\.[A-Za-z0-9]|\\/\\.\\.\\/)/i') +rspamd_config.URI_HIDDEN_PATH = { + callback = function(task) + local urls = task:get_urls(false) + if (urls) then + for _, url in ipairs(urls) do + if (not (url:is_subject() and url:is_html_displayed())) then + local path = url:get_path() + if (hidden_uri_re:match(path)) then + -- TODO: need url:is_schemeless() to improve this + return true, 1.0, url:get_text() + end + end + end + end + end, + description = 'Message contains URI with a hidden path', + score = 1.0, + group = 'compromised_hosts', +} + +reconf['MID_RHS_WWW'] = { + re = "Message-Id=/@www\\./Hi", + description = "Message-ID from www host", + score = 0.5, + group = "compromised_hosts" +} + +rspamd_config.FROM_SERVICE_ACCT = { + callback = function(task) + local re = rspamd_regexp.create_cached('/^(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www)@/i'); + -- From + local from = task:get_from(2) + if (from and from[1]) then + if (re:match(from[1].addr)) then + return true + end + end + -- Sender + local sender = task:get_header('Sender') + if sender then + local s = util.parse_mail_address(sender, task:get_mempool()) + if (s and s[1]) then + if (re:match(s[1].addr)) then + return true + end + end + end + -- Reply-To + local replyto = task:get_header('Reply-To') + if replyto then + local rt = util.parse_mail_address(replyto, task:get_mempool()) + if (rt and rt[1]) then + if (re:match(rt[1].addr)) then + return true + end + end + end + end, + description = "Sender/From/Reply-To is a service account", + score = 1.0, + group = "compromised_hosts" +} + +reconf['WWW_DOT_DOMAIN'] = { + re = "From=/@www\\./Hi || Sender=/@www\\./Hi || Reply-To=/@www\\./Hi || check_smtp_data('from',/@www\\./i)", + description = "From/Sender/Reply-To or Envelope is @www.domain.com", + score = 0.5, + group = "compromised_hosts" +} diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua new file mode 100644 index 0000000..0624997 --- /dev/null +++ b/rules/regexp/headers.lua @@ 
-0,0 +1,1046 @@ +-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license: +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to you under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at: +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +-- Definitions of header regexps + +local reconf = config['regexp'] + +-- Subject needs encoding +-- Define encodings types +local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX' +local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX' +-- Define whether subject must be encoded (contains non-7bit characters) +local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X' +-- Final rule +reconf['SUBJECT_NEEDS_ENCODING'] = { + re = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime), + score = 1.0, + mime_only = true, + description = 'Subject needs encoding', + group = 'headers' +} + +local from_encoded_b64 = 'From=/=\\?\\S+\\?B\\?/iX' +local from_encoded_qp = 'From=/=\\?\\S+\\?Q\\?/iX' +local raw_from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X' +reconf['FROM_NEEDS_ENCODING'] = { + re = string.format('!(%s) & !(%s) & (%s)', from_encoded_b64, from_encoded_qp, raw_from_needs_mime), + score = 1.0, + mime_only = true, + description = 'From 
header needs encoding', + group = 'headers' +} + +local to_encoded_b64 = 'To=/=\\?\\S+\\?B\\?/iX' +local to_encoded_qp = 'To=/=\\?\\S+\\?Q\\?/iX' +local raw_to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X' +reconf['TO_NEEDS_ENCODING'] = { + re = string.format('!(%s) & !(%s) & (%s)', to_encoded_b64, to_encoded_qp, raw_to_needs_mime), + score = 1.0, + mime_only = true, + description = 'To header needs encoding', + group = 'headers' +} + +-- Detects that there is no space in From header (e.g. Some Name<some@host>) +reconf['R_NO_SPACE_IN_FROM'] = { + re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X', + score = 1.0, + mime_only = true, + description = 'No space in From header', + group = 'headers' +} + +reconf['TO_WRAPPED_IN_SPACES'] = { + re = [[To=/<\s[-.\w]+\@[-.\w]+\s>/X]], + score = 2.0, + mime_only = true, + description = 'To address is wrapped in spaces inside angle brackets (e.g. display-name < local-part@domain >)', + group = 'headers' +} + +-- Detects missing Subject header +reconf['MISSING_SUBJECT'] = { + re = '!raw_header_exists(Subject)', + score = 2.0, + mime_only = true, + description = 'Subject header is missing', + group = 'headers' +} + +rspamd_config.EMPTY_SUBJECT = { + score = 1.0, + mime_only = true, + description = 'Subject header is empty', + group = 'headers', + callback = function(task) + local hdr = task:get_header('Subject') + if hdr and #hdr == 0 then + return true + end + return false + end +} + +-- Detects missing To header +reconf['MISSING_TO'] = { + re = '!raw_header_exists(To)', + score = 2.0, + description = 'To header is missing', + group = 'headers', + mime_only = true, +} + +-- Detects undisclosed recipients +reconf['R_UNDISC_RCPT'] = { + -- match: + -- To: undisclosed-recipients:; + -- To: Undisclosed recipients:; + -- To: undisclosed-recipients: ; + -- To: <Undisclosed-Recipient:;> + -- To: <"Undisclosed-Recipient:;"> + -- To: "undisclosed-recipients (utajeni adresati)": ; + -- To: Undisclosed recipients: + -- but do 
not match: + -- Undisclosed Recipient <user@example.org> + re = [[To=/^<?"?undisclosed[- ]recipients?\b.*:/i{header}]], + score = 3.0, + description = 'Recipients are absent or undisclosed', + group = 'headers', + mime_only = true, +} + +-- Detects missing Message-ID +local has_mid = 'header_exists(Message-Id)' +reconf['MISSING_MID'] = { + re = '!header_exists(Message-Id)', + score = 2.5, + description = 'Message-ID header is missing', + group = 'headers', + mime_only = true, +} + +-- Received seems to be fake +reconf['R_RCVD_SPAMBOTS'] = { + re = 'Received=/^from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by [-.\\w+]{5,255}; [SMTWF][a-z][a-z],' .. + ' [\\s\\d]?\\d [JFMAJSOND][a-z][a-z] \\d{4} \\d{2}:\\d{2}:\\d{2} [-+]\\d{4}$/mH', + score = 3.0, + description = 'Spambots signatures in received headers', + group = 'headers', + mime_only = true, +} + +-- Charset is missing in message +reconf['R_MISSING_CHARSET'] = { + re = string.format('!is_empty_body() & content_type_is_type(text) & content_type_is_subtype(plain) & !content_type_has_param(charset) & !%s', + 'compare_transfer_encoding(7bit)'), + score = 0.5, + description = 'Charset header is missing', + group = 'headers', + mime_only = true, +} + +-- Find forged Outlook MUA +-- Yahoo groups messages +local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H' +-- Outlook MUA +local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H' +local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H' +reconf['FORGED_OUTLOOK_HTML'] = { + re = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, 'has_only_html_part()'), + score = 5.0, + description = 'Forged Outlook HTML signature', + group = 'headers', + mime_only = true, +} + +-- Recipients seems to be likely with each other (only works when recipients count is more than 5 recipients) +reconf['SUSPICIOUS_RECIPS'] = { + re = 'compare_recipients_distance(0.65)', + score = 1.5, + description = 'Recipients seems to be 
autogenerated (works if recipients count is more than 5)', + group = 'headers', + mime_only = true, +} + +-- Recipients list seems to be sorted +reconf['SORTED_RECIPS'] = { + re = 'is_recipients_sorted()', + score = 3.5, + description = 'Recipients list seems to be sorted', + group = 'headers', + mime_only = true, +} + +-- Spam string at the end of message to make statistics faults +reconf['TRACKER_ID'] = { + re = '/^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\\s*\\z/isPr', + score = 3.84, + description = 'Spam string at the end of message to make statistics fault', + group = 'headers', + mime_only = true, +} + +-- From contains only 7bit characters (parsed headers are used) +local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- From that contains encoded characters while base 64 is not needed as all symbols are 7bit +reconf['FROM_EXCESS_BASE64'] = { + re = string.format('%s & !%s', from_encoded_b64, from_needs_mime), + score = 1.5, + description = 'From header is unnecessarily encoded in base64', + group = 'excessb64', + mime_only = true, +} + +-- From that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +reconf['FROM_EXCESS_QP'] = { + re = string.format('%s & !%s', from_encoded_qp, from_needs_mime), + score = 1.2, + description = 'From header is unnecessarily encoded in quoted-printable', + group = 'excessqp' +} + +-- To contains only 7bit characters (parsed headers are used) +local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- To that contains encoded characters while base 64 is not needed as all symbols are 7bit +reconf['TO_EXCESS_BASE64'] = { + re = string.format('%s & !%s', to_encoded_b64, to_needs_mime), + score = 1.5, + description = 'To header is unnecessarily encoded in base64', + group = 'excessb64' +} + +-- To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Final rule +reconf['TO_EXCESS_QP'] = { + re = 
string.format('%s & !%s', to_encoded_qp, to_needs_mime), + score = 1.2, + description = 'To header is unnecessarily encoded in quoted-printable', + group = 'excessqp' +} + +-- Reply-To that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that Reply-To header is encoded with base64 (search in raw headers) +local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX' +-- Reply-To contains only 7bit characters (parsed headers are used) +local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- Final rule +reconf['REPLYTO_EXCESS_BASE64'] = { + re = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime), + score = 1.5, + description = 'Reply-To header is unnecessarily encoded in base64', + group = 'excessb64' +} + +-- Reply-To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit +-- Regexp that checks that Reply-To header is encoded with quoted-printable (search in raw headers) +local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['REPLYTO_EXCESS_QP'] = { + re = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime), + score = 1.2, + description = 'Reply-To header is unnecessarily encoded in quoted-printable', + group = 'excessqp' +} + +-- Cc that contains encoded characters while base 64 is not needed as all symbols are 7bit +-- Regexp that checks that Cc header is encoded with base64 (search in raw headers) +local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX' +-- Cc contains only 7bit characters (parsed headers are used) +local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +-- Final rule +reconf['CC_EXCESS_BASE64'] = { + re = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime), + score = 1.5, + description = 'Cc header is unnecessarily encoded in base64', + group = 'excessb64' +} + +-- Cc that contains encoded characters while quoted-printable is not needed as 
all symbols are 7bit +-- Regexp that checks that Cc header is encoded with quoted-printable (search in raw headers) +local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX' +-- Final rule +reconf['CC_EXCESS_QP'] = { + re = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime), + score = 1.2, + description = 'Cc header is unnecessarily encoded in quoted-printable', + group = 'excessqp' +} + +local subj_encoded_b64 = 'Subject=/\\=\\?\\S+\\?B\\?/iX' +local subj_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr' +reconf['SUBJ_EXCESS_BASE64'] = { + re = string.format('%s & !%s', subj_encoded_b64, subj_needs_mime), + score = 1.5, + description = 'Subject header is unnecessarily encoded in base64', + group = 'excessb64' +} + +local subj_encoded_qp = 'Subject=/\\=\\?\\S+\\?Q\\?/iX' +reconf['SUBJ_EXCESS_QP'] = { + re = string.format('%s & !%s', subj_encoded_qp, subj_needs_mime), + score = 1.2, + description = 'Subject header is unnecessarily encoded in quoted-printable', + group = 'excessqp' +} + +-- Detect forged outlook headers +-- OE X-Mailer header +local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H' +-- OE Message ID format +local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH' +local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/H' +-- EZLM remail of message +local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H' +-- Header of wacky sendmail +local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H' +-- Iplanet received header +local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H' +-- Hotmail message id (the closing angle bracket is optional, like the opening one) +local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl>?$/H' +-- Sympatico message id +local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/H' +-- Mailman message id +-- 
https://bazaar.launchpad.net/~mailman-coders/mailman/2.1/view/head:/Mailman/Utils.py#L811 +local mailman_msgid = [[Message-ID=/^<mailman\.\d+\.\d+\.\d+\.[-+.:=\w]+@[-a-zA-Z\d.]+>$/H]] +-- Message id seems to be forged +local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)', + lyris_ezml_remailer, wacky_sendmail_version, + iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid, mailman_msgid) +-- Outlook express data seems to be forged +local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid) +-- Outlook specific headers +local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H' +local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/H' +local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/H' +local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/H' +-- Forged outlook headers +local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)', + outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid) +-- Outlook versions that should be excluded from summary rule +-- (dots are escaped so that only the literal version strings match) +local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10\\.0\\.3416$/H' +local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6\\.00\\.3790\\.3959$/H' +-- Summary rule for forged outlook +reconf['FORGED_MUA_OUTLOOK'] = { + re = string.format('(%s | %s) & !%s & !%s & !%s', + forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid), + score = 3.0, + description = 'Forged Outlook MUA', + group = 'mua' +} + +-- HTML outlook signs +local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)' +local tag_exists_html = 'has_html_tag(html)' +local tag_exists_head = 'has_html_tag(head)' +local tag_exists_meta = 'has_html_tag(meta)' +local tag_exists_body = 'has_html_tag(body)' +reconf['FORGED_OUTLOOK_TAGS'] = { + re = string.format('!%s & %s & %s & !(%s & %s & %s & %s)', + yahoo_bulk, 
any_outlook_mua, mime_html, tag_exists_html, tag_exists_head, + tag_exists_meta, tag_exists_body), + score = 2.1, + description = "Message pretends to be send from Outlook but has 'strange' tags", + group = 'headers' +} + +-- Forged OE/MSO boundary +reconf['SUSPICIOUS_BOUNDARY'] = { + re = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(00EBFFA4|0102FFA4|32C6FFA4|3302FFA4)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX', + score = 5.0, + description = 'Suspicious boundary in Content-Type header', + group = 'mua' +} +-- Forged OE/MSO boundary +reconf['SUSPICIOUS_BOUNDARY2'] = { + re = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(01C6527E)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX', + score = 4.0, + description = 'Suspicious boundary in Content-Type header', + group = 'mua' +} +-- Forged OE/MSO boundary +reconf['SUSPICIOUS_BOUNDARY3'] = { + re = 'Content-Type=/^\\s*multipart.+boundary="-----000-00\\d\\d-01C[\\dA-F]{5}-[\\dA-F]{8}"[\\r\\n]*$/siX', + score = 3.0, + description = 'Suspicious boundary in Content-Type header', + group = 'mua' +} +-- Forged OE/MSO boundary +local suspicious_boundary_01C4 = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_01C4[\\dA-F]{4}\\.[A-Z\\d]{8}"[\\r\\n]*$/siX' +local suspicious_boundary_01C4_date = 'Date=/^\\s*\\w\\w\\w,\\s+\\d+\\s+\\w\\w\\w 20(0[56789]|1\\d)/' +reconf['SUSPICIOUS_BOUNDARY4'] = { + re = string.format('(%s) & (%s)', suspicious_boundary_01C4, suspicious_boundary_01C4_date), + score = 4.0, + description = 'Suspicious boundary in Content-Type header', + group = 'mua' +} + +-- Detect forged The Bat! headers +-- The Bat! X-Mailer header +local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H' +-- The Bat! common Message-ID template +local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH' +-- Correct The Bat! 
Message-ID template +local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH' +-- Summary rule for forged The Bat! Message-ID header +reconf['FORGED_MUA_THEBAT_MSGID'] = { + re = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid), + score = 4.0, + description = 'Message pretends to be send from The Bat! but has forged Message-ID', + group = 'mua' +} +-- Summary rule for forged The Bat! Message-ID header with unknown template +reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid), + score = 3.0, + description = 'Message pretends to be send from The Bat! but has forged Message-ID', + group = 'mua' +} + +-- Detect forged KMail headers +-- KMail User-Agent header +local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H' +-- KMail common Message-ID template +local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH' +-- Summary rule for forged KMail Message-ID header with unknown template +reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, unusable_msgid), + score = 2.5, + description = 'Message pretends to be send from KMail but has forged Message-ID', + group = 'mua' +} + +-- Detect forged Opera Mail headers +-- Opera Mail User-Agent header +local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H' +-- Opera Mail Message-ID template +local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/H' +-- Rule for forged Opera Mail Message-ID header +reconf['FORGED_MUA_OPERA_MSGID'] = { + re = string.format('(%s) & !(%s) & !(%s)', opera1x_mua, opera1x_msgid, unusable_msgid), + score = 4.0, + description = 'Message pretends to be send from Opera Mail but has forged Message-ID', + group = 'mua' +} + +-- Detect 
forged Mozilla Mail/Thunderbird/Seamonkey/Postbox headers +-- Mozilla based X-Mailer +local user_agent_mozilla5 = 'User-Agent=/^\\s*Mozilla\\/5\\.0/H' +local user_agent_thunderbird = 'User-Agent=/^\\s*(Thunderbird|Mozilla Thunderbird|Mozilla\\/.*Gecko\\/.*(Thunderbird|Betterbird|Icedove)\\/)/H' +local user_agent_seamonkey = 'User-Agent=/^\\s*Mozilla\\/5\\.0\\s.+\\sSeaMonkey\\/\\d+\\.\\d+/H' +local user_agent_postbox = [[User-Agent=/^\s*Mozilla\/5\.0\s\([^)]+\)\sGecko\/\d+\sPostboxApp\/\d+(?:\.\d+){2,3}$/H]] +local user_agent_mozilla = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla5, user_agent_thunderbird, + user_agent_seamonkey, user_agent_postbox) +-- Mozilla based common Message-ID template +local mozilla_msgid_common = 'Message-ID=/^\\s*<[\\dA-F]{8}\\.\\d{1,7}\\@([^>\\.]+\\.)+[^>\\.]+>$/H' +local mozilla_msgid_common_sec = 'Message-ID=/^\\s*<[\\da-f]{8}-([\\da-f]{4}-){3}[\\da-f]{12}\\@([^>\\.]+\\.)+[^>\\.]+>$/H' +local mozilla_msgid = 'Message-ID=/^\\s*<(3[3-9A-F]|[4-9A-F][\\dA-F])[\\dA-F]{6}\\.(\\d0){1,4}\\d\\@([^>\\.]+\\.)+[^>\\.]+>$/H' +-- Summary rule for forged Mozilla Mail Message-ID header +reconf['FORGED_MUA_MOZILLA_MAIL_MSGID'] = { + re = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, + unusable_msgid), + score = 4.0, + description = 'Message pretends to be send from Mozilla Mail but has forged Message-ID', + group = 'mua' +} +reconf['FORGED_MUA_MOZILLA_MAIL_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla, mozilla_msgid_common, mozilla_msgid, + unusable_msgid), + score = 2.5, + description = 'Message pretends to be send from Mozilla Mail but has forged Message-ID', + group = 'mua' +} + +-- Summary rule for forged Thunderbird Message-ID header +reconf['FORGED_MUA_THUNDERBIRD_MSGID'] = { + re = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, mozilla_msgid, + unusable_msgid), + score = 4.0, + 
description = 'Forged mail pretending to be from Mozilla Thunderbird but has forged Message-ID', + group = 'mua' +} +reconf['FORGED_MUA_THUNDERBIRD_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)', user_agent_thunderbird, mozilla_msgid_common, + mozilla_msgid_common_sec, mozilla_msgid, unusable_msgid), + score = 2.5, + description = 'Forged mail pretending to be from Mozilla Thunderbird but has forged Message-ID', + group = 'mua' +} +-- Summary rule for forged Seamonkey Message-ID header +reconf['FORGED_MUA_SEAMONKEY_MSGID'] = { + re = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, mozilla_msgid, + unusable_msgid), + score = 4.0, + description = 'Forged mail pretending to be from Mozilla Seamonkey but has forged Message-ID', + group = 'mua' +} +reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)', user_agent_seamonkey, mozilla_msgid_common, + mozilla_msgid_common_sec, mozilla_msgid, unusable_msgid), + score = 2.5, + description = 'Forged mail pretending to be from Mozilla Seamonkey but has forged Message-ID', + group = 'mua' +} +-- Summary rule for forged Postbox Message-ID header +reconf['FORGED_MUA_POSTBOX_MSGID'] = { + re = string.format('(%s) & (%s) & !(%s) & !(%s)', user_agent_postbox, mozilla_msgid_common, mozilla_msgid, + unusable_msgid), + score = 4.0, + description = 'Forged mail pretending to be from Postbox but has forged Message-ID', + group = 'mua' +} +reconf['FORGED_MUA_POSTBOX_MSGID_UNKNOWN'] = { + re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)', user_agent_postbox, mozilla_msgid_common, + mozilla_msgid_common_sec, mozilla_msgid, unusable_msgid), + score = 2.5, + description = 'Forged mail pretending to be from Postbox but has forged Message-ID', + group = 'mua' +} + +-- Message id validity +local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H' 
+local msgid_comment = 'Message-Id=/\\(.*\\)/H' +reconf['INVALID_MSGID'] = { + re = string.format('(%s) & !((%s) | (%s))', has_mid, sane_msgid, msgid_comment), + score = 1.7, + description = 'Message-ID header is incorrect', + group = 'headers' +} + +-- Only Content-Type header without other MIME headers +local cd = 'header_exists(Content-Disposition)' +local cte = 'header_exists(Content-Transfer-Encoding)' +local ct = 'header_exists(Content-Type)' +local mime_version = 'raw_header_exists(MIME-Version)' +local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)' +reconf['MIME_HEADER_CTYPE_ONLY'] = { + re = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)', cd, cte, ct, mime_version, ct_text_plain), + score = 2.0, + description = 'Only Content-Type header without other MIME headers', + group = 'headers' +} + +-- Forged Exchange messages +local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/H' +local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H' +local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H' +reconf['RATWARE_MS_HASH'] = { + re = string.format('(%s) & !(%s) & !(%s)', msgid_dollars_ok, mimeole_ms, rcvd_with_exchange), + score = 2.0, + description = 'Forged Exchange messages', + group = 'headers' +} + +-- Reply-type in content-type +reconf['STOX_REPLY_TYPE'] = { + re = 'Content-Type=/text\\/plain; .* reply-type=original/H', + score = 1.0, + description = 'Reply-type in Content-Type header', + group = 'headers' +} + +-- Forged yahoo msgid +local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH' +local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH' +reconf['FORGED_MSGID_YAHOO'] = { + re = string.format('(%s) & !(%s)', at_yahoo_msgid, from_yahoo_com), + score = 2.0, + description = 'Forged Yahoo Message-ID header', + group = 'headers' +} + +-- Forged The Bat! MUA headers +local thebat_mua_v1 = 'X-Mailer=/^The Bat! 
\\(v1\\./H' +local ctype_has_boundary = 'Content-Type=/boundary/iH' +local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H' +local mailman_21 = 'X-Mailman-Version=/\\d/H' +reconf['FORGED_MUA_THEBAT_BOUN'] = { + re = string.format('(%s) & (%s) & !(%s) & !(%s)', thebat_mua_v1, ctype_has_boundary, bat_boundary, mailman_21), + score = 2.0, + description = 'Forged The Bat! MUA headers', + group = 'headers' +} + +-- Detect Mail.Ru web-mail +local xm_mail_ru_mailer_1_0 = 'X-Mailer=/^Mail\\.Ru Mailer 1\\.0$/H' +local rcvd_e_mail_ru = 'Received=/^(?:from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] )?by e\\.mail\\.ru with HTTP;/mH' +reconf['MAIL_RU_MAILER'] = { + re = string.format('(%s) & (%s)', xm_mail_ru_mailer_1_0, rcvd_e_mail_ru), + score = 0.0, + description = 'Sent with Mail.Ru webmail', + group = 'headers' +} + +-- Detect yandex.ru web-mail +local xm_yandex_ru_mailer_5_0 = 'X-Mailer=/^Yamail \\[ http:\\/\\/yandex\\.ru \\] 5\\.0$/H' +local rcvd_web_yandex_ru = 'Received=/^by web\\d{1,2}[a-z]\\.yandex\\.ru with HTTP;/mH' +reconf['YANDEX_RU_MAILER'] = { + re = string.format('(%s) & (%s)', xm_yandex_ru_mailer_5_0, rcvd_web_yandex_ru), + score = 0.0, + description = 'Sent with Yandex webmail', + group = 'headers' +} + +-- Detect 1C v8.2 and v8.3 mailers +reconf['MAILER_1C_8'] = { + re = 'X-Mailer=/^1C:Enterprise 8\\.[23]$/H', + score = 0.0, + description = 'Sent with 1C:Enterprise 8', + group = 'headers' +} + +-- Detect rogue 'strongmail' MTA with IPv4 and '(-)' in Received line +reconf['STRONGMAIL'] = { + re = [[Received=/^from\s+strongmail\s+\(\[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\]\) by \S+ \(-\); /mH]], + score = 6.0, + description = 'Sent via rogue "strongmail" MTA', + group = 'headers' +} + +-- Two received headers with ip addresses +local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H' +local double_ip_spam_2 = 
'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H' +reconf['RCVD_DOUBLE_IP_SPAM'] = { + re = string.format('(%s) | (%s)', double_ip_spam_1, double_ip_spam_2), + score = 2.0, + description = 'Has two Received headers containing bare IP addresses', + group = 'headers' +} + +-- Quoted reply-to from yahoo (seems to be forged) +local repto_quote = 'Reply-To=/\\".*\\"\\s*\\</H' +reconf['REPTO_QUOTE_YAHOO'] = { + re = string.format('(%s) & ((%s) | (%s))', repto_quote, from_yahoo_com, at_yahoo_msgid), + score = 2.0, + description = 'Quoted Reply-To header from Yahoo (seems to be forged)', + group = 'headers' +} + +reconf['FAKE_REPLY'] = { + re = [[Subject=/^re:/i{header} & !(header_exists(In-Reply-To) | header_exists(References))]], + description = 'Fake reply', + score = 1.0, + group = 'headers' +} + +-- Mime-OLE is needed but absent (e.g. fake Outlook or fake Exchange) +local has_msmail_pri = 'header_exists(X-MSMail-Priority)' +local has_mimeole = 'header_exists(X-MimeOLE)' +local has_squirrelmail_in_mailer = 'X-Mailer=/SquirrelMail\\b/H' +local has_office_version_in_mailer = [[X-Mailer=/^Microsoft (?:Office )?Outlook [12]\d\.0/]] +local has_x_android_message_id = 'header_exists(X-Android-Message-Id)' +reconf['MISSING_MIMEOLE'] = { + re = string.format('(%s) & !(%s) & !(%s) & !(%s) & !(%s)', + has_msmail_pri, + has_mimeole, + has_squirrelmail_in_mailer, + has_office_version_in_mailer, + has_x_android_message_id), + score = 2.0, + description = 'Mime-OLE is needed but absent (e.g. 
fake Outlook or fake Exchange)', + group = 'headers' +} + +-- Empty delimiters between header names and header values +local function gen_check_header_delimiter_empty(header_name) + return function(task) + for _, rh in ipairs(task:get_header_full(header_name) or {}) do + if rh['empty_separator'] then + return true + end + end + return false + end +end +reconf['HEADER_FROM_EMPTY_DELIMITER'] = { + re = string.format('(%s)', 'lua:check_from_delim_empty'), + score = 1.0, + description = 'From header has no delimiter between header name and header value', + group = 'headers', + functions = { + check_from_delim_empty = gen_check_header_delimiter_empty('From') + } +} +reconf['HEADER_TO_EMPTY_DELIMITER'] = { + re = string.format('(%s)', 'lua:check_to_delim_empty'), + score = 1.0, + description = 'To header has no delimiter between header name and header value', + group = 'headers', + functions = { + check_to_delim_empty = gen_check_header_delimiter_empty('To') + } +} +reconf['HEADER_CC_EMPTY_DELIMITER'] = { + re = string.format('(%s)', 'lua:check_cc_delim_empty'), + score = 1.0, + description = 'Cc header has no delimiter between header name and header value', + group = 'headers', + functions = { + check_cc_delim_empty = gen_check_header_delimiter_empty('Cc') + } +} +reconf['HEADER_REPLYTO_EMPTY_DELIMITER'] = { + re = string.format('(%s)', 'lua:check_repto_delim_empty'), + score = 1.0, + description = 'Reply-To header has no delimiter between header name and header value', + group = 'headers', + functions = { + check_repto_delim_empty = gen_check_header_delimiter_empty('Reply-To') + } +} +reconf['HEADER_DATE_EMPTY_DELIMITER'] = { + re = string.format('(%s)', 'lua:check_date_delim_empty'), + score = 1.0, + description = 'Date header has no delimiter between header name and header value', + group = 'headers', + functions = { + check_date_delim_empty = gen_check_header_delimiter_empty('Date') + } +} + +-- Definitions of received headers regexp +reconf['RCVD_ILLEGAL_CHARS'] = 
{ + re = 'Received=/[\\x80-\\xff]/X', + score = 4.0, + description = 'Received header has raw illegal character', + group = 'headers' +} + +local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX' +local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX' +local MAIL_RU_From = 'From=/\\@mail\\.ru>?$/iX' +local MAIL_RU_Received = 'Received=/from mail\\.ru \\(/mH' + +reconf['FAKE_RECEIVED_mail_ru'] = { + re = string.format('(%s) & !(((%s) | (%s)) & (%s))', + MAIL_RU_Received, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, MAIL_RU_From), + score = 4.0, + description = 'Fake HELO mail.ru in Received header from non-mail.ru sender address', + group = 'headers' +} + +local GMAIL_COM_Return_Path = 'Return-path=/^\\s*<.+\\@gmail\\.com>$/iX' +local GMAIL_COM_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@gmail\\.com>$/iX' +local GMAIL_COM_From = 'From=/\\@gmail\\.com>?$/iX' + +local UKR_NET_Return_Path = 'Return-path=/^\\s*<.+\\@ukr\\.net>$/iX' +local UKR_NET_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@ukr\\.net>$/iX' +local UKR_NET_From = 'From=/\\@ukr\\.net>?$/iX' + +local RECEIVED_smtp_yandex_ru_1 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\((port=\\d+ )?helo=smtp\\.yandex\\.ru\\)/iX' +local RECEIVED_smtp_yandex_ru_2 = 'Received=/from \\[UNAVAILABLE\\] \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX' +local RECEIVED_smtp_yandex_ru_3 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX' +local RECEIVED_smtp_yandex_ru_4 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\(account \\S+ HELO smtp\\.yandex\\.ru\\)/iX' +local RECEIVED_smtp_yandex_ru_5 = 'Received=/from smtp\\.yandex\\.ru \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX' +local RECEIVED_smtp_yandex_ru_6 = 'Received=/from smtp\\.yandex\\.ru \\(\\S+ \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX' +local RECEIVED_smtp_yandex_ru_7 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\S+\\@\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX' 
+local RECEIVED_smtp_yandex_ru_8 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
+local RECEIVED_smtp_yandex_ru_9 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] helo=smtp\\.yandex\\.ru\\)/iX'
+
+-- Sender address (From plus Return-path or X-Envelope-From) at mail.ru,
+-- gmail.com or ukr.net, combined with a Received line naming smtp.yandex.ru.
+-- The nine Received patterns are wrapped in a single parenthesised OR so the
+-- sender condition applies to every one of them, not only to the first.
+reconf['FAKE_RECEIVED_smtp_yandex_ru'] = {
+  re = string.format('(((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s))) ' ..
+      ' | ((%s) & ((%s) | (%s)))) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))',
+      MAIL_RU_From, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, GMAIL_COM_From,
+      GMAIL_COM_Return_Path, GMAIL_COM_X_Envelope_From, UKR_NET_From, UKR_NET_Return_Path,
+      UKR_NET_X_Envelope_From, RECEIVED_smtp_yandex_ru_1, RECEIVED_smtp_yandex_ru_2,
+      RECEIVED_smtp_yandex_ru_3, RECEIVED_smtp_yandex_ru_4, RECEIVED_smtp_yandex_ru_5,
+      RECEIVED_smtp_yandex_ru_6, RECEIVED_smtp_yandex_ru_7, RECEIVED_smtp_yandex_ru_8,
+      RECEIVED_smtp_yandex_ru_9),
+  score = 4.0,
+  description = 'Fake smtp.yandex.ru Received header',
+  group = 'headers'
+}
+
+reconf['FORGED_GENERIC_RECEIVED'] = {
+  re = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by (([\\w\\d-]+\\.)+[a-zA-Z]{2,6}|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}); \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X',
+  score = 3.6,
+  description = 'Forged generic Received header',
+  group = 'headers'
+}
+
+reconf['FORGED_GENERIC_RECEIVED2'] = {
+  re = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by ([\\w\\d-]+\\.)+[a-z]{2,6} id [\\w\\d]{12}; \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X',
+  score = 3.6,
+  description = 'Forged generic Received header',
+  group = 'headers'
+}
+
+reconf['FORGED_GENERIC_RECEIVED3'] = {
+  re = 'Received=/^\\s*(.+\\n)*by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with SMTP id [a-zA-Z]{14}\\.\\d{13};[\\r\\n\\s]*\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0 \\(GMT\\)/X',
+  score = 3.6,
+  description = 'Forged generic Received 
header', + group = 'headers' +} + +reconf['FORGED_GENERIC_RECEIVED4'] = { + re = 'Received=/^\\s*(.+\\n)*from localhost by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0[\\s\\r\\n]*$/X', + score = 3.6, + description = 'Forged generic Received header', + group = 'headers' +} + +reconf['INVALID_POSTFIX_RECEIVED'] = { + re = 'Received=/ \\(Postfix\\) with ESMTP id [A-Z\\d]+([\\s\\r\\n]+for <\\S+?>)?;[\\s\\r\\n]*[A-Z][a-z]{2}, \\d{1,2} [A-Z][a-z]{2} \\d\\d\\d\\d \\d\\d:\\d\\d:\\d\\d [\\+\\-]\\d\\d\\d\\d$/X', + score = 3.0, + description = 'Invalid Postfix Received header', + group = 'headers' +} + +reconf['X_PHP_FORGED_0X'] = { + re = "X-PHP-Originating-Script=/^0\\d/X", + score = 4.0, + description = "X-PHP-Originating-Script header appears forged", + group = 'headers' +} + +reconf['GOOGLE_FORWARDING_MID_MISSING'] = { + re = "Message-ID=/SMTPIN_ADDED_MISSING\\@mx\\.google\\.com>$/X", + score = 2.5, + description = "Message was missing Message-ID pre-forwarding", + group = 'headers' +} + +reconf['GOOGLE_FORWARDING_MID_BROKEN'] = { + re = "Message-ID=/SMTPIN_ADDED_BROKEN\\@mx\\.google\\.com>$/X", + score = 1.7, + description = "Message had invalid Message-ID pre-forwarding", + group = 'headers' +} + +reconf['CTE_CASE'] = { + re = 'Content-Transfer-Encoding=/^[78]B/X', + description = '[78]Bit .vs. 
[78]bit', + score = 0.5, + group = 'headers' +} + +reconf['HAS_INTERSPIRE_SIG'] = { + re = string.format('((%s) & (%s) & (%s) & (%s)) | (%s)', + 'header_exists(X-Mailer-LID)', + 'header_exists(X-Mailer-RecptId)', + 'header_exists(X-Mailer-SID)', + 'header_exists(X-Mailer-Sent-By)', + 'List-Unsubscribe=/\\/unsubscribe\\.php\\?M=[^&]+&C=[^&]+&L=[^&]+&N=[^>]+>$/Xi'), + description = "Has Interspire fingerprint", + score = 1.0, + group = 'headers' +} + +reconf['CT_EXTRA_SEMI'] = { + re = 'Content-Type=/;$/X', + description = 'Content-Type header ends with a semi-colon', + score = 1.0, + group = 'headers' +} + +reconf['SUBJECT_ENDS_EXCLAIM'] = { + re = 'Subject=/!\\s*$/H', + description = 'Subject ends with an exclamation mark', + score = 0.0, + group = 'headers' +} + +reconf['SUBJECT_HAS_EXCLAIM'] = { + re = string.format('%s & !%s', 'Subject=/!/H', 'Subject=/!\\s*$/H'), + description = 'Subject contains an exclamation mark', + score = 0.0, + group = 'headers' +} + +reconf['SUBJECT_ENDS_QUESTION'] = { + re = 'Subject=/\\?\\s*$/Hu', + description = 'Subject ends with a question mark', + score = 1.0, + group = 'headers' +} + +reconf['SUBJECT_HAS_QUESTION'] = { + re = string.format('%s & !%s', 'Subject=/\\?/H', 'Subject=/\\?\\s*$/Hu'), + description = 'Subject contains a question mark', + score = 0.0, + group = 'headers' +} + +reconf['SUBJECT_HAS_CURRENCY'] = { + re = 'Subject=/\\p{Sc}/Hu', + description = 'Subject contains currency', + score = 1.0, + group = 'headers' +} + +reconf['SUBJECT_ENDS_SPACES'] = { + re = 'Subject=/\\s+$/H', + description = 'Subject ends with space characters', + score = 0.5, + group = 'headers' +} + +reconf['HAS_ORG_HEADER'] = { + re = string.format('%s || %s', 'header_exists(Organization)', 'header_exists(Organisation)'), + description = 'Has Organization header', + score = 0.0, + group = 'headers' +} + +reconf['X_PHPOS_FAKE'] = { + re = 'X-PHP-Originating-Script=/^\\d{7}:/Hi', + description = 'Fake X-PHP-Originating-Script header', + score = 
3.0, + group = 'headers' +} + +reconf['HAS_XOIP'] = { + re = "header_exists('X-Originating-IP')", + description = "Has X-Originating-IP header", + score = 0.0, + group = 'headers' +} + +reconf['HAS_LIST_UNSUB'] = { + re = string.format('%s', 'header_exists(List-Unsubscribe)'), + description = 'Has List-Unsubscribe header', + score = -0.01, + group = 'headers' +} + +reconf['HAS_GUC_PROXY_URI'] = { + re = '/\\.googleusercontent\\.com\\/proxy/{url}i', + description = 'Has googleusercontent.com proxy URL', + score = 1.0, + group = 'url' +} + +reconf['HAS_GOOGLE_REDIR'] = { + re = '/\\.google\\.([a-z]{2,3}(|\\.[a-z]{2,3})|info|jobs)\\/(amp\\/s\\/|url\\?)/{url}i', + description = 'Has google.com/url or alike Google redirection URL', + score = 1.0, + group = 'url' +} + +reconf['HAS_GOOGLE_FIREBASE_URL'] = { + re = '/\\.firebasestorage\\.googleapis\\.com\\//{url}i', + description = 'Contains firebasestorage.googleapis.com URL', + score = 2.0, + group = 'url' +} + +reconf['XM_UA_NO_VERSION'] = { + re = string.format('(!%s && !%s) && (%s || %s)', + 'X-Mailer=/https?:/H', + 'User-Agent=/https?:/H', + 'X-Mailer=/^[^0-9]+$/H', + 'User-Agent=/^[^0-9]+$/H'), + description = 'X-Mailer/User-Agent header has no version number', + score = 0.01, + group = 'experimental' +} + +-- Detects messages missing both X-Mailer and User-Agent header +local has_ua = 'header_exists(User-Agent)' +local has_xmailer = 'header_exists(X-Mailer)' +reconf['MISSING_XM_UA'] = { + re = string.format('!%s && !%s', has_xmailer, has_ua), + score = 0.0, + description = 'Message has neither X-Mailer nor User-Agent header', + group = 'headers', +} + +-- X-Mailer for old MUA versions which are forged by spammers +local old_x_mailers = { + -- Outlook Express 6.0 was last included in Windows XP (EOL 2014). Windows + -- XP is still used (in 2020) by relatively small number of internet users, + -- but this header is widely abused by spammers. 
+  'Microsoft Outlook Express',
+  -- Qualcomm Eudora for Windows 7.1.0.9 was released in 2006
+  [[QUALCOMM Windows Eudora (Pro )?Version [1-6]\.]],
+  -- The Bat 3.0 was released in 2004
+  [[The Bat! \(v[12]\.]],
+  -- Can be found in public mailing list archives, messages circa 2000
+  [[Microsoft Outlook IMO, Build 9\.0\.]],
+  -- Outlook 2002 (Office XP)
+  [[Microsoft Outlook, Build 10\.]],
+  -- Some old Apple iOS versions are used on old devices, match only very old
+  -- versions (iOS 4.3.5 build 8L1 was supported until 2013) and less old
+  -- versions frequently seen in spam
+  [[i(Phone|Pad) Mail \((?:[1-8][A-L]|12H|13E)]],
+}
+
+-- The list entries are joined with '|' into one alternation that is anchored
+-- at the start of the X-Mailer value.
+reconf['OLD_X_MAILER'] = {
+  description = 'X-Mailer header has a very old MUA version',
+  re = string.format('X-Mailer=/^(?:%s)/{header}', table.concat(old_x_mailers, '|')),
+  score = 2.0,
+  group = 'headers',
+}
+
+-- Detect Apple Mail
+local apple_x_mailer = [[Apple Mail \((?:(?:Version )?[1-9]\d{0,2}\.\d{1,3}|[1-9]\d{0,2}\.\d{1,4}\.\d{1,4}\.\d{1,4})\)]]
+reconf['APPLE_MAILER'] = {
+  description = 'Sent with Apple Mail',
+  re = string.format('X-Mailer=/^%s/{header}', apple_x_mailer),
+  score = 0.0,
+  group = 'headers',
+}
+
+-- Detect Apple iPhone/iPad Mail
+-- Apple iPhone/iPad Mail X-Mailer contains iOS build number, e. g. 
9B206, 16H5, 18G5023c +-- https://en.wikipedia.org/wiki/IOS_version_history +local apple_ios_x_mailer = [[i(?:Phone|Pad) Mail \(\d{1,2}[A-Z]\d{1,4}[a-z]?\)]] +reconf['APPLE_IOS_MAILER'] = { + description = 'Sent with Apple iPhone/iPad Mail', + re = string.format('X-Mailer=/^%s/{header}', apple_ios_x_mailer), + score = 0.0, + group = 'headers', +} + +-- X-Mailer header values which should not occur (in the modern mail) at all +local bad_x_mailers = { + -- header name repeated in the header value + [[X-Mailer: ]], + -- Mozilla Thunderbird uses User-Agent header, not X-Mailer + -- Early Thunderbird had U-A like: + -- Mozilla Thunderbird 1.0.2 (Windows/20050317) + -- Thunderbird 2.0.0.23 (X11/20090812) + [[(?:Mozilla )?Thunderbird \d]], + -- Was used by Yahoo Groups in 2000s, no one expected to use this in 2020s + [[eGroups Message Poster]], + -- Regexp for genuine iOS X-Mailer is below, anything which doesn't match it, + -- but starts with 'iPhone Mail' or 'iPad Mail' is likely fake + [[i(?:Phone|Pad) Mail]], +} + +reconf['FORGED_X_MAILER'] = { + description = 'Forged X-Mailer header', + re = string.format('X-Mailer=/^(?:%s)/{header} && !X-Mailer=/^%s/{header}', + table.concat(bad_x_mailers, '|'), apple_ios_x_mailer), + score = 4.5, + group = 'headers', +} + +-- X-Mailer headers like: 'Internet Mail Service (5.5.2650.21)' are being +-- forged by spammers, but MS Exchange 5.5 is still being used (in 2020) on +-- some mail servers. 
Example of genuine headers (DC-EXMPL is a hostname which +-- can be a FQDN): +-- Received: by DC-EXMPL with Internet Mail Service (5.5.2656.59) +-- id <HKH4BJQX>; Tue, 8 Dec 2020 07:10:54 -0600 +-- Message-ID: <E7209F9DB64FCC4BB1051420F0E955DD05C9D59F@DC-EXMPL> +-- X-Mailer: Internet Mail Service (5.5.2656.59) +reconf['FORGED_IMS'] = { + description = 'Forged X-Mailer: Internet Mail Service', + re = [[X-Mailer=/^Internet Mail Service \(5\./{header} & !Received=/^by \S+ with Internet Mail Service \(5\./{header}]], + score = 3.0, + group = 'headers', +} diff --git a/rules/regexp/misc.lua b/rules/regexp/misc.lua new file mode 100644 index 0000000..d723f29 --- /dev/null +++ b/rules/regexp/misc.lua @@ -0,0 +1,117 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]-- + + +local reconf = config['regexp'] + +reconf['HTML_META_REFRESH_URL'] = { + -- Requires options { check_attachements = true; } + re = '/<meta\\s+http-equiv="refresh"\\s+content="\\d+\\s*;\\s*url=/{sa_raw_body}i', + description = "Has HTML Meta refresh URL", + score = 5.0, + one_shot = true, + group = 'HTML' +} + +reconf['HAS_DATA_URI'] = { + -- Requires options { check_attachements = true; } + re = '/data:[^\\/]+\\/[^; ]+;base64,/{sa_raw_body}i', + description = "Has Data URI encoding", + group = 'HTML', + one_shot = true, +} + +reconf['DATA_URI_OBFU'] = { + -- Requires options { check_attachements = true; } + re = '/data:text\\/(?:plain|html);base64,/{sa_raw_body}i', + description = "Uses Data URI encoding to obfuscate plain or HTML in base64", + group = 'HTML', + one_shot = true, + score = 2.0 +} + +reconf['INTRODUCTION'] = { + re = '/\\b(?:my name is\\b|(?:i am|this is)\\s+(?:mr|mrs|ms|miss|master|sir|prof(?:essor)?|d(?:octo)?r|rev(?:erend)?)(?:\\.|\\b))/{sa_body}i', + description = "Sender introduces themselves", + score = 2.0, + one_shot = true, + group = 'scams' +} + +-- Message contains a link to a .onion URI (Tor hidden service) +local onion_uri_v2 = '/[a-z0-9]{16}\\.onion?/{url}i' +local onion_uri_v3 = '/[a-z0-9]{56}\\.onion?/{url}i' +reconf['HAS_ONION_URI'] = { + re = string.format('(%s | %s)', onion_uri_v2, onion_uri_v3), + description = 'Contains .onion hidden service URI', + score = 0.0, + group = 'url' +} + +local my_victim = [[/(?:victim|prey)/{words}]] +local your_webcam = [[/webcam/{words}]] +local your_onan = [[/(?:mast[ur]{2}bati(?:on|ng)|onanism|solitary)/{words}]] +local password_in_words = [[/^pass(?:(?:word)|(?:phrase))$/i{words}]] +local btc_wallet_address = [[has_symbol(BITCOIN_ADDR)]] +local wallet_word = [[/^wallet$/{words}]] +local broken_unicode = [[has_flag(bad_unicode)]] +local list_unsub = [[header_exists(List-Unsubscribe)]] +local x_php_origin = [[header_exists(X-PHP-Originating-Script)]] + +reconf['LEAKED_PASSWORD_SCAM_RE'] 
= {
+  re = string.format('%s & (%s | %s | %s | %s | %s | %s | %s | %s | %s)',
+      btc_wallet_address, password_in_words, wallet_word,
+      my_victim, your_webcam, your_onan,
+      broken_unicode, 'lua:check_data_images',
+      list_unsub, x_php_origin),
+  description = 'Contains BTC wallet address and malicious regexps',
+  functions = {
+    -- True when any HTML text part reports the 'data_urls' property
+    check_data_images = function(task)
+      local tp = task:get_text_parts() or {}
+
+      for _, p in ipairs(tp) do
+        if p:is_html() then
+          local hc = p:get_html()
+
+          if hc and hc:has_property('data_urls') then
+            return true
+          end
+        end
+      end
+
+      return false
+    end
+  },
+  score = 0.0,
+  group = 'scams'
+}
+
+-- The regexp rule above reads BITCOIN_ADDR through has_symbol(), so it is
+-- registered as a dependent of BITCOIN_ADDR under its own name; the original
+-- registration for LEAKED_PASSWORD_SCAM is kept unchanged.
+rspamd_config:register_dependency('LEAKED_PASSWORD_SCAM', 'BITCOIN_ADDR')
+rspamd_config:register_dependency('LEAKED_PASSWORD_SCAM_RE', 'BITCOIN_ADDR')
+
+-- Heuristic for detecting InterPlanetary File System (IPFS) gateway URLs:
+-- These contain "ipfs" somewhere (either in the FQDN or the URL path) and a
+-- content identifier (CID), consisting of either "qm", followed by 44 alphanumerical
+-- characters (CIDv0), or a CIDv1 of an alphanumerical string of unspecified length,
+-- depending on the hash algorithm used, but starting with a multibase prefix.
+local ipfs_cid = '/(qm[a-z0-9]{44}|[079fvtbchkzmup][a-z0-9]{44,128})/{url}i'
+local ipfs_string = '/ipfs(\\.|-|_|\\/|\\?)/{url}i'
+reconf['HAS_IPFS_GATEWAY_URL'] = {
+  description = 'Message contains InterPlanetary File System (IPFS) gateway URL, likely malicious',
+  re = string.format('(%s & %s)', ipfs_cid, ipfs_string),
+  score = 6.0,
+  one_shot = true,
+  group = 'url',
+}
diff --git a/rules/regexp/upstream_spam_filters.lua b/rules/regexp/upstream_spam_filters.lua
new file mode 100644
index 0000000..b92f473
--- /dev/null
+++ b/rules/regexp/upstream_spam_filters.lua
@@ -0,0 +1,60 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +-- Rules for upstream services that have already run spam checks + +local reconf = config['regexp'] + +reconf['PRECEDENCE_BULK'] = { + re = 'Precedence=/bulk/Hi', + score = 0.0, + description = "Message marked as bulk", + group = 'upstream_spam_filters' +} + +reconf['MICROSOFT_SPAM'] = { + -- https://technet.microsoft.com/en-us/library/dn205071(v=exchg.150).aspx + re = 'X-Forefront-Antispam-Report=/SFV:SPM/H', + score = 4.0, + description = "Microsoft says the message is spam", + group = 'upstream_spam_filters' +} + +reconf['KLMS_SPAM'] = { + re = 'X-KLMS-AntiSpam-Status=/^spam/H', + score = 5.0, + description = "Kaspersky Security for Mail Server says this message is spam", + group = 'upstream_spam_filters' +} + +reconf['SPAM_FLAG'] = { + re = string.format('%s || %s || %s', + 'X-Spam-Flag=/^(?:yes|true)/Hi', + 'X-Spam=/^(?:yes|true)/Hi', + 'X-Spam-Status=/^(?:yes|true)/Hi'), + score = 5.0, + description = "Message was already marked as spam", + group = 'upstream_spam_filters' +} + +reconf['UNITEDINTERNET_SPAM'] = { + re = string.format('%s || %s', + 'X-UI-Filterresults=/^junk:/H', + 'X-UI-Out-Filterresults=/^junk:/H'), + score = 5.0, + description = "United Internet says this message is spam", + group = 'upstream_spam_filters' +} diff --git a/rules/rspamd.lua b/rules/rspamd.lua new file mode 100644 index 0000000..6b2c1a5 --- /dev/null +++ b/rules/rspamd.lua @@ -0,0 +1,71 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the 
License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +-- This is main lua config file for rspamd + +require "global_functions"() + +config['regexp'] = {} +rspamd_maps = {} -- Global maps + +local local_conf = rspamd_paths['LOCAL_CONFDIR'] +local local_rules = rspamd_paths['RULESDIR'] +local rspamd_util = require "rspamd_util" + +dofile(local_rules .. '/regexp/headers.lua') +dofile(local_rules .. '/regexp/misc.lua') +dofile(local_rules .. '/regexp/upstream_spam_filters.lua') +dofile(local_rules .. '/regexp/compromised_hosts.lua') +dofile(local_rules .. '/html.lua') +dofile(local_rules .. '/headers_checks.lua') +dofile(local_rules .. '/subject_checks.lua') +dofile(local_rules .. '/misc.lua') +dofile(local_rules .. '/forwarding.lua') +dofile(local_rules .. '/mid.lua') +dofile(local_rules .. '/parts.lua') +dofile(local_rules .. '/bitcoin.lua') +dofile(local_rules .. '/bounce.lua') +dofile(local_rules .. '/content.lua') +dofile(local_rules .. '/controller/init.lua') + +if rspamd_util.file_exists(local_conf .. '/rspamd.local.lua') then + dofile(local_conf .. '/rspamd.local.lua') +else + -- Legacy lua/rspamd.local.lua + if rspamd_util.file_exists(local_conf .. '/lua/rspamd.local.lua') then + dofile(local_conf .. '/lua/rspamd.local.lua') + end +end + +if rspamd_util.file_exists(local_conf .. '/local.d/rspamd.lua') then + dofile(local_conf .. 
'/local.d/rspamd.lua') +end + +local rmaps = rspamd_config:get_all_opt("lua_maps") +if rmaps and type(rmaps) == 'table' then + local rspamd_logger = require "rspamd_logger" + for k, v in pairs(rmaps) do + local status, map_or_err = pcall(function() + return rspamd_config:add_map(v) + end) + + if not status then + rspamd_logger.errx(rspamd_config, "cannot add map %s: %s", k, map_or_err) + else + rspamd_maps[k] = map_or_err + end + end +end diff --git a/rules/subject_checks.lua b/rules/subject_checks.lua new file mode 100644 index 0000000..f781e1d --- /dev/null +++ b/rules/subject_checks.lua @@ -0,0 +1,70 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]--
+
+local rspamd_regexp = require "rspamd_regexp"
+local util = require "rspamd_util"
+
+-- Uncategorized rules
+-- Matches one or more leading reply/forward prefixes (Re, Fwd, Fw, Aw,
+-- Antwort, Sv followed by a colon) and captures the rest of the subject.
+local subject_re = rspamd_regexp.create('/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv):\\s*)+(.+)$/i')
+
+--- Run a subject check and turn the subject length into a capped weight.
+-- When subject_re yields a capture, the check runs on that capture instead
+-- of the whole Subject value.
+-- @param task task whose Subject header is read
+-- @param check_function called as check_function(subject, utf8_length)
+-- @param rate multiplier applied to the length
+-- @return false when there is no Subject or the check fails; otherwise
+--   true, min(rate * length, 1) and the length as a string
+local function test_subject(task, check_function, rate)
+  -- Linear weight a * x, capped at 1
+  local function normalize_linear(a, x)
+    local f = a * x
+    return true, ((f < 1) and f or 1), tostring(x)
+  end
+
+  local sbj = task:get_header('Subject')
+
+  if sbj then
+    -- NOTE(review): [1][2] looks like the first capture group of the first
+    -- match -- confirm against the rspamd_regexp search() result layout
+    local stripped_subject = subject_re:search(sbj, false, true)
+    if stripped_subject and stripped_subject[1] and stripped_subject[1][2] then
+      sbj = stripped_subject[1][2]
+    end
+
+    local l = util.strlen_utf8(sbj)
+    if check_function(sbj, l) then
+      return normalize_linear(rate, l)
+    end
+  end
+
+  return false
+end
+
+-- Fires when util.is_uppercase() accepts the subject; with rate 1/40 the
+-- weight grows with the subject length and reaches the cap of 1 at 40.
+rspamd_config.SUBJ_ALL_CAPS = {
+  callback = function(task)
+    local caps_test = function(sbj)
+      return util.is_uppercase(sbj)
+    end
+    return test_subject(task, caps_test, 1.0 / 40.0)
+  end,
+  score = 3.0,
+  group = 'subject',
+  type = 'mime',
+  description = 'Subject contains mostly capital letters'
+}
+
+-- Fires when the subject length exceeds 200; with rate 1/400 the weight
+-- is length / 400, capped at 1.
+rspamd_config.LONG_SUBJ = {
+  callback = function(task)
+    local length_test = function(_, len)
+      return len > 200
+    end
+    return test_subject(task, length_test, 1.0 / 400.0)
+  end,
+  score = 3.0,
+  group = 'subject',
+  type = 'mime',
+  description = 'Subject is very long'
+} |