summaryrefslogtreecommitdiffstats
path: root/rules
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /rules
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'rules')
-rw-r--r--rules/bitcoin.lua237
-rw-r--r--rules/bounce.lua117
-rw-r--r--rules/content.lua118
-rw-r--r--rules/controller/fuzzy.lua46
-rw-r--r--rules/controller/init.lua67
-rw-r--r--rules/controller/maps.lua220
-rw-r--r--rules/controller/neural.lua70
-rw-r--r--rules/controller/selectors.lua73
-rw-r--r--rules/forwarding.lua163
-rw-r--r--rules/headers_checks.lua1174
-rw-r--r--rules/html.lua462
-rw-r--r--rules/mid.lua131
-rw-r--r--rules/misc.lua864
-rw-r--r--rules/parts.lua11
-rw-r--r--rules/regexp/compromised_hosts.lua223
-rw-r--r--rules/regexp/headers.lua1046
-rw-r--r--rules/regexp/misc.lua117
-rw-r--r--rules/regexp/upstream_spam_filters.lua60
-rw-r--r--rules/rspamd.lua71
-rw-r--r--rules/subject_checks.lua70
20 files changed, 5340 insertions, 0 deletions
diff --git a/rules/bitcoin.lua b/rules/bitcoin.lua
new file mode 100644
index 0000000..6a70721
--- /dev/null
+++ b/rules/bitcoin.lua
@@ -0,0 +1,237 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Bitcoin filter rules
+
+local fun = require "fun"
+local bit = require "bit"
+local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
+local N = "bitcoin"
+
+local off = 0
+local base58_dec = fun.tomap(fun.map(
+ function(c)
+ off = off + 1
+ return c, (off - 1)
+ end,
+ "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"))
+
+local function is_traditional_btc_address(word)
+ local hash = require "rspamd_cryptobox_hash"
+
+ local bytes = {}
+ for i = 1, 25 do
+ bytes[i] = 0
+ end
+ -- Base58 decode loop
+ fun.each(function(ch)
+ local acc = base58_dec[ch] or 0
+ for i = 25, 1, -1 do
+ acc = acc + (58 * bytes[i]);
+ bytes[i] = acc % 256
+ acc = math.floor(acc / 256);
+ end
+ end, word)
+ -- Now create a validation tag
+ local sha256 = hash.create_specific('sha256')
+ for i = 1, 21 do
+ sha256:update(string.char(bytes[i]))
+ end
+ sha256 = hash.create_specific('sha256', sha256:bin()):bin()
+
+ -- Compare tags
+ local valid = true
+ for i = 1, 4 do
+ if string.sub(sha256, i, i) ~= string.char(bytes[21 + i]) then
+ valid = false
+ end
+ end
+
+ return valid
+end
+
+-- Bech32 checksum combiner
+local function polymod(...)
+ local chk = 1;
+ local gen = { 0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3 };
+ for _, t in ipairs({ ... }) do
+ for _, v in ipairs(t) do
+ local top = bit.rshift(chk, 25)
+
+ chk = bit.bxor(bit.lshift(bit.band(chk, 0x1ffffff), 5), v)
+ for i = 1, 5 do
+ if bit.band(bit.rshift(top, i - 1), 0x1) ~= 0 then
+ chk = bit.bxor(chk, gen[i])
+ end
+ end
+ end
+ end
+
+ return chk
+end
+
+-- Bech32 human-readable part (HRP) expansion function
+local function hrpExpand(hrp)
+ local ret = {}
+ fun.each(function(byte)
+ ret[#ret + 1] = bit.rshift(byte, 5)
+ end, fun.map(string.byte, fun.iter(hrp)))
+ ret[#ret + 1] = 0
+ fun.each(function(byte)
+ ret[#ret + 1] = bit.band(byte, 0x1f)
+ end, fun.map(string.byte, fun.iter(hrp)))
+
+ return ret
+end
+
+local function verify_beach32_cksum(hrp, elts)
+ return polymod(hrpExpand(hrp), elts) == 1
+end
+
+local function gen_bleach32_table(input)
+ local d = {}
+ local i = 1
+ local res = true
+ local charset = 'qpzry9x8gf2tvdw0s3jn54khce6mua7l'
+
+ fun.each(function(byte)
+ if res then
+ local pos = charset:find(byte, 1, true)
+ if not pos then
+ res = false
+ else
+ d[i] = pos - 1
+ i = i + 1
+ end
+ end
+ end, fun.iter(input))
+
+ return res and d or nil
+end
+
+local function is_segwit_bech32_address(task, word)
+ local semicolon_pos = string.find(word, ':')
+ local address_part = word
+ if semicolon_pos then
+ address_part = string.sub(word, semicolon_pos + 1)
+ end
+
+ local prefix = address_part:sub(1, 3)
+
+ if prefix == 'bc1' or prefix:sub(1, 1) == '1' or prefix:sub(1, 1) == '3' then
+ -- Strip beach32 prefix in bitcoin
+ address_part = address_part:lower()
+ local last_one_pos = address_part:find('1[^1]*$')
+ if not last_one_pos or (last_one_pos < 1 or last_one_pos + 7 > #address_part) then
+ return false
+ end
+ local hrp = address_part:sub(1, last_one_pos - 1)
+ local addr = address_part:sub(last_one_pos + 1, -1)
+ local decoded = gen_bleach32_table(addr)
+
+ if decoded then
+ return verify_beach32_cksum(hrp, decoded)
+ end
+ else
+ -- Bitcoin cash address
+ -- https://www.bitcoincash.org/spec/cashaddr.html
+ local decoded = gen_bleach32_table(address_part)
+ lua_util.debugm(N, task, 'check %s, %s decoded', word, decoded)
+
+ if decoded and #decoded > 8 then
+ if semicolon_pos then
+ prefix = word:sub(1, semicolon_pos - 1)
+ else
+ prefix = 'bitcoincash'
+ end
+
+ local polymod_tbl = {}
+ fun.each(function(byte)
+ local b = bit.band(string.byte(byte), 0x1f)
+ table.insert(polymod_tbl, b)
+ end, fun.iter(prefix))
+
+ -- For semicolon
+ table.insert(polymod_tbl, 0)
+
+ fun.each(function(byte)
+ table.insert(polymod_tbl, byte)
+ end, decoded)
+ lua_util.debugm(N, task, 'final polymod table: %s', polymod_tbl)
+
+ return rspamd_util.btc_polymod(polymod_tbl)
+ end
+ end
+end
+
+local normal_wallet_re = [[/\b[13LM][1-9A-Za-z]{25,34}\b/AL{sa_body}]]
+local btc_bleach_re = [[/\b(?:(?:[a-zA-Z]\w+:)|(?:bc1))?[qpzry9x8gf2tvdw0s3jn54khce6mua7l]{14,}\b/AL{sa_body}]]
+
+config.regexp['BITCOIN_ADDR'] = {
+ description = 'Message has a valid bitcoin wallet address',
+ -- Use + operator to ensure that each expression is always evaluated
+ re = string.format('(%s) + (%s) > 0', normal_wallet_re, btc_bleach_re),
+ re_conditions = {
+ [normal_wallet_re] = function(task, txt, s, e)
+ local len = e - s
+ if len <= 2 or len > 1024 then
+ return false
+ end
+
+ local word = lua_util.str_trim(txt:sub(s + 1, e))
+ local valid = is_traditional_btc_address(word)
+
+ if valid then
+ -- To save option
+ task:insert_result('BITCOIN_ADDR', 1.0, word)
+ lua_util.debugm(N, task, 'found valid traditional bitcoin addr in the word: %s',
+ word)
+ return true
+ else
+ lua_util.debugm(N, task, 'found invalid bitcoin addr in the word: %s',
+ word)
+
+ return false
+ end
+ end,
+ [btc_bleach_re] = function(task, txt, s, e)
+ local len = e - s
+ if len <= 2 or len > 1024 then
+ return false
+ end
+
+ local word = tostring(lua_util.str_trim(txt:sub(s + 1, e)))
+ local valid = is_segwit_bech32_address(task, word)
+
+ if valid then
+ -- To save option
+ task:insert_result('BITCOIN_ADDR', 1.0, word)
+ lua_util.debugm(N, task, 'found valid bleach bitcoin addr in the word: %s',
+ word)
+ return true
+ else
+ lua_util.debugm(N, task, 'found invalid bitcoin addr in the word: %s',
+ word)
+
+ return false
+ end
+ end,
+ },
+ score = 0.0,
+ one_shot = true,
+ group = 'scams',
+}
diff --git a/rules/bounce.lua b/rules/bounce.lua
new file mode 100644
index 0000000..fb74b97
--- /dev/null
+++ b/rules/bounce.lua
@@ -0,0 +1,117 @@
+--[[
+Copyright (c) 2020, Anton Yuzhaninov <citrin@citrin.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Rule to detect bounces:
+-- RFC 3464 Delivery status notifications and most common non-standard ones
+
+local function make_subj_bounce_keywords_re()
+ -- Words and phrases commonly used in Subjects for bounces
+ -- We cannot practically test all localized Subjects, but luckily English is by far the most common here
+ local keywords = {
+ 'could not send message',
+ "couldn't be delivered",
+ 'delivery failed',
+ 'delivery failure',
+ 'delivery report',
+ 'delivery status',
+ 'delivery warning',
+ 'failure delivery',
+ 'failure notice',
+ "hasn't been delivered",
+ 'mail failure',
+ 'returned mail',
+ 'undeliverable',
+ 'undelivered',
+ }
+ return string.format([[Subject=/\b(%s)\b/i{header}]], table.concat(keywords, '|'))
+end
+
+config.regexp.SUBJ_BOUNCE_WORDS = {
+ re = make_subj_bounce_keywords_re(),
+ group = 'headers',
+ score = 0.0,
+ description = 'Words/phrases typical for DSN'
+}
+
+rspamd_config.BOUNCE = {
+ callback = function(task)
+ local from = task:get_from('smtp')
+ if from and from[1].addr ~= '' then
+ -- RFC 3464:
+ -- Whenever an SMTP transaction is used to send a DSN, the MAIL FROM
+ -- command MUST use a NULL return address, i.e., "MAIL FROM:<>"
+ -- In practise it is almost always the case for DSN
+ return false
+ end
+
+ local parts = task:get_parts()
+ local top_type, top_subtype, params = parts[1]:get_type_full()
+ -- RFC 3464, RFC 8098
+ if top_type == 'multipart' and top_subtype == 'report' and params and
+ (params['report-type'] == 'delivery-status' or params['report-type'] == 'disposition-notification') then
+ -- Assume that inner parts are OK, don't check them to save time
+ return true, 1.0, 'DSN'
+ end
+
+ -- Apply heuristics for non-standard bounces
+ local bounce_sender
+ local mime_from = task:get_from('mime')
+ if mime_from then
+ local from_user = mime_from[1].user:lower()
+ -- Check common bounce senders
+ if (from_user == 'postmaster' or from_user == 'mailer-daemon') then
+ bounce_sender = from_user
+ -- MDaemon >= 14.5 sends multipart/report (RFC 3464) DSN covered above,
+ -- but older versions send non-standard bounces with localized subjects and they
+ -- are still around
+ elseif from_user == 'mdaemon' and task:has_header('X-MDDSN-Message') then
+ return true, 1.0, 'MDaemon'
+ end
+ end
+
+ local subj_keywords = task:has_symbol('SUBJ_BOUNCE_WORDS')
+
+ if not (bounce_sender or subj_keywords) then
+ return false
+ end
+
+ if bounce_sender and subj_keywords then
+ return true, 0.5, bounce_sender .. '+subj'
+ end
+
+ -- Look for a message/rfc822(-headers) part inside
+ local rfc822_part
+ parts[10] = nil -- limit number of parts to check
+ for _, p in ipairs(parts) do
+ local mime_type, mime_subtype = p:get_type()
+ if (mime_subtype == 'rfc822' or mime_subtype == 'rfc822-headers') and
+ (mime_type == 'message' or mime_type == 'text') then
+ rfc822_part = mime_type .. '/' .. mime_subtype
+ break
+ end
+ end
+
+ if rfc822_part and bounce_sender then
+ return true, 0.5, bounce_sender .. '+' .. rfc822_part
+ elseif rfc822_part and subj_keywords then
+ return true, 0.2, rfc822_part .. '+subj'
+ end
+ end,
+ description = '(Non) Delivery Status Notification',
+ group = 'headers',
+}
+
+rspamd_config:register_dependency('BOUNCE', 'SUBJ_BOUNCE_WORDS')
diff --git a/rules/content.lua b/rules/content.lua
new file mode 100644
index 0000000..667b7ec
--- /dev/null
+++ b/rules/content.lua
@@ -0,0 +1,118 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local function process_pdf_specific(task, part, specific)
+ local suspicious_factor = 0
+ if specific.encrypted then
+ task:insert_result('PDF_ENCRYPTED', 1.0, part:get_filename() or 'unknown')
+ suspicious_factor = suspicious_factor + 0.1
+ if specific.openaction then
+ suspicious_factor = suspicious_factor + 0.5
+ end
+ end
+
+ if specific.scripts then
+ task:insert_result('PDF_JAVASCRIPT', 1.0, part:get_filename() or 'unknown')
+ suspicious_factor = suspicious_factor + 0.1
+ end
+
+ if specific.suspicious then
+ suspicious_factor = suspicious_factor + specific.suspicious
+ end
+
+ if suspicious_factor > 0.5 then
+ if suspicious_factor > 1.0 then
+ suspicious_factor = 1.0
+ end
+ task:insert_result('PDF_SUSPICIOUS', suspicious_factor, part:get_filename() or 'unknown')
+ end
+
+ if specific.long_trailer then
+ task:insert_result('PDF_LONG_TRAILER', 1.0, string.format('%s:%d',
+ part:get_filename() or 'unknown', specific.long_trailer))
+ end
+ if specific.many_objects then
+ task:insert_result('PDF_MANY_OBJECTS', 1.0, string.format('%s:%d',
+ part:get_filename() or 'unknown', specific.many_objects))
+ end
+ if specific.timeout_processing then
+ task:insert_result('PDF_TIMEOUT', 1.0, string.format('%s:%.3f',
+ part:get_filename() or 'unknown', specific.timeout_processing))
+ end
+end
+
+local tags_processors = {
+ pdf = process_pdf_specific
+}
+
+local function process_specific_cb(task)
+ local parts = task:get_parts() or {}
+
+ for _, p in ipairs(parts) do
+ if p:is_specific() then
+ local data = p:get_specific()
+
+ if data and type(data) == 'table' and data.tag then
+ if tags_processors[data.tag] then
+ tags_processors[data.tag](task, p, data)
+ end
+ end
+ end
+ end
+end
+
+local id = rspamd_config:register_symbol {
+ type = 'callback',
+ name = 'SPECIFIC_CONTENT_CHECK',
+ callback = process_specific_cb
+}
+
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_ENCRYPTED',
+ parent = id,
+ groups = { "content", "pdf" },
+}
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_JAVASCRIPT',
+ parent = id,
+ groups = { "content", "pdf" },
+}
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_SUSPICIOUS',
+ parent = id,
+ groups = { "content", "pdf" },
+}
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_LONG_TRAILER',
+ parent = id,
+ groups = { "content", "pdf" },
+}
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_MANY_OBJECTS',
+ parent = id,
+ groups = { "content", "pdf" },
+}
+rspamd_config:register_symbol {
+ type = 'virtual',
+ name = 'PDF_TIMEOUT',
+ parent = id,
+ groups = { "content", "pdf" },
+}
diff --git a/rules/controller/fuzzy.lua b/rules/controller/fuzzy.lua
new file mode 100644
index 0000000..193e6fd
--- /dev/null
+++ b/rules/controller/fuzzy.lua
@@ -0,0 +1,46 @@
+--[[
+Copyright (c) 2023, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local function handle_gen_fuzzy(task, conn, req_params)
+ if type(rspamd_plugins.fuzzy_check) == 'table' then
+ local ret, hashes
+ task:process_message()
+ if req_params.rule then
+ ret, hashes = pcall(rspamd_plugins.fuzzy_check.hex_hashes, task, req_params.rule)
+ elseif req_params.flag then
+ ret, hashes = pcall(rspamd_plugins.fuzzy_check.hex_hashes, task, tonumber(req_params.flag))
+ else
+ conn:send_error(404, 'missing rule or flag')
+ return
+ end
+
+ if ret then
+ conn:send_ucl({ success = true, hashes = hashes })
+ else
+ conn:send_error(500, 'cannot generate hashes')
+ end
+ else
+ conn:send_error(404, 'fuzzy_check is not enabled')
+ end
+end
+
+return {
+ hashes = {
+ handler = handle_gen_fuzzy,
+ need_task = true,
+ enable = false
+ },
+} \ No newline at end of file
diff --git a/rules/controller/init.lua b/rules/controller/init.lua
new file mode 100644
index 0000000..17fbbfc
--- /dev/null
+++ b/rules/controller/init.lua
@@ -0,0 +1,67 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Controller endpoints
+
+local local_conf = rspamd_paths['LOCAL_CONFDIR']
+local local_rules = rspamd_paths['RULESDIR']
+local rspamd_util = require "rspamd_util"
+local lua_util = require "lua_util"
+local rspamd_logger = require "rspamd_logger"
+
+-- Define default controller paths, could be overridden in local.d/controller.lua
+
+local controller_plugin_paths = {
+ maps = dofile(local_rules .. "/controller/maps.lua"),
+ neural = dofile(local_rules .. "/controller/neural.lua"),
+ selectors = dofile(local_rules .. "/controller/selectors.lua"),
+ fuzzy = dofile(local_rules .. "/controller/fuzzy.lua"),
+}
+
+if rspamd_util.file_exists(local_conf .. '/controller.lua') then
+ local controller_overrides = dofile(local_conf .. '/controller.lua')
+
+ if controller_overrides and type(controller_overrides) == 'table' then
+ controller_plugin_paths = lua_util.override_defaults(controller_plugin_paths, controller_overrides)
+ end
+end
+
+for plug, paths in pairs(controller_plugin_paths) do
+ if not rspamd_plugins[plug] then
+ rspamd_plugins[plug] = {}
+ end
+ if not rspamd_plugins[plug].webui then
+ rspamd_plugins[plug].webui = {}
+ end
+
+ local webui = rspamd_plugins[plug].webui
+
+ for path, attrs in pairs(paths) do
+ if type(attrs) == 'table' then
+ if type(attrs.handler) ~= 'function' then
+ rspamd_logger.infox(rspamd_config, 'controller plugin %s; webui path %s has invalid handler: %s; ignore it',
+ plug, path, type(attrs.handler))
+ else
+ webui[path] = lua_util.shallowcopy(attrs)
+ rspamd_logger.infox(rspamd_config, 'controller plugin %s; register webui path %s',
+ plug, path)
+ end
+ else
+ rspamd_logger.infox(rspamd_config, 'controller plugin %s; webui path %s has invalid type: %s; ignore it',
+ plug, path, type(attrs))
+ end
+ end
+end
diff --git a/rules/controller/maps.lua b/rules/controller/maps.lua
new file mode 100644
index 0000000..718e292
--- /dev/null
+++ b/rules/controller/maps.lua
@@ -0,0 +1,220 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Controller maps plugin
+local maps_cache
+local maps_aliases
+local lua_util = require "lua_util"
+local ts = require("tableshape").types
+local ucl = require "ucl"
+
+--- Lazily populate the module-level `maps_cache` (uri -> map object) and
+-- `maps_aliases` (last path component of the uri -> uri) tables from
+-- `rspamd_config:get_maps()`. Does nothing once the cache exists.
+local function maybe_fill_maps_cache()
+  if not maps_cache then
+    maps_cache = {}
+    maps_aliases = {}
+    local maps = rspamd_config:get_maps()
+    for _, m in ipairs(maps) do
+      -- We get the first url here and that's it
+      local url = m:get_uri()
+      if url ~= 'static' then
+        if not maps_cache[url] then
+          local alias = url:match('/([^/]+)$')
+          maps_cache[url] = m
+          -- `alias` is nil when the uri has no '/' or ends with one; using nil
+          -- as a table key raises "table index is nil", so skip the alias then
+          if alias and not maps_aliases[alias] then
+            maps_aliases[alias] = url
+          end
+        else
+          -- Do not override, as we don't care about duplicate maps that come from different
+          -- sources.
+          -- In theory, that should be cached but there are some exceptions even so far...
+          url = math.random() -- to shut luacheck about empty branch with a comment
+        end
+      end
+    end
+  end
+end
+
+local function check_specific_map(input, uri, m, results, report_misses)
+ local value = m:get_key(input)
+
+ if value then
+ local result = {
+ map = uri,
+ alias = uri:match('/([^/]+)$'),
+ value = value,
+ key = input,
+ hit = true,
+ }
+ table.insert(results, result)
+ elseif report_misses then
+ local result = {
+ map = uri,
+ alias = uri:match('/([^/]+)$'),
+ key = input,
+ hit = false,
+ }
+ table.insert(results, result)
+ end
+end
+
+local function handle_query_map(_, conn, req_params)
+ maybe_fill_maps_cache()
+ local keys_to_check = {}
+
+ if req_params.value and req_params.value ~= '' then
+ keys_to_check[1] = req_params.value
+ elseif req_params.values then
+ keys_to_check = lua_util.str_split(req_params.values, ',')
+ end
+
+ local results = {}
+ for _, key in ipairs(keys_to_check) do
+ for uri, m in pairs(maps_cache) do
+ check_specific_map(key, uri, m, results, req_params.report_misses)
+ end
+ end
+ conn:send_ucl {
+ success = (#results > 0),
+ results = results
+ }
+end
+
+--- Controller handler: look up one or more keys in a chosen subset of maps.
+-- Reads `value` (single key) or `values` (comma separated keys) and an
+-- optional comma separated `maps` list (full uris or aliases) from
+-- `req_params`; when `maps` is absent every cached map is queried.
+-- Replies with `{ success, results }`, or with a 404 error naming the first
+-- unknown map.
+local function handle_query_specific_map(_, conn, req_params)
+  maybe_fill_maps_cache()
+  -- Fill keys to check
+  local keys_to_check = {}
+  if req_params.value and req_params.value ~= '' then
+    keys_to_check[1] = req_params.value
+  elseif req_params.values then
+    keys_to_check = lua_util.str_split(req_params.values, ',')
+  end
+  local maps_to_check = maps_cache
+  -- Fill maps to check
+  if req_params.maps then
+    local map_names = lua_util.str_split(req_params.maps, ',')
+    maps_to_check = {}
+    for _, mn in ipairs(map_names) do
+      if maps_cache[mn] then
+        maps_to_check[mn] = maps_cache[mn]
+      else
+        local alias = maps_aliases[mn]
+
+        if alias then
+          maps_to_check[alias] = maps_cache[alias]
+        else
+          conn:send_error(404, 'no such map: ' .. mn)
+          -- Stop here: falling through would send a second reply below
+          return
+        end
+      end
+    end
+  end
+
+  local results = {}
+  for _, key in ipairs(keys_to_check) do
+    for uri, m in pairs(maps_to_check) do
+      check_specific_map(key, uri, m, results, req_params.report_misses)
+    end
+  end
+
+  conn:send_ucl {
+    success = (#results > 0),
+    results = results
+  }
+end
+
+local function handle_list_maps(_, conn, _)
+ maybe_fill_maps_cache()
+ conn:send_ucl {
+ maps = lua_util.keys(maps_cache),
+ aliases = maps_aliases
+ }
+end
+
+local query_json_schema = ts.shape {
+ maps = ts.array_of(ts.string):is_optional(),
+ report_misses = ts.boolean:is_optional(),
+ values = ts.array_of(ts.string),
+}
+
+local function handle_query_json(task, conn)
+ maybe_fill_maps_cache()
+
+ local parser = ucl.parser()
+ local ok, err = parser:parse_text(task:get_rawbody())
+ if not ok then
+ conn:send_error(400, err)
+ return
+ end
+ local obj = parser:get_object()
+
+ ok, err = query_json_schema:transform(obj)
+ if not ok then
+ conn:send_error(400, err)
+ return
+ end
+
+ local maps_to_check = {}
+ local report_misses = obj.report_misses
+ local results = {}
+
+ if obj.maps then
+ for _, mn in ipairs(obj.maps) do
+ if maps_cache[mn] then
+ maps_to_check[mn] = maps_cache[mn]
+ else
+ local alias = maps_aliases[mn]
+
+ if alias then
+ maps_to_check[alias] = maps_cache[alias]
+ else
+ conn:send_error(400, 'no such map: ' .. mn)
+ return
+ end
+ end
+ end
+ else
+ maps_to_check = maps_cache
+ end
+
+ for _, key in ipairs(obj.values) do
+ for uri, m in pairs(maps_to_check) do
+ check_specific_map(key, uri, m, results, report_misses)
+ end
+ end
+ conn:send_ucl {
+ success = (#results > 0),
+ results = results
+ }
+end
+
+return {
+ query = {
+ handler = handle_query_map,
+ enable = false,
+ },
+ query_json = {
+ handler = handle_query_json,
+ enable = false,
+ need_task = true,
+ },
+ query_specific = {
+ handler = handle_query_specific_map,
+ enable = false,
+ },
+ list = {
+ handler = handle_list_maps,
+ enable = false,
+ },
+}
diff --git a/rules/controller/neural.lua b/rules/controller/neural.lua
new file mode 100644
index 0000000..aef1042
--- /dev/null
+++ b/rules/controller/neural.lua
@@ -0,0 +1,70 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local neural_common = require "plugins/neural"
+local ts = require("tableshape").types
+local ucl = require "ucl"
+
+local E = {}
+
+-- Controller neural plugin
+
+local learn_request_schema = ts.shape {
+ ham_vec = ts.array_of(ts.array_of(ts.number)),
+ rule = ts.string:is_optional(),
+ spam_vec = ts.array_of(ts.array_of(ts.number)),
+}
+
+--- Controller handler: train a neural rule from vectors supplied by the client.
+-- The request body is parsed with UCL and must match `learn_request_schema`
+-- (`ham_vec`, `spam_vec` and an optional `rule`, which defaults to 'default').
+-- Replies with a 400 error on a malformed body and a 404 error when the
+-- requested rule is not present in `neural_common.settings.rules`; otherwise
+-- spawns training and reports success.
+local function handle_learn(task, conn)
+  local parser = ucl.parser()
+  local ok, err = parser:parse_text(task:get_rawbody())
+  if not ok then
+    conn:send_error(400, err)
+    return
+  end
+  local req_params = parser:get_object()
+
+  ok, err = learn_request_schema:transform(req_params)
+  if not ok then
+    conn:send_error(400, err)
+    return
+  end
+
+  local rule_name = req_params.rule or 'default'
+  local rule = neural_common.settings.rules[rule_name]
+  if not rule then
+    -- The rule name comes from the request; never hand a nil rule to the
+    -- training helpers below
+    conn:send_error(404, 'unknown rule: ' .. rule_name)
+    return
+  end
+  local set = neural_common.get_rule_settings(task, rule)
+  -- Next ANN version: one past the current one, or 1 when none exists yet
+  local version = ((set.ann or E).version or 0) + 1
+
+  neural_common.spawn_train {
+    ev_base = task:get_ev_base(),
+    ann_key = neural_common.new_ann_key(rule, set, version),
+    set = set,
+    rule = rule,
+    ham_vec = req_params.ham_vec,
+    spam_vec = req_params.spam_vec,
+    worker = task:get_worker(),
+  }
+
+  conn:send_string('{"success" : true}')
+end
+
+return {
+ learn = {
+ handler = handle_learn,
+ enable = true,
+ need_task = true,
+ },
+}
diff --git a/rules/controller/selectors.lua b/rules/controller/selectors.lua
new file mode 100644
index 0000000..7fc2894
--- /dev/null
+++ b/rules/controller/selectors.lua
@@ -0,0 +1,73 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local lua_selectors = require "lua_selectors"
+
+-- Controller selectors plugin
+
+local function handle_list_transforms(_, conn)
+ conn:send_ucl(lua_selectors.list_transforms())
+end
+
+local function handle_list_extractors(_, conn)
+ conn:send_ucl(lua_selectors.list_extractors())
+end
+
+--- Controller handler: report whether `req_params.selector` can be turned into
+-- a selector closure. Replies with `{ success = true|false }`, or with a 404
+-- error when no selector was supplied.
+local function handle_check_selector(_, conn, req_params)
+  if req_params.selector and req_params.selector ~= '' then
+    local selector = lua_selectors.create_selector_closure(rspamd_config,
+        req_params.selector, '', true)
+    -- Coerce to a real boolean: `selector and true` evaluates to nil when no
+    -- closure is returned, and a nil field is simply absent from the table
+    conn:send_ucl({ success = not not selector })
+  else
+    conn:send_error(404, 'missing selector')
+  end
+end
+
+local function handle_check_message(task, conn, req_params)
+ if req_params.selector and req_params.selector ~= '' then
+ local selector = lua_selectors.create_selector_closure(rspamd_config,
+ req_params.selector, '', true)
+ if not selector then
+ conn:send_error(500, 'invalid selector')
+ else
+ task:process_message()
+ local elts = selector(task)
+ conn:send_ucl({ success = true, data = elts })
+ end
+ else
+ conn:send_error(404, 'missing selector')
+ end
+end
+
+return {
+ list_extractors = {
+ handler = handle_list_extractors,
+ enable = true,
+ },
+ list_transforms = {
+ handler = handle_list_transforms,
+ enable = true,
+ },
+ check_selector = {
+ handler = handle_check_selector,
+ enable = true,
+ },
+ check_message = {
+ handler = handle_check_message,
+ enable = true,
+ need_task = true,
+ }
+}
diff --git a/rules/forwarding.lua b/rules/forwarding.lua
new file mode 100644
index 0000000..a008c58
--- /dev/null
+++ b/rules/forwarding.lua
@@ -0,0 +1,163 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Rules to detect forwarding
+
+local rspamd_util = require "rspamd_util"
+
+rspamd_config.FWD_GOOGLE = {
+ callback = function(task)
+ if not (task:has_from(1) and task:has_recipients(1)) then
+ return false
+ end
+ local envfrom = task:get_from { 'smtp', 'orig' }
+ local envrcpts = task:get_recipients(1)
+ -- Forwarding will only be to a single recipient
+ if #envrcpts > 1 then
+ return false
+ end
+ -- Get recipient and compute VERP address
+ local rcpt = envrcpts[1].addr:lower()
+ local verp = rcpt:gsub('@', '=')
+ -- Get the user portion of the envfrom
+ local ef_user = envfrom[1].user:lower()
+ -- Check for a match
+ if ef_user:find('+caf_=' .. verp, 1, true) then
+ local _, _, user = ef_user:find('^(.+)+caf_=')
+ if user then
+ user = user .. '@' .. envfrom[1].domain
+ return true, user
+ end
+ end
+ return false
+ end,
+ score = 0.0,
+ description = "Message was forwarded by Google",
+ group = "forwarding"
+}
+
+rspamd_config.FWD_YANDEX = {
+ callback = function(task)
+ if not (task:has_from(1) and task:has_recipients(1)) then
+ return false
+ end
+ local hostname = task:get_hostname()
+ if hostname and hostname:lower():find('%.yandex%.[a-z]+$') then
+ return task:has_header('X-Yandex-Forward')
+ end
+ return false
+ end,
+ score = 0.0,
+ description = "Message was forwarded by Yandex",
+ group = "forwarding"
+}
+
+rspamd_config.FWD_MAILRU = {
+ callback = function(task)
+ if not (task:has_from(1) and task:has_recipients(1)) then
+ return false
+ end
+ local hostname = task:get_hostname()
+ if hostname and hostname:lower():find('%.mail%.ru$') then
+ return task:has_header('X-MailRu-Forward')
+ end
+ return false
+ end,
+ score = 0.0,
+ description = "Message was forwarded by Mail.ru",
+ group = "forwarding"
+}
+
+rspamd_config.FWD_SRS = {
+  -- Fires when the envelope sender's user part starts with `srs0=`/`srs1=`
+  -- and embeds the single envelope recipient as `=domain=user`.
+  callback = function(task)
+    if not (task:has_from(1) and task:has_recipients(1)) then
+      return false
+    end
+    local envfrom = task:get_from(1)
+    local envrcpts = task:get_recipients(1)
+    -- Forwarding is only to a single recipient
+    if #envrcpts > 1 then
+      return false
+    end
+    -- Get recipient and compute rewritten SRS address
+    local srs = '=' .. envrcpts[1].domain:lower() ..
+        '=' .. envrcpts[1].user:lower()
+    local ef_user = envfrom[1].user:lower()
+    -- Plain (non-pattern) search for the recipient: domains and users contain
+    -- characters such as '.', '-' and '+' that are magic in Lua patterns
+    if ef_user:find('^srs[01]=') and ef_user:find(srs, 1, true) then
+      return true
+    end
+    return false
+  end,
+  score = 0.0,
+  description = "Message was forwarded using Sender Rewriting Scheme (SRS)",
+  group = "forwarding"
+}
+
+rspamd_config.FORWARDED = {
+ callback = function(task)
+ local function normalize_addr(addr)
+ addr = string.match(addr, '^<?([^>]*)>?$') or addr
+ local cap, _, domain = string.match(addr, '^([^%+][^%+]*)(%+[^@]*)@(.*)$')
+ if cap then
+ addr = string.format('%s@%s', cap, domain)
+ end
+
+ return addr
+ end
+
+ if not task:has_recipients(1) or not task:has_recipients(2) then
+ return false
+ end
+ local envrcpts = task:get_recipients(1)
+ -- Forwarding will only be for single recipient messages
+ if #envrcpts > 1 then
+ return false
+ end
+ -- Get any other headers we might need
+ local has_list_unsub = task:has_header('List-Unsubscribe')
+ local to = task:get_recipients(2)
+ local matches = 0
+ -- Retrieve and loop through all Received headers
+ local rcvds = task:get_received_headers()
+
+ if rcvds then
+ for _, rcvd in ipairs(rcvds) do
+ local addr = rcvd['for']
+ if addr then
+ addr = normalize_addr(addr)
+ matches = matches + 1
+ -- Check that it doesn't match the envrcpt
+ if not rspamd_util.strequal_caseless(addr, envrcpts[1].addr) then
+ -- Check for mailing-lists as they will have the same signature
+ if matches < 2 and has_list_unsub and to and rspamd_util.strequal_caseless(to[1].addr, addr) then
+ return false
+ else
+ return true, 1.0, addr
+ end
+ end
+ -- Prevent any other iterations as we only want
+ -- process the first matching Received header
+ return false
+ end
+ end
+ end
+ return false
+ end,
+ score = 0.0,
+ description = "Message was forwarded",
+ group = "forwarding"
+}
diff --git a/rules/headers_checks.lua b/rules/headers_checks.lua
new file mode 100644
index 0000000..92ebb0c
--- /dev/null
+++ b/rules/headers_checks.lua
@@ -0,0 +1,1174 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local util = require "rspamd_util"
+local ipairs = ipairs
+local pairs = pairs
+local table = table
+local tostring = tostring
+local tonumber = tonumber
+local fun = require "fun"
+local E = {}
+
+local rcvd_cb_id = rspamd_config:register_symbol {
+  name = 'CHECK_RECEIVED',
+  type = 'callback',
+  score = 0.0,
+  group = 'headers',
+  -- Counts non-artificial Received headers and inserts the matching
+  -- RCVD_COUNT_* symbol with the count as its option.
+  callback = function(task)
+    -- Thresholds in ascending order. An ordered list is used on purpose:
+    -- pairs() gives no ordering guarantee, so "last match wins" over a
+    -- keyed table could select a lower bucket than the correct one.
+    local buckets = {
+      { 1, 'ONE' },
+      { 2, 'TWO' },
+      { 3, 'THREE' },
+      { 5, 'FIVE' },
+      { 7, 'SEVEN' },
+      { 12, 'TWELVE' },
+    }
+    -- Guard against a missing list so the iterator below never sees nil
+    local received = task:get_received_headers() or {}
+    local nreceived = fun.reduce(function(acc, _)
+      return acc + 1
+    end, 0, fun.filter(function(h)
+      return not h['flags']['artificial']
+    end, received))
+
+    local def = 'ZERO'
+    for _, bucket in ipairs(buckets) do
+      if nreceived < bucket[1] then
+        break
+      end
+      def = bucket[2]
+    end
+
+    task:insert_result('RCVD_COUNT_' .. def, 1.0, tostring(nreceived))
+  end
+}
+
+-- Virtual symbols inserted by CHECK_RECEIVED. The descriptions give the
+-- ranges the callback's thresholds (1, 2, 3, 5, 7, 12) actually produce.
+for _, sym in ipairs({
+  { 'ZERO', 'Message has no Received headers' },
+  { 'ONE', 'Message has one Received header' },
+  { 'TWO', 'Message has two Received headers' },
+  { 'THREE', 'Message has 3-4 Received headers' },
+  { 'FIVE', 'Message has 5-6 Received headers' },
+  { 'SEVEN', 'Message has 7-11 Received headers' },
+  { 'TWELVE', 'Message has 12 or more Received headers' },
+}) do
+  rspamd_config:register_symbol {
+    name = 'RCVD_COUNT_' .. sym[1],
+    score = 0.0,
+    parent = rcvd_cb_id,
+    type = 'virtual',
+    description = sym[2],
+    group = 'headers',
+  }
+end
+
+local prio_cb_id = rspamd_config:register_symbol {
+  name = 'HAS_X_PRIO',
+  type = 'callback',
+  description = 'X-Priority check callback rule',
+  score = 0.0,
+  group = 'headers',
+  -- Parses the leading number of the X-Priority header and inserts the
+  -- matching HAS_X_PRIO_* symbol. Returns false when the header is absent.
+  callback = function(task)
+    -- Ascending thresholds; an ordered list avoids relying on pairs()
+    -- traversal order, which Lua leaves unspecified.
+    local buckets = {
+      { 1, 'ONE' },
+      { 2, 'TWO' },
+      { 3, 'THREE' },
+      { 5, 'FIVE' },
+    }
+    local xprio = task:get_header('X-Priority')
+    if not xprio then
+      return false
+    end
+
+    -- At most one leading whitespace character, then the digits
+    local digits = xprio:match('^%s?(%d+)')
+    if digits then
+      local x = tonumber(digits)
+      local def = 'ZERO'
+      for _, bucket in ipairs(buckets) do
+        if x < bucket[1] then
+          break
+        end
+        def = bucket[2]
+      end
+      task:insert_result('HAS_X_PRIO_' .. def, 1.0, tostring(x))
+    end
+  end
+}
+-- Virtual symbols inserted by the HAS_X_PRIO callback
+for _, sym in ipairs({
+  { 'ZERO', 'Message has X-Priority header set to 0' },
+  { 'ONE', 'Message has X-Priority header set to 1' },
+  { 'TWO', 'Message has X-Priority header set to 2' },
+  { 'THREE', 'Message has X-Priority header set to 3 or 4' },
+  { 'FIVE', 'Message has X-Priority header set to 5 or higher' },
+}) do
+  rspamd_config:register_symbol {
+    name = 'HAS_X_PRIO_' .. sym[1],
+    score = 0.0,
+    parent = prio_cb_id,
+    type = 'virtual',
+    description = sym[2],
+    group = 'headers',
+  }
+end
+
+-- Returns the `value` field of the first entry from
+-- task:get_header_full(name), or nil when there is no such entry.
+local function get_raw_header(task, name)
+  local hdrs = task:get_header_full(name)
+  local first = hdrs and hdrs[1]
+  if not first then
+    return nil
+  end
+  return first['value']
+end
+
+local check_replyto_id = rspamd_config:register_symbol({
+  type = 'callback',
+  name = 'CHECK_REPLYTO',
+  score = 0.0,
+  group = 'headers',
+  -- Parses Reply-To and inserts REPLYTO_* / HAS_REPLYTO symbols describing
+  -- how it relates to the From and To headers.
+  callback = function(task)
+    local replyto = get_raw_header(task, 'Reply-To')
+    if not replyto then
+      return false
+    end
+
+    local rt = util.parse_mail_address(replyto, task:get_mempool())
+    if not (rt and rt[1] and (string.len(rt[1].addr) > 0)) then
+      task:insert_result('REPLYTO_UNPARSEABLE', 1.0)
+      return false
+    end
+
+    local rt1 = rt[1]
+    task:insert_result('HAS_REPLYTO', 1.0, rt1.addr)
+
+    -- Check if Reply-To address starts with title seen in display name
+    local sym = task:get_symbol('FROM_NAME_HAS_TITLE')
+    local title = (((sym or E)[1] or E).options or E)[1]
+    if title and rt1.addr:lower():find('^' .. title) then
+      task:insert_result('REPLYTO_EMAIL_HAS_TITLE', 1.0)
+    end
+
+    -- See if Reply-To matches From in some way
+    local from = task:get_from { 'mime', 'orig' }
+    local from_h = get_raw_header(task, 'From')
+    local from1 = from and from[1]
+    if not from1 then
+      return false
+    end
+
+    if from_h and from_h == replyto then
+      -- From and Reply-To are identical
+      task:insert_result('REPLYTO_EQ_FROM', 1.0)
+      return
+    end
+
+    -- See if From and Reply-To addresses match
+    if util.strequal_caseless(from1.addr, rt1.addr) then
+      task:insert_result('REPLYTO_ADDR_EQ_FROM', 1.0)
+    elseif from1.domain and rt1.domain then
+      if util.strequal_caseless(from1.domain, rt1.domain) then
+        task:insert_result('REPLYTO_DOM_EQ_FROM_DOM', 1.0)
+      else
+        -- See if Reply-To matches the To address
+        local to = task:get_recipients(2)
+        local same_as_to = to and to[1] and to[1].addr:lower() == rt1.addr:lower()
+        if not same_as_to then
+          task:insert_result('REPLYTO_DOM_NEQ_FROM_DOM', 1.0)
+        elseif not (task:get_header('List-Unsubscribe') or
+            task:get_header('X-To-Get-Off-This-List') or
+            task:get_header('X-List') or
+            task:get_header('Auto-Submitted'))
+        then
+          -- Mailing-lists and automatic submissions are ignored above
+          task:insert_result('REPLYTO_EQ_TO_ADDR', 1.0)
+        end
+      end
+    end
+
+    -- See if the Display Names match
+    if from1.name and rt1.name and util.strequal_caseless(from1.name, rt1.name) then
+      task:insert_result('REPLYTO_DN_EQ_FROM_DN', 1.0)
+    end
+  end
+})
+
+-- Virtual symbols inserted by CHECK_REPLYTO: { name, score, description }
+for _, sym in ipairs({
+  { 'REPLYTO_UNPARSEABLE', 1.0, 'Reply-To header could not be parsed' },
+  { 'HAS_REPLYTO', 0.0, 'Has Reply-To header' },
+  { 'REPLYTO_EQ_FROM', 0.0, 'Reply-To header is identical to From header' },
+  { 'REPLYTO_ADDR_EQ_FROM', 0.0, 'Reply-To header is identical to SMTP From' },
+  { 'REPLYTO_DOM_EQ_FROM_DOM', 0.0, 'Reply-To domain matches the From domain' },
+  { 'REPLYTO_DOM_NEQ_FROM_DOM', 0.0, 'Reply-To domain does not match the From domain' },
+  { 'REPLYTO_DN_EQ_FROM_DN', 0.0, 'Reply-To display name matches From' },
+  { 'REPLYTO_EMAIL_HAS_TITLE', 2.0, 'Reply-To header has title' },
+  { 'REPLYTO_EQ_TO_ADDR', 5.0, 'Reply-To is the same as the To address' },
+}) do
+  rspamd_config:register_symbol {
+    name = sym[1],
+    score = sym[2],
+    parent = check_replyto_id,
+    type = 'virtual',
+    description = sym[3],
+    group = 'headers',
+  }
+end
+
+-- CHECK_REPLYTO reads the FROM_NAME_HAS_TITLE symbol that CHECK_FROM inserts
+rspamd_config:register_dependency('CHECK_REPLYTO', 'CHECK_FROM')
+
+local check_mime_id = rspamd_config:register_symbol {
+  name = 'CHECK_MIME',
+  type = 'callback',
+  group = 'headers',
+  score = 0.0,
+  -- Inserts MISSING_MIME_VERSION when MIME headers exist without
+  -- MIME-Version, and MIME_MA_MISSING_TEXT / MIME_MA_MISSING_HTML when a
+  -- multipart/alternative message lacks a text/plain or text/html part.
+  callback = function(task)
+    -- Check if there is a MIME-Version header
+    local missing_mime = not task:has_header('MIME-Version')
+
+    -- Check presence of MIME specific headers
+    local has_ct_header = task:has_header('Content-Type')
+    local has_cte_header = task:has_header('Content-Transfer-Encoding')
+
+    -- Add the symbol if we have MIME headers, but no MIME-Version
+    -- (do not add the symbol for RFC822 messages)
+    if (has_ct_header or has_cte_header) and missing_mime then
+      task:insert_result('MISSING_MIME_VERSION', 1.0)
+    end
+
+    local found_ma = false
+    local found_plain = false
+    local found_html = false
+
+    -- Tolerate a missing parts list, as other rules in this file do
+    for _, p in ipairs(task:get_parts() or {}) do
+      local mtype, subtype = p:get_type()
+      -- Skip parts without a type instead of raising on nil:lower();
+      -- CTYPE_MIXED_BOGUS guards get_type() the same way
+      if mtype and subtype then
+        local ctype = mtype:lower() .. '/' .. subtype:lower()
+        if ctype == 'multipart/alternative' then
+          found_ma = true
+        elseif ctype == 'text/plain' then
+          found_plain = true
+        elseif ctype == 'text/html' then
+          found_html = true
+        end
+      end
+    end
+
+    if found_ma then
+      if not found_plain then
+        task:insert_result('MIME_MA_MISSING_TEXT', 1.0)
+      end
+      if not found_html then
+        task:insert_result('MIME_MA_MISSING_HTML', 1.0)
+      end
+    end
+  end
+}
+
+-- Virtual symbols inserted by CHECK_MIME: { name, score, description }
+for _, sym in ipairs({
+  { 'MISSING_MIME_VERSION', 2.0, 'MIME-Version header is missing in MIME message' },
+  { 'MIME_MA_MISSING_TEXT', 2.0, 'MIME multipart/alternative missing text/plain part' },
+  { 'MIME_MA_MISSING_HTML', 1.0, 'MIME multipart/alternative missing text/html part' },
+}) do
+  rspamd_config:register_symbol {
+    name = sym[1],
+    score = sym[2],
+    parent = check_mime_id,
+    type = 'virtual',
+    description = sym[3],
+    group = 'headers',
+  }
+end
+
+-- Used to be called IS_LIST
+rspamd_config.PREVIOUSLY_DELIVERED = {
+  description = 'Message either to a list or was forwarded',
+  group = 'headers',
+  score = 0.0,
+  -- Fires when the first Received header carrying a "for <addr>" clause
+  -- names one of the MIME recipients; the address is returned as option.
+  callback = function(task)
+    if not task:has_recipients(2) then
+      return false
+    end
+    local to = task:get_recipients(2)
+    local rcvds = task:get_header_full('Received')
+    if not rcvds then
+      return false
+    end
+
+    for _, rcvd in ipairs(rcvds) do
+      local addr = rcvd['decoded']:lower():match("%sfor%s<(.-)>")
+      if addr then
+        for _, toa in ipairs(to) do
+          if toa and toa.addr:lower() == addr then
+            return true, addr
+          end
+        end
+        -- Only the first header with a `for` clause is considered
+        return false
+      end
+    end
+  end
+}
+rspamd_config.BROKEN_HEADERS = {
+  description = 'Headers structure is likely broken',
+  group = 'headers',
+  score = 10.0,
+  -- Reports whether the task carries the 'broken_headers' flag
+  callback = function(task)
+    local is_broken = task:has_flag('broken_headers')
+    return is_broken
+  end
+}
+
+rspamd_config.BROKEN_CONTENT_TYPE = {
+  description = 'Message has part with broken content type',
+  group = 'headers',
+  score = 1.5,
+  -- True when any MIME part reports is_broken()
+  callback = function(task)
+    local function part_is_broken(p)
+      return p:is_broken()
+    end
+    return fun.any(part_is_broken, task:get_parts())
+  end
+}
+
+rspamd_config.HEADER_RCONFIRM_MISMATCH = {
+  -- Fires when the first address of X-Confirm-Reading-To is not contained
+  -- in the first MIME From address. Returns false when either is missing.
+  callback = function(task)
+    local cread = task:get_header('X-Confirm-Reading-To')
+
+    local header_from = nil
+    if task:has_from('mime') then
+      header_from = task:get_from('mime')[1]
+    end
+
+    local header_cread = nil
+    if cread then
+      local headers_cread = util.parse_mail_address(cread, task:get_mempool())
+      if headers_cread then
+        header_cread = headers_cread[1]
+      end
+    end
+
+    if not (header_from and header_cread) then
+      return false
+    end
+
+    local from_addr = header_from['addr']
+    local cread_addr = header_cread['addr']
+    if not (from_addr and cread_addr) then
+      return false
+    end
+
+    -- Plain (non-pattern) search: addresses routinely contain '-', '.' and
+    -- '+', which are magic in Lua patterns and made identical addresses
+    -- such as foo-bar@example.com fail to match. Case is folded because
+    -- the other address comparisons in this file are caseless.
+    return string.find(from_addr:lower(), cread_addr:lower(), 1, true) == nil
+  end,
+
+  score = 2.0,
+  group = 'headers',
+  description = 'Read confirmation address is different to from address'
+}
+
+rspamd_config.HEADER_FORGED_MDN = {
+  description = 'Read confirmation address is different to return path',
+  group = 'headers',
+  score = 2.0,
+
+  -- Fires when Disposition-Notification-To parses to addresses and none of
+  -- them equals the SMTP From address (or there is no SMTP From at all).
+  callback = function(task)
+    local mdn = task:get_header('Disposition-Notification-To')
+    if not mdn then
+      return false
+    end
+
+    local header_rp = nil
+    if task:has_from('smtp') then
+      header_rp = task:get_from('smtp')[1]
+    end
+
+    -- Parse mail addr
+    local headers_mdn = util.parse_mail_address(mdn, task:get_mempool())
+
+    -- Nothing parsed: no verdict, whether or not a return path exists
+    if not headers_mdn then
+      return false
+    end
+    -- Parsed addresses but no return path to compare against
+    if not header_rp then
+      return true
+    end
+
+    for _, h in ipairs(headers_mdn) do
+      if util.strequal_caseless(h['addr'], header_rp['addr']) then
+        return false
+      end
+    end
+
+    return true
+  end
+}
+
+-- Headers expected to appear at most once, mapped to the weight used when
+-- a duplicate is found
+local headers_unique = {
+  ['Content-Type'] = 1.0,
+  ['Content-Transfer-Encoding'] = 1.0,
+  -- https://tools.ietf.org/html/rfc5322#section-3.6
+  ['Date'] = 0.1,
+  ['From'] = 1.0,
+  ['Sender'] = 1.0,
+  ['Reply-To'] = 1.0,
+  ['To'] = 0.2,
+  ['Cc'] = 0.1,
+  ['Bcc'] = 0.1,
+  ['Message-ID'] = 0.7,
+  ['In-Reply-To'] = 0.7,
+  ['References'] = 0.3,
+  ['Subject'] = 0.7
+}
+
+local multiple_unique_headers_id = rspamd_config:register_symbol {
+  name = 'MULTIPLE_UNIQUE_HEADERS',
+  description = 'Repeated unique headers',
+  group = 'headers',
+  one_shot = true,
+  score = 7.0,
+
+  -- Inserts MULTIPLE_UNIQUE_HEADERS, weighted by the largest weight among
+  -- the repeated headers, or MISSING_ESSENTIAL_HEADERS when none of the
+  -- listed headers is present at all.
+  callback = function(task)
+    local repeated = {}
+    local top_weight = 0.0
+    local seen_total = 0
+
+    for hdr, weight in pairs(headers_unique) do
+      local count = task:get_header_count(hdr)
+      seen_total = seen_total + count
+
+      if count > 1 then
+        repeated[#repeated + 1] = hdr
+        if weight > top_weight then
+          top_weight = weight
+        end
+      end
+    end
+
+    if #repeated > 0 then
+      task:insert_result('MULTIPLE_UNIQUE_HEADERS', top_weight, table.concat(repeated, ','))
+    elseif seen_total == 0 then
+      task:insert_result('MISSING_ESSENTIAL_HEADERS', 1.0)
+    end
+  end
+}
+
+-- Virtual symbol inserted by the MULTIPLE_UNIQUE_HEADERS callback
+rspamd_config:register_symbol {
+  type = 'virtual',
+  parent = multiple_unique_headers_id,
+  name = 'MISSING_ESSENTIAL_HEADERS',
+  description = 'Common headers were entirely absent',
+  group = 'blankspam',
+  score = 7.0,
+}
+
+rspamd_config.MISSING_FROM = {
+  description = 'Missing From header',
+  group = 'headers',
+  score = 2.0,
+  -- True when the From header is absent or an empty string
+  callback = function(task)
+    local from = task:get_header('From')
+    return from == nil or from == ''
+  end
+}
+
+rspamd_config.MULTIPLE_FROM = {
+  description = 'Multiple addresses in From header',
+  group = 'headers',
+  score = 8.0,
+  -- Fires when the MIME From has a second address; the `raw` field of
+  -- every address is returned as the option list.
+  callback = function(task)
+    local from = task:get_from('mime')
+    if not (from and from[2]) then
+      return false
+    end
+    local raws = fun.totable(fun.map(function(a)
+      return a.raw
+    end, from))
+    return true, 1.0, raws
+  end
+}
+
+rspamd_config.MV_CASE = {
+  group = 'headers',
+  score = 0.5,
+  description = 'Mime-Version .vs. MIME-Version',
+  -- Passes `true` as the second has_header argument to look for the
+  -- exact 'Mime-Version' spelling
+  callback = function(task)
+    local found = task:has_header('Mime-Version', true)
+    return found
+  end
+}
+
+local check_from_id = rspamd_config:register_symbol {
+  name = 'CHECK_FROM',
+  type = 'callback',
+  score = 0.0,
+  group = 'headers',
+  -- Inserts the ENVFROM_* / FROM_* / TO_* virtual symbols describing the
+  -- envelope sender, the From header and how From relates to a single To.
+  callback = function(task)
+    local envfrom = task:get_from(1)
+    local from = task:get_from(2)
+    -- First entries, or nil/false when the list is missing or empty
+    local env1 = envfrom and envfrom[1]
+    local from1 = from and from[1]
+
+    if env1 and not env1["flags"]["valid"] then
+      task:insert_result('ENVFROM_INVALID', 1.0)
+    end
+
+    if from1 then
+      if not from1["flags"]["valid"] then
+        task:insert_result('FROM_INVALID', 1.0)
+      end
+
+      local name = from1.name
+      if name == nil or name == '' then
+        task:insert_result('FROM_NO_DN', 1.0)
+      elseif util.strequal_caseless(name, from1.addr) then
+        task:insert_result('FROM_DN_EQ_ADDR', 1.0)
+      else
+        task:insert_result('FROM_HAS_DN', 1.0)
+        -- Look for Mr/Mrs/Dr titles
+        local n = name:lower()
+        local match, match_end
+        match, match_end = n:find('^mrs?[%.%s]')
+        if match then
+          task:insert_result('FROM_NAME_HAS_TITLE', 1.0, n:sub(match, match_end - 1))
+        end
+        match, match_end = n:find('^dr[%.%s]')
+        if match then
+          task:insert_result('FROM_NAME_HAS_TITLE', 1.0, n:sub(match, match_end - 1))
+        end
+        -- Check for excess spaces
+        if n:find('%s%s') then
+          task:insert_result('FROM_NAME_EXCESS_SPACE', 1.0)
+        end
+      end
+
+      -- Guard on the first entry, not just the table, matching the
+      -- ENVFROM_INVALID check above; an empty list must not be indexed.
+      if env1 then
+        if util.strequal_caseless(env1.addr, from1.addr) then
+          task:insert_result('FROM_EQ_ENVFROM', 1.0)
+        elseif env1.addr ~= '' then
+          task:insert_result('FROM_NEQ_ENVFROM', 1.0, from1.addr, env1.addr)
+        end
+      end
+    end
+
+    local to = task:get_recipients(2)
+    if not (to and to[1] and #to == 1 and from1) then
+      return false
+    end
+    -- Check if FROM == TO
+    if util.strequal_caseless(to[1].addr, from1.addr) then
+      task:insert_result('TO_EQ_FROM', 1.0)
+    elseif to[1].domain and from1.domain and
+        util.strequal_caseless(to[1].domain, from1.domain)
+    then
+      task:insert_result('TO_DOM_EQ_FROM_DOM', 1.0)
+    end
+  end
+}
+
+-- Virtual symbols inserted by CHECK_FROM: { name, score, description }
+for _, sym in ipairs({
+  { 'ENVFROM_INVALID', 2.0, 'Envelope from does not have a valid format' },
+  { 'FROM_INVALID', 2.0, 'From header does not have a valid format' },
+  { 'FROM_NO_DN', 0.0, 'From header does not have a display name' },
+  { 'FROM_DN_EQ_ADDR', 1.0, 'From header display name is the same as the address' },
+  { 'FROM_HAS_DN', 0.0, 'From header has a display name' },
+  { 'FROM_NAME_EXCESS_SPACE', 1.0, 'From header display name contains excess whitespace' },
+  { 'FROM_NAME_HAS_TITLE', 1.0, 'From header display name has a title (Mr/Mrs/Dr)' },
+  { 'FROM_EQ_ENVFROM', 0.0, 'From address is the same as the envelope' },
+  { 'FROM_NEQ_ENVFROM', 0.0, 'From address is different to the envelope' },
+  { 'TO_EQ_FROM', 0.0, 'To address matches the From address' },
+  { 'TO_DOM_EQ_FROM_DOM', 0.0, 'To domain is the same as the From domain' },
+}) do
+  rspamd_config:register_symbol {
+    name = sym[1],
+    score = sym[2],
+    group = 'headers',
+    parent = check_from_id,
+    type = 'virtual',
+    description = sym[3],
+  }
+end
+
+local check_to_cc_id = rspamd_config:register_symbol {
+  name = 'CHECK_TO_CC',
+  type = 'callback',
+  score = 0.0,
+  group = 'headers,mime',
+  -- Inserts RCPT_COUNT_*, TO_DN_* and TO_MATCH_ENVRCPT_* symbols for the
+  -- MIME recipients. Returns false when there are no MIME recipients.
+  callback = function(task)
+    local rcpts = task:get_recipients(1)
+    local to = task:get_recipients(2)
+    if not to then
+      return false
+    end
+
+    -- Ascending thresholds. An ordered list is required: choosing the
+    -- bucket by "last match wins" over pairs() depends on a traversal
+    -- order that Lua does not specify.
+    local buckets = {
+      { 1, 'ONE' },
+      { 2, 'TWO' },
+      { 3, 'THREE' },
+      { 5, 'FIVE' },
+      { 7, 'SEVEN' },
+      { 12, 'TWELVE' },
+      { 50, 'GT_50' },
+    }
+
+    -- Add symbol for recipient count
+    local nrcpt = #to
+    local def = 'ZERO'
+    for _, bucket in ipairs(buckets) do
+      if nrcpt < bucket[1] then
+        break
+      end
+      def = bucket[2]
+    end
+    task:insert_result('RCPT_COUNT_' .. def, 1.0, tostring(nrcpt))
+
+    -- Check for display names
+    local to_match_envrcpt = 0
+    local to_dn_count = 0
+    local to_dn_eq_addr_count = 0
+    for _, toa in ipairs(to) do
+      -- To: Recipients <noreply@dropbox.com>
+      if (toa['name'] and (toa['name']:lower() == 'recipient'
+          or toa['name']:lower() == 'recipients')) then
+        task:insert_result('TO_DN_RECIPIENTS', 1.0)
+      end
+      if (toa['name'] and util.strequal_caseless(toa['name'], toa['addr'])) then
+        to_dn_eq_addr_count = to_dn_eq_addr_count + 1
+      elseif (toa['name'] and toa['name'] ~= '') then
+        to_dn_count = to_dn_count + 1
+      end
+      -- See if header recipients match envrcpts
+      if (rcpts) then
+        for _, rcpt in ipairs(rcpts) do
+          if (toa and toa['addr'] and rcpt and rcpt['addr'] and
+              util.strequal_caseless(rcpt['addr'], toa['addr']))
+          then
+            to_match_envrcpt = to_match_envrcpt + 1
+          end
+        end
+      end
+    end
+
+    if (to_dn_count == 0 and to_dn_eq_addr_count == 0) then
+      task:insert_result('TO_DN_NONE', 1.0)
+    elseif (to_dn_count == #to) then
+      task:insert_result('TO_DN_ALL', 1.0)
+    elseif (to_dn_count > 0) then
+      task:insert_result('TO_DN_SOME', 1.0)
+    end
+    if (to_dn_eq_addr_count == #to) then
+      task:insert_result('TO_DN_EQ_ADDR_ALL', 1.0)
+    elseif (to_dn_eq_addr_count > 0) then
+      task:insert_result('TO_DN_EQ_ADDR_SOME', 1.0)
+    end
+
+    -- See if header recipients match envelope recipients
+    if (to_match_envrcpt == #to) then
+      task:insert_result('TO_MATCH_ENVRCPT_ALL', 1.0)
+    elseif (to_match_envrcpt > 0) then
+      task:insert_result('TO_MATCH_ENVRCPT_SOME', 1.0)
+    end
+  end
+}
+
+-- Virtual symbols inserted by CHECK_TO_CC for the MIME recipient count.
+-- The descriptions give the ranges the callback's thresholds
+-- (1, 2, 3, 5, 7, 12, 50) actually produce.
+for _, sym in ipairs({
+  { 'ZERO', 'No recipients' },
+  { 'ONE', 'One recipient' },
+  { 'TWO', 'Two recipients' },
+  { 'THREE', '3-4 recipients' },
+  { 'FIVE', '5-6 recipients' },
+  { 'SEVEN', '7-11 recipients' },
+  { 'TWELVE', '12-49 recipients' },
+  { 'GT_50', '50+ recipients' },
+}) do
+  rspamd_config:register_symbol {
+    name = 'RCPT_COUNT_' .. sym[1],
+    score = 0.0,
+    parent = check_to_cc_id,
+    type = 'virtual',
+    description = sym[2],
+    group = 'headers',
+  }
+end
+
+-- Display-name and envelope-match virtual symbols inserted by CHECK_TO_CC:
+-- { name, score, description }
+for _, sym in ipairs({
+  { 'TO_DN_RECIPIENTS', 2.0, 'To header display name is "Recipients"' },
+  { 'TO_DN_NONE', 0.0, 'None of the recipients have display names' },
+  { 'TO_DN_ALL', 0.0, 'All the recipients have display names' },
+  { 'TO_DN_SOME', 0.0, 'Some of the recipients have display names' },
+  { 'TO_DN_EQ_ADDR_ALL', 0.0, 'All of the recipients have display names that are the same as their address' },
+  { 'TO_DN_EQ_ADDR_SOME', 0.0, 'Some of the recipients have display names that are the same as their address' },
+  { 'TO_MATCH_ENVRCPT_ALL', 0.0, 'All of the recipients match the envelope' },
+  { 'TO_MATCH_ENVRCPT_SOME', 0.0, 'Some of the recipients match the envelope' },
+}) do
+  rspamd_config:register_symbol {
+    name = sym[1],
+    score = sym[2],
+    group = 'headers',
+    parent = check_to_cc_id,
+    type = 'virtual',
+    description = sym[3],
+  }
+end
+
+-- TODO: rewrite this rule, it should not touch headers directly
+rspamd_config.CTYPE_MISSING_DISPOSITION = {
+  description = 'Binary content-type not specified as an attachment',
+  score = 4.0,
+  group = 'mime',
+  -- Decides on the first application/octet-stream part whose
+  -- Content-Disposition does not start with "attachment".
+  callback = function(task)
+    local parts = task:get_parts()
+    if not parts or #parts < 1 then
+      return false
+    end
+
+    for _, p in ipairs(parts) do
+      local ct = p:get_header('Content-Type')
+      if ct and ct:lower():match('^application/octet%-stream') ~= nil then
+        local cd = p:get_header('Content-Disposition')
+        local marked_attachment = cd and cd:lower():find('^attachment') ~= nil
+        if not marked_attachment then
+          -- Parts carrying a Content-ID are exempt
+          if p:get_header('Content-ID') then
+            return false
+          end
+          -- So are .asc files in a message with more than one part
+          if #parts > 1 and cd and cd:find('filename=.+%.asc') ~= nil then
+            return false
+          end
+
+          local parent = p:get_parent()
+          if parent then
+            local t, st = parent:get_type()
+            if t == 'multipart' and st == 'encrypted' then
+              -- Special case
+              return false
+            end
+          end
+
+          return true
+        end
+      end
+    end
+
+    return false
+  end
+}
+
+rspamd_config.CTYPE_MIXED_BOGUS = {
+  description = 'multipart/mixed without non-textual part',
+  score = 1.0,
+  group = 'mime',
+  -- Fires for a multipart/mixed message unless it has a part that is
+  -- neither multipart nor inline text, or more than two inline text parts.
+  callback = function(task)
+    local ct = task:get_header('Content-Type')
+    if not ct then
+      return false
+    end
+    local parts = task:get_parts()
+    if not parts then
+      return false
+    end
+    if not ct:lower():match('^multipart/mixed') then
+      return false
+    end
+
+    local ntext_parts = 0
+    for _, p in ipairs(parts) do
+      local mtype = p:get_type()
+      if mtype then
+        if mtype == 'text' and not p:is_attachment() then
+          ntext_parts = ntext_parts + 1
+          if ntext_parts > 2 then
+            return false
+          end
+        elseif mtype ~= 'multipart' then
+          -- Anything else justifies multipart/mixed (text attachments
+          -- land here too)
+          return false
+        end
+      end
+    end
+
+    return true
+  end
+}
+
+-- Returns true when `part` has a Content-Type starting with "text" and a
+-- Content-Transfer-Encoding starting with "base64" (both compared in
+-- lower case); false otherwise. `part` only needs a get_header method.
+local function check_for_base64_text(part)
+  local ct = part:get_header('Content-Type')
+  if not ct or not ct:lower():match('^text') then
+    return false
+  end
+  -- Check encoding
+  local cte = part:get_header('Content-Transfer-Encoding')
+  if cte and cte:lower():match('^base64') then
+    return true
+  end
+  return false
+end
+
+rspamd_config.MIME_BASE64_TEXT = {
+  description = 'Has text part encoded in base64',
+  score = 0.1,
+  group = 'mime',
+  -- Checks the message's own headers first, then every MIME part
+  callback = function(task)
+    -- Check outer part
+    if check_for_base64_text(task) then
+      return true
+    end
+
+    local parts = task:get_parts()
+    if not parts then
+      return false
+    end
+    -- Check each part and look for base64 encoded text parts
+    for _, part in ipairs(parts) do
+      if check_for_base64_text(part) then
+        return true
+      end
+    end
+
+    return false
+  end
+}
+
+rspamd_config.MIME_BASE64_TEXT_BOGUS = {
+  description = 'Has text part encoded in base64 that does not contain any 8bit characters',
+  score = 1.0,
+  group = 'mime',
+  -- Looks for a base64 encoded text part without any 8bit characters
+  callback = function(task)
+    local text_parts = task:get_text_parts()
+    if not text_parts then
+      return false
+    end
+
+    for _, tp in ipairs(text_parts) do
+      local mimepart = tp:get_mimepart()
+      local is_b64_text = check_for_base64_text(mimepart)
+      if is_b64_text and not tp:has_8bit() then
+        return true
+      end
+    end
+
+    return false
+  end
+}
+
+-- Returns true when `addr.flags['8bit']` is set, false otherwise
+-- (including when `addr` has no flags table).
+local function is_8bit_addr(addr)
+  local flags = addr.flags
+  if not flags then
+    return false
+  end
+  if flags['8bit'] then
+    return true
+  end
+  return false
+end
+
+rspamd_config.INVALID_FROM_8BIT = {
+  description = 'Invalid 8bit character in From header',
+  score = 6.0,
+  group = 'headers',
+  -- Tests the first MIME From address; a missing one counts as not 8bit
+  callback = function(task)
+    local addrs = task:get_from('mime') or {}
+    local first = addrs[1] or {}
+    return is_8bit_addr(first)
+  end
+}
+
+rspamd_config.INVALID_RCPT_8BIT = {
+  description = 'Invalid 8bit character in recipients headers',
+  score = 6.0,
+  group = 'headers',
+  -- True when any MIME recipient has the 8bit flag set
+  callback = function(task)
+    local rcpts = task:get_recipients('mime') or {}
+    return fun.any(function(rcpt)
+      return is_8bit_addr(rcpt)
+    end, rcpts)
+  end
+}
+
+rspamd_config.XM_CASE = {
+  group = 'headers',
+  score = 0.5,
+  description = 'X-mailer .vs. X-Mailer',
+  -- Passes `true` as the second has_header argument to look for the
+  -- exact 'X-mailer' spelling
+  callback = function(task)
+    local found = task:has_header('X-mailer', true)
+    return found
+  end
+}
diff --git a/rules/html.lua b/rules/html.lua
new file mode 100644
index 0000000..7c352c2
--- /dev/null
+++ b/rules/html.lua
@@ -0,0 +1,462 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+local reconf = config['regexp']
+
+local rspamd_regexp = require "rspamd_regexp"
+
+-- Messages that consist of an HTML part only (regexp-module expression rule)
+reconf.MIME_HTML_ONLY = {
+  description = 'Message has only an HTML part',
+  group = 'headers',
+  score = 0.2,
+  re = 'has_only_html_part()',
+}
+
+-- Walks up the parent chain of `tag` (the tag itself is not examined) and
+-- returns true as soon as an ancestor of type 'a' is met, false otherwise.
+local function has_anchor_parent(tag)
+  local ancestor = tag:get_parent()
+  while ancestor do
+    if ancestor:get_type() == 'a' then
+      return true
+    end
+    ancestor = ancestor:get_parent()
+  end
+
+  return false
+end
+
+-- Returns true when some HTML text part whose length lies in [min, max)
+-- holds an embedded image that sits inside an anchor and whose
+-- height + width is at least 210. Returns nothing (nil) otherwise.
+local function check_html_image(task, min, max)
+  for _, part in ipairs(task:get_text_parts()) do
+    if part:is_html() then
+      local html = part:get_html()
+      local part_len = part:get_length()
+      -- Images are only fetched for parts inside the requested size window
+      local images = html and part_len >= min and part_len < max and html:get_images()
+
+      if images then
+        for _, img in ipairs(images) do
+          local tag = img['tag']
+          if tag and has_anchor_parent(tag) then
+            -- do not trigger on small and unknown size images
+            if img['height'] + img['width'] >= 210 and img['embedded'] then
+              return true
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+-- Builds a rule callback that runs check_html_image() for HTML parts whose
+-- length lies in [min, max)
+local function short_link_img_cb(min, max)
+  return function(task)
+    return check_html_image(task, min, max)
+  end
+end
+
+-- The shorter the HTML part, the higher the score
+rspamd_config.HTML_SHORT_LINK_IMG_1 = {
+  description = 'Short HTML part (0..1K) with a link to an image',
+  group = 'html',
+  score = 2.0,
+  callback = short_link_img_cb(0, 1024),
+}
+
+rspamd_config.HTML_SHORT_LINK_IMG_2 = {
+  description = 'Short HTML part (1K..1.5K) with a link to an image',
+  group = 'html',
+  score = 1.0,
+  callback = short_link_img_cb(1024, 1536),
+}
+
+rspamd_config.HTML_SHORT_LINK_IMG_3 = {
+  description = 'Short HTML part (1.5K..2K) with a link to an image',
+  group = 'html',
+  score = 0.5,
+  callback = short_link_img_cb(1536, 2048),
+}
+
+-- Fires for an HTML part with less than 50 bytes of text that holds a large
+-- image (height + width >= 400) which is not wrapped into an anchor.
+rspamd_config.R_EMPTY_IMAGE = {
+  description = 'Message contains empty parts and image',
+  group = 'html',
+  score = 2.0,
+
+  callback = function(task)
+    for _, part in ipairs(task:get_text_parts()) do
+      if part:is_html() then
+        local html = part:get_html() -- HTML context of the part
+        local text_len = part:get_length()
+        -- Images matter only when the part is (nearly) empty
+        local images = html and text_len < 50 and html:get_images()
+
+        if images then
+          for _, img in ipairs(images) do
+            if img['height'] + img['width'] >= 400 then
+              -- Large image: report it unless it lives inside a link
+              local tag = img['tag']
+              if tag and not has_anchor_parent(tag) then
+                return true
+              end
+            end
+          end
+        end
+      end
+    end
+  end,
+}
+
+-- Compares the amount of text in an HTML part with the "text equivalent" of
+-- the linked images it carries. Every image located inside an anchor whose
+-- width + height is in (100, 3000) counts as (width + height) / 100 words.
+-- When these picture words make up more than half of all words, the rule
+-- fires with a dynamic weight of (ratio - 0.5) * 2.
+rspamd_config.R_SUSPICIOUS_IMAGES = {
+  callback = function(task)
+    local tp = task:get_text_parts() -- get text parts in a message
+
+    for _, p in ipairs(tp) do
+      local h = p:get_html()
+
+      if h then
+        local l = p:get_words_count()
+        local img = h:get_images()
+        local pic_words = 0
+
+        if img then
+          for _, i in ipairs(img) do
+            local dim = i['width'] + i['height']
+            local tag = i['tag']
+
+            -- Only images that act as links are taken into account
+            if tag and has_anchor_parent(tag) then
+              if dim > 100 and dim < 3000 then
+                -- We assume that a single picture 100x200 contains approx 3 words of text
+                pic_words = pic_words + dim / 100
+              end
+            end
+          end
+        end
+
+        -- Guard against a division by zero for parts without words and images
+        if l + pic_words > 0 then
+          local rel = pic_words / (l + pic_words)
+
+          if rel > 0.5 then
+            return true, (rel - 0.5) * 2
+          end
+        end
+      end
+    end
+
+    return false
+  end,
+
+  score = 5.0,
+  group = 'html',
+  -- Fixed wording: the rule is about images, not messages
+  description = 'Message contains many suspicious images'
+}
+
+-- Parent callback for the R_WHITE_ON_WHITE, MANY_INVISIBLE_PARTS and
+-- ZERO_FONT results. It walks font/span/div/p/td tags of every HTML part
+-- and counts leaf blocks that are invisible, zero sized or transparent.
+local vis_check_id = rspamd_config:register_symbol {
+  name = 'HTML_VISIBLE_CHECKS',
+  type = 'callback',
+  group = 'html',
+  callback = function(task)
+    local tp = task:get_text_parts() -- get text parts in a message
+    local ret = false -- set once any transparent leaf block is seen
+    local transp_rate = 0 -- best transparent/total ratio seen so far
+    local invisible_blocks = 0
+    local zero_size_blocks = 0
+    local arg -- option string describing the block with the highest ratio
+
+    local normal_len = 0
+    local transp_len = 0
+
+    for _, p in ipairs(tp) do
+      -- iterate over text parts array using `ipairs`
+      normal_len = normal_len + p:get_length()
+      if p:is_html() and p:get_html() then
+        -- if the current part is html part
+        local hc = p:get_html() -- we get HTML context
+
+        hc:foreach_tag({ 'font', 'span', 'div', 'p', 'td' }, function(tag, clen, is_leaf)
+          local bl = tag:get_style()
+          if bl then
+            if not bl.visible and clen > 0 and is_leaf then
+              invisible_blocks = invisible_blocks + 1
+            end
+
+            -- A missing font size is treated as a regular (non-zero) one
+            if (bl.font_size or 12) == 0 and clen > 0 and is_leaf then
+              zero_size_blocks = zero_size_blocks + 1
+            end
+
+            if bl.transparent and is_leaf then
+              ret = true
+              invisible_blocks = invisible_blocks + 1 -- This block is invisible
+              -- Move the block's content length from the normal to the transparent bucket
+              transp_len = transp_len + clen
+              normal_len = normal_len - clen
+              local tr = transp_len / (normal_len + transp_len)
+              if tr > transp_rate then
+                transp_rate = tr
+                if not bl.color then
+                  bl.color = { 0, 0, 0 }
+                end
+                if not bl.bgcolor then
+                  bl.bgcolor = { 0, 0, 0 }
+                end
+                -- Two hex digits per component: unpadded %x is ambiguous
+                -- (e.g. 1,17,1 and 17,1,1 would both be printed as #1111)
+                arg = string.format('%s color #%02x%02x%02x bgcolor #%02x%02x%02x',
+                    tag:get_type(),
+                    bl.color[1], bl.color[2], bl.color[3],
+                    bl.bgcolor[1], bl.bgcolor[2], bl.bgcolor[3])
+              end
+            end
+          end
+
+          return false -- Continue search
+        end)
+
+      end
+    end
+
+    if ret then
+      transp_rate = transp_len / (normal_len + transp_len)
+
+      if transp_rate > 0.1 then
+        -- Cap at 0.5; `x ~= x` is true for NaN only (0/0 case)
+        if transp_rate > 0.5 or transp_rate ~= transp_rate then
+          transp_rate = 0.5
+        end
+
+        task:insert_result('R_WHITE_ON_WHITE', (transp_rate * 2.0), arg)
+      end
+    end
+
+    if invisible_blocks > 0 then
+      if invisible_blocks > 10 then
+        invisible_blocks = 10
+      end
+      local rates = { -- From 1 to 10
+        0.05,
+        0.1,
+        0.2,
+        0.3,
+        0.4,
+        0.5,
+        0.6,
+        0.7,
+        0.8,
+        1.0,
+      }
+      task:insert_result('MANY_INVISIBLE_PARTS', rates[invisible_blocks],
+          tostring(invisible_blocks))
+    end
+
+    if zero_size_blocks > 0 then
+      if zero_size_blocks > 5 then
+        if zero_size_blocks > 10 then
+          -- Full score
+          task:insert_result('ZERO_FONT', 1.0,
+              tostring(zero_size_blocks))
+        else
+          -- 6..10 blocks are reported through the last entry of the table below
+          zero_size_blocks = 5
+        end
+      end
+
+      if zero_size_blocks <= 5 then
+        local rates = { -- From 1 to 5
+          0.1,
+          0.2,
+          0.2,
+          0.3,
+          0.5,
+        }
+        task:insert_result('ZERO_FONT', rates[zero_size_blocks],
+            tostring(zero_size_blocks))
+      end
+    end
+  end,
+}
+
+-- Virtual symbols inserted by the HTML_VISIBLE_CHECKS callback
+local vis_virtual_symbols = {
+  {
+    name = 'R_WHITE_ON_WHITE',
+    description = 'Message contains low contrast text',
+    score = 4.0,
+  },
+  {
+    name = 'ZERO_FONT',
+    description = 'Zero sized font used',
+    score = 1.0, -- Full weight is inserted when more than 10 blocks have zero size
+  },
+  {
+    name = 'MANY_INVISIBLE_PARTS',
+    description = 'Many parts are visually hidden',
+    score = 1.0, -- Full weight is inserted when 10 or more blocks are hidden
+  },
+}
+
+for _, sym in ipairs(vis_virtual_symbols) do
+  -- Attributes shared by all three symbols
+  sym.type = 'virtual'
+  sym.parent = vis_check_id
+  sym.group = 'html'
+  sym.one_shot = true
+  rspamd_config:register_symbol(sym)
+end
+
+-- Fires when a <link> tag of some HTML part carries an extra value (as
+-- returned by tag:get_extra()) whose string form ends in `.css`, optionally
+-- followed by a query string or a fragment.
+rspamd_config.EXT_CSS = {
+  description = 'Message contains external CSS reference',
+  group = 'html',
+  score = 1.0,
+
+  callback = function(task)
+    local regexp_lib = require "rspamd_regexp"
+    local css_re = regexp_lib.create_cached('/^.*\\.css(?:[?#].*)?$/i')
+    local found = false
+
+    for _, part in ipairs(task:get_text_parts()) do
+      local html = part:is_html() and part:get_html()
+      if html then
+        html:foreach_tag({ 'link' }, function(tag)
+          local extra = tag:get_extra()
+          if extra then
+            local as_string = tostring(extra)
+            if as_string and css_re:match(as_string) then
+              found = true
+            end
+          end
+
+          -- false keeps the traversal going, true (a hit) ends it
+          return found
+        end)
+      end
+    end
+
+    return found
+  end,
+}
+
+-- Matches anchor content that starts with an https: scheme
+local https_re = rspamd_regexp.create_cached('/^https:/i')
+
+-- Fires when the content of an anchor starts with `https:` while the URL the
+-- anchor points to uses plain `http`. The host of the first such URL is
+-- returned as the symbol option.
+--
+-- Every HTML part is inspected: previously the callback returned right after
+-- the first HTML part (or aborted on a part without an HTML context), so
+-- anchors located in later parts were never examined.
+rspamd_config.HTTP_TO_HTTPS = {
+  callback = function(task)
+    local tp = task:get_text_parts() or {}
+
+    for _, p in ipairs(tp) do
+      -- Parts that are not HTML or have no HTML context are simply skipped
+      local hc = p:is_html() and p:get_html()
+
+      if hc then
+        local found = false
+        local found_opts
+
+        hc:foreach_tag('a', function(tag, _)
+          -- Skip this loop if we already have a match
+          if found then
+            return true
+          end
+
+          local c = tag:get_content()
+          if not c or not https_re:match(c) then
+            return false
+          end
+
+          local u = tag:get_extra()
+          if not u then
+            return false
+          end
+
+          if u:get_protocol() ~= 'http' then
+            return false
+          end
+
+          -- Capture matches for http in href to https in visible part only
+          found = true
+          found_opts = u:get_host()
+          return true
+        end)
+
+        if found then
+          return true, 1.0, found_opts
+        end
+      end
+    end
+
+    return false
+  end,
+  description = 'The anchor text contains a distinct scheme compared to the target URL',
+  score = 0.5,
+  group = 'html'
+}
+
+-- Fires when an anchor of any HTML part points to an http(s) URL whose host
+-- starts with a dotted-quad IPv4 address.
+--
+-- Every HTML part is inspected: previously the callback returned right after
+-- the first HTML part (or aborted on a part without an HTML context). The
+-- tag traversal is now also stopped on the first hit and the callback always
+-- returns an explicit boolean.
+rspamd_config.HTTP_TO_IP = {
+  callback = function(task)
+    local tp = task:get_text_parts()
+    if not tp then
+      return false
+    end
+
+    for _, p in ipairs(tp) do
+      -- Parts that are not HTML or have no HTML context are simply skipped
+      local hc = p:is_html() and p:get_html()
+
+      if hc then
+        local found = false
+
+        hc:foreach_tag('a', function(tag, _)
+          if found then
+            return true
+          end
+
+          local u = tag:get_extra()
+          if u then
+            u = tostring(u):lower()
+            if u:match('^https?://%d+%.%d+%.%d+%.%d+') then
+              found = true
+              return true -- no need to look at further anchors
+            end
+          end
+
+          return false
+        end)
+
+        if found then
+          return true
+        end
+      end
+    end
+
+    return false
+  end,
+  description = 'HTML anchor points to an IP address',
+  score = 1.0,
+  group = 'html'
+}
diff --git a/rules/mid.lua b/rules/mid.lua
new file mode 100644
index 0000000..1bac26c
--- /dev/null
+++ b/rules/mid.lua
@@ -0,0 +1,131 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+Copyright (c) 2016, Steve Freegard <steve@freegard.name>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+local rspamd_util = require "rspamd_util"
+-- Callback behind CHECK_MID: inspects the Message-ID header and inserts the
+-- MID_* results. Returns false when there is no Message-ID header and
+-- nothing otherwise; all findings are reported through task:insert_result().
+local function mid_check_func(task)
+  local mid = task:get_header('Message-ID')
+  if not mid then
+    return false
+  end
+
+  -- Purely syntactic checks: { symbol, Lua pattern, must the pattern match? }
+  local shape_checks = {
+    { 'MID_BARE_IP', "@%d+%.%d+%.%d+%.%d+>$", true }, -- 'bare' IP address in RHS
+    { 'MID_RHS_NOT_FQDN', "@[^%.]+>?$", true }, -- RHS without a single dot
+    { 'MID_MISSING_BRACKETS', '^<[^>]+>$', false }, -- fires when <>'s are absent
+    { 'MID_RHS_IP_LITERAL', "@%[%d+%.%d+%.%d+%.%d+%]", true }, -- [a.b.c.d] in RHS
+  }
+  for _, check in ipairs(shape_checks) do
+    local matched = mid:find(check[2]) ~= nil
+    if matched == check[3] then
+      task:insert_result(check[1], 1.0)
+    end
+  end
+
+  local mid_lc = mid:lower()
+  -- Everything after '@' minus an optional closing bracket; nil without RHS
+  local md = mid:match("@([^>]+)>?$")
+
+  -- Check From address attributes against MID
+  local from = task:get_from(2)
+  local fd
+  if from and from[1] and from[1].domain and from[1].domain ~= '' then
+    fd = from[1].domain:lower()
+    -- TLD parts are only extracted when the Message-ID has a RHS at all
+    local fdtld, mdtld
+    if md then
+      fdtld = rspamd_util.get_tld(fd)
+      mdtld = rspamd_util.get_tld(md)
+    end
+    -- From most to least specific; at most one of the symbols is inserted
+    if mid_lc:find(from[1].addr:lower(), 1, true) then
+      task:insert_result('MID_CONTAINS_FROM', 1.0)
+    elseif md and fd == md:lower() then
+      task:insert_result('MID_RHS_MATCH_FROM', 1.0)
+    elseif mdtld ~= nil and fdtld ~= nil and mdtld:lower() == fdtld then
+      task:insert_result('MID_RHS_MATCH_FROMTLD', 1.0)
+    end
+  end
+
+  -- Check To address attributes against MID
+  local to = task:get_recipients(2)
+  if to and to[1] and to[1].domain and to[1].domain ~= '' then
+    local td = to[1].domain:lower()
+    -- Skip if from domain == to domain (an unset From domain never equals td)
+    if fd ~= td then
+      if mid_lc:find(to[1].addr:lower(), 1, true) then
+        task:insert_result('MID_CONTAINS_TO', 1.0)
+      elseif md and td == md:lower() then
+        task:insert_result('MID_RHS_MATCH_TO', 1.0)
+      end
+    end
+  end
+end
+
+-- MID checks from Steve Freegard
+local check_mid_id = rspamd_config:register_symbol({
+  name = 'CHECK_MID',
+  type = 'callback,mime',
+  group = 'mid',
+  score = 0.0,
+  callback = mid_check_func
+})
+
+-- Virtual symbols reported by the callback: { name, metric score, description }
+local mid_virtual_symbols = {
+  { 'MID_BARE_IP', 2.0, 'Message-ID RHS is a bare IP address' },
+  { 'MID_RHS_NOT_FQDN', 0.5, 'Message-ID RHS is not a fully-qualified domain name' },
+  { 'MID_MISSING_BRACKETS', 0.5, 'Message-ID is missing <>\'s' },
+  { 'MID_RHS_IP_LITERAL', 0.5, 'Message-ID RHS is an IP-literal' },
+  { 'MID_CONTAINS_FROM', 1.0, 'Message-ID contains From address' },
+  { 'MID_RHS_MATCH_FROM', 0.0, 'Message-ID RHS matches From domain' },
+  { 'MID_RHS_MATCH_FROMTLD', 0.0, 'Message-ID RHS matches From domain tld' },
+  { 'MID_CONTAINS_TO', 1.0, 'Message-ID contains To address' },
+  { 'MID_RHS_MATCH_TO', 1.0, 'Message-ID RHS matches To domain' },
+}
+
+for _, sym in ipairs(mid_virtual_symbols) do
+  local name, score, description = sym[1], sym[2], sym[3]
+  rspamd_config:register_virtual_symbol(name, 1.0, check_mid_id)
+  rspamd_config:set_metric_symbol(name, score, description, 'default', 'Message ID')
+end
+
+-- Another check from https://github.com/rspamd/rspamd/issues/4299
+-- Fires when the Message-ID RHS consists of lowercase ASCII letters only and
+-- is equal to the user part of the first MIME From address.
+rspamd_config:register_symbol {
+  name = 'MID_END_EQ_FROM_USER_PART',
+  type = 'normal,mime',
+  group = 'mid',
+  score = 4.0,
+  description = 'Message-ID RHS (after @) and MIME from local part are the same',
+
+  callback = function(task)
+    local mid = task:get_header('Message-ID')
+    if not mid then
+      return
+    end
+
+    local mime_from = task:get_from('mime')
+    local mid_realm = mid:match("@([a-z]+)>?$")
+    -- Resolves to the From user part only when every piece is available
+    local from_user = mid_realm and mime_from and mime_from[1] and mime_from[1].user
+
+    if from_user and mid_realm == from_user then
+      return true
+    end
+  end
+}
diff --git a/rules/misc.lua b/rules/misc.lua
new file mode 100644
index 0000000..faf4a8f
--- /dev/null
+++ b/rules/misc.lua
@@ -0,0 +1,864 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Misc rules
+
+local E = {}
+local fun = require "fun"
+local rspamd_util = require "rspamd_util"
+local rspamd_parsers = require "rspamd_parsers"
+local rspamd_regexp = require "rspamd_regexp"
+local lua_util = require "lua_util"
+local bit = require "bit"
+local rspamd_url = require "rspamd_url"
+local url_flags_tab = rspamd_url.flags
+
+-- Different text parts
+-- Reads the 'parts_distance' and 'total_words' variables from the task
+-- memory pool and inserts R_PARTS_DIFFER with a dynamic weight once the
+-- distance reaches 0.5. The callback itself always returns false because the
+-- result is inserted explicitly.
+rspamd_config.R_PARTS_DIFFER = {
+  callback = function(task)
+    local pool = task:get_mempool()
+    local distance = pool:get_variable('parts_distance', 'double')
+    -- Guard against a value that cannot be converted to a number
+    local nd = distance and tonumber(distance)
+
+    -- ND is relation of different words to total words
+    if not nd or nd < 0.5 then
+      return false
+    end
+
+    local tw = pool:get_variable('total_words', 'int')
+    if tw then
+      -- With more than 30 words we are confident about the difference, so the
+      -- weight is doubled
+      local confidence = tw > 30 and 2.0 or 1.0
+      -- The percentage is formatted straight from the number: wrapping it in
+      -- tostring() rounded it twice and relied on implicit string coercion
+      task:insert_result('R_PARTS_DIFFER', (nd - 0.5) * confidence,
+          string.format('%.1f%%', 100.0 * nd))
+    end
+
+    return false
+  end,
+  score = 1.0,
+  description = 'Text and HTML parts differ',
+  group = 'body'
+}
+
+-- Date issues
+-- Parent callback for MISSING_DATE, INVALID_DATE, DATE_IN_PAST and
+-- DATE_IN_FUTURE: compares the parsed Date header with the task date
+-- requested in 'connect' format.
+local date_id = rspamd_config:register_symbol({
+  name = 'DATE_CB',
+  type = 'callback,mime',
+  callback = function(task)
+    local date_hdr = task:get_header('Date')
+    if date_hdr == nil or date_hdr == '' then
+      task:insert_result('MISSING_DATE', 1.0)
+      return
+    end
+
+    local msg_time, err = rspamd_parsers.parse_smtp_date(date_hdr)
+    if err then
+      task:insert_result('INVALID_DATE', 1.0)
+      return
+    end
+
+    local connect_time = task:get_date({ format = 'connect', gmt = true })
+    -- Positive: message is older than the connection; values are in seconds
+    local age = connect_time - msg_time
+
+    if age > 86400 then
+      -- Older than a day; the option is the age in whole hours
+      task:insert_result('DATE_IN_PAST', 1.0, tostring(math.floor(age / 3600)))
+    elseif age < -7200 then
+      -- More than 2 hours in the future
+      task:insert_result('DATE_IN_FUTURE', 1.0, tostring(math.floor(-age / 3600)))
+    end
+  end
+})
+
+-- Virtual symbols inserted by the DATE_CB callback
+local date_virtual_symbols = {
+  { name = 'MISSING_DATE', score = 1.0, description = 'Date header is missing' },
+  { name = 'INVALID_DATE', score = 1.5, description = 'Malformed Date header' },
+  { name = 'DATE_IN_FUTURE', score = 4.0, description = 'Message date is in the future' },
+  { name = 'DATE_IN_PAST', score = 1.0, description = 'Message date is in the past' },
+}
+
+for _, sym in ipairs(date_virtual_symbols) do
+  -- Attributes shared by all four symbols
+  sym.type = 'virtual'
+  sym.group = 'headers'
+  sym.parent = date_id
+  rspamd_config:register_symbol(sym)
+end
+
+-- Reports URLs flagged as 'obscured' (R_SUSPICIOUS_URL) and/or 'zw_spaces'
+-- (ZERO_WIDTH_SPACE_URL). Results are inserted explicitly with the URL host
+-- as an option, hence the callback itself always returns false.
+local obscured_id = rspamd_config:register_symbol {
+  name = 'R_SUSPICIOUS_URL',
+  score = 5.0,
+  one_shot = true,
+  description = 'A message has been identified to contain an obfuscated or suspicious URL',
+  group = 'url',
+  callback = function(task)
+    local flagged_urls = task:get_urls_filtered({ 'obscured', 'zw_spaces' })
+    if not flagged_urls or not flagged_urls[1] then
+      return false
+    end
+
+    -- Numeric flag mask -> symbol to insert, in reporting order
+    local reports = {
+      { url_flags_tab.obscured, 'R_SUSPICIOUS_URL' },
+      { url_flags_tab.zw_spaces, 'ZERO_WIDTH_SPACE_URL' },
+    }
+
+    for _, url in ipairs(flagged_urls) do
+      local url_flags = url:get_flags_num()
+      for _, report in ipairs(reports) do
+        if bit.band(url_flags, report[1]) ~= 0 then
+          task:insert_result(report[2], 1.0, url:get_host())
+        end
+      end
+    end
+
+    return false
+  end,
+}
+
+-- Second result of the R_SUSPICIOUS_URL callback
+rspamd_config:register_symbol {
+  name = 'ZERO_WIDTH_SPACE_URL',
+  type = 'virtual',
+  parent = obscured_id,
+  group = 'url',
+  description = 'Zero width space in URL',
+  score = 7.0,
+  one_shot = true,
+}
+
+rspamd_config.ENVFROM_PRVS = {
+  description = "Envelope From is a PRVS address that matches the From address",
+  score = 0.0,
+  group = 'headers',
+  type = 'mime',
+  callback = function(task)
+    --[[
+    Detect PRVS/BATV addresses to avoid FORGED_SENDER
+    https://en.wikipedia.org/wiki/Bounce_Address_Tag_Validation
+
+    Signature syntax:
+
+    prvs=TAG=USER@example.com       BATV draft (https://tools.ietf.org/html/draft-levine-smtp-batv-01)
+    prvs=USER=TAG@example.com
+    btv1==TAG==USER@example.com     Barracuda appliance
+    msprvs1=TAG=USER@example.com    Sparkpost email delivery service
+    ]]--
+    if not (task:has_from(1) and task:has_from(2)) then
+      return false
+    end
+
+    local envfrom = task:get_from(1)
+    local prvs_re = rspamd_regexp.create_cached(
+        '^(?:(prvs|msprvs1)=([^=]+)=|btv1==[^=]+==)(.+@(.+))$')
+    local captures = prvs_re:search(envfrom[1].addr:lower(), false, true)
+    if not captures then
+      return false
+    end
+
+    -- Indexes used below: [2] scheme, [3] first token, [4] address after the
+    -- signature prefix, [5] domain of that address
+    local m = captures[1]
+    local hdr_from = task:get_from(2)[1].addr:lower()
+
+    -- See if it matches the From header
+    if m[4] == hdr_from then
+      return true
+    end
+
+    -- Check for prvs=USER=TAG@example.com
+    if m[2] == 'prvs' then
+      return (m[3] .. '@' .. m[5]) == hdr_from
+    end
+
+    return false
+  end,
+}
+
+-- Fires when the user part of the envelope sender embeds the single envelope
+-- recipient with '@' replaced by '=' (VERP encoding).
+rspamd_config.ENVFROM_VERP = {
+  description = "Envelope From is a VERP address",
+  score = 0.0,
+  group = "headers",
+  type = 'mime',
+  callback = function(task)
+    if not (task:has_from(1) and task:has_recipients(1)) then
+      return false
+    end
+
+    local envfrom = task:get_from(1)
+    local envrcpts = task:get_recipients(1)
+    -- VERP only works for single recipient messages
+    if #envrcpts > 1 then
+      return false
+    end
+
+    -- Get recipient and compute VERP address (gsub's match count is dropped)
+    local verp = (envrcpts[1].addr:lower():gsub('@', '='))
+    -- Get the user portion of the envfrom
+    local ef_user = envfrom[1].user:lower()
+
+    -- The VERP representation of the recipient must appear in it ...
+    if not ef_user:find(verp, 1, true) then
+      return false
+    end
+    -- ... but not as a part of Google Forwarding rewrite
+    if ef_user:find('+caf_=' .. verp, 1, true) then
+      return false
+    end
+    -- ... and not within an SRS address
+    if ef_user:find('^srs[01]=') then
+      return false
+    end
+
+    return true
+  end,
+}
+
+-- Parent callback for the RCVD_TLS_ALL, RCVD_TLS_LAST, RCVD_NO_TLS_LAST and
+-- RCVD_VIA_SMTP_AUTH results, all derived from the flags of the parsed
+-- Received headers.
+local check_rcvd = rspamd_config:register_symbol {
+  name = 'CHECK_RCVD',
+  group = 'headers',
+  type = 'callback,mime',
+  callback = function(task)
+    local hops = task:get_received_headers()
+    if not hops or #hops == 0 then
+      return false
+    end
+
+    -- Truthy when the hop has a flags table with `flag` set
+    local function has_flag(hop, flag)
+      return hop.flags and hop.flags[flag]
+    end
+
+    -- Hops without by_hostname or received by 'localhost' are not considered
+    local all_tls = fun.all(function(hop)
+      return has_flag(hop, 'ssl')
+    end, fun.filter(function(hop)
+      return hop.by_hostname and hop.by_hostname ~= 'localhost'
+    end, hops))
+
+    if all_tls then
+      task:insert_result('RCVD_TLS_ALL', 1.0)
+    else
+      -- See if only the last hop was encrypted
+      local last_hop = hops[1]
+      if last_hop.by_hostname == 'localhost' then
+        -- Ignore artificial header from Rmilter
+        last_hop = hops[2] or {}
+      end
+      local symbol = has_flag(last_hop, 'ssl') and 'RCVD_TLS_LAST' or 'RCVD_NO_TLS_LAST'
+      task:insert_result(symbol, 1.0)
+    end
+
+    local seen_auth = fun.any(function(hop)
+      return has_flag(hop, 'authenticated')
+    end, hops)
+
+    if seen_auth then
+      task:insert_result('RCVD_VIA_SMTP_AUTH', 1.0)
+    end
+  end,
+}
+
+-- Virtual symbols inserted by the CHECK_RCVD callback
+local rcvd_virtual_symbols = {
+  {
+    name = 'RCVD_TLS_ALL',
+    description = 'All hops used encrypted transports',
+    score = 0.0,
+  },
+  {
+    name = 'RCVD_TLS_LAST',
+    description = 'Last hop used encrypted transports',
+    score = 0.0,
+  },
+  {
+    name = 'RCVD_NO_TLS_LAST',
+    description = 'Last hop did not use encrypted transports',
+    score = 0.1,
+  },
+  {
+    name = 'RCVD_VIA_SMTP_AUTH',
+    -- NB This does not mean sender was authenticated; see task:get_user()
+    description = 'Authenticated hand-off was seen in Received headers',
+    score = 0.0,
+  },
+}
+
+for _, sym in ipairs(rcvd_virtual_symbols) do
+  -- Attributes shared by all four symbols
+  sym.type = 'virtual'
+  sym.parent = check_rcvd
+  sym.group = 'headers'
+  rspamd_config:register_symbol(sym)
+end
+
+-- Fires when the HELO argument is exactly 'user' (case-insensitive) or when a
+-- Received header shows the same value as the sending host / HELO name.
+rspamd_config.RCVD_HELO_USER = {
+  description = 'HELO User spam pattern',
+  group = 'headers',
+  type = 'mime',
+  score = 3.0,
+  callback = function(task)
+    -- Check HELO argument from MTA
+    local helo = task:get_helo()
+    if helo and helo:lower() == 'user' then
+      return true
+    end
+
+    -- Check Received headers
+    local rcvds = task:get_header_full('Received')
+    if not rcvds then
+      return false
+    end
+
+    for _, rcvd in ipairs(rcvds) do
+      local line = rcvd['decoded']:lower()
+      -- Either "from user ..." at the very start or "helo=user" / "helo user"
+      if line:find("^%s*from%suser%s") or line:find("helo[%s=]user[%s%)]") then
+        return true
+      end
+    end
+  end,
+}
+
+-- For messages whose Content-Type starts with multipart/alternative: sums up
+-- get_count() of the filtered URLs and fires when the total is odd. The
+-- total is returned as the symbol option.
+rspamd_config.URI_COUNT_ODD = {
+  description = 'Odd number of URIs in multipart/alternative message',
+  score = 1.0,
+  group = 'url',
+  callback = function(task)
+    local ct = task:get_header('Content-Type')
+    if not (ct and ct:lower():find('^multipart/alternative')) then
+      return
+    end
+
+    local function add_count(total, url)
+      return total + url:get_count()
+    end
+
+    -- NOTE(review): the second argument looks like a list of URL flags to
+    -- leave out - confirm against get_urls_filtered documentation
+    local urls = task:get_urls_filtered(nil, { 'subject', 'html_displayed', 'special' }) or {}
+    local nurls = fun.foldl(add_count, 0, urls)
+
+    if nurls % 2 == 1 then
+      return true, 1.0, tostring(nurls)
+    end
+  end,
+}
+
+-- Fires when a message with more than one MIME part has a part whose
+-- Content-Disposition header starts with 'attachment' (case-insensitive).
+rspamd_config.HAS_ATTACHMENT = {
+  description = 'Message contains attachments',
+  group = 'body',
+  callback = function(task)
+    local parts = task:get_parts()
+    if not parts or #parts <= 1 then
+      return
+    end
+
+    for _, part in ipairs(parts) do
+      local disposition = part:get_header('Content-Disposition')
+      if disposition and disposition:lower():match('^attachment') then
+        return true
+      end
+    end
+  end,
+}
+
+-- Requires freemail maps loaded in multimap
+-- Compares the first option of the FREEMAIL_REPLYTO and FREEMAIL_FROM results;
+-- truthy only when both symbols are present with options and the options differ.
+local function freemail_reply_neq_from(task)
+  if not (task:has_symbol('FREEMAIL_REPLYTO') and task:has_symbol('FREEMAIL_FROM')) then
+    return false
+  end
+
+  local replyto_sym = task:get_symbol('FREEMAIL_REPLYTO')
+  local from_sym = task:get_symbol('FREEMAIL_FROM')
+  local replyto_opts = replyto_sym[1]['options']
+  local from_opts = from_sym[1]['options']
+
+  return (replyto_opts and from_opts and replyto_opts[1] ~= from_opts[1])
+end
+
+rspamd_config:register_symbol({
+  name = 'FREEMAIL_REPLYTO_NEQ_FROM_DOM',
+  description = 'The From and Reply-To addresses in the email are from different freemail services',
+  group = 'headers',
+  score = 3.0,
+  callback = freemail_reply_neq_from,
+})
+-- The callback reads the results of both freemail symbols, so they are
+-- declared as dependencies
+for _, dependency in ipairs({ 'FREEMAIL_REPLYTO', 'FREEMAIL_FROM' }) do
+  rspamd_config:register_dependency('FREEMAIL_REPLYTO_NEQ_FROM_DOM', dependency)
+end
+
+-- Looks for URLs with spoofed (homograph) host names using
+-- rspamd_util.is_utf_spoofed(): phished URLs are checked as a host pair
+-- (weight 1.0), other non-displayed URLs by their TLD part alone (weight
+-- 0.5). The offending hosts are returned as symbol options.
+rspamd_config.OMOGRAPH_URL = {
+  description = 'URL contains both latin and non-latin characters',
+  score = 5.0,
+  group = 'url',
+  callback = function(task)
+    local urls = task:get_urls()
+    if not urls then
+      return false
+    end
+
+    local pair_hits = 0 -- spoofed host -> phished host pairs
+    local single_hits = 0 -- spoofed stand-alone names
+    local bad_urls = {}
+    local seen = {} -- every key is evaluated only once
+
+    for _, url in ipairs(urls) do
+      if url:is_phished() then
+        local shown_host = url:get_host()
+        local phished = url:get_phished()
+        -- Due to changes of the phished flag in 2.8
+        local real_host = phished and phished:get_host()
+
+        if shown_host and real_host then
+          local key = string.format('%s->%s', shown_host, real_host)
+          if not seen[key] and rspamd_util.is_utf_spoofed(shown_host, real_host) then
+            bad_urls[#bad_urls + 1] = key
+            pair_hits = pair_hits + 1
+          end
+          seen[key] = true
+        end
+      end
+
+      if not url:is_html_displayed() then
+        local tld = url:get_tld()
+        if tld then
+          if not seen[tld] and rspamd_util.is_utf_spoofed(tld) then
+            bad_urls[#bad_urls + 1] = tld
+            single_hits = single_hits + 1
+          end
+          seen[tld] = true
+        end
+      end
+    end
+
+    if pair_hits > 0 then
+      return true, 1.0, bad_urls
+    end
+    if single_hits > 0 then
+      return true, 0.5, bad_urls
+    end
+
+    return false
+  end,
+}
+
+-- Fires for the first URL that carries the `subject` flag. The weight is
+-- 0.1 for schemaless URLs, 1.0 when the subject is equal to the URL and
+-- 0.25 otherwise; the URL host is returned as an option.
+rspamd_config.URL_IN_SUBJECT = {
+  description = 'Subject contains URL',
+  score = 4.0,
+  group = 'subject',
+  type = 'mime',
+  callback = function(task)
+    for _, url in ipairs(task:get_urls() or {}) do
+      local flags = url:get_flags()
+
+      if flags.subject then
+        if flags.schemaless then
+          return true, 0.1, url:get_host()
+        end
+
+        local subject = task:get_subject()
+        if subject and tostring(url) == subject then
+          return true, 1.0, url:get_host()
+        end
+
+        return true, 0.25, url:get_host()
+      end
+    end
+
+    return false
+  end,
+}
+
+-- Prefilter that passes sender and recipient addresses (both SMTP and MIME)
+-- through lua_util.remove_email_aliases(), stores the addresses back into
+-- the task and reports the collected tags as TAGGED_FROM / TAGGED_RCPT.
+-- NOTE(review): remove_email_aliases() presumably rewrites the address table
+-- in place, as the same table is handed to set_from/set_recipients - confirm.
+local aliases_id = rspamd_config:register_symbol {
+  type = 'prefilter',
+  name = 'EMAIL_PLUS_ALIASES',
+  priority = lua_util.symbols_priorities.top + 1,
+  description = 'Removes plus aliases from the email',
+  group = 'headers',
+  callback = function(task)
+    -- Empty tags are not reported
+    local function non_empty(tag)
+      return tag and #tag > 0
+    end
+
+    local function check_from(type)
+      if not task:has_from(type) then
+        return
+      end
+
+      local addr = task:get_from(type)[1]
+      local na, tags = lua_util.remove_email_aliases(addr)
+      if na then
+        task:set_from(type, addr, 'alias')
+        task:insert_result('TAGGED_FROM', 1.0,
+            fun.totable(fun.filter(non_empty, tags)))
+      end
+    end
+
+    local function check_rcpt(type)
+      if not task:has_recipients(type) then
+        return
+      end
+
+      local addrs = task:get_recipients(type)
+      local all_tags = {}
+      local modified = false
+
+      for _, addr in ipairs(addrs) do
+        local na, tags = lua_util.remove_email_aliases(addr)
+        if na then
+          modified = true
+          fun.each(function(tag)
+            all_tags[#all_tags + 1] = tag
+          end, fun.filter(non_empty, tags))
+        end
+      end
+
+      -- The whole recipients list is written back once, after the loop
+      if modified then
+        task:set_recipients(type, addrs, 'alias')
+        task:insert_result('TAGGED_RCPT', 1.0, all_tags)
+      end
+    end
+
+    check_from('smtp')
+    check_from('mime')
+    check_rcpt('smtp')
+    check_rcpt('mime')
+  end,
+}
+
+-- Virtual symbols inserted by the EMAIL_PLUS_ALIASES prefilter
+local tagged_virtual_symbols = {
+  { name = 'TAGGED_RCPT', description = 'SMTP recipients have plus tags' },
+  { name = 'TAGGED_FROM', description = 'SMTP from has plus tags' },
+}
+
+for _, sym in ipairs(tagged_virtual_symbols) do
+  -- Attributes shared by both symbols
+  sym.type = 'virtual'
+  sym.parent = aliases_id
+  sym.group = 'headers'
+  sym.score = 0.0
+  rspamd_config:register_symbol(sym)
+end
+
+-- Parent callback for SPOOF_DISPLAY_NAME and FROM_NEQ_DISPLAY_NAME: tries to
+-- parse an email address out of the From display name and compares its
+-- domain with the real From domain. Results are inserted explicitly, so the
+-- callback itself always returns false.
+local check_from_display_name = rspamd_config:register_symbol {
+  type = 'callback,mime',
+  name = 'FROM_DISPLAY_CALLBACK',
+  group = 'headers',
+  callback = function(task)
+    local from = task:get_from(2)
+    if not (from and from[1] and from[1].name) then
+      return false
+    end
+
+    -- See if we can parse an email address from the name
+    local parsed = rspamd_parsers.parse_mail_address(from[1].name, task:get_mempool())
+    if not parsed or not (parsed[1] and parsed[1]['addr']) then
+      return false
+    end
+
+    -- Make sure we did not mistake e.g. <something>@<name> for an email address
+    local shown_domain = parsed[1]['domain']
+    if not shown_domain or not shown_domain:find('%.') then
+      return false
+    end
+
+    -- Nothing to report when the parsed domains are the same
+    if rspamd_util.strequal_caseless(from[1]['domain'], shown_domain) then
+      return false
+    end
+
+    -- True when one of the recipients lives in `domain`
+    local function has_rcpt_in_domain(rcpts, domain)
+      for _, to in ipairs(rcpts or {}) do
+        if to['domain'] ~= '' and rspamd_util.strequal_caseless(to['domain'], domain) then
+          return true
+        end
+      end
+      return false
+    end
+
+    -- See if the destination domain is the same as the spoof: MIME
+    -- recipients are looked at first, SMTP recipients second
+    local mto = task:get_recipients(2)
+    local sto = task:get_recipients(1)
+    local symbol = 'FROM_NEQ_DISPLAY_NAME'
+    if has_rcpt_in_domain(mto, shown_domain) or has_rcpt_in_domain(sto, shown_domain) then
+      symbol = 'SPOOF_DISPLAY_NAME'
+    end
+
+    task:insert_result(symbol, 1.0, from[1]['domain'], shown_domain)
+    return false
+  end,
+}
+
+-- Virtual symbols inserted by the FROM_DISPLAY_CALLBACK callback
+local display_name_virtual_symbols = {
+  {
+    name = 'SPOOF_DISPLAY_NAME',
+    description = 'Display name is being used to spoof and trick the recipient',
+    score = 8.0,
+  },
+  {
+    name = 'FROM_NEQ_DISPLAY_NAME',
+    description = 'Display name contains an email address different to the From address',
+    score = 4.0,
+  },
+}
+
+for _, sym in ipairs(display_name_virtual_symbols) do
+  -- Attributes shared by both symbols
+  sym.type = 'virtual'
+  sym.parent = check_from_display_name
+  sym.group = 'headers'
+  rspamd_config:register_symbol(sym)
+end
+
+-- Fires when a Reply-To address points to a domain other than the From
+-- domain while the From domain is itself among the SMTP recipients'
+-- domains. On a hit the From domain and the Reply-To domain are returned
+-- as symbol options.
+rspamd_config.SPOOF_REPLYTO = {
+  type = 'mime',
+  group = 'headers',
+  score = 6.0,
+  description = 'Reply-To is being used to spoof and trick the recipient to send an off-domain reply',
+  callback = function(task)
+    -- A Reply-To header is required
+    local rt_hdrs = task:get_header_full('Reply-To')
+    if not (rt_hdrs and rt_hdrs[1]) then
+      return false
+    end
+    local rt_value = rt_hdrs[1]['value']
+
+    local from = task:get_from(2)
+    local mime_to = task:get_recipients(2)
+    local sender = from and from[1]
+    if not (sender and sender.addr) then
+      return false
+    end
+
+    -- Web contact forms commonly send with From equal to To: not a spoof
+    local first_to = mime_to and mime_to[1]
+    if first_to and first_to.addr
+        and rspamd_util.strequal_caseless(sender.addr, first_to.addr) then
+      return false
+    end
+
+    -- From here on only the SMTP (envelope) recipients matter
+    local smtp_to = task:get_recipients(1)
+    if not smtp_to then
+      return false
+    end
+
+    -- Try to mitigate possible false positives on mailing list posts
+    if #smtp_to == 1 and rspamd_util.strequal_caseless(smtp_to[1].addr, sender.addr) then
+      return false
+    end
+
+    -- At least one envelope recipient must share the From domain
+    local from_dom_is_rcpt = false
+    for _, rcpt in ipairs(smtp_to) do
+      if rspamd_util.strequal_caseless(rcpt.domain, sender.domain) then
+        from_dom_is_rcpt = true
+        break
+      end
+    end
+    if not from_dom_is_rcpt then
+      return false
+    end
+
+    -- Domain of the first address in Reply-To; E is defined elsewhere in
+    -- this file and serves as the fallback for a nil table here
+    local rt_parsed = rspamd_parsers.parse_mail_address(rt_value, task:get_mempool()) or E
+    local rt_domain = (rt_parsed[1] or E).domain
+    if not rt_domain then
+      return false
+    end
+
+    -- Same domain as From: replies stay on-domain, nothing to flag
+    if rspamd_util.strequal_caseless(rt_domain, sender.domain) then
+      return false
+    end
+    return true, sender.domain, rt_domain
+  end,
+}
+
+-- Fires when a message with a List-Unsubscribe header is sent from an
+-- "info" local part (MIME From) to at least one SMTP recipient whose local
+-- part is also "info" (both compared case-insensitively).
+rspamd_config.INFO_TO_INFO_LU = {
+  type = 'mime',
+  group = 'headers',
+  score = 2.0,
+  description = 'info@ From/To address with List-Unsubscribe headers',
+  callback = function(task)
+    if not task:has_header('List-Unsubscribe') then
+      return false
+    end
+
+    local from = task:get_from('mime')
+    local sender = from and from[1]
+    if not (sender and rspamd_util.strequal_caseless(sender.user, 'info')) then
+      return false
+    end
+
+    local rcpts = task:get_recipients('smtp')
+    if not rcpts then
+      return false
+    end
+
+    -- Every recipient is examined; the flag stays set once any matches
+    local has_info_rcpt = false
+    for _, rcpt in ipairs(rcpts) do
+      has_info_rcpt = rspamd_util.strequal_caseless(rcpt['user'], 'info') or has_info_rcpt
+    end
+    return has_info_rcpt
+  end,
+}
+
+-- Detects bad content-transfer-encoding for text parts
+
+-- Fires on the first text part whose declared Content-Transfer-Encoding is
+-- not '8bit' although has_8bit_raw() reports 8-bit data. The declared CTE is
+-- returned as an option; for a utf-8 charset the weight is reduced to 0.3
+-- and the "utf8" option is appended.
+rspamd_config.R_BAD_CTE_7BIT = {
+  type = 'mime',
+  group = 'headers',
+  score = 3.5,
+  description = 'Detects bad Content-Transfer-Encoding for text parts',
+  callback = function(task)
+    for _, part in ipairs(task:get_text_parts() or {}) do
+      local mime_part = part:get_mimepart()
+      local cte = mime_part:get_cte() or ''
+
+      if cte ~= '8bit' and part:has_8bit_raw() then
+        local _, _, attrs = mime_part:get_type_full()
+        local charset = attrs and attrs.charset
+
+        if charset and charset:lower() == "utf-8" then
+          -- Penalise less: people often do not realise that utf-8 is an
+          -- eight bit encoding
+          return true, 0.3, { cte, "utf8" }
+        end
+
+        return true, 1.0, { cte }
+      end
+    end
+
+    return false
+  end,
+}
+
+-- Walks the MIME tree looking for multipart containers that mix text parts
+-- with PGP or S/MIME encrypted parts. As a side effect it inserts the
+-- ENCRYPTED_SMIME, SIGNED_SMIME, ENCRYPTED_PGP and SIGNED_PGP virtual
+-- symbols for the matching application/* subtypes it meets.
+-- Returns true, 1.0 and the list of encrypted part kinds when both text and
+-- encrypted content were seen, false otherwise.
+local check_encrypted_name = rspamd_config:register_symbol {
+  name = 'BOGUS_ENCRYPTED_AND_TEXT',
+  callback = function(task)
+    local parts = task:get_parts() or {}
+    local seen_encrypted, seen_text
+    local opts = {}
+
+    -- Classifies a non-text, non-multipart child by its MIME type and
+    -- records encryption/signature findings.
+    local function check_crypto_leaf(leaf)
+      -- Named mtype (not `type`) so the Lua builtin is not shadowed
+      local mtype, subtype = leaf:get_type_full()
+      if not (mtype and subtype) then
+        -- No usable Content-Type on this part; calling :lower() on nil
+        -- would raise, so there is nothing to classify
+        return
+      end
+      if mtype:lower() ~= 'application' then
+        return
+      end
+
+      subtype = subtype:lower()
+      if string.find(subtype, 'pkcs7%-mime') then
+        -- S/MIME encrypted part
+        seen_encrypted = true
+        table.insert(opts, 'smime part')
+        task:insert_result('ENCRYPTED_SMIME', 1.0)
+      elseif string.find(subtype, 'pkcs7%-signature') then
+        task:insert_result('SIGNED_SMIME', 1.0)
+      elseif string.find(subtype, 'pgp%-encrypted') then
+        -- PGP/GnuPG encrypted part
+        seen_encrypted = true
+        table.insert(opts, 'pgp part')
+        task:insert_result('ENCRYPTED_PGP', 1.0)
+      elseif string.find(subtype, 'pgp%-signature') then
+        task:insert_result('SIGNED_PGP', 1.0)
+      end
+    end
+
+    -- Recursively inspects one multipart container; other parts are ignored
+    -- at this level.
+    local function check_part(part)
+      if not part:is_multipart() then
+        return
+      end
+
+      local children = part:get_children() or {}
+      local text_kids = {}
+
+      for _, cld in ipairs(children) do
+        if cld:is_multipart() then
+          check_part(cld)
+        elseif cld:is_text() then
+          seen_text = true
+          text_kids[#text_kids + 1] = cld
+        else
+          check_crypto_leaf(cld)
+        end
+
+        if seen_text and seen_encrypted then
+          -- Ensure that our seen text is not really part of pgp #3205:
+          -- keep the flag only if some text child has type 'text'
+          for _, tp in ipairs(text_kids) do
+            local t = tp:get_type()
+            seen_text = false -- reset temporary
+            if t == 'text' then
+              seen_text = true
+              break
+            end
+          end
+        end
+      end
+    end
+
+    for _, part in ipairs(parts) do
+      check_part(part)
+    end
+
+    if seen_text and seen_encrypted then
+      return true, 1.0, opts
+    end
+
+    return false
+  end,
+  score = 10.0,
+  description = 'Bogus mix of encrypted and text/html payloads',
+  group = 'mime_types',
+}
+
+-- Virtual symbols inserted by the BOGUS_ENCRYPTED_AND_TEXT callback. They
+-- share type, parent, group and one_shot, so only the differing fields are
+-- listed and the registrations happen in the listed order.
+for _, sym in ipairs({
+  { name = 'ENCRYPTED_PGP', description = 'Message is encrypted with PGP', score = -0.5 },
+  { name = 'ENCRYPTED_SMIME', description = 'Message is encrypted with S/MIME', score = -0.5 },
+  { name = 'SIGNED_PGP', description = 'Message is signed with PGP', score = -2.0 },
+  { name = 'SIGNED_SMIME', description = 'Message is signed with S/MIME', score = -2.0 },
+}) do
+  rspamd_config:register_symbol {
+    type = 'virtual',
+    parent = check_encrypted_name,
+    name = sym.name,
+    description = sym.description,
+    group = 'mime_types',
+    score = sym.score,
+    one_shot = true
+  }
+end
+
+-- Fires when task:get_size() reports a size of 0 for the message.
+rspamd_config.COMPLETELY_EMPTY = {
+  callback = function(task)
+    return (task:get_size() == 0)
+  end,
+  flags = 'empty',
+  -- Description added so this rule is documented like the other scored rules
+  description = 'Message is completely empty',
+  group = 'blankspam',
+  score = 15
+}
diff --git a/rules/parts.lua b/rules/parts.lua
new file mode 100644
index 0000000..2be9ff8
--- /dev/null
+++ b/rules/parts.lua
@@ -0,0 +1,11 @@
+-- Fires when the message has exactly one MIME part, that part exposes a
+-- text object, and the text's get_length() is below 64.
+-- Returns true on a hit and nothing (nil) otherwise.
+rspamd_config.SINGLE_SHORT_PART = {
+  callback = function(task)
+    -- Fall back to an empty list so that taking the length below cannot
+    -- raise when get_parts() yields nil
+    local parts = task:get_parts() or {}
+    if #parts ~= 1 then
+      return
+    end
+    local text = parts[1]:get_text()
+    if not text then
+      return
+    end
+    if text:get_length() >= 64 then
+      return
+    end
+    return true
+  end,
+  score = 0.0,
+}
diff --git a/rules/regexp/compromised_hosts.lua b/rules/regexp/compromised_hosts.lua
new file mode 100644
index 0000000..e120b18
--- /dev/null
+++ b/rules/regexp/compromised_hosts.lua
@@ -0,0 +1,223 @@
+local reconf = config['regexp']
+local rspamd_regexp = require 'rspamd_regexp'
+local util = require 'rspamd_util'
+
+-- Matches either an X-Mailer value that begins with "PHPMailer " or a
+-- Content-Type boundary of the form b1_<alphanumerics>.
+reconf['HAS_PHPMAILER_SIG'] = {
+ -- PHPMailer 6.0.0 and older used hex hash in boundary:
+ -- boundary="b1_2a45d5e29f78d3408e318878b049f474"
+ -- Since 6.0.1 it uses base64 (without =+/):
+ -- boundary="b1_uBN0UPD3n6RU04VPxI54tENiDgaCGoh15l9s73oFnlM"
+ -- boundary="b1_Ez5tmpb4bSqknyUZ1B1hIvLAfR1MlspDEKGioCOXc"
+ -- https://github.com/PHPMailer/PHPMailer/blob/v6.4.0/src/PHPMailer.php#L2660
+ re = [[X-Mailer=/^PHPMailer /H || Content-Type=/boundary="b1_[0-9a-zA-Z]+"/H]],
+ description = "PHPMailer signature",
+ group = "compromised_hosts"
+}
+
+-- X-PHP-Originating-Script value starts with "0:" (the description reads
+-- the leading number as the UID that executed the script).
+reconf['PHP_SCRIPT_ROOT'] = {
+ re = "X-PHP-Originating-Script=/^0:/Hi",
+ description = "PHP Script executed by root UID",
+ score = 1.0,
+ group = "compromised_hosts"
+}
+
+-- Presence check for the X-PHP-Originating-Script header.
+reconf['HAS_X_POS'] = {
+ re = "header_exists('X-PHP-Originating-Script')",
+ description = "Has X-PHP-Originating-Script header",
+ group = "compromised_hosts"
+}
+
+-- Presence check for the X-PHP-Script header.
+reconf['HAS_X_PHP_SCRIPT'] = {
+ re = "header_exists('X-PHP-Script')",
+ description = "Has X-PHP-Script header",
+ group = "compromised_hosts"
+}
+
+-- Presence check for any of the X-Source* headers. Example values:
+-- X-Source:
+-- X-Source-Args: /usr/sbin/proxyexec -q -d -s /var/run/proxyexec/cagefs.sock/socket /bin/cagefs.server
+-- X-Source-Dir: silvianimberg.com:/public_html/wp-content/themes/ultimatum
+reconf['HAS_X_SOURCE'] = {
+ re = "header_exists('X-Source') || header_exists('X-Source-Args') || header_exists('X-Source-Dir')",
+ description = "Has X-Source headers",
+ group = "compromised_hosts"
+}
+
+-- X-Authenticated-Sender: accord.host-care.com: sales@cortaflex.si
+-- Fires whenever an X-Authenticated-Sender header is present. When the
+-- value has the "<prefix>: <identity>" shape, the identity part is returned
+-- as the symbol option.
+rspamd_config.HAS_X_AS = {
+  description = 'Has X-Authenticated-Sender header',
+  group = "compromised_hosts",
+  score = 0.0,
+  callback = function(task)
+    local xas = task:get_header('X-Authenticated-Sender')
+    if not xas then
+      return false
+    end
+
+    -- Capture everything after the first "<non-colons>:<whitespace char>"
+    local auth = xas:match('[^:]+:%s(.+)$')
+    if not auth then
+      return true
+    end
+
+    -- TODO: see if we can parse an e-mail address from auth
+    -- and see if it matches the from address or not
+    return true, auth
+  end,
+}
+
+-- X-Get-Message-Sender-Via: accord.host-care.com: authenticated_id: sales@cortaflex.si
+-- Fires whenever an X-Get-Message-Sender-Via header is present. When the
+-- value contains "authenticated_id: <identity>", the identity is returned
+-- as the symbol option.
+rspamd_config.HAS_X_GMSV = {
+  description = 'Has X-Get-Message-Sender-Via: header',
+  group = "compromised_hosts",
+  score = 0.0,
+  callback = function(task)
+    local xgmsv = task:get_header('X-Get-Message-Sender-Via')
+    if not xgmsv then
+      return false
+    end
+
+    local auth = xgmsv:match('authenticated_id: (.+)$')
+    if not auth then
+      return true
+    end
+
+    -- TODO: see if we can parse an e-mail address from auth
+    -- and see if it matches the from address or not.
+    return true, auth
+  end,
+}
+
+-- Presence check for X-AntiAbuse headers. Example values:
+-- X-AntiAbuse: This header was added to track abuse, please include it with any abuse report
+-- X-AntiAbuse: Primary Hostname - accord.host-care.com
+-- X-AntiAbuse: Original Domain - swaney.com
+-- X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12]
+-- X-AntiAbuse: Sender Address Domain - dropbox.com
+reconf['HAS_X_ANTIABUSE'] = {
+ re = "header_exists('X-AntiAbuse')",
+ description = "Has X-AntiAbuse headers",
+ group = "compromised_hosts"
+}
+
+-- X-PHP-Script or X-PHP-Originating-Script mentions "eval()'d code".
+reconf['X_PHP_EVAL'] = {
+ re = [[X-PHP-Script=/eval\(\)'d code/H || X-PHP-Originating-Script=/eval\(\)'d code/H]],
+ description = "Message sent using eval'd PHP",
+ score = 4.0,
+ group = "compromised_hosts"
+}
+
+-- Matches a "/wp-<something>/" path segment, case-insensitively.
+reconf['HAS_WP_URI'] = {
+ re = '/\\/wp-[^\\/]+\\//Ui',
+ description = "Contains WordPress URIs",
+ one_shot = true,
+ group = "compromised_hosts"
+}
+
+-- NOTE(review): as written, [^\/]+ directly follows (?:content|includes), so
+-- at least one extra non-slash character is required before the closing
+-- slash; a plain "/wp-content/..." or "/wp-includes/..." path does not match.
+-- Confirm whether a "\/" was intended between the group and [^\/]+.
+reconf['WP_COMPROMISED'] = {
+ re = '/\\/wp-(?:content|includes)[^\\/]+\\//Ui',
+ description = "URL that is pointing to a compromised WordPress installation",
+ one_shot = true,
+ group = "compromised_hosts"
+}
+
+-- X-PHP-Script of the form "<name>.<name>/sendmail.php", where neither name
+-- part contains a dot or a space.
+reconf['PHP_XPS_PATTERN'] = {
+ re = 'X-PHP-Script=/^[^\\. ]+\\.[^\\.\\/ ]+\\/sendmail\\.php\\b/Hi',
+ description = "Message contains X-PHP-Script pattern",
+ group = "compromised_hosts"
+}
+
+-- Presence check for the X-Authentication-Warning header.
+reconf['HAS_XAW'] = {
+ re = "header_exists('X-Authentication-Warning')",
+ description = "Has X-Authentication-Warning header",
+ group = "compromised_hosts"
+}
+
+-- X-Authentication-Warning says one of the listed service accounts
+-- "set sender to" some address. Example:
+-- X-Authentication-Warning: localhost.localdomain: www-data set sender to info@globalstock.lv using -f
+reconf['XAW_SERVICE_ACCT'] = {
+ re = "X-Authentication-Warning=/\\b(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www) set sender to\\b/Hi",
+ description = "Message originally from a service account",
+ score = 1.0,
+ group = "compromised_hosts"
+}
+
+-- SMTP envelope sender whose local part is one of the same service accounts.
+reconf['ENVFROM_SERVICE_ACCT'] = {
+ re = "check_smtp_data('from',/^(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www)@/i)",
+ description = "Envelope from is a service account",
+ score = 1.0,
+ group = "compromised_hosts"
+}
+
+-- A path component starting with "." ("/.<something>") in X-PHP-Script,
+-- X-PHP-Originating-Script (also directly after the "<digits>:" prefix) or
+-- X-Source-Args.
+reconf['HIDDEN_SOURCE_OBJ'] = {
+ re = "X-PHP-Script=/\\/\\..+/Hi || X-PHP-Originating-Script=/(?:^\\d+:|\\/)\\..+/Hi || X-Source-Args=/\\/\\..+/Hi",
+ description = "UNIX hidden file/directory in path",
+ score = 2.0,
+ group = "compromised_hosts"
+}
+
+-- Pattern for "hidden" path components: a dot followed by an alphanumeric
+-- at the very start of the path or right after a slash, or a "/../"
+-- sequence. The leading negative lookahead rejects a match that begins at
+-- "/.well-known/" (or "/.well_known/").
+local hidden_uri_re = rspamd_regexp.create_cached('/(?!\\/\\.well[-_]known\\/)(?:^\\.[A-Za-z0-9]|\\/' ..
+    '\\.[A-Za-z0-9]|\\/\\.\\.\\/)/i')
+
+-- Fires on the first URL whose path matches hidden_uri_re, returning the
+-- URL text as the option. URLs for which both is_subject() and
+-- is_html_displayed() hold are skipped. Returns nothing when no URL matches.
+rspamd_config.URI_HIDDEN_PATH = {
+  description = 'Message contains URI with a hidden path',
+  score = 1.0,
+  group = 'compromised_hosts',
+  callback = function(task)
+    for _, url in ipairs(task:get_urls(false) or {}) do
+      local skip = url:is_subject() and url:is_html_displayed()
+      if not skip and hidden_uri_re:match(url:get_path()) then
+        -- TODO: need url:is_schemeless() to improve this
+        return true, 1.0, url:get_text()
+      end
+    end
+  end,
+}
+
+-- Message-Id contains "@www.", i.e. its right-hand side starts with a www
+-- host label (case-insensitive).
+reconf['MID_RHS_WWW'] = {
+ re = "Message-Id=/@www\\./Hi",
+ description = "Message-ID from www host",
+ score = 0.5,
+ group = "compromised_hosts"
+}
+
+-- Fires when the MIME From address, or the first address parsed from the
+-- Sender or Reply-To header, has one of the listed service-account local
+-- parts (www-data, apache, nobody, ...). Checks run in that order; the
+-- callback returns nothing when none of them matches.
+rspamd_config.FROM_SERVICE_ACCT = {
+  description = "Sender/From/Reply-To is a service account",
+  score = 1.0,
+  group = "compromised_hosts",
+  callback = function(task)
+    local re = rspamd_regexp.create_cached('/^(?:www-data|anonymous|ftp|apache|nobody|guest|nginx|web|www)@/i')
+
+    -- From
+    local from = task:get_from(2)
+    if from and from[1] and re:match(from[1].addr) then
+      return true
+    end
+
+    -- Parses the named header as an address list and tests its first entry
+    local function header_is_service_acct(hdr_name)
+      local raw = task:get_header(hdr_name)
+      if not raw then
+        return false
+      end
+      local parsed = util.parse_mail_address(raw, task:get_mempool())
+      if parsed and parsed[1] and re:match(parsed[1].addr) then
+        return true
+      end
+      return false
+    end
+
+    if header_is_service_acct('Sender') then
+      return true
+    end
+    if header_is_service_acct('Reply-To') then
+      return true
+    end
+  end,
+}
+
+-- "@www." appears in the From, Sender or Reply-To header, or in the SMTP
+-- envelope sender (all case-insensitive).
+reconf['WWW_DOT_DOMAIN'] = {
+ re = "From=/@www\\./Hi || Sender=/@www\\./Hi || Reply-To=/@www\\./Hi || check_smtp_data('from',/@www\\./i)",
+ description = "From/Sender/Reply-To or Envelope is @www.domain.com",
+ score = 0.5,
+ group = "compromised_hosts"
+}
diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua
new file mode 100644
index 0000000..0624997
--- /dev/null
+++ b/rules/regexp/headers.lua
@@ -0,0 +1,1046 @@
+-- Actually these regular expressions were obtained from SpamAssassin project, so they are licensed by apache license:
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to you under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at:
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-- Definitions of header regexps
+
+local reconf = config['regexp']
+
+-- Subject needs encoding
+-- Define encoding types: each pattern looks for the start of a MIME
+-- encoded-word ("=?<charset>?B?" for base64, "=?<charset>?Q?" for
+-- quoted-printable)
+local subject_encoded_b64 = 'Subject=/=\\?\\S+\\?B\\?/iX'
+local subject_encoded_qp = 'Subject=/=\\?\\S+\\?Q\\?/iX'
+-- Define whether subject must be encoded: it contains a control byte other
+-- than TAB/LF/CR, or a byte in the 0x7f-0xff range
+local subject_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
+-- Final rule: not base64-encoded, not QP-encoded, yet contains such bytes
+reconf['SUBJECT_NEEDS_ENCODING'] = {
+ re = string.format('!(%s) & !(%s) & (%s)', subject_encoded_b64, subject_encoded_qp, subject_needs_mime),
+ score = 1.0,
+ mime_only = true,
+ description = 'Subject needs encoding',
+ group = 'headers'
+}
+
+-- Same three-part check for the From header
+local from_encoded_b64 = 'From=/=\\?\\S+\\?B\\?/iX'
+local from_encoded_qp = 'From=/=\\?\\S+\\?Q\\?/iX'
+local raw_from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
+reconf['FROM_NEEDS_ENCODING'] = {
+ re = string.format('!(%s) & !(%s) & (%s)', from_encoded_b64, from_encoded_qp, raw_from_needs_mime),
+ score = 1.0,
+ mime_only = true,
+ description = 'From header needs encoding',
+ group = 'headers'
+}
+
+-- Same three-part check for the To header
+local to_encoded_b64 = 'To=/=\\?\\S+\\?B\\?/iX'
+local to_encoded_qp = 'To=/=\\?\\S+\\?Q\\?/iX'
+local raw_to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/X'
+reconf['TO_NEEDS_ENCODING'] = {
+ re = string.format('!(%s) & !(%s) & (%s)', to_encoded_b64, to_encoded_qp, raw_to_needs_mime),
+ score = 1.0,
+ mime_only = true,
+ description = 'To header needs encoding',
+ group = 'headers'
+}
+
+-- Detects that there is no space in the From header between the display
+-- name and the angle-bracketed address (e.g. Some Name<some@host>)
+reconf['R_NO_SPACE_IN_FROM'] = {
+ re = 'From=/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/X',
+ score = 1.0,
+ mime_only = true,
+ description = 'No space in From header',
+ group = 'headers'
+}
+
+-- Detects a To address padded with one whitespace character on each side
+-- inside the angle brackets
+reconf['TO_WRAPPED_IN_SPACES'] = {
+ re = [[To=/<\s[-.\w]+\@[-.\w]+\s>/X]],
+ score = 2.0,
+ mime_only = true,
+ description = 'To address is wrapped in spaces inside angle brackets (e.g. display-name < local-part@domain >)',
+ group = 'headers'
+}
+
+-- Detects missing Subject header
+reconf['MISSING_SUBJECT'] = {
+ re = '!raw_header_exists(Subject)',
+ score = 2.0,
+ mime_only = true,
+ description = 'Subject header is missing',
+ group = 'headers'
+}
+
+-- Fires when a Subject header is present but its value has zero length.
+rspamd_config.EMPTY_SUBJECT = {
+  score = 1.0,
+  mime_only = true,
+  description = 'Subject header is empty',
+  group = 'headers',
+  callback = function(task)
+    local subject = task:get_header('Subject')
+    -- "or false" turns a missing header (nil) into an explicit false
+    return (subject and #subject == 0) or false
+  end
+}
+
+-- Detects missing To header
+reconf['MISSING_TO'] = {
+ re = '!raw_header_exists(To)',
+ score = 2.0,
+ description = 'To header is missing',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Detects undisclosed recipients
+reconf['R_UNDISC_RCPT'] = {
+ -- match:
+ -- To: undisclosed-recipients:;
+ -- To: Undisclosed recipients:;
+ -- To: undisclosed-recipients: ;
+ -- To: <Undisclosed-Recipient:;>
+ -- To: <"Undisclosed-Recipient:;">
+ -- To: "undisclosed-recipients (utajeni adresati)": ;
+ -- To: Undisclosed recipients:
+ -- but do not match:
+ -- Undisclosed Recipient <user@example.org>
+ re = [[To=/^<?"?undisclosed[- ]recipients?\b.*:/i{header}]],
+ score = 3.0,
+ description = 'Recipients are absent or undisclosed',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Detects missing Message-ID
+-- NOTE(review): has_mid is not referenced in the visible part of this file;
+-- MISSING_MID below repeats the expression inline. Confirm it is used
+-- further down before removing it.
+local has_mid = 'header_exists(Message-Id)'
+reconf['MISSING_MID'] = {
+ re = '!header_exists(Message-Id)',
+ score = 2.5,
+ description = 'Message-ID header is missing',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Received header seems to be fake: it has the bare shape
+-- "from [a.b.c.d] by <host>; <Day>, <DD> <Mon> <YYYY> <HH:MM:SS> <+/-ZZZZ>"
+reconf['R_RCVD_SPAMBOTS'] = {
+ re = 'Received=/^from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by [-.\\w+]{5,255}; [SMTWF][a-z][a-z],' ..
+ ' [\\s\\d]?\\d [JFMAJSOND][a-z][a-z] \\d{4} \\d{2}:\\d{2}:\\d{2} [-+]\\d{4}$/mH',
+ score = 3.0,
+ description = 'Spambots signatures in received headers',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Charset is missing in message: a non-empty text/plain body without a
+-- charset parameter whose transfer encoding is not 7bit
+reconf['R_MISSING_CHARSET'] = {
+ re = string.format('!is_empty_body() & content_type_is_type(text) & content_type_is_subtype(plain) & !content_type_has_param(charset) & !%s',
+ 'compare_transfer_encoding(7bit)'),
+ score = 0.5,
+ description = 'Charset header is missing',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Find forged Outlook MUA
+-- Yahoo groups messages
+local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H'
+-- Outlook MUA
+-- (outlook_mua and any_outlook_mua currently hold the same expression)
+local outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
+local any_outlook_mua = 'X-Mailer=/^Microsoft Outlook\\b/H'
+-- Outlook X-Mailer on a message that has only an HTML part and does not
+-- come through Yahoo groups
+reconf['FORGED_OUTLOOK_HTML'] = {
+ re = string.format('!%s & %s & %s', yahoo_bulk, outlook_mua, 'has_only_html_part()'),
+ score = 5.0,
+ description = 'Forged Outlook HTML signature',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Recipients look similar to each other (only works when there are more than 5 recipients)
+reconf['SUSPICIOUS_RECIPS'] = {
+ re = 'compare_recipients_distance(0.65)',
+ score = 1.5,
+ description = 'Recipients seems to be autogenerated (works if recipients count is more than 5)',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Recipients list seems to be sorted
+reconf['SORTED_RECIPS'] = {
+ re = 'is_recipients_sorted()',
+ score = 3.5,
+ description = 'Recipients list seems to be sorted',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- Spam string at the end of the message, added to throw off statistics:
+-- a trailing run of 24 to 84 letters/digits (dashes and underscores are
+-- allowed in the middle section) followed only by optional whitespace
+reconf['TRACKER_ID'] = {
+ re = '/^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\\s*\\z/isPr',
+ score = 3.84,
+ description = 'Spam string at the end of message to make statistics fault',
+ group = 'headers',
+ mime_only = true,
+}
+
+-- All *_EXCESS_* rules below share one shape: the header carries an
+-- encoded-word marker (base64 or quoted-printable) although its value has no
+-- character from the "needs MIME" class, so the encoding was unnecessary.
+
+-- Matches when From has a character that needs MIME encoding (parsed headers
+-- are used); the rules negate it to require a 7bit-only From
+local from_needs_mime = 'From=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- From that contains encoded characters while base 64 is not needed as all symbols are 7bit
+reconf['FROM_EXCESS_BASE64'] = {
+ re = string.format('%s & !%s', from_encoded_b64, from_needs_mime),
+ score = 1.5,
+ description = 'From header is unnecessarily encoded in base64',
+ group = 'excessb64',
+ mime_only = true,
+}
+
+-- From that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+reconf['FROM_EXCESS_QP'] = {
+ re = string.format('%s & !%s', from_encoded_qp, from_needs_mime),
+ score = 1.2,
+ description = 'From header is unnecessarily encoded in quoted-printable',
+ group = 'excessqp'
+}
+
+-- Matches when To has a character that needs MIME encoding (parsed headers are used)
+local to_needs_mime = 'To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- To that contains encoded characters while base 64 is not needed as all symbols are 7bit
+reconf['TO_EXCESS_BASE64'] = {
+ re = string.format('%s & !%s', to_encoded_b64, to_needs_mime),
+ score = 1.5,
+ description = 'To header is unnecessarily encoded in base64',
+ group = 'excessb64'
+}
+
+-- To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Final rule
+reconf['TO_EXCESS_QP'] = {
+ re = string.format('%s & !%s', to_encoded_qp, to_needs_mime),
+ score = 1.2,
+ description = 'To header is unnecessarily encoded in quoted-printable',
+ group = 'excessqp'
+}
+
+-- Reply-To that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that Reply-To header is encoded with base64 (search in raw headers)
+local replyto_encoded_b64 = 'Reply-To=/\\=\\?\\S+\\?B\\?/iX'
+-- Matches when Reply-To has a character that needs MIME encoding (parsed headers are used)
+local replyto_needs_mime = 'Reply-To=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['REPLYTO_EXCESS_BASE64'] = {
+ re = string.format('%s & !%s', replyto_encoded_b64, replyto_needs_mime),
+ score = 1.5,
+ description = 'Reply-To header is unnecessarily encoded in base64',
+ group = 'excessb64'
+}
+
+-- Reply-To that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that Reply-To header is encoded with quoted-printable (search in raw headers)
+local replyto_encoded_qp = 'Reply-To=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['REPLYTO_EXCESS_QP'] = {
+ re = string.format('%s & !%s', replyto_encoded_qp, replyto_needs_mime),
+ score = 1.2,
+ description = 'Reply-To header is unnecessarily encoded in quoted-printable',
+ group = 'excessqp'
+}
+
+-- Cc that contains encoded characters while base 64 is not needed as all symbols are 7bit
+-- Regexp that checks that Cc header is encoded with base64 (search in raw headers)
+local cc_encoded_b64 = 'Cc=/\\=\\?\\S+\\?B\\?/iX'
+-- Matches when Cc has a character that needs MIME encoding (parsed headers are used)
+local cc_needs_mime = 'Cc=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+-- Final rule
+reconf['CC_EXCESS_BASE64'] = {
+ re = string.format('%s & !%s', cc_encoded_b64, cc_needs_mime),
+ score = 1.5,
+ description = 'Cc header is unnecessarily encoded in base64',
+ group = 'excessb64'
+}
+
+-- Cc that contains encoded characters while quoted-printable is not needed as all symbols are 7bit
+-- Regexp that checks that Cc header is encoded with quoted-printable (search in raw headers)
+local cc_encoded_qp = 'Cc=/\\=\\?\\S+\\?Q\\?/iX'
+-- Final rule
+reconf['CC_EXCESS_QP'] = {
+ re = string.format('%s & !%s', cc_encoded_qp, cc_needs_mime),
+ score = 1.2,
+ description = 'Cc header is unnecessarily encoded in quoted-printable',
+ group = 'excessqp'
+}
+
+-- Subject that is base64-encoded although all symbols are 7bit
+local subj_encoded_b64 = 'Subject=/\\=\\?\\S+\\?B\\?/iX'
+local subj_needs_mime = 'Subject=/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/Hr'
+reconf['SUBJ_EXCESS_BASE64'] = {
+ re = string.format('%s & !%s', subj_encoded_b64, subj_needs_mime),
+ score = 1.5,
+ description = 'Subject header is unnecessarily encoded in base64',
+ group = 'excessb64'
+}
+
+-- Subject that is quoted-printable-encoded although all symbols are 7bit
+local subj_encoded_qp = 'Subject=/\\=\\?\\S+\\?Q\\?/iX'
+reconf['SUBJ_EXCESS_QP'] = {
+ re = string.format('%s & !%s', subj_encoded_qp, subj_needs_mime),
+ score = 1.2,
+ description = 'Subject header is unnecessarily encoded in quoted-printable',
+ group = 'excessqp'
+}
+
+-- Detect forged outlook headers
+-- The atoms below are combined into FORGED_MUA_OUTLOOK; unusable_msgid is
+-- also reused by the other FORGED_MUA_* rules in this file to skip messages
+-- whose Message-ID was rewritten by a remailer or relay.
+-- OE X-Mailer header
+local oe_mua = 'X-Mailer=/\\bOutlook Express [456]\\./H'
+-- OE Message ID format
+local oe_msgid_1 = 'Message-Id=/^<?[A-Za-z0-9-]{7}[A-Za-z0-9]{20}\\@hotmail\\.com>?$/mH'
+local oe_msgid_2 = 'Message-Id=/^<?(?:[0-9a-f]{8}|[0-9a-f]{12})\\$[0-9a-f]{8}\\$[0-9a-f]{8}\\@\\S+>?$/H'
+-- EZLM remail of message
+local lyris_ezml_remailer = 'List-Unsubscribe=/<mailto:(?:leave-\\S+|\\S+-unsubscribe)\\@\\S+>$/H'
+-- Header of wacky sendmail
+local wacky_sendmail_version = 'Received=/\\/CWT\\/DCE\\)/H'
+-- Iplanet received header
+local iplanet_messaging_server = 'Received=/iPlanet Messaging Server/H'
+-- Hotmail message id
+-- The closing bracket is optional (">?$"), like in the sibling Message-ID
+-- patterns; the previous "gbl?>$" made the letter "l" optional instead
+local hotmail_baydav_msgid = 'Message-Id=/^<?BAY\\d+-DAV\\d+[A-Z0-9]{25}\\@phx\\.gbl>?$/H'
+-- Sympatico message id
+local sympatico_msgid = 'Message-Id=/^<?BAYC\\d+-PASMTP\\d+[A-Z0-9]{25}\\@CEZ\\.ICE>?$/H'
+-- Mailman message id
+-- https://bazaar.launchpad.net/~mailman-coders/mailman/2.1/view/head:/Mailman/Utils.py#L811
+local mailman_msgid = [[Message-ID=/^<mailman\.\d+\.\d+\.\d+\.[-+.:=\w]+@[-a-zA-Z\d.]+>$/H]]
+-- Message id cannot be used for MUA checks (any of the formats above)
+local unusable_msgid = string.format('(%s | %s | %s | %s | %s | %s)',
+    lyris_ezml_remailer, wacky_sendmail_version,
+    iplanet_messaging_server, hotmail_baydav_msgid, sympatico_msgid, mailman_msgid)
+-- Outlook express data seems to be forged: OE X-Mailer without any of the
+-- Message-ID formats OE produces
+local forged_oe = string.format('(%s & !%s & !%s & !%s)', oe_mua, oe_msgid_1, oe_msgid_2, unusable_msgid)
+-- Outlook specific headers
+local outlook_dollars_mua = 'X-Mailer=/^Microsoft Outlook(?: 8| CWS, Build 9|, Build 10)\\./H'
+local outlook_dollars_other = 'Message-Id=/^<?\\!\\~\\!>?/H'
+local vista_msgid = 'Message-Id=/^<?[A-F\\d]{32}\\@\\S+>?$/H'
+local ims_msgid = 'Message-Id=/^<?[A-F\\d]{36,40}\\@\\S+>?$/H'
+-- Forged outlook headers
+local forged_outlook_dollars = string.format('(%s & !%s & !%s & !%s & !%s & !%s)',
+    outlook_dollars_mua, oe_msgid_2, outlook_dollars_other, vista_msgid, ims_msgid, unusable_msgid)
+-- Outlook versions that should be excluded from summary rule
+-- (dots are escaped so that only these exact version strings are excluded)
+local fmo_excl_o3416 = 'X-Mailer=/^Microsoft Outlook, Build 10\\.0\\.3416$/H'
+local fmo_excl_oe3790 = 'X-Mailer=/^Microsoft Outlook Express 6\\.00\\.3790\\.3959$/H'
+-- Summary rule for forged outlook
+reconf['FORGED_MUA_OUTLOOK'] = {
+  re = string.format('(%s | %s) & !%s & !%s & !%s',
+      forged_oe, forged_outlook_dollars, fmo_excl_o3416, fmo_excl_oe3790, vista_msgid),
+  score = 3.0,
+  description = 'Forged Outlook MUA',
+  group = 'mua'
+}
+
+-- HTML outlook signs
+-- Content type is text/*html, plus one atom per tag that is expected to be
+-- present (html, head, meta, body)
+local mime_html = 'content_type_is_type(text) & content_type_is_subtype(/.?html/)'
+local tag_exists_html = 'has_html_tag(html)'
+local tag_exists_head = 'has_html_tag(head)'
+local tag_exists_meta = 'has_html_tag(meta)'
+local tag_exists_body = 'has_html_tag(body)'
+-- Outlook X-Mailer on an HTML message (not from Yahoo groups) that lacks at
+-- least one of the four tags above
+reconf['FORGED_OUTLOOK_TAGS'] = {
+ re = string.format('!%s & %s & %s & !(%s & %s & %s & %s)',
+ yahoo_bulk, any_outlook_mua, mime_html, tag_exists_html, tag_exists_head,
+ tag_exists_meta, tag_exists_body),
+ score = 2.1,
+ description = "Message pretends to be send from Outlook but has 'strange' tags",
+ group = 'headers'
+}
+
+-- Forged OE/MSO boundary: "----=_NextPart_000_XXXX_<id>.XXXXXXXX" where <id>
+-- is one of four fixed values
+reconf['SUSPICIOUS_BOUNDARY'] = {
+ re = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(00EBFFA4|0102FFA4|32C6FFA4|3302FFA4)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX',
+ score = 5.0,
+ description = 'Suspicious boundary in Content-Type header',
+ group = 'mua'
+}
+-- Forged OE/MSO boundary: same layout with the fixed id 01C6527E
+reconf['SUSPICIOUS_BOUNDARY2'] = {
+ re = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_(01C6527E)\\.[A-Z\\d]{8}"[\\r\\n]*$/siX',
+ score = 4.0,
+ description = 'Suspicious boundary in Content-Type header',
+ group = 'mua'
+}
+-- Forged OE/MSO boundary: "-----000-00NN-01CXXXXX-XXXXXXXX" layout
+reconf['SUSPICIOUS_BOUNDARY3'] = {
+ re = 'Content-Type=/^\\s*multipart.+boundary="-----000-00\\d\\d-01C[\\dA-F]{5}-[\\dA-F]{8}"[\\r\\n]*$/siX',
+ score = 3.0,
+ description = 'Suspicious boundary in Content-Type header',
+ group = 'mua'
+}
+-- Forged OE/MSO boundary: a NextPart boundary whose id starts with 01C4,
+-- combined with a Date header whose year is in the 2005-2019 range
+local suspicious_boundary_01C4 = 'Content-Type=/^\\s*multipart.+boundary="----=_NextPart_000_[A-Z\\d]{4}_01C4[\\dA-F]{4}\\.[A-Z\\d]{8}"[\\r\\n]*$/siX'
+local suspicious_boundary_01C4_date = 'Date=/^\\s*\\w\\w\\w,\\s+\\d+\\s+\\w\\w\\w 20(0[56789]|1\\d)/'
+reconf['SUSPICIOUS_BOUNDARY4'] = {
+ re = string.format('(%s) & (%s)', suspicious_boundary_01C4, suspicious_boundary_01C4_date),
+ score = 4.0,
+ description = 'Suspicious boundary in Content-Type header',
+ group = 'mua'
+}
+
+-- Detect forged The Bat! headers
+-- The Bat! X-Mailer header
+local thebat_mua_any = 'X-Mailer=/^\\s*The Bat!/H'
+-- The Bat! common Message-ID template: <digits>.<digits>@<host>
+local thebat_msgid_common = 'Message-ID=/^<?\\d+\\.\\d+\\@\\S+>?$/mH'
+-- Correct The Bat! Message-ID template: the second number is a
+-- YYYYMMDDhhmmss timestamp
+local thebat_msgid = 'Message-ID=/^<?\\d+\\.(19[789]\\d|20\\d\\d)(0\\d|1[012])([012]\\d|3[01])([0-5]\\d)([0-5]\\d)([0-5]\\d)\\@\\S+>?/mH'
+-- Summary rule for forged The Bat! Message-ID header: The Bat! X-Mailer,
+-- Message-ID follows the common template but not the correct one
+reconf['FORGED_MUA_THEBAT_MSGID'] = {
+ re = string.format('(%s) & !(%s) & (%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid),
+ score = 4.0,
+ description = 'Message pretends to be send from The Bat! but has forged Message-ID',
+ group = 'mua'
+}
+-- Summary rule for forged The Bat! Message-ID header with unknown template:
+-- the Message-ID matches neither The Bat! template
+reconf['FORGED_MUA_THEBAT_MSGID_UNKNOWN'] = {
+ re = string.format('(%s) & !(%s) & !(%s) & !(%s)', thebat_mua_any, thebat_msgid, thebat_msgid_common, unusable_msgid),
+ score = 3.0,
+ description = 'Message pretends to be send from The Bat! but has forged Message-ID',
+ group = 'mua'
+}
+
+-- Detect forged KMail headers
+-- KMail User-Agent header (KMail/1.x.y)
+local kmail_mua = 'User-Agent=/^\\s*KMail\\/1\\.\\d+\\.\\d+/H'
+-- KMail common Message-ID template: <digits>.<digits>.<token>@<host>
+local kmail_msgid_common = 'Message-Id=/^<?\\s*\\d+\\.\\d+\\.\\S+\\@\\S+>?$/mH'
+-- Summary rule for forged KMail Message-ID header with unknown template
+reconf['FORGED_MUA_KMAIL_MSGID_UNKNOWN'] = {
+ re = string.format('(%s) & !(%s) & !(%s)', kmail_mua, kmail_msgid_common, unusable_msgid),
+ score = 2.5,
+ description = 'Message pretends to be send from KMail but has forged Message-ID',
+ group = 'mua'
+}
+
+-- Detect forged Opera Mail headers
+-- Opera Mail User-Agent header (versions 10.x and 11.x)
+local opera1x_mua = 'User-Agent=/^\\s*Opera Mail\\/1[01]\\.\\d+ /H'
+-- Opera Mail Message-ID template: op.<14 lowercase letters or digits>@<host>
+local opera1x_msgid = 'Message-ID=/^<?op\\.[a-z\\d]{14}\\@\\S+>?$/H'
+-- Rule for forged Opera Mail Message-ID header
+reconf['FORGED_MUA_OPERA_MSGID'] = {
+ re = string.format('(%s) & !(%s) & !(%s)', opera1x_mua, opera1x_msgid, unusable_msgid),
+ score = 4.0,
+ description = 'Message pretends to be send from Opera Mail but has forged Message-ID',
+ group = 'mua'
+}
+
+-- Detect forged Mozilla Mail/Thunderbird/Seamonkey/Postbox headers
+-- Mozilla based User-Agent patterns (these match the User-Agent header, not X-Mailer)
+-- Any User-Agent starting with "Mozilla/5.0"
+local user_agent_mozilla5 = 'User-Agent=/^\\s*Mozilla\\/5\\.0/H'
+-- Thunderbird, including Gecko builds branded Thunderbird, Betterbird or Icedove
+local user_agent_thunderbird = 'User-Agent=/^\\s*(Thunderbird|Mozilla Thunderbird|Mozilla\\/.*Gecko\\/.*(Thunderbird|Betterbird|Icedove)\\/)/H'
+-- SeaMonkey with a major.minor version
+local user_agent_seamonkey = 'User-Agent=/^\\s*Mozilla\\/5\\.0\\s.+\\sSeaMonkey\\/\\d+\\.\\d+/H'
+-- Postbox (PostboxApp/x.y.z or x.y.z.w at the very end of the value)
+local user_agent_postbox = [[User-Agent=/^\s*Mozilla\/5\.0\s\([^)]+\)\sGecko\/\d+\sPostboxApp\/\d+(?:\.\d+){2,3}$/H]]
+-- "Mozilla/5.0" User-Agent that is none of Thunderbird, SeaMonkey or Postbox
+local user_agent_mozilla = string.format('(%s) & !(%s) & !(%s) & !(%s)', user_agent_mozilla5, user_agent_thunderbird,
+ user_agent_seamonkey, user_agent_postbox)
+-- Mozilla based common Message-ID template: <8 uppercase hex digits.1-7 digits@domain>
+local mozilla_msgid_common = 'Message-ID=/^\\s*<[\\dA-F]{8}\\.\\d{1,7}\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
+-- Second common template: lowercase hex in 8-4-4-4-12 groups (UUID-like) before the @
+local mozilla_msgid_common_sec = 'Message-ID=/^\\s*<[\\da-f]{8}-([\\da-f]{4}-){3}[\\da-f]{12}\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
+-- Stricter variant of the first template (restricted leading hex byte, every
+-- second digit of the numeric part is 0)
+local mozilla_msgid = 'Message-ID=/^\\s*<(3[3-9A-F]|[4-9A-F][\\dA-F])[\\dA-F]{6}\\.(\\d0){1,4}\\d\\@([^>\\.]+\\.)+[^>\\.]+>$/H'
+-- Generic Mozilla User-Agent (user_agent_mozilla) whose Message-ID fits the
+-- common Mozilla layout but not the stricter mozilla_msgid pattern
+reconf['FORGED_MUA_MOZILLA_MAIL_MSGID'] = {
+ description = 'Message pretends to be send from Mozilla Mail but has forged Message-ID',
+ re = string.format('(%s) & (%s) & !(%s) & !(%s)',
+ user_agent_mozilla,
+ mozilla_msgid_common,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 4.0,
+ group = 'mua'
+}
+-- Same User-Agent condition, but the Message-ID does not fit the common
+-- Mozilla layout at all.
+-- NOTE(review): unlike the Thunderbird/Seamonkey/Postbox *_UNKNOWN rules, this
+-- one does not exempt mozilla_msgid_common_sec - confirm that is intentional
+reconf['FORGED_MUA_MOZILLA_MAIL_MSGID_UNKNOWN'] = {
+ description = 'Message pretends to be send from Mozilla Mail but has forged Message-ID',
+ re = string.format('(%s) & !(%s) & !(%s) & !(%s)',
+ user_agent_mozilla,
+ mozilla_msgid_common,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 2.5,
+ group = 'mua'
+}
+
+-- Thunderbird User-Agent whose Message-ID fits the common Mozilla layout but
+-- not the stricter mozilla_msgid pattern
+reconf['FORGED_MUA_THUNDERBIRD_MSGID'] = {
+ description = 'Forged mail pretending to be from Mozilla Thunderbird but has forged Message-ID',
+ re = string.format('(%s) & (%s) & !(%s) & !(%s)',
+ user_agent_thunderbird,
+ mozilla_msgid_common,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 4.0,
+ group = 'mua'
+}
+-- Thunderbird User-Agent whose Message-ID fits neither common Mozilla layout
+reconf['FORGED_MUA_THUNDERBIRD_MSGID_UNKNOWN'] = {
+ description = 'Forged mail pretending to be from Mozilla Thunderbird but has forged Message-ID',
+ re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)',
+ user_agent_thunderbird,
+ mozilla_msgid_common,
+ mozilla_msgid_common_sec,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 2.5,
+ group = 'mua'
+}
+-- SeaMonkey User-Agent whose Message-ID fits the common Mozilla layout but not
+-- the stricter mozilla_msgid pattern
+reconf['FORGED_MUA_SEAMONKEY_MSGID'] = {
+ description = 'Forged mail pretending to be from Mozilla Seamonkey but has forged Message-ID',
+ re = string.format('(%s) & (%s) & !(%s) & !(%s)',
+ user_agent_seamonkey,
+ mozilla_msgid_common,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 4.0,
+ group = 'mua'
+}
+-- SeaMonkey User-Agent whose Message-ID fits neither common Mozilla layout
+reconf['FORGED_MUA_SEAMONKEY_MSGID_UNKNOWN'] = {
+ description = 'Forged mail pretending to be from Mozilla Seamonkey but has forged Message-ID',
+ re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)',
+ user_agent_seamonkey,
+ mozilla_msgid_common,
+ mozilla_msgid_common_sec,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 2.5,
+ group = 'mua'
+}
+-- Postbox User-Agent whose Message-ID fits the common Mozilla layout but not
+-- the stricter mozilla_msgid pattern
+reconf['FORGED_MUA_POSTBOX_MSGID'] = {
+ description = 'Forged mail pretending to be from Postbox but has forged Message-ID',
+ re = string.format('(%s) & (%s) & !(%s) & !(%s)',
+ user_agent_postbox,
+ mozilla_msgid_common,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 4.0,
+ group = 'mua'
+}
+-- Postbox User-Agent whose Message-ID fits neither common Mozilla layout
+reconf['FORGED_MUA_POSTBOX_MSGID_UNKNOWN'] = {
+ description = 'Forged mail pretending to be from Postbox but has forged Message-ID',
+ re = string.format('(%s) & !((%s) | (%s)) & !(%s) & !(%s)',
+ user_agent_postbox,
+ mozilla_msgid_common,
+ mozilla_msgid_common_sec,
+ mozilla_msgid,
+ unusable_msgid),
+ score = 2.5,
+ group = 'mua'
+}
+
+-- Message-ID validity
+-- Well-formed value: optional angle brackets around local@domain, with no
+-- brackets, backslashes, whitespace or 8-bit characters on either side of the @
+local sane_msgid = 'Message-Id=/^<?[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+\\@[^<>\\\\ \\t\\n\\r\\x0b\\x80-\\xff]+>?\\s*$/H'
+-- Value containing a parenthesised part
+local msgid_comment = 'Message-Id=/\\(.*\\)/H'
+-- has_mid (defined earlier in this file) holds, yet the value is neither
+-- well-formed nor contains a parenthesised part
+reconf['INVALID_MSGID'] = {
+ description = 'Message-ID header is incorrect',
+ re = string.format('(%s) & !((%s) | (%s))',
+ has_mid,
+ sane_msgid,
+ msgid_comment),
+ score = 1.7,
+ group = 'headers'
+}
+
+-- Content-Type is present while Content-Disposition, Content-Transfer-Encoding
+-- and MIME-Version are all absent; text/plain messages are exempt
+local cd = 'header_exists(Content-Disposition)'
+local cte = 'header_exists(Content-Transfer-Encoding)'
+local ct = 'header_exists(Content-Type)'
+local mime_version = 'raw_header_exists(MIME-Version)'
+local ct_text_plain = 'content_type_is_type(text) & content_type_is_subtype(plain)'
+reconf['MIME_HEADER_CTYPE_ONLY'] = {
+ description = 'Only Content-Type header without other MIME headers',
+ re = string.format('!(%s) & !(%s) & (%s) & !(%s) & !(%s)',
+ cd,
+ cte,
+ ct,
+ mime_version,
+ ct_text_plain),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Forged Exchange messages: a hex$hex$hex@host Message-ID without the
+-- Microsoft MimeOLE header and without an Exchange Received line
+local msgid_dollars_ok = 'Message-Id=/[0-9a-f]{4,}\\$[0-9a-f]{4,}\\$[0-9a-f]{4,}\\@\\S+/H'
+local mimeole_ms = 'X-MimeOLE=/^Produced By Microsoft MimeOLE/H'
+local rcvd_with_exchange = 'Received=/with Microsoft Exchange Server/H'
+reconf['RATWARE_MS_HASH'] = {
+ description = 'Forged Exchange messages',
+ re = string.format('(%s) & !(%s) & !(%s)',
+ msgid_dollars_ok,
+ mimeole_ms,
+ rcvd_with_exchange),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- text/plain Content-Type that carries a reply-type=original parameter
+reconf['STOX_REPLY_TYPE'] = {
+ description = 'Reply-type in Content-Type header',
+ re = 'Content-Type=/text\\/plain; .* reply-type=original/H',
+ score = 1.0,
+ group = 'headers'
+}
+
+-- Forged Yahoo Message-ID: the Message-ID mentions @yahoo.com although the
+-- From header does not
+local at_yahoo_msgid = 'Message-Id=/\\@yahoo\\.com\\b/iH'
+local from_yahoo_com = 'From=/\\@yahoo\\.com\\b/iH'
+reconf['FORGED_MSGID_YAHOO'] = {
+ description = 'Forged Yahoo Message-ID header',
+ re = string.format('(%s) & !(%s)',
+ at_yahoo_msgid,
+ from_yahoo_com),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Forged The Bat! v1 headers: the Content-Type has a boundary that does not
+-- start with ten dashes; messages with an X-Mailman-Version header are exempt
+local thebat_mua_v1 = 'X-Mailer=/^The Bat! \\(v1\\./H'
+local ctype_has_boundary = 'Content-Type=/boundary/iH'
+local bat_boundary = 'Content-Type=/boundary=\\"?-{10}/H'
+local mailman_21 = 'X-Mailman-Version=/\\d/H'
+reconf['FORGED_MUA_THEBAT_BOUN'] = {
+ description = 'Forged The Bat! MUA headers',
+ re = string.format('(%s) & (%s) & !(%s) & !(%s)',
+ thebat_mua_v1,
+ ctype_has_boundary,
+ bat_boundary,
+ mailman_21),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Mail.Ru web-mail: its X-Mailer value plus a "by e.mail.ru with HTTP"
+-- Received line (optionally preceded by "from [IPv4]")
+local xm_mail_ru_mailer_1_0 = 'X-Mailer=/^Mail\\.Ru Mailer 1\\.0$/H'
+local rcvd_e_mail_ru = 'Received=/^(?:from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] )?by e\\.mail\\.ru with HTTP;/mH'
+reconf['MAIL_RU_MAILER'] = {
+ description = 'Sent with Mail.Ru webmail',
+ re = string.format('(%s) & (%s)',
+ xm_mail_ru_mailer_1_0,
+ rcvd_e_mail_ru),
+ score = 0.0,
+ group = 'headers'
+}
+
+-- yandex.ru web-mail: its X-Mailer value plus a "by webNNx.yandex.ru with
+-- HTTP" Received line
+local xm_yandex_ru_mailer_5_0 = 'X-Mailer=/^Yamail \\[ http:\\/\\/yandex\\.ru \\] 5\\.0$/H'
+local rcvd_web_yandex_ru = 'Received=/^by web\\d{1,2}[a-z]\\.yandex\\.ru with HTTP;/mH'
+reconf['YANDEX_RU_MAILER'] = {
+ description = 'Sent with Yandex webmail',
+ re = string.format('(%s) & (%s)',
+ xm_yandex_ru_mailer_5_0,
+ rcvd_web_yandex_ru),
+ score = 0.0,
+ group = 'headers'
+}
+
+-- X-Mailer of 1C:Enterprise 8.2 or 8.3
+reconf['MAILER_1C_8'] = {
+ description = 'Sent with 1C:Enterprise 8',
+ re = 'X-Mailer=/^1C:Enterprise 8\\.[23]$/H',
+ score = 0.0,
+ group = 'headers'
+}
+
+-- Rogue 'strongmail' MTA: Received line "from strongmail ([IPv4]) by host (-); "
+reconf['STRONGMAIL'] = {
+ description = 'Sent via rogue "strongmail" MTA',
+ re = [[Received=/^from\s+strongmail\s+\(\[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\]\) by \S+ \(-\); /mH]],
+ score = 6.0,
+ group = 'headers'
+}
+
+-- Received line naming bare IPv4 addresses on both the "from" and the "by"
+-- side; two textual layouts are recognised
+local double_ip_spam_1 = 'Received=/from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with/H'
+local double_ip_spam_2 = 'Received=/from\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\s+by\\s+\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3};/H'
+reconf['RCVD_DOUBLE_IP_SPAM'] = {
+ description = 'Has two Received headers containing bare IP addresses',
+ re = string.format('(%s) | (%s)',
+ double_ip_spam_1,
+ double_ip_spam_2),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Reply-To with a double-quoted part followed by '<', on a message whose From
+-- or Message-ID mentions @yahoo.com (seems to be forged)
+local repto_quote = 'Reply-To=/\\".*\\"\\s*\\</H'
+reconf['REPTO_QUOTE_YAHOO'] = {
+ description = 'Quoted Reply-To header from Yahoo (seems to be forged)',
+ re = string.format('(%s) & ((%s) | (%s))',
+ repto_quote,
+ from_yahoo_com,
+ at_yahoo_msgid),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Subject starts with "re:" (case-insensitive) although neither In-Reply-To
+-- nor References is present
+reconf['FAKE_REPLY'] = {
+ re = [[Subject=/^re:/i{header} & !(header_exists(In-Reply-To) | header_exists(References))]],
+ score = 1.0,
+ group = 'headers',
+ description = 'Fake reply'
+}
+
+-- X-MSMail-Priority is present without X-MimeOLE (e.g. fake Outlook or fake
+-- Exchange). Exempt: SquirrelMail, Outlook versions 10.0-29.0 in X-Mailer and
+-- messages carrying X-Android-Message-Id
+local has_msmail_pri = 'header_exists(X-MSMail-Priority)'
+local has_mimeole = 'header_exists(X-MimeOLE)'
+local has_squirrelmail_in_mailer = 'X-Mailer=/SquirrelMail\\b/H'
+local has_office_version_in_mailer = [[X-Mailer=/^Microsoft (?:Office )?Outlook [12]\d\.0/]]
+local has_x_android_message_id = 'header_exists(X-Android-Message-Id)'
+reconf['MISSING_MIMEOLE'] = {
+ description = 'Mime-OLE is needed but absent (e.g. fake Outlook or fake Exchange)',
+ re = string.format('(%s) & !(%s) & !(%s) & !(%s) & !(%s)',
+ has_msmail_pri, has_mimeole, has_squirrelmail_in_mailer,
+ has_office_version_in_mailer, has_x_android_message_id),
+ score = 2.0,
+ group = 'headers'
+}
+
+-- Empty delimiters between header names and header values
+-- Builds a task callback for `header_name`; the callback returns true as soon
+-- as one instance of that header has its 'empty_separator' field set, and
+-- false otherwise (including when the header is absent)
+local function gen_check_header_delimiter_empty(header_name)
+ return function(task)
+ local instances = task:get_header_full(header_name)
+ if not instances then
+ return false
+ end
+ for _, instance in ipairs(instances) do
+ if instance.empty_separator then
+ return true
+ end
+ end
+ return false
+ end
+end
+-- HEADER_{FROM,TO,CC,REPLYTO,DATE}_EMPTY_DELIMITER: one rule per checked
+-- header. Each entry lists the rule symbol, the header name and the name under
+-- which the generated Lua check is exposed to the rule expression.
+for _, rule in ipairs({
+ { 'HEADER_FROM_EMPTY_DELIMITER', 'From', 'check_from_delim_empty' },
+ { 'HEADER_TO_EMPTY_DELIMITER', 'To', 'check_to_delim_empty' },
+ { 'HEADER_CC_EMPTY_DELIMITER', 'Cc', 'check_cc_delim_empty' },
+ { 'HEADER_REPLYTO_EMPTY_DELIMITER', 'Reply-To', 'check_repto_delim_empty' },
+ { 'HEADER_DATE_EMPTY_DELIMITER', 'Date', 'check_date_delim_empty' },
+}) do
+ local symbol, header_name, func_name = rule[1], rule[2], rule[3]
+ reconf[symbol] = {
+ re = '(lua:' .. func_name .. ')',
+ score = 1.0,
+ description = header_name .. ' header has no delimiter between header name and header value',
+ group = 'headers',
+ functions = {
+ [func_name] = gen_check_header_delimiter_empty(header_name)
+ }
+ }
+end
+
+-- Received header rules
+-- Any byte in the 0x80-0xff range inside a Received header
+reconf['RCVD_ILLEGAL_CHARS'] = {
+ description = 'Received header has raw illegal character',
+ re = 'Received=/[\\x80-\\xff]/X',
+ score = 4.0,
+ group = 'headers'
+}
+
+-- Sender address patterns for mail.ru (envelope headers and From)
+local MAIL_RU_Return_Path = 'Return-path=/^\\s*<.+\\@mail\\.ru>$/iX'
+local MAIL_RU_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@mail\\.ru>$/iX'
+local MAIL_RU_From = 'From=/\\@mail\\.ru>?$/iX'
+-- Received line claiming "from mail.ru ("
+local MAIL_RU_Received = 'Received=/from mail\\.ru \\(/mH'
+
+-- "from mail.ru (" in Received without the combination of a mail.ru From and
+-- a mail.ru Return-path or X-Envelope-From
+reconf['FAKE_RECEIVED_mail_ru'] = {
+ description = 'Fake HELO mail.ru in Received header from non-mail.ru sender address',
+ re = string.format('(%s) & !(((%s) | (%s)) & (%s))',
+ MAIL_RU_Received,
+ MAIL_RU_Return_Path,
+ MAIL_RU_X_Envelope_From,
+ MAIL_RU_From),
+ score = 4.0,
+ group = 'headers'
+}
+
+-- Sender address patterns for gmail.com (envelope headers and From)
+local GMAIL_COM_Return_Path = 'Return-path=/^\\s*<.+\\@gmail\\.com>$/iX'
+local GMAIL_COM_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@gmail\\.com>$/iX'
+local GMAIL_COM_From = 'From=/\\@gmail\\.com>?$/iX'
+
+-- Sender address patterns for ukr.net (envelope headers and From)
+local UKR_NET_Return_Path = 'Return-path=/^\\s*<.+\\@ukr\\.net>$/iX'
+local UKR_NET_X_Envelope_From = 'X-Envelope-From=/^\\s*<.+\\@ukr\\.net>$/iX'
+local UKR_NET_From = 'From=/\\@ukr\\.net>?$/iX'
+
+-- Nine textual layouts of a Received line in which the sending host names
+-- itself smtp.yandex.ru (as HELO/helo value or as the "from" host)
+local RECEIVED_smtp_yandex_ru_1 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\((port=\\d+ )?helo=smtp\\.yandex\\.ru\\)/iX'
+local RECEIVED_smtp_yandex_ru_2 = 'Received=/from \\[UNAVAILABLE\\] \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
+local RECEIVED_smtp_yandex_ru_3 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]:\\d+ helo=smtp\\.yandex\\.ru\\)/iX'
+local RECEIVED_smtp_yandex_ru_4 = 'Received=/from \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] \\(account \\S+ HELO smtp\\.yandex\\.ru\\)/iX'
+local RECEIVED_smtp_yandex_ru_5 = 'Received=/from smtp\\.yandex\\.ru \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
+local RECEIVED_smtp_yandex_ru_6 = 'Received=/from smtp\\.yandex\\.ru \\(\\S+ \\[\\d+\\.\\d+\\.\\d+\\.\\d+\\]\\)/iX'
+local RECEIVED_smtp_yandex_ru_7 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\S+\\@\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
+local RECEIVED_smtp_yandex_ru_8 = 'Received=/from \\S+ \\(HELO smtp\\.yandex\\.ru\\) \\(\\d+\\.\\d+\\.\\d+\\.\\d+\\)/iX'
+local RECEIVED_smtp_yandex_ru_9 = 'Received=/from \\S+ \\(\\[\\d+\\.\\d+\\.\\d+\\.\\d+\\] helo=smtp\\.yandex\\.ru\\)/iX'
+
+-- A mail.ru, gmail.com or ukr.net sender (From plus Return-path or
+-- X-Envelope-From) combined with a Received line in which the sending host
+-- names itself smtp.yandex.ru.
+-- The nine Received alternatives are wrapped in one group: '&' binds tighter
+-- than '|', so without the parentheses only the first alternative was tied to
+-- the sender condition and alternatives 2-9 matched for any sender.
+reconf['FAKE_RECEIVED_smtp_yandex_ru'] = {
+ re = string.format('(((%s) & ((%s) | (%s))) | ((%s) & ((%s) | (%s))) ' ..
+ ' | ((%s) & ((%s) | (%s)))) & ((%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s) | (%s))',
+ MAIL_RU_From, MAIL_RU_Return_Path, MAIL_RU_X_Envelope_From, GMAIL_COM_From,
+ GMAIL_COM_Return_Path, GMAIL_COM_X_Envelope_From, UKR_NET_From, UKR_NET_Return_Path,
+ UKR_NET_X_Envelope_From, RECEIVED_smtp_yandex_ru_1, RECEIVED_smtp_yandex_ru_2,
+ RECEIVED_smtp_yandex_ru_3, RECEIVED_smtp_yandex_ru_4, RECEIVED_smtp_yandex_ru_5,
+ RECEIVED_smtp_yandex_ru_6, RECEIVED_smtp_yandex_ru_7, RECEIVED_smtp_yandex_ru_8,
+ RECEIVED_smtp_yandex_ru_9),
+ score = 4.0,
+ description = 'Fake smtp.yandex.ru Received header',
+ group = 'headers'
+}
+
+-- Four templates of machine-generated Received lines. All of them end in a
+-- 20xx timestamp whose timezone offset has 0 as its last digit.
+-- Template 1: "from [IPv4] by <hostname or IPv4>; <date>"
+reconf['FORGED_GENERIC_RECEIVED'] = {
+ re = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by (([\\w\\d-]+\\.)+[a-zA-Z]{2,6}|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}); \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X',
+ score = 3.6,
+ description = 'Forged generic Received header',
+ group = 'headers'
+}
+
+-- Template 2: "from [IPv4] by <hostname> id <12 word characters>; <date>"
+reconf['FORGED_GENERIC_RECEIVED2'] = {
+ re = 'Received=/^\\s*(.+\\n)*from \\[\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\] by ([\\w\\d-]+\\.)+[a-z]{2,6} id [\\w\\d]{12}; \\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0/X',
+ score = 3.6,
+ description = 'Forged generic Received header',
+ group = 'headers'
+}
+
+-- Template 3: "by <IPv4> with SMTP id <14 letters>.<13 digits>; <date> (GMT)"
+reconf['FORGED_GENERIC_RECEIVED3'] = {
+ re = 'Received=/^\\s*(.+\\n)*by \\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3} with SMTP id [a-zA-Z]{14}\\.\\d{13};[\\r\\n\\s]*\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0 \\(GMT\\)/X',
+ score = 3.6,
+ description = 'Forged generic Received header',
+ group = 'headers'
+}
+
+-- Template 4: "from localhost by <host>; <date>" with nothing after the date
+reconf['FORGED_GENERIC_RECEIVED4'] = {
+ re = 'Received=/^\\s*(.+\\n)*from localhost by \\S+;\\s+\\w{3}, \\d+ \\w{3} 20\\d\\d \\d\\d\\:\\d\\d\\:\\d\\d [+-]\\d\\d\\d0[\\s\\r\\n]*$/X',
+ score = 3.6,
+ description = 'Forged generic Received header',
+ group = 'headers'
+}
+
+-- "(Postfix) with ESMTP id <ID>[ for <addr>]; <date>" where the header ends
+-- directly after the numeric timezone offset
+reconf['INVALID_POSTFIX_RECEIVED'] = {
+ re = 'Received=/ \\(Postfix\\) with ESMTP id [A-Z\\d]+([\\s\\r\\n]+for <\\S+?>)?;[\\s\\r\\n]*[A-Z][a-z]{2}, \\d{1,2} [A-Z][a-z]{2} \\d\\d\\d\\d \\d\\d:\\d\\d:\\d\\d [\\+\\-]\\d\\d\\d\\d$/X',
+ score = 3.0,
+ description = 'Invalid Postfix Received header',
+ group = 'headers'
+}
+
+-- X-PHP-Originating-Script value starting with 0 followed by another digit
+reconf['X_PHP_FORGED_0X'] = {
+ description = "X-PHP-Originating-Script header appears forged",
+ re = "X-PHP-Originating-Script=/^0\\d/X",
+ score = 4.0,
+ group = 'headers'
+}
+
+-- Message-ID ending in SMTPIN_ADDED_MISSING@mx.google.com>
+reconf['GOOGLE_FORWARDING_MID_MISSING'] = {
+ description = "Message was missing Message-ID pre-forwarding",
+ re = "Message-ID=/SMTPIN_ADDED_MISSING\\@mx\\.google\\.com>$/X",
+ score = 2.5,
+ group = 'headers'
+}
+
+-- Message-ID ending in SMTPIN_ADDED_BROKEN@mx.google.com>
+reconf['GOOGLE_FORWARDING_MID_BROKEN'] = {
+ description = "Message had invalid Message-ID pre-forwarding",
+ re = "Message-ID=/SMTPIN_ADDED_BROKEN\\@mx\\.google\\.com>$/X",
+ score = 1.7,
+ group = 'headers'
+}
+
+-- Content-Transfer-Encoding written as 7B.../8B... with an uppercase B
+reconf['CTE_CASE'] = {
+ re = 'Content-Transfer-Encoding=/^[78]B/X',
+ score = 0.5,
+ group = 'headers',
+ description = '[78]Bit .vs. [78]bit'
+}
+
+-- Interspire fingerprint: either all four X-Mailer-* tracking headers or an
+-- unsubscribe.php?M=..&C=..&L=..&N=.. List-Unsubscribe link
+reconf['HAS_INTERSPIRE_SIG'] = {
+ re = string.format('((%s) & (%s) & (%s) & (%s)) | (%s)',
+ 'header_exists(X-Mailer-LID)',
+ 'header_exists(X-Mailer-RecptId)',
+ 'header_exists(X-Mailer-SID)',
+ 'header_exists(X-Mailer-Sent-By)',
+ 'List-Unsubscribe=/\\/unsubscribe\\.php\\?M=[^&]+&C=[^&]+&L=[^&]+&N=[^>]+>$/Xi'),
+ score = 1.0,
+ group = 'headers',
+ description = "Has Interspire fingerprint"
+}
+
+-- Content-Type value whose last character is ';'
+reconf['CT_EXTRA_SEMI'] = {
+ re = 'Content-Type=/;$/X',
+ score = 1.0,
+ group = 'headers',
+ description = 'Content-Type header ends with a semi-colon'
+}
+
+-- Subject ends with '!' (trailing whitespace allowed)
+reconf['SUBJECT_ENDS_EXCLAIM'] = {
+ re = 'Subject=/!\\s*$/H',
+ score = 0.0,
+ group = 'headers',
+ description = 'Subject ends with an exclamation mark'
+}
+
+-- Subject contains '!' somewhere but does not end with it
+reconf['SUBJECT_HAS_EXCLAIM'] = {
+ re = string.format('%s & !%s',
+ 'Subject=/!/H',
+ 'Subject=/!\\s*$/H'),
+ score = 0.0,
+ group = 'headers',
+ description = 'Subject contains an exclamation mark'
+}
+
+-- Subject ends with '?' (trailing whitespace allowed)
+reconf['SUBJECT_ENDS_QUESTION'] = {
+ re = 'Subject=/\\?\\s*$/Hu',
+ score = 1.0,
+ group = 'headers',
+ description = 'Subject ends with a question mark'
+}
+
+-- Subject contains '?' somewhere but does not end with it
+reconf['SUBJECT_HAS_QUESTION'] = {
+ re = string.format('%s & !%s',
+ 'Subject=/\\?/H',
+ 'Subject=/\\?\\s*$/Hu'),
+ score = 0.0,
+ group = 'headers',
+ description = 'Subject contains a question mark'
+}
+
+-- Subject contains a character of the Unicode currency-symbol class (\p{Sc})
+reconf['SUBJECT_HAS_CURRENCY'] = {
+ re = 'Subject=/\\p{Sc}/Hu',
+ score = 1.0,
+ group = 'headers',
+ description = 'Subject contains currency'
+}
+
+-- Subject ends with one or more whitespace characters
+reconf['SUBJECT_ENDS_SPACES'] = {
+ re = 'Subject=/\\s+$/H',
+ score = 0.5,
+ group = 'headers',
+ description = 'Subject ends with space characters'
+}
+
+-- Organization header present, in either spelling
+reconf['HAS_ORG_HEADER'] = {
+ re = string.format('%s || %s',
+ 'header_exists(Organization)',
+ 'header_exists(Organisation)'),
+ score = 0.0,
+ group = 'headers',
+ description = 'Has Organization header'
+}
+
+-- X-PHP-Originating-Script value starting with seven digits and a colon
+reconf['X_PHPOS_FAKE'] = {
+ re = 'X-PHP-Originating-Script=/^\\d{7}:/Hi',
+ score = 3.0,
+ group = 'headers',
+ description = 'Fake X-PHP-Originating-Script header'
+}
+
+-- X-Originating-IP header present
+reconf['HAS_XOIP'] = {
+ re = "header_exists('X-Originating-IP')",
+ score = 0.0,
+ group = 'headers',
+ description = "Has X-Originating-IP header"
+}
+
+-- List-Unsubscribe header present
+reconf['HAS_LIST_UNSUB'] = {
+ re = string.format('%s', 'header_exists(List-Unsubscribe)'),
+ score = -0.01,
+ group = 'headers',
+ description = 'Has List-Unsubscribe header'
+}
+
+-- URL containing ".googleusercontent.com/proxy"
+reconf['HAS_GUC_PROXY_URI'] = {
+ re = '/\\.googleusercontent\\.com\\/proxy/{url}i',
+ score = 1.0,
+ group = 'url',
+ description = 'Has googleusercontent.com proxy URL'
+}
+
+-- URL on a google.<tld> host (including two-level TLDs, info and jobs) whose
+-- path starts with "amp/s/" or "url?"
+reconf['HAS_GOOGLE_REDIR'] = {
+ re = '/\\.google\\.([a-z]{2,3}(|\\.[a-z]{2,3})|info|jobs)\\/(amp\\/s\\/|url\\?)/{url}i',
+ score = 1.0,
+ group = 'url',
+ description = 'Has google.com/url or alike Google redirection URL'
+}
+
+-- URL containing ".firebasestorage.googleapis.com/"
+reconf['HAS_GOOGLE_FIREBASE_URL'] = {
+ re = '/\\.firebasestorage\\.googleapis\\.com\\//{url}i',
+ score = 2.0,
+ group = 'url',
+ description = 'Contains firebasestorage.googleapis.com URL'
+}
+
+-- X-Mailer or User-Agent consists solely of non-digit characters, and neither
+-- header contains an http(s): link
+reconf['XM_UA_NO_VERSION'] = {
+ description = 'X-Mailer/User-Agent header has no version number',
+ re = string.format('(!%s && !%s) && (%s || %s)',
+ 'X-Mailer=/https?:/H',
+ 'User-Agent=/https?:/H',
+ 'X-Mailer=/^[^0-9]+$/H',
+ 'User-Agent=/^[^0-9]+$/H'),
+ score = 0.01,
+ group = 'experimental'
+}
+
+-- Neither X-Mailer nor User-Agent is present
+local has_ua = 'header_exists(User-Agent)'
+local has_xmailer = 'header_exists(X-Mailer)'
+reconf['MISSING_XM_UA'] = {
+ description = 'Message has neither X-Mailer nor User-Agent header',
+ re = string.format('!%s && !%s',
+ has_xmailer,
+ has_ua),
+ score = 0.0,
+ group = 'headers',
+}
+
+-- X-Mailer for old MUA versions which are forged by spammers
+-- Each entry is a regexp fragment; the fragments are joined with '|' and
+-- anchored at the start of the X-Mailer value by the OLD_X_MAILER rule
+local old_x_mailers = {
+ -- Outlook Express 6.0 was last included in Windows XP (EOL 2014). Windows
+ -- XP is still used (in 2020) by a relatively small number of internet users,
+ -- but this header is widely abused by spammers.
+ 'Microsoft Outlook Express',
+ -- Qualcomm Eudora for Windows 7.1.0.9 was released in 2006
+ [[QUALCOMM Windows Eudora (Pro )?Version [1-6]\.]],
+ -- The Bat 3.0 was released in 2004
+ [[The Bat! \(v[12]\.]],
+ -- Can be found in public maillist archives, messages circa 2000
+ [[Microsoft Outlook IMO, Build 9\.0\.]],
+ -- Outlook 2002 (Office XP)
+ [[Microsoft Outlook, Build 10\.]],
+ -- Some old Apple iOS versions are used on old devices, match only very old
+ -- versions (iOS 4.3.5 build 8L1 was supported until 2013) and less old
+ -- versions frequently seen in spam
+ [[i(Phone|Pad) Mail \((?:[1-8][A-L]|12H|13E)]],
+}
+
+-- X-Mailer value starting with any of the fragments listed in old_x_mailers
+reconf['OLD_X_MAILER'] = {
+ description = 'X-Mailer header has a very old MUA version',
+ re = string.format('X-Mailer=/^(?:%s)/{header}', table.concat(old_x_mailers, '|')),
+ score = 2.0,
+ group = 'headers',
+}
+
+-- Apple Mail X-Mailer: "Apple Mail (" followed by either "[Version ]major.minor"
+-- or a four-component dotted version, then ")"
+local apple_x_mailer = [[Apple Mail \((?:(?:Version )?[1-9]\d{0,2}\.\d{1,3}|[1-9]\d{0,2}\.\d{1,4}\.\d{1,4}\.\d{1,4})\)]]
+reconf['APPLE_MAILER'] = {
+ re = string.format('X-Mailer=/^%s/{header}', apple_x_mailer),
+ score = 0.0,
+ group = 'headers',
+ description = 'Sent with Apple Mail',
+}
+
+-- Apple iPhone/iPad Mail
+-- The X-Mailer value carries the iOS build number, e. g. 9B206, 16H5, 18G5023c
+-- https://en.wikipedia.org/wiki/IOS_version_history
+local apple_ios_x_mailer = [[i(?:Phone|Pad) Mail \(\d{1,2}[A-Z]\d{1,4}[a-z]?\)]]
+reconf['APPLE_IOS_MAILER'] = {
+ re = string.format('X-Mailer=/^%s/{header}', apple_ios_x_mailer),
+ score = 0.0,
+ group = 'headers',
+ description = 'Sent with Apple iPhone/iPad Mail',
+}
+
+-- X-Mailer header values which should not occur (in the modern mail) at all
+-- Each entry is a regexp fragment; the fragments are joined with '|' and
+-- anchored at the start of the X-Mailer value by the FORGED_X_MAILER rule
+local bad_x_mailers = {
+ -- header name repeated in the header value
+ [[X-Mailer: ]],
+ -- Mozilla Thunderbird uses User-Agent header, not X-Mailer
+ -- Early Thunderbird had U-A like:
+ -- Mozilla Thunderbird 1.0.2 (Windows/20050317)
+ -- Thunderbird 2.0.0.23 (X11/20090812)
+ [[(?:Mozilla )?Thunderbird \d]],
+ -- Was used by Yahoo Groups in 2000s, no one expected to use this in 2020s
+ [[eGroups Message Poster]],
+ -- The regexp for a genuine iOS X-Mailer is apple_ios_x_mailer (defined
+ -- earlier in this file); anything which doesn't match it, but starts with
+ -- 'iPhone Mail' or 'iPad Mail', is likely fake
+ [[i(?:Phone|Pad) Mail]],
+}
+
+-- X-Mailer value starting with one of the bad_x_mailers fragments, unless it
+-- matches the genuine iOS Mail pattern
+reconf['FORGED_X_MAILER'] = {
+ description = 'Forged X-Mailer header',
+ re = string.format('X-Mailer=/^(?:%s)/{header} && !X-Mailer=/^%s/{header}',
+ table.concat(bad_x_mailers, '|'), apple_ios_x_mailer),
+ score = 4.5,
+ group = 'headers',
+}
+
+-- Spammers forge X-Mailer values such as 'Internet Mail Service (5.5.2650.21)',
+-- yet MS Exchange 5.5 is still in use (in 2020) on some mail servers.
+-- Genuine messages look like this (DC-EXMPL is a hostname, possibly a FQDN):
+-- Received: by DC-EXMPL with Internet Mail Service (5.5.2656.59)
+-- id <HKH4BJQX>; Tue, 8 Dec 2020 07:10:54 -0600
+-- Message-ID: <E7209F9DB64FCC4BB1051420F0E955DD05C9D59F@DC-EXMPL>
+-- X-Mailer: Internet Mail Service (5.5.2656.59)
+-- The rule fires on the X-Mailer value when no such Received line accompanies it.
+reconf['FORGED_IMS'] = {
+ re = [[X-Mailer=/^Internet Mail Service \(5\./{header} & !Received=/^by \S+ with Internet Mail Service \(5\./{header}]],
+ score = 3.0,
+ group = 'headers',
+ description = 'Forged X-Mailer: Internet Mail Service',
+}
diff --git a/rules/regexp/misc.lua b/rules/regexp/misc.lua
new file mode 100644
index 0000000..d723f29
--- /dev/null
+++ b/rules/regexp/misc.lua
@@ -0,0 +1,117 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+
+local reconf = config['regexp']
+
+-- <meta http-equiv="refresh" content="N; url=..."> in the raw body
+reconf['HTML_META_REFRESH_URL'] = {
+ -- Requires options { check_attachements = true; }
+ re = '/<meta\\s+http-equiv="refresh"\\s+content="\\d+\\s*;\\s*url=/{sa_raw_body}i',
+ score = 5.0,
+ group = 'HTML',
+ one_shot = true,
+ description = "Has HTML Meta refresh URL"
+}
+
+-- Any base64 data: URI in the raw body
+reconf['HAS_DATA_URI'] = {
+ -- Requires options { check_attachements = true; }
+ re = '/data:[^\\/]+\\/[^; ]+;base64,/{sa_raw_body}i',
+ group = 'HTML',
+ one_shot = true,
+ description = "Has Data URI encoding",
+}
+
+-- base64 data: URI of type text/plain or text/html in the raw body
+reconf['DATA_URI_OBFU'] = {
+ -- Requires options { check_attachements = true; }
+ re = '/data:text\\/(?:plain|html);base64,/{sa_raw_body}i',
+ score = 2.0,
+ group = 'HTML',
+ one_shot = true,
+ description = "Uses Data URI encoding to obfuscate plain or HTML in base64"
+}
+
+-- "my name is", or "i am"/"this is" followed by a title (mr, mrs, dr, prof, ...)
+reconf['INTRODUCTION'] = {
+ re = '/\\b(?:my name is\\b|(?:i am|this is)\\s+(?:mr|mrs|ms|miss|master|sir|prof(?:essor)?|d(?:octo)?r|rev(?:erend)?)(?:\\.|\\b))/{sa_body}i',
+ score = 2.0,
+ group = 'scams',
+ one_shot = true,
+ description = "Sender introduces themselves"
+}
+
+-- Message contains a link to a .onion URI (Tor hidden service)
+-- v2 addresses have a 16 character label, v3 addresses a 56 character label.
+-- The patterns require the complete ".onion" label: the previous "onion?"
+-- made the final "n" optional, so hosts such as "<16 chars>.onio.example.com"
+-- matched as well.
+local onion_uri_v2 = '/[a-z0-9]{16}\\.onion\\b/{url}i'
+local onion_uri_v3 = '/[a-z0-9]{56}\\.onion\\b/{url}i'
+reconf['HAS_ONION_URI'] = {
+ re = string.format('(%s | %s)', onion_uri_v2, onion_uri_v3),
+ description = 'Contains .onion hidden service URI',
+ score = 0.0,
+ group = 'url'
+}
+
+-- Indicators combined by LEAKED_PASSWORD_SCAM_RE: word-level regexps, symbol
+-- and flag checks, and header presence tests
+local my_victim = [[/(?:victim|prey)/{words}]]
+local your_webcam = [[/webcam/{words}]]
+local your_onan = [[/(?:mast[ur]{2}bati(?:on|ng)|onanism|solitary)/{words}]]
+local password_in_words = [[/^pass(?:(?:word)|(?:phrase))$/i{words}]]
+local btc_wallet_address = [[has_symbol(BITCOIN_ADDR)]]
+local wallet_word = [[/^wallet$/{words}]]
+local broken_unicode = [[has_flag(bad_unicode)]]
+local list_unsub = [[header_exists(List-Unsubscribe)]]
+local x_php_origin = [[header_exists(X-PHP-Originating-Script)]]
+
+-- BITCOIN_ADDR symbol together with at least one of the other indicators
+reconf['LEAKED_PASSWORD_SCAM_RE'] = {
+ description = 'Contains BTC wallet address and malicious regexps',
+ re = string.format('%s & (%s | %s | %s | %s | %s | %s | %s | %s | %s)',
+ btc_wallet_address, password_in_words, wallet_word,
+ my_victim, your_webcam, your_onan,
+ broken_unicode, 'lua:check_data_images',
+ list_unsub, x_php_origin),
+ functions = {
+ -- True when any HTML text part reports the 'data_urls' property
+ check_data_images = function(task)
+ for _, part in ipairs(task:get_text_parts() or {}) do
+ local html = part:is_html() and part:get_html()
+ if html and html:has_property('data_urls') then
+ return true
+ end
+ end
+
+ return false
+ end
+ },
+ score = 0.0,
+ group = 'scams'
+}
+
+-- The LEAKED_PASSWORD_SCAM_RE expression calls has_symbol(BITCOIN_ADDR), so
+-- that symbol is registered as depending on BITCOIN_ADDR. The original
+-- LEAKED_PASSWORD_SCAM dependency is kept unchanged for compatibility.
+rspamd_config:register_dependency('LEAKED_PASSWORD_SCAM_RE', 'BITCOIN_ADDR')
+rspamd_config:register_dependency('LEAKED_PASSWORD_SCAM', 'BITCOIN_ADDR')
+
+-- Heuristic for detecting InterPlanetary File System (IPFS) gateway URLs:
+-- These contain "ipfs" somewhere (either in the FQDN or the URL path) and a
+-- content identifier (CID), consisting of either "qm" followed by 44 alphanumeric
+-- characters (CIDv0), or a CIDv1: an alphanumeric string whose length depends
+-- on the hash algorithm used and which starts with a multibase prefix.
+-- CID part: "qm" + 44 characters, or one prefix character + 44-128 characters
+local ipfs_cid = '/(qm[a-z0-9]{44}|[079fvtbchkzmup][a-z0-9]{44,128})/{url}i'
+-- "ipfs" followed by one of . - _ / ?
+local ipfs_string = '/ipfs(\\.|-|_|\\/|\\?)/{url}i'
+reconf['HAS_IPFS_GATEWAY_URL'] = {
+ description = 'Message contains InterPlanetary File System (IPFS) gateway URL, likely malicious',
+ re = string.format('(%s & %s)', ipfs_cid, ipfs_string),
+ score = 6.0,
+ one_shot = true,
+ group = 'url',
+}
diff --git a/rules/regexp/upstream_spam_filters.lua b/rules/regexp/upstream_spam_filters.lua
new file mode 100644
index 0000000..b92f473
--- /dev/null
+++ b/rules/regexp/upstream_spam_filters.lua
@@ -0,0 +1,60 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Rules for upstream services that have already run spam checks
+
+local reconf = config['regexp']
+
+-- Precedence header containing "bulk" (case-insensitive)
+reconf['PRECEDENCE_BULK'] = {
+ description = "Message marked as bulk",
+ re = 'Precedence=/bulk/Hi',
+ score = 0.0,
+ group = 'upstream_spam_filters'
+}
+
+-- X-Forefront-Antispam-Report containing SFV:SPM
+reconf['MICROSOFT_SPAM'] = {
+ -- https://technet.microsoft.com/en-us/library/dn205071(v=exchg.150).aspx
+ description = "Microsoft says the message is spam",
+ re = 'X-Forefront-Antispam-Report=/SFV:SPM/H',
+ score = 4.0,
+ group = 'upstream_spam_filters'
+}
+
+-- X-KLMS-AntiSpam-Status starting with "spam"
+reconf['KLMS_SPAM'] = {
+ description = "Kaspersky Security for Mail Server says this message is spam",
+ re = 'X-KLMS-AntiSpam-Status=/^spam/H',
+ score = 5.0,
+ group = 'upstream_spam_filters'
+}
+
+-- X-Spam-Flag, X-Spam or X-Spam-Status starting with yes/true (case-insensitive)
+reconf['SPAM_FLAG'] = {
+ description = "Message was already marked as spam",
+ re = string.format('%s || %s || %s',
+ 'X-Spam-Flag=/^(?:yes|true)/Hi',
+ 'X-Spam=/^(?:yes|true)/Hi',
+ 'X-Spam-Status=/^(?:yes|true)/Hi'),
+ score = 5.0,
+ group = 'upstream_spam_filters'
+}
+
+-- X-UI-Filterresults or X-UI-Out-Filterresults starting with "junk:"
+reconf['UNITEDINTERNET_SPAM'] = {
+ description = "United Internet says this message is spam",
+ re = string.format('%s || %s',
+ 'X-UI-Filterresults=/^junk:/H',
+ 'X-UI-Out-Filterresults=/^junk:/H'),
+ score = 5.0,
+ group = 'upstream_spam_filters'
+}
diff --git a/rules/rspamd.lua b/rules/rspamd.lua
new file mode 100644
index 0000000..6b2c1a5
--- /dev/null
+++ b/rules/rspamd.lua
@@ -0,0 +1,71 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- This is the main Lua config file for rspamd
+
+-- global_functions returns a function, which is invoked immediately
+require "global_functions"()
+
+-- Table the regexp rule files below fill in (they read it as config['regexp'])
+config['regexp'] = {}
+rspamd_maps = {} -- Global maps
+
+-- Directories taken from rspamd_paths
+local local_conf = rspamd_paths['LOCAL_CONFDIR']
+local local_rules = rspamd_paths['RULESDIR']
+local rspamd_util = require "rspamd_util"
+
+-- Bundled rule files, executed in this order
+dofile(local_rules .. '/regexp/headers.lua')
+dofile(local_rules .. '/regexp/misc.lua')
+dofile(local_rules .. '/regexp/upstream_spam_filters.lua')
+dofile(local_rules .. '/regexp/compromised_hosts.lua')
+dofile(local_rules .. '/html.lua')
+dofile(local_rules .. '/headers_checks.lua')
+dofile(local_rules .. '/subject_checks.lua')
+dofile(local_rules .. '/misc.lua')
+dofile(local_rules .. '/forwarding.lua')
+dofile(local_rules .. '/mid.lua')
+dofile(local_rules .. '/parts.lua')
+dofile(local_rules .. '/bitcoin.lua')
+dofile(local_rules .. '/bounce.lua')
+dofile(local_rules .. '/content.lua')
+dofile(local_rules .. '/controller/init.lua')
+
+-- Site-local rules: rspamd.local.lua in the local config dir wins; the legacy
+-- lua/rspamd.local.lua location is consulted only when the former is absent
+local local_rules_file = local_conf .. '/rspamd.local.lua'
+local legacy_rules_file = local_conf .. '/lua/rspamd.local.lua'
+if rspamd_util.file_exists(local_rules_file) then
+ dofile(local_rules_file)
+elseif rspamd_util.file_exists(legacy_rules_file) then
+ dofile(legacy_rules_file)
+end
+
+-- local.d/rspamd.lua is loaded in addition, whenever it exists
+local local_d_file = local_conf .. '/local.d/rspamd.lua'
+if rspamd_util.file_exists(local_d_file) then
+ dofile(local_d_file)
+end
+
+-- Register every entry of the "lua_maps" option as a map and publish the
+-- result under the same key in the global rspamd_maps table; a failing
+-- add_map call is logged and does not stop the remaining entries
+local rmaps = rspamd_config:get_all_opt("lua_maps")
+if type(rmaps) == 'table' then
+ local rspamd_logger = require "rspamd_logger"
+ for map_name, map_def in pairs(rmaps) do
+ local ok, map_or_err = pcall(function()
+ return rspamd_config:add_map(map_def)
+ end)
+
+ if ok then
+ rspamd_maps[map_name] = map_or_err
+ else
+ rspamd_logger.errx(rspamd_config, "cannot add map %s: %s", map_name, map_or_err)
+ end
+ end
+end
diff --git a/rules/subject_checks.lua b/rules/subject_checks.lua
new file mode 100644
index 0000000..f781e1d
--- /dev/null
+++ b/rules/subject_checks.lua
@@ -0,0 +1,70 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local rspamd_regexp = require "rspamd_regexp"
+local util = require "rspamd_util"
+
+-- Uncategorized rules
+-- Captures the rest of a Subject after one or more Re:/Fwd:/Fw:/Aw:/Antwort:/Sv:
+-- prefixes (case-insensitive)
+local subject_re = rspamd_regexp.create('/^(?:(?:Re|Fwd|Fw|Aw|Antwort|Sv):\\s*)+(.+)$/i')
+
+-- Runs check_function(subject, utf8_length) on the Subject header with reply /
+-- forward prefixes stripped.
+-- Returns false when there is no Subject or the check fails; otherwise
+-- true, rate * length capped at 1, and the length as a string.
+local function test_subject(task, check_function, rate)
+ local sbj = task:get_header('Subject')
+ if not sbj then
+ return false
+ end
+
+ -- Keep only the captured remainder when the prefix pattern matches
+ local captures = subject_re:search(sbj, false, true)
+ if captures and captures[1] and captures[1][2] then
+ sbj = captures[1][2]
+ end
+
+ local len = util.strlen_utf8(sbj)
+ if not check_function(sbj, len) then
+ return false
+ end
+
+ local weight = rate * len
+ if not (weight < 1) then
+ weight = 1
+ end
+ return true, weight, tostring(len)
+end
+
+-- Fires when util.is_uppercase accepts the prefix-stripped Subject; the weight
+-- is length / 40, capped at 1 by test_subject
+rspamd_config.SUBJ_ALL_CAPS = {
+ description = 'Subject contains mostly capital letters',
+ type = 'mime',
+ group = 'subject',
+ score = 3.0,
+ callback = function(task)
+ return test_subject(task, function(sbj)
+ return util.is_uppercase(sbj)
+ end, 1.0 / 40.0)
+ end
+}
+
+-- Fires when the prefix-stripped Subject is longer than 200 UTF-8 characters;
+-- the weight is length / 400, capped at 1 by test_subject
+rspamd_config.LONG_SUBJ = {
+ description = 'Subject is very long',
+ type = 'mime',
+ group = 'subject',
+ score = 3.0,
+ callback = function(task)
+ return test_subject(task, function(_, len)
+ return len > 200
+ end, 1.0 / 400.0)
+ end
+}