summaryrefslogtreecommitdiffstats
path: root/lualib/lua_selectors
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 21:30:40 +0000
commit133a45c109da5310add55824db21af5239951f93 (patch)
treeba6ac4c0a950a0dda56451944315d66409923918 /lualib/lua_selectors
parentInitial commit. (diff)
downloadrspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lualib/lua_selectors')
-rw-r--r--lualib/lua_selectors/common.lua95
-rw-r--r--lualib/lua_selectors/extractors.lua565
-rw-r--r--lualib/lua_selectors/init.lua668
-rw-r--r--lualib/lua_selectors/maps.lua19
-rw-r--r--lualib/lua_selectors/transforms.lua571
5 files changed, 1918 insertions, 0 deletions
diff --git a/lualib/lua_selectors/common.lua b/lualib/lua_selectors/common.lua
new file mode 100644
index 0000000..7b2372d
--- /dev/null
+++ b/lualib/lua_selectors/common.lua
@@ -0,0 +1,95 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local ts = require("tableshape").types
+local exports = {}
+local cr_hash = require 'rspamd_cryptobox_hash'
+
+local blake2b_key = cr_hash.create_specific('blake2'):update('rspamd'):bin()
+
+local function digest_schema()
+ return { ts.one_of { 'hex', 'base32', 'bleach32', 'rbase32', 'base64' }:is_optional(),
+ ts.one_of { 'blake2', 'sha256', 'sha1', 'sha512', 'md5' }:is_optional() }
+end
+
+exports.digest_schema = digest_schema
+
+local function create_raw_digest(data, args)
+ local ht = args[2] or 'blake2'
+
+ local h
+
+ if ht == 'blake2' then
+ -- Hack to be compatible with various 'get_digest' methods
+ h = cr_hash.create_keyed(blake2b_key):update(data)
+ else
+ h = cr_hash.create_specific(ht):update(data)
+ end
+
+ return h
+end
+
+local function encode_digest(h, args)
+ local encoding = args[1] or 'hex'
+
+ local s
+ if encoding == 'hex' then
+ s = h:hex()
+ elseif encoding == 'base32' then
+ s = h:base32()
+ elseif encoding == 'bleach32' then
+ s = h:base32('bleach')
+ elseif encoding == 'rbase32' then
+ s = h:base32('rfc')
+ elseif encoding == 'base64' then
+ s = h:base64()
+ end
+
+ return s
+end
+
+local function create_digest(data, args)
+ local h = create_raw_digest(data, args)
+ return encode_digest(h, args)
+end
+
+local function get_cached_or_raw_digest(task, idx, mime_part, args)
+ if #args == 0 then
+ -- Optimise as we already have this hash in the API
+ return mime_part:get_digest()
+ end
+
+ local ht = args[2] or 'blake2'
+ local cache_key = 'mp_digest_' .. ht .. tostring(idx)
+
+ local cached = task:cache_get(cache_key)
+
+ if cached then
+ return encode_digest(cached, args)
+ end
+
+ local h = create_raw_digest(mime_part:get_content('raw_parsed'), args)
+ task:cache_set(cache_key, h)
+
+ return encode_digest(h, args)
+end
+
+exports.create_digest = create_digest
+exports.create_raw_digest = create_raw_digest
+exports.get_cached_or_raw_digest = get_cached_or_raw_digest
+exports.encode_digest = encode_digest
+
+return exports \ No newline at end of file
diff --git a/lualib/lua_selectors/extractors.lua b/lualib/lua_selectors/extractors.lua
new file mode 100644
index 0000000..81dfa9d
--- /dev/null
+++ b/lualib/lua_selectors/extractors.lua
@@ -0,0 +1,565 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local fun = require 'fun'
+local meta_functions = require "lua_meta"
+local lua_util = require "lua_util"
+local rspamd_url = require "rspamd_url"
+local common = require "lua_selectors/common"
+local ts = require("tableshape").types
+local maps = require "lua_selectors/maps"
+local E = {}
+local M = "selectors"
+
+local url_flags_ts = ts.array_of(ts.one_of(lua_util.keys(rspamd_url.flags))):is_optional()
+
+local function gen_exclude_flags_filter(exclude_flags)
+ return function(u)
+ local got_flags = u:get_flags()
+ for _, flag in ipairs(exclude_flags) do
+ if got_flags[flag] then
+ return false
+ end
+ end
+ return true
+ end
+end
+
+local extractors = {
+ -- Plain id function
+ ['id'] = {
+ ['get_value'] = function(_, args)
+ if args[1] then
+ return args[1], 'string'
+ end
+
+ return '', 'string'
+ end,
+ ['description'] = [[Return value from function's argument or an empty string,
+For example, `id('Something')` returns a string 'Something']],
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Similar but for making lists
+ ['list'] = {
+ ['get_value'] = function(_, args)
+ if args[1] then
+ return fun.map(tostring, args), 'string_list'
+ end
+
+ return {}, 'string_list'
+ end,
+ ['description'] = [[Return a list from function's arguments or an empty list,
+For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
+ },
+ -- Get source IP address
+ ['ip'] = {
+ ['get_value'] = function(task)
+ local ip = task:get_ip()
+ if ip and ip:is_valid() then
+ return ip, 'userdata'
+ end
+ return nil
+ end,
+ ['description'] = [[Get source IP address]],
+ },
+ -- Get MIME from
+ ['from'] = {
+ ['get_value'] = function(task, args)
+ local from
+ if type(args) == 'table' then
+ from = task:get_from(args)
+ else
+ from = task:get_from(0)
+ end
+ if ((from or E)[1] or E).addr then
+ return from[1], 'table'
+ end
+ return nil
+ end,
+ ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
+uses any type by default)]],
+ },
+ ['rcpts'] = {
+ ['get_value'] = function(task, args)
+ local rcpts
+ if type(args) == 'table' then
+ rcpts = task:get_recipients(args)
+ else
+ rcpts = task:get_recipients(0)
+ end
+ if ((rcpts or E)[1] or E).addr then
+ return rcpts, 'table_list'
+ end
+ return nil
+ end,
+ ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
+uses any type by default)]],
+ },
+ -- Get country (ASN module must be executed first)
+ ['country'] = {
+ ['get_value'] = function(task)
+ local country = task:get_mempool():get_variable('country')
+ if not country then
+ return nil
+ else
+ return country, 'string'
+ end
+ end,
+ ['description'] = [[Get country (ASN module must be executed first)]],
+ },
+ -- Get ASN number
+ ['asn'] = {
+ ['type'] = 'string',
+ ['get_value'] = function(task)
+ local asn = task:get_mempool():get_variable('asn')
+ if not asn then
+ return nil
+ else
+ return asn, 'string'
+ end
+ end,
+ ['description'] = [[Get AS number (ASN module must be executed first)]],
+ },
+ -- Get authenticated username
+ ['user'] = {
+ ['get_value'] = function(task)
+ local auser = task:get_user()
+ if not auser then
+ return nil
+ else
+ return auser, 'string'
+ end
+ end,
+ ['description'] = 'Get authenticated user name',
+ },
+ -- Get principal recipient
+ ['to'] = {
+ ['get_value'] = function(task)
+ return task:get_principal_recipient(), 'string'
+ end,
+ ['description'] = 'Get principal recipient',
+ },
+ -- Get content digest
+ ['digest'] = {
+ ['get_value'] = function(task)
+ return task:get_digest(), 'string'
+ end,
+ ['description'] = 'Get content digest',
+ },
+ -- Get list of all attachments digests
+ ['attachments'] = {
+ ['get_value'] = function(task, args)
+ local parts = task:get_parts() or E
+ local digests = {}
+ for i, p in ipairs(parts) do
+ if p:is_attachment() then
+ table.insert(digests, common.get_cached_or_raw_digest(task, i, p, args))
+ end
+ end
+
+ if #digests > 0 then
+ return digests, 'string_list'
+ end
+
+ return nil
+ end,
+ ['description'] = [[Get list of all attachments digests.
+The first optional argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
+the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
+ ['args_schema'] = common.digest_schema()
+
+ },
+ -- Get all attachments files
+ ['files'] = {
+ ['get_value'] = function(task)
+ local parts = task:get_parts() or E
+ local files = {}
+
+ for _, p in ipairs(parts) do
+ local fname = p:get_filename()
+ if fname then
+ table.insert(files, fname)
+ end
+ end
+
+ if #files > 0 then
+ return files, 'string_list'
+ end
+
+ return nil
+ end,
+ ['description'] = 'Get all attachments files',
+ },
+ -- Get languages for text parts
+ ['languages'] = {
+ ['get_value'] = function(task)
+ local text_parts = task:get_text_parts() or E
+ local languages = {}
+
+ for _, p in ipairs(text_parts) do
+ local lang = p:get_language()
+ if lang then
+ table.insert(languages, lang)
+ end
+ end
+
+ if #languages > 0 then
+ return languages, 'string_list'
+ end
+
+ return nil
+ end,
+ ['description'] = 'Get languages for text parts',
+ },
+ -- Get helo value
+ ['helo'] = {
+ ['get_value'] = function(task)
+ return task:get_helo(), 'string'
+ end,
+ ['description'] = 'Get helo value',
+ },
+ -- Get header with the name that is expected as an argument. Returns list of
+ -- headers with this name
+ ['header'] = {
+ ['get_value'] = function(task, args)
+ local strong = false
+ if args[2] then
+ if args[2]:match('strong') then
+ strong = true
+ end
+
+ if args[2]:match('full') then
+ return task:get_header_full(args[1], strong), 'table_list'
+ end
+
+ return task:get_header(args[1], strong), 'string'
+ else
+ return task:get_header(args[1]), 'string'
+ end
+ end,
+ ['description'] = [[Get header with the name that is expected as an argument.
+The optional second argument accepts list of flags:
+ - `full`: returns all headers with this name with all data (like task:get_header_full())
+ - `strong`: use case sensitive match when matching header's name]],
+ ['args_schema'] = { ts.string,
+ (ts.pattern("strong") + ts.pattern("full")):is_optional() }
+ },
+ -- Get list of received headers (returns list of tables)
+ ['received'] = {
+ ['get_value'] = function(task, args)
+ local rh = task:get_received_headers()
+ if not rh[1] then
+ return nil
+ end
+ if args[1] then
+ return fun.map(function(r)
+ return r[args[1]]
+ end, rh), 'string_list'
+ end
+
+ return rh, 'table_list'
+ end,
+ ['description'] = [[Get list of received headers.
+If no arguments specified, returns list of tables. Otherwise, selects a specific element,
+e.g. `by_hostname`]],
+ },
+ -- Get all urls
+ ['urls'] = {
+ ['get_value'] = function(task, args)
+ local urls = task:get_urls()
+ if not urls[1] then
+ return nil
+ end
+ if args[1] then
+ return fun.map(function(r)
+ return r[args[1]](r)
+ end, urls), 'string_list'
+ end
+ return urls, 'userdata_list'
+ end,
+ ['description'] = [[Get list of all urls.
+If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
+e.g. `get_tld`]],
+ },
+ -- Get specific urls
+ ['specific_urls'] = {
+ ['get_value'] = function(task, args)
+ local params = args[1] or {}
+ params.task = task
+ params.no_cache = true
+ if params.exclude_flags then
+ params.filter = gen_exclude_flags_filter(params.exclude_flags)
+ end
+ local urls = lua_util.extract_specific_urls(params)
+ if not urls[1] then
+ return nil
+ end
+ return urls, 'userdata_list'
+ end,
+ ['description'] = [[Get most specific urls. Arguments are equal to the Lua API function]],
+ ['args_schema'] = { ts.shape {
+ limit = ts.number + ts.string / tonumber,
+ esld_limit = (ts.number + ts.string / tonumber):is_optional(),
+ exclude_flags = url_flags_ts,
+ flags = url_flags_ts,
+ flags_mode = ts.one_of { 'explicit' }:is_optional(),
+ prefix = ts.string:is_optional(),
+ need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ } }
+ },
+ ['specific_urls_filter_map'] = {
+ ['get_value'] = function(task, args)
+ local map = maps[args[1]]
+ if not map then
+ lua_util.debugm(M, "invalid/unknown map: %s", args[1])
+ end
+ local params = args[2] or {}
+ params.task = task
+ params.no_cache = true
+ if params.exclude_flags then
+ params.filter = gen_exclude_flags_filter(params.exclude_flags)
+ end
+ local urls = lua_util.extract_specific_urls(params)
+ if not urls[1] then
+ return nil
+ end
+ return fun.filter(function(u)
+ return map:get_key(tostring(u))
+ end, urls), 'userdata_list'
+ end,
+ ['description'] = [[Get most specific urls, filtered by some map. Arguments are equal to the Lua API function]],
+ ['args_schema'] = { ts.string, ts.shape {
+ limit = ts.number + ts.string / tonumber,
+ esld_limit = (ts.number + ts.string / tonumber):is_optional(),
+ exclude_flags = url_flags_ts,
+ flags = url_flags_ts,
+ flags_mode = ts.one_of { 'explicit' }:is_optional(),
+ prefix = ts.string:is_optional(),
+ need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+ } }
+ },
+ -- URLs filtered by flags
+ ['urls_filtered'] = {
+ ['get_value'] = function(task, args)
+ local urls = task:get_urls_filtered(args[1], args[2])
+ if not urls[1] then
+ return nil
+ end
+ return urls, 'userdata_list'
+ end,
+ ['description'] = [[Get list of all urls filtered by flags_include/exclude
+(see rspamd_task:get_urls_filtered for description)]],
+ ['args_schema'] = { ts.array_of {
+ url_flags_ts:is_optional(), url_flags_ts:is_optional()
+ } }
+ },
+ -- Get all emails
+ ['emails'] = {
+ ['get_value'] = function(task, args)
+ local urls = task:get_emails()
+ if not urls[1] then
+ return nil
+ end
+ if args[1] then
+ return fun.map(function(r)
+ return r[args[1]](r)
+ end, urls), 'string_list'
+ end
+ return urls, 'userdata_list'
+ end,
+ ['description'] = [[Get list of all emails.
+If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
+e.g. `get_user`]],
+ },
+ -- Get specific pool var. The first argument must be variable name,
+ -- the second argument is optional and defines the type (string by default)
+ ['pool_var'] = {
+ ['get_value'] = function(task, args)
+ local type = args[2] or 'string'
+ return task:get_mempool():get_variable(args[1], type), (type)
+ end,
+ ['description'] = [[Get specific pool var. The first argument must be variable name,
+the second argument is optional and defines the type (string by default)]],
+ ['args_schema'] = { ts.string, ts.string:is_optional() }
+ },
+ -- Get value of specific key from task cache
+ ['task_cache'] = {
+ ['get_value'] = function(task, args)
+ local val = task:cache_get(args[1])
+ if not val then
+ return
+ end
+ if type(val) == 'table' then
+ if not val[1] then
+ return
+ end
+ return val, 'string_list'
+ end
+ return val, 'string'
+ end,
+ ['description'] = [[Get value of specific key from task cache. The first argument must be
+the key name]],
+ ['args_schema'] = { ts.string }
+ },
+ -- Get specific HTTP request header. The first argument must be header name.
+ ['request_header'] = {
+ ['get_value'] = function(task, args)
+ local hdr = task:get_request_header(args[1])
+ if hdr then
+ return hdr, 'string'
+ end
+
+ return nil
+ end,
+ ['description'] = [[Get specific HTTP request header.
+The first argument must be header name.]],
+ ['args_schema'] = { ts.string }
+ },
+ -- Get task date, optionally formatted
+ ['time'] = {
+ ['get_value'] = function(task, args)
+ local what = args[1] or 'message'
+ local dt = task:get_date { format = what, gmt = true }
+
+ if dt then
+ if args[2] then
+ -- Should be in format !xxx, as dt is in GMT
+ return os.date(args[2], dt), 'string'
+ end
+
+ return tostring(dt), 'string'
+ end
+
+ return nil
+ end,
+ ['description'] = [[Get task timestamp. The first argument is type:
+ - `connect`: connection timestamp (default)
+ - `message`: timestamp as defined by `Date` header
+
+ The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
+ ['args_schema'] = { ts.one_of { 'connect', 'message' }:is_optional(),
+ ts.string:is_optional() }
+ },
+ -- Get text words from a message
+ ['words'] = {
+ ['get_value'] = function(task, args)
+ local how = args[1] or 'stem'
+ local tp = task:get_text_parts()
+
+ if tp then
+ local rtype = 'string_list'
+ if how == 'full' then
+ rtype = 'table_list'
+ end
+
+ return lua_util.flatten(
+ fun.map(function(p)
+ return p:get_words(how)
+ end, tp)), rtype
+ end
+
+ return nil
+ end,
+ ['description'] = [[Get words from text parts
+ - `stem`: stemmed words (default)
+ - `raw`: raw words
+ - `norm`: normalised words (lowercased)
+ - `full`: list of tables
+ ]],
+ ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional() },
+ },
+ -- Get queue ID
+ ['queueid'] = {
+ ['get_value'] = function(task)
+ local queueid = task:get_queue_id()
+ if queueid then
+ return queueid, 'string'
+ end
+ return nil
+ end,
+ ['description'] = [[Get queue ID]],
+ },
+ -- Get ID of the task being processed
+ ['uid'] = {
+ ['get_value'] = function(task)
+ local uid = task:get_uid()
+ if uid then
+ return uid, 'string'
+ end
+ return nil
+ end,
+ ['description'] = [[Get ID of the task being processed]],
+ },
+ -- Get message ID of the task being processed
+ ['messageid'] = {
+ ['get_value'] = function(task)
+ local mid = task:get_message_id()
+ if mid then
+ return mid, 'string'
+ end
+ return nil
+ end,
+ ['description'] = [[Get message ID]],
+ },
+ -- Get specific symbol
+ ['symbol'] = {
+ ['get_value'] = function(task, args)
+ local symbol = task:get_symbol(args[1], args[2])
+ if symbol then
+ return symbol[1], 'table'
+ end
+ end,
+ ['description'] = 'Get specific symbol. The first argument must be the symbol name. ' ..
+ 'The second argument is an optional shadow result name. ' ..
+ 'Returns the symbol table. See task:get_symbol()',
+ ['args_schema'] = { ts.string, ts.string:is_optional() }
+ },
+ -- Get full scan result
+ ['scan_result'] = {
+ ['get_value'] = function(task, args)
+ local res = task:get_metric_result(args[1])
+ if res then
+ return res, 'table'
+ end
+ end,
+ ['description'] = 'Get full scan result (either default or shadow if shadow result name is specified)' ..
+ 'Returns the result table. See task:get_metric_result()',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Get list of metatokens as strings
+ ['metatokens'] = {
+ ['get_value'] = function(task)
+ local tokens = meta_functions.gen_metatokens(task)
+ if not tokens[1] then
+ return nil
+ end
+ local res = {}
+ for _, t in ipairs(tokens) do
+ table.insert(res, tostring(t))
+ end
+ return res, 'string_list'
+ end,
+ ['description'] = 'Get metatokens for a message as strings',
+ },
+}
+
+return extractors
diff --git a/lualib/lua_selectors/init.lua b/lualib/lua_selectors/init.lua
new file mode 100644
index 0000000..5fcdb38
--- /dev/null
+++ b/lualib/lua_selectors/init.lua
@@ -0,0 +1,668 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- This module contains 'selectors' implementation: code to extract data
+-- from Rspamd tasks and compose those together
+--
+-- Read more at https://rspamd.com/doc/configuration/selectors.html
+
+--[[[
+-- @module lua_selectors
+-- This module contains 'selectors' implementation: code to extract data
+-- from Rspamd tasks and compose those together.
+-- Typical selector looks like this: header(User).lower.substring(1, 2):ip
+--]]
+
+local exports = {
+ maps = require "lua_selectors/maps"
+}
+
+local logger = require 'rspamd_logger'
+local fun = require 'fun'
+local lua_util = require "lua_util"
+local M = "selectors"
+local rspamd_text = require "rspamd_text"
+local unpack_function = table.unpack or unpack
+local E = {}
+
+local extractors = require "lua_selectors/extractors"
+local transform_function = require "lua_selectors/transforms"
+
+local text_cookie = rspamd_text.cookie
+
+local function pure_type(ltype)
+ return ltype:match('^(.*)_list$')
+end
+
+local function implicit_tostring(t, ud_or_table)
+ if t == 'table' then
+ -- Table (very special)
+ if ud_or_table.value then
+ return ud_or_table.value, 'string'
+ elseif ud_or_table.addr then
+ return ud_or_table.addr, 'string'
+ end
+
+ return logger.slog("%s", ud_or_table), 'string'
+ elseif (t == 'string' or t == 'text') and type(ud_or_table) == 'userdata' then
+ if ud_or_table.cookie and ud_or_table.cookie == text_cookie then
+ -- Preserve opaque
+ return ud_or_table, 'string'
+ else
+ return tostring(ud_or_table), 'string'
+ end
+ elseif t ~= 'nil' then
+ return tostring(ud_or_table), 'string'
+ end
+
+ return nil
+end
+
+local function process_selector(task, sel)
+ local function allowed_type(t)
+ if t == 'string' or t == 'string_list' then
+ return true
+ end
+
+ return false
+ end
+
+ local function list_type(t)
+ return pure_type(t)
+ end
+
+ local input, etype = sel.selector.get_value(task, sel.selector.args)
+
+ if not input then
+ lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
+ return nil
+ end
+
+ lua_util.debugm(M, task, 'extracted %s, type %s',
+ sel.selector.name, etype)
+
+ local pipe = sel.processor_pipe or E
+ local first_elt = pipe[1]
+
+ if first_elt and (first_elt.method or
+ fun.any(function(t)
+ return t == 'userdata' or t == 'table'
+ end, first_elt.types)) then
+ -- Explicit conversion
+ local meth = first_elt
+
+ if meth.types[etype] then
+ lua_util.debugm(M, task, 'apply method `%s` to %s',
+ meth.name, etype)
+ input, etype = meth.process(input, etype, meth.args)
+ else
+ local pt = pure_type(etype)
+
+ if meth.types[pt] then
+ lua_util.debugm(M, task, 'map method `%s` to list of %s',
+ meth.name, pt)
+ -- Map method to a list of inputs, excluding empty elements
+ -- We need to fold it down here to get a proper type resolution
+ input = fun.totable(fun.filter(function(map_elt, _)
+ return map_elt
+ end,
+ fun.map(function(list_elt)
+ local ret, ty = meth.process(list_elt, pt, meth.args)
+ if ret then
+ etype = ty
+ end
+ return ret
+ end, input)))
+ if input and etype then
+ etype = etype .. "_list"
+ else
+ input = nil
+ end
+ end
+ end
+ -- Remove method from the pipeline
+ pipe = fun.drop_n(1, pipe)
+ elseif etype:match('^userdata') or etype:match('^table') then
+ -- Implicit conversion
+ local pt = pure_type(etype)
+
+ if not pt then
+ lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
+ input = implicit_tostring(etype, input)
+ etype = 'string'
+ else
+ lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
+ input = fun.filter(function(map_elt)
+ return map_elt
+ end,
+ fun.map(function(list_elt)
+ local ret = implicit_tostring(pt, list_elt)
+ return ret
+ end, input))
+ etype = 'string_list'
+ end
+ else
+ lua_util.debugm(M, task, 'avoid implicit conversion as the transformer accepts complex input')
+ end
+
+ -- Now we fold elements using left fold
+ local function fold_function(acc, x)
+ if acc == nil or acc[1] == nil then
+ lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
+ return nil
+ end
+
+ local value = acc[1]
+ local t = acc[2]
+
+ if not x.types[t] then
+ local pt = pure_type(t)
+
+ if pt and x.types['list'] then
+ -- Generic list processor
+ lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
+ return { x.process(value, t, x.args) }
+ elseif pt and x.map_type and x.types[pt] then
+ local map_type = x.map_type .. '_list'
+ lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
+ x.name, pt, map_type)
+ -- Apply map, filtering empty values
+ return {
+ fun.filter(function(map_elt)
+ return map_elt
+ end,
+ fun.map(function(list_elt)
+ if not list_elt then
+ return nil
+ end
+ local ret, _ = x.process(list_elt, pt, x.args)
+ return ret
+ end, value)),
+ map_type -- Returned type
+ }
+ end
+ logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
+ return nil
+ end
+
+ lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
+ return { x.process(value, t, x.args) }
+ end
+
+ local res = fun.foldl(fold_function,
+ { input, etype },
+ pipe)
+
+ if not res or not res[1] then
+ return nil
+ end -- Pipeline failed
+
+ if not allowed_type(res[2]) then
+ -- Search for implicit conversion
+ local pt = pure_type(res[2])
+
+ if pt then
+ lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
+ res[1] = fun.map(function(e)
+ return implicit_tostring(pt, e)
+ end, res[1])
+ res[2] = 'string_list'
+ else
+ res[1] = implicit_tostring(res[2], res[1])
+ res[2] = 'string'
+ end
+ end
+
+ if list_type(res[2]) then
+ -- Convert to table as it might have a functional form
+ res[1] = fun.totable(res[1])
+ end
+
+ lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])
+
+ return res[1]
+end
+
+local function make_grammar()
+ local l = require "lpeg"
+ local spc = l.S(" \t\n") ^ 0
+ local cont = l.R("\128\191") -- continuation byte
+ local utf8_high = l.R("\194\223") * cont
+ + l.R("\224\239") * cont * cont
+ + l.R("\240\244") * cont * cont * cont
+ local atom_start = (l.R("az") + l.R("AZ") + l.R("09") + utf8_high + l.S "-") ^ 1
+ local atom_end = (l.R("az") + l.R("AZ") + l.R("09") + l.S "-_" + utf8_high) ^ 1
+ local atom_mid = (1 - l.S("'\r\n\f\\,)(}{= " .. '"')) ^ 1
+ local atom_argument = l.C(atom_start * atom_mid ^ 0 * atom_end ^ 0) -- We allow more characters for the arguments
+ local atom = l.C(atom_start * atom_end ^ 0) -- We are more strict about selector names itself
+ local singlequoted_string = l.P "'" * l.C(((1 - l.S "'\r\n\f\\") + (l.P '\\' * 1)) ^ 0) * "'"
+ local doublequoted_string = l.P '"' * l.C(((1 - l.S '"\r\n\f\\') + (l.P '\\' * 1)) ^ 0) * '"'
+ local argument = atom_argument + singlequoted_string + doublequoted_string
+ local dot = l.P(".")
+ local semicolon = l.P(":")
+ local obrace = "(" * spc
+ local tbl_obrace = "{" * spc
+ local eqsign = spc * "=" * spc
+ local tbl_ebrace = spc * "}"
+ local ebrace = spc * ")"
+ local comma = spc * "," * spc
+ local sel_separator = spc * l.S ";*" * spc
+
+ return l.P {
+ "LIST";
+ LIST = l.Ct(l.V("EXPR")) * (sel_separator * l.Ct(l.V("EXPR"))) ^ 0,
+ EXPR = l.V("FUNCTION") * (semicolon * l.V("METHOD")) ^ -1 * (dot * l.V("PROCESSOR")) ^ 0,
+ PROCESSOR = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+ FUNCTION = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+ METHOD = l.Ct(atom / function(e)
+ return '__' .. e
+ end * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+ ARG_LIST = l.Ct((l.V("ARG") * comma ^ 0) ^ 0),
+ ARG = l.Cf(tbl_obrace * l.V("NAMED_ARG") * tbl_ebrace, rawset) + argument + l.V("LIST_ARGS"),
+ NAMED_ARG = (l.Ct("") * l.Cg(argument * eqsign * (argument + l.V("LIST_ARGS")) * comma ^ 0) ^ 0),
+ LIST_ARGS = l.Ct(tbl_obrace * l.V("LIST_ARG") * tbl_ebrace),
+ LIST_ARG = l.Cg(argument * comma ^ 0) ^ 0,
+ }
+end
+
+local parser = make_grammar()
+
+--[[[
+-- @function lua_selectors.parse_selector(cfg, str)
+--]]
+exports.parse_selector = function(cfg, str)
+ local parsed = { parser:match(str) }
+ local output = {}
+
+ if not parsed or not parsed[1] then
+ return nil
+ end
+
+ local function check_args(name, schema, args)
+ if schema then
+ if getmetatable(schema) then
+ -- Schema covers all arguments
+ local res, err = schema:transform(args)
+ if not res then
+ logger.errx(rspamd_config, 'invalid arguments for %s: %s', name, err)
+ return false
+ else
+ for i, elt in ipairs(res) do
+ args[i] = elt
+ end
+ end
+ else
+ for i, selt in ipairs(schema) do
+ local res, err = selt:transform(args[i])
+
+ if err then
+ logger.errx(rspamd_config, 'invalid arguments for %s: argument number: %s, error: %s', name, i, err)
+ return false
+ else
+ args[i] = res
+ end
+ end
+ end
+ end
+
+ return true
+ end
+
+ -- Output AST format is the following:
+ -- table of individual selectors
+ -- each selector: list of functions
+ -- each function: function name + optional list of arguments
+ for _, sel in ipairs(parsed) do
+ local res = {
+ selector = {},
+ processor_pipe = {},
+ }
+
+ local selector_tbl = sel[1]
+ if not selector_tbl then
+ logger.errx(cfg, 'no selector represented')
+ return nil
+ end
+ if not extractors[selector_tbl[1]] then
+ logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
+ return nil
+ end
+
+ res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
+ res.selector.name = selector_tbl[1]
+ res.selector.args = selector_tbl[2] or E
+
+ if not check_args(res.selector.name,
+ res.selector.args_schema,
+ res.selector.args) then
+ return nil
+ end
+
+ lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
+ res.selector.name, res.selector.args)
+
+ local pipeline_error = false
+ -- Now process processors pipe
+ fun.each(function(proc_tbl)
+ local proc_name = proc_tbl[1]
+
+ if proc_name:match('^__') then
+ -- Special case - method
+ local method_name = proc_name:match('^__(.*)$')
+ -- Check array indexing...
+ if tonumber(method_name) then
+ method_name = tonumber(method_name)
+ end
+ local processor = {
+ name = tostring(method_name),
+ method = true,
+ args = proc_tbl[2] or E,
+ types = {
+ userdata = true,
+ table = true,
+ string = true,
+ },
+ map_type = 'string',
+ process = function(inp, t, args)
+ local ret
+ if t == 'table' then
+ -- Plain table field
+ ret = inp[method_name]
+ else
+ -- We call method unpacking arguments and dropping all but the first result returned
+ ret = (inp[method_name](inp, unpack_function(args or E)))
+ end
+
+ local ret_type = type(ret)
+
+ if ret_type == 'nil' then
+ return nil
+ end
+ -- Now apply types heuristic
+ if ret_type == 'string' then
+ return ret, 'string'
+ elseif ret_type == 'table' then
+ -- TODO: we need to ensure that 1) table is numeric 2) table has merely strings
+ return ret, 'string_list'
+ else
+ return implicit_tostring(ret_type, ret)
+ end
+ end,
+ }
+ lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
+ proc_name, res.selector.name, processor.args)
+ table.insert(res.processor_pipe, processor)
+ else
+
+ if not transform_function[proc_name] then
+ logger.errx(cfg, 'processor %s is unknown', proc_name)
+ pipeline_error = proc_name
+ return nil
+ end
+ local processor = lua_util.shallowcopy(transform_function[proc_name])
+ processor.name = proc_name
+ processor.args = proc_tbl[2] or E
+
+ if not check_args(processor.name, processor.args_schema, processor.args) then
+ pipeline_error = 'args schema for ' .. proc_name
+ return nil
+ end
+
+ lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
+ proc_name, res.selector.name, processor.args)
+ table.insert(res.processor_pipe, processor)
+ end
+ end, fun.tail(sel))
+
+ if pipeline_error then
+ logger.errx(cfg, 'unknown or invalid processor used: "%s", exiting', pipeline_error)
+ return nil
+ end
+
+ table.insert(output, res)
+ end
+
+ return output
+end
+
+--[[[
+-- @function lua_selectors.register_extractor(cfg, name, selector)
+--]]
+exports.register_extractor = function(cfg, name, selector)
+ if selector.get_value then
+ if extractors[name] then
+ logger.warnx(cfg, 'redefining selector %s', name)
+ end
+ extractors[name] = selector
+
+ return true
+ end
+
+ logger.errx(cfg, 'bad selector %s', name)
+ return false
+end
+
+--[[[
+-- @function lua_selectors.register_transform(cfg, name, transform)
+--]]
+exports.register_transform = function(cfg, name, transform)
+ if transform.process and transform.types then
+ if transform_function[name] then
+ logger.warnx(cfg, 'redefining transform function %s', name)
+ end
+ transform_function[name] = transform
+
+ return true
+ end
+
+ logger.errx(cfg, 'bad transform function %s', name)
+ return false
+end
+
+--[[[
+-- @function lua_selectors.process_selectors(task, selectors_pipe)
+--]]
+exports.process_selectors = function(task, selectors_pipe)
+ local ret = {}
+
+ for _, sel in ipairs(selectors_pipe) do
+ local r = process_selector(task, sel)
+
+ -- If any element is nil, then the whole selector is nil
+ if not r then
+ return nil
+ end
+ table.insert(ret, r)
+ end
+
+ return ret
+end
+
+--[[[
+-- @function lua_selectors.combine_selectors(task, selectors, delimiter)
+--]]
+exports.combine_selectors = function(_, selectors, delimiter)
+ if not delimiter then
+ delimiter = ''
+ end
+
+ if not selectors then
+ return nil
+ end
+
+ local have_tables, have_userdata
+
+ for _, s in ipairs(selectors) do
+ if type(s) == 'table' then
+ have_tables = true
+ elseif type(s) == 'userdata' then
+ have_userdata = true
+ end
+ end
+
+ if not have_tables then
+ if not have_userdata then
+ return table.concat(selectors, delimiter)
+ else
+ return rspamd_text.fromtable(selectors, delimiter)
+ end
+ else
+ -- We need to do a spill on each table selector and make a cortesian product
+ -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
+ local tbl = {}
+ local res = {}
+
+ for i, s in ipairs(selectors) do
+ if type(s) == 'string' then
+ rawset(tbl, i, fun.duplicate(s))
+ elseif type(s) == 'userdata' then
+ rawset(tbl, i, fun.duplicate(tostring(s)))
+ else
+ -- Raw table
+ rawset(tbl, i, fun.map(tostring, s))
+ end
+ end
+
+ fun.each(function(...)
+ table.insert(res, table.concat({ ... }, delimiter))
+ end, fun.zip(lua_util.unpack(tbl)))
+
+ return res
+ end
+end
+
+--[[[
+-- @function lua_selectors.flatten_selectors(selectors)
+-- Convert selectors to a flat table of elements
+--]]
+exports.flatten_selectors = function(_, selectors, _)
+ local res = {}
+
+ local function fill(tbl)
+ for _, s in ipairs(tbl) do
+ if type(s) == 'string' then
+ rawset(res, #res + 1, s)
+ elseif type(s) == 'userdata' then
+ rawset(res, #res + 1, tostring(s))
+ else
+ fill(s)
+ end
+ end
+ end
+
+ fill(selectors)
+
+ return res
+end
+
+--[[[
+-- @function lua_selectors.kv_table_from_pairs(selectors)
+-- Convert selectors to a table where the odd elements are keys and even are elements
+-- Similarly to make a map from (k, v) pairs list
+-- To specify the concrete constant keys, one can use the `id` extractor
+--]]
+exports.kv_table_from_pairs = function(log_obj, selectors, _)
+ local res = {}
+ local rspamd_logger = require "rspamd_logger"
+
+ local function fill(tbl)
+ local tbl_len = #tbl
+ if tbl_len % 2 ~= 0 or tbl_len == 0 then
+ rspamd_logger.errx(log_obj, "invalid invocation of the `kv_table_from_pairs`: table length is invalid %s",
+ tbl_len)
+ return
+ end
+ for i = 1, tbl_len, 2 do
+ local k = tostring(tbl[i])
+ local v = tbl[i + 1]
+ if type(v) == 'string' then
+ res[k] = v
+ elseif type(v) == 'userdata' then
+ res[k] = tostring(v)
+ else
+ res[k] = fun.totable(fun.map(function(elt)
+ return tostring(elt)
+ end, v))
+ end
+ end
+ end
+
+ fill(selectors)
+
+ return res
+end
+
+
+--[[[
+-- @function lua_selectors.create_closure(log_obj, cfg, selector_str, delimiter, fn)
+-- Creates a closure from a string selector, using the specific combinator function
+--]]
+exports.create_selector_closure_fn = function(log_obj, cfg, selector_str, delimiter, fn)
+ local selector = exports.parse_selector(cfg, selector_str)
+
+ if not selector then
+ return nil
+ end
+
+ return function(task)
+ local res = exports.process_selectors(task, selector)
+
+ if res then
+ return fn(log_obj, res, delimiter)
+ end
+
+ return nil
+ end
+end
+
+--[[[
+-- @function lua_selectors.create_closure(cfg, selector_str, delimiter='', flatten=false)
+-- Creates a closure from a string selector
+--]]
+exports.create_selector_closure = function(cfg, selector_str, delimiter, flatten)
+ local combinator_fn = flatten and exports.flatten_selectors or exports.combine_selectors
+
+ return exports.create_selector_closure_fn(nil, cfg, selector_str, delimiter, combinator_fn)
+end
+
+local function display_selectors(tbl)
+ return fun.tomap(fun.map(function(k, v)
+ return k, fun.tomap(fun.filter(function(kk, vv)
+ return type(vv) ~= 'function'
+ end, v))
+ end, tbl))
+end
+
+exports.list_extractors = function()
+ return display_selectors(extractors)
+end
+
+exports.list_transforms = function()
+ return display_selectors(transform_function)
+end
+
+exports.add_map = function(name, map)
+ if not exports.maps[name] then
+ exports.maps[name] = map
+ else
+ logger.errx(rspamd_config, "duplicate map redefinition for the selectors: %s", name)
+ end
+end
+
+-- Publish log target
+exports.M = M
+
+return exports
diff --git a/lualib/lua_selectors/maps.lua b/lualib/lua_selectors/maps.lua
new file mode 100644
index 0000000..85b54a6
--- /dev/null
+++ b/lualib/lua_selectors/maps.lua
@@ -0,0 +1,19 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local maps = {} -- Shared within selectors, indexed by name
+
+return maps \ No newline at end of file
diff --git a/lualib/lua_selectors/transforms.lua b/lualib/lua_selectors/transforms.lua
new file mode 100644
index 0000000..6c6bc71
--- /dev/null
+++ b/lualib/lua_selectors/transforms.lua
@@ -0,0 +1,571 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local fun = require 'fun'
+local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
+local ts = require("tableshape").types
+local logger = require 'rspamd_logger'
+local common = require "lua_selectors/common"
+local M = "selectors"
+
+local maps = require "lua_selectors/maps"
+
+local function pure_type(ltype)
+ return ltype:match('^(.*)_list$')
+end
+
+local transform_function = {
+ -- Returns the lowercased string
+ ['lower'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _)
+ return inp:lower(), 'string'
+ end,
+ ['description'] = 'Returns the lowercased string',
+ },
+ -- Returns the lowercased utf8 string
+ ['lower_utf8'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t)
+ return rspamd_util.lower_utf8(inp), t
+ end,
+ ['description'] = 'Returns the lowercased utf8 string',
+ },
+ -- Returns the first element
+ ['first'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t)
+ return fun.head(inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the first element',
+ },
+ -- Returns the last element
+ ['last'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t)
+ return fun.nth(fun.length(inp), inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the last element',
+ },
+ -- Returns the nth element
+ ['nth'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.nth(args[1] or 1, inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the nth element',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ ['take_n'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.take_n(args[1] or 1, inp), t
+ end,
+ ['description'] = 'Returns the n first elements',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ ['drop_n'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.drop_n(args[1] or 1, inp), t
+ end,
+ ['description'] = 'Returns list without the first n elements',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ -- Joins strings into a single string using separator in the argument
+ ['join'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ return table.concat(fun.totable(inp), args[1] or ''), 'string'
+ end,
+ ['description'] = 'Joins strings into a single string using separator in the argument',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Joins strings into a set of strings using N elements and a separator in the argument
+ ['join_nth'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ local step = args[1]
+ local sep = args[2] or ''
+ local inp_t = fun.totable(inp)
+ local res = {}
+
+ for i = 1, #inp_t, step do
+ table.insert(res, table.concat(inp_t, sep, i, i + step))
+ end
+ return res, 'string_list'
+ end,
+ ['description'] = 'Joins strings into a set of strings using N elements and a separator in the argument',
+ ['args_schema'] = { ts.number + ts.string / tonumber, ts.string:is_optional() }
+ },
+ -- Joins tables into a table of strings
+ ['join_tables'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ local sep = args[1] or ''
+ return fun.map(function(t)
+ return table.concat(t, sep)
+ end, inp), 'string_list'
+ end,
+ ['description'] = 'Joins tables into a table of strings',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Sort strings
+ ['sort'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, t, _)
+ table.sort(inp)
+ return inp, t
+ end,
+ ['description'] = 'Sort strings lexicographically',
+ },
+ -- Return unique elements based on hashing (can work without sorting)
+ ['uniq'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, t, _)
+ local tmp = {}
+ fun.each(function(val)
+ tmp[val] = true
+ end, inp)
+
+ return fun.map(function(k, _)
+ return k
+ end, tmp), t
+ end,
+ ['description'] = 'Returns a list of unique elements (using a hash table)',
+ },
+ -- Create a digest from string or a list of strings
+ ['digest'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ return common.create_digest(inp, args), 'string'
+ end,
+ ['description'] = [[Create a digest from a string.
+The first argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
+the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
+ ['args_schema'] = common.digest_schema()
+ },
+ -- Extracts substring
+ ['substring'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local start_pos = args[1] or 1
+ local end_pos = args[2] or -1
+
+ return inp:sub(start_pos, end_pos), 'string'
+ end,
+ ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
+ ['args_schema'] = { (ts.number + ts.string / tonumber):is_optional(),
+ (ts.number + ts.string / tonumber):is_optional() }
+ },
+ -- Prepends a string or a strings list
+ ['prepend'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local prepend = table.concat(args, '')
+
+ return prepend .. inp, 'string'
+ end,
+ ['description'] = 'Prepends a string or a strings list',
+ },
+ -- Appends a string or a strings list
+ ['append'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local append = table.concat(args, '')
+
+ return inp .. append, 'string'
+ end,
+ ['description'] = 'Appends a string or a strings list',
+ },
+ -- Regexp matching
+ ['regexp'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local rspamd_regexp = require "rspamd_regexp"
+
+ local re = rspamd_regexp.create_cached(args[1])
+
+ if not re then
+ logger.errx('invalid regexp: %s', args[1])
+ return nil
+ end
+
+ local res = re:search(inp, false, true)
+
+ if res then
+ -- Map all results in a single list
+ local flattened_table = {}
+ local function flatten_table(tbl)
+ for _, v in ipairs(tbl) do
+ if type(v) == 'table' then
+ flatten_table(v)
+ else
+ table.insert(flattened_table, v)
+ end
+ end
+ end
+ flatten_table(res)
+ return flattened_table, 'string_list'
+ end
+
+ return nil
+ end,
+ ['description'] = 'Regexp matching, returns all matches flattened in a single list',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value if it exists in some map (or acts like a `filter` function)
+ ['filter_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if res then
+ return inp, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value if it exists in some map (or acts like a `filter` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value if it exists in some map (or acts like a `filter` function)
+ ['except_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if not res then
+ return inp, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value if it does not exists in some map (or acts like a `except` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value from some map corresponding to some key (or acts like a `map` function)
+ ['apply_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if res then
+ return res, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value from some map corresponding to some key (or acts like a `map` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Drops input value and return values from function's arguments or an empty string
+ ['id'] = {
+ ['types'] = {
+ ['string'] = true,
+ ['list'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(_, _, args)
+ if args[1] and args[2] then
+ return fun.map(tostring, args), 'string_list'
+ elseif args[1] then
+ return args[1], 'string'
+ end
+
+ return '', 'string'
+ end,
+ ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
+ ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
+ },
+ ['equal'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if inp == args[1] then
+ return inp, 'string'
+ end
+
+ return nil
+ end,
+ ['description'] = [[Boolean function equal.
+Returns either nil or its argument if input is equal to argument]],
+ ['args_schema'] = { ts.string }
+ },
+ -- Boolean function in, returns either nil or its input if input is in args list
+ ['in'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ for _, a in ipairs(args) do
+ if a == inp then
+ return inp, t
+ end
+ end
+ return nil
+ end,
+ ['description'] = [[Boolean function in.
+Returns either nil or its input if input is in args list]],
+ ['args_schema'] = ts.array_of(ts.string)
+ },
+ ['not_in'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ for _, a in ipairs(args) do
+ if a == inp then
+ return nil
+ end
+ end
+ return inp, t
+ end,
+ ['description'] = [[Boolean function not in.
+Returns either nil or its input if input is not in args list]],
+ ['args_schema'] = ts.array_of(ts.string)
+ },
+ ['inverse'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if inp then
+ return nil
+ else
+ return (args[1] or 'true'), 'string'
+ end
+ end,
+ ['description'] = [[Inverses input.
+Empty string comes the first argument or 'true', non-empty string comes nil]],
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ ['ipmask'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local rspamd_ip = require "rspamd_ip"
+ -- Non optimal: convert string to an IP address
+ local ip = rspamd_ip.from_string(inp)
+
+ if not ip or not ip:is_valid() then
+ lua_util.debugm(M, "cannot convert %s to IP", inp)
+ return nil
+ end
+
+ if ip:get_version() == 4 then
+ local mask = tonumber(args[1])
+
+ return ip:apply_mask(mask):to_string(), 'string'
+ else
+ -- IPv6 takes the second argument or the first one...
+ local mask_str = args[2] or args[1]
+ local mask = tonumber(mask_str)
+
+ return ip:apply_mask(mask):to_string(), 'string'
+ end
+ end,
+ ['description'] = 'Applies mask to IP address.' ..
+ ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
+ ['args_schema'] = { (ts.number + ts.string / tonumber),
+ (ts.number + ts.string / tonumber):is_optional() }
+ },
+ -- Returns the string(s) with all non ascii chars replaced
+ ['to_ascii'] = {
+ ['types'] = {
+ ['string'] = true,
+ ['list'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if type(inp) == 'table' then
+ return fun.map(
+ function(s)
+ return string.gsub(tostring(s), '[\128-\255]', args[1] or '?')
+ end, inp), 'string_list'
+ else
+ return string.gsub(tostring(inp), '[\128-\255]', '?'), 'string'
+ end
+ end,
+ ['description'] = 'Returns the string with all non-ascii bytes replaced with the character ' ..
+ 'given as second argument or `?`',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Extracts tld from a hostname
+ ['get_tld'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, _)
+ return rspamd_util.get_tld(inp), 'string'
+ end,
+ ['description'] = 'Extracts tld from a hostname represented as a string',
+ ['args_schema'] = {}
+ },
+ -- Converts list of strings to numbers and returns a packed string
+ ['pack_numbers'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local fmt = args[1] or 'f'
+ local res = {}
+ for _, s in ipairs(inp) do
+ table.insert(res, tonumber(s))
+ end
+ return rspamd_util.pack(string.rep(fmt, #res), lua_util.unpack(res)), 'string'
+ end,
+ ['description'] = 'Converts a list of strings to numbers & returns a packed string',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Filter nils from a list
+ ['filter_string_nils'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, _)
+ return fun.filter(function(val)
+ return type(val) == 'string' and val ~= 'nil'
+ end, inp), 'string_list'
+ end,
+ ['description'] = 'Removes all nils from a list of strings (when converted implicitly)',
+ ['args_schema'] = {}
+ },
+ -- Call a set of methods on a userdata object
+ ['apply_methods'] = {
+ ['types'] = {
+ ['userdata'] = true,
+ },
+ ['process'] = function(inp, _, args)
+ local res = {}
+ for _, arg in ipairs(args) do
+ local meth = inp[arg]
+ local ret = meth(inp)
+ if ret then
+ table.insert(res, tostring(ret))
+ end
+ end
+ return res, 'string_list'
+ end,
+ ['description'] = 'Apply a list of method calls to the userdata object',
+ },
+ -- Apply method to list of userdata and use it as a filter, excluding elements for which method returns false/nil
+ ['filter_method'] = {
+ ['types'] = {
+ ['userdata_list'] = true
+ },
+ ['process'] = function(inp, t, args)
+ local meth = args[1]
+
+ if not meth then
+ logger.errx('invalid method name: %s', args[1])
+ return nil
+ end
+
+ return fun.filter(function(val)
+ return val[meth](val)
+ end, inp), 'userdata_list'
+ end,
+ ['description'] = 'Apply method to list of userdata and use it as a filter,' ..
+ ' excluding elements for which method returns false/nil',
+ ['args_schema'] = { ts.string }
+ },
+}
+
+transform_function.match = transform_function.regexp
+
+return transform_function