diff options
Diffstat (limited to 'lualib/lua_selectors')
-rw-r--r-- | lualib/lua_selectors/common.lua | 95 | ||||
-rw-r--r-- | lualib/lua_selectors/extractors.lua | 565 | ||||
-rw-r--r-- | lualib/lua_selectors/init.lua | 668 | ||||
-rw-r--r-- | lualib/lua_selectors/maps.lua | 19 | ||||
-rw-r--r-- | lualib/lua_selectors/transforms.lua | 571 |
5 files changed, 1918 insertions, 0 deletions
diff --git a/lualib/lua_selectors/common.lua b/lualib/lua_selectors/common.lua
new file mode 100644
index 0000000..7b2372d
--- /dev/null
+++ b/lualib/lua_selectors/common.lua
@@ -0,0 +1,95 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local ts = require("tableshape").types
+local exports = {}
+local cr_hash = require 'rspamd_cryptobox_hash'
+
+-- Key for the keyed blake2 hasher: the binary blake2 digest of 'rspamd'
+local blake2b_key = cr_hash.create_specific('blake2'):update('rspamd'):bin()
+
+-- Encoders indexed by the encoding name accepted as the first digest argument
+local digest_encoders = {
+  hex = function(h)
+    return h:hex()
+  end,
+  base32 = function(h)
+    return h:base32()
+  end,
+  bleach32 = function(h)
+    return h:base32('bleach')
+  end,
+  rbase32 = function(h)
+    return h:base32('rfc')
+  end,
+  base64 = function(h)
+    return h:base64()
+  end,
+}
+
+--- Builds the arguments schema shared by digest-producing selectors.
+-- @return array of two optional schemas: encoding name, then hash type name
+local function digest_schema()
+  local encoding = ts.one_of { 'hex', 'base32', 'bleach32', 'rbase32', 'base64' }:is_optional()
+  local hash_type = ts.one_of { 'blake2', 'sha256', 'sha1', 'sha512', 'md5' }:is_optional()
+
+  return { encoding, hash_type }
+end
+
+--- Hashes `data` and returns the hash object without encoding it.
+-- @param data content to feed into the hasher
+-- @param args digest arguments; `args[2]` selects the hash type ('blake2' when absent)
+-- @return whatever the hasher's `update` call returns
+local function create_raw_digest(data, args)
+  local hash_type = args[2] or 'blake2'
+  local hasher
+
+  if hash_type == 'blake2' then
+    -- Hack to be compatible with various 'get_digest' methods
+    hasher = cr_hash.create_keyed(blake2b_key)
+  else
+    hasher = cr_hash.create_specific(hash_type)
+  end
+
+  return hasher:update(data)
+end
+
+--- Encodes a hash object as text.
+-- @param h hash object
+-- @param args digest arguments; `args[1]` selects the encoding ('hex' when absent)
+-- @return encoded digest, or nil when the encoding name is not known
+local function encode_digest(h, args)
+  local encoder = digest_encoders[args[1] or 'hex']
+
+  if not encoder then
+    return nil
+  end
+
+  -- Keep a single value, whatever the encoder returns
+  local encoded = encoder(h)
+  return encoded
+end
+
+--- Hashes `data` and encodes the result in one step.
+-- @param data content to hash
+-- @param args digest arguments (encoding, hash type)
+-- @return encoded digest
+local function create_digest(data, args)
+  return encode_digest(create_raw_digest(data, args), args)
+end
+
+--- Returns the digest of a mime part, reusing the task cache when possible.
+-- @param task task used as the cache (`cache_get` / `cache_set`)
+-- @param idx index of the part, used to build the cache key
+-- @param mime_part part to be hashed
+-- @param args digest arguments (encoding, hash type)
+-- @return encoded digest
+local function get_cached_or_raw_digest(task, idx, mime_part, args)
+  if #args == 0 then
+    -- Optimise as we already have this hash in the API
+    return mime_part:get_digest()
+  end
+
+  local cache_key = 'mp_digest_' .. (args[2] or 'blake2') .. tostring(idx)
+  local digest = task:cache_get(cache_key)
+
+  if not digest then
+    -- Not cached yet: hash the part content and remember the raw hash object
+    digest = create_raw_digest(mime_part:get_content('raw_parsed'), args)
+    task:cache_set(cache_key, digest)
+  end
+
+  return encode_digest(digest, args)
+end
+
+exports.digest_schema = digest_schema
+exports.create_digest = create_digest
+exports.create_raw_digest = create_raw_digest
+exports.get_cached_or_raw_digest = get_cached_or_raw_digest
+exports.encode_digest = encode_digest
+
+return exports
\ No newline at end of file
diff --git a/lualib/lua_selectors/extractors.lua b/lualib/lua_selectors/extractors.lua
new file mode 100644
index 0000000..81dfa9d
--- /dev/null
+++ b/lualib/lua_selectors/extractors.lua
@@ -0,0 +1,565 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local fun = require 'fun'
+local meta_functions = require "lua_meta"
+local lua_util = require "lua_util"
+local rspamd_url = require "rspamd_url"
+local common = require "lua_selectors/common"
+local ts = require("tableshape").types
+local maps = require "lua_selectors/maps"
+local E = {}
+local M = "selectors"
+
+-- Optional array of url flag names (names are the keys of rspamd_url.flags)
+local url_flags_ts = ts.array_of(ts.one_of(lua_util.keys(rspamd_url.flags))):is_optional()
+
+-- Parameters table accepted by `specific_urls` and `specific_urls_filter_map`
+local specific_urls_params_ts = ts.shape {
+  limit = ts.number + ts.string / tonumber,
+  esld_limit = (ts.number + ts.string / tonumber):is_optional(),
+  exclude_flags = url_flags_ts,
+  flags = url_flags_ts,
+  flags_mode = ts.one_of { 'explicit' }:is_optional(),
+  prefix = ts.string:is_optional(),
+  need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+  need_emails = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+  need_images = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+  ignore_redirected = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),
+}
+
+-- Returns a predicate that rejects urls having any of `exclude_flags` set
+local function gen_exclude_flags_filter(exclude_flags)
+  return function(u)
+    local got_flags = u:get_flags()
+    for _, flag in ipairs(exclude_flags) do
+      if got_flags[flag] then
+        return false
+      end
+    end
+    return true
+  end
+end
+
+-- Builds the parameters for `lua_util.extract_specific_urls` for this task.
+-- Works on a copy: the parsed selector arguments are shared between tasks,
+-- so the task and the filter must not be stored in them.
+local function make_specific_urls_params(task, user_params)
+  local params = lua_util.shallowcopy(user_params or {})
+  params.task = task
+  params.no_cache = true
+  if params.exclude_flags then
+    params.filter = gen_exclude_flags_filter(params.exclude_flags)
+  end
+  return params
+end
+
+-- Table of extractors indexed by selector name. Each entry has:
+--   get_value(task, args) -> value, type name (nil when there is nothing to extract)
+--   description           -> human readable help
+--   args_schema           -> optional schemas used to validate/transform arguments
+local extractors = {
+  -- Plain id function
+  ['id'] = {
+    ['get_value'] = function(_, args)
+      if args[1] then
+        return args[1], 'string'
+      end
+
+      return '', 'string'
+    end,
+    ['description'] = [[Return value from function's argument or an empty string,
+For example, `id('Something')` returns a string 'Something']],
+    ['args_schema'] = { ts.string:is_optional() }
+  },
+  -- Similar but for making lists
+  ['list'] = {
+    ['get_value'] = function(_, args)
+      if args[1] then
+        return fun.map(tostring, args), 'string_list'
+      end
+
+      return {}, 'string_list'
+    end,
+    ['description'] = [[Return a list from function's arguments or an empty list,
+For example, `list('foo', 'bar')` returns a list {'foo', 'bar'}]],
+  },
+  -- Get source IP address
+  ['ip'] = {
+    ['get_value'] = function(task)
+      local ip = task:get_ip()
+      if ip and ip:is_valid() then
+        return ip, 'userdata'
+      end
+      return nil
+    end,
+    ['description'] = [[Get source IP address]],
+  },
+  -- Get MIME from
+  ['from'] = {
+    ['get_value'] = function(task, args)
+      local from
+      if type(args) == 'table' then
+        from = task:get_from(args)
+      else
+        from = task:get_from(0)
+      end
+      if ((from or E)[1] or E).addr then
+        return from[1], 'table'
+      end
+      return nil
+    end,
+    ['description'] = [[Get MIME or SMTP from (e.g. `from('smtp')` or `from('mime')`,
+uses any type by default)]],
+  },
+  ['rcpts'] = {
+    ['get_value'] = function(task, args)
+      local rcpts
+      if type(args) == 'table' then
+        rcpts = task:get_recipients(args)
+      else
+        rcpts = task:get_recipients(0)
+      end
+      if ((rcpts or E)[1] or E).addr then
+        return rcpts, 'table_list'
+      end
+      return nil
+    end,
+    ['description'] = [[Get MIME or SMTP rcpts (e.g. `rcpts('smtp')` or `rcpts('mime')`,
+uses any type by default)]],
+  },
+  -- Get country (ASN module must be executed first)
+  ['country'] = {
+    ['get_value'] = function(task)
+      local country = task:get_mempool():get_variable('country')
+      if not country then
+        return nil
+      else
+        return country, 'string'
+      end
+    end,
+    ['description'] = [[Get country (ASN module must be executed first)]],
+  },
+  -- Get ASN number
+  ['asn'] = {
+    ['type'] = 'string',
+    ['get_value'] = function(task)
+      local asn = task:get_mempool():get_variable('asn')
+      if not asn then
+        return nil
+      else
+        return asn, 'string'
+      end
+    end,
+    ['description'] = [[Get AS number (ASN module must be executed first)]],
+  },
+  -- Get authenticated username
+  ['user'] = {
+    ['get_value'] = function(task)
+      local auser = task:get_user()
+      if not auser then
+        return nil
+      else
+        return auser, 'string'
+      end
+    end,
+    ['description'] = 'Get authenticated user name',
+  },
+  -- Get principal recipient
+  ['to'] = {
+    ['get_value'] = function(task)
+      return task:get_principal_recipient(), 'string'
+    end,
+    ['description'] = 'Get principal recipient',
+  },
+  -- Get content digest
+  ['digest'] = {
+    ['get_value'] = function(task)
+      return task:get_digest(), 'string'
+    end,
+    ['description'] = 'Get content digest',
+  },
+  -- Get list of all attachments digests
+  ['attachments'] = {
+    ['get_value'] = function(task, args)
+      local parts = task:get_parts() or E
+      local digests = {}
+      for i, p in ipairs(parts) do
+        if p:is_attachment() then
+          table.insert(digests, common.get_cached_or_raw_digest(task, i, p, args))
+        end
+      end
+
+      if #digests > 0 then
+        return digests, 'string_list'
+      end
+
+      return nil
+    end,
+    ['description'] = [[Get list of all attachments digests.
+The first optional argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
+the second optional argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
+    ['args_schema'] = common.digest_schema()
+
+  },
+  -- Get all attachments files
+  ['files'] = {
+    ['get_value'] = function(task)
+      local parts = task:get_parts() or E
+      local files = {}
+
+      for _, p in ipairs(parts) do
+        local fname = p:get_filename()
+        if fname then
+          table.insert(files, fname)
+        end
+      end
+
+      if #files > 0 then
+        return files, 'string_list'
+      end
+
+      return nil
+    end,
+    ['description'] = 'Get all attachments files',
+  },
+  -- Get languages for text parts
+  ['languages'] = {
+    ['get_value'] = function(task)
+      local text_parts = task:get_text_parts() or E
+      local languages = {}
+
+      for _, p in ipairs(text_parts) do
+        local lang = p:get_language()
+        if lang then
+          table.insert(languages, lang)
+        end
+      end
+
+      if #languages > 0 then
+        return languages, 'string_list'
+      end
+
+      return nil
+    end,
+    ['description'] = 'Get languages for text parts',
+  },
+  -- Get helo value
+  ['helo'] = {
+    ['get_value'] = function(task)
+      return task:get_helo(), 'string'
+    end,
+    ['description'] = 'Get helo value',
+  },
+  -- Get header with the name that is expected as an argument. Returns list of
+  -- headers with this name
+  ['header'] = {
+    ['get_value'] = function(task, args)
+      local strong = false
+      if args[2] then
+        if args[2]:match('strong') then
+          strong = true
+        end
+
+        if args[2]:match('full') then
+          return task:get_header_full(args[1], strong), 'table_list'
+        end
+
+        return task:get_header(args[1], strong), 'string'
+      else
+        return task:get_header(args[1]), 'string'
+      end
+    end,
+    ['description'] = [[Get header with the name that is expected as an argument.
+The optional second argument accepts list of flags:
+  - `full`: returns all headers with this name with all data (like task:get_header_full())
+  - `strong`: use case sensitive match when matching header's name]],
+    ['args_schema'] = { ts.string,
+                        (ts.pattern("strong") + ts.pattern("full")):is_optional() }
+  },
+  -- Get list of received headers (returns list of tables)
+  ['received'] = {
+    ['get_value'] = function(task, args)
+      local rh = task:get_received_headers()
+      if not rh[1] then
+        return nil
+      end
+      if args[1] then
+        return fun.map(function(r)
+          return r[args[1]]
+        end, rh), 'string_list'
+      end
+
+      return rh, 'table_list'
+    end,
+    ['description'] = [[Get list of received headers.
+If no arguments specified, returns list of tables. Otherwise, selects a specific element,
+e.g. `by_hostname`]],
+  },
+  -- Get all urls
+  ['urls'] = {
+    ['get_value'] = function(task, args)
+      local urls = task:get_urls()
+      if not urls[1] then
+        return nil
+      end
+      if args[1] then
+        return fun.map(function(r)
+          return r[args[1]](r)
+        end, urls), 'string_list'
+      end
+      return urls, 'userdata_list'
+    end,
+    ['description'] = [[Get list of all urls.
+If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
+e.g. `get_tld`]],
+  },
+  -- Get specific urls
+  ['specific_urls'] = {
+    ['get_value'] = function(task, args)
+      local urls = lua_util.extract_specific_urls(make_specific_urls_params(task, args[1]))
+      if not urls[1] then
+        return nil
+      end
+      return urls, 'userdata_list'
+    end,
+    ['description'] = [[Get most specific urls. Arguments are equal to the Lua API function]],
+    ['args_schema'] = { specific_urls_params_ts }
+  },
+  -- Get specific urls, keeping only those found in a registered map
+  ['specific_urls_filter_map'] = {
+    ['get_value'] = function(task, args)
+      local map = maps[args[1]]
+      if not map then
+        -- Without a map there is nothing to match against
+        lua_util.debugm(M, task, "invalid/unknown map: %s", args[1])
+        return nil
+      end
+      local urls = lua_util.extract_specific_urls(make_specific_urls_params(task, args[2]))
+      if not urls[1] then
+        return nil
+      end
+      return fun.filter(function(u)
+        return map:get_key(tostring(u))
+      end, urls), 'userdata_list'
+    end,
+    ['description'] = [[Get most specific urls, filtered by some map. Arguments are equal to the Lua API function]],
+    ['args_schema'] = { ts.string, specific_urls_params_ts }
+  },
+  -- URLs filtered by flags
+  ['urls_filtered'] = {
+    ['get_value'] = function(task, args)
+      local urls = task:get_urls_filtered(args[1], args[2])
+      if not urls[1] then
+        return nil
+      end
+      return urls, 'userdata_list'
+    end,
+    ['description'] = [[Get list of all urls filtered by flags_include/exclude
+(see rspamd_task:get_urls_filtered for description)]],
+    ['args_schema'] = { ts.array_of {
+      url_flags_ts:is_optional(), url_flags_ts:is_optional()
+    } }
+  },
+  -- Get all emails
+  ['emails'] = {
+    ['get_value'] = function(task, args)
+      local urls = task:get_emails()
+      if not urls[1] then
+        return nil
+      end
+      if args[1] then
+        return fun.map(function(r)
+          return r[args[1]](r)
+        end, urls), 'string_list'
+      end
+      return urls, 'userdata_list'
+    end,
+    ['description'] = [[Get list of all emails.
+If no arguments specified, returns list of url objects. Otherwise, calls a specific method,
+e.g. `get_user`]],
+  },
+  -- Get specific pool var. The first argument must be variable name,
+  -- the second argument is optional and defines the type (string by default)
+  ['pool_var'] = {
+    ['get_value'] = function(task, args)
+      -- Not named `type` to keep the builtin visible
+      local var_type = args[2] or 'string'
+      return task:get_mempool():get_variable(args[1], var_type), (var_type)
+    end,
+    ['description'] = [[Get specific pool var. The first argument must be variable name,
+the second argument is optional and defines the type (string by default)]],
+    ['args_schema'] = { ts.string, ts.string:is_optional() }
+  },
+  -- Get value of specific key from task cache
+  ['task_cache'] = {
+    ['get_value'] = function(task, args)
+      local val = task:cache_get(args[1])
+      if not val then
+        return
+      end
+      if type(val) == 'table' then
+        if not val[1] then
+          return
+        end
+        return val, 'string_list'
+      end
+      return val, 'string'
+    end,
+    ['description'] = [[Get value of specific key from task cache. The first argument must be
+the key name]],
+    ['args_schema'] = { ts.string }
+  },
+  -- Get specific HTTP request header. The first argument must be header name.
+  ['request_header'] = {
+    ['get_value'] = function(task, args)
+      local hdr = task:get_request_header(args[1])
+      if hdr then
+        return hdr, 'string'
+      end
+
+      return nil
+    end,
+    ['description'] = [[Get specific HTTP request header.
+The first argument must be header name.]],
+    ['args_schema'] = { ts.string }
+  },
+  -- Get task date, optionally formatted
+  ['time'] = {
+    ['get_value'] = function(task, args)
+      local what = args[1] or 'message'
+      local dt = task:get_date { format = what, gmt = true }
+
+      if dt then
+        if args[2] then
+          -- Should be in format !xxx, as dt is in GMT
+          return os.date(args[2], dt), 'string'
+        end
+
+        return tostring(dt), 'string'
+      end
+
+      return nil
+    end,
+    ['description'] = [[Get task timestamp. The first argument is type:
+  - `connect`: connection timestamp
+  - `message`: timestamp as defined by `Date` header (default)
+
+  The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
+    ['args_schema'] = { ts.one_of { 'connect', 'message' }:is_optional(),
+                        ts.string:is_optional() }
+  },
+  -- Get text words from a message
+  ['words'] = {
+    ['get_value'] = function(task, args)
+      local how = args[1] or 'stem'
+      local tp = task:get_text_parts()
+
+      if tp then
+        local rtype = 'string_list'
+        if how == 'full' then
+          rtype = 'table_list'
+        end
+
+        return lua_util.flatten(
+            fun.map(function(p)
+              return p:get_words(how)
+            end, tp)), rtype
+      end
+
+      return nil
+    end,
+    ['description'] = [[Get words from text parts
+  - `stem`: stemmed words (default)
+  - `raw`: raw words
+  - `norm`: normalised words (lowercased)
+  - `full`: list of tables
+  ]],
+    ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional() },
+  },
+  -- Get queue ID
+  ['queueid'] = {
+    ['get_value'] = function(task)
+      local queueid = task:get_queue_id()
+      if queueid then
+        return queueid, 'string'
+      end
+      return nil
+    end,
+    ['description'] = [[Get queue ID]],
+  },
+  -- Get ID of the task being processed
+  ['uid'] = {
+    ['get_value'] = function(task)
+      local uid = task:get_uid()
+      if uid then
+        return uid, 'string'
+      end
+      return nil
+    end,
+    ['description'] = [[Get ID of the task being processed]],
+  },
+  -- Get message ID of the task being processed
+  ['messageid'] = {
+    ['get_value'] = function(task)
+      local mid = task:get_message_id()
+      if mid then
+        return mid, 'string'
+      end
+      return nil
+    end,
+    ['description'] = [[Get message ID]],
+  },
+  -- Get specific symbol
+  ['symbol'] = {
+    ['get_value'] = function(task, args)
+      local symbol = task:get_symbol(args[1], args[2])
+      if symbol then
+        return symbol[1], 'table'
+      end
+    end,
+    ['description'] = 'Get specific symbol. The first argument must be the symbol name. ' ..
+        'The second argument is an optional shadow result name. ' ..
+        'Returns the symbol table. See task:get_symbol()',
+    ['args_schema'] = { ts.string, ts.string:is_optional() }
+  },
+  -- Get full scan result
+  ['scan_result'] = {
+    ['get_value'] = function(task, args)
+      local res = task:get_metric_result(args[1])
+      if res then
+        return res, 'table'
+      end
+    end,
+    ['description'] = 'Get full scan result (either default or shadow if shadow result name is specified). ' ..
+        'Returns the result table. See task:get_metric_result()',
+    ['args_schema'] = { ts.string:is_optional() }
+  },
+  -- Get list of metatokens as strings
+  ['metatokens'] = {
+    ['get_value'] = function(task)
+      local tokens = meta_functions.gen_metatokens(task)
+      if not tokens[1] then
+        return nil
+      end
+      local res = {}
+      for _, t in ipairs(tokens) do
+        table.insert(res, tostring(t))
+      end
+      return res, 'string_list'
+    end,
+    ['description'] = 'Get metatokens for a message as strings',
+  },
+}
+
+return extractors
diff --git a/lualib/lua_selectors/init.lua b/lualib/lua_selectors/init.lua
new file mode 100644
index 0000000..5fcdb38
--- /dev/null
+++ b/lualib/lua_selectors/init.lua
@@ -0,0 +1,668 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- This module contains 'selectors' implementation: code to extract data
+-- from Rspamd tasks and compose those together
+--
+-- Read more at https://rspamd.com/doc/configuration/selectors.html
+
+--[[[
+-- @module lua_selectors
+-- This module contains 'selectors' implementation: code to extract data
+-- from Rspamd tasks and compose those together.
+-- Typical selector looks like this: header(User).lower.substring(1, 2):ip
+--]]
+
+local exports = {
+  maps = require "lua_selectors/maps"
+}
+
+local logger = require 'rspamd_logger'
+local fun = require 'fun'
+local lua_util = require "lua_util"
+local M = "selectors"
+local rspamd_text = require "rspamd_text"
+local unpack_function = table.unpack or unpack
+local E = {}
+
+local extractors = require "lua_selectors/extractors"
+local transform_function = require "lua_selectors/transforms"
+
+local text_cookie = rspamd_text.cookie
+
+-- Returns the element type of a list type ('string_list' -> 'string'),
+-- or nil when `ltype` is not a list type
+local function pure_type(ltype)
+  return ltype:match('^(.*)_list$')
+end
+
+-- Converts a value of selector type `t` to a string-like value.
+-- Returns the converted value and 'string', or nil when `t` is 'nil'
+local function implicit_tostring(t, ud_or_table)
+  if t == 'table' then
+    -- Table (very special)
+    if ud_or_table.value then
+      return ud_or_table.value, 'string'
+    elseif ud_or_table.addr then
+      return ud_or_table.addr, 'string'
+    end
+
+    return logger.slog("%s", ud_or_table), 'string'
+  elseif (t == 'string' or t == 'text') and type(ud_or_table) == 'userdata' then
+    if ud_or_table.cookie and ud_or_table.cookie == text_cookie then
+      -- Preserve opaque
+      return ud_or_table, 'string'
+    else
+      return tostring(ud_or_table), 'string'
+    end
+  elseif t ~= 'nil' then
+    return tostring(ud_or_table), 'string'
+  end
+
+  return nil
+end
+
+-- Runs a single parsed selector against a task: extracts the value, applies
+-- the optional method / implicit conversion, folds the value through the
+-- processors pipe and converts the result to a string or a list of strings.
+-- Returns the final value, or nil when nothing was extracted or the pipe failed
+local function process_selector(task, sel)
+  local function allowed_type(t)
+    if t == 'string' or t == 'string_list' then
+      return true
+    end
+
+    return false
+  end
+
+  local function list_type(t)
+    return pure_type(t)
+  end
+
+  local input, etype = sel.selector.get_value(task, sel.selector.args)
+
+  if not input then
+    lua_util.debugm(M, task, 'no value extracted for %s', sel.selector.name)
+    return nil
+  end
+
+  lua_util.debugm(M, task, 'extracted %s, type %s',
+      sel.selector.name, etype)
+
+  local pipe = sel.processor_pipe or E
+  local first_elt = pipe[1]
+
+  if first_elt and (first_elt.method or
+      fun.any(function(t)
+        return t == 'userdata' or t == 'table'
+      end, first_elt.types)) then
+    -- Explicit conversion
+    local meth = first_elt
+
+    if meth.types[etype] then
+      lua_util.debugm(M, task, 'apply method `%s` to %s',
+          meth.name, etype)
+      input, etype = meth.process(input, etype, meth.args)
+    else
+      local pt = pure_type(etype)
+
+      if meth.types[pt] then
+        lua_util.debugm(M, task, 'map method `%s` to list of %s',
+            meth.name, pt)
+        -- Map method to a list of inputs, excluding empty elements
+        -- We need to fold it down here to get a proper type resolution
+        -- The resulting element type is tracked separately from `etype`, so
+        -- a list where nothing was mapped is detected instead of being
+        -- labelled with the original list type plus another `_list` suffix
+        local mapped_type
+        input = fun.totable(fun.filter(function(map_elt, _)
+          return map_elt
+        end,
+            fun.map(function(list_elt)
+              local ret, ty = meth.process(list_elt, pt, meth.args)
+              if ret then
+                mapped_type = ty
+              end
+              return ret
+            end, input)))
+        if mapped_type then
+          etype = mapped_type .. "_list"
+        else
+          input = nil
+        end
+      end
+    end
+    -- Remove method from the pipeline
+    pipe = fun.drop_n(1, pipe)
+  elseif etype:match('^userdata') or etype:match('^table') then
+    -- Implicit conversion
+    local pt = pure_type(etype)
+
+    if not pt then
+      lua_util.debugm(M, task, 'apply implicit conversion %s->string', etype)
+      input = implicit_tostring(etype, input)
+      etype = 'string'
+    else
+      lua_util.debugm(M, task, 'apply implicit map %s->string', pt)
+      input = fun.filter(function(map_elt)
+        return map_elt
+      end,
+          fun.map(function(list_elt)
+            local ret = implicit_tostring(pt, list_elt)
+            return ret
+          end, input))
+      etype = 'string_list'
+    end
+  else
+    lua_util.debugm(M, task, 'avoid implicit conversion as the transformer accepts complex input')
+  end
+
+  -- Now we fold elements using left fold
+  -- The accumulator is a pair { value, type }; nil means that the pipe failed
+  local function fold_function(acc, x)
+    if acc == nil or acc[1] == nil then
+      lua_util.debugm(M, task, 'do not apply %s, accumulator is nil', x.name)
+      return nil
+    end
+
+    local value = acc[1]
+    local t = acc[2]
+
+    if not x.types[t] then
+      local pt = pure_type(t)
+
+      if pt and x.types['list'] then
+        -- Generic list processor
+        lua_util.debugm(M, task, 'apply list function `%s` to %s', x.name, t)
+        return { x.process(value, t, x.args) }
+      elseif pt and x.map_type and x.types[pt] then
+        local map_type = x.map_type .. '_list'
+        lua_util.debugm(M, task, 'map `%s` to list of %s resulting %s',
+            x.name, pt, map_type)
+        -- Apply map, filtering empty values
+        return {
+          fun.filter(function(map_elt)
+            return map_elt
+          end,
+              fun.map(function(list_elt)
+                if not list_elt then
+                  return nil
+                end
+                local ret, _ = x.process(list_elt, pt, x.args)
+                return ret
+              end, value)),
+          map_type -- Returned type
+        }
+      end
+      logger.errx(task, 'cannot apply transform %s for type %s', x.name, t)
+      return nil
+    end
+
+    lua_util.debugm(M, task, 'apply %s to %s', x.name, t)
+    return { x.process(value, t, x.args) }
+  end
+
+  local res = fun.foldl(fold_function,
+      { input, etype },
+      pipe)
+
+  if not res or not res[1] then
+    return nil
+  end -- Pipeline failed
+
+  if not allowed_type(res[2]) then
+    -- Search for implicit conversion
+    local pt = pure_type(res[2])
+
+    if pt then
+      lua_util.debugm(M, task, 'apply implicit map %s->string_list', pt)
+      res[1] = fun.map(function(e)
+        return implicit_tostring(pt, e)
+      end, res[1])
+      res[2] = 'string_list'
+    else
+      res[1] = implicit_tostring(res[2], res[1])
+      res[2] = 'string'
+    end
+  end
+
+  if list_type(res[2]) then
+    -- Convert to table as it might have a functional form
+    res[1] = fun.totable(res[1])
+  end
+
+  lua_util.debugm(M, task, 'final selector type: %s, value: %s', res[2], res[1])
+
+  return res[1]
+end
+
+-- Builds the LPeg grammar of the selectors language:
+-- function[(args)][:method[(args)]][.processor[(args)]]..., where several
+-- selectors are separated by `;` or `*`
+local function make_grammar()
+  local l = require "lpeg"
+  local spc = l.S(" \t\n") ^ 0
+  local cont = l.R("\128\191") -- continuation byte
+  local utf8_high = l.R("\194\223") * cont
+      + l.R("\224\239") * cont * cont
+      + l.R("\240\244") * cont * cont * cont
+  local atom_start = (l.R("az") + l.R("AZ") + l.R("09") + utf8_high + l.S "-") ^ 1
+  local atom_end = (l.R("az") + l.R("AZ") + l.R("09") + l.S "-_" + utf8_high) ^ 1
+  local atom_mid = (1 - l.S("'\r\n\f\\,)(}{= " .. '"')) ^ 1
+  local atom_argument = l.C(atom_start * atom_mid ^ 0 * atom_end ^ 0) -- We allow more characters for the arguments
+  local atom = l.C(atom_start * atom_end ^ 0) -- We are more strict about selector names itself
+  local singlequoted_string = l.P "'" * l.C(((1 - l.S "'\r\n\f\\") + (l.P '\\' * 1)) ^ 0) * "'"
+  local doublequoted_string = l.P '"' * l.C(((1 - l.S '"\r\n\f\\') + (l.P '\\' * 1)) ^ 0) * '"'
+  local argument = atom_argument + singlequoted_string + doublequoted_string
+  local dot = l.P(".")
+  local colon = l.P(":") -- introduces a method call
+  local obrace = "(" * spc
+  local tbl_obrace = "{" * spc
+  local eqsign = spc * "=" * spc
+  local tbl_ebrace = spc * "}"
+  local ebrace = spc * ")"
+  local comma = spc * "," * spc
+  local sel_separator = spc * l.S ";*" * spc
+
+  return l.P {
+    "LIST";
+    LIST = l.Ct(l.V("EXPR")) * (sel_separator * l.Ct(l.V("EXPR"))) ^ 0,
+    EXPR = l.V("FUNCTION") * (colon * l.V("METHOD")) ^ -1 * (dot * l.V("PROCESSOR")) ^ 0,
+    PROCESSOR = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+    FUNCTION = l.Ct(atom * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+    -- Method names are prefixed with `__` to tell them from processors
+    METHOD = l.Ct(atom / function(e)
+      return '__' .. e
+    end * spc * (obrace * l.V("ARG_LIST") * ebrace) ^ 0),
+    ARG_LIST = l.Ct((l.V("ARG") * comma ^ 0) ^ 0),
+    ARG = l.Cf(tbl_obrace * l.V("NAMED_ARG") * tbl_ebrace, rawset) + argument + l.V("LIST_ARGS"),
+    NAMED_ARG = (l.Ct("") * l.Cg(argument * eqsign * (argument + l.V("LIST_ARGS")) * comma ^ 0) ^ 0),
+    LIST_ARGS = l.Ct(tbl_obrace * l.V("LIST_ARG") * tbl_ebrace),
+    LIST_ARG = l.Cg(argument * comma ^ 0) ^ 0,
+  }
+end
+
+local parser = make_grammar()
+
+--[[[
+-- @function lua_selectors.parse_selector(cfg, str)
+-- Parses a selector string into a list of selectors, each one being a table
+-- with `selector` (the extractor with its arguments) and `processor_pipe`.
+-- Returns nil if the string cannot be parsed, or refers to an unknown
+-- extractor/processor, or the arguments do not match the schema
+--]]
+exports.parse_selector = function(cfg, str)
+  local parsed = { parser:match(str) }
+  local output = {}
+
+  if not parsed or not parsed[1] then
+    return nil
+  end
+
+  -- Validates `args` against `schema` and replaces them with the transformed
+  -- values. The schema is either a single object with a metatable that covers
+  -- all the arguments, or a plain array of per-argument schemas
+  local function check_args(name, schema, args)
+    if schema then
+      if getmetatable(schema) then
+        -- Schema covers all arguments
+        local res, err = schema:transform(args)
+        if not res then
+          logger.errx(cfg, 'invalid arguments for %s: %s', name, err)
+          return false
+        else
+          for i, elt in ipairs(res) do
+            args[i] = elt
+          end
+        end
+      else
+        for i, selt in ipairs(schema) do
+          local res, err = selt:transform(args[i])
+
+          if err then
+            logger.errx(cfg, 'invalid arguments for %s: argument number: %s, error: %s', name, i, err)
+            return false
+          else
+            args[i] = res
+          end
+        end
+      end
+    end
+
+    return true
+  end
+
+  -- Output AST format is the following:
+  -- table of individual selectors
+  -- each selector: list of functions
+  -- each function: function name + optional list of arguments
+  for _, sel in ipairs(parsed) do
+    local res = {
+      selector = {},
+      processor_pipe = {},
+    }
+
+    local selector_tbl = sel[1]
+    if not selector_tbl then
+      logger.errx(cfg, 'no selector represented')
+      return nil
+    end
+    if not extractors[selector_tbl[1]] then
+      logger.errx(cfg, 'selector %s is unknown', selector_tbl[1])
+      return nil
+    end
+
+    res.selector = lua_util.shallowcopy(extractors[selector_tbl[1]])
+    res.selector.name = selector_tbl[1]
+    -- A fresh table and not the shared `E`, as check_args writes into it
+    res.selector.args = selector_tbl[2] or {}
+
+    if not check_args(res.selector.name,
+        res.selector.args_schema,
+        res.selector.args) then
+      return nil
+    end
+
+    lua_util.debugm(M, cfg, 'processed selector %s, args: %s',
+        res.selector.name, res.selector.args)
+
+    local pipeline_error = false
+    -- Now process processors pipe
+    fun.each(function(proc_tbl)
+      local proc_name = proc_tbl[1]
+
+      if proc_name:match('^__') then
+        -- Special case - method
+        local method_name = proc_name:match('^__(.*)$')
+        -- Check array indexing...
+        if tonumber(method_name) then
+          method_name = tonumber(method_name)
+        end
+        local processor = {
+          name = tostring(method_name),
+          method = true,
+          args = proc_tbl[2] or E,
+          types = {
+            userdata = true,
+            table = true,
+            string = true,
+          },
+          map_type = 'string',
+          process = function(inp, t, args)
+            local ret
+            if t == 'table' then
+              -- Plain table field
+              ret = inp[method_name]
+            else
+              -- We call method unpacking arguments and dropping all but the first result returned
+              ret = (inp[method_name](inp, unpack_function(args or E)))
+            end
+
+            local ret_type = type(ret)
+
+            if ret_type == 'nil' then
+              return nil
+            end
+            -- Now apply types heuristic
+            if ret_type == 'string' then
+              return ret, 'string'
+            elseif ret_type == 'table' then
+              -- TODO: we need to ensure that 1) table is numeric 2) table has merely strings
+              return ret, 'string_list'
+            else
+              return implicit_tostring(ret_type, ret)
+            end
+          end,
+        }
+        lua_util.debugm(M, cfg, 'attached method %s to selector %s, args: %s',
+            proc_name, res.selector.name, processor.args)
+        table.insert(res.processor_pipe, processor)
+      else
+
+        if not transform_function[proc_name] then
+          logger.errx(cfg, 'processor %s is unknown', proc_name)
+          pipeline_error = proc_name
+          return nil
+        end
+        local processor = lua_util.shallowcopy(transform_function[proc_name])
+        processor.name = proc_name
+        -- A fresh table and not the shared `E`, as check_args writes into it
+        processor.args = proc_tbl[2] or {}
+
+        if not check_args(processor.name, processor.args_schema, processor.args) then
+          pipeline_error = 'args schema for ' .. proc_name
+          return nil
+        end
+
+        lua_util.debugm(M, cfg, 'attached processor %s to selector %s, args: %s',
+            proc_name, res.selector.name, processor.args)
+        table.insert(res.processor_pipe, processor)
+      end
+    end, fun.tail(sel))
+
+    if pipeline_error then
+      logger.errx(cfg, 'unknown or invalid processor used: "%s", exiting', pipeline_error)
+      return nil
+    end
+
+    table.insert(output, res)
+  end
+
+  return output
+end
+
+--[[[
+-- @function lua_selectors.register_extractor(cfg, name, selector)
+-- Registers (or redefines) an extractor; `selector` must have `get_value`.
+-- Returns true on success and false otherwise
+--]]
+exports.register_extractor = function(cfg, name, selector)
+  if selector.get_value then
+    if extractors[name] then
+      logger.warnx(cfg, 'redefining selector %s', name)
+    end
+    extractors[name] = selector
+
+    return true
+  end
+
+  logger.errx(cfg, 'bad selector %s', name)
+  return false
+end
+
+--[[[
+-- @function lua_selectors.register_transform(cfg, name, transform)
+-- Registers (or redefines) a transform; `transform` must have `process` and
+-- `types`. Returns true on success and false otherwise
+--]]
+exports.register_transform = function(cfg, name, transform)
+  if transform.process and transform.types then
+    if transform_function[name] then
+      logger.warnx(cfg, 'redefining transform function %s', name)
+    end
+    transform_function[name] = transform
+
+    return true
+  end
+
+  logger.errx(cfg, 'bad transform function %s', name)
+  return false
+end
+
+--[[[
+-- @function lua_selectors.process_selectors(task, selectors_pipe)
+-- Processes all parsed selectors for a task; returns a list of their values
+-- or nil if any of them yields nothing
+--]]
+exports.process_selectors = function(task, selectors_pipe)
+  local ret = {}
+
+  for _, sel in ipairs(selectors_pipe) do
+    local r = process_selector(task, sel)
+
+    -- If any element is nil, then the whole selector is nil
+    if not r then
+      return nil
+    end
+    table.insert(ret, r)
+  end
+
+  return ret
+end
+
+--[[[
+-- @function lua_selectors.combine_selectors(task, selectors, delimiter)
+-- Joins selectors values using `delimiter` (empty by default). If there are
+-- lists among the values, then a list of joined strings is returned
+--]]
+exports.combine_selectors = function(_, selectors, delimiter)
+  if not delimiter then
+    delimiter = ''
+  end
+
+  if not selectors then
+    return nil
+  end
+
+  local have_tables, have_userdata
+
+  for _, s in ipairs(selectors) do
+    if type(s) == 'table' then
+      have_tables = true
+    elseif type(s) == 'userdata' then
+      have_userdata = true
+    end
+  end
+
+  if not have_tables then
+    if not have_userdata then
+      return table.concat(selectors, delimiter)
+    else
+      return rspamd_text.fromtable(selectors, delimiter)
+    end
+  else
+    -- We need to do a spill on each table selector and make a cartesian product
+    -- e.g. s:tbl:s -> s:telt1:s + s:telt2:s ...
+    local tbl = {}
+    local res = {}
+
+    for i, s in ipairs(selectors) do
+      if type(s) == 'string' then
+        rawset(tbl, i, fun.duplicate(s))
+      elseif type(s) == 'userdata' then
+        rawset(tbl, i, fun.duplicate(tostring(s)))
+      else
+        -- Raw table
+        rawset(tbl, i, fun.map(tostring, s))
+      end
+    end
+
+    fun.each(function(...)
+      table.insert(res, table.concat({ ... }, delimiter))
+    end, fun.zip(lua_util.unpack(tbl)))
+
+    return res
+  end
+end
+
+--[[[
+-- @function lua_selectors.flatten_selectors(log_obj, selectors)
+-- Convert selectors to a flat table of elements
+--]]
+exports.flatten_selectors = function(_, selectors, _)
+  local res = {}
+
+  local function fill(tbl)
+    for _, s in ipairs(tbl) do
+      if type(s) == 'string' then
+        rawset(res, #res + 1, s)
+      elseif type(s) == 'userdata' then
+        rawset(res, #res + 1, tostring(s))
+      else
+        fill(s)
+      end
+    end
+  end
+
+  fill(selectors)
+
+  return res
+end
+
+--[[[
+-- @function lua_selectors.kv_table_from_pairs(log_obj, selectors)
+-- Convert selectors to a table where the odd elements are keys and even are elements
+-- Similarly to make a map from (k, v) pairs list
+-- To specify the concrete constant keys, one can use the `id` extractor
+--]]
+exports.kv_table_from_pairs = function(log_obj, selectors, _)
+  local res = {}
+
+  local function fill(tbl)
+    local tbl_len = #tbl
+    if tbl_len % 2 ~= 0 or tbl_len == 0 then
+      logger.errx(log_obj, "invalid invocation of the `kv_table_from_pairs`: table length is invalid %s",
+          tbl_len)
+      return
+    end
+    for i = 1, tbl_len, 2 do
+      local k = tostring(tbl[i])
+      local v = tbl[i + 1]
+      if type(v) == 'string' then
+        res[k] = v
+      elseif type(v) == 'userdata' then
+        res[k] = tostring(v)
+      else
+        res[k] = fun.totable(fun.map(function(elt)
+          return tostring(elt)
+        end, v))
+      end
+    end
+  end
+
+  fill(selectors)
+
+  return res
+end
+
+
+--[[[
+-- @function lua_selectors.create_selector_closure_fn(log_obj, cfg, selector_str, delimiter, fn)
+-- Creates a closure from a string selector, using the specific combinator function
+-- Returns nil if the selector cannot be parsed. `fn` is called as
+-- `fn(log_obj, values, delimiter)`
+--]]
+exports.create_selector_closure_fn = function(log_obj, cfg, selector_str, delimiter, fn)
+  local selector = exports.parse_selector(cfg, selector_str)
+
+  if not selector then
+    return nil
+  end
+
+  return function(task)
+    local res = exports.process_selectors(task, selector)
+
+    if res then
+      return fn(log_obj, res, delimiter)
+    end
+
+    return nil
+  end
+end
+
+--[[[
+-- @function lua_selectors.create_selector_closure(cfg, selector_str, delimiter='', flatten=false)
+-- Creates a closure from a string selector
+--]]
+exports.create_selector_closure = function(cfg, selector_str, delimiter, flatten)
+  local combinator_fn = flatten and exports.flatten_selectors or exports.combine_selectors
+
+  return exports.create_selector_closure_fn(nil, cfg, selector_str, delimiter, combinator_fn)
+end
+
+-- Returns a copy of a registry (extractors or transforms) without functions
+local function display_selectors(tbl)
+  return fun.tomap(fun.map(function(k, v)
+    return k, fun.tomap(fun.filter(function(_, vv)
+      return type(vv) ~= 'function'
+    end, v))
+  end, tbl))
+end
+
+exports.list_extractors = function()
+  return display_selectors(extractors)
+end
+
+exports.list_transforms = function()
+  return display_selectors(transform_function)
+end
+
+-- Registers a named map for the selectors; an already registered name is
+-- kept and an error is logged
+exports.add_map = function(name, map)
+  if not exports.maps[name] then
+    exports.maps[name] = map
+  else
+    logger.errx(rspamd_config, "duplicate map redefinition for the selectors: %s", name)
+  end
+end
+
+-- Publish log target
+exports.M = M
+
+return exports
diff --git a/lualib/lua_selectors/maps.lua b/lualib/lua_selectors/maps.lua
new file mode 100644
index 0000000..85b54a6
--- /dev/null
+++ 
-- lualib/lua_selectors/maps.lua
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

-- Registry shared within the selectors modules; entries are indexed by map name.
-- The module starts out empty and simply hands the same table to every
-- `require`-ing module.
local shared_maps = {}

return shared_maps
-- diff --git a/lualib/lua_selectors/transforms.lua b/lualib/lua_selectors/transforms.lua
-- new file mode 100644, index 0000000..6c6bc71
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

local fun = require 'fun'
local lua_util = require "lua_util"
local rspamd_util = require "rspamd_util"
local ts = require("tableshape").types
local logger = require 'rspamd_logger'
local common = require "lua_selectors/common"
local M = "selectors"

local maps = require "lua_selectors/maps"

--- Strips the `_list` suffix from a type name.
-- @param ltype type name such as `string_list`
-- @return the element type name (e.g. `string`), or nil when `ltype` does not
--   end with `_list`
local function pure_type(ltype)
  return ltype:match('^(.*)_list$')
end

--[[
Table of transform functions, indexed by name. Every entry has:
  * `types`       - set of input type names the transform accepts
  * `map_type`    - (optional) type name; consumed outside of this file
  * `process`     - function(inp, t, args) returning `value, type_name`,
                    or nil when there is no result
  * `description` - human readable description
  * `args_schema` - (optional) tableshape schema for `args`
]]--
local transform_function = {
  -- Returns the lowercased string
  ['lower'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _)
      return inp:lower(), 'string'
    end,
    ['description'] = 'Returns the lowercased string',
  },
  -- Returns the lowercased utf8 string
  ['lower_utf8'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t)
      return rspamd_util.lower_utf8(inp), t
    end,
    ['description'] = 'Returns the lowercased utf8 string',
  },
  -- Returns the first element
  ['first'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t)
      return fun.head(inp), pure_type(t)
    end,
    ['description'] = 'Returns the first element',
  },
  -- Returns the last element
  ['last'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t)
      return fun.nth(fun.length(inp), inp), pure_type(t)
    end,
    ['description'] = 'Returns the last element',
  },
  -- Returns the nth element
  ['nth'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t, args)
      return fun.nth(args[1] or 1, inp), pure_type(t)
    end,
    ['description'] = 'Returns the nth element',
    ['args_schema'] = { ts.number + ts.string / tonumber }
  },
  -- Returns the n first elements
  ['take_n'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t, args)
      return fun.take_n(args[1] or 1, inp), t
    end,
    ['description'] = 'Returns the n first elements',
    ['args_schema'] = { ts.number + ts.string / tonumber }
  },
  -- Returns list without the first n elements
  ['drop_n'] = {
    ['types'] = {
      ['list'] = true,
    },
    ['process'] = function(inp, t, args)
      return fun.drop_n(args[1] or 1, inp), t
    end,
    ['description'] = 'Returns list without the first n elements',
    ['args_schema'] = { ts.number + ts.string / tonumber }
  },
  -- Joins strings into a single string using separator in the argument
  ['join'] = {
    ['types'] = {
      ['string_list'] = true
    },
    ['process'] = function(inp, _, args)
      return table.concat(fun.totable(inp), args[1] or ''), 'string'
    end,
    ['description'] = 'Joins strings into a single string using separator in the argument',
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- Joins strings into a set of strings using N elements and a separator in the argument
  ['join_nth'] = {
    ['types'] = {
      ['string_list'] = true
    },
    ['process'] = function(inp, _, args)
      -- tonumber keeps accepting a numeric string, as the loop arithmetic did before
      local step = tonumber(args[1])
      local sep = args[2] or ''

      -- A missing or non-positive group size can never make progress
      if not step or step < 1 then
        logger.errx('invalid group size for join_nth: %s', args[1])
        return nil
      end
      step = math.floor(step)

      local inp_t = fun.totable(inp)
      local total = #inp_t
      local res = {}

      for i = 1, total, step do
        -- Each group covers exactly `step` elements: [i, i + step - 1].
        -- The last group is clipped to the input length, because
        -- table.concat raises an error when it meets a nil slot.
        res[#res + 1] = table.concat(inp_t, sep, i, math.min(i + step - 1, total))
      end
      return res, 'string_list'
    end,
    ['description'] = 'Joins strings into a set of strings using N elements and a separator in the argument',
    ['args_schema'] = { ts.number + ts.string / tonumber, ts.string:is_optional() }
  },
  -- Joins tables into a table of strings
  ['join_tables'] = {
    ['types'] = {
      ['list'] = true
    },
    ['process'] = function(inp, _, args)
      local sep = args[1] or ''
      return fun.map(function(t)
        return table.concat(t, sep)
      end, inp), 'string_list'
    end,
    ['description'] = 'Joins tables into a table of strings',
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- Sort strings
  ['sort'] = {
    ['types'] = {
      ['list'] = true
    },
    ['process'] = function(inp, t, _)
      -- Sorts in place and hands the very same table back.
      -- NOTE(review): table.sort only orders a plain array table; confirm
      -- that the input is never a lazy iterator such as the ones other
      -- transforms here return from fun.map/fun.filter.
      table.sort(inp)
      return inp, t
    end,
    ['description'] = 'Sort strings lexicographically',
  },
  -- Return unique elements based on hashing (can work without sorting)
  ['uniq'] = {
    ['types'] = {
      ['list'] = true
    },
    ['process'] = function(inp, t, _)
      -- Use the values as keys of a temporary table to drop duplicates
      local tmp = {}
      fun.each(function(val)
        tmp[val] = true
      end, inp)

      return fun.map(function(k, _)
        return k
      end, tmp), t
    end,
    ['description'] = 'Returns a list of unique elements (using a hash table)',
  },
  -- Create a digest from a string
  ['digest'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      return common.create_digest(inp, args), 'string'
    end,
    ['description'] = [[Create a digest from a string.
The first argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
    ['args_schema'] = common.digest_schema()
  },
  -- Extracts substring
  ['substring'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local start_pos = args[1] or 1
      local end_pos = args[2] or -1

      return inp:sub(start_pos, end_pos), 'string'
    end,
    ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
    ['args_schema'] = { (ts.number + ts.string / tonumber):is_optional(),
                        (ts.number + ts.string / tonumber):is_optional() }
  },
  -- Prepends a string or a strings list
  ['prepend'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local prepend = table.concat(args, '')

      return prepend .. inp, 'string'
    end,
    ['description'] = 'Prepends a string or a strings list',
  },
  -- Appends a string or a strings list
  ['append'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local append = table.concat(args, '')

      return inp .. append, 'string'
    end,
    ['description'] = 'Appends a string or a strings list',
  },
  -- Regexp matching
  ['regexp'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local rspamd_regexp = require "rspamd_regexp"

      local re = rspamd_regexp.create_cached(args[1])

      if not re then
        logger.errx('invalid regexp: %s', args[1])
        return nil
      end

      local res = re:search(inp, false, true)

      if res then
        -- Map all results in a single list
        local flattened_table = {}
        local function flatten_table(tbl)
          for _, v in ipairs(tbl) do
            if type(v) == 'table' then
              flatten_table(v)
            else
              table.insert(flattened_table, v)
            end
          end
        end
        flatten_table(res)
        return flattened_table, 'string_list'
      end

      return nil
    end,
    ['description'] = 'Regexp matching, returns all matches flattened in a single list',
    ['args_schema'] = { ts.string }
  },
  -- Returns a value if it exists in some map (or acts like a `filter` function)
  ['filter_map'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t, args)
      local map = maps[args[1]]

      if not map then
        logger.errx('invalid map name: %s', args[1])
        return nil
      end

      local res = map:get_key(inp)

      if res then
        return inp, t
      end

      return nil
    end,
    ['description'] = 'Returns a value if it exists in some map (or acts like a `filter` function)',
    ['args_schema'] = { ts.string }
  },
  -- Returns a value if it does NOT exist in some map (the opposite of `filter_map`)
  ['except_map'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t, args)
      local map = maps[args[1]]

      if not map then
        logger.errx('invalid map name: %s', args[1])
        return nil
      end

      local res = map:get_key(inp)

      if not res then
        return inp, t
      end

      return nil
    end,
    ['description'] = 'Returns a value if it does not exists in some map (or acts like a `except` function)',
    ['args_schema'] = { ts.string }
  },
  -- Returns a value from some map corresponding to some key (or acts like a `map` function)
  ['apply_map'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t, args)
      local map = maps[args[1]]

      if not map then
        logger.errx('invalid map name: %s', args[1])
        return nil
      end

      local res = map:get_key(inp)

      if res then
        return res, t
      end

      return nil
    end,
    ['description'] = 'Returns a value from some map corresponding to some key (or acts like a `map` function)',
    ['args_schema'] = { ts.string }
  },
  -- Drops input value and return values from function's arguments or an empty string
  ['id'] = {
    ['types'] = {
      ['string'] = true,
      ['list'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(_, _, args)
      if args[1] and args[2] then
        -- Two or more arguments: return them all as a list
        return fun.map(tostring, args), 'string_list'
      elseif args[1] then
        return args[1], 'string'
      end

      return '', 'string'
    end,
    ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
    ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
  },
  -- Boolean function equal, returns either nil or its input if input is equal to the argument
  ['equal'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      if inp == args[1] then
        return inp, 'string'
      end

      return nil
    end,
    ['description'] = [[Boolean function equal.
Returns either nil or its argument if input is equal to argument]],
    ['args_schema'] = { ts.string }
  },
  -- Boolean function in, returns either nil or its input if input is in args list
  ['in'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t, args)
      for _, a in ipairs(args) do
        if a == inp then
          return inp, t
        end
      end
      return nil
    end,
    ['description'] = [[Boolean function in.
Returns either nil or its input if input is in args list]],
    ['args_schema'] = ts.array_of(ts.string)
  },
  -- Boolean function not in, returns either nil or its input if input is not in args list
  ['not_in'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, t, args)
      for _, a in ipairs(args) do
        if a == inp then
          return nil
        end
      end
      return inp, t
    end,
    ['description'] = [[Boolean function not in.
Returns either nil or its input if input is not in args list]],
    ['args_schema'] = ts.array_of(ts.string)
  },
  -- Inverses input
  ['inverse'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      -- NOTE(review): every Lua string (including '') is truthy, so the
      -- `else` branch only runs for a nil/false input; confirm with the
      -- caller whether such input can actually reach this transform.
      if inp then
        return nil
      else
        return (args[1] or 'true'), 'string'
      end
    end,
    ['description'] = [[Inverses input.
Empty string comes the first argument or 'true', non-empty string comes nil]],
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- Applies mask to IP address
  ['ipmask'] = {
    ['types'] = {
      ['string'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local rspamd_ip = require "rspamd_ip"
      -- Non optimal: convert string to an IP address
      local ip = rspamd_ip.from_string(inp)

      if not ip or not ip:is_valid() then
        lua_util.debugm(M, "cannot convert %s to IP", inp)
        return nil
      end

      if ip:get_version() == 4 then
        local mask = tonumber(args[1])

        return ip:apply_mask(mask):to_string(), 'string'
      else
        -- IPv6 takes the second argument or the first one...
        local mask_str = args[2] or args[1]
        local mask = tonumber(mask_str)

        return ip:apply_mask(mask):to_string(), 'string'
      end
    end,
    ['description'] = 'Applies mask to IP address.' ..
        ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
    ['args_schema'] = { (ts.number + ts.string / tonumber),
                        (ts.number + ts.string / tonumber):is_optional() }
  },
  -- Returns the string(s) with all non ascii chars replaced
  ['to_ascii'] = {
    ['types'] = {
      ['string'] = true,
      ['list'] = true,
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      -- The same replacement is used for both the scalar and the list input
      local replacement = args[1] or '?'

      local function strip_non_ascii(s)
        -- The outer parentheses drop gsub's second result (the substitution count)
        return (string.gsub(tostring(s), '[\128-\255]', replacement))
      end

      if type(inp) == 'table' then
        return fun.map(strip_non_ascii, inp), 'string_list'
      else
        return strip_non_ascii(inp), 'string'
      end
    end,
    ['description'] = 'Returns the string with all non-ascii bytes replaced with the character ' ..
        'given as first argument or `?`',
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- Extracts tld from a hostname
  ['get_tld'] = {
    ['types'] = {
      ['string'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, _)
      return rspamd_util.get_tld(inp), 'string'
    end,
    ['description'] = 'Extracts tld from a hostname represented as a string',
    ['args_schema'] = {}
  },
  -- Converts list of strings to numbers and returns a packed string
  ['pack_numbers'] = {
    ['types'] = {
      ['string_list'] = true
    },
    ['map_type'] = 'string',
    ['process'] = function(inp, _, args)
      local fmt = args[1] or 'f'
      local res = {}
      for _, s in ipairs(inp) do
        table.insert(res, tonumber(s))
      end
      -- The format is repeated once per number that has been collected
      return rspamd_util.pack(string.rep(fmt, #res), lua_util.unpack(res)), 'string'
    end,
    ['description'] = 'Converts a list of strings to numbers & returns a packed string',
    ['args_schema'] = { ts.string:is_optional() }
  },
  -- Filter nils from a list
  ['filter_string_nils'] = {
    ['types'] = {
      ['string_list'] = true
    },
    ['process'] = function(inp, _, _)
      -- Keeps only real strings that are not the literal text 'nil'
      return fun.filter(function(val)
        return type(val) == 'string' and val ~= 'nil'
      end, inp), 'string_list'
    end,
    ['description'] = 'Removes all nils from a list of strings (when converted implicitly)',
    ['args_schema'] = {}
  },
  -- Call a set of methods on a userdata object
  ['apply_methods'] = {
    ['types'] = {
      ['userdata'] = true,
    },
    ['process'] = function(inp, _, args)
      local res = {}
      for _, arg in ipairs(args) do
        local meth = inp[arg]
        if type(meth) == 'function' then
          local ret = meth(inp)
          -- Methods returning nil/false contribute nothing to the result
          if ret then
            table.insert(res, tostring(ret))
          end
        else
          -- Report an unknown method instead of failing on a nil call
          logger.errx('invalid method name: %s', arg)
        end
      end
      return res, 'string_list'
    end,
    ['description'] = 'Apply a list of method calls to the userdata object',
  },
  -- Apply method to list of userdata and use it as a filter, excluding elements for which method returns false/nil
  ['filter_method'] = {
    ['types'] = {
      ['userdata_list'] = true
    },
    ['process'] = function(inp, _, args)
      local meth = args[1]

      if not meth then
        logger.errx('invalid method name: %s', args[1])
        return nil
      end

      return fun.filter(function(val)
        return val[meth](val)
      end, inp), 'userdata_list'
    end,
    ['description'] = 'Apply method to list of userdata and use it as a filter,' ..
        ' excluding elements for which method returns false/nil',
    ['args_schema'] = { ts.string }
  },
}

-- `match` is an alias of `regexp`: both names refer to the same entry
transform_function.match = transform_function.regexp

return transform_function