summaryrefslogtreecommitdiffstats
path: root/lualib/lua_selectors/transforms.lua
diff options
context:
space:
mode:
Diffstat (limited to 'lualib/lua_selectors/transforms.lua')
-rw-r--r--lualib/lua_selectors/transforms.lua571
1 files changed, 571 insertions, 0 deletions
diff --git a/lualib/lua_selectors/transforms.lua b/lualib/lua_selectors/transforms.lua
new file mode 100644
index 0000000..6c6bc71
--- /dev/null
+++ b/lualib/lua_selectors/transforms.lua
@@ -0,0 +1,571 @@
+--[[
+Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local fun = require 'fun'
+local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
+local ts = require("tableshape").types
+local logger = require 'rspamd_logger'
+local common = require "lua_selectors/common"
+local M = "selectors"
+
+local maps = require "lua_selectors/maps"
+
+local function pure_type(ltype)
+ return ltype:match('^(.*)_list$')
+end
+
+local transform_function = {
+ -- Returns the lowercased string
+ ['lower'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _)
+ return inp:lower(), 'string'
+ end,
+ ['description'] = 'Returns the lowercased string',
+ },
+ -- Returns the lowercased utf8 string
+ ['lower_utf8'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t)
+ return rspamd_util.lower_utf8(inp), t
+ end,
+ ['description'] = 'Returns the lowercased utf8 string',
+ },
+ -- Returns the first element
+ ['first'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t)
+ return fun.head(inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the first element',
+ },
+ -- Returns the last element
+ ['last'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t)
+ return fun.nth(fun.length(inp), inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the last element',
+ },
+ -- Returns the nth element
+ ['nth'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.nth(args[1] or 1, inp), pure_type(t)
+ end,
+ ['description'] = 'Returns the nth element',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ ['take_n'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.take_n(args[1] or 1, inp), t
+ end,
+ ['description'] = 'Returns the n first elements',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ ['drop_n'] = {
+ ['types'] = {
+ ['list'] = true,
+ },
+ ['process'] = function(inp, t, args)
+ return fun.drop_n(args[1] or 1, inp), t
+ end,
+ ['description'] = 'Returns list without the first n elements',
+ ['args_schema'] = { ts.number + ts.string / tonumber }
+ },
+ -- Joins strings into a single string using separator in the argument
+ ['join'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ return table.concat(fun.totable(inp), args[1] or ''), 'string'
+ end,
+ ['description'] = 'Joins strings into a single string using separator in the argument',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Joins strings into a set of strings using N elements and a separator in the argument
+ ['join_nth'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ local step = args[1]
+ local sep = args[2] or ''
+ local inp_t = fun.totable(inp)
+ local res = {}
+
+ for i = 1, #inp_t, step do
+ table.insert(res, table.concat(inp_t, sep, i, i + step))
+ end
+ return res, 'string_list'
+ end,
+ ['description'] = 'Joins strings into a set of strings using N elements and a separator in the argument',
+ ['args_schema'] = { ts.number + ts.string / tonumber, ts.string:is_optional() }
+ },
+ -- Joins tables into a table of strings
+ ['join_tables'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, _, args)
+ local sep = args[1] or ''
+ return fun.map(function(t)
+ return table.concat(t, sep)
+ end, inp), 'string_list'
+ end,
+ ['description'] = 'Joins tables into a table of strings',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Sort strings
+ ['sort'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, t, _)
+ table.sort(inp)
+ return inp, t
+ end,
+ ['description'] = 'Sort strings lexicographically',
+ },
+ -- Return unique elements based on hashing (can work without sorting)
+ ['uniq'] = {
+ ['types'] = {
+ ['list'] = true
+ },
+ ['process'] = function(inp, t, _)
+ local tmp = {}
+ fun.each(function(val)
+ tmp[val] = true
+ end, inp)
+
+ return fun.map(function(k, _)
+ return k
+ end, tmp), t
+ end,
+ ['description'] = 'Returns a list of unique elements (using a hash table)',
+ },
+ -- Create a digest from string or a list of strings
+ ['digest'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ return common.create_digest(inp, args), 'string'
+ end,
+ ['description'] = [[Create a digest from a string.
+The first argument is encoding (`hex`, `base32` (and forms `bleach32`, `rbase32`), `base64`),
+the second argument is optional hash type (`blake2`, `sha256`, `sha1`, `sha512`, `md5`)]],
+ ['args_schema'] = common.digest_schema()
+ },
+ -- Extracts substring
+ ['substring'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local start_pos = args[1] or 1
+ local end_pos = args[2] or -1
+
+ return inp:sub(start_pos, end_pos), 'string'
+ end,
+ ['description'] = 'Extracts substring; the first argument is start, the second is the last (like in Lua)',
+ ['args_schema'] = { (ts.number + ts.string / tonumber):is_optional(),
+ (ts.number + ts.string / tonumber):is_optional() }
+ },
+ -- Prepends a string or a strings list
+ ['prepend'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local prepend = table.concat(args, '')
+
+ return prepend .. inp, 'string'
+ end,
+ ['description'] = 'Prepends a string or a strings list',
+ },
+ -- Appends a string or a strings list
+ ['append'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local append = table.concat(args, '')
+
+ return inp .. append, 'string'
+ end,
+ ['description'] = 'Appends a string or a strings list',
+ },
+ -- Regexp matching
+ ['regexp'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local rspamd_regexp = require "rspamd_regexp"
+
+ local re = rspamd_regexp.create_cached(args[1])
+
+ if not re then
+ logger.errx('invalid regexp: %s', args[1])
+ return nil
+ end
+
+ local res = re:search(inp, false, true)
+
+ if res then
+ -- Map all results in a single list
+ local flattened_table = {}
+ local function flatten_table(tbl)
+ for _, v in ipairs(tbl) do
+ if type(v) == 'table' then
+ flatten_table(v)
+ else
+ table.insert(flattened_table, v)
+ end
+ end
+ end
+ flatten_table(res)
+ return flattened_table, 'string_list'
+ end
+
+ return nil
+ end,
+ ['description'] = 'Regexp matching, returns all matches flattened in a single list',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value if it exists in some map (or acts like a `filter` function)
+ ['filter_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if res then
+ return inp, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value if it exists in some map (or acts like a `filter` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value if it exists in some map (or acts like a `filter` function)
+ ['except_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if not res then
+ return inp, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value if it does not exists in some map (or acts like a `except` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Returns a value from some map corresponding to some key (or acts like a `map` function)
+ ['apply_map'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ local map = maps[args[1]]
+
+ if not map then
+ logger.errx('invalid map name: %s', args[1])
+ return nil
+ end
+
+ local res = map:get_key(inp)
+
+ if res then
+ return res, t
+ end
+
+ return nil
+ end,
+ ['description'] = 'Returns a value from some map corresponding to some key (or acts like a `map` function)',
+ ['args_schema'] = { ts.string }
+ },
+ -- Drops input value and return values from function's arguments or an empty string
+ ['id'] = {
+ ['types'] = {
+ ['string'] = true,
+ ['list'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(_, _, args)
+ if args[1] and args[2] then
+ return fun.map(tostring, args), 'string_list'
+ elseif args[1] then
+ return args[1], 'string'
+ end
+
+ return '', 'string'
+ end,
+ ['description'] = 'Drops input value and return values from function\'s arguments or an empty string',
+ ['args_schema'] = (ts.string + ts.array_of(ts.string)):is_optional()
+ },
+ ['equal'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if inp == args[1] then
+ return inp, 'string'
+ end
+
+ return nil
+ end,
+ ['description'] = [[Boolean function equal.
+Returns either nil or its argument if input is equal to argument]],
+ ['args_schema'] = { ts.string }
+ },
+ -- Boolean function in, returns either nil or its input if input is in args list
+ ['in'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ for _, a in ipairs(args) do
+ if a == inp then
+ return inp, t
+ end
+ end
+ return nil
+ end,
+ ['description'] = [[Boolean function in.
+Returns either nil or its input if input is in args list]],
+ ['args_schema'] = ts.array_of(ts.string)
+ },
+ ['not_in'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, t, args)
+ for _, a in ipairs(args) do
+ if a == inp then
+ return nil
+ end
+ end
+ return inp, t
+ end,
+ ['description'] = [[Boolean function not in.
+Returns either nil or its input if input is not in args list]],
+ ['args_schema'] = ts.array_of(ts.string)
+ },
+ ['inverse'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if inp then
+ return nil
+ else
+ return (args[1] or 'true'), 'string'
+ end
+ end,
+ ['description'] = [[Inverses input.
+Empty string comes the first argument or 'true', non-empty string comes nil]],
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ ['ipmask'] = {
+ ['types'] = {
+ ['string'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local rspamd_ip = require "rspamd_ip"
+ -- Non optimal: convert string to an IP address
+ local ip = rspamd_ip.from_string(inp)
+
+ if not ip or not ip:is_valid() then
+ lua_util.debugm(M, "cannot convert %s to IP", inp)
+ return nil
+ end
+
+ if ip:get_version() == 4 then
+ local mask = tonumber(args[1])
+
+ return ip:apply_mask(mask):to_string(), 'string'
+ else
+ -- IPv6 takes the second argument or the first one...
+ local mask_str = args[2] or args[1]
+ local mask = tonumber(mask_str)
+
+ return ip:apply_mask(mask):to_string(), 'string'
+ end
+ end,
+ ['description'] = 'Applies mask to IP address.' ..
+ ' The first argument is the mask for IPv4 addresses, the second is the mask for IPv6 addresses.',
+ ['args_schema'] = { (ts.number + ts.string / tonumber),
+ (ts.number + ts.string / tonumber):is_optional() }
+ },
+ -- Returns the string(s) with all non ascii chars replaced
+ ['to_ascii'] = {
+ ['types'] = {
+ ['string'] = true,
+ ['list'] = true,
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ if type(inp) == 'table' then
+ return fun.map(
+ function(s)
+ return string.gsub(tostring(s), '[\128-\255]', args[1] or '?')
+ end, inp), 'string_list'
+ else
+ return string.gsub(tostring(inp), '[\128-\255]', '?'), 'string'
+ end
+ end,
+ ['description'] = 'Returns the string with all non-ascii bytes replaced with the character ' ..
+ 'given as second argument or `?`',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Extracts tld from a hostname
+ ['get_tld'] = {
+ ['types'] = {
+ ['string'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, _)
+ return rspamd_util.get_tld(inp), 'string'
+ end,
+ ['description'] = 'Extracts tld from a hostname represented as a string',
+ ['args_schema'] = {}
+ },
+ -- Converts list of strings to numbers and returns a packed string
+ ['pack_numbers'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['map_type'] = 'string',
+ ['process'] = function(inp, _, args)
+ local fmt = args[1] or 'f'
+ local res = {}
+ for _, s in ipairs(inp) do
+ table.insert(res, tonumber(s))
+ end
+ return rspamd_util.pack(string.rep(fmt, #res), lua_util.unpack(res)), 'string'
+ end,
+ ['description'] = 'Converts a list of strings to numbers & returns a packed string',
+ ['args_schema'] = { ts.string:is_optional() }
+ },
+ -- Filter nils from a list
+ ['filter_string_nils'] = {
+ ['types'] = {
+ ['string_list'] = true
+ },
+ ['process'] = function(inp, _, _)
+ return fun.filter(function(val)
+ return type(val) == 'string' and val ~= 'nil'
+ end, inp), 'string_list'
+ end,
+ ['description'] = 'Removes all nils from a list of strings (when converted implicitly)',
+ ['args_schema'] = {}
+ },
+ -- Call a set of methods on a userdata object
+ ['apply_methods'] = {
+ ['types'] = {
+ ['userdata'] = true,
+ },
+ ['process'] = function(inp, _, args)
+ local res = {}
+ for _, arg in ipairs(args) do
+ local meth = inp[arg]
+ local ret = meth(inp)
+ if ret then
+ table.insert(res, tostring(ret))
+ end
+ end
+ return res, 'string_list'
+ end,
+ ['description'] = 'Apply a list of method calls to the userdata object',
+ },
+ -- Apply method to list of userdata and use it as a filter, excluding elements for which method returns false/nil
+ ['filter_method'] = {
+ ['types'] = {
+ ['userdata_list'] = true
+ },
+ ['process'] = function(inp, t, args)
+ local meth = args[1]
+
+ if not meth then
+ logger.errx('invalid method name: %s', args[1])
+ return nil
+ end
+
+ return fun.filter(function(val)
+ return val[meth](val)
+ end, inp), 'userdata_list'
+ end,
+ ['description'] = 'Apply method to list of userdata and use it as a filter,' ..
+ ' excluding elements for which method returns false/nil',
+ ['args_schema'] = { ts.string }
+ },
+}
+
+transform_function.match = transform_function.regexp
+
+return transform_function