diff options
Diffstat (limited to 'lualib/lua_maps.lua')
-rw-r--r-- | lualib/lua_maps.lua | 612 |
1 files changed, 612 insertions, 0 deletions
diff --git a/lualib/lua_maps.lua b/lualib/lua_maps.lua new file mode 100644 index 0000000..d357310 --- /dev/null +++ b/lualib/lua_maps.lua @@ -0,0 +1,612 @@ +--[[[ +-- @module lua_maps +-- This module contains helper functions for managing rspamd maps +--]] + +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +local rspamd_logger = require "rspamd_logger" +local ts = require("tableshape").types +local lua_util = require "lua_util" + +local exports = {} + +local maps_cache = {} + +local function map_hash_key(data, mtype) + local hash = require "rspamd_cryptobox_hash" + local st = hash.create_specific('xxh64') + st:update(data) + st:update(mtype) + + return st:hex() +end + +local function starts(where, st) + return string.sub(where, 1, string.len(st)) == st +end + +local function cut_prefix(where, st) + return string.sub(where, #st + 1) +end + +local function maybe_adjust_type(data, mtype) + local function check_prefix(prefix, t) + if starts(data, prefix) then + data = cut_prefix(data, prefix) + mtype = t + + return true + end + + return false + end + + local known_types = { + { 'regexp;', 'regexp' }, + { 're;', 'regexp' }, + { 'regexp_multi;', 'regexp_multi' }, + { 're_multi;', 'regexp_multi' }, + { 'glob;', 'glob' }, + { 'glob_multi;', 'glob_multi' }, + { 'radix;', 'radix' }, + { 'ipnet;', 'radix' }, + { 'set;', 'set' }, + { 'hash;', 'hash' }, + { 'plain;', 'hash' }, + { 'cdb;', 'cdb' }, + { 'cdb:/', 'cdb' }, + } + + if mtype == 'callback' then + return mtype + end + + for _, t in ipairs(known_types) do + if check_prefix(t[1], t[2]) then + return data, mtype + end + end + + -- No change + return data, mtype +end + +local external_map_schema = ts.shape { + external = ts.equivalent(true), -- must be true + backend = ts.string, -- where to get data, required + method = ts.one_of { "body", "header", "query" }, -- how to pass input + encode = ts.one_of { "json", "messagepack" }:is_optional(), -- how to encode input (if relevant) + timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(), +} + +local rspamd_http = require "rspamd_http" +local ucl = require "ucl" + +local function url_encode_string(str) + str = string.gsub(str, "([^%w _%%%-%.~])", + function(c) + return string.format("%%%02X", string.byte(c)) + end) + str = string.gsub(str, " ", "+") + return str +end + +assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B') +assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians') + +local function query_external_map(map_config, upstreams, key, callback, task) + local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET' + local upstream = upstreams:get_upstream_round_robin() + local http_headers = { + ['Accept'] = '*/*' + } + local http_body = nil + local url = map_config.backend + + if type(key) == 'string' or type(key) == 'userdata' then + if map_config.method == 'body' then + http_body = key + http_headers['Content-Type'] = 'text/plain' + elseif map_config.method == 'header' then + http_headers = { + key = key + } + elseif map_config.method == 'query' then + url = string.format('%s?key=%s', url, url_encode_string(tostring(key))) + end + elseif type(key) == 'table' then + if map_config.method == 'body' then + if map_config.encode == 'json' then + http_body = ucl.to_format(key, 'json-compact', true) + http_headers['Content-Type'] = 'application/json' + elseif map_config.encode == 'messagepack' then + http_body = ucl.to_format(key, 'messagepack', true) + http_headers['Content-Type'] = 'application/msgpack' + else + local caller = debug.getinfo(2) or {} + rspamd_logger.errx(task, + "requested external map key with a wrong combination body method and missing encode; caller: %s:%s", + caller.short_src, caller.currentline) + callback(false, 'invalid map usage', 500, task) + end + else + -- query/header and no encode + if map_config.method == 'query' then + local params_table = {} + for k, v in pairs(key) do + if type(v) == 'string' then + table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v))) + end + end + url = string.format('%s?%s', url, table.concat(params_table, '&')) + elseif map_config.method == 'header' then + http_headers = key + else + local caller = debug.getinfo(2) or {} + rspamd_logger.errx(task, + "requested external map key with a wrong combination of encode and input; caller: %s:%s", + caller.short_src, caller.currentline) + callback(false, 'invalid map usage', 500, task) + return + end + end + end + + local function map_callback(err, code, body, _) + if err then + callback(false, err, code, task) + elseif code == 200 then + callback(true, body, 200, task) + else + callback(false, err, code, task) + end + end + + local ret = rspamd_http.request { + task = task, + url = url, + callback = map_callback, + timeout = map_config.timeout or 1.0, + keepalive = true, + upstream = upstream, + method = http_method, + headers = http_headers, + body = http_body, + } + + if not ret then + callback(false, 'http request error', 500, task) + end +end + +--[[[ +-- @function lua_maps.map_add_from_ucl(opt, mtype, description) +-- Creates a map from static data +-- Returns true if map was added or nil +-- @param {string or table} opt data for map (or URL) +-- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`) +-- @param {string} description human-readable description of map +-- @param {function} callback optional callback that will be called on map match (required for external maps) +-- @return {bool} true on success, or `nil` +--]] +local function rspamd_map_add_from_ucl(opt, mtype, description, callback) + local ret = { + get_key = function(t, k, key_callback, task) + if t.__data then + local cb = key_callback or callback + if t.__external then + if not cb or not task then + local caller = debug.getinfo(2) or {} + rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s", + caller.short_src, caller.currentline) + return nil + end + query_external_map(t.__data, t.__upstreams, k, cb, task) + else + local result = t.__data:get_key(k) + if cb then + if result then + cb(true, result, 200, task) + else + cb(false, 'not found', 404, task) + end + else + return result + end + end + end + + return nil + end, + foreach = function(t, cb) + return t.__data:foreach(cb) + end, + on_load = function(t, cb) + t.__data:on_load(cb) + end + } + local ret_mt = { + __index = function(t, k, key_callback, task) + if t.__data then + return t.get_key(k, key_callback, task) + end + + return nil + end + } + + if not opt then + return nil + end + + local function maybe_register_selector() + if opt.selector_alias then + local lua_selectors = require "lua_selectors" + lua_selectors.add_map(opt.selector_alias, ret) + end + end + + if type(opt) == 'string' then + opt, mtype = maybe_adjust_type(opt, mtype) + local cache_key = map_hash_key(opt, mtype) + if not callback and maps_cache[cache_key] then + rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)', + opt, mtype) + + return maps_cache[cache_key] + end + -- We have a single string, so we treat it as a map + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = opt, + } + + if map then + ret.__data = map + ret.hash = cache_key + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + return ret + end + elseif type(opt) == 'table' then + local cache_key = lua_util.table_digest(opt) + if not callback and maps_cache[cache_key] then + rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s', + cache_key:sub(1, 8), description) + + return maps_cache[cache_key] + end + + if opt[1] then + -- Adjust each element if needed + local adjusted + for i, source in ipairs(opt) do + local nsrc, ntype = maybe_adjust_type(source, mtype) + + if mtype ~= ntype then + if not adjusted then + mtype = ntype + end + adjusted = true + end + opt[i] = nsrc + end + + if mtype == 'radix' then + + if string.find(opt[1], '^%d') then + local map = rspamd_config:radix_from_ucl(opt) + + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + else + -- Plain table + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = opt, + } + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + end + elseif mtype == 'regexp' or mtype == 'glob' then + if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then + -- Plain table + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = opt, + } + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + else + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = { + url = 'static', + data = opt, + } + } + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + end + else + if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then + -- Plain table + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = opt, + } + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + else + local data = {} + local nelts = 0 + -- Plain array of keys, count merely numeric elts + for _, elt in ipairs(opt) do + if type(elt) == 'string' then + -- Numeric table + if mtype == 'hash' then + -- Treat as KV pair + local pieces = lua_util.str_split(elt, ' ') + if #pieces > 1 then + local key = table.remove(pieces, 1) + data[key] = table.concat(pieces, ' ') + else + data[elt] = true + end + else + data[elt] = true + end + + nelts = nelts + 1 + end + end + + if nelts > 0 then + -- Plain Lua table that is used as a map + ret.__data = data + ret.get_key = function(t, k) + if k ~= '__data' then + return t.__data[k] + end + + return nil + end + ret.foreach = function(_, func) + for k, v in pairs(ret.__data) do + if not func(k, v) then + return false + end + end + + return true + end + ret.on_load = function(_, cb) + rspamd_config:add_on_load(function(_, _, _) + cb() + end) + end + + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + else + -- Empty map, huh? + rspamd_logger.errx(rspamd_config, 'invalid map element: %s', + opt) + end + end + end + else + if opt.external then + -- External map definition, missing fields are handled by schema + local parse_res, parse_err = external_map_schema(opt) + + if parse_res then + ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend) + if ret.__upstreams then + ret.__data = opt + ret.__external = true + setmetatable(ret, ret_mt) + maybe_register_selector() + + return ret + else + rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s', + opt.backend) + end + else + rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s', + parse_err) + end + else + -- Adjust lua specific augmentations in a trivial case + if type(opt.url) == 'string' then + local nsrc, ntype = maybe_adjust_type(opt.url, mtype) + if nsrc and ntype then + opt.url = nsrc + mtype = ntype + end + end + -- We have some non-trivial object so let C code to deal with it somehow... + local map = rspamd_config:add_map { + type = mtype, + description = description, + url = opt, + } + if map then + ret.__data = map + setmetatable(ret, ret_mt) + maps_cache[cache_key] = ret + maybe_register_selector() + + return ret + end + end + end -- opt[1] + end + + return nil +end + +--[[[ +-- @function lua_maps.map_add(mname, optname, mtype, description) +-- Creates a map from configuration elements (static data or URL) +-- Returns true if map was added or nil +-- @param {string} mname config section to use +-- @param {string} optname option name to use +-- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob') +-- @param {string} description human-readable description of map +-- @param {function} callback optional callback that will be called on map match (required for external maps) +-- @return {bool} true on success, or `nil` +--]] + +local function rspamd_map_add(mname, optname, mtype, description, callback) + local opt = rspamd_config:get_module_opt(mname, optname) + + return rspamd_map_add_from_ucl(opt, mtype, description, callback) +end + +exports.rspamd_map_add = rspamd_map_add +exports.map_add = rspamd_map_add +exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl +exports.map_add_from_ucl = rspamd_map_add_from_ucl + +-- Check `what` for being lua_map name, otherwise just compares key with what +local function rspamd_maybe_check_map(key, what) + local fun = require "fun" + + if type(what) == "table" then + return fun.any(function(elt) + return rspamd_maybe_check_map(key, elt) + end, what) + end + if type(rspamd_maps) == "table" then + local mn + if starts(key, "map:") then + mn = string.sub(key, 5) + elseif starts(key, "map://") then + mn = string.sub(key, 7) + end + + if mn and rspamd_maps[mn] then + return rspamd_maps[mn]:get_key(what) + end + end + + return what:lower() == key +end + +exports.rspamd_maybe_check_map = rspamd_maybe_check_map + +--[[[ +-- @function lua_maps.fill_config_maps(mname, options, defs) +-- Fill maps that could be defined in defs, from the config in the options +-- Defs is a table indexed by a map's parameter name and defining it's config, +-- @example +-- defs = { +-- my_map = { +-- type = 'map', +-- description = 'my cool map', +-- optional = true, +-- } +-- } +-- -- Then this function will look for opts.my_map parameter and try to replace it with +-- -- a map with the specific type, description but not failing if it was empty. +-- -- It will also set options.my_map_orig to the original value defined in the map. +--]] +exports.fill_config_maps = function(mname, opts, map_defs) + assert(type(opts) == 'table') + assert(type(map_defs) == 'table') + for k, v in pairs(map_defs) do + if opts[k] then + local map = rspamd_map_add_from_ucl(opts[k], v.type or 'map', v.description) + if not map then + rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', k, mname) + return false + end + opts[k .. '_orig'] = opts[k] + opts[k] = map + elseif not v.optional then + rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', k, mname) + return false + end + end + + return true +end + +local direct_map_schema = ts.shape { -- complex object + name = ts.string:is_optional(), + description = ts.string:is_optional(), + selector_alias = ts.string:is_optional(), -- an optional alias for the selectos framework + timeout = ts.number, + data = ts.array_of(ts.string):is_optional(), + -- Tableshape has no options support for something like key1 or key2? + upstreams = ts.one_of { + ts.string, + ts.array_of(ts.string), + } :is_optional(), + url = ts.one_of { + ts.string, + ts.array_of(ts.string), + } :is_optional(), +} + +exports.map_schema = ts.one_of { + ts.string, -- 'http://some_map' + ts.array_of(ts.string), -- ['foo', 'bar'] + ts.one_of { direct_map_schema, external_map_schema } +} + +return exports |