diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /lualib/lua_mime.lua | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lualib/lua_mime.lua')
-rw-r--r-- | lualib/lua_mime.lua | 760 |
1 files changed, 760 insertions, 0 deletions
diff --git a/lualib/lua_mime.lua b/lualib/lua_mime.lua new file mode 100644 index 0000000..0f5aa75 --- /dev/null +++ b/lualib/lua_mime.lua @@ -0,0 +1,760 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]]-- + +--[[[ +-- @module lua_mime +-- This module contains helper functions to modify mime parts +--]] + +local logger = require "rspamd_logger" +local rspamd_util = require "rspamd_util" +local rspamd_text = require "rspamd_text" +local ucl = require "ucl" + +local exports = {} + +local function newline(task) + local t = task:get_newlines_type() + + if t == 'cr' then + return '\r' + elseif t == 'lf' then + return '\n' + end + + return '\r\n' +end + +local function do_append_footer(task, part, footer, is_multipart, out, state) + local tp = part:get_text() + local ct = 'text/plain' + local cte = 'quoted-printable' + local newline_s = state.newline_s + + if tp:is_html() then + ct = 'text/html' + end + + local encode_func = function(input) + return rspamd_util.encode_qp(input, 80, task:get_newlines_type()) + end + + if part:get_cte() == '7bit' then + cte = '7bit' + encode_func = function(input) + if type(input) == 'userdata' then + return input + else + return rspamd_text.fromstring(input) + end + end + end + + if is_multipart then + out[#out + 1] = string.format('Content-Type: %s; charset=utf-8%s' .. + 'Content-Transfer-Encoding: %s', + ct, newline_s, cte) + out[#out + 1] = '' + else + state.new_cte = cte + end + + local content = tp:get_content('raw_utf') or '' + local double_nline = newline_s .. newline_s + local nlen = #double_nline + -- Hack, if part ends with 2 newline, then we append it after footer + if content:sub(-(nlen), nlen + 1) == double_nline then + -- content without last newline + content = content:sub(-(#newline_s), #newline_s + 1) .. footer + out[#out + 1] = { encode_func(content), true } + out[#out + 1] = '' + else + content = content .. footer + out[#out + 1] = { encode_func(content), true } + out[#out + 1] = '' + end + +end + +--[[[ +-- @function lua_mime.add_text_footer(task, html_footer, text_footer) +-- Adds a footer to all text parts in a message. It returns a table with the following +-- fields: +-- * out: new content (body only) +-- * need_rewrite_ct: boolean field that means if we must rewrite content type +-- * new_ct: new content type (type => string, subtype => string) +-- * new_cte: new content-transfer encoding (string) +--]] +exports.add_text_footer = function(task, html_footer, text_footer) + local newline_s = newline(task) + local state = { + newline_s = newline_s + } + local out = {} + local text_parts = task:get_text_parts() + + if not (html_footer or text_footer) or not (text_parts and #text_parts > 0) then + return false + end + + if html_footer or text_footer then + -- We need to take extra care about content-type and cte + local ct = task:get_header('Content-Type') + if ct then + ct = rspamd_util.parse_content_type(ct, task:get_mempool()) + end + + if ct then + if ct.type and ct.type == 'text' then + if ct.subtype then + if html_footer and (ct.subtype == 'html' or ct.subtype == 'htm') then + state.need_rewrite_ct = true + elseif text_footer and ct.subtype == 'plain' then + state.need_rewrite_ct = true + end + else + if text_footer then + state.need_rewrite_ct = true + end + end + + state.new_ct = ct + end + else + + if text_parts then + + if #text_parts == 1 then + state.need_rewrite_ct = true + state.new_ct = { + type = 'text', + subtype = 'plain' + } + elseif #text_parts > 1 then + -- XXX: in fact, it cannot be + state.new_ct = { + type = 'multipart', + subtype = 'mixed' + } + end + end + end + end + + local boundaries = {} + local cur_boundary + for _, part in ipairs(task:get_parts()) do + local boundary = part:get_boundary() + if part:is_multipart() then + if cur_boundary then + out[#out + 1] = string.format('--%s', + boundaries[#boundaries]) + end + + boundaries[#boundaries + 1] = boundary or '--XXX' + cur_boundary = boundary + + local rh = part:get_raw_headers() + if #rh > 0 then + out[#out + 1] = { rh, true } + end + elseif part:is_message() then + if boundary then + if cur_boundary and boundary ~= cur_boundary then + -- Need to close boundary + out[#out + 1] = string.format('--%s--%s', + boundaries[#boundaries], newline_s) + table.remove(boundaries) + cur_boundary = nil + end + out[#out + 1] = string.format('--%s', + boundary) + end + + out[#out + 1] = { part:get_raw_headers(), true } + else + local append_footer = false + local skip_footer = part:is_attachment() + + local parent = part:get_parent() + if parent then + local t, st = parent:get_type() + + if t == 'multipart' and st == 'signed' then + -- Do not modify signed parts + skip_footer = true + end + end + if text_footer and part:is_text() then + local tp = part:get_text() + + if not tp:is_html() then + append_footer = text_footer + end + end + + if html_footer and part:is_text() then + local tp = part:get_text() + + if tp:is_html() then + append_footer = html_footer + end + end + + if boundary then + if cur_boundary and boundary ~= cur_boundary then + -- Need to close boundary + out[#out + 1] = string.format('--%s--%s', + boundaries[#boundaries], newline_s) + table.remove(boundaries) + cur_boundary = boundary + end + out[#out + 1] = string.format('--%s', + boundary) + end + + if append_footer and not skip_footer then + do_append_footer(task, part, append_footer, + parent and parent:is_multipart(), out, state) + else + out[#out + 1] = { part:get_raw_headers(), true } + out[#out + 1] = { part:get_raw_content(), false } + end + end + end + + -- Close remaining + local b = table.remove(boundaries) + while b do + out[#out + 1] = string.format('--%s--', b) + if #boundaries > 0 then + out[#out + 1] = '' + end + b = table.remove(boundaries) + end + + state.out = out + + return state +end + +local function do_replacement (task, part, mp, replacements, + is_multipart, out, state) + + local tp = part:get_text() + local ct = 'text/plain' + local cte = 'quoted-printable' + local newline_s = state.newline_s + + if tp:is_html() then + ct = 'text/html' + end + + local encode_func = function(input) + return rspamd_util.encode_qp(input, 80, task:get_newlines_type()) + end + + if part:get_cte() == '7bit' then + cte = '7bit' + encode_func = function(input) + if type(input) == 'userdata' then + return input + else + return rspamd_text.fromstring(input) + end + end + end + + local content = tp:get_content('raw_utf') or rspamd_text.fromstring('') + local match_pos = mp:match(content, true) + + if match_pos then + -- sort matches and form the table: + -- start .. end for inclusion position + local matches_flattened = {} + for npat, matches in pairs(match_pos) do + for _, m in ipairs(matches) do + table.insert(matches_flattened, { m, npat }) + end + end + + -- Handle the case of empty match + if #matches_flattened == 0 then + out[#out + 1] = { part:get_raw_headers(), true } + out[#out + 1] = { part:get_raw_content(), false } + + return + end + + if is_multipart then + out[#out + 1] = { string.format('Content-Type: %s; charset="utf-8"%s' .. + 'Content-Transfer-Encoding: %s', + ct, newline_s, cte), true } + out[#out + 1] = { '', true } + else + state.new_cte = cte + end + + state.has_matches = true + -- now sort flattened by start of match and eliminate all overlaps + table.sort(matches_flattened, function(m1, m2) + return m1[1][1] < m2[1][1] + end) + + for i = 1, #matches_flattened - 1 do + local st = matches_flattened[i][1][1] -- current start of match + local e = matches_flattened[i][1][2] -- current end of match + local max_npat = matches_flattened[i][2] + for j = i + 1, #matches_flattened do + if matches_flattened[j][1][1] == st then + -- overlap + if matches_flattened[j][1][2] > e then + -- larger exclusion and switch replacement + e = matches_flattened[j][1][2] + max_npat = matches_flattened[j][2] + end + else + break + end + end + -- Maximum overlap for all matches + for j = i, #matches_flattened do + if matches_flattened[j][1][1] == st then + if e > matches_flattened[j][1][2] then + matches_flattened[j][1][2] = e + matches_flattened[j][2] = max_npat + end + else + break + end + end + end + -- Off-by one: match returns 0 based positions while we use 1 based in Lua + for _, m in ipairs(matches_flattened) do + m[1][1] = m[1][1] + 1 + m[1][2] = m[1][2] + 1 + end + + -- Now flattened match table is sorted by start pos and has the maximum overlapped pattern + -- Matches with the same start and end are covering the same replacement + -- e.g. we had something like [1 .. 2] -> replacement 1 and [1 .. 4] -> replacement 2 + -- after flattening we should have [1 .. 4] -> 2 and [1 .. 4] -> 2 + -- we can safely ignore those duplicates in the following code + + local cur_start = 1 + local fragments = {} + for _, m in ipairs(matches_flattened) do + if m[1][1] >= cur_start then + fragments[#fragments + 1] = content:sub(cur_start, m[1][1] - 1) + fragments[#fragments + 1] = replacements[m[2]] + cur_start = m[1][2] -- end of match + end + end + + -- last part + if cur_start < #content then + fragments[#fragments + 1] = content:span(cur_start) + end + + -- Final stuff + out[#out + 1] = { encode_func(rspamd_text.fromtable(fragments)), false } + else + -- No matches + out[#out + 1] = { part:get_raw_headers(), true } + out[#out + 1] = { part:get_raw_content(), false } + end +end + +--[[[ +-- @function lua_mime.multipattern_text_replace(task, mp, replacements) +-- Replaces text according to multipattern matches. It returns a table with the following +-- fields: +-- * out: new content (body only) +-- * need_rewrite_ct: boolean field that means if we must rewrite content type +-- * new_ct: new content type (type => string, subtype => string) +-- * new_cte: new content-transfer encoding (string) +--]] +exports.multipattern_text_replace = function(task, mp, replacements) + local newline_s = newline(task) + local state = { + newline_s = newline_s + } + local out = {} + local text_parts = task:get_text_parts() + + if not mp or not (text_parts and #text_parts > 0) then + return false + end + + -- We need to take extra care about content-type and cte + local ct = task:get_header('Content-Type') + if ct then + ct = rspamd_util.parse_content_type(ct, task:get_mempool()) + end + + if ct then + if ct.type and ct.type == 'text' then + state.need_rewrite_ct = true + state.new_ct = ct + end + else + -- No explicit CT, need to guess + if text_parts then + if #text_parts == 1 then + state.need_rewrite_ct = true + state.new_ct = { + type = 'text', + subtype = 'plain' + } + elseif #text_parts > 1 then + -- XXX: in fact, it cannot be + state.new_ct = { + type = 'multipart', + subtype = 'mixed' + } + end + end + end + + local boundaries = {} + local cur_boundary + for _, part in ipairs(task:get_parts()) do + local boundary = part:get_boundary() + if part:is_multipart() then + if cur_boundary then + out[#out + 1] = { string.format('--%s', + boundaries[#boundaries]), true } + end + + boundaries[#boundaries + 1] = boundary or '--XXX' + cur_boundary = boundary + + local rh = part:get_raw_headers() + if #rh > 0 then + out[#out + 1] = { rh, true } + end + elseif part:is_message() then + if boundary then + if cur_boundary and boundary ~= cur_boundary then + -- Need to close boundary + out[#out + 1] = { string.format('--%s--', + boundaries[#boundaries]), true } + table.remove(boundaries) + cur_boundary = nil + end + out[#out + 1] = { string.format('--%s', + boundary), true } + end + + out[#out + 1] = { part:get_raw_headers(), true } + else + local skip_replacement = part:is_attachment() + + local parent = part:get_parent() + if parent then + local t, st = parent:get_type() + + if t == 'multipart' and st == 'signed' then + -- Do not modify signed parts + skip_replacement = true + end + end + if not part:is_text() then + skip_replacement = true + end + + if boundary then + if cur_boundary and boundary ~= cur_boundary then + -- Need to close boundary + out[#out + 1] = { string.format('--%s--', + boundaries[#boundaries]), true } + table.remove(boundaries) + cur_boundary = boundary + end + out[#out + 1] = { string.format('--%s', + boundary), true } + end + + if not skip_replacement then + do_replacement(task, part, mp, replacements, + parent and parent:is_multipart(), out, state) + else + -- Append as is + out[#out + 1] = { part:get_raw_headers(), true } + out[#out + 1] = { part:get_raw_content(), false } + end + end + end + + -- Close remaining + local b = table.remove(boundaries) + while b do + out[#out + 1] = { string.format('--%s--', b), true } + if #boundaries > 0 then + out[#out + 1] = { '', true } + end + b = table.remove(boundaries) + end + + state.out = out + + return state +end + +--[[[ +-- @function lua_mime.modify_headers(task, {add = {hname = {value = 'value', order = 1}}, remove = {hname = {1,2}}}) +-- Adds/removes headers both internal and in the milter reply +-- Mode defines to be compatible with Rspamd <=3.2 and is the default (equal to 'compat') +--]] +exports.modify_headers = function(task, hdr_alterations, mode) + -- Assume default mode compatibility + if not mode then + mode = 'compat' + end + local add = hdr_alterations.add or {} + local remove = hdr_alterations.remove or {} + + local add_headers = {} -- For Milter reply + local hdr_flattened = {} -- For C API + + local function flatten_add_header(hname, hdr) + if not add_headers[hname] then + add_headers[hname] = {} + end + if not hdr_flattened[hname] then + hdr_flattened[hname] = { add = {} } + end + local add_tbl = hdr_flattened[hname].add + if hdr.value then + table.insert(add_headers[hname], { + order = (tonumber(hdr.order) or -1), + value = hdr.value, + }) + table.insert(add_tbl, { tonumber(hdr.order) or -1, hdr.value }) + elseif type(hdr) == 'table' then + for _, v in ipairs(hdr) do + flatten_add_header(hname, v) + end + elseif type(hdr) == 'string' then + table.insert(add_headers[hname], { + order = -1, + value = hdr, + }) + table.insert(add_tbl, { -1, hdr }) + else + logger.errx(task, 'invalid modification of header: %s', hdr) + end + + if mode == 'compat' and #add_headers[hname] == 1 then + -- Switch to the compatibility mode + add_headers[hname] = add_headers[hname][1] + end + end + if hdr_alterations.order then + -- Get headers alterations ordered + for _, hname in ipairs(hdr_alterations.order) do + flatten_add_header(hname, add[hname]) + end + else + for hname, hdr in pairs(add) do + flatten_add_header(hname, hdr) + end + end + + for hname, hdr in pairs(remove) do + if not hdr_flattened[hname] then + hdr_flattened[hname] = { remove = {} } + end + if not hdr_flattened[hname].remove then + hdr_flattened[hname].remove = {} + end + local remove_tbl = hdr_flattened[hname].remove + if type(hdr) == 'number' then + table.insert(remove_tbl, hdr) + else + for _, num in ipairs(hdr) do + table.insert(remove_tbl, num) + end + end + end + + if mode == 'compat' then + -- Clear empty alterations in the compat mode + if add_headers and not next(add_headers) then + add_headers = nil + end + if hdr_alterations.remove and not next(hdr_alterations.remove) then + hdr_alterations.remove = nil + end + end + task:set_milter_reply({ + add_headers = add_headers, + remove_headers = hdr_alterations.remove + }) + + for hname, flat_rules in pairs(hdr_flattened) do + task:modify_header(hname, flat_rules) + end +end + +--[[[ +-- @function lua_mime.message_to_ucl(task, [stringify_content]) +-- Exports a message to an ucl object +--]] +exports.message_to_ucl = function(task, stringify_content) + local E = {} + + local maybe_stringify_f = stringify_content and + tostring or function(t) + return t + end + local result = { + size = task:get_size(), + digest = task:get_digest(), + newlines = task:get_newlines_type(), + headers = task:get_headers(true) + } + + -- Utility to convert ip addr to a string or nil if invalid/absent + local function maybe_stringify_ip(addr) + if addr and addr:is_valid() then + return addr:to_string() + end + + return nil + end + + -- Envelope (smtp) information from email (nil if empty) + result.envelope = { + from_smtp = (task:get_from('smtp') or E)[1], + recipients_smtp = task:get_recipients('smtp'), + helo = task:get_helo(), + hostname = task:get_hostname(), + client_ip = maybe_stringify_ip(task:get_client_ip()), + from_ip = maybe_stringify_ip(task:get_from_ip()), + } + if not next(result.envelope) then + result.envelope = ucl.null + end + + local parts = task:get_parts() or E + result.parts = {} + for _, part in ipairs(parts) do + if not part:is_multipart() and not part:is_message() then + local p = { + size = part:get_length(), + type = string.format('%s/%s', part:get_type()), + detected_type = string.format('%s/%s', part:get_detected_type()), + filename = part:get_filename(), + content = maybe_stringify_f(part:get_content()), + headers = part:get_headers(true) or E, + boundary = part:get_enclosing_boundary(), + } + table.insert(result.parts, p) + else + -- Service part: multipart container or message/rfc822 + local p = { + type = string.format('%s/%s', part:get_type()), + headers = part:get_headers(true) or E, + boundary = part:get_enclosing_boundary(), + size = 0, + } + + if part:is_multipart() then + p.multipart_boundary = part:get_boundary() + end + + table.insert(result.parts, p) + end + end + + return result +end + +--[[[ +-- @function lua_mime.message_to_ucl_schema() +-- Returns schema for a message to verify result/document fields +--]] +exports.message_to_ucl_schema = function() + local ts = require("tableshape").types + + local function headers_schema() + return ts.shape { + order = ts.integer:describe('Header order in a message'), + raw = ts.string:describe('Raw header value'):is_optional(), + empty_separator = ts.boolean:describe('Whether header has an empty separator'), + separator = ts.string:describe('Separator between a header and a value'), + decoded = ts.string:describe('Decoded value'):is_optional(), + value = ts.string:describe('Decoded value'):is_optional(), + name = ts.string:describe('Header name'), + tab_separated = ts.boolean:describe('Whether header has tab as a separator') + } + end + + local function part_schema() + return ts.shape { + content = ts.string:describe('Decoded content'):is_optional(), + multipart_boundary = ts.string:describe('Multipart service boundary'):is_optional(), + size = ts.integer:describe('Size of the part'), + type = ts.string:describe('Announced type'):is_optional(), + detected_type = ts.string:describe('Detected type'):is_optional(), + boundary = ts.string:describe('Eclosing boundary'):is_optional(), + filename = ts.string:describe('File name for attachments'):is_optional(), + headers = ts.array_of(headers_schema()):describe('Part headers'), + } + end + + local function email_addr_schema() + return ts.shape { + addr = ts.string:describe('Parsed address'):is_optional(), + raw = ts.string:describe('Raw address'), + flags = ts.shape { + valid = ts.boolean:describe('Valid address'):is_optional(), + ip = ts.boolean:describe('IP like address'):is_optional(), + braced = ts.boolean:describe('Have braces around address'):is_optional(), + quoted = ts.boolean:describe('Have quotes around address'):is_optional(), + empty = ts.boolean:describe('Empty address'):is_optional(), + backslash = ts.boolean:describe('Backslash in address'):is_optional(), + ['8bit'] = ts.boolean:describe('8 bit characters in address'):is_optional(), + }, + user = ts.string:describe('Parsed user part'):is_optional(), + name = ts.string:describe('Displayed name'):is_optional(), + domain = ts.string:describe('Parsed domain part'):is_optional(), + } + end + local function envelope_schema() + return ts.shape { + from_smtp = email_addr_schema():describe('SMTP from'):is_optional(), + recipients_smtp = ts.array_of(email_addr_schema()):describe('SMTP recipients'):is_optional(), + helo = ts.string:describe('SMTP Helo'):is_optional(), + hostname = ts.string:describe('Sender hostname'):is_optional(), + client_ip = ts.string:describe('Client ip'):is_optional(), + from_ip = ts.string:describe('Sender ip'):is_optional(), + } + end + + return ts.shape { + headers = ts.array_of(headers_schema()), + parts = ts.array_of(part_schema()), + digest = ts.pattern(string.format('^%s$', string.rep('%x', 32))) + :describe('Message digest'), + newlines = ts.one_of({ "cr", "lf", "crlf" }):describe('Newlines type'), + size = ts.integer:describe('Size of the message in bytes'), + envelope = envelope_schema() + } +end + +return exports |