diff options
Diffstat (limited to 'test/lua/unit')
37 files changed, 4102 insertions, 0 deletions
-- File: test/lua/unit/addr.lua (new file mode 100644, index 0000000..6da72d3)
-- inet addr tests

context("Inet addr check functions", function()
  local ffi = require("ffi")

  ffi.cdef[[
  typedef struct rspamd_inet_addr_s rspamd_inet_addr_t;
  bool rspamd_parse_inet_address (rspamd_inet_addr_t **target,
    const char *src, size_t len);
  void rspamd_inet_address_free (rspamd_inet_addr_t *addr);
  ]]

  -- Each entry is {address text, whether rspamd_parse_inet_address must accept it}
  local cases = {
    {'192.168.1.1', true},
    {'2a01:4f8:190:43b5::99', true},
    {'256.1.1.1', false},
    {'/tmp/socket', true},
    {'./socket', true},
    {'[fe80::f919:8b26:ff93:3092%5]', true},
    {'[fe80::f919:8b26:ff93:3092]', true},
    {'IPv6:::1', true},
    {'IPv6:[::1]', true},
    {'IPv6[:::1]', false},
    {'[::]', true},
    {'[1::]', true},
    {'[000:01:02:003:004:5:6:007]', true}, -- leading zeros
    {'[A:b:c:DE:fF:0:1:aC]', true}, -- mixed case
    {'[::192.168.0.1]', true}, -- embedded ipv4
    {'[1:2:192.168.0.1:5:6]', false}, -- poor octets
    {'[::ffff:192.1.2]', false}, -- ipv4 without last octet (maybe should be true?)
    {'[0:0::0:0:8]', true}, -- bogus zeros
    {'[::192.168.0.0.1]', false}, -- invalid mapping
  }

  for _, case in ipairs(cases) do
    local text, expected = case[1], case[2]
    local title = "Create inet addr from string " .. text .. '; expect ' .. tostring(expected)

    test(title, function()
      -- One-element pointer array serves as the rspamd_inet_addr_t** out parameter
      local target = ffi.new("rspamd_inet_addr_t* [1]");
      local parsed = ffi.C.rspamd_parse_inet_address(target, text, #text)

      -- Free before asserting: a failing assertion raises, and the address
      -- produced by a successful parse would otherwise never be released.
      if parsed then
        ffi.C.rspamd_inet_address_free(target[0])
      end

      assert_equal(parsed, expected, "Expect " .. tostring(expected) .. " while parsing " .. text)
    end)
  end
end)
-- File: test/lua/unit/base32.lua (new file mode 100644, index 0000000..eb582f5)
-- Test zbase32 encoding/decoding

context("Base32 encodning", function()
  local ffi = require("ffi")
  ffi.cdef[[
  void ottery_rand_bytes(void *buf, size_t n);
  unsigned ottery_rand_unsigned(void);
  unsigned char* rspamd_decode_base32 (const char *in, size_t inlen, size_t *outlen, int how);
  char * rspamd_encode_base32 (const unsigned char *in, size_t inlen, int how);
  void g_free(void *ptr);
  int memcmp(const void *a1, const void *a2, size_t len);
  ]]

  -- Returns a buffer of 1..max_size random bytes together with its length.
  local function random_buf(max_size)
    local l = ffi.C.ottery_rand_unsigned() % max_size + 1
    local buf = ffi.new("unsigned char[?]", l)
    ffi.C.ottery_rand_bytes(buf, l)

    return buf, l
  end

  test("Base32 encode test", function()
    -- {plain input, expected encoding with how = 0}
    local cases = {
      {'test123', 'wm3g84fg13cy'},
      {'hello', 'em3ags7p'}
    }

    for _, c in ipairs(cases) do
      local encoded = ffi.C.rspamd_encode_base32(c[1], #c[1], 0)
      -- A NULL pointer compares equal to nil in LuaJIT; fail cleanly instead
      -- of handing NULL to ffi.string.
      assert_true(encoded ~= nil, "encoder returned NULL for " .. c[1])
      local s = ffi.string(encoded)
      -- `s` is a Lua copy, so the native buffer can go before the assertion.
      ffi.C.g_free(encoded)
      assert_equal(s, c[2], s .. " not equal " .. c[2])
    end
  end)

  if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then
    test("Base32 fuzz test: zbase32", function()
      for _ = 1, 1000 do
        local b, l = random_buf(4096)
        -- math.random(3) is already an integer in 1..3, so `how` is 0..2
        local how = math.random(3) - 1

        local ben = ffi.C.rspamd_encode_base32(b, l, how)
        assert_true(ben ~= nil, "encoder returned NULL for length: " .. tostring(l))
        local bs = ffi.string(ben)
        ffi.C.g_free(ben)

        local nl = ffi.new("size_t [1]")
        local nb = ffi.C.rspamd_decode_base32(bs, #bs, nl, how)
        local decoded = nb ~= nil
        local decoded_len = tonumber(nl[0])

        -- Only compare when the decoded buffer exists and has the expected
        -- size; otherwise memcmp would read past the end of `nb`.
        local cmp
        if decoded and decoded_len == l then
          cmp = ffi.C.memcmp(b, nb, l)
        end
        -- Release native memory before any assertion can raise.
        ffi.C.g_free(nb)

        assert_true(decoded, "decoder returned NULL for length: " .. tostring(l))
        assert_equal(decoded_len, l,
          string.format("invalid size reported: %d reported vs %d expected", decoded_len, l))
        assert_equal(cmp, 0, "fuzz test failed for length: " .. tostring(l))
      end
    end)
  end
end)
-- File: test/lua/unit/base64.lua (new file mode 100644, index 0000000..02948e2)
context("Base64 encoding", function()
  local ffi = require("ffi")
  local util = require("rspamd_util")
  local logger = require "rspamd_logger"
  ffi.cdef[[
  void rspamd_cryptobox_init (void);
  void ottery_rand_bytes(void *buf, size_t n);
  unsigned ottery_rand_unsigned(void);
  unsigned char* g_base64_decode (const char *in, size_t *outlen);
  char * rspamd_encode_base64 (const unsigned char *in, size_t inlen,
    size_t str_len, size_t *outlen);
  void g_free(void *ptr);
  int memcmp(const void *a1, const void *a2, size_t len);
  double base64_test (bool generic, size_t niters, size_t len, size_t str_len);
  double rspamd_get_ticks (int);
  ]]

  ffi.C.rspamd_cryptobox_init()

  -- Returns a buffer of 1..max_size random bytes together with its length.
  local function random_buf(max_size)
    local l = ffi.C.ottery_rand_unsigned() % max_size + 1
    local buf = ffi.new("unsigned char[?]", l)
    ffi.C.ottery_rand_bytes(buf, l)

    return buf, l
  end

  -- Returns a NUL-terminated buffer whose bytes are drawn from the 20 codes
  -- starting at 'A', plus the allocated length (terminator included).
  local function random_safe_buf(max_size)
    local l = ffi.C.ottery_rand_unsigned() % max_size + 1
    local buf = ffi.new("unsigned char[?]", l)

    for i = 0, l - 1 do
      buf[i] = ffi.C.ottery_rand_unsigned() % 20 + string.byte('A')
    end

    -- the last byte becomes the terminator so ffi.string(buf) stops there
    buf[l - 1] = 0;

    return buf, l
  end

  -- {plain text, base64 text}; used by both the encode and the decode test
  local vectors = {
    {"", ""},
    {"f", "Zg=="},
    {"fo", "Zm8="},
    {"foo", "Zm9v"},
    {"foob", "Zm9vYg=="},
    {"fooba", "Zm9vYmE="},
    {"foobar", "Zm9vYmFy"},
  }

  test("Base64 encode test", function()
    local nl = ffi.new("size_t [1]")
    for _, c in ipairs(vectors) do
      local b = ffi.C.rspamd_encode_base64(c[1], #c[1], 0, nl)
      -- NULL compares equal to nil; fail cleanly rather than in ffi.string
      assert_true(b ~= nil, "encoder returned NULL for '" .. c[1] .. "'")
      local s = ffi.string(b)
      ffi.C.g_free(b)
      assert_equal(s, c[2], s .. " not equal " .. c[2])
    end
  end)

  test("Base64 decode test", function()
    for _, c in ipairs(vectors) do
      local b = tostring(util.decode_base64(c[2]))
      assert_equal(b, c[1], b .. " not equal " .. c[1])
    end
  end)

  test("Base64 line split encode test", function()
    local text = [[
Man is distinguished, not only by his reason, but by this singular passion from
other animals, which is a lust of the mind, that by a perseverance of delight
in the continued and indefatigable generation of knowledge, exceeds the short
vehemence of any carnal pleasure.]]
    local b64 = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz\r\nIHNpbmd1bGFyIHBhc3Npb24gZnJvbQpvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg\r\ndGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodAppbiB0aGUgY29udGlu\r\ndWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo\r\nZSBzaG9ydAp2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="
    local nl = ffi.new("size_t [1]")
    local b = ffi.C.rspamd_encode_base64(text, #text, 76, nl)
    assert_true(b ~= nil, "encoder returned NULL")
    -- Copy exactly the reported number of bytes and compare whole strings.
    -- A memcmp over nl[0] bytes would accept a truncated result and would
    -- read past the end of `b64` if the encoder produced more than expected.
    local encoded = ffi.string(b, tonumber(nl[0]))
    ffi.C.g_free(b)
    assert_equal(encoded, b64)
  end)

  if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then
    test("Base64 fuzz test", function()
      for _ = 1, 1000 do
        local b = random_safe_buf(4096)
        local lim = ffi.C.ottery_rand_unsigned() % 64 + 10
        local orig = ffi.string(b)
        local ben = util.encode_base64(orig, lim)
        local dec = util.decode_base64(ben)
        assert_equal(orig, tostring(dec), "fuzz test failed for length: " .. #orig)
      end
    end)

    test("Base64 fuzz test (ffi)", function()
      for _ = 1, 1000 do
        local b, l = random_buf(4096)
        local nl = ffi.new("size_t [1]")
        local lim = ffi.C.ottery_rand_unsigned() % 64 + 10
        local ben = ffi.C.rspamd_encode_base64(b, l, lim, nl)
        assert_true(ben ~= nil, "encoder returned NULL for length: " .. tostring(l))

        local ol = ffi.new("size_t [1]")
        local nb = ffi.C.g_base64_decode(ben, ol)
        local decoded = nb ~= nil
        local decoded_len = tonumber(ol[0])

        -- memcmp reads `l` bytes from both buffers, so it is only safe when
        -- the decoder actually produced `l` bytes.
        local cmp
        if decoded and decoded_len == l then
          cmp = ffi.C.memcmp(b, nb, l)
        end
        -- Release native memory before any assertion can raise.
        ffi.C.g_free(ben)
        ffi.C.g_free(nb)

        assert_true(decoded, "decoder returned NULL for length: " .. tostring(l))
        assert_equal(decoded_len, l, "fuzz test decoded length mismatch for length: " .. tostring(l))
        assert_equal(cmp, 0, "fuzz test failed for length: " .. tostring(l))
      end
    end)

    local speed_iters = 10000

    -- Runs the native base64 benchmark for one chunk size and logs the
    -- per-iteration and per-byte figures. Always returns 1.
    local function perform_base64_speed_test(chunk, is_reference, line_len)
      local ticks = ffi.C.base64_test(is_reference, speed_iters, chunk, line_len)
      local what = 'Optimized'
      if is_reference then
        what = 'Reference'
      end
      logger.messagex("%s base64 %s chunk (%s line len): %s ticks per iter, %s ticks per byte",
          what, chunk, line_len,
          ticks / speed_iters, ticks / speed_iters / chunk)

      return 1
    end

    -- {label used in the test name, chunk size, line length}
    local speed_cases = {
      {"78", 78, 0},
      {"512", 512, 0},
      {"512 (78 line len)", 512, 78},
      {"1K", 1024, 0},
      {"1K (78 line len)", 1024, 78},
      {"10K", 10 * 1024, 0},
      {"10K (78 line len)", 10 * 1024, 78},
    }

    -- For every size the reference implementation is registered first, then
    -- the optimized one.
    for _, sc in ipairs(speed_cases) do
      local label, chunk, line_len = sc[1], sc[2], sc[3]
      for _, is_reference in ipairs({true, false}) do
        local flavour = is_reference and "reference" or "optimized"
        test("Base64 test " .. flavour .. " vectors " .. label, function()
          local res = perform_base64_speed_test(chunk, is_reference, line_len)
          assert_not_equal(res, 0)
        end)
      end
    end
  end
end)

-- File: test/lua/unit/compression.lua (new file mode 100644, index 0000000..d5c682c)
-- Compression unit tests

context("Rspamd compression", function()
  local rspamd_zstd = require "rspamd_zstd"
  local rspamd_text = require "rspamd_text"

  test("Compressed can be decompressed", function()
    local str = 'test'
    local cctx = rspamd_zstd.compress_ctx()
    local dctx = rspamd_zstd.decompress_ctx()
    local compressed = cctx:stream(str, 'end')
    assert_rspamd_eq({actual = dctx:stream(compressed),
                      expect = rspamd_text.fromstring(str)})
  end)

  test("Compressed concatenation can be decompressed", function()
    local str = 'test'
    local cctx = rspamd_zstd.compress_ctx()
    local dctx = rspamd_zstd.decompress_ctx()
    -- two chunks from the same compression stream, joined before decoding
    local compressed = cctx:stream(str) .. cctx:stream(str, 'end')
    assert_rspamd_eq({actual = dctx:stream(compressed),
                      expect = rspamd_text.fromstring(str .. str)})
  end)

  if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then
    local sizes = {10, 100, 1000, 10000}
    for _, sz in ipairs(sizes) do
      test("Compressed fuzz size: " .. tostring(sz), function()
        for _ = 1, 1000 do
          local rnd = rspamd_text.randombytes(sz)
          local cctx = rspamd_zstd.compress_ctx()
          local dctx = rspamd_zstd.decompress_ctx()
          assert_rspamd_eq({actual = dctx:stream(cctx:stream(rnd, 'end')),
                            expect = rnd})
        end
      end)
    end
  end

  test("Compressed chunks", function()
    local nchunks = 1000
    local cctx = rspamd_zstd.compress_ctx()
    local tin = {}
    local tout = {}

    -- chunk i holds i random bytes
    for i = 1, nchunks do
      tin[i] = rspamd_text.randombytes(i)
    end

    -- every chunk goes through the same stream; only the last call passes 'end'
    for i = 1, nchunks do
      if i == nchunks then
        tout[i] = cctx:stream(tin[i], 'end')
      else
        tout[i] = cctx:stream(tin[i])
      end
    end

    local dctx = rspamd_zstd.decompress_ctx()
    assert_rspamd_eq({actual = dctx:stream(rspamd_text.fromtable(tout)),
                      expect = rspamd_text.fromtable(tin)})
  end)
end)
-- File: test/lua/unit/expressions.lua (new file mode 100644, index 0000000..3d05685)
-- Expressions unit tests

context("Rspamd expressions", function()
  local rspamd_expression = require "rspamd_expression"
  local rspamd_mempool = require "rspamd_mempool"
  local rspamd_regexp = require "rspamd_regexp"
  local split_re = rspamd_regexp.create('/\\s+|\\)|\\(/')

  -- Atom parser callback: returns the text up to the first whitespace or
  -- parenthesis, or the whole input when the split yields nothing.
  local function parse_func(str)
    local parts = split_re:split(str)
    if parts then
      return parts[1]
    end
    return str
  end

  -- Atom values handed to expr:process()
  local atoms = {
    A = 1.0,
    B = 0,
    C = 1,
    D = 0,
    E = 1,
    F = 0,
    G = 0,
    H = 0,
    I = 0,
    J = 0,
    K = 0,
  }

  -- Atom evaluation callback: looks the token up in the supplied input table.
  local function process_func(token, input)
    return input[token]
  end

  local pool = rspamd_mempool.create()

  -- {expression text, expected to_string() form or nil when parsing must fail}
  local cases = {
    {'A & (!B | C)', '(A) (B) ! (C) | &'},
    {'A & B | !C', '(C) ! (A) (B) & |'},
    {'A & (B | !C)', '(A) (B) (C) ! | &'},
    {'A & B &', nil},
    -- Unbalanced braces
    {'(((A))', nil},
    -- Balanced braces
    {'(((A)))', '(A)'},
    -- Plus and comparison operators
    {'A + B + C + D > 2', '(A) (B) (C) (D) +(4) 2 >'},
    -- Plus and logic operators
    {'((A + B + C + D) > 2) & D', '(D) (A) (B) (C) (D) +(4) 2 > &'},
    -- Associativity
    {'A | B | C & D & E', '(A) (B) (C) (D) (E) &(3) |(3)'},
    -- More associativity
    {'1 | 0 & 0 | 0', '(1) (0) (0) (0) & |(3)'},
    {'(A) & (B) & ((C) | (D) | (E) | (F))', '(A) (B) (C) (D) (E) (F) |(4) &(3)' },
    -- Extra space
    {'A & B | ! C', '(C) ! (A) (B) & |'},
    -- False minus
    {'A + B + -C', '(A) (B) (-C) +(3)'},
  }
  for _, c in ipairs(cases) do
    test("Expression creation function: " .. c[1], function()
      local expr, err = rspamd_expression.create(c[1],
          {parse_func, process_func}, pool)

      if not c[2] then
        assert_nil(expr, "Should not be able to parse " .. c[1])
      else
        assert_not_nil(expr, "Cannot parse " .. c[1] .. '; error: ' .. (err or 'wut??'))
        assert_equal(expr:to_string(), c[2], string.format("Evaluated expr to '%s', expected: '%s'",
            expr:to_string(), c[2]))
      end
    end)
  end

  -- Expression is destroyed when the corresponding pool is destroyed
  -- {expression text, expected result of processing it against `atoms`}
  cases = {
    {'(E) && ((B + B + B + B) >= 1)', 0},
    {'A & B | !C', 0},
    {'A & (!B | C)', 1},
    {'A + B + C + D + E + F >= 2', 1},
    {'((A + B + C + D) > 1) & F', 0},
    {'(A + B + C + D) > 1 && F || E', 1},
    {'(A + B + C + D) > 100 && F || !E', 0},
    {'F && ((A + B + C + D) > 1)', 0},
    {'(E) && ((B + B + B + B) >= 1)', 0},
    {'!!C', 1},
    {'(B) & (D) & ((G) | (H) | (I) | (A))', 0},
    {'A & C & (!D || !C || !E)', 1},
    {'A & C & !(D || C || E)', 0},
    {'A + B + C', 2},
    {'A * 2.0 + B + C', 3},
    {'A * 2.0 + B - C', 1},
    {'A / 2.0 + B - C', -0.5},
  }
  for _, c in ipairs(cases) do
    test("Expression process function: " .. c[1], function()
      local expr, err = rspamd_expression.create(c[1],
          {parse_func, process_func}, pool)

      assert_not_nil(expr, "Cannot parse " .. c[1] .. '; error: ' .. (err or 'wut??'))
      -- `local` keeps the result out of the global table
      local res = expr:process(atoms)
      -- %s with tostring: results such as -0.5 are not integers, so %d would
      -- misreport them (or raise on Lua versions that enforce integer %d).
      assert_equal(res, c[2], string.format("Processed expr '%s'{%s} returned '%s', expected: '%s'",
          expr:to_string(), c[1], tostring(res), tostring(c[2])))
    end)
  end
end)

-- File: test/lua/unit/folding.lua (new file mode 100644, index 0000000..8a92384)
--[[
Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]]--

context("Headers folding unit test", function()
  local util = require("rspamd_util")
  -- {header, value}, "expected result"
  -- (an optional third element of the first table is passed on as the last
  -- argument of util.fold_header)
  local cases = {
    {{"test", "test"}, "test"},
    {{"test1", "_abc _def _ghi _fdjhfd _fhdjkfh _dkhkjd _fdjkf _dshfdks _fhdjfdkhfk _dshfds _fdsjk _fdkhfdks _fdsjf _dkf"},
      "_abc _def _ghi _fdjhfd _fhdjkfh _dkhkjd _fdjkf _dshfdks\r\n\t_fhdjfdkhfk _dshfds _fdsjk _fdkhfdks _fdsjf _dkf"
    },
    {{"Test1", "_abc _def _ghi _fdjhfd _fhdjkfh _dkhaaaaaaaaaaakjdfdjkfdshfdksfhdjfdkhfkdshfdsfdsjkfdkhfdksfdsjf _dkf"},
      "_abc _def _ghi _fdjhfd _fhdjkfh\r\n\t_dkhaaaaaaaaaaakjdfdjkfdshfdksfhdjfdkhfkdshfdsfdsjkfdkhfdksfdsjf\r\n\t_dkf"
    },
    {{"Content-Type", "multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\"hhhhhhhhhhhhhhhhhhhhhhhhh fjsdhfkjsd fhdjsfhkj"},
      "multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\"hhhhhhhhhhhhhhhhhhhhhhhhh\r\n\tfjsdhfkjsd fhdjsfhkj"
    },
    {{"Content-Type", "multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\"hkjhgkfhgfhgf\"hfkjdhf fhjf fghjghf fdshjfhdsj\" hgjhgfjk"},
      "multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\"hkjhgkfhgfhgf\"hfkjdhf fhjf fghjghf fdshjfhdsj\" hgjhgfjk"
    },
    {{"Content-Type", "Content-Type: multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\" abc def ghfdgfdsgj fdshfgfsdgfdsg hfsdgjfsdg fgsfgjsg"},
      "Content-Type: multipart/mixed; boundary=\"---- =_NextPart_000_01BDBF1F.DA8F77EE\" abc\r\n\tdef ghfdgfdsgj fdshfgfsdgfdsg hfsdgjfsdg fgsfgjsg"
    },
    {{"X-Spam-Symbols", "Returnpath_BL2,HFILTER_FROM_BOUNCE,R_PARTS_DIFFER,R_IP_PBL,R_ONE_RCPT,R_googleredir,R_TO_SEEMS_AUTO,R_SPF_NEUTRAL,R_PRIORITY_3,RBL_SPAMHAUS_PBL,HFILTER_MID_NOT_FQDN,MISSING_CTE,R_HAS_URL,RBL_SPAMHAUS_CSS,RBL_SPAMHAUS_XBL,BAYES_SPAM,RECEIVED_RBL10", ','},
      "Returnpath_BL2,\r\n\tHFILTER_FROM_BOUNCE,\r\n\tR_PARTS_DIFFER,\r\n\tR_IP_PBL,\r\n\tR_ONE_RCPT,\r\n\tR_googleredir,\r\n\tR_TO_SEEMS_AUTO,\r\n\tR_SPF_NEUTRAL,\r\n\tR_PRIORITY_3,\r\n\tRBL_SPAMHAUS_PBL,\r\n\tHFILTER_MID_NOT_FQDN,\r\n\tMISSING_CTE,\r\n\tR_HAS_URL,\r\n\tRBL_SPAMHAUS_CSS,\r\n\tRBL_SPAMHAUS_XBL,\r\n\tBAYES_SPAM,\r\n\tRECEIVED_RBL10"
    },
  }

  -- Makes whitespace visible in failure messages.
  local function escape_spaces(str)
    str = string.gsub(str, '[\r\n]+', '<NL>')
    str = string.gsub(str, '[ ]', '<SP>')
    str = string.gsub(str, '[\t]', '<TB>')

    return str
  end

  for i, c in ipairs(cases) do
    local input, expected = c[1], c[2]
    test("Headers folding: " .. i, function()
      local fv = util.fold_header(input[1], input[2], 'crlf', input[3])
      assert_not_nil(fv)
      assert_equal(fv, expected, string.format("'%s' doesn't match with '%s'",
          escape_spaces(expected), escape_spaces(fv)))
    end)
  end
end)

-- File: test/lua/unit/fpconv.lua (new file mode 100644, index 0000000..e64626f)
-- fpconv tests

context("Fpconv printf functions", function()
  local ffi = require("ffi")
  local niter_fuzz = 100000

  -- Random magnitude generators used by the fuzz tests
  local function small_double()
    return math.random()
  end
  local function large_double()
    return math.random() * math.random(2^52)
  end
  local function huge_double()
    return math.random(2^52) * math.random(2^52)
  end
  local function tiny_double()
    return math.random() / math.random(2^52)
  end

  ffi.cdef[[
int snprintf(char *str, size_t size, const char *format, ...);
long rspamd_snprintf(char *str, size_t size, const char *format, ...);
long rspamd_printf(const char *format, ...);
]]

  -- {test label, generator, format}
  -- The "tiny" labels are bound to tiny_double and the "small" labels to
  -- small_double; the same (generator, format) pairs are exercised as before,
  -- only under the label that matches the generator.
  local benchmarks = {
    {'tiny fixed', tiny_double, '%f'},
    {'small fixed', small_double, '%f'},
    {'large fixed', large_double, '%.3f'},
    {'huge fixed', huge_double, '%.3f'},
    {'tiny scientific', tiny_double, '%g'},
    {'small scientific', small_double, '%g'},
    {'large scientific', large_double, '%g'},
    {'huge scientific', huge_double, '%g'},
  }

  -- {value, format, expected output of rspamd_snprintf}
  local generic = {
    {0, '%f', '0'},
    {0, '%.1f', '0.0'},
    {0, '%.2f', '0.00'},
    {0, '%.32f', '0.000000000000000000000000000'}, -- max
    {0, '%.150f', '0.000000000000000000000000000'}, -- too large
    {1/3, '%f', '0.3333333333333333'},
    {1/3, '%.1f', '0.3'},
    {1/3, '%.2f', '0.33'},
    {-1/3, '%.32f', '-0.333333333333333300000000000'},
    {-1/3, '%.150f', '-0.333333333333333300000000000'},
    {-3.6817595395344857e-68, '%f', '-3.6817595395344857e-68'},
    {3.5844466002796428e+298, '%f', '3.5844466002796428e+298'},
    {9223372036854775808, '%f', '9223372036854776000'}, -- 2^63 with precision lost
    {2^50 + 0.2, '%f', '1125899906842624.3'}, -- 2^50 with precision lost
    {2^50 + 0.2, '%.2f', '1125899906842624.30'}, -- 2^50 with precision lost
    {-3.6817595395344857e-68, '%.3f', '-0.000'}, -- not enough precision
    {3.5844466002796428e+298, '%.3f', '3.5844466002796428e+298'},
    {9223372036854775808, '%.3f', '9223372036854776000.000'}, -- 2^63 with precision lost
    {math.huge, '%f', 'inf'},
    {-math.huge, '%f', '-inf'},
    {0.0/0.0, '%f', 'nan'},
    {math.huge, '%.1f', 'inf'},
    {-math.huge, '%.2f', '-inf'},
    {0.0/0.0, '%.3f', 'nan'},
    {math.huge, '%g', 'inf'},
    {-math.huge, '%g', '-inf'},
    {0.0/0.0, '%g', 'nan'},
  }

  -- Output buffers shared by all tests in this context
  local buf = ffi.new("char[64]")
  local buf2 = ffi.new("char[64]")

  for _, c in ipairs(generic) do
    local value, fmt, expected = c[1], c[2], c[3]
    test("Generic fp test fmt: " .. fmt .. '; ' .. tostring(value), function()
      ffi.C.rspamd_snprintf(buf, 64, fmt, value)
      local sbuf = ffi.string(buf)
      assert_equal(sbuf, expected, expected .. " but test returned " .. sbuf)
    end)
  end

  if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then
    for _, c in ipairs(benchmarks) do
      local label, generator, fmt = c[1], c[2], c[3]
      test("Fuzz fp test " .. label, function()
        for _ = 1, niter_fuzz do
          local sign = 1
          if math.random() > 0.5 then
            sign = -1
          end
          local d = generator() * sign

          -- The check below is a relative error, which is undefined for an
          -- exact zero (0/0 is NaN and would fail the comparison spuriously).
          if d ~= 0 then
            -- libc output is kept only for the diagnostic message
            ffi.C.snprintf(buf, 64, fmt, d)
            ffi.C.rspamd_snprintf(buf2, 64, fmt, d)

            local sbuf = ffi.string(buf)
            local sbuf2 = ffi.string(buf2)

            assert_less_than(math.abs(d - tonumber(sbuf2))/math.abs(d),
                0.00001,
                string.format('rspamd emitted: %s, libc emitted: %s, original number: %g',
                    sbuf2, sbuf, d))
          end
        end
      end)
    end
  end
end)
-- File: test/lua/unit/html.lua (new file mode 100644, index 0000000..81c52ec)
context("HTML processing", function()
  local rspamd_util = require("rspamd_util")

  -- {HTML input, expected extracted text}
  -- NOTE(review): this listing was recovered from a whitespace-collapsed web
  -- rendering, so the indentation inside the HTML fixtures is reconstructed,
  -- not original. The same rendering had already decoded character references:
  -- the "Entities" and "escape content" inputs appeared as plain text. They are
  -- written below with numeric / named references so that these cases actually
  -- exercise entity decoding -- confirm the exact inputs against upstream.
  local cases = {
    -- Entities
    {[[<html><body>.&#102;&#105;&#114;&#101;&#98;&#97;&#115;&#101;&#97;&#112;&#112;&#46;&#99;&#111;&#109;</body></html>]],
     [[.firebaseapp.com]]},
    {[[
<?xml version="1.0" encoding="iso-8859-1"?>
  <!DOCTYPE html
    PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
    <head>
      <title>
        Wikibooks
      </title>
    </head>
    <body>
      <p>
        Hello, world!

      </p>
    </body>
  </html>]], 'Hello, world!\n'},
    {[[
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>title</title>
    <link rel="stylesheet" href="style.css">
    <script src="script.js"></script>
    <style><!--
- -a -a -a -- --- -
  --></head>
  <body>
    <!-- page content -->
    Hello, world!
  </body>
</html>
    ]], 'Hello, world!'},
    {[[
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>title</title>
    <link rel="stylesheet" href="style.css">
    <script src="script.js"></script>
  </head>
  <body>
    <!-- page content -->
    Hello, world!<br>test</br><br>content</hr>more content<br>
    <div>
      content inside div
    </div>
  </body>
</html>
    ]], 'Hello, world!\ntest\ncontentmore content\ncontent inside div\n'},
    {[[
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>title</title>
    <link rel="stylesheet" href="style.css">
    <script src="script.js"></script>
  </head>
  <body>
    <!-- tabular content -->
    <table>
      content
    </table>
    <table>
      <tr>
        <th>heada</th>
        <th>headb</th>
      </tr>
      <tr>
        <td>data1</td>
        <td>data2</td>
      </tr>
    </table>

  </body>
</html>
    ]], 'content\nheada headb\ndata1 data2\n'},
    {[[
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>title</title>
    <link rel="stylesheet" href="style.css">
    <script src="script.js"></script>
  </head>
  <body>
    <!-- escape content -->
    a b a &gt; b a &lt; b a &amp; b 'a &quot;a&quot;
  </body>
</html>
    ]], 'a b a > b a < b a & b \'a "a"'},
  }

  for i, c in ipairs(cases) do
    local html, expected = c[1], c[2]
    test("Extract text from HTML " .. tostring(i), function()
      local t = rspamd_util.parse_html(html)

      assert_not_nil(t)
      -- Convert once: string.format's %s is only guaranteed to accept
      -- strings and numbers, and the message is built even when the test
      -- passes, so the raw parse result must not be handed to it.
      local text = tostring(t)
      assert_equal(expected, text, string.format("'%s' doesn't match with '%s'",
          expected, text))
    end)
  end
end)

-- File: test/lua/unit/kann.lua (new file mode 100644, index 0000000..4f8185b)
-- Simple kann test (xor function vs 2 layer MLP)

context("Kann test", function()
  local kann = require "rspamd_kann"

  -- XOR truth table: inputs[i] must map to outputs[i]
  local inputs = {
    {0, 0},
    {0, 1},
    {1, 0},
    {1, 1}
  }

  local outputs = {
    {0},
    {1},
    {1},
    {0}
  }

  -- Network: input(2) -> relu -> dense(2) -> tanh -> mse cost with 1 output
  local t = kann.layer.input(2)
  t = kann.transform.relu(t)
  t = kann.transform.tanh(kann.layer.dense(t, 2));
  t = kann.layer.cost(t, 1, kann.cost.mse)
  local k = kann.new.kann(t)

  -- Training happens while the context is being defined, before any test runs
  local iters = 500
  k:train1(inputs, outputs, {
    lr = 0.01,
    max_epoch = iters,
    mini_size = 80,
  })

  -- Save and reload, so the checks below run against the reloaded network
  local ser = k:save()
  k = kann.load(ser)

  for i, inp in ipairs(inputs) do
    local want = outputs[i][1]
    test(string.format("Check XOR MLP %s ^ %s == %s", inp[1], inp[2], want),
        function()
          -- round the network output to the nearest integer
          local res = math.floor(k:apply1(inp)[1] + 0.5)
          assert_equal(want, res,
              tostring(want) .. " but test returned " .. tostring(res))
        end)
  end
end)
-- File: test/lua/unit/logger.lua (new file mode 100644, index 0000000..dc01207)
context("Logger unit tests", function()
  test("Logger functions", function()
    local log = require "rspamd_logger"

    -- {format, expected output [, single format argument]}
    local cases = {
      {'string', 'string'},
      {'%1', 'string', 'string'},
      {'%1', '1.1', 1.1},
      {'%1', '1', 1},
      {'%1', 'true', true},
      {'%1', '{[1] = 1, [2] = test}', {1, 'test'}},
      {'%1', '{[1] = 1, [2] = 2.1, [k2] = test}', {1, 2.1, k2='test'}},
      {'%s', 'true', true},
    }

    for _, c in ipairs(cases) do
      local fmt, expected, arg = c[1], c[2], c[3]
      local s
      -- Compare against nil rather than testing truthiness, so a case whose
      -- argument is `false` would still be passed through to slog.
      if arg ~= nil then
        s = log.slog(fmt, arg)
      else
        s = log.slog(fmt)
      end
      assert_equal(s, expected, string.format("'%s' doesn't match with '%s'",
          expected, s))
    end
  end)
end)
-- File: test/lua/unit/lua_mime.message_to_ucl.lua (new file mode 100644, index 0000000..e88fb72)

-- NOTE(review): this listing was recovered from a whitespace-collapsed web
-- rendering, so whitespace inside the fixtures below is reconstructed:
--  * continuation lines of the Received header use a TAB, as recorded by the
--    "\n\t" sequences in the expected "raw" value;
--  * with single spaces everywhere the first message is 663 bytes while the
--    expected "size" is 666; the folded `boundary=` line is therefore written
--    with a 4-space indent in both the message and the expected "raw" value.
--    The location of those 3 bytes is an assumption -- verify the fixture (and
--    its "digest") against upstream before relying on it;
--  * the second message reproduces its expected size (135) as written.

--[=========[ ******************* message ******************* ]=========]
local cases = {
  { message = [[
Received: from mail0.mindspring.com (unknown [1.1.1.1])
	(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.example.com (Postfix) with ESMTPS id 88A0C6B332
	for <example@example.com>; Wed, 24 Nov 2021 19:05:43 +0000 (GMT)
From: <>
To: <nobody@example.com>
Subject: test
Content-Type: multipart/alternative;
    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"

--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit

Hello world


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/html; charset="utf-8"

<html><body>
lol
</html>
]],
    expected = [[{
  "parts": [
    {
      "type": "multipart/alternative",
      "multipart_boundary": "_000_6be055295eab48a5af7ad4022f33e2d0_",
      "size": 0,
      "headers": []
    },
    {
      "content": "Hello world\n\n\n",
      "size": 14,
      "type": "text/plain",
      "boundary": "_000_6be055295eab48a5af7ad4022f33e2d0_",
      "detected_type": "text/plain",
      "headers": [
        {
          "order": 0,
          "raw": "Content-Type: text/plain; charset=\"utf-8\"\n",
          "empty_separator": false,
          "value": "text/plain; charset=\"utf-8\"",
          "separator": " ",
          "decoded": "text/plain; charset=\"utf-8\"",
          "name": "Content-Type",
          "tab_separated": false
        },
        {
          "order": 1,
          "raw": "Content-Transfer-Encoding: 7bit\n",
          "empty_separator": false,
          "value": "7bit",
          "separator": " ",
          "decoded": "7bit",
          "name": "Content-Transfer-Encoding",
          "tab_separated": false
        }
      ]
    },
    {
      "content": "<html><body>\nlol\n</html>\n",
      "size": 25,
      "type": "text/html",
      "boundary": "_000_6be055295eab48a5af7ad4022f33e2d0_",
      "detected_type": "text/html",
      "headers": [
        {
          "order": 0,
          "raw": "Content-Type: text/html; charset=\"utf-8\"\n",
          "empty_separator": false,
          "value": "text/html; charset=\"utf-8\"",
          "separator": " ",
          "decoded": "text/html; charset=\"utf-8\"",
          "name": "Content-Type",
          "tab_separated": false
        }
      ]
    }
  ],
  "newlines": "lf",
  "digest": "043cf1a314d0a1af95951d6aec932faf",
  "envelope": {
    "recipients_smtp": [
      {
        "addr": "test1@example.com",
        "raw": "<test1@example.com>",
        "flags": {
          "valid": true
        },
        "user": "test1",
        "name": "Test1",
        "domain": "example.com"
      },
      {
        "addr": "test2@example.com",
        "raw": "<test2@example.com>",
        "flags": {
          "valid": true
        },
        "user": "test2",
        "name": "Test2",
        "domain": "example.com"
      }
    ],
    "from_smtp": {
      "addr": "test@example.com",
      "raw": "<test@example.com>",
      "flags": {
        "valid": true
      },
      "user": "test",
      "name": "Test",
      "domain": "example.com"
    },
    "helo": "hello mail",
    "from_ip": "198.172.22.91"
  },
  "size": 666,
  "headers": [
    {
      "order": 0,
      "raw": "Received: from mail0.mindspring.com (unknown [1.1.1.1])\n\t(using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby mail.example.com (Postfix) with ESMTPS id 88A0C6B332\n\tfor <example@example.com>; Wed, 24 Nov 2021 19:05:43 +0000 (GMT)\n",
      "empty_separator": false,
      "value": "from mail0.mindspring.com (unknown [1.1.1.1]) (using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.example.com (Postfix) with ESMTPS id 88A0C6B332 for <example@example.com>; Wed, 24 Nov 2021 19:05:43 +0000 (GMT)",
      "separator": " ",
      "decoded": "from mail0.mindspring.com (unknown [1.1.1.1]) (using TLSv1.2 with cipher ECDHE-ECDSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.example.com (Postfix) with ESMTPS id 88A0C6B332 for <example@example.com>; Wed, 24 Nov 2021 19:05:43 +0000 (GMT)",
      "name": "Received",
      "tab_separated": false
    },
    {
      "order": 1,
      "raw": "From: <>\n",
      "empty_separator": false,
      "value": "<>",
      "separator": " ",
      "decoded": "<>",
      "name": "From",
      "tab_separated": false
    },
    {
      "order": 2,
      "raw": "To: <nobody@example.com>\n",
      "empty_separator": false,
      "value": "<nobody@example.com>",
      "separator": " ",
      "decoded": "<nobody@example.com>",
      "name": "To",
      "tab_separated": false
    },
    {
      "order": 3,
      "raw": "Subject: test\n",
      "empty_separator": false,
      "value": "test",
      "separator": " ",
      "decoded": "test",
      "name": "Subject",
      "tab_separated": false
    },
    {
      "order": 4,
      "raw": "Content-Type: multipart/alternative;\n    boundary=\"_000_6be055295eab48a5af7ad4022f33e2d0_\"\n",
      "empty_separator": false,
      "value": "multipart/alternative; boundary=\"_000_6be055295eab48a5af7ad4022f33e2d0_\"",
      "separator": " ",
      "decoded": "multipart/alternative; boundary=\"_000_6be055295eab48a5af7ad4022f33e2d0_\"",
      "name": "Content-Type",
      "tab_separated": false
    }
  ]
}]]
  },
  {
    message = [[
From: <>
Content-Type: multipart/mixed; boundary="-"

123
---
321

--
WBR yours
--
a
-----------------
b
------
c
-
d
--------
f
-----
]],
    expected = [[
{
  "parts": [
    {
      "type": "multipart/mixed",
      "multipart_boundary": "-",
      "size": 0,
      "headers": []
    },
    {
      "content": "--\nWBR yours\n--\na\n-----------------\nb\n------\nc\n-\nd\n--------\nf\n",
      "size": 62,
      "type": "text/plain",
      "boundary": "-",
      "detected_type": "text/plain",
      "headers": []
    }
  ],
  "newlines": "lf",
  "digest": "1a680eb7563f32a2fbf67cf45e90f045",
  "envelope": {
    "recipients_smtp": [
      {
        "addr": "test1@example.com",
        "raw": "<test1@example.com>",
        "flags": {
          "valid": true
        },
        "user": "test1",
        "name": "Test1",
        "domain": "example.com"
      },
      {
        "addr": "test2@example.com",
        "raw": "<test2@example.com>",
        "flags": {
          "valid": true
        },
        "user": "test2",
        "name": "Test2",
        "domain": "example.com"
      }
    ],
    "from_smtp": {
      "addr": "test@example.com",
      "raw": "<test@example.com>",
      "flags": {
        "valid": true
      },
      "user": "test",
      "name": "Test",
      "domain": "example.com"
    },
    "helo": "hello mail",
    "from_ip": "198.172.22.91"
  },
  "size": 135,
  "headers": [
    {
      "order": 0,
      "raw": "From: <>\n",
      "empty_separator": false,
      "value": "<>",
      "separator": " ",
      "decoded": "<>",
      "name": "From",
      "tab_separated": false
    },
    {
      "order": 1,
      "raw": "Content-Type: multipart/mixed; boundary=\"-\"\n",
      "empty_separator": false,
      "value": "multipart/mixed; boundary=\"-\"",
      "separator": " ",
      "decoded": "multipart/mixed; boundary=\"-\"",
      "name": "Content-Type",
      "tab_separated": false
    }
  ]
}]]
  }
}

context("Task piecewise split", function()
  local rspamd_task = require "rspamd_task"
  local rspamd_util = require "rspamd_util"
  local rspamd_test_helper = require "rspamd_test_helper"
  local lua_mime = require "lua_mime"
  local ucl = require "ucl"
  local rspamd_parsers = require "rspamd_parsers"

  rspamd_test_helper.init_url_parser()
  local cfg = rspamd_util.config_from_ucl(rspamd_test_helper.default_config(),
      "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS")

  for i, case in ipairs(cases) do
    test("Simple message split case " .. tostring(i), function()
      local loaded, task = rspamd_task.load_from_string(case.message, cfg)
      assert_true(loaded and task ~= nil, "failed to load message")

      -- Run the checks under pcall so the task is destroyed even when an
      -- assertion raises; the original error is re-raised afterwards.
      local ok, err = pcall(function()
        task:set_from('smtp', rspamd_parsers.parse_mail_address("Test <test@example.com>")[1])
        task:set_recipients('smtp', {
          rspamd_parsers.parse_mail_address("Test1 <test1@example.com>")[1],
          rspamd_parsers.parse_mail_address("Test2 <test2@example.com>")[1]
        }, 'rewrite')
        task:set_from_ip("198.172.22.91")
        task:set_user("cool user name")
        task:set_helo("hello mail")
        task:process_message()

        -- Parse the expected JSON into a Lua value for comparison
        local parser = ucl.parser()
        local parsed_ok = parser:parse_string(case.expected)
        assert_true(parsed_ok)
        local expected = parser:get_object()

        local ucl_object = lua_mime.message_to_ucl(task, true)
        -- the produced object must also satisfy the module's own schema
        local schema = lua_mime.message_to_ucl_schema()
        assert_true(schema(ucl_object))
        assert_rspamd_table_eq({
          actual = ucl_object,
          expect = expected
        })
      end)

      task:destroy()
      if not ok then
        -- level 0 leaves the original error value untouched
        error(err, 0)
      end
    end)
  end

end)
\ No newline at end of file diff --git a/test/lua/unit/lua_util.extract_specific_urls.lua b/test/lua/unit/lua_util.extract_specific_urls.lua new file mode 100644 index 0000000..a7e2f9f --- /dev/null +++ b/test/lua/unit/lua_util.extract_specific_urls.lua @@ -0,0 +1,345 @@ + +local msg, msg_img +local logger = require "rspamd_logger" +local rspamd_util = require "rspamd_util" +local rspamd_task = require "rspamd_task" +local util = require 'lua_util' +local mpool = require "rspamd_mempool" +local fun = require "fun" +local url = require "rspamd_url" + +--[=========[ ******************* message ******************* ]=========] +msg = [[ +From: <> +To: <nobody@example.com> +Subject: test +Content-Type: multipart/alternative; + boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +Hello world + + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/html; charset="utf-8" + +<html><body> +<a href="http://example.net">http://example.net</a> +<a href="http://example1.net">http://example1.net</a> +<a href="http://example2.net">http://example2.net</a> +<a href="http://example3.net">http://example3.net</a> +<a href="http://example4.net">http://example4.net</a> +<a href="http://domain1.com">http://domain1.com</a> +<a href="http://domain2.com">http://domain2.com</a> +<a href="http://domain3.com">http://domain3.com</a> +<a href="http://domain4.com">http://domain4.com</a> +<a href="http://domain5.com">http://domain5.com</a> +<a href="http://domain.com">http://example.net/</a> +<img src="http://example5.org">hahaha</img> +</html> +]] +msg_img = [[ +From: <> +To: <nobody@example.com> +Subject: test +Content-Type: multipart/alternative; + boundary="_000_6be055295eab48a5af7ad4022f33e2d0_" + +--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +Hello world + + 
+--_000_6be055295eab48a5af7ad4022f33e2d0_ +Content-Type: text/html; charset="utf-8" + +<html><body> +<a href="http://example.net">http://example.net</a> +<a href="http://domain.com">http://example.net</a> +<img src="http://example5.org">hahaha</img> +</html> +]] + +local function prepare_actual_result(actual) + return fun.totable(fun.map( + function(u) return u:get_raw():gsub('^%w+://', '') end, + actual + )) +end + +context("Lua util - extract_specific_urls plain", function() + local test_helper = require "rspamd_test_helper" + + test_helper.init_url_parser() + + local task_object = { + urls = {}, + cache_set = function(self, ...) end, + cache_get = function(self, ...) end, + get_urls = function(self, need_emails) return self.urls end + } + + local url_list = { + "google.com", + "mail.com", + "bizz.com", + "bing.com", + "example.com", + "gov.co.net", + "tesco.co.net", + "domain1.co.net", + "domain2.co.net", + "domain3.co.net", + "domain4.co.net", + "abc.org", + "icq.org", + "meet.org", + "domain1.org", + "domain2.org", + "domain3.org", + "test.com", + } + + local cases = { + {expect = url_list, filter = nil, limit = 9999, need_emails = true, prefix = 'p'}, + {expect = {}, filter = (function() return false end), limit = 9999, need_emails = true, prefix = 'p'}, + {expect = {"domain4.co.net", "test.com", "domain3.org"}, filter = nil, limit = 3, need_emails = true, prefix = 'p'}, + { + expect = {"gov.co.net", "tesco.co.net", "domain1.co.net", "domain2.co.net", "domain3.co.net", "domain4.co.net"}, + filter = (function(s) return s:get_host():sub(-4) == ".net" end), + limit = 9999, + need_emails = true, + prefix = 'p' + }, + { + input = {"a.google.com", "b.google.com", "c.google.com", "a.net", "bb.net", "a.bb.net", "b.bb.net"}, + expect = {"a.bb.net", "b.google.com", "a.net", "bb.net", "a.google.com"}, + filter = nil, + limit = 9999, + esld_limit = 2, + need_emails = true, + prefix = 'p' + }, + { + input = {"abc@a.google.com", "b.google.com", "c.google.com", "a.net", 
"bb.net", "a.bb.net", "b.bb.net"}, + expect = {"abc@a.google.com", "a.bb.net", "b.google.com", "a.net", "bb.net"}, + filter = nil, + limit = 9999, + esld_limit = 2, + need_emails = true, + prefix = 'p' + } + } + + local pool = mpool.create() + + local function prepare_url_list(list) + return fun.totable(fun.map( + function (u) return url.create(pool, u) end, + list or url_list + )) + end + + for i,c in ipairs(cases) do + test("extract_specific_urls, backward compatibility case #" .. i, function() + task_object.urls = prepare_url_list(c.input) + if (c.esld_limit) then + -- not awailable in deprecated version + return + end + local actual = util.extract_specific_urls(task_object, c.limit, c.need_emails, c.filter, c.prefix) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("%1 =?= %2", c.expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = c.expect}) + end) + + test("extract_specific_urls " .. i, function() + task_object.urls = prepare_url_list(c.input) + + local actual = util.extract_specific_urls({ + task = task_object, + limit = c.limit, + esld_limit = c.esld_limit, + need_emails = c.need_emails, + filter = c.filter, + prefix = c.prefix, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, c.expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = c.expect}) + end) + end + + test("extract_specific_urls, another case", function() + task_object.urls = prepare_url_list {"abc.net", "abc.com", "abc.net", "abc.za.org"} + local actual = util.extract_specific_urls(task_object, 3, true) + + local actual_result = prepare_actual_result(actual) + --[[ + local s = logger.slog("%1 =?= %2", c.expect, actual_result) + print(s) --]] + + local expect = {"abc.com", "abc.net", "abc.za.org"} + assert_rspamd_table_eq_sorted({actual = actual_result, expect = expect}) + end) +end) + 
+context("Lua util - extract_specific_urls message", function() + +--[[ ******************* kinda functional *************************************** ]] + + local test_helper = require "rspamd_test_helper" + local cfg = rspamd_util.config_from_ucl(test_helper.default_config(), + "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS") + local res,task = rspamd_task.load_from_string(msg, cfg) + + if not res then + assert(false, "failed to load message") + end + + if not task:process_message() then + assert(false, "failed to process message") + end + + test("extract_specific_urls - from email 1 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 1, + esld_limit = 1, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = {"domain.com"}}) + + end) + test("extract_specific_urls - from email 2 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = {"domain.com", "example.net"}}) + + end) + + res,task = rspamd_task.load_from_string(msg_img, rspamd_config) + + if not res then + assert_true(false, "failed to load message") + end + + if not task:process_message() then + assert_true(false, "failed to process message") + end + test("extract_specific_urls - from email image 1 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 1, + esld_limit = 1, + need_images = false, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + 
assert_rspamd_table_eq_sorted({actual = actual_result, expect = {"domain.com"}}) + + end) + test("extract_specific_urls - from email image 2 limit", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + need_images = false, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = {"domain.com", "example.net"}}) + + end) + test("extract_specific_urls - from email image 3 limit, no images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 3, + esld_limit = 1, + need_images = false, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, expect = {"domain.com", "example.net"}}) + end) + test("extract_specific_urls - from email image 3 limit, has images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 3, + esld_limit = 1, + need_images = true, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, + expect = {"domain.com", "example.net", "example5.org"}}) + end) + test("extract_specific_urls - from email image 2 limit, has images", function() + local actual = util.extract_specific_urls({ + task = task, + limit = 2, + esld_limit = 1, + need_images = true, + }) + + local actual_result = prepare_actual_result(actual) + + --[[ + local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result) + print(s) --]] + + assert_rspamd_table_eq_sorted({actual = actual_result, + expect = {"domain.com", "example.net"}}) + end) +end) diff --git a/test/lua/unit/lua_util.misc.lua 
b/test/lua/unit/lua_util.misc.lua new file mode 100644 index 0000000..bab44a3 --- /dev/null +++ b/test/lua/unit/lua_util.misc.lua @@ -0,0 +1,61 @@ +local util = require 'lua_util' + +context("Lua util - callback_from_string", function() + local cases = { + {'return function', 'return function(a, b) return a + b end'}, + {'function', 'function(a, b) return a + b end'}, + {'plain ops', 'local c = select(1, ...)\nreturn c + select(2, ...)'}, + } + local fail_cases = { + nil, + '', + 'return function(a, b) ( end', + 'function(a, b) ( end', + 'return a + b' + } + + for _,c in ipairs(cases) do + test('Success case: ' .. c[1], function() + local ret,f = util.callback_from_string(c[2]) + assert_true(ret, f) + assert_equal(f(2, 2), 4) + end) + end + for i,c in ipairs(fail_cases) do + test('Failure case: ' .. tostring(i), function() + local ret,f = util.callback_from_string(c) + assert_false(ret) + end) + end +end) + +context("Lua util - str_endswith", function() + local ending = { + {'a', 'a'}, + {'ab', 'b'}, + {'ab', 'ab'}, + {'abc', 'bc'}, + {'any', ''}, + } + local not_ending = { + {'a', 'b'}, + {'', 'a'}, + {'ab', 'a'}, + {'ab', 'ba'}, + {'ab', 'lab'}, + {'abc', 'ab'}, + {'abcd', 'bc'}, + {'a', 'A'}, + {'aB', 'b'}, + } + for _, c in ipairs(ending) do + test(string.format('True case: str_endswith("%s", "%s")', c[1], c[2]), function() + assert_true(util.str_endswith(c[1], c[2])) + end) + end + for _, c in ipairs(not_ending) do + test(string.format('False case: str_endswith("%s", "%s")', c[1], c[2]), function() + assert_false(util.str_endswith(c[1], c[2])) + end) + end +end) diff --git a/test/lua/unit/mempool.lua b/test/lua/unit/mempool.lua new file mode 100644 index 0000000..fefd3d2 --- /dev/null +++ b/test/lua/unit/mempool.lua @@ -0,0 +1,47 @@ +context("Memory pool unit tests", function() + test("Mempool variables", function() + local mempool = require "rspamd_mempool" + + local pool = mempool.create() + + assert_not_nil(pool) + + -- string + pool:set_variable('a', 
'bcd') + local var = pool:get_variable('a') + assert_equal(var, 'bcd') + + -- integer + pool:set_variable('a', 1) + var = pool:get_variable('a', 'double') + assert_equal(var, 1) + + -- float + pool:set_variable('a', 1.01) + var = pool:get_variable('a', 'double') + assert_equal(var, 1.01) + + -- boolean + pool:set_variable('a', false) + var = pool:get_variable('a', 'bool') + assert_equal(var, false) + + -- multiple + pool:set_variable('a', 'bcd', 1, 1.01, false) + local v1, v2, v3, v4 = pool:get_variable('a', 'string,double,double,bool') + assert_equal(v1, 'bcd') + assert_equal(v2, 1) + assert_equal(v3, 1.01) + assert_equal(v4, false) + + local t = {1,2,3,4,5} + pool:set_variable('a', t) + local bucket = pool:get_variable('a', 'bucket') + assert_rspamd_table_eq({ + expect = t, + actual = bucket + }) + + pool:destroy() + end) +end)
\ No newline at end of file diff --git a/test/lua/unit/quoted_printable.lua b/test/lua/unit/quoted_printable.lua new file mode 100644 index 0000000..99a21a1 --- /dev/null +++ b/test/lua/unit/quoted_printable.lua @@ -0,0 +1,164 @@ +context("Quoted-Printable encoding", function() + local rspamd_util = require "rspamd_util" + -- These test cases are derived from https://github.com/mathiasbynens/quoted-printable + local cases = { + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=3D', + 'Exactly 73 chars of which the last one is `=`' + }, + { + 'If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.', + 'If you believe that truth=3Dbeauty, then surely mathematics is the most bea=\r\nutiful branch of philosophy.', + 'Equals sign' + }, + { + 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum claritatem. Investigationes demonstraverunt lectores legere me lius quod ii legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. 
Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum.', + 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy =\r\nnibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wi=\r\nsi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lo=\r\nbortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure d=\r\nolor in hendrerit in vulputate velit esse molestie consequat, vel illum dol=\r\nore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio digni=\r\nssim qui blandit praesent luptatum zzril delenit augue duis dolore te feuga=\r\nit nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue=\r\n nihil imperdiet doming id quod mazim placerat facer possim assum. Typi non=\r\n habent claritatem insitam; est usus legentis in iis qui facit eorum clarit=\r\natem. Investigationes demonstraverunt lectores legere me lius quod ii legun=\r\nt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem =\r\nconsuetudium lectorum. Mirum est notare quam littera gothica, quam nunc put=\r\namus parum claram, anteposuerit litterarum formas humanitatis per seacula q=\r\nuarta decima et quinta decima. 
Eodem modo typi, qui nunc nobis videntur par=\r\num clari, fiant sollemnes in futurum.', + '76-char line limit', + }, + { + 'foo ', + 'foo=20', + 'Trailing space' + }, + { + 'foo\t', + 'foo=09', + 'Trailing tab' + }, + + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\n=3D', + 'Exactly 74 chars of which the last one is `=`' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\n=3D', + 'Exactly 75 chars of which the last one is `=`' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\n=3D', + 'Exactly 76 chars of which the last one is `=`', + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\nx=3D', + 'Exactly 77 chars of which the last one is `=`' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=20', + 'Exactly 73 chars of which the last one is a space' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=20', + 'Exactly 74 chars of which the last one is a space' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx =\r\n', + 'Exactly 75 chars of which the last one is a space' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\n=20', + 'Exactly 
76 chars of which the last one is a space' + }, + { + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', + 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=\r\nx=20', + 'Exactly 77 chars of which the last one is a space' + }, + { + 'fdafadsf\r\n-- • Test\r\n', + 'fdafadsf\r\n-- =E2=80=A2 Test\r\n', + 'Newlines', + }, + } + for _,c in ipairs(cases) do + test("QP sanity test case: " .. c[3], function() + local res = { + expect = c[1], + actual = tostring(rspamd_util.decode_qp((rspamd_util.encode_qp(c[1], 76)))) + } + assert_rspamd_eq(res) + end) + test("QP encoding test case: " .. c[3], function() + local res = { + expect = c[2], + actual = tostring(rspamd_util.encode_qp(c[1], 76)) + } + assert_rspamd_eq(res) + end) + end + -- Decode issues + cases = { + { + 'Mailscape External Mail Flow Outbound Test=', + 'Mailscape External Mail Flow Outbound Test=', + 'asan found' + }, + { + 'foo=\n\nbar', + 'foo\nbar', + 'Soft newline followed by hard newline (LF)', + }, + { + 'foo=\r\n\r\nbar', + 'foo\r\nbar', + 'Soft newline followed by hard newline (CRLF)', + }, + { + '=gB', + '=gB', + 'Second character is okay, the first character is garbage' + }, + { + '=bG', + '=bG', + 'First character okay, the second character is rubbish' + } + } + + for _,c in ipairs(cases) do + test("QP decoding test case: " .. c[3], function() + local res = { + expect = c[2], + actual = tostring(rspamd_util.decode_qp(c[1])) + } + assert_rspamd_eq(res) + end) + end + + + if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then + -- Fuzz testing + local charset = {} + for i = 0, 255 do table.insert(charset, string.char(i)) end + + local function random_string(length) + + if length > 0 then + return random_string(length - 1) .. charset[math.random(1, #charset)] + else + return "" + end + end + for _,l in ipairs({10, 100, 1000, 10000}) do + test("QP fuzz test max length " .. 
tostring(l), function() + for _=1,100 do + local inp = random_string(math.random() * l + 1) + local res = { + expect = inp, + actual = tostring(rspamd_util.decode_qp((rspamd_util.encode_qp(inp, 0)))) + } + assert_rspamd_eq(res) + end + end) + end + end +end) diff --git a/test/lua/unit/regxep.lua b/test/lua/unit/regxep.lua new file mode 100644 index 0000000..a27e7b3 --- /dev/null +++ b/test/lua/unit/regxep.lua @@ -0,0 +1,90 @@ +context("Regexp unit tests", function() + local re = require("rspamd_regexp") + + test("Regexp creation", function() + assert_not_nil(re.create_cached('/test$/m')) + assert_not_nil(re.create_cached('^test$', 'm')) + assert_not_nil(re.create_cached('m,test,m')) + assert_not_nil(re.create_cached('m|test|m')) + end) + test("Regexp match", function() + local cases = { + {'/Тест/iu', 'тест', true}, + {'/test$/m', '123test', true}, + {'/^test$/m', '123test', false}, + {'m,test,', 'test', true}, + {'m,test,', 'test123', false}, + {'m{https?://[^/?\\s]+?:\\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\\s|$)}', '', false}, + {'/test/i', 'TeSt123', true}, + -- Raw regexp + {'/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/r', 'some<example@example.com>', true}, + -- Cyrillic utf8 letter + {'/\\S<[-\\w\\.]+\\@[-\\w\\.]+>/r', 'some<example@exаmple.com>', false}, + } + + for _,c in ipairs(cases) do + local r = re.create_cached(c[1]) + assert_not_nil(r, "cannot parse " .. 
c[1]) + local res = r:match(c[2]) + + assert_equal(res, c[3], string.format("'%s' doesn't match with '%s'", + c[2], c[1])) + end + end) + + test("Regexp capture", function() + local cases = { + {'Body=(\\S+)(?: Fuz1=(\\S+))?(?: Fuz2=(\\S+))?', + 'mc-filter4 1120; Body=1 Fuz1=2 Fuz2=3', + {'Body=1 Fuz1=2 Fuz2=3', '1', '2', '3'}}, + {'Body=(\\S+)(?: Fuz1=(\\S+))?(?: Fuz2=(\\S+))?', + 'mc-filter4 1120; Body=1 Fuz1=2', {'Body=1 Fuz1=2', '1', '2'}}, + {'Body=(\\S+)(?: Fuz1=(\\S+))?(?: Fuz2=(\\S+))?', + 'mc-filter4 1120; Body=1 Fuz1=2 mc-filter4 1120; Body=1 Fuz1=2 Fuz2=3', + {'Body=1 Fuz1=2', '1', '2'}, {'Body=1 Fuz1=2 Fuz2=3', '1', '2', '3'}}, + } + for _,c in ipairs(cases) do + local r = re.create_cached(c[1]) + assert_not_nil(r, "cannot parse " .. c[1]) + local res = r:search(c[2], false, true) + + assert_not_nil(res, "cannot find pattern") + + for k = 3, table.maxn(c) do + for n,m in ipairs(c[k]) do + assert_equal(res[k - 2][n], c[k][n], string.format("'%s' doesn't match with '%s'", + c[k][n], res[k - 2][n])) + end + end + end + end) + + test("Regexp split", function() + local cases = { + {'\\s', 'one', {'one'}}, -- one arg + {'\\s', 'one two', {'one', 'two'}}, -- trivial + {'/,/i', '1,2', {'1', '2'}}, -- trivial + {'\\s', 'one two', {'one', 'two'}}, -- multiple delimiters + {'\\s', ' one two ', {'one', 'two'}}, -- multiple delimiters + {'\\s', ' one ', {'one'}}, -- multiple delimiters + {'[:,]', ',,,:::one,two,,', {'one', 'two'}}, -- multiple delimiters + {'[\\|\\s]', '16265 | 1.1.1.0/22 | TR | ripencc | 2014-02-28', + {'16265', '1.1.1.0/22', 'TR', 'ripencc', '2014-02-28'}}, -- practical + {'|', '16265 | 1.1.1.0/22 | TR | ripencc | 2014-02-28', {}} -- bad re + } + + for _,c in ipairs(cases) do + local r = re.create_cached(c[1]) + assert_not_nil(r, "cannot parse " .. c[1]) + + local res = r:split(c[2]) + assert_not_nil(res, "cannot split " .. c[2]) + + for i,r in ipairs(c[3]) do + assert_equal(res[i], r) + end + end + end) + + end +)
\ No newline at end of file diff --git a/test/lua/unit/rfc2047.lua b/test/lua/unit/rfc2047.lua new file mode 100644 index 0000000..658f202 --- /dev/null +++ b/test/lua/unit/rfc2047.lua @@ -0,0 +1,92 @@ +--[[ +Copyright (c) 2022, Vsevolod Stakhov <vsevolod@rspamd.com> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+]]-- + +context("RFC2047 decoding", function() + local ffi = require("ffi") + + ffi.cdef[[ + const char * rspamd_mime_header_decode (void *pool, const char *in, size_t inlen); + void * rspamd_mempool_new_ (size_t sz, const char *name, int flags, const char *strloc); + void rspamd_mempool_delete (void *pool); + ]] + + test("Decode rfc2047 tokens", function() + -- Test -> expected + local cases = { + {"=?US-ASCII*EN?Q?Keith_Moore?= <moore@cs.utk.edu>", "Keith Moore <moore@cs.utk.edu>"}, + {[[=?windows-1251?Q?=C2=FB_=F1=EC=EE=E6=E5=F2=E5_=F5=E0=F0?= + =?windows-1251?Q?=E0=EA=F2=E5=F0=E8=E7=EE=E2=E0=F2=FC=F1?= + =?windows-1251?Q?=FF_=E7=EE=F0=EA=E8=EC_=E7=F0=E5=ED=E8?= + =?windows-1251?Q?=E5=EC?=]], "Вы сможете характеризоваться зорким зрением"}, + {'v=1; a=rsa-sha256; c=relaxed/relaxed; d=yoni.za.org; s=testdkim1;', + 'v=1; a=rsa-sha256; c=relaxed/relaxed; d=yoni.za.org; s=testdkim1;'}, + {"=?windows-1251?B?xO7q8+zl7fIuc2NyLnV1ZQ==?=", "Документ.scr.uue"}, + {"=?UTF-8?Q?=20wie=20ist=20es=20Ihnen=20ergangen?.pdf?=", " wie ist es Ihnen ergangen?.pdf"}, -- ? inside + {"=?UTF-8?Q?=20wie=20ist=20es=20Ihnen=20ergangen??=", " wie ist es Ihnen ergangen?"}, -- ending ? inside + } + + local pool = ffi.C.rspamd_mempool_new_(4096, "lua", 0, "rfc2047.lua:49") + + for _,c in ipairs(cases) do + local res = ffi.C.rspamd_mime_header_decode(pool, c[1], #c[1]) + res = ffi.string(res) + assert_not_nil(res, "cannot decode " .. 
c[1]) + assert_rspamd_eq({actual = res, expect = c[2]}) + + end + + ffi.C.rspamd_mempool_delete(pool) + end) + if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then + test("Fuzz test for rfc2047 tokens", function() + local util = require("rspamd_util") + local pool = ffi.C.rspamd_mempool_new_(4096, "lua", 0, "rfc2047.lua:63") + local str = "Тест Тест Тест Тест Тест" + + for _ = 0,1000 do + local r1 = math.random() + local r2 = math.random() + local sl1 = #str / 2.0 * r1 + local sl2 = #str / 2.0 * r2 + + local s1 = tostring(util.encode_base64(string.sub(str, 1, sl1))) + local s2 = tostring(util.encode_base64(string.sub(str, sl1 + 1, sl2))) + local s3 = tostring(util.encode_base64(string.sub(str, sl2 + 1))) + + if #s1 > 0 and #s2 > 0 and #s3 > 0 then + local s = string.format('=?UTF-8?B?%s?= =?UTF-8?B?%s?= =?UTF-8?B?%s?=', + s1, s2, s3) + local res = ffi.C.rspamd_mime_header_decode(pool, s, #s) + res = ffi.string(res) + assert_not_nil(res, "cannot decode " .. s) + assert_rspamd_eq({actual = res, expect = str}) + end + end + + ffi.C.rspamd_mempool_delete(pool) + end) + end +end) diff --git a/test/lua/unit/rsa.lua b/test/lua/unit/rsa.lua new file mode 100644 index 0000000..c67a36a --- /dev/null +++ b/test/lua/unit/rsa.lua @@ -0,0 +1,50 @@ +-- Test rsa signing + +context("RSA signature verification test", function() + local rsa_privkey = require "rspamd_rsa_privkey" + local rsa_pubkey = require "rspamd_rsa_pubkey" + local rsa_signature = require "rspamd_rsa_signature" + local rsa = require "rspamd_rsa" + local hash = require "rspamd_cryptobox_hash" + local pubkey = 'testkey.pub' + local privkey = 'testkey.sec' + local data = 'test.data' + local signature = 'test.sig' + local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1") + local rsa_key, rsa_sig + + test("RSA sign", function() + -- Signing test + rsa_key = rsa_privkey.load_file(string.format('%s/%s', test_dir, privkey)) + assert_not_nil(rsa_key) + + local h = hash.create_specific('sha256') + local d = 
io.open(string.format('%s/%s', test_dir, data), "rb"):read "*a" + h:update(d) + local sig = rsa.sign_memory(rsa_key, h:bin()) + assert_not_nil(sig) + sig:save(string.format('%s/%s', test_dir, signature), true) + end) + + test("RSA verify", function() + -- Verifying test + local h = hash.create_specific('sha256') + local d = io.open(string.format('%s/%s', test_dir, data), "rb"):read "*a" + h:update(d) + rsa_key = rsa_pubkey.load(string.format('%s/%s', test_dir, pubkey)) + assert_not_nil(rsa_key) + rsa_sig = rsa_signature.load(string.format('%s/%s', test_dir, signature)) + assert_not_nil(rsa_sig) + assert_true(rsa.verify_memory(rsa_key, rsa_sig, h:bin())) + end) + + test("RSA keypair + sign + verify", function() + local sk, pk = rsa.keypair() + local sig = rsa.sign_memory(sk, "test") + assert_true(rsa.verify_memory(pk, sig, "test")) + assert_false(rsa.verify_memory(pk, sig, "test1")) + -- Overwrite + sk, pk = rsa.keypair() + assert_false(rsa.verify_memory(pk, sig, "test")) + end) +end) diff --git a/test/lua/unit/rspamd_resolver.lua b/test/lua/unit/rspamd_resolver.lua new file mode 100644 index 0000000..e987ff0 --- /dev/null +++ b/test/lua/unit/rspamd_resolver.lua @@ -0,0 +1,31 @@ +-- Rspamd resolver Lua tests + +context("Check punycoding UTF-8 URL", function() + local rspamd_resolver = require "rspamd_resolver" + local rspamd_util = require "rspamd_util" + + local resolver = rspamd_resolver.init(rspamd_util.create_event_base(), rspamd_config) + + local cases = { + -- https://unicode.org/reports/tr46/#Deviations + ['faß.de'] = 'fass.de', -- IDNA2008 result: xn--fa-hia.de + ['βόλος.com'] = 'xn--nxasmq6b.com', -- IDNA2008 result: xn--nxasmm1c.com + ['نامهای.com'] = 'xn--mgba3gch31f.com', -- IDNA2008 result: xn--mgba3gch31f060k.com + ['ශ්රී.com'] = 'xn--10cl1a0b.com', -- IDNA2008 result: xn--10cl1a0b660p.com + + -- https://unicode.org/reports/tr46/#Table_Example_Processing + ['日本語。JP'] = 'xn--wgv71a119e.jp', -- Fullwidth characters are remapped, including 。 + 
--['u¨.com'] = 'xn--tda.com', -- Normalize changes u + umlaut to ü
    ['☕.us'] = 'xn--53h.us', -- Post-Unicode 3.2 characters are allowed

    -- Other
    ['example.рф'] = 'example.xn--p1ai',
  }

  -- One test per table entry; pairs() order is unspecified, so the tests
  -- must not depend on each other.
  for input, expected in pairs(cases) do
    local title = string.format("punycode %s -> %s", input, expected)
    test(title, function()
      assert_equal(resolver:idna_convert_utf8(input), expected)
    end)
  end
end)

-- diff --git a/test/lua/unit/rspamd_text.lua b/test/lua/unit/rspamd_text.lua new file mode 100644 index 0000000..d643d9e --- /dev/null +++ b/test/lua/unit/rspamd_text.lua @@ -0,0 +1,79 @@
-- rspamd_text:byte(i, j) must return the same values as string.byte(i, j)
-- for every start/stop pair in -4..4, plus two cases with a nil bound.
context("Rspamd_text:byte() test", function()
  local rspamd_text = require "rspamd_text"

  local str = 'OMG'
  local txt = rspamd_text.fromstring(str)
  local fmt = 'case rspamd_text:byte(%s,%s)'
  -- Bounds are kept as strings so they can be printed in the test name;
  -- tonumber('nil') yields nil, which exercises the default argument.
  local cases = {
    {'1', 'nil'},
    {'nil', '1'},
  }

  for start = -4, 4 do
    for stop = -4, 4 do
      cases[#cases + 1] = {tostring(start), tostring(stop)}
    end
  end

  for _, case in ipairs(cases) do
    local first, last = tonumber(case[1]), tonumber(case[2])
    test(string.format(fmt, case[1], case[2]), function()
      assert_rspamd_table_eq({
        expect = {str:byte(first, last)},
        actual = {txt:byte(first, last)}
      })
    end)
  end
end)

-- rspamd_text:find() is checked against a table of expected {start, end}
-- pairs (nil when the needle must not be found).
context("Rspamd_text:find() test", function()
  local rspamd_text = require "rspamd_text"

  local cases = {
    {{'foobarfoo', 'f'}, {1, 1}},
    {{'foobarfoo', 'foo'}, {1, 3}},
    {{'foobarfoo', 'bar'}, {4, 6}},
    {{'foobarfoo', 'baz'}, nil},
    {{'foobarfoo', 'rfoo'}, {6, 9}},
    {{'foo', 'bar'}, nil},
    {{'x', 'xxxx'}, nil},
    {{'', ''}, {1, 0}},
    {{'', '_'}, nil},
    {{'x', ''}, {1, 0}},
  }

  for _, case in ipairs(cases) do
    local name = string.format('case rspamd_text:find(%s,%s)', case[1][1], case[1][2])
    test(name, function()
      local t = rspamd_text.fromstring(case[1][1])
      local s,e = t:find(case[1][2])

      if case[2] then
        assert_rspamd_table_eq({
          expect = case[2],
          actual = {s, e}
        })
      else
assert_nil(s)
      end
      -- The result must also match a plain (non-pattern) string.find
      local ss,ee = string.find(case[1][1], case[1][2], 1, true)
      assert_rspamd_table_eq({
        expect = { ss, ee },
        actual = { s, e }
      })
    end)
    -- Cross-check against plain-mode string.find from stock Lua
    name = string.format('case lua string vs rspamd_text:find(%s,%s)', case[1][1], case[1][2])
    test(name, function()
      local haystack, needle = case[1][1], case[1][2]
      local got_s, got_e = rspamd_text.fromstring(haystack):find(needle)
      local want_s, want_e = string.find(haystack, needle, 1, true)
      assert_rspamd_table_eq({
        expect = { want_s, want_e },
        actual = { got_s, got_e }
      })
    end)
  end
end)

-- diff --git a/test/lua/unit/rspamd_util.lua b/test/lua/unit/rspamd_util.lua new file mode 100644 index 0000000..56f13d6 --- /dev/null +++ b/test/lua/unit/rspamd_util.lua @@ -0,0 +1,136 @@
context("Rspamd util for lua - check generic functions", function()
  local util = require 'rspamd_util'


  -- Shared fixtures: `result` is the expected return of is_utf_outside_range
  -- for [range_start, range_end]; `mixed_script` is the expected return of
  -- is_utf_mixed_script for the same input.
  local cases = {
    { input = "test1", result = false, mixed_script = false, range_start = 0x0000, range_end = 0x017f },
    { input = "test test xxx", result = false, mixed_script = false, range_start = 0x0000, range_end = 0x017f },
    { input = "АбЫрвАлг", result = true, mixed_script = false, range_start = 0x0000, range_end = 0x017f },
    { input = "АбЫрвАлг example", result = true, mixed_script = true, range_start = 0x0000, range_end = 0x017f },
    { input = "example ąłśćżłóę", result = false, mixed_script = false, range_start = 0x0000, range_end = 0x017f },
    { input = "ąłśćżłóę АбЫрвАлг", result = true, mixed_script = true, range_start = 0x0000, range_end = 0x017f },
  }

  for i,c in ipairs(cases) do
    test("is_utf_outside_range, test case #" ..
i, function()
      local actual = util.is_utf_outside_range(c.input, c.range_start, c.range_end)

      assert_equal(c.result, actual)
    end)
  end

  test("is_utf_outside_range, check cache", function ()
    -- Calls the function with 20 different range ends; the test passes as
    -- long as none of the calls raises. The counter is local so the test
    -- does not leak a global into the shared test environment.
    local cache_size = 20
    for i = 1, cache_size do
      util.is_utf_outside_range("a", 0x0000, 0x0000 + i)
    end
  end)

  test("is_utf_outside_range, check empty string", function ()
    -- Called with no arguments at all: must raise
    assert_error(util.is_utf_outside_range)
  end)

  test("get_string_stats, test case", function()
    local res = util.get_string_stats("this is test 99")
    assert_equal(res["letters"], 10)
    assert_equal(res["digits"], 2)
  end)

  -- Reuses the shared `cases` table, this time checking `mixed_script`
  for i,c in ipairs(cases) do
    test("is_utf_mixed_script, test case #" .. i, function()
      local actual = util.is_utf_mixed_script(c.input)

      assert_equal(c.mixed_script, actual)
    end)
  end

  test("is_utf_mixed_script, invalid utf str should return errror", function()
    assert_error(util.is_utf_mixed_script,'\200\213\202')
  end)

  test("is_utf_mixed_script, empty str should return errror", function()
    -- NOTE(review): despite its name this test passes the same invalid byte
    -- sequence as the previous one, not an empty string -- confirm intent.
    assert_error(util.is_utf_mixed_script,'\200\213\202')
  end)
end)

context("Rspamd string utility", function()
  local ffi = require 'ffi'

  ffi.cdef[[
char ** rspamd_string_len_split (const char *in, size_t len,
    const char *spill, int max_elts, void *pool);
  void g_strfreev (char **str_array);
]]
  local NULL = ffi.new 'void*'
  -- { input, separator characters, expected list of tokens }
  local cases = {
    {'', ';,', {}},
    {'', '', {}},
    {'a', ';,', {'a'}},
    {'a', '', {'a'}},
    {'a;b', ';', {'a', 'b'}},
    {'a;;b', ';', {'a', 'b'}},
    {';a;;b;', ';', {'a', 'b'}},
    {'ab', ';', {'ab'}},
    {'a,;b', ',', {'a', ';b'}},
    {'a,;b', ';,', {'a', 'b'}},
    {',a,;b', ';,', {'a', 'b'}},
    {',,;', ';,', {}},
    {',,;a', ';,', {'a'}},
    {'a,,;', ';,', {'a'}},
  }

  for i,case in ipairs(cases) do
    test("rspamd_string_len_split: case " ..
tostring(i), function()
      -- max_elts = -1 and a NULL pool, exactly as the C prototype is called
      -- throughout this context
      local parts = ffi.C.rspamd_string_len_split(case[1], #case[1],
        case[2], -1, NULL)
      local actual = {}

      -- The returned array is walked until its terminating NULL entry;
      -- C indices start at 0, Lua list positions at 1.
      local idx = 0
      while parts[idx] ~= NULL do
        actual[idx + 1] = ffi.string(parts[idx])
        idx = idx + 1
      end

      assert_rspamd_table_eq({
        expect = case[3],
        actual = actual
      })

      ffi.C.g_strfreev(parts)
    end)
  end
end)
-- \ No newline at end of file
-- diff --git a/test/lua/unit/selectors.combined.lua b/test/lua/unit/selectors.combined.lua new file mode 100644 index 0000000..2c1aa08 --- /dev/null +++ b/test/lua/unit/selectors.combined.lua @@ -0,0 +1,130 @@
local msg
-- Tests for lua_selectors.combine_selectors: several selectors are parsed,
-- evaluated against one task and joined with ':'.
context("Selectors test", function()
  local rspamd_task = require "rspamd_task"
  local logger = require "rspamd_logger"
  local lua_selectors = require "lua_selectors"
  local test_helper = require "rspamd_test_helper"
  local cfg = rspamd_config
  local task

  test_helper.init_url_parser()

  -- Builds a fresh task from `msg` (assigned at the bottom of the file)
  -- before every test.
  before(function()
    local res
    res,task = rspamd_task.load_from_string(msg, cfg)
    -- Check the load result before calling any method on `task`, so that a
    -- parse failure is reported with this message rather than as an error
    -- raised by the first task method.
    if not res then
      assert_true(false, "failed to load message")
    end
    task:set_from_ip("198.172.22.91")
    task:set_user("cool user name")
    task:set_helo("hello mail")
    task:set_request_header("hdr1", "value1")
    task:process_message()
    task:get_mempool():set_variable("int_var", 1)
    task:get_mempool():set_variable("str_var", "str 1")
  end)

  -- Parses `selector_string`, runs it on the current task and returns the
  -- elements combined with ':' as delimiter.
  local function check_selector(selector_string)
    local sels = lua_selectors.parse_selector(cfg, selector_string)
    local elts = lua_selectors.process_selectors(task, sels)
    local res = lua_selectors.combine_selectors(task, elts, ':')
    return res
  end

  local cases = {
    ["rcpts + weekend"] = {
      selector = "rcpts:addr.take_n(5).lower;time('message', '!%w').in(6, 7).id('weekends')",
      expect = {
        "nobody@example.com:weekends",
        "no-one@example.com:weekends"}},

    ["weekend + rcpts"] = {
      selector = "time('message', '!%w').in(6, 7).id('weekends');rcpts:addr.take_n(5).lower",
      expect = {
        "weekends:nobody@example.com",
        "weekends:no-one@example.com"}},

    ["id(rcpt) + rcpts + weekend"] = {
      selector = "id('rcpt');rcpts:addr.take_n(5).lower;time('message', '!%w').in(6, 7).id('weekends')",
      expect = {
        "rcpt:nobody@example.com:weekends",
        "rcpt:no-one@example.com:weekends"}},

    ["id(rcpt) + id(2) rcpts + weekend"] = {
      selector = "id('rcpt'); id(2); rcpts:addr.take_n(5).lower; 
time('message', '!%w').in(6, 7).id('weekends')",
      expect = {"rcpt:2:nobody@example.com:weekends", "rcpt:2:no-one@example.com:weekends"}
    },

    -- There are two rcpts but only one url in the message
    -- resulting table size is the size of the smallest table
    ["id(rcpt) + id(2) + rcpts and urls + weekend"] = {
      selector = "id('rcpt'); id(2); rcpts:addr.take_n(5).lower; id('urls'); urls:get_host; time('message', '!%w').in(6, 7).id('weekends')",
      expect = { "rcpt:2:nobody@example.com:urls:example.net:weekends"}
    },
    ["url + apply_methods"] = {
      selector = "urls.apply_methods('get_host', 'get_path').join_tables('/')",
      expect = {"example.net/path"}
    },
  }

  -- Each entry becomes its own test; the combined result must equal `expect`
  for case_name, case in pairs(cases) do
    test("case " .. case_name, function()
      local combined = check_selector(case.selector)
      assert_not_nil(combined)
      assert_rspamd_table_eq({actual = combined, expect = case.expect})
    end)
  end
end)


--[=========[ ******************* message ******************* ]=========]
msg = [[
Received: from ca-18-193-131.service.infuturo.it ([151.18.193.131] helo=User)
    by server.chat-met-vreemden.nl with esmtpa (Exim 4.76)
    (envelope-from <upwest201diana@outlook.com>)
    id 1ZC1sl-0006b4-TU; Mon, 06 Jul 2015 10:36:08 +0200
From: <whoknows@nowhere.com>
To: <nobody@example.com>, <no-one@example.com>
Date: Sat, 22 Sep 2018 14:36:51 +0100 (BST)
subject: Second, lower-cased header subject
Subject: Test subject
Content-Type: multipart/alternative;
    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"

--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64

Hello world


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/html; charset="utf-8"

<html><body>
<a href="http://example.net/path?query">http://example.net/path?query</a>
<a href="mailto:test@example.net">mail me</a>
</html>


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f2.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==
]]

-- diff --git a/test/lua/unit/selectors.custom.lua b/test/lua/unit/selectors.custom.lua new file mode 100644 index 0000000..cf82fe6 --- /dev/null +++ b/test/lua/unit/selectors.custom.lua @@ -0,0 +1,81 @@
local msg
-- Tests for user-registered extractors and transforms in lua_selectors
context("Selectors test", function()
  local rspamd_task = require "rspamd_task"
  local logger = require "rspamd_logger"
  local lua_selectors = require "lua_selectors"
  local test_helper = require "rspamd_test_helper"
  local cfg = rspamd_config
  local task

  test_helper.init_url_parser()

  -- A new task is loaded from `msg` before each test
  before(function()
    local loaded
    loaded, task = rspamd_task.load_from_string(msg, cfg)
    if not loaded then
      assert_true(false, "failed to load message")
    end
  end)

  -- Parse a selector and evaluate it against the current task
  local function check_selector(selector_string)
    local parsed = lua_selectors.parse_selector(cfg, selector_string)
    return lua_selectors.process_selectors(task, parsed)
  end

  test("custom selector", function()
    lua_selectors.register_extractor(rspamd_config, "get_something", {
      get_value = function(task, args) -- mandatory field
        return 'simple value','string' -- result + type
      end,
      description = 'Sample extractor' -- optional
    })

    local elts = check_selector('get_something')
    assert_not_nil(elts)
    assert_rspamd_table_eq({actual = elts,
expect = {'simple value'}})
  end)

  test("custom transform", function()
    -- Unused callback parameters are prefixed with `_` so they neither
    -- shadow the context-level `task` nor trip the linter.
    lua_selectors.register_extractor(rspamd_config, "get_something", {
      get_value = function(_task, _args) -- mandatory field
        return 'simple value','string' -- result + type
      end,
      description = 'Sample extractor' -- optional
    })

    lua_selectors.register_transform(rspamd_config, "append_string", {
      types = {['string'] = true}, -- accepted types
      -- The second parameter is named `input_type` (not `type`) so the
      -- builtin type() stays visible inside the callback.
      process = function(input, input_type, args)
        return input .. table.concat(args or {}),'string' -- result + type
      end,
      map_type = 'string', -- can be used in map like invocation, always return 'string' type
      description = 'Adds all arguments to the input string'
    })

    -- Single argument
    local elts = check_selector('get_something.append_string(" and a simple tail")')
    assert_not_nil(elts)
    assert_rspamd_table_eq({actual = elts, expect = {'simple value and a simple tail'}})

    -- Several arguments are concatenated in order
    elts = check_selector('get_something.append_string(" and", " a", " simple", " nail")')
    assert_not_nil(elts)
    assert_rspamd_table_eq({actual = elts, expect = {'simple value and a simple nail'}})
  end)
end)


--[=========[ ******************* message ******************* ]=========]
msg = [[
From: <whoknows@nowhere.com>
To: <nobody@example.com>, <no-one@example.com>
Date: Wed, 19 Sep 2018 14:36:51 +0100 (BST)
Subject: Test subject
Content-Type: multipart/alternative;
    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"

--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64

Hello world
]]

-- diff --git a/test/lua/unit/selectors.lua b/test/lua/unit/selectors.lua new file mode 100644 index 0000000..6362e5c --- /dev/null +++ b/test/lua/unit/selectors.lua @@ -0,0 +1,472 @@
local msg
context("Selectors test", function()
  local rspamd_task = require "rspamd_task"
  local logger = require "rspamd_logger"
  local lua_selectors = require "lua_selectors"
  local lua_maps = require "lua_maps"
  local test_helper =
require "rspamd_test_helper"
  local lua_util = require "lua_util"
  local cfg = rspamd_config
  local task

  test_helper.init_url_parser()

  -- Static hash map referenced by the filter_map / except_map / apply_map cases
  lua_selectors.maps.test_map = lua_maps.map_add_from_ucl({
    'key value',
    'key1 value1',
    'key3 value1',
  }, 'hash', 'test selectors maps')

  -- Builds a fresh task from `msg` (assigned at the bottom of the file)
  -- before every test and seeds the values the selectors read.
  before(function()
    local res
    res,task = rspamd_task.load_from_string(msg, cfg)
    -- Check the load result before calling any method on `task`, so that a
    -- parse failure is reported with this message rather than as an error
    -- raised by the first task method.
    if not res then
      assert_true(false, "failed to load message")
    end
    task:set_from_ip("198.172.22.91")
    task:set_user("cool user name")
    task:set_helo("hello mail")
    task:set_request_header("hdr1", "value1")
    task:process_message()
    task:get_mempool():set_variable("int_var", 1)
    task:get_mempool():set_variable("str_var", "str 1")
    task:cache_set('cachevar1', 'hello\x00world')
    task:cache_set('cachevar2', {'hello', 'world'})
  end)

  -- Runs a selector through a closure whose postprocessor returns the raw
  -- selector result unchanged.
  local function check_selector_plain(selector_string)
    local sels = lua_selectors.create_selector_closure_fn(nil, cfg, selector_string, nil,
        function(_, res, _) return res end)
    local elts = sels(task)
    return elts
  end

  -- Same, but postprocessed by lua_selectors.kv_table_from_pairs
  local function check_selector_kv(selector_string)
    local sels = lua_selectors.create_selector_closure_fn(nil, cfg, selector_string, nil,
        lua_selectors.kv_table_from_pairs)
    local elts = sels(task)
    return elts
  end

  -- name => { selector = <selector string>, expect = <expected result> }
  local cases_plain = {
    ["ip"] = { selector = "ip", expect = {"198.172.22.91"} },
    ["header Subject"] = { selector = "header(Subject)", expect = {"Second, lower-cased header subject"} },
    ["header Subject lower"] = { selector = "header(Subject).lower", expect = {"second, lower-cased header subject"} },
    ["header Subject lower_utf8"] = { selector = "header(Subject).lower_utf8", expect = {"second, lower-cased header subject"} },
    ["header full Subject lower"] = { selector = "header(Subject, 'full').lower", expect = {{"second, lower-cased header subject", "test subject"}} },
    ["header full strong Subject"] = { selector = "header(Subject, 'full,strong')", expect =
{{"Test subject"}} },
    ["header full strong lower-cased Subject"] = { selector = "header(subject, 'full,strong')", expect = {{"Second, lower-cased header subject"}} },
    ["digest"] = { selector = "digest", expect = {"1ac109c58a7d0f5f532100ac14e9f4d9"} },
    ["user"] = { selector = "user", expect = {"cool user name"} },
    ["from"] = { selector = "from", expect = {"whoknows@nowhere.com"} },

    -- Recipients
    ["rcpts"] = { selector = "rcpts", expect = {{"nobody@example.com", "no-one@example.com"}} },
    ["1st rcpts"] = { selector = "rcpts.nth(1)", expect = {"nobody@example.com"} },
    ["lower rcpts"] = { selector = "rcpts.lower.first", expect = {"nobody@example.com"} },
    ["first rcpts"] = { selector = "rcpts.first", expect = {"nobody@example.com"} },
    ["first addr rcpts"] = { selector = "rcpts:addr.first", expect = {"nobody@example.com"} },
    ["rcpts_uniq_domains"] = { selector = "rcpts:domain.uniq", expect = {{"example.com"}} },
    ["rcpts_sorted"] = { selector = "rcpts:addr.sort", expect = {{"nobody@example.com", "no-one@example.com"}} },
    ["to"] = { selector = "to", expect = {"nobody@example.com"}},

    -- Attachments: both parts of the fixture carry the same payload, so
    -- every digest appears twice
    ["attachments"] = {
      selector = "attachments",
      expect = {{
        "ce112d07c52ae649f9646f3d0b5aaab5d4834836d771c032d1a75059d31fed84f38e00c0b205918f6d354934c2055d33d19d045f783a62561f467728ebcf0160",
        "ce112d07c52ae649f9646f3d0b5aaab5d4834836d771c032d1a75059d31fed84f38e00c0b205918f6d354934c2055d33d19d045f783a62561f467728ebcf0160"
      }}
    },
    ["attachments blake2 base32"] = {
      selector = "attachments('base32', 'blake2')",
      expect = {{
        "qqr41dwakt3uwhucxmxsypjiifi8er3gzqhyc3r48fw1ij9dp8b8x8nyyscmoe6tpmp1r4eafezguezurazo87ecs48cw5bfm9udyob",
        "qqr41dwakt3uwhucxmxsypjiifi8er3gzqhyc3r48fw1ij9dp8b8x8nyyscmoe6tpmp1r4eafezguezurazo87ecs48cw5bfm9udyob"
      }}
    },
    ["attachments blake2 base64"] = { selector = "attachments('base64', 'blake2')", expect =
{{
        "zhEtB8Uq5kn5ZG89C1qqtdSDSDbXccAy0adQWdMf7YTzjgDAsgWRj201STTCBV0z0Z0EX3g6YlYfRnco688BYA==",
        "zhEtB8Uq5kn5ZG89C1qqtdSDSDbXccAy0adQWdMf7YTzjgDAsgWRj201STTCBV0z0Z0EX3g6YlYfRnco688BYA=="
      }}
    },
    ["attachments blake2 rfc base32"] = {
      selector = "attachments('rbase32', 'blake2')",
      expect = {{
        "ZYIS2B6FFLTET6LEN46QWWVKWXKIGSBW25Y4AMWRU5IFTUY75WCPHDQAYCZALEMPNU2USNGCAVOTHUM5ARPXQOTCKYPUM5ZI5PHQCYA",
        "ZYIS2B6FFLTET6LEN46QWWVKWXKIGSBW25Y4AMWRU5IFTUY75WCPHDQAYCZALEMPNU2USNGCAVOTHUM5ARPXQOTCKYPUM5ZI5PHQCYA"
      }}
    },
    ["attachments md5 rfc base32"] = {
      selector = "attachments('rbase32', 'md5')",
      expect = {{
        "LYXF2IMILRFFO4LLTDTM66MKEA",
        "LYXF2IMILRFFO4LLTDTM66MKEA"
      }}
    },
    ["attachments id"] = { selector = "attachments.id", expect = {""}},
    ["files"] = { selector = "files", expect = {{"f.zip", "f2.zip"}}},
    ["helo"] = { selector = "helo", expect = {"hello mail"}},

    -- Received headers
    ["received ip"] = { selector = "received:by_hostname.filter_string_nils", expect = {{"server1.chat-met-vreemden.nl", "server2.chat-met-vreemden.nl"}}},
    ["received by hostname last"] = { selector = "received:by_hostname.filter_string_nils.last", expect = {"server2.chat-met-vreemden.nl"} },
    ["received by hostname first"] = { selector = "received:by_hostname.filter_string_nils.first", expect = {"server1.chat-met-vreemden.nl"} },

    -- URLs and e-mail addresses found in the body
    ["urls"] = { selector = "urls", expect = {{"http://subdomain.example.net"}}},
    ["emails"] = { selector = "emails", expect = {{"test@example.net"}}},
    ["specific_urls"] = { selector = "specific_urls({limit = 1})", expect = {{"http://subdomain.example.net"}}},
    ["specific_urls + emails"] = { selector = "specific_urls({need_emails = true, limit = 2})", expect = {{"test@example.net", "http://subdomain.example.net"}}},

    -- Broken test as order depends on the hash function internally
    --["specific_urls + emails limit"] = {
    --  selector = "specific_urls({need_emails = true, limit = 1})",
    --
--  expect = {{"test@example.net"}}},

    -- Mempool variables set in before()
    ["pool_var str, default type"] = { selector = [[pool_var("str_var")]], expect = {"str 1"}},
    ["pool_var str"] = { selector = [[pool_var("str_var", 'string')]], expect = {"str 1"}},
    ["pool_var double"] = { selector = [[pool_var("int_var", 'double')]], expect = {"1"}},

    ["time"] = { selector = "time", expect = {"1537364211"}},

--    ["request_header"] = {
--      selector = "request_header(hdr1)",
--      expect = {"value1"}},

    -- URL methods and transforms
    ["get_host"] = { selector = "urls:get_host", expect = {{"subdomain.example.net"}}},
    ["get_tld_method"] = { selector = "urls:get_tld", expect = {{"example.net"}}},
    ["get_tld_transform"] = { selector = "urls:get_host.get_tld", expect = {{"example.net"}}},

    ["transformation regexp"] = { selector = "urls:get_tld.regexp('\\.([\\w]+)$')", expect = {{{".net", "net"}}}},
    ["transformation id"] = { selector = "urls:get_tld.id", expect = {''}},
    ["transformation id arg"] = { selector = "urls:get_tld.id('1')", expect = {'1'}},
    ["transformation id args"] = { selector = "urls:get_tld.id('1', '2', '3')", expect = {{'1', '2', '3'}}},

    -- in / not_in applied to the message weekday ('!%w')
    ["transformation in"] = { selector = "time(message, '!%w').in(2,3,4)", expect = {'3'}},
    ["transformation in id"] = { selector = "time(message, '!%w').in(2,3,4).id", expect = {''}},
    ["transformation not in"] = { selector = "time(message, '!%w').not_in(1,6,7)", expect = {'3'}},
    ["transformation in not id"] = { selector = "time(message, '!%w').not_in(1,6,7).id", expect = {''}},
    ["transformation in not id 1"] = { selector = "time(message, '!%w').not_in(1,6,7).id(1)", expect = {'1'}},

    -- List slicing
    ["transformation take"] = { selector = "rcpts.take_n(1).lower", expect = {{'nobody@example.com'}}},
    ["transformation take 2"] = { selector = "rcpts.take_n(2).lower", expect = {{'nobody@example.com', 'no-one@example.com'}}},
    ["transformation take 3"] = { selector = "rcpts.take_n(3).lower",
expect = {{'nobody@example.com', 'no-one@example.com'}}},
    ["transformation nth"] = { selector = "rcpts.nth(1).lower", expect = {'nobody@example.com'}},
    ["transformation nth 2"] = { selector = "rcpts.nth(2).lower", expect = {'no-one@example.com'}},
    ["transformation last"] = { selector = "rcpts.last.lower", expect = {'no-one@example.com'}},

    -- Substrings of the strong-matched Subject header
    ["transformation substring"] = { selector = "header(Subject, strong).substring(6)", expect = {'subject'}},
    ["transformation substring 2"] = { selector = "header(Subject, strong).substring(6, 7)", expect = {'su'}},
    ["transformation substring -4"] = { selector = "header(Subject, strong).substring(-4)", expect = {'ject'} },

    -- Map-based transforms, all using the `test_map` registered above
    ["map filter"] = { selector = "id('key').filter_map(test_map)", expect = {'key'} },
    ["map except"] = { selector = "list('key', 'key1', 'key2', 'key3', 'key4').except_map(test_map)", expect = {{'key2', 'key4'}} },
    ["map apply"] = { selector = "id('key').apply_map(test_map)", expect = {'value'} },
    ["map filter list"] = { selector = "list('key', 'key1', 'key2').filter_map(test_map)", expect = {{'key', 'key1'}} },
    ["map apply list"] = { selector = "list('key', 'key1', 'key2', 'key3').apply_map(test_map)", expect = {{'value', 'value1', 'value1'}} },
    ["map apply list uniq"] = { selector = "list('key', 'key1', 'key2', 'key3').apply_map(test_map).uniq", expect = {{'value1', 'value'}} },

    -- Words extracted from the text parts
    ["words"] = { selector = "words('norm')", expect = {{'hello', 'world', 'mail', 'me'}} },
    ["words_full"] = { selector = "words('full'):2", expect = {{'hello', 'world', '', 'mail', 'me'}} },

    -- Repeated X-Test header
    ["header X-Test first"] = { selector = "header(X-Test, full).first", expect = {"1"} },
    ["header X-Test last"] = { selector = "header(X-Test, full).last", expect = {"3"} },
    ["header lower digest substring"] = { selector = "header('Subject').lower.digest('hex').substring(1, 16)", expect = {"736ad5f50fc95d73"} },
["header gsub"] = { selector = "header('Subject'):gsub('a', 'b')", expect = {"Second, lower-cbsed hebder subject"} },
    ["header regexp first"] = { selector = "header('Subject').regexp('.*').first", expect = {"Second, lower-cased header subject"} },

    -- Values stored with task:cache_set() in before()
    ["task cache string"] = { selector = "task_cache('cachevar1')", expect = {"hello\x00world"} },
    ["task cache table"] = { selector = "task_cache('cachevar2')", expect = {{"hello", "world"}} },
  }

  -- One test per plain case, iterated with lua_util.spairs
  for case_name, case in lua_util.spairs(cases_plain) do
    test("plain case " .. case_name, function()
      local got = check_selector_plain(case.selector)
      assert_not_nil(got)
      assert_rspamd_table_eq_sorted({actual = got, expect = case.expect})
    end)
  end

  -- Cases whose result is turned into a key/value table
  local cases_kv = {
    ["ip"] = { selector = "id('ip');ip", expect = { ip = "198.172.22.91" } },
    ["ip+words"] = {
      selector = "id('ip');ip;id('words');words('full'):2",
      expect = { ip = "198.172.22.91", words = {'hello', 'world', '', 'mail', 'me'} }
    },
  }
  for case_name, case in lua_util.spairs(cases_kv) do
    test("kv case " ..
case_name, function()
      local got = check_selector_kv(case.selector)
      assert_not_nil(got)
      assert_rspamd_table_eq_sorted({actual = got, expect = case.expect})
    end)
  end
end)


--[=========[ ******************* message ******************* ]=========]
msg = [[
Received: from ca-18-193-131.service1.infuturo.it ([151.18.193.131] helo=User)
    by server1.chat-met-vreemden.nl with esmtpa (Exim 4.76)
    (envelope-from <upwest201diana@outlook.com>)
    id 1ZC1sl-0006b4-TU; Mon, 06 Jul 2015 10:36:08 +0200
Received: from ca-18-193-131.service2.infuturo.it ([151.18.193.132] helo=User)
    by server2.chat-met-vreemden.nl with esmtpa (Exim 4.76)
    (envelope-from <upwest201diana@outlook.com>)
    id 1ZC1sl-0006b4-TU; Mon, 06 Jul 2015 10:36:08 +0200
From: <whoknows@nowhere.com>
To: <nobody@example.com>, <no-one@example.com>
Date: Wed, 19 Sep 2018 14:36:51 +0100 (BST)
subject: Second, lower-cased header subject
Subject: Test subject
X-Test: 1
X-Test: 2
X-Test: 3
Content-Type: multipart/alternative;
    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"

--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit

Hello world


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/html; charset="utf-8"

<html><body>
<a href="http://subdomain.example.net">http://subdomain.example.net</a>
<a href="mailto:test@example.net">mail me</a>
</html>


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f2.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==
]]

-- diff --git a/test/lua/unit/selectors.negative.lua b/test/lua/unit/selectors.negative.lua new file mode 100644 index 0000000..4262400 --- /dev/null +++ b/test/lua/unit/selectors.negative.lua @@ -0,0 +1,113 @@
local msg
-- Negative tests: selector strings that lua_selectors.parse_selector must reject
context("Selectors test", function()
  local rspamd_task = require "rspamd_task"
  local logger = require "rspamd_logger"
  local lua_selectors = require "lua_selectors"
  local ffi = require "ffi"
  local cfg = rspamd_config

  local task

  ffi.cdef[[
  void rspamd_url_init (const char *tld_file);
  ]]

  -- Directory of this script, used to locate the TLD list fixture
  local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1")

  ffi.C.rspamd_url_init(string.format('%s/%s', test_dir, "test_tld.dat"))

  -- Builds a fresh task from `msg` (assigned at the bottom of the file)
  -- before every test.
  before(function()
    local res
    res,task = rspamd_task.load_from_string(msg, cfg)
    -- Check the load result before calling any method on `task`, so that a
    -- parse failure is reported with this message rather than as an error
    -- raised by the first task method.
    if not res then
      assert_true(false, "failed to load message")
    end
    task:set_from_ip("198.172.22.91")
    task:set_user("cool user name")
    task:set_helo("hello mail")
    task:set_request_header("hdr1", "value1")
    task:process_message()
    task:get_mempool():set_variable("int_var", 1)
    task:get_mempool():set_variable("str_var", "str 1")
  end)

  -- Parse a selector and evaluate it against the current task
  local function check_selector(selector_string)
    local sels = lua_selectors.parse_selector(cfg, selector_string)
    local elts = lua_selectors.process_selectors(task, sels)
    return elts
  end

  -- Selectors which should not be parsed
  local cases = {
    ["random string"] = {
      selector = "'xxx'"},

    ["random nonsense"] = {
      selector = "13 / sd 42 x"},

    ["unknown selector"] = {
      selector = "unknownselector"},

    ["unknown transformation"] = {
      selector = "urls.somethingnew"},
  }

  for case_name, case in pairs(cases) do
    test("case " ..
case_name, function()
      -- parse_selector is expected to return nil for each of these inputs;
      -- the parse result is printed before the assertion.
      local parsed = lua_selectors.parse_selector(cfg, case.selector)
      print(logger.slog("%1", parsed))
      assert_nil(parsed)
    end)
  end
end)


--[=========[ ******************* message ******************* ]=========]
msg = [[
Received: from ca-18-193-131.service.infuturo.it ([151.18.193.131] helo=User)
    by server.chat-met-vreemden.nl with esmtpa (Exim 4.76)
    (envelope-from <upwest201diana@outlook.com>)
    id 1ZC1sl-0006b4-TU; Mon, 06 Jul 2015 10:36:08 +0200
From: <whoknows@nowhere.com>
To: <nobody@example.com>, <no-one@example.com>
Date: Wed, 19 Sep 2018 14:36:51 +0100 (BST)
subject: Second, lower-cased header subject
Subject: Test subject
Content-Type: multipart/alternative;
    boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"

--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64

Hello world


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: text/html; charset="utf-8"

<html><body>
<a href="http://example.net">http://example.net</a>
<a href="mailto:test@example.net">mail me</a>
</html>


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==


--_000_6be055295eab48a5af7ad4022f33e2d0_
Content-Type: application/zip; name=f.zip
Content-Disposition: attachment; size=166; filename=f2.zip
Content-Transfer-Encoding: base64

UEsDBAoAAAAAAINe6kgAAAAAAAAAAAAAAAAIABwAZmFrZS5leGVVVAkAA8YaglfGGoJXdXgLAAEE
6AMAAAToAwAAUEsBAh4DCgAAAAAAg17qSAAAAAAAAAAAAAAAAAgAGAAAAAAAAAAAALSBAAAAAGZh
a2UuZXhlVVQFAAPGGoJXdXgLAAEE6AMAAAToAwAAUEsFBgAAAAABAAEATgAAAEIAAAAAAA==
]]

-- diff --git a/test/lua/unit/smtp_addr.lua
-- b/test/lua/unit/smtp_addr.lua new file mode 100644 index 0000000..2cb7755 --- /dev/null +++ b/test/lua/unit/smtp_addr.lua @@ -0,0 +1,110 @@
-- SMTP address parser tests

context("SMTP address check functions", function()
  local logger = require("rspamd_logger")
  local ffi = require("ffi")
  local util = require("rspamd_util")
  local fun = require "fun"
  -- C declarations used by the tests below
  ffi.cdef[[
  struct rspamd_email_address {
    const char *raw;
    const char *addr;
    const char *user;
    const char *domain;
    const char *name;

    unsigned raw_len;
    unsigned addr_len;
    unsigned domain_len;
    uint16_t user_len;
    unsigned char flags;
  };
  struct rspamd_email_address * rspamd_email_address_from_smtp (const char *str, unsigned len);
  void rspamd_email_address_free (struct rspamd_email_address *addr);
  ]]

  -- { input, fields of the parsed address that must match }
  local cases_valid = {
    {'<>', {addr = ''}},
    {'<a@example.com>', {user = 'a', domain = 'example.com', addr = 'a@example.com'}},
    {'<a-b@example.com>', {user = 'a-b', domain = 'example.com', addr = 'a-b@example.com'}},
    {'<a-b@ex-ample.com>', {user = 'a-b', domain = 'ex-ample.com', addr = 'a-b@ex-ample.com'}},
    {'1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru', {
      user = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370',
      domain = 'example.220-volt.ru',
      addr = '1367=dec2a6ce-81bd-4fa9-ad02-ec5956466c04=9=1655370@example.220-volt.ru'
    }},
    {'notification+kjdm---m7wwd@facebookmail.com', {user = 'notification+kjdm---m7wwd'}},
    {'a@example.com', {user = 'a', domain = 'example.com', addr = 'a@example.com'}},
    {'a+b@example.com', {user = 'a+b', domain = 'example.com', addr = 'a+b@example.com'}},
    -- quoted local parts
    {'"a"@example.com', {user = 'a', domain = 'example.com', addr = 'a@example.com'}},
    {'"a+b"@example.com', {user = 'a+b', domain = 'example.com', addr = 'a+b@example.com'}},
    {'"<>"@example.com', {user = '<>', domain = 'example.com', addr = '<>@example.com'}},
    {'<"<>"@example.com>', {user = '<>', domain = 'example.com', addr = '<>@example.com'}},
{'"\\""@example.com', {user = '"', domain = 'example.com', addr = '"@example.com'}}, + {'"\\"abc"@example.com', {user = '"abc', domain = 'example.com', addr = '"abc@example.com'}}, + {'<@domain1,@domain2,@domain3:abc@example.com>', + {user = 'abc', domain = 'example.com', addr = 'abc@example.com'}}, + + } + + + fun.each(function(case) + test("Parse valid smtp addr: " .. case[1], function() + local st = ffi.C.rspamd_email_address_from_smtp(case[1], #case[1]) + + assert_not_nil(st, "should be able to parse " .. case[1]) + + fun.each(function(k, ex) + if k == 'user' then + local str = ffi.string(st.user, st.user_len) + assert_equal(str, ex) + elseif k == 'domain' then + local str = ffi.string(st.domain, st.domain_len) + assert_equal(str, ex) + elseif k == 'addr' then + local str = ffi.string(st.addr, st.addr_len) + assert_equal(str, ex) + end + end, case[2]) + ffi.C.rspamd_email_address_free(st) + end) + end, cases_valid) + + local cases_invalid = { + 'a', + 'a"b"@example.com', + 'a"@example.com', + '"a@example.com', + '<a@example.com', + 'a@example.com>', + '<a@.example.com>', + '<a@example.com>>', + '<a@example.com><>', + } + + fun.each(function(case) + test("Parse invalid smtp addr: " .. case, function() + local st = ffi.C.rspamd_email_address_from_smtp(case, #case) + + assert_nil(st, "should not be able to parse " .. 
case) + end) + end, cases_invalid) + + if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then + test("Speed test", function() + local case = '<@domain1,@domain2,@domain3:abc%d@example.com>' + local niter = 100000 + local total = 0 + + for i = 1,niter do + local ncase = string.format(case, i) + local t1 = util.get_ticks() + local st = ffi.C.rspamd_email_address_from_smtp(ncase, #ncase) + local t2 = util.get_ticks() + ffi.C.rspamd_email_address_free(st) + total = total + t2 - t1 + end + + print(string.format('Spend %f seconds in processing addrs', total)) + end) + end +end) diff --git a/test/lua/unit/smtp_date.lua b/test/lua/unit/smtp_date.lua new file mode 100644 index 0000000..aa8fbce --- /dev/null +++ b/test/lua/unit/smtp_date.lua @@ -0,0 +1,58 @@ +context("SMTP date functions", function() + local rspamd_util = require "rspamd_util" + + local cases = { + { 'Mon, 05 Oct 2020 19:05:57 -0000', 1601924757 }, + -- space instead of leading zero + { 'Mon, 5 Oct 2020 19:05:57 -0000', 1601924757 }, + -- no padding + { 'Mon, 5 Oct 2020 19:05:57 -0000', 1601924757 }, + -- no weekday + { '5 Oct 2020 19:05:57 -0000', 1601924757 }, + -- different TZ offsets + { 'Tue, 22 Sep 2020 00:03:14 -0800', 1600761794 }, + { 'Fri, 02 Oct 2020 20:00:40 +0100', 1601665240 }, + { 'Mon, 5 Oct 2020 15:48:32 +0530', 1601893112 }, + { 'Mon, 05 Oct 2020 10:30:36 +1200', 1601850636 }, + -- extra comment + { 'Thu, 18 May 2006 16:08:11 +0400 (MSD)', 1147954091 }, + { 'Thu, 18 May 2006 16:08:11 +0400', 1147954091 }, + -- obs_zone + { 'Sat, 26 Sep 2020 17:36:21 GMT', 1601141781 }, + { 'Sat, 26 Sep 2020 17:36:21 UT', 1601141781 }, + { 'Sat, 26 Sep 2020 17:36:21 +0000', 1601141781 }, + { 'Wed, 30 Sep 2020 20:32:31 EDT', 1601512351 }, + { 'Wed, 30 Sep 2020 20:32:31 -0400', 1601512351 }, + { 'Wed, 30 Sep 2020 17:32:31 PDT', 1601512351 }, + { 'Wed, 30 Sep 2020 17:32:31 -0700', 1601512351 }, + -- 2 digit year < 50 + { 'Mon, 05 Oct 20 06:35:38 GMT', 1601879738 }, + { 'Mon, 05 Oct 2020 06:35:38 GMT', 1601879738 }, + 
-- 2 digit year >= 50 + { '26 Aug 76 14:30 EDT', 209932200 }, + { '26 Aug 1976 14:30 EDT', 209932200 }, + -- Year 2038 problem (broken on 32-bit systems, see #4754) + --{ 'Tue, 19 Jan 2038 03:14:07 GMT', 2 ^ 31 - 1 }, + --{ 'Tue, 19 Jan 2038 03:14:09 GMT', 2 ^ 31 + 1 }, + -- double space before TZ + { 'Sat, 29 Aug 2020 08:25:15 +0700', 1598664315 }, + -- XXX timestamp corresponding to Sat Dec 30 00:00:00 GMT 1899 returned on error + --{'Sat, Dec 30 1899 00:00:00 GMT', -2209161600}, + -- Invalid format + { 'Mon Oct 5 20:29:23 BST 2020', nil }, + -- Wrong date + { '32 Jan 2020 00:00 GMT', nil }, + -- Wrong time + { '1 Jan 2020 25:00 GMT', nil } + } + + for _, case in ipairs(cases) do + test("Parse date: " .. case[1], function() + local timestamp = rspamd_util.parse_smtp_date(case[1]) + assert_rspamd_eq({ + expect = case[2], + actual = timestamp + }) + end) + end +end)
-- File: test/lua/unit/sqlite3.lua
-- Tests for the rspamd_sqlite3 binding: opening databases, running
-- parameterised statements and iterating over result rows.
context("Sqlite3 API", function()
  local sqlite3 = require "rspamd_sqlite3"
  local tmpdir = os.getenv("TMPDIR") or "/tmp"

  -- Returns the path of a scratch database inside tmpdir.
  -- `suffix` distinguishes the files used by the individual tests.
  local function scratch_db(suffix)
    return tmpdir .. '/rspamd_unit_test_sqlite3' .. suffix .. '.sqlite'
  end

  test("Sqlite3 open", function()
    local path = scratch_db('')
    -- Start from a clean state in case a previous run left the file behind.
    os.remove(path)
    local db = sqlite3.open(path)
    assert_not_nil(db, "should be able to create sqlite3 db")
    db = sqlite3.open('/non/existent/path/rspamd_unit_test_sqlite3.sqlite')
    assert_nil(db, "should not be able to create sqlite3 db")
    os.remove(path)
  end)

  test("Sqlite3 query", function()
    local path = scratch_db('-1')
    os.remove(path)
    local db = sqlite3.open(path)
    assert_not_nil(db, "should be able to create sqlite3 db")

    local ret = db:sql([[
      CREATE TABLE x (id INT, value TEXT);
    ]])
    assert_true(ret, "should be able to create table")
    ret = db:sql([[
      INSERT INTO x VALUES (?1, ?2);
    ]], 1, 'test')
    assert_true(ret, "should be able to insert row")
    os.remove(path)
  end)

  test("Sqlite3 rows", function()
    local path = scratch_db('-2')
    os.remove(path)
    local db = sqlite3.open(path)
    assert_not_nil(db, "should be able to create sqlite3 db")

    local ret = db:sql([[
      CREATE TABLE x (id INT, value TEXT);
    ]])
    assert_true(ret, "should be able to create table")
    ret = db:sql([[
      INSERT INTO x VALUES (?1, ?2);
    ]], 1, 'test')
    assert_true(ret, "should be able to insert row")

    -- Count the rows: without this the test would pass even if the
    -- iterator yielded nothing, since the assertions below would never run.
    local nrows = 0
    for row in db:rows([[SELECT * FROM x;]]) do
      nrows = nrows + 1
      -- The expected id is the string '1', as in the original test.
      assert_equal(row.id, '1')
      assert_equal(row.value, 'test')
    end
    assert_equal(nrows, 1, "should read back exactly one row")
    os.remove(path)
  end)
end)
-- File: test/lua/unit/task.lua
-- Task loading tests: a message is built from header/body fragments in
-- various MIME nestings and the hosts of the extracted urls are compared.
context("Task processing", function()
  local fun = require("fun")
  local rspamd_task = require("rspamd_task")

  test("Process a simple task", function()
    --local cfg = rspamd_util.config_from_ucl(config)
    --assert_not_nil(cfg)

    local msg = [[
From: <>
To: <nobody@example.com>
Subject: test
Content-Type: text/plain

Test.
]]
    local res,task = rspamd_task.load_from_string(msg)
    assert_true(res, "failed to load message")
    task:process_message()
    task:destroy()
  end)

  -- Top-level headers shared by all nesting tests.
  local hdrs = [[
From: <>
To: <nobody@example.com>
Subject: test
]]
  -- Content-Type header declaring a multipart container with boundary XXX.
  local mpart = [[
Content-Type: multipart/mixed; boundary=XXX
]]
  -- Quoted-printable HTML part containing a link whose href and visible
  -- text point at different hosts.
  local body = [[
Content-Type: text/html
Content-Transfer-Encoding: quoted-printable

<html>
<body>
=0DAttached is your new documents.
<br>
<a href=3D"http://evil.com/Information/">http:=
//example.com/privacy/XXX/YYY_April_25_2019.doc</a>
<br>
<br>
<br>
Thank you,
<br>
<b>Haloclaims.co</b>
</body></html>
]]

  -- Loads `msg` as a task, processes it and checks that the hosts of the
  -- urls it reports are exactly evil.com and example.com (order ignored).
  local function check_url_hosts(msg)
    local res, task = rspamd_task.load_from_string(msg)
    assert_true(res, "failed to load message")
    task:process_message()

    local hosts = fun.totable(fun.map(function(u)
      return u:get_host()
    end, task:get_urls()))

    assert_rspamd_table_eq_sorted({
      actual = hosts,
      expect = {
        'evil.com', 'example.com'
      }})

    task:destroy()
  end

  test("Process mime nesting: simple", function()
    check_url_hosts(hdrs .. body)
  end)

  test("Process mime nesting: multipart", function()
    check_url_hosts(table.concat{
      hdrs, mpart, '\n', '--XXX\n', body, '\n--XXX--\n'
    })
  end)

  test("Process mime nesting: multipart, broken", function()
    -- The real body follows a boundary that comes after the closing one.
    check_url_hosts(table.concat{
      hdrs, mpart, '\n', '--XXX\n', 'garbadge\n', '\n--XXX--\n', '--XXX\n', body
    })
  end)

  test("Process mime nesting: message", function()
    check_url_hosts(table.concat{
      hdrs, 'Content-Type: message/rfc822\n', '\n', hdrs, body
    })
  end)

  test("Process mime nesting: message in multipart", function()
    check_url_hosts(table.concat{
      hdrs, mpart, '\n',
      '--XXX\n',
      'Content-Type: message/rfc822\n', '\n', hdrs, body ,
      '\n--XXX--\n',
    })
  end)

  test("Process mime nesting: multipart message in multipart", function()
    check_url_hosts(table.concat{
      hdrs, mpart, '\n',
      '--XXX\n',
      'Content-Type: message/rfc822\n', '\n', hdrs, mpart, '\n',

      '--XXX\n',
      body ,
      '\n--XXX--\n',

      '\n--XXX--\n',
    })
  end)
end)
\ No newline at end of file diff --git a/test/lua/unit/test.data b/test/lua/unit/test.data new file mode 100644 index 0000000..696972e --- /dev/null +++ b/test/lua/unit/test.data @@ -0,0 +1,10 @@ +RLvXs8ZWOYXidwy4RSErSJFAGiRhimvMhHNIOzbxkkDC1IQz03tf9jvglA45PXAb +AyYIMAlMn1DrRCwsGKV/u8EEjkO34ujwirJ6ytbiZkjTnANBhGtZdjMCfsEUIY9a +y35d3CeKZF9KaRdlWRDJdfBbZE9mn4rSUQ1X0+HweUZ3AmMHwWLa9SB+ii7ysEEl ++6QLqHczu7K0Ji3LVKI+NzPJOWmWWCHjJyhs8HsuHpUrJ3iSeLxfW0TD8x6eZ52C +EWC0BbR32vtquw8r5O+yR6hbBUJj8mTqTs3yAaTEs8Q+7y5uFuGsv+0NrmEOASyT +NvGaxODKLO1A/8kXXsko3I3hZOoi+9GG/eAncMRWtdwllE/KqZfp9uzi5aYh1MMb +px4SFqH5FQfvveZwGgEl9+BCkRQIptqv1fMlWouy35n5AeHkfflyyA4wC6iwgJAL +R5R95Y8y2UPWoRkB+HFvoEryCNrkdC1QmW07n5shHO9NzNk34tQIzfjvYwcPi2yy +3e/YNr3jyKOs86jTK6z9M/4htai/OxuF34rGS9pau/NINrDOpCNNy4zDgsQkvm5l +H4CzhH5tNvYaog== diff --git a/test/lua/unit/test_tld.dat b/test/lua/unit/test_tld.dat new file mode 100644 index 0000000..da12d76 --- /dev/null +++ b/test/lua/unit/test_tld.dat @@ -0,0 +1,20 @@ +com +org +net +рф +za.org +xn--p1ai +ac +b.br +co +co.za +in.net +star.kawasaki.jp +net.in +star.nom.br +org.ac +ru.com +za.net +za.org +org.za +tk diff --git a/test/lua/unit/testkey.pub b/test/lua/unit/testkey.pub new file mode 100644 index 0000000..6407aa0 --- /dev/null +++ b/test/lua/unit/testkey.pub @@ -0,0 +1,9 @@ +-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAxUYMGsqMbNZl4vw65Afi +vuC5rXDzbvP8zqj96L8t9M/2bV7Df1k4Pit+TKBczhs3HolQStq46AmrhoyNbLJx +yaA8g+8ETXOhHzQUR74ud/xQaPqx02E02kbR3LnQTp/wdrJARMAB8CsPm8X2wrpF +CRus+DMdDGWQXV3RFc0FbeYFMehn46k3+5dB96Y3Wh4cK3/aS2zpR2ddynN6vBaW +sSTNfadGbUtIodZgl50ecdyVeExmL/H9HWhcafcNJVUeI0jd79Px90puB1auK6fu +MVinDv2zJL3HIbz3qUTRAlVHdmphf/UoRq0hkZmnbTR0v9eC0FDwJV/XKspicJbv +1QIDAQAB +-----END PUBLIC KEY----- diff --git a/test/lua/unit/testkey.sec b/test/lua/unit/testkey.sec new file mode 100644 index 0000000..4a0325b --- /dev/null +++ b/test/lua/unit/testkey.sec @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- 
+MIIEowIBAAKCAQEAxUYMGsqMbNZl4vw65AfivuC5rXDzbvP8zqj96L8t9M/2bV7D +f1k4Pit+TKBczhs3HolQStq46AmrhoyNbLJxyaA8g+8ETXOhHzQUR74ud/xQaPqx +02E02kbR3LnQTp/wdrJARMAB8CsPm8X2wrpFCRus+DMdDGWQXV3RFc0FbeYFMehn +46k3+5dB96Y3Wh4cK3/aS2zpR2ddynN6vBaWsSTNfadGbUtIodZgl50ecdyVeExm +L/H9HWhcafcNJVUeI0jd79Px90puB1auK6fuMVinDv2zJL3HIbz3qUTRAlVHdmph +f/UoRq0hkZmnbTR0v9eC0FDwJV/XKspicJbv1QIDAQABAoIBAEmUQteLTK0bmoz6 +/wwmVNBVCWxDgMiVgGmkZm/1PrLdDlDk044gPPYTStxRw8usIvbkyGnjAqypTqy0 +p9svA3nspiWfdL9erW3yAs5vhO2D0ooVV1Y8H3Z6i7QEKknpJctf2NDLvO1TYlL7 +l3ox96XaCL3acq85AouQfnffLHM8e0sCj5zc2gDIAz2Vjh5eTR0qIPHNxVL1xeD0 +KGnhTz4WveHraoa3ARzB+fDskilLSdCHrvn8SjaotlFwcSIHVc6ymutBxC/wFIu9 +0O6YamR6b8J1smkVyi+UcIGrXfeLndm3t5jLhmhMJC9D350XhFRLKfoGSTweD1r7 +yGFgDtECgYEA/PRzBeZIcydKQDuFsrItMwrtno0xA9WOJMbKx1QXggX907lJqyZI +CjeP8GLO7YgXxibHuH8HUC9dy8K33GyZ9YjLkg3f3DZwSUIHwJhKvTbYHDs53oeZ +4Py5HrDRXYuIZJMpWUCJYEmRsiG1aNL1rTI1V6BrqjUuvu6BXAjKunMCgYEAx6YA +Ly5i6s7Vcp2/Sm1p+6YhOr0IVjWeJYxJzUYhQKk/EEw/pjsOuVPqH2ikGXnnIOIn +jveNwfhxEFEL+MU1vEAVOHegDlzP4AdDUSXYV61UN+oJPGRqtByROyR6/s6hbhRr +HpzOhzndg61N3XzN80pG56UwhLVs1nRjIVRlspcCgYAIkY9D5+UUYYRWYK3Ku9Zk +ID6kXEwIwTcrb8B2uBaDxQgwH9qq/YT7M56gmfhAe5eykqW5TjRFNxWKTXJE+TjN +5HBg5i9rGjz7fk0c7Qu7FRyE+EyhiR5hTK5Ip9yvuoZIQePorhL5PAS/b/zhLCQ+ +VbEQ4tJC1cJbnWCsaW/UmQKBgQCH8Xe0qMBAJKBg0BseAcylxuRfi7HuicnqxVDH +jtY7okLHxTOd7B7FgOctheIfWstPr87B4bzL9HCAbL9bIGXLjlMkxQfeX8JISInE +6qEaanKrNBgf4Dxr8fvOKrP5ZxeyzgJ2sM1MdNFpxQr9IutVmyEWwHt+Ec9PY6bQ +Xhh1dwKBgBAGqEz10PnWmJWYylP1wgxNSltS3kGQWP/vqsL7xo4NV0TOYbsu8Iun +MvLD3C5sSqTD7ycOiweglIFLaZCWtKEp+01WiEsceQ6G9mZ131Zb1uzdp+fmykyi +IL7R1kM99vpYh0JMj9l8AyNeHG2MKxriOdcDiAOolVxCjYBlnsD0 +-----END RSA PRIVATE KEY----- diff --git a/test/lua/unit/tokenizer.lua b/test/lua/unit/tokenizer.lua new file mode 100644 index 0000000..fbf7ee3 --- /dev/null +++ b/test/lua/unit/tokenizer.lua @@ -0,0 +1,81 @@ +context("Text tokenization test", function() + local util = require "rspamd_util" + local logger = require "rspamd_logger" + + local cases = { + 
{"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer mattis, nibh", + {"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", + "Integer", "mattis", "nibh" + } + }, + {"Հետաքրքրվողների համար ոտորև ներկայացված", + {"Հետաքրքրվողների", "համար", "ոտորև", "ներկայացված"} + }, + {"", {}}, + {",,,,,", {}}, + {"word,,,,,word ", {"word", "word"}}, + {"word", {"word"}}, + {",,,,word,,,", {"word"}} + } + + for i,c in ipairs(cases) do + test("Tokenize simple " .. i, function() + local w = util.tokenize_text(c[1]) + if #c[2] == 0 then + assert_equal(#w, 0, "must not have tokens " .. c[1]) + else + assert_not_nil(w, "must tokenize " .. c[1]) + + for i,wrd in ipairs(w) do + assert_equal(wrd, c[2][i]) + end + end + end) + end + + cases = { + {"word https://example.com/path word", + {{5, 24}}, + {"word", "!!EX!!", "word"} + }, + {"համար https://example.com/path համար", + {{11, 24}}, + {"համար", "!!EX!!", "համար"} + }, + {"word https://example.com/path https://example.com/path word", + {{5, 24}, {30, 24}}, + {"word", "!!EX!!", "!!EX!!", "word"} + }, + {"word https://example.com/path https://example.com/path", + {{5, 24}, {30, 24}}, + {"word", "!!EX!!", "!!EX!!"} + }, + {"https://example.com/path https://example.com/path word", + {{0, 24}, {25, 24}}, + {"!!EX!!", "!!EX!!", "word"} + }, + {"https://example.com/path https://example.com/path", + {{0, 24}, {25, 24}}, + {"!!EX!!", "!!EX!!"} + }, + {",,,,https://example.com/path https://example.com/path ", + {{4, 24}, {29, 24}}, + {"!!EX!!", "!!EX!!"} + }, + } + + for i,c in ipairs(cases) do + test("Tokenize with exceptions " .. i, function() + local w = util.tokenize_text(c[1], c[2]) + if #c[3] == 0 then + assert_equal(#w, 0, "must not have tokens " .. c[1]) + else + assert_not_nil(w, "must tokenize " .. c[1]) + for i,wrd in ipairs(w) do + assert_equal(wrd, c[3][i]) + end + end + end) + end + +end)
-- File: test/lua/unit/trie.lua
-- Trie search tests

context("Trie search functions", function()
  local t = require "rspamd_trie"
  local logger = require "rspamd_logger"
  local patterns = {
    'test',
    'est',
    'he',
    'she',
    'str\1ing'
  }

  local trie = t.create(patterns)

  -- Each entry: { input, whether anything matches, { {position, pattern index}, ... } }.
  local cases = {
    {'test', true, {{4, 1}, {4, 2}}},
    {'she test test', true, {{3, 4}, {3, 3}, {8, 1}, {8, 2}, {13, 1}, {13, 2}}},
    {'non-existent', false},
    {'str\1ing test', true, {{7, 5}, {12, 1}, {12, 2}}},
  }

  -- Strict ordering for {position, pattern index} pairs: by pattern index
  -- first, then by position, so expected and actual hits can be compared.
  local function cmp_tables(t1, t2)
    if t1[2] ~= t2[2] then
      return t1[2] < t2[2]
    else
      return t1[1] < t2[1]
    end
  end

  for i,c in ipairs(cases) do
    test("Trie search " .. i, function()
      local res = {}
      -- Collects every hit reported by the trie.
      local function cb(idx, pos)
        table.insert(res, {pos, idx})

        return 0
      end

      -- Declared local: the original assigned to a global `ret`.
      local ret = trie:match(c[1], cb)

      assert_equal(c[2], ret, tostring(c[2]) .. ' while matching ' .. c[1])

      if ret then
        table.sort(c[3], cmp_tables)
        table.sort(res, cmp_tables)
        assert_rspamd_table_eq({
          expect = c[3],
          actual = res
        })
      end
    end)
  end

  for i,c in ipairs(cases) do
    test("Trie search, table version " .. i, function()
      local match = trie:match(c[1])

      -- The result is iterated below as pattern index -> list of positions,
      -- so it is not guaranteed to be a sequence; test for "any entry" with
      -- next() instead of `#`, and do so only after checking it is a table.
      local has_match = type(match) == 'table' and next(match) ~= nil

      assert_equal(c[2], has_match, tostring(c[2]) .. ' while matching ' .. c[1])

      if has_match then
        local res = {}
        -- Convert to something that this test expects
        for pat,hits in pairs(match) do
          for _,pos in ipairs(hits) do
            table.insert(res, {pos, pat})
          end
        end
        table.sort(c[3], cmp_tables)
        table.sort(res, cmp_tables)
        assert_rspamd_table_eq({
          expect = c[3],
          actual = res
        })
      end
    end)
  end

end)

-- File: test/lua/unit/url.lua
-- URL parser tests

context("URL check functions", function()
  local mpool = require("rspamd_mempool")
  local lua_urls_compose = require "lua_urls_compose"
  local url = require("rspamd_url")
  local lua_util = require("lua_util")
  local logger = require("rspamd_logger")
  local test_helper = require("rspamd_test_helper")
  local ffi = require("ffi")

  ffi.cdef [[
  void rspamd_normalize_path_inplace(char *path, size_t len, size_t *nlen);
  ]]

  test_helper.init_url_parser()

  local pool = mpool.create()

  -- Each entry: { text, { expected host, expected user or nil } }.
  local cases = {
    { "test.com", { "test.com", nil } },
    { " test.com", { "test.com", nil } },
    { "<test.com> text", { "test.com", nil } },
    { "test.com. text", { "test.com", nil } },
    { "mailto:A.User@example.com text", { "example.com", "A.User" } },
    { "http://Тест.Рф:18 text", { "тест.рф", nil } },
    { "http://user:password@тест2.РФ:18 text", { "тест2.рф", "user" } },
    { "somebody@example.com", { "example.com", "somebody" } },
    { "https://127.0.0.1/abc text", { "127.0.0.1", nil } },
    { "https:\\\\127.0.0.1/abc text", { "127.0.0.1", nil } },
    { "https:\\\\127.0.0.1", { "127.0.0.1", nil } },
    { "https://127.0.0.1 text", { "127.0.0.1", nil } },
    { "https://[::1]:1", { "::1", nil } },
    { "https://user:password@[::1]:1", { "::1", nil } },
    { "https://user:password@[::1]", { "::1", nil } },
    { "https://user:password@[::1]/1", { "::1", nil } },
  }

  for i, c in ipairs(cases) do
    local res = url.create(pool, c[1])

    test("Extract urls from text" .. i, function()
      assert_not_nil(res, "cannot parse " .. c[1])
      local t = res:to_table()
      --local s = logger.slog("%1 -> %2", c[1], t)
      --print(s)
      assert_not_nil(t, "cannot convert to table " .. c[1])
      assert_equal(c[2][1], t['host'],
          logger.slog('expected host "%s", but got "%s" in url %s => %s',
              c[2][1], t['host'], c[1], t))

      if c[2][2] then
        -- Compare the user part; the original repeated the host comparison
        -- here, so the expected user was never actually checked.
        assert_equal(c[2][2], t['user'],
            logger.slog('expected user "%s", but got "%s" in url %s => %s',
                c[2][2], t['user'], c[1], t))
      end
    end)
  end

  -- Each entry: { url, whether it must parse, { field = expected value, ... } }.
  cases = {
    { [[http://example.net/path/]], true, {
      host = 'example.net', path = 'path/'
    } },
    { 'http://example.net/hello%20world.php?arg=x#fragment', true, {
      host = 'example.net', fragment = 'fragment', query = 'arg=x',
      path = 'hello world.php',
    } },
    { 'http://example.net/?arg=%23#fragment', true, {
      host = 'example.net', fragment = 'fragment', query = 'arg=#',
    } },
    { "http:/\\[::eeee:192.168.0.1]/#test", true, {
      host = '::eeee:c0a8:1', fragment = 'test'
    } },
    { "http:/\\[::eeee:192.168.0.1]#test", true, {
      host = '::eeee:c0a8:1', fragment = 'test'
    } },
    { "http:/\\[::eeee:192.168.0.1]?test", true, {
      host = '::eeee:c0a8:1', query = 'test'
    } },
    { "http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
      host = '192.168.0.1',
    } },
    { "http:/\\www.google.com/foo?bar=baz#", true, {
      host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
    } },
    { "http://[www.google.com]/", true, {
      host = 'www.google.com',
    } },
    { "<test.com", true, {
      host = 'test.com', tld = 'test.com',
    } },
    { "test.com>", false },
    { ",test.com text", false },
    { "ht\ttp:@www.google.com:80/;p?#", false },
    { "http://user:pass@/", false },
    { "http://foo:-80/", false },
    { "http:////////user:@google.com:99?foo", true, {
      host = 'google.com', user = 'user', port = 99, query = 'foo'
    } },
    { "http://%25DOMAIN:foobar@foodomain.com/", true, {
      host = 'foodomain.com', user = '%25DOMAIN'
    } },
    { "http://0.0xFFFFFF", true, {
      host = '0.255.255.255'
    } },
    --{"http:/\\030052000001", true, {
    -- host = '192.168.0.1'
    --}},
    { "http:\\/0xc0.052000001", true, {
      host = '192.168.0.1'
    } },
    { "http://192.168.0.1.?foo", true, {
      host = '192.168.0.1', query = 'foo',
    } },
    { "http://twitter.com#test", true, {
      host = 'twitter.com', fragment = 'test'
    } },
    { "http:www.twitter.com#test", true, {
      host = 'www.twitter.com', fragment = 'test'
    } },
    { "http://example。com#test", true, {
      host = 'example.com', fragment = 'test'
    } },
    { "http://hoho.example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    } },
    { "http://hoho。example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    } },
    { "http://hoho.example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    } },
    { "http://hehe。example。com#test", true, {
      host = 'hehe.example.com', fragment = 'test'
    } },
    { "http:////$%^&****((@example.org//#f@f", true, {
      user = '$%^&****((', host = 'example.org', fragment = 'f@f'
    } },
    { "http://@@example.com", true, {
      user = "@", host = "example.com"
    } },
    { "https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, {
      host = "example.com", path = "_Resources\\ClientImages\\UserData",
      query = "ol\\o", fragment = "ololo\\",
    } },
    {
      "http://0x3f8f29a4/pro/au.html", true, {
      host = "63.143.41.164",
      path = "pro/au.html",
    } },
    {
      "http://localhost", true, {
      host = "localhost",
      tld = "localhost",
    } },
    {
      "http://localhost.", true, {
      host = "localhost.",
      tld = "localhost",
    } },
  }

  -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
  for i, c in ipairs(cases) do
    local res = url.create(pool, c[1])

    test("Parse url: " .. c[1], function()
      if c[2] then
        assert_not_nil(res, "we are able to parse url: " .. c[1])

        local uf = res:to_table()

        -- Every expected field must be present with the expected value...
        for k, v in pairs(c[3]) do
          assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v)
          assert_equal(uf[k], v, logger.slog('expected " %s ", for %s, but got " %s " in url %s => %s',
              v, k, uf[k], c[1], uf))
        end
        -- ...and no unexpected field may appear, except the ones that are
        -- deliberately not listed in every case.
        for k, v in pairs(uf) do
          if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
            assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. v .. ' in: ' .. c[1])
          end
        end
      else
        assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
      end
    end)
  end

  -- Each entry: { raw path, expected normalized path }.
  cases = {
    { "/././foo", "/foo" },
    { "/a/b/c/./../../g", "/a/g" },
    { "/./.foo", "/.foo" },
    { "/foo/.", "/foo/" },
    { "/foo/./", "/foo/" },
    { "/foo/bar/..", "/foo" },
    { "/foo/bar/../", "/foo/" },
    { "/foo/..bar", "/foo/..bar" },
    { "/foo/bar/../ton", "/foo/ton" },
    { "/foo/bar/../ton/../../a", "/a" },
    { "/foo/../../..", "/" },
    { "/foo/../../../ton", "/ton" },
    { "////../..", "/" },
    { "./", "" },
    { "/./", "/" },
    { "/./././././././", "/" },
    { "/", "/" },
    { "/a/b", "/a/b" },
    { "/a/b/", "/a/b/" },
    { "..", "/" },
    { "/../", "/" },
    { "../", "/" },
    { "///foo", "/foo" },
  }

  for i, v in ipairs(cases) do
    test(string.format("Normalize paths '%s'", v[1]), function()
      -- The path is normalized in place; the new length is returned
      -- through sizbuf.
      local buf = ffi.new("uint8_t[?]", #v[1])
      local sizbuf = ffi.new("size_t[1]")
      ffi.copy(buf, v[1], #v[1])
      ffi.C.rspamd_normalize_path_inplace(buf, #v[1], sizbuf)
      local res = ffi.string(buf, tonumber(sizbuf[0]))
      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in path ' .. v[1])
    end)
  end

  -- Each entry: { host, expected result of the composition rules }.
  cases = {
    { 'example.com', 'example.com' },
    { 'baz.example.com', 'baz.example.com' },
    { '3.baz.example.com', 'baz.example.com' },
    { 'bar.example.com', 'example.com' },
    { 'foo.example.com', 'foo.example.com' },
    { '3.foo.example.com', '3.foo.example.com' },
    { 'foo.com', 'foo.com' },
    { 'bar.foo.com', 'foo.com' },
  }

  local excl_rules1 = {
    'example.com',
    '*.foo.example.com',
    '!bar.example.com'
  }

  local comp_rules = lua_urls_compose.inject_composition_rules(rspamd_config, excl_rules1)

  for _, v in ipairs(cases) do
    test("URL composition " .. v[1], function()
      local u = url.create(pool, v[1])
      assert_not_nil(u, "we are able to parse url:" .. v[1])
      local res = comp_rules:process_url(nil, u:get_tld(), u:get_host())
      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1])
    end)
  end
end)

-- File: test/lua/unit/utf.lua
-- Test utf routines

context("UTF8 check functions", function()
  local ffi = require("ffi")
  ffi.cdef[[
    unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size);
    unsigned int rspamd_str_lc (char *str, unsigned int size);
    void rspamd_fast_utf8_library_init (unsigned flags);
    void ottery_rand_bytes(void *buf, size_t n);
    double rspamd_get_ticks(int allow);
    size_t rspamd_fast_utf8_validate (const unsigned char *data, size_t len);
    size_t rspamd_fast_utf8_validate_ref (const unsigned char *data, size_t len);
    size_t rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len);
    size_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len);
    char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen, void *);
  ]]

  -- Each entry: { input, expected lowercase form }.
  local cases = {
    {"АбЫрвАлг", "абырвалг"},
    {"АAБBвc", "аaбbвc"},
    --{"STRASSE", "straße"}, XXX: NYI
    {"KEÇİ", "keçi"},
  }

  for i,c in ipairs(cases) do
    test("UTF lowercase " .. tostring(i), function()
      -- One extra byte so that ffi.copy's zero terminator fits.
      local buf = ffi.new("char[?]", #c[1] + 1)
      ffi.copy(buf, c[1])
      local nlen = ffi.C.rspamd_str_lc_utf8(buf, #c[1])
      local s = ffi.string(buf, nlen)
      assert_equal(s, c[2])
    end)
  end

  cases = {
    {"AbCdEf", "abcdef"},
    {"A", "a"},
    {"AaAa", "aaaa"},
    {"AaAaAaAa", "aaaaaaaa"}
  }

  for i,c in ipairs(cases) do
    test("ASCII lowercase " .. tostring(i), function()
      local buf = ffi.new("char[?]", #c[1] + 1)
      ffi.copy(buf, c[1])
      ffi.C.rspamd_str_lc(buf, #c[1])
      local s = ffi.string(buf)
      assert_equal(s, c[2])
    end)
  end

  -- Each entry: { possibly invalid input, expected output after repair }.
  cases = {
    {'тест', 'тест'},
    {'\200\213\202', '���'},
    {'тест\200\213\202test', 'тест���test'},
    {'\200\213\202test', '���test'},
    {'\200\213\202test\200\213\202', '���test���'},
    {'тест\200\213\202test\200\213\202', 'тест���test���'},
    {'тест\200\213\202test\200\213\202тест', 'тест���test���тест'},
  }

  -- Renders a string as upper-case hex, one byte at a time, for the
  -- diagnostic output below.
  local function to_hex(str)
    return (str:gsub('.', function (ch)
      return string.format('%02X', string.byte(ch))
    end))
  end

  local NULL = ffi.new 'void*'
  for i,c in ipairs(cases) do
    test("Unicode make valid " .. tostring(i), function()
      local buf = ffi.new("char[?]", #c[1] + 1)
      ffi.copy(buf, c[1])

      local s = ffi.string(ffi.C.rspamd_str_make_utf_valid(buf, #c[1], NULL, NULL))
      print(to_hex(s))
      print(to_hex(c[2]))
      assert_equal(s, c[2])
    end)
  end

  -- Enable sse and avx2
  ffi.C.rspamd_fast_utf8_library_init(3)
  local valid_cases = {
    "a",
    "\xc3\xb1",
    "\xe2\x82\xa1",
    "\xf0\x90\x8c\xbc",
    "안녕하세요, 세상"
  }
  for i,c in ipairs(valid_cases) do
    test("Unicode validate success: " .. tostring(i), function()
      local buf = ffi.new("char[?]", #c + 1)
      ffi.copy(buf, c)

      local ret = ffi.C.rspamd_fast_utf8_validate(buf, #c)
      assert_equal(ret, 0)
    end)
  end
  local invalid_cases = {
    "\xc3\x28",
    "\xa0\xa1",
    "\xe2\x28\xa1",
    "\xe2\x82\x28",
    "\xf0\x28\x8c\xbc",
    "\xf0\x90\x28\xbc",
    "\xf0\x28\x8c\x28",
    "\xc0\x9f",
    "\xf5\xff\xff\xff",
    "\xed\xa0\x81",
    "\xf8\x90\x80\x80\x80",
    "123456789012345\xed",
    "123456789012345\xf1",
    "123456789012345\xc2",
    "\xC2\x7F"
  }
  for i,c in ipairs(invalid_cases) do
    test("Unicode validate fail: " .. tostring(i), function()
      local buf = ffi.new("char[?]", #c + 1)
      ffi.copy(buf, c)

      local ret = ffi.C.rspamd_fast_utf8_validate(buf, #c)
      assert_not_equal(ret, 0)
    end)
  end

  if os.getenv("RSPAMD_LUA_EXPENSIVE_TESTS") then
    local speed_iters = 10000

    -- Times one validator implementation over a buffer of `buflen` bytes.
    -- `is_valid` selects whether the sample data contains only the valid
    -- cases or the valid cases followed by the invalid ones; `impl` is
    -- 'ref', 'sse', or anything else for avx2. Logs the timing and
    -- returns 0.
    local function test_size(buflen, is_valid, impl)
      local logger = require "rspamd_logger"
      local test_str
      if is_valid then
        test_str = table.concat(valid_cases)
      else
        test_str = table.concat(valid_cases) .. table.concat(invalid_cases)
      end

      local buf = ffi.new("char[?]", buflen)
      -- Repeat the sample until it covers the whole buffer. The original
      -- loop appended one repetition too few, which could leave part of
      -- the buffer unfilled.
      local reps = math.ceil(buflen / #test_str)
      if reps > 1 then
        test_str = string.rep(test_str, reps)
      end
      -- Copy exactly buflen bytes: the two-argument ffi.copy(dst, str)
      -- also writes a zero terminator, i.e. one byte past a buflen-sized
      -- buffer when the string is buflen bytes long.
      ffi.copy(buf, test_str, buflen)

      local tm = 0

      for _=1,speed_iters do
        if impl == 'ref' then
          local t1 = ffi.C.rspamd_get_ticks(1)
          ffi.C.rspamd_fast_utf8_validate_ref(buf, buflen)
          local t2 = ffi.C.rspamd_get_ticks(1)
          tm = tm + (t2 - t1)
        elseif impl == 'sse' then
          local t1 = ffi.C.rspamd_get_ticks(1)
          ffi.C.rspamd_fast_utf8_validate_sse41(buf, buflen)
          local t2 = ffi.C.rspamd_get_ticks(1)
          tm = tm + (t2 - t1)
        else
          local t1 = ffi.C.rspamd_get_ticks(1)
          ffi.C.rspamd_fast_utf8_validate_avx2(buf, buflen)
          local t2 = ffi.C.rspamd_get_ticks(1)
          tm = tm + (t2 - t1)
        end
      end

      logger.messagex("%s utf8 %s check (valid = %s): %s ticks per iter, %s ticks per byte",
          impl, buflen, is_valid,
          tm / speed_iters, tm / speed_iters / buflen)

      return 0
    end

    for _,sz in ipairs({78, 512, 65535}) do
      test(string.format("Utf8 test %s %d buffer, %s", 'ref', sz, 'valid'), function()
        local res = test_size(sz, true, 'ref')
        assert_equal(res, 0)
      end)
      test(string.format("Utf8 test %s %d buffer, %s", 'ref', sz, 'invalid'), function()
        local res = test_size(sz, false, 'ref')
        assert_equal(res, 0)
      end)

      -- The sse/avx2 variants are only exercised on x64.
      if jit.arch == 'x64' then
        test(string.format("Utf8 test %s %d buffer, %s", 'sse', sz, 'valid'), function()
          local res = test_size(sz, true, 'sse')
          assert_equal(res, 0)
        end)
        test(string.format("Utf8 test %s %d buffer, %s", 'sse', sz, 'invalid'), function()
          local res = test_size(sz, false, 'sse')
          assert_equal(res, 0)
        end)
        test(string.format("Utf8 test %s %d buffer, %s", 'avx2', sz, 'valid'), function()
          local res = test_size(sz, true, 'avx2')
          assert_equal(res, 0)
        end)
        test(string.format("Utf8 test %s %d buffer, %s", 'avx2', sz, 'invalid'), function()
          local res = test_size(sz, false, 'avx2')
          assert_equal(res, 0)
        end)
      end
    end
  end

end)
\ No newline at end of file |