diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib-mail/test-message-search.c | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/src/lib-mail/test-message-search.c b/src/lib-mail/test-message-search.c new file mode 100644 index 0000000..d137a58 --- /dev/null +++ b/src/lib-mail/test-message-search.c @@ -0,0 +1,521 @@ +/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "str.h" +#include "unichar.h" +#include "message-parser.h" +#include "message-search.h" +#include "test-common.h" + +struct test_case_data { + const unsigned char *value; + size_t value_len; +}; + +#define TEST_CASE_DATA(x) \ + { .value = (const unsigned char*)((x)), .value_len = sizeof((x))-1 } +#define TEST_CASE_DATA_EMPTY \ + { .value = NULL, .value_len = 0 } +#define TEST_CASE_PLAIN_PREAMBLE \ +"Content-Type: text/plain\n" \ +"Content-Transfer-Encoding: binary\n" + +struct test_case { + struct test_case_data input; + const char *search; + struct test_case_data output; + bool expect_found; + bool expect_body; + bool expect_header; + const char *hdr_name; +}; + +static void compare_search_result(const struct test_case *tc, + const struct message_block *block, + size_t i) +{ + if (block->hdr != NULL) { + /* found header */ + test_assert_idx(tc->expect_header == TRUE, i); + test_assert_strcmp_idx(tc->hdr_name, block->hdr->name, i); + test_assert_idx(block->hdr->full_value != NULL && + tc->output.value != NULL && + tc->output.value_len <= block->hdr->full_value_len && + memcmp(tc->output.value, block->hdr->full_value, + tc->output.value_len) == 0, i); + } else if (block->data != NULL) { + /* found body */ + test_assert_idx(tc->expect_body == TRUE, i); + test_assert_idx(block->data != NULL && + tc->output.value != NULL && + tc->output.value_len <= block->size && + memcmp(tc->output.value, block->data, + tc->output.value_len) == 0, i); + } else { + test_assert_idx(tc->expect_header == FALSE, i); + test_assert_idx(tc->expect_body == FALSE, i); + } +} + +#define SIGNED_MIME_CORPUS \ +"Subject: Hide and seek\n" \ +"MIME-Version: 1.0\n" \ +"Content-Type: multipart/mixed; boundary=1\n" \ +"\n--1\n" \ +"Content-Type: multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2\n" \ +"X-Signature-Type: penmanship\n" \ +"\n--2\n" \ +"Content-Type: multipart/alternative; boundary=3\n" \ +"\n--3\n" \ +"Content-Type: text/html; charset=us-ascii\n\n" \ +"<html><head><title>Search me</title></head><body><p>Don't find me here</p></body></html>\n" \ +"\n--3\n" \ +TEST_CASE_PLAIN_PREAMBLE \ +"\n" \ +"Search me, and Find me here" \ +"\n--3--\n" \ +"\n--2\n" \ +"Content-Type: signature/plain; charset=us-ascii\n" \ +"\n" \ +"Signed by undersigned" \ +"\n--2--\n" \ +"\n--1--" + +#define PARTIAL_MESSAGE_CORPUS \ +"X-Weird-Header-1: Bar\n" \ +"X-Weird-Header-2: Hello\n" \ +"Message-ID: <c6cceebc-1dcf-11eb-be8c-f7ca132cbfea@example.org>\n" \ +"Content-Type: text/plain; charset=\"us-ascii\"\n" \ +"Content-Transfer-Encoding: base64\n" \ +"\n" \ +"dGhpcyBpcyB0aGUgZmlyc3QgcGFydCBvZiB0aGUgbWVzc2FnZQo=" + +#define PARTIAL_MIME_CORPUS \ +"Subject: In parts\n" \ +"MIME-Version: 1.0\n" \ +"Content-Type: multipart/mixed; boundary=1\n" \ +"\n--1\n" \ +TEST_CASE_PLAIN_PREAMBLE \ +"\n" \ +"Hi, this is the fancy thing I was talking about!" \ +"\n--1\n" \ +"Content-Type: Message/Partial; number=1; total=5; id=\"heks68ewe@example.org\"\n" \ +"\n" \ +PARTIAL_MESSAGE_CORPUS \ +"\n--1--\n" + +#define UT8_CORPUS_CONTENT \ +"\xe4\xba\xba\xe6\xa8\xa9\xe3\x81\xae\xe7\x84\xa1\xe8\xa6\x96\xe5\x8f\x8a" + +#define UTF8_CORPUS \ +"Subject: =?UTF-8?B?44GT44KT44Gr44Gh44Gv?=\n" \ +"MIME-Version: 1.0\n" \ +"Content-Type: multipart/mixed; boundary=1;\n" \ +" comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf\xe5\xa2\x83\xe7\x95\x8c\xe3" \ + "\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3\x83\x83\xe3\x82" \ + "\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\"\n" \ +"\n--1\n" \ +TEST_CASE_PLAIN_PREAMBLE \ +"Content-Language: ja\n" \ +"\n" \ +UT8_CORPUS_CONTENT \ +"\n--1--" + +#define MULTIPART_DIGEST_CORPUS \ +"From: Moderator-Address <moderator>\n" \ +"Content-Type: multipart/digest; boundary=1;\n" \ +"\n\n--1\n" \ +"From: someone-else <someone@else>\n" \ +"Subject: my opinion\n" \ +"\n" \ +"This is my opinion" \ +"\n--1\n\n" \ +"From: another one <another@one>\n" \ +"Subject: i disagree\n" \ +"\n" \ +"Not agreeing one bit!" \ +"\n--1\n\n" \ +"From: attachment <attachment@user>\n" \ +"Subject: funny hat\n" \ +"Content-Type: multipart/mixed; boundary=2\n" \ +"\n--2\n" \ +TEST_CASE_PLAIN_PREAMBLE \ +"\n" \ +"Lovely attachment for you" \ +"\n--2\n" \ +"Content-Type: application/octet-stream; disposition=attachment; name=\"test.txt\"\n" \ +"Content-Transfer-Encoding: binary\n" \ +"\n" \ +"Foobar" \ +"\n--2--" \ +"\n--1--" + +static void test_message_search(void) +{ + const struct test_case test_cases[] = { + { /* basic test */ + .input = TEST_CASE_DATA( +"MIME-Version: 1.0\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hello, world"), + .search = "Hello", + .output = TEST_CASE_DATA("Hello, world"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* look for something that's not found */ + .input = TEST_CASE_DATA( +"MIME-Version: 1.0\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hallo, world"), + .search = "Hello", + .output = TEST_CASE_DATA_EMPTY, + .expect_found = FALSE, + }, + { /* header value search */ + .input = TEST_CASE_DATA( +"Subject: Hello, World\n" +"MIME-Version: 1.0\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hallo, world"), + .search = "Hello", + .output = TEST_CASE_DATA("Hello, World"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Subject", + }, + { /* header value wrapped in base64 */ + .input = TEST_CASE_DATA( +"Subject: =?UTF-8?B?SGVsbG8sIFdvcmxk?=\n" +"MIME-Version: 1.0\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hallo, world"), + .search = "Hello", + .output = TEST_CASE_DATA("Hello, World"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Subject", + }, + { /* hidden inside one multipart */ + .input = TEST_CASE_DATA( +"Subject: Hide and seek\n" +"MIME-Version: 1.0\n" +"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"A\"\n\n" +"--A\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hallo, world" +"\n--A\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hullo, world" +"\n--A\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Hello, world" +"\n--A--\n" +), + .search = "Hello", + .output = TEST_CASE_DATA("Hello, world"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* same with emoji boundary */ + .input = TEST_CASE_DATA( +"Subject: Hide and seek\n" +"MIME-Version: 1.0\n" +"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"\xF0\x9F\x98\x82\"; COMMENT=\"Boundary is U+1F602\"\n\n" +"--\xF0\x9F\x98\x82\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Face with Tears of Joy" +"\n--\xF0\x9F\x98\x82\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Emoji" +"\n--\xF0\x9F\x98\x82--\n" +), + .search = "Emoji", + .output = TEST_CASE_DATA("Emoji"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* Nested body search */ + .input = TEST_CASE_DATA(SIGNED_MIME_CORPUS), + .search = "Find me here", + .output = TEST_CASE_DATA("Search me, and Find me here"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* Nested body search (won't look into signature/plain) */ + .input = TEST_CASE_DATA(SIGNED_MIME_CORPUS), + .search = "undersigned", + .output = TEST_CASE_DATA_EMPTY, + .expect_found = FALSE, + }, + { /* Nested mime part header search */ + .input = TEST_CASE_DATA(SIGNED_MIME_CORPUS), + .search = "penmanship", + .output = TEST_CASE_DATA("penmanship"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "X-Signature-Type", + }, + { /* Nested mime part header parameter search */ + .input = TEST_CASE_DATA(SIGNED_MIME_CORPUS), + .search = "pen+paper", + .output = TEST_CASE_DATA("multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Content-Type", + }, + { /* Partial message - must not parse the content */ + .input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS), + .search = "Bar", + .output = TEST_CASE_DATA(PARTIAL_MESSAGE_CORPUS), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* Partial message - must not parse the content */ + .input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS), + .search = "fancy thing", + .output = TEST_CASE_DATA("Hi, this is the fancy thing I was talking about!"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* UTF-8 searches */ + .input = TEST_CASE_DATA(UTF8_CORPUS), + .search = "\xe4\xba\xba\xe6\xa8\xa9", + .output = TEST_CASE_DATA(UT8_CORPUS_CONTENT), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* UTF-8 search header */ + .input = TEST_CASE_DATA(UTF8_CORPUS), + .search = "\xe3\x81\x93\xe3\x82\x93", + .output = TEST_CASE_DATA("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Subject", + }, + { /* UTF-8 searches content-type parameter */ + .input = TEST_CASE_DATA(UTF8_CORPUS), + .search = "\xe3\x81\xa7\xe3\x81\x99", + .output = TEST_CASE_DATA( +"multipart/mixed; boundary=1;\n comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf" +"\xe5\xa2\x83\xe7\x95\x8c\xe3\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3" +"\x83\x83\xe3\x82\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\""), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Content-Type", + }, + { + /* Invalid UTF-8 boundary (should not matter) */ + .input = TEST_CASE_DATA( +"Content-Type: multipart/mixed; boundary=\"\xff\xff\xff\xff\"\n" +"\n--\xff\xff\xff\xff\n" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Can you find me?" +"\n--\xff\xff\xff\xff--"), + .search = "Can you find me?", + .output = TEST_CASE_DATA("Can you find me?"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { + /* Invalid UTF-8 in subject (should work) */ + .input = TEST_CASE_DATA( +"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?=" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Such horror"), + .search = "Rotten", + .output = TEST_CASE_DATA("Rotten "UNICODE_REPLACEMENT_CHAR_UTF8" text"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Subject", + }, + { + /* Invalid UTF-8 in body (should work) */ + .input = TEST_CASE_DATA( +"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?=" +TEST_CASE_PLAIN_PREAMBLE +"\n" +"Such horror \xff\xff\xff\xff"), + .search = "Such horror", + .output = TEST_CASE_DATA("Such horror "UNICODE_REPLACEMENT_CHAR_UTF8), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { + /* UTF-8 in content-type parameter */ + .input = TEST_CASE_DATA( +"Content-Type: multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n" +"\n--1--\n"), + .search = "U+1F62D", + .output = TEST_CASE_DATA("multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Content-Type", + }, + { + /* Broken UTF-8 in content-type parameter */ + .input = TEST_CASE_DATA( +"Content-Type: multipart/mixed; boundary=1;" +" \xFF\xFF\xFF\xFF=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n" +"\n--1--\n"), + .search = "U+1F62D", + .output = TEST_CASE_DATA("multipart/mixed; boundary=1; "UNICODE_REPLACEMENT_CHAR_UTF8"=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Content-Type", + }, + { /* Multipart digest */ + .input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS), + .search = "Not agreeing", + .output = TEST_CASE_DATA("Not agreeing one bit!"), + .expect_found = TRUE, + .expect_body = TRUE, + }, + { /* Multipart digest header */ + .input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS), + .search = "someone-else", + .output = TEST_CASE_DATA("someone-else <someone@else>"), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "From", + }, + { /* Multipart digest header parameter */ + .input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS), + .search = "test.txt", + .output = TEST_CASE_DATA("application/octet-stream; disposition=attachment; name=\"test.txt\""), + .expect_found = TRUE, + .expect_body = FALSE, + .expect_header = TRUE, + .hdr_name = "Content-Type", + }, +}; + + test_begin("message search"); + + for (size_t i = 0; i < N_ELEMENTS(test_cases); i++) T_BEGIN { + struct message_search_context *sctx; + struct message_block raw_block, decoded_block; + struct message_part *parts; + const char *error; + bool found = FALSE; + const struct test_case *tc = &test_cases[i]; + struct message_parser_settings set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP, + }; + pool_t pool = pool_alloconly_create("message parser", 10240); + struct istream *is = + test_istream_create_data(tc->input.value, tc->input.value_len); + struct message_parser_ctx *pctx = + message_parser_init(pool, is, &set); + int ret; + sctx = message_search_init(tc->search, NULL, tc->expect_header ? + 0 : MESSAGE_SEARCH_FLAG_SKIP_HEADERS); + while ((ret = message_parser_parse_next_block(pctx, &raw_block)) > 0) { + if (message_search_more_get_decoded(sctx, &raw_block, + &decoded_block)) { + found = TRUE; + compare_search_result(tc, &decoded_block, i); + } + } + test_assert(ret == -1); + test_assert_idx(tc->expect_found == found, i); + message_parser_deinit(&pctx, &parts); + test_assert(is->stream_errno == 0); + i_stream_seek(is, 0); + if ((ret = message_search_msg(sctx, is, parts, &error)) < 0) { + i_error("Search error: %s", error); + } else { + test_assert_idx(tc->expect_found == (ret == 1), i); + } + /* and once more */ + i_stream_seek(is, 0); + if ((ret = message_search_msg(sctx, is, NULL, &error)) < 0) { + i_error("Search error: %s", error); + } else { + test_assert_idx(tc->expect_found == (ret == 1), i); + } + message_search_deinit(&sctx); + test_assert(is->stream_errno == 0); + i_stream_unref(&is); + pool_unref(&pool); + } T_END; + + test_end(); + +} + +static void test_message_search_more_get_decoded(void) +{ + const char input[] = "p\xC3\xB6\xC3\xB6"; + const unsigned char text_plain[] = "text/plain; charset=utf-8"; + struct message_search_context *ctx1, *ctx2; + struct message_block raw_block, decoded_block; + struct message_header_line hdr; + struct message_part part; + unsigned int i; + + test_begin("message_search_more_get_decoded()"); + + ctx1 = message_search_init("p\xC3\xA4\xC3\xA4", NULL, 0); + ctx2 = message_search_init("p\xC3\xB6\xC3\xB6", NULL, 0); + + i_zero(&raw_block); + raw_block.part = ∂ + + /* feed the Content-Type header */ + i_zero(&hdr); + hdr.name = "Content-Type"; hdr.name_len = strlen(hdr.name); + hdr.value = hdr.full_value = text_plain; + hdr.value_len = hdr.full_value_len = sizeof(text_plain)-1; + raw_block.hdr = &hdr; + test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block)); + test_assert(!message_search_more_decoded(ctx2, &decoded_block)); + + /* EOH */ + raw_block.hdr = NULL; + test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block)); + test_assert(!message_search_more_decoded(ctx2, &decoded_block)); + + /* body */ + raw_block.size = 1; + for (i = 0; input[i] != '\0'; i++) { + raw_block.data = (const void *)&input[i]; + test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block)); + test_assert(message_search_more_decoded(ctx2, &decoded_block) == (input[i+1] == '\0')); + } + message_search_deinit(&ctx1); + message_search_deinit(&ctx2); + test_end(); +} + +int main(void) +{ + static void (*const test_functions[])(void) = { + test_message_search, + test_message_search_more_get_decoded, + NULL + }; + return test_run(test_functions); +} |