Search me

/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "istream.h"
#include "str.h"
#include "unichar.h"
#include "message-parser.h"
#include "message-search.h"
#include "test-common.h"

/* A byte buffer together with its explicit length. */
struct test_case_data {
	const unsigned char *value;
	size_t value_len;
};

/* Build a test_case_data initializer from a string literal; the length
   excludes the literal's terminating NUL. */
#define TEST_CASE_DATA(x) \
	{ .value = (const unsigned char*)((x)), .value_len = sizeof((x))-1 }

/* test_case_data initializer for "no expected output". */
#define TEST_CASE_DATA_EMPTY \
	{ .value = NULL, .value_len = 0 }

/* Headers shared by the plain-text parts of the test messages. */
#define TEST_CASE_PLAIN_PREAMBLE \
	"Content-Type: text/plain\n" \
	"Content-Transfer-Encoding: binary\n"

/* One search scenario. */
struct test_case {
	/* message that is parsed and searched */
	struct test_case_data input;
	/* search key given to message_search_init() */
	const char *search;
	/* bytes the matching header value / body data must begin with */
	struct test_case_data output;
	/* whether the key is expected to be found at all */
	bool expect_found;
	/* whether a match is expected in body data */
	bool expect_body;
	/* whether a match is expected in a header (also enables header
	   searching for this case) */
	bool expect_header;
	/* name of the header expected to match, when expect_header is set */
	const char *hdr_name;
};

/* Check one matched, decoded block against the expectations of test case
   number i.

   A header block must be expected, carry the expected header name, and its
   full value must begin with tc->output. A body block must be expected and
   its data must begin with tc->output. A block with neither header nor data
   is accepted only when neither a header nor a body match is expected. */
static void
compare_search_result(const struct test_case *tc,
		      const struct message_block *block, size_t i)
{
	if (block->hdr != NULL) {
		/* found header */
		test_assert_idx(tc->expect_header == TRUE, i);
		test_assert_strcmp_idx(tc->hdr_name, block->hdr->name, i);
		test_assert_idx(block->hdr->full_value != NULL &&
				tc->output.value != NULL &&
				tc->output.value_len <= block->hdr->full_value_len &&
				memcmp(tc->output.value, block->hdr->full_value,
				       tc->output.value_len) == 0, i);
	} else if (block->data != NULL) {
		/* found body */
		test_assert_idx(tc->expect_body == TRUE, i);
		test_assert_idx(tc->output.value != NULL &&
				tc->output.value_len <= block->size &&
				memcmp(tc->output.value, block->data,
				       tc->output.value_len) == 0, i);
	} else {
		test_assert_idx(tc->expect_header == FALSE, i);
		test_assert_idx(tc->expect_body == FALSE, i);
	}
}

/* multipart/mixed > multipart/signed > multipart/alternative message.
   NOTE(review): the text/html part below contains no markup and unusual
   blank lines - the original HTML tags may have been lost; confirm against
   the upstream test data. */
#define SIGNED_MIME_CORPUS \
	"Subject: Hide and seek\n" \
	"MIME-Version: 1.0\n" \
	"Content-Type: multipart/mixed; boundary=1\n" \
	"\n--1\n" \
	"Content-Type: multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2\n" \
	"X-Signature-Type: penmanship\n" \
	"\n--2\n" \
	"Content-Type: multipart/alternative; boundary=3\n" \
	"\n--3\n" \
	"Content-Type: text/html; charset=us-ascii\n\n" \
	"Search me\n\nDon't find me here\n\n\n" \
	"\n--3\n" \
	TEST_CASE_PLAIN_PREAMBLE \
	"\n" \
	"Search me, and Find me here" \
	"\n--3--\n" \
	"\n--2\n" \
	"Content-Type: signature/plain; charset=us-ascii\n" \
	"\n" \
	"Signed by undersigned" \
	"\n--2--\n" \
	"\n--1--"

/* Payload of the Message/Partial part used in PARTIAL_MIME_CORPUS.
   NOTE(review): the Message-ID value is empty - possibly an angle-bracketed
   id was lost; confirm against the upstream test data. */
#define PARTIAL_MESSAGE_CORPUS \
	"X-Weird-Header-1: Bar\n" \
	"X-Weird-Header-2: Hello\n" \
	"Message-ID: \n" \
	"Content-Type: text/plain; charset=\"us-ascii\"\n" \
	"Content-Transfer-Encoding: base64\n" \
	"\n" \
	"dGhpcyBpcyB0aGUgZmlyc3QgcGFydCBvZiB0aGUgbWVzc2FnZQo="

/* multipart/mixed message with a plain part and a Message/Partial part. */
#define PARTIAL_MIME_CORPUS \
	"Subject: In parts\n" \
	"MIME-Version: 1.0\n" \
	"Content-Type: multipart/mixed; boundary=1\n" \
	"\n--1\n" \
	TEST_CASE_PLAIN_PREAMBLE \
	"\n" \
	"Hi, this is the fancy thing I was talking about!" \
	"\n--1\n" \
	"Content-Type: Message/Partial; number=1; total=5; id=\"heks68ewe@example.org\"\n" \
	"\n" \
	PARTIAL_MESSAGE_CORPUS \
	"\n--1--\n"

/* Body bytes of the plain part in UTF8_CORPUS. */
#define UT8_CORPUS_CONTENT \
	"\xe4\xba\xba\xe6\xa8\xa9\xe3\x81\xae\xe7\x84\xa1\xe8\xa6\x96\xe5\x8f\x8a"

/* Message with non-ASCII bytes in the Subject (as an encoded-word), in a
   Content-Type parameter and in the body. */
#define UTF8_CORPUS \
	"Subject: =?UTF-8?B?44GT44KT44Gr44Gh44Gv?=\n" \
	"MIME-Version: 1.0\n" \
	"Content-Type: multipart/mixed; boundary=1;\n" \
	" comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf\xe5\xa2\x83\xe7\x95\x8c\xe3" \
	"\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3\x83\x83\xe3\x82" \
	"\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\"\n" \
	"\n--1\n" \
	TEST_CASE_PLAIN_PREAMBLE \
	"Content-Language: ja\n" \
	"\n" \
	UT8_CORPUS_CONTENT \
	"\n--1--"

/* multipart/digest message whose last entry is itself multipart/mixed.
   NOTE(review): the From values end in a bare space - possibly
   angle-bracketed addresses were lost; confirm against the upstream test
   data. */
#define MULTIPART_DIGEST_CORPUS \
	"From: Moderator-Address \n" \
	"Content-Type: multipart/digest; boundary=1;\n" \
	"\n\n--1\n" \
	"From: someone-else \n" \
	"Subject: my opinion\n" \
	"\n" \
	"This is my opinion" \
	"\n--1\n\n" \
	"From: another one \n" \
	"Subject: i disagree\n" \
	"\n" \
	"Not agreeing one bit!" \
	"\n--1\n\n" \
	"From: attachment \n" \
	"Subject: funny hat\n" \
	"Content-Type: multipart/mixed; boundary=2\n" \
	"\n--2\n" \
	TEST_CASE_PLAIN_PREAMBLE \
	"\n" \
	"Lovely attachment for you" \
	"\n--2\n" \
	"Content-Type: application/octet-stream; disposition=attachment; name=\"test.txt\"\n" \
	"Content-Transfer-Encoding: binary\n" \
	"\n" \
	"Foobar" \
	"\n--2--" \
	"\n--1--"

/* Run every test case three ways: block-by-block through the parser with
   message_search_more_get_decoded(), then twice through
   message_search_msg() - once with the parsed parts and once with NULL
   parts. */
static void test_message_search(void)
{
	const struct test_case test_cases[] = {
	{ /* basic test */
		.input = TEST_CASE_DATA(
			"MIME-Version: 1.0\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hello, world"),
		.search = "Hello",
		.output = TEST_CASE_DATA("Hello, world"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* look for something that's not found */
		.input = TEST_CASE_DATA(
			"MIME-Version: 1.0\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hallo, world"),
		.search = "Hello",
		.output = TEST_CASE_DATA_EMPTY,
		.expect_found = FALSE,
	},
	{ /* header value search */
		.input = TEST_CASE_DATA(
			"Subject: Hello, World\n"
			"MIME-Version: 1.0\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hallo, world"),
		.search = "Hello",
		.output = TEST_CASE_DATA("Hello, World"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Subject",
	},
	{ /* header value wrapped in base64 */
		.input = TEST_CASE_DATA(
			"Subject: =?UTF-8?B?SGVsbG8sIFdvcmxk?=\n"
			"MIME-Version: 1.0\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hallo, world"),
		.search = "Hello",
		.output = TEST_CASE_DATA("Hello, World"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Subject",
	},
	{ /* hidden inside one multipart */
		.input = TEST_CASE_DATA(
			"Subject: Hide and seek\n"
			"MIME-Version: 1.0\n"
			"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"A\"\n\n"
			"--A\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hallo, world"
			"\n--A\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hullo, world"
			"\n--A\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Hello, world"
			"\n--A--\n"
		),
		.search = "Hello",
		.output = TEST_CASE_DATA("Hello, world"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* same with emoji boundary */
		.input = TEST_CASE_DATA(
			"Subject: Hide and seek\n"
			"MIME-Version: 1.0\n"
			"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"\xF0\x9F\x98\x82\"; COMMENT=\"Boundary is U+1F602\"\n\n"
			"--\xF0\x9F\x98\x82\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Face with Tears of Joy"
			"\n--\xF0\x9F\x98\x82\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Emoji"
			"\n--\xF0\x9F\x98\x82--\n"
		),
		.search = "Emoji",
		.output = TEST_CASE_DATA("Emoji"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* Nested body search */
		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
		.search = "Find me here",
		.output = TEST_CASE_DATA("Search me, and Find me here"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* Nested body search (won't look into signature/plain) */
		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
		.search = "undersigned",
		.output = TEST_CASE_DATA_EMPTY,
		.expect_found = FALSE,
	},
	{ /* Nested mime part header search */
		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
		.search = "penmanship",
		.output = TEST_CASE_DATA("penmanship"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "X-Signature-Type",
	},
	{ /* Nested mime part header parameter search */
		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
		.search = "pen+paper",
		.output = TEST_CASE_DATA("multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Content-Type",
	},
	{ /* Partial message - must not parse the content */
		.input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS),
		.search = "Bar",
		.output = TEST_CASE_DATA(PARTIAL_MESSAGE_CORPUS),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* Partial message - must not parse the content */
		.input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS),
		.search = "fancy thing",
		.output = TEST_CASE_DATA("Hi, this is the fancy thing I was talking about!"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* UTF-8 searches */
		.input = TEST_CASE_DATA(UTF8_CORPUS),
		.search = "\xe4\xba\xba\xe6\xa8\xa9",
		.output = TEST_CASE_DATA(UT8_CORPUS_CONTENT),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* UTF-8 search header */
		.input = TEST_CASE_DATA(UTF8_CORPUS),
		.search = "\xe3\x81\x93\xe3\x82\x93",
		.output = TEST_CASE_DATA("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Subject",
	},
	{ /* UTF-8 searches content-type parameter */
		.input = TEST_CASE_DATA(UTF8_CORPUS),
		.search = "\xe3\x81\xa7\xe3\x81\x99",
		.output = TEST_CASE_DATA(
			"multipart/mixed; boundary=1;\n comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf"
			"\xe5\xa2\x83\xe7\x95\x8c\xe3\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3"
			"\x83\x83\xe3\x82\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\""),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Content-Type",
	},
	{ /* Invalid UTF-8 boundary (should not matter) */
		.input = TEST_CASE_DATA(
			"Content-Type: multipart/mixed; boundary=\"\xff\xff\xff\xff\"\n"
			"\n--\xff\xff\xff\xff\n"
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Can you find me?"
			"\n--\xff\xff\xff\xff--"),
		.search = "Can you find me?",
		.output = TEST_CASE_DATA("Can you find me?"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* Invalid UTF-8 in subject (should work) */
		/* NOTE(review): no "\n" ends the Subject line, so the first
		   preamble line is appended to the Subject value - confirm
		   this is intended */
		.input = TEST_CASE_DATA(
			"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?="
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Such horror"),
		.search = "Rotten",
		.output = TEST_CASE_DATA("Rotten "UNICODE_REPLACEMENT_CHAR_UTF8" text"),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Subject",
	},
	{ /* Invalid UTF-8 in body (should work) */
		/* NOTE(review): same missing "\n" after the Subject value
		   as in the previous case - confirm this is intended */
		.input = TEST_CASE_DATA(
			"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?="
			TEST_CASE_PLAIN_PREAMBLE
			"\n"
			"Such horror \xff\xff\xff\xff"),
		.search = "Such horror",
		.output = TEST_CASE_DATA("Such horror "UNICODE_REPLACEMENT_CHAR_UTF8),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* UTF-8 in content-type parameter */
		.input = TEST_CASE_DATA(
			"Content-Type: multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n"
			"\n--1--\n"),
		.search = "U+1F62D",
		.output = TEST_CASE_DATA("multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Content-Type",
	},
	{ /* Broken UTF-8 in content-type parameter */
		.input = TEST_CASE_DATA(
			"Content-Type: multipart/mixed; boundary=1;"
			" \xFF\xFF\xFF\xFF=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n"
			"\n--1--\n"),
		.search = "U+1F62D",
		.output = TEST_CASE_DATA("multipart/mixed; boundary=1; "UNICODE_REPLACEMENT_CHAR_UTF8"=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Content-Type",
	},
	{ /* Multipart digest */
		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
		.search = "Not agreeing",
		.output = TEST_CASE_DATA("Not agreeing one bit!"),
		.expect_found = TRUE,
		.expect_body = TRUE,
	},
	{ /* Multipart digest header */
		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
		.search = "someone-else",
		.output = TEST_CASE_DATA("someone-else "),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "From",
	},
	{ /* Multipart digest header parameter */
		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
		.search = "test.txt",
		.output = TEST_CASE_DATA("application/octet-stream; disposition=attachment; name=\"test.txt\""),
		.expect_found = TRUE,
		.expect_body = FALSE,
		.expect_header = TRUE,
		.hdr_name = "Content-Type",
	},
	};

	test_begin("message search");

	for (size_t i = 0; i < N_ELEMENTS(test_cases); i++) T_BEGIN {
		struct message_search_context *sctx;
		struct message_block raw_block, decoded_block;
		struct message_part *parts;
		const char *error;
		bool found = FALSE;
		const struct test_case *tc = &test_cases[i];
		struct message_parser_settings set = {
			.hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP,
		};
		pool_t pool = pool_alloconly_create("message parser", 10240);
		struct istream *is =
			test_istream_create_data(tc->input.value,
						 tc->input.value_len);
		struct message_parser_ctx *pctx =
			message_parser_init(pool, is, &set);
		int ret;

		/* headers are searched only when a header match is expected */
		sctx = message_search_init(tc->search, NULL,
			tc->expect_header ? 0 : MESSAGE_SEARCH_FLAG_SKIP_HEADERS);

		/* feed the parser's blocks to the search one at a time and
		   verify every block reported as a match */
		while ((ret = message_parser_parse_next_block(pctx, &raw_block)) > 0) {
			if (message_search_more_get_decoded(sctx, &raw_block,
							    &decoded_block)) {
				found = TRUE;
				compare_search_result(tc, &decoded_block, i);
			}
		}
		/* use the _idx variants so a failure names the test case */
		test_assert_idx(ret == -1, i);
		test_assert_idx(tc->expect_found == found, i);

		message_parser_deinit(&pctx, &parts);
		test_assert_idx(is->stream_errno == 0, i);

		/* search the whole message again using the parsed parts */
		i_stream_seek(is, 0);
		if ((ret = message_search_msg(sctx, is, parts, &error)) < 0) {
			i_error("Search error: %s", error);
		} else {
			test_assert_idx(tc->expect_found == (ret == 1), i);
		}

		/* and once more */
		i_stream_seek(is, 0);
		if ((ret = message_search_msg(sctx, is, NULL, &error)) < 0) {
			i_error("Search error: %s", error);
		} else {
			test_assert_idx(tc->expect_found == (ret == 1), i);
		}

		message_search_deinit(&sctx);
		test_assert_idx(is->stream_errno == 0, i);
		i_stream_unref(&is);
		pool_unref(&pool);
	} T_END;

	test_end();
}

/* Feed a hand-built header, end-of-headers marker and body (one byte per
   block) to two search contexts: ctx1's key differs from the body and must
   never match; ctx2's key equals the body and must match only once the last
   byte has been fed. ctx2 reuses the block decoded for ctx1. */
static void test_message_search_more_get_decoded(void)
{
	const char input[] = "p\xC3\xB6\xC3\xB6";
	const unsigned char text_plain[] = "text/plain; charset=utf-8";
	struct message_search_context *ctx1, *ctx2;
	struct message_block raw_block, decoded_block;
	struct message_header_line hdr;
	struct message_part part;
	unsigned int i;

	test_begin("message_search_more_get_decoded()");

	ctx1 = message_search_init("p\xC3\xA4\xC3\xA4", NULL, 0);
	ctx2 = message_search_init("p\xC3\xB6\xC3\xB6", NULL, 0);

	/* don't hand out a pointer to indeterminate memory */
	i_zero(&part);
	i_zero(&raw_block);
	raw_block.part = &part;

	/* feed the Content-Type header */
	i_zero(&hdr);
	hdr.name = "Content-Type";
	hdr.name_len = strlen(hdr.name);
	hdr.value = hdr.full_value = text_plain;
	hdr.value_len = hdr.full_value_len = sizeof(text_plain)-1;
	raw_block.hdr = &hdr;
	test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
	test_assert(!message_search_more_decoded(ctx2, &decoded_block));

	/* EOH */
	raw_block.hdr = NULL;
	test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
	test_assert(!message_search_more_decoded(ctx2, &decoded_block));

	/* body */
	raw_block.size = 1;
	for (i = 0; input[i] != '\0'; i++) {
		raw_block.data = (const void *)&input[i];
		test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
		test_assert(message_search_more_decoded(ctx2, &decoded_block) == (input[i+1] == '\0'));
	}
	message_search_deinit(&ctx1);
	message_search_deinit(&ctx2);
	test_end();
}

int main(void)
{
	static void (*const test_functions[])(void) = {
		test_message_search,
		test_message_search_more_get_decoded,
		NULL
	};
	return test_run(test_functions);
}