1 files changed, 521 insertions, 0 deletions
diff --git a/src/lib-mail/test-message-search.c b/src/lib-mail/test-message-search.c
new file mode 100644
index 0000000..d137a58
--- /dev/null
+++ b/src/lib-mail/test-message-search.c
@@ -0,0 +1,521 @@
+/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "istream.h"
+#include "str.h"
+#include "unichar.h"
+#include "message-parser.h"
+#include "message-search.h"
+#include "test-common.h"
+
+struct test_case_data {
+	const unsigned char *value;
+	size_t value_len;
+};
+
+#define TEST_CASE_DATA(x) \
+	{ .value = (const unsigned char*)((x)), .value_len = sizeof((x))-1 }
+#define TEST_CASE_DATA_EMPTY \
+	{ .value = NULL, .value_len = 0 }
+#define TEST_CASE_PLAIN_PREAMBLE \
+"Content-Type: text/plain\n" \
+"Content-Transfer-Encoding: binary\n"
+
+struct test_case {
+	struct test_case_data input;
+	const char *search;
+	struct test_case_data output;
+	bool expect_found;
+	bool expect_body;
+	bool expect_header;
+	const char *hdr_name;
+};
+
+static void compare_search_result(const struct test_case *tc,
+				  const struct message_block *block,
+				  size_t i)
+{
+	if (block->hdr != NULL) {
+		/* found header */
+		test_assert_idx(tc->expect_header == TRUE, i);
+		test_assert_strcmp_idx(tc->hdr_name, block->hdr->name, i);
+		test_assert_idx(block->hdr->full_value != NULL &&
+				tc->output.value != NULL &&
+				tc->output.value_len <= block->hdr->full_value_len &&
+				memcmp(tc->output.value, block->hdr->full_value,
+				       tc->output.value_len) == 0, i);
+	} else if (block->data != NULL) {
+		/* found body */
+		test_assert_idx(tc->expect_body == TRUE, i);
+		test_assert_idx(block->data != NULL &&
+				tc->output.value != NULL &&
+				tc->output.value_len <= block->size &&
+				memcmp(tc->output.value, block->data,
+				       tc->output.value_len) == 0, i);
+	} else {
+		test_assert_idx(tc->expect_header == FALSE, i);
+		test_assert_idx(tc->expect_body == FALSE, i);
+	}
+}
+
+#define SIGNED_MIME_CORPUS \
+"Subject: Hide and seek\n" \
+"MIME-Version: 1.0\n" \
+"Content-Type: multipart/mixed; boundary=1\n" \
+"\n--1\n" \
+"Content-Type: multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2\n" \
+"X-Signature-Type: penmanship\n" \
+"\n--2\n" \
+"Content-Type: multipart/alternative; boundary=3\n" \
+"\n--3\n" \
+"Content-Type: text/html; charset=us-ascii\n\n" \
+"<html><head><title>Search me</title></head><body><p>Don't find me here</p></body></html>\n" \
+"\n--3\n" \
+TEST_CASE_PLAIN_PREAMBLE \
+"\n" \
+"Search me, and Find me here" \
+"\n--3--\n" \
+"\n--2\n" \
+"Content-Type: signature/plain; charset=us-ascii\n" \
+"\n" \
+"Signed by undersigned" \
+"\n--2--\n" \
+"\n--1--"
+
+#define PARTIAL_MESSAGE_CORPUS \
+"X-Weird-Header-1: Bar\n" \
+"X-Weird-Header-2: Hello\n" \
+"Message-ID: <c6cceebc-1dcf-11eb-be8c-f7ca132cbfea@example.org>\n" \
+"Content-Type: text/plain; charset=\"us-ascii\"\n" \
+"Content-Transfer-Encoding: base64\n" \
+"\n" \
+"dGhpcyBpcyB0aGUgZmlyc3QgcGFydCBvZiB0aGUgbWVzc2FnZQo="
+
+#define PARTIAL_MIME_CORPUS \
+"Subject: In parts\n" \
+"MIME-Version: 1.0\n" \
+"Content-Type: multipart/mixed; boundary=1\n" \
+"\n--1\n" \
+TEST_CASE_PLAIN_PREAMBLE \
+"\n" \
+"Hi, this is the fancy thing I was talking about!" \
+"\n--1\n" \
+"Content-Type: Message/Partial; number=1; total=5; id=\"heks68ewe@example.org\"\n" \
+"\n" \
+PARTIAL_MESSAGE_CORPUS \
+"\n--1--\n"
+
+#define UT8_CORPUS_CONTENT \
+"\xe4\xba\xba\xe6\xa8\xa9\xe3\x81\xae\xe7\x84\xa1\xe8\xa6\x96\xe5\x8f\x8a"
+
+#define UTF8_CORPUS \
+"Subject: =?UTF-8?B?44GT44KT44Gr44Gh44Gv?=\n" \
+"MIME-Version: 1.0\n" \
+"Content-Type: multipart/mixed; boundary=1;\n" \
+"  comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf\xe5\xa2\x83\xe7\x95\x8c\xe3" \
+	    "\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3\x83\x83\xe3\x82" \
+	    "\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\"\n" \
+"\n--1\n" \
+TEST_CASE_PLAIN_PREAMBLE \
+"Content-Language: ja\n" \
+"\n" \
+UT8_CORPUS_CONTENT \
+"\n--1--"
+
+#define MULTIPART_DIGEST_CORPUS \
+"From: Moderator-Address <moderator>\n" \
+"Content-Type: multipart/digest; boundary=1;\n" \
+"\n\n--1\n" \
+"From: someone-else <someone@else>\n" \
+"Subject: my opinion\n" \
+"\n" \
+"This is my opinion" \
+"\n--1\n\n" \
+"From: another one <another@one>\n" \
+"Subject: i disagree\n" \
+"\n" \
+"Not agreeing one bit!" \
+"\n--1\n\n" \
+"From: attachment <attachment@user>\n" \
+"Subject: funny hat\n" \
+"Content-Type: multipart/mixed; boundary=2\n" \
+"\n--2\n" \
+TEST_CASE_PLAIN_PREAMBLE \
+"\n" \
+"Lovely attachment for you" \
+"\n--2\n" \
+"Content-Type: application/octet-stream; disposition=attachment; name=\"test.txt\"\n" \
+"Content-Transfer-Encoding: binary\n" \
+"\n" \
+"Foobar" \
+"\n--2--" \
+"\n--1--"
+
+static void test_message_search(void)
+{
+	const struct test_case test_cases[] = {
+	{	/* basic test */
+		.input = TEST_CASE_DATA(
+"MIME-Version: 1.0\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hello, world"),
+		.search = "Hello",
+		.output = TEST_CASE_DATA("Hello, world"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{	/* look for something that's not found */
+		.input = TEST_CASE_DATA(
+"MIME-Version: 1.0\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hallo, world"),
+		.search = "Hello",
+		.output = TEST_CASE_DATA_EMPTY,
+		.expect_found = FALSE,
+	},
+	{	/* header value search */
+		.input = TEST_CASE_DATA(
+"Subject: Hello, World\n"
+"MIME-Version: 1.0\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hallo, world"),
+		.search = "Hello",
+		.output = TEST_CASE_DATA("Hello, World"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Subject",
+	},
+	{	/* header value wrapped in base64 */
+		.input = TEST_CASE_DATA(
+"Subject: =?UTF-8?B?SGVsbG8sIFdvcmxk?=\n"
+"MIME-Version: 1.0\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hallo, world"),
+		.search = "Hello",
+		.output = TEST_CASE_DATA("Hello, World"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Subject",
+	},
+	{	/* hidden inside one multipart */
+		.input = TEST_CASE_DATA(
+"Subject: Hide and seek\n"
+"MIME-Version: 1.0\n"
+"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"A\"\n\n"
+"--A\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hallo, world"
+"\n--A\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hullo, world"
+"\n--A\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Hello, world"
+"\n--A--\n"
+),
+		.search = "Hello",
+		.output = TEST_CASE_DATA("Hello, world"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{	/* same with emoji boundary */
+		.input = TEST_CASE_DATA(
+"Subject: Hide and seek\n"
+"MIME-Version: 1.0\n"
+"CONTENT-TYPE: MULTIPART/MIXED; BOUNDARY=\"\xF0\x9F\x98\x82\"; COMMENT=\"Boundary is U+1F602\"\n\n"
+"--\xF0\x9F\x98\x82\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Face with Tears of Joy"
+"\n--\xF0\x9F\x98\x82\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Emoji"
+"\n--\xF0\x9F\x98\x82--\n"
+),
+		.search = "Emoji",
+		.output = TEST_CASE_DATA("Emoji"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{       /* Nested body search */
+		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
+		.search = "Find me here",
+		.output = TEST_CASE_DATA("Search me, and Find me here"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{       /* Nested body search (won't look into signature/plain) */
+		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
+		.search = "undersigned",
+		.output = TEST_CASE_DATA_EMPTY,
+		.expect_found = FALSE,
+	},
+	{       /* Nested mime part header search */
+		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
+		.search = "penmanship",
+		.output = TEST_CASE_DATA("penmanship"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "X-Signature-Type",
+	},
+	{       /* Nested mime part header parameter search */
+		.input = TEST_CASE_DATA(SIGNED_MIME_CORPUS),
+		.search = "pen+paper",
+		.output = TEST_CASE_DATA("multipart/signed; protocol=\"signature/plain\"; migalc=\"pen+paper\"; boundary=2"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Content-Type",
+	},
+	{       /* Partial message - must not parse the content */
+		.input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS),
+		.search = "Bar",
+		.output = TEST_CASE_DATA(PARTIAL_MESSAGE_CORPUS),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{	/* Partial message - must not parse the content */
+		.input = TEST_CASE_DATA(PARTIAL_MIME_CORPUS),
+		.search = "fancy thing",
+		.output = TEST_CASE_DATA("Hi, this is the fancy thing I was talking about!"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{	/* UTF-8 searches */
+		.input = TEST_CASE_DATA(UTF8_CORPUS),
+		.search = "\xe4\xba\xba\xe6\xa8\xa9",
+		.output = TEST_CASE_DATA(UT8_CORPUS_CONTENT),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{       /* UTF-8 search header */
+		.input = TEST_CASE_DATA(UTF8_CORPUS),
+		.search = "\xe3\x81\x93\xe3\x82\x93",
+		.output = TEST_CASE_DATA("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Subject",
+	},
+	{       /* UTF-8 searches content-type parameter */
+		.input = TEST_CASE_DATA(UTF8_CORPUS),
+		.search = "\xe3\x81\xa7\xe3\x81\x99",
+		.output = TEST_CASE_DATA(
+"multipart/mixed; boundary=1;\n  comment=\"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf"
+"\xe5\xa2\x83\xe7\x95\x8c\xe3\x81\xae\xe3\x81\x82\xe3\x82\x8b\xe3\x83\xa1\xe3"
+"\x83\x83\xe3\x82\xbb\xe3\x83\xbc\xe3\x82\xb8\xe3\x81\xa7\xe3\x81\x99\""),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Content-Type",
+	},
+	{
+		/* Invalid UTF-8 boundary (should not matter) */
+		.input = TEST_CASE_DATA(
+"Content-Type: multipart/mixed; boundary=\"\xff\xff\xff\xff\"\n"
+"\n--\xff\xff\xff\xff\n"
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Can you find me?"
+"\n--\xff\xff\xff\xff--"),
+		.search = "Can you find me?",
+		.output = TEST_CASE_DATA("Can you find me?"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{
+		/* Invalid UTF-8 in subject (should work) */
+		.input = TEST_CASE_DATA(
+"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?="
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Such horror"),
+		.search = "Rotten",
+		.output = TEST_CASE_DATA("Rotten "UNICODE_REPLACEMENT_CHAR_UTF8" text"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Subject",
+	},
+	{
+		/* Invalid UTF-8 in body (should work) */
+		.input = TEST_CASE_DATA(
+"Subject: =?UTF-8?B?Um90dGVuIP////8gdGV4dA==?="
+TEST_CASE_PLAIN_PREAMBLE
+"\n"
+"Such horror \xff\xff\xff\xff"),
+		.search = "Such horror",
+		.output = TEST_CASE_DATA("Such horror "UNICODE_REPLACEMENT_CHAR_UTF8),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{
+		/* UTF-8 in content-type parameter */
+		.input = TEST_CASE_DATA(
+"Content-Type: multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n"
+"\n--1--\n"),
+		.search = "U+1F62D",
+		.output = TEST_CASE_DATA("multipart/mixed; boundary=1; \xF0\x9F\x98\xAD=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Content-Type",
+	},
+	{
+		/* Broken UTF-8 in content-type parameter */
+		.input = TEST_CASE_DATA(
+"Content-Type: multipart/mixed; boundary=1;"
+" \xFF\xFF\xFF\xFF=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\"\n"
+"\n--1--\n"),
+		.search = "U+1F62D",
+		.output = TEST_CASE_DATA("multipart/mixed; boundary=1; "UNICODE_REPLACEMENT_CHAR_UTF8"=\"\xF0\x9F\xA5\xBA U+1F62D=U+1F97A\""),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Content-Type",
+	},
+	{	/* Multipart digest */
+		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
+		.search = "Not agreeing",
+		.output = TEST_CASE_DATA("Not agreeing one bit!"),
+		.expect_found = TRUE,
+		.expect_body = TRUE,
+	},
+	{       /* Multipart digest header */
+		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
+		.search = "someone-else",
+		.output = TEST_CASE_DATA("someone-else <someone@else>"),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "From",
+	},
+	{       /* Multipart digest header parameter */
+		.input = TEST_CASE_DATA(MULTIPART_DIGEST_CORPUS),
+		.search = "test.txt",
+		.output = TEST_CASE_DATA("application/octet-stream; disposition=attachment; name=\"test.txt\""),
+		.expect_found = TRUE,
+		.expect_body = FALSE,
+		.expect_header = TRUE,
+		.hdr_name = "Content-Type",
+	},
+};
+
+	test_begin("message search");
+
+	for (size_t i = 0; i < N_ELEMENTS(test_cases); i++) T_BEGIN {
+		struct message_search_context *sctx;
+		struct message_block raw_block, decoded_block;
+		struct message_part *parts;
+		const char *error;
+		bool found = FALSE;
+		const struct test_case *tc = &test_cases[i];
+		struct message_parser_settings set = {
+			.hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP,
+		};
+		pool_t pool = pool_alloconly_create("message parser", 10240);
+		struct istream *is =
+			test_istream_create_data(tc->input.value, tc->input.value_len);
+		struct message_parser_ctx *pctx =
+			message_parser_init(pool, is, &set);
+		int ret;
+		sctx = message_search_init(tc->search, NULL, tc->expect_header ?
+					   0 : MESSAGE_SEARCH_FLAG_SKIP_HEADERS);
+		while ((ret = message_parser_parse_next_block(pctx, &raw_block)) > 0) {
+			if (message_search_more_get_decoded(sctx, &raw_block,
+							    &decoded_block)) {
+				found = TRUE;
+				compare_search_result(tc, &decoded_block, i);
+			}
+		}
+		test_assert(ret == -1);
+		test_assert_idx(tc->expect_found == found, i);
+		message_parser_deinit(&pctx, &parts);
+		test_assert(is->stream_errno == 0);
+		i_stream_seek(is, 0);
+		if ((ret = message_search_msg(sctx, is, parts, &error)) < 0) {
+			i_error("Search error: %s", error);
+		} else {
+			test_assert_idx(tc->expect_found == (ret == 1), i);
+		}
+		/* and once more */
+		i_stream_seek(is, 0);
+		if ((ret = message_search_msg(sctx, is, NULL, &error)) < 0) {
+			i_error("Search error: %s", error);
+		} else {
+			test_assert_idx(tc->expect_found == (ret == 1), i);
+		}
+		message_search_deinit(&sctx);
+		test_assert(is->stream_errno == 0);
+		i_stream_unref(&is);
+		pool_unref(&pool);
+	} T_END;
+
+	test_end();
+
+}
+
+static void test_message_search_more_get_decoded(void)
+{
+	const char input[] = "p\xC3\xB6\xC3\xB6";
+	const unsigned char text_plain[] = "text/plain; charset=utf-8";
+	struct message_search_context *ctx1, *ctx2;
+	struct message_block raw_block, decoded_block;
+	struct message_header_line hdr;
+	struct message_part part;
+	unsigned int i;
+
+	test_begin("message_search_more_get_decoded()");
+
+	ctx1 = message_search_init("p\xC3\xA4\xC3\xA4", NULL, 0);
+	ctx2 = message_search_init("p\xC3\xB6\xC3\xB6", NULL, 0);
+
+	i_zero(&raw_block);
+	raw_block.part = &part;
+
+	/* feed the Content-Type header */
+	i_zero(&hdr);
+	hdr.name = "Content-Type"; hdr.name_len = strlen(hdr.name);
+	hdr.value = hdr.full_value = text_plain;
+	hdr.value_len = hdr.full_value_len = sizeof(text_plain)-1;
+	raw_block.hdr = &hdr;
+	test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
+	test_assert(!message_search_more_decoded(ctx2, &decoded_block));
+
+	/* EOH */
+	raw_block.hdr = NULL;
+	test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
+	test_assert(!message_search_more_decoded(ctx2, &decoded_block));
+
+	/* body */
+	raw_block.size = 1;
+	for (i = 0; input[i] != '\0'; i++) {
+		raw_block.data = (const void *)&input[i];
+		test_assert(!message_search_more_get_decoded(ctx1, &raw_block, &decoded_block));
+		test_assert(message_search_more_decoded(ctx2, &decoded_block) == (input[i+1] == '\0'));
+	}
+	message_search_deinit(&ctx1);
+	message_search_deinit(&ctx2);
+	test_end();
+}
+
+int main(void)
+{
+	static void (*const test_functions[])(void) = {
+		test_message_search,
+		test_message_search_more_get_decoded,
+		NULL
+	};
+	return test_run(test_functions);
+}