summary refs log tree commit diff stats
path: root/src/lib-mail/test-message-decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-mail/test-message-decoder.c')
-rw-r--r-- src/lib-mail/test-message-decoder.c 513
1 files changed, 513 insertions, 0 deletions
diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c
new file mode 100644
index 0000000..edf9210
--- /dev/null
+++ b/src/lib-mail/test-message-decoder.c
@@ -0,0 +1,513 @@
+/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "istream.h"
+#include "charset-utf8.h"
+#include "message-parser.h"
+#include "message-header-decode.h"
+#include "message-decoder.h"
+#include "message-part-data.h"
+#include "test-common.h"
+
+/* Minimal definition of message_header_decode_utf8() for this test program:
+   the size bytes at data are appended to dest unchanged and the normalizer
+   argument is ignored. */
+void message_header_decode_utf8(const unsigned char *data, size_t size,
+ buffer_t *dest,
+ normalizer_func_t *normalizer ATTR_UNUSED)
+{
+ buffer_append(dest, data, size);
+}
+
+static void test_message_decoder(void)
+{
+ struct message_decoder_context *ctx;
+ struct message_part part;
+ struct message_header_line hdr;
+ struct message_block input, output;
+
+ test_begin("message decoder");
+
+ i_zero(&part);
+ i_zero(&input);
+ memset(&output, 0xff, sizeof(output));
+ input.part = ∂
+
+ ctx = message_decoder_init(NULL, 0);
+
+ i_zero(&hdr);
+ hdr.name = "Content-Transfer-Encoding";
+ hdr.name_len = strlen(hdr.name);
+ hdr.full_value = (const void *)"quoted-printable";
+ hdr.full_value_len = strlen((const char *)hdr.full_value);
+ input.hdr = &hdr;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+
+ input.hdr = NULL;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+
+ input.hdr = NULL;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+
+ input.data = (const void *)"foo ";
+ input.size = strlen((const char *)input.data);
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 3);
+ test_assert(memcmp(output.data, "foo", 3) == 0);
+
+ input.data = (const void *)"bar";
+ input.size = strlen((const char *)input.data);
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 14);
+ test_assert(memcmp(output.data, " bar", 14) == 0);
+
+ /* partial text - \xC3\xA4 in quoted-printable. we should get a single
+ UTF-8 letter as result */
+ input.data = (const void *)"="; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"C"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"3"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"=A"; input.size = 2;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"4"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 2);
+ test_assert(memcmp(output.data, "\xC3\xA4", 2) == 0);
+
+ message_decoder_deinit(&ctx);
+
+ test_end();
+}
+
+/* Parses a multipart message while making the input stream visible one
+   byte at a time, and checks the concatenation of all decoded body data. */
+static void test_message_decoder_multipart(void)
+{
+	static const char test_message_input[] =
+		"Content-Type: multipart/mixed; boundary=foo\n"
+		"\n"
+		"--foo\n"
+		"Content-Transfer-Encoding: quoted-printable\n"
+		"Content-Type: text/plain; charset=utf-8\n"
+		"\n"
+		"p=C3=A4iv=C3=A4=C3=A4\n"
+		"\n"
+		"--foo\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=utf-8\n"
+		"\n"
+		"ecO2dMOkIHZhYW4uCg== ignored\n"
+		"--foo\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=utf-8\n"
+		"\n"
+		"?garbage\n"
+		"--foo--\n";
+	const struct message_parser_settings parser_set = { .flags = 0, };
+	/* stream size at which the whole message is visible */
+	const size_t full_size = sizeof(test_message_input) - 1;
+	struct message_block in_block, out_block;
+	struct message_part *root;
+	struct message_parser_ctx *mparser;
+	struct message_decoder_context *mdecoder;
+	struct istream *is;
+	string_t *decoded = t_str_new(20);
+	int ret;
+
+	test_begin("message decoder multipart");
+
+	is = test_istream_create(test_message_input);
+	mparser = message_parser_init(pool_datastack_create(), is, &parser_set);
+	mdecoder = message_decoder_init(NULL, 0);
+
+	test_istream_set_allow_eof(is, FALSE);
+	for (size_t visible = 0; visible <= full_size; visible++) {
+		/* EOF is allowed only once everything is visible */
+		if (visible == full_size)
+			test_istream_set_allow_eof(is, TRUE);
+		test_istream_set_size(is, visible);
+
+		for (;;) {
+			ret = message_parser_parse_next_block(mparser, &in_block);
+			if (ret <= 0)
+				break;
+			if (!message_decoder_decode_next_block(mdecoder, &in_block,
+							       &out_block))
+				continue;
+			/* collect body data only */
+			if (out_block.hdr == NULL && out_block.size > 0)
+				str_append_data(decoded, out_block.data, out_block.size);
+		}
+
+		/* the parser must report 0 until the final round, then -1 */
+		if (visible == full_size)
+			test_assert(ret == -1);
+		else
+			test_assert(ret == 0);
+	}
+	/* NOTE: the quoted-printable part comes out with \r\n instead of \n;
+	   the third part ("?garbage") contributes nothing */
+	test_assert_strcmp(str_c(decoded), "p\xC3\xA4iv\xC3\xA4\xC3\xA4\r\ny\xC3\xB6t\xC3\xA4 vaan.\n");
+
+	message_decoder_deinit(&mdecoder);
+	message_parser_deinit(&mparser, &root);
+	test_assert(is->stream_errno == 0);
+	i_stream_unref(&is);
+	test_end();
+}
+
+/* Sets value as the header's full value, feeds the header block to the
+   decoder and then feeds a block with hdr == NULL for the same part. */
+static void
+test_feed_content_type(struct message_decoder_context *ctx,
+		       struct message_block *input,
+		       struct message_block *output,
+		       struct message_header_line *hdr, const char *value)
+{
+	hdr->full_value = (const void *)value;
+	hdr->full_value_len = strlen((const char *)hdr->full_value);
+	input->hdr = hdr;
+	test_assert(message_decoder_decode_next_block(ctx, input, output));
+
+	input->hdr = NULL;
+	test_assert(message_decoder_decode_next_block(ctx, input, output));
+}
+
+/* Checks that message_decoder_current_content_type() follows the
+   Content-Type of the part most recently fed to the decoder, and that it
+   is NULL before any input and after a reset. */
+static void test_message_decoder_current_content_type(void)
+{
+	struct message_block input, output;
+	struct message_header_line hdr;
+	struct message_part root, child1, child2;
+	struct message_decoder_context *ctx;
+
+	test_begin("message_decoder_current_content_type()");
+
+	i_zero(&root);
+	child1 = child2 = root;
+
+	i_zero(&input);
+	memset(&output, 0xff, sizeof(output));
+	input.part = &root;
+
+	ctx = message_decoder_init(NULL, 0);
+	test_assert(message_decoder_current_content_type(ctx) == NULL);
+
+	/* the same header line is reused for all parts, only its value
+	   changes */
+	i_zero(&hdr);
+	hdr.name = "Content-Type";
+	hdr.name_len = strlen(hdr.name);
+
+	/* multipart/mixed */
+	test_feed_content_type(ctx, &input, &output, &hdr,
+			       "multipart/mixed; boundary=x");
+	test_assert(strcmp(message_decoder_current_content_type(ctx), "multipart/mixed") == 0);
+
+	/* child 1 */
+	input.part = &child1;
+	test_feed_content_type(ctx, &input, &output, &hdr, "text/plain");
+	test_assert(strcmp(message_decoder_current_content_type(ctx), "text/plain") == 0);
+
+	/* child 2 */
+	input.part = &child2;
+	test_feed_content_type(ctx, &input, &output, &hdr, "application/pdf");
+	test_assert(strcmp(message_decoder_current_content_type(ctx), "application/pdf") == 0);
+
+	/* reset */
+	message_decoder_decode_reset(ctx);
+	test_assert(message_decoder_current_content_type(ctx) == NULL);
+
+	message_decoder_deinit(&ctx);
+
+	test_end();
+}
+
+/* Decodes a multipart message whose five children use 7bit, binary, 8bit,
+   quoted-printable and base64, then checks the decoded text and the
+   Content-Type / Content-Transfer-Encoding recorded for the parts. */
+static void test_message_decoder_content_transfer_encoding(void)
+{
+	static const unsigned char test_message_input[] =
+		"Content-Type: multipart/mixed; boundary=\"1\"\n"
+		"MIME-Version: 1.0\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: 7bit\n"
+		"Content-Type: text/plain; charset=us-ascii\n\n"
+		"Move black king to queen's bishop\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding:\t\t\t\tbinary\n"
+		"Content-Type: text/plain; charset=UTF-8\n\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: 8bit\t\t\t\r\n"
+		"Content-Type: text/plain; charset=UTF-8\n\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: quoted-printable \r\n"
+		"Content-Type: text/plain; charset=UTF-8\n\n"
+		"Move =E2=99=9A to =E2=99=9B's =E2=99=9D\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=UTF-8\n\n"
+		"TW92ZSDimZogdG8g4pmbJ3Mg4pmdCg==\n\n"
+		"--1--\n";
+
+	/* one line per child part; the quoted-printable one ends in \r\n */
+	static const char test_message_output[] =
+		"Move black king to queen's bishop\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\r\n"
+		"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n";
+
+	test_begin("message decoder content transfer encoding");
+
+	const struct message_parser_settings parser_set = { .flags = 0, };
+	struct message_block in_block, out_block;
+	struct message_part *root, *child;
+	struct message_parser_ctx *mparser;
+	struct message_decoder_context *mdecoder;
+	struct istream *is;
+	string_t *decoded = t_str_new(20);
+	int ret;
+
+	pool_t pool = pool_alloconly_create("message parser", 10240);
+	is = test_istream_create_data(test_message_input,
+				      sizeof(test_message_input)-1);
+	mparser = message_parser_init(pool, is, &parser_set);
+	mdecoder = message_decoder_init(NULL, 0);
+
+	for (;;) {
+		ret = message_parser_parse_next_block(mparser, &in_block);
+		if (ret <= 0)
+			break;
+		/* record header data on the part before decoding the block */
+		message_part_data_parse_from_header(pool, in_block.part, in_block.hdr);
+		if (!message_decoder_decode_next_block(mdecoder, &in_block, &out_block))
+			continue;
+		/* collect body data only */
+		if (out_block.hdr == NULL && out_block.size > 0)
+			str_append_data(decoded, out_block.data, out_block.size);
+	}
+
+	test_assert(ret == -1);
+	test_assert_strcmp(test_message_output, str_c(decoded));
+	message_decoder_deinit(&mdecoder);
+	message_parser_deinit(&mparser, &root);
+	test_assert(is->stream_errno == 0);
+
+	/* validate parts */
+	test_assert(root->children_count == 5);
+
+	/* child 1: 7bit */
+	child = root->children;
+	test_assert_strcmp(child->data->content_type, "text");
+	test_assert_strcmp(child->data->content_subtype, "plain");
+	test_assert_strcmp(child->data->content_transfer_encoding, "7bit");
+	test_assert_strcmp(child->data->content_type, "text");
+
+	/* child 2: binary, value preceded by tabs in the input */
+	child = child->next;
+	test_assert_strcmp(child->data->content_transfer_encoding, "binary");
+	test_assert_strcmp(child->data->content_type, "text");
+	test_assert_strcmp(child->data->content_subtype, "plain");
+
+	/* child 3: 8bit, value followed by tabs in the input */
+	child = child->next;
+	test_assert_strcmp(child->data->content_transfer_encoding, "8bit");
+
+	/* child 4: quoted-printable, value followed by a space */
+	child = child->next;
+	test_assert_strcmp(child->data->content_transfer_encoding, "quoted-printable");
+
+	/* child 5: base64 */
+	child = child->next;
+	test_assert_strcmp(child->data->content_transfer_encoding, "base64");
+
+	i_stream_unref(&is);
+	pool_unref(&pool);
+	test_end();
+}
+
+/* Two checks in one test:
+   1) a multipart message whose children all carry an invalid
+      Content-Transfer-Encoding value: every such header must parse as
+      MESSAGE_CTE_UNKNOWN and no body data may be emitted by the decoder;
+   2) a table of hand-built Content-Transfer-Encoding header lines passed
+      directly to message_decoder_parse_cte(). */
+static void test_message_decoder_invalid_content_transfer_encoding(void)
+{
+ static const unsigned char test_message_input[] =
+ /* all of the child parts have invalid content transfer encoding */
+"Content-Type: multipart/mixed; boundary=\"1\"\n"
+"MIME-Version: 1.0\n\n"
+"--1\n"
+"Content-Transfer-Encoding: 6bit\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move black king to queen's bishop\n\n"
+"--1\n"
+"Content-Transfer-Encoding: 7bits\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+"--1\n"
+"Content-Transfer-Encoding: 8 bit\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+"--1\n"
+"Content-Transfer-Encoding: 7-bit\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+"--1\n"
+"Content-Transfer-Encoding: 8-bit\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n"
+"--1\n"
+"Content-Transfer-Encoding:\n"
+"Content-Type: text/plain; charset=UTF-8\n\n"
+"Move =E2=99=9A to =E2=99=9B's =E2=99=9D\n\n"
+"--1--\n";
+
+ /* the decoder is expected to output no body data at all */
+ const char *test_message_output = "";
+
+ test_begin("message decoder content transfer invalid encoding");
+
+ const struct message_parser_settings parser_set = { .flags = 0 };
+ struct message_parser_ctx *parser;
+ struct message_decoder_context *decoder;
+ struct message_part *parts, *part;
+ struct message_block input, output;
+ struct istream *istream;
+ string_t *str_out = t_str_new(20);
+ int ret;
+
+ pool_t pool = pool_alloconly_create("message parser", 10240);
+ istream = test_istream_create_data(test_message_input,
+ sizeof(test_message_input)-1);
+ parser = message_parser_init(pool, istream, &parser_set);
+ decoder = message_decoder_init(NULL, 0);
+
+ while ((ret = message_parser_parse_next_block(parser, &input)) > 0) {
+ message_part_data_parse_from_header(pool, input.part, input.hdr);
+ /* every Content-Transfer-Encoding header in this message must be
+    classified as unknown */
+ if (input.hdr != NULL &&
+ strcasecmp(input.hdr->name, "content-transfer-encoding") == 0) {
+ enum message_cte cte = message_decoder_parse_cte(input.hdr);
+ test_assert(cte == MESSAGE_CTE_UNKNOWN);
+ }
+ /* collect body data only; none is expected */
+ if (message_decoder_decode_next_block(decoder, &input, &output) &&
+ output.hdr == NULL && output.size > 0)
+ str_append_data(str_out, output.data, output.size);
+ }
+
+ test_assert(ret == -1);
+ test_assert_strcmp(test_message_output, str_c(str_out));
+ message_decoder_deinit(&decoder);
+ message_parser_deinit(&parser, &parts);
+ test_assert(istream->stream_errno == 0);
+
+ /* validate what was recorded for each of the six children */
+ part = parts;
+ test_assert(part->children_count == 6);
+ part = part->children;
+ test_assert_strcmp(part->data->content_type, "text");
+ test_assert_strcmp(part->data->content_subtype, "plain");
+ test_assert_strcmp(part->data->content_transfer_encoding, "6bit");
+ test_assert_strcmp(part->data->content_type, "text");
+
+ part = part->next;
+ test_assert_strcmp(part->data->content_transfer_encoding, "7bits");
+ test_assert_strcmp(part->data->content_type, "text");
+ test_assert_strcmp(part->data->content_subtype, "plain");
+ /* third child ("8 bit"): no transfer encoding is expected to be stored */
+ part = part->next;
+ test_assert(part->data->content_transfer_encoding == NULL);
+ part = part->next;
+ test_assert_strcmp(part->data->content_transfer_encoding, "7-bit");
+ part = part->next;
+ test_assert_strcmp(part->data->content_transfer_encoding, "8-bit");
+ /* sixth child (empty value): only checked for being the last sibling */
+ part = part->next;
+ test_assert(part->next == NULL);
+ i_stream_unref(&istream);
+ pool_unref(&pool);
+
+/* X10(a) pastes its argument ten times; with a string literal argument the
+   copies concatenate, so four nested uses give 10000 repetitions */
+#define X10(a) a a a a a a a a a a
+
+/* TEST_CASE builds one table entry: a Content-Transfer-Encoding header line
+   with the given value and the expected parse result. The length comes from
+   sizeof(value)-1, so value must be a string literal and embedded NUL
+   bytes are counted. */
+#undef TEST_CASE
+#define TEST_CASE(value, result) \
+ { \
+ .hdr = { \
+ .name = "Content-Transfer-Encoding", \
+ .name_len = 25, \
+ .full_value = (const unsigned char*)value, \
+ .full_value_len = sizeof(value)-1, \
+ }, \
+ .cte = result, \
+ }
+
+ /* header values with parenthesized comments, embedded NULs, very long
+    comments / whitespace runs, unbalanced parentheses and a trailing
+    ";parameter", each with the result message_decoder_parse_cte() must
+    return */
+ const struct {
+ const struct message_header_line hdr;
+ enum message_cte cte;
+ } test_case[] = {
+ TEST_CASE("(binary comment) base64", MESSAGE_CTE_BASE64),
+ TEST_CASE("(\"binary\" ( (comment) test) ) base64", MESSAGE_CTE_BASE64),
+ TEST_CASE("base64 binary", MESSAGE_CTE_UNKNOWN),
+ TEST_CASE("base64\0binary", MESSAGE_CTE_UNKNOWN),
+ TEST_CASE("\0binary", MESSAGE_CTE_UNKNOWN),
+ TEST_CASE("( " X10(X10(X10(X10("a")))) " ) base64", MESSAGE_CTE_BASE64),
+ TEST_CASE("( " X10(X10(X10(X10("a")))) " ) base64 ( " X10(X10(X10(X10("a")))) ")", MESSAGE_CTE_BASE64),
+ TEST_CASE("( base64", MESSAGE_CTE_UNKNOWN),
+ TEST_CASE("base64 (", MESSAGE_CTE_BASE64),
+ TEST_CASE(X10(X10(X10(X10(" ")))) " base64", MESSAGE_CTE_BASE64),
+ TEST_CASE("base64 ; logging-type=\"foobar\"", MESSAGE_CTE_BASE64),
+ };
+
+ /* test_assert_idx reports the failing table index */
+ for (size_t i = 0; i < N_ELEMENTS(test_case); i++) {
+ test_assert_idx(message_decoder_parse_cte(&test_case[i].hdr) == test_case[i].cte, i);
+ }
+
+ test_end();
+}
+
+/* Decodes a multipart message whose children are declared as utf-16le,
+   utf-16be, EUC-JP and UTF-8 and compares the concatenated decoder output
+   with the expected UTF-8 text. */
+static void test_message_decoder_charset(void)
+{
+	/* parts 1-3 carry "Test message\n" as UTF-16 (raw or base64), part 4
+	   is base64 EUC-JP text, part 5 is bytes that are not valid UTF-8 */
+	static const unsigned char test_message_input[] =
+		"Content-Type: multipart/mixed; boundary=\"1\"\n"
+		"MIME-Version: 1.0\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: binary\n"
+		"Content-Type: text/plain; charset=utf-16le\n\n"
+		"\x54\x00\x65\x00\x73\x00\x74\x00\x20\x00\x6d\x00\x65\x00\x73\x00\x73\x00\x61\x00\x67\x00\x65\x00\n\x00\n"
+		"--1\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=utf-16be\n\n"
+		"AFQAZQBzAHQAIABtAGUAcwBzAGEAZwBlAAo=\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=utf-16le\n\n"
+		"VABlAHMAdAAgAG0AZQBzAHMAYQBnAGUACgA=\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: base64\n"
+		"Content-Type: text/plain; charset=EUC-JP\n\n"
+		"odjApLOmv824osDruMCh2Q==\n\n"
+		"--1\n"
+		"Content-Transfer-Encoding: binary\n"
+		"Content-Type: text/plain; charset=UTF-8\n\n"
+		"\xad\xad\xad\xad\xad\xad\n"
+		"--1--\n";
+
+	/* expected output: three decoded UTF-16 parts, the EUC-JP text as
+	   UTF-8, and one replacement character for the invalid last part */
+	static const char *test_message_output =
+		"Test message\nTest message\nTest message\n"
+		"\xe3\x80\x8e\xe4\xb8\x96\xe7\x95\x8c\xe4\xba\xba"
+		"\xe6\xa8\xa9\xe5\xae\xa3\xe8\xa8\x80\xe3\x80\x8f"
+		UNICODE_REPLACEMENT_CHAR_UTF8;
+
+	test_begin("message decoder charset");
+
+	const struct message_parser_settings parser_set = { .flags = 0, };
+	struct message_block in_block, out_block;
+	struct message_part *root;
+	struct message_parser_ctx *mparser;
+	struct message_decoder_context *mdecoder;
+	struct istream *is;
+	string_t *decoded = t_str_new(20);
+	int ret;
+
+	pool_t pool = pool_alloconly_create("message parser", 10240);
+	is = test_istream_create_data(test_message_input,
+				      sizeof(test_message_input)-1);
+	mparser = message_parser_init(pool, is, &parser_set);
+	mdecoder = message_decoder_init(NULL, 0);
+
+	for (;;) {
+		ret = message_parser_parse_next_block(mparser, &in_block);
+		if (ret <= 0)
+			break;
+		/* record header data on the part before decoding the block */
+		message_part_data_parse_from_header(pool, in_block.part, in_block.hdr);
+		if (!message_decoder_decode_next_block(mdecoder, &in_block, &out_block))
+			continue;
+		/* collect body data only */
+		if (out_block.hdr == NULL && out_block.size > 0)
+			str_append_data(decoded, out_block.data, out_block.size);
+	}
+
+	test_assert(ret == -1);
+	test_assert_strcmp(test_message_output, str_c(decoded));
+	message_decoder_deinit(&mdecoder);
+	message_parser_deinit(&mparser, &root);
+	test_assert(is->stream_errno == 0);
+
+	i_stream_unref(&is);
+	pool_unref(&pool);
+	test_end();
+}
+
+/* Test program entry point: hands all test cases of this file to
+   test_run() and returns its result. */
+int main(void)
+{
+	/* NULL-terminated list, in execution order */
+	static void (*const all_tests[])(void) = {
+		test_message_decoder,
+		test_message_decoder_multipart,
+		test_message_decoder_current_content_type,
+		test_message_decoder_content_transfer_encoding,
+		test_message_decoder_invalid_content_transfer_encoding,
+		test_message_decoder_charset,
+		NULL
+	};
+
+	return test_run(all_tests);
+}