diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib-mail/test-message-decoder.c | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c new file mode 100644 index 0000000..edf9210 --- /dev/null +++ b/src/lib-mail/test-message-decoder.c @@ -0,0 +1,513 @@ +/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "istream.h" +#include "charset-utf8.h" +#include "message-parser.h" +#include "message-header-decode.h" +#include "message-decoder.h" +#include "message-part-data.h" +#include "test-common.h" + +void message_header_decode_utf8(const unsigned char *data, size_t size, + buffer_t *dest, + normalizer_func_t *normalizer ATTR_UNUSED) +{ + buffer_append(dest, data, size); +} + +static void test_message_decoder(void) +{ + struct message_decoder_context *ctx; + struct message_part part; + struct message_header_line hdr; + struct message_block input, output; + + test_begin("message decoder"); + + i_zero(&part); + i_zero(&input); + memset(&output, 0xff, sizeof(output)); + input.part = ∂ + + ctx = message_decoder_init(NULL, 0); + + i_zero(&hdr); + hdr.name = "Content-Transfer-Encoding"; + hdr.name_len = strlen(hdr.name); + hdr.full_value = (const void *)"quoted-printable"; + hdr.full_value_len = strlen((const char *)hdr.full_value); + input.hdr = &hdr; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.data = (const void *)"foo "; + input.size = strlen((const char *)input.data); + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 3); + test_assert(memcmp(output.data, "foo", 3) == 0); + + input.data = (const void *)"bar"; + input.size = strlen((const char *)input.data); + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 14); + test_assert(memcmp(output.data, " bar", 14) == 0); + + /* partial text - \xC3\xA4 in quoted-printable. we should get a single + UTF-8 letter as result */ + input.data = (const void *)"="; input.size = 1; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + input.data = (const void *)"C"; input.size = 1; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + input.data = (const void *)"3"; input.size = 1; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + input.data = (const void *)"=A"; input.size = 2; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 0); + input.data = (const void *)"4"; input.size = 1; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(output.size == 2); + test_assert(memcmp(output.data, "\xC3\xA4", 2) == 0); + + message_decoder_deinit(&ctx); + + test_end(); +} + +static void test_message_decoder_multipart(void) +{ + static const char test_message_input[] = + "Content-Type: multipart/mixed; boundary=foo\n" + "\n" + "--foo\n" + "Content-Transfer-Encoding: quoted-printable\n" + "Content-Type: text/plain; charset=utf-8\n" + "\n" + "p=C3=A4iv=C3=A4=C3=A4\n" + "\n" + "--foo\n" + "Content-Transfer-Encoding: base64\n" + "Content-Type: text/plain; charset=utf-8\n" + "\n" + "ecO2dMOkIHZhYW4uCg== ignored\n" + "--foo\n" + "Content-Transfer-Encoding: base64\n" + "Content-Type: text/plain; charset=utf-8\n" + "\n" + "?garbage\n" + "--foo--\n"; + const struct message_parser_settings parser_set = { .flags = 0, }; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_part *parts; + struct message_block input, output; + struct istream *istream; + string_t *str_out = t_str_new(20); + int ret; + + test_begin("message decoder multipart"); + + istream = test_istream_create(test_message_input); + parser = message_parser_init(pool_datastack_create(), istream, &parser_set); + decoder = message_decoder_init(NULL, 0); + + test_istream_set_allow_eof(istream, FALSE); + for (size_t i = 0; i < sizeof(test_message_input); i++) { + if (i == sizeof(test_message_input)-1) + test_istream_set_allow_eof(istream, TRUE); + test_istream_set_size(istream, i); + while ((ret = message_parser_parse_next_block(parser, &input)) > 0) { + if (message_decoder_decode_next_block(decoder, &input, &output) && + output.hdr == NULL && output.size > 0) + str_append_data(str_out, output.data, output.size); + } + if (i == sizeof(test_message_input)-1) + test_assert(ret == -1); + else + test_assert(ret == 0); + } + /* NOTE: qp-decoder decoder changes \n into \r\n */ + test_assert_strcmp(str_c(str_out), "p\xC3\xA4iv\xC3\xA4\xC3\xA4\r\ny\xC3\xB6t\xC3\xA4 vaan.\n"); + + message_decoder_deinit(&decoder); + message_parser_deinit(&parser, &parts); + test_assert(istream->stream_errno == 0); + i_stream_unref(&istream); + test_end(); +} + +static void test_message_decoder_current_content_type(void) +{ + struct message_decoder_context *ctx; + struct message_part part, part2, part3; + struct message_header_line hdr; + struct message_block input, output; + + test_begin("message_decoder_current_content_type()"); + + i_zero(&part); + part2 = part3 = part; + + i_zero(&input); + memset(&output, 0xff, sizeof(output)); + input.part = ∂ + + ctx = message_decoder_init(NULL, 0); + test_assert(message_decoder_current_content_type(ctx) == NULL); + + /* multipart/mixed */ + i_zero(&hdr); + hdr.name = "Content-Type"; + hdr.name_len = strlen(hdr.name); + hdr.full_value = (const void *)"multipart/mixed; boundary=x"; + hdr.full_value_len = strlen((const char *)hdr.full_value); + input.hdr = &hdr; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(strcmp(message_decoder_current_content_type(ctx), "multipart/mixed") == 0); + + /* child 1 */ + input.part = &part2; + hdr.full_value = (const void *)"text/plain"; + hdr.full_value_len = strlen((const char *)hdr.full_value); + input.hdr = &hdr; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(strcmp(message_decoder_current_content_type(ctx), "text/plain") == 0); + + /* child 2 */ + input.part = &part3; + hdr.full_value = (const void *)"application/pdf"; + hdr.full_value_len = strlen((const char *)hdr.full_value); + input.hdr = &hdr; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + + input.hdr = NULL; + test_assert(message_decoder_decode_next_block(ctx, &input, &output)); + test_assert(strcmp(message_decoder_current_content_type(ctx), "application/pdf") == 0); + + /* reset */ + message_decoder_decode_reset(ctx); + test_assert(message_decoder_current_content_type(ctx) == NULL); + + message_decoder_deinit(&ctx); + + test_end(); +} + +static void test_message_decoder_content_transfer_encoding(void) +{ + static const unsigned char test_message_input[] = +"Content-Type: multipart/mixed; boundary=\"1\"\n" +"MIME-Version: 1.0\n\n" +"--1\n" +"Content-Transfer-Encoding: 7bit\n" +"Content-Type: text/plain; charset=us-ascii\n\n" +"Move black king to queen's bishop\n\n" +"--1\n" +"Content-Transfer-Encoding:\t\t\t\tbinary\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding: 8bit\t\t\t\r\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding: quoted-printable \r\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move =E2=99=9A to =E2=99=9B's =E2=99=9D\n\n" +"--1\n" +"Content-Transfer-Encoding: base64\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"TW92ZSDimZogdG8g4pmbJ3Mg4pmdCg==\n\n" +"--1--\n"; + + static const char test_message_output[] = +"Move black king to queen's bishop\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\r\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n"; + + test_begin("message decoder content transfer encoding"); + + const struct message_parser_settings parser_set = { .flags = 0, }; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_part *parts, *part; + struct message_block input, output; + struct istream *istream; + string_t *str_out = t_str_new(20); + int ret; + + pool_t pool = pool_alloconly_create("message parser", 10240); + istream = test_istream_create_data(test_message_input, + sizeof(test_message_input)-1); + parser = message_parser_init(pool, istream, &parser_set); + decoder = message_decoder_init(NULL, 0); + + while ((ret = message_parser_parse_next_block(parser, &input)) > 0) { + message_part_data_parse_from_header(pool, input.part, input.hdr); + if (message_decoder_decode_next_block(decoder, &input, &output) && + output.hdr == NULL && output.size > 0) + str_append_data(str_out, output.data, output.size); + } + + test_assert(ret == -1); + test_assert_strcmp(test_message_output, str_c(str_out)); + message_decoder_deinit(&decoder); + message_parser_deinit(&parser, &parts); + test_assert(istream->stream_errno == 0); + + /* validate parts */ + + part = parts; + test_assert(part->children_count == 5); + part = part->children; + test_assert_strcmp(part->data->content_type, "text"); + test_assert_strcmp(part->data->content_subtype, "plain"); + test_assert_strcmp(part->data->content_transfer_encoding, "7bit"); + test_assert_strcmp(part->data->content_type, "text"); + + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "binary"); + test_assert_strcmp(part->data->content_type, "text"); + test_assert_strcmp(part->data->content_subtype, "plain"); + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "8bit"); + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "quoted-printable"); + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "base64"); + i_stream_unref(&istream); + pool_unref(&pool); + test_end(); +} + +static void test_message_decoder_invalid_content_transfer_encoding(void) +{ + static const unsigned char test_message_input[] = + /* all of the child parts have invalid content transfer encoding */ +"Content-Type: multipart/mixed; boundary=\"1\"\n" +"MIME-Version: 1.0\n\n" +"--1\n" +"Content-Transfer-Encoding: 6bit\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move black king to queen's bishop\n\n" +"--1\n" +"Content-Transfer-Encoding: 7bits\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding: 8 bit\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding: 7-bit\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding: 8-bit\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move \xE2\x99\x9A to \xE2\x99\x9B's \xE2\x99\x9D\n\n" +"--1\n" +"Content-Transfer-Encoding:\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"Move =E2=99=9A to =E2=99=9B's =E2=99=9D\n\n" +"--1--\n"; + + const char *test_message_output = ""; + + test_begin("message decoder content transfer invalid encoding"); + + const struct message_parser_settings parser_set = { .flags = 0 }; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_part *parts, *part; + struct message_block input, output; + struct istream *istream; + string_t *str_out = t_str_new(20); + int ret; + + pool_t pool = pool_alloconly_create("message parser", 10240); + istream = test_istream_create_data(test_message_input, + sizeof(test_message_input)-1); + parser = message_parser_init(pool, istream, &parser_set); + decoder = message_decoder_init(NULL, 0); + + while ((ret = message_parser_parse_next_block(parser, &input)) > 0) { + message_part_data_parse_from_header(pool, input.part, input.hdr); + if (input.hdr != NULL && + strcasecmp(input.hdr->name, "content-transfer-encoding") == 0) { + enum message_cte cte = message_decoder_parse_cte(input.hdr); + test_assert(cte == MESSAGE_CTE_UNKNOWN); + } + if (message_decoder_decode_next_block(decoder, &input, &output) && + output.hdr == NULL && output.size > 0) + str_append_data(str_out, output.data, output.size); + } + + test_assert(ret == -1); + test_assert_strcmp(test_message_output, str_c(str_out)); + message_decoder_deinit(&decoder); + message_parser_deinit(&parser, &parts); + test_assert(istream->stream_errno == 0); + + part = parts; + test_assert(part->children_count == 6); + part = part->children; + test_assert_strcmp(part->data->content_type, "text"); + test_assert_strcmp(part->data->content_subtype, "plain"); + test_assert_strcmp(part->data->content_transfer_encoding, "6bit"); + test_assert_strcmp(part->data->content_type, "text"); + + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "7bits"); + test_assert_strcmp(part->data->content_type, "text"); + test_assert_strcmp(part->data->content_subtype, "plain"); + part = part->next; + test_assert(part->data->content_transfer_encoding == NULL); + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "7-bit"); + part = part->next; + test_assert_strcmp(part->data->content_transfer_encoding, "8-bit"); + part = part->next; + test_assert(part->next == NULL); + i_stream_unref(&istream); + pool_unref(&pool); + +#define X10(a) a a a a a a a a a a + +#undef TEST_CASE +#define TEST_CASE(value, result) \ + { \ + .hdr = { \ + .name = "Content-Transfer-Encoding", \ + .name_len = 25, \ + .full_value = (const unsigned char*)value, \ + .full_value_len = sizeof(value)-1, \ + }, \ + .cte = result, \ + } + + const struct { + const struct message_header_line hdr; + enum message_cte cte; + } test_case[] = { + TEST_CASE("(binary comment) base64", MESSAGE_CTE_BASE64), + TEST_CASE("(\"binary\" ( (comment) test) ) base64", MESSAGE_CTE_BASE64), + TEST_CASE("base64 binary", MESSAGE_CTE_UNKNOWN), + TEST_CASE("base64\0binary", MESSAGE_CTE_UNKNOWN), + TEST_CASE("\0binary", MESSAGE_CTE_UNKNOWN), + TEST_CASE("( " X10(X10(X10(X10("a")))) " ) base64", MESSAGE_CTE_BASE64), + TEST_CASE("( " X10(X10(X10(X10("a")))) " ) base64 ( " X10(X10(X10(X10("a")))) ")", MESSAGE_CTE_BASE64), + TEST_CASE("( base64", MESSAGE_CTE_UNKNOWN), + TEST_CASE("base64 (", MESSAGE_CTE_BASE64), + TEST_CASE(X10(X10(X10(X10(" ")))) " base64", MESSAGE_CTE_BASE64), + TEST_CASE("base64 ; logging-type=\"foobar\"", MESSAGE_CTE_BASE64), + }; + + for (size_t i = 0; i < N_ELEMENTS(test_case); i++) { + test_assert_idx(message_decoder_parse_cte(&test_case[i].hdr) == test_case[i].cte, i); + } + + test_end(); +} + +static void test_message_decoder_charset(void) +{ + /* ensure we decode correctly */ + static const unsigned char test_message_input[] = + /* none of these should work */ +"Content-Type: multipart/mixed; boundary=\"1\"\n" +"MIME-Version: 1.0\n\n" +"--1\n" +"Content-Transfer-Encoding: binary\n" +"Content-Type: text/plain; charset=utf-16le\n\n" +"\x54\x00\x65\x00\x73\x00\x74\x00\x20\x00\x6d\x00\x65\x00\x73\x00\x73\x00\x61\x00\x67\x00\x65\x00\n\x00\n" +"--1\n" +"Content-Transfer-Encoding: base64\n" +"Content-Type: text/plain; charset=utf-16be\n\n" +"AFQAZQBzAHQAIABtAGUAcwBzAGEAZwBlAAo=\n\n" +"--1\n" +"Content-Transfer-Encoding: base64\n" +"Content-Type: text/plain; charset=utf-16le\n\n" +"VABlAHMAdAAgAG0AZQBzAHMAYQBnAGUACgA=\n\n" +"--1\n" +"Content-Transfer-Encoding: base64\n" +"Content-Type: text/plain; charset=EUC-JP\n\n" +"odjApLOmv824osDruMCh2Q==\n\n" +"--1\n" +"Content-Transfer-Encoding: binary\n" +"Content-Type: text/plain; charset=UTF-8\n\n" +"\xad\xad\xad\xad\xad\xad\n" +"--1--\n"; + + static const char *test_message_output = +"Test message\nTest message\nTest message\n" +"\xe3\x80\x8e\xe4\xb8\x96\xe7\x95\x8c\xe4\xba\xba" +"\xe6\xa8\xa9\xe5\xae\xa3\xe8\xa8\x80\xe3\x80\x8f" +UNICODE_REPLACEMENT_CHAR_UTF8; + + test_begin("message decoder charset"); + + const struct message_parser_settings parser_set = { .flags = 0, }; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_part *parts; + struct message_block input, output; + struct istream *istream; + string_t *str_out = t_str_new(20); + int ret; + + pool_t pool = pool_alloconly_create("message parser", 10240); + istream = test_istream_create_data(test_message_input, + sizeof(test_message_input)-1); + parser = message_parser_init(pool, istream, &parser_set); + decoder = message_decoder_init(NULL, 0); + + while ((ret = message_parser_parse_next_block(parser, &input)) > 0) { + message_part_data_parse_from_header(pool, input.part, input.hdr); + if (message_decoder_decode_next_block(decoder, &input, &output) && + output.hdr == NULL && output.size > 0) + str_append_data(str_out, output.data, output.size); + } + + test_assert(ret == -1); + test_assert_strcmp(test_message_output, str_c(str_out)); + message_decoder_deinit(&decoder); + message_parser_deinit(&parser, &parts); + test_assert(istream->stream_errno == 0); + + i_stream_unref(&istream); + pool_unref(&pool); + test_end(); +} + +int main(void) +{ + static void (*const test_functions[])(void) = { + test_message_decoder, + test_message_decoder_multipart, + test_message_decoder_current_content_type, + test_message_decoder_content_transfer_encoding, + test_message_decoder_invalid_content_transfer_encoding, + test_message_decoder_charset, + NULL + }; + return test_run(test_functions); +} |