diff options
Diffstat (limited to 'src/lib-charset/test-charset.c')
-rw-r--r-- | src/lib-charset/test-charset.c | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/src/lib-charset/test-charset.c b/src/lib-charset/test-charset.c new file mode 100644 index 0000000..2f9ba2b --- /dev/null +++ b/src/lib-charset/test-charset.c @@ -0,0 +1,231 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "str.h" +#include "test-common.h" +#include "charset-utf8.h" + +#include <unistd.h> + +static void test_charset_is_utf8(void) +{ + test_begin("charset_is_utf8"); + test_assert(charset_is_utf8("AScII")); + test_assert(charset_is_utf8("us-AScII")); + test_assert(charset_is_utf8("uTF8")); + test_assert(charset_is_utf8("uTF-8")); + test_end(); +} + +static void test_charset_utf8_common(const char *input_charset) +{ + static const struct { + const char *input; + const char *output; + enum charset_result result; + } tests[] = { + { "p\xC3\xA4\xC3", "p\xC3\xA4", CHARSET_RET_INCOMPLETE_INPUT }, + { "p\xC3\xA4\xC3""a", "p\xC3\xA4"UNICODE_REPLACEMENT_CHAR_UTF8"a", CHARSET_RET_INVALID_INPUT } + }; + string_t *src, *str = t_str_new(256); + enum charset_result result; + unsigned int i; + + for (i = 0; i < N_ELEMENTS(tests); i++) { + str_truncate(str, 0); + test_assert_idx(charset_to_utf8_str(input_charset, NULL, + tests[i].input, str, &result) == 0, i); + test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i); + test_assert_idx(result == tests[i].result, i); + } + /* check that E2BIG handling works. We assume that iconv() is called + with 8192 byte buffer (tmpbuf[8192]) */ + src = str_new(default_pool, 16384); + for (i = 0; i < 8190; i++) + str_append_c(src, 'a' + i % ('z'-'a'+1)); + for (i = 0; i < 256; i++) { + str_truncate(str, 0); + str_append_c(src, 'A' + i % ('Z'-'A'+1)); + test_assert_idx(charset_to_utf8_str(input_charset, NULL, + str_c(src), str, &result) == 0, i); + } + str_free(&src); +} + +static void test_charset_utf8(void) +{ + test_begin("charset utf8"); + test_charset_utf8_common("UTF-8"); + test_end(); +} + +#ifdef HAVE_ICONV +static void test_charset_iconv(void) +{ + static const struct { + const char *charset; + const char *input; + const char *output; + enum charset_result result; + } tests[] = { + { "ISO-8859-1", "p\xE4\xE4", "p\xC3\xA4\xC3\xA4", CHARSET_RET_OK }, + { "UTF-7", "+AOQA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDk", + "\xC3\xA4\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" + "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" + "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" + "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" + "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4", CHARSET_RET_OK } + }; + string_t *str = t_str_new(128); + struct charset_translation *trans; + enum charset_result result; + size_t pos, left, limit, len; + unsigned int i; + + test_begin("charset iconv"); + for (i = 0; i < N_ELEMENTS(tests); i++) { + str_truncate(str, 0); + test_assert_idx(charset_to_utf8_str(tests[i].charset, NULL, + tests[i].input, str, &result) == 0, i); + test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i); + test_assert_idx(result == tests[i].result, i); + + str_truncate(str, 0); + test_assert_idx(charset_to_utf8_begin(tests[i].charset, NULL, &trans) == 0, i); + len = strlen(tests[i].input); + for (pos = 0, limit = 1; limit <= len; pos += left, limit++) { + left = limit - pos; + result = charset_to_utf8(trans, (const void *)(tests[i].input + pos), + &left, str); + if (result != CHARSET_RET_INCOMPLETE_INPUT && + result != CHARSET_RET_OK) + break; + } + test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i); + test_assert_idx(result == tests[i].result, i); + charset_to_utf8_end(&trans); + } + /* Use //IGNORE just to force handling to be done by iconv + instead of our own UTF-8 routines. */ + test_charset_utf8_common("UTF-8//TEST"); + test_end(); +} +static void test_charset_iconv_crashes(void) +{ + static const struct { + const char *charset; + const char *input; + } tests[] = { + { "CP932", "\203\334" } + }; + string_t *str = t_str_new(128); + enum charset_result result; + unsigned int i; + + test_begin("charset iconv crashes"); + for (i = 0; i < N_ELEMENTS(tests); i++) { + str_truncate(str, 0); + /* we don't care about checking the result. we only want to + verify that there's no crash. */ + (void)charset_to_utf8_str(tests[i].charset, NULL, + tests[i].input, str, &result); + } + test_end(); +} + +static void test_charset_iconv_utf7_state(void) +{ + struct charset_translation *trans; + string_t *str = t_str_new(32); + unsigned char nextbuf[5+CHARSET_MAX_PENDING_BUF_SIZE+1]; + size_t size; + + test_begin("charset iconv utf7 state"); + test_assert(charset_to_utf8_begin("UTF-7", NULL, &trans) == 0); + size = 2; + test_assert(charset_to_utf8(trans, (const void *)"a+", &size, str) == CHARSET_RET_INCOMPLETE_INPUT); + test_assert(strcmp(str_c(str), "a") == 0); + test_assert(size == 1); + memset(nextbuf, '?', sizeof(nextbuf)); + memcpy(nextbuf, "+AOQ-", 5); + size = sizeof(nextbuf); + test_assert(charset_to_utf8(trans, nextbuf, &size, str) == CHARSET_RET_OK); + test_assert(strcmp(str_c(str), "a\xC3\xA4???????????") == 0); + charset_to_utf8_end(&trans); + test_end(); +} +#endif + +static int convert(const char *charset, const char *path) +{ + struct istream *input; + const unsigned char *data; + size_t size; + struct charset_translation *trans; + buffer_t *buf = buffer_create_dynamic(default_pool, IO_BLOCK_SIZE); + enum charset_result last_ret = CHARSET_RET_OK; + bool seen_invalid_input = FALSE; + + input = path == NULL ? i_stream_create_fd(STDIN_FILENO, IO_BLOCK_SIZE) : + i_stream_create_file(path, IO_BLOCK_SIZE); + + if (charset_to_utf8_begin(charset, NULL, &trans) < 0) + i_fatal("Failed to initialize charset '%s'", charset); + + size_t need = 1; + while (i_stream_read_bytes(input, &data, &size, need) > 0) { + last_ret = charset_to_utf8(trans, data, &size, buf); + if (size > 0) + need = 1; + i_stream_skip(input, size); + switch (last_ret) { + case CHARSET_RET_OK: + break; + case CHARSET_RET_INCOMPLETE_INPUT: + need++; + break; + case CHARSET_RET_INVALID_INPUT: + seen_invalid_input = TRUE; + break; + } + if (write(STDOUT_FILENO, buf->data, buf->used) != (ssize_t)buf->used) + i_fatal("write(stdout) failed: %m"); + buffer_set_used_size(buf, 0); + } + if (input->stream_errno != 0) + i_error("read() failed: %s", i_stream_get_error(input)); + charset_to_utf8_end(&trans); + i_stream_destroy(&input); + buffer_free(&buf); + + if (seen_invalid_input) { + i_error("Seen invalid input"); + return 1; + } + if (last_ret == CHARSET_RET_INCOMPLETE_INPUT) { + i_error("Incomplete input"); + return 2; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + static void (*const test_functions[])(void) = { + test_charset_is_utf8, + test_charset_utf8, +#ifdef HAVE_ICONV + test_charset_iconv, + test_charset_iconv_crashes, + test_charset_iconv_utf7_state, +#endif + NULL + }; + + if (argc >= 2) { + /* <charset> [<input path>] */ + return convert(argv[1], argv[2]); + } + return test_run(test_functions); +} |