/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "charset-utf8-private.h"

#ifdef HAVE_ICONV

/* <iconv.h> declares iconv_t, iconv_open(), iconv() and iconv_close().
   NOTE(review): nothing in this file visibly uses <ctype.h>; it is kept to
   preserve the include list - confirm whether it is still needed. */
#include <iconv.h>
#include <ctype.h>

/* State of one charset -> UTF-8 translation. */
struct charset_translation {
	/* iconv conversion descriptor, or (iconv_t)-1 when the input charset
	   is already UTF-8 and iconv is bypassed */
	iconv_t cd;
	/* normalizer passed on to charset_utf8_to_utf8() for all output */
	normalizer_func_t *normalizer;
};

/* Begin translating from `charset` to UTF-8.

   On success stores a newly allocated translation in *t_r and returns 0.
   Returns -1 (leaving *t_r untouched) if iconv_open() doesn't accept the
   charset. */
static int
iconv_charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
			    struct charset_translation **t_r)
{
	struct charset_translation *t;
	iconv_t cd;

	if (charset_is_utf8(charset)) {
		/* no iconv needed; (iconv_t)-1 marks the bypass */
		cd = (iconv_t)-1;
	} else {
		/* "UTF-8//TEST" is opened as plain "UTF-8", so UTF-8 input
		   goes through iconv - presumably for testing; confirm */
		if (strcmp(charset, "UTF-8//TEST") == 0)
			charset = "UTF-8";
		cd = iconv_open("UTF-8", charset);
		if (cd == (iconv_t)-1)
			return -1;
	}

	t = i_new(struct charset_translation, 1);
	t->cd = cd;
	t->normalizer = normalizer;
	*t_r = t;
	return 0;
}

/* Close the iconv descriptor (if one was opened) and free the translation. */
static void iconv_charset_to_utf8_end(struct charset_translation *t)
{
	if (t->cd != (iconv_t)-1)
		iconv_close(t->cd);
	i_free(t);
}

/* Reset the iconv descriptor (if one was opened) to its initial conversion
   state. */
static void iconv_charset_to_utf8_reset(struct charset_translation *t)
{
	if (t->cd != (iconv_t)-1)
		(void)iconv(t->cd, NULL, NULL, NULL, NULL);
}

/* Run a single conversion pass over `src`, using one fixed-size temporary
   buffer, and append the (normalized) output to `dest`.

   *src_size is the number of input bytes available on entry and the number
   of bytes consumed on return. *result is always set.

   Returns TRUE when the caller is done with this input (everything was
   converted, or the input ends with an incomplete sequence). Returns FALSE
   when the caller has to call again for the remaining input: either the
   temporary buffer filled up, or invalid input was hit
   (*result == CHARSET_RET_INVALID_INPUT). */
static bool
charset_to_utf8_try(struct charset_translation *t,
		    const unsigned char *src, size_t *src_size, buffer_t *dest,
		    enum charset_result *result)
{
	ICONV_CONST char *ic_srcbuf;
	char tmpbuf[8192], *ic_destbuf;
	size_t srcleft, destleft, tmpbuf_used;
	bool ret = TRUE;

	if (t->cd == (iconv_t)-1) {
		/* input is already supposed to be UTF-8 */
		*result = charset_utf8_to_utf8(t->normalizer, src,
					       src_size, dest);
		return TRUE;
	}

	destleft = sizeof(tmpbuf);
	ic_destbuf = tmpbuf;
	srcleft = *src_size;
	ic_srcbuf = (ICONV_CONST char *) src;

	if (iconv(t->cd, &ic_srcbuf, &srcleft,
		  &ic_destbuf, &destleft) != SIZE_MAX) {
		i_assert(srcleft == 0);
		*result = CHARSET_RET_OK;
	} else if (errno == E2BIG) {
		/* tmpbuf is full; the caller continues from where we stopped.
		   set result just to avoid compiler warning */
		*result = CHARSET_RET_INCOMPLETE_INPUT;
		ret = FALSE;
	} else if (errno == EINVAL) {
		/* input ends in the middle of a multibyte sequence; the
		   unconsumed tail is left for the caller */
		i_assert(srcleft <= CHARSET_MAX_PENDING_BUF_SIZE);
		*result = CHARSET_RET_INCOMPLETE_INPUT;
	} else {
		/* should be EILSEQ */
		*result = CHARSET_RET_INVALID_INPUT;
		ret = FALSE;
	}
	/* report how many input bytes were actually consumed */
	*src_size -= srcleft;

	/* we just converted data to UTF-8. it shouldn't be invalid, but
	   Solaris iconv appears to pass invalid data through sometimes
	   (e.g. 8 bit characters with UTF-7) */
	tmpbuf_used = sizeof(tmpbuf) - destleft;
	if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
				 &tmpbuf_used, dest) != CHARSET_RET_OK)
		*result = CHARSET_RET_INVALID_INPUT;
	return ret;
}

/* Translate `src` to UTF-8 and append the output to `dest`.

   *src_size is the number of input bytes available on entry and the number
   of bytes consumed on return. Invalid input is replaced with
   UNICODE_REPLACEMENT_CHAR_UTF8; a run of invalid bytes that produces no
   output in between gets only a single replacement character.

   Returns CHARSET_RET_INVALID_INPUT if any replacement character was added,
   otherwise the result of the last conversion pass. */
static enum charset_result
iconv_charset_to_utf8(struct charset_translation *t,
		      const unsigned char *src, size_t *src_size,
		      buffer_t *dest)
{
	enum charset_result result;
	size_t pos, size;
	/* dest->used right after the latest replacement character was
	   appended; SIZE_MAX while none has been appended */
	size_t prev_invalid_pos = SIZE_MAX;
	bool ret;

	for (pos = 0;;) {
		i_assert(pos <= *src_size);
		size = *src_size - pos;
		ret = charset_to_utf8_try(t, src + pos, &size, dest, &result);
		pos += size;

		if (ret)
			break;

		if (result == CHARSET_RET_INVALID_INPUT) {
			/* skip the replacement if nothing was written to dest
			   since the previous one */
			if (prev_invalid_pos != dest->used) {
				buffer_append(dest,
					      UNICODE_REPLACEMENT_CHAR_UTF8,
					      strlen(UNICODE_REPLACEMENT_CHAR_UTF8));
				prev_invalid_pos = dest->used;
			}
			/* step over the offending byte and retry */
			if (pos < *src_size)
				pos++;
		}
	}

	if (prev_invalid_pos != SIZE_MAX)
		result = CHARSET_RET_INVALID_INPUT;

	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
	*src_size = pos;
	return result;
}

/* iconv-based implementation of the charset -> UTF-8 translation vfuncs. */
const struct charset_utf8_vfuncs charset_iconv = {
	.to_utf8_begin = iconv_charset_to_utf8_begin,
	.to_utf8_end = iconv_charset_to_utf8_end,
	.to_utf8_reset = iconv_charset_to_utf8_reset,
	.to_utf8 = iconv_charset_to_utf8,
};

#endif