summary refs log tree commit diff stats
path: root/src/lib-charset/charset-iconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-charset/charset-iconv.c')
-rw-r--r-- src/lib-charset/charset-iconv.c 147
1 files changed, 147 insertions, 0 deletions
diff --git a/src/lib-charset/charset-iconv.c b/src/lib-charset/charset-iconv.c
new file mode 100644
index 0000000..7b29219
--- /dev/null
+++ b/src/lib-charset/charset-iconv.c
@@ -0,0 +1,147 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "charset-utf8-private.h"
+
+#ifdef HAVE_ICONV
+
+#include <iconv.h>
+#include <ctype.h>
+
+struct charset_translation { /* state of one iconv-backed conversion to UTF-8 */
+ iconv_t cd; /* iconv descriptor; (iconv_t)-1 means the input charset is
+    already UTF-8 and iconv is not used */
+ normalizer_func_t *normalizer; /* handed to charset_utf8_to_utf8() when writing output */
+};
+
+/* Create a translation context that converts `charset` input to UTF-8.
+
+   When charset_is_utf8() reports that the input is already UTF-8, no iconv
+   descriptor is opened and cd is left as (iconv_t)-1. Otherwise a descriptor
+   is opened with iconv_open().
+
+   Returns 0 and stores the newly allocated context in *t_r on success.
+   Returns -1 if iconv_open() fails; *t_r is not written in that case. */
+static int
+iconv_charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
+			    struct charset_translation **t_r)
+{
+	struct charset_translation *trans;
+	iconv_t conv = (iconv_t)-1;
+
+	if (!charset_is_utf8(charset)) {
+		/* the literal name "UTF-8//TEST" is handed to iconv as plain
+		   "UTF-8" (presumably a hook for exercising the iconv path
+		   in tests - confirm against callers) */
+		const char *from_charset =
+			strcmp(charset, "UTF-8//TEST") == 0 ? "UTF-8" : charset;
+
+		conv = iconv_open("UTF-8", from_charset);
+		if (conv == (iconv_t)-1)
+			return -1;
+	}
+
+	trans = i_new(struct charset_translation, 1);
+	trans->normalizer = normalizer;
+	trans->cd = conv;
+	*t_r = trans;
+	return 0;
+}
+
+/* Release a translation context: close its iconv descriptor if one was
+   opened, then free the context itself. */
+static void iconv_charset_to_utf8_end(struct charset_translation *t)
+{
+	iconv_t conv = t->cd;
+
+	/* (iconv_t)-1 marks a context that never opened a descriptor */
+	if (conv != (iconv_t)-1)
+		iconv_close(conv);
+	i_free(t);
+}
+
+/* Reset the conversion state of a translation context. Does nothing when
+   the context has no iconv descriptor. */
+static void iconv_charset_to_utf8_reset(struct charset_translation *t)
+{
+	if (t->cd == (iconv_t)-1)
+		return;
+
+	/* calling iconv() with NULL buffers puts the descriptor back into
+	   its initial state; the return value is deliberately ignored */
+	(void)iconv(t->cd, NULL, NULL, NULL, NULL);
+}
+
+/* Run a single iconv() pass over src and append the converted output to
+   dest.
+
+   On entry *src_size is the number of input bytes available; on return it
+   is the number of bytes iconv() consumed. *result receives the status of
+   this pass.
+
+   Returns TRUE when the caller should stop (everything was converted, or
+   the input ends in an incomplete sequence). Returns FALSE when the caller
+   has to call again: either the temporary output buffer filled up, or an
+   invalid input sequence was hit (*result == CHARSET_RET_INVALID_INPUT). */
+static bool
+charset_to_utf8_try(struct charset_translation *t,
+		    const unsigned char *src, size_t *src_size, buffer_t *dest,
+		    enum charset_result *result)
+{
+	char outbuf[8192];
+	char *out_ptr = outbuf;
+	ICONV_CONST char *in_ptr = (ICONV_CONST char *) src;
+	size_t srcleft, out_left = sizeof(outbuf), out_used;
+	bool finished = TRUE;
+
+	if (t->cd == (iconv_t)-1) {
+		/* no iconv descriptor: the input is expected to be UTF-8
+		   already, so only validate/normalize it */
+		*result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+		return TRUE;
+	}
+	srcleft = *src_size;
+
+	if (iconv(t->cd, &in_ptr, &srcleft, &out_ptr, &out_left) != SIZE_MAX) {
+		i_assert(srcleft == 0);
+		*result = CHARSET_RET_OK;
+	} else {
+		switch (errno) {
+		case E2BIG:
+			/* output buffer is full; the result is assigned only
+			   to keep the compiler from warning */
+			*result = CHARSET_RET_INCOMPLETE_INPUT;
+			finished = FALSE;
+			break;
+		case EINVAL:
+			/* input ends in the middle of a multibyte sequence */
+			i_assert(srcleft <= CHARSET_MAX_PENDING_BUF_SIZE);
+			*result = CHARSET_RET_INCOMPLETE_INPUT;
+			break;
+		default:
+			/* expected to be EILSEQ */
+			*result = CHARSET_RET_INVALID_INPUT;
+			finished = FALSE;
+			break;
+		}
+	}
+	*src_size -= srcleft;
+
+	/* The freshly converted data ought to be valid UTF-8, but Solaris
+	   iconv has been seen to let invalid data through (e.g. 8 bit
+	   characters with UTF-7), so run it through the UTF-8 checker on
+	   its way into dest. */
+	out_used = sizeof(outbuf) - out_left;
+	if (charset_utf8_to_utf8(t->normalizer, (void *)outbuf,
+				 &out_used, dest) != CHARSET_RET_OK)
+		*result = CHARSET_RET_INVALID_INPUT;
+	return finished;
+}
+
+/* Convert src to UTF-8, appending the output to dest.
+
+   charset_to_utf8_try() is called repeatedly until it reports that it is
+   done. Whenever it reports invalid input, a Unicode replacement character
+   is appended to dest and one input byte is skipped.
+
+   On return *src_size is the number of input bytes consumed. The return
+   value is CHARSET_RET_INVALID_INPUT if any replacement character was
+   inserted, otherwise the result of the last conversion pass. */
+static enum charset_result
+iconv_charset_to_utf8(struct charset_translation *t,
+		      const unsigned char *src, size_t *src_size,
+		      buffer_t *dest)
+{
+	enum charset_result result;
+	size_t pos = 0, chunk_size;
+	/* value of dest->used right after the most recent replacement
+	   character was appended; SIZE_MAX until one has been appended */
+	size_t last_replacement_end = SIZE_MAX;
+
+	for (;;) {
+		bool finished;
+
+		i_assert(pos <= *src_size);
+		chunk_size = *src_size - pos;
+		finished = charset_to_utf8_try(t, src + pos, &chunk_size,
+					       dest, &result);
+		pos += chunk_size;
+
+		if (finished)
+			break;
+		if (result != CHARSET_RET_INVALID_INPUT) {
+			/* not done and not invalid: just run another pass
+			   from the new position */
+			continue;
+		}
+
+		/* add a replacement character only if something was written
+		   to dest since the previous one, so a run of invalid bytes
+		   yields a single replacement */
+		if (last_replacement_end != dest->used) {
+			buffer_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8,
+				      strlen(UNICODE_REPLACEMENT_CHAR_UTF8));
+			last_replacement_end = dest->used;
+		}
+		/* step over the offending byte */
+		if (pos < *src_size)
+			pos++;
+	}
+
+	if (last_replacement_end != SIZE_MAX)
+		result = CHARSET_RET_INVALID_INPUT;
+
+	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
+	*src_size = pos;
+	return result;
+}
+
+/* Function table for the iconv-based charset-to-UTF-8 implementation,
+   listed in lifecycle order. */
+const struct charset_utf8_vfuncs charset_iconv = {
+	.to_utf8_begin = iconv_charset_to_utf8_begin,
+	.to_utf8 = iconv_charset_to_utf8,
+	.to_utf8_reset = iconv_charset_to_utf8_reset,
+	.to_utf8_end = iconv_charset_to_utf8_end,
+};
+
+#endif