1 files changed, 147 insertions, 0 deletions
diff --git a/src/lib-charset/charset-iconv.c b/src/lib-charset/charset-iconv.c
new file mode 100644
index 0000000..7b29219
--- /dev/null
+++ b/src/lib-charset/charset-iconv.c
@@ -0,0 +1,147 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "charset-utf8-private.h"
+
+#ifdef HAVE_ICONV
+
+#include <iconv.h>
+#include <ctype.h>
+
+struct charset_translation { /* state of one charset to UTF-8 translation */
+	iconv_t cd; /* iconv descriptor, or (iconv_t)-1 when no iconv conversion is used */
+	normalizer_func_t *normalizer; /* handed to charset_utf8_to_utf8() on every call */
+};
+
+static int /* 0 with *t_r set to a newly allocated translation; -1 if iconv_open() fails */
+iconv_charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
+			    struct charset_translation **t_r)
+{
+	struct charset_translation *t;
+	iconv_t cd;
+
+	if (charset_is_utf8(charset))
+		cd = (iconv_t)-1; /* sentinel: this translation bypasses iconv entirely */
+	else {
+		if (strcmp(charset, "UTF-8//TEST") == 0)
+			charset = "UTF-8"; /* NOTE(review): presumably a test hook that runs UTF-8 through iconv - confirm */
+		cd = iconv_open("UTF-8", charset);
+		if (cd == (iconv_t)-1)
+			return -1; /* nothing allocated yet, *t_r is left untouched */
+	}
+
+	t = i_new(struct charset_translation, 1);
+	t->cd = cd;
+	t->normalizer = normalizer;
+	*t_r = t;
+	return 0;
+}
+
+static void iconv_charset_to_utf8_end(struct charset_translation *t)
+{ /* Close the iconv descriptor if one was opened, then release t with i_free(). */
+	if (t->cd != (iconv_t)-1)
+		iconv_close(t->cd);
+	i_free(t);
+}
+
+static void iconv_charset_to_utf8_reset(struct charset_translation *t)
+{ /* Does nothing when the translation has no iconv descriptor. */
+	if (t->cd != (iconv_t)-1)
+		(void)iconv(t->cd, NULL, NULL, NULL, NULL); /* all-NULL call puts iconv back in its initial state (POSIX) */
+}
+
+static bool /* TRUE: caller stops looping; FALSE: caller must call again (E2BIG or rejected input) */
+charset_to_utf8_try(struct charset_translation *t,
+		    const unsigned char *src, size_t *src_size, buffer_t *dest,
+		    enum charset_result *result)
+{ /* One conversion step. *src_size: in = bytes available; on the iconv path, out = bytes consumed. */
+	ICONV_CONST char *ic_srcbuf;
+	char tmpbuf[8192], *ic_destbuf; /* iconv output is staged here before it is appended to dest */
+	size_t srcleft, destleft, tmpbuf_used;
+	bool ret = TRUE;
+
+	if (t->cd == (iconv_t)-1) {
+		/* input is already supposed to be UTF-8 */
+		*result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+		return TRUE;
+	}
+	destleft = sizeof(tmpbuf);
+	ic_destbuf = tmpbuf;
+	srcleft = *src_size;
+	ic_srcbuf = (ICONV_CONST char *) src;
+
+	if (iconv(t->cd, &ic_srcbuf, &srcleft,
+		  &ic_destbuf, &destleft) != SIZE_MAX) {
+		i_assert(srcleft == 0); /* success is expected to consume all of the input */
+		*result = CHARSET_RET_OK;
+	} else if (errno == E2BIG) {
+		/* tmpbuf has no room left (POSIX E2BIG); set result just to avoid compiler warning */
+		*result = CHARSET_RET_INCOMPLETE_INPUT;
+		ret = FALSE;
+	} else if (errno == EINVAL) { /* POSIX: input ends in an incomplete sequence */
+		i_assert(srcleft <= CHARSET_MAX_PENDING_BUF_SIZE);
+		*result = CHARSET_RET_INCOMPLETE_INPUT;
+	} else {
+		/* should be EILSEQ */
+		*result = CHARSET_RET_INVALID_INPUT;
+		ret = FALSE;
+	}
+	*src_size -= srcleft; /* report only the bytes iconv actually consumed */
+
+	/* we just converted data to UTF-8. it shouldn't be invalid, but
+	   Solaris iconv appears to pass invalid data through sometimes
+	   (e.g. 8 bit characters with UTF-7) */
+	tmpbuf_used = sizeof(tmpbuf) - destleft; /* bytes iconv wrote into tmpbuf */
+	if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
+				 &tmpbuf_used, dest) != CHARSET_RET_OK)
+		*result = CHARSET_RET_INVALID_INPUT; /* NOTE(review): also overrides result after E2BIG (ret FALSE) - confirm intended */
+	return ret;
+}
+
+static enum charset_result /* CHARSET_RET_INVALID_INPUT if a replacement char was added, else the last step result */
+iconv_charset_to_utf8(struct charset_translation *t,
+		      const unsigned char *src, size_t *src_size,
+		      buffer_t *dest)
+{ /* Convert src into dest step by step; on return *src_size is set to the number of bytes consumed. */
+	enum charset_result result;
+	size_t pos, size;
+	size_t prev_invalid_pos = SIZE_MAX; /* dest->used right after the last replacement char; SIZE_MAX = none yet */
+	bool ret;
+
+	for (pos = 0;;) {
+		i_assert(pos <= *src_size);
+		size = *src_size - pos;
+		ret = charset_to_utf8_try(t, src + pos, &size, dest, &result);
+		pos += size; /* size was updated by charset_to_utf8_try() */
+
+		if (ret)
+			break;
+
+		if (result == CHARSET_RET_INVALID_INPUT) {
+			if (prev_invalid_pos != dest->used) { /* skip if dest has not grown since the last replacement char */
+				buffer_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8,
+					      strlen(UNICODE_REPLACEMENT_CHAR_UTF8));
+				prev_invalid_pos = dest->used;
+			}
+			if (pos < *src_size)
+				pos++; /* step over one input byte before retrying */
+		}
+	}
+
+	if (prev_invalid_pos != SIZE_MAX)
+		result = CHARSET_RET_INVALID_INPUT; /* an earlier invalid step wins over the final step result */
+
+	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
+	*src_size = pos;
+	return result;
+}
+
+const struct charset_utf8_vfuncs charset_iconv = { /* vfuncs table wiring in the iconv-based functions of this file */
+	.to_utf8_begin = iconv_charset_to_utf8_begin,
+	.to_utf8_end = iconv_charset_to_utf8_end,
+	.to_utf8_reset = iconv_charset_to_utf8_reset,
+	.to_utf8 = iconv_charset_to_utf8,
+};
+
+#endif