Adding upstream version 3.8.1.upstream/3.8.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
commit: 133a45c109da5310add55824db21af5239951f93 (patch)
tree: ba6ac4c0a950a0dda56451944315d66409923918 /src/libmime
parent: Initial commit. (diff)
download: rspamd-upstream.tar.xz
rspamd-upstream.zip
33 files changed, 21349 insertions, 0 deletions
diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt
new file mode 100644
index 0000000..09e5dbf
--- /dev/null
+++ b/src/libmime/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Librspamd mime
+# Source files of the MIME library; the list mixes C (.c) and C++ (.cxx) units.
+SET(LIBRSPAMDMIMESRC
+		${CMAKE_CURRENT_SOURCE_DIR}/received.cxx
+				${CMAKE_CURRENT_SOURCE_DIR}/email_addr.c
+				${CMAKE_CURRENT_SOURCE_DIR}/mime_expressions.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/scan_result.c
+				${CMAKE_CURRENT_SOURCE_DIR}/images.c
+				${CMAKE_CURRENT_SOURCE_DIR}/message.c
+				${CMAKE_CURRENT_SOURCE_DIR}/archives.c
+				${CMAKE_CURRENT_SOURCE_DIR}/content_type.c
+				${CMAKE_CURRENT_SOURCE_DIR}/mime_headers.c
+				${CMAKE_CURRENT_SOURCE_DIR}/mime_parser.c
+				${CMAKE_CURRENT_SOURCE_DIR}/mime_encoding.c
+				${CMAKE_CURRENT_SOURCE_DIR}/lang_detection.c
+		${CMAKE_CURRENT_SOURCE_DIR}/lang_detection_fasttext.cxx
+		${CMAKE_CURRENT_SOURCE_DIR}/mime_string.cxx
+		)
+
+# Publish the list to the parent directory scope under the name RSPAMD_MIME.
+SET(RSPAMD_MIME ${LIBRSPAMDMIMESRC} PARENT_SCOPE)
+\ No newline at end of file
diff --git a/src/libmime/archives.c b/src/libmime/archives.c
new file mode 100644
index 0000000..ea0ea55
--- /dev/null
+++ b/src/libmime/archives.c
@@ -0,0 +1,2057 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "message.h"
+#include "task.h"
+#include "archives.h"
+#include "libmime/mime_encoding.h"
+#include <unicode/uchar.h>
+#include <unicode/utf8.h>
+#include <unicode/utf16.h>
+#include <unicode/ucnv.h>
+
+/*
+ * Debug logging helper for this file: forwards its arguments to
+ * rspamd_conditional_debug_fast() using the "archive" module name, the uid
+ * tag of the current task's memory pool and the calling function name.
+ * A variable named `task` must be visible wherever the macro is expanded.
+ */
+#define msg_debug_archive(...) rspamd_conditional_debug_fast(NULL, NULL,                                                 \
+															 rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
+															 G_STRFUNC,                                                  \
+															 __VA_ARGS__)
+
+INIT_LOG_MODULE(archive)
+
+/*
+ * Memory pool destructor for an archive descriptor: releases every file
+ * entry (together with its name string, when one was set) and then the
+ * pointer array that held the entries. The descriptor itself is not freed
+ * here.
+ */
+static void
+rspamd_archive_dtor(gpointer p)
+{
+	struct rspamd_archive *archive = p;
+	GPtrArray *entries = archive->files;
+	guint idx = 0;
+
+	while (idx < entries->len) {
+		struct rspamd_archive_file *entry = g_ptr_array_index(entries, idx);
+
+		if (entry->fname != NULL) {
+			g_string_free(entry->fname, TRUE);
+		}
+
+		g_free(entry);
+		idx++;
+	}
+
+	g_ptr_array_free(entries, TRUE);
+}
+
+/**
+ * Store a sanitised copy of an archive member name in fentry->fname.
+ *
+ * When a charset can be detected for the raw bytes, the name is converted to
+ * UTF-16 and then to UTF-8; a zero-width space or control character found in
+ * the intermediate UTF-16 text marks the entry as obfuscated. When no charset
+ * is detected, every byte that is not a graphic ASCII character is replaced
+ * with '?', and control bytes additionally mark the entry as obfuscated.
+ *
+ * @param task   current task (used for logging and the converter cache pool)
+ * @param arch   archive being processed (its name is used in log messages)
+ * @param fentry file entry that receives the name and flags
+ * @param in     raw file name bytes
+ * @param inlen  number of bytes in `in`
+ * @return true when the name was converted or sanitised; false when the
+ *         converter could not be opened or a conversion step failed, in which
+ *         case the raw bytes are stored as the name and the entry is flagged
+ *         as obfuscated
+ */
+static bool
+rspamd_archive_file_try_utf(struct rspamd_task *task,
+							struct rspamd_archive *arch,
+							struct rspamd_archive_file *fentry,
+							const gchar *in, gsize inlen)
+{
+	const gchar *charset = NULL, *p, *end;
+	GString *res;
+
+	charset = rspamd_mime_charset_find_by_content(in, inlen, TRUE);
+
+	if (charset) {
+		UChar *tmp;
+		UErrorCode uc_err = U_ZERO_ERROR;
+		gint32 r, clen, dlen;
+		struct rspamd_charset_converter *conv;
+		UConverter *utf8_converter;
+
+		conv = rspamd_mime_get_converter_cached(charset, task->task_pool,
+												TRUE, &uc_err);
+		utf8_converter = rspamd_get_utf8_converter();
+
+		if (conv == NULL) {
+			msg_info_task("cannot open converter for %s: %s",
+						  charset, u_errorName(uc_err));
+			fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
+			fentry->fname = g_string_new_len(in, inlen);
+
+			return false;
+		}
+
+		/* Intermediate UTF-16 buffer, one code unit per input byte plus one */
+		tmp = g_malloc(sizeof(*tmp) * (inlen + 1));
+		r = rspamd_converter_to_uchars(conv, tmp, inlen + 1,
+									   in, inlen, &uc_err);
+		if (!U_SUCCESS(uc_err)) {
+			msg_info_task("cannot convert data to unicode from %s: %s",
+						  charset, u_errorName(uc_err));
+			g_free(tmp);
+
+			fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
+			fentry->fname = g_string_new_len(in, inlen);
+
+			/* The function is declared bool: report failure as false */
+			return false;
+		}
+
+		int i = 0;
+
+		/* Stop at the first suspicious code point: one hit is enough */
+		while (i < r) {
+			UChar32 uc;
+
+			U16_NEXT(tmp, i, r, uc);
+
+			if (IS_ZERO_WIDTH_SPACE(uc) || u_iscntrl(uc)) {
+				msg_info_task("control character in archive file name found: 0x%02xd "
+							  "(filename=%T)",
+							  uc, arch->archive_name);
+				fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
+				break;
+			}
+		}
+
+		clen = ucnv_getMaxCharSize(utf8_converter);
+		dlen = UCNV_GET_MAX_BYTES_FOR_STRING(r, clen);
+		res = g_string_sized_new(dlen);
+		r = ucnv_fromUChars(utf8_converter, res->str, dlen, tmp, r, &uc_err);
+
+		if (!U_SUCCESS(uc_err)) {
+			msg_info_task("cannot convert data from unicode from %s: %s",
+						  charset, u_errorName(uc_err));
+			g_free(tmp);
+			g_string_free(res, TRUE);
+			fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
+			fentry->fname = g_string_new_len(in, inlen);
+
+			/* The function is declared bool: report failure as false */
+			return false;
+		}
+
+		g_free(tmp);
+		/* The converter wrote directly into the buffer; record its length */
+		res->len = r;
+
+		msg_debug_archive("converted from %s to UTF-8 inlen: %z, outlen: %d",
+						  charset, inlen, r);
+		fentry->fname = res;
+	}
+	else {
+		/* Convert unsafe characters to '?' */
+		res = g_string_sized_new(inlen);
+		p = in;
+		end = in + inlen;
+
+		while (p < end) {
+			if (g_ascii_isgraph(*p)) {
+				g_string_append_c(res, *p);
+			}
+			else {
+				g_string_append_c(res, '?');
+
+				if (*p < 0x7f && (g_ascii_iscntrl(*p) || *p == '\0')) {
+					/* Log only the first suspicious byte of each name */
+					if (!(fentry->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED)) {
+						msg_info_task("suspicious character in archive file name found: 0x%02xd "
+									  "(filename=%T)",
+									  (int) *p, arch->archive_name);
+						fentry->flags |= RSPAMD_ARCHIVE_FILE_OBFUSCATED;
+					}
+				}
+			}
+
+			p++;
+		}
+		fentry->fname = res;
+	}
+
+	return true;
+}
+
+/**
+ * Parse a MIME part as a ZIP archive.
+ *
+ * The end-of-central-directory (EOCD) record is searched backwards from the
+ * tail of the part (at most `max_processed` positions are examined). The
+ * central directory it points to is then walked and one file entry is created
+ * per record, carrying its name, sizes and encryption/obfuscation flags.
+ * The part is marked as an archive only when the whole central directory was
+ * walked successfully; on any structural error the function logs and returns.
+ *
+ * @param task current task (logging, memory pool)
+ * @param part MIME part whose parsed data is inspected
+ */
+static void
+rspamd_archive_process_zip(struct rspamd_task *task,
+						   struct rspamd_mime_part *part)
+{
+	const guchar *p, *start, *end, *eocd = NULL, *cd, *cd_end;
+	const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
+	const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
+	const guint max_processed = 1024;
+	guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
+	guint16 extra_len, fname_len, comment_len;
+	gsize cd_avail;
+	struct rspamd_archive *arch;
+	struct rspamd_archive_file *f = NULL;
+
+	/*
+	 * An EOCD record is at least 22 bytes; shorter input cannot hold one and
+	 * would make the pointer arithmetic below step before the buffer start.
+	 */
+	if (part->parsed_data.len < 22) {
+		msg_info_task("zip archive is invalid (no EOCD)");
+
+		return;
+	}
+
+	/* Zip files have interesting data at the end of archive */
+	p = part->parsed_data.begin + part->parsed_data.len - 1;
+	start = part->parsed_data.begin;
+	end = p;
+
+	/* Search for EOCD:
+	 * 22 bytes is a typical size of eocd without a comment and
+	 * end points one byte after the last character
+	 */
+	p -= 21;
+
+	while (p > start + sizeof(guint32)) {
+		guint32 t;
+
+		if (processed > max_processed) {
+			break;
+		}
+
+		/* XXX: not an efficient approach */
+		memcpy(&t, p, sizeof(t));
+
+		if (GUINT32_FROM_LE(t) == eocd_magic) {
+			eocd = p;
+			break;
+		}
+
+		p--;
+		processed++;
+	}
+
+
+	if (eocd == NULL) {
+		/* Not a zip file */
+		msg_info_task("zip archive is invalid (no EOCD)");
+
+		return;
+	}
+
+	if (end - eocd < 21) {
+		msg_info_task("zip archive is invalid (short EOCD)");
+
+		return;
+	}
+
+
+	memcpy(&cd_size, eocd + 12, sizeof(cd_size));
+	cd_size = GUINT32_FROM_LE(cd_size);
+	memcpy(&cd_offset, eocd + 16, sizeof(cd_offset));
+	cd_offset = GUINT32_FROM_LE(cd_offset);
+
+	/*
+	 * We need to check sanity as well. Both values are attacker controlled
+	 * 32 bit integers, so compare them one by one instead of summing them:
+	 * the sum could wrap around and pass the check.
+	 */
+	cd_avail = (gsize) (eocd - start);
+
+	if (cd_offset > cd_avail || cd_size > cd_avail - cd_offset) {
+		msg_info_task("zip archive is invalid (bad size/offset for CD)");
+
+		return;
+	}
+
+	cd = start + cd_offset;
+	cd_end = cd + cd_size;
+
+	arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
+	arch->files = g_ptr_array_new();
+	arch->type = RSPAMD_ARCHIVE_ZIP;
+	if (part->cd) {
+		arch->archive_name = &part->cd->filename;
+	}
+	/* From here on the pool destructor owns the entries added to arch */
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
+								  arch);
+
+	while (cd < cd_end) {
+		guint16 flags;
+
+		/* Read central directory record */
+		if ((gsize) (eocd - cd) < cd_basic_len ||
+			memcmp(cd, cd_magic, sizeof(cd_magic)) != 0) {
+			msg_info_task("zip archive is invalid (bad cd record)");
+
+			return;
+		}
+
+		memcpy(&flags, cd + 8, sizeof(guint16));
+		flags = GUINT16_FROM_LE(flags);
+		memcpy(&comp_size, cd + 20, sizeof(guint32));
+		comp_size = GUINT32_FROM_LE(comp_size);
+		memcpy(&uncomp_size, cd + 24, sizeof(guint32));
+		uncomp_size = GUINT32_FROM_LE(uncomp_size);
+		memcpy(&fname_len, cd + 28, sizeof(fname_len));
+		fname_len = GUINT16_FROM_LE(fname_len);
+		memcpy(&extra_len, cd + 30, sizeof(extra_len));
+		extra_len = GUINT16_FROM_LE(extra_len);
+		memcpy(&comment_len, cd + 32, sizeof(comment_len));
+		comment_len = GUINT16_FROM_LE(comment_len);
+
+		/* Compare sizes, not pointers, so no out-of-range pointer is formed */
+		if ((gsize) fname_len + comment_len + extra_len + cd_basic_len >
+			(gsize) (eocd - cd)) {
+			msg_info_task("zip archive is invalid (too large cd record)");
+
+			return;
+		}
+
+		f = g_malloc0(sizeof(*f));
+		rspamd_archive_file_try_utf(task, arch, f, cd + cd_basic_len, fname_len);
+
+		f->compressed_size = comp_size;
+		f->uncompressed_size = uncomp_size;
+
+		/* General purpose flag bits 0 and 6 are treated as "encrypted" */
+		if (flags & 0x41u) {
+			f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
+		}
+
+		if (f->fname) {
+			if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
+				arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
+			}
+
+			g_ptr_array_add(arch->files, f);
+			msg_debug_archive("found file in zip archive: %v", f->fname);
+		}
+		else {
+			g_free(f);
+
+			return;
+		}
+
+		/* Process extra fields */
+		const guchar *extra = cd + fname_len + cd_basic_len;
+		const guchar *extra_end = extra + extra_len;
+		p = extra;
+
+		while ((gsize) (extra_end - p) > sizeof(guint16) * 2) {
+			guint16 hid, hlen;
+
+			memcpy(&hid, p, sizeof(guint16));
+			hid = GUINT16_FROM_LE(hid);
+			memcpy(&hlen, p + sizeof(guint16), sizeof(guint16));
+			hlen = GUINT16_FROM_LE(hlen);
+
+			/* Extra field 0x0017 also marks the entry as encrypted */
+			if (hid == 0x0017) {
+				f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
+			}
+
+			if ((gsize) hlen + sizeof(guint16) * 2 > (gsize) (extra_end - p)) {
+				/* Field claims more data than the extra area holds */
+				break;
+			}
+
+			p += hlen + sizeof(guint16) * 2;
+		}
+
+		cd += fname_len + comment_len + extra_len + cd_basic_len;
+	}
+
+	part->part_type = RSPAMD_MIME_PART_ARCHIVE;
+	part->specific.arch = arch;
+
+	arch->size = part->parsed_data.len;
+}
+
+/**
+ * Decode a RAR variable length integer.
+ *
+ * @param start  first byte of the encoded integer
+ * @param remain number of readable bytes starting at `start`
+ * @param res    receives the decoded value on success
+ * @return number of bytes consumed, or -1 when the input ends (or the
+ *         supported length is exceeded) before a terminating byte is seen
+ */
+static inline gint
+rspamd_archive_rar_read_vint(const guchar *start, gsize remain, guint64 *res)
+{
+	/*
+	 * From http://www.rarlab.com/technote.htm:
+	 * Variable length integer. Can include one or more bytes, where
+	 * lower 7 bits of every byte contain integer data and highest bit
+	 * in every byte is the continuation flag.
+	 * If highest bit is 0, this is the last byte in sequence.
+	 * So first byte contains 7 least significant bits of integer and
+	 * continuation flag. Second byte, if present, contains next 7 bits and so on.
+	 */
+	guint64 t = 0;
+	guint shift = 0;
+	const guchar *p = start;
+	gboolean terminated = FALSE;
+
+	while (remain > 0 && shift <= 57) {
+		t |= ((guint64) (*p & 0x7f)) << shift;
+
+		if (!(*p & 0x80)) {
+			/* Last byte of the sequence */
+			p++;
+			terminated = TRUE;
+			break;
+		}
+
+		shift += 7;
+		p++;
+		remain--;
+	}
+
+	if (!terminated) {
+		/* Ran out of input or of supported bits without a final byte */
+		return -1;
+	}
+
+	/*
+	 * The value was assembled arithmetically, so it is already in host byte
+	 * order: no endianness conversion must be applied.
+	 */
+	*res = t;
+
+	return p - start;
+}
+
+/*
+ * Helper macros for the RAR parsers below. They expand in place and rely on
+ * variables of the enclosing function: `p` (read cursor), `end` (end of
+ * data) and `task` (for logging); the VINT variants also use `r` and `vint`.
+ * On malformed input they log a debug message and execute a bare `return;`,
+ * so they can only be used inside functions returning void.
+ */
+
+/* Advance `p` by n bytes; n must be positive and must not exceed the data left. */
+#define RAR_SKIP_BYTES(n)                                                 \
+	do {                                                                  \
+		if ((n) <= 0) {                                                   \
+			msg_debug_archive("rar archive is invalid (bad skip value)"); \
+			return;                                                       \
+		}                                                                 \
+		if ((gsize) (end - p) < (n)) {                                    \
+			msg_debug_archive("rar archive is invalid (truncated)");      \
+			return;                                                       \
+		}                                                                 \
+		p += (n);                                                         \
+	} while (0)
+
+/* Decode a vint at `p` into `vint` WITHOUT advancing `p`; `r` holds its length. */
+#define RAR_READ_VINT()                                                    \
+	do {                                                                   \
+		r = rspamd_archive_rar_read_vint(p, end - p, &vint);               \
+		if (r == -1) {                                                     \
+			msg_debug_archive("rar archive is invalid (bad vint)");        \
+			return;                                                        \
+		}                                                                  \
+		else if (r == 0) {                                                 \
+			msg_debug_archive("rar archive is invalid (BAD vint offset)"); \
+			return;                                                        \
+		}                                                                  \
+	} while (0)
+
+/* Decode a vint at `p` into `vint` and advance `p` past it. */
+#define RAR_READ_VINT_SKIP()                                        \
+	do {                                                            \
+		r = rspamd_archive_rar_read_vint(p, end - p, &vint);        \
+		if (r == -1) {                                              \
+			msg_debug_archive("rar archive is invalid (bad vint)"); \
+			return;                                                 \
+		}                                                           \
+		p += r;                                                     \
+	} while (0)
+
+/* Read a little-endian 16 bit value at `p` into n and advance `p` by 2. */
+#define RAR_READ_UINT16(n)                                           \
+	do {                                                             \
+		if (end - p < (glong) sizeof(guint16)) {                     \
+			msg_debug_archive("rar archive is invalid (bad int16)"); \
+			return;                                                  \
+		}                                                            \
+		n = p[0] + (p[1] << 8);                                      \
+		p += sizeof(guint16);                                        \
+	} while (0)
+
+/* Read a little-endian 32 bit value at `p` into n and advance `p` by 4. */
+#define RAR_READ_UINT32(n)                                                                    \
+	do {                                                                                      \
+		if (end - p < (glong) sizeof(guint32)) {                                              \
+			msg_debug_archive("rar archive is invalid (bad int32)");                          \
+			return;                                                                           \
+		}                                                                                     \
+		n = (guint) p[0] + ((guint) p[1] << 8) + ((guint) p[2] << 16) + ((guint) p[3] << 24); \
+		p += sizeof(guint32);                                                                 \
+	} while (0)
+
+/**
+ * Parse the blocks of a RAR v4 archive (the magic has already been consumed
+ * by the caller).
+ *
+ * Each block starts with a 16 bit CRC, a type byte, 16 bit flags and a 16 bit
+ * size. A main header (type 0x73) with flag 0x80 marks the whole archive as
+ * encrypted and stops parsing; file headers (type 0x74) produce one file
+ * entry each. The part is marked as an archive when the block list was walked
+ * to the end or an encrypted main header was found; on structural errors the
+ * function logs and returns without marking the part.
+ *
+ * @param task  current task (logging, memory pool)
+ * @param start first byte after the RAR v4 magic
+ * @param end   one past the last byte of the data
+ * @param part  MIME part being processed
+ */
+static void
+rspamd_archive_process_rar_v4(struct rspamd_task *task, const guchar *start,
+							  const guchar *end, struct rspamd_mime_part *part)
+{
+	const guchar *p = start, *start_section;
+	guint8 type;
+	guint flags;
+	guint64 sz, comp_sz = 0, uncomp_sz = 0;
+	struct rspamd_archive *arch;
+	struct rspamd_archive_file *f;
+
+	arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
+	arch->files = g_ptr_array_new();
+	arch->type = RSPAMD_ARCHIVE_RAR;
+	if (part->cd) {
+		arch->archive_name = &part->cd->filename;
+	}
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
+								  arch);
+
+	while (p < end) {
+		/* Crc16 */
+		start_section = p;
+		RAR_SKIP_BYTES(sizeof(guint16));
+
+		/* The skip above may leave p == end: check before reading the type */
+		if (p >= end) {
+			msg_debug_archive("rar archive is invalid (truncated)");
+
+			return;
+		}
+
+		type = *p;
+		p++;
+		RAR_READ_UINT16(flags);
+
+		if (type == 0x73) {
+			/* Main header, check for encryption */
+			if (flags & 0x80) {
+				arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
+				goto end;
+			}
+		}
+
+		RAR_READ_UINT16(sz);
+
+		if (flags & 0x8000) {
+			/* We also need to read ADD_SIZE element */
+			guint32 tmp;
+
+			RAR_READ_UINT32(tmp);
+			sz += tmp;
+			/* This is also used as PACK_SIZE */
+			comp_sz = tmp;
+		}
+
+		if (sz == 0) {
+			/* Zero sized block - error */
+			msg_debug_archive("rar archive is invalid (zero size block)");
+
+			return;
+		}
+
+		if (type == 0x74) {
+			guint fname_len;
+
+			/* File header */
+			/* Uncompressed size */
+			RAR_READ_UINT32(uncomp_sz);
+			/* Skip to NAME_SIZE element */
+			RAR_SKIP_BYTES(11);
+			RAR_READ_UINT16(fname_len);
+
+			if (fname_len == 0 || fname_len > (gsize) (end - p)) {
+				msg_debug_archive("rar archive is invalid (bad filename size: %d)",
+								  fname_len);
+
+				return;
+			}
+
+			/* Attrs */
+			RAR_SKIP_BYTES(4);
+
+			if (flags & 0x100) {
+				/* We also need to read HIGH_PACK_SIZE */
+				guint32 tmp;
+
+				/*
+				 * NOTE(review): the high parts are added without a 32 bit
+				 * shift; presumably they are the upper halves of 64 bit
+				 * sizes - confirm against the RAR 4 technote.
+				 */
+				RAR_READ_UINT32(tmp);
+				sz += tmp;
+				comp_sz += tmp;
+				/* HIGH_UNP_SIZE  */
+				RAR_READ_UINT32(tmp);
+				uncomp_sz += tmp;
+			}
+
+			/*
+			 * The cursor has moved past the attributes (and optionally the
+			 * high size fields) since the name length was first validated:
+			 * make sure the name still fits into the remaining data.
+			 */
+			if (fname_len > (gsize) (end - p)) {
+				msg_debug_archive("rar archive is invalid (bad filename size: %d)",
+								  fname_len);
+
+				return;
+			}
+
+			f = g_malloc0(sizeof(*f));
+
+			if (flags & 0x200) {
+				/* We have unicode + normal version */
+				guchar *tmp;
+
+				tmp = memchr(p, '\0', fname_len);
+
+				if (tmp != NULL) {
+					/* Just use ASCII version */
+					rspamd_archive_file_try_utf(task, arch, f, p, tmp - p);
+					msg_debug_archive("found ascii filename in rarv4 archive: %v",
+									  f->fname);
+				}
+				else {
+					/* We have UTF8 filename, use it as is */
+					rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
+					msg_debug_archive("found utf filename in rarv4 archive: %v",
+									  f->fname);
+				}
+			}
+			else {
+				rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
+				msg_debug_archive("found ascii (old) filename in rarv4 archive: %v",
+								  f->fname);
+			}
+
+			f->compressed_size = comp_sz;
+			f->uncompressed_size = uncomp_sz;
+
+			if (flags & 0x4) {
+				f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
+			}
+
+			if (f->fname) {
+				if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
+					arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
+				}
+				g_ptr_array_add(arch->files, f);
+			}
+			else {
+				g_free(f);
+			}
+		}
+
+		/* Jump to the next block: sizes are relative to the block start */
+		p = start_section;
+		RAR_SKIP_BYTES(sz);
+	}
+
+end:
+	part->part_type = RSPAMD_MIME_PART_ARCHIVE;
+	part->specific.arch = arch;
+	arch->size = part->parsed_data.len;
+}
+
+/**
+ * Parse a MIME part as a RAR archive.
+ *
+ * The magic decides the format: v4 data is delegated to
+ * rspamd_archive_process_rar_v4(), v5 data is parsed here. For v5 the first
+ * header must be either an encryption header (the archive is then flagged as
+ * encrypted and nothing else is read) or a main header. Afterwards every
+ * header is walked; file headers that are not directories produce one file
+ * entry each, and the extra area of such a header is scanned for a record of
+ * type 0x01, which flags both the file and the archive as encrypted.
+ * On structural errors the function logs and returns without marking the
+ * part as an archive.
+ *
+ * @param task current task (logging, memory pool)
+ * @param part MIME part whose parsed data is inspected
+ */
+static void
+rspamd_archive_process_rar(struct rspamd_task *task,
+						   struct rspamd_mime_part *part)
+{
+	const guchar *p, *end, *section_start;
+	const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
+				 rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
+	const guint rar_encrypted_header = 4, rar_main_header = 1,
+				rar_file_header = 2;
+	guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
+					  extra_sz = 0;
+	struct rspamd_archive *arch;
+	struct rspamd_archive_file *f;
+	gint r;
+
+	p = part->parsed_data.begin;
+	end = p + part->parsed_data.len;
+
+	if ((gsize) (end - p) <= sizeof(rar_v5_magic)) {
+		msg_debug_archive("rar archive is invalid (too small)");
+
+		return;
+	}
+
+	if (memcmp(p, rar_v5_magic, sizeof(rar_v5_magic)) == 0) {
+		p += sizeof(rar_v5_magic);
+	}
+	else if (memcmp(p, rar_v4_magic, sizeof(rar_v4_magic)) == 0) {
+		p += sizeof(rar_v4_magic);
+
+		rspamd_archive_process_rar_v4(task, p, end, part);
+		return;
+	}
+	else {
+		msg_debug_archive("rar archive is invalid (no rar magic)");
+
+		return;
+	}
+
+	/* Rar v5 format */
+	arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
+	arch->files = g_ptr_array_new();
+	arch->type = RSPAMD_ARCHIVE_RAR;
+	if (part->cd) {
+		arch->archive_name = &part->cd->filename;
+	}
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
+								  arch);
+
+	/* Now we can have either encryption header or archive header */
+	/* Crc 32 */
+	RAR_SKIP_BYTES(sizeof(guint32));
+	/* Size */
+	RAR_READ_VINT_SKIP();
+	sz = vint;
+	/* Type */
+	section_start = p;
+	RAR_READ_VINT_SKIP();
+	type = vint;
+	/* Header flags */
+	RAR_READ_VINT_SKIP();
+	flags = vint;
+
+	if (flags & 0x1) {
+		/* Have extra zone */
+		RAR_READ_VINT_SKIP();
+	}
+	if (flags & 0x2) {
+		/* Data zone is presented */
+		RAR_READ_VINT_SKIP();
+		sz += vint;
+	}
+
+	if (type == rar_encrypted_header) {
+		/* We can't read any further information as archive is encrypted */
+		arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
+		goto end;
+	}
+	else if (type != rar_main_header) {
+		msg_debug_archive("rar archive is invalid (bad main header)");
+
+		return;
+	}
+
+	/* Nothing useful in main header */
+	p = section_start;
+	RAR_SKIP_BYTES(sz);
+
+	while (p < end) {
+		gboolean has_extra = FALSE;
+		/* Read the next header */
+		/* Crc 32 */
+		RAR_SKIP_BYTES(sizeof(guint32));
+		/* Size */
+		RAR_READ_VINT_SKIP();
+
+		sz = vint;
+		if (sz == 0) {
+			/* Zero sized block - error */
+			msg_debug_archive("rar archive is invalid (zero size block)");
+
+			return;
+		}
+
+		section_start = p;
+		/* Type */
+		RAR_READ_VINT_SKIP();
+		type = vint;
+		/* Header flags */
+		RAR_READ_VINT_SKIP();
+		flags = vint;
+
+		if (flags & 0x1) {
+			/* Have extra zone */
+			RAR_READ_VINT_SKIP();
+			extra_sz = vint;
+			has_extra = TRUE;
+		}
+
+		if (flags & 0x2) {
+			/* Data zone is presented */
+			RAR_READ_VINT_SKIP();
+			sz += vint;
+			comp_sz = vint;
+		}
+
+		if (type != rar_file_header) {
+			p = section_start;
+			RAR_SKIP_BYTES(sz);
+		}
+		else {
+			/* We have a file header, go forward */
+			guint64 fname_len;
+			bool is_directory = false;
+
+			/* File header specific flags */
+			RAR_READ_VINT_SKIP();
+			flags = vint;
+
+			/* Unpacked size */
+			RAR_READ_VINT_SKIP();
+			uncomp_sz = vint;
+			/* Attributes */
+			RAR_READ_VINT_SKIP();
+
+			if (flags & 0x2) {
+				/* Unix mtime */
+				RAR_SKIP_BYTES(sizeof(guint32));
+			}
+			if (flags & 0x4) {
+				/* Crc32 */
+				RAR_SKIP_BYTES(sizeof(guint32));
+			}
+			if (flags & 0x1) {
+				/* Ignore directories for sanity purposes */
+				is_directory = true;
+				msg_debug_archive("skip directory record in a rar archive");
+			}
+
+			if (!is_directory) {
+				/* Compression */
+				RAR_READ_VINT_SKIP();
+				/* Host OS */
+				RAR_READ_VINT_SKIP();
+				/* Filename length (finally!) */
+				RAR_READ_VINT_SKIP();
+				fname_len = vint;
+
+				if (fname_len == 0 || fname_len > (gsize) (end - p)) {
+					msg_debug_archive("rar archive is invalid (bad filename size)");
+
+					return;
+				}
+
+				f = g_malloc0(sizeof(*f));
+				f->uncompressed_size = uncomp_sz;
+				f->compressed_size = comp_sz;
+				rspamd_archive_file_try_utf(task, arch, f, p, fname_len);
+
+				if (f->fname) {
+					msg_debug_archive("added rarv5 file: %v", f->fname);
+					g_ptr_array_add(arch->files, f);
+					if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
+						arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
+					}
+				}
+				else {
+					g_free(f);
+					f = NULL;
+				}
+
+				/*
+				 * fname_len <= end - p is guaranteed above, so the size
+				 * comparison below cannot underflow and, unlike pointer
+				 * addition, cannot overflow for a huge extra_sz.
+				 */
+				if (f && has_extra && extra_sz > 0 &&
+					extra_sz < (guint64) (end - p) - fname_len) {
+					/* Try to find encryption record in extra field */
+					const guchar *ex = p + fname_len;
+					const guchar *ex_end = ex + extra_sz;
+
+					while (ex < ex_end) {
+						guint64 cur_sz = 0, sec_type = 0;
+						gint sz_len;
+
+						/* Record size: counts the bytes that follow this vint */
+						r = rspamd_archive_rar_read_vint(ex, ex_end - ex, &cur_sz);
+						if (r == -1) {
+							msg_debug_archive("rar archive is invalid (bad vint)");
+							return;
+						}
+
+						sz_len = r;
+
+						/* Record type */
+						r = rspamd_archive_rar_read_vint(ex + sz_len,
+														 ex_end - ex - sz_len, &sec_type);
+						if (r == -1) {
+							msg_debug_archive("rar archive is invalid (bad vint)");
+							return;
+						}
+
+						if (sec_type == 0x01) {
+							f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
+							arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
+							break;
+						}
+
+						if (cur_sz == 0 ||
+							cur_sz > (guint64) (ex_end - ex) - sz_len) {
+							/*
+							 * Malformed record: a zero size would never
+							 * advance the cursor and an oversized one would
+							 * leave the extra area, so stop scanning.
+							 */
+							break;
+						}
+
+						/* Next record starts after the size vint and its data */
+						ex += sz_len + cur_sz;
+					}
+				}
+			}
+
+			/* Restore p to the beginning of the header */
+			p = section_start;
+			RAR_SKIP_BYTES(sz);
+		}
+	}
+
+end:
+	part->part_type = RSPAMD_MIME_PART_ARCHIVE;
+	part->specific.arch = arch;
+	arch->size = part->parsed_data.len;
+}
+
+/**
+ * Decode a 7zip variable length UINT64.
+ *
+ * @param start  first byte of the encoded integer
+ * @param remain number of readable bytes starting at `start`
+ * @param res    receives the decoded value on success
+ * @return number of bytes consumed (1..9), or -1 when there is not enough
+ *         input for the length announced by the first byte
+ */
+static inline gint
+rspamd_archive_7zip_read_vint(const guchar *start, gsize remain, guint64 *res)
+{
+	/*
+	 * REAL_UINT64 means real UINT64.
+	 * UINT64 means real UINT64 encoded with the following scheme:
+	 *
+	 * Size of encoding sequence depends from first byte:
+	 * First_Byte  Extra_Bytes        Value
+	 * (binary)
+	 * 0xxxxxxx               : ( xxxxxxx           )
+	 * 10xxxxxx    BYTE y[1]  : (  xxxxxx << (8 * 1)) + y
+	 * 110xxxxx    BYTE y[2]  : (   xxxxx << (8 * 2)) + y
+	 * ...
+	 * 1111110x    BYTE y[6]  : (       x << (8 * 6)) + y
+	 * 11111110    BYTE y[7]  :                         y
+	 * 11111111    BYTE y[8]  :                         y
+	 */
+	guchar t;
+
+	if (remain == 0) {
+		return -1;
+	}
+
+	t = *start;
+
+	if (!(t & 0x80)) {
+		/* Trivial case */
+		*res = t;
+		return 1;
+	}
+	else if (t == 0xFF) {
+		if (remain >= sizeof(guint64) + 1) {
+			memcpy(res, start + 1, sizeof(guint64));
+			*res = GUINT64_FROM_LE(*res);
+
+			return sizeof(guint64) + 1;
+		}
+	}
+	else {
+		/*
+		 * The number of leading one bits gives the number of extra bytes.
+		 * As t != 0xFF, a zero bit is always found before the marker runs
+		 * out, so intlen ends up in the range 1..7.
+		 */
+		guint intlen = 1, i;
+		guchar marker = 0x40;
+		guint64 tgt = 0;
+
+		while (t & marker) {
+			marker >>= 1;
+			intlen++;
+		}
+
+		if (remain < (gsize) intlen + 1) {
+			/* Announced length does not fit into the remaining input */
+			return -1;
+		}
+
+		/* Extra bytes hold the low part, least significant byte first */
+		for (i = 0; i < intlen; i++) {
+			tgt |= ((guint64) start[i + 1]) << (NBBY * i);
+		}
+
+		/* Bits of the first byte below the zero marker are the high part */
+		tgt |= ((guint64) (t & (marker - 1))) << (NBBY * intlen);
+		*res = tgt;
+
+		return (gint) intlen + 1;
+	}
+
+	return -1;
+}
+
+/*
+ * Helper macros for the 7zip parsers. They expand in place and rely on
+ * variables of the enclosing function: `p` (read cursor), `end` (end of
+ * data) and `task` (for logging). On malformed input they log a debug
+ * message and return from the ENCLOSING function: SZ_READ_VINT_SKIP and
+ * SZ_READ_UINT64 use a bare `return;`, while SZ_READ_VINT and SZ_SKIP_BYTES
+ * use `return NULL;` and therefore fit only pointer-returning functions.
+ */
+
+/* Decode a 7zip vint at `p` into the caller's `vint` (using `r`) and skip it. */
+#define SZ_READ_VINT_SKIP()                                        \
+	do {                                                           \
+		r = rspamd_archive_7zip_read_vint(p, end - p, &vint);      \
+		if (r == -1) {                                             \
+			msg_debug_archive("7z archive is invalid (bad vint)"); \
+			return;                                                \
+		}                                                          \
+		p += r;                                                    \
+	} while (0)
+/* Decode a 7zip vint at `p` into var and advance `p` past it. */
+#define SZ_READ_VINT(var)                                                        \
+	do {                                                                         \
+		int r;                                                                   \
+		r = rspamd_archive_7zip_read_vint(p, end - p, &(var));                   \
+		if (r == -1) {                                                           \
+			msg_debug_archive("7z archive is invalid (bad vint): %s", G_STRLOC); \
+			return NULL;                                                         \
+		}                                                                        \
+		p += r;                                                                  \
+	} while (0)
+
+/* Read a fixed-size little-endian 64 bit value at `p` into n and skip it. */
+#define SZ_READ_UINT64(n)                                                            \
+	do {                                                                             \
+		if (end - p < (goffset) sizeof(guint64)) {                                   \
+			msg_debug_archive("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
+			return;                                                                  \
+		}                                                                            \
+		memcpy(&(n), p, sizeof(guint64));                                            \
+		n = GUINT64_FROM_LE(n);                                                      \
+		p += sizeof(guint64);                                                        \
+	} while (0)
+/* Advance `p` by n bytes when that many bytes are available. */
+#define SZ_SKIP_BYTES(n)                                                                                                                             \
+	do {                                                                                                                                             \
+		if (end - p >= (n)) {                                                                                                                        \
+			p += (n);                                                                                                                                \
+		}                                                                                                                                            \
+		else {                                                                                                                                       \
+			msg_debug_archive("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint) (n), (gint) (end - p), G_STRLOC); \
+			return NULL;                                                                                                                             \
+		}                                                                                                                                            \
+	} while (0)
+
+/*
+ * One-byte markers read from a 7zip header; the parsers in this file switch
+ * on these values to decide how the bytes that follow are to be interpreted
+ * (e.g. kSize, kCRC and kEnd inside the pack info section).
+ */
+enum rspamd_7zip_header_mark {
+	kEnd = 0x00,
+	kHeader = 0x01,
+	kArchiveProperties = 0x02,
+	kAdditionalStreamsInfo = 0x03,
+	kMainStreamsInfo = 0x04,
+	kFilesInfo = 0x05,
+	kPackInfo = 0x06,
+	kUnPackInfo = 0x07,
+	kSubStreamsInfo = 0x08,
+	kSize = 0x09,
+	kCRC = 0x0A,
+	kFolder = 0x0B,
+	kCodersUnPackSize = 0x0C,
+	kNumUnPackStream = 0x0D,
+	kEmptyStream = 0x0E,
+	kEmptyFile = 0x0F,
+	kAnti = 0x10,
+	kName = 0x11,
+	kCTime = 0x12,
+	kATime = 0x13,
+	kMTime = 0x14,
+	kWinAttributes = 0x15,
+	kComment = 0x16,
+	kEncodedHeader = 0x17,
+	kStartPos = 0x18,
+	kDummy = 0x19,
+};
+
+
+/* Codec identifiers that denote an encryption method in a 7zip folder */
+#define _7Z_CRYPTO_MAIN_ZIP 0x06F10101        /* Main Zip crypto algo */
+#define _7Z_CRYPTO_RAR_29 0x06F10303          /* Rar29 AES-128 + (modified SHA-1) */
+#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */
+
+/* True when codec_id equals one of the encryption codec identifiers above */
+#define IS_SZ_ENCRYPTED(codec_id) (((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
+								   ((codec_id) == _7Z_CRYPTO_RAR_29) ||   \
+								   ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
+
+/**
+ * Consume a bit vector of `nbits` bits (most significant bit of each byte
+ * first) and count the bits that are set.
+ *
+ * @param task      current task (logging)
+ * @param p         read cursor
+ * @param end       one past the last readable byte
+ * @param arch      archive being processed (not used here)
+ * @param nbits     number of bits to read
+ * @param pbits_set when not NULL, incremented once per set bit
+ * @return cursor after the last consumed byte, or NULL when the data ends
+ *         before all bits were read
+ */
+static const guchar *
+rspamd_7zip_read_bits(struct rspamd_task *task,
+					  const guchar *p, const guchar *end,
+					  struct rspamd_archive *arch, guint nbits,
+					  guint *pbits_set)
+{
+	unsigned mask = 0, avail = 0, i;
+	gboolean bit_set = 0;
+
+	for (i = 0; i < nbits; i++) {
+		if (mask == 0) {
+			/*
+			 * Need a fresh byte: let the macro verify that one is available
+			 * before it is read, then fetch the byte just skipped.
+			 */
+			SZ_SKIP_BYTES(1);
+			avail = *(p - 1);
+			mask = 0x80;
+		}
+
+		bit_set = (avail & mask) ? 1 : 0;
+
+		if (bit_set && pbits_set) {
+			(*pbits_set)++;
+		}
+
+		mask >>= 1;
+	}
+
+	return p;
+}
+
+/**
+ * Skip a 7zip digests (CRC) section.
+ *
+ * @param task         current task (logging)
+ * @param p            read cursor
+ * @param end          one past the last readable byte
+ * @param arch         archive being processed (passed through to helpers)
+ * @param num_streams  number of streams the section describes
+ * @param pdigest_read when not NULL, receives the number of digests skipped
+ * @return cursor after the section, or NULL when the data is truncated or
+ *         the bit vector would cover more than 8192 streams
+ */
+static const guchar *
+rspamd_7zip_read_digest(struct rspamd_task *task,
+						const guchar *p, const guchar *end,
+						struct rspamd_archive *arch,
+						guint64 num_streams,
+						guint *pdigest_read)
+{
+	guchar all_defined;
+	guint64 i;
+	guint num_defined = 0;
+	/*
+	 * BYTE AllAreDefined
+	 *  if (AllAreDefined == 0)
+	 *  {
+	 *    for(NumStreams)
+	 *    BIT Defined
+	 *  }
+	 *  UINT32 CRCs[NumDefined]
+	 */
+	/* Verify that the flag byte exists before reading it */
+	SZ_SKIP_BYTES(1);
+	all_defined = *(p - 1);
+
+	if (all_defined) {
+		num_defined = num_streams;
+	}
+	else {
+		if (num_streams > 8192) {
+			/* Gah */
+			return NULL;
+		}
+
+		p = rspamd_7zip_read_bits(task, p, end, arch, num_streams, &num_defined);
+
+		if (p == NULL) {
+			return NULL;
+		}
+	}
+
+	/* Each defined digest is a 32 bit CRC; the values are not inspected */
+	for (i = 0; i < num_defined; i++) {
+		SZ_SKIP_BYTES(sizeof(guint32));
+	}
+
+	if (pdigest_read) {
+		*pdigest_read = num_defined;
+	}
+
+	return p;
+}
+
+/**
+ * Skip over a 7zip PackInfo section.
+ *
+ * Layout, as consumed here:
+ *   UINT64 PackPos
+ *   UINT64 NumPackStreams
+ *   [ BYTE kSize (0x09), UINT64 PackSizes[NumPackStreams] ]
+ *   [ BYTE kCRC  (0x0A), PackStreamDigests[NumPackStreams] ]
+ *   BYTE kEnd
+ *
+ * @return cursor after the kEnd marker (or wherever the data ran out), or
+ *         NULL when the section is malformed
+ */
+static const guchar *
+rspamd_7zip_read_pack_info(struct rspamd_task *task,
+						   const guchar *p, const guchar *end,
+						   struct rspamd_archive *arch)
+{
+	guint64 pack_pos = 0, pack_streams = 0;
+	guint num_digests = 0;
+
+	SZ_READ_VINT(pack_pos);
+	SZ_READ_VINT(pack_streams);
+
+	while (p != NULL && p < end) {
+		guchar marker = *p;
+
+		SZ_SKIP_BYTES(1);
+		msg_debug_archive("7zip: read pack info %xc", marker);
+
+		if (marker == kEnd) {
+			/* Section terminator */
+			return p;
+		}
+		else if (marker == kSize) {
+			/* We need to skip pack_streams VINTS */
+			guint64 idx, skipped_sz;
+
+			for (idx = 0; idx < pack_streams; idx++) {
+				SZ_READ_VINT(skipped_sz);
+			}
+		}
+		else if (marker == kCRC) {
+			/* CRCs are more complicated */
+			p = rspamd_7zip_read_digest(task, p, end, arch, pack_streams,
+										&num_digests);
+		}
+		else {
+			msg_debug_archive("bad 7zip type: %xc; %s", marker, G_STRLOC);
+
+			return NULL;
+		}
+	}
+
+	return p;
+}
+
+/*
+ * Parses one 7zip Folder record: its coders, bind pairs and packed stream
+ * indices.
+ *
+ * While walking the coders, the codec id of each one is checked with
+ * IS_SZ_ENCRYPTED and RSPAMD_ARCHIVE_ENCRYPTED is raised on arch when it
+ * matches. Everything else is only skipped.
+ *
+ * On success *pnstreams is set to the total number of out streams of the
+ * folder and *ndigests is increased by the number of packed streams
+ * (in streams - out streams + 1).
+ *
+ * Returns the position after the folder, or NULL on malformed/truncated data.
+ *
+ * NOTE(review): for malformed input the packed stream count can be zero or
+ * negative; it is still added to the unsigned *ndigests as before.
+ */
+static const guchar *
+rspamd_7zip_read_folder(struct rspamd_task *task,
+						const guchar *p, const guchar *end,
+						struct rspamd_archive *arch, guint *pnstreams, guint *ndigests)
+{
+	guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
+
+	SZ_READ_VINT(ncoders);
+
+	for (i = 0; i < ncoders && p != NULL && p < end; i++) {
+		guint64 sz, tmp;
+		guchar t;
+		/*
+		 * BYTE
+		 * {
+		 *   0:3 CodecIdSize
+		 *   4:  Is Complex Coder
+		 *   5:  There Are Attributes
+		 *   6:  Reserved
+		 *   7:  There are more alternative methods. (Not used anymore, must be 0).
+		 * }
+		 * BYTE CodecId[CodecIdSize]
+		 * if (Is Complex Coder)
+		 * {
+		 *   UINT64 NumInStreams;
+		 *   UINT64 NumOutStreams;
+		 * }
+		 * if (There Are Attributes)
+		 * {
+		 *   UINT64 PropertiesSize
+		 *   BYTE Properties[PropertiesSize]
+		 * }
+		 */
+		t = *p;
+		SZ_SKIP_BYTES(1);
+		sz = t & 0xF;
+
+		if (sz > (guint64) (end - p)) {
+			/*
+			 * The codec id is read byte by byte below, so make sure all of
+			 * its bytes are inside the buffer before touching them.
+			 */
+			msg_debug_archive("7zip: truncated codec id; %s", G_STRLOC);
+
+			return NULL;
+		}
+
+		/* Codec ID: big endian number built from up to 15 bytes */
+		tmp = 0;
+		for (j = 0; j < sz; j++) {
+			tmp <<= 8;
+			tmp += p[j];
+		}
+
+		msg_debug_archive("7zip: read codec id: %L", tmp);
+
+		if (IS_SZ_ENCRYPTED(tmp)) {
+			arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
+		}
+
+		SZ_SKIP_BYTES(sz);
+
+		if (t & (1u << 4)) {
+			/* Complex */
+			SZ_READ_VINT(tmp); /* InStreams */
+			ninstreams += tmp;
+			SZ_READ_VINT(tmp); /* OutStreams */
+			noutstreams += tmp;
+		}
+		else {
+			/* XXX: is it correct ? */
+			noutstreams++;
+			ninstreams++;
+		}
+		if (t & (1u << 5)) {
+			/* Attributes ... */
+			SZ_READ_VINT(tmp); /* Size of attrs */
+			SZ_SKIP_BYTES(tmp);
+		}
+	}
+
+	if (noutstreams > 1) {
+		/* BindPairs: two integers for each out stream but one */
+		for (i = 0; i < noutstreams - 1; i++) {
+			guint64 tmp;
+
+			SZ_READ_VINT(tmp);
+			SZ_READ_VINT(tmp);
+		}
+	}
+
+	gint64 npacked = (gint64) ninstreams - (gint64) noutstreams + 1;
+	msg_debug_archive("7zip: instreams=%L, outstreams=%L, packed=%L",
+					  ninstreams, noutstreams, npacked);
+
+	if (npacked > 1) {
+		/* Packed stream indices are listed only when there are several */
+		for (i = 0; i < npacked; i++) {
+			guint64 tmp;
+
+			SZ_READ_VINT(tmp);
+		}
+	}
+
+	*pnstreams = noutstreams;
+	(*ndigests) += npacked;
+
+	return p;
+}
+
+/*
+ * Parsing worker behind rspamd_7zip_read_coders_info.
+ *
+ * The per-folder out stream table is heap allocated. Every allocation is
+ * published through *pfolder_nstreams as soon as it is made and is never
+ * freed here on exit, so the caller can release it no matter how this
+ * function returns (including any early return taken inside the SZ_* helper
+ * macros, which are defined elsewhere in this file).
+ *
+ * pnum_folders and pnum_nodigest are filled only when parsing reaches the
+ * common exit (kEnd, an unknown id, or end of data), exactly as before.
+ */
+static const guchar *
+rspamd_7zip_parse_coders_info(struct rspamd_task *task,
+							  const guchar *p, const guchar *end,
+							  struct rspamd_archive *arch,
+							  guint *pnum_folders, guint *pnum_nodigest,
+							  guint **pfolder_nstreams)
+{
+	guint64 num_folders = 0, i, tmp;
+	guchar t;
+	guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
+
+	while (p != NULL && p < end) {
+		/*
+		 * BYTE NID::kFolder  (0x0B)
+		 *  UINT64 NumFolders
+		 *  BYTE External
+		 *  switch(External)
+		 *  {
+		 * 	case 0:
+		 * 	  Folders[NumFolders]
+		 * 	case 1:
+		 * 	  UINT64 DataStreamIndex
+		 *   }
+		 *   BYTE ID::kCodersUnPackSize  (0x0C)
+		 *   for(Folders)
+		 * 	for(Folder.NumOutStreams)
+		 * 	 UINT64 UnPackSize;
+		 *   []
+		 *   BYTE NID::kCRC   (0x0A)
+		 *   UnPackDigests[NumFolders]
+		 *   []
+		 *   BYTE NID::kEnd
+		 */
+
+		t = *p;
+		SZ_SKIP_BYTES(1);
+		msg_debug_archive("7zip: read coders info %xc", t);
+
+		switch (t) {
+		case kFolder:
+			SZ_READ_VINT(num_folders);
+			msg_debug_archive("7zip: nfolders=%L", num_folders);
+
+			if (p >= end) {
+				/* The External byte is missing: do not read past the data */
+				return NULL;
+			}
+
+			if (*p != 0) {
+				/* External folders */
+				SZ_SKIP_BYTES(1);
+				SZ_READ_VINT(tmp);
+			}
+			else {
+				SZ_SKIP_BYTES(1);
+
+				if (num_folders > 8192) {
+					/* Refuse absurd folder counts */
+					return NULL;
+				}
+
+				/* A repeated kFolder record replaces the previous table */
+				g_free(folder_nstreams);
+				/* Zero filled, so folders that are never parsed count 0 streams */
+				folder_nstreams = g_malloc0(sizeof(*folder_nstreams) * num_folders);
+				*pfolder_nstreams = folder_nstreams;
+
+				for (i = 0; i < num_folders && p != NULL && p < end; i++) {
+					p = rspamd_7zip_read_folder(task, p, end, arch,
+												&folder_nstreams[i], &num_digests);
+				}
+			}
+			break;
+		case kCodersUnPackSize:
+			for (i = 0; i < num_folders && p != NULL && p < end; i++) {
+				if (folder_nstreams) {
+					for (guint j = 0; j < folder_nstreams[i]; j++) {
+						SZ_READ_VINT(tmp); /* Unpacked size */
+						msg_debug_archive("7zip: unpacked size "
+										  "(folder=%d, stream=%d) = %L",
+										  (gint) i, j, tmp);
+					}
+				}
+				else {
+					msg_err_task("internal 7zip error");
+				}
+			}
+			break;
+		case kCRC:
+			/*
+			 * The specification allows up to NumFolders digests here, while
+			 * the reference implementation expects one per out stream.
+			 * In practice the digests usually live in the substreams section,
+			 * which in turn needs to know how many digests were absent here;
+			 * that number is reported through pnum_nodigest.
+			 */
+			p = rspamd_7zip_read_digest(task, p, end, arch, num_digests,
+										&digests_read);
+			break;
+		case kEnd:
+			goto end;
+			break;
+		default:
+			p = NULL;
+			msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
+			goto end;
+			break;
+		}
+	}
+
+end:
+
+	if (pnum_nodigest) {
+		*pnum_nodigest = num_digests - digests_read;
+	}
+	if (pnum_folders) {
+		*pnum_folders = num_folders;
+	}
+
+	return p;
+}
+
+/*
+ * Reads the CodersInfo (kUnPackInfo) section of a 7zip header.
+ *
+ * If pnum_folders is not NULL it receives the number of folders announced by
+ * the section; if pnum_nodigest is not NULL it receives the number of digests
+ * that were expected but not present in this section. Both are written only
+ * when the section was walked to its end or an unknown id was met.
+ *
+ * Returns the position after the section, or NULL on malformed data.
+ * The temporary per-folder stream table is always released before returning.
+ */
+static const guchar *
+rspamd_7zip_read_coders_info(struct rspamd_task *task,
+							 const guchar *p, const guchar *end,
+							 struct rspamd_archive *arch,
+							 guint *pnum_folders, guint *pnum_nodigest)
+{
+	guint *folder_nstreams = NULL;
+
+	p = rspamd_7zip_parse_coders_info(task, p, end, arch,
+									  pnum_folders, pnum_nodigest,
+									  &folder_nstreams);
+	/* g_free accepts NULL, so no guard is needed */
+	g_free(folder_nstreams);
+
+	return p;
+}
+
+/*
+ * Steps over the SubStreamsInfo section of a 7zip header:
+ *
+ *  [ BYTE NID::kNumUnPackStream (0x0D), UINT64 NumUnPackStreamsInFolders[NumFolders] ]
+ *  [ BYTE NID::kSize (0x09), UINT64 UnPackSizes[??] ]
+ *  [ BYTE NID::kCRC  (0x0A), Digests[Number of streams with unknown CRC] ]
+ *  BYTE NID::kEnd
+ *
+ * num_folders and num_nodigest are the values produced by
+ * rspamd_7zip_read_coders_info. More than 8192 folders are rejected.
+ * Returns the position after the section, or NULL on malformed data.
+ */
+static const guchar *
+rspamd_7zip_read_substreams_info(struct rspamd_task *task,
+								 const guchar *p, const guchar *end,
+								 struct rspamd_archive *arch,
+								 guint num_folders, guint num_nodigest)
+{
+	guint64 *streams_per_folder;
+	guint folder_idx;
+	guchar nid;
+
+	if (num_folders > 8192) {
+		/* Keeps the stack allocation below bounded */
+		return NULL;
+	}
+
+	streams_per_folder = g_alloca(sizeof(guint64) * num_folders);
+	memset(streams_per_folder, 0, sizeof(guint64) * num_folders);
+
+	for (; p != NULL && p < end;) {
+		nid = *p;
+		SZ_SKIP_BYTES(1);
+
+		msg_debug_archive("7zip: read substream info %xc", nid);
+
+		switch (nid) {
+		case kEnd:
+			return p;
+		case kNumUnPackStream:
+			/* Remember the stream count of every folder for kSize */
+			for (folder_idx = 0; folder_idx < num_folders; folder_idx++) {
+				guint64 nstreams;
+
+				SZ_READ_VINT(nstreams);
+				streams_per_folder[folder_idx] = nstreams;
+			}
+			break;
+		case kSize:
+			/*
+			 * One size per recorded substream; the values are not needed,
+			 * but they have to be consumed to reach what follows.
+			 */
+			for (folder_idx = 0; folder_idx < num_folders; folder_idx++) {
+				for (guint stream_idx = 0;
+					 stream_idx < streams_per_folder[folder_idx];
+					 stream_idx++) {
+					guint64 ignored_sz;
+
+					SZ_READ_VINT(ignored_sz);
+				}
+			}
+			break;
+		case kCRC:
+			/* Digests that were missing from the coders info section */
+			p = rspamd_7zip_read_digest(task, p, end, arch, num_nodigest,
+										NULL);
+			break;
+		default:
+			msg_debug_archive("bad 7zip type: %xc; %s", nid, G_STRLOC);
+
+			return NULL;
+		}
+	}
+
+	return p;
+}
+
+/*
+ * Walks a streams info block of a 7zip header:
+ *
+ *  [ PackInfo ]
+ *  [ CodersInfo ]
+ *  [ SubStreamsInfo ]
+ *  BYTE NID::kEnd
+ *
+ * The folder count and the number of missing digests learned from the
+ * coders info are handed over to the substreams parser.
+ * Returns the position after the block, or NULL on malformed data.
+ */
+static const guchar *
+rspamd_7zip_read_main_streams_info(struct rspamd_task *task,
+								   const guchar *p, const guchar *end,
+								   struct rspamd_archive *arch)
+{
+	guint folders_cnt = 0, missing_digests = 0;
+	guchar nid;
+
+	while (p != NULL && p < end) {
+		nid = *p;
+		SZ_SKIP_BYTES(1);
+		msg_debug_archive("7zip: read main streams info %xc", nid);
+
+		switch (nid) {
+		case kEnd:
+			return p;
+		case kPackInfo:
+			p = rspamd_7zip_read_pack_info(task, p, end, arch);
+			break;
+		case kUnPackInfo:
+			p = rspamd_7zip_read_coders_info(task, p, end, arch, &folders_cnt,
+											 &missing_digests);
+			break;
+		case kSubStreamsInfo:
+			p = rspamd_7zip_read_substreams_info(task, p, end, arch, folders_cnt,
+												 missing_digests);
+			break;
+		default:
+			msg_debug_archive("bad 7zip type: %xc; %s", nid, G_STRLOC);
+
+			return NULL;
+		}
+	}
+
+	return p;
+}
+
+/*
+ * Skips the ArchiveProperties section of a 7zip header.
+ *
+ * The section is a list of (type byte, size, data) records terminated by a
+ * zero type byte. No property is interpreted.
+ *
+ * Returns the position after the terminating zero byte, NULL when the section
+ * is truncated or a property claims more data than is left. A NULL p is
+ * passed through unchanged.
+ */
+static const guchar *
+rspamd_7zip_read_archive_props(struct rspamd_task *task,
+							   const guchar *p, const guchar *end,
+							   struct rspamd_archive *arch)
+{
+	guchar proptype;
+	guint64 proplen;
+
+	/*
+	 * for (;;)
+	 * {
+	 *   BYTE PropertyType;
+	 *   if (aType == 0)
+	 *     break;
+	 *   UINT64 PropertySize;
+	 *   BYTE PropertyData[PropertySize];
+	 * }
+	 */
+
+	if (p != NULL) {
+		if (p >= end) {
+			/* No room even for the first type byte */
+			return NULL;
+		}
+
+		proptype = *p;
+		SZ_SKIP_BYTES(1);
+
+		while (proptype != 0) {
+			SZ_READ_VINT(proplen);
+
+			/*
+			 * Compare lengths instead of computing p + proplen: proplen comes
+			 * from the archive and could push the pointer past the end of the
+			 * address space. Strictly less, as one more byte (the next type)
+			 * must follow the data.
+			 */
+			if (proplen < (guint64) (end - p)) {
+				p += proplen;
+			}
+			else {
+				return NULL;
+			}
+
+			proptype = *p;
+			SZ_SKIP_BYTES(1);
+		}
+	}
+
+	return p;
+}
+
+/*
+ * Converts the 16 bit code units stored in [p, end) into a newly allocated
+ * UTF-8 GString.
+ *
+ * Returns the new string (to be released by the caller with g_string_free),
+ * or NULL if U8_APPEND reports an error for some code point.
+ *
+ * NOTE(review): the input is accessed through a guint16 pointer cast, so the
+ * units are read in host byte order and p is assumed to be suitably aligned;
+ * confirm this holds for all callers/platforms.
+ */
+static GString *
+rspamd_7zip_ucs2_to_utf8(struct rspamd_task *task, const guchar *p,
+						 const guchar *end)
+{
+	GString *res;
+	goffset dest_pos = 0, src_pos = 0;
+	/* Number of whole 16 bit units; a trailing odd byte is ignored */
+	const gsize len = (end - p) / sizeof(guint16);
+	guint16 *up;
+	UChar32 wc;
+	UBool is_error = 0;
+
+	/* 1.5 output bytes per input byte, plus slack for one code point and NUL */
+	res = g_string_sized_new((end - p) * 3 / 2 + sizeof(wc) + 1);
+	up = (guint16 *) p;
+
+	while (src_pos < len) {
+		/* Fetches the next code point and advances src_pos */
+		U16_NEXT(up, src_pos, len, wc);
+
+		/* Zero (and negative) code points are dropped from the output */
+		if (wc > 0) {
+			/* Writes directly into the GString buffer, leaving room for NUL */
+			U8_APPEND(res->str, dest_pos,
+					  res->allocated_len - 1,
+					  wc, is_error);
+		}
+
+		if (is_error) {
+			g_string_free(res, TRUE);
+
+			return NULL;
+		}
+	}
+
+	g_assert(dest_pos < res->allocated_len);
+
+	/* The buffer was filled behind GString's back: fix up length and NUL */
+	res->len = dest_pos;
+	res->str[dest_pos] = '\0';
+
+	return res;
+}
+
+/*
+ * Parses the FilesInfo section of a 7zip header and collects file names.
+ *
+ * The section starts with the number of files, followed by property records
+ * (type byte, size, payload) up to a kEnd byte. Only kName is interpreted:
+ * each zero terminated 16 bit name is converted to UTF-8 and appended to
+ * arch->files as a new struct rspamd_archive_file. Other known properties are
+ * skipped using their announced size.
+ *
+ * Returns the position after the section; NULL on an unknown property id or
+ * truncated data. When a name has no terminator or is empty, parsing stops
+ * and the current (non-NULL) position is returned.
+ */
+static const guchar *
+rspamd_7zip_read_files_info(struct rspamd_task *task,
+							const guchar *p, const guchar *end,
+							struct rspamd_archive *arch)
+{
+	guint64 nfiles = 0, sz, i;
+	guchar t, b;
+	struct rspamd_archive_file *fentry;
+
+	SZ_READ_VINT(nfiles);
+
+	for (; p != NULL && p < end;) {
+		t = *p;
+		SZ_SKIP_BYTES(1);
+
+		msg_debug_archive("7zip: read file data type %xc", t);
+
+		if (t == kEnd) {
+			goto end;
+		}
+
+		/* Every property except kEnd carries its size */
+		SZ_READ_VINT(sz);
+
+		switch (t) {
+		case kEmptyStream:
+		case kEmptyFile:
+		case kAnti: /* AntiFile */
+					/* We don't care about these bits */
+		case kCTime:
+		case kATime:
+		case kMTime:
+			/* Not interesting, but the payload still has to be skipped */
+			if (sz > 0) {
+				SZ_SKIP_BYTES(sz);
+			}
+			break;
+		case kName:
+			/* The only property that is actually used */
+			if (p >= end) {
+				/* The External flag byte is missing: do not read past the data */
+				msg_debug_archive("7zip: truncated names section; %s", G_STRLOC);
+
+				return NULL;
+			}
+
+			b = *p; /* External flag */
+			SZ_SKIP_BYTES(1);
+
+			if (b) {
+				/* TODO: external filenames are not supported, skip the index */
+				guint64 tmp;
+
+				SZ_READ_VINT(tmp);
+			}
+			else {
+				for (i = 0; i < nfiles; i++) {
+					/* Zero terminated 16 bit string: find the terminator first */
+					const guchar *fend = NULL, *tp = p;
+					GString *res;
+
+					while (tp < end - 1) {
+						if (*tp == 0 && *(tp + 1) == 0) {
+							fend = tp;
+							break;
+						}
+
+						tp += 2;
+					}
+
+					if (fend == NULL || fend - p == 0) {
+						/* No terminator or an empty name */
+						msg_debug_archive("bad 7zip name; %s", G_STRLOC);
+						goto end;
+					}
+
+					res = rspamd_7zip_ucs2_to_utf8(task, p, fend);
+
+					if (res != NULL) {
+						fentry = g_malloc0(sizeof(*fentry));
+						fentry->fname = res;
+						g_ptr_array_add(arch->files, fentry);
+						msg_debug_archive("7zip: found file %v", res);
+					}
+					else {
+						msg_debug_archive("bad 7zip name; %s", G_STRLOC);
+					}
+					/* Skip zero terminating character */
+					p = fend + 2;
+				}
+			}
+			break;
+		case kDummy:
+		case kWinAttributes:
+			if (sz > 0) {
+				SZ_SKIP_BYTES(sz);
+			}
+			break;
+		default:
+			p = NULL;
+			msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
+			goto end;
+			break;
+		}
+	}
+
+end:
+	return p;
+}
+
+/*
+ * Reads one top level section of a 7zip header and dispatches to the parser
+ * for its type.
+ *
+ * Returns the position where the next section starts, or NULL when parsing
+ * has to stop: no data left, the final kEnd section, an unknown id, a parser
+ * failure, or an encoded (packed) header. In the last case
+ * RSPAMD_ARCHIVE_CANNOT_READ is raised on arch.
+ */
+static const guchar *
+rspamd_7zip_read_next_section(struct rspamd_task *task,
+							  const guchar *p, const guchar *end,
+							  struct rspamd_archive *arch)
+{
+	guchar t;
+
+	if (p == NULL || p >= end) {
+		/*
+		 * The previous section may have consumed everything up to end;
+		 * the section id must not be read from beyond the buffer.
+		 */
+		return NULL;
+	}
+
+	t = *p;
+
+	SZ_SKIP_BYTES(1);
+
+	msg_debug_archive("7zip: read section %xc", t);
+
+	switch (t) {
+	case kHeader:
+		/* We just skip byte and go further */
+		break;
+	case kEncodedHeader:
+		/*
+		 * In fact, headers are just packed, but we assume it as
+		 * encrypted to distinguish from the normal archives
+		 */
+		msg_debug_archive("7zip: encoded header, needs to be uncompressed");
+		arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ;
+		p = NULL; /* Cannot get anything useful */
+		break;
+	case kArchiveProperties:
+		p = rspamd_7zip_read_archive_props(task, p, end, arch);
+		break;
+	case kMainStreamsInfo:
+		p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
+		break;
+	case kAdditionalStreamsInfo:
+		/* Same layout as the main streams info */
+		p = rspamd_7zip_read_main_streams_info(task, p, end, arch);
+		break;
+	case kFilesInfo:
+		p = rspamd_7zip_read_files_info(task, p, end, arch);
+		break;
+	case kEnd:
+		p = NULL;
+		msg_debug_archive("7zip: read final section");
+		break;
+	default:
+		p = NULL;
+		msg_debug_archive("bad 7zip type: %xc; %s", t, G_STRLOC);
+		break;
+	}
+
+	return p;
+}
+
+/*
+ * Tries to parse a MIME part as a 7zip archive.
+ *
+ * The part must start with the 7z magic and be longer than 12 bytes.
+ * An archive descriptor is then allocated from the task pool (with
+ * rspamd_archive_dtor registered as its destructor), the fixed start header
+ * is stepped over and the header sections found at the announced offset are
+ * parsed one by one. Finally the part is marked as an archive.
+ *
+ * If the start header turns out to be truncated or the section offset points
+ * outside the data, the function returns without touching the part.
+ */
+static void
+rspamd_archive_process_7zip(struct rspamd_task *task,
+							struct rspamd_mime_part *part)
+{
+	struct rspamd_archive *arch;
+	const guchar *start, *p, *end;
+	const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
+	guint64 section_offset = 0, section_length = 0;
+
+	start = part->parsed_data.begin;
+	p = start;
+	end = p + part->parsed_data.len;
+
+	if (end - p <= sizeof(guint64) + sizeof(guint32) ||
+		memcmp(p, sz_magic, sizeof(sz_magic)) != 0) {
+		msg_debug_archive("7z archive is invalid (no 7z magic)");
+
+		return;
+	}
+
+	arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
+	arch->files = g_ptr_array_new();
+	arch->type = RSPAMD_ARCHIVE_7ZIP;
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
+								  arch);
+
+	/* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
+	p += sizeof(guint64) + sizeof(guint32);
+
+	/*
+	 * Offset and length of the header that follows the start header.
+	 * section_length is read only to advance p; it is not used afterwards.
+	 * NOTE(review): SZ_READ_UINT64 is defined outside this chunk; it
+	 * presumably returns from this function on truncated input.
+	 */
+	SZ_READ_UINT64(section_offset);
+	SZ_READ_UINT64(section_length);
+
+	/* Step over one more 32 bit field (a CRC, judging by the message below) */
+	if (end - p > sizeof(guint32)) {
+		p += sizeof(guint32);
+	}
+	else {
+		msg_debug_archive("7z archive is invalid (truncated crc)");
+
+		return;
+	}
+
+	/* section_offset is counted from the end of the start header */
+	if (end - p > section_offset) {
+		p += section_offset;
+	}
+	else {
+		msg_debug_archive("7z archive is invalid (incorrect section offset)");
+
+		return;
+	}
+
+	/* Parse sections until the parser signals the end (or an error) */
+	while ((p = rspamd_7zip_read_next_section(task, p, end, arch)) != NULL)
+		;
+
+	/* The part is treated as an archive regardless of how parsing ended */
+	part->part_type = RSPAMD_MIME_PART_ARCHIVE;
+	part->specific.arch = arch;
+	if (part->cd != NULL) {
+		arch->archive_name = &part->cd->filename;
+	}
+	arch->size = part->parsed_data.len;
+}
+
+/*
+ * Tries to treat a MIME part as a gzip stream and to find the name of the
+ * file stored in it.
+ *
+ * The name is taken from the gzip header when flag bit 3 is set there.
+ * Otherwise it is derived from the Content-Disposition filename of the part:
+ * the text between the last '/' and the last '.', or the name without its
+ * last extension, or the whole name when it has a single dot.
+ *
+ * The part is marked as an archive only when a file name could be obtained;
+ * on any inconsistency in the header the function returns and leaves the
+ * part untouched. RSPAMD_ARCHIVE_ENCRYPTED is raised when flag bit 5 is set.
+ */
+static void
+rspamd_archive_process_gzip(struct rspamd_task *task,
+							struct rspamd_mime_part *part)
+{
+	struct rspamd_archive *arch;
+	const guchar *start, *p, *end;
+	const guchar gz_magic[] = {0x1F, 0x8B};
+	guchar flags;
+
+	start = part->parsed_data.begin;
+	p = start;
+	end = p + part->parsed_data.len;
+
+	if (end - p <= 10 || memcmp(p, gz_magic, sizeof(gz_magic)) != 0) {
+		msg_debug_archive("gzip archive is invalid (no gzip magic)");
+
+		return;
+	}
+
+	arch = rspamd_mempool_alloc0(task->task_pool, sizeof(*arch));
+	arch->files = g_ptr_array_sized_new(1);
+	arch->type = RSPAMD_ARCHIVE_GZIP;
+	if (part->cd) {
+		arch->archive_name = &part->cd->filename;
+	}
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_archive_dtor,
+								  arch);
+
+	flags = p[3];
+
+	if (flags & (1u << 5)) {
+		arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
+	}
+
+	if (flags & (1u << 3)) {
+		/* We have file name presented in archive, try to use it */
+		/* Fixed header is 10 bytes, 2 more when bit 1 ("multipart") is set */
+		const gsize hdr_len = (flags & (1u << 1)) ? 12 : 10;
+
+		if ((gsize) (end - p) < hdr_len) {
+			/*
+			 * Only 11 bytes are guaranteed by the check above, so a 12 byte
+			 * header could move p beyond end.
+			 */
+			msg_debug_archive("gzip archive is invalid, truncated header");
+
+			return;
+		}
+
+		p += hdr_len;
+
+		if (flags & (1u << 2)) {
+			/* Optional section */
+			guint16 optlen = 0;
+
+			RAR_READ_UINT16(optlen);
+
+			/* Compare lengths, never form a pointer outside of the buffer */
+			if ((gsize) (end - p) <= optlen) {
+				msg_debug_archive("gzip archive is invalid, bad extra length: %d",
+								  (int) optlen);
+
+				return;
+			}
+
+			p += optlen;
+		}
+
+		/* Read file name */
+		const guchar *fname_start = p;
+
+		while (p < end) {
+			if (*p == '\0') {
+				if (p > fname_start) {
+					struct rspamd_archive_file *f;
+
+					f = g_malloc0(sizeof(*f));
+
+					rspamd_archive_file_try_utf(task, arch, f,
+												fname_start, p - fname_start);
+
+					if (f->fname) {
+						g_ptr_array_add(arch->files, f);
+
+						if (f->flags & RSPAMD_ARCHIVE_FILE_OBFUSCATED) {
+							arch->flags |= RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES;
+						}
+					}
+					else {
+						/* Invalid filename, skip */
+						g_free(f);
+					}
+
+					goto set;
+				}
+			}
+
+			p++;
+		}
+
+		/* Wrong filename, not zero terminated */
+		msg_debug_archive("gzip archive is invalid, bad filename at pos %d",
+						  (int) (p - start));
+
+		return;
+	}
+
+	/* Fallback, we need to extract file name from archive name if possible */
+	if (part->cd && part->cd->filename.len > 0) {
+		const gchar *dot_pos, *slash_pos;
+
+		dot_pos = rspamd_memrchr(part->cd->filename.begin, '.',
+								 part->cd->filename.len);
+
+		if (dot_pos) {
+			struct rspamd_archive_file *f;
+
+			slash_pos = rspamd_memrchr(part->cd->filename.begin, '/',
+									   part->cd->filename.len);
+
+			if (slash_pos && slash_pos < dot_pos) {
+				/* Take what lies between the last slash and the last dot */
+				f = g_malloc0(sizeof(*f));
+				f->fname = g_string_sized_new(dot_pos - slash_pos);
+				g_string_append_len(f->fname, slash_pos + 1,
+									dot_pos - slash_pos - 1);
+
+				msg_debug_archive("fallback to gzip filename based on cd: %v",
+								  f->fname);
+
+				g_ptr_array_add(arch->files, f);
+
+				goto set;
+			}
+			else {
+				const gchar *fname_start = part->cd->filename.begin;
+
+				f = g_malloc0(sizeof(*f));
+
+				/* The first dot differs from the last one: several dots */
+				if (memchr(fname_start, '.', part->cd->filename.len) != dot_pos) {
+					/* Double dots, something like foo.exe.gz */
+					f->fname = g_string_sized_new(dot_pos - fname_start);
+					g_string_append_len(f->fname, fname_start,
+										dot_pos - fname_start);
+				}
+				else {
+					/* Single dot, something like foo.gzz */
+					f->fname = g_string_sized_new(part->cd->filename.len);
+					g_string_append_len(f->fname, fname_start,
+										part->cd->filename.len);
+				}
+
+				msg_debug_archive("fallback to gzip filename based on cd: %v",
+								  f->fname);
+
+				g_ptr_array_add(arch->files, f);
+
+				goto set;
+			}
+		}
+	}
+
+	return;
+
+set:
+	/* Set archive data */
+	part->part_type = RSPAMD_MIME_PART_ARCHIVE;
+	part->specific.arch = arch;
+	arch->size = part->parsed_data.len;
+}
+
+/*
+ * Decides whether a MIME part looks like an archive of the given kind.
+ *
+ * str is the short type name ("zip", "rar", ...). It is searched in the
+ * subtype of an application content type and compared with the extension of
+ * the Content-Disposition filename. magic_start/magic_len describe the
+ * expected leading bytes of the data; magic_start may be NULL.
+ *
+ * When the content type or the extension names this kind of archive, the
+ * answer is TRUE unless a magic was supplied and the data does not start with
+ * it (see #1848). When neither names it, the answer is TRUE only if a magic
+ * was supplied and matches.
+ */
+static gboolean
+rspamd_archive_cheat_detect(struct rspamd_mime_part *part, const gchar *str,
+							const guchar *magic_start, gsize magic_len)
+{
+	struct rspamd_content_type *ctype = part->ct;
+	rspamd_ftok_t app_tok;
+	gboolean magic_ok = FALSE;
+
+	/* The magic test is the same wherever it is needed, so do it once */
+	if (magic_start != NULL) {
+		magic_ok = part->parsed_data.len > magic_len &&
+				   memcmp(part->parsed_data.begin, magic_start, magic_len) == 0;
+	}
+
+	RSPAMD_FTOK_ASSIGN(&app_tok, "application");
+
+	if (ctype && ctype->type.len && ctype->subtype.len > 0 &&
+		rspamd_ftok_cmp(&ctype->type, &app_tok) == 0 &&
+		rspamd_substring_search_caseless(ctype->subtype.begin, ctype->subtype.len,
+										 str, strlen(str)) != -1) {
+		/* Named by content type: still need to check magic, see #1848 */
+		return (magic_start != NULL) ? magic_ok : TRUE;
+	}
+
+	if (part->cd) {
+		const rspamd_ftok_t *fname = &part->cd->filename;
+		const gsize ext_len = strlen(str);
+
+		if (fname && fname->len > ext_len) {
+			const gchar *tail = fname->begin + fname->len - ext_len;
+
+			if (rspamd_lc_cmp(tail, str, ext_len) == 0 && *(tail - 1) == '.') {
+				/* Named by extension: without the magic this type is refused */
+				return (magic_start != NULL) ? magic_ok : TRUE;
+			}
+		}
+	}
+
+	/* Nothing names the type: only a matching magic can vouch for it */
+	return magic_ok;
+}
+
+/*
+ * Goes through all parts of the task's message and runs the archive parsers
+ * on every non-empty part whose type is still undefined. Detection is tried
+ * in the order zip, rar, 7z, gzip and stops at the first kind that matches.
+ *
+ * A part that ends up as an archive while its content type is flagged as
+ * text is reported, and its content type is marked as broken unless it was
+ * missing altogether.
+ */
+void rspamd_archives_process(struct rspamd_task *task)
+{
+	static const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
+	static const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
+	static const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
+	static const guchar gz_magic[] = {0x1F, 0x8B, 0x08};
+	struct rspamd_mime_part *part;
+	guint i;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		if (part->part_type == RSPAMD_MIME_PART_UNDEFINED &&
+			part->parsed_data.len > 0) {
+
+			if (rspamd_archive_cheat_detect(part, "zip",
+											zip_magic, sizeof(zip_magic))) {
+				rspamd_archive_process_zip(task, part);
+			}
+			else if (rspamd_archive_cheat_detect(part, "rar",
+												 rar_magic, sizeof(rar_magic))) {
+				rspamd_archive_process_rar(task, part);
+			}
+			else if (rspamd_archive_cheat_detect(part, "7z",
+												 sz_magic, sizeof(sz_magic))) {
+				rspamd_archive_process_7zip(task, part);
+			}
+			else if (rspamd_archive_cheat_detect(part, "gz",
+												 gz_magic, sizeof(gz_magic))) {
+				rspamd_archive_process_gzip(task, part);
+			}
+
+			/* An archive announced as text has a wrong content type */
+			if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
+				if (part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
+					part->specific.arch) {
+					struct rspamd_archive *found = part->specific.arch;
+
+					msg_info_task("found %s archive with incorrect content-type: %T/%T",
+								  rspamd_archive_type_str(found->type),
+								  &part->ct->type, &part->ct->subtype);
+
+					if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
+						part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+					}
+				}
+			}
+		}
+	}
+}
+
+
+/*
+ * Maps an archive type to its short textual name.
+ * Values outside of the enumeration yield "unknown".
+ */
+const gchar *
+rspamd_archive_type_str(enum rspamd_archive_type type)
+{
+	switch (type) {
+	case RSPAMD_ARCHIVE_ZIP:
+		return "zip";
+	case RSPAMD_ARCHIVE_RAR:
+		return "rar";
+	case RSPAMD_ARCHIVE_7ZIP:
+		return "7z";
+	case RSPAMD_ARCHIVE_GZIP:
+		return "gz";
+	}
+
+	return "unknown";
+}
diff --git a/src/libmime/archives.h b/src/libmime/archives.h
new file mode 100644
index 0000000..56beb62
--- /dev/null
+++ b/src/libmime/archives.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_ARCHIVES_H_
+#define SRC_LIBMIME_ARCHIVES_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Archive formats recognised by the archive processing code.
+ * rspamd_archive_type_str() gives the textual name of each value.
+ */
+enum rspamd_archive_type {
+	RSPAMD_ARCHIVE_ZIP,  /* reported as "zip" */
+	RSPAMD_ARCHIVE_RAR,  /* reported as "rar" */
+	RSPAMD_ARCHIVE_7ZIP, /* reported as "7z" */
+	RSPAMD_ARCHIVE_GZIP, /* reported as "gz" */
+};
+
+/* Bit flags describing an archive as a whole (struct rspamd_archive.flags) */
+enum rspamd_archive_flags {
+	/* The archive uses encryption (e.g. an encrypting 7zip codec was seen) */
+	RSPAMD_ARCHIVE_ENCRYPTED = (1u << 0u),
+	/* The listing cannot be read (e.g. the 7zip header itself is encoded) */
+	RSPAMD_ARCHIVE_CANNOT_READ = (1u << 1u),
+	/* At least one entry carries RSPAMD_ARCHIVE_FILE_OBFUSCATED */
+	RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES = (1u << 2u),
+};
+
+/* Bit flags describing a single archive entry (struct rspamd_archive_file.flags) */
+enum rspamd_archive_file_flags {
+	/* NOTE(review): presumably set for individually encrypted entries - confirm in the parsers */
+	RSPAMD_ARCHIVE_FILE_ENCRYPTED = (1u << 0u),
+	/* When seen on an entry, RSPAMD_ARCHIVE_HAS_OBFUSCATED_FILES is raised on the archive */
+	RSPAMD_ARCHIVE_FILE_OBFUSCATED = (1u << 1u),
+};
+
+/* One entry (file) found inside an archive */
+struct rspamd_archive_file {
+	GString *fname; /* Name of the entry */
+	/* NOTE(review): sizes are presumably in bytes and filled only by parsers that know them - confirm */
+	gsize compressed_size;
+	gsize uncompressed_size;
+	enum rspamd_archive_file_flags flags; /* Per entry flags */
+};
+
+/* Description of an archive found in a MIME part */
+struct rspamd_archive {
+	enum rspamd_archive_type type; /* Detected archive format */
+	/* Points to the Content-Disposition filename of the part, if there is one */
+	const rspamd_ftok_t *archive_name;
+	gsize size; /* Length of the parsed data of the part */
+	enum rspamd_archive_flags flags; /* Archive wide flags */
+	GPtrArray *files; /* Array of struct rspamd_archive_file */
+};
+
+/**
+ * Process archives from a worker task
+ */
+void rspamd_archives_process(struct rspamd_task *task);
+
+/**
+ * Get textual representation of an archive's type
+ */
+const gchar *rspamd_archive_type_str(enum rspamd_archive_type type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_ARCHIVES_H_ */
diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
new file mode 100644
index 0000000..765cb87
--- /dev/null
+++ b/src/libmime/content_type.c
@@ -0,0 +1,884 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libmime/content_type.h"
+#include "smtp_parsers.h"
+#include "utlist.h"
+#include "libserver/url.h"
+#include "libmime/mime_encoding.h"
+
+/*
+ * Decodes an RFC 2231 encoded parameter value found in
+ * [value_start, value_end) and stores the result in param->value.
+ *
+ * Without any single quote the value is treated as plain percent encoding
+ * and decoded in place. Otherwise it is expected to look like
+ * charset'language'data or charset'data: the data is percent decoded in
+ * place and then converted to UTF-8 from the announced charset, or from a
+ * charset guessed from the content when none could be determined.
+ *
+ * On success RSPAMD_CONTENT_PARAM_RFC2231 is added to param->flags and TRUE
+ * is returned; FALSE means that no charset could be found or the conversion
+ * failed.
+ */
+static gboolean
+rspamd_rfc2231_decode(rspamd_mempool_t *pool,
+					  struct rspamd_content_type_param *param,
+					  gchar *value_start, gchar *value_end)
+{
+	gchar *first_quote = memchr(value_start, '\'', value_end - value_start);
+
+	if (first_quote == NULL) {
+		/* Plain percent encoding, nothing to convert */
+		param->value.len = rspamd_url_decode(value_start, value_start,
+											 value_end - value_start);
+		param->value.begin = value_start;
+		param->flags |= RSPAMD_CONTENT_PARAM_RFC2231;
+
+		return TRUE;
+	}
+
+	/* Everything before the first quote names the charset */
+	const gchar *charset = NULL;
+	rspamd_ftok_t charset_tok;
+
+	charset_tok.begin = value_start;
+	charset_tok.len = first_quote - value_start;
+
+	if (charset_tok.len > 0) {
+		charset = rspamd_mime_detect_charset(&charset_tok, pool);
+	}
+
+	/* The language part is optional: skip it when a second quote exists */
+	gchar *data = first_quote + 1;
+	gchar *second_quote = memchr(data, '\'', value_end - data);
+
+	if (second_quote) {
+		data = second_quote + 1;
+	}
+
+	/* Perform percent decoding */
+	gsize decoded_len = rspamd_url_decode(data, data, value_end - data);
+	GError *err = NULL;
+
+	if (charset == NULL) {
+		/* No usable charset was announced, try to guess it */
+		charset = rspamd_mime_charset_find_by_content(data, decoded_len, TRUE);
+	}
+
+	if (charset == NULL) {
+		msg_warn_pool("cannot convert parameter from charset %T", &charset_tok);
+
+		return FALSE;
+	}
+
+	param->value.begin = rspamd_mime_text_to_utf8(pool,
+												  data, decoded_len,
+												  charset, &param->value.len, &err);
+
+	if (param->value.begin == NULL) {
+		msg_warn_pool("cannot convert parameter from charset %s: %e",
+					  charset, err);
+
+		if (err) {
+			g_error_free(err);
+		}
+
+		return FALSE;
+	}
+
+	param->flags |= RSPAMD_CONTENT_PARAM_RFC2231;
+
+	return TRUE;
+}
+
+/*
+ * Checks whether a parameter name uses the RFC 2231 '*' syntax and, if so,
+ * fills param accordingly.
+ *
+ * Returns FALSE when the name has no '*' or the piece number cannot be
+ * parsed; the caller is then expected to fill param itself. Returns TRUE
+ * when the name was recognised as one of the three forms listed below.
+ *
+ * NOTE(review): in the first form TRUE is returned even if decoding of the
+ * value fails, in which case neither param->name nor param->value is set
+ * here - confirm that callers cope with an empty parameter.
+ */
+static gboolean
+rspamd_param_maybe_rfc2231_process(rspamd_mempool_t *pool,
+								   struct rspamd_content_type_param *param,
+								   gchar *name_start, gchar *name_end,
+								   gchar *value_start, gchar *value_end)
+{
+	const gchar *star_pos;
+
+	star_pos = memchr(name_start, '*', name_end - name_start);
+
+	if (star_pos == NULL) {
+		return FALSE;
+	}
+
+	/* We have three possibilities here:
+	 * 1. name* (just name + 2231 encoding)
+	 * 2. name*(\d+) (piecewise stuff but no rfc2231 encoding)
+	 * 3. name*(\d+)* (piecewise stuff and rfc2231 encoding)
+	 */
+
+	if (star_pos == name_end - 1) {
+		/* First */
+		if (rspamd_rfc2231_decode(pool, param, value_start, value_end)) {
+			param->name.begin = name_start;
+			/* Drop the trailing '*' from the name */
+			param->name.len = name_end - name_start - 1;
+		}
+	}
+	else if (*(name_end - 1) == '*') {
+		/* Third */
+		/* Check number */
+		gulong tmp;
+
+		/* The digits lie between the first '*' and the trailing '*' */
+		if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 2, &tmp)) {
+			return FALSE;
+		}
+
+		param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE | RSPAMD_CONTENT_PARAM_RFC2231;
+		param->rfc2231_id = tmp;
+		param->name.begin = name_start;
+		param->name.len = star_pos - name_start;
+		/* The raw piece is kept; decoding happens after all pieces are joined */
+		param->value.begin = value_start;
+		param->value.len = value_end - value_start;
+
+		/* Deal with that later... */
+	}
+	else {
+		/* Second case */
+		gulong tmp;
+
+		/* The digits run from the '*' to the end of the name */
+		if (!rspamd_strtoul(star_pos + 1, name_end - star_pos - 1, &tmp)) {
+			return FALSE;
+		}
+
+		param->flags |= RSPAMD_CONTENT_PARAM_PIECEWISE;
+		param->rfc2231_id = tmp;
+		param->name.begin = name_start;
+		param->name.len = star_pos - name_start;
+		param->value.begin = value_start;
+		param->value.len = value_end - value_start;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Comparison callback used with DL_SORT to order the pieces of a piecewise
+ * parameter by their RFC 2231 piece number (ascending).
+ * The result is simply the difference of the two piece numbers.
+ */
+static gint32
+rspamd_cmp_pieces(struct rspamd_content_type_param *p1, struct rspamd_content_type_param *p2)
+{
+	return p1->rfc2231_id - p2->rfc2231_id;
+}
+
+/*
+ * Finalises all parameters collected in htb and passes each of them to proc.
+ *
+ * For every entry of the hash table:
+ *  - if the stored parameter is piecewise, the values of all its pieces are
+ *    joined in piece number order into one buffer from pool, and RFC 2231
+ *    decoded when the parameter is flagged so (a failed decode marks the
+ *    parameter as broken and keeps the joined raw value);
+ *  - a non-empty value is run through rspamd_mime_header_decode; invalid
+ *    UTF-8 reported by it marks the parameter as broken;
+ *  - proc(pool, param, procd) is called.
+ *
+ * Nothing is done when htb is NULL.
+ */
+static void
+rspamd_postprocess_ct_attributes(rspamd_mempool_t *pool,
+								 GHashTable *htb,
+								 void (*proc)(rspamd_mempool_t *, struct rspamd_content_type_param *, gpointer ud),
+								 gpointer procd)
+{
+	GHashTableIter it;
+	gpointer k, v;
+	struct rspamd_content_type_param *param, *sorted, *cur;
+
+	if (htb == NULL) {
+		return;
+	}
+
+	g_hash_table_iter_init(&it, htb);
+
+	while (g_hash_table_iter_next(&it, &k, &v)) {
+		param = (struct rspamd_content_type_param *) v;
+
+		if (param->flags & RSPAMD_CONTENT_PARAM_PIECEWISE) {
+			/* Reconstruct param */
+			gsize tlen = 0;
+			gchar *ndata, *pos;
+
+			/* Sort a copy of the head pointer; param keeps the stored element */
+			sorted = param;
+			DL_SORT(sorted, rspamd_cmp_pieces);
+
+			/* First pass: total length of all pieces */
+			DL_FOREACH(sorted, cur)
+			{
+				tlen += cur->value.len;
+			}
+
+			ndata = rspamd_mempool_alloc(pool, tlen);
+			pos = ndata;
+
+			/* Second pass: concatenate the pieces in order */
+			DL_FOREACH(sorted, cur)
+			{
+				memcpy(pos, cur->value.begin, cur->value.len);
+				pos += cur->value.len;
+			}
+
+			if (param->flags & RSPAMD_CONTENT_PARAM_RFC2231) {
+				if (!rspamd_rfc2231_decode(pool, param,
+										   ndata, pos)) {
+					/* Keep the joined but undecoded data */
+					param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
+					param->value.begin = ndata;
+					param->value.len = tlen;
+				}
+			}
+			else {
+				param->value.begin = ndata;
+				param->value.len = tlen;
+			}
+
+			/* Detach from list */
+			param->next = NULL;
+			param->prev = param;
+		}
+
+		gboolean invalid_utf = FALSE;
+
+		if (param->value.begin != NULL && param->value.len > 0) {
+			param->value.begin = rspamd_mime_header_decode(pool, param->value.begin,
+														   param->value.len, &invalid_utf);
+			/* The decoded value is treated as a NUL terminated string */
+			param->value.len = strlen(param->value.begin);
+		}
+
+		if (invalid_utf) {
+			param->flags |= RSPAMD_CONTENT_PARAM_BROKEN;
+		}
+
+		proc(pool, param, procd);
+	}
+}
+
+/*
+ * Per parameter callback for Content-Type attributes; ud is the
+ * struct rspamd_content_type being filled.
+ *
+ *  - "charset" is recorded in ct->charset;
+ *  - "boundary" is recorded twice: as is in ct->orig_boundary and as a
+ *    lowercased copy (allocated from pool) in ct->boundary;
+ *  - the value of any other parameter except "name" is lowercased in place.
+ */
+static void
+rspamd_content_type_postprocess(rspamd_mempool_t *pool,
+								struct rspamd_content_type_param *param,
+								gpointer ud)
+{
+	struct rspamd_content_type *ct = (struct rspamd_content_type *) ud;
+	rspamd_ftok_t key;
+
+	RSPAMD_FTOK_ASSIGN(&key, "charset");
+
+	if (rspamd_ftok_icase_equal(&param->name, &key)) {
+		/* Adjust charset */
+		ct->charset.begin = param->value.begin;
+		ct->charset.len = param->value.len;
+
+		return;
+	}
+
+	RSPAMD_FTOK_ASSIGN(&key, "boundary");
+
+	if (rspamd_ftok_icase_equal(&param->name, &key)) {
+		/* Boundaries are matched in lowercase, so keep a lowercased copy */
+		gchar *lowered = rspamd_mempool_alloc(pool, param->value.len);
+
+		memcpy(lowered, param->value.begin, param->value.len);
+		rspamd_str_lc(lowered, param->value.len);
+		ct->boundary.begin = lowered;
+		ct->boundary.len = param->value.len;
+		/* Preserve original (case sensitive) boundary */
+		ct->orig_boundary.begin = param->value.begin;
+		ct->orig_boundary.len = param->value.len;
+
+		return;
+	}
+
+	RSPAMD_FTOK_ASSIGN(&key, "name");
+
+	if (!rspamd_ftok_icase_equal(&param->name, &key)) {
+		/* Neither charset, boundary nor name: just lowercase the value */
+		rspamd_str_lc_utf8((gchar *) param->value.begin, param->value.len);
+	}
+}
+
+/*
+ * Attribute post-processing callback for Content-Disposition headers.
+ *
+ * `ud` is the owning struct rspamd_content_disposition. When the parameter
+ * is named `filename` (case-insensitively), its value token is mirrored
+ * into cd->filename; other parameters are left untouched. `pool` is unused.
+ */
+static void
+rspamd_content_disposition_postprocess(rspamd_mempool_t *pool,
+									   struct rspamd_content_type_param *param,
+									   gpointer ud)
+{
+	rspamd_ftok_t srch;
+	struct rspamd_content_disposition *cd = (struct rspamd_content_disposition *) ud;
+
+	/* Same helper as the Content-Type callback: no hand-counted length */
+	RSPAMD_FTOK_ASSIGN(&srch, "filename");
+
+	if (rspamd_ftok_icase_equal(&param->name, &srch)) {
+		/* Adjust filename */
+		cd->filename.begin = param->value.begin;
+		cd->filename.len = param->value.len;
+	}
+}
+
+/*
+ * Registers one `name=value` parameter on a content type.
+ *
+ * The name is lowercased in place. Unless the RFC 2231 helper fills the
+ * parameter itself, the stored tokens point straight into the caller's
+ * buffers. Parameters sharing a name are chained into one list; only the
+ * first element of a chain is inserted into ct->attrs (created on demand).
+ */
+void rspamd_content_type_add_param(rspamd_mempool_t *pool,
+								   struct rspamd_content_type *ct,
+								   gchar *name_start, gchar *name_end,
+								   gchar *value_start, gchar *value_end)
+{
+	struct rspamd_content_type_param *entry, *head = NULL;
+	rspamd_ftok_t key;
+	gboolean first_seen;
+
+	g_assert(ct != NULL);
+
+	entry = rspamd_mempool_alloc0(pool, sizeof(*entry));
+	rspamd_str_lc(name_start, name_end - name_start);
+
+	if (!rspamd_param_maybe_rfc2231_process(pool, entry, name_start,
+											name_end, value_start, value_end)) {
+		/* Plain parameter: reference the input as is */
+		entry->name.begin = name_start;
+		entry->name.len = name_end - name_start;
+		entry->value.begin = value_start;
+		entry->value.len = value_end - value_start;
+	}
+
+	key.begin = entry->name.begin;
+	key.len = entry->name.len;
+
+	if (ct->attrs == NULL) {
+		ct->attrs = g_hash_table_new(rspamd_ftok_icase_hash,
+									 rspamd_ftok_icase_equal);
+	}
+	else {
+		head = g_hash_table_lookup(ct->attrs, &key);
+	}
+
+	first_seen = (head == NULL);
+	DL_APPEND(head, entry);
+
+	if (first_seen) {
+		/* New name: the entry becomes the head stored in the table */
+		g_hash_table_insert(ct->attrs, &entry->name, entry);
+	}
+}
+
+/*
+ * State machine that splits a Content-Type header value into type, subtype
+ * and parameters.
+ *
+ * @param in mutable header value; parameter names are lowercased in place by
+ * rspamd_content_type_add_param() and stored tokens may point into it
+ * @param len number of bytes of `in` to parse
+ * @param pool memory pool for the result and the lowercased type/subtype
+ * @return pool-allocated content type, or NULL if no type token was found
+ */
+static struct rspamd_content_type *
+rspamd_content_type_parser(gchar *in, gsize len, rspamd_mempool_t *pool)
+{
+	/* obraces/ebraces count '(' and ')' of a comment, qlen is the length of the last quoted string */
+	guint obraces = 0, ebraces = 0, qlen = 0;
+	/* p is the cursor, c marks the start of the current token */
+	gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
+	struct rspamd_content_type *res = NULL, val;
+	gboolean eqsign_seen = FALSE;
+	enum {
+		parse_type,
+		parse_subtype,
+		parse_after_subtype,
+		parse_param_name,
+		parse_param_after_name,
+		parse_param_value,
+		parse_param_value_after_quote,
+		parse_space,
+		parse_quoted,
+		parse_comment,
+	} state = parse_space,
+	  next_state = parse_type;
+
+	p = in;
+	c = p;
+	end = p + len;
+	memset(&val, 0, sizeof(val));
+	val.cpy = in;
+
+	while (p < end) {
+		switch (state) {
+		case parse_type:
+			if (g_ascii_isspace(*p) || *p == ';') {
+				/* We have type without subtype */
+				val.type.begin = c;
+				val.type.len = p - c;
+				state = parse_after_subtype;
+			}
+			else if (*p == '/') {
+				val.type.begin = c;
+				val.type.len = p - c;
+				/* Spaces and comments after the slash are skipped first */
+				state = parse_space;
+				next_state = parse_subtype;
+				p++;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_subtype:
+			if (g_ascii_isspace(*p) || *p == ';') {
+				val.subtype.begin = c;
+				val.subtype.len = p - c;
+				state = parse_after_subtype;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_after_subtype:
+			/* Skip separators until a comment or a parameter name starts */
+			if (*p == ';' || g_ascii_isspace(*p)) {
+				p++;
+			}
+			else if (*p == '(') {
+				c = p;
+				state = parse_comment;
+				next_state = parse_param_name;
+				obraces = 1;
+				ebraces = 0;
+				pname_start = NULL;
+				pname_end = NULL;
+				eqsign_seen = FALSE;
+				p++;
+			}
+			else {
+				c = p;
+				state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+				eqsign_seen = FALSE;
+			}
+			break;
+		case parse_param_name:
+			/* The name ends at '=' or at the first whitespace */
+			if (*p == '=') {
+				pname_start = c;
+				pname_end = p;
+				state = parse_param_after_name;
+				eqsign_seen = TRUE;
+				p++;
+			}
+			else if (g_ascii_isspace(*p)) {
+				pname_start = c;
+				pname_end = p;
+				state = parse_param_after_name;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_param_after_name:
+			if (g_ascii_isspace(*p)) {
+				p++;
+			}
+			else if (*p == '=') {
+				if (eqsign_seen) {
+					/* Treat as value start */
+					c = p;
+					eqsign_seen = FALSE;
+					state = parse_param_value;
+					p++;
+				}
+				else {
+					eqsign_seen = TRUE;
+					p++;
+				}
+			}
+			else {
+				if (eqsign_seen) {
+					state = parse_param_value;
+					c = p;
+				}
+				else {
+					/* Invalid parameter without value */
+					c = p;
+					state = parse_param_name;
+					pname_start = NULL;
+					pname_end = NULL;
+				}
+			}
+			break;
+		case parse_param_value:
+			if (*p == '"') {
+				/* The value token restarts right after the opening quote */
+				p++;
+				c = p;
+				state = parse_quoted;
+				next_state = parse_param_value_after_quote;
+			}
+			else if (g_ascii_isspace(*p)) {
+				if (pname_start && pname_end && pname_end > pname_start) {
+					rspamd_content_type_add_param(pool, &val, pname_start,
+												  pname_end, c, p);
+				}
+
+				state = parse_space;
+				next_state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else if (*p == '(') {
+				/* A comment terminates an unquoted value */
+				if (pname_start && pname_end && pname_end > pname_start) {
+					rspamd_content_type_add_param(pool, &val, pname_start,
+												  pname_end, c, p);
+				}
+
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				state = parse_comment;
+				next_state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else if (*p == ';') {
+				if (pname_start && pname_end && pname_end > pname_start) {
+					rspamd_content_type_add_param(pool, &val, pname_start,
+												  pname_end, c, p);
+				}
+
+				p++;
+				state = parse_space;
+				next_state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_param_value_after_quote:
+			/* Entered with p on the closing quote; the value is c .. c + qlen */
+			if (pname_start && pname_end && pname_end > pname_start) {
+				rspamd_content_type_add_param(pool, &val, pname_start,
+											  pname_end, c, c + qlen);
+			}
+
+			if (*p == '"') {
+				p++;
+
+				if (p == end) {
+					/* Last quote: done... */
+					state = parse_space;
+					break;
+				}
+
+				if (*p == ';') {
+					p++;
+					state = parse_space;
+					next_state = parse_param_name;
+					pname_start = NULL;
+					pname_end = NULL;
+					continue;
+				}
+			}
+
+			/* We should not normally be here in fact */
+			if (g_ascii_isspace(*p)) {
+				state = parse_space;
+				next_state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else if (*p == '(') {
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				state = parse_comment;
+				next_state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+			}
+			else {
+				state = parse_param_name;
+				pname_start = NULL;
+				pname_end = NULL;
+				c = p;
+			}
+			break;
+		case parse_quoted:
+			if (*p == '\\') {
+				/* Quoted pair */
+				if (p + 1 < end) {
+					p += 2;
+				}
+				else {
+					p++;
+				}
+			}
+			else if (*p == '"') {
+				/* The closing quote is not consumed here: next_state sees it */
+				qlen = p - c;
+				state = next_state;
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_comment:
+			/* Comments may nest: leave only when the parentheses balance */
+			if (*p == '(') {
+				obraces++;
+				p++;
+			}
+			else if (*p == ')') {
+				ebraces++;
+				p++;
+
+				if (ebraces == obraces && p < end) {
+					if (g_ascii_isspace(*p)) {
+						state = parse_space;
+					}
+					else {
+						c = p;
+						state = next_state;
+					}
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case parse_space:
+			/* Skip whitespace and comments, then resume at next_state */
+			if (g_ascii_isspace(*p)) {
+				p++;
+			}
+			else if (*p == '(') {
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				state = parse_comment;
+			}
+			else {
+				c = p;
+				state = next_state;
+			}
+			break;
+		}
+	}
+
+	/* Process leftover */
+	switch (state) {
+	case parse_type:
+		val.type.begin = c;
+		val.type.len = p - c;
+		break;
+	case parse_subtype:
+		val.subtype.begin = c;
+		val.subtype.len = p - c;
+		break;
+	case parse_param_value:
+		if (pname_start && pname_end && pname_end > pname_start) {
+			/* Drop a trailing ';' from the value, if there is one */
+			if (p > c && *(p - 1) == ';') {
+				p--;
+			}
+
+			rspamd_content_type_add_param(pool, &val, pname_start,
+										  pname_end, c, p);
+		}
+		break;
+	case parse_param_value_after_quote:
+		if (pname_start && pname_end && pname_end > pname_start) {
+			rspamd_content_type_add_param(pool, &val, pname_start,
+										  pname_end, c, c + qlen);
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Without a type token nothing is returned */
+	if (val.type.len > 0) {
+		gchar *tmp;
+
+		res = rspamd_mempool_alloc(pool, sizeof(val));
+		memcpy(res, &val, sizeof(val));
+
+		/*
+		 * Lowercase type and subtype as they are specified as case insensitive
+		 * in rfc2045 section 5.1
+		 */
+		tmp = rspamd_mempool_alloc(pool, val.type.len);
+		memcpy(tmp, val.type.begin, val.type.len);
+		rspamd_str_lc(tmp, val.type.len);
+		res->type.begin = tmp;
+
+		if (val.subtype.len > 0) {
+			tmp = rspamd_mempool_alloc(pool, val.subtype.len);
+			memcpy(tmp, val.subtype.begin, val.subtype.len);
+			rspamd_str_lc(tmp, val.subtype.len);
+			res->subtype.begin = tmp;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Parses a Content-Type header value.
+ *
+ * The input is copied into the pool before parsing, so `in` itself is never
+ * modified. After parsing, attributes are post-processed (charset and
+ * boundary extraction), common malformed types are repaired and the
+ * classification flags are filled in.
+ *
+ * @param in header value (need not be NUL terminated)
+ * @param len length of `in`
+ * @param pool memory pool owning the result
+ * @return parsed content type, or NULL if no type could be extracted
+ */
+struct rspamd_content_type *
+rspamd_content_type_parse(const gchar *in,
+						  gsize len, rspamd_mempool_t *pool)
+{
+	struct rspamd_content_type *res = NULL;
+	rspamd_ftok_t srch;
+	gchar *cpy;
+
+	/* The parser lowercases parameter names in place: work on a copy */
+	cpy = rspamd_mempool_alloc(pool, len + 1);
+	rspamd_strlcpy(cpy, in, len + 1);
+
+	if ((res = rspamd_content_type_parser(cpy, len, pool)) != NULL) {
+		if (res->attrs) {
+			rspamd_mempool_add_destructor(pool,
+										  (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs);
+
+			rspamd_postprocess_ct_attributes(pool, res->attrs,
+											 rspamd_content_type_postprocess, res);
+		}
+
+		/* Now do some hacks to work with broken content types */
+		if (res->subtype.len == 0) {
+			res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+			RSPAMD_FTOK_ASSIGN(&srch, "text");
+
+			if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+				/* Workaround for Content-Type: text */
+				/*
+				 * Assume text/plain: the subtype itself must be set, assigning
+				 * to the scratch token would leave it empty
+				 */
+				RSPAMD_FTOK_ASSIGN(&res->subtype, "plain");
+			}
+			else {
+				RSPAMD_FTOK_ASSIGN(&srch, "html");
+
+				if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+					/* Workaround for Content-Type: html */
+					RSPAMD_FTOK_ASSIGN(&res->type, "text");
+					RSPAMD_FTOK_ASSIGN(&res->subtype, "html");
+				}
+				else {
+					RSPAMD_FTOK_ASSIGN(&srch, "application");
+
+					if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+						RSPAMD_FTOK_ASSIGN(&res->subtype, "octet-stream");
+					}
+				}
+			}
+		}
+		else {
+			/* Common misspelling of multipart/alternative */
+			RSPAMD_FTOK_ASSIGN(&srch, "alternate");
+
+			if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
+				res->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+				RSPAMD_FTOK_ASSIGN(&res->subtype, "alternative");
+			}
+
+			/* PKCS7 smime */
+			RSPAMD_FTOK_ASSIGN(&srch, "pkcs7-mime");
+			if (rspamd_substring_search(res->subtype.begin, res->subtype.len,
+										srch.begin, srch.len) != -1) {
+				res->flags |= RSPAMD_CONTENT_TYPE_SMIME;
+			}
+		}
+
+		/* Classify by top-level type */
+		RSPAMD_FTOK_ASSIGN(&srch, "multipart");
+
+		if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+			res->flags |= RSPAMD_CONTENT_TYPE_MULTIPART;
+
+			RSPAMD_FTOK_ASSIGN(&srch, "encrypted");
+			if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
+				res->flags |= RSPAMD_CONTENT_TYPE_ENCRYPTED;
+			}
+		}
+		else {
+			RSPAMD_FTOK_ASSIGN(&srch, "text");
+
+			if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+				res->flags |= RSPAMD_CONTENT_TYPE_TEXT;
+			}
+			else {
+				RSPAMD_FTOK_ASSIGN(&srch, "message");
+
+				if (rspamd_ftok_casecmp(&res->type, &srch) == 0) {
+					/* Delivery reports are flagged as text + DSN */
+					RSPAMD_FTOK_ASSIGN(&srch, "delivery-status");
+
+					if (rspamd_ftok_casecmp(&res->subtype, &srch) == 0) {
+						res->flags |= RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_DSN;
+					}
+					else {
+						RSPAMD_FTOK_ASSIGN(&srch, "notification");
+
+						if (rspamd_substring_search_caseless(res->subtype.begin,
+															 res->subtype.len, srch.begin, srch.len) != -1) {
+							res->flags |= RSPAMD_CONTENT_TYPE_TEXT |
+										  RSPAMD_CONTENT_TYPE_DSN;
+						}
+						else {
+							res->flags |= RSPAMD_CONTENT_TYPE_MESSAGE;
+						}
+					}
+				}
+			}
+		}
+	}
+	else {
+		msg_warn_pool("cannot parse content type: %*s", (gint) len, cpy);
+	}
+
+	return res;
+}
+
+/*
+ * Registers one `name=value` parameter on a content disposition.
+ *
+ * Name and value are first duplicated into the pool, so the caller's
+ * buffers stay untouched; the duplicated name is lowercased. Parameters
+ * sharing a name are chained into one list; only the first element of a
+ * chain is inserted into cd->attrs (created on demand).
+ */
+void rspamd_content_disposition_add_param(rspamd_mempool_t *pool,
+										  struct rspamd_content_disposition *cd,
+										  const gchar *name_start, const gchar *name_end,
+										  const gchar *value_start, const gchar *value_end)
+{
+	const gsize name_len = name_end - name_start;
+	const gsize value_len = value_end - value_start;
+	struct rspamd_content_type_param *entry, *head = NULL;
+	gchar *name_buf, *value_buf;
+	rspamd_ftok_t key;
+	gboolean first_seen;
+
+	g_assert(cd != NULL);
+
+	name_buf = rspamd_mempool_alloc(pool, name_len);
+	memcpy(name_buf, name_start, name_len);
+
+	value_buf = rspamd_mempool_alloc(pool, value_len);
+	memcpy(value_buf, value_start, value_len);
+
+	entry = rspamd_mempool_alloc0(pool, sizeof(*entry));
+	rspamd_str_lc(name_buf, name_len);
+
+	if (!rspamd_param_maybe_rfc2231_process(pool, entry, name_buf,
+											name_buf + name_len,
+											value_buf, value_buf + value_len)) {
+		/* Plain parameter: reference the pool copies as is */
+		entry->name.begin = name_buf;
+		entry->name.len = name_len;
+		entry->value.begin = value_buf;
+		entry->value.len = value_len;
+	}
+
+	key.begin = entry->name.begin;
+	key.len = entry->name.len;
+
+	if (cd->attrs == NULL) {
+		cd->attrs = g_hash_table_new(rspamd_ftok_icase_hash,
+									 rspamd_ftok_icase_equal);
+	}
+	else {
+		head = g_hash_table_lookup(cd->attrs, &key);
+	}
+
+	first_seen = (head == NULL);
+	DL_APPEND(head, entry);
+
+	if (first_seen) {
+		/* New name: the entry becomes the head stored in the table */
+		g_hash_table_insert(cd->attrs, &entry->name, entry);
+	}
+}
+
+/*
+ * Parses a Content-Disposition header value.
+ *
+ * On success the result lives in the pool, carries a lowercased copy of
+ * the raw header in lc_data, and an unknown disposition type is coerced to
+ * `attachment`. On parser failure a warning is logged and NULL is returned.
+ */
+struct rspamd_content_disposition *
+rspamd_content_disposition_parse(const gchar *in,
+								 gsize len, rspamd_mempool_t *pool)
+{
+	struct rspamd_content_disposition parsed, *res;
+
+	if (!rspamd_content_disposition_parser(in, len, &parsed, pool)) {
+		msg_warn_pool("cannot parse content disposition: %*s",
+					  (gint) len, in);
+
+		return NULL;
+	}
+
+	/* 'Fix' type to attachment as MUA does */
+	if (parsed.type == RSPAMD_CT_UNKNOWN) {
+		parsed.type = RSPAMD_CT_ATTACHMENT;
+	}
+
+	res = rspamd_mempool_alloc(pool, sizeof(*res));
+	*res = parsed;
+
+	res->lc_data = rspamd_mempool_alloc(pool, len + 1);
+	rspamd_strlcpy(res->lc_data, in, len + 1);
+	rspamd_str_lc(res->lc_data, len);
+
+	if (res->attrs != NULL) {
+		rspamd_postprocess_ct_attributes(pool, res->attrs,
+										 rspamd_content_disposition_postprocess, res);
+		rspamd_mempool_add_destructor(pool,
+									  (rspamd_mempool_destruct_t) g_hash_table_unref, res->attrs);
+	}
+
+	return res;
+}
diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h
new file mode 100644
index 0000000..ac49bdc
--- /dev/null
+++ b/src/libmime/content_type.h
@@ -0,0 +1,130 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_CONTENT_TYPE_H_
+#define SRC_LIBMIME_CONTENT_TYPE_H_
+
+#include "config.h"
+#include "libutil/fstring.h"
+#include "libutil/mem_pool.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Bit flags describing a parsed content type (stored in
+ * rspamd_content_type.flags)
+ */
+enum rspamd_content_type_flags {
+	RSPAMD_CONTENT_TYPE_VALID = 0,
+	RSPAMD_CONTENT_TYPE_BROKEN = 1 << 0,    /* subtype was missing or repaired (e.g. "alternate") */
+	RSPAMD_CONTENT_TYPE_MULTIPART = 1 << 1, /* type is "multipart" */
+	RSPAMD_CONTENT_TYPE_TEXT = 1 << 2,      /* type is "text", or a message/ delivery report */
+	RSPAMD_CONTENT_TYPE_MESSAGE = 1 << 3,   /* type is "message" and not a delivery report */
+	RSPAMD_CONTENT_TYPE_DSN = 1 << 4,       /* message/delivery-status or message/ *notification* */
+	RSPAMD_CONTENT_TYPE_MISSING = 1 << 5,   /* NOTE(review): not set by the parser here; presumably header absent - confirm */
+	RSPAMD_CONTENT_TYPE_ENCRYPTED = 1 << 6, /* multipart/encrypted */
+	RSPAMD_CONTENT_TYPE_SMIME = 1 << 7,     /* subtype contains "pkcs7-mime" */
+};
+
+/**
+ * Bit flags of a single header parameter (stored in
+ * rspamd_content_type_param.flags)
+ */
+enum rspamd_content_param_flags {
+	RSPAMD_CONTENT_PARAM_NORMAL = 0,
+	RSPAMD_CONTENT_PARAM_RFC2231 = (1 << 0),   /* value goes through RFC 2231 decoding */
+	RSPAMD_CONTENT_PARAM_PIECEWISE = (1 << 1), /* NOTE(review): presumably an RFC 2231 continuation piece - confirm */
+	RSPAMD_CONTENT_PARAM_BROKEN = (1 << 2),    /* decoding failed or the value held invalid UTF-8 */
+};
+
+/**
+ * Single `name=value` parameter of a Content-Type or Content-Disposition
+ * header. Parameters sharing a name are chained through prev/next.
+ */
+struct rspamd_content_type_param {
+	rspamd_ftok_t name;  /* parameter name (lowercased when added) */
+	rspamd_ftok_t value; /* parameter value */
+	guint rfc2231_id;    /* NOTE(review): presumably the RFC 2231 piece index - confirm */
+	enum rspamd_content_param_flags flags;
+	struct rspamd_content_type_param *prev, *next; /* doubly-linked list links */
+};
+
+/**
+ * Parsed Content-Type header
+ */
+struct rspamd_content_type {
+	gchar *cpy;                  /* private mutable copy of the header that was parsed */
+	rspamd_ftok_t type;          /* lowercased type, e.g. "text" */
+	rspamd_ftok_t subtype;       /* lowercased subtype, may be empty */
+	rspamd_ftok_t charset;       /* value of the `charset` parameter, if any */
+	rspamd_ftok_t boundary;      /* lowercased value of the `boundary` parameter, if any */
+	rspamd_ftok_t orig_boundary; /* `boundary` value in its original case */
+	enum rspamd_content_type_flags flags;
+	GHashTable *attrs; /* Can be empty; maps parameter name to the head of its parameter list */
+};
+
+/**
+ * Disposition kind of a Content-Disposition header
+ */
+enum rspamd_content_disposition_type {
+	RSPAMD_CT_UNKNOWN = 0, /* rewritten to RSPAMD_CT_ATTACHMENT by rspamd_content_disposition_parse() */
+	RSPAMD_CT_INLINE = 1,
+	RSPAMD_CT_ATTACHMENT = 2,
+};
+
+/**
+ * Parsed Content-Disposition header
+ */
+struct rspamd_content_disposition {
+	gchar *lc_data; /* lowercased, NUL terminated copy of the raw header value */
+	enum rspamd_content_disposition_type type;
+	rspamd_ftok_t filename; /* value of the `filename` parameter, if any */
+	GHashTable *attrs; /* Can be empty; maps parameter name to the head of its parameter list */
+};
+
+/**
+ * Adds new parameter to content type structure. Parameters that share a
+ * name are chained together under a single attribute entry.
+ * @param pool memory pool used to allocate the parameter
+ * @param ct content type to extend (must not be NULL)
+ * @param name_start start of the parameter name (can be modified: the name
+ * is lowercased in place)
+ * @param name_end end of the parameter name (exclusive)
+ * @param value_start start of the parameter value (can be modified)
+ * @param value_end end of the parameter value (exclusive)
+ */
+void rspamd_content_type_add_param(rspamd_mempool_t *pool,
+								   struct rspamd_content_type *ct,
+								   gchar *name_start, gchar *name_end,
+								   gchar *value_start, gchar *value_end);
+
+/**
+ * Parse content type from the header (performs copy + lowercase)
+ * @param in header value (the input itself is not modified)
+ * @param len length of the header value
+ * @param pool memory pool that owns the result
+ * @return parsed content type or NULL if no type could be extracted
+ */
+struct rspamd_content_type *rspamd_content_type_parse(const gchar *in,
+													  gsize len, rspamd_mempool_t *pool);
+
+/**
+ * Adds new param for content disposition header. Name and value are copied
+ * into the pool, the input buffers are left untouched.
+ * @param pool memory pool used for the copies and the parameter
+ * @param cd content disposition to extend (must not be NULL)
+ * @param name_start start of the parameter name
+ * @param name_end end of the parameter name (exclusive)
+ * @param value_start start of the parameter value
+ * @param value_end end of the parameter value (exclusive)
+ */
+void rspamd_content_disposition_add_param(rspamd_mempool_t *pool,
+										  struct rspamd_content_disposition *cd,
+										  const gchar *name_start, const gchar *name_end,
+										  const gchar *value_start, const gchar *value_end);
+
+/**
+ * Parse content-disposition header
+ * @param in header value
+ * @param len length of the header value
+ * @param pool memory pool that owns the result
+ * @return parsed content disposition (an unknown type is reported as
+ * attachment) or NULL if the header cannot be parsed
+ */
+struct rspamd_content_disposition *rspamd_content_disposition_parse(const gchar *in,
+																	gsize len,
+																	rspamd_mempool_t *pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_CONTENT_TYPE_H_ */
diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c
new file mode 100644
index 0000000..0af7388
--- /dev/null
+++ b/src/libmime/email_addr.c
@@ -0,0 +1,563 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "email_addr.h"
+#include "message.h"
+#include "printf.h"
+#include "smtp_parsers.h"
+
+/*
+ * Replaces addr->user with a freshly allocated copy that has every
+ * backslash removed (an escaped backslash is dropped as well), and marks
+ * the address with RSPAMD_EMAIL_ADDR_USER_ALLOCATED so that the copy is
+ * released together with the address. Empty user parts are left alone.
+ */
+static void
+rspamd_email_address_unescape(struct rspamd_email_address *addr)
+{
+	const char *in, *stop;
+	char *unescaped, *out;
+
+	if (addr->user_len == 0) {
+		return;
+	}
+
+	/* The result can only be shorter than the source */
+	unescaped = g_malloc(addr->user_len);
+	out = unescaped;
+	stop = addr->user + addr->user_len;
+
+	for (in = addr->user; in < stop; in++) {
+		if (*in == '\\') {
+			continue;
+		}
+
+		*out++ = *in;
+	}
+
+	addr->user = unescaped;
+	addr->user_len = out - unescaped;
+	addr->flags |= RSPAMD_EMAIL_ADDR_USER_ALLOCATED;
+}
+
+/*
+ * Builds a heap-allocated address from a single SMTP address string.
+ * Returns NULL for empty input or when the SMTP parser does not mark the
+ * address as valid. For quoted addresses the `addr` field is rebuilt as
+ * user@domain without quotes (and without backslashes in the user part).
+ */
+struct rspamd_email_address *
+rspamd_email_address_from_smtp(const gchar *str, guint len)
+{
+	struct rspamd_email_address parsed, *result;
+	gsize buflen;
+
+	if (str == NULL || len == 0) {
+		return NULL;
+	}
+
+	rspamd_smtp_addr_parse(str, len, &parsed);
+
+	if (!(parsed.flags & RSPAMD_EMAIL_ADDR_VALID)) {
+		return NULL;
+	}
+
+	result = g_malloc(sizeof(*result));
+	*result = parsed;
+
+	if (!(result->flags & RSPAMD_EMAIL_ADDR_QUOTED) || result->addr[0] != '"') {
+		/* Nothing to unquote */
+		return result;
+	}
+
+	if (result->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
+		/* The user part has to be unescaped before it is measured */
+		rspamd_email_address_unescape(result);
+	}
+
+	/* Rebuild addr as user@domain */
+	buflen = result->domain_len + result->user_len + 2;
+	result->addr = g_malloc(buflen + 1);
+	result->addr_len = rspamd_snprintf((char *) result->addr, buflen, "%*s@%*s",
+									   (gint) result->user_len, result->user,
+									   (gint) result->domain_len, result->domain);
+	result->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
+
+	return result;
+}
+
+/*
+ * Releases an address together with the `addr` and `user` strings when
+ * the corresponding *_ALLOCATED flags say they are owned by the address.
+ * NULL is accepted and ignored.
+ */
+void rspamd_email_address_free(struct rspamd_email_address *addr)
+{
+	if (addr == NULL) {
+		return;
+	}
+
+	if (addr->flags & RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED) {
+		g_free((void *) addr->addr);
+	}
+
+	if (addr->flags & RSPAMD_EMAIL_ADDR_USER_ALLOCATED) {
+		g_free((void *) addr->user);
+	}
+
+	g_free(addr);
+}
+
+/*
+ * Appends a heap copy of `addr` to `ar`; a NULL `addr` yields an empty
+ * placeholder ("<>") flagged RSPAMD_EMAIL_ADDR_EMPTY. Quoted addresses get
+ * their `addr` rebuilt as user@domain. A non-empty `name` is stripped and
+ * decoded into the element's display name. Allocations are reported to the
+ * pool for accounting.
+ */
+static inline void
+rspamd_email_address_add(rspamd_mempool_t *pool,
+						 GPtrArray *ar,
+						 struct rspamd_email_address *addr,
+						 GString *name)
+{
+	struct rspamd_email_address *entry = g_malloc0(sizeof(*entry));
+	guint buflen;
+
+	rspamd_mempool_notify_alloc(pool, sizeof(*entry));
+
+	if (addr == NULL) {
+		/* Placeholder element */
+		entry->addr = "";
+		entry->domain = "";
+		entry->raw = "<>";
+		entry->raw_len = 2;
+		entry->user = "";
+		entry->flags |= RSPAMD_EMAIL_ADDR_EMPTY;
+	}
+	else {
+		*entry = *addr;
+	}
+
+	if ((entry->flags & RSPAMD_EMAIL_ADDR_QUOTED) && entry->addr[0] == '"') {
+		if (entry->flags & RSPAMD_EMAIL_ADDR_HAS_BACKSLASH) {
+			/* The user part has to be unescaped before it is measured */
+			rspamd_email_address_unescape(entry);
+		}
+
+		/* Rebuild addr as user@domain */
+		buflen = entry->domain_len + entry->user_len + 2;
+		entry->addr = g_malloc(buflen + 1);
+		rspamd_mempool_notify_alloc(pool, buflen + 1);
+		entry->addr_len = rspamd_snprintf((char *) entry->addr, buflen, "%*s@%*s",
+										  (gint) entry->user_len, entry->user,
+										  (gint) entry->domain_len, entry->domain);
+		entry->flags |= RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED;
+	}
+
+	if (name->len > 0) {
+		rspamd_gstring_strip(name, " \t\v");
+		entry->name = rspamd_mime_header_decode(pool, name->str, name->len, NULL);
+	}
+
+	rspamd_mempool_notify_alloc(pool, name->len);
+	g_ptr_array_add(ar, entry);
+}
+
+/*
+ * Tries to parse an email address that does not conform to the RFC.
+ *
+ * The whole input (or, for `<...>` input, the part between the angle
+ * brackets) is taken as the address; user and domain are split at the last
+ * '@' when it is followed by at least one character. All resulting tokens
+ * point into `data`, nothing is copied.
+ *
+ * @param data input to parse
+ * @param len length of the input
+ * @param addr output structure, zeroed first
+ * @return TRUE if the input is not empty, FALSE otherwise
+ */
+static gboolean
+rspamd_email_address_parse_heuristic(const char *data, size_t len,
+									 struct rspamd_email_address *addr)
+{
+	const gchar *p = data, *at = NULL, *end = data + len;
+	gboolean ret = FALSE;
+
+	memset(addr, 0, sizeof(*addr));
+
+	/* Check the length first: with len == 0 there is no byte to look at */
+	if (len > 1 && *p == '<') {
+		/* Angled address */
+		addr->addr_len = rspamd_memcspn(p + 1, ">", len - 1);
+		addr->addr = p + 1;
+		addr->raw = p;
+		addr->raw_len = len;
+		ret = TRUE;
+
+		/* Continue with the part inside the brackets only */
+		p = p + 1;
+		len = addr->addr_len;
+		end = p + len;
+	}
+	else if (len > 0) {
+		addr->addr = p;
+		addr->addr_len = len;
+		addr->raw = p;
+		addr->raw_len = len;
+		ret = TRUE;
+	}
+
+	if (ret) {
+		at = rspamd_memrchr(p, '@', len);
+
+		/* A trailing '@' leaves user and domain unset */
+		if (at != NULL && at + 1 < end) {
+			addr->domain = at + 1;
+			addr->domain_len = end - (at + 1);
+			addr->user = p;
+			addr->user_len = at - p;
+		}
+
+		if (rspamd_str_has_8bit(p, len)) {
+			addr->flags |= RSPAMD_EMAIL_ADDR_HAS_8BIT;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Parses [start, start + len) as an address and appends it to `res`,
+ * falling back to the heuristic parser when the SMTP parser rejects it.
+ *
+ * Returns 1 if an address has been added, 0 if nothing could be parsed and
+ * -1 if `res` already holds `max_elements` entries (limit active only when
+ * max_elements > 0).
+ */
+static inline int
+rspamd_email_address_check_and_add(const gchar *start, gsize len,
+								   GPtrArray *res,
+								   rspamd_mempool_t *pool,
+								   GString *ns,
+								   gint max_elements)
+{
+	struct rspamd_email_address parsed;
+
+	g_assert(res != NULL);
+
+	if (max_elements > 0 && res->len >= max_elements) {
+		msg_info_pool_check("reached maximum number of elements %d when adding %v",
+							max_elements,
+							ns);
+
+		return -1;
+	}
+
+	/* Strict parser first, heuristic only if it fails */
+	memset(&parsed, 0, sizeof(parsed));
+	rspamd_smtp_addr_parse(start, len, &parsed);
+
+	if (!(parsed.flags & RSPAMD_EMAIL_ADDR_VALID) &&
+		!rspamd_email_address_parse_heuristic(start, len, &parsed)) {
+		return 0;
+	}
+
+	rspamd_email_address_add(pool, res, &parsed, ns);
+
+	return 1;
+}
+
+/*
+ * Extracts email addresses (with display names) from a mime header value.
+ *
+ * The header is processed in two passes: the first one copies it while
+ * dropping parenthesised comments found outside of quoted strings, the
+ * second one walks the copy with a small state machine that collects the
+ * display name and hands address candidates to
+ * rspamd_email_address_check_and_add().
+ *
+ * @param pool memory pool (owns the comment-free copy and a new result array)
+ * @param hdr header value
+ * @param len length of the header value
+ * @param src array to append to, or NULL to create a new one
+ * @param max_elements upper bound for the array size when positive
+ * @return `src` if it was given, the newly created array otherwise
+ */
+GPtrArray *
+rspamd_email_address_from_mime(rspamd_mempool_t *pool, const gchar *hdr,
+							   guint len,
+							   GPtrArray *src,
+							   gint max_elements)
+{
+	GPtrArray *res = src;
+	gboolean seen_at = FALSE, seen_obrace = FALSE;
+
+	/* p is the cursor, c marks the start of the current token */
+	const gchar *p = hdr, *end = hdr + len, *c = hdr, *t;
+	/* ns accumulates the display name, cpy is the comment-free header */
+	GString *ns, *cpy;
+	gint obraces, ebraces;
+	enum {
+		parse_name = 0,
+		parse_quoted,
+		parse_addr,
+		skip_spaces
+	} state = parse_name,
+	  next_state = parse_name;
+
+	if (res == NULL) {
+		res = g_ptr_array_sized_new(2);
+		rspamd_mempool_add_destructor(pool, rspamd_email_address_list_destroy,
+									  res);
+	}
+	else if (max_elements > 0 && res->len >= max_elements) {
+		msg_info_pool_check("reached maximum number of elements %d", max_elements);
+
+		return res;
+	}
+
+	ns = g_string_sized_new(len);
+	cpy = g_string_sized_new(len);
+
+	rspamd_mempool_add_destructor(pool, rspamd_gstring_free_hard, cpy);
+
+	/* First, we need to remove all comments as they are terrible */
+	obraces = 0;
+	ebraces = 0;
+
+	while (p < end) {
+		if (state == parse_name) {
+			if (*p == '\\') {
+				if (obraces == 0) {
+					g_string_append_c(cpy, *p);
+				}
+
+				p++;
+			}
+			else {
+				if (*p == '"') {
+					state = parse_quoted;
+				}
+				else if (*p == '(') {
+					obraces++; /* To avoid ) itself being copied */
+				}
+				else if (*p == ')') {
+					ebraces++;
+					p++;
+				}
+
+				if (obraces == ebraces) {
+					obraces = 0;
+					ebraces = 0;
+				}
+			}
+
+			if (p < end && obraces == 0) {
+				g_string_append_c(cpy, *p);
+			}
+		}
+		else {
+			/* Quoted elt */
+			if (*p == '\\') {
+				g_string_append_c(cpy, *p);
+				p++;
+			}
+			else {
+				if (*p == '"') {
+					state = parse_name;
+				}
+			}
+
+			if (p < end) {
+				g_string_append_c(cpy, *p);
+			}
+		}
+
+		p++;
+	}
+
+	/* Second pass: parse the comment-free copy */
+	state = parse_name;
+
+	p = cpy->str;
+	c = p;
+	end = p + cpy->len;
+
+	while (p < end) {
+		switch (state) {
+		case parse_name:
+			if (*p == '"') {
+				/* We need to strip last spaces and update `ns` */
+				if (p > c) {
+					guint nspaces = 0;
+
+					t = p - 1;
+
+					while (t > c && g_ascii_isspace(*t)) {
+						t--;
+						nspaces++;
+					}
+
+					g_string_append_len(ns, c, t - c + 1);
+
+					if (nspaces > 0) {
+						g_string_append_c(ns, ' ');
+					}
+				}
+
+				state = parse_quoted;
+				c = p + 1;
+			}
+			else if (*p == '<') {
+				/* Everything before '<' belongs to the display name */
+				if (p > c) {
+					t = p - 1;
+
+					while (t > c && g_ascii_isspace(*t)) {
+						t--;
+					}
+
+					g_string_append_len(ns, c, t - c + 1);
+				}
+
+				c = p;
+				state = parse_addr;
+			}
+			else if (*p == ',') {
+				if (p > c && seen_at) {
+					/*
+					 * Last token must be the address:
+					 * e.g. Some name name@domain.com
+					 */
+					t = p - 1;
+
+					while (t > c && g_ascii_isspace(*t)) {
+						t--;
+					}
+
+					int check = rspamd_email_address_check_and_add(c, t - c + 1,
+																   res, pool, ns, max_elements);
+
+					if (check == 0 && res->len == 0) {
+						/* Insert fake address */
+						rspamd_email_address_add(pool, res, NULL, ns);
+					}
+					else if (check != 1) {
+						goto end;
+					}
+
+					/* Cleanup for the next use */
+					g_string_set_size(ns, 0);
+					seen_at = FALSE;
+				}
+
+				state = skip_spaces;
+				next_state = parse_name;
+			}
+			else if (*p == '@') {
+				seen_at = TRUE;
+			}
+
+			p++;
+			break;
+		case parse_quoted:
+			if (*p == '\\') {
+				/* Flush the text seen so far and skip the backslash itself */
+				if (p > c) {
+					g_string_append_len(ns, c, p - c);
+				}
+
+				p++;
+				c = p;
+			}
+			else if (*p == '"') {
+				if (p > c) {
+					g_string_append_len(ns, c, p - c);
+				}
+
+				if (p + 1 < end && g_ascii_isspace(p[1])) {
+					g_string_append_c(ns, ' ');
+				}
+
+				state = skip_spaces;
+				next_state = parse_name;
+			}
+			else if (*p == '@' && seen_obrace) {
+				seen_at = TRUE;
+			}
+			else if (*p == '<') {
+				seen_obrace = TRUE;
+			}
+			p++;
+			break;
+		case parse_addr:
+			if (*p == '>') {
+				/* The candidate includes both angle brackets */
+				int check = rspamd_email_address_check_and_add(c, p - c + 1,
+															   res, pool, ns, max_elements);
+				if (check == 0 && res->len == 0) {
+					/* Insert a fake address */
+					rspamd_email_address_add(pool, res, NULL, ns);
+				}
+				else if (check != 1) {
+					goto end;
+				}
+
+				/* Cleanup for the next use */
+				g_string_set_size(ns, 0);
+				seen_at = FALSE;
+				state = skip_spaces;
+				next_state = parse_name;
+			}
+			else if (*p == '@') {
+				seen_at = TRUE;
+			}
+			p++;
+			break;
+		case skip_spaces:
+			if (!g_ascii_isspace(*p)) {
+				c = p;
+				state = next_state;
+			}
+			else {
+				p++;
+			}
+			break;
+		}
+	}
+
+	/* Handle leftover */
+	switch (state) {
+	case parse_name:
+		/* Assume the whole header as name (bad thing) */
+		if (p > c) {
+			/*
+			 * Strip trailing spaces: `p` is one past the last character
+			 * here, so the character to inspect is the one before it
+			 */
+			while (p > c && g_ascii_isspace(*(p - 1))) {
+				p--;
+			}
+
+			if (p > c) {
+				if (seen_at) {
+					/* The whole email is likely address */
+					int check = rspamd_email_address_check_and_add(c, p - c,
+																   res, pool, ns, max_elements);
+					if (check == 0 && res->len == 0) {
+						/* Insert a fake address */
+						rspamd_email_address_add(pool, res, NULL, ns);
+					}
+					else if (check != 1) {
+						goto end;
+					}
+				}
+				else {
+					/* No @ seen */
+					g_string_append_len(ns, c, p - c);
+
+					if (res->len == 0) {
+						rspamd_email_address_add(pool, res, NULL, ns);
+					}
+				}
+			}
+			else if (res->len == 0) {
+				rspamd_email_address_add(pool, res, NULL, ns);
+			}
+		}
+		break;
+	case parse_addr:
+		/* Unterminated '<...': try what we have */
+		if (p > c) {
+			if (rspamd_email_address_check_and_add(c, p - c,
+												   res, pool, ns, max_elements) == 0) {
+				if (res->len == 0) {
+					rspamd_email_address_add(pool, res, NULL, ns);
+				}
+			}
+		}
+		break;
+	case parse_quoted:
+		/* Unfinished quoted string or a comment */
+		/* If we have seen obrace + at, then we still can try to resolve address */
+		if (seen_at && seen_obrace) {
+			p = rspamd_memrchr(cpy->str, '<', cpy->len);
+			g_assert(p != NULL);
+			if (rspamd_email_address_check_and_add(p, end - p,
+												   res, pool, ns, max_elements) == 0) {
+				if (res->len == 0) {
+					rspamd_email_address_add(pool, res, NULL, ns);
+				}
+			}
+		}
+		break;
+	default:
+		/* Do nothing */
+		break;
+	}
+end:
+	/* cpy is released by the pool destructor, ns is ours to free */
+	rspamd_mempool_notify_alloc(pool, cpy->len);
+	g_string_free(ns, TRUE);
+
+	return res;
+}
+
+/*
+ * Destructor for an address array: frees every stored address and then
+ * the array itself.
+ */
+void rspamd_email_address_list_destroy(gpointer ptr)
+{
+	GPtrArray *addrs = (GPtrArray *) ptr;
+	struct rspamd_email_address *cur;
+	guint idx;
+
+	PTR_ARRAY_FOREACH(addrs, idx, cur)
+	{
+		rspamd_email_address_free(cur);
+	}
+
+	g_ptr_array_free(addrs, TRUE);
+}
+\ No newline at end of file
diff --git a/src/libmime/email_addr.h b/src/libmime/email_addr.h
new file mode 100644
index 0000000..ed00722
--- /dev/null
+++ b/src/libmime/email_addr.h
@@ -0,0 +1,97 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_EMAIL_ADDR_H_
+#define SRC_LIBMIME_EMAIL_ADDR_H_
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "libutil/ref.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_mime_header;
+
+/*
+ * Bit flags describing an email address. Every enumerator is a distinct bit,
+ * so several of them can be OR-ed together.
+ * NOTE(review): presumably these are stored in `rspamd_email_address.flags`;
+ * the exact meaning of each bit is defined by the parser, not by this header.
+ */
+enum rspamd_email_address_flags {
+	RSPAMD_EMAIL_ADDR_VALID = (1 << 0),
+	RSPAMD_EMAIL_ADDR_IP = (1 << 1),
+	RSPAMD_EMAIL_ADDR_BRACED = (1 << 2),
+	RSPAMD_EMAIL_ADDR_QUOTED = (1 << 3),
+	RSPAMD_EMAIL_ADDR_EMPTY = (1 << 4),
+	RSPAMD_EMAIL_ADDR_HAS_BACKSLASH = (1 << 5),
+	RSPAMD_EMAIL_ADDR_ADDR_ALLOCATED = (1 << 6),
+	RSPAMD_EMAIL_ADDR_USER_ALLOCATED = (1 << 7),
+	RSPAMD_EMAIL_ADDR_HAS_8BIT = (1 << 8),
+	RSPAMD_EMAIL_ADDR_ALIASED = (1 << 9),
+	RSPAMD_EMAIL_ADDR_ORIGINAL = (1 << 10),
+};
+
+/*
+ * Structure that represents email address in a convenient way
+ *
+ * NOTE(review): each `*_len` member presumably holds the length of the
+ * same-named string member, and `flags` presumably holds a mask of
+ * `enum rspamd_email_address_flags`; neither is established by this header,
+ * so confirm against the parser before relying on it.
+ */
+struct rspamd_email_address {
+	const gchar *raw;
+	const gchar *addr;
+	const gchar *user;
+	const gchar *domain;
+	const gchar *name;
+
+	guint raw_len;
+	guint addr_len;
+	guint domain_len;
+	guint user_len;
+	guint flags;
+};
+
+struct rspamd_task;
+
+/**
+ * Create email address from a single rfc822 address (e.g. from mail from:)
+ * @param str string to use
+ * @param len length of string
+ * @return the resulting address structure
+ * NOTE(review): the failure result and the ownership of the returned object
+ * are not visible in this header -- confirm against the implementation.
+ */
+struct rspamd_email_address *rspamd_email_address_from_smtp(const gchar *str, guint len);
+
+/**
+ * Parses email address from the mime header, decodes names and return the array
+ * of `rspamd_email_address`. If `src` is NULL, then this function creates a new
+ * array and adds a destructor to remove elements when `pool` is destroyed.
+ * Otherwise, addresses are appended to `src`.
+ * @param pool memory pool used by the parser (see the note on `src` above)
+ * @param hdr header value to parse
+ * @param len length of `hdr`
+ * @param src existing array to append to, or NULL to create a new one
+ * @param max_elements limit forwarded to the per-address check
+ * (NOTE(review): presumably the maximum number of addresses to collect --
+ * confirm how non-positive values are treated)
+ * @return the array holding the parsed addresses
+ */
+GPtrArray *
+rspamd_email_address_from_mime(rspamd_mempool_t *pool, const gchar *hdr, guint len,
+							   GPtrArray *src, gint max_elements);
+
+/**
+ * Destroys list of email addresses: each element is released with
+ * rspamd_email_address_free() and then the array itself is freed
+ * @param ptr the list to destroy (a GPtrArray of `rspamd_email_address`)
+ */
+void rspamd_email_address_list_destroy(gpointer ptr);
+
+/**
+ * Releases a single email address
+ * @param addr address to release
+ */
+void rspamd_email_address_free(struct rspamd_email_address *addr);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_EMAIL_ADDR_H_ */
diff --git a/src/libmime/images.c b/src/libmime/images.c
new file mode 100644
index 0000000..1344d91
--- /dev/null
+++ b/src/libmime/images.c
@@ -0,0 +1,718 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "images.h"
+#include "task.h"
+#include "message.h"
+#include "libserver/html/html.h"
+
+/*
+ * Debug logging helper for the "images" log module.
+ * The expansion reads `task->task_pool->tag.uid`, so it can only be used in
+ * functions that have a `task` variable in scope.
+ */
+#define msg_debug_images(...) rspamd_conditional_debug_fast(NULL, NULL,                                               \
+															rspamd_images_log_id, "images", task->task_pool->tag.uid, \
+															G_STRFUNC,                                                \
+															__VA_ARGS__)
+
+INIT_LOG_MODULE(images)
+
+#ifdef USABLE_GD
+#include "gd.h"
+#include "hash.h"
+#include <math.h>
+
+/* Width and height (in pixels) images are scaled to before the DCT is computed */
+#define RSPAMD_NORMALIZED_DIM 64
+
+/* Process-wide LRU cache of computed signatures; created lazily on first lookup */
+static rspamd_lru_hash_t *images_hash = NULL;
+#endif
+
+/*
+ * Leading byte sequences used by detect_image_type() to recognise formats.
+ * For JPEG, `jpg_sig1` is matched at offset 0 and either `jpg_sig_jfif` or
+ * `jpg_sig_exif` must follow at offset 2.
+ */
+static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
+static const guint8 jpg_sig1[] = {0xff, 0xd8};
+static const guint8 jpg_sig_jfif[] = {0xff, 0xe0};
+static const guint8 jpg_sig_exif[] = {0xff, 0xe1};
+static const guint8 gif_signature[] = {'G', 'I', 'F', '8'};
+static const guint8 bmp_signature[] = {'B', 'M'};
+
+static bool process_image(struct rspamd_task *task, struct rspamd_mime_part *part);
+
+
+/**
+ * Tries to process a mime part as an image. Only parts whose type is still
+ * undefined, whose detected type is "image" and that carry some parsed data
+ * are examined.
+ * @param task current task
+ * @param part mime part to examine
+ * @return true if the part has been recognised and marked as an image
+ */
+bool rspamd_images_process_mime_part_maybe(struct rspamd_task *task,
+										   struct rspamd_mime_part *part)
+{
+	/* Parts that already have a concrete type are left alone */
+	if (part->part_type != RSPAMD_MIME_PART_UNDEFINED) {
+		return false;
+	}
+
+	if (part->detected_type == NULL ||
+		strcmp(part->detected_type, "image") != 0 ||
+		!(part->parsed_data.len > 0)) {
+		return false;
+	}
+
+	return process_image(task, part);
+}
+
+/**
+ * Runs image detection over every mime part of the task's message.
+ * @param task current task
+ */
+void rspamd_images_process(struct rspamd_task *task)
+{
+	struct rspamd_mime_part *cur_part;
+	guint idx;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), idx, cur_part)
+	{
+		/* The result is not needed here: matching parts are updated in place */
+		rspamd_images_process_mime_part_maybe(task, cur_part);
+	}
+}
+
+/*
+ * Guesses the image format from the leading bytes of `data`.
+ * Formats are probed in the order PNG, JPEG, GIF, BMP; the data must be
+ * strictly longer than the signature being compared (longer than 10 bytes for
+ * JPEG). Returns IMAGE_TYPE_UNKNOWN when nothing matches.
+ */
+static enum rspamd_image_type
+detect_image_type(rspamd_ftok_t *data)
+{
+	if (data->len > sizeof(png_signature) / sizeof(png_signature[0]) &&
+		memcmp(data->begin, png_signature, sizeof(png_signature)) == 0) {
+		return IMAGE_TYPE_PNG;
+	}
+
+	/* JPEG: start-of-image marker followed by a JFIF or an EXIF marker */
+	if (data->len > 10 &&
+		memcmp(data->begin, jpg_sig1, sizeof(jpg_sig1)) == 0 &&
+		(memcmp(data->begin + 2, jpg_sig_jfif, sizeof(jpg_sig_jfif)) == 0 ||
+		 memcmp(data->begin + 2, jpg_sig_exif, sizeof(jpg_sig_exif)) == 0)) {
+		return IMAGE_TYPE_JPG;
+	}
+
+	if (data->len > sizeof(gif_signature) / sizeof(gif_signature[0]) &&
+		memcmp(data->begin, gif_signature, sizeof(gif_signature)) == 0) {
+		return IMAGE_TYPE_GIF;
+	}
+
+	if (data->len > sizeof(bmp_signature) / sizeof(bmp_signature[0]) &&
+		memcmp(data->begin, bmp_signature, sizeof(bmp_signature)) == 0) {
+		return IMAGE_TYPE_BMP;
+	}
+
+	return IMAGE_TYPE_UNKNOWN;
+}
+
+
+/*
+ * Reads the dimensions of a PNG image.
+ * The chunk type at offset 12 must be "IHDR"; width and height are the two
+ * big-endian 32 bit integers that follow it. Returns a pool-allocated image
+ * description, or NULL if the data is shorter than 24 bytes or the IHDR tag
+ * is missing.
+ */
+static struct rspamd_image *
+process_png_image(rspamd_mempool_t *pool, rspamd_ftok_t *data)
+{
+	struct rspamd_image *png;
+	const guint8 *ihdr;
+	guint32 be_width, be_height;
+
+	if (data->len < 24) {
+		msg_info_pool("bad png detected (maybe striped)");
+		return NULL;
+	}
+
+	/* Skip the 8 byte signature and the 4 byte chunk length */
+	ihdr = data->begin + 12;
+	if (memcmp(ihdr, "IHDR", 4) != 0) {
+		msg_info_pool("png doesn't begins with IHDR section");
+		return NULL;
+	}
+
+	/* memcpy is used as the source bytes are not necessarily aligned */
+	memcpy(&be_width, ihdr + 4, sizeof(guint32));
+	memcpy(&be_height, ihdr + 8, sizeof(guint32));
+
+	png = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image));
+	png->type = IMAGE_TYPE_PNG;
+	png->data = data;
+	png->width = ntohl(be_width);
+	png->height = ntohl(be_height);
+
+	return png;
+}
+
+/*
+ * Reads the dimensions of a JPEG image by walking its marker segments until
+ * a start-of-frame marker (0xC0-0xC3, 0xC9-0xCB) is met.
+ *
+ * Relative to the marker code byte the frame header is laid out as:
+ * segment length (2 bytes), sample precision (1 byte), height (2 bytes,
+ * big-endian), width (2 bytes, big-endian).
+ *
+ * Returns a pool-allocated image description, or NULL if no frame header has
+ * been found. Nothing is allocated from `pool` in the latter case.
+ */
+static struct rspamd_image *
+process_jpg_image(rspamd_mempool_t *pool, rspamd_ftok_t *data)
+{
+	const guint8 *p, *end;
+	struct rspamd_image *img;
+
+	if (data->len < 10) {
+		/*
+		 * Too short to hold a frame header; bailing out here also guarantees
+		 * that `end` below never points before the start of the buffer
+		 */
+		return NULL;
+	}
+
+	p = data->begin;
+	/* Keep 8 bytes of slack so that all reads up to p[7] stay inside the data */
+	end = p + data->len - 8;
+	/* Skip the start-of-image marker */
+	p += 2;
+
+	while (p < end) {
+		if (p[0] == 0xFF && p[1] != 0xFF) {
+			/* Segment length is stored big-endian right after the marker */
+			guint len = p[2] * 256 + p[3];
+
+			/* Step onto the marker code byte */
+			p++;
+
+			if (*p == 0xc0 || *p == 0xc1 || *p == 0xc2 || *p == 0xc3 ||
+				*p == 0xc9 || *p == 0xca || *p == 0xcb) {
+				img = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image));
+				img->type = IMAGE_TYPE_JPG;
+				img->data = data;
+				/* Both dimensions are 16 bit big-endian: high byte * 256 + low byte */
+				img->height = ((guint) p[4] << 8) | p[5];
+				img->width = ((guint) p[6] << 8) | p[7];
+
+				return img;
+			}
+
+			/* Not a frame header: jump over the segment payload */
+			p += len;
+		}
+		else {
+			p++;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Reads the dimensions of a GIF image: two little-endian 16 bit integers
+ * (width, then height) located at offset 6. Returns a pool-allocated image
+ * description, or NULL if the data is shorter than 10 bytes.
+ */
+static struct rspamd_image *
+process_gif_image(rspamd_mempool_t *pool, rspamd_ftok_t *data)
+{
+	struct rspamd_image *gif;
+	const guint8 *dims;
+	guint16 le_width, le_height;
+
+	if (data->len < 10) {
+		msg_info_pool("bad gif detected (maybe striped)");
+		return NULL;
+	}
+
+	dims = data->begin + 6;
+	/* memcpy is used as the source bytes are not necessarily aligned */
+	memcpy(&le_width, dims, sizeof(guint16));
+	memcpy(&le_height, dims + 2, sizeof(guint16));
+
+	gif = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image));
+	gif->type = IMAGE_TYPE_GIF;
+	gif->data = data;
+	gif->width = GUINT16_FROM_LE(le_width);
+	gif->height = GUINT16_FROM_LE(le_height);
+
+	return gif;
+}
+
+/*
+ * Reads the dimensions of a BMP image: two little-endian 32 bit integers
+ * (width, then height) located at offset 18. Returns a pool-allocated image
+ * description, or NULL if the data is shorter than 28 bytes.
+ */
+static struct rspamd_image *
+process_bmp_image(rspamd_mempool_t *pool, rspamd_ftok_t *data)
+{
+	struct rspamd_image *bmp;
+	const guint8 *dims;
+	gint32 le_width, le_height;
+
+	if (data->len < 28) {
+		msg_info_pool("bad bmp detected (maybe striped)");
+		return NULL;
+	}
+
+	dims = data->begin + 18;
+	/* memcpy is used as the source bytes are not necessarily aligned */
+	memcpy(&le_width, dims, sizeof(guint32));
+	memcpy(&le_height, dims + 4, sizeof(gint32));
+
+	bmp = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_image));
+	bmp->type = IMAGE_TYPE_BMP;
+	bmp->data = data;
+	bmp->width = GUINT32_FROM_LE(le_width);
+	bmp->height = GUINT32_FROM_LE(le_height);
+
+	return bmp;
+}
+
+#ifdef USABLE_GD
+/*
+ * DCT from Emil Mikulic.
+ * http://unix4lyfe.org/dct/
+ *
+ * Integer (fixed-point) two-pass transform of one 8x8 block.
+ * @param pixels input block of 8x8 samples
+ * @param out receives 64 output values (out[0] .. out[63])
+ *
+ * The first pass reads `pixels[0..7][i]` for every i and stores its result in
+ * `rows[i][0..7]`; the second pass does the same over `rows` and writes
+ * `out[i * 8 .. i * 8 + 7]`. The trigonometric constants are pre-multiplied
+ * by a power of two and the results are shifted back at the output stage.
+ */
+static void
+rspamd_image_dct_block(gint pixels[8][8], gdouble *out)
+{
+	gint i;
+	gint rows[8][8];
+
+	static const gint c1 = 1004 /* cos(pi/16) << 10 */,
+					  s1 = 200 /* sin(pi/16) << 10 */,
+					  c3 = 851 /* cos(3pi/16) << 10 */,
+					  s3 = 569 /* sin(3pi/16) << 10 */,
+					  r2c6 = 554 /* sqrt(2)*cos(6pi/16) << 10 */,
+					  r2s6 = 1337 /* sqrt(2)*sin(6pi/16) << 10 */,
+					  r2 = 181; /* sqrt(2) << 7*/
+
+	/* x0..x7 hold the eight samples being transformed, x8 is a scratch value */
+	gint x0, x1, x2, x3, x4, x5, x6, x7, x8;
+
+	/* transform rows */
+	for (i = 0; i < 8; i++) {
+		x0 = pixels[0][i];
+		x1 = pixels[1][i];
+		x2 = pixels[2][i];
+		x3 = pixels[3][i];
+		x4 = pixels[4][i];
+		x5 = pixels[5][i];
+		x6 = pixels[6][i];
+		x7 = pixels[7][i];
+
+		/* Stage 1 */
+		x8 = x7 + x0;
+		x0 -= x7;
+		x7 = x1 + x6;
+		x1 -= x6;
+		x6 = x2 + x5;
+		x2 -= x5;
+		x5 = x3 + x4;
+		x3 -= x4;
+
+		/* Stage 2 */
+		x4 = x8 + x5;
+		x8 -= x5;
+		x5 = x7 + x6;
+		x7 -= x6;
+		x6 = c1 * (x1 + x2);
+		x2 = (-s1 - c1) * x2 + x6;
+		x1 = (s1 - c1) * x1 + x6;
+		x6 = c3 * (x0 + x3);
+		x3 = (-s3 - c3) * x3 + x6;
+		x0 = (s3 - c3) * x0 + x6;
+
+		/* Stage 3 */
+		x6 = x4 + x5;
+		x4 -= x5;
+		x5 = r2c6 * (x7 + x8);
+		x7 = (-r2s6 - r2c6) * x7 + x5;
+		x8 = (r2s6 - r2c6) * x8 + x5;
+		x5 = x0 + x2;
+		x0 -= x2;
+		x2 = x3 + x1;
+		x3 -= x1;
+
+		/* Stage 4 and output */
+		/* Values scaled by the << 10 constants are shifted back here */
+		rows[i][0] = x6;
+		rows[i][4] = x4;
+		rows[i][2] = x8 >> 10;
+		rows[i][6] = x7 >> 10;
+		rows[i][7] = (x2 - x5) >> 10;
+		rows[i][1] = (x2 + x5) >> 10;
+		rows[i][3] = (x3 * r2) >> 17;
+		rows[i][5] = (x0 * r2) >> 17;
+	}
+
+	/* transform columns */
+	for (i = 0; i < 8; i++) {
+		x0 = rows[0][i];
+		x1 = rows[1][i];
+		x2 = rows[2][i];
+		x3 = rows[3][i];
+		x4 = rows[4][i];
+		x5 = rows[5][i];
+		x6 = rows[6][i];
+		x7 = rows[7][i];
+
+		/* Stage 1 */
+		x8 = x7 + x0;
+		x0 -= x7;
+		x7 = x1 + x6;
+		x1 -= x6;
+		x6 = x2 + x5;
+		x2 -= x5;
+		x5 = x3 + x4;
+		x3 -= x4;
+
+		/* Stage 2 */
+		x4 = x8 + x5;
+		x8 -= x5;
+		x5 = x7 + x6;
+		x7 -= x6;
+		x6 = c1 * (x1 + x2);
+		x2 = (-s1 - c1) * x2 + x6;
+		x1 = (s1 - c1) * x1 + x6;
+		x6 = c3 * (x0 + x3);
+		x3 = (-s3 - c3) * x3 + x6;
+		x0 = (s3 - c3) * x0 + x6;
+
+		/* Stage 3 */
+		x6 = x4 + x5;
+		x4 -= x5;
+		x5 = r2c6 * (x7 + x8);
+		x7 = (-r2s6 - r2c6) * x7 + x5;
+		x8 = (r2s6 - r2c6) * x8 + x5;
+		x5 = x0 + x2;
+		x0 -= x2;
+		x2 = x3 + x1;
+		x3 -= x1;
+
+		/* Stage 4 and output */
+		/* The added constant is half of the divisor, i.e. round to nearest */
+		out[i * 8] = (double) ((x6 + 16) >> 3);
+		out[i * 8 + 1] = (double) ((x4 + 16) >> 3);
+		out[i * 8 + 2] = (double) ((x8 + 16384) >> 13);
+		out[i * 8 + 3] = (double) ((x7 + 16384) >> 13);
+		out[i * 8 + 4] = (double) ((x2 - x5 + 16384) >> 13);
+		out[i * 8 + 5] = (double) ((x2 + x5 + 16384) >> 13);
+		out[i * 8 + 6] = (double) (((x3 >> 8) * r2 + 8192) >> 12);
+		out[i * 8 + 7] = (double) (((x0 >> 8) * r2 + 8192) >> 12);
+	}
+}
+
+/*
+ * Element of the images LRU cache.
+ * `digest` is a copy of the parent mime part digest and is used as the cache
+ * key; `dct` is the stored signature bitset (RSPAMD_DCT_LEN bits).
+ */
+struct rspamd_image_cache_entry {
+	guchar digest[64];
+	guchar dct[RSPAMD_DCT_LEN / NBBY];
+};
+
+/* Value destructor for the images cache: entries are plain g_malloc'ed blocks */
+static void
+rspamd_image_cache_entry_dtor(gpointer p)
+{
+	g_free((struct rspamd_image_cache_entry *) p);
+}
+
+/*
+ * Hash function for the images cache: hashes the first
+ * rspamd_cryptobox_HASHBYTES bytes of the key with the global hash seed.
+ */
+static guint32
+rspamd_image_dct_hash(gconstpointer p)
+{
+	guint32 key_hash;
+
+	key_hash = rspamd_cryptobox_fast_hash(p, rspamd_cryptobox_HASHBYTES,
+										  rspamd_hash_seed());
+
+	return key_hash;
+}
+
+/*
+ * Equality function for the images cache: two keys are equal when their first
+ * rspamd_cryptobox_HASHBYTES bytes are the same.
+ */
+static gboolean
+rspamd_image_dct_equal(gconstpointer a, gconstpointer b)
+{
+	gint diff = memcmp(a, b, rspamd_cryptobox_HASHBYTES);
+
+	return diff == 0;
+}
+
+/*
+ * Creates the global images cache sized by `cfg->images_cache_size`.
+ * Keys get no destructor (NULL); values are released by
+ * rspamd_image_cache_entry_dtor().
+ */
+static void
+rspamd_image_create_cache(struct rspamd_config *cfg)
+{
+	images_hash = rspamd_lru_hash_new_full(
+		cfg->images_cache_size,
+		NULL,
+		rspamd_image_cache_entry_dtor,
+		rspamd_image_dct_hash,
+		rspamd_image_dct_equal);
+}
+
+/*
+ * Looks the image up in the cache by the digest of its parent part, creating
+ * the cache on first use. On a hit the cached signature is copied into a
+ * freshly allocated `img->dct` (released together with the task pool) and the
+ * image is marked as normalized.
+ * Returns TRUE on a cache hit, FALSE otherwise.
+ */
+static gboolean
+rspamd_image_check_hash(struct rspamd_task *task, struct rspamd_image *img)
+{
+	struct rspamd_image_cache_entry *cached;
+
+	if (images_hash == NULL) {
+		rspamd_image_create_cache(task->cfg);
+	}
+
+	cached = rspamd_lru_hash_lookup(images_hash, img->parent->digest,
+									task->tv.tv_sec);
+
+	if (!cached) {
+		return FALSE;
+	}
+
+	/* Take a private copy, as the cache element may be evicted by LRU later */
+	img->dct = g_malloc(RSPAMD_DCT_LEN / NBBY);
+	rspamd_mempool_add_destructor(task->task_pool, g_free,
+								  img->dct);
+	memcpy(img->dct, cached->dct, RSPAMD_DCT_LEN / NBBY);
+	img->is_normalized = TRUE;
+
+	return TRUE;
+}
+
+/*
+ * Stores the signature of a normalized image in the cache, keyed by the
+ * digest of its parent part. Nothing is done for images that are not
+ * normalized or that are already present in the cache.
+ */
+static void
+rspamd_image_save_hash(struct rspamd_task *task, struct rspamd_image *img)
+{
+	struct rspamd_image_cache_entry *entry;
+
+	if (!img->is_normalized) {
+		return;
+	}
+
+	entry = rspamd_lru_hash_lookup(images_hash, img->parent->digest,
+								   task->tv.tv_sec);
+
+	if (entry) {
+		/* Already cached */
+		return;
+	}
+
+	entry = g_malloc0(sizeof(*entry));
+	memcpy(entry->dct, img->dct, RSPAMD_DCT_LEN / NBBY);
+	memcpy(entry->digest, img->parent->digest, sizeof(entry->digest));
+
+	/* The key points into the entry itself, so both share one lifetime */
+	rspamd_lru_hash_insert(images_hash, entry->digest, entry,
+						   task->tv.tv_sec, 0);
+}
+
+#endif
+
+/**
+ * Computes a signature of an image and stores it in `img->dct` as a bitset of
+ * RSPAMD_DCT_LEN bits; `img->is_normalized` is set on success.
+ *
+ * The image is decoded with libgd, scaled to
+ * RSPAMD_NORMALIZED_DIM x RSPAMD_NORMALIZED_DIM, converted to grayscale and
+ * transformed block by block with rspamd_image_dct_block(). A bit is set for
+ * every coefficient that is not less than the average of its block.
+ *
+ * Nothing is done when the build has no usable libgd, when the image data is
+ * empty, larger than G_MAXINT32 or than `max_pic_size`, when either dimension
+ * does not exceed RSPAMD_NORMALIZED_DIM, or when the image cannot be decoded
+ * or scaled. Results are cached by the digest of the parent mime part.
+ *
+ * @param task current task
+ * @param img image to process
+ */
+void rspamd_image_normalize(struct rspamd_task *task, struct rspamd_image *img)
+{
+#ifdef USABLE_GD
+	gdImagePtr src = NULL, dst = NULL;
+	guint i, j, k, l;
+	gdouble *dct;
+
+	if (img->data->len == 0 || img->data->len > G_MAXINT32) {
+		return;
+	}
+
+	if (img->height <= RSPAMD_NORMALIZED_DIM ||
+		img->width <= RSPAMD_NORMALIZED_DIM) {
+		return;
+	}
+
+	if (img->data->len > task->cfg->max_pic_size) {
+		return;
+	}
+
+	/* A cache hit fills img->dct without decoding the image */
+	if (rspamd_image_check_hash(task, img)) {
+		return;
+	}
+
+	switch (img->type) {
+	case IMAGE_TYPE_JPG:
+		src = gdImageCreateFromJpegPtr(img->data->len, (void *) img->data->begin);
+		break;
+	case IMAGE_TYPE_PNG:
+		src = gdImageCreateFromPngPtr(img->data->len, (void *) img->data->begin);
+		break;
+	case IMAGE_TYPE_GIF:
+		src = gdImageCreateFromGifPtr(img->data->len, (void *) img->data->begin);
+		break;
+	case IMAGE_TYPE_BMP:
+		src = gdImageCreateFromBmpPtr(img->data->len, (void *) img->data->begin);
+		break;
+	default:
+		return;
+	}
+
+	if (src == NULL) {
+		msg_info_task("cannot load image of type %s from %T",
+					  rspamd_image_type_str(img->type), img->filename);
+		return;
+	}
+
+	gdImageSetInterpolationMethod(src, GD_BILINEAR_FIXED);
+
+	dst = gdImageScale(src, RSPAMD_NORMALIZED_DIM, RSPAMD_NORMALIZED_DIM);
+	gdImageDestroy(src);
+
+	if (dst == NULL) {
+		/* Scaling can fail (e.g. on allocation failure); dst must not be used */
+		msg_info_task("cannot scale image of type %s",
+					  rspamd_image_type_str(img->type));
+		return;
+	}
+
+	gdImageGrayScale(dst);
+
+	img->is_normalized = TRUE;
+	dct = g_malloc0(sizeof(gdouble) * RSPAMD_DCT_LEN);
+	img->dct = g_malloc0(RSPAMD_DCT_LEN / NBBY);
+	rspamd_mempool_add_destructor(task->task_pool, g_free,
+								  img->dct);
+
+	/*
+	 * Split the scaled image into 8x8 blocks and process them one by one:
+	 * each block is transformed, the average of its 64 coefficients is
+	 * calculated and a signature bit is set for every coefficient that
+	 * is >= this average.
+	 *
+	 * NOTE(review): a block writes 64 consecutive values starting at
+	 * i * RSPAMD_NORMALIZED_DIM + j, whilst neighbouring blocks start just
+	 * 8 values apart, so their outputs overlap. This is kept as is, since
+	 * changing the layout would change the resulting signatures.
+	 */
+	for (i = 0; i < RSPAMD_NORMALIZED_DIM; i += 8) {
+		for (j = 0; j < RSPAMD_NORMALIZED_DIM; j += 8) {
+			gint p[8][8];
+
+			for (k = 0; k < 8; k++) {
+				p[k][0] = gdImageGetPixel(dst, i + k, j);
+				p[k][1] = gdImageGetPixel(dst, i + k, j + 1);
+				p[k][2] = gdImageGetPixel(dst, i + k, j + 2);
+				p[k][3] = gdImageGetPixel(dst, i + k, j + 3);
+				p[k][4] = gdImageGetPixel(dst, i + k, j + 4);
+				p[k][5] = gdImageGetPixel(dst, i + k, j + 5);
+				p[k][6] = gdImageGetPixel(dst, i + k, j + 6);
+				p[k][7] = gdImageGetPixel(dst, i + k, j + 7);
+			}
+
+			rspamd_image_dct_block(p,
+								   dct + i * RSPAMD_NORMALIZED_DIM + j);
+
+			gdouble avg = 0.0;
+
+			/* Running (incremental) mean of the block coefficients */
+			for (k = 0; k < 8; k++) {
+				for (l = 0; l < 8; l++) {
+					gdouble x = *(dct +
+								  i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l);
+					avg += (x - avg) / (gdouble) (k * 8 + l + 1);
+				}
+			}
+
+
+			for (k = 0; k < 8; k++) {
+				for (l = 0; l < 8; l++) {
+					guint idx = i * RSPAMD_NORMALIZED_DIM + j + k * 8 + l;
+
+					if (dct[idx] >= avg) {
+						setbit(img->dct, idx);
+					}
+				}
+			}
+		}
+	}
+
+	gdImageDestroy(dst);
+	g_free(dct);
+	rspamd_image_save_hash(task, img);
+#endif
+}
+
+/**
+ * Detects the format of raw image data and extracts its dimensions.
+ * @param pool memory pool the result is allocated from
+ * @param data raw image bytes
+ * @return image description, or NULL if the format is unknown or the
+ * format specific parser has rejected the data
+ */
+struct rspamd_image *
+rspamd_maybe_process_image(rspamd_mempool_t *pool,
+						   rspamd_ftok_t *data)
+{
+	switch (detect_image_type(data)) {
+	case IMAGE_TYPE_PNG:
+		return process_png_image(pool, data);
+	case IMAGE_TYPE_JPG:
+		return process_jpg_image(pool, data);
+	case IMAGE_TYPE_GIF:
+		return process_gif_image(pool, data);
+	case IMAGE_TYPE_BMP:
+		return process_bmp_image(pool, data);
+	default:
+		/* IMAGE_TYPE_UNKNOWN and anything unexpected */
+		return NULL;
+	}
+}
+
+/*
+ * Parses the data of a mime part as an image. On success the part is marked
+ * as RSPAMD_MIME_PART_IMAGE, the image is linked to the part (and to the
+ * Content-Disposition filename if the part has one) and true is returned.
+ */
+static bool
+process_image(struct rspamd_task *task, struct rspamd_mime_part *part)
+{
+	struct rspamd_image *parsed;
+
+	parsed = rspamd_maybe_process_image(task->task_pool, &part->parsed_data);
+
+	if (parsed == NULL) {
+		return false;
+	}
+
+	msg_debug_images("detected %s image of size %ud x %ud",
+					 rspamd_image_type_str(parsed->type),
+					 parsed->width, parsed->height);
+
+	if (part->cd) {
+		parsed->filename = &part->cd->filename;
+	}
+
+	parsed->parent = part;
+
+	part->part_type = RSPAMD_MIME_PART_IMAGE;
+	part->specific.img = parsed;
+
+	return true;
+}
+
+/**
+ * Returns a static human readable name of an image type.
+ * @param type image type
+ * @return "PNG", "JPEG", "GIF" or "BMP"; "unknown" for any other value
+ */
+const gchar *
+rspamd_image_type_str(enum rspamd_image_type type)
+{
+	const gchar *label = "unknown";
+
+	switch (type) {
+	case IMAGE_TYPE_PNG:
+		label = "PNG";
+		break;
+	case IMAGE_TYPE_JPG:
+		label = "JPEG";
+		break;
+	case IMAGE_TYPE_GIF:
+		label = "GIF";
+		break;
+	case IMAGE_TYPE_BMP:
+		label = "BMP";
+		break;
+	default:
+		break;
+	}
+
+	return label;
+}
+
+/*
+ * Links an image part with the HTML <img> elements that reference it.
+ * The Content-Id header of the part is stripped of one leading '<' and one
+ * trailing '>' and looked up among the embedded images of every HTML text
+ * part. Matching HTML images are cross-linked with the image and inherit its
+ * width/height where their own values are zero.
+ */
+static void
+rspamd_image_process_part(struct rspamd_task *task, struct rspamd_mime_part *part)
+{
+	struct rspamd_image *img = (struct rspamd_image *) part->specific.img;
+	struct rspamd_mime_header *cid_hdr;
+	struct rspamd_mime_text_part *text_part;
+	struct html_image *linked;
+	const gchar *cid;
+	guint cid_len, idx;
+
+	if (!img) {
+		return;
+	}
+
+	/* Check Content-Id */
+	cid_hdr = rspamd_message_get_header_from_hash(part->raw_headers,
+												  "Content-Id", FALSE);
+
+	if (!cid_hdr) {
+		return;
+	}
+
+	cid = cid_hdr->decoded;
+
+	if (*cid == '<') {
+		cid++;
+	}
+
+	cid_len = strlen(cid);
+
+	if (cid_len == 0) {
+		return;
+	}
+
+	if (cid[cid_len - 1] == '>') {
+		/* Only the length is adjusted, the string itself is left intact */
+		cid_len--;
+	}
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), idx, text_part)
+	{
+		if (IS_TEXT_PART_HTML(text_part) && text_part->html != NULL) {
+			linked = rspamd_html_find_embedded_image(text_part->html, cid, cid_len);
+
+			if (linked != NULL) {
+				img->html_image = linked;
+				linked->embedded_image = img;
+
+				msg_debug_images("found linked image by cid: <%s>",
+								 cid);
+
+				/* Fill in the dimensions the HTML has not specified */
+				if (linked->height == 0) {
+					linked->height = img->height;
+				}
+
+				if (linked->width == 0) {
+					linked->width = img->width;
+				}
+			}
+		}
+	}
+}
+
+/**
+ * Links every image mime part of the message with the HTML images that
+ * reference it by Content-Id.
+ * @param task current task
+ */
+void rspamd_images_link(struct rspamd_task *task)
+{
+	struct rspamd_mime_part *cur_part;
+	guint idx;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), idx, cur_part)
+	{
+		if (cur_part->part_type == RSPAMD_MIME_PART_IMAGE) {
+			rspamd_image_process_part(task, cur_part);
+		}
+	}
+}
+\ No newline at end of file
diff --git a/src/libmime/images.h b/src/libmime/images.h
new file mode 100644
index 0000000..bf8b3be
--- /dev/null
+++ b/src/libmime/images.h
@@ -0,0 +1,76 @@
+#ifndef IMAGES_H_
+#define IMAGES_H_
+
+#include "config.h"
+#include "fstring.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct html_image;
+struct rspamd_task;
+struct rspamd_mime_part;
+
+#define RSPAMD_DCT_LEN (64 * 64)
+
+/*
+ * Image formats that can be recognised; IMAGE_TYPE_UNKNOWN stands for
+ * anything else. Use rspamd_image_type_str() to get a printable name.
+ */
+enum rspamd_image_type {
+	IMAGE_TYPE_PNG = 0,
+	IMAGE_TYPE_JPG,
+	IMAGE_TYPE_GIF,
+	IMAGE_TYPE_BMP,
+	IMAGE_TYPE_UNKNOWN
+};
+
+/*
+ * Description of an image found in a message
+ */
+struct rspamd_image {
+	struct rspamd_mime_part *parent; /* mime part the image has been found in */
+	rspamd_ftok_t *data;             /* raw image bytes (not copied) */
+	rspamd_ftok_t *filename;         /* Content-Disposition filename; left unset if the part has none */
+	struct html_image *html_image;   /* HTML image that references this one by Content-Id, if any */
+	enum rspamd_image_type type;
+	guint32 width;                   /* as read from the image header */
+	guint32 height;                  /* as read from the image header */
+	gboolean is_normalized;          /* TRUE once `dct` has been filled */
+	guchar *dct;                     /* signature bitset of RSPAMD_DCT_LEN bits */
+};
+
+/*
+ * Process images from a worker task
+ */
+void rspamd_images_process(struct rspamd_task *task);
+
+/**
+ * Process image if possible in a single mime part
+ * @param task
+ * @param part
+ * @return
+ */
+bool rspamd_images_process_mime_part_maybe(struct rspamd_task *task,
+										   struct rspamd_mime_part *part);
+
+/*
+ * Link embedded images to the HTML parts
+ */
+void rspamd_images_link(struct rspamd_task *task);
+
+/**
+ * Processes image in raw data
+ * @param pool memory pool the resulting structure is allocated from
+ * @param data raw image data
+ * @return image description or NULL if the data is not a supported image
+ */
+struct rspamd_image *rspamd_maybe_process_image(rspamd_mempool_t *pool,
+												rspamd_ftok_t *data);
+
+/*
+ * Get textual representation of an image's type
+ */
+const gchar *rspamd_image_type_str(enum rspamd_image_type type);
+
+/**
+ * Computes the signature of an image (`img->dct`) and sets
+ * `img->is_normalized` on success. Does nothing if the build has no usable
+ * libgd support.
+ * @param task current task
+ * @param img image to process
+ */
+void rspamd_image_normalize(struct rspamd_task *task, struct rspamd_image *img);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* IMAGES_H_ */
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
new file mode 100644
index 0000000..bdd0aad
--- /dev/null
+++ b/src/libmime/lang_detection.c
@@ -0,0 +1,2103 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_detection.h"
+#include "lang_detection_fasttext.h"
+#include "libserver/logger.h"
+#include "libcryptobox/cryptobox.h"
+#include "libutil/multipattern.h"
+#include "ucl.h"
+#include "khash.h"
+#include "libstemmer.h"
+
+#include <glob.h>
+#include <unicode/utf8.h>
+#include <unicode/utf16.h>
+#include <unicode/ucnv.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <math.h>
+
+static const gsize default_short_text_limit = 10;
+static const gsize default_words = 80;
+static const gdouble update_prob = 0.6;
+static const gchar *default_languages_path = RSPAMD_SHAREDIR "/languages";
+
+#undef EXTRA_LANGDET_DEBUG
+
+/*
+ * Pairs a language name with a unicode script flag
+ */
+struct rspamd_language_unicode_match {
+	const gchar *lang; /* language name, e.g. "el" */
+	gint unicode_code; /* one of the RSPAMD_UNICODE_* values */
+};
+
+/*
+ * List of languages detected by unicode scripts
+ * (each language is paired with the RSPAMD_UNICODE_* flag of its script)
+ */
+static const struct rspamd_language_unicode_match unicode_langs[] = {
+	{"el", RSPAMD_UNICODE_GREEK},
+	{"ml", RSPAMD_UNICODE_MALAYALAM},
+	{"te", RSPAMD_UNICODE_TELUGU},
+	{"ta", RSPAMD_UNICODE_TAMIL},
+	{"gu", RSPAMD_UNICODE_GUJARATI},
+	{"th", RSPAMD_UNICODE_THAI},
+	{"ka", RSPAMD_UNICODE_GEORGIAN},
+	{"si", RSPAMD_UNICODE_SINHALA},
+	{"hy", RSPAMD_UNICODE_ARMENIAN},
+	{"ja", RSPAMD_UNICODE_JP},
+	{"ko", RSPAMD_UNICODE_HANGUL},
+};
+
+/*
+ * Top languages
+ * NOTE(review): presumably these lists decide which languages get the
+ * RS_LANGUAGE_TIER0 / RS_LANGUAGE_TIER1 flags -- confirm at the point of use.
+ */
+static const gchar *tier0_langs[] = {
+	"en",
+};
+static const gchar *tier1_langs[] = {
+	"fr", "it", "de", "es", "nl",
+	"pt", "ru", "pl", "tk", "th", "ar"};
+
+/*
+ * Script families languages are grouped by. The values are also used as
+ * array indexes, RSPAMD_LANGUAGE_MAX being the number of categories.
+ */
+enum rspamd_language_category {
+	RSPAMD_LANGUAGE_LATIN = 0,
+	RSPAMD_LANGUAGE_CYRILLIC,
+	RSPAMD_LANGUAGE_DEVANAGARI,
+	RSPAMD_LANGUAGE_ARAB,
+	RSPAMD_LANGUAGE_MAX,
+};
+
+/*
+ * Description of a single known language
+ */
+struct rspamd_language_elt {
+	const gchar *name; /* e.g. "en" or "ru" */
+	gint flags;        /* enum rspamd_language_elt_flags */
+	enum rspamd_language_category category;
+	guint trigrams_words; /* third element of the `n_words` array of the language file */
+	guint stop_words;
+	gdouble mean;
+	gdouble std;
+	guint occurrences; /* total number of parts with this language */
+};
+
+/*
+ * Weight of one ngramm in one language
+ */
+struct rspamd_ngramm_elt {
+	struct rspamd_language_elt *elt; /* language this element belongs to */
+	gdouble prob;                    /* accumulated freq / total for this ngramm */
+};
+
+/*
+ * All languages a single ngramm has been seen in
+ */
+struct rspamd_ngramm_chain {
+	GPtrArray *languages; /* of struct rspamd_ngramm_elt * */
+	gdouble mean;
+	gdouble std;
+	gchar *utf; /* copy of the utf representation of the ngramm */
+};
+
+/*
+ * NOTE(review): presumably maps a range [start, stop) of pattern ids in the
+ * stop words multipattern to the language that owns them -- confirm the
+ * inclusiveness of `stop` at the point of use.
+ */
+struct rspamd_stop_word_range {
+	guint start;
+	guint stop;
+	struct rspamd_language_elt *elt;
+};
+
+/*
+ * Stop words of one language category: the compiled multipattern and the
+ * ranges attributing its patterns to languages
+ */
+struct rspamd_stop_word_elt {
+	struct rspamd_multipattern *mp;
+	GArray *ranges; /* of rspamd_stop_word_range */
+};
+
+/*
+ * Debug logging helpers for the "langdet" log module.
+ * msg_debug_lang_det() reads `task->task_pool->tag.uid` and therefore needs
+ * a `task` variable in scope; msg_debug_lang_det_cfg() reads
+ * `cfg->cfg_pool->tag.uid` and needs a `cfg` variable in scope.
+ */
+#define msg_debug_lang_det(...) rspamd_conditional_debug_fast(NULL, NULL,                                                 \
+															  rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \
+															  G_STRFUNC,                                                  \
+															  __VA_ARGS__)
+#define msg_debug_lang_det_cfg(...) rspamd_conditional_debug_fast(NULL, NULL,                                               \
+																  rspamd_langdet_log_id, "langdet", cfg->cfg_pool->tag.uid, \
+																  G_STRFUNC,                                                \
+																  __VA_ARGS__)
+
+INIT_LOG_MODULE_PUBLIC(langdet)
+
+/*
+ * Linear search for a language name in a table of unicode matches.
+ * Returns a pointer to the first element whose `lang` equals `key`, or NULL
+ * if there is no such element.
+ */
+static const struct rspamd_language_unicode_match *
+rspamd_language_search_unicode_match(const gchar *key,
+									 const struct rspamd_language_unicode_match *elts, size_t nelts)
+{
+	size_t idx = 0;
+
+	while (idx < nelts) {
+		const struct rspamd_language_unicode_match *cur = &elts[idx];
+
+		if (strcmp(cur->lang, key) == 0) {
+			return cur;
+		}
+
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Linear search for a string in an array of strings.
+ * Returns TRUE if `key` is equal to one of the first `nelts` elements.
+ */
+static gboolean
+rspamd_language_search_str(const gchar *key, const gchar *elts[], size_t nelts)
+{
+	size_t idx = 0;
+
+	while (idx < nelts) {
+		if (strcmp(elts[idx], key) == 0) {
+			return TRUE;
+		}
+
+		idx++;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Hash function for trigram keys: a key is an array of 3 UChar32 code points
+ */
+static guint
+rspamd_trigram_hash_func(gconstpointer key)
+{
+	guint trigram_hash;
+
+	trigram_hash = rspamd_cryptobox_fast_hash(key, 3 * sizeof(UChar32),
+											  rspamd_hash_seed());
+
+	return trigram_hash;
+}
+
+/*
+ * Equality function for trigram keys: compares 3 UChar32 code points bytewise
+ */
+static gboolean
+rspamd_trigram_equal_func(gconstpointer v, gconstpointer v2)
+{
+	gint diff = memcmp(v, v2, 3 * sizeof(UChar32));
+
+	return diff == 0;
+}
+
+/*
+ * Hash table types used by the detector. The fourth argument of KHASH_INIT
+ * selects a map (true: keys with values) or a set (false: keys only).
+ */
+/* trigram (3 x UChar32) -> chain of languages it occurs in */
+KHASH_INIT(rspamd_trigram_hash, const UChar32 *, struct rspamd_ngramm_chain, true,
+		   rspamd_trigram_hash_func, rspamd_trigram_equal_func);
+/* string key -> detection result */
+KHASH_INIT(rspamd_candidates_hash, const gchar *,
+		   struct rspamd_lang_detector_res *, true,
+		   rspamd_str_hash, rspamd_str_equal);
+/* set of tokens */
+KHASH_INIT(rspamd_stopwords_hash, rspamd_ftok_t *,
+		   char, false,
+		   rspamd_ftok_hash, rspamd_ftok_equal);
+
+/* string key -> language description */
+KHASH_INIT(rspamd_languages_hash, const gchar *, struct rspamd_language_elt *, true,
+		   rspamd_str_hash, rspamd_str_equal);
+/*
+ * State of the language detector.
+ * `trigrams` and `stop_words` are indexed by enum rspamd_language_category.
+ */
+struct rspamd_lang_detector {
+	khash_t(rspamd_languages_hash) * languages;
+	khash_t(rspamd_trigram_hash) * trigrams[RSPAMD_LANGUAGE_MAX]; /* trigrams frequencies */
+	struct rspamd_stop_word_elt stop_words[RSPAMD_LANGUAGE_MAX];
+	khash_t(rspamd_stopwords_hash) * stop_words_norm;
+	UConverter *uchar_converter;
+	gsize short_text_limit;
+	bool prefer_fasttext;
+	gsize total_occurrences; /* number of all languages found */
+	gpointer fasttext_detector;
+	ref_entry_t ref;
+};
+
+/*
+ * Converts `len` code points of `s` to lower case in place
+ */
+static void
+rspamd_language_detector_ucs_lowercase(UChar32 *s, gsize len)
+{
+	gsize left;
+
+	for (left = len; left > 0; left--, s++) {
+		*s = u_tolower(*s);
+	}
+}
+
+/*
+ * Checks whether all `len` code points of `s` are ASCII letters, ASCII digits
+ * or spaces. An empty input yields TRUE.
+ */
+static gboolean
+rspamd_language_detector_ucs_is_latin(const UChar32 *s, gsize len)
+{
+	gsize pos;
+
+	for (pos = 0; pos < len; pos++) {
+		/* The range check goes first: g_ascii_isalnum() is for ASCII values only */
+		if (s[pos] >= 128 || !(g_ascii_isalnum(s[pos]) || s[pos] == ' ')) {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * An ngramm as loaded from a language file. The code points are stored in
+ * the trailing array `s`, so the structure has to be allocated with extra
+ * room for them.
+ */
+struct rspamd_language_ucs_elt {
+	guint freq;       /* frequency; ngramms are sorted by it in descending order */
+	const gchar *utf; /* utf representation of the ngramm */
+	UChar32 s[0];     /* code points of the ngramm (zero-length trailing array) */
+};
+
+/*
+ * Registers an ngramm of a language in the trigrams hash `htb`.
+ *
+ * Only trigrams (len == 3) are supported, any other length is treated as a
+ * programming error. The weight added for the language is `freq / total`:
+ *  - unknown trigram: a new chain holding this language is inserted into
+ *    `htb`, keyed by `ucs->s` (the key is not copied);
+ *  - known trigram, known language: the weight is added to the existing one;
+ *  - known trigram, new language: the language is appended to the chain.
+ * All allocations are bound to `cfg->cfg_pool`.
+ */
+static void
+rspamd_language_detector_init_ngramm(struct rspamd_config *cfg,
+									 struct rspamd_lang_detector *d,
+									 struct rspamd_language_elt *lelt,
+									 struct rspamd_language_ucs_elt *ucs,
+									 guint len,
+									 guint freq,
+									 guint total,
+									 khash_t(rspamd_trigram_hash) * htb)
+{
+	struct rspamd_ngramm_chain *chain = NULL, fresh_chain;
+	struct rspamd_ngramm_elt *elt;
+	const gdouble prob = ((gdouble) freq) / ((gdouble) total);
+	khiter_t k;
+	guint i;
+
+	switch (len) {
+	case 3:
+		k = kh_get(rspamd_trigram_hash, htb, ucs->s);
+		if (k != kh_end(htb)) {
+			chain = &kh_value(htb, k);
+		}
+		break;
+	case 1:
+	case 2:
+	default:
+		g_assert_not_reached();
+		break;
+	}
+
+	if (chain != NULL) {
+		/* Known trigram: update the language if it is already in the chain */
+		PTR_ARRAY_FOREACH(chain->languages, i, elt)
+		{
+			if (strcmp(elt->elt->name, lelt->name) == 0) {
+				elt->prob += prob;
+
+				return;
+			}
+		}
+
+		/* Otherwise, append it */
+		elt = rspamd_mempool_alloc(cfg->cfg_pool, sizeof(*elt));
+		elt->elt = lelt;
+		elt->prob = prob;
+		g_ptr_array_add(chain->languages, elt);
+
+		return;
+	}
+
+	/* New element: build the chain on stack and copy it to the hash by value */
+	chain = &fresh_chain;
+	memset(chain, 0, sizeof(fresh_chain));
+	chain->languages = g_ptr_array_sized_new(32);
+	rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard,
+								  chain->languages);
+	chain->utf = rspamd_mempool_strdup(cfg->cfg_pool, ucs->utf);
+	elt = rspamd_mempool_alloc(cfg->cfg_pool, sizeof(*elt));
+	elt->elt = lelt;
+	elt->prob = prob;
+	g_ptr_array_add(chain->languages, elt);
+
+	/* `i` merely receives the insertion status, which is not examined */
+	k = kh_put(rspamd_trigram_hash, htb, ucs->s, &i);
+	kh_value(htb, k) = *chain;
+}
+
+/*
+ * Maps RSPAMD_UNICODE_* flags to a language category. Cyrillic is checked
+ * first, then devanagari, then arabic; anything else is treated as latin.
+ */
+static inline enum rspamd_language_category
+rspamd_language_detector_get_category(guint uflags)
+{
+	if (uflags & RSPAMD_UNICODE_CYRILLIC) {
+		return RSPAMD_LANGUAGE_CYRILLIC;
+	}
+
+	if (uflags & RSPAMD_UNICODE_DEVANAGARI) {
+		return RSPAMD_LANGUAGE_DEVANAGARI;
+	}
+
+	if (uflags & RSPAMD_UNICODE_ARABIC) {
+		return RSPAMD_LANGUAGE_ARAB;
+	}
+
+	return RSPAMD_LANGUAGE_LATIN;
+}
+
+/*
+ * Formats the tier1/tier0/latin flags of a language as a comma separated
+ * list, e.g. "tier1,latin"; an empty string is produced if none is set.
+ * The result lives in a static buffer that is overwritten by the next call.
+ */
+static const gchar *
+rspamd_language_detector_print_flags(struct rspamd_language_elt *elt)
+{
+	static gchar flags_buf[256];
+	goffset used = 0;
+
+	if (elt->flags & RS_LANGUAGE_TIER1) {
+		used += rspamd_snprintf(flags_buf + used, sizeof(flags_buf) - used, "tier1,");
+	}
+	if (elt->flags & RS_LANGUAGE_TIER0) {
+		used += rspamd_snprintf(flags_buf + used, sizeof(flags_buf) - used, "tier0,");
+	}
+	if (elt->flags & RS_LANGUAGE_LATIN) {
+		used += rspamd_snprintf(flags_buf + used, sizeof(flags_buf) - used, "latin,");
+	}
+
+	/* Cut the trailing comma, or terminate at the start if nothing was written */
+	flags_buf[used > 0 ? used - 1 : used] = '\0';
+
+	return flags_buf;
+}
+
+/*
+ * Sort comparator for arrays of `struct rspamd_language_ucs_elt *`:
+ * orders elements by frequency, the most frequent going first.
+ */
+static gint
+rspamd_language_detector_cmp_ngramm(gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_language_ucs_elt *lhs =
+		*(struct rspamd_language_ucs_elt *const *) a;
+	const struct rspamd_language_ucs_elt *rhs =
+		*(struct rspamd_language_ucs_elt *const *) b;
+
+	return (gint) rhs->freq - (gint) lhs->freq;
+}
+
+/*
+ * Loads a single language description file and registers the language in
+ * the detector.
+ *
+ * The file is parsed with UCL and must contain:
+ *  - "freq": an object mapping ngramms to their frequencies;
+ *  - "n_words": an array of exactly 3 elements (element #2 is stored as
+ *    `trigrams_words`);
+ *  - "type": one of "latin", "cyrillic", "arab" or "devanagari".
+ * An optional "flags" array may list "diacritics" and/or "ascii".
+ *
+ * The language name is the file name (after the last '/') cut at the first
+ * '.'. If `stop_words` has a key with this name, its elements are added to
+ * the per-category multipattern and, stemmed when a stemmer is available,
+ * to the normalised stop words hash.
+ *
+ * Only ngramms of exactly 3 code points are loaded. For languages that are
+ * not flagged as latin, latin-only trigrams are skipped.
+ *
+ * @param cfg config whose pool owns all long-living allocations
+ * @param d detector being populated
+ * @param path path to the language file; must contain a '/'
+ * @param stop_words optional object with per-language stop words
+ */
+static void
+rspamd_language_detector_read_file(struct rspamd_config *cfg,
+								   struct rspamd_lang_detector *d,
+								   const gchar *path,
+								   const ucl_object_t *stop_words)
+{
+	struct ucl_parser *parser;
+	ucl_object_t *top;
+	const ucl_object_t *freqs, *n_words, *cur, *type, *flags;
+	ucl_object_iter_t it = NULL;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	struct rspamd_language_elt *nelt;
+	struct rspamd_language_ucs_elt *ucs_elt;
+	khash_t(rspamd_trigram_hash) *htb = NULL;
+	gchar *pos;
+	guint total = 0, total_latin = 0, total_ngramms = 0, i, skipped,
+		  loaded;
+	gdouble mean = 0, std = 0, delta = 0, delta2 = 0, m2 = 0;
+	enum rspamd_language_category cat = RSPAMD_LANGUAGE_MAX;
+
+	parser = ucl_parser_new(UCL_PARSER_NO_FILEVARS);
+	if (!ucl_parser_add_file(parser, path)) {
+		msg_warn_config("cannot parse file %s: %s", path,
+						ucl_parser_get_error(parser));
+		ucl_parser_free(parser);
+
+		return;
+	}
+
+	top = ucl_parser_get_object(parser);
+	ucl_parser_free(parser);
+
+	freqs = ucl_object_lookup(top, "freq");
+
+	if (freqs == NULL) {
+		msg_warn_config("file %s has no 'freq' key", path);
+		ucl_object_unref(top);
+
+		return;
+	}
+
+	pos = strrchr(path, '/');
+	g_assert(pos != NULL);
+	nelt = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*nelt));
+	nelt->name = rspamd_mempool_strdup(cfg->cfg_pool, pos + 1);
+	/* Remove extension */
+	pos = strchr(nelt->name, '.');
+	g_assert(pos != NULL);
+	*pos = '\0';
+
+	n_words = ucl_object_lookup(top, "n_words");
+
+	if (n_words == NULL || ucl_object_type(n_words) != UCL_ARRAY ||
+		n_words->len != 3) {
+		msg_warn_config("cannot find n_words in language %s", nelt->name);
+		ucl_object_unref(top);
+
+		return;
+	}
+	else {
+		nelt->trigrams_words = ucl_object_toint(ucl_array_find_index(n_words,
+																	 2));
+	}
+
+	type = ucl_object_lookup(top, "type");
+
+	if (type == NULL || ucl_object_type(type) != UCL_STRING) {
+		msg_debug_config("cannot find type in language %s", nelt->name);
+		ucl_object_unref(top);
+
+		return;
+	}
+	else {
+		const gchar *stype = ucl_object_tostring(type);
+
+		if (strcmp(stype, "latin") == 0) {
+			cat = RSPAMD_LANGUAGE_LATIN;
+		}
+		else if (strcmp(stype, "cyrillic") == 0) {
+			cat = RSPAMD_LANGUAGE_CYRILLIC;
+		}
+		else if (strcmp(stype, "arab") == 0) {
+			cat = RSPAMD_LANGUAGE_ARAB;
+		}
+		else if (strcmp(stype, "devanagari") == 0) {
+			cat = RSPAMD_LANGUAGE_DEVANAGARI;
+		}
+		else {
+			msg_debug_config("unknown type %s of language %s", stype, nelt->name);
+			ucl_object_unref(top);
+
+			return;
+		}
+	}
+
+	flags = ucl_object_lookup(top, "flags");
+
+	if (flags != NULL && ucl_object_type(flags) == UCL_ARRAY) {
+		ucl_object_iter_t flags_it = NULL;
+		const ucl_object_t *flag_obj;
+
+		while ((flag_obj = ucl_object_iterate(flags, &flags_it, true)) != NULL) {
+			const gchar *fl = ucl_object_tostring(flag_obj);
+
+			/*
+			 * The string, not the iterated object, must be checked here:
+			 * the object is never NULL inside the loop, whilst the string
+			 * is NULL for non-string elements.
+			 */
+			if (fl) {
+				if (strcmp(fl, "diacritics") == 0) {
+					nelt->flags |= RS_LANGUAGE_DIACRITICS;
+				}
+				else if (strcmp(fl, "ascii") == 0) {
+					nelt->flags |= RS_LANGUAGE_ASCII;
+				}
+				else {
+					msg_debug_config("unknown flag %s of language %s", fl, nelt->name);
+				}
+			}
+			else {
+				msg_debug_config("unknown flags type of language %s", nelt->name);
+			}
+		}
+	}
+
+	if (stop_words) {
+		const ucl_object_t *specific_stop_words;
+
+		specific_stop_words = ucl_object_lookup(stop_words, nelt->name);
+
+		if (specific_stop_words) {
+			struct sb_stemmer *stem = NULL;
+			it = NULL;
+			const ucl_object_t *w;
+			guint start, stop;
+
+			stem = sb_stemmer_new(nelt->name, "UTF_8");
+			start = rspamd_multipattern_get_npatterns(d->stop_words[cat].mp);
+
+			while ((w = ucl_object_iterate(specific_stop_words, &it, true)) != NULL) {
+				gsize wlen = 0;
+				const char *word = ucl_object_tolstring(w, &wlen);
+				const char *saved;
+				guint mp_flags = RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8;
+				gint rc;
+				rspamd_ftok_t *tok;
+				gchar *dst;
+
+				if (word == NULL) {
+					/* Not a string: there is nothing sane to add */
+					continue;
+				}
+
+				if (rspamd_multipattern_has_hyperscan()) {
+					mp_flags |= RSPAMD_MULTIPATTERN_RE;
+				}
+
+				rspamd_multipattern_add_pattern_len(d->stop_words[cat].mp,
+													word, wlen,
+													mp_flags);
+				nelt->stop_words++;
+
+				/* Also lemmatise and store normalised */
+				saved = word;
+
+				if (stem) {
+					const char *nw = sb_stemmer_stem(stem, word, wlen);
+
+					if (nw) {
+						saved = nw;
+						wlen = strlen(nw);
+					}
+				}
+
+				/* Token header and its text live in a single pool chunk */
+				tok = rspamd_mempool_alloc(cfg->cfg_pool,
+										   sizeof(*tok) + wlen + 1);
+				dst = ((gchar *) tok) + sizeof(*tok);
+				rspamd_strlcpy(dst, saved, wlen + 1);
+				tok->begin = dst;
+				tok->len = wlen;
+
+				kh_put(rspamd_stopwords_hash, d->stop_words_norm,
+					   tok, &rc);
+			}
+
+			if (stem) {
+				sb_stemmer_delete(stem);
+			}
+
+			stop = rspamd_multipattern_get_npatterns(d->stop_words[cat].mp);
+
+			/* Remember which pattern ids belong to this language */
+			struct rspamd_stop_word_range r;
+
+			r.start = start;
+			r.stop = stop;
+			r.elt = nelt;
+
+			g_array_append_val(d->stop_words[cat].ranges, r);
+			it = NULL;
+		}
+	}
+
+	nelt->category = cat;
+	htb = d->trigrams[cat];
+
+	GPtrArray *ngramms;
+	guint nsym;
+
+	if (rspamd_language_search_str(nelt->name, tier1_langs,
+								   G_N_ELEMENTS(tier1_langs))) {
+		nelt->flags |= RS_LANGUAGE_TIER1;
+	}
+
+	if (rspamd_language_search_str(nelt->name, tier0_langs,
+								   G_N_ELEMENTS(tier0_langs))) {
+		nelt->flags |= RS_LANGUAGE_TIER0;
+	}
+
+	it = NULL;
+	ngramms = g_ptr_array_sized_new(freqs->len);
+	i = 0;
+	skipped = 0;
+	loaded = 0;
+
+	while ((cur = ucl_object_iterate(freqs, &it, true)) != NULL) {
+		const gchar *key;
+		gsize keylen;
+		guint freq;
+
+		key = ucl_object_keyl(cur, &keylen);
+		freq = ucl_object_toint(cur);
+
+		/* Running mean and sum of squared deviations over all frequencies */
+		i++;
+		delta = freq - mean;
+		mean += delta / i;
+		delta2 = freq - mean;
+		m2 += delta * delta2;
+
+		if (key != NULL) {
+			UChar32 *cur_ucs;
+			const char *end = key + keylen, *cur_utf = key;
+
+			/* One UChar32 per input byte is always enough */
+			ucs_elt = rspamd_mempool_alloc(cfg->cfg_pool,
+										   sizeof(*ucs_elt) + (keylen + 1) * sizeof(UChar32));
+
+			cur_ucs = ucs_elt->s;
+			nsym = 0;
+			uc_err = U_ZERO_ERROR;
+
+			while (cur_utf < end) {
+				*cur_ucs++ = ucnv_getNextUChar(d->uchar_converter, &cur_utf,
+											   end, &uc_err);
+				if (!U_SUCCESS(uc_err)) {
+					break;
+				}
+
+				nsym++;
+			}
+
+			if (!U_SUCCESS(uc_err)) {
+				msg_warn_config("cannot convert key %*s to unicode: %s",
+								(gint) keylen, key, u_errorName(uc_err));
+
+				continue;
+			}
+
+			ucs_elt->utf = key;
+			rspamd_language_detector_ucs_lowercase(ucs_elt->s, nsym);
+
+			if (nsym == 3) {
+				g_ptr_array_add(ngramms, ucs_elt);
+			}
+			else {
+				continue;
+			}
+
+			if (rspamd_language_detector_ucs_is_latin(ucs_elt->s, nsym)) {
+				total_latin++;
+			}
+
+			ucs_elt->freq = freq;
+
+			total_ngramms++;
+		}
+	}
+
+	/* A sample deviation needs at least two samples, otherwise it is 0/0 */
+	if (i > 1) {
+		std = sqrt(m2 / (i - 1));
+	}
+	else {
+		std = 0;
+	}
+
+	if (total_latin >= total_ngramms / 3) {
+		nelt->flags |= RS_LANGUAGE_LATIN;
+	}
+
+	nsym = 3;
+
+	total = 0;
+	PTR_ARRAY_FOREACH(ngramms, i, ucs_elt)
+	{
+
+		if (!(nelt->flags & RS_LANGUAGE_LATIN) &&
+			rspamd_language_detector_ucs_is_latin(ucs_elt->s, nsym)) {
+			ucs_elt->freq = 0;
+			/* Skip latin ngramm for non-latin language to avoid garbage */
+			skipped++;
+			continue;
+		}
+
+		/* Now, discriminate low frequency ngramms */
+
+		total += ucs_elt->freq;
+		loaded++;
+	}
+
+	g_ptr_array_sort(ngramms, rspamd_language_detector_cmp_ngramm);
+
+	PTR_ARRAY_FOREACH(ngramms, i, ucs_elt)
+	{
+		/* Zero frequency marks ngramms that have been skipped above */
+		if (ucs_elt->freq > 0) {
+			rspamd_language_detector_init_ngramm(cfg, d,
+												 nelt, ucs_elt, nsym,
+												 ucs_elt->freq, total, htb);
+		}
+	}
+
+#ifdef EXTRA_LANGDET_DEBUG
+	/* Useful for debug: print at most 10 of the most frequent ngramms */
+	for (i = 0; i < 10 && i < ngramms->len; i++) {
+		ucs_elt = g_ptr_array_index(ngramms, i);
+
+		msg_debug_lang_det_cfg("%s -> %s: %d", nelt->name,
+							   ucs_elt->utf, ucs_elt->freq);
+	}
+#endif
+
+	g_ptr_array_free(ngramms, TRUE);
+	nelt->mean = mean;
+	nelt->std = std;
+
+	msg_debug_lang_det_cfg("loaded %s language, %d trigrams, "
+						   "%d ngramms loaded; "
+						   "std=%.2f, mean=%.2f, skipped=%d, loaded=%d, stop_words=%d; "
+						   "(%s)",
+						   nelt->name,
+						   (gint) nelt->trigrams_words,
+						   total,
+						   std, mean,
+						   skipped, loaded, nelt->stop_words,
+						   rspamd_language_detector_print_flags(nelt));
+
+	int ret;
+	khiter_t k = kh_put(rspamd_languages_hash, d->languages, nelt->name, &ret);
+	g_assert(ret > 0); /* must be unique */
+	kh_value(d->languages, k) = nelt;
+	ucl_object_unref(top);
+}
+
+/*
+ * Checks whether the UCL array `ar` has a string element that matches
+ * `str` according to rspamd_strcase_equal. A NULL or empty array never
+ * matches; non-string elements are ignored.
+ */
+static gboolean
+rspamd_ucl_array_find_str(const gchar *str, const ucl_object_t *ar)
+{
+	ucl_object_iter_t iter = NULL;
+	const ucl_object_t *item;
+	gboolean found = FALSE;
+
+	if (ar != NULL && ar->len != 0) {
+		/* Stop iterating as soon as the first match is seen */
+		while (!found &&
+			   (item = ucl_object_iterate(ar, &iter, true)) != NULL) {
+			if (ucl_object_type(item) != UCL_STRING) {
+				continue;
+			}
+
+			if (rspamd_strcase_equal(ucl_object_tostring(item), str)) {
+				found = TRUE;
+			}
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Post-processes the list of languages attached to a single ngramm.
+ *
+ * For chains with more than 3 languages the mean and the sample standard
+ * deviation of the probabilities are stored in the chain, and all languages
+ * with a probability below the mean are removed from it.
+ * For shorter chains each probability is multiplied by 4.
+ *
+ * @param cfg config, used for logging
+ * @param chain chain to process; its `languages` array is modified in place
+ */
+static void
+rspamd_language_detector_process_chain(struct rspamd_config *cfg,
+									   struct rspamd_ngramm_chain *chain)
+{
+	struct rspamd_ngramm_elt *elt;
+	guint i;
+	gdouble delta, mean = 0, delta2, m2 = 0, std;
+
+	if (chain->languages->len > 3) {
+		PTR_ARRAY_FOREACH(chain->languages, i, elt)
+		{
+			delta = elt->prob - mean;
+			mean += delta / (i + 1);
+			delta2 = elt->prob - mean;
+			m2 += delta * delta2;
+		}
+
+		/* Here i equals to the number of elements, which is at least 4 */
+		std = sqrt(m2 / (i - 1));
+		chain->mean = mean;
+		chain->std = std;
+
+		/*
+		 * Now, filter elements that are lower than mean.
+		 *
+		 * g_ptr_array_remove_index_fast moves the last element into the
+		 * freed slot, so we walk backwards: the element that is moved has
+		 * been already examined and no element is skipped.
+		 */
+		for (i = chain->languages->len; i > 0; i--) {
+			elt = g_ptr_array_index(chain->languages, i - 1);
+
+			if (elt->prob < mean) {
+#ifdef EXTRA_LANGDET_DEBUG
+				msg_debug_lang_det_cfg("remove %s from %s; prob: %.4f; mean: %.4f, std: %.4f",
+									   elt->elt->name, chain->utf, elt->prob, mean, std);
+#endif
+				g_ptr_array_remove_index_fast(chain->languages, i - 1);
+			}
+		}
+	}
+	else {
+		/* We have a unique ngramm, increase its weight */
+		PTR_ARRAY_FOREACH(chain->languages, i, elt)
+		{
+			elt->prob *= 4.0;
+#ifdef EXTRA_LANGDET_DEBUG
+			msg_debug_lang_det_cfg("increase weight of %s in %s; prob: %.4f",
+								   elt->elt->name, chain->utf, elt->prob);
+#endif
+		}
+	}
+}
+
+/*
+ * Releases everything owned by the detector: per-category trigram hashes,
+ * stop words multipatterns and ranges, the languages hash, the normalised
+ * stop words hash and the fasttext detector. The structure itself is not
+ * freed here. NULL is accepted and ignored.
+ */
+static void
+rspamd_language_detector_dtor(struct rspamd_lang_detector *d)
+{
+	guint cat;
+
+	if (d == NULL) {
+		return;
+	}
+
+	for (cat = 0; cat < RSPAMD_LANGUAGE_MAX; cat++) {
+		kh_destroy(rspamd_trigram_hash, d->trigrams[cat]);
+		rspamd_multipattern_destroy(d->stop_words[cat].mp);
+		g_array_free(d->stop_words[cat].ranges, TRUE);
+	}
+
+	if (d->languages != NULL) {
+		kh_destroy(rspamd_languages_hash, d->languages);
+	}
+
+	kh_destroy(rspamd_stopwords_hash, d->stop_words_norm);
+	rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
+}
+
+/*
+ * Creates a language detector from the `lang_detection` section of the
+ * configuration.
+ *
+ * Recognised options:
+ *  - "languages": directory with language files (default_languages_path
+ *    is used otherwise);
+ *  - "short_text_limit": overrides default_short_text_limit;
+ *  - "languages_enable" / "languages_disable": arrays of file names;
+ *  - "prefer_fasttext": boolean, true by default.
+ *
+ * Stop words are read from `<languages>/stop_words`, languages are read
+ * from all `<languages>/` files matching the `*.json` glob.
+ *
+ * @param cfg config; the detector is allocated from its pool and a pool
+ * destructor that drops the reference is registered
+ * @return new detector or NULL if no language files have been found
+ */
+struct rspamd_lang_detector *
+rspamd_language_detector_init(struct rspamd_config *cfg)
+{
+	const ucl_object_t *section, *elt, *languages_enable = NULL,
+									   *languages_disable = NULL;
+	const gchar *languages_path = default_languages_path;
+	glob_t gl;
+	size_t i, short_text_limit = default_short_text_limit, total = 0;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	GString *languages_pattern;
+	struct rspamd_ngramm_chain *chain;
+	gchar *fname;
+	struct rspamd_lang_detector *ret = NULL;
+	struct ucl_parser *parser;
+	ucl_object_t *stop_words;
+	bool prefer_fasttext = true;
+
+	section = ucl_object_lookup(cfg->cfg_ucl_obj, "lang_detection");
+
+	if (section != NULL) {
+		elt = ucl_object_lookup(section, "languages");
+
+		if (elt) {
+			languages_path = ucl_object_tostring(elt);
+		}
+
+		elt = ucl_object_lookup(section, "short_text_limit");
+
+		if (elt) {
+			short_text_limit = ucl_object_toint(elt);
+		}
+
+		languages_enable = ucl_object_lookup(section, "languages_enable");
+		languages_disable = ucl_object_lookup(section, "languages_disable");
+
+		elt = ucl_object_lookup(section, "prefer_fasttext");
+		if (elt) {
+			prefer_fasttext = ucl_object_toboolean(elt);
+		}
+	}
+
+	languages_pattern = g_string_sized_new(PATH_MAX);
+	rspamd_printf_gstring(languages_pattern, "%s/stop_words", languages_path);
+	parser = ucl_parser_new(UCL_PARSER_DEFAULT);
+
+	if (ucl_parser_add_file(parser, languages_pattern->str)) {
+		stop_words = ucl_parser_get_object(parser);
+	}
+	else {
+		msg_err_config("cannot read stop words from %s: %s",
+					   languages_pattern->str,
+					   ucl_parser_get_error(parser));
+		stop_words = NULL;
+	}
+
+	ucl_parser_free(parser);
+	/* Reuse the same buffer for the glob pattern */
+	languages_pattern->len = 0;
+
+	rspamd_printf_gstring(languages_pattern, "%s/*.json", languages_path);
+	memset(&gl, 0, sizeof(gl));
+
+	if (glob(languages_pattern->str, 0, NULL, &gl) != 0) {
+		msg_err_config("cannot read any files matching %v", languages_pattern);
+		goto end;
+	}
+
+	ret = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*ret));
+	ret->languages = kh_init(rspamd_languages_hash);
+	kh_resize(rspamd_languages_hash, ret->languages, gl.gl_pathc);
+	ret->uchar_converter = rspamd_get_utf8_converter();
+	ret->short_text_limit = short_text_limit;
+	ret->stop_words_norm = kh_init(rspamd_stopwords_hash);
+	ret->prefer_fasttext = prefer_fasttext;
+
+	/* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */
+	for (i = 0; i < RSPAMD_LANGUAGE_MAX; i++) {
+		ret->trigrams[i] = kh_init(rspamd_trigram_hash);
+#ifdef WITH_HYPERSCAN
+		ret->stop_words[i].mp = rspamd_multipattern_create(
+			RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
+			RSPAMD_MULTIPATTERN_RE);
+#else
+		ret->stop_words[i].mp = rspamd_multipattern_create(
+			RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8);
+#endif
+
+		ret->stop_words[i].ranges = g_array_new(FALSE, FALSE,
+												sizeof(struct rspamd_stop_word_range));
+	}
+
+	g_assert(uc_err == U_ZERO_ERROR);
+
+	for (i = 0; i < gl.gl_pathc; i++) {
+		fname = g_path_get_basename(gl.gl_pathv[i]);
+
+		/*
+		 * NOTE(review): as written, a file is skipped only if it is listed
+		 * in languages_disable AND languages_enable is defined AND does not
+		 * list it - confirm that this precedence is intended.
+		 */
+		if (!rspamd_ucl_array_find_str(fname, languages_disable) ||
+			(languages_enable == NULL ||
+			 rspamd_ucl_array_find_str(fname, languages_enable))) {
+			rspamd_language_detector_read_file(cfg, ret, gl.gl_pathv[i],
+											   stop_words);
+		}
+		else {
+			msg_info_config("skip language file %s: disabled", fname);
+		}
+
+		g_free(fname);
+	}
+
+	for (i = 0; i < RSPAMD_LANGUAGE_MAX; i++) {
+		GError *err = NULL;
+		khiter_t k;
+
+		/*
+		 * Chains are stored in the hash by value, so they must be processed
+		 * in place: mean/std written to a copy would be lost.
+		 */
+		for (k = kh_begin(ret->trigrams[i]); k != kh_end(ret->trigrams[i]); ++k) {
+			if (kh_exist(ret->trigrams[i], k)) {
+				chain = &kh_value(ret->trigrams[i], k);
+				rspamd_language_detector_process_chain(cfg, chain);
+			}
+		}
+
+		if (!rspamd_multipattern_compile(ret->stop_words[i].mp, &err)) {
+			msg_err_config("cannot compile stop words for %z language group: %e",
+						   i, err);
+			g_error_free(err);
+		}
+
+		total += kh_size(ret->trigrams[i]);
+	}
+
+	ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
+	char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);
+
+	msg_info_config("loaded %d languages, "
+					"%d trigrams; %s",
+					(gint) kh_size(ret->languages),
+					(gint) total, fasttext_status);
+	g_free(fasttext_status);
+
+	REF_INIT_RETAIN(ret, rspamd_language_detector_dtor);
+	rspamd_mempool_add_destructor(cfg->cfg_pool,
+								  (rspamd_mempool_destruct_t) rspamd_language_detector_unref,
+								  ret);
+
+end:
+	/* Stop words are needed merely during the load, drop them on all paths */
+	if (stop_words) {
+		ucl_object_unref(stop_words);
+	}
+
+	if (gl.gl_pathc > 0) {
+		globfree(&gl);
+	}
+
+	g_string_free(languages_pattern, TRUE);
+
+	return ret;
+}
+
+/*
+ * Selects `nwords` token indexes from `ucs_tokens` and stores them in
+ * `offsets_out`.
+ *
+ * @param ucs_tokens array of rspamd_stat_token_t, at least `nwords` long
+ * @param nwords number of indexes to select, must not be 0
+ * @param offsets_out output array with room for `nwords` elements
+ * @param seed state of the PRNG, updated on each draw
+ */
+static void
+rspamd_language_detector_random_select(GArray *ucs_tokens, guint nwords,
+									   goffset *offsets_out,
+									   guint64 *seed)
+{
+	guint step_len, remainder, i, out_idx;
+	guint64 coin, sel;
+	rspamd_stat_token_t *tok;
+
+	g_assert(nwords != 0);
+	g_assert(offsets_out != NULL);
+	g_assert(ucs_tokens->len >= nwords);
+	/*
+	 * We split input array into `nwords` parts. For each part we randomly select
+	 * an element from this particular split. Here is an example:
+	 *
+	 * nwords=2, input_len=5
+	 *
+	 * w1 w2 w3   w4 w5
+	 * ^          ^
+	 * part1      part2
+	 *  vv         vv
+	 *  w2         w5
+	 *
+	 * So we have 2 output words from 5 input words selected randomly within
+	 * their splits. It is not uniform distribution but it seems to be better
+	 * to include words from different text parts
+	 */
+	step_len = ucs_tokens->len / nwords;
+	remainder = ucs_tokens->len % nwords;
+
+	/*
+	 * The first part also takes the remainder. Its slot must be consumed
+	 * here, otherwise the first iteration of the loop below overwrites it
+	 * and the last output slot is never filled.
+	 */
+	out_idx = 0;
+	coin = rspamd_random_uint64_fast_seed(seed);
+	sel = coin % (step_len + remainder);
+	offsets_out[out_idx++] = sel;
+
+	/* The remaining nwords - 1 parts have exactly step_len elements each */
+	for (i = step_len + remainder; i < ucs_tokens->len;
+		 i += step_len, out_idx++) {
+		guint ntries = 0;
+		coin = rspamd_random_uint64_fast_seed(seed);
+		sel = (coin % step_len) + i;
+
+		for (;;) {
+			tok = &g_array_index(ucs_tokens, rspamd_stat_token_t, sel);
+			/* Filter bad tokens */
+
+			if (tok->unicode.len >= 2 &&
+				!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_EXCEPTION) &&
+				u_isalpha(tok->unicode.begin[0]) &&
+				u_isalpha(tok->unicode.begin[tok->unicode.len - 1])) {
+				offsets_out[out_idx] = sel;
+				break;
+			}
+			else {
+				ntries++;
+				coin = rspamd_random_uint64_fast_seed(seed);
+
+				if (ntries < step_len) {
+					/* Try another token from the same part */
+					sel = (coin % step_len) + i;
+				}
+				else if (ntries < ucs_tokens->len) {
+					/* Give up on this part and try any token */
+					sel = coin % ucs_tokens->len;
+				}
+				else {
+					/* Too many attempts: accept whatever we have */
+					offsets_out[out_idx] = sel;
+					break;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Fills `window` with the next ngramm of `wlen` code points taken from the
+ * unicode representation of a token.
+ *
+ * For wlen > 1 the word is treated as if it were surrounded by spaces: the
+ * first window starts with ' ' and the last one ends with ' '.
+ *
+ * @param tok token to split
+ * @param window output buffer of at least `wlen` elements
+ * @param wlen ngramm length
+ * @param cur_off offset returned by the previous call, 0 for the first one
+ * @return offset to pass to the next call or -1 if there are no more ngramms
+ */
+static goffset
+rspamd_language_detector_next_ngramm(rspamd_stat_token_t *tok, UChar32 *window,
+									 guint wlen, goffset cur_off)
+{
+	guint i;
+
+	if (wlen > 1) {
+		if (tok->unicode.len < wlen - 1) {
+			/* Too short to fill even the first window */
+			return -1;
+		}
+
+		/* Deal with spaces at the beginning and ending */
+
+		if (cur_off == 0) {
+			window[0] = (UChar32) ' ';
+
+			for (i = 0; i < wlen - 1; i++) {
+				window[i + 1] = tok->unicode.begin[i];
+			}
+		}
+		else if (cur_off + wlen == tok->unicode.len + 1) {
+			/* Add trailing space */
+			for (i = 0; i < wlen - 1; i++) {
+				window[i] = tok->unicode.begin[cur_off + i];
+			}
+			window[wlen - 1] = (UChar32) ' ';
+		}
+		else if (cur_off + wlen > tok->unicode.len + 1) {
+			/* No more fun */
+			return -1;
+		}
+		else {
+			/* Normal case */
+			for (i = 0; i < wlen; i++) {
+				window[i] = tok->unicode.begin[cur_off + i];
+			}
+		}
+	}
+	else {
+		/* The bound must match the buffer that is indexed below */
+		if (tok->unicode.len <= (gsize) cur_off) {
+			return -1;
+		}
+
+		window[0] = tok->unicode.begin[cur_off];
+	}
+
+	return cur_off + 1;
+}
+
+/*
+ * Looks a single ngramm up in `trigrams` and, for every language of the
+ * found chain whose probability is not below the chain's mean, adds this
+ * probability to the language candidate (creating the candidate in the
+ * task pool when it is seen for the first time).
+ */
+static void
+rspamd_language_detector_process_ngramm_full(struct rspamd_task *task,
+											 struct rspamd_lang_detector *d,
+											 UChar32 *window,
+											 khash_t(rspamd_candidates_hash) * candidates,
+											 khash_t(rspamd_trigram_hash) * trigrams)
+{
+	struct rspamd_ngramm_chain *chain;
+	struct rspamd_ngramm_elt *elt;
+	struct rspamd_lang_detector_res *cand;
+	khiter_t k;
+	guint i;
+	gint ret;
+	gdouble prob;
+
+	k = kh_get(rspamd_trigram_hash, trigrams, window);
+
+	if (k == kh_end(trigrams)) {
+		/* Unknown ngramm, nothing to account */
+		return;
+	}
+
+	chain = &kh_value(trigrams, k);
+
+	PTR_ARRAY_FOREACH(chain->languages, i, elt)
+	{
+		prob = elt->prob;
+
+		if (prob < chain->mean) {
+			continue;
+		}
+
+		k = kh_get(rspamd_candidates_hash, candidates, elt->elt->name);
+		cand = (k != kh_end(candidates)) ? kh_value(candidates, k) : NULL;
+
+#ifdef NGRAMMS_DEBUG
+		msg_err("gramm: %s, lang: %s, prob: %.3f", chain->utf,
+				elt->elt->name, log2(elt->prob));
+#endif
+		if (cand != NULL) {
+			/* Update guess */
+			cand->prob += prob;
+			continue;
+		}
+
+		/* First hit for this language */
+		cand = rspamd_mempool_alloc(task->task_pool, sizeof(*cand));
+		cand->elt = elt->elt;
+		cand->lang = elt->elt->name;
+		cand->prob = prob;
+
+		k = kh_put(rspamd_candidates_hash, candidates, elt->elt->name,
+				   &ret);
+		kh_value(candidates, k) = cand;
+	}
+}
+
+/*
+ * Splits a token into trigrams and accounts each of them in `candidates`
+ * using the supplied trigrams hash.
+ */
+static void
+rspamd_language_detector_detect_word(struct rspamd_task *task,
+									 struct rspamd_lang_detector *d,
+									 rspamd_stat_token_t *tok,
+									 khash_t(rspamd_candidates_hash) * candidates,
+									 khash_t(rspamd_trigram_hash) * trigrams)
+{
+	UChar32 window[3];
+	const guint wlen = G_N_ELEMENTS(window);
+	goffset off;
+
+	/* Split words */
+	for (off = rspamd_language_detector_next_ngramm(tok, window, wlen, 0);
+		 off != -1;
+		 off = rspamd_language_detector_next_ngramm(tok, window, wlen, off)) {
+		rspamd_language_detector_process_ngramm_full(task,
+													 d, window, candidates, trigrams);
+	}
+}
+
+/* Candidates whose log2 probability is below this value are excluded */
+static const gdouble cutoff_limit = -8.0;
+
+/*
+ * Converts the accumulated frequency of a candidate to a log2 probability
+ * and excludes (marks with NAN) candidates that have a zero frequency or
+ * fall below the cutoff limit. The best remaining probability is tracked
+ * in `max_prob`; `filtered` counts the excluded candidates.
+ */
+static inline void
+rspamd_language_detector_filter_step1(struct rspamd_task *task,
+									  struct rspamd_lang_detector_res *cand,
+									  gdouble *max_prob, guint *filtered)
+{
+	if (isnan(cand->prob)) {
+		/* Already excluded */
+		return;
+	}
+
+	if (cand->prob == 0) {
+		cand->prob = NAN;
+		msg_debug_lang_det(
+			"exclude language %s",
+			cand->lang);
+		(*filtered)++;
+
+		return;
+	}
+
+	cand->prob = log2(cand->prob);
+
+	if (cand->prob < cutoff_limit) {
+		msg_debug_lang_det(
+			"exclude language %s: %.3f, cutoff limit: %.3f",
+			cand->lang, cand->prob, cutoff_limit);
+		cand->prob = NAN;
+		(*filtered)++;
+	}
+	else if (cand->prob > *max_prob) {
+		*max_prob = cand->prob;
+	}
+}
+
+/*
+ * Excludes (marks with NAN) a candidate that is too far from the best one.
+ *
+ * Probabilities are logarithmic (base 2), so a difference above 1 means
+ * that the candidate is more than 2 times less probable than the best one.
+ */
+static inline void
+rspamd_language_detector_filter_step2(struct rspamd_task *task,
+									  struct rspamd_lang_detector_res *cand,
+									  gdouble max_prob, guint *filtered)
+{
+	if (isnan(cand->prob) || !(max_prob - cand->prob > 1)) {
+		/* Either excluded before or close enough to the leader */
+		return;
+	}
+
+	msg_debug_lang_det("exclude language %s: %.3f (%.3f max)",
+					   cand->lang, cand->prob, max_prob);
+	cand->prob = NAN;
+	(*filtered)++;
+}
+
+/*
+ * Runs both filtering passes over all candidates: the first one converts
+ * probabilities to logarithms and finds the maximum, the second one drops
+ * candidates that are too far from that maximum.
+ */
+static void
+rspamd_language_detector_filter_negligible(struct rspamd_task *task,
+										   khash_t(rspamd_candidates_hash) * candidates)
+{
+	struct rspamd_lang_detector_res *res;
+	gdouble best = -(G_MAXDOUBLE);
+	guint nremoved = 0;
+
+	kh_foreach_value(candidates, res, {
+		rspamd_language_detector_filter_step1(task, res, &best, &nremoved);
+	});
+
+	kh_foreach_value(candidates, res, {
+		rspamd_language_detector_filter_step2(task, res, best, &nremoved);
+	});
+
+	msg_debug_lang_det("removed %d languages", nremoved);
+}
+
+/*
+ * Selects up to `nwords` words of the part, feeds every selected word of
+ * at least 3 code points to the trigram detector of category `cat` and
+ * finally filters negligible candidates.
+ */
+static void
+rspamd_language_detector_detect_type(struct rspamd_task *task,
+									 guint nwords,
+									 struct rspamd_lang_detector *d,
+									 GArray *words,
+									 enum rspamd_language_category cat,
+									 khash_t(rspamd_candidates_hash) * candidates,
+									 struct rspamd_mime_text_part *part)
+{
+	const guint nparts = MIN(words->len, nwords);
+	goffset *selected_words;
+	guint64 seed;
+	guint idx;
+
+	/* Seed PRNG with part digest to provide some sort of determinism */
+	memcpy(&seed, part->mime_part->digest, sizeof(seed));
+	selected_words = g_new0(goffset, nparts);
+	rspamd_language_detector_random_select(words, nparts, selected_words, &seed);
+	msg_debug_lang_det("randomly selected %d words", nparts);
+
+	for (idx = 0; idx < nparts; idx++) {
+		rspamd_stat_token_t *tok = &g_array_index(words, rspamd_stat_token_t,
+												  selected_words[idx]);
+
+		if (tok->unicode.len < 3) {
+			/* Too short to produce a meaningful trigram */
+			continue;
+		}
+
+		rspamd_language_detector_detect_word(task, d, tok, candidates,
+											 d->trigrams[cat]);
+	}
+
+	/* Filter negligible candidates */
+	rspamd_language_detector_filter_negligible(task, candidates);
+	g_free(selected_words);
+}
+
+/*
+ * Sort callback for an array of `struct rspamd_lang_detector_res *`:
+ * candidates with a higher probability go first.
+ */
+static gint
+rspamd_language_detector_cmp(gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_lang_detector_res *const *lhs = a;
+	const struct rspamd_lang_detector_res *const *rhs = b;
+
+	if ((*lhs)->prob > (*rhs)->prob) {
+		return -1;
+	}
+
+	return ((*rhs)->prob > (*lhs)->prob) ? 1 : 0;
+}
+
+/* Outcome of a single ngramm detection attempt */
+enum rspamd_language_detected_type {
+	rs_detect_none = 0, /* no candidates are left after filtering */
+	rs_detect_single,   /* exactly one candidate is left */
+	rs_detect_multiple, /* more than one candidate is left */
+};
+
+/*
+ * Runs ngramm based detection for the given category and reports how many
+ * candidates (those with a non-NAN probability) have survived filtering.
+ */
+static enum rspamd_language_detected_type
+rspamd_language_detector_try_ngramm(struct rspamd_task *task,
+									guint nwords,
+									struct rspamd_lang_detector *d,
+									GArray *ucs_tokens,
+									enum rspamd_language_category cat,
+									khash_t(rspamd_candidates_hash) * candidates,
+									struct rspamd_mime_text_part *part)
+{
+	struct rspamd_lang_detector_res *res;
+	guint nalive = 0;
+
+	rspamd_language_detector_detect_type(task, nwords, d, ucs_tokens, cat,
+										 candidates, part);
+
+	kh_foreach_value(candidates, res, {
+		if (!isnan(res->prob)) {
+			nalive++;
+		}
+	});
+
+	switch (nalive) {
+	case 0:
+		return rs_detect_none;
+	case 1:
+		return rs_detect_single;
+	default:
+		return rs_detect_multiple;
+	}
+}
+
+/* Bit flags that tune the heuristic comparator */
+enum rspamd_language_sort_flags {
+	RSPAMD_LANG_FLAG_DEFAULT = 0,
+	RSPAMD_LANG_FLAG_SHORT = 1 << 0, /* use larger tier adjustments */
+};
+
+/* User data passed to rspamd_language_detector_cmp_heuristic */
+struct rspamd_frequency_sort_cbdata {
+	struct rspamd_lang_detector *d;        /* detector, provides total_occurrences */
+	enum rspamd_language_sort_flags flags; /* see RSPAMD_LANG_FLAG_* */
+	gdouble std;                           /* scale applied to every adjustment */
+	gdouble mean;                          /* not read by the comparator itself */
+};
+
+/*
+ * Multipliers (applied to the std from the sort callback data) that are
+ * used by the heuristic comparator to boost candidates: for tier0 and tier1
+ * languages and for the relative frequency of a language respectively.
+ */
+static const gdouble tier0_adjustment = 1.2;
+static const gdouble tier1_adjustment = 0.8;
+static const gdouble frequency_adjustment = 0.8;
+
+/*
+ * Heuristic sort callback for an array of `struct rspamd_lang_detector_res *`
+ * with `struct rspamd_frequency_sort_cbdata` as the user data.
+ *
+ * Probabilities of both candidates are boosted by their relative occurrence
+ * frequency and by the tier0/tier1 flags of their languages; candidates with
+ * a higher adjusted probability go first. If the detector has no
+ * occurrences recorded, the plain probability comparison is used instead.
+ *
+ * NOTE(review): the adjusted values are written back to both candidates on
+ * every call, so the adjustment accumulates each time a candidate takes part
+ * in a comparison - confirm that callers rely on this.
+ */
+static gint
+rspamd_language_detector_cmp_heuristic(gconstpointer a, gconstpointer b,
+									   gpointer ud)
+{
+	struct rspamd_frequency_sort_cbdata *cbd = ud;
+	struct rspamd_lang_detector_res
+		*canda = *(struct rspamd_lang_detector_res **) a,
+		*candb = *(struct rspamd_lang_detector_res **) b;
+	gdouble adj;
+	gdouble proba_adjusted, probb_adjusted, freqa, freqb;
+
+	if (cbd->d->total_occurrences == 0) {
+		/* Not enough data, compare directly */
+		return rspamd_language_detector_cmp(a, b);
+	}
+
+	/* Share of all recorded occurrences that belongs to each language */
+	freqa = ((gdouble) canda->elt->occurrences) /
+			(gdouble) cbd->d->total_occurrences;
+	freqb = ((gdouble) candb->elt->occurrences) /
+			(gdouble) cbd->d->total_occurrences;
+
+	proba_adjusted = canda->prob;
+	probb_adjusted = candb->prob;
+
+	/* isnormal() is false for zero, so both languages must have been seen */
+	if (isnormal(freqa) && isnormal(freqb)) {
+		proba_adjusted += cbd->std * (frequency_adjustment * freqa);
+		probb_adjusted += cbd->std * (frequency_adjustment * freqb);
+	}
+
+	/* Tier1 boost, doubled when the short flag is set */
+	if (cbd->flags & RSPAMD_LANG_FLAG_SHORT) {
+		adj = tier1_adjustment * 2.0;
+	}
+	else {
+		adj = tier1_adjustment;
+	}
+	if (canda->elt->flags & RS_LANGUAGE_TIER1) {
+		proba_adjusted += cbd->std * adj;
+	}
+
+	if (candb->elt->flags & RS_LANGUAGE_TIER1) {
+		probb_adjusted += cbd->std * adj;
+	}
+
+	/* Tier0 boost, 16 times larger when the short flag is set */
+	if (cbd->flags & RSPAMD_LANG_FLAG_SHORT) {
+		adj = tier0_adjustment * 16.0;
+	}
+	else {
+		adj = tier0_adjustment;
+	}
+
+	if (canda->elt->flags & RS_LANGUAGE_TIER0) {
+		proba_adjusted += cbd->std * adj;
+	}
+
+	if (candb->elt->flags & RS_LANGUAGE_TIER0) {
+		probb_adjusted += cbd->std * adj;
+	}
+
+	/* Hack: adjust probability directly */
+	canda->prob = proba_adjusted;
+	candb->prob = probb_adjusted;
+
+	if (proba_adjusted > probb_adjusted) {
+		return -1;
+	}
+	else if (probb_adjusted > proba_adjusted) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Scans the stripped UTF-8 content of a text part, records the unicode
+ * blocks of its alphabetic characters in `part->unicode_scripts` and counts
+ * CJK ideographs and characters of other non-latin scripts.
+ *
+ * The scan stops at the first invalid sequence, or earlier when enough
+ * non-latin characters have been seen.
+ *
+ * @param task task, used for logging
+ * @param part text part to scan; its `unicode_scripts` flags are updated
+ * @param pchinese output: number of CJK characters seen
+ * @param pspecial output: number of characters of other non-latin scripts
+ */
+static void
+rspamd_language_detector_unicode_scripts(struct rspamd_task *task,
+										 struct rspamd_mime_text_part *part,
+										 guint *pchinese,
+										 guint *pspecial)
+{
+	const gchar *p = part->utf_stripped_content->data, *end;
+	guint i = 0, cnt = 0;
+	end = p + part->utf_stripped_content->len;
+	gint32 uc, sc;
+	guint nlatin = 0, nchinese = 0, nspecial = 0;
+	const guint cutoff_limit = 32;
+
+	while (p + i < end) {
+		U8_NEXT(p, i, part->utf_stripped_content->len, uc);
+
+		if (((gint32) uc) < 0) {
+			/* Invalid sequence */
+			break;
+		}
+
+		if (u_isalpha(uc)) {
+			sc = ublock_getCode(uc);
+			cnt++;
+
+			/* `sc` is an ICU block code, so all labels must be UBLOCK_* */
+			switch (sc) {
+			case UBLOCK_BASIC_LATIN:
+			case UBLOCK_LATIN_1_SUPPLEMENT:
+				part->unicode_scripts |= RSPAMD_UNICODE_LATIN;
+				nlatin++;
+				break;
+			case UBLOCK_HEBREW:
+				part->unicode_scripts |= RSPAMD_UNICODE_HEBREW;
+				nspecial++;
+				break;
+			case UBLOCK_GREEK:
+				part->unicode_scripts |= RSPAMD_UNICODE_GREEK;
+				nspecial++;
+				break;
+			case UBLOCK_CYRILLIC:
+				part->unicode_scripts |= RSPAMD_UNICODE_CYRILLIC;
+				nspecial++;
+				break;
+			case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
+			case UBLOCK_CJK_COMPATIBILITY:
+			case UBLOCK_CJK_RADICALS_SUPPLEMENT:
+			case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
+			case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B:
+				part->unicode_scripts |= RSPAMD_UNICODE_CJK;
+				nchinese++;
+				break;
+			case UBLOCK_HIRAGANA:
+			case UBLOCK_KATAKANA:
+				part->unicode_scripts |= RSPAMD_UNICODE_JP;
+				nspecial++;
+				break;
+			case UBLOCK_HANGUL_JAMO:
+			case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
+				part->unicode_scripts |= RSPAMD_UNICODE_HANGUL;
+				nspecial++;
+				break;
+			case UBLOCK_ARABIC:
+				part->unicode_scripts |= RSPAMD_UNICODE_ARABIC;
+				nspecial++;
+				break;
+			case UBLOCK_DEVANAGARI:
+				part->unicode_scripts |= RSPAMD_UNICODE_DEVANAGARI;
+				nspecial++;
+				break;
+			case UBLOCK_ARMENIAN:
+				part->unicode_scripts |= RSPAMD_UNICODE_ARMENIAN;
+				nspecial++;
+				break;
+			case UBLOCK_GEORGIAN:
+				part->unicode_scripts |= RSPAMD_UNICODE_GEORGIAN;
+				nspecial++;
+				break;
+			case UBLOCK_GUJARATI:
+				part->unicode_scripts |= RSPAMD_UNICODE_GUJARATI;
+				nspecial++;
+				break;
+			case UBLOCK_TELUGU:
+				part->unicode_scripts |= RSPAMD_UNICODE_TELUGU;
+				nspecial++;
+				break;
+			case UBLOCK_TAMIL:
+				part->unicode_scripts |= RSPAMD_UNICODE_TAMIL;
+				nspecial++;
+				break;
+			case UBLOCK_THAI:
+				part->unicode_scripts |= RSPAMD_UNICODE_THAI;
+				nspecial++;
+				break;
+			case UBLOCK_MALAYALAM:
+				part->unicode_scripts |= RSPAMD_UNICODE_MALAYALAM;
+				nspecial++;
+				break;
+			case UBLOCK_SINHALA:
+				part->unicode_scripts |= RSPAMD_UNICODE_SINHALA;
+				nspecial++;
+				break;
+			default:
+				/* Other blocks are not tracked */
+				break;
+			}
+		}
+
+		if (nspecial > cutoff_limit && nspecial > nlatin) {
+			break;
+		}
+		else if (nchinese > cutoff_limit && nchinese > nlatin) {
+			if (nspecial > 0) {
+				/* Likely japanese */
+				break;
+			}
+		}
+	}
+
+	msg_debug_lang_det("stop after checking %d characters, "
+					   "%d latin, %d special, %d chinese",
+					   cnt, nlatin, nspecial, nchinese);
+
+	*pchinese = nchinese;
+	*pspecial = nspecial;
+}
+
+/*
+ * Records `code` as the language of the part: appends a result with the
+ * probability of 1.0 to `part->languages` (creating the array on demand)
+ * and sets `part->language`. The result is allocated from the task pool.
+ */
+static inline void
+rspamd_language_detector_set_language(struct rspamd_task *task,
+									  struct rspamd_mime_text_part *part,
+									  const gchar *code,
+									  struct rspamd_language_elt *elt)
+{
+	struct rspamd_lang_detector_res *res =
+		rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
+
+	res->elt = elt;
+	res->lang = code;
+	res->prob = 1.0;
+
+	if (part->languages == NULL) {
+		part->languages = g_ptr_array_sized_new(1);
+	}
+
+	part->language = code;
+	g_ptr_array_add(part->languages, res);
+}
+
+/*
+ * Tries to set the language of the part using merely the unicode scripts
+ * found in it: the first entry of `unicode_langs` that matches the scripts
+ * of the part wins; if nothing matches but CJK characters are present, the
+ * language is set to "zh-CN".
+ *
+ * @return TRUE if the language has been set
+ */
+static gboolean
+rspamd_language_detector_try_uniscript(struct rspamd_task *task,
+									   struct rspamd_mime_text_part *part,
+									   guint nchinese,
+									   guint nspecial)
+{
+	guint idx;
+
+	for (idx = 0; idx < G_N_ELEMENTS(unicode_langs); idx++) {
+		if (!(unicode_langs[idx].unicode_code & part->unicode_scripts)) {
+			continue;
+		}
+
+		/*
+		 * Japanese <-> Chinese guess
+		 *
+		 * Typically there might be around 0-70% of kanji glyphs
+		 * and the rest are Hiragana/Katakana
+		 *
+		 * If we discover that Kanji is more than 80% then we consider
+		 * it Chinese
+		 */
+		if (unicode_langs[idx].unicode_code == RSPAMD_UNICODE_JP &&
+			!(nchinese <= 5 || nchinese < nspecial * 5)) {
+			continue;
+		}
+
+		msg_debug_lang_det("set language based on unicode script %s",
+						   unicode_langs[idx].lang);
+		rspamd_language_detector_set_language(task, part,
+											  unicode_langs[idx].lang, NULL);
+
+		return TRUE;
+	}
+
+	if (!(part->unicode_scripts & RSPAMD_UNICODE_CJK)) {
+		return FALSE;
+	}
+
+	msg_debug_lang_det("guess chinese based on CJK characters: %d chinese, %d special",
+					   nchinese, nspecial);
+	rspamd_language_detector_set_language(task, part,
+										  "zh-CN", NULL);
+
+	return TRUE;
+}
+
+/* Hash function for language elements: hashes the name of a language */
+static guint
+rspamd_langelt_hash_func(gconstpointer key)
+{
+	const struct rspamd_language_elt *lang = key;
+	const gchar *name = lang->name;
+
+	return rspamd_cryptobox_fast_hash(name, strlen(name),
+									  rspamd_hash_seed());
+}
+
+/* Equality function for language elements: compares names byte by byte */
+static gboolean
+rspamd_langelt_equal_func(gconstpointer v, gconstpointer v2)
+{
+	const struct rspamd_language_elt *lhs = v;
+	const struct rspamd_language_elt *rhs = v2;
+
+	return strcmp(lhs->name, rhs->name) == 0;
+}
+
+/* This hash set stores a word index in the language to avoid duplicate stop words */
+KHASH_INIT(rspamd_sw_res_set, int, char, 0, kh_int_hash_func, kh_int_hash_equal);
+
+/* Maps a language element (hashed and compared by name) to its set of seen stop word indexes */
+KHASH_INIT(rspamd_sw_hash, struct rspamd_language_elt *, khash_t(rspamd_sw_res_set) *, 1,
+		   rspamd_langelt_hash_func, rspamd_langelt_equal_func);
+
+/* Context passed to rspamd_language_detector_sw_cb for a single stop words scan */
+struct rspamd_sw_cbdata {
+	/* Task being processed; the callback keeps it in a local named `task`, presumably for debug logging */
+	struct rspamd_task *task;
+	/* language element -> set of distinct stop word indexes matched so far */
+	khash_t(rspamd_sw_hash) * res;
+	/*
+	 * Array of struct rspamd_stop_word_range searched with bsearch();
+	 * NOTE(review): must therefore be sorted by range - confirm where it is built
+	 */
+	GArray *ranges;
+};
+
+/*
+ * bsearch() comparator: the key is a pattern index packed into a pointer,
+ * the member is a stop word range. A key inside [start, stop) matches the
+ * range, a smaller key sorts before it and a larger one after it.
+ */
+static gint
+rspamd_ranges_cmp(const void *k, const void *memb)
+{
+	const struct rspamd_stop_word_range *range = (struct rspamd_stop_word_range *) memb;
+	gint idx = GPOINTER_TO_INT(k);
+
+	if (idx < range->start) {
+		/* Key lies before this range */
+		return -1;
+	}
+
+	if (idx < range->stop) {
+		/* start <= key < stop */
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Multipattern callback invoked for every stop word pattern found in a text.
+ *
+ * A match is only counted when it is delimited by ASCII whitespace or
+ * punctuation (or by the text edges) on both sides. The language owning
+ * pattern `strnum` is looked up in `cbdata->ranges` and the pattern index is
+ * recorded in the per-language set stored in `cbdata->res`, so repeated
+ * occurrences of the same stop word are counted once.
+ *
+ * Returns 1 when the matched language already had more than `max_stop_words`
+ * distinct words recorded before this match, and 0 otherwise.
+ * NOTE(review): a non-zero return presumably stops the multipattern scan -
+ * confirm against rspamd_multipattern_lookup.
+ */
+static gint
+rspamd_language_detector_sw_cb(struct rspamd_multipattern *mp,
+							   guint strnum,
+							   gint match_start,
+							   gint match_pos,
+							   const gchar *text,
+							   gsize len,
+							   void *context)
+{
+	/*
+	 * The matched word spans [word_start, word_end). Tracking the word start
+	 * explicitly keeps the debug output correct for matches at offset 0,
+	 * where there is no preceding character to skip.
+	 */
+	const gchar *word_start = text + (match_start > 0 ? match_start : 0),
+				*word_end = text + len;
+	struct rspamd_stop_word_range *r;
+	struct rspamd_sw_cbdata *cbdata = (struct rspamd_sw_cbdata *) context;
+	khiter_t k;
+	static const gsize max_stop_words = 80;
+	struct rspamd_task *task;
+
+	/* Check if boundary */
+	if (match_start > 0) {
+		const gchar *prev = word_start - 1;
+
+		if (!(g_ascii_isspace(*prev) || g_ascii_ispunct(*prev))) {
+			return 0;
+		}
+	}
+
+	if (match_pos < len) {
+		word_end = text + match_pos;
+
+		if (!(g_ascii_isspace(*word_end) || g_ascii_ispunct(*word_end))) {
+			return 0;
+		}
+	}
+
+	/* We have a word on the boundary, check range */
+	task = cbdata->task;
+	r = bsearch(GINT_TO_POINTER(strnum), cbdata->ranges->data,
+				cbdata->ranges->len, sizeof(*r), rspamd_ranges_cmp);
+
+	/* Every pattern index is expected to belong to some language range */
+	g_assert(r != NULL);
+
+	k = kh_get(rspamd_sw_hash, cbdata->res, r->elt);
+	gint nwords = 1;
+
+	if (k != kh_end(cbdata->res)) {
+		khash_t(rspamd_sw_res_set) *seen = kh_value(cbdata->res, k);
+		khiter_t set_k;
+		int tt;
+
+		set_k = kh_get(rspamd_sw_res_set, seen, strnum);
+		/* Number of distinct words known before handling this match */
+		nwords = kh_size(seen);
+
+		if (set_k == kh_end(seen)) {
+			/* New word */
+			set_k = kh_put(rspamd_sw_res_set, seen, strnum, &tt);
+			msg_debug_lang_det("found new word %*s from %s language (%d stop words found so far)",
+							   (int) (word_end - word_start), word_start, r->elt->name, nwords);
+		}
+
+		if (nwords > max_stop_words) {
+			return 1;
+		}
+	}
+	else {
+		gint tt;
+
+		/* First stop word for this language: create its set */
+		k = kh_put(rspamd_sw_hash, cbdata->res, r->elt, &tt);
+		kh_value(cbdata->res, k) = kh_init(rspamd_sw_res_set);
+		kh_put(rspamd_sw_res_set, kh_value(cbdata->res, k), strnum, &tt);
+
+		msg_debug_lang_det("found new word %*s from %s language (%d stop words found so far)",
+						   (int) (word_end - word_start), word_start, r->elt->name, nwords);
+	}
+
+	return 0;
+}
+
+/*
+ * Tries to select the language of a text part using stop words.
+ *
+ * The stop words multipattern of category `cat` is run over
+ * part->utf_stripped_content; rspamd_language_detector_sw_cb collects, per
+ * language, the set of distinct stop words that were found. The language
+ * with the highest rate (distinct matches / number of stop words known for
+ * that language) wins, provided that it has at least `stop_words_threshold`
+ * matches.
+ *
+ * Returns TRUE if a language has been set for the part, FALSE otherwise.
+ */
+static gboolean
+rspamd_language_detector_try_stop_words(struct rspamd_task *task,
+										struct rspamd_lang_detector *d,
+										struct rspamd_mime_text_part *part,
+										enum rspamd_language_category cat)
+{
+	struct rspamd_stop_word_elt *elt;
+	struct rspamd_sw_cbdata cbdata;
+	gboolean ret = FALSE;
+	static const int stop_words_threshold = 4, /* minimum stop words count */
+		strong_confidence_threshold = 10 /* we are sure that this is enough */;
+
+	elt = &d->stop_words[cat];
+	cbdata.res = kh_init(rspamd_sw_hash);
+	cbdata.ranges = elt->ranges;
+	cbdata.task = task;
+
+	rspamd_multipattern_lookup(elt->mp, part->utf_stripped_content->data,
+							   part->utf_stripped_content->len, rspamd_language_detector_sw_cb,
+							   &cbdata, NULL);
+
+	if (kh_size(cbdata.res) > 0) {
+		khash_t(rspamd_sw_res_set) * cur_res;
+		double max_rate = G_MINDOUBLE;
+		struct rspamd_language_elt *cur_lang, *sel = NULL;
+		gboolean ignore_ascii = FALSE, ignore_latin = FALSE;
+
+		/*
+		 * The selection loop is restarted (at most once per flag) as soon as
+		 * a language with diacritics or a non-latin language is seen, so that
+		 * the whole table is re-evaluated with the corresponding ignore flag set.
+		 */
+	again:
+		kh_foreach(cbdata.res, cur_lang, cur_res, {
+			/* Number of distinct stop words found for this language */
+			int cur_matches = kh_size(cur_res);
+
+			if (!ignore_ascii && (cur_lang->flags & RS_LANGUAGE_DIACRITICS)) {
+				/* Restart matches */
+				ignore_ascii = TRUE;
+				sel = NULL;
+				max_rate = G_MINDOUBLE;
+				msg_debug_lang_det("ignore ascii after finding %d stop words from %s",
+								   cur_matches, cur_lang->name);
+				goto again;
+			}
+
+			if (!ignore_latin && cur_lang->category != RSPAMD_LANGUAGE_LATIN) {
+				/* Restart matches */
+				ignore_latin = TRUE;
+				sel = NULL;
+				max_rate = G_MINDOUBLE;
+				msg_debug_lang_det("ignore latin after finding stop %d words from %s",
+								   cur_matches, cur_lang->name);
+				goto again;
+			}
+
+			if (cur_matches < stop_words_threshold) {
+				continue;
+			}
+
+			if (cur_matches < strong_confidence_threshold) {
+				/* Ignore mixed languages when not enough confidence */
+				if (ignore_ascii && (cur_lang->flags & RS_LANGUAGE_ASCII)) {
+					continue;
+				}
+
+				if (ignore_latin && cur_lang->category == RSPAMD_LANGUAGE_LATIN) {
+					continue;
+				}
+			}
+
+			/* Share of this language's known stop words that were found */
+			double rate = (double) cur_matches / (double) cur_lang->stop_words;
+
+			if (rate > max_rate) {
+				max_rate = rate;
+				sel = cur_lang;
+			}
+
+			msg_debug_lang_det("found %d stop words from %s: %3f rate",
+							   cur_matches, cur_lang->name, rate);
+		});
+
+		/* Cleanup */
+		kh_foreach(cbdata.res, cur_lang, cur_res, {
+			kh_destroy(rspamd_sw_res_set, cur_res);
+		});
+
+		if (max_rate > 0 && sel) {
+			msg_debug_lang_det("set language based on stop words script %s, %.3f found",
+							   sel->name, max_rate);
+			rspamd_language_detector_set_language(task, part,
+												  sel->name, sel);
+
+			ret = TRUE;
+		}
+	}
+	else {
+		msg_debug_lang_det("found no stop words in a text");
+	}
+
+	/* The per-language sets were released above; this frees the outer table */
+	kh_destroy(rspamd_sw_hash, cbdata.res);
+
+	return ret;
+}
+
+/*
+ * Detects the language(s) of a text part and stores the outcome in the part.
+ *
+ * Steps:
+ *  1. unicode scripts and stop words heuristics (skipped when fasttext is
+ *     enabled and `d->prefer_fasttext` is set);
+ *  2. fasttext predictions, if fasttext is enabled: up to 4 predictions are
+ *     requested and those with a probability above 75% of the first one
+ *     become candidates;
+ *  3. if fasttext produced nothing: a category based default for short texts,
+ *     or trigrams detection otherwise.
+ *
+ * Returns FALSE when the part has no `utf_stripped_content` or when only a
+ * default fallback language could be assigned; TRUE when a heuristic or the
+ * candidates list determined the language.
+ */
+gboolean
+rspamd_language_detector_detect(struct rspamd_task *task,
+								struct rspamd_lang_detector *d,
+								struct rspamd_mime_text_part *part)
+{
+	khash_t(rspamd_candidates_hash) * candidates;
+	GPtrArray *result;
+	gdouble mean, std, start_ticks, end_ticks;
+	guint cand_len;
+	enum rspamd_language_category cat;
+	struct rspamd_lang_detector_res *cand;
+	enum rspamd_language_detected_type r;
+	struct rspamd_frequency_sort_cbdata cbd;
+	/* Check if we have sorted candidates based on frequency */
+	gboolean frequency_heuristic_applied = FALSE, ret = FALSE;
+
+	if (!part->utf_stripped_content) {
+		return FALSE;
+	}
+
+	start_ticks = rspamd_get_ticks(TRUE);
+
+	guint nchinese = 0, nspecial = 0;
+	rspamd_language_detector_unicode_scripts(task, part, &nchinese, &nspecial);
+
+	/* Disable internal language detection heuristics if we have fasttext */
+	if (!rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector) || !d->prefer_fasttext) {
+		/* Apply unicode scripts heuristic */
+		if (rspamd_language_detector_try_uniscript(task, part, nchinese, nspecial)) {
+			ret = TRUE;
+		}
+
+		cat = rspamd_language_detector_get_category(part->unicode_scripts);
+
+		if (!ret && rspamd_language_detector_try_stop_words(task, d, part, cat)) {
+			ret = TRUE;
+		}
+	}
+	else {
+		/*
+		 * Heuristics are skipped, but the category is still required by the
+		 * short text and trigrams fallbacks below when fasttext returns no
+		 * predictions; without this it would be read uninitialised.
+		 */
+		cat = rspamd_language_detector_get_category(part->unicode_scripts);
+	}
+
+	if (!ret) {
+		unsigned ndetected = 0;
+		if (rspamd_lang_detection_fasttext_is_enabled(d->fasttext_detector)) {
+			rspamd_fasttext_predict_result_t fasttext_predict_result =
+				rspamd_lang_detection_fasttext_detect(d->fasttext_detector, task,
+													  part->utf_words, 4);
+
+			ndetected = rspamd_lang_detection_fasttext_get_nlangs(fasttext_predict_result);
+
+			if (ndetected > 0) {
+				candidates = kh_init(rspamd_candidates_hash);
+				kh_resize(rspamd_candidates_hash, candidates, ndetected);
+
+				/* Now fill all results where probability is above threshold */
+				float max_prob = rspamd_lang_detection_fasttext_get_prob(fasttext_predict_result, 0);
+
+				for (unsigned int i = 0; i < ndetected; i++) {
+					float prob = rspamd_lang_detection_fasttext_get_prob(fasttext_predict_result, i);
+					if (prob > max_prob * 0.75) {
+						/* The label is owned by the prediction result, so copy it to the task pool */
+						char *lang = rspamd_mempool_strdup(task->task_pool,
+														   rspamd_lang_detection_fasttext_get_lang(fasttext_predict_result, i));
+						int tmp;
+						khiter_t k = kh_put(rspamd_candidates_hash, candidates, lang, &tmp);
+
+						kh_value(candidates, k) = rspamd_mempool_alloc0(task->task_pool, sizeof(*cand));
+						cand = kh_value(candidates, k);
+						cand->lang = lang;
+						cand->prob = prob;
+
+						/* Find the corresponding language elt */
+						k = kh_get(rspamd_languages_hash, d->languages, lang);
+						if (k != kh_end(d->languages)) {
+							cand->elt = kh_value(d->languages, k);
+						}
+					}
+				}
+
+				if (kh_size(candidates) == 1) {
+					r = rs_detect_single;
+				}
+				else if (kh_size(candidates) > 1) {
+					r = rs_detect_multiple;
+				}
+				else {
+					r = rs_detect_none;
+				}
+			}
+
+			rspamd_fasttext_predict_result_destroy(fasttext_predict_result);
+		}
+		if (ndetected == 0) {
+			if (part->utf_words->len < default_short_text_limit) {
+				r = rs_detect_none;
+				msg_debug_lang_det("text is too short for trigrams detection: "
+								   "%d words; at least %d words required",
+								   (int) part->utf_words->len,
+								   (int) default_short_text_limit);
+				switch (cat) {
+				case RSPAMD_LANGUAGE_CYRILLIC:
+					rspamd_language_detector_set_language(task, part, "ru", NULL);
+					break;
+				case RSPAMD_LANGUAGE_DEVANAGARI:
+					rspamd_language_detector_set_language(task, part, "hi", NULL);
+					break;
+				case RSPAMD_LANGUAGE_ARAB:
+					rspamd_language_detector_set_language(task, part, "ar", NULL);
+					break;
+				default:
+				case RSPAMD_LANGUAGE_LATIN:
+					rspamd_language_detector_set_language(task, part, "en", NULL);
+					break;
+				}
+				msg_debug_lang_det("set %s language based on symbols category",
+								   part->language);
+
+				/* Empty table, so that the common cleanup below is valid */
+				candidates = kh_init(rspamd_candidates_hash);
+			}
+			else {
+				candidates = kh_init(rspamd_candidates_hash);
+				kh_resize(rspamd_candidates_hash, candidates, 32);
+
+				r = rspamd_language_detector_try_ngramm(task,
+														default_words,
+														d,
+														part->utf_words,
+														cat,
+														candidates,
+														part);
+
+				if (r == rs_detect_none) {
+					msg_debug_lang_det("no trigrams found, fallback to english");
+					rspamd_language_detector_set_language(task, part, "en", NULL);
+				}
+				else if (r == rs_detect_multiple) {
+					/* Check our guess */
+
+					mean = 0.0;
+					std = 0.0;
+					cand_len = 0;
+
+					/* Check distribution */
+					kh_foreach_value(candidates, cand, {
+						if (!isnan(cand->prob)) {
+							mean += cand->prob;
+							cand_len++;
+						}
+					});
+
+					if (cand_len > 0) {
+						mean /= cand_len;
+
+						/* Mean absolute deviation of the candidates' probabilities */
+						kh_foreach_value(candidates, cand, {
+							gdouble err;
+							if (!isnan(cand->prob)) {
+								err = cand->prob - mean;
+								std += fabs(err);
+							}
+						});
+
+						std /= cand_len;
+					}
+
+					msg_debug_lang_det("trigrams checked, %d candidates, %.3f mean, %.4f stddev",
+									   cand_len, mean, std);
+
+					/* Candidates are too close to each other: use frequency based sorting */
+					if (cand_len > 0 && std / fabs(mean) < 0.25) {
+						msg_debug_lang_det("apply frequency heuristic sorting");
+						frequency_heuristic_applied = TRUE;
+						cbd.d = d;
+						cbd.mean = mean;
+						cbd.std = std;
+						cbd.flags = RSPAMD_LANG_FLAG_DEFAULT;
+
+						if (part->nwords < default_words / 2) {
+							cbd.flags |= RSPAMD_LANG_FLAG_SHORT;
+						}
+					}
+				}
+			}
+		}
+
+		/* Now, convert hash to array and sort it */
+		gboolean result_adopted = FALSE;
+
+		if (r != rs_detect_none && kh_size(candidates) > 0) {
+			result = g_ptr_array_sized_new(kh_size(candidates));
+
+			kh_foreach_value(candidates, cand, {
+				if (!isnan(cand->prob)) {
+					msg_debug_lang_det("pre-sorting probability %s -> %.2f", cand->lang,
+									   cand->prob);
+					g_ptr_array_add(result, cand);
+				}
+			});
+
+			if (result->len > 0) {
+				if (frequency_heuristic_applied) {
+					g_ptr_array_sort_with_data(result,
+											   rspamd_language_detector_cmp_heuristic,
+											   (gpointer) &cbd);
+				}
+				else {
+					g_ptr_array_sort(result, rspamd_language_detector_cmp);
+				}
+
+				int i;
+				PTR_ARRAY_FOREACH(result, i, cand)
+				{
+					msg_debug_lang_det("final probability %s -> %.2f", cand->lang,
+									   cand->prob);
+				}
+
+				if (part->languages != NULL) {
+					g_ptr_array_unref(part->languages);
+				}
+
+				part->languages = result;
+				part->language = ((struct rspamd_lang_detector_res *) g_ptr_array_index(result, 0))->lang;
+				ret = TRUE;
+				result_adopted = TRUE;
+			}
+			else {
+				/*
+				 * Every candidate had a NaN probability, so there is no
+				 * element 0 to read: drop the empty array and fall back.
+				 */
+				g_ptr_array_unref(result);
+			}
+		}
+
+		if (!result_adopted && part->languages == NULL) {
+			rspamd_language_detector_set_language(task, part, "en", NULL);
+		}
+
+		kh_destroy(rspamd_candidates_hash, candidates);
+	}
+
+	/* Update internal stat */
+	if (part->languages != NULL && part->languages->len > 0 && !frequency_heuristic_applied) {
+		cand = g_ptr_array_index(part->languages, 0);
+		if (cand->elt) {
+			cand->elt->occurrences++;
+			d->total_occurrences++;
+
+			msg_debug_lang_det("updated stat for %s: %d occurrences, %z total detected",
+							   cand->elt->name, cand->elt->occurrences,
+							   d->total_occurrences);
+		}
+	}
+
+	end_ticks = rspamd_get_ticks(TRUE);
+	msg_debug_lang_det("detected languages in %.0f ticks",
+					   (end_ticks - start_ticks));
+
+	return ret;
+}
+
+
+/*
+ * Retains a reference to the detector (REF_RETAIN) and returns the same
+ * pointer, so that the call can be used in an assignment.
+ */
+struct rspamd_lang_detector *
+rspamd_language_detector_ref(struct rspamd_lang_detector *d)
+{
+	REF_RETAIN(d);
+
+	return d;
+}
+
+/*
+ * Releases a reference obtained from rspamd_language_detector_ref (REF_RELEASE).
+ * NOTE(review): presumably the detector is destroyed when the last reference
+ * is released - confirm against the REF_RELEASE definition.
+ */
+void rspamd_language_detector_unref(struct rspamd_lang_detector *d)
+{
+	REF_RELEASE(d);
+}
+
+/*
+ * Checks whether the word of `wlen` bytes starting at `word` is present in
+ * the detector's table of normalised stop words.
+ */
+gboolean
+rspamd_language_detector_is_stop_word(struct rspamd_lang_detector *d,
+									  const gchar *word, gsize wlen)
+{
+	rspamd_ftok_t needle;
+	khiter_t it;
+
+	/* Wrap the caller's buffer without copying it */
+	needle.len = wlen;
+	needle.begin = word;
+
+	it = kh_get(rspamd_stopwords_hash, d->stop_words_norm, &needle);
+
+	return (it != kh_end(d->stop_words_norm)) ? TRUE : FALSE;
+}
+
+/*
+ * Returns the flags of a language element; a NULL element yields 0.
+ */
+gint rspamd_language_detector_elt_flags(const struct rspamd_language_elt *elt)
+{
+	if (elt == NULL) {
+		return 0;
+	}
+
+	return elt->flags;
+}
+\ No newline at end of file
diff --git a/src/libmime/lang_detection.h b/src/libmime/lang_detection.h
new file mode 100644
index 0000000..5423c13
--- /dev/null
+++ b/src/libmime/lang_detection.h
@@ -0,0 +1,110 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_LANG_DETECTION_H
+#define RSPAMD_LANG_DETECTION_H
+
+#include "config.h"
+#include "libserver/cfg_file.h"
+#include "libstat/stat_api.h"
+#include "libmime/message.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_lang_detector;
+struct rspamd_language_elt;
+struct rspamd_task;
+
+/**
+ * Unicode script flags. Each value is a distinct bit, so several scripts
+ * can be combined in a single mask; the detector tests them with a bitwise
+ * AND against the `unicode_scripts` field of a text part.
+ */
+enum rspamd_unicode_scripts {
+	RSPAMD_UNICODE_LATIN = (1 << 0),
+	RSPAMD_UNICODE_GREEK = (1 << 1),
+	RSPAMD_UNICODE_CYRILLIC = (1 << 2),
+	RSPAMD_UNICODE_HEBREW = (1 << 3),
+	RSPAMD_UNICODE_CJK = (1 << 4),
+	RSPAMD_UNICODE_JP = (1 << 5),
+	RSPAMD_UNICODE_ARABIC = (1 << 6),
+	RSPAMD_UNICODE_DEVANAGARI = (1 << 7),
+	RSPAMD_UNICODE_THAI = (1 << 8),
+	RSPAMD_UNICODE_ARMENIAN = (1 << 9),
+	RSPAMD_UNICODE_GEORGIAN = (1 << 10),
+	RSPAMD_UNICODE_GUJARATI = (1 << 11),
+	RSPAMD_UNICODE_TAMIL = (1 << 12),
+	RSPAMD_UNICODE_TELUGU = (1 << 13),
+	RSPAMD_UNICODE_MALAYALAM = (1 << 14),
+	RSPAMD_UNICODE_SINHALA = (1 << 15),
+	RSPAMD_UNICODE_HANGUL = (1 << 16),
+};
+
+/**
+ * Bit flags of a language element, as returned by
+ * rspamd_language_detector_elt_flags(). The stop words heuristic uses
+ * RS_LANGUAGE_DIACRITICS and RS_LANGUAGE_ASCII to decide which languages
+ * to ignore when there are too few matches.
+ * NOTE(review): bits 1 and 2 are not assigned here - confirm whether that is intentional.
+ */
+enum rspamd_language_elt_flags {
+	RS_LANGUAGE_DEFAULT = 0,
+	RS_LANGUAGE_LATIN = (1 << 0),
+	RS_LANGUAGE_TIER1 = (1 << 3),
+	RS_LANGUAGE_TIER0 = (1 << 4),
+	RS_LANGUAGE_DIACRITICS = (1 << 5),
+	RS_LANGUAGE_ASCII = (1 << 6),
+};
+
+/**
+ * A single language candidate produced by the detector
+ */
+struct rspamd_lang_detector_res {
+	gdouble prob;                    /**< candidate probability; NaN values are skipped by the detector */
+	const gchar *lang;               /**< language name */
+	struct rspamd_language_elt *elt; /**< matching language element, may be NULL */
+};
+
+/**
+ * Create new language detector object using configuration object
+ * @param cfg
+ * @return
+ */
+struct rspamd_lang_detector *rspamd_language_detector_init(struct rspamd_config *cfg);
+
+/**
+ * Retain a reference to the detector
+ * @param d
+ * @return the same pointer that was passed in
+ */
+struct rspamd_lang_detector *rspamd_language_detector_ref(struct rspamd_lang_detector *d);
+
+/**
+ * Release a reference to the detector
+ * @param d
+ */
+void rspamd_language_detector_unref(struct rspamd_lang_detector *d);
+
+/**
+ * Try to detect language of a text part; the outcome is stored in the part
+ * itself (its `language` and `languages` fields)
+ * @param task task being processed
+ * @param d language detector
+ * @param part text part to analyse
+ * @return TRUE if a language has been detected, FALSE if the part has no
+ * stripped utf content or only a default fallback language has been set
+ */
+gboolean rspamd_language_detector_detect(struct rspamd_task *task,
+										 struct rspamd_lang_detector *d,
+										 struct rspamd_mime_text_part *part);
+
+/**
+ * Returns TRUE if the specified word is known to be a stop word
+ * @param d
+ * @param word start of the word (the length is passed explicitly in wlen)
+ * @param wlen length of the word in bytes
+ * @return
+ */
+gboolean rspamd_language_detector_is_stop_word(struct rspamd_lang_detector *d,
+											   const gchar *word, gsize wlen);
+
+/**
+ * Return language flags for a specific language elt
+ * @param elt language element, may be NULL
+ * @return flags of the element or 0 if elt is NULL
+ */
+gint rspamd_language_detector_elt_flags(const struct rspamd_language_elt *elt);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx
new file mode 100644
index 0000000..c973ed7
--- /dev/null
+++ b/src/libmime/lang_detection_fasttext.cxx
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_detection_fasttext.h"
+
+#ifdef WITH_FASTTEXT
+#include "fasttext/fasttext.h"
+#include "libserver/cfg_file.h"
+#include "libserver/logger.h"
+#include "fmt/core.h"
+#include "stat_api.h"
+#include <exception>
+#include <string_view>
+#include <vector>
+#endif
+
+#ifdef WITH_FASTTEXT
+
+EXTERN_LOG_MODULE_DEF(langdet);
+/*
+ * Conditional debug logging for the "langdet" module.
+ * The expansion reads `task->task_pool->tag.uid`, so a valid `task` pointer
+ * must be in scope wherever this macro is used.
+ */
+#define msg_debug_lang_det(...) rspamd_conditional_debug_fast(nullptr, nullptr,                                           \
+															  rspamd_langdet_log_id, "langdet", task->task_pool->tag.uid, \
+															  __FUNCTION__,                                               \
+															  __VA_ARGS__)
+
+namespace rspamd::langdet {
+/**
+ * Wrapper around a fasttext model used for language identification.
+ * The model path is read from the `fasttext_model` key of the
+ * `lang_detection` configuration section; if the key is absent or the model
+ * cannot be loaded, the object stays in the "not loaded" state and all
+ * operations become no-ops.
+ */
+class fasttext_langdet {
+private:
+	fasttext::FastText ft;
+	std::string model_fname;
+	bool loaded = false;
+
+public:
+	explicit fasttext_langdet(struct rspamd_config *cfg)
+	{
+		const auto *ucl_obj = cfg->cfg_ucl_obj;
+		const auto *opts_section = ucl_object_find_key(ucl_obj, "lang_detection");
+
+		if (opts_section) {
+			const auto *model = ucl_object_find_key(opts_section, "fasttext_model");
+
+			if (model) {
+				/* NULL is returned when the option is not a string */
+				const char *model_path = ucl_object_tostring(model);
+
+				if (model_path == nullptr) {
+					msg_err_config("cannot load fasttext model: fasttext_model option must be a string");
+				}
+				else {
+					try {
+						ft.loadModel(model_path);
+						loaded = true;
+						model_fname = std::string{model_path};
+					} catch (std::exception &e) {
+						auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
+						msg_err_config("%s", err_message.c_str());
+						loaded = false;
+					}
+				}
+			}
+		}
+	}
+
+	/* Disallow multiple initialisation */
+	fasttext_langdet() = delete;
+	fasttext_langdet(const fasttext_langdet &) = delete;
+	fasttext_langdet(fasttext_langdet &&) = delete;
+
+	~fasttext_langdet() = default;
+
+	/** Returns true if a model has been loaded successfully */
+	auto is_enabled() const -> bool
+	{
+		return loaded;
+	}
+
+	/**
+	 * Appends the fasttext input ids for a single word to `word_ngramms`.
+	 * Words unknown to the dictionary are expanded to their subwords;
+	 * tokens that the dictionary does not classify as words are skipped.
+	 */
+	auto word2vec(const char *in, std::size_t len, std::vector<std::int32_t> &word_ngramms) const
+	{
+		if (!loaded) {
+			return;
+		}
+
+		std::string tok{in, len};
+		const auto &dic = ft.getDictionary();
+		auto h = dic->hash(tok);
+		auto wid = dic->getId(tok, h);
+		auto type = wid < 0 ? dic->getType(tok) : dic->getType(wid);
+
+		if (type == fasttext::entry_type::word) {
+			if (wid < 0) {
+				/* Out of vocabulary word */
+				auto pipelined_word = fmt::format("{}{}{}", fasttext::Dictionary::BOW, tok, fasttext::Dictionary::EOW);
+				dic->computeSubwords(pipelined_word, word_ngramms);
+			}
+			else {
+				if (ft.getArgs().maxn <= 0) {
+					word_ngramms.push_back(wid);
+				}
+				else {
+					const auto ngrams = dic->getSubwords(wid);
+					word_ngramms.insert(word_ngramms.end(), ngrams.cbegin(), ngrams.cend());
+				}
+			}
+		}
+	}
+
+	/**
+	 * Predicts up to `k` languages for the given input ids.
+	 * @return heap allocated vector of (probability, label) pairs owned by
+	 * the caller, or nullptr if the model is not loaded
+	 */
+	auto detect_language(std::vector<std::int32_t> &words, int k)
+		-> std::vector<std::pair<fasttext::real, std::string>> *
+	{
+		if (!loaded) {
+			return nullptr;
+		}
+
+		/*
+		 * Run the prediction before allocating the result, so that nothing
+		 * is leaked if predict() throws.
+		 */
+		fasttext::Predictions line_predictions;
+		line_predictions.reserve(k);
+		ft.predict(k, words, line_predictions, 0.0f);
+		const auto *dict = ft.getDictionary().get();
+
+		auto *predictions = new std::vector<std::pair<fasttext::real, std::string>>;
+
+		try {
+			predictions->reserve(line_predictions.size());
+
+			for (const auto &pred: line_predictions) {
+				/* The first member is converted with exp() before being handed to the caller */
+				predictions->push_back(std::make_pair(std::exp(pred.first), dict->getLabel(pred.second)));
+			}
+		} catch (...) {
+			/* Do not leak the partially filled result */
+			delete predictions;
+			throw;
+		}
+
+		return predictions;
+	}
+
+	/** Returns a human readable description of the loaded model */
+	auto model_info(void) const -> const std::string
+	{
+		if (!loaded) {
+			static const auto not_loaded = std::string{"fasttext model is not loaded"};
+			return not_loaded;
+		}
+		else {
+			return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname,
+							   ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens());
+		}
+	}
+};
+}// namespace rspamd::langdet
+#endif
+
+/* C API part */
+G_BEGIN_DECLS
+
+/*
+ * Casts from the opaque pointers used by the C API back to the C++ objects.
+ * Both macros name types that are only declared when WITH_FASTTEXT is
+ * defined, so they must only be expanded inside WITH_FASTTEXT sections.
+ */
+#define FASTTEXT_MODEL_TO_C_API(p) reinterpret_cast<rspamd::langdet::fasttext_langdet *>(p)
+#define FASTTEXT_RESULT_TO_C_API(res) reinterpret_cast<std::vector<std::pair<fasttext::real, std::string>> *>(res)
+
+/*
+ * Creates the fasttext detector for the given configuration.
+ * Returns nullptr when built without fasttext support; otherwise a heap
+ * allocated detector object (to be released with
+ * rspamd_lang_detection_fasttext_destroy), even if no model could be loaded.
+ */
+void *rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg)
+{
+#ifndef WITH_FASTTEXT
+	return nullptr;
+#else
+	return (void *) new rspamd::langdet::fasttext_langdet(cfg);
+#endif
+}
+
+/*
+ * Returns a newly allocated (g_strdup) description of the fasttext model;
+ * the caller owns the string and must release it with g_free.
+ * A NULL detector is reported as a not loaded model, matching the NULL
+ * handling of rspamd_lang_detection_fasttext_is_enabled.
+ */
+char *rspamd_lang_detection_fasttext_show_info(void *ud)
+{
+#ifndef WITH_FASTTEXT
+	return g_strdup("fasttext is not compiled in");
+#else
+	auto *real_model = FASTTEXT_MODEL_TO_C_API(ud);
+
+	if (real_model == nullptr) {
+		return g_strdup("fasttext model is not loaded");
+	}
+
+	auto model_info = real_model->model_info();
+
+	return g_strdup(model_info.c_str());
+#endif
+}
+
+/*
+ * Tells whether fasttext detection can be used: requires fasttext support
+ * at build time, a non-NULL detector and a successfully loaded model.
+ */
+bool rspamd_lang_detection_fasttext_is_enabled(void *ud)
+{
+#ifdef WITH_FASTTEXT
+	const auto *model = FASTTEXT_MODEL_TO_C_API(ud);
+
+	return model != nullptr && model->is_enabled();
+#else
+	return false;
+#endif
+}
+
+/*
+ * Runs fasttext language detection over the words of a text part.
+ *
+ * `utf_words` is read as an array of rspamd_stat_token_t; at most
+ * `max_fasttext_input_len` words are considered and words with an empty
+ * `original` are skipped. Up to `k` predictions are requested.
+ *
+ * Returns an opaque result to be released with
+ * rspamd_fasttext_predict_result_destroy, or nullptr when fasttext is not
+ * compiled in, the detector or the words array is NULL, the model is not
+ * loaded, or the detection has failed with an exception.
+ */
+rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
+																	   struct rspamd_task *task,
+																	   GArray *utf_words,
+																	   int k)
+{
+#ifndef WITH_FASTTEXT
+	return nullptr;
+#else
+	/* Avoid too long inputs */
+	static const guint max_fasttext_input_len = 1024 * 1024;
+	auto *real_model = FASTTEXT_MODEL_TO_C_API(ud);
+
+	if (real_model == nullptr || utf_words == nullptr) {
+		return nullptr;
+	}
+
+	/*
+	 * This function is called from C code, so C++ exceptions must not
+	 * propagate beyond this point.
+	 */
+	try {
+		const guint nwords = utf_words->len < max_fasttext_input_len ? utf_words->len : max_fasttext_input_len;
+		std::vector<std::int32_t> words_vec;
+		words_vec.reserve(nwords);
+
+		for (guint i = 0; i < nwords; i++) {
+			const auto *w = &g_array_index(utf_words, rspamd_stat_token_t, i);
+			if (w->original.len > 0) {
+				real_model->word2vec(w->original.begin, w->original.len, words_vec);
+			}
+		}
+
+		msg_debug_lang_det("fasttext: got %z word tokens from %ud words", words_vec.size(), utf_words->len);
+
+		auto *res = real_model->detect_language(words_vec, k);
+
+		return (rspamd_fasttext_predict_result_t) res;
+	} catch (const std::exception &e) {
+		/* The caller treats an empty result as "nothing detected" and falls back */
+		msg_debug_lang_det("fasttext: language detection failed: %s", e.what());
+	}
+
+	return nullptr;
+#endif
+}
+
+/*
+ * Destroys a detector created by rspamd_lang_detection_fasttext_init.
+ * A NULL pointer is accepted (delete of a null pointer is a no-op);
+ * without fasttext support this function does nothing.
+ */
+void rspamd_lang_detection_fasttext_destroy(void *ud)
+{
+#ifdef WITH_FASTTEXT
+	delete FASTTEXT_MODEL_TO_C_API(ud);
+#endif
+}
+
+
+/*
+ * Returns the number of predictions stored in a result; 0 for a NULL result
+ * or when built without fasttext support.
+ */
+guint rspamd_lang_detection_fasttext_get_nlangs(rspamd_fasttext_predict_result_t res)
+{
+#ifdef WITH_FASTTEXT
+	auto *real_res = FASTTEXT_RESULT_TO_C_API(res);
+
+	if (real_res) {
+		return real_res->size();
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Returns the language name of prediction number `idx`, with the
+ * "__label__" prefix removed, or nullptr if the result is NULL, `idx` is out
+ * of range or fasttext is not compiled in.
+ *
+ * The returned pointer refers to the tail of the label string stored inside
+ * the result (hence it is NUL terminated), so it is only valid until the
+ * result is destroyed.
+ */
+const char *
+rspamd_lang_detection_fasttext_get_lang(rspamd_fasttext_predict_result_t res, unsigned int idx)
+{
+#ifdef WITH_FASTTEXT
+	auto *real_res = FASTTEXT_RESULT_TO_C_API(res);
+
+	if (real_res && real_res->size() > idx) {
+		/* Fasttext returns result in form __label__<lang>, so we need to remove __label__ prefix */
+		static constexpr std::string_view label_prefix{"__label__"};
+		auto lang = std::string_view{real_res->at(idx).second};
+
+		/* Strip only if something is left after the prefix (sizeof() would also count the NUL) */
+		if (lang.size() > label_prefix.size() && lang.substr(0, label_prefix.size()) == label_prefix) {
+			lang.remove_prefix(label_prefix.size());
+		}
+		return lang.data();
+	}
+#endif
+	return nullptr;
+}
+
+/*
+ * Returns the probability of prediction number `idx`; 0 is returned for a
+ * NULL result, an out of range index or a build without fasttext.
+ */
+float rspamd_lang_detection_fasttext_get_prob(rspamd_fasttext_predict_result_t res, unsigned int idx)
+{
+#ifdef WITH_FASTTEXT
+	const auto *predictions = FASTTEXT_RESULT_TO_C_API(res);
+
+	if (predictions != nullptr && idx < predictions->size()) {
+		/* The index has just been validated */
+		return (*predictions)[idx].first;
+	}
+#endif
+	return 0.0f;
+}
+
+/*
+ * Releases a result returned by rspamd_lang_detection_fasttext_detect.
+ * A NULL result is accepted (delete of a null pointer is a no-op);
+ * without fasttext support this function does nothing.
+ */
+void rspamd_fasttext_predict_result_destroy(rspamd_fasttext_predict_result_t res)
+{
+#ifdef WITH_FASTTEXT
+	auto *real_res = FASTTEXT_RESULT_TO_C_API(res);
+
+	delete real_res;
+#endif
+}
+
+G_END_DECLS
+\ No newline at end of file
diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h
new file mode 100644
index 0000000..c8710d3
--- /dev/null
+++ b/src/libmime/lang_detection_fasttext.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_LANG_DETECTION_FASTTEXT_H
+#define RSPAMD_LANG_DETECTION_FASTTEXT_H
+
+#include "config.h"
+
+G_BEGIN_DECLS
+struct rspamd_config;
+struct rspamd_task; /* for logging */
+/**
+ * Initialize fasttext language detector
+ * @param cfg
+ * @return opaque pointer (NULL if fasttext support is not compiled in)
+ */
+void *rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);
+
+/**
+ * Check if fasttext language detector is enabled
+ * @param ud opaque pointer returned by init, may be NULL
+ * @return true if fasttext is compiled in and a model has been loaded
+ */
+bool rspamd_lang_detection_fasttext_is_enabled(void *ud);
+
+/**
+ * Show info about fasttext language detector
+ * @param ud opaque pointer returned by init
+ * @return newly allocated string (g_strdup) that must be freed by the caller
+ */
+char *rspamd_lang_detection_fasttext_show_info(void *ud);
+
+
+/** Opaque handle for a set of fasttext predictions */
+typedef void *rspamd_fasttext_predict_result_t;
+/**
+ * Detect language using fasttext
+ * @param ud opaque pointer
+ * @param task task being processed (used for logging)
+ * @param utf_words array of words (rspamd_stat_token_t) to classify
+ * @param k number of results to return
+ * @return opaque prediction result, which must be released with
+ * rspamd_fasttext_predict_result_destroy; NULL if fasttext is not compiled
+ * in or the model is not loaded
+ */
+rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
+																	   struct rspamd_task *task, GArray *utf_words, int k);
+
+/**
+ * Get number of languages detected
+ * @param ud prediction result, may be NULL
+ * @return number of predictions in the result (0 for NULL)
+ */
+guint rspamd_lang_detection_fasttext_get_nlangs(rspamd_fasttext_predict_result_t ud);
+/**
+ * Get language from fasttext result
+ * @param res prediction result
+ * @param idx index of the prediction
+ * @return language name without the `__label__` prefix, or NULL if idx is
+ * out of range; the string belongs to the result and is valid until the
+ * result is destroyed
+ */
+const char *rspamd_lang_detection_fasttext_get_lang(rspamd_fasttext_predict_result_t res, unsigned int idx);
+
+/**
+ * Get probability from fasttext result
+ * @param res prediction result
+ * @param idx index of the prediction
+ * @return probability of the prediction, or 0 if idx is out of range
+ */
+float rspamd_lang_detection_fasttext_get_prob(rspamd_fasttext_predict_result_t res, unsigned int idx);
+
+/**
+ * Destroy fasttext result
+ * @param res result returned by rspamd_lang_detection_fasttext_detect, may be NULL
+ */
+void rspamd_fasttext_predict_result_destroy(rspamd_fasttext_predict_result_t res);
+
+/**
+ * Destroy fasttext language detector
+ * @param ud opaque pointer returned by rspamd_lang_detection_fasttext_init, may be NULL
+ */
+void rspamd_lang_detection_fasttext_destroy(void *ud);
+
+
+G_END_DECLS
+#endif /* RSPAMD_LANG_DETECTION_FASTTEXT_H */
diff --git a/src/libmime/message.c b/src/libmime/message.c
new file mode 100644
index 0000000..3acc935
--- /dev/null
+++ b/src/libmime/message.c
@@ -0,0 +1,1732 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "util.h"
+#include "rspamd.h"
+#include "message.h"
+#include "libserver/html/html.h"
+#include "images.h"
+#include "archives.h"
+#include "tokenizers/tokenizers.h"
+#include "smtp_parsers.h"
+#include "mime_parser.h"
+#include "mime_encoding.h"
+#include "lang_detection.h"
+#include "libutil/multipattern.h"
+#include "libserver/mempool_vars_internal.h"
+
+#ifdef WITH_SNOWBALL
+#include "libstemmer.h"
+#endif
+
+#include <math.h>
+#include <unicode/uchar.h>
+#include "sodium.h"
+#include "libserver/cfg_file_private.h"
+#include "lua/lua_common.h"
+#include "contrib/uthash/utlist.h"
+#include "contrib/t1ha/t1ha.h"
+#include "received.h"
+
+#define GTUBE_SYMBOL "GTUBE" /* symbol inserted when a GTUBE pattern is found */
+
+#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+/* GTUBE test patterns: they differ in the first character only, one per action */
+static const gchar gtube_pattern_reject[] = "XJS*C4JDBQADN1.NSBN3*2IDNEN*"
+											"GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
+static const gchar gtube_pattern_add_header[] = "YJS*C4JDBQADN1.NSBN3*2IDNEN*"
+												"GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
+static const gchar gtube_pattern_rewrite_subject[] = "ZJS*C4JDBQADN1.NSBN3*2IDNEN*"
+													 "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
+static const gchar gtube_pattern_no_action[] = "AJS*C4JDBQADN1.NSBN3*2IDNEN*"
+											   "GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X";
+struct rspamd_multipattern *gtube_matcher = NULL; /* created on first use in rspamd_check_gtube */
+static const guint64 words_hash_seed = 0xdeadbabe; /* fixed seed for hashes of stemmed words */
+
+static void
+free_byte_array_callback(void *pointer)
+{
+	/* Mempool destructor: drop the byte array together with its data segment */
+	g_byte_array_free((GByteArray *) pointer, TRUE);
+}
+
+/*
+ * Stem the words of a text part, hash every non-empty stem into
+ * part->normalized_hashes and accumulate words statistics in the task pool.
+ */
+static void
+rspamd_mime_part_extract_words(struct rspamd_task *task,
+							   struct rspamd_mime_text_part *part)
+{
+	rspamd_stat_token_t *w;
+	guint i, total_len = 0, short_len = 0;
+
+	if (part->utf_words) {
+		rspamd_stem_words(part->utf_words, task->task_pool, part->language, task->lang_det);
+
+		for (i = 0; i < part->utf_words->len; i++) {
+			guint64 h;
+
+			w = &g_array_index(part->utf_words, rspamd_stat_token_t, i);
+
+			if (w->stemmed.len > 0) {
+				/*
+				 * We use static hash seed if we would want to use that in shingles
+				 * computation in future
+				 */
+				h = rspamd_cryptobox_fast_hash_specific(
+					RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+					w->stemmed.begin, w->stemmed.len, words_hash_seed);
+				g_array_append_val(part->normalized_hashes, h);
+				total_len += w->stemmed.len;
+
+				if (w->stemmed.len <= 3) {
+					short_len++;
+				}
+
+				if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT &&
+					!(w->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED)) {
+					part->nwords++;
+				}
+			}
+
+			if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE |
+							RSPAMD_STAT_TOKEN_FLAG_NORMALISED |
+							RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES)) {
+				task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+			}
+		}
+
+		if (part->utf_words->len) {
+			gdouble *avg_len_p, *short_len_p;
+
+			avg_len_p = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_AVG_WORDS_LEN);
+
+			if (avg_len_p == NULL) {
+				avg_len_p = rspamd_mempool_alloc(task->task_pool, sizeof(double));
+				*avg_len_p = total_len;
+				rspamd_mempool_set_variable(task->task_pool,
+											RSPAMD_MEMPOOL_AVG_WORDS_LEN, avg_len_p, NULL);
+			}
+			else {
+				*avg_len_p += total_len;
+			}
+
+			short_len_p = rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_SHORT_WORDS_CNT);
+
+			if (short_len_p == NULL) {
+				short_len_p = rspamd_mempool_alloc(task->task_pool, sizeof(double));
+				*short_len_p = short_len;
+				/* Publish the short words counter itself, not the average length */
+				rspamd_mempool_set_variable(task->task_pool,
+											RSPAMD_MEMPOOL_SHORT_WORDS_CNT, short_len_p, NULL);
+			}
+			else {
+				*short_len_p += short_len;
+			}
+		}
+	}
+}
+
+/*
+ * Tokenize the stripped content of a text part into part->utf_words; when
+ * words are produced, allocate part->normalized_hashes and normalize them.
+ */
+static void
+rspamd_mime_part_create_words(struct rspamd_task *task,
+							  struct rspamd_mime_text_part *part)
+{
+	/* Raw tokenization is used unless the part is flagged as UTF */
+	enum rspamd_tokenize_type tok_type = RSPAMD_TOKENIZE_RAW;
+
+	if (IS_TEXT_PART_UTF(part)) {
+		tok_type = RSPAMD_TOKENIZE_UTF;
+
+#if U_ICU_VERSION_MAJOR_NUM < 50
+		/* Hack to prevent hang with Thai in old libicu */
+		const gchar *p = part->utf_stripped_content->data, *end;
+		guint i = 0;
+		end = p + part->utf_stripped_content->len;
+		gint32 uc, sc;
+
+		while (p + i < end) {
+			U8_NEXT(p, i, part->utf_stripped_content->len, uc);
+
+			if (((gint32) uc) < 0) {
+				/* U8_NEXT reported an error: use raw tokenization instead */
+				tok_type = RSPAMD_TOKENIZE_RAW;
+				break;
+			}
+
+			if (u_isalpha(uc)) {
+				sc = ublock_getCode(uc);
+
+				if (sc == UBLOCK_THAI) {
+					msg_info_task("enable workaround for Thai characters for old libicu");
+					tok_type = RSPAMD_TOKENIZE_RAW;
+					break;
+				}
+			}
+		}
+#endif
+	}
+
+	part->utf_words = rspamd_tokenize_text(part->utf_stripped_content->data,
+										   part->utf_stripped_content->len,
+										   &part->utf_stripped_text,
+										   tok_type, task->cfg,
+										   part->exceptions,
+										   NULL,
+										   NULL,
+										   task->task_pool);
+
+	if (part->utf_words == NULL) {
+		return;
+	}
+
+	part->normalized_hashes = g_array_sized_new(FALSE, FALSE, sizeof(guint64),
+												part->utf_words->len);
+	rspamd_normalize_words(part->utf_words, task->task_pool);
+}
+
+/* Detect the language of a non-empty part; "en" is used when detection fails */
+static void
+rspamd_mime_part_detect_language(struct rspamd_task *task,
+								 struct rspamd_mime_text_part *part)
+{
+	if (IS_TEXT_PART_EMPTY(part) || part->utf_words == NULL ||
+		part->utf_words->len == 0 || task->lang_det == NULL) {
+		return;
+	}
+
+	if (!rspamd_language_detector_detect(task, task->lang_det, part)) {
+		part->language = "en"; /* Safe fallback */
+		return;
+	}
+
+	struct rspamd_lang_detector_res *best = g_ptr_array_index(part->languages, 0);
+	part->language = best->lang;
+	msg_info_task("detected part language: %s", part->language);
+}
+
+static void
+rspamd_strip_newlines_parse(struct rspamd_task *task,
+							const gchar *begin, const gchar *pe,
+							struct rspamd_mime_text_part *part)
+{
+	const gchar *p = begin, *c = begin; /* p: read cursor; c: start of the span not yet copied out */
+	gboolean crlf_added = FALSE, is_utf = IS_TEXT_PART_UTF(part); /* crlf_added: a space already stands for the current line break */
+	gboolean url_open_bracket = FALSE; /* a '<' was seen on the current line and not closed by '>' */
+	UChar32 uc;
+	/* Copies [begin, pe) into part->utf_stripped_content without CR/LF bytes, filling part->newlines and the part counters */
+	enum {
+		normal_char,
+		seen_cr,
+		seen_lf,
+	} state = normal_char;
+
+	while (p < pe) {
+		if (U8_IS_LEAD(*p) && is_utf) {
+			gint32 off = p - begin;
+			U8_NEXT(begin, off, pe - begin, uc);
+
+			if (uc != -1) {
+				while (p < pe && off < (pe - begin)) {
+					if (IS_ZERO_WIDTH_SPACE(uc)) {
+						/* Invisible space ! */
+						task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+						part->spaces++;
+
+						if (p > c) {
+							g_byte_array_append(part->utf_stripped_content,
+												(const guint8 *) c, p - c);
+							c = begin + off;
+							p = c;
+						}
+
+						U8_NEXT(begin, off, pe - begin, uc);
+
+						if (!IS_ZERO_WIDTH_SPACE(uc)) {
+							break;
+						}
+
+						part->double_spaces++;
+						p = begin + off;
+						c = p;
+					}
+					else {
+						break;
+					}
+				}
+			}
+		}
+
+		if (G_UNLIKELY(p >= pe)) {
+			/*
+			 * This is reached when there is a utf8 part and we
+			 * have zero width spaces at the end of the text
+			 * So we just check overflow and refuse to access *p if it is
+			 * after our real content.
+			 */
+			break;
+		}
+		else if (*p == '\r') {
+			switch (state) {
+			case normal_char:
+				state = seen_cr;
+				if (p > c) {
+					g_byte_array_append(part->utf_stripped_content,
+										(const guint8 *) c, p - c);
+				}
+
+				crlf_added = FALSE;
+				c = p + 1;
+				break;
+			case seen_cr:
+				/* Double \r\r */
+				if (!crlf_added) {
+					g_byte_array_append(part->utf_stripped_content,
+										(const guint8 *) " ", 1);
+					crlf_added = TRUE;
+					g_ptr_array_add(part->newlines,
+									(((gpointer) (goffset) (part->utf_stripped_content->len))));
+				}
+
+				part->nlines++;
+				part->empty_lines++;
+				c = p + 1;
+				break;
+			case seen_lf:
+				/* Likely \r\n\r...*/
+				state = seen_cr;
+				c = p + 1;
+				break;
+			}
+
+			url_open_bracket = FALSE;
+
+			p++;
+		}
+		else if (*p == '\n') {
+			switch (state) {
+			case normal_char:
+				state = seen_lf;
+
+				if (p > c) {
+					g_byte_array_append(part->utf_stripped_content,
+										(const guint8 *) c, p - c);
+				}
+
+				c = p + 1;
+
+				if (IS_TEXT_PART_HTML(part) || !url_open_bracket) { /* no space while a '<' is open in a non-HTML part */
+					g_byte_array_append(part->utf_stripped_content,
+										(const guint8 *) " ", 1);
+					g_ptr_array_add(part->newlines,
+									(((gpointer) (goffset) (part->utf_stripped_content->len))));
+					crlf_added = TRUE;
+				}
+				else {
+					crlf_added = FALSE;
+				}
+
+				break;
+			case seen_cr:
+				/* \r\n */
+				if (!crlf_added) {
+					if (IS_TEXT_PART_HTML(part) || !url_open_bracket) {
+						g_byte_array_append(part->utf_stripped_content,
+											(const guint8 *) " ", 1);
+						crlf_added = TRUE;
+					}
+
+					g_ptr_array_add(part->newlines,
+									(((gpointer) (goffset) (part->utf_stripped_content->len))));
+				}
+
+				c = p + 1;
+				state = seen_lf;
+
+				break;
+			case seen_lf:
+				/* Double \n\n */
+				if (!crlf_added) {
+					g_byte_array_append(part->utf_stripped_content,
+										(const guint8 *) " ", 1);
+					crlf_added = TRUE;
+					g_ptr_array_add(part->newlines,
+									(((gpointer) (goffset) (part->utf_stripped_content->len))));
+				}
+
+				part->nlines++;
+				part->empty_lines++;
+
+				c = p + 1;
+				break;
+			}
+			url_open_bracket = FALSE;
+
+			p++;
+		}
+		else {
+			if ((*p) == '<') {
+				url_open_bracket = TRUE;
+			}
+			else if ((*p) == '>') {
+				url_open_bracket = FALSE;
+			}
+
+			switch (state) {
+			case normal_char:
+				if (*p == ' ') {
+					part->spaces++;
+
+					if (p > begin && *(p - 1) == ' ') {
+						part->double_spaces++;
+					}
+				}
+				else {
+					part->non_spaces++;
+
+					if ((*p) & 0x80) {
+						part->non_ascii_chars++;
+					}
+					else {
+						if (g_ascii_isupper(*p)) {
+							part->capital_letters++;
+						}
+						else if (g_ascii_isdigit(*p)) {
+							part->numeric_characters++;
+						}
+
+						part->ascii_chars++;
+					}
+				}
+				break;
+			case seen_cr:
+			case seen_lf:
+				part->nlines++; /* first byte after a line break: one more line is complete */
+
+				if (!crlf_added) {
+					g_ptr_array_add(part->newlines,
+									(((gpointer) (goffset) (part->utf_stripped_content->len))));
+				}
+
+				/* Skip initial spaces */
+				if (*p == ' ') {
+					if (!crlf_added) {
+						g_byte_array_append(part->utf_stripped_content,
+											(const guint8 *) " ", 1);
+					}
+
+					while (p < pe && *p == ' ') {
+						p++;
+						c++;
+						part->spaces++;
+					}
+
+					if (p < pe && (*p == '\r' || *p == '\n')) {
+						part->empty_lines++;
+					}
+				}
+
+				state = normal_char;
+				continue; /* the current byte is examined again in normal_char state */
+			}
+
+			p++;
+		}
+	}
+
+	/* Leftover: flush what is still pending after the loop */
+	if (p > c) {
+		if (p > pe) {
+			p = pe;
+		}
+
+		switch (state) {
+		case normal_char:
+			g_byte_array_append(part->utf_stripped_content,
+								(const guint8 *) c, p - c);
+
+			while (c < p) {
+				if (*c == ' ') {
+					part->spaces++;
+
+					if (c > begin && *(c - 1) == ' ') {
+						part->double_spaces++;
+					}
+				}
+				else {
+					part->non_spaces++;
+
+					if ((*c) & 0x80) {
+						part->non_ascii_chars++;
+					}
+					else {
+						part->ascii_chars++;
+					}
+				}
+
+				c++;
+			}
+			break;
+		default: /* seen_cr or seen_lf: the loop ended right after a line break */
+
+			if (!crlf_added) {
+				g_byte_array_append(part->utf_stripped_content,
+									(const guint8 *) " ", 1);
+				g_ptr_array_add(part->newlines,
+								(((gpointer) (goffset) (part->utf_stripped_content->len))));
+			}
+
+			part->nlines++;
+			break;
+		}
+	}
+}
+
+static void
+rspamd_u_text_dtor(void *p)
+{
+	utext_close((UText *) p); /* p is the UText registered with this destructor in the task pool */
+}
+
+/*
+ * Build part->utf_stripped_content and part->newlines from the utf content,
+ * add a newline exception per recorded offset and open a UText for UTF parts.
+ */
+static void
+rspamd_normalize_text_part(struct rspamd_task *task,
+						   struct rspamd_mime_text_part *part)
+{
+	UErrorCode uc_err = U_ZERO_ERROR;
+
+	part->newlines = g_ptr_array_sized_new(128);
+
+	if (IS_TEXT_PART_EMPTY(part)) {
+		part->utf_stripped_content = g_byte_array_new();
+	}
+	else {
+		const gchar *start = (const gchar *) part->utf_content.begin;
+		guint idx;
+
+		part->utf_stripped_content = g_byte_array_sized_new(part->utf_content.len);
+		rspamd_strip_newlines_parse(task, start, start + part->utf_content.len, part);
+
+		for (idx = 0; idx < part->newlines->len; idx++) {
+			struct rspamd_process_exception *ex;
+			goffset off = (goffset) g_ptr_array_index(part->newlines, idx);
+
+			/* Replace the stored offset with a pointer into the stripped data */
+			g_ptr_array_index(part->newlines, idx) = (gpointer) (goffset) (part->utf_stripped_content->data + off);
+
+			ex = rspamd_mempool_alloc(task->task_pool, sizeof(*ex));
+			ex->pos = off;
+			ex->len = 0;
+			ex->type = RSPAMD_EXCEPTION_NEWLINE;
+			part->exceptions = g_list_prepend(part->exceptions, ex);
+		}
+	}
+
+	if (IS_TEXT_PART_UTF(part)) {
+		utext_openUTF8(&part->utf_stripped_text,
+					   part->utf_stripped_content->data,
+					   part->utf_stripped_content->len, &uc_err);
+
+		if (U_SUCCESS(uc_err)) {
+			rspamd_mempool_add_destructor(task->task_pool, rspamd_u_text_dtor,
+										  &part->utf_stripped_text);
+		}
+		else {
+			/* Probably, should be an assertion */
+			msg_warn_task("cannot open text from utf content");
+		}
+	}
+
+	rspamd_mempool_add_destructor(task->task_pool,
+								  (rspamd_mempool_destruct_t) free_byte_array_callback,
+								  part->utf_stripped_content);
+	rspamd_mempool_notify_alloc(task->task_pool, part->utf_stripped_content->len);
+	rspamd_mempool_add_destructor(task->task_pool,
+								  (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard,
+								  part->newlines);
+}
+
+#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
+
+/*
+ * Weighted edit distance between two arrays of guint64 word hashes (add and
+ * delete cost 1, replacement costs 2). If the arrays hold more than max_words
+ * hashes in total, the difference of their lengths is returned instead.
+ */
+static guint
+rspamd_words_levenshtein_distance(struct rspamd_task *task,
+								  GArray *w1, GArray *w2)
+{
+	guint s1len, s2len, x, y, lastdiag, olddiag;
+	guint *column, ret;
+	guint64 h1, h2;
+	gint differ;
+	static const guint max_words = 8192;
+
+	s1len = w1->len;
+	s2len = w2->len;
+
+	if (s1len + s2len > max_words) {
+		msg_info_task("cannot direct compare multipart/alternative parts with more than %ud words in total: "
+					  "(%ud words in one part and %ud in another)",
+					  max_words, s1len, s2len);
+
+		/* Use approximate comparison of number of words */
+		return s1len > s2len ? s1len - s2len : s2len - s1len;
+	}
+
+	column = g_malloc0((s1len + 1) * sizeof(guint));
+
+	for (y = 1; y <= s1len; y++) {
+		column[y] = y;
+	}
+
+	for (x = 1; x <= s2len; x++) {
+		column[0] = x;
+		h2 = g_array_index(w2, guint64, x - 1);
+
+		for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
+			olddiag = column[y];
+			h1 = g_array_index(w1, guint64, y - 1);
+			differ = (h1 != h2) ? 1 : 0;
+			/*
+			 * Cost of replacement is twice higher than cost of add/delete
+			 * to calculate percentage properly; equal hashes cost nothing
+			 */
+			column[y] = MIN3(column[y] + 1, column[y - 1] + 1,
+							 lastdiag + (differ * 2));
+			lastdiag = olddiag;
+		}
+	}
+
+	ret = column[s1len];
+	g_free(column);
+
+	return ret;
+}
+
+/*
+ * Multipattern callback: returns the pattern number plus one (to distinguish
+ * from zero); patterns after the first one need RSPAMD_GTUBE_ALL policy.
+ */
+static gint
+rspamd_multipattern_gtube_cb(struct rspamd_multipattern *mp,
+							 guint strnum,
+							 gint match_start,
+							 gint match_pos,
+							 const gchar *text,
+							 gsize len,
+							 void *context)
+{
+	const struct rspamd_task *task = context;
+
+	if (strnum == 0 || task->cfg->gtube_patterns_policy == RSPAMD_GTUBE_ALL) {
+		return strnum + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look for GTUBE test patterns in a text part. The shared matcher is created
+ * on first use; on a match the task gets SKIP and GTUBE flags and the action
+ * that corresponds to the matched pattern is returned.
+ */
+static enum rspamd_action_type
+rspamd_check_gtube(struct rspamd_task *task, struct rspamd_mime_text_part *part)
+{
+	static const gsize max_check_size = 8 * 1024;
+	/* Both tables share one order: pattern N (from 1) yields pattern_actions[N - 1] */
+	static const gchar *const patterns[] = {
+		gtube_pattern_reject, gtube_pattern_add_header,
+		gtube_pattern_rewrite_subject, gtube_pattern_no_action};
+	static const enum rspamd_action_type pattern_actions[] = {
+		METRIC_ACTION_REJECT, METRIC_ACTION_ADD_HEADER,
+		METRIC_ACTION_REWRITE_SUBJECT, METRIC_ACTION_NOACTION};
+	enum rspamd_action_type act = METRIC_ACTION_NOACTION;
+	enum rspamd_gtube_patterns_policy policy = task->cfg ? task->cfg->gtube_patterns_policy : RSPAMD_GTUBE_REJECT;
+	gint ret;
+	guint i;
+
+	g_assert(part != NULL);
+
+	if (policy == RSPAMD_GTUBE_DISABLED) {
+		return act;
+	}
+
+	if (gtube_matcher == NULL) {
+		GError *err = NULL;
+
+		gtube_matcher = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
+
+		for (i = 0; i < G_N_ELEMENTS(patterns); i++) {
+			rspamd_multipattern_add_pattern(gtube_matcher, patterns[i],
+											RSPAMD_MULTIPATTERN_DEFAULT);
+		}
+
+		rspamd_multipattern_compile(gtube_matcher, &err);
+
+		if (err != NULL) {
+			/* It will be expensive, but I don't care, still better than to abort */
+			msg_err("cannot compile gtube matcher: %s", err->message);
+			g_error_free(err);
+		}
+	}
+
+	if (part->utf_content.len < sizeof(gtube_pattern_reject) ||
+		part->utf_content.len > max_check_size) {
+		return act;
+	}
+
+	ret = rspamd_multipattern_lookup(gtube_matcher, part->utf_content.begin,
+									 part->utf_content.len,
+									 rspamd_multipattern_gtube_cb, task, NULL);
+
+	if (ret <= 0) {
+		return act;
+	}
+
+	if (ret <= (gint) G_N_ELEMENTS(pattern_actions)) {
+		act = pattern_actions[ret - 1];
+	}
+
+	task->flags |= RSPAMD_TASK_FLAG_SKIP;
+	task->flags |= RSPAMD_TASK_FLAG_GTUBE;
+	msg_info_task("gtube %s pattern has been found in part of length %uz",
+				  rspamd_action_to_str(act), part->utf_content.len);
+
+	return act;
+}
+
+static gint
+exceptions_compare_func(gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_process_exception *ea = a, *eb = b;
+	/* Order by position; compare instead of subtracting, the difference may not fit gint */
+	return (ea->pos > eb->pos) - (ea->pos < eb->pos);
+}
+
+/*
+ * Expose the converted data (utf_raw_content) of a plain part as utf_content;
+ * returns FALSE when a non-empty part has no converted content.
+ */
+static gboolean
+rspamd_message_process_plain_text_part(struct rspamd_task *task,
+									   struct rspamd_mime_text_part *text_part)
+{
+	if (text_part->parsed.len == 0) {
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
+		return TRUE;
+	}
+
+	rspamd_mime_text_part_maybe_convert(task, text_part);
+
+	if (text_part->utf_raw_content == NULL) {
+		/*
+		 * We ignore unconverted parts from now as it is dangerous
+		 * to treat them as text parts
+		 */
+		text_part->utf_content.begin = NULL;
+		text_part->utf_content.len = 0;
+		return FALSE;
+	}
+
+	/* Just have the same content */
+	text_part->utf_content.begin = (const gchar *) text_part->utf_raw_content->data;
+	text_part->utf_content.len = text_part->utf_raw_content->len;
+	return TRUE;
+}
+
+/*
+ * Process an HTML part: the converted content is handed to
+ * rspamd_html_process_part_full and the parsed text becomes utf_content.
+ * Returns FALSE when a non-empty part has no converted content.
+ */
+static gboolean
+rspamd_message_process_html_text_part(struct rspamd_task *task,
+									  struct rspamd_mime_text_part *text_part,
+									  uint16_t *cur_url_order)
+{
+	text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_HTML;
+
+	if (!text_part->parsed.len) {
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
+		return TRUE;
+	}
+
+	rspamd_mime_text_part_maybe_convert(task, text_part);
+
+	if (!text_part->utf_raw_content) {
+		return FALSE;
+	}
+
+	text_part->html = rspamd_html_process_part_full(task, text_part->utf_raw_content,
+													&text_part->exceptions, MESSAGE_FIELD(task, urls),
+													text_part->mime_part->urls,
+													task->cfg ? task->cfg->enable_css_parser : true,
+													cur_url_order);
+	rspamd_html_get_parsed_content(text_part->html, &text_part->utf_content);
+
+	if (!text_part->utf_content.len) {
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
+	}
+
+	return TRUE;
+}
+
+enum rspamd_message_part_is_text_result {
+	RSPAMD_MESSAGE_PART_IS_TEXT_PLAIN = 0, /* text part that is not recognised as HTML */
+	RSPAMD_MESSAGE_PART_IS_TEXT_HTML, /* html/xhtml subtype or "html" detected extension */
+	RSPAMD_MESSAGE_PART_IS_NOT_TEXT /* not a text part, or a text attachment that is skipped */
+};
+
+/*
+ * Classify a mime part as plain text, HTML or not text, using the declared
+ * content type and the detected type/extension; text attachments are skipped.
+ */
+static enum rspamd_message_part_is_text_result
+rspamd_message_part_can_be_parsed_as_text(struct rspamd_task *task,
+										  struct rspamd_mime_part *mime_part)
+{
+	enum rspamd_message_part_is_text_result res = RSPAMD_MESSAGE_PART_IS_NOT_TEXT;
+
+	if ((mime_part->ct && (mime_part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) ||
+		(mime_part->detected_type && strcmp(mime_part->detected_type, "text") == 0)) {
+		rspamd_ftok_t html_tok = {.begin = "html", .len = 4};
+		rspamd_ftok_t xhtml_tok = {.begin = "xhtml", .len = 5};
+
+		res = RSPAMD_MESSAGE_PART_IS_TEXT_PLAIN;
+
+		/* ct may be NULL here when only the detected type says "text" */
+		if ((mime_part->ct &&
+			 (rspamd_ftok_casecmp(&mime_part->ct->subtype, &html_tok) == 0 ||
+			  rspamd_ftok_casecmp(&mime_part->ct->subtype, &xhtml_tok) == 0)) ||
+			(mime_part->detected_ext && strcmp(mime_part->detected_ext, "html") == 0)) {
+			res = RSPAMD_MESSAGE_PART_IS_TEXT_HTML;
+		}
+	}
+
+	/* Skip attachments */
+	if (res != RSPAMD_MESSAGE_PART_IS_NOT_TEXT &&
+		mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT &&
+		!task->cfg->check_text_attachements) {
+		debug_task("skip attachments for checking as text parts");
+		return RSPAMD_MESSAGE_PART_IS_NOT_TEXT;
+	}
+
+	return res;
+}
+
+/*
+ * Create a text part for a mime part classified as text: fill its content
+ * (plain or HTML), attach it to the message, check it for GTUBE patterns and
+ * then normalize it, extract urls and split it into words.
+ * Returns FALSE when the part content cannot be processed as text, TRUE
+ * otherwise (including the case when a GTUBE pattern stops the processing).
+ */
+static gboolean
+rspamd_message_process_text_part_maybe(struct rspamd_task *task,
+									   struct rspamd_mime_part *mime_part,
+									   enum rspamd_message_part_is_text_result is_text,
+									   uint16_t *cur_url_order)
+{
+	struct rspamd_mime_text_part *text_part;
+	enum rspamd_action_type act;
+	gboolean processed;
+	gboolean strict_urls;
+
+	text_part = rspamd_mempool_alloc0(task->task_pool,
+									  sizeof(*text_part));
+	text_part->mime_part = mime_part;
+	text_part->raw.begin = mime_part->raw_data.begin;
+	text_part->raw.len = mime_part->raw_data.len;
+	text_part->parsed.begin = mime_part->parsed_data.begin;
+	text_part->parsed.len = mime_part->parsed_data.len;
+	text_part->utf_stripped_text = (UText) UTEXT_INITIALIZER;
+
+	/* Mark attachments */
+	if (mime_part->cd && mime_part->cd->type == RSPAMD_CT_ATTACHMENT) {
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_ATTACHMENT;
+	}
+
+	/* Fill the content according to the kind of the part */
+	if (is_text == RSPAMD_MESSAGE_PART_IS_TEXT_HTML) {
+		processed = rspamd_message_process_html_text_part(task, text_part,
+														  cur_url_order);
+	}
+	else {
+		processed = rspamd_message_process_plain_text_part(task, text_part);
+	}
+
+	if (!processed) {
+		return FALSE;
+	}
+
+	/* The part is usable: register it in the message */
+	g_ptr_array_add(MESSAGE_FIELD(task, text_parts), text_part);
+	mime_part->part_type = RSPAMD_MIME_PART_TEXT;
+	mime_part->specific.txt = text_part;
+
+	act = rspamd_check_gtube(task, text_part);
+
+	if (act != METRIC_ACTION_NOACTION) {
+		/* GTUBE found: report it and skip the rest of the processing */
+		struct rspamd_action *action;
+		gdouble score;
+
+		action = rspamd_config_get_action_by_type(task->cfg, act);
+
+		if (action) {
+			score = action->threshold;
+
+			rspamd_add_passthrough_result(task, action,
+										  RSPAMD_PASSTHROUGH_CRITICAL,
+										  score, "Gtube pattern",
+										  "GTUBE", 0, NULL);
+		}
+
+		rspamd_task_insert_result(task, GTUBE_SYMBOL, 0, NULL);
+
+		return TRUE;
+	}
+
+	/* Post process part */
+	rspamd_normalize_text_part(task, text_part);
+
+	if (IS_TEXT_PART_HTML(text_part)) {
+		strict_urls = TRUE;
+	}
+	else {
+		struct rspamd_mime_part *parent = mime_part->parent_part;
+
+		/*
+		 * Use strict extraction mode when the parent is a multipart with
+		 * exactly two children: we will extract missing urls from an html
+		 * part if needed.
+		 * Otherwise fall back to full text extraction using TLD patterns
+		 */
+		strict_urls = parent != NULL && IS_PART_MULTIPART(parent) &&
+					  parent->specific.mp->children->len == 2;
+	}
+
+	rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order,
+							strict_urls ? RSPAMD_URL_FIND_STRICT : RSPAMD_URL_FIND_ALL);
+
+	if (text_part->exceptions) {
+		/* Keep exceptions ordered by their position */
+		text_part->exceptions = g_list_sort(text_part->exceptions,
+											exceptions_compare_func);
+		rspamd_mempool_add_destructor(task->task_pool,
+									  (rspamd_mempool_destruct_t) g_list_free,
+									  text_part->exceptions);
+	}
+
+	/* Split the stripped content into words */
+	rspamd_mime_part_create_words(task, text_part);
+
+	return TRUE;
+}
+
+/* Creates a single-part message from raw data; the type comes from the Content-Type request header or lua_magic */
+static void
+rspamd_message_from_data(struct rspamd_task *task, const guchar *start,
+						 gsize len)
+{
+	struct rspamd_content_type *ct = NULL;
+	struct rspamd_mime_part *part;
+	const char *mb = "application/octet-stream"; /* type used when detection gives nothing else */
+	gchar *mid;
+	rspamd_ftok_t srch, *tok;
+	gchar cdbuf[1024];
+
+	g_assert(start != NULL);
+
+	part = rspamd_mempool_alloc0(task->task_pool, sizeof(*part));
+
+	part->raw_data.begin = start; /* raw and parsed data both refer to the same input buffer */
+	part->raw_data.len = len;
+	part->parsed_data.begin = start;
+	part->parsed_data.len = len;
+	part->part_number = MESSAGE_FIELD(task, parts)->len;
+	part->urls = g_ptr_array_new();
+	part->raw_headers = rspamd_message_headers_new();
+	part->headers_order = NULL;
+
+	tok = rspamd_task_get_request_header(task, "Content-Type");
+
+	if (tok) {
+		/* We have Content-Type defined */
+		ct = rspamd_content_type_parse(tok->begin, tok->len,
+									   task->task_pool);
+		part->ct = ct;
+	}
+	else if (task->cfg && task->cfg->libs_ctx) {
+		lua_State *L = task->cfg->lua_state;
+
+		if (rspamd_lua_require_function(L,
+										"lua_magic", "detect_mime_part")) {
+
+			struct rspamd_mime_part **pmime;
+			struct rspamd_task **ptask;
+
+			pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+			rspamd_lua_setclass(L, "rspamd{mimepart}", -1);
+			*pmime = part;
+			ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+			rspamd_lua_setclass(L, "rspamd{task}", -1);
+			*ptask = task;
+
+			if (lua_pcall(L, 2, 2, 0) != 0) { /* 2 arguments (part, task), 2 results */
+				msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
+			}
+			else {
+				if (lua_istable(L, -1)) {
+					lua_pushstring(L, "ct");
+					lua_gettable(L, -2);
+
+					if (lua_isstring(L, -1)) {
+						mb = rspamd_mempool_strdup(task->task_pool,
+												   lua_tostring(L, -1));
+					}
+				}
+			}
+
+			lua_settop(L, 0); /* drop everything left on the Lua stack */
+		}
+		else {
+			msg_err_task("cannot require lua_magic.detect_mime_part");
+		}
+
+		if (mb) { /* NOTE(review): mb starts as a literal and is only replaced by a strdup result - confirm it can be NULL */
+			srch.begin = mb;
+			srch.len = strlen(mb);
+			ct = rspamd_content_type_parse(srch.begin, srch.len,
+										   task->task_pool);
+
+			if (!part->ct) {
+				msg_info_task("construct fake mime of type: %s", mb);
+				part->ct = ct;
+			}
+			else { /* NOTE(review): this function sets part->ct only in the request header branch - confirm this path is reachable */
+				/* Check sanity */
+				if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
+					RSPAMD_FTOK_FROM_STR(&srch, "application");
+
+					if (rspamd_ftok_cmp(&ct->type, &srch) == 0) {
+						msg_info_task("construct fake mime of type: %s", mb);
+						part->ct = ct;
+					}
+				}
+				else {
+					msg_info_task("construct fake mime of type: %T/%T, detected %s",
+								  &part->ct->type, &part->ct->subtype, mb);
+				}
+			}
+
+			part->detected_ct = ct;
+		}
+	}
+
+
+	tok = rspamd_task_get_request_header(task, "Filename");
+
+	if (tok) {
+		rspamd_snprintf(cdbuf, sizeof(cdbuf), "inline; filename=\"%T\"", tok);
+	}
+	else {
+		rspamd_snprintf(cdbuf, sizeof(cdbuf), "inline");
+	}
+
+	part->cd = rspamd_content_disposition_parse(cdbuf, strlen(cdbuf),
+												task->task_pool);
+
+	g_ptr_array_add(MESSAGE_FIELD(task, parts), part);
+	rspamd_mime_parser_calc_digest(part);
+
+	/* Generate message ID */
+	mid = rspamd_mime_message_id_generate("localhost.localdomain");
+	rspamd_mempool_add_destructor(task->task_pool,
+								  (rspamd_mempool_destruct_t) g_free, mid);
+	MESSAGE_FIELD(task, message_id) = mid;
+	task->queue_id = mid; /* the generated id is also used as the queue id */
+}
+
+/*
+ * Message destructor: releases per-part headers, multipart children arrays,
+ * Lua references and urls, then per-text-part arrays and finally the
+ * message-wide headers, part arrays and urls hash.
+ */
+static void
+rspamd_message_dtor(struct rspamd_message *msg)
+{
+	struct rspamd_mime_text_part *txt;
+	struct rspamd_mime_part *part;
+	guint idx;
+
+	PTR_ARRAY_FOREACH(msg->parts, idx, part)
+	{
+		if (part->raw_headers) {
+			rspamd_message_headers_unref(part->raw_headers);
+		}
+
+		if (IS_PART_MULTIPART(part) && part->specific.mp->children) {
+			g_ptr_array_free(part->specific.mp->children, TRUE);
+		}
+
+		if (part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA &&
+			part->specific.lua_specific.cbref != -1) {
+			luaL_unref(msg->task->cfg->lua_state, LUA_REGISTRYINDEX,
+					   part->specific.lua_specific.cbref);
+		}
+
+		if (part->urls) {
+			g_ptr_array_unref(part->urls);
+		}
+	}
+
+	PTR_ARRAY_FOREACH(msg->text_parts, idx, txt)
+	{
+		if (txt->utf_words) {
+			g_array_free(txt->utf_words, TRUE);
+		}
+		if (txt->normalized_hashes) {
+			g_array_free(txt->normalized_hashes, TRUE);
+		}
+		if (txt->languages) {
+			g_ptr_array_unref(txt->languages);
+		}
+	}
+
+	rspamd_message_headers_unref(msg->raw_headers);
+	g_ptr_array_unref(msg->text_parts);
+	g_ptr_array_unref(msg->parts);
+
+	kh_destroy(rspamd_url_hash, msg->urls);
+}
+
+/* Allocates a message in the task pool and creates its empty containers */
+struct rspamd_message *
+rspamd_message_new(struct rspamd_task *task)
+{
+	struct rspamd_message *msg;
+
+	msg = rspamd_mempool_alloc0(task->task_pool, sizeof(*msg));
+	msg->task = task;
+	msg->raw_headers = rspamd_message_headers_new();
+	msg->urls = kh_init(rspamd_url_hash);
+	msg->parts = g_ptr_array_sized_new(4);
+	msg->text_parts = g_ptr_array_sized_new(2);
+
+	REF_INIT_RETAIN(msg, rspamd_message_dtor);
+
+	return msg;
+}
+
+gboolean
+rspamd_message_parse(struct rspamd_task *task)
+{
+	const gchar *p;
+	gsize len;
+	guint i;
+	GError *err = NULL;
+	guint64 n[2], seed;
+
+	if (RSPAMD_TASK_IS_EMPTY(task)) {
+		/* Don't do anything with empty task */
+		task->flags |= RSPAMD_TASK_FLAG_SKIP_PROCESS;
+		return TRUE;
+	}
+
+	p = task->msg.begin;
+	len = task->msg.len;
+
+	/* Skip any space characters to avoid some bad messages to be unparsed */
+	while (len > 0 && g_ascii_isspace(*p)) {
+		p++;
+		len--;
+	}
+
+	/*
+	 * Exim somehow uses mailbox format for messages being scanned:
+	 * From xxx@xxx.com Fri May 13 19:08:48 2016
+	 *
+	 * So we check if a task has this line to avoid possible issues
+	 */
+	if (len > sizeof("From ") - 1) {
+		if (memcmp(p, "From ", sizeof("From ") - 1) == 0) {
+			/* Skip to CRLF */
+			msg_info_task("mailbox input detected, enable workaround");
+			p += sizeof("From ") - 1;
+			len -= sizeof("From ") - 1;
+
+			while (len > 0 && *p != '\n') {
+				p++;
+				len--;
+			}
+			while (len > 0 && g_ascii_isspace(*p)) {
+				p++;
+				len--;
+			}
+		}
+	}
+
+	task->msg.begin = p;
+	task->msg.len = len;
+
+	/* Cleanup old message */
+	if (task->message) {
+		rspamd_message_unref(task->message);
+	}
+
+	task->message = rspamd_message_new(task);
+
+	if (task->flags & RSPAMD_TASK_FLAG_MIME) {
+		enum rspamd_mime_parse_error ret;
+
+		debug_task("construct mime parser from string length %d",
+				   (gint) task->msg.len);
+		ret = rspamd_mime_parse_task(task, &err);
+
+		switch (ret) {
+		case RSPAMD_MIME_PARSE_FATAL:
+			msg_err_task("cannot construct mime from stream: %e", err);
+
+			if (task->cfg && (!task->cfg->allow_raw_input)) {
+				msg_err_task("cannot construct mime from stream");
+				if (err) {
+					task->err = err;
+				}
+
+				return FALSE;
+			}
+			else {
+				task->flags &= ~RSPAMD_TASK_FLAG_MIME;
+				rspamd_message_from_data(task, p, len);
+			}
+			break;
+		case RSPAMD_MIME_PARSE_NESTING:
+			msg_warn_task("cannot construct full mime from stream: %e", err);
+			task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+			break;
+		case RSPAMD_MIME_PARSE_OK:
+		default:
+			break;
+		}
+
+		if (err) {
+			g_error_free(err);
+		}
+	}
+	else {
+		rspamd_message_from_data(task, p, len);
+	}
+
+
+	if (MESSAGE_FIELD(task, message_id) == NULL) {
+		MESSAGE_FIELD(task, message_id) = "undef";
+	}
+
+	debug_task("found %ud parts in message", MESSAGE_FIELD(task, parts)->len);
+	if (task->queue_id == NULL) {
+		task->queue_id = "undef";
+	}
+
+	rspamd_received_maybe_fix_task(task);
+
+	struct rspamd_mime_part *part;
+
+	/* Blake2b applied to string 'rspamd' */
+	static const guchar RSPAMD_ALIGNED(32) hash_key[] = {
+		0xef,
+		0x43,
+		0xae,
+		0x80,
+		0xcc,
+		0x8d,
+		0xc3,
+		0x4c,
+		0x6f,
+		0x1b,
+		0xd6,
+		0x18,
+		0x1b,
+		0xae,
+		0x87,
+		0x74,
+		0x0c,
+		0xca,
+		0xf7,
+		0x8e,
+		0x5f,
+		0x2e,
+		0x54,
+		0x32,
+		0xf6,
+		0x79,
+		0xb9,
+		0x27,
+		0x26,
+		0x96,
+		0x20,
+		0x92,
+		0x70,
+		0x07,
+		0x85,
+		0xeb,
+		0x83,
+		0xf7,
+		0x89,
+		0xe0,
+		0xd7,
+		0x32,
+		0x2a,
+		0xd2,
+		0x1a,
+		0x64,
+		0x41,
+		0xef,
+		0x49,
+		0xff,
+		0xc3,
+		0x8c,
+		0x54,
+		0xf9,
+		0x67,
+		0x74,
+		0x30,
+		0x1e,
+		0x70,
+		0x2e,
+		0xb7,
+		0x12,
+		0x09,
+		0xfe,
+	};
+
+	memcpy(&seed, hash_key, sizeof(seed));
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		n[0] = t1ha2_atonce128(&n[1],
+							   part->digest, sizeof(part->digest),
+							   seed);
+
+		seed = n[0] ^ n[1];
+	}
+
+	memcpy(MESSAGE_FIELD(task, digest), n, sizeof(n));
+
+	if (MESSAGE_FIELD(task, subject)) {
+		p = MESSAGE_FIELD(task, subject);
+		len = strlen(p);
+		n[0] = t1ha2_atonce128(&n[1],
+							   p, len,
+							   seed);
+		memcpy(MESSAGE_FIELD(task, digest), n, sizeof(n));
+	}
+
+	if (task->queue_id) {
+		msg_info_task("loaded message; id: <%s>; queue-id: <%s>; size: %z; "
+					  "checksum: <%*xs>",
+					  MESSAGE_FIELD(task, message_id), task->queue_id, task->msg.len,
+					  (gint) sizeof(MESSAGE_FIELD(task, digest)), MESSAGE_FIELD(task, digest));
+	}
+	else {
+		msg_info_task("loaded message; id: <%s>; size: %z; "
+					  "checksum: <%*xs>",
+					  MESSAGE_FIELD(task, message_id), task->msg.len,
+					  (gint) sizeof(MESSAGE_FIELD(task, digest)), MESSAGE_FIELD(task, digest));
+	}
+
+	return TRUE;
+}
+
+
+/*
+ * A helper pair that remembers which parts were detected as text and what the
+ * detection verdict was. In C++ this would be a std::pair; in C it has to be
+ * spelled out by hand.
+ */
+struct rspamd_mime_part_text_position {
+	/* Index of the part inside MESSAGE_FIELD(task, parts) */
+	unsigned pos;
+	/* Verdict returned by rspamd_message_part_can_be_parsed_as_text() */
+	enum rspamd_message_part_is_text_result res;
+};
+
+/*
+ * Sort callback for the array of detected text parts: entries whose verdict is
+ * RSPAMD_MESSAGE_PART_IS_TEXT_HTML are ordered before the others; entries with
+ * the same verdict are ordered by descending position.
+ */
+static int
+rspamd_mime_text_part_position_compare_func(const void *v1, const void *v2)
+{
+	const struct rspamd_mime_part_text_position *lhs = v1;
+	const struct rspamd_mime_part_text_position *rhs = v2;
+
+	if (lhs->res != rhs->res) {
+		/* Different verdicts: the HTML one (if it is the left one) wins */
+		return (lhs->res == RSPAMD_MESSAGE_PART_IS_TEXT_HTML) ? -1 : 1;
+	}
+
+	/* Same verdict: compare by position */
+	return (int) rhs->pos - (int) lhs->pos;
+}
+
+/*
+ * Post-parse processing of all parts of the task's message.
+ *
+ * Steps, in order:
+ *  - process archives;
+ *  - load the Lua helpers lua_magic.detect_mime_part and
+ *    lua_content.maybe_process_mime_part (failures are only logged);
+ *  - for each part: run magic detection (fills detected_ext, detected_ct,
+ *    detected_type and the NO_TEXT_EXTRACTION flag), run the Lua content
+ *    processor for still undefined parts, try to treat the part as an image
+ *    and finally remember parts that can be parsed as text;
+ *  - process the remembered text parts in sorted order;
+ *  - extract urls from the Subject header;
+ *  - detect language and extract words for every text part;
+ *  - for messages with exactly two text parts under an `alternative` parent,
+ *    propagate the language and store the "parts_distance" and "total_words"
+ *    mempool variables;
+ *  - normalise the average word length / short words counters by the total
+ *    number of words;
+ *  - link images and tokenize meta words.
+ */
+void rspamd_message_process(struct rspamd_task *task)
+{
+	guint i;
+	struct rspamd_mime_text_part *p1, *p2;
+	gdouble diff, *pdiff;
+	guint tw, *ptw, dw;
+	struct rspamd_mime_part *part;
+	lua_State *L = NULL;
+	gint magic_func_pos = -1, content_func_pos = -1, old_top = -1, funcs_top = -1;
+
+	if (task->cfg) {
+		L = task->cfg->lua_state;
+	}
+
+	rspamd_archives_process(task);
+
+	/* Remember the Lua stack top to restore it once all parts are processed */
+	if (L) {
+		old_top = lua_gettop(L);
+	}
+
+	/* Note: this branch also logs an error when there is no Lua state at all */
+	if (L && rspamd_lua_require_function(L,
+										 "lua_magic", "detect_mime_part")) {
+		magic_func_pos = lua_gettop(L);
+	}
+	else {
+		msg_err_task("cannot require lua_magic.detect_mime_part");
+	}
+
+	if (L && rspamd_lua_require_function(L,
+										 "lua_content", "maybe_process_mime_part")) {
+		content_func_pos = lua_gettop(L);
+	}
+	else {
+		msg_err_task("cannot require lua_content.maybe_process_mime_part");
+	}
+
+	/* Stack level with both helper functions loaded; restored after each call */
+	if (L) {
+		funcs_top = lua_gettop(L);
+	}
+
+	GArray *detected_text_parts = g_array_sized_new(FALSE, FALSE, sizeof(struct rspamd_mime_part_text_position), 2);
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		if (magic_func_pos != -1 && part->parsed_data.len > 0) {
+			struct rspamd_mime_part **pmime;
+			struct rspamd_task **ptask;
+
+			/* Call detect_mime_part(part, task) with a traceback handler */
+			lua_pushcfunction(L, &rspamd_lua_traceback);
+			gint err_idx = lua_gettop(L);
+			lua_pushvalue(L, magic_func_pos);
+			pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+			rspamd_lua_setclass(L, "rspamd{mimepart}", -1);
+			*pmime = part;
+			ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+			rspamd_lua_setclass(L, "rspamd{task}", -1);
+			*ptask = task;
+
+			if (lua_pcall(L, 2, 2, err_idx) != 0) {
+				msg_err_task("cannot detect type: %s", lua_tostring(L, -1));
+			}
+			else {
+				/* Two results: the extension (at -2) and a description table (at -1) */
+				if (lua_istable(L, -1)) {
+					const gchar *mb;
+
+					/* First returned value */
+					part->detected_ext = rspamd_mempool_strdup(task->task_pool,
+															   lua_tostring(L, -2));
+
+					lua_pushstring(L, "ct");
+					lua_gettable(L, -2);
+
+					if (lua_isstring(L, -1)) {
+						mb = lua_tostring(L, -1);
+
+						if (mb) {
+							rspamd_ftok_t srch;
+
+							srch.begin = mb;
+							srch.len = strlen(mb);
+							part->detected_ct = rspamd_content_type_parse(srch.begin,
+																		  srch.len,
+																		  task->task_pool);
+						}
+					}
+
+					lua_pop(L, 1);
+
+					lua_pushstring(L, "type");
+					lua_gettable(L, -2);
+
+					if (lua_isstring(L, -1)) {
+						part->detected_type = rspamd_mempool_strdup(task->task_pool,
+																	lua_tostring(L, -1));
+					}
+
+					lua_pop(L, 1);
+
+					lua_pushstring(L, "no_text");
+					lua_gettable(L, -2);
+
+					if (lua_isboolean(L, -1)) {
+						if (!!lua_toboolean(L, -1)) {
+							part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+						}
+					}
+
+					lua_pop(L, 1);
+				}
+			}
+
+			/* Drop the handler, results or error message left by the call */
+			lua_settop(L, funcs_top);
+		}
+
+		/* Now detect content */
+		if (content_func_pos != -1 && part->parsed_data.len > 0 &&
+			part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
+			struct rspamd_mime_part **pmime;
+			struct rspamd_task **ptask;
+
+			/* Call maybe_process_mime_part(part, task); no results are expected */
+			lua_pushcfunction(L, &rspamd_lua_traceback);
+			gint err_idx = lua_gettop(L);
+			lua_pushvalue(L, content_func_pos);
+			pmime = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
+			rspamd_lua_setclass(L, "rspamd{mimepart}", -1);
+			*pmime = part;
+			ptask = lua_newuserdata(L, sizeof(struct rspamd_task *));
+			rspamd_lua_setclass(L, "rspamd{task}", -1);
+			*ptask = task;
+
+			if (lua_pcall(L, 2, 0, err_idx) != 0) {
+				msg_err_task("cannot detect content: %s", lua_tostring(L, -1));
+			}
+
+			lua_settop(L, funcs_top);
+		}
+
+		/* Try to detect image before checking for text */
+		rspamd_images_process_mime_part_maybe(task, part);
+
+		/* Only still undefined parts without the no-text flag are text candidates */
+		if (part->part_type == RSPAMD_MIME_PART_UNDEFINED &&
+			!(part->flags & RSPAMD_MIME_PART_NO_TEXT_EXTRACTION)) {
+			enum rspamd_message_part_is_text_result res = rspamd_message_part_can_be_parsed_as_text(task, part);
+
+			if (res != RSPAMD_MESSAGE_PART_IS_NOT_TEXT) {
+				struct rspamd_mime_part_text_position p = {
+					.pos = i,
+					.res = res};
+				g_array_append_val(detected_text_parts, p);
+			}
+		}
+	}
+
+	uint16_t cur_url_order = 0;
+	g_array_sort(detected_text_parts, rspamd_mime_text_part_position_compare_func);
+	/* One more iteration to process text parts in a more specific order */
+	for (i = 0; i < detected_text_parts->len; i++) {
+		part = g_ptr_array_index(MESSAGE_FIELD(task, parts),
+								 g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).pos);
+		rspamd_message_process_text_part_maybe(task, part,
+											   g_array_index(detected_text_parts, struct rspamd_mime_part_text_position, i).res, &cur_url_order);
+	}
+
+	g_array_free(detected_text_parts, TRUE);
+
+	/* Restore the Lua stack to the state it had on entry */
+	if (old_top != -1) {
+		lua_settop(L, old_top);
+	}
+
+	/* Parse urls inside Subject header */
+	if (MESSAGE_FIELD(task, subject)) {
+		rspamd_url_find_multiple(task->task_pool, MESSAGE_FIELD(task, subject),
+								 strlen(MESSAGE_FIELD(task, subject)),
+								 RSPAMD_URL_FIND_STRICT, NULL,
+								 rspamd_url_task_subject_callback,
+								 task);
+	}
+
+	/* Calculate average words length and number of short words */
+	struct rspamd_mime_text_part *text_part;
+	gdouble *var;
+	guint total_words = 0;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
+	{
+		if (!text_part->language) {
+			rspamd_mime_part_detect_language(task, text_part);
+		}
+
+		rspamd_mime_part_extract_words(task, text_part);
+
+		if (text_part->utf_words) {
+			total_words += text_part->nwords;
+		}
+	}
+
+	/*
+	 * Calculate distance for 2-parts messages
+	 * (`i` is the loop counter left by the iteration over text parts above)
+	 */
+	if (i == 2) {
+		p1 = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), 0);
+		p2 = g_ptr_array_index(MESSAGE_FIELD(task, text_parts), 1);
+
+		/* First of all check parent object */
+		if (p1->mime_part->parent_part) {
+			rspamd_ftok_t srch;
+
+			srch.begin = "alternative";
+			srch.len = 11;
+
+			if (rspamd_ftok_cmp(&p1->mime_part->parent_part->ct->subtype, &srch) == 0) {
+				if (!IS_TEXT_PART_EMPTY(p1) && !IS_TEXT_PART_EMPTY(p2) &&
+					p1->normalized_hashes && p2->normalized_hashes) {
+					/*
+					 * We also detect language on one part and propagate it to
+					 * another one
+					 */
+					struct rspamd_mime_text_part *sel;
+
+					/* Prefer HTML as text part is not displayed normally */
+					if (IS_TEXT_PART_HTML(p1)) {
+						sel = p1;
+					}
+					else if (IS_TEXT_PART_HTML(p2)) {
+						sel = p2;
+					}
+					else {
+						/* Neither part is HTML: take the one with more utf8 content */
+						if (p1->utf_content.len > p2->utf_content.len) {
+							sel = p1;
+						}
+						else {
+							sel = p2;
+						}
+					}
+
+					if (sel->language && sel->language[0]) {
+						/* Propagate language */
+						if (sel == p1) {
+							if (p2->languages) {
+								g_ptr_array_unref(p2->languages);
+							}
+
+							p2->language = sel->language;
+							p2->languages = g_ptr_array_ref(sel->languages);
+						}
+						else {
+							if (p1->languages) {
+								g_ptr_array_unref(p1->languages);
+							}
+
+							p1->language = sel->language;
+							p1->languages = g_ptr_array_ref(sel->languages);
+						}
+					}
+
+					tw = p1->normalized_hashes->len + p2->normalized_hashes->len;
+
+					if (tw > 0) {
+						/* Distance is the word-level edit distance divided by the words total */
+						dw = rspamd_words_levenshtein_distance(task,
+															   p1->normalized_hashes,
+															   p2->normalized_hashes);
+						diff = dw / (gdouble) tw;
+
+						msg_debug_task(
+							"different words: %d, total words: %d, "
+							"got diff between parts of %.2f",
+							dw, tw,
+							diff);
+
+						pdiff = rspamd_mempool_alloc(task->task_pool,
+													 sizeof(gdouble));
+						*pdiff = diff;
+						rspamd_mempool_set_variable(task->task_pool,
+													"parts_distance",
+													pdiff,
+													NULL);
+						ptw = rspamd_mempool_alloc(task->task_pool,
+												   sizeof(gint));
+						*ptw = tw;
+						rspamd_mempool_set_variable(task->task_pool,
+													"total_words",
+													ptw,
+													NULL);
+					}
+				}
+			}
+		}
+		else {
+			/* Reached when the first text part has no parent part at all */
+			debug_task(
+				"message contains two parts but they are in different multi-parts");
+		}
+	}
+
+	/* Turn the accumulated counters into per-word ratios */
+	if (total_words > 0) {
+		var = rspamd_mempool_get_variable(task->task_pool,
+										  RSPAMD_MEMPOOL_AVG_WORDS_LEN);
+
+		if (var) {
+			*var /= (double) total_words;
+		}
+
+		var = rspamd_mempool_get_variable(task->task_pool,
+										  RSPAMD_MEMPOOL_SHORT_WORDS_CNT);
+
+		if (var) {
+			*var /= (double) total_words;
+		}
+	}
+
+	rspamd_images_link(task);
+	rspamd_tokenize_meta_words(task);
+}
+
+
+/*
+ * Takes one more reference on `msg` (via REF_RETAIN) and hands the very same
+ * pointer back, so the call can be used inline in an assignment.
+ */
+struct rspamd_message *
+rspamd_message_ref(struct rspamd_message *msg)
+{
+	REF_RETAIN(msg);
+	return msg;
+}
+
+/*
+ * Drops one reference from `msg` (via REF_RELEASE). A NULL message is
+ * silently ignored.
+ */
+void rspamd_message_unref(struct rspamd_message *msg)
+{
+	if (msg == NULL) {
+		return;
+	}
+
+	REF_RELEASE(msg);
+}
+
+/*
+ * Mixes `len` bytes of `input` into the message digest: the current digest is
+ * loaded as two 64 bit words, the first word seeds t1ha2_atonce128 over the
+ * input and the 128 bit result replaces the digest.
+ */
+void rspamd_message_update_digest(struct rspamd_message *msg,
+								  const void *input, gsize len)
+{
+	guint64 state[2];
+	guint64 seed;
+	/* The digest must be exactly two 64 bit words wide */
+	G_STATIC_ASSERT(sizeof(state) == sizeof(msg->digest));
+
+	memcpy(state, msg->digest, sizeof(msg->digest));
+	seed = state[0];
+	state[0] = t1ha2_atonce128(&state[1], input, len, seed);
+	memcpy(msg->digest, state, sizeof(msg->digest));
+}
diff --git a/src/libmime/message.h b/src/libmime/message.h
new file mode 100644
index 0000000..52dedab
--- /dev/null
+++ b/src/libmime/message.h
@@ -0,0 +1,239 @@
+/**
+ * @file message.h
+ * Message processing functions and structures
+ */
+
+#ifndef RSPAMD_MESSAGE_H
+#define RSPAMD_MESSAGE_H
+
+#include "config.h"
+
+#include "libmime/email_addr.h"
+#include "libutil/addr.h"
+#include "libcryptobox/cryptobox.h"
+#include "libmime/mime_headers.h"
+#include "libmime/content_type.h"
+#include "libserver/url.h"
+#include "libutil/ref.h"
+#include "libutil/str_util.h"
+
+#include <unicode/uchar.h>
+#include <unicode/utext.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct controller_session;
+struct rspamd_image;
+struct rspamd_archive;
+
+/**
+ * Bit flags stored in rspamd_mime_part::flags
+ */
+enum rspamd_mime_part_flags {
+	RSPAMD_MIME_PART_ATTACHEMENT = (1u << 1u),
+	RSPAMD_MIME_PART_BAD_CTE = (1u << 4u),
+	RSPAMD_MIME_PART_MISSING_CTE = (1u << 5u),
+	/* Set when the magic detector reports `no_text`; such parts are not tried as text */
+	RSPAMD_MIME_PART_NO_TEXT_EXTRACTION = (1u << 6u),
+};
+
+/**
+ * Kind of a mime part, stored in rspamd_mime_part::part_type.
+ * RSPAMD_MIME_PART_UNDEFINED is zero, so a zero-initialised part is undefined.
+ */
+enum rspamd_mime_part_type {
+	RSPAMD_MIME_PART_UNDEFINED = 0,
+	RSPAMD_MIME_PART_MULTIPART,
+	RSPAMD_MIME_PART_MESSAGE,
+	RSPAMD_MIME_PART_TEXT,
+	RSPAMD_MIME_PART_ARCHIVE,
+	RSPAMD_MIME_PART_IMAGE,
+	RSPAMD_MIME_PART_CUSTOM_LUA
+};
+
+/* NULL-safe checks of a part's type; each evaluates `part` twice */
+#define IS_PART_MULTIPART(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MULTIPART))
+#define IS_PART_TEXT(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_TEXT))
+#define IS_PART_MESSAGE(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MESSAGE))
+
+/**
+ * Transfer encoding of a part (rspamd_mime_part::cte); converted from and to
+ * strings by rspamd_cte_from_string() / rspamd_cte_to_string().
+ * NOTE(review): presumably mirrors the Content-Transfer-Encoding header values
+ */
+enum rspamd_cte {
+	RSPAMD_CTE_UNKNOWN = 0,
+	RSPAMD_CTE_7BIT = 1,
+	RSPAMD_CTE_8BIT = 2,
+	RSPAMD_CTE_QP = 3,
+	RSPAMD_CTE_B64 = 4,
+	RSPAMD_CTE_UUE = 5,
+};
+
+struct rspamd_mime_text_part;
+
+/**
+ * Multipart specific data of a mime part (rspamd_mime_part::specific.mp)
+ */
+struct rspamd_mime_multipart {
+	GPtrArray *children;     /* child parts; presumably struct rspamd_mime_part * elements - TODO confirm */
+	rspamd_ftok_t boundary;  /* boundary token of this multipart */
+};
+
+/**
+ * Kind of the Lua object attached to a part
+ * (see rspamd_lua_specific_part::type)
+ */
+enum rspamd_lua_specific_type {
+	RSPAMD_LUA_PART_TEXT,
+	RSPAMD_LUA_PART_STRING,
+	RSPAMD_LUA_PART_TABLE,
+	RSPAMD_LUA_PART_FUNCTION,
+	RSPAMD_LUA_PART_UNKNOWN,
+};
+
+/**
+ * Lua specific data of a mime part (rspamd_mime_part::specific.lua_specific)
+ */
+struct rspamd_lua_specific_part {
+	gint cbref; /* NOTE(review): looks like a Lua registry reference - confirm */
+	enum rspamd_lua_specific_type type;
+};
+
+/**
+ * A single mime part of a message
+ */
+struct rspamd_mime_part {
+	struct rspamd_content_type *ct;          /* content type of the part */
+	struct rspamd_content_type *detected_ct; /* content type parsed from the `ct` field of the magic detector result */
+	gchar *detected_type;                    /* `type` field of the magic detector result */
+	gchar *detected_ext;                     /* first value returned by the magic detector */
+	struct rspamd_content_disposition *cd;
+	rspamd_ftok_t raw_data;
+	rspamd_ftok_t parsed_data; /* parts with empty parsed_data are not passed to the Lua detectors */
+	struct rspamd_mime_part *parent_part; /* enclosing part, NULL if there is none */
+
+	struct rspamd_mime_header *headers_order;
+	struct rspamd_mime_headers_table *raw_headers;
+	GPtrArray *urls;
+
+	gchar *raw_headers_str;
+	gsize raw_headers_len;
+
+	enum rspamd_cte cte;
+	guint flags; /* bit set of enum rspamd_mime_part_flags */
+	enum rspamd_mime_part_type part_type;
+	guint part_number;
+
+	/* Type specific data; presumably selected by part_type - TODO confirm */
+	union {
+		struct rspamd_mime_multipart *mp;
+		struct rspamd_mime_text_part *txt;
+		struct rspamd_image *img;
+		struct rspamd_archive *arch;
+		struct rspamd_lua_specific_part lua_specific;
+	} specific;
+
+	/* Digest of the part; part digests are folded into the message digest */
+	guchar digest[rspamd_cryptobox_HASHBYTES];
+};
+
+/* Bit flags stored in rspamd_mime_text_part::flags */
+#define RSPAMD_MIME_TEXT_PART_FLAG_UTF (1 << 0)
+#define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 1)
+#define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 2)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW (1 << 3)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 4)
+#define RSPAMD_MIME_TEXT_PART_ATTACHMENT (1 << 5)
+
+/*
+ * Flag tests for a text part. They yield the masked bit (zero or non-zero,
+ * not 0/1) and do not check `part` for NULL.
+ */
+#define IS_TEXT_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
+#define IS_TEXT_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define IS_TEXT_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
+#define IS_TEXT_PART_ATTACHMENT(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_ATTACHMENT)
+
+
+/**
+ * Text specific view of a mime part: raw and decoded content, extracted words
+ * and per-part text statistics
+ */
+struct rspamd_mime_text_part {
+	const gchar *language; /* selected language; may be shared with the sibling part of an alternative */
+	GPtrArray *languages;  /* reference counted array of language candidates */
+	const gchar *real_charset;
+
+	/* Raw data in native encoding */
+	rspamd_ftok_t raw;
+	rspamd_ftok_t parsed; /* decoded from mime encodings */
+
+	/* UTF8 content */
+	rspamd_ftok_t utf_content;        /* utf8 encoded processed content */
+	GByteArray *utf_raw_content;      /* utf raw content */
+	GByteArray *utf_stripped_content; /* utf content with no newlines */
+	GArray *normalized_hashes;        /* Array of guint64 */
+	GArray *utf_words;                /* Array of rspamd_stat_token_t */
+	UText utf_stripped_text;          /* Used by libicu to represent the utf8 content */
+
+	GPtrArray *newlines; /**< positions of newlines in text, relative to content */
+	void *html;
+	GList *exceptions; /**< list of offsets of urls */
+	struct rspamd_mime_part *mime_part; /* the mime part this text part belongs to */
+
+	guint flags; /* bit set of RSPAMD_MIME_TEXT_PART_* flags */
+	guint nlines;
+	guint spaces;
+	guint nwords;
+	guint non_ascii_chars;
+	guint ascii_chars;
+	guint double_spaces;
+	guint non_spaces;
+	guint empty_lines;
+	guint capital_letters;
+	guint numeric_characters;
+	guint unicode_scripts;
+};
+
+/**
+ * Location of the raw headers block of a message
+ * (stored in rspamd_message::raw_headers_content)
+ */
+struct rspamd_message_raw_headers_content {
+	const gchar *begin;      /* start of the raw headers */
+	gsize len;               /* length of the raw headers in bytes */
+	const gchar *body_start; /* presumably the first byte after the headers - TODO confirm */
+};
+
+/**
+ * Parsed message attached to a task; reference counted through `ref`
+ * (see rspamd_message_ref() / rspamd_message_unref())
+ */
+struct rspamd_message {
+	const gchar *message_id;
+	gchar *subject;
+
+	GPtrArray *parts;      /**< list of parsed parts */
+	GPtrArray *text_parts; /**< list of text parts */
+	struct rspamd_message_raw_headers_content raw_headers_content;
+	void *received_headers; /**< list of received headers */
+	khash_t(rspamd_url_hash) * urls;
+	struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
+	struct rspamd_mime_header *headers_order;      /**< order of raw headers */
+	struct rspamd_task *task;
+	GPtrArray *rcpt_mime;
+	GPtrArray *from_mime;
+	guchar digest[16]; /**< 128 bit message digest, see rspamd_message_update_digest() */
+	enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers) */
+	ref_entry_t ref;
+};
+
+/*
+ * Accessors for fields of the task's message.
+ * MESSAGE_FIELD dereferences task->message unconditionally;
+ * MESSAGE_FIELD_CHECK yields a NULL cast to the field's type when the task has
+ * no message (it relies on the __typeof__ extension).
+ */
+#define MESSAGE_FIELD(task, field) ((task)->message->field)
+#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? (task)->message->field : (__typeof__((task)->message->field)) NULL)
+
+/**
+ * Parse and pre-process mime message
+ * @param task worker_task object
+ * @return TRUE on success, FALSE if the message cannot be parsed as mime and
+ * raw input is not allowed by the configuration
+ */
+gboolean rspamd_message_parse(struct rspamd_task *task);
+
+/**
+ * Process content in task (e.g. HTML parsing)
+ * @param task
+ */
+void rspamd_message_process(struct rspamd_task *task);
+
+
+/**
+ * Converts string to cte
+ * @param str
+ * @return one of the rspamd_cte values
+ */
+enum rspamd_cte rspamd_cte_from_string(const gchar *str);
+
+/**
+ * Converts cte to string
+ * @param ct
+ * @return string representation of the cte
+ */
+const gchar *rspamd_cte_to_string(enum rspamd_cte ct);
+
+/**
+ * Creates a new message object for a task
+ * @param task
+ * @return new message
+ */
+struct rspamd_message *rspamd_message_new(struct rspamd_task *task);
+
+/**
+ * Increases the reference count of a message
+ * @param msg
+ * @return the same message pointer
+ */
+struct rspamd_message *rspamd_message_ref(struct rspamd_message *msg);
+
+/**
+ * Decreases the reference count of a message
+ * @param msg message to release (NULL is ignored)
+ */
+void rspamd_message_unref(struct rspamd_message *msg);
+
+/**
+ * Updates digest of the message if modified
+ * @param msg
+ * @param input data to mix into the digest
+ * @param len length of the input in bytes
+ */
+void rspamd_message_update_digest(struct rspamd_message *msg,
+								  const void *input, gsize len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
new file mode 100644
index 0000000..48a97a4
--- /dev/null
+++ b/src/libmime/mime_encoding.c
@@ -0,0 +1,864 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "libutil/regexp.h"
+#include "libutil/hash.h"
+#include "libserver/cfg_file.h"
+#include "libserver/task.h"
+#include "mime_encoding.h"
+#include "message.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "contrib/google-ced/ced_c.h"
+#include <unicode/ucnv.h>
+#if U_ICU_VERSION_MAJOR_NUM >= 44
+#include <unicode/unorm2.h>
+#endif
+#include <math.h>
+
+/* Canonical charset name returned for anything recognised as UTF-8 */
+#define UTF8_CHARSET "UTF-8"
+
+#define RSPAMD_CHARSET_FLAG_UTF (1 << 0)
+#define RSPAMD_CHARSET_FLAG_ASCII (1 << 1)
+
+/* Size passed to the LRU cache of charset converters */
+#define RSPAMD_CHARSET_CACHE_SIZE 32
+#define RSPAMD_CHARSET_MAX_CONTENT 512
+
+/* Clear / set the UTF flag of a text part */
+#define SET_PART_RAW(part) ((part)->flags &= ~RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define SET_PART_UTF(part) ((part)->flags |= RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+
+static rspamd_regexp_t *utf_compatible_re = NULL;
+
+/*
+ * One entry of the charset alias table: maps a charset name as seen in the
+ * input onto the name that is handed to ICU instead.
+ */
+struct rspamd_charset_substitution {
+	const gchar *input; /* alias, used as the (case insensitive) hash key */
+	const gchar *canon; /* replacement name */
+	gint flags;         /* presumably RSPAMD_CHARSET_FLAG_* bits - TODO confirm */
+};
+
+#include "mime_encoding_list.h"
+
+static GHashTable *sub_hash = NULL;
+
+/*
+ * Conversion table for the internal ISO-8859-16 converter: 128 UTF-16 code
+ * units for the bytes 0x80..0xFF, indexed by (byte - 128). Bytes below 0x80
+ * are not stored here as they are converted as is.
+ */
+static const UChar iso_8859_16_map[] = {
+	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+	0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
+	0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
+	0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
+	0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
+	0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
+	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+	0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
+	0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
+	0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
+	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+	0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
+	0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF};
+
+/*
+ * A cached charset converter: either a wrapper around an ICU converter or an
+ * internal single byte table.
+ */
+struct rspamd_charset_converter {
+	gchar *canon_name; /* owned copy of the charset name, also the cache key */
+	union {
+		UConverter *conv;       /* valid when is_internal is FALSE */
+		const UChar *cnv_table; /* valid when is_internal is TRUE */
+	} d;
+	gboolean is_internal; /* selects the active member of `d` */
+};
+
+static GQuark
+rspamd_charset_conv_error_quark(void)
+{
+	return g_quark_from_static_string("charset conversion error");
+}
+
+/*
+ * Destructor for cached converters: closes the ICU converter (internal table
+ * converters own nothing but their name) and frees the structure.
+ */
+static void
+rspamd_converter_dtor(gpointer p)
+{
+	struct rspamd_charset_converter *conv = p;
+	gboolean owns_icu_converter = !conv->is_internal;
+
+	if (owns_icu_converter) {
+		ucnv_close(conv->d.conv);
+	}
+
+	g_free(conv->canon_name);
+	g_free(conv);
+}
+
+/*
+ * Converts `srcLength` bytes of `src` to UTF-16 code units stored in `dest`.
+ *
+ * ICU based converters are delegated to ucnv_toUChars. Internal converters
+ * map every byte on its own: bytes up to 127 are copied as is, others are
+ * looked up in the converter table; output is silently cut at `destCapacity`
+ * and `pErrorCode` is left untouched.
+ *
+ * Returns the number of code units written for internal converters and the
+ * ucnv_toUChars result otherwise.
+ */
+int32_t
+rspamd_converter_to_uchars(struct rspamd_charset_converter *cnv,
+						   UChar *dest,
+						   int32_t destCapacity,
+						   const char *src,
+						   int32_t srcLength,
+						   UErrorCode *pErrorCode)
+{
+	int32_t idx;
+
+	if (!cnv->is_internal) {
+		return ucnv_toUChars(cnv->d.conv,
+							 dest, destCapacity,
+							 src, srcLength,
+							 pErrorCode);
+	}
+
+	/* Single byte table: one input byte produces exactly one code unit */
+	for (idx = 0; idx < srcLength && idx < destCapacity; idx++) {
+		guchar byte = (guchar) src[idx];
+
+		if (byte <= 127) {
+			dest[idx] = (UChar) byte;
+		}
+		else {
+			dest[idx] = cnv->d.cnv_table[byte - 128];
+		}
+	}
+
+	return idx;
+}
+
+
+/*
+ * Returns a converter for charset `enc`, creating and caching it on demand.
+ *
+ * @param enc charset name; NULL yields NULL
+ * @param pool memory pool used when the name has to be canonicalised
+ * @param is_canon TRUE if `enc` is already a canonical name, otherwise it is
+ * passed through rspamd_mime_detect_charset first
+ * @param err ICU error code filled by ucnv_open / ucnv_setToUCallBack
+ * @return converter owned by the internal LRU cache (the caller must not free
+ * it) or NULL if the charset is unknown or ICU cannot open a converter
+ */
+struct rspamd_charset_converter *
+rspamd_mime_get_converter_cached(const gchar *enc,
+								 rspamd_mempool_t *pool,
+								 gboolean is_canon,
+								 UErrorCode *err)
+{
+	const gchar *canon_name;
+	static rspamd_lru_hash_t *cache;
+	struct rspamd_charset_converter *conv;
+
+	/* The cache is created lazily on the first call */
+	if (cache == NULL) {
+		cache = rspamd_lru_hash_new_full(RSPAMD_CHARSET_CACHE_SIZE, NULL,
+										 rspamd_converter_dtor, rspamd_str_hash,
+										 rspamd_str_equal);
+	}
+
+	if (enc == NULL) {
+		return NULL;
+	}
+
+	if (!is_canon) {
+		rspamd_ftok_t cset_tok;
+
+		RSPAMD_FTOK_FROM_STR(&cset_tok, enc);
+		canon_name = rspamd_mime_detect_charset(&cset_tok, pool);
+	}
+	else {
+		canon_name = enc;
+	}
+
+	if (canon_name == NULL) {
+		return NULL;
+	}
+
+	conv = rspamd_lru_hash_lookup(cache, (gpointer) canon_name, 0);
+
+	if (conv == NULL) {
+		if (!(strcmp(canon_name, "ISO-8859-16") == 0 ||
+			  strcmp(canon_name, "latin10") == 0 ||
+			  strcmp(canon_name, "iso-ir-226") == 0)) {
+			/*
+			 * Open the ICU converter before allocating anything, so that a
+			 * failure leaves nothing behind (the name copy used to be leaked
+			 * on this path).
+			 */
+			UConverter *icu_conv = ucnv_open(canon_name, err);
+
+			if (icu_conv == NULL) {
+				return NULL;
+			}
+
+			conv = g_malloc0(sizeof(*conv));
+			conv->d.conv = icu_conv;
+			conv->canon_name = g_strdup(canon_name);
+
+			/* Replace undecodable input instead of failing the conversion */
+			ucnv_setToUCallBack(conv->d.conv,
+								UCNV_TO_U_CALLBACK_SUBSTITUTE,
+								NULL,
+								NULL,
+								NULL,
+								err);
+			rspamd_lru_hash_insert(cache, conv->canon_name, conv, 0, 0);
+		}
+		else {
+			/* ISO-8859-16 is served by the built-in table */
+			conv = g_malloc0(sizeof(*conv));
+			conv->is_internal = TRUE;
+			conv->d.cnv_table = iso_8859_16_map;
+			conv->canon_name = g_strdup(canon_name);
+
+			rspamd_lru_hash_insert(cache, conv->canon_name, conv, 0, 0);
+		}
+	}
+
+	return conv;
+}
+
+/*
+ * Builds the case insensitive lookup table of charset aliases from the static
+ * `sub` array; keys and values point into that array.
+ */
+static void
+rspamd_mime_encoding_substitute_init(void)
+{
+	guint idx = 0;
+
+	sub_hash = g_hash_table_new(rspamd_strcase_hash, rspamd_strcase_equal);
+
+	while (idx < G_N_ELEMENTS(sub)) {
+		g_hash_table_insert(sub_hash, (void *) sub[idx].input, (void *) &sub[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Trims a charset name in place so that it starts and ends with an ASCII
+ * alphanumeric character (e.g. `"utf-8"` in quotes becomes `utf-8`).
+ * A string without any alphanumeric character becomes empty.
+ *
+ * @param in zero terminated, writable charset name
+ */
+static void
+rspamd_charset_normalize(gchar *in)
+{
+	gchar *begin, *end;
+	gsize new_len;
+
+	/* Skip leading garbage */
+	begin = in;
+
+	while (*begin && !g_ascii_isalnum(*begin)) {
+		begin++;
+	}
+
+	/* `end` points one past the last kept character */
+	end = begin + strlen(begin);
+
+	while (end > begin && !g_ascii_isalnum(*(end - 1))) {
+		end--;
+	}
+
+	new_len = end - begin;
+
+	if (begin != in) {
+		memmove(in, begin, new_len);
+	}
+
+	/*
+	 * Terminate relative to the start of the buffer: the old code wrote the
+	 * terminator at the pre-move position and left a trailing character when
+	 * both ends were trimmed.
+	 */
+	in[new_len] = '\0';
+}
+
+/*
+ * Maps a charset name found in a message onto a name known to ICU.
+ *
+ * `utf-8` and `utf8` (in any case) are answered at once. Otherwise the name
+ * is copied into `pool`, trimmed, stripped of dashes for `cp-` / `ibm-`
+ * prefixed names, passed through the alias table and finally resolved with
+ * ICU (MIME, IANA and default standards, then plain alias lookup).
+ *
+ * Returns the resolved name or NULL if ICU does not know the charset.
+ */
+const gchar *
+rspamd_mime_detect_charset(const rspamd_ftok_t *in, rspamd_mempool_t *pool)
+{
+	static const gchar *const icu_standards[] = {"MIME", "IANA", ""};
+	gchar *ret = NULL, *rd, *wr;
+	struct rspamd_charset_substitution *s;
+	const gchar *cset = NULL;
+	rspamd_ftok_t utf8_tok;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	guint k;
+
+	if (sub_hash == NULL) {
+		rspamd_mime_encoding_substitute_init();
+	}
+
+	/* Fast path */
+	RSPAMD_FTOK_ASSIGN(&utf8_tok, "utf-8");
+
+	if (rspamd_ftok_casecmp(in, &utf8_tok) == 0) {
+		return UTF8_CHARSET;
+	}
+
+	RSPAMD_FTOK_ASSIGN(&utf8_tok, "utf8");
+
+	if (rspamd_ftok_casecmp(in, &utf8_tok) == 0) {
+		return UTF8_CHARSET;
+	}
+
+	ret = rspamd_mempool_ftokdup(pool, in);
+	rspamd_charset_normalize(ret);
+
+	if ((in->len > 3 && rspamd_lc_cmp(in->begin, "cp-", 3) == 0) ||
+		(in->len > 4 && (rspamd_lc_cmp(in->begin, "ibm-", 4) == 0))) {
+		/* Squeeze out all '-' characters: e.g. CP-100 becomes CP100 */
+		wr = ret;
+
+		for (rd = ret; *rd != '\0'; rd++) {
+			if (*rd != '-') {
+				*wr++ = *rd;
+			}
+		}
+
+		*wr = '\0';
+	}
+
+	s = g_hash_table_lookup(sub_hash, ret);
+
+	if (s) {
+		ret = (char *) s->canon;
+	}
+
+	/* Ask ICU for the canonical name, one standard after another */
+	for (k = 0; cset == NULL && k < G_N_ELEMENTS(icu_standards); k++) {
+		uc_err = U_ZERO_ERROR;
+		cset = ucnv_getCanonicalName(ret, icu_standards[k], &uc_err);
+	}
+
+	/* Last resort: plain alias lookup */
+	if (cset == NULL) {
+		uc_err = U_ZERO_ERROR;
+		cset = ucnv_getAlias(ret, 0, &uc_err);
+	}
+
+	return cset;
+}
+
+/*
+ * Converts `len` bytes of `input` from charset `in_enc` to UTF-8.
+ *
+ * Input that already passes the utf check for `in_enc` is just copied.
+ * Otherwise it is converted to UTF-16 through a cached converter and then to
+ * UTF-8. The result is allocated from `pool` and is not zero terminated by
+ * this function; its length is stored in `olen` (if not NULL).
+ *
+ * Returns NULL and sets `err` if no converter is available or a conversion
+ * step fails.
+ */
+gchar *
+rspamd_mime_text_to_utf8(rspamd_mempool_t *pool,
+						 gchar *input, gsize len, const gchar *in_enc,
+						 gsize *olen, GError **err)
+{
+	gchar *result = NULL, *out;
+	gint32 nchars, max_char_size, out_capacity;
+	UChar *utf16_buf;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	UConverter *utf8_converter;
+	struct rspamd_charset_converter *conv;
+	rspamd_ftok_t cset_tok;
+
+	/* Nothing to convert if the data is utf8 already */
+	RSPAMD_FTOK_FROM_STR(&cset_tok, in_enc);
+
+	if (rspamd_mime_charset_utf_check(&cset_tok, input, len,
+									  FALSE)) {
+		out = rspamd_mempool_alloc(pool, len);
+		memcpy(out, input, len);
+
+		if (olen) {
+			*olen = len;
+		}
+
+		return out;
+	}
+
+	conv = rspamd_mime_get_converter_cached(in_enc, pool, TRUE, &uc_err);
+	utf8_converter = rspamd_get_utf8_converter();
+
+	if (conv == NULL) {
+		g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+					"cannot open converter for %s: %s",
+					in_enc, u_errorName(uc_err));
+
+		return NULL;
+	}
+
+	/* Step one: source charset -> UTF-16 in a temporary heap buffer */
+	utf16_buf = g_new(UChar, len + 1);
+	uc_err = U_ZERO_ERROR;
+	nchars = rspamd_converter_to_uchars(conv, utf16_buf, len + 1, input, len, &uc_err);
+
+	if (!U_SUCCESS(uc_err)) {
+		g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+					"cannot convert data to unicode from %s: %s",
+					in_enc, u_errorName(uc_err));
+	}
+	else {
+		/* Step two: UTF-16 -> UTF-8 in a pool buffer of the worst case size */
+		max_char_size = ucnv_getMaxCharSize(utf8_converter);
+		out_capacity = UCNV_GET_MAX_BYTES_FOR_STRING(nchars, max_char_size);
+		out = rspamd_mempool_alloc(pool, out_capacity);
+		nchars = ucnv_fromUChars(utf8_converter, out, out_capacity, utf16_buf, nchars, &uc_err);
+
+		if (!U_SUCCESS(uc_err)) {
+			g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+						"cannot convert data from unicode from %s: %s",
+						in_enc, u_errorName(uc_err));
+		}
+		else {
+			msg_debug_pool("converted from %s to UTF-8 inlen: %z, outlen: %d",
+						   in_enc, len, nchars);
+
+			if (olen) {
+				*olen = nchars;
+			}
+
+			result = out;
+		}
+	}
+
+	g_free(utf16_buf);
+
+	return result;
+}
+
+/*
+ * Converts the content of a text part (`input`) from `charset` to UTF-8 and
+ * stores the result in text_part->utf_raw_content.
+ *
+ * `charset` is treated as an already canonical name. The conversion goes
+ * through a temporary UTF-16 buffer; the UTF-8 output is allocated from the
+ * task pool.
+ *
+ * Returns TRUE on success; on failure FALSE is returned, `err` is set and the
+ * text part is left untouched.
+ */
+static gboolean
+rspamd_mime_text_part_utf8_convert(struct rspamd_task *task,
+								   struct rspamd_mime_text_part *text_part,
+								   GByteArray *input,
+								   const gchar *charset,
+								   GError **err)
+{
+	gchar *d;
+	gint32 r, clen, dlen, uc_len;
+	UChar *tmp_buf;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	UConverter *utf8_converter;
+	struct rspamd_charset_converter *conv;
+
+	conv = rspamd_mime_get_converter_cached(charset, task->task_pool,
+											TRUE, &uc_err);
+	utf8_converter = rspamd_get_utf8_converter();
+
+	if (conv == NULL) {
+		g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+					"cannot open converter for %s: %s",
+					charset, u_errorName(uc_err));
+
+		return FALSE;
+	}
+
+	/* Step one: source charset -> UTF-16 in a temporary heap buffer */
+	tmp_buf = g_new(UChar, input->len + 1);
+	uc_err = U_ZERO_ERROR;
+	uc_len = rspamd_converter_to_uchars(conv,
+										tmp_buf,
+										input->len + 1,
+										input->data,
+										input->len,
+										&uc_err);
+
+	if (!U_SUCCESS(uc_err)) {
+		g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+					"cannot convert data to unicode from %s: %s",
+					charset, u_errorName(uc_err));
+		g_free(tmp_buf);
+
+		return FALSE;
+	}
+
+	/* Now, convert to utf8 */
+	clen = ucnv_getMaxCharSize(utf8_converter);
+	dlen = UCNV_GET_MAX_BYTES_FOR_STRING(uc_len, clen);
+	d = rspamd_mempool_alloc(task->task_pool, dlen);
+	r = ucnv_fromUChars(utf8_converter, d, dlen,
+						tmp_buf, uc_len, &uc_err);
+
+	if (!U_SUCCESS(uc_err)) {
+		g_set_error(err, rspamd_charset_conv_error_quark(), EINVAL,
+					"cannot convert data from unicode from %s: %s",
+					charset, u_errorName(uc_err));
+		g_free(tmp_buf);
+
+		return FALSE;
+	}
+
+	if (text_part->mime_part && text_part->mime_part->ct) {
+		msg_info_task("converted text part from %s ('%T' announced) to UTF-8 inlen: %d, outlen: %d (%d UTF16 chars)",
+					  charset, &text_part->mime_part->ct->charset, input->len, r, uc_len);
+	}
+	else {
+		msg_info_task("converted text part from %s (no charset announced) to UTF-8 inlen: %d, "
+					  "outlen: %d (%d UTF16 chars)",
+					  charset, input->len, r, uc_len);
+	}
+
+	/*
+	 * The byte array header is taken from the task pool rather than created
+	 * with the GByteArray API, and only `data` and `len` are filled in.
+	 * NOTE(review): the extra 4 pointers presumably cover private GLib
+	 * fields - confirm; this array must not be resized or freed through GLib.
+	 */
+	text_part->utf_raw_content = rspamd_mempool_alloc(task->task_pool,
+													  sizeof(*text_part->utf_raw_content) + sizeof(gpointer) * 4);
+	text_part->utf_raw_content->data = d;
+	text_part->utf_raw_content->len = r;
+	g_free(tmp_buf);
+
+	return TRUE;
+}
+
+/*
+ * Converts the byte array `in` from charset `enc` to UTF-8 and stores the
+ * result in `out`.
+ *
+ * With a NULL `enc` the input is accepted only if it is valid UTF-8 and is
+ * copied as is; the same copy is made when the utf check for `enc` passes.
+ * Otherwise the data goes through a cached converter and a temporary UTF-16
+ * buffer.
+ *
+ * Returns TRUE on success and FALSE for empty input, unknown charsets and
+ * conversion errors.
+ */
+gboolean
+rspamd_mime_to_utf8_byte_array(GByteArray *in,
+							   GByteArray *out,
+							   rspamd_mempool_t *pool,
+							   const gchar *enc)
+{
+	gint32 nchars, max_char_size, out_capacity;
+	UChar *utf16_buf;
+	UErrorCode uc_err = U_ZERO_ERROR;
+	UConverter *utf8_converter;
+	struct rspamd_charset_converter *conv;
+	rspamd_ftok_t charset_tok;
+	gboolean ok = FALSE;
+
+	if (in == NULL || in->len == 0) {
+		return FALSE;
+	}
+
+	if (enc == NULL) {
+		/* No charset given: take the data only if it is valid utf8 */
+		if (rspamd_fast_utf8_validate(in->data, in->len) != 0) {
+			return FALSE;
+		}
+
+		g_byte_array_set_size(out, in->len);
+		memcpy(out->data, in->data, out->len);
+
+		return TRUE;
+	}
+
+	RSPAMD_FTOK_FROM_STR(&charset_tok, enc);
+
+	if (rspamd_mime_charset_utf_check(&charset_tok, (gchar *) in->data, in->len,
+									  FALSE)) {
+		g_byte_array_set_size(out, in->len);
+		memcpy(out->data, in->data, out->len);
+
+		return TRUE;
+	}
+
+	utf8_converter = rspamd_get_utf8_converter();
+	conv = rspamd_mime_get_converter_cached(enc, pool, TRUE, &uc_err);
+
+	if (conv == NULL) {
+		return FALSE;
+	}
+
+	/* Step one: source charset -> UTF-16 */
+	utf16_buf = g_new(UChar, in->len + 1);
+	uc_err = U_ZERO_ERROR;
+	nchars = rspamd_converter_to_uchars(conv,
+										utf16_buf, in->len + 1,
+										in->data, in->len, &uc_err);
+
+	if (U_SUCCESS(uc_err)) {
+		/* Step two: UTF-16 -> UTF-8 directly into the output array */
+		max_char_size = ucnv_getMaxCharSize(utf8_converter);
+		out_capacity = UCNV_GET_MAX_BYTES_FOR_STRING(nchars, max_char_size);
+		g_byte_array_set_size(out, out_capacity);
+		nchars = ucnv_fromUChars(utf8_converter, out->data, out_capacity, utf16_buf, nchars, &uc_err);
+
+		if (U_SUCCESS(uc_err)) {
+			/* Shrink the array to the bytes really produced */
+			out->len = nchars;
+			ok = TRUE;
+		}
+	}
+
+	g_free(utf16_buf);
+
+	return ok;
+}
+
+/**
+ * Replace every invalid UTF-8 sequence in `in` with '?' characters, in place.
+ *
+ * Starting at each offset the validator reports as invalid, bytes are skipped
+ * until a valid non-NUL code point is decoded; everything skipped is replaced
+ * with '?'. If no such code point is found before the end of the buffer, the
+ * whole remainder is replaced.
+ *
+ * @param in buffer to sanitize (modified in place)
+ * @param len number of bytes in `in`
+ */
+void rspamd_mime_charset_utf_enforce(gchar *in, gsize len)
+{
+	gchar *p, *end;
+	goffset err_offset;
+	UChar32 uc = 0;
+
+	/* Now we validate input and replace bad characters with '?' symbol */
+	p = in;
+	end = in + len;
+
+	while (p < end && len > 0 && (err_offset = rspamd_fast_utf8_validate(p, len)) > 0) {
+		err_offset--; /* The validator reports a 1-based offset */
+		gint32 cur_offset = err_offset;
+		gboolean resynced = FALSE;
+
+		/* Scan forward for the next valid, non-NUL code point */
+		while (cur_offset < len) {
+			gint32 tmp = cur_offset;
+
+			U8_NEXT(p, cur_offset, len, uc);
+
+			if (uc > 0) {
+				/* Fill string between err_offset and tmp with `?` character */
+				memset(p + err_offset, '?', tmp - err_offset);
+				resynced = TRUE;
+				break;
+			}
+		}
+
+		if (!resynced) {
+			/*
+			 * The scan hit the end of the buffer, either on an illegal
+			 * sequence (uc < 0) or on trailing NUL bytes (uc == 0).
+			 * In both cases the bad bytes are still in place, so fill
+			 * till the end.
+			 */
+			memset(p + err_offset, '?', len - err_offset);
+			break;
+		}
+
+		/* Continue validation right after the code point we resynced on */
+		p += cur_offset;
+		len = end - p;
+	}
+}
+
+/**
+ * Guess the charset of a text buffer from its content.
+ *
+ * @param in text to inspect
+ * @param inlen length of `in` in bytes
+ * @param check_utf8 if true, return UTF8_CHARSET straight away when the input
+ * validates as UTF-8, without running the detector
+ * @return charset name reported by the detector, or NULL if it reports none
+ */
+const char *
+rspamd_mime_charset_find_by_content(const gchar *in, gsize inlen,
+									bool check_utf8)
+{
+	/* Out-parameters demanded by the detector; their values are not used here */
+	bool reliable;
+	int consumed;
+
+	if (check_utf8 && rspamd_fast_utf8_validate(in, inlen) == 0) {
+		return UTF8_CHARSET;
+	}
+
+	return ced_encoding_detect(in, inlen, NULL, NULL,
+							   NULL, 0, CED_EMAIL_CORPUS,
+							   false, &consumed, &reliable);
+}
+
+/**
+ * Detect the charset of a buffer, sampling large inputs in three places.
+ *
+ * Inputs shorter than three times RSPAMD_CHARSET_MAX_CONTENT are passed to the
+ * detector as a whole. Longer inputs are probed at the start, the middle and
+ * the end (RSPAMD_CHARSET_MAX_CONTENT bytes each) and the three answers are
+ * combined by a simple vote.
+ *
+ * @param in text to inspect
+ * @param inlen length of `in` in bytes
+ * @return selected charset name, or NULL if no probe produced a usable answer
+ */
+static const char *
+rspamd_mime_charset_find_by_content_maybe_split(const gchar *in, gsize inlen)
+{
+	const gchar *vote[3];
+	int i;
+
+	if (inlen < RSPAMD_CHARSET_MAX_CONTENT * 3) {
+		return rspamd_mime_charset_find_by_content(in, inlen, false);
+	}
+
+	/* Probe order: head, middle, tail */
+	vote[0] = rspamd_mime_charset_find_by_content(in,
+												  RSPAMD_CHARSET_MAX_CONTENT, false);
+	vote[1] = rspamd_mime_charset_find_by_content(in + inlen / 2,
+												  RSPAMD_CHARSET_MAX_CONTENT, false);
+	vote[2] = rspamd_mime_charset_find_by_content(in + inlen - RSPAMD_CHARSET_MAX_CONTENT,
+												  RSPAMD_CHARSET_MAX_CONTENT, false);
+
+	/*
+	 * A plain 7bit answer is discarded: per the original author's note the
+	 * input is expected to contain 8 bit data, so such a probe is not useful.
+	 */
+	for (i = 0; i < 3; i++) {
+		if (vote[i] != NULL && strcmp(vote[i], "US-ASCII") == 0) {
+			vote[i] = NULL;
+		}
+	}
+
+	/* Missing votes borrow a neighbour's answer; the order of these matters */
+	if (vote[0] == NULL) {
+		vote[0] = (vote[1] != NULL) ? vote[1] : vote[2];
+	}
+	if (vote[1] == NULL) {
+		vote[1] = (vote[2] != NULL) ? vote[2] : vote[0];
+	}
+	if (vote[2] == NULL) {
+		/*
+		 * NOTE(review): this borrows the middle vote rather than the head
+		 * one, unlike the cyclic pattern of the two fills above - confirm
+		 * that this is intended before changing it.
+		 */
+		vote[2] = (vote[0] != NULL) ? vote[1] : vote[0];
+	}
+
+	if (vote[0] == NULL || vote[1] == NULL || vote[2] == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Quorum. Names are compared by pointer, which presumably relies on the
+	 * detector returning shared constant strings - TODO confirm.
+	 */
+	if (vote[0] == vote[1]) {
+		return vote[0];
+	}
+	if (vote[1] == vote[2]) {
+		return vote[1];
+	}
+	if (vote[0] == vote[2]) {
+		return vote[2];
+	}
+
+	/* All charsets are distinct. Use the one from the top */
+	return vote[0];
+}
+
+/**
+ * Decide whether text announced as `charset` can be treated as UTF-8.
+ *
+ * An empty charset or one matching the UTF-8/ASCII compatible pattern is
+ * accepted. With `content_check` set, content that fails UTF-8 validation is
+ * additionally run through charset detection: a compatible detection result
+ * rewrites `charset` to UTF8_CHARSET, an incompatible one rewrites `charset`
+ * to the detected name and rejects the text, and if nothing is detected the
+ * invalid bytes of `in` are replaced in place.
+ *
+ * @param charset announced charset; may be overwritten as described above
+ * @param in text buffer (may be modified when `content_check` is set)
+ * @param len length of `in` in bytes
+ * @param content_check whether to validate the content itself
+ * @return TRUE if the text is to be handled as UTF-8, FALSE otherwise
+ */
+gboolean
+rspamd_mime_charset_utf_check(rspamd_ftok_t *charset,
+							  gchar *in, gsize len, gboolean content_check)
+{
+	const gchar *detected;
+
+	/* The pattern is compiled once, on first use */
+	if (utf_compatible_re == NULL) {
+		utf_compatible_re = rspamd_regexp_new(
+			"^(?:utf-?8.*)|(?:us-ascii)|(?:ascii)|(?:ansi.*)|(?:CSASCII)$",
+			"i", NULL);
+	}
+
+	if (charset->len != 0 &&
+		!rspamd_regexp_match(utf_compatible_re,
+							 charset->begin, charset->len, TRUE)) {
+		/* Announced charset is something else entirely */
+		return FALSE;
+	}
+
+	if (!content_check || rspamd_fast_utf8_validate(in, len) == 0) {
+		/* Either trusted as is, or verified to be well-formed */
+		return TRUE;
+	}
+
+	/*
+	 * Announced as UTF-8 compatible but the bytes say otherwise: try to find
+	 * out what the content really is
+	 */
+	detected = rspamd_mime_charset_find_by_content_maybe_split(in, len);
+
+	if (detected == NULL) {
+		/* No better guess, so scrub the invalid bytes and carry on */
+		rspamd_mime_charset_utf_enforce(in, len);
+
+		return TRUE;
+	}
+
+	if (rspamd_regexp_match(utf_compatible_re,
+							detected, strlen(detected), TRUE)) {
+		RSPAMD_FTOK_ASSIGN(charset, UTF8_CHARSET);
+
+		return TRUE;
+	}
+
+	/* Report the detected charset back so that the caller can convert */
+	charset->begin = detected;
+	charset->len = strlen(detected);
+
+	return FALSE;
+}
+
+/**
+ * Bring the content of a text part to UTF-8 where possible.
+ *
+ * The parsed content is copied into a byte array whose release is registered
+ * with the task pool. The real charset is then taken from the announced
+ * Content-Type charset and/or guessed from the content. Depending on the
+ * outcome the part is flagged via SET_PART_UTF (content already UTF-8 or
+ * successfully converted) or SET_PART_RAW (charset unknown or conversion
+ * failed), and `utf_raw_content` / `real_charset` are filled in accordingly.
+ *
+ * @param task task owning the part; its pool is used for allocations/logging
+ * @param text_part text part to process (modified in place)
+ */
+void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
+										 struct rspamd_mime_text_part *text_part)
+{
+	GError *err = NULL;
+	const gchar *charset = NULL;
+	gboolean checked = FALSE, need_charset_heuristic = TRUE, valid_utf8 = FALSE;
+	GByteArray *part_content;
+	rspamd_ftok_t charset_tok;
+	/* NOTE(review): part->ct is dereferenced below without a NULL check - confirm callers guarantee it */
+	struct rspamd_mime_part *part = text_part->mime_part;
+
+	if (rspamd_str_has_8bit(text_part->raw.begin, text_part->raw.len)) {
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW;
+	}
+
+	/* Allocate copy storage; it is released together with the task pool */
+	part_content = g_byte_array_sized_new(text_part->parsed.len);
+	memcpy(part_content->data, text_part->parsed.begin, text_part->parsed.len);
+	part_content->len = text_part->parsed.len;
+	rspamd_mempool_notify_alloc(task->task_pool,
+								part_content->len);
+	rspamd_mempool_add_destructor(task->task_pool,
+								  (rspamd_mempool_destruct_t) g_byte_array_unref, part_content);
+
+	if (rspamd_str_has_8bit(text_part->parsed.begin, text_part->parsed.len)) {
+		if (rspamd_fast_utf8_validate(text_part->parsed.begin, text_part->parsed.len) == 0) {
+			/* Valid UTF, likely all good */
+			need_charset_heuristic = FALSE;
+			valid_utf8 = TRUE;
+			checked = TRUE;
+		}
+
+		text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED;
+	}
+	else {
+		/* All 7bit characters, assume it valid utf */
+		need_charset_heuristic = FALSE;
+		valid_utf8 = TRUE;
+		checked = TRUE; /* Already valid utf, no need in further checks */
+	}
+
+	if (part->ct->charset.len == 0) {
+		/* No charset announced in Content-Type */
+		if (need_charset_heuristic) {
+			charset = rspamd_mime_charset_find_by_content_maybe_split(text_part->parsed.begin,
+																	  text_part->parsed.len);
+
+			if (charset != NULL) {
+				msg_info_task("detected charset %s", charset);
+			}
+
+			checked = TRUE;
+			text_part->real_charset = charset;
+		}
+		else if (valid_utf8) {
+			SET_PART_UTF(text_part);
+			text_part->utf_raw_content = part_content;
+			text_part->real_charset = UTF8_CHARSET;
+
+			return;
+		}
+	}
+	else {
+		/* Map the announced charset to a known canonical name, if any */
+		charset = rspamd_mime_detect_charset(&part->ct->charset,
+											 task->task_pool);
+
+		if (charset == NULL) {
+			/* We don't know the real charset but can try heuristic */
+			if (need_charset_heuristic) {
+				charset = rspamd_mime_charset_find_by_content_maybe_split(part_content->data,
+																		  part_content->len);
+				msg_info_task("detected charset: %s", charset);
+				checked = TRUE;
+				text_part->real_charset = charset;
+			}
+			else if (valid_utf8) {
+				/* We already know that the input is valid utf, so skip heuristic */
+				text_part->real_charset = UTF8_CHARSET;
+			}
+		}
+		else {
+			text_part->real_charset = charset;
+
+			if (strcmp(charset, UTF8_CHARSET) != 0) {
+				/*
+				 * The announced charset is not UTF-8, so the validation result
+				 * obtained above is not trusted: reset the valid utf8 flag and
+				 * go through the check/convert path below
+				 */
+				valid_utf8 = FALSE;
+			}
+		}
+	}
+
+	if (text_part->real_charset == NULL) {
+		/* Neither announced nor detected: keep the bytes as raw content */
+		msg_info_task("<%s>: has invalid charset; original charset: %T; Content-Type: \"%s\"",
+					  MESSAGE_FIELD_CHECK(task, message_id), &part->ct->charset,
+					  part->ct->cpy);
+		SET_PART_RAW(text_part);
+		text_part->utf_raw_content = part_content;
+
+		return;
+	}
+
+	RSPAMD_FTOK_FROM_STR(&charset_tok, charset);
+
+	if (!valid_utf8) {
+		/* Content is re-checked only if it has not been examined above */
+		if (rspamd_mime_charset_utf_check(&charset_tok, part_content->data,
+										  part_content->len, !checked)) {
+			SET_PART_UTF(text_part);
+			text_part->utf_raw_content = part_content;
+			text_part->real_charset = UTF8_CHARSET;
+
+			return;
+		}
+		else {
+			/* The check may have replaced the charset with a detected one */
+			charset = charset_tok.begin;
+
+			if (!rspamd_mime_text_part_utf8_convert(task, text_part,
+													part_content, charset, &err)) {
+				msg_warn_task("<%s>: cannot convert from %s to utf8: %s",
+							  MESSAGE_FIELD_CHECK(task, message_id),
+							  charset,
+							  err ? err->message : "unknown problem");
+				SET_PART_RAW(text_part);
+				/*
+				 * err may be NULL here (see the message above), and
+				 * g_error_free() must not be called with NULL
+				 */
+				g_clear_error(&err);
+
+				text_part->utf_raw_content = part_content;
+				return;
+			}
+
+			/* On success the converter has installed utf_raw_content itself */
+			SET_PART_UTF(text_part);
+			text_part->real_charset = charset;
+		}
+	}
+	else {
+		SET_PART_UTF(text_part);
+		text_part->utf_raw_content = part_content;
+	}
+}
diff --git a/src/libmime/mime_encoding.h b/src/libmime/mime_encoding.h
new file mode 100644
index 0000000..ff81292
--- /dev/null
+++ b/src/libmime/mime_encoding.h
@@ -0,0 +1,148 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_ENCODING_H_
+#define SRC_LIBMIME_MIME_ENCODING_H_
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+#include <unicode/uchar.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_mime_part;
+struct rspamd_mime_text_part;
+struct rspamd_charset_converter;
+
+/**
+ * Convert charset alias to a canonic charset name
+ * @param pool pool to store temporary data
+ * @param in
+ * @return
+ */
+const gchar *rspamd_mime_detect_charset(const rspamd_ftok_t *in,
+										rspamd_mempool_t *pool);
+
+/**
+ * Convert text chunk to utf-8. Input encoding is substituted using
+ * `rspamd_mime_detect_charset`.
+ * If input encoding is already utf, this function returns input pointer.
+ * Memory is allocated from pool if a conversion is needed
+ * @param pool
+ * @param input
+ * @param len
+ * @param in_enc canon charset
+ * @param olen
+ * @param err
+ * @return
+ */
+gchar *rspamd_mime_text_to_utf8(rspamd_mempool_t *pool,
+								gchar *input, gsize len, const gchar *in_enc,
+								gsize *olen, GError **err);
+
+/**
+ * Converts data from `in` to `out`,
+ * returns `FALSE` if no converter is available for `enc` or conversion fails
+ *
+ * This function, in fact, copies `in` to `out`, replacing the content of
+ * `out` in total.
+ * @param in
+ * @param out
+ * @param enc validated canonical charset name. If NULL, then utf8 check is done only
+ * @return
+ */
+gboolean rspamd_mime_to_utf8_byte_array(GByteArray *in,
+										GByteArray *out,
+										rspamd_mempool_t *pool,
+										const gchar *enc);
+
+/**
+ * Maybe convert part to utf-8
+ * @param task
+ * @param text_part
+ * @return
+ */
+void rspamd_mime_text_part_maybe_convert(struct rspamd_task *task,
+										 struct rspamd_mime_text_part *text_part);
+
+/**
+ * Checks utf8 charset and, if `content_check` is TRUE, validates/normalizes the utf8 string in place
+ * @param charset
+ * @param in
+ * @param len
+ * @return
+ */
+gboolean rspamd_mime_charset_utf_check(rspamd_ftok_t *charset,
+									   gchar *in, gsize len,
+									   gboolean content_check);
+
+/**
+ * Ensure that all characters in string are valid utf8 chars or replace them
+ * with '?'
+ * @param in
+ * @param len
+ */
+void rspamd_mime_charset_utf_enforce(gchar *in, gsize len);
+
+/**
+  * Gets cached converter
+  * @param enc input encoding
+  * @param pool pool to use for temporary normalisation
+  * @param is_canon TRUE if normalisation is needed
+  * @param err output error
+  * @return converter
+  */
+struct rspamd_charset_converter *rspamd_mime_get_converter_cached(
+	const gchar *enc,
+	rspamd_mempool_t *pool,
+	gboolean is_canon,
+	UErrorCode *err);
+
+/**
+ * Performs charset->utf16 conversion
+ * @param cnv
+ * @param dest
+ * @param destCapacity
+ * @param src
+ * @param srcLength
+ * @param pErrorCode
+ * @return
+ */
+gint32
+rspamd_converter_to_uchars(struct rspamd_charset_converter *cnv,
+						   UChar *dest,
+						   gint32 destCapacity,
+						   const char *src,
+						   gint32 srcLength,
+						   UErrorCode *pErrorCode);
+
+/**
+ * Detect charset in text (if `check_utf8` is set, valid utf8 input is reported as utf8 without detection)
+ * @param in
+ * @param inlen
+ * @return detected charset name or NULL
+ */
+const char *rspamd_mime_charset_find_by_content(const gchar *in, gsize inlen,
+												bool check_utf8);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_MIME_ENCODING_H_ */
diff --git a/src/libmime/mime_encoding_list.h b/src/libmime/mime_encoding_list.h
new file mode 100644
index 0000000..b5fc5e1
--- /dev/null
+++ b/src/libmime/mime_encoding_list.h
@@ -0,0 +1,1577 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_ENCODING_LIST_H_
+#define SRC_LIBMIME_MIME_ENCODING_LIST_H_
+
+static const struct rspamd_charset_substitution sub[] = {
+	{
+		.input = "iso-646-us",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "ansi_x3.4-1968",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "iso-ir-6",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "iso_646.irv:1991",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "ascii",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "iso646-us",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "us",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "ibm367",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "cp367",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "csascii",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "ascii7",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "default",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "646",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "iso_646.irv:1983",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "iso969-us",
+		.canon = "ansi_x3.4-1986",
+		.flags = RSPAMD_CHARSET_FLAG_ASCII,
+	},
+	{
+		.input = "tw-big5",
+		.canon = "big5",
+		.flags = 0,
+	},
+	{
+		.input = "csbig5",
+		.canon = "big5",
+		.flags = 0,
+	},
+	{
+		.input = "hkscs-big5",
+		.canon = "big5-hkscs",
+		.flags = 0,
+	},
+	{
+		.input = "big5hk",
+		.canon = "big5-hkscs",
+		.flags = 0,
+	},
+	{
+		.input = "big5-hkscs:unicode",
+		.canon = "big5-hkscs",
+		.flags = 0,
+	},
+	{
+		.input = "extended_unix_code_packed_format_for_japanese",
+		.canon = "euc-jp",
+		.flags = 0,
+	},
+	{
+		.input = "cseucpkdfmtjapanese",
+		.canon = "euc-jp",
+		.flags = 0,
+	},
+	{
+		.input = "x-eucjp",
+		.canon = "euc-jp",
+		.flags = 0,
+	},
+	{
+		.input = "x-euc-jp",
+		.canon = "euc-jp",
+		.flags = 0,
+	},
+	{
+		.input = "unicode-1-1-utf-8",
+		.canon = "utf-8",
+		.flags = RSPAMD_CHARSET_FLAG_UTF,
+	},
+	{
+		.input = "cseuckr",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "5601",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "ksc-5601",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "ksc-5601-1987",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "ksc-5601_1987",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "ksc5601",
+		.canon = "euc-kr",
+		.flags = 0,
+	},
+	{
+		.input = "cns11643",
+		.canon = "euc-tw",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-euctw",
+		.canon = "euc-tw",
+		.flags = 0,
+	},
+	{
+		.input = "gb-18030",
+		.canon = "gb18030",
+		.flags = 0,
+	},
+	{
+		.input = "ibm1392",
+		.canon = "gb18030",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-1392",
+		.canon = "gb18030",
+		.flags = 0,
+	},
+	{
+		.input = "gb18030-2000",
+		.canon = "gb18030",
+		.flags = 0,
+	},
+	{
+		.input = "gb-2312",
+		.canon = "gb2312",
+		.flags = 0,
+	},
+	{
+		.input = "csgb2312",
+		.canon = "gb2312",
+		.flags = 0,
+	},
+	{
+		.input = "euc_cn",
+		.canon = "gb2312",
+		.flags = 0,
+	},
+	{
+		.input = "euccn",
+		.canon = "gb2312",
+		.flags = 0,
+	},
+	{
+		.input = "euc-cn",
+		.canon = "gb2312",
+		.flags = 0,
+	},
+	{
+		.input = "gb-k",
+		.canon = "gbk",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-1:1987",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-100",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "latin1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "l1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "ibm819",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "cp819",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "819",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "cp819",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "8859-1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_1",
+		.canon = "iso-8859-1",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-2:1987",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-101",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "latin2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "l2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "912",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "cp912",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-912",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "ibm912",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "8859-2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_2",
+		.canon = "iso-8859-2",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-3:1988",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-109",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "latin3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "l3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "913",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "cp913",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-913",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "ibm913",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "8859-3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_3",
+		.canon = "iso-8859-3",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-4:1988",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-110",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "latin4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "l4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "914",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "cp914",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-914",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "ibm914",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "8859-4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_4",
+		.canon = "iso-8859-4",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-5:1988",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-144",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-5",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "cyrillic",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatincyrillic",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "915",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "cp915",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-915",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "ibm915",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-5",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "8859-5",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_5",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_5",
+		.canon = "iso-8859-5",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-6:1987",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-127",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-6",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "ecma-114",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "asmo-708",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "arabic",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatinarabic",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "1089",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "cp1089",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-1089",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "ibm1089",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-6",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "8859-6",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_6",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_6",
+		.canon = "iso-8859-6",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-7:1987",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-126",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-7",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "elot_928",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "ecma-118",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "greek",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "greek8",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatingreek",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "813",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "cp813",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-813",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "ibm813",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-7",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "8859-7",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_7",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_7",
+		.canon = "iso-8859-7",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-8:1988",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-138",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-8",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "hebrew",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatinhebrew",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "916",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "cp916",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-916",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "ibm916",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-8",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "8859-8",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_8",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_8",
+		.canon = "iso-8859-8",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-9:1989",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-148",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-9",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "latin5",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "l5",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin5",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "920",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "cp920",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-920",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "ibm920",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-9",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "8859-9",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_9",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_9",
+		.canon = "iso-8859-9",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-13",
+		.canon = "iso-8859-13",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-13",
+		.canon = "iso-8859-13",
+		.flags = 0,
+	},
+	{
+		.input = "8859-13",
+		.canon = "iso-8859-13",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859_13",
+		.canon = "iso-8859-13",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859_13",
+		.canon = "iso-8859-13",
+		.flags = 0,
+	},
+	{
+		.input = "iso-ir-199",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-14:1998",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-14",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "latin8",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "iso-celtic",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "l8",
+		.canon = "iso-8859-14",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin9",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "csisolatin0",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "latin9",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "latin0",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "923",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "cp923",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-923",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "ibm923",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "iso8859-15",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-15",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "8859-15",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "iso_8859-15_fdis",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "l9",
+		.canon = "iso-8859-15",
+		.flags = 0,
+	},
+	{
+		.input = "koi-8-r",
+		.canon = "koi8-r",
+		.flags = 0,
+	},
+	{
+		.input = "cskoi8r",
+		.canon = "koi8-r",
+		.flags = 0,
+	},
+	{
+		.input = "koi8",
+		.canon = "koi8-r",
+		.flags = 0,
+	},
+	{
+		.input = "koi-8-u",
+		.canon = "koi8-u",
+		.flags = 0,
+	},
+	{
+		.input = "koi-8-t",
+		.canon = "koi8-t",
+		.flags = 0,
+	},
+	{
+		.input = "shiftjis",
+		.canon = "shift_jis",
+		.flags = 0,
+	},
+	{
+		.input = "ms_kanji",
+		.canon = "shift_jis",
+		.flags = 0,
+	},
+	{
+		.input = "csshiftjis",
+		.canon = "shift_jis",
+		.flags = 0,
+	},
+	{
+		.input = "cp-437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "cp437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "cspc8codepage437437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "cspc8codepage437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-437",
+		.canon = "ibm437",
+		.flags = 0,
+	},
+	{
+		.input = "cp-850",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "cp850",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "850",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "cspc850multilingual850",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "cspc850multilingual",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-850",
+		.canon = "ibm850",
+		.flags = 0,
+	},
+	{
+		.input = "cp-851",
+		.canon = "ibm851",
+		.flags = 0,
+	},
+	{
+		.input = "cp851",
+		.canon = "ibm851",
+		.flags = 0,
+	},
+	{
+		.input = "851",
+		.canon = "ibm851",
+		.flags = 0,
+	},
+	{
+		.input = "csibm851",
+		.canon = "ibm851",
+		.flags = 0,
+	},
+	{
+		.input = "cp-852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "cp852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "cspcp852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "cspcp852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-852",
+		.canon = "ibm852",
+		.flags = 0,
+	},
+	{
+		.input = "cp-855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "cp855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "csibm855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "cspcp855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-855",
+		.canon = "ibm855",
+		.flags = 0,
+	},
+	{
+		.input = "cp-857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "cp857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "csibm857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "csibm857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-857",
+		.canon = "ibm857",
+		.flags = 0,
+	},
+	{
+		.input = "cp-860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "cp860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "csibm860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "csibm860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-860",
+		.canon = "ibm860",
+		.flags = 0,
+	},
+	{
+		.input = "cp-861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "cp861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "cp-is",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "csibm861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "cp-is",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "csibm861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-861",
+		.canon = "ibm861",
+		.flags = 0,
+	},
+	{
+		.input = "cp-862",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "cp862",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "862",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "cspc862latinhebrew862",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "cspc862latinhebrew",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-862",
+		.canon = "ibm862",
+		.flags = 0,
+	},
+	{
+		.input = "cp-863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "cp863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "csibm863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "csibm863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-863",
+		.canon = "ibm863",
+		.flags = 0,
+	},
+	{
+		.input = "cp-864",
+		.canon = "ibm864",
+		.flags = 0,
+	},
+	{
+		.input = "cp864",
+		.canon = "ibm864",
+		.flags = 0,
+	},
+	{
+		.input = "csibm864",
+		.canon = "ibm864",
+		.flags = 0,
+	},
+	{
+		.input = "csibm864",
+		.canon = "ibm864",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-864",
+		.canon = "ibm864",
+		.flags = 0,
+	},
+	{
+		.input = "cp-865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "cp865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "csibm865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "csibm865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-865",
+		.canon = "ibm865",
+		.flags = 0,
+	},
+	{
+		.input = "cp-866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "cp866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "csibm866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "csibm866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-866",
+		.canon = "ibm866",
+		.flags = 0,
+	},
+	{
+		.input = "cp-868",
+		.canon = "ibm868",
+		.flags = 0,
+	},
+	{
+		.input = "cp868",
+		.canon = "ibm868",
+		.flags = 0,
+	},
+	{
+		.input = "cp-ar",
+		.canon = "ibm868",
+		.flags = 0,
+	},
+	{
+		.input = "csibm868",
+		.canon = "ibm868",
+		.flags = 0,
+	},
+	{
+		.input = "ibm-868",
+		.canon = "ibm868",
+		.flags = 0,
+	},
+	{
+		.input = "cp-869",
+		.canon = "ibm869",
+		.flags = 0,
+	},
+	{
+		.input = "cp869",
+		.canon = "ibm869",
+		.flags = 0,
+	},
+	{
+		.input = "869",
+		.canon = "ibm869",
+		.flags = 0,
+	},
+	{
+		.input = "cp-gr",
+		.canon = "ibm869",
+		.flags = 0,
+	},
+	{
+		.input = "csibm869",
+		.canon = "ibm869",
+		.flags = 0,
+	},
+	{
+		.input = "cp-891",
+		.canon = "ibm891",
+		.flags = 0,
+	},
+	{
+		.input = "cp891",
+		.canon = "ibm891",
+		.flags = 0,
+	},
+	{
+		.input = "csibm891",
+		.canon = "ibm891",
+		.flags = 0,
+	},
+	{
+		.input = "cp-903",
+		.canon = "ibm903",
+		.flags = 0,
+	},
+	{
+		.input = "cp903",
+		.canon = "ibm903",
+		.flags = 0,
+	},
+	{
+		.input = "csibm903",
+		.canon = "ibm903",
+		.flags = 0,
+	},
+	{
+		.input = "cp-904",
+		.canon = "ibm904",
+		.flags = 0,
+	},
+	{
+		.input = "cp904",
+		.canon = "ibm904",
+		.flags = 0,
+	},
+	{
+		.input = "904",
+		.canon = "ibm904",
+		.flags = 0,
+	},
+	{
+		.input = "csibm904",
+		.canon = "ibm904",
+		.flags = 0,
+	},
+	{
+		.input = "cp-1251",
+		.canon = "cp1251",
+		.flags = 0,
+	},
+	{
+		.input = "windows-1251",
+		.canon = "cp1251",
+		.flags = 0,
+	},
+	{
+		.input = "cp-1255",
+		.canon = "cp1255",
+		.flags = 0,
+	},
+	{
+		.input = "windows-1255",
+		.canon = "cp1255",
+		.flags = 0,
+	},
+	{
+		.input = "tis620.2533",
+		.canon = "tis-620",
+		.flags = 0,
+	},
+};
+
+#endif /* SRC_LIBMIME_MIME_ENCODING_LIST_H_ */
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
new file mode 100644
index 0000000..e51539e
--- /dev/null
+++ b/src/libmime/mime_expressions.c
@@ -0,0 +1,2392 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <contrib/libucl/ucl.h>
+#include "config.h"
+#include "util.h"
+#include "cfg_file.h"
+#include "rspamd.h"
+#include "message.h"
+#include "mime_expressions.h"
+#include "libserver/html/html.h"
+#include "lua/lua_common.h"
+#include "utlist.h"
+
+gboolean rspamd_compare_encoding(struct rspamd_task *task,
+								 GArray *args,
+								 void *unused);
+gboolean rspamd_header_exists(struct rspamd_task *task,
+							  GArray *args,
+							  void *unused);
+gboolean rspamd_parts_distance(struct rspamd_task *task,
+							   GArray *args,
+							   void *unused);
+gboolean rspamd_recipients_distance(struct rspamd_task *task,
+									GArray *args,
+									void *unused);
+gboolean rspamd_has_only_html_part(struct rspamd_task *task,
+								   GArray *args,
+								   void *unused);
+gboolean rspamd_is_recipients_sorted(struct rspamd_task *task,
+									 GArray *args,
+									 void *unused);
+gboolean rspamd_compare_transfer_encoding(struct rspamd_task *task,
+										  GArray *args,
+										  void *unused);
+gboolean rspamd_is_html_balanced(struct rspamd_task *task,
+								 GArray *args,
+								 void *unused);
+gboolean rspamd_has_html_tag(struct rspamd_task *task,
+							 GArray *args,
+							 void *unused);
+gboolean rspamd_has_fake_html(struct rspamd_task *task,
+							  GArray *args,
+							  void *unused);
+static gboolean rspamd_raw_header_exists(struct rspamd_task *task,
+										 GArray *args,
+										 void *unused);
+static gboolean rspamd_check_smtp_data(struct rspamd_task *task,
+									   GArray *args,
+									   void *unused);
+static gboolean rspamd_content_type_is_type(struct rspamd_task *task,
+											GArray *args,
+											void *unused);
+static gboolean rspamd_content_type_is_subtype(struct rspamd_task *task,
+											   GArray *args,
+											   void *unused);
+static gboolean rspamd_content_type_has_param(struct rspamd_task *task,
+											  GArray *args,
+											  void *unused);
+static gboolean rspamd_content_type_compare_param(struct rspamd_task *task,
+												  GArray *args,
+												  void *unused);
+static gboolean rspamd_has_content_part(struct rspamd_task *task,
+										GArray *args,
+										void *unused);
+static gboolean rspamd_has_content_part_len(struct rspamd_task *task,
+											GArray *args,
+											void *unused);
+static gboolean rspamd_is_empty_body(struct rspamd_task *task,
+									 GArray *args,
+									 void *unused);
+static gboolean rspamd_has_flag_expr(struct rspamd_task *task,
+									 GArray *args,
+									 void *unused);
+static gboolean rspamd_has_symbol_expr(struct rspamd_task *task,
+									   GArray *args,
+									   void *unused);
+
+static rspamd_expression_atom_t *rspamd_mime_expr_parse(const gchar *line, gsize len,
+														rspamd_mempool_t *pool, gpointer ud, GError **err);
+static gdouble rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom);
+static gint rspamd_mime_expr_priority(rspamd_expression_atom_t *atom);
+static void rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom);
+
+/**
+ * Parsed regexp atom: the compiled expression together with the options
+ * decoded from the flags that follow the closing '/' of the regexp text
+ */
+struct rspamd_regexp_atom {
+	enum rspamd_re_type type; /**< regexp type (what the regexp is matched against) */
+	gchar *regexp_text;       /**< pool copy of the atom text, starting at its first non-space character */
+	rspamd_regexp_t *regexp;  /**< compiled regexp structure */
+	union {
+		const gchar *header;   /**< header name for header regexps (the text before '=') */
+		const gchar *selector; /**< selector name for lua selector regexp (the text before '=') */
+	} extra;
+	gboolean is_test;     /**< 'T' flag: the result of each evaluation is logged */
+	gboolean is_strong;   /**< 'S' flag: true if headers search must be case sensitive */
+	gboolean is_multiple; /**< 'A' flag: match all inclusions (maxhits is set to 0 instead of 1) */
+};
+
+/**
+ * Rspamd expression function: a parsed "name(arg, ...)" atom
+ */
+struct rspamd_function_atom {
+	gchar *name;  /**< name of function (the text before the opening brace) */
+	GArray *args; /**< its args, an array of struct expression_argument */
+};
+
+/* Kind of a mime expression atom, as detected by rspamd_mime_expr_parse() */
+enum rspamd_mime_atom_type {
+	MIME_ATOM_REGEXP = 0, /* "[name=]/regexp/flags" */
+	MIME_ATOM_INTERNAL_FUNCTION, /* "name(args)" */
+	MIME_ATOM_LUA_FUNCTION, /* bare name of a global Lua function */
+	MIME_ATOM_LOCAL_LUA_FUNCTION, /* New style: "lua:name", looked up in the "functions" table of the config object */
+};
+
+/* A parsed mime expression atom; `type` selects the valid member of `d` */
+struct rspamd_mime_atom {
+	gchar *str; /* pool copy of the atom text */
+	union {
+		struct rspamd_regexp_atom *re; /* MIME_ATOM_REGEXP */
+		struct rspamd_function_atom *func; /* MIME_ATOM_INTERNAL_FUNCTION */
+		const gchar *lua_function; /* MIME_ATOM_LUA_FUNCTION: name of the global function (same pointer as str) */
+		gint lua_cbref; /* MIME_ATOM_LOCAL_LUA_FUNCTION: idx taken from the function's ucl_lua_funcdata */
+	} d;
+	enum rspamd_mime_atom_type type;
+};
+
+/*
+ * List of internal functions of rspamd: expression name, handler and the
+ * user data stored with it (NULL for every built-in entry).
+ * Sorted by name to use bsearch: new entries must keep the strcmp() order
+ * that fl_cmp() compares by.
+ */
+static struct _fl {
+	const gchar *name;
+	rspamd_internal_func_t func;
+	void *user_data;
+} rspamd_functions_list[] = {
+	{"check_smtp_data", rspamd_check_smtp_data, NULL},
+	{"compare_encoding", rspamd_compare_encoding, NULL},
+	{"compare_parts_distance", rspamd_parts_distance, NULL},
+	{"compare_recipients_distance", rspamd_recipients_distance, NULL},
+	{"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
+	{"content_type_compare_param", rspamd_content_type_compare_param, NULL},
+	{"content_type_has_param", rspamd_content_type_has_param, NULL},
+	{"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
+	{"content_type_is_type", rspamd_content_type_is_type, NULL},
+	{"has_content_part", rspamd_has_content_part, NULL},
+	{"has_content_part_len", rspamd_has_content_part_len, NULL},
+	{"has_fake_html", rspamd_has_fake_html, NULL},
+	{"has_flag", rspamd_has_flag_expr, NULL},
+	{"has_html_tag", rspamd_has_html_tag, NULL},
+	{"has_only_html_part", rspamd_has_only_html_part, NULL},
+	{"has_symbol", rspamd_has_symbol_expr, NULL},
+	{"header_exists", rspamd_header_exists, NULL},
+	{"is_empty_body", rspamd_is_empty_body, NULL},
+	{"is_html_balanced", rspamd_is_html_balanced, NULL},
+	{"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
+	{"raw_header_exists", rspamd_raw_header_exists, NULL},
+};
+
+/* Atom callbacks (parse, process, priority, destroy) for mime expressions */
+const struct rspamd_atom_subr mime_expr_subr = {
+	.parse = rspamd_mime_expr_parse,
+	.process = rspamd_mime_expr_process,
+	.priority = rspamd_mime_expr_priority,
+	.destroy = rspamd_mime_expr_destroy};
+
+/*
+ * Current function table and its element count; both start out describing
+ * the static rspamd_functions_list.
+ * NOTE(review): list_allocated presumably records that list_ptr was replaced
+ * by a heap copy when functions are registered at runtime - the code using
+ * these variables is outside this part of the file, confirm there.
+ */
+static struct _fl *list_ptr = &rspamd_functions_list[0];
+static guint32 functions_number = sizeof(rspamd_functions_list) /
+								  sizeof(struct _fl);
+static gboolean list_allocated = FALSE;
+
+/*
+ * Bsearch routine: compares two struct _fl entries by name using strcmp()
+ */
+static gint
+fl_cmp(const void *s1, const void *s2)
+{
+	const struct _fl *lhs = s1;
+	const struct _fl *rhs = s2;
+
+	return strcmp(lhs->name, rhs->name);
+}
+
+/* Error domain of the GErrors set by the mime expression parser */
+static GQuark
+rspamd_mime_expr_quark(void)
+{
+	GQuark domain = g_quark_from_static_string("mime-expressions");
+
+	return domain;
+}
+
+#define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
+/*
+ * Translate a long regexp type name (the text between '{' and '}' in the
+ * regexp flags) to a regexp type.
+ *
+ * The name is given as start/len and does not need to be zero terminated;
+ * it must have exactly the length of a known name and is compared with
+ * rspamd_lc_cmp(). On success a->type is set and TRUE is returned, otherwise
+ * a is left untouched and FALSE is returned.
+ */
+static gboolean
+rspamd_parse_long_option(const gchar *start, gsize len,
+						 struct rspamd_regexp_atom *a)
+{
+#define LONG_OPTION(lit, re_type) {(lit), sizeof(lit) - 1, (re_type)}
+	static const struct {
+		const gchar *name;
+		gsize name_len;
+		enum rspamd_re_type type;
+	} long_options[] = {
+		LONG_OPTION("body", RSPAMD_RE_BODY),
+		LONG_OPTION("part", RSPAMD_RE_MIME),
+		LONG_OPTION("mime", RSPAMD_RE_MIME),
+		LONG_OPTION("raw_part", RSPAMD_RE_RAWMIME),
+		LONG_OPTION("raw_mime", RSPAMD_RE_RAWMIME),
+		LONG_OPTION("mime_raw", RSPAMD_RE_RAWMIME),
+		LONG_OPTION("header", RSPAMD_RE_HEADER),
+		LONG_OPTION("mime_header", RSPAMD_RE_MIMEHEADER),
+		LONG_OPTION("header_mime", RSPAMD_RE_MIMEHEADER),
+		LONG_OPTION("raw_header", RSPAMD_RE_RAWHEADER),
+		LONG_OPTION("header_raw", RSPAMD_RE_RAWHEADER),
+		LONG_OPTION("all_header", RSPAMD_RE_ALLHEADER),
+		LONG_OPTION("header_all", RSPAMD_RE_ALLHEADER),
+		LONG_OPTION("all_headers", RSPAMD_RE_ALLHEADER),
+		LONG_OPTION("url", RSPAMD_RE_URL),
+		LONG_OPTION("email", RSPAMD_RE_EMAIL),
+		LONG_OPTION("sa_body", RSPAMD_RE_SABODY),
+		LONG_OPTION("sa_raw_body", RSPAMD_RE_SARAWBODY),
+		LONG_OPTION("sa_body_raw", RSPAMD_RE_SARAWBODY),
+		LONG_OPTION("words", RSPAMD_RE_WORDS),
+		LONG_OPTION("raw_words", RSPAMD_RE_RAWWORDS),
+		LONG_OPTION("stem_words", RSPAMD_RE_STEMWORDS),
+		LONG_OPTION("selector", RSPAMD_RE_SELECTOR),
+	};
+#undef LONG_OPTION
+	guint i;
+
+	for (i = 0; i < sizeof(long_options) / sizeof(long_options[0]); i++) {
+		/* Same test as TYPE_CHECK: exact length first, then the comparison */
+		if (long_options[i].name_len == len &&
+			rspamd_lc_cmp(start, long_options[i].name, len) == 0) {
+			a->type = long_options[i].type;
+
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Rspamd regexp utility functions
+ */
+
+/*
+ * Parse a regexp atom of the form "[name=]/regexp/flags" into a regexp atom
+ * allocated from pool.
+ *
+ * The optional name before '=' is stored as the header or selector name.
+ * Flags that may follow the closing slash:
+ *   i m s x u O r L        collected and passed to rspamd_regexp_new()
+ *   o                      accepted and ignored
+ *   H R B C D M P Q U X $  select the regexp type
+ *   {name}                 long type name, see rspamd_parse_long_option()
+ *   T S A                  set is_test, is_strong and is_multiple
+ * Flags parsing stops at the first character that is none of the above.
+ * When no type flag is given, a regexp with a name is treated as a header
+ * regexp.
+ *
+ * Returns the atom, or NULL on error (the reason is logged).
+ */
+static struct rspamd_regexp_atom *
+rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t *pool, const gchar *line,
+								   struct rspamd_config *cfg)
+{
+	const gchar *begin, *end, *p, *src, *start, *brace;
+	gchar *dbegin, *dend, *extra = NULL;
+	struct rspamd_regexp_atom *result;
+	GError *err = NULL;
+	GString *re_flags;
+
+	if (line == NULL) {
+		msg_err_pool("cannot parse NULL line");
+		return NULL;
+	}
+
+	src = line;
+	result = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_regexp_atom));
+	/* Skip whitespaces */
+	while (g_ascii_isspace(*line)) {
+		line++;
+	}
+	if (*line == '\0') {
+		msg_warn_pool("got empty regexp");
+		return NULL;
+	}
+
+	result->type = RSPAMD_RE_MAX;
+
+	start = line;
+	/* First try to find header name */
+	begin = strchr(line, '/');
+	if (begin != NULL) {
+		/* Look backwards from the first slash for the '=' ending the name */
+		p = begin;
+		end = NULL;
+		while (p != line) {
+			if (*p == '=') {
+				end = p;
+				break;
+			}
+			p--;
+		}
+
+		if (end) {
+			extra = rspamd_mempool_alloc(pool, end - line + 1);
+			rspamd_strlcpy(extra, line, end - line + 1);
+			line = end;
+		}
+	}
+	else {
+		extra = rspamd_mempool_strdup(pool, line);
+		result->type = RSPAMD_RE_MAX;
+		line = start;
+	}
+	/* Find begin of regexp */
+	while (*line && *line != '/') {
+		line++;
+	}
+	if (*line != '\0') {
+		begin = line + 1;
+	}
+	else if (extra == NULL) {
+		/* Assume that line without // is just a header name */
+		extra = rspamd_mempool_strdup(pool, line);
+		result->type = RSPAMD_RE_HEADER;
+		return result;
+	}
+	else {
+		/* We got header name earlier but have not found // expression, so it is invalid regexp */
+		msg_warn_pool(
+			"got no header name (eg. header=) but without corresponding regexp, %s",
+			src);
+		return NULL;
+	}
+	/* Find end: the first slash that is not preceded by a backslash */
+	end = begin;
+	while (*end && (*end != '/' || *(end - 1) == '\\')) {
+		end++;
+	}
+	if (end == begin || *end != '/') {
+		msg_warn_pool("no trailing / in regexp %s", src);
+		return NULL;
+	}
+	/* Parse flags */
+	p = end + 1;
+	/* Heap allocated: must be freed on every return path below */
+	re_flags = g_string_sized_new(32);
+
+	while (p != NULL) {
+		switch (*p) {
+		case 'i':
+		case 'm':
+		case 's':
+		case 'x':
+		case 'u':
+		case 'O':
+		case 'r':
+		case 'L':
+			/* Handled by rspamd_regexp_t */
+			g_string_append_c(re_flags, *p);
+			p++;
+			break;
+		case 'o':
+			p++;
+			break;
+		/* Type flags */
+		case 'H':
+			result->type = RSPAMD_RE_HEADER;
+			p++;
+			break;
+		case 'R':
+			result->type = RSPAMD_RE_ALLHEADER;
+			p++;
+			break;
+		case 'B':
+			result->type = RSPAMD_RE_MIMEHEADER;
+			p++;
+			break;
+		case 'C':
+			result->type = RSPAMD_RE_SABODY;
+			p++;
+			break;
+		case 'D':
+			result->type = RSPAMD_RE_SARAWBODY;
+			p++;
+			break;
+		case 'M':
+			result->type = RSPAMD_RE_BODY;
+			p++;
+			break;
+		case 'P':
+			result->type = RSPAMD_RE_MIME;
+			p++;
+			break;
+		case 'Q':
+			result->type = RSPAMD_RE_RAWMIME;
+			p++;
+			break;
+		case 'U':
+			result->type = RSPAMD_RE_URL;
+			p++;
+			break;
+		case 'X':
+			result->type = RSPAMD_RE_RAWHEADER;
+			p++;
+			break;
+		case '$':
+			result->type = RSPAMD_RE_SELECTOR;
+			p++;
+			break;
+		case '{':
+			/* Long definition */
+			if ((brace = strchr(p + 1, '}')) != NULL) {
+				if (!rspamd_parse_long_option(p + 1, brace - (p + 1), result)) {
+					msg_warn_pool("invalid long regexp type: %*s in '%s'",
+								  (int) (brace - (p + 1)), p + 1, src);
+					p = NULL;
+				}
+				else {
+					p = brace + 1;
+				}
+			}
+			else {
+				p = NULL;
+			}
+			break;
+		/* Other flags */
+		case 'T':
+			result->is_test = TRUE;
+			p++;
+			break;
+		case 'S':
+			result->is_strong = TRUE;
+			p++;
+			break;
+		case 'A':
+			result->is_multiple = TRUE;
+			p++;
+			break;
+		/* Stop flags parsing */
+		default:
+			p = NULL;
+			break;
+		}
+	}
+
+	if (result->type >= RSPAMD_RE_MAX) {
+		if (extra) {
+			/* Assume header regexp */
+			result->extra.header = extra;
+			result->type = RSPAMD_RE_HEADER;
+		}
+		else {
+			msg_err_pool("could not read regexp: %s, unknown type", src);
+			g_string_free(re_flags, TRUE);
+			return NULL;
+		}
+	}
+
+	if ((result->type == RSPAMD_RE_HEADER ||
+		 result->type == RSPAMD_RE_RAWHEADER ||
+		 result->type == RSPAMD_RE_MIMEHEADER)) {
+		if (extra == NULL) {
+			msg_err_pool("header regexp: '%s' has no header part", src);
+			g_string_free(re_flags, TRUE);
+			return NULL;
+		}
+		else {
+			result->extra.header = extra;
+		}
+	}
+
+	if (result->type == RSPAMD_RE_SELECTOR) {
+		if (extra == NULL) {
+			msg_err_pool("selector regexp: '%s' has no selector part", src);
+			g_string_free(re_flags, TRUE);
+			return NULL;
+		}
+		else {
+			result->extra.selector = extra;
+		}
+	}
+
+
+	result->regexp_text = rspamd_mempool_strdup(pool, start);
+	dbegin = result->regexp_text + (begin - start);
+	dend = result->regexp_text + (end - start);
+	/* Temporarily cut the copy at the closing slash to compile the pattern only */
+	*dend = '\0';
+
+	result->regexp = rspamd_regexp_new(dbegin, re_flags->str,
+									   &err);
+
+	g_string_free(re_flags, TRUE);
+
+	if (result->regexp == NULL || err != NULL) {
+		msg_warn_pool("could not read regexp: %s while reading regexp %e",
+					  src, err);
+
+		if (err) {
+			g_error_free(err);
+		}
+
+		return NULL;
+	}
+
+	if (result->is_multiple) {
+		rspamd_regexp_set_maxhits(result->regexp, 0);
+	}
+	else {
+		rspamd_regexp_set_maxhits(result->regexp, 1);
+	}
+
+	rspamd_regexp_set_ud(result->regexp, result);
+
+	/* Restore the closing slash so that regexp_text is the full atom text again */
+	*dend = '/';
+
+	return result;
+}
+
+/*
+ * Parse a function atom "name(arg, arg, ...)" into a function atom allocated
+ * from pool.
+ *
+ * input must contain both '(' and ')' (asserted); everything between the
+ * first '(' and the last ')' is split into arguments at commas:
+ *  - an argument starting with '/' is compiled through the regexp cache and
+ *    stored as EXPRESSION_ARGUMENT_REGEXP; if it does not compile, a warning
+ *    is logged and its text is stored as a normal argument instead
+ *  - any other argument is stored as text (EXPRESSION_ARGUMENT_NORMAL); one
+ *    leading and one trailing quote character (' or ") are left out
+ * A backslash makes the following character lose its special meaning as a
+ * separator; the backslash itself stays in the stored text.
+ *
+ * Returns the atom; its args array is created with g_array_new().
+ */
+struct rspamd_function_atom *
+rspamd_mime_expr_parse_function_atom(rspamd_mempool_t *pool, const gchar *input)
+{
+	const gchar *obrace, *ebrace, *p, *c;
+	gchar t, *databuf;
+	guint len;
+	struct rspamd_function_atom *res;
+	struct expression_argument arg;
+	GError *err = NULL;
+	enum {
+		start_read_argument = 0,
+		in_string,
+		in_regexp,
+		got_backslash,
+		got_comma
+	} state,
+		prev_state = 0;
+
+	obrace = strchr(input, '(');
+	ebrace = strrchr(input, ')');
+
+	g_assert(obrace != NULL && ebrace != NULL);
+
+	res = rspamd_mempool_alloc0(pool, sizeof(*res));
+	res->name = rspamd_mempool_alloc(pool, obrace - input + 1);
+	rspamd_strlcpy(res->name, input, obrace - input + 1);
+	res->args = g_array_new(FALSE, FALSE, sizeof(struct expression_argument));
+
+	p = obrace + 1;
+	c = p;
+	state = start_read_argument;
+
+	/* Read arguments */
+	while (p <= ebrace) {
+		t = *p;
+		switch (state) {
+		case start_read_argument:
+			if (t == '/') {
+				state = in_regexp;
+				c = p;
+			}
+			else if (!g_ascii_isspace(t)) {
+				state = in_string;
+
+				/* Do not include the opening quote in the argument */
+				if (t == '\'' || t == '\"') {
+					c = p + 1;
+				}
+				else {
+					c = p;
+				}
+			}
+			p++;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == ',' || p == ebrace) {
+				len = p - c + 1;
+				databuf = rspamd_mempool_alloc(pool, len);
+				rspamd_strlcpy(databuf, c, len);
+				arg.type = EXPRESSION_ARGUMENT_REGEXP;
+				arg.data = rspamd_regexp_cache_create(NULL, databuf, NULL, &err);
+
+				if (arg.data == NULL) {
+					/* Fallback to string */
+					msg_warn("cannot parse slashed argument %s as regexp: %s",
+							 databuf, err ? err->message : "unknown error");
+					/*
+					 * err is reused for the following regexp arguments, so
+					 * it must be reset to NULL and not just freed
+					 */
+					g_clear_error(&err);
+					arg.type = EXPRESSION_ARGUMENT_NORMAL;
+					arg.data = databuf;
+				}
+
+				g_array_append_val(res->args, arg);
+				state = got_comma;
+			}
+			p++;
+			break;
+		case in_string:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_string;
+			}
+			else if (t == ',' || p == ebrace) {
+				/* Do not include the closing quote in the argument */
+				if (*(p - 1) == '\'' || *(p - 1) == '\"') {
+					len = p - c;
+				}
+				else {
+					len = p - c + 1;
+				}
+
+				databuf = rspamd_mempool_alloc(pool, len);
+				rspamd_strlcpy(databuf, c, len);
+				arg.type = EXPRESSION_ARGUMENT_NORMAL;
+				arg.data = databuf;
+				g_array_append_val(res->args, arg);
+				state = got_comma;
+			}
+			p++;
+			break;
+		case got_backslash:
+			/* Skip the escaped character */
+			state = prev_state;
+			p++;
+			break;
+		case got_comma:
+			/* p is already past the separator, just switch the state */
+			state = start_read_argument;
+			break;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Atom parser callback of mime expressions.
+ *
+ * Scans at most len bytes of line to find where the atom ends, classifies it
+ * and builds the corresponding rspamd_mime_atom:
+ *  - "[name=]/regexp/flags"  regexp; it is compiled and added to cfg->re_cache
+ *  - "name(args)"            internal function call
+ *  - "lua:name"              Lua function taken from the "functions" table of
+ *                            the config object in the user data
+ *  - "name"                  global Lua function; it must exist at parse time
+ *
+ * ud is a struct rspamd_mime_expr_ud. On success the returned atom (allocated
+ * from pool) has len set to the number of bytes consumed from line; on
+ * failure NULL is returned and err is set.
+ */
+static rspamd_expression_atom_t *
+rspamd_mime_expr_parse(const gchar *line, gsize len,
+					   rspamd_mempool_t *pool, gpointer ud, GError **err)
+{
+	rspamd_expression_atom_t *a = NULL;
+	struct rspamd_mime_atom *mime_atom = NULL;
+	const gchar *p, *end, *c = NULL;
+	struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *) ud;
+	struct rspamd_config *cfg;
+	rspamd_regexp_t *own_re;
+	gchar t;
+	gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
+	enum {
+		in_header = 0,
+		got_slash,
+		in_regexp,
+		got_backslash,
+		got_second_slash,
+		in_flags,
+		in_flags_brace,
+		got_obrace,
+		in_function,
+		in_local_function,
+		got_ebrace,
+		end_atom,
+		bad_atom
+	} state = 0,
+	  prev_state = 0;
+
+	p = line;
+	end = p + len;
+	cfg = real_ud->cfg;
+
+	/*
+	 * Find the end of the atom. Some states only switch to another state
+	 * without advancing p, so the same character is examined again.
+	 */
+	while (p < end) {
+		t = *p;
+
+		switch (state) {
+		case in_header:
+			if (t == '/') {
+				/* Regexp */
+				state = got_slash;
+			}
+			else if (t == '(') {
+				/* Function */
+				state = got_obrace;
+			}
+			else if (!g_ascii_isalnum(t) && t != '_' && t != '-' && t != '=') {
+				if (t == ':') {
+					/* Only the exact prefix "lua:" starts a local Lua function */
+					if (p - line == 3 && memcmp(line, "lua", 3) == 0) {
+						type = MIME_ATOM_LOCAL_LUA_FUNCTION;
+						state = in_local_function;
+						c = p + 1;
+					}
+				}
+				else {
+					/* Likely lua function, identified by just a string */
+					type = MIME_ATOM_LUA_FUNCTION;
+					state = end_atom;
+					/* Do not increase p */
+					continue;
+				}
+			}
+			/*
+			 * NOTE(review): whitespace already matches the previous
+			 * condition, so this branch looks unreachable
+			 */
+			else if (g_ascii_isspace(t)) {
+				state = bad_atom;
+			}
+			p++;
+			break;
+		case got_slash:
+			state = in_regexp;
+			break;
+		case in_regexp:
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_regexp;
+			}
+			else if (t == '/') {
+				state = got_second_slash;
+			}
+			p++;
+			break;
+		case got_second_slash:
+			state = in_flags;
+			break;
+		case in_flags:
+			/* Flags are letters, '$' and long types enclosed in braces */
+			if (t == '{') {
+				state = in_flags_brace;
+				p++;
+			}
+			else if (!g_ascii_isalpha(t) && t != '$') {
+				state = end_atom;
+			}
+			else {
+				p++;
+			}
+			break;
+		case in_flags_brace:
+			if (t == '}') {
+				state = in_flags;
+			}
+			p++;
+			break;
+		case got_backslash:
+			/* Skip the escaped character and return to the previous state */
+			state = prev_state;
+			p++;
+			break;
+		case got_obrace:
+			state = in_function;
+			type = MIME_ATOM_INTERNAL_FUNCTION;
+			obraces++;
+			break;
+		case in_function:
+			/* The function ends at the brace that closes the first one */
+			if (t == '\\') {
+				state = got_backslash;
+				prev_state = in_function;
+			}
+			else if (t == '(') {
+				obraces++;
+			}
+			else if (t == ')') {
+				ebraces++;
+				if (ebraces == obraces) {
+					state = got_ebrace;
+				}
+			}
+			p++;
+			break;
+		case in_local_function:
+			if (!(g_ascii_isalnum(t) || t == '-' || t == '_')) {
+				g_assert(c != NULL);
+				state = end_atom;
+			}
+			else {
+				p++;
+			}
+			break;
+		case got_ebrace:
+			state = end_atom;
+			break;
+		case bad_atom:
+			g_set_error(err, rspamd_mime_expr_quark(), 100, "cannot parse"
+															" mime atom '%s' when reading symbol '%c' at offset %d, "
+															"near %.*s",
+						line, t, (gint) (p - line),
+						(gint) MIN(end - p, 10), p);
+			return NULL;
+		case end_atom:
+			goto set;
+		}
+	}
+set:
+
+	/* The input may also end in a state that forms a complete atom */
+	if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
+						  state != in_flags && state != end_atom)) {
+		g_set_error(err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
+														" mime atom");
+		return NULL;
+	}
+
+	mime_atom = rspamd_mempool_alloc(pool, sizeof(*mime_atom));
+	mime_atom->type = type;
+	mime_atom->str = rspamd_mempool_alloc(pool, p - line + 1);
+	rspamd_strlcpy(mime_atom->str, line, p - line + 1);
+
+	if (type == MIME_ATOM_REGEXP) {
+		mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom(pool,
+															 mime_atom->str, cfg);
+		if (mime_atom->d.re == NULL) {
+			g_set_error(err, rspamd_mime_expr_quark(), 200,
+						"cannot parse regexp '%s'",
+						mime_atom->str);
+			goto err;
+		}
+		else {
+			/* -1 means that no condition is attached to this regexp */
+			gint lua_cbref = -1;
+
+			/* Check regexp condition */
+			if (real_ud->conf_obj != NULL) {
+				const ucl_object_t *re_conditions = ucl_object_lookup(real_ud->conf_obj,
+																	  "re_conditions");
+
+				if (re_conditions != NULL) {
+					if (ucl_object_type(re_conditions) != UCL_OBJECT) {
+						g_set_error(err, rspamd_mime_expr_quark(), 320,
+									"re_conditions is not a table for '%s'",
+									mime_atom->str);
+						rspamd_regexp_unref(mime_atom->d.re->regexp);
+						goto err;
+					}
+
+					/* Conditions are keyed by the full text of the atom */
+					const ucl_object_t *function_obj = ucl_object_lookup(re_conditions,
+																		 mime_atom->str);
+
+					if (function_obj != NULL) {
+						if (ucl_object_type(function_obj) != UCL_USERDATA) {
+							g_set_error(err, rspamd_mime_expr_quark(), 320,
+										"condition for '%s' is invalid, must be function",
+										mime_atom->str);
+							rspamd_regexp_unref(mime_atom->d.re->regexp);
+							goto err;
+						}
+
+						struct ucl_lua_funcdata *fd = function_obj->value.ud;
+
+						lua_cbref = fd->idx;
+					}
+				}
+			}
+
+			if (lua_cbref != -1) {
+				msg_info_config("added condition for regexp %s", mime_atom->str);
+				/* Add SOM_LEFTMOST_FLAG implicitly */
+				rspamd_regexp_set_flags(mime_atom->d.re->regexp, rspamd_regexp_get_flags(mime_atom->d.re->regexp) |
+																	 RSPAMD_REGEXP_FLAG_LEFTMOST);
+			}
+
+			/*
+			 * Register new item in the cache: the atom keeps the regexp
+			 * returned by the cache and drops its own reference
+			 */
+			if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
+				mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
+				mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
+
+				if (mime_atom->d.re->extra.header != NULL) {
+					own_re = mime_atom->d.re->regexp;
+					mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
+																  mime_atom->d.re->regexp,
+																  mime_atom->d.re->type,
+																  mime_atom->d.re->extra.header,
+																  strlen(mime_atom->d.re->extra.header) + 1,
+																  lua_cbref);
+					/* Pass ownership to the cache */
+					rspamd_regexp_unref(own_re);
+				}
+				else {
+					/* We have header regexp, but no header name is detected */
+					g_set_error(err,
+								rspamd_mime_expr_quark(),
+								200,
+								"no header name in header regexp: '%s'",
+								mime_atom->str);
+					rspamd_regexp_unref(mime_atom->d.re->regexp);
+					goto err;
+				}
+			}
+			else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
+				if (mime_atom->d.re->extra.selector != NULL) {
+					own_re = mime_atom->d.re->regexp;
+					mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
+																  mime_atom->d.re->regexp,
+																  mime_atom->d.re->type,
+																  mime_atom->d.re->extra.selector,
+																  strlen(mime_atom->d.re->extra.selector) + 1,
+																  lua_cbref);
+					/* Pass ownership to the cache */
+					rspamd_regexp_unref(own_re);
+				}
+				else {
+					/* We have selector regexp, but no selector name is detected */
+					g_set_error(err,
+								rspamd_mime_expr_quark(),
+								200,
+								"no selector name in selector regexp: '%s'",
+								mime_atom->str);
+					rspamd_regexp_unref(mime_atom->d.re->regexp);
+					goto err;
+				}
+			}
+			else {
+				own_re = mime_atom->d.re->regexp;
+				mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
+															  mime_atom->d.re->regexp,
+															  mime_atom->d.re->type,
+															  NULL,
+															  0,
+															  lua_cbref);
+				/* Pass ownership to the cache */
+				rspamd_regexp_unref(own_re);
+			}
+		}
+	}
+	else if (type == MIME_ATOM_LUA_FUNCTION) {
+		mime_atom->d.lua_function = mime_atom->str;
+
+		/* The atom is valid only if a global Lua function has this name */
+		lua_getglobal(cfg->lua_state, mime_atom->str);
+
+		if (lua_type(cfg->lua_state, -1) != LUA_TFUNCTION) {
+			g_set_error(err, rspamd_mime_expr_quark(), 200,
+						"no such lua function '%s'",
+						mime_atom->str);
+			lua_pop(cfg->lua_state, 1);
+
+			goto err;
+		}
+
+		lua_pop(cfg->lua_state, 1);
+	}
+	else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
+		/* c points to the start of Lua function name, p is one past its end */
+
+		if (real_ud->conf_obj == NULL) {
+			g_set_error(err, rspamd_mime_expr_quark(), 300,
+						"no config object for '%s'",
+						mime_atom->str);
+			goto err;
+		}
+
+		const ucl_object_t *functions = ucl_object_lookup(real_ud->conf_obj,
+														  "functions");
+
+		if (functions == NULL) {
+			g_set_error(err, rspamd_mime_expr_quark(), 310,
+						"no functions defined for '%s'",
+						mime_atom->str);
+			goto err;
+		}
+
+		if (ucl_object_type(functions) != UCL_OBJECT) {
+			g_set_error(err, rspamd_mime_expr_quark(), 320,
+						"functions is not a table for '%s'",
+						mime_atom->str);
+			goto err;
+		}
+
+		const ucl_object_t *function_obj;
+
+		function_obj = ucl_object_lookup_len(functions, c,
+											 p - c);
+
+		if (function_obj == NULL) {
+			g_set_error(err, rspamd_mime_expr_quark(), 320,
+						"function %.*s is not found for '%s'",
+						(int) (p - c), c, mime_atom->str);
+			goto err;
+		}
+
+		if (ucl_object_type(function_obj) != UCL_USERDATA) {
+			g_set_error(err, rspamd_mime_expr_quark(), 320,
+						"function %.*s has invalid type for '%s'",
+						(int) (p - c), c, mime_atom->str);
+			goto err;
+		}
+
+		struct ucl_lua_funcdata *fd = function_obj->value.ud;
+
+		mime_atom->d.lua_cbref = fd->idx;
+	}
+	else {
+		/* MIME_ATOM_INTERNAL_FUNCTION */
+		mime_atom->d.func = rspamd_mime_expr_parse_function_atom(pool,
+																 mime_atom->str);
+		if (mime_atom->d.func == NULL) {
+			g_set_error(err, rspamd_mime_expr_quark(), 200,
+						"cannot parse function '%s'",
+						mime_atom->str);
+			goto err;
+		}
+	}
+
+	a = rspamd_mempool_alloc0(pool, sizeof(*a));
+	a->len = p - line;
+	a->priority = 0;
+	a->data = mime_atom;
+
+	return a;
+
+err:
+
+	return NULL;
+}
+
+/*
+ * Evaluate a regexp atom for the task through the regexp cache and return
+ * what rspamd_re_cache_process() reports; 0 is returned for a NULL atom.
+ * Atoms carrying the test flag also get their result logged.
+ */
+static gint
+rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom *re,
+								struct rspamd_task *task)
+{
+	const gchar *type_data = NULL;
+	gsize type_datalen = 0;
+	gint res;
+
+	if (re == NULL) {
+		msg_info_task("invalid regexp passed");
+		return 0;
+	}
+
+	/* Only header and selector regexps pass their name to the cache */
+	switch (re->type) {
+	case RSPAMD_RE_HEADER:
+	case RSPAMD_RE_RAWHEADER:
+		type_data = re->extra.header;
+		type_datalen = strlen(re->extra.header);
+		break;
+	case RSPAMD_RE_SELECTOR:
+		type_data = re->extra.selector;
+		type_datalen = strlen(re->extra.selector);
+		break;
+	default:
+		break;
+	}
+
+	res = rspamd_re_cache_process(task, re->regexp, re->type,
+								  type_data, type_datalen, re->is_strong);
+
+	if (re->is_test) {
+		msg_info_task("test %s regexp '%s' returned %d",
+					  rspamd_re_cache_type_to_string(re->type),
+					  re->regexp_text, res);
+	}
+
+	return res;
+}
+
+
+/*
+ * Compute the evaluation priority of an atom from its kind.
+ * Internal functions and Lua functions get fixed priorities below the
+ * maximum; regexps are ranked by regexp type, with words-like and unknown
+ * types (treated as expensive) getting priority 0. Any other atom kind
+ * yields 0 as well.
+ */
+static gint
+rspamd_mime_expr_priority(rspamd_expression_atom_t *atom)
+{
+	struct rspamd_mime_atom *mime_atom = atom->data;
+
+	if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
+		/* Prioritize internal functions slightly */
+		return RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
+	}
+
+	if (mime_atom->type == MIME_ATOM_LUA_FUNCTION ||
+		mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
+		return RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
+	}
+
+	if (mime_atom->type != MIME_ATOM_REGEXP) {
+		return 0;
+	}
+
+	switch (mime_atom->d.re->type) {
+	case RSPAMD_RE_HEADER:
+	case RSPAMD_RE_RAWHEADER:
+		return RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
+	case RSPAMD_RE_URL:
+	case RSPAMD_RE_EMAIL:
+	case RSPAMD_RE_SELECTOR:
+		return RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
+	case RSPAMD_RE_MIME:
+	case RSPAMD_RE_RAWMIME:
+		return RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
+	case RSPAMD_RE_WORDS:
+	case RSPAMD_RE_RAWWORDS:
+	case RSPAMD_RE_STEMWORDS:
+	default:
+		/* For expensive regexps */
+		return 0;
+	}
+}
+
+/*
+ * Atom destructor: frees the argument array of an internal function atom.
+ * Atoms of any other kind (or atoms without data) are left untouched.
+ */
+static void
+rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom)
+{
+	struct rspamd_mime_atom *mime_atom = atom->data;
+
+	if (mime_atom == NULL || mime_atom->type != MIME_ATOM_INTERNAL_FUNCTION) {
+		return;
+	}
+
+	/* Need to cleanup arguments */
+	g_array_free(mime_atom->d.func->args, TRUE);
+}
+
+/*
+ * Look up an internal function by name in the sorted function list and call
+ * it with the task, the parsed arguments and the user data it was registered
+ * with. Returns FALSE when no function with that name is registered.
+ * The `L` argument is not used by this function.
+ */
+static gboolean
+rspamd_mime_expr_process_function(struct rspamd_function_atom *func,
+								  struct rspamd_task *task,
+								  lua_State *L)
+{
+	struct _fl needle, *hit;
+
+	/* Only the name takes part in the lookup */
+	needle.name = func->name;
+
+	hit = bsearch(&needle,
+				  list_ptr,
+				  functions_number,
+				  sizeof(struct _fl),
+				  fl_cmp);
+
+	return hit != NULL ? hit->func(task, func->args, hit->user_data) : FALSE;
+}
+
+/*
+ * Convert the Lua value on top of the stack into an atom result.
+ * Booleans and numbers are accepted; any other type is logged as an error
+ * and yields 0. The value is left on the stack, the caller removes it.
+ */
+static gdouble
+rspamd_mime_expr_lua_result(struct rspamd_task *task, lua_State *L,
+							struct rspamd_mime_atom *mime_atom)
+{
+	gdouble ret = 0;
+
+	if (lua_type(L, -1) == LUA_TBOOLEAN) {
+		ret = lua_toboolean(L, -1);
+	}
+	else if (lua_type(L, -1) == LUA_TNUMBER) {
+		/*
+		 * The result is on top of the stack: absolute index 1 may hold an
+		 * unrelated value (e.g. the traceback handler)
+		 */
+		ret = lua_tonumber(L, -1);
+	}
+	else {
+		msg_err_task("%s returned wrong return type: %s",
+					 mime_atom->str, lua_typename(L, lua_type(L, -1)));
+	}
+
+	return ret;
+}
+
+/*
+ * Evaluate a single atom for a task.
+ * @param ud the task being processed (struct rspamd_task *), must not be NULL
+ * @param atom the atom to evaluate, must not be NULL
+ * @return regexp processing result, the boolean/number returned by a Lua
+ * function, or the result of an internal function; 0 when a Lua call fails
+ * or returns a value of an unsupported type
+ */
+static gdouble
+rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom)
+{
+	struct rspamd_task *task = (struct rspamd_task *) ud;
+	struct rspamd_mime_atom *mime_atom;
+	lua_State *L;
+	gdouble ret = 0;
+
+	g_assert(task != NULL);
+	g_assert(atom != NULL);
+
+	mime_atom = atom->data;
+
+	if (mime_atom->type == MIME_ATOM_REGEXP) {
+		ret = rspamd_mime_expr_process_regexp(mime_atom->d.re, task);
+	}
+	else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
+		L = task->cfg->lua_state;
+		lua_getglobal(L, mime_atom->d.lua_function);
+		rspamd_lua_task_push(L, task);
+
+		if (lua_pcall(L, 1, 1, 0) != 0) {
+			msg_info_task("lua call to global function '%s' for atom '%s' failed: %s",
+						  mime_atom->d.lua_function,
+						  mime_atom->str,
+						  lua_tostring(L, -1));
+		}
+		else {
+			ret = rspamd_mime_expr_lua_result(task, L, mime_atom);
+		}
+
+		/* Remove either the error message or the result */
+		lua_pop(L, 1);
+	}
+	else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
+		gint err_idx;
+
+		L = task->cfg->lua_state;
+		lua_pushcfunction(L, &rspamd_lua_traceback);
+		err_idx = lua_gettop(L);
+
+		lua_rawgeti(L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
+		rspamd_lua_task_push(L, task);
+
+		if (lua_pcall(L, 1, 1, err_idx) != 0) {
+			msg_info_task("lua call to local function for atom '%s' failed: %s",
+						  mime_atom->str,
+						  lua_tostring(L, -1));
+		}
+		else {
+			ret = rspamd_mime_expr_lua_result(task, L, mime_atom);
+		}
+
+		/*
+		 * Drop only what was pushed here (traceback handler and result),
+		 * leaving values owned by callers on the stack intact
+		 */
+		lua_settop(L, err_idx - 1);
+	}
+	else {
+		ret = rspamd_mime_expr_process_function(mime_atom->d.func, task,
+												task->cfg->lua_state);
+	}
+
+	return ret;
+}
+
+/*
+ * Add a function to the list of internal expression functions.
+ * The list is copied into a new array one element larger, the new entry is
+ * appended and the array is sorted again with fl_cmp, which is the order
+ * relied upon by the bsearch() lookup. A previously allocated list is freed;
+ * the initial list is not.
+ * @param name function name; the pointer is stored as is, not copied
+ * @param func callback implementing the function
+ * @param user_data opaque pointer passed back to `func` on each call
+ */
+void register_expression_function(const gchar *name,
+								  rspamd_internal_func_t func,
+								  void *user_data)
+{
+	/* Plain automatic variable: nothing has to survive between calls */
+	struct _fl *grown;
+
+	functions_number++;
+
+	grown = g_new(struct _fl, functions_number);
+	memcpy(grown, list_ptr, (functions_number - 1) * sizeof(struct _fl));
+
+	if (list_allocated) {
+		g_free(list_ptr);
+	}
+
+	list_allocated = TRUE;
+	grown[functions_number - 1].name = name;
+	grown[functions_number - 1].func = func;
+	grown[functions_number - 1].user_data = user_data;
+	qsort(grown, functions_number, sizeof(struct _fl), fl_cmp);
+	list_ptr = grown;
+}
+
+/*
+ * Placeholder for an encoding comparison: validates that a first plain
+ * argument is present and then always returns TRUE.
+ * Returns FALSE when `task` or `args` is NULL, or when the first argument is
+ * missing or not a plain one.
+ */
+gboolean
+rspamd_compare_encoding(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	/* XXX: really write this function */
+	return TRUE;
+}
+
+/*
+ * Check whether the message has a header with the name given as the first
+ * argument.
+ * Returns FALSE when `task` or `args` is NULL, when the first argument is
+ * missing or not a plain one, or when no such header is found.
+ */
+gboolean
+rspamd_header_exists(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct expression_argument *arg;
+	struct rspamd_mime_header *rh;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	rh = rspamd_message_get_header_array(task,
+										 (gchar *) arg->data, FALSE);
+
+	debug_task("try to get header %s: %d", (gchar *) arg->data,
+			   (rh != NULL));
+
+	return rh != NULL;
+}
+
+
+/*
+ * This function is designed to find difference between text/html and text/plain parts.
+ * It reads the "parts_distance" variable from the task pool and converts the
+ * stored value d into a percentage: diff = (1 - d) * 100.
+ *
+ * Arguments (both optional):
+ *  - threshold: defaults to 100 when absent or when parsing sets errno
+ *  - threshold2: ignored unless it parses to a positive number
+ *
+ * With one threshold TRUE is returned when diff <= threshold.
+ * With two thresholds TRUE is returned when
+ * min(threshold, threshold2) <= diff < max(threshold, threshold2).
+ * FALSE is returned otherwise, when an argument is not a plain one, or when
+ * no "parts_distance" variable is stored for the task.
+ */
+gboolean
+rspamd_parts_distance(struct rspamd_task *task, GArray *args, void *unused)
+{
+	gint threshold, threshold2 = -1;
+	struct expression_argument *arg;
+	gdouble *pdiff, diff;
+
+	if (args == NULL || args->len == 0) {
+		debug_task("no threshold is specified, assume it 100");
+		threshold = 100;
+	}
+	else {
+		arg = &g_array_index(args, struct expression_argument, 0);
+		if (arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+			msg_warn_task("invalid argument to function is passed");
+			return FALSE;
+		}
+
+		errno = 0;
+		threshold = strtoul((gchar *) arg->data, NULL, 10);
+		if (errno != 0) {
+			msg_info_task("bad numeric value for threshold \"%s\", assume it 100",
+						  (gchar *) arg->data);
+			threshold = 100;
+		}
+
+		if (args->len >= 2) {
+			arg = &g_array_index(args, struct expression_argument, 1);
+			if (arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+				msg_warn_task("invalid argument to function is passed");
+				return FALSE;
+			}
+
+			errno = 0;
+			threshold2 = strtoul((gchar *) arg->data, NULL, 10);
+			if (errno != 0) {
+				msg_info_task("bad numeric value for threshold \"%s\", ignore it",
+							  (gchar *) arg->data);
+				threshold2 = -1;
+			}
+		}
+	}
+
+	pdiff = rspamd_mempool_get_variable(task->task_pool, "parts_distance");
+
+	if (pdiff == NULL) {
+		return FALSE;
+	}
+
+	diff = (1.0 - (*pdiff)) * 100.0;
+
+	if (diff == -1) {
+		return FALSE;
+	}
+
+	if (threshold2 > 0) {
+		/* Range check: lower bound inclusive, upper bound exclusive */
+		return diff >= MIN(threshold, threshold2) &&
+			   diff < MAX(threshold, threshold2);
+	}
+
+	return diff <= threshold;
+}
+
+/*
+ * Borrowed view of one recipient used by rspamd_recipients_distance():
+ * the strings are not copied, only pointers and lengths are stored.
+ */
+struct addr_list {
+	const gchar *name; /* filled from the recipient's `addr` field */
+	guint namelen;     /* length of `name` */
+	const gchar *addr; /* filled from the recipient's `domain` field */
+	guint addrlen;     /* length of `addr` */
+};
+
+/*
+ * qsort() comparator for struct addr_list: orders entries by the length of
+ * `addr` first and by its bytes (memcmp) second.
+ */
+static gint
+addr_list_cmp_func(const void *a, const void *b)
+{
+	const struct addr_list *lhs = (const struct addr_list *) a,
+						   *rhs = (const struct addr_list *) b;
+
+	if (lhs->addrlen != rhs->addrlen) {
+		/* Explicit three-way result: unsigned subtraction may wrap */
+		return lhs->addrlen < rhs->addrlen ? -1 : 1;
+	}
+
+	if (lhs->addrlen == 0) {
+		/* Nothing to compare, and the pointers may be NULL */
+		return 0;
+	}
+
+	return memcmp(lhs->addr, rhs->addr, lhs->addrlen);
+}
+
+/*
+ * COMPARE_RCPT_LEN: number of leading characters compared between recipient
+ * addresses; addresses not longer than this are not compared at all.
+ * MIN_RCPT_TO_COMPARE: recipient lists shorter than this are not analysed.
+ */
+#define COMPARE_RCPT_LEN 3
+#define MIN_RCPT_TO_COMPARE 7
+
+/*
+ * Estimate how similar the mime recipients of a message are.
+ * Recipients whose address is longer than COMPARE_RCPT_LEN are collected,
+ * sorted with addr_list_cmp_func, and each neighbouring pair with equal
+ * address length whose first COMPARE_RCPT_LEN characters match
+ * (case-insensitively) counts as a hit.
+ * The first argument is a numeric threshold; TRUE is returned when
+ * (hits * num / 2) / num >= threshold, where num is the number of collected
+ * recipients.
+ * Returns FALSE on invalid arguments, when there are fewer than
+ * MIN_RCPT_TO_COMPARE recipients, or when no recipient qualifies.
+ */
+gboolean
+rspamd_recipients_distance(struct rspamd_task *task, GArray *args,
+						   void *unused)
+{
+	struct expression_argument *arg;
+	struct rspamd_email_address *cur;
+	double threshold;
+	struct addr_list *ar;
+	gint num, i, hits = 0;
+
+	if (args == NULL) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	errno = 0;
+	threshold = strtod((gchar *) arg->data, NULL);
+
+	if (errno != 0) {
+		msg_warn_task("invalid numeric value '%s': %s",
+					  (gchar *) arg->data,
+					  strerror(errno));
+		return FALSE;
+	}
+
+	if (!MESSAGE_FIELD(task, rcpt_mime)) {
+		return FALSE;
+	}
+
+	num = MESSAGE_FIELD(task, rcpt_mime)->len;
+
+	if (num < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+
+	ar = rspamd_mempool_alloc0(task->task_pool, num * sizeof(struct addr_list));
+
+	/* Fill array */
+	num = 0;
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, rcpt_mime), i, cur)
+	{
+		if (cur->addr_len > COMPARE_RCPT_LEN) {
+			ar[num].name = cur->addr;
+			ar[num].namelen = cur->addr_len;
+			ar[num].addr = cur->domain;
+			ar[num].addrlen = cur->domain_len;
+			num++;
+		}
+	}
+
+	if (num == 0) {
+		/* Nothing to compare; also avoids the 0/0 division below */
+		return FALSE;
+	}
+
+	qsort(ar, num, sizeof(*ar), addr_list_cmp_func);
+
+	/* Compare each element with its successor */
+	for (i = 0; i + 1 < num; i++) {
+		if (ar[i].namelen == ar[i + 1].namelen &&
+			rspamd_lc_cmp(ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
+			hits++;
+		}
+	}
+
+	if ((hits * num / 2.) / (double) num >= threshold) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * TRUE when, among the text parts that are not attachments, there is at
+ * least one HTML part and no non-HTML part.
+ */
+gboolean
+rspamd_has_only_html_part(struct rspamd_task *task, GArray *args,
+						  void *unused)
+{
+	struct rspamd_mime_text_part *p;
+	guint i;
+	gboolean seen_html = FALSE;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
+	{
+		if (IS_TEXT_PART_ATTACHMENT(p)) {
+			continue;
+		}
+
+		if (!IS_TEXT_PART_HTML(p)) {
+			/* A single non-HTML text part settles the answer */
+			return FALSE;
+		}
+
+		seen_html = TRUE;
+	}
+
+	return seen_html;
+}
+
+/*
+ * Check that the addresses in `ar` are in strictly increasing order according
+ * to rspamd_ftok_casecmp. An entry is only compared with its predecessor when
+ * the predecessor has a non-zero length.
+ * Lists that are NULL or shorter than MIN_RCPT_TO_COMPARE yield FALSE.
+ */
+static gboolean
+is_recipient_list_sorted(GPtrArray *ar)
+{
+	struct rspamd_email_address *addr;
+	rspamd_ftok_t cur, prev;
+	gint i;
+
+	/* Do not check too short address lists */
+	if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
+		return FALSE;
+	}
+
+	prev.begin = NULL;
+	prev.len = 0;
+
+	PTR_ARRAY_FOREACH(ar, i, addr)
+	{
+		cur.begin = addr->addr;
+		cur.len = addr->addr_len;
+
+		if (prev.len != 0 && rspamd_ftok_casecmp(&cur, &prev) <= 0) {
+			/* Equal or out of order: the list is not sorted */
+			return FALSE;
+		}
+
+		prev = cur;
+	}
+
+	return TRUE;
+}
+
+/*
+ * TRUE when the mime recipients of the message form a sorted list as defined
+ * by is_recipient_list_sorted(); FALSE when there are no mime recipients.
+ */
+gboolean
+rspamd_is_recipients_sorted(struct rspamd_task *task,
+							GArray *args,
+							void *unused)
+{
+	GPtrArray *mime_rcpts = MESSAGE_FIELD(task, rcpt_mime);
+
+	/* Check all types of addresses */
+	return mime_rcpts ? is_recipient_list_sorted(mime_rcpts) : FALSE;
+}
+
+/*
+ * TRUE when at least one text part of the message uses the content transfer
+ * encoding named by the first argument.
+ * Returns FALSE when the argument is missing, not a plain one, or names an
+ * unknown encoding.
+ */
+gboolean
+rspamd_compare_transfer_encoding(struct rspamd_task *task,
+								 GArray *args,
+								 void *unused)
+{
+	struct expression_argument *arg;
+	guint i;
+	struct rspamd_mime_part *part;
+	enum rspamd_cte cte;
+
+	if (args == NULL) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	cte = rspamd_cte_from_string(arg->data);
+
+	if (cte == RSPAMD_CTE_UNKNOWN) {
+		msg_warn_task("unknown cte: %s", arg->data);
+		return FALSE;
+	}
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		if (IS_PART_TEXT(part) && part->cte == cte) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Stub: performs no check at all and unconditionally returns TRUE; none of
+ * the arguments is used.
+ */
+gboolean
+rspamd_is_html_balanced(struct rspamd_task *task, GArray *args, void *unused)
+{
+	/* Totally broken but seems to be never used */
+	return TRUE;
+}
+
+/*
+ * Check whether the tag named by the first argument was seen in any HTML text
+ * part of the message. The result of rspamd_html_tag_seen() for the first
+ * matching part is returned.
+ * Returns FALSE when the argument is missing or not a plain one.
+ */
+gboolean
+rspamd_has_html_tag(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct rspamd_mime_text_part *p;
+	struct expression_argument *arg;
+	guint i;
+	gboolean res;
+
+	if (args == NULL) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
+	{
+		if (IS_TEXT_PART_HTML(p) && p->html) {
+			res = rspamd_html_tag_seen(p->html, arg->data);
+
+			if (res) {
+				return res;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * TRUE when some text part is marked as HTML but rspamd_html_get_tags_count()
+ * reports fewer than two tags for it.
+ */
+gboolean
+rspamd_has_fake_html(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct rspamd_mime_text_part *p;
+	guint i;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
+	{
+		if (IS_TEXT_PART_HTML(p) && (rspamd_html_get_tags_count(p->html) < 2)) {
+			/* The first such part is enough */
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * TRUE when rspamd_message_get_header_array() finds a header with the name
+ * given as the first argument.
+ * Returns FALSE when `task` or `args` is NULL, or when the first argument is
+ * missing or not a plain one.
+ */
+static gboolean
+rspamd_raw_header_exists(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct expression_argument *arg;
+
+	if (args == NULL || task == NULL) {
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+	if (arg == NULL || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid argument to function is passed");
+		return FALSE;
+	}
+
+	return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
+}
+
+/*
+ * Match a piece of SMTP data against an expression argument.
+ * @param task task used for logging
+ * @param arg pattern: a regexp argument is searched in the data, a plain
+ * argument is compared with the whole data case-insensitively
+ * @param what data to check (`len` bytes, not necessarily NUL terminated)
+ * @param len length of the data
+ * @return result of the regexp search (FALSE for empty data), TRUE for an
+ * exact case-insensitive literal match, FALSE otherwise
+ */
+static gboolean
+match_smtp_data(struct rspamd_task *task,
+				struct expression_argument *arg,
+				const gchar *what, gsize len)
+{
+	rspamd_regexp_t *re;
+	gint r = 0;
+
+	if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
+		/* This is a regexp */
+		re = arg->data;
+		if (re == NULL) {
+			msg_warn_task("cannot compile regexp for function");
+			return FALSE;
+		}
+
+		if (len > 0) {
+			r = rspamd_regexp_search(re, what, len, NULL, NULL, FALSE, NULL);
+		}
+
+		return r;
+	}
+
+	if (arg->type == EXPRESSION_ARGUMENT_NORMAL && arg->data != NULL) {
+		/*
+		 * Lengths must be equal: comparing just `len` bytes would accept any
+		 * pattern that merely starts with the data (and anything at all for
+		 * empty data)
+		 */
+		if (strlen(arg->data) == len &&
+			g_ascii_strncasecmp(arg->data, what, len) == 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Match a piece of SMTP-level data against a pattern.
+ * The first argument selects the data (case-insensitively): "from" (task
+ * sender), "helo", "user" (authenticated user), "subject" or "rcpt"
+ * (envelope recipients). The second argument is the pattern handed to
+ * match_smtp_data(); further arguments are ignored.
+ * For "rcpt" TRUE is returned as soon as one recipient matches.
+ * Returns FALSE when arguments are missing or invalid, when the selected
+ * data is absent, or when nothing matches.
+ */
+static gboolean
+rspamd_check_smtp_data(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct expression_argument *arg;
+	struct rspamd_email_address *addr = NULL;
+	GPtrArray *rcpts = NULL;
+	const gchar *type, *str = NULL;
+	guint i;
+
+	if (args == NULL) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	/* The address of an array element is never NULL: check the length */
+	arg = args->len > 0 ? &g_array_index(args, struct expression_argument, 0) : NULL;
+
+	if (arg == NULL || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	type = arg->data;
+
+	if (g_ascii_strcasecmp(type, "from") == 0) {
+		addr = rspamd_task_get_sender(task);
+	}
+	else if (g_ascii_strcasecmp(type, "helo") == 0) {
+		str = task->helo;
+	}
+	else if (g_ascii_strcasecmp(type, "user") == 0) {
+		str = task->auth_user;
+	}
+	else if (g_ascii_strcasecmp(type, "subject") == 0) {
+		str = MESSAGE_FIELD(task, subject);
+	}
+	else if (g_ascii_strcasecmp(type, "rcpt") == 0) {
+		rcpts = task->rcpt_envelope;
+	}
+	else {
+		msg_warn_task("bad argument to function: %s", type);
+		return FALSE;
+	}
+
+	if (str == NULL && addr == NULL && rcpts == NULL) {
+		/* Not enough data so regexp would NOT be found anyway */
+		return FALSE;
+	}
+
+	/* We would process only one more argument, others are ignored */
+	if (args->len < 2) {
+		return FALSE;
+	}
+
+	arg = &g_array_index(args, struct expression_argument, 1);
+
+	if (str != NULL) {
+		return match_smtp_data(task, arg, str, strlen(str));
+	}
+
+	if (addr != NULL && addr->addr) {
+		return match_smtp_data(task, arg, addr->addr, addr->addr_len);
+	}
+
+	if (rcpts != NULL) {
+		for (i = 0; i < rcpts->len; i++) {
+			addr = g_ptr_array_index(rcpts, i);
+
+			if (addr && addr->addr &&
+				match_smtp_data(task, arg,
+								addr->addr, addr->addr_len)) {
+				return TRUE;
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Test a content type attribute value against a pattern argument.
+ * A regexp pattern is searched in the value (an empty value never matches);
+ * any other pattern is treated as a string that must equal the whole value,
+ * ignoring ASCII case.
+ */
+static inline gboolean
+rspamd_check_ct_attr(const gchar *begin, gsize len,
+					 struct expression_argument *arg_pattern)
+{
+	if (arg_pattern->type != EXPRESSION_ARGUMENT_REGEXP) {
+		/* Just do strcasecmp */
+		const gchar *literal = arg_pattern->data;
+
+		if (strlen(literal) != len) {
+			return FALSE;
+		}
+
+		return g_ascii_strncasecmp(literal, begin, len) == 0 ? TRUE : FALSE;
+	}
+
+	if (len == 0) {
+		return FALSE;
+	}
+
+	rspamd_regexp_t *pattern_re = arg_pattern->data;
+
+	return rspamd_regexp_search(pattern_re,
+								begin, len,
+								NULL, NULL, FALSE, NULL)
+			   ? TRUE
+			   : FALSE;
+}
+
+/*
+ * Compare a Content-Type parameter of message parts with a pattern.
+ * Arguments: parameter name (plain), pattern (regexp or plain string) and an
+ * optional flag; when the flag starts with "true" all parts are examined.
+ * Without the flag, scanning continues past a part only once a multipart
+ * part has been seen.
+ * "charset" and "boundary" are additionally checked against the dedicated
+ * content type fields; every name is also looked up in the attributes table.
+ * Parts without a parsed content type are skipped.
+ * Returns TRUE on the first match, FALSE otherwise or when fewer than two
+ * arguments are given.
+ */
+static gboolean
+rspamd_content_type_compare_param(struct rspamd_task *task,
+								  GArray *args,
+								  void *unused)
+{
+
+	struct expression_argument *arg, *arg1, *arg_pattern;
+	gboolean recursive = FALSE;
+	struct rspamd_mime_part *cur_part;
+	struct rspamd_content_type *ct;
+	guint i;
+	rspamd_ftok_t srch, charset_lit, boundary_lit;
+	struct rspamd_content_type_param *found = NULL, *cur;
+	const gchar *param_name;
+
+	if (args == NULL || args->len < 2) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index(args, struct expression_argument, 0);
+	g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+	arg_pattern = &g_array_index(args, struct expression_argument, 1);
+
+	/* These tokens do not depend on the part being examined */
+	RSPAMD_FTOK_FROM_STR(&srch, param_name);
+	RSPAMD_FTOK_FROM_STR(&charset_lit, "charset");
+	RSPAMD_FTOK_FROM_STR(&boundary_lit, "boundary");
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
+	{
+		if (args->len >= 3) {
+			arg1 = &g_array_index(args, struct expression_argument, 2);
+			if (g_ascii_strncasecmp(arg1->data, "true",
+									sizeof("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (IS_PART_MULTIPART(cur_part)) {
+				recursive = TRUE;
+			}
+		}
+
+		ct = cur_part->ct;
+
+		/* A part may have no parsed content type: nothing to compare then */
+		if (ct != NULL) {
+			if (rspamd_ftok_equal(&srch, &charset_lit)) {
+				if (rspamd_check_ct_attr(ct->charset.begin,
+										 ct->charset.len, arg_pattern)) {
+					return TRUE;
+				}
+			}
+
+			if (rspamd_ftok_equal(&srch, &boundary_lit)) {
+				if (rspamd_check_ct_attr(ct->orig_boundary.begin,
+										 ct->orig_boundary.len, arg_pattern)) {
+					return TRUE;
+				}
+			}
+
+			if (ct->attrs) {
+				found = g_hash_table_lookup(ct->attrs, &srch);
+
+				if (found) {
+					DL_FOREACH(found, cur)
+					{
+						if (rspamd_check_ct_attr(cur->value.begin,
+												 cur->value.len, arg_pattern)) {
+							return TRUE;
+						}
+					}
+				}
+			}
+		}
+
+		if (!recursive) {
+			break;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Check whether message parts have a given Content-Type parameter.
+ * Arguments: parameter name (plain) and an optional flag; when the flag
+ * starts with "true" all parts are examined. Without the flag, scanning
+ * continues past a part only once a multipart part has been seen.
+ * "charset" and "boundary" are satisfied by a non-empty dedicated field;
+ * every name is also looked up in the attributes table.
+ * Parts without a parsed content type are skipped.
+ * Returns TRUE when the parameter is found, FALSE otherwise or when no
+ * argument is given.
+ */
+static gboolean
+rspamd_content_type_has_param(struct rspamd_task *task,
+							  GArray *args,
+							  void *unused)
+{
+	struct expression_argument *arg, *arg1;
+	gboolean recursive = FALSE;
+	struct rspamd_mime_part *cur_part;
+	struct rspamd_content_type *ct;
+	guint i;
+	rspamd_ftok_t srch, charset_lit, boundary_lit;
+	struct rspamd_content_type_param *found = NULL;
+	const gchar *param_name;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	arg = &g_array_index(args, struct expression_argument, 0);
+	g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
+	param_name = arg->data;
+
+	/* These tokens do not depend on the part being examined */
+	RSPAMD_FTOK_FROM_STR(&srch, param_name);
+	RSPAMD_FTOK_FROM_STR(&charset_lit, "charset");
+	RSPAMD_FTOK_FROM_STR(&boundary_lit, "boundary");
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
+	{
+		if (args->len >= 2) {
+			arg1 = &g_array_index(args, struct expression_argument, 1);
+			if (g_ascii_strncasecmp(arg1->data, "true",
+									sizeof("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (IS_PART_MULTIPART(cur_part)) {
+				recursive = TRUE;
+			}
+		}
+
+		ct = cur_part->ct;
+
+		/* A part may have no parsed content type: nothing to look up then */
+		if (ct != NULL) {
+			if (rspamd_ftok_equal(&srch, &charset_lit)) {
+				if (ct->charset.len > 0) {
+					return TRUE;
+				}
+			}
+
+			if (rspamd_ftok_equal(&srch, &boundary_lit)) {
+				if (ct->boundary.len > 0) {
+					return TRUE;
+				}
+			}
+
+			if (ct->attrs) {
+				found = g_hash_table_lookup(ct->attrs, &srch);
+
+				if (found) {
+					return TRUE;
+				}
+			}
+		}
+
+		if (!recursive) {
+			break;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Match the content type (or subtype) of message parts against a pattern.
+ * Arguments: pattern (regexp or plain string, compared case-insensitively)
+ * and an optional flag; when the flag starts with "true" all parts are
+ * examined. Without the flag, scanning continues past a part only once a
+ * multipart part has been seen.
+ * Parts without a parsed content type are skipped.
+ * @param check_subtype TRUE to test the subtype, FALSE to test the type
+ * @return TRUE on the first match, FALSE otherwise or without arguments
+ */
+static gboolean
+rspamd_content_type_check(struct rspamd_task *task,
+						  GArray *args,
+						  gboolean check_subtype)
+{
+	rspamd_ftok_t *param_data, srch;
+	rspamd_regexp_t *re;
+	struct expression_argument *arg1, *arg_pattern;
+	struct rspamd_content_type *ct;
+	gint r = 0;
+	guint i;
+	gboolean recursive = FALSE;
+	struct rspamd_mime_part *cur_part;
+
+	if (args == NULL || args->len < 1) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	arg_pattern = &g_array_index(args, struct expression_argument, 0);
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
+	{
+		ct = cur_part->ct;
+
+		if (args->len >= 2) {
+			arg1 = &g_array_index(args, struct expression_argument, 1);
+			if (g_ascii_strncasecmp(arg1->data, "true",
+									sizeof("true") - 1) == 0) {
+				recursive = TRUE;
+			}
+		}
+		else {
+			/*
+			 * If user did not specify argument, let's assume that he wants
+			 * recursive search if mime part is multipart/mixed
+			 */
+			if (IS_PART_MULTIPART(cur_part)) {
+				recursive = TRUE;
+			}
+		}
+
+		/* A part may have no parsed content type: nothing to match then */
+		if (ct != NULL) {
+			param_data = check_subtype ? &ct->subtype : &ct->type;
+
+			if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
+				re = arg_pattern->data;
+
+				if (param_data->len > 0) {
+					r = rspamd_regexp_search(re, param_data->begin, param_data->len,
+											 NULL, NULL, FALSE, NULL);
+				}
+
+				if (r) {
+					return TRUE;
+				}
+			}
+			else {
+				/* Just do strcasecmp */
+				srch.begin = arg_pattern->data;
+				srch.len = strlen(arg_pattern->data);
+
+				if (rspamd_ftok_casecmp(param_data, &srch) == 0) {
+					return TRUE;
+				}
+			}
+		}
+
+		/* Get next part */
+		if (!recursive) {
+			break;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function wrapper: runs rspamd_content_type_check() against the
+ * content type (check_subtype = FALSE). `unused` is ignored.
+ */
+static gboolean
+rspamd_content_type_is_type(struct rspamd_task *task,
+							GArray *args,
+							void *unused)
+{
+	return rspamd_content_type_check(task, args, FALSE);
+}
+
+/*
+ * Expression function wrapper: runs rspamd_content_type_check() against the
+ * content subtype (check_subtype = TRUE). `unused` is ignored.
+ */
+static gboolean
+rspamd_content_type_is_subtype(struct rspamd_task *task,
+							   GArray *args,
+							   void *unused)
+{
+	return rspamd_content_type_check(task, args, TRUE);
+}
+
+/*
+ * Match the subtype of a content type against an argument.
+ * A regexp argument is searched in the subtype (an empty subtype never
+ * matches); any other argument is compared as a string with
+ * rspamd_ftok_casecmp. NULL inputs are logged and yield FALSE.
+ */
+static gboolean
+compare_subtype(struct rspamd_task *task, struct rspamd_content_type *ct,
+				struct expression_argument *subtype)
+{
+	rspamd_regexp_t *re;
+	rspamd_ftok_t wanted;
+
+	if (subtype == NULL || ct == NULL) {
+		msg_warn_task("invalid parameters passed");
+		return FALSE;
+	}
+
+	if (subtype->type != EXPRESSION_ARGUMENT_REGEXP) {
+		/* Just do strcasecmp */
+		wanted.begin = subtype->data;
+		wanted.len = strlen(subtype->data);
+
+		return rspamd_ftok_casecmp(&ct->subtype, &wanted) == 0 ? TRUE : FALSE;
+	}
+
+	if (ct->subtype.len == 0) {
+		return FALSE;
+	}
+
+	re = subtype->data;
+
+	return rspamd_regexp_search(re, ct->subtype.begin, ct->subtype.len,
+								NULL, NULL, FALSE, NULL);
+}
+
+/*
+ * Check the parsed data length of a part against optional bounds.
+ * A bound equal to 0 is not applied; both bounds are inclusive.
+ */
+static gboolean
+compare_len(struct rspamd_mime_part *part, guint min, guint max)
+{
+	if (min != 0 && part->parsed_data.len < min) {
+		return FALSE;
+	}
+
+	if (max != 0 && part->parsed_data.len > max) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Look for a message part with a given content type.
+ * @param task task whose parts are scanned
+ * @param param_type type pattern: regexp, or a string compared
+ * case-insensitively
+ * @param param_subtype optional subtype pattern (may be NULL), handled by
+ * compare_subtype()
+ * @param min_len minimum parsed data length, 0 for no lower bound
+ * @param max_len maximum parsed data length, 0 for no upper bound
+ * @return TRUE when some part satisfies the type, the subtype (if given) and
+ * the length bounds; parts without a parsed content type are skipped
+ */
+static gboolean
+common_has_content_part(struct rspamd_task *task,
+						struct expression_argument *param_type,
+						struct expression_argument *param_subtype,
+						gint min_len,
+						gint max_len)
+{
+	rspamd_regexp_t *re;
+	struct rspamd_mime_part *part;
+	struct rspamd_content_type *ct;
+	rspamd_ftok_t srch;
+	guint i;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		/* Evaluated afresh for every part: no stale match is carried over */
+		gboolean type_matched = FALSE;
+
+		ct = part->ct;
+
+		if (ct == NULL) {
+			continue;
+		}
+
+		if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
+			re = param_type->data;
+
+			if (ct->type.len > 0) {
+				type_matched = rspamd_regexp_search(re, ct->type.begin,
+													ct->type.len,
+													NULL, NULL, FALSE, NULL);
+			}
+		}
+		else {
+			/* Just do strcasecmp */
+			srch.begin = param_type->data;
+			srch.len = strlen(param_type->data);
+			type_matched = (rspamd_ftok_casecmp(&ct->type, &srch) == 0);
+		}
+
+		if (!type_matched) {
+			continue;
+		}
+
+		/*
+		 * Regexp and string type patterns share the remaining checks, so a
+		 * regexp without a subtype can match and a failed part does not end
+		 * the scan
+		 */
+		if (param_subtype != NULL &&
+			!compare_subtype(task, ct, param_subtype)) {
+			continue;
+		}
+
+		if (compare_len(part, min_len, max_len)) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Expression function: TRUE when the message has a part whose content type
+ * matches the first argument and, if a second argument is given, whose
+ * subtype matches it. No length bounds are applied.
+ * Returns FALSE when no argument is given.
+ */
+static gboolean
+rspamd_has_content_part(struct rspamd_task *task, GArray *args, void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+
+	/* An empty array has no first element to take the type from */
+	if (args == NULL || args->len == 0) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index(args, struct expression_argument, 0);
+	if (args->len >= 2) {
+		param_subtype = &g_array_index(args, struct expression_argument, 1);
+	}
+
+	return common_has_content_part(task, param_type, param_subtype, 0, 0);
+}
+
+/*
+ * Expression function: like rspamd_has_content_part() with optional length
+ * bounds. Arguments: type, optional subtype, optional minimum length and
+ * optional maximum length (0 means no bound).
+ * Returns FALSE when no argument is given or when a length cannot be parsed.
+ */
+static gboolean
+rspamd_has_content_part_len(struct rspamd_task *task,
+							GArray *args,
+							void *unused)
+{
+	struct expression_argument *param_type = NULL, *param_subtype = NULL;
+	gint min = 0, max = 0;
+	struct expression_argument *arg;
+
+	/* An empty array has no first element to take the type from */
+	if (args == NULL || args->len == 0) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	param_type = &g_array_index(args, struct expression_argument, 0);
+
+	if (args->len >= 2) {
+		param_subtype = &g_array_index(args, struct expression_argument, 1);
+	}
+
+	if (args->len >= 3) {
+		arg = &g_array_index(args, struct expression_argument, 2);
+		/* Check the argument kind before treating its data as a string */
+		g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
+		errno = 0;
+		min = strtoul(arg->data, NULL, 10);
+
+		if (errno != 0) {
+			msg_warn_task("invalid numeric value '%s': %s",
+						  (gchar *) arg->data,
+						  strerror(errno));
+			return FALSE;
+		}
+	}
+
+	if (args->len >= 4) {
+		arg = &g_array_index(args, struct expression_argument, 3);
+		g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
+		errno = 0;
+		max = strtoul(arg->data, NULL, 10);
+
+		if (errno != 0) {
+			msg_warn_task("invalid numeric value '%s': %s",
+						  (gchar *) arg->data,
+						  strerror(errno));
+			return FALSE;
+		}
+	}
+
+	return common_has_content_part(task, param_type, param_subtype, min, max);
+}
+
+/*
+ * TRUE when no part of the message has any parsed data (also when the
+ * message has no parts at all).
+ */
+static gboolean
+rspamd_is_empty_body(struct rspamd_task *task,
+					 GArray *args,
+					 void *unused)
+{
+	struct rspamd_mime_part *part;
+	guint i;
+	gboolean has_data = FALSE;
+
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
+	{
+		if (part->parsed_data.len > 0) {
+			has_data = TRUE;
+			break;
+		}
+	}
+
+	return !has_data;
+}
+
+/*
+ * Helpers for rspamd_has_flag_expr(). They expand to statements that use
+ * variables of the enclosing scope: `task`, `result` and `found`.
+ *
+ * TASK_FLAG_READ / TASK_PROTOCOL_FLAG_READ store in `result` whether `flag`
+ * is set in task->flags / task->protocol_flags.
+ * TASK_GET_FLAG / TASK_GET_PROTOCOL_FLAG do that only when nothing has been
+ * found yet and the string `flag` equals `strname`; they then set `found`.
+ */
+#define TASK_FLAG_READ(flag)               \
+	do {                                   \
+		result = !!(task->flags & (flag)); \
+	} while (0)
+
+#define TASK_GET_FLAG(flag, strname, macro)           \
+	do {                                              \
+		if (!found && strcmp((flag), strname) == 0) { \
+			TASK_FLAG_READ((macro));                  \
+			found = TRUE;                             \
+		}                                             \
+	} while (0)
+
+#define TASK_PROTOCOL_FLAG_READ(flag)               \
+	do {                                            \
+		result = !!(task->protocol_flags & (flag)); \
+	} while (0)
+
+#define TASK_GET_PROTOCOL_FLAG(flag, strname, macro)  \
+	do {                                              \
+		if (!found && strcmp((flag), strname) == 0) { \
+			TASK_PROTOCOL_FLAG_READ((macro));         \
+			found = TRUE;                             \
+		}                                             \
+	} while (0)
+
+
+/*
+ * Expression function: report whether the task flag named by the first
+ * argument is set. Names are matched exactly (case-sensitively); both task
+ * flags and protocol flags are covered.
+ * Returns FALSE when the argument is missing, not a plain one, or names an
+ * unknown flag (the latter is logged).
+ */
+static gboolean
+rspamd_has_flag_expr(struct rspamd_task *task,
+					 GArray *args,
+					 void *unused)
+{
+	gboolean found = FALSE, result = FALSE;
+	struct expression_argument *flag_arg;
+	const gchar *flag_str;
+
+	/* An empty array has no first element to read the flag name from */
+	if (args == NULL || args->len == 0) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	flag_arg = &g_array_index(args, struct expression_argument, 0);
+
+	if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid parameter to function");
+		return FALSE;
+	}
+
+	flag_str = (const gchar *) flag_arg->data;
+
+	TASK_GET_FLAG(flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
+	TASK_GET_FLAG(flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
+	TASK_GET_FLAG(flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
+	TASK_GET_FLAG(flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
+	TASK_GET_PROTOCOL_FLAG(flag_str, "extended_urls",
+						   RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
+	TASK_GET_FLAG(flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
+	TASK_GET_FLAG(flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
+	TASK_GET_FLAG(flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
+	TASK_GET_FLAG(flag_str, "broken_headers",
+				  RSPAMD_TASK_FLAG_BROKEN_HEADERS);
+	TASK_GET_FLAG(flag_str, "skip_process",
+				  RSPAMD_TASK_FLAG_SKIP_PROCESS);
+	TASK_GET_PROTOCOL_FLAG(flag_str, "milter",
+						   RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
+	TASK_GET_FLAG(flag_str, "bad_unicode",
+				  RSPAMD_TASK_FLAG_BAD_UNICODE);
+
+	if (!found) {
+		msg_warn_task("invalid flag name %s", flag_str);
+		return FALSE;
+	}
+
+	return result;
+}
+
+/*
+ * Expression function: TRUE when rspamd_task_find_symbol_result() finds a
+ * result for the symbol named by the first argument.
+ * Returns FALSE when the argument is missing or not a plain one.
+ */
+static gboolean
+rspamd_has_symbol_expr(struct rspamd_task *task,
+					   GArray *args,
+					   void *unused)
+{
+	struct expression_argument *sym_arg;
+	const gchar *symbol_str;
+
+	/* An empty array has no first element to read the symbol name from */
+	if (args == NULL || args->len == 0) {
+		msg_warn_task("no parameters to function");
+		return FALSE;
+	}
+
+	sym_arg = &g_array_index(args, struct expression_argument, 0);
+
+	if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
+		msg_warn_task("invalid parameter to function");
+		return FALSE;
+	}
+
+	symbol_str = (const gchar *) sym_arg->data;
+
+	if (rspamd_task_find_symbol_result(task, symbol_str, NULL)) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
diff --git a/src/libmime/mime_expressions.h b/src/libmime/mime_expressions.h
new file mode 100644
index 0000000..a2ea3fe
--- /dev/null
+++ b/src/libmime/mime_expressions.h
@@ -0,0 +1,65 @@
+/**
+ * @file mime_expressions.h
+ * Rspamd MIME expressions API
+ */
+
+#ifndef RSPAMD_EXPRESSIONS_H
+#define RSPAMD_EXPRESSIONS_H
+
+#include "config.h"
+#include "expression.h"
+#include "contrib/libucl/ucl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_config;
+
+/**
+ * User data bundle for MIME expressions: a configuration pointer plus a
+ * read-only UCL object.
+ * NOTE(review): presumably handed to the expression parser as its user data;
+ * confirm against the callers in mime_expressions.c.
+ */
+struct rspamd_mime_expr_ud {
+	struct rspamd_config *cfg;    /**< configuration object */
+	const ucl_object_t *conf_obj; /**< UCL object, never modified through this pointer */
+};
+
+/**
+ * Atom subroutines table for MIME expressions (defined outside this header).
+ */
+extern const struct rspamd_atom_subr mime_expr_subr;
+
+/**
+ * Kinds of arguments that can be passed to an expression function
+ */
+enum rspamd_expression_type {
+	EXPRESSION_ARGUMENT_NORMAL = 0, /**< plain argument; internal functions read its data as a string */
+	EXPRESSION_ARGUMENT_BOOL,       /**< boolean argument */
+	EXPRESSION_ARGUMENT_REGEXP      /**< regular expression argument */
+};
+/**
+ * Function's argument
+ */
+struct expression_argument {
+	enum rspamd_expression_type type; /**< type of argument (one of enum rspamd_expression_type)	*/
+	void *data;                       /**< pointer to its data; interpretation depends on type	*/
+};
+
+
+/**
+ * Signature of an internal expression function.
+ * It receives the task, the array of arguments (elements are
+ * struct expression_argument) and the user data supplied at registration,
+ * and returns the boolean result of the function.
+ */
+typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *,
+										   GArray *args, void *user_data);
+
+
+/**
+ * Register specified function to rspamd internal functions list
+ * @param name name of function
+ * @param func pointer to function
+ * @param user_data opaque pointer passed to func as its user_data argument
+ */
+void register_expression_function(const gchar *name,
+								  rspamd_internal_func_t func,
+								  void *user_data);
+
+/**
+ * Set the global limit on the size of data processed by regexps
+ * @param limit new limit in bytes
+ * @return old limit value
+ */
+guint rspamd_mime_expression_set_re_limit(guint limit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
new file mode 100644
index 0000000..2bd559d
--- /dev/null
+++ b/src/libmime/mime_headers.c
@@ -0,0 +1,1441 @@
+/*
+ * Copyright 2024 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mime_headers.h"
+#include "smtp_parsers.h"
+#include "mime_encoding.h"
+#include "received.h"
+#include "contrib/uthash/utlist.h"
+#include "libserver/mempool_vars_internal.h"
+#include "libserver/cfg_file.h"
+#include "libutil/util.h"
+#include <unicode/utf8.h>
+
+/*
+ * Hash map type `rspamd_mime_headers_htb`: header name (gchar *) to the first
+ * struct rspamd_mime_header with that name. Keys are hashed and compared with
+ * rspamd_strcase_hash/rspamd_strcase_equal
+ * (NOTE(review): presumably case-insensitive, judging by the names).
+ */
+KHASH_INIT(rspamd_mime_headers_htb, gchar *,
+		   struct rspamd_mime_header *, 1,
+		   rspamd_strcase_hash, rspamd_strcase_equal);
+
+/*
+ * Reference-counted table of headers. The hash is embedded by value, so the
+ * destructor releases its internal arrays and then the table itself.
+ */
+struct rspamd_mime_headers_table {
+	khash_t(rspamd_mime_headers_htb) htb; /* name -> header chain */
+	ref_entry_t ref;                      /* reference counter and destructor */
+};
+
+/**
+ * Applies extra processing to well-known headers, selected by a
+ * case-insensitive hash of the header name: parses Received headers, collects
+ * MIME recipients and senders, extracts the message id, and records subject,
+ * return-path and delivered-to on the task. It also sets the matching
+ * RSPAMD_HEADER_* flags on the header.
+ * @param task task that owns the header and receives the extracted fields
+ * @param rh header with name, value and decoded value already filled in
+ */
+static void
+rspamd_mime_header_check_special(struct rspamd_task *task,
+								 struct rspamd_mime_header *rh)
+{
+	guint64 h;
+	const gchar *p, *end;
+	gchar *id;
+	gint max_recipients = -1, len;
+
+	if (task->cfg) {
+		max_recipients = task->cfg->max_recipients;
+	}
+
+	h = rspamd_icase_hash(rh->name, strlen(rh->name), 0xdeadbabe);
+
+	/*
+	 * NOTE(review): several cases below assign `flags =` instead of `|=`,
+	 * which drops separator flags set while the header was parsed. Left
+	 * unchanged here as callers may depend on it; confirm the intent.
+	 */
+	switch (h) {
+	case 0x88705DC4D9D61ABULL: /* received */
+		if (rspamd_received_header_parse(task, rh->decoded, strlen(rh->decoded), rh)) {
+			rh->flags |= RSPAMD_HEADER_RECEIVED;
+		}
+		break;
+	case 0x76F31A09F4352521ULL: /* to */
+		MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
+																		rh->value, strlen(rh->value),
+																		MESSAGE_FIELD(task, rcpt_mime), max_recipients);
+		rh->flags |= RSPAMD_HEADER_TO | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0x7EB117C1480B76ULL: /* cc */
+		MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
+																		rh->value, strlen(rh->value),
+																		MESSAGE_FIELD(task, rcpt_mime), max_recipients);
+		rh->flags |= RSPAMD_HEADER_CC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0xE4923E11C4989C8DULL: /* bcc */
+		MESSAGE_FIELD(task, rcpt_mime) = rspamd_email_address_from_mime(task->task_pool,
+																		rh->value, strlen(rh->value),
+																		MESSAGE_FIELD(task, rcpt_mime), max_recipients);
+		rh->flags |= RSPAMD_HEADER_BCC | RSPAMD_HEADER_RCPT | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0x41E1985EDC1CBDE4ULL: /* from */
+		MESSAGE_FIELD(task, from_mime) = rspamd_email_address_from_mime(task->task_pool,
+																		rh->value, strlen(rh->value),
+																		MESSAGE_FIELD(task, from_mime), max_recipients);
+		rh->flags |= RSPAMD_HEADER_FROM | RSPAMD_HEADER_SENDER | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0x43A558FC7C240226ULL: /* message-id */ {
+
+		rh->flags = RSPAMD_HEADER_MESSAGE_ID | RSPAMD_HEADER_UNIQUE;
+		len = rspamd_strip_smtp_comments_inplace(rh->decoded, strlen(rh->decoded));
+		rh->decoded[len] = '\0'; /* Zero terminate after stripping */
+		/* Strip surrounding spaces */
+		rh->decoded = g_strstrip(rh->decoded);
+		/*
+		 * g_strstrip() shifts and truncates the string in place, so the
+		 * bounds must be measured after it, not taken from the old length.
+		 */
+		p = rh->decoded;
+		end = p + strlen(p);
+
+		if (*p == '<') {
+			p++;
+		}
+
+		if (end > p) {
+			gchar *d;
+
+			if (*(end - 1) == '>') {
+				end--;
+			}
+
+			id = rspamd_mempool_alloc(task->task_pool, end - p + 1);
+			d = id;
+
+			/* Copy the id, replacing every non-graphic character with '?' */
+			while (p < end) {
+				if (g_ascii_isgraph(*p)) {
+					*d++ = *p++;
+				}
+				else {
+					*d++ = '?';
+					p++;
+				}
+			}
+
+			*d = '\0';
+
+			MESSAGE_FIELD(task, message_id) = id;
+		}
+
+		break;
+	}
+	case 0xB91D3910358E8212ULL: /* subject */
+		/* Only the first subject header is recorded */
+		if (MESSAGE_FIELD(task, subject) == NULL) {
+			MESSAGE_FIELD(task, subject) = rh->decoded;
+		}
+		rh->flags = RSPAMD_HEADER_SUBJECT | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0xEE4AA2EAAC61D6F4ULL: /* return-path */
+		/* Used as the envelope sender only when none is known yet */
+		if (task->from_envelope == NULL) {
+			task->from_envelope = rspamd_email_address_from_smtp(rh->decoded,
+																 strlen(rh->decoded));
+		}
+		rh->flags = RSPAMD_HEADER_RETURN_PATH | RSPAMD_HEADER_UNIQUE;
+		break;
+	case 0xB9EEFAD2E93C2161ULL: /* delivered-to */
+		if (task->deliver_to == NULL) {
+			task->deliver_to = rh->decoded;
+		}
+		rh->flags = RSPAMD_HEADER_DELIVERED_TO;
+		break;
+	case 0x2EC3BFF3C393FC10ULL: /* date */
+	case 0xAC0DDB1A1D214CAULL:  /* sender */
+	case 0x54094572367AB695ULL: /* in-reply-to */
+	case 0x81CD9E9131AB6A9AULL: /* content-type */
+	case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
+	case 0xB3F6704CB3AD6589ULL: /* references */
+		rh->flags = RSPAMD_HEADER_UNIQUE;
+		break;
+	default:
+		/* Not a special header, nothing to do */
+		break;
+	}
+}
+
+/**
+ * Inserts a header into the name-keyed hash and into the ordered list.
+ * A header whose name is already present is appended to the existing chain;
+ * otherwise it becomes the head of a new chain. The ordered list is built by
+ * prepending.
+ * @param task task used for logging and special header processing
+ * @param target hash keyed by header name
+ * @param order_ptr head of the ordered list (linked through ord_next)
+ * @param rh header to insert
+ * @param check_special if true, run rspamd_mime_header_check_special on rh
+ */
+static void
+rspamd_mime_header_add(struct rspamd_task *task,
+					   khash_t(rspamd_mime_headers_htb) * target,
+					   struct rspamd_mime_header **order_ptr,
+					   struct rspamd_mime_header *rh,
+					   gboolean check_special)
+{
+	struct rspamd_mime_header *chain_head;
+	khiter_t slot;
+	int put_ret;
+
+	slot = kh_put(rspamd_mime_headers_htb, target, rh->name, &put_ret);
+
+	if (put_ret != 0) {
+		/* First header with this name: it starts a chain of its own */
+		kh_value(target, slot) = rh;
+		rh->prev = rh;
+		rh->next = NULL;
+		msg_debug_task("add new raw header %s: %s", rh->name, rh->value);
+	}
+	else {
+		/* Name already known: link after the headers seen so far */
+		chain_head = kh_value(target, slot);
+		DL_APPEND(chain_head, rh);
+		msg_debug_task("append raw header %s: %s", rh->name, rh->value);
+	}
+
+	LL_PREPEND2(*order_ptr, rh, ord_next);
+
+	if (check_special) {
+		rspamd_mime_header_check_special(task, rh);
+	}
+}
+
+
+/**
+ * Parses a raw headers block into struct rspamd_mime_header elements, adding
+ * each one to `target` and to the ordered list `*order_ptr`.
+ *
+ * The parser is a state machine:
+ *   0   - start of a header line
+ *   1   - reading the header name up to ':'
+ *   2   - reading the separator between ':' and the value
+ *   3   - reading the value
+ *   4   - finalising a header that has a value
+ *   5   - finalising a header that has only a name
+ *   99  - newline/folding handling
+ *   100 - skipping a broken line
+ *
+ * @param task task that owns the memory pool and receives flags
+ * @param target headers table to fill
+ * @param order_ptr head of the list of headers in order of appearance
+ * @param in raw headers data
+ * @param len length of `in`
+ * @param check_newlines if true, newline styles are counted and the dominant
+ * one is stored in the message, special headers are processed, and a hash of
+ * header names is stored in the task pool
+ */
+void rspamd_mime_headers_process(struct rspamd_task *task,
+								 struct rspamd_mime_headers_table *target,
+								 struct rspamd_mime_header **order_ptr,
+								 const gchar *in, gsize len,
+								 gboolean check_newlines)
+{
+	struct rspamd_mime_header *nh = NULL;
+	const gchar *p, *c, *end;
+	gchar *tmp, *tp;
+	gint state = 0, l, next_state = 100, err_state = 100, t_state;
+	gboolean valid_folding = FALSE, shift_by_one = FALSE;
+	guint nlines_count[RSPAMD_TASK_NEWLINES_MAX];
+	guint norder = 0;
+
+	p = in;
+	end = p + len;
+	c = p;
+	memset(nlines_count, 0, sizeof(nlines_count));
+	msg_debug_task("start processing headers");
+
+	while (p < end) {
+		/* FSM for processing headers */
+		switch (state) {
+		case 0:
+			/* Begin processing headers */
+			if (!g_ascii_isalpha(*p)) {
+				/* We have some garbage at the beginning of headers, skip this line */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				state = 1;
+				c = p;
+			}
+			break;
+		case 1:
+			/* We got something like header's name */
+			if (*p == ':') {
+				nh = rspamd_mempool_alloc0(task->task_pool,
+										   sizeof(struct rspamd_mime_header));
+				l = p - c;
+				tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
+				rspamd_null_safe_copy(c, l, tmp, l + 1);
+				nh->name = tmp;
+				/* Assume no separator until a space or tab is seen in state 2 */
+				nh->flags |= RSPAMD_HEADER_EMPTY_SEPARATOR;
+				nh->raw_value = c;
+				nh->raw_len = p - c; /* Including trailing ':' */
+				p++;
+				state = 2;
+				c = p;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* Not header but some garbage */
+				if (target == MESSAGE_FIELD(task, raw_headers)) {
+					/* Do not propagate flag from the attachments */
+					task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+				}
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				p++;
+			}
+			break;
+		case 2:
+			/* We got header's name, so skip any \t or spaces */
+			if (*p == '\t') {
+				nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
+				nh->flags |= RSPAMD_HEADER_TAB_SEPARATED;
+				p++;
+			}
+			else if (*p == ' ') {
+				nh->flags &= ~RSPAMD_HEADER_EMPTY_SEPARATOR;
+				p++;
+			}
+			else if (*p == '\n' || *p == '\r') {
+
+				if (check_newlines) {
+					if (*p == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
+					}
+					else if (p + 1 < end && *(p + 1) == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
+					}
+					else {
+						nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
+					}
+				}
+
+				/* Process folding */
+				state = 99;
+				l = p - c;
+				if (l > 0) {
+					tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
+					rspamd_null_safe_copy(c, l, tmp, l + 1);
+					nh->separator = tmp;
+				}
+				/* A folded value continues in state 3; otherwise the header is empty */
+				next_state = 3;
+				err_state = 5;
+				c = p;
+			}
+			else {
+				/* Process value */
+				l = p - c;
+				if (l >= 0) {
+					tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
+					rspamd_null_safe_copy(c, l, tmp, l + 1);
+					nh->separator = tmp;
+				}
+				c = p;
+				state = 3;
+			}
+			break;
+		case 3:
+			if (*p == '\r' || *p == '\n') {
+				/* Hold folding */
+				if (check_newlines) {
+					if (*p == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
+					}
+					else if (p + 1 < end && *(p + 1) == '\n') {
+						nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
+					}
+					else {
+						nlines_count[RSPAMD_TASK_NEWLINES_CR]++;
+					}
+				}
+				/* Folded lines keep reading the value; anything else ends it */
+				state = 99;
+				next_state = 3;
+				err_state = 4;
+			}
+			else if (p + 1 == end) {
+				state = 4;
+			}
+			else {
+				p++;
+			}
+			break;
+		case 4:
+			/* Copy header's value */
+
+			/*
+			 * XXX:
+			 * The original decision to use here null terminated
+			 * strings was extremely poor!
+			 */
+			l = p - c;
+			tmp = rspamd_mempool_alloc(task->task_pool, l + 1);
+			tp = tmp;
+			t_state = 0;
+			/*
+			 * Unfold while copying: a newline becomes a single space, the
+			 * whitespace that follows it is dropped, embedded zero bytes are
+			 * skipped.
+			 */
+			while (l--) {
+				if (t_state == 0) {
+					/* Before folding */
+					if (*c == '\n' || *c == '\r') {
+						t_state = 1;
+						c++;
+						*tp++ = ' ';
+					}
+					else {
+						if (*c != '\0') {
+							*tp++ = *c++;
+						}
+						else {
+							c++;
+						}
+					}
+				}
+				else if (t_state == 1) {
+					/* Inside folding */
+					if (g_ascii_isspace(*c)) {
+						c++;
+					}
+					else {
+						t_state = 0;
+						if (*c != '\0') {
+							*tp++ = *c++;
+						}
+						else {
+							c++;
+						}
+					}
+				}
+			}
+			/* Strip last space that can be added by \r\n parsing */
+			if (tp > tmp && *(tp - 1) == ' ') {
+				tp--;
+			}
+
+			*tp = '\0';
+			/* Strip the initial spaces that could also be added by folding */
+			while (*tmp != '\0' && g_ascii_isspace(*tmp)) {
+				tmp++;
+			}
+
+			if (p + 1 == end) {
+				nh->raw_len = end - nh->raw_value;
+			}
+			else {
+				nh->raw_len = p - nh->raw_value;
+			}
+
+			nh->value = tmp;
+
+			gboolean broken_utf = FALSE;
+
+			nh->decoded = rspamd_mime_header_decode(task->task_pool,
+													nh->value, strlen(tmp), &broken_utf);
+
+			if (broken_utf) {
+				task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+			}
+
+			if (nh->decoded == NULL) {
+				/* As we strip comments in place... */
+				nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
+			}
+
+			/* We also validate utf8 and replace all non-valid utf8 chars */
+			rspamd_mime_charset_utf_enforce(nh->decoded, strlen(nh->decoded));
+			nh->order = norder++;
+			rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
+			nh = NULL;
+			state = 0;
+			break;
+		case 5:
+			/* Header has only name, no value */
+			nh->value = rspamd_mempool_strdup(task->task_pool, "");
+			nh->decoded = rspamd_mempool_strdup(task->task_pool, "");
+			nh->raw_len = p - nh->raw_value;
+			if (shift_by_one) {
+				nh->raw_len++;
+			}
+			nh->order = norder++;
+			rspamd_mime_header_add(task, &target->htb, order_ptr, nh, check_newlines);
+			nh = NULL;
+			state = 0;
+			break;
+		case 99:
+			/* Folding state */
+			if (p + 1 == end) {
+				state = err_state;
+				/* Include the last character into the next header */
+				shift_by_one = TRUE;
+			}
+			else {
+				if (*p == '\r' || *p == '\n') {
+					p++;
+					valid_folding = FALSE;
+				}
+				else if (*p == '\t' || *p == ' ') {
+					/* Valid folding */
+					p++;
+					valid_folding = TRUE;
+				}
+				else {
+					if (valid_folding) {
+						debug_task("go to state: %d->%d", state, next_state);
+						state = next_state;
+					}
+					else {
+						/* Fall back */
+						debug_task("go to state: %d->%d", state, err_state);
+						state = err_state;
+					}
+				}
+			}
+			break;
+		case 100:
+			/* Fail state, skip line */
+
+			if (*p == '\r') {
+				if (p + 1 < end && *(p + 1) == '\n') {
+					nlines_count[RSPAMD_TASK_NEWLINES_CRLF]++;
+					p++;
+				}
+				p++;
+				state = next_state;
+			}
+			else if (*p == '\n') {
+				nlines_count[RSPAMD_TASK_NEWLINES_LF]++;
+
+				if (p + 1 < end && *(p + 1) == '\r') {
+					p++;
+				}
+				p++;
+				state = next_state;
+			}
+			else if (p + 1 == end) {
+				state = next_state;
+				p++;
+			}
+			else {
+				p++;
+			}
+			break;
+		}
+	}
+
+	/* Since we have prepended headers, we need to reverse the list to get the actual order */
+	LL_REVERSE(*order_ptr);
+
+	if (check_newlines) {
+		guint max_cnt = 0;
+		gint sel = 0;
+		rspamd_cryptobox_hash_state_t hs;
+		guchar hout[rspamd_cryptobox_HASHBYTES], *hexout;
+
+		/* Pick the most frequent newline style */
+		for (gint i = RSPAMD_TASK_NEWLINES_CR; i < RSPAMD_TASK_NEWLINES_MAX; i++) {
+			if (nlines_count[i] > max_cnt) {
+				max_cnt = nlines_count[i];
+				sel = i;
+			}
+		}
+
+		MESSAGE_FIELD(task, nlines_type) = sel;
+
+		rspamd_cryptobox_hash_init(&hs, NULL, 0);
+
+		LL_FOREACH(*order_ptr, nh)
+		{
+			/*
+			 * Hash the names of all headers except Received ones. flags is a
+			 * bit set and may carry other bits (e.g. separator flags), so the
+			 * Received bit has to be tested rather than compared for equality.
+			 */
+			if (nh->name && !(nh->flags & RSPAMD_HEADER_RECEIVED)) {
+				rspamd_cryptobox_hash_update(&hs, nh->name, strlen(nh->name));
+			}
+		}
+
+		rspamd_cryptobox_hash_final(&hs, hout);
+		hexout = rspamd_mempool_alloc(task->task_pool, sizeof(hout) * 2 + 1);
+		hexout[sizeof(hout) * 2] = '\0';
+		rspamd_encode_hex_buf(hout, sizeof(hout), hexout,
+							  sizeof(hout) * 2 + 1);
+		rspamd_mempool_set_variable(task->task_pool,
+									RSPAMD_MEMPOOL_HEADERS_HASH,
+									hexout, NULL);
+	}
+}
+
+/**
+ * Flushes the accumulated encoded-word bytes to `out` unless they can be
+ * glued with the next encoded word.
+ *
+ * Nothing is flushed when the old and new charsets are equal
+ * (case-insensitively) and the charset is not iso-2022-jp. Otherwise `token`
+ * is converted with the charset detected from `new_charset`, appended to
+ * `out` on success, and then emptied; `new_charset` is copied to
+ * `old_charset`.
+ *
+ * @param pool memory pool passed to the charset helpers
+ * @param out output string
+ * @param token accumulated raw bytes
+ * @param decoded_token scratch buffer for the converted bytes
+ * @param old_charset charset remembered from the previous call (updated)
+ * @param new_charset current charset, must not be empty
+ */
+static void
+rspamd_mime_header_maybe_save_token(rspamd_mempool_t *pool,
+									GString *out,
+									GByteArray *token,
+									GByteArray *decoded_token,
+									rspamd_ftok_t *old_charset,
+									rspamd_ftok_t *new_charset)
+{
+	rspamd_ftok_t jp_charset;
+
+	if (new_charset->len == 0) {
+		g_assert_not_reached();
+	}
+
+	if (old_charset->len > 0 &&
+		rspamd_ftok_casecmp(new_charset, old_charset) == 0) {
+		/*
+		 * Same charset as before. iso-2022-jp is the one exception that must
+		 * still be flushed per token:
+		 * https://github.com/vstakhov/rspamd/issues/1669
+		 */
+		RSPAMD_FTOK_ASSIGN(&jp_charset, "iso-2022-jp");
+
+		if (rspamd_ftok_casecmp(new_charset, &jp_charset) != 0) {
+			/* Buffers can be concatenated, keep accumulating */
+			return;
+		}
+	}
+
+	/* Flush: convert what has been accumulated and append it to the output */
+	if (rspamd_mime_to_utf8_byte_array(token, decoded_token, pool,
+									   rspamd_mime_detect_charset(new_charset, pool))) {
+		g_string_append_len(out, decoded_token->data, decoded_token->len);
+	}
+
+	/* Start a fresh token */
+	g_byte_array_set_size(token, 0);
+
+	/*
+	 * Remember the charset so that the equality check above can allow
+	 * concatenation next time. This may lead to repeated (and possibly
+	 * expensive) rspamd_mime_detect_charset calls, which is accepted for now.
+	 */
+	*old_charset = *new_charset;
+}
+
+/**
+ * Sanitises a decoded header in place: bytes with the high bit set and
+ * ASCII graphic characters are kept, ASCII whitespace becomes a plain space,
+ * and every other byte becomes '?'.
+ * @param str string to sanitise
+ */
+static void
+rspamd_mime_header_sanity_check(GString *str)
+{
+	gchar *cur = str->str;
+	gchar *const last = str->str + str->len;
+
+	for (; cur < last; cur++) {
+		if ((*cur & 0x80) || g_ascii_isgraph(*cur)) {
+			/* Part of a multibyte sequence or a printable character */
+			continue;
+		}
+
+		*cur = g_ascii_isspace(*cur) ? ' ' : '?';
+	}
+}
+
+/**
+ * Decodes a header value that may contain RFC 2047 encoded-words
+ * ("=?charset?enc?text?=").
+ *
+ * Text outside encoded-words is copied as is. Bytes >= 128 that do not start
+ * a valid UTF-8 sequence are replaced with U+FFFD and `*invalid_utf` is set.
+ * Encoded-words are decoded from quoted-printable or base64 into a byte
+ * buffer, which is flushed to the output through
+ * rspamd_mime_header_maybe_save_token. Whitespace between two adjacent
+ * encoded-words is dropped. The result is finally passed through
+ * rspamd_mime_header_sanity_check.
+ *
+ * @param pool memory pool; the result is freed by a destructor registered in it
+ * @param in input string, must not be NULL
+ * @param inlen length of the input
+ * @param invalid_utf optional output flag, set to TRUE on invalid UTF-8;
+ * it is never reset to FALSE here
+ * @return decoded zero-terminated string owned by the pool
+ */
+gchar *
+rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in,
+						  gsize inlen, gboolean *invalid_utf)
+{
+	GString *out;
+	/* c is the start of input not yet copied to out, p is the read position */
+	const guchar *c, *p, *end;
+	const gchar *tok_start = NULL;
+	gsize tok_len = 0, pos;
+	GByteArray *token = NULL, *decoded;
+	rspamd_ftok_t cur_charset = {0, NULL}, old_charset = {0, NULL};
+	gint encoding;
+	gssize r;
+	guint qmarks = 0;
+	gchar *ret;
+	enum {
+		parse_normal = 0,
+		got_eqsign,
+		got_encoded_start,
+		got_more_qmark,
+		skip_spaces,
+	} state = parse_normal;
+
+	g_assert(in != NULL);
+
+	c = in;
+	p = in;
+	end = in + inlen;
+	out = g_string_sized_new(inlen);
+	token = g_byte_array_sized_new(80);
+	decoded = g_byte_array_sized_new(122);
+
+	while (p < end) {
+		switch (state) {
+		case parse_normal:
+			if (*p == '=') {
+				/* Possible start of an encoded-word: flush pending plain text */
+				g_string_append_len(out, c, p - c);
+				c = p;
+				state = got_eqsign;
+			}
+			else if (*p >= 128) {
+				gint off = 0;
+				UChar32 uc;
+				/* Unencoded character */
+				g_string_append_len(out, c, p - c);
+				/* Check if that's valid UTF8 */
+				U8_NEXT(p, off, end - p, uc);
+
+				if (uc <= 0) {
+					c = p + 1;
+					/* 0xFFFD in UTF8 */
+					/* Reserve three bytes, then overwrite them with the encoded U+FFFD */
+					g_string_append_len(out, "   ", 3);
+					off = 0;
+					U8_APPEND_UNSAFE(out->str + out->len - 3,
+									 off, 0xfffd);
+
+					if (invalid_utf) {
+						*invalid_utf = TRUE;
+					}
+				}
+				else {
+					/* Valid sequence: leave it pending and skip all of its bytes */
+					c = p;
+					p = p + off;
+					continue; /* To avoid p ++ after this block */
+				}
+			}
+			p++;
+			break;
+		case got_eqsign:
+			if (*p == '?') {
+				state = got_encoded_start;
+				qmarks = 0;
+			}
+			else {
+				/* A lone '=': emit it and reprocess the current character */
+				g_string_append_len(out, c, 1);
+				c = p;
+				state = parse_normal;
+				continue; /* Deal with == case */
+			}
+			p++;
+			break;
+		case got_encoded_start:
+			if (*p == '?') {
+				state = got_more_qmark;
+				/* A run of consecutive '?' is counted once */
+				qmarks++;
+
+				/* Skip multiple ? signs */
+				p++;
+				while (p < end && *p == '?') {
+					p++;
+				}
+
+				continue;
+			}
+			p++;
+			break;
+		case got_more_qmark:
+			if (*p == '=') {
+				if (qmarks < 3) {
+					/* Not enough '?' groups seen for a complete encoded-word yet */
+					state = got_encoded_start;
+				}
+				else {
+					/* Finished encoded boundary */
+					if (*c == '"') {
+						/* Quoted string, non-RFC conformant but used by retards */
+						c++;
+					}
+					if (rspamd_rfc2047_parser(c, p - c + 1, &encoding,
+											  &cur_charset.begin, &cur_charset.len,
+											  &tok_start, &tok_len)) {
+						/* We have a token, so we can decode it from `encoding` */
+						if (token->len > 0) {
+							if (old_charset.len == 0) {
+								memcpy(&old_charset, &cur_charset,
+									   sizeof(old_charset));
+							}
+
+							rspamd_mime_header_maybe_save_token(pool, out,
+																token, decoded,
+																&old_charset, &cur_charset);
+						}
+
+						qmarks = 0;
+						/* Grow the token buffer by the encoded length and decode into the new tail */
+						pos = token->len;
+						g_byte_array_set_size(token, pos + tok_len);
+
+						if (encoding == RSPAMD_RFC2047_QP) {
+							r = rspamd_decode_qp2047_buf(tok_start, tok_len,
+														 token->data + pos, tok_len);
+
+							if (r != -1) {
+								token->len = pos + r;
+							}
+							else {
+								/* Cannot decode qp */
+								token->len -= tok_len;
+							}
+						}
+						else {
+							if (rspamd_cryptobox_base64_decode(tok_start, tok_len,
+															   token->data + pos, &tok_len)) {
+								token->len = pos + tok_len;
+							}
+							else {
+								/* Cannot decode */
+								token->len -= tok_len;
+							}
+						}
+
+						c = p + 1;
+						state = skip_spaces;
+					}
+					else {
+						/* Not encoded-word */
+						old_charset.len = 0;
+
+						if (token->len > 0) {
+							rspamd_mime_header_maybe_save_token(pool, out,
+																token, decoded,
+																&old_charset, &cur_charset);
+						}
+
+						g_string_append_len(out, c, p - c);
+						c = p;
+						state = parse_normal;
+					}
+				} /* qmarks >= 3 */
+			}     /* p == '=' */
+			else {
+				state = got_encoded_start;
+			}
+			p++;
+			break;
+		case skip_spaces:
+			/* After an encoded-word: whitespace before another encoded-word is dropped */
+			if (g_ascii_isspace(*p)) {
+				p++;
+			}
+			else if (*p == '=' && p < end - 1 && p[1] == '?') {
+				/* Next boundary, can glue */
+				c = p;
+				p += 2;
+				state = got_encoded_start;
+			}
+			else {
+				/* Need to save spaces and decoded token */
+				if (token->len > 0) {
+					/* Clearing old_charset forces the flush in the helper */
+					old_charset.len = 0;
+					rspamd_mime_header_maybe_save_token(pool, out,
+														token, decoded,
+														&old_charset, &cur_charset);
+				}
+
+				g_string_append_len(out, c, p - c);
+				c = p;
+				state = parse_normal;
+			}
+			break;
+		}
+	}
+
+	/* Leftover */
+	switch (state) {
+	case skip_spaces:
+		/* Input ended right after an encoded-word: flush the pending token */
+		if (token->len > 0 && cur_charset.len > 0) {
+			old_charset.len = 0;
+			rspamd_mime_header_maybe_save_token(pool, out,
+												token, decoded,
+												&old_charset, &cur_charset);
+		}
+		break;
+	default:
+		/* Just copy leftover */
+		if (p > c) {
+			g_string_append_len(out, c, p - c);
+		}
+		break;
+	}
+
+	g_byte_array_free(token, TRUE);
+	g_byte_array_free(decoded, TRUE);
+	rspamd_mime_header_sanity_check(out);
+	rspamd_mempool_notify_alloc(pool, out->len);
+	/* Keep the character data and let the pool release it with g_free */
+	ret = g_string_free(out, FALSE);
+	rspamd_mempool_add_destructor(pool, g_free, ret);
+
+	return ret;
+}
+
+/*
+ * Encodes one chunk with rspamd_encode_qp2047_buf and appends it to `res` as
+ * a "=?UTF-8?Q?...?=" word, preceded by a space unless `res` is still empty.
+ * A chunk that cannot be encoded is skipped.
+ */
+static void
+rspamd_mime_header_encode_append(GString *res, const gchar *chunk,
+								 gsize chunk_len)
+{
+	gchar qp_buf[80 * sizeof(guint32)];
+	gint qp_len;
+
+	qp_len = rspamd_encode_qp2047_buf(chunk, chunk_len,
+									  qp_buf, sizeof(qp_buf));
+
+	if (qp_len == -1) {
+		return;
+	}
+
+	if (res->len > 0) {
+		rspamd_printf_gstring(res, " =?UTF-8?Q?%*s?=", qp_len,
+							  qp_buf);
+	}
+	else {
+		rspamd_printf_gstring(res, "=?UTF-8?Q?%*s?=", qp_len,
+							  qp_buf);
+	}
+}
+
+/**
+ * Encodes a header value for output.
+ * Input without any byte that has the high bit set is returned as a plain
+ * copy. Otherwise the input is split into chunks on UTF-8 character
+ * boundaries and each chunk is emitted as a "=?UTF-8?Q?...?=" word, with
+ * words separated by a single space.
+ * @param in input string
+ * @param len length of the input in bytes
+ * @return newly allocated string; the caller owns it (allocated with g_malloc
+ * or detached from a GString)
+ */
+gchar *
+rspamd_mime_header_encode(const gchar *in, gsize len)
+{
+	const gchar *const end = in + len;
+	const gchar *cursor, *chunk_start;
+	gboolean has_8bit = FALSE;
+
+	/* Look for the first byte that makes encoding necessary */
+	for (cursor = in; cursor < end; cursor++) {
+		if ((((guchar) *cursor) & 0x80) != 0) {
+			has_8bit = TRUE;
+			break;
+		}
+	}
+
+	if (!has_8bit) {
+		gchar *plain = g_malloc(len + 1);
+
+		rspamd_strlcpy(plain, in, len + 1);
+
+		return plain;
+	}
+
+	/* Choose step: =?UTF-8?Q?<qp>?= should be less than 76 chars */
+	guint step = (76 - 12) / 3 + 1;
+	gsize nchars = g_utf8_strlen(in, len);
+	GString *res = g_string_sized_new(len * 2 + 1);
+	gsize pos = 0;
+
+	chunk_start = in;
+	/* Adjust chunk size for unicode average length */
+	step *= 1.0 * nchars / (gdouble) len;
+
+	while (pos < nchars) {
+		cursor = g_utf8_offset_to_pointer(in, pos);
+
+		if (cursor > chunk_start) {
+			rspamd_mime_header_encode_append(res, chunk_start,
+											 cursor - chunk_start);
+		}
+
+		pos += MIN(step, nchars - pos);
+		chunk_start = cursor;
+	}
+
+	/* Whatever remains after the last chunk boundary */
+	if (chunk_start < end) {
+		rspamd_mime_header_encode_append(res, chunk_start,
+										 end - chunk_start);
+	}
+
+	return g_string_free(res, FALSE);
+}
+
+/**
+ * Generates a message id of the form "<clock>.<random>@<fqdn>", where the
+ * first two parts are printed with the "%*bs" format from the raw bytes of a
+ * clock-derived value and of a random 64 bit value.
+ * @param fqdn domain part of the id, must not be NULL
+ * @return newly allocated string owned by the caller
+ */
+gchar *
+rspamd_mime_message_id_generate(const gchar *fqdn)
+{
+	GString *id = g_string_sized_new(strlen(fqdn) + 22);
+	guint64 rnd = ottery_rand_uint64();
+	guint64 clk = rspamd_get_calendar_ticks() * 1e6;
+
+	/* Only sizeof(guint64) - 3 bytes of the clock value are printed */
+	rspamd_printf_gstring(id, "%*bs.%*bs@%s",
+						  (gint) sizeof(guint64) - 3, (guchar *) &clk,
+						  (gint) sizeof(guint64), (gchar *) &rnd,
+						  fqdn);
+
+	return g_string_free(id, FALSE);
+}
+
+/**
+ * Looks up a header chain by name.
+ * @param hdrs headers table, may be NULL
+ * @param field header name to look for
+ * @param need_modified if true and the header is flagged as modified, return
+ * its modified chain; if false, headers flagged as non-existing are hidden
+ * @return head of the header chain or NULL if nothing suitable is found
+ */
+struct rspamd_mime_header *
+rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs,
+									const gchar *field,
+									gboolean need_modified)
+{
+	khash_t(rspamd_mime_headers_htb) * htb;
+	struct rspamd_mime_header *found;
+	khiter_t it;
+
+	if (hdrs == NULL) {
+		return NULL;
+	}
+
+	htb = &hdrs->htb;
+	it = kh_get(rspamd_mime_headers_htb, htb, (gchar *) field);
+
+	if (it == kh_end(htb)) {
+		return NULL;
+	}
+
+	found = kh_value(htb, it);
+
+	if (need_modified) {
+		/* Prefer the modified view when one exists */
+		return (found->flags & RSPAMD_HEADER_MODIFIED) ? found->modified_chain : found;
+	}
+
+	/* Headers that exist only as modifications are invisible here */
+	return (found->flags & RSPAMD_HEADER_NON_EXISTING) ? NULL : found;
+}
+
+/**
+ * Looks up a header chain by name in the raw headers of the task's message.
+ * @param task task to inspect
+ * @param field header name
+ * @param need_modified passed through to rspamd_message_get_header_from_hash
+ * @return head of the header chain or NULL
+ */
+struct rspamd_mime_header *
+rspamd_message_get_header_array(struct rspamd_task *task, const gchar *field,
+								gboolean need_modified)
+{
+	struct rspamd_mime_headers_table *raw_hdrs =
+		MESSAGE_FIELD_CHECK(task, raw_headers);
+
+	return rspamd_message_get_header_from_hash(raw_hdrs, field, need_modified);
+}
+
+/**
+ * Returns the number of entries in the headers hash (one per distinct key).
+ * @param hdrs headers table, may be NULL
+ * @return number of entries, 0 for a NULL table
+ */
+gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs)
+{
+	if (hdrs == NULL) {
+		return 0;
+	}
+
+	return kh_size(&hdrs->htb);
+}
+
+/**
+ * Calls `func` for every entry of the headers hash.
+ * @param hdrs headers table; a NULL table is treated as empty
+ * @param func callback receiving the key, the header chain and `ud`;
+ * returning false stops the traversal
+ * @param ud opaque pointer passed to the callback
+ * @return false if the callback stopped the traversal, true otherwise
+ */
+bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *hdrs,
+								 rspamd_hdr_traverse_func_t func, void *ud)
+{
+	const gchar *name;
+	struct rspamd_mime_header *hdr;
+
+	if (hdrs == NULL) {
+		/* Nothing to traverse, same tolerance as the other table accessors */
+		return true;
+	}
+
+	kh_foreach(&hdrs->htb, name, hdr, {
+		if (!func(name, hdr, ud)) {
+			return false;
+		}
+	});
+
+	return true;
+}
+
+/**
+ * Destructor for a headers table: releases the arrays of the embedded hash
+ * and then the table itself. The headers are not freed here.
+ * @param hdrs table to destroy, may be NULL
+ */
+static void
+rspamd_message_headers_dtor(struct rspamd_mime_headers_table *hdrs)
+{
+	if (hdrs == NULL) {
+		return;
+	}
+
+	/* The hash is embedded by value, so only its internal arrays are freed */
+	kfree(hdrs->htb.keys);
+	kfree(hdrs->htb.vals);
+	kfree(hdrs->htb.flags);
+	g_free(hdrs);
+}
+
+/**
+ * Takes a reference on a headers table via REF_RETAIN.
+ * @param hdrs table to retain
+ * @return the same pointer, for call chaining
+ */
+struct rspamd_mime_headers_table *
+rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs)
+{
+	REF_RETAIN(hdrs);
+
+	return hdrs;
+}
+
+/**
+ * Drops a reference on a headers table via REF_RELEASE.
+ * NOTE(review): presumably the destructor registered at creation runs when
+ * the last reference is dropped; confirm against the REF_RELEASE definition.
+ * @param hdrs table to release
+ */
+void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs)
+{
+	REF_RELEASE(hdrs);
+}
+
+/**
+ * Allocates a zeroed headers table and initialises its reference counter
+ * with rspamd_message_headers_dtor as the destructor.
+ * @return new headers table
+ */
+struct rspamd_mime_headers_table *
+rspamd_message_headers_new(void)
+{
+	struct rspamd_mime_headers_table *tbl =
+		g_malloc0(sizeof(struct rspamd_mime_headers_table));
+
+	REF_INIT_RETAIN(tbl, rspamd_message_headers_dtor);
+
+	return tbl;
+}
+
+/**
+ * Unfolds a header value in place: a newline (CR, LF or CRLF) followed by
+ * whitespace is collapsed into a single space. A newline that is not followed
+ * by whitespace is kept as one '\r' or '\n' character. Newlines or folding
+ * whitespace at the very end of the input are dropped. The result is not
+ * zero-terminated by this function.
+ * @param hdr buffer to process, modified in place
+ * @param len length of the input in bytes
+ * @return length of the unfolded data
+ */
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len)
+{
+	enum {
+		st_copy,
+		st_after_cr,
+		st_after_lf,
+		st_fold_ws,
+	} st = st_copy;
+	/* dst never gets ahead of src, so the rewrite is safe in place */
+	char *dst = hdr;
+	const char *src = hdr;
+	const char *const stop = hdr + len;
+
+	while (src < stop) {
+		const char ch = *src;
+
+		if (st == st_copy) {
+			if (ch == '\r') {
+				st = st_after_cr;
+			}
+			else if (ch == '\n') {
+				st = st_after_lf;
+			}
+			else {
+				*dst++ = ch;
+			}
+			src++;
+		}
+		else if (st == st_after_cr) {
+			if (ch == '\n') {
+				st = st_after_lf;
+				src++;
+			}
+			else if (g_ascii_isspace(ch)) {
+				st = st_fold_ws;
+				src++;
+			}
+			else {
+				/* Not a folding: restore the CR and reprocess this character */
+				*dst++ = '\r';
+				st = st_copy;
+			}
+		}
+		else if (st == st_after_lf) {
+			if (g_ascii_isspace(ch)) {
+				st = st_fold_ws;
+				src++;
+			}
+			else {
+				/* Not a folding: restore the LF and reprocess this character */
+				*dst++ = '\n';
+				st = st_copy;
+			}
+		}
+		else {
+			/* st_fold_ws: swallow whitespace, then emit one space */
+			if (g_ascii_isspace(ch)) {
+				src++;
+			}
+			else {
+				*dst++ = ' ';
+				st = st_copy;
+			}
+		}
+	}
+
+	return dst - hdr;
+}
+
+void rspamd_message_set_modified_header(struct rspamd_task *task,
+										struct rspamd_mime_headers_table *hdrs,
+										const gchar *hdr_name,
+										const ucl_object_t *obj,
+										struct rspamd_mime_header **order_ptr)
+{
+	khiter_t k;
+	khash_t(rspamd_mime_headers_htb) *htb = &hdrs->htb;
+	struct rspamd_mime_header *hdr_elt, *existing_chain;
+	int i;
+
+	if (htb) {
+		k = kh_get(rspamd_mime_headers_htb, htb, (gchar *) hdr_name);
+
+		if (k == kh_end(htb)) {
+			hdr_elt = rspamd_mempool_alloc0(task->task_pool, sizeof(*hdr_elt));
+
+			hdr_elt->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_NON_EXISTING;
+			hdr_elt->name = rspamd_mempool_strdup(task->task_pool, hdr_name);
+
+			int r;
+			k = kh_put(rspamd_mime_headers_htb, htb, hdr_elt->name, &r);
+
+			kh_value(htb, k) = hdr_elt;
+
+			if (order_ptr) {
+				/*
+				 * This iterates over all headers in O(N), but we have no other options here, as the
+				 * list is already set.
+				 */
+				LL_APPEND2(*order_ptr, hdr_elt, ord_next);
+			}
+		}
+		else {
+			hdr_elt = kh_value(htb, k);
+		}
+	}
+	else {
+		/* No hash, no modification */
+		msg_err_task("internal error: calling for set_modified_header for no headers");
+		return;
+	}
+
+	if (hdr_elt->flags & RSPAMD_HEADER_MODIFIED) {
+		existing_chain = hdr_elt->modified_chain;
+	}
+	else {
+		existing_chain = hdr_elt;
+	}
+
+	const ucl_object_t *elt, *cur;
+	ucl_object_iter_t it;
+
+	/* First, deal with removed headers, copying the relevant headers with remove flag */
+	elt = ucl_object_lookup(obj, "remove");
+
+	/*
+	 * remove:  {1, 2 ...}
+	 * where number is the header's position starting from '1'
+	 */
+	if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+		/* First, use a temporary array to keep all headers */
+		GPtrArray *existing_ar = g_ptr_array_new();
+		struct rspamd_mime_header *cur_hdr;
+
+		/* Exclude removed headers */
+		LL_FOREACH(existing_chain, cur_hdr)
+		{
+			if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
+				g_ptr_array_add(existing_ar, cur_hdr);
+			}
+		}
+
+		it = NULL;
+
+		while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+			if (ucl_object_type(cur) == UCL_INT) {
+				int ord = ucl_object_toint(cur);
+
+				if (ord == 0) {
+					/* Remove all headers in the existing chain */
+					PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
+					{
+						cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
+					}
+				}
+				else if (ord > 0) {
+					/* Start from the top */
+
+					if (ord <= existing_ar->len) {
+						cur_hdr = g_ptr_array_index(existing_ar, ord - 1);
+						cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
+					}
+				}
+				else {
+					/* Start from the bottom; ord < 0 */
+					if ((-ord) <= existing_ar->len) {
+						cur_hdr = g_ptr_array_index(existing_ar, existing_ar->len + ord);
+						cur_hdr->flags |= RSPAMD_HEADER_MODIFIED | RSPAMD_HEADER_REMOVED;
+					}
+				}
+			}
+		}
+
+		/*
+		 * Next, we return all headers modified to the existing chain
+		 * This implies an additional copy of all structures but is safe enough to
+		 * deal with it
+		 */
+		hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
+		hdr_elt->modified_chain = NULL;
+
+		PTR_ARRAY_FOREACH(existing_ar, i, cur_hdr)
+		{
+			if (!(cur_hdr->flags & RSPAMD_HEADER_REMOVED)) {
+				struct rspamd_mime_header *nhdr = rspamd_mempool_alloc(
+					task->task_pool, sizeof(*nhdr));
+				memcpy(nhdr, cur_hdr, sizeof(*nhdr));
+				nhdr->modified_chain = NULL;
+				nhdr->prev = NULL;
+				nhdr->next = NULL;
+				nhdr->ord_next = NULL;
+
+				DL_APPEND(hdr_elt->modified_chain, nhdr);
+			}
+		}
+
+		g_ptr_array_free(existing_ar, TRUE);
+
+		/* End of headers removal logic */
+	}
+
+	/* We can now deal with headers additions */
+	elt = ucl_object_lookup(obj, "add");
+	if (elt && ucl_object_type(elt) == UCL_ARRAY) {
+		if (!(hdr_elt->flags & RSPAMD_HEADER_MODIFIED)) {
+			/* Copy the header itself to the modified chain */
+			struct rspamd_mime_header *nhdr;
+			hdr_elt->flags |= RSPAMD_HEADER_MODIFIED;
+			nhdr = rspamd_mempool_alloc(
+				task->task_pool, sizeof(*nhdr));
+			memcpy(nhdr, hdr_elt, sizeof(*hdr_elt));
+			nhdr->modified_chain = NULL;
+			nhdr->next = NULL;
+			nhdr->ord_next = NULL;
+			nhdr->prev = nhdr;
+			hdr_elt->modified_chain = nhdr;
+		}
+
+		/*
+		 * add:  {{1, "foo"}, {-1, "bar"} ...}
+		 * where number is the header's position starting from '1'
+		 */
+		it = NULL;
+
+		while ((cur = ucl_object_iterate(elt, &it, true)) != NULL) {
+			if (ucl_object_type(cur) == UCL_ARRAY) {
+				const ucl_object_t *order = ucl_array_find_index(cur, 0),
+								   *value = ucl_array_find_index(cur, 1);
+
+				if (order && value &&
+					(ucl_object_type(order) == UCL_INT &&
+					 ucl_object_type(value) == UCL_STRING)) {
+					int ord = ucl_object_toint(order);
+					const char *raw_value;
+					gsize raw_len;
+
+					raw_value = ucl_object_tolstring(value, &raw_len);
+
+					if (raw_len == 0) {
+						continue;
+					}
+
+					struct rspamd_mime_header *nhdr = rspamd_mempool_alloc0(
+						task->task_pool, sizeof(*nhdr));
+
+					nhdr->flags |= RSPAMD_HEADER_ADDED;
+					nhdr->name = hdr_elt->name;
+					nhdr->value = rspamd_mempool_alloc(task->task_pool,
+													   raw_len + 1);
+					/* Strlcpy will ensure that value will have no embedded \0 */
+					rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1);
+					gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len);
+					nhdr->value[value_len] = '\0';
+
+					/* Deal with the raw value */
+					size_t namelen = strlen(hdr_elt->name);
+					char *rawbuf = rspamd_mempool_alloc(task->task_pool, namelen +
+																			 raw_len +
+																			 sizeof(": \r\n"));
+					/* Name: value<newline> */
+					nhdr->raw_value = rawbuf;
+					memcpy(rawbuf, hdr_elt->name, namelen);
+					rawbuf += namelen;
+					memcpy(rawbuf, ": ", sizeof(": ") - 1);
+					nhdr->separator = rspamd_mempool_strdup(task->task_pool, " ");
+					rawbuf += sizeof(": ") - 1;
+					memcpy(rawbuf, raw_value, raw_len);
+					nhdr->raw_len = raw_len;
+
+					if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_LF) {
+						rawbuf[raw_len++] = '\n';
+					}
+					else {
+						rawbuf[raw_len++] = '\r';
+
+						if (MESSAGE_FIELD(task, nlines_type) == RSPAMD_TASK_NEWLINES_CRLF) {
+							rawbuf[raw_len++] = '\n';
+						}
+					}
+
+					rawbuf[raw_len] = '\0';
+
+					nhdr->decoded = rspamd_mime_header_decode(task->task_pool,
+															  raw_value, nhdr->raw_len,
+															  NULL);
+
+					/* Now find a position to insert a value */
+					struct rspamd_mime_header **pos = &hdr_elt->modified_chain;
+
+					if (ord == 0) {
+						DL_PREPEND(hdr_elt->modified_chain, nhdr);
+					}
+					else if (ord == -1) {
+						DL_APPEND(hdr_elt->modified_chain, nhdr);
+					}
+					else if (ord > 0) {
+						while (ord > 0 && (*pos)) {
+							ord--;
+							pos = &((*pos)->next);
+						}
+						if (*pos) {
+							/* pos is &(elt)->next */
+							nhdr->next = (*pos);
+							nhdr->prev = (*pos)->prev;
+							(*pos)->prev = nhdr;
+							*pos = nhdr;
+						}
+						else {
+							/* Last element */
+							DL_APPEND(*pos, nhdr);
+						}
+					}
+					else {
+						/* NYI: negative order is not defined */
+						msg_err_task("internal error: calling for set_modified_header "
+									 "with negative add order header");
+					}
+				}
+				else {
+					msg_err_task("internal error: calling for set_modified_header "
+								 "with invalid header");
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Remove parenthesised comments from `input` in place.
+ * Whitespace is left untouched: only the comments themselves are removed.
+ *
+ * Rules implemented by the state machine below:
+ *  - text between a '(' and its matching ')' is dropped, comments can be
+ *    nested to an arbitrary depth;
+ *  - a backslash outside of a comment is dropped and the character that
+ *    follows it is copied verbatim (so "\(" yields a literal '(');
+ *  - a backslash inside a comment hides the next character, so "\)" does
+ *    not terminate the comment;
+ *  - an unmatched ')' outside of a comment is copied as is;
+ *  - an unterminated comment or a trailing backslash swallows the rest of
+ *    the input.
+ * Example: "a (b (c)) d" becomes "a  d".
+ *
+ * The output never grows, so the write pointer never overtakes the read
+ * pointer. The result is NOT zero terminated by this function.
+ *
+ * @param input buffer to process (modified in place)
+ * @param len number of bytes in `input`
+ * @return number of bytes left in `input` after stripping
+ */
+gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len)
+{
+	enum parser_state {
+		parse_normal,
+		parse_comment,
+		parse_quoted_copy,
+		parse_quoted_ignore,
+	} state = parse_normal;
+	gchar *d = input, *end = input + len, *start = input;
+	gchar t;
+
+	/*
+	 * Number of currently open '(' characters. A single depth counter is
+	 * used instead of separate open/close counters: counting the braces
+	 * separately went out of balance on inputs such as "((a))" or "(a()b)",
+	 * so the comment was never closed and the remaining text was lost.
+	 */
+	gsize depth = 0;
+
+	while (input < end) {
+		t = *input++;
+
+		switch (state) {
+		case parse_normal:
+			if (t == '(') {
+				depth = 1;
+				state = parse_comment;
+			}
+			else if (t == '\\') {
+				state = parse_quoted_copy;
+			}
+			else {
+				*d++ = t;
+			}
+			break;
+		case parse_comment:
+			if (t == '(') {
+				depth++;
+			}
+			else if (t == ')') {
+				if (--depth == 0) {
+					/* The outermost comment has been closed */
+					state = parse_normal;
+				}
+			}
+			else if (t == '\\') {
+				state = parse_quoted_ignore;
+			}
+			break;
+		case parse_quoted_copy:
+			/* Escaped character outside of a comment: keep it */
+			*d++ = t;
+			state = parse_normal;
+			break;
+		case parse_quoted_ignore:
+			/* Escaped character inside of a comment: skip it */
+			state = parse_comment;
+			break;
+		}
+	}
+
+	return (d - start);
+}
+\ No newline at end of file
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
new file mode 100644
index 0000000..60015a2
--- /dev/null
+++ b/src/libmime/mime_headers.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_HEADERS_H_
+#define SRC_LIBMIME_MIME_HEADERS_H_
+
+#include "config.h"
+#include "libutil/mem_pool.h"
+#include "libutil/addr.h"
+#include "khash.h"
+#include "contrib/libucl/ucl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+
+enum rspamd_rfc2047_encoding {
+	RSPAMD_RFC2047_QP = 0,
+	RSPAMD_RFC2047_BASE64,
+};
+
+enum rspamd_mime_header_flags { /* Bit flags kept in rspamd_mime_header::flags */
+	RSPAMD_HEADER_GENERIC = 0u,
+	RSPAMD_HEADER_RECEIVED = 1u << 0u,
+	RSPAMD_HEADER_TO = 1u << 2u, /* NB: bit 1 (1u << 1u) is not assigned to any flag */
+	RSPAMD_HEADER_CC = 1u << 3u,
+	RSPAMD_HEADER_BCC = 1u << 4u,
+	RSPAMD_HEADER_FROM = 1u << 5u,
+	RSPAMD_HEADER_MESSAGE_ID = 1u << 6u,
+	RSPAMD_HEADER_SUBJECT = 1u << 7u,
+	RSPAMD_HEADER_RETURN_PATH = 1u << 8u,
+	RSPAMD_HEADER_DELIVERED_TO = 1u << 9u,
+	RSPAMD_HEADER_SENDER = 1u << 10u,
+	RSPAMD_HEADER_RCPT = 1u << 11u,
+	RSPAMD_HEADER_UNIQUE = 1u << 12u,
+	RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u,
+	RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u,
+	RSPAMD_HEADER_MODIFIED = 1u << 15u,     /* Means we need to check modified chain */
+	RSPAMD_HEADER_ADDED = 1u << 16u,        /* A header has been artificially added */
+	RSPAMD_HEADER_REMOVED = 1u << 17u,      /* A header has been artificially removed */
+	RSPAMD_HEADER_NON_EXISTING = 1u << 18u, /* Header was not in the original message */
+};
+
+struct rspamd_mime_header {
+	const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
+	gsize raw_len;
+	guint order;
+	int flags; /* see enum rspamd_mime_header_flags */
+	/* These are zero terminated (historically) */
+	gchar *name; /* Also used for key */
+	gchar *value;
+	gchar *separator;
+	gchar *decoded;
+	struct rspamd_mime_header *modified_chain; /* Headers modified during transform */
+	struct rspamd_mime_header *prev, *next;    /* Headers with the same name */
+	struct rspamd_mime_header *ord_next;       /* Overall order of headers, slist */
+};
+
+struct rspamd_mime_headers_table;
+
+/**
+ * Process headers and store them in `target`
+ * @param task task the headers belong to
+ * @param target headers table to fill (NOTE(review): `order_ptr` is not documented here - TODO describe)
+ * @param in raw headers text
+ * @param len length of `in` in bytes
+ * @param check_newlines NOTE(review): meaning is defined by the implementation, not visible from this header
+ */
+void rspamd_mime_headers_process(struct rspamd_task *task,
+								 struct rspamd_mime_headers_table *target,
+								 struct rspamd_mime_header **order_ptr,
+								 const gchar *in, gsize len,
+								 gboolean check_newlines);
+
+/**
+ * Perform rfc2047 decoding of a header
+ * @param pool
+ * @param in
+ * @param inlen
+ * @return
+ */
+gchar *rspamd_mime_header_decode(rspamd_mempool_t *pool, const gchar *in,
+								 gsize inlen, gboolean *invalid_utf);
+
+/**
+ * Encode mime header if needed
+ * @param in
+ * @param len
+ * @return newly allocated encoded header
+ */
+gchar *rspamd_mime_header_encode(const gchar *in, gsize len);
+
+/**
+ * Generate new unique message id
+ * @param fqdn
+ * @return
+ */
+gchar *rspamd_mime_message_id_generate(const gchar *fqdn);
+
+/**
+ * Get the chain of headers with the specified name from a task
+ * @param task worker task structure
+ * @param field header's name (need_modified: presumably selects the modified chain - TODO confirm)
+ * @return First header of the chain (linked via prev/next) or NULL. It is NOT permitted to free the chain or values.
+ */
+struct rspamd_mime_header *
+rspamd_message_get_header_array(struct rspamd_task *task,
+								const gchar *field,
+								gboolean need_modified);
+
+/**
+ * Get the chain of headers with the specified name from a headers table
+ * @param hdrs headers table to search in
+ * @param field header's name (need_modified: presumably selects the modified chain - TODO confirm)
+ * @return First header of the chain (linked via prev/next) or NULL. It is NOT permitted to free the chain or values.
+ */
+struct rspamd_mime_header *
+rspamd_message_get_header_from_hash(struct rspamd_mime_headers_table *hdrs,
+									const gchar *field,
+									gboolean need_modified);
+
+/**
+ * Modifies a header (or insert one if not found)
+ * @param hdrs headers table that holds (or receives) the header
+ * @param hdr_name name of the header to modify
+ * @param obj UCL object describing the change; its "add" key is an array of {position, value} pairs
+ * NOTE(review): `task` (owner of the memory pool used for new headers) and `order_ptr` are not documented - TODO
+ */
+void rspamd_message_set_modified_header(struct rspamd_task *task,
+										struct rspamd_mime_headers_table *hdrs,
+										const gchar *hdr_name,
+										const ucl_object_t *obj,
+										struct rspamd_mime_header **order_ptr);
+
+/**
+ * Releases a reference to the headers table (presumably freeing it with the last one - TODO confirm)
+ * @param hdrs headers table to release
+ */
+void rspamd_message_headers_unref(struct rspamd_mime_headers_table *hdrs);
+
+struct rspamd_mime_headers_table *rspamd_message_headers_ref(struct rspamd_mime_headers_table *hdrs);
+
+/**
+ * Init headers hash
+ * @return
+ */
+struct rspamd_mime_headers_table *rspamd_message_headers_new(void);
+
+/**
+ * Returns size for a headers table
+ * @param hdrs
+ * @return
+ */
+gsize rspamd_mime_headers_count(struct rspamd_mime_headers_table *hdrs);
+
+typedef bool(rspamd_hdr_traverse_func_t)(const gchar *, const struct rspamd_mime_header *, void *);
+/**
+ * Traverse all headers in a table
+ * @param func
+ * @param ud
+ * @return
+ */
+bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *,
+								 rspamd_hdr_traverse_func_t func, void *ud);
+
+/**
+ * Strip rfc822 CFWS sequences from a string in place
+ * @param input input
+ * @param len length of the input
+ * @return new length of the input
+ */
+gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len);
+
+/**
+ * Unfold header in place
+ * @param hdr header value
+ * @param len length of the header
+ * @return new unfolded length
+ */
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_MIME_HEADERS_H_ */
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
new file mode 100644
index 0000000..217f0b8
--- /dev/null
+++ b/src/libmime/mime_parser.c
@@ -0,0 +1,1758 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "config.h"
+#include "task.h"
+#include "mime_parser.h"
+#include "mime_headers.h"
+#include "message.h"
+#include "multipattern.h"
+#include "contrib/libottery/ottery.h"
+#include "contrib/uthash/utlist.h"
+#include <openssl/cms.h>
+#include <openssl/pkcs7.h>
+#include "contrib/fastutf8/fastutf8.h"
+
+struct rspamd_mime_parser_lib_ctx {
+	struct rspamd_multipattern *mp_boundary; /* Matches "\r--" and "\n--", see rspamd_mime_parser_init_lib */
+	guchar hkey[rspamd_cryptobox_SIPKEYBYTES]; /* Key for hashing */
+	guint key_usages; /* NOTE(review): presumably compared against max_key_usages - confirm */
+};
+
+struct rspamd_mime_parser_lib_ctx *lib_ctx = NULL;
+
+static const guint max_nested = 64;
+static const guint max_key_usages = 10000;
+
+#define msg_debug_mime(...) rspamd_conditional_debug_fast(NULL, task->from_addr,                                \
+														  rspamd_mime_log_id, "mime", task->task_pool->tag.uid, \
+														  RSPAMD_LOG_FUNC,                                      \
+														  __VA_ARGS__)
+
+INIT_LOG_MODULE(mime)
+
+#define RSPAMD_MIME_BOUNDARY_FLAG_CLOSED (1 << 0)
+#define RSPAMD_BOUNDARY_IS_CLOSED(b) ((b)->flags & RSPAMD_MIME_BOUNDARY_FLAG_CLOSED)
+
+struct rspamd_mime_boundary {
+	goffset boundary;
+	goffset start;
+	guint64 hash;
+	guint64 closed_hash;
+	gint flags;
+};
+
+struct rspamd_mime_parser_ctx {
+	GPtrArray *stack;   /* Stack of parts */
+	GArray *boundaries; /* Boundaries found in the whole message */
+	const gchar *start;
+	const gchar *pos;
+	const gchar *end;
+	struct rspamd_task *task;
+	guint nesting;
+};
+
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_multipart_part(struct rspamd_task *task,
+								 struct rspamd_mime_part *part,
+								 struct rspamd_mime_parser_ctx *st,
+								 GError **err);
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_message(struct rspamd_task *task,
+						  struct rspamd_mime_part *part,
+						  struct rspamd_mime_parser_ctx *st,
+						  GError **err);
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_normal_part(struct rspamd_task *task,
+							  struct rspamd_mime_part *part,
+							  struct rspamd_mime_parser_ctx *st,
+							  struct rspamd_content_type *ct,
+							  GError **err);
+
+static enum rspamd_mime_parse_error
+rspamd_mime_process_multipart_node(struct rspamd_task *task,
+								   struct rspamd_mime_parser_ctx *st,
+								   struct rspamd_mime_part *multipart,
+								   const gchar *start, const gchar *end,
+								   gboolean is_finished,
+								   GError **err);
+
+
+#define RSPAMD_MIME_QUARK (rspamd_mime_parser_quark())
+static GQuark
+rspamd_mime_parser_quark(void)
+{
+	return g_quark_from_static_string("mime-parser"); /* the quark behind RSPAMD_MIME_QUARK */
+}
+
+/*
+ * Translate a content transfer encoding into its textual name.
+ * The result points to a string literal, so it must not be freed or modified;
+ * every value without a dedicated case below yields "unknown".
+ */
+const gchar *
+rspamd_cte_to_string(enum rspamd_cte ct)
+{
+	switch (ct) {
+	case RSPAMD_CTE_7BIT:
+		return "7bit";
+	case RSPAMD_CTE_8BIT:
+		return "8bit";
+	case RSPAMD_CTE_QP:
+		return "quoted-printable";
+	case RSPAMD_CTE_B64:
+		return "base64";
+	case RSPAMD_CTE_UUE:
+		/* rspamd_cte_from_string also accepts "uuencode" and "X-uue" */
+		return "X-uuencode";
+	default:
+		/* RSPAMD_CTE_UNKNOWN and anything else fall through to the end */
+		break;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Parse a textual content transfer encoding name.
+ * Comparison is exact and case-sensitive; "uuencode" and "X-uue" are accepted
+ * as aliases of "X-uuencode". Unrecognised names yield RSPAMD_CTE_UNKNOWN.
+ * `str` must not be NULL (enforced by g_assert).
+ */
+enum rspamd_cte
+rspamd_cte_from_string(const gchar *str)
+{
+	static const struct {
+		const gchar *name;
+		enum rspamd_cte cte;
+	} known_ctes[] = {
+		{"7bit", RSPAMD_CTE_7BIT},
+		{"8bit", RSPAMD_CTE_8BIT},
+		{"quoted-printable", RSPAMD_CTE_QP},
+		{"base64", RSPAMD_CTE_B64},
+		{"X-uuencode", RSPAMD_CTE_UUE},
+		{"uuencode", RSPAMD_CTE_UUE},
+		{"X-uue", RSPAMD_CTE_UUE},
+	};
+
+	g_assert(str != NULL);
+	for (gsize i = 0; i < sizeof(known_ctes) / sizeof(known_ctes[0]); i++) {
+		if (strcmp(str, known_ctes[i].name) == 0) {
+			return known_ctes[i].cte;
+		}
+	}
+
+	return RSPAMD_CTE_UNKNOWN;
+}
+
+static void
+rspamd_mime_parser_init_lib(void)
+{
+	lib_ctx = g_malloc0(sizeof(*lib_ctx)); /* zero-filled, so key_usages starts at 0 */
+	lib_ctx->mp_boundary = rspamd_multipattern_create(RSPAMD_MULTIPATTERN_DEFAULT);
+	g_assert(lib_ctx->mp_boundary != NULL);
+	rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\r--", 0); /* "--" right after CR */
+	rspamd_multipattern_add_pattern(lib_ctx->mp_boundary, "\n--", 0); /* "--" right after LF */
+	/* Both patterns are fixed strings, so a compilation failure is treated as fatal */
+	GError *err = NULL;
+	if (!rspamd_multipattern_compile(lib_ctx->mp_boundary, &err)) {
+		msg_err("fatal error: cannot compile multipattern for mime parser boundaries: %e", err);
+		g_error_free(err);
+		g_abort();
+	}
+	ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey)); /* fill the hashing key with random bytes */
+}
+
+static enum rspamd_cte
+rspamd_mime_parse_cte(const gchar *in, gsize len)
+{
+	guint64 h;
+	enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN;
+	/* Identify a CTE name by its hash; the caller visible in this file lowercases the value first */
+	in = rspamd_string_len_strip(in, &len, " \t;,.+-#!`~'");
+	h = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH64,
+											in, len, 0xdeadbabe);
+
+	switch (h) { /* constants are the hashes of the names given in the comments */
+	case 0xCEDAA7056B4753F7ULL: /* 7bit */
+		ret = RSPAMD_CTE_7BIT;
+		break;
+	case 0x42E0745448B39FC1ULL: /* 8bit */
+	case 0x6B169E6B155BADC0ULL: /* binary */
+		ret = RSPAMD_CTE_8BIT;
+		break;
+	case 0x6D69A5BB02A633B0ULL: /* quoted-printable */
+		ret = RSPAMD_CTE_QP;
+		break;
+	case 0x96305588A76DC9A9ULL: /* base64 */
+	case 0x171029DE1B0423A9ULL: /* base-64 */
+		ret = RSPAMD_CTE_B64;
+		break;
+	case 0x420b54dc00d13cecULL: /* uuencode */
+	case 0x8df6700b8f6c4cf9ULL: /* x-uuencode */
+	case 0x41f725ec544356d3ULL: /* x-uue */
+		ret = RSPAMD_CTE_UUE;
+		break;
+	}
+
+	return ret; /* RSPAMD_CTE_UNKNOWN when no hash matched */
+}
+
+static enum rspamd_cte
+rspamd_mime_part_get_cte_heuristic(struct rspamd_task *task,
+								   struct rspamd_mime_part *part)
+{
+	const guint check_len = 128; /* at most this many bytes are classified */
+	guint real_len, nspaces = 0, neqsign = 0, n8bit = 0, nqpencoded = 0,
+					padeqsign = 0, nupper = 0, nlower = 0;
+	gboolean b64_chars = TRUE;
+	const guchar *p, *end;
+	enum rspamd_cte ret = RSPAMD_CTE_UNKNOWN;
+	/* Guess the transfer encoding of `part` by looking at part->raw_data */
+	real_len = MIN(check_len, part->raw_data.len);
+	p = (const guchar *) part->raw_data.begin;
+	end = p + part->raw_data.len;
+
+	while (p < end && g_ascii_isspace(*p)) {
+		p++;
+	}
+	/* uuencoded data: "begin " or "begin-base64 " followed by spaces and a digit */
+	if (end - p > sizeof("begin-base64 ")) {
+		const guchar *uue_start;
+
+		if (memcmp(p, "begin ", sizeof("begin ") - 1) == 0) {
+			uue_start = p + sizeof("begin ") - 1;
+
+			while (uue_start < end && g_ascii_isspace(*uue_start)) {
+				uue_start++;
+			}
+
+			if (uue_start < end && g_ascii_isdigit(*uue_start)) {
+				return RSPAMD_CTE_UUE;
+			}
+		}
+		else if (memcmp(p, "begin-base64 ", sizeof("begin-base64 ") - 1) == 0) {
+			/* Skip the whole "begin-base64 " prefix, not just "begin " */
+			uue_start = p + sizeof("begin-base64 ") - 1;
+			while (uue_start < end && g_ascii_isspace(*uue_start)) {
+				uue_start++;
+			}
+
+			if (uue_start < end && g_ascii_isdigit(*uue_start)) {
+				return RSPAMD_CTE_UUE;
+			}
+		}
+	}
+
+	/* Skip trailing spaces */
+	while (end > p && g_ascii_isspace(*(end - 1))) {
+		end--;
+	}
+	/* Count and cut up to two trailing '=' characters (possible base64 padding) */
+	if (end > p + 2) {
+		if (*(end - 1) == '=') {
+			padeqsign++;
+			end--;
+		}
+
+		if (*(end - 1) == '=') {
+			padeqsign++;
+			end--;
+		}
+	}
+
+	/* Adjust end to analyse only first characters */
+	if (end - p > real_len) {
+		end = p + real_len;
+	}
+
+	while (p < end) {
+		if (*p == ' ') {
+			nspaces++;
+		}
+		else if (*p == '=') {
+			b64_chars = FALSE; /* Eqsign must not be inside base64 */
+			neqsign++;
+			p++;
+
+			if (p + 2 < end && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*(p + 1))) {
+				p++;
+				nqpencoded++;
+			}
+
+			continue;
+		}
+		else if (*p >= 0x80) {
+			n8bit++;
+			b64_chars = FALSE;
+		}
+		else if (!(g_ascii_isalnum(*p) || *p == '/' || *p == '+')) {
+			b64_chars = FALSE;
+		}
+		else if (g_ascii_isupper(*p)) {
+			nupper++;
+		}
+		else if (g_ascii_islower(*p)) {
+			nlower++;
+		}
+
+		p++;
+	}
+
+	if (b64_chars && neqsign <= 2 && nspaces == 0) {
+		/* Only base64 alphabet characters were seen: tell base64 from plain text */
+
+		if (part->raw_data.len > 80) {
+			if (padeqsign > 0) {
+				ret = RSPAMD_CTE_B64;
+			}
+			else {
+				/* We have a large piece of data with no spaces and base64
+				 * symbols only, no padding is detected as well...
+				 *
+				 * There is a small chance that our first 128 characters
+				 * are either some garbage or it is a base64 with no padding
+				 * (e.g. when it is not needed)
+				 */
+				if (nupper > 1 && nlower > 1) {
+					/*
+					 * We have both uppercase and lowercase letters, so it can be
+					 * base64
+					 */
+					ret = RSPAMD_CTE_B64;
+				}
+				else {
+					ret = RSPAMD_CTE_7BIT;
+				}
+			}
+		}
+		else {
+
+			if (((end - (const guchar *) part->raw_data.begin) + padeqsign) % 4 == 0) {
+				if (padeqsign == 0) {
+					/*
+					 * It can be either base64 or plain text, hard to say
+					 * Let's assume that if we have > 1 uppercase it is
+					 * likely base64
+					 */
+					if (nupper > 1 && nlower > 1) {
+						ret = RSPAMD_CTE_B64;
+					}
+					else {
+						ret = RSPAMD_CTE_7BIT;
+					}
+				}
+				else {
+					ret = RSPAMD_CTE_B64;
+				}
+			}
+			else {
+				/* No way */
+				if (padeqsign == 1 || padeqsign == 2) {
+					ret = RSPAMD_CTE_B64;
+				}
+				else {
+					ret = RSPAMD_CTE_7BIT;
+				}
+			}
+		}
+	}
+	else if (n8bit == 0) {
+		if (neqsign > 2 && nqpencoded > 2) {
+			ret = RSPAMD_CTE_QP;
+		}
+		else {
+			ret = RSPAMD_CTE_7BIT;
+		}
+	}
+	else {
+		ret = RSPAMD_CTE_8BIT;
+	}
+
+	msg_debug_mime("detected cte: %s", rspamd_cte_to_string(ret));
+
+	return ret;
+}
+
+static void
+rspamd_mime_part_get_cte(struct rspamd_task *task,
+						 struct rspamd_mime_headers_table *hdrs,
+						 struct rspamd_mime_part *part,
+						 gboolean apply_heuristic)
+{
+	struct rspamd_mime_header *hdr, *cur;
+	enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN;
+	gboolean parent_propagated = FALSE;
+	/* Set part->cte from the header, the parent part or (if allowed) the content heuristic */
+	hdr = rspamd_message_get_header_from_hash(hdrs, "Content-Transfer-Encoding", FALSE);
+
+	if (hdr == NULL) {
+		if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN &&
+			!(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) {
+			part->cte = part->parent_part->cte;
+			parent_propagated = TRUE;
+			/* Jump into the else branch below to validate the inherited value */
+			goto check_cte;
+		}
+
+		if (apply_heuristic) {
+			part->cte = rspamd_mime_part_get_cte_heuristic(task, part);
+			msg_info_task("detected missing CTE for part as: %s",
+						  rspamd_cte_to_string(part->cte));
+		}
+
+		part->flags |= RSPAMD_MIME_PART_MISSING_CTE;
+	}
+	else {
+		DL_FOREACH(hdr, cur)
+		{
+			gsize hlen;
+			gchar lc_buf[128];
+
+			hlen = rspamd_snprintf(lc_buf, sizeof(lc_buf), "%s", cur->value);
+			rspamd_str_lc(lc_buf, hlen);
+			cte = rspamd_mime_parse_cte(lc_buf, hlen);
+
+			if (cte != RSPAMD_CTE_UNKNOWN) {
+				part->cte = cte; /* the first recognised header value wins */
+				break;
+			}
+		}
+
+	check_cte:
+		if (apply_heuristic) {
+			if (part->cte == RSPAMD_CTE_UNKNOWN) {
+				part->cte = rspamd_mime_part_get_cte_heuristic(task, part);
+
+				msg_info_task("corrected bad CTE for part to: %s",
+							  rspamd_cte_to_string(part->cte));
+			}
+			else if (part->cte == RSPAMD_CTE_B64 ||
+					 part->cte == RSPAMD_CTE_QP) {
+				/* Additionally check sanity */
+				cte = rspamd_mime_part_get_cte_heuristic(task, part);
+
+				if (cte == RSPAMD_CTE_8BIT) {
+					msg_info_task(
+						"incorrect cte specified for part: %s, %s detected",
+						rspamd_cte_to_string(part->cte),
+						rspamd_cte_to_string(cte));
+					part->cte = cte;
+					part->flags |= RSPAMD_MIME_PART_BAD_CTE;
+				}
+				else if (cte != part->cte && parent_propagated) {
+					part->cte = cte;
+					msg_info_task("detected missing CTE for part as: %s",
+								  rspamd_cte_to_string(part->cte));
+				}
+			}
+			else { /* log part->cte: `cte` is still unknown when inherited from the parent */
+				msg_debug_mime("processed cte: %s",
+							   rspamd_cte_to_string(part->cte));
+			}
+		}
+		else { /* log part->cte: `cte` is still unknown when inherited from the parent */
+			msg_debug_mime("processed cte: %s", rspamd_cte_to_string(part->cte));
+		}
+	}
+}
+static void
+rspamd_mime_part_get_cd(struct rspamd_task *task, struct rspamd_mime_part *part)
+{
+	struct rspamd_mime_header *hdr, *cur;
+	struct rspamd_content_disposition *cd = NULL;
+	rspamd_ftok_t srch;
+	struct rspamd_content_type_param *found;
+	/* Fill part->cd from Content-Disposition, using Content-Type "name"/"filename" attrs as a fallback */
+	hdr = rspamd_message_get_header_from_hash(part->raw_headers,
+											  "Content-Disposition", FALSE);
+
+	/* No header at all: assume an inline part unless Content-Type names a file */
+	if (hdr == NULL) {
+		cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd));
+		cd->type = RSPAMD_CT_INLINE;
+
+		/* We can also have content disposition definitions in Content-Type */
+		if (part->ct && part->ct->attrs) {
+			RSPAMD_FTOK_ASSIGN(&srch, "name");
+			found = g_hash_table_lookup(part->ct->attrs, &srch);
+
+			if (!found) {
+				RSPAMD_FTOK_ASSIGN(&srch, "filename");
+				found = g_hash_table_lookup(part->ct->attrs, &srch);
+			}
+
+			if (found) {
+				cd->type = RSPAMD_CT_ATTACHMENT;
+				memcpy(&cd->filename, &found->value, sizeof(cd->filename));
+			}
+		}
+	}
+	else {
+		DL_FOREACH(hdr, cur)
+		{
+			gsize hlen;
+			cd = NULL; /* a fallback built for a previous malformed header is dropped */
+
+			if (cur->value) {
+				hlen = strlen(cur->value);
+				cd = rspamd_content_disposition_parse(cur->value, hlen,
+													  task->task_pool);
+			}
+
+			if (cd) {
+				/* We still need to check filename */
+				if (cd->filename.len == 0) {
+					if (part->ct && part->ct->attrs) {
+						RSPAMD_FTOK_ASSIGN(&srch, "name");
+						found = g_hash_table_lookup(part->ct->attrs, &srch);
+
+						if (!found) {
+							RSPAMD_FTOK_ASSIGN(&srch, "filename");
+							found = g_hash_table_lookup(part->ct->attrs, &srch);
+						}
+
+						if (found) {
+							cd->type = RSPAMD_CT_ATTACHMENT;
+							memcpy(&cd->filename, &found->value,
+								   sizeof(cd->filename));
+						}
+					}
+				}
+
+				msg_debug_mime("processed content disposition: %s, file: \"%T\"",
+							   cd->lc_data, &cd->filename);
+				break; /* the first header that parses successfully wins */
+			}
+			else if (part->ct) {
+				/*
+				 * Even in case of malformed Content-Disposition, we can still
+				 * fall back to Content-Type
+				 */
+				cd = rspamd_mempool_alloc0(task->task_pool, sizeof(*cd));
+				cd->type = RSPAMD_CT_INLINE;
+
+				/* We can also have content disposition definitions in Content-Type */
+				if (part->ct->attrs) {
+					RSPAMD_FTOK_ASSIGN(&srch, "name");
+					found = g_hash_table_lookup(part->ct->attrs, &srch);
+
+					if (!found) {
+						RSPAMD_FTOK_ASSIGN(&srch, "filename");
+						found = g_hash_table_lookup(part->ct->attrs, &srch);
+					}
+
+					if (found) {
+						cd->type = RSPAMD_CT_ATTACHMENT;
+						memcpy(&cd->filename, &found->value, sizeof(cd->filename));
+					}
+				}
+			} /* no break here: the remaining headers are still tried */
+		}
+	}
+
+	part->cd = cd; /* may be NULL when every header is malformed and part->ct is NULL */
+}
+
+void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part)
+{
+	/* Fixed 64-byte hashing key: Blake2b applied to string 'rspamd' */
+	static const guchar hash_key[] = {
+		0xef,
+		0x43,
+		0xae,
+		0x80,
+		0xcc,
+		0x8d,
+		0xc3,
+		0x4c,
+		0x6f,
+		0x1b,
+		0xd6,
+		0x18,
+		0x1b,
+		0xae,
+		0x87,
+		0x74,
+		0x0c,
+		0xca,
+		0xf7,
+		0x8e,
+		0x5f,
+		0x2e,
+		0x54,
+		0x32,
+		0xf6,
+		0x79,
+		0xb9,
+		0x27,
+		0x26,
+		0x96,
+		0x20,
+		0x92,
+		0x70,
+		0x07,
+		0x85,
+		0xeb,
+		0x83,
+		0xf7,
+		0x89,
+		0xe0,
+		0xd7,
+		0x32,
+		0x2a,
+		0xd2,
+		0x1a,
+		0x64,
+		0x41,
+		0xef,
+		0x49,
+		0xff,
+		0xc3,
+		0x8c,
+		0x54,
+		0xf9,
+		0x67,
+		0x74,
+		0x30,
+		0x1e,
+		0x70,
+		0x2e,
+		0xb7,
+		0x12,
+		0x09,
+		0xfe,
+	};
+	/* Parts with no parsed data are skipped, so part->digest is left as it was */
+	if (part->parsed_data.len > 0) {
+		rspamd_cryptobox_hash(part->digest,
+							  part->parsed_data.begin, part->parsed_data.len,
+							  hash_key, sizeof(hash_key));
+	}
+}
+
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_normal_part(struct rspamd_task *task,
+							  struct rspamd_mime_part *part,
+							  struct rspamd_mime_parser_ctx *st,
+							  struct rspamd_content_type *ct,
+							  GError **err)
+{
+	rspamd_fstring_t *parsed;
+	gssize r;
+	/* Decode part->raw_data according to the part's CTE and append the part to the task's parts */
+	g_assert(part != NULL);
+
+	rspamd_mime_part_get_cte(task, part->raw_headers, part,
+							 part->ct && !(part->ct->flags & RSPAMD_CONTENT_TYPE_MESSAGE));
+	rspamd_mime_part_get_cd(task, part);
+
+	switch (part->cte) {
+	case RSPAMD_CTE_7BIT:
+	case RSPAMD_CTE_8BIT:
+	case RSPAMD_CTE_UNKNOWN:
+		if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
+			if (part->cte != RSPAMD_CTE_7BIT) {
+				/* We have something that has a missing content-type,
+				 * but it has non-7bit characters.
+				 *
+				 * In theory, it is very unsafe to process it as a text part
+				 * as we unlikely get some sane result
+				 */
+
+				/*
+				 * On the other hand, there is an evidence that some
+				 * emails actually rely on that.
+				 * So we apply an expensive hack here:
+				 * if there are no 8bit characters -OR- the content is valid
+				 * UTF8, we can still imply Content-Type == text/plain
+				 */
+
+				if (rspamd_str_has_8bit(part->raw_data.begin, part->raw_data.len) &&
+					!rspamd_fast_utf8_validate(part->raw_data.begin, part->raw_data.len)) {
+					part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT;
+					part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+				}
+			}
+		}
+
+		if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT)) {
+			/* Need to copy text as we have couple of in-place change functions */
+			parsed = rspamd_fstring_sized_new(part->raw_data.len);
+			parsed->len = part->raw_data.len;
+			memcpy(parsed->str, part->raw_data.begin, parsed->len);
+			part->parsed_data.begin = parsed->str;
+			part->parsed_data.len = parsed->len;
+			rspamd_mempool_notify_alloc(task->task_pool, parsed->len);
+			rspamd_mempool_add_destructor(task->task_pool,
+										  (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed);
+		}
+		else {
+			part->parsed_data.begin = part->raw_data.begin;
+			part->parsed_data.len = part->raw_data.len;
+		}
+		break;
+	case RSPAMD_CTE_QP:
+		parsed = rspamd_fstring_sized_new(part->raw_data.len);
+		r = rspamd_decode_qp_buf(part->raw_data.begin, part->raw_data.len,
+								 parsed->str, parsed->allocated);
+		if (r != -1) {
+			parsed->len = r;
+			part->parsed_data.begin = parsed->str;
+			part->parsed_data.len = parsed->len;
+			rspamd_mempool_notify_alloc(task->task_pool, parsed->len);
+			rspamd_mempool_add_destructor(task->task_pool,
+										  (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed);
+		}
+		else {
+			msg_err_task("invalid quoted-printable encoded part, assume 8bit");
+			if (part->ct) {
+				part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+			}
+			part->cte = RSPAMD_CTE_8BIT;
+			memcpy(parsed->str, part->raw_data.begin, part->raw_data.len);
+			parsed->len = part->raw_data.len;
+			part->parsed_data.begin = parsed->str;
+			part->parsed_data.len = parsed->len;
+			rspamd_mempool_notify_alloc(task->task_pool, parsed->len);
+			rspamd_mempool_add_destructor(task->task_pool,
+										  (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed);
+		}
+		break;
+	case RSPAMD_CTE_B64:
+		parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12);
+		rspamd_cryptobox_base64_decode(part->raw_data.begin,
+									   part->raw_data.len,
+									   parsed->str, &parsed->len);
+		part->parsed_data.begin = parsed->str;
+		part->parsed_data.len = parsed->len;
+		rspamd_mempool_notify_alloc(task->task_pool, parsed->len);
+		rspamd_mempool_add_destructor(task->task_pool,
+									  (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed);
+		break;
+	case RSPAMD_CTE_UUE:
+		parsed = rspamd_fstring_sized_new(part->raw_data.len / 4 * 3 + 12);
+		r = rspamd_decode_uue_buf(part->raw_data.begin, part->raw_data.len,
+								  parsed->str, parsed->allocated);
+		rspamd_mempool_add_destructor(task->task_pool,
+									  (rspamd_mempool_destruct_t) rspamd_fstring_free, parsed);
+		if (r != -1) {
+			parsed->len = r;
+			part->parsed_data.begin = parsed->str;
+			part->parsed_data.len = parsed->len;
+		}
+		else {
+			msg_err_task("invalid uuencoding in encoded part, assume 8bit");
+			if (part->ct) {
+				part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+			}
+			part->cte = RSPAMD_CTE_8BIT;
+			parsed->len = MIN(part->raw_data.len, parsed->allocated);
+			memcpy(parsed->str, part->raw_data.begin, parsed->len);
+			part->parsed_data.begin = parsed->str;
+			part->parsed_data.len = parsed->len;
+		}
+		/* Report the buffer once, after parsed->len holds its final value (as the QP/base64 branches do) */
+		rspamd_mempool_notify_alloc(task->task_pool, parsed->len);
+		break;
+	default:
+		g_assert_not_reached();
+	}
+
+	part->part_number = MESSAGE_FIELD(task, parts)->len;
+	part->urls = g_ptr_array_new();
+	g_ptr_array_add(MESSAGE_FIELD(task, parts), part);
+	msg_debug_mime("parsed data part %T/%T of length %z (%z orig), %s cte",
+				   &part->ct->type, &part->ct->subtype, part->parsed_data.len,
+				   part->raw_data.len, rspamd_cte_to_string(part->cte));
+	rspamd_mime_parser_calc_digest(part);
+	/* S/MIME: signed PKCS#7 data may carry another MIME entity, which is copied out and parsed too */
+	if (ct && (ct->flags & RSPAMD_CONTENT_TYPE_SMIME)) {
+		CMS_ContentInfo *cms;
+		const unsigned char *der_beg = part->parsed_data.begin;
+		cms = d2i_CMS_ContentInfo(NULL, &der_beg, part->parsed_data.len);
+
+		if (cms) {
+			const ASN1_OBJECT *asn_ct = CMS_get0_eContentType(cms);
+			int ct_nid = OBJ_obj2nid(asn_ct);
+
+			if (ct_nid == NID_pkcs7_data) {
+				BIO *bio = BIO_new_mem_buf(part->parsed_data.begin,
+										   part->parsed_data.len);
+
+				PKCS7 *p7;
+				p7 = d2i_PKCS7_bio(bio, NULL);
+
+				if (p7) {
+					ct_nid = OBJ_obj2nid(p7->type);
+
+					if (ct_nid == NID_pkcs7_signed) {
+						PKCS7 *p7_signed_content = p7->d.sign->contents;
+
+						ct_nid = OBJ_obj2nid(p7_signed_content->type);
+
+						if (ct_nid == NID_pkcs7_data && p7_signed_content->d.data) {
+							int ret;
+
+							msg_debug_mime("found an additional part inside of "
+										   "smime structure of type %T/%T; length=%d",
+										   &ct->type, &ct->subtype, p7_signed_content->d.data->length);
+							/*
+							 * Since ASN.1 structures are freed, we need to copy
+							 * the content
+							 */
+							gchar *cpy = rspamd_mempool_alloc(task->task_pool,
+															  p7_signed_content->d.data->length);
+							memcpy(cpy, p7_signed_content->d.data->data,
+								   p7_signed_content->d.data->length);
+							ret = rspamd_mime_process_multipart_node(task,
+																	 st, NULL,
+																	 cpy, cpy + p7_signed_content->d.data->length,
+																	 TRUE, err);
+
+							PKCS7_free(p7);
+							BIO_free(bio);
+							CMS_ContentInfo_free(cms);
+
+							return ret;
+						}
+					}
+
+					PKCS7_free(p7);
+				}
+
+				BIO_free(bio);
+			}
+
+			CMS_ContentInfo_free(cms);
+		}
+	}
+
+	return RSPAMD_MIME_PARSE_OK;
+}
+/*
+ * State shared by rspamd_mime_parse_multipart_part(),
+ * rspamd_multipart_boundaries_filter() and rspamd_mime_parse_multipart_cb()
+ * while the children of a single multipart part are being extracted.
+ */
+struct rspamd_mime_multipart_cbdata {
+	struct rspamd_task *task;           /* task being parsed */
+	struct rspamd_mime_part *multipart; /* multipart part whose children are extracted */
+	struct rspamd_mime_parser_ctx *st;  /* parser context; its `pos` is moved along with part_start */
+	const gchar *part_start;            /* start of the child being collected, NULL before the first boundary */
+	rspamd_ftok_t *cur_boundary;        /* boundary in use, NULL until it is known or guessed */
+	guint64 bhash;                      /* hash that is compared with the hashes of the found boundaries */
+	GError **err;                       /* error location passed to the child parsers */
+};
+/*
+ * Parses a single child part located in [start, end) and, when `multipart`
+ * is not NULL, attaches it to the children of that multipart.
+ *
+ * The region is split into headers and body, a Content-Type is selected
+ * (text/plain when none can be parsed) and the part is handed over to the
+ * multipart, message or normal part parser.
+ *
+ * @param task current task; the new part is allocated from task->task_pool
+ * @param st parser context; nesting and stack are updated for containers
+ * @param multipart parent multipart part or NULL
+ * @param start first byte of the child
+ * @param end one byte past the last byte of the child
+ * @param is_finished when FALSE, a header-less region that has no
+ *        alphanumeric characters is treated as garbage and skipped
+ * @param err error location forwarded to the nested parsers
+ * @return RSPAMD_MIME_PARSE_NO_PART for skipped garbage, otherwise the
+ *         result of the nested parser that has been selected
+ */
+static enum rspamd_mime_parse_error
+rspamd_mime_process_multipart_node(struct rspamd_task *task,
+								   struct rspamd_mime_parser_ctx *st,
+								   struct rspamd_mime_part *multipart,
+								   const gchar *start, const gchar *end,
+								   gboolean is_finished,
+								   GError **err)
+{
+	struct rspamd_content_type *ct, *sel = NULL;
+	struct rspamd_mime_header *hdr = NULL, *cur;
+	struct rspamd_mime_part *npart;
+	GString str;
+	goffset hdr_pos, body_pos;
+	enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL;
+
+	/* Wrap the region into a GString (no copy) to search for the end of headers */
+	str.str = (gchar *) start;
+	str.len = end - start;
+
+	if (*start == '\n' || *start == '\r') {
+		/*
+		 * We have a part that starts with a newline, which means that
+		 * there are no headers at all in this part,
+		 * hence we treat it as a text part
+		 */
+		hdr_pos = 0;
+		body_pos = 0;
+
+		if (!is_finished) {
+			/*
+			 * Ignore garbage: a region without a single alphanumeric
+			 * character is not reported as a part
+			 */
+			const gchar *p = start;
+			gboolean seen_something = FALSE;
+
+			while (p < end) {
+				if (g_ascii_isalnum(*p)) {
+					seen_something = TRUE;
+					break;
+				}
+				p++;
+			}
+
+			if (!seen_something) {
+				return RSPAMD_MIME_PARSE_NO_PART;
+			}
+		}
+	}
+	else {
+		hdr_pos = rspamd_string_find_eoh(&str, &body_pos);
+	}
+
+	npart = rspamd_mempool_alloc0(task->task_pool,
+								  sizeof(struct rspamd_mime_part));
+	npart->parent_part = multipart;
+	npart->raw_headers = rspamd_message_headers_new();
+	npart->headers_order = NULL;
+	/* Register the new part as a child of its parent, creating the array lazily */
+	if (multipart) {
+		if (multipart->specific.mp->children == NULL) {
+			multipart->specific.mp->children = g_ptr_array_sized_new(2);
+		}
+
+		g_ptr_array_add(multipart->specific.mp->children, npart);
+	}
+	/* Headers are processed only if their end lies strictly inside the region */
+	if (hdr_pos > 0 && hdr_pos < str.len) {
+		npart->raw_headers_str = str.str;
+		npart->raw_headers_len = hdr_pos;
+		npart->raw_data.begin = start + body_pos;
+		npart->raw_data.len = (end - start) - body_pos;
+
+		if (npart->raw_headers_len > 0) {
+			rspamd_mime_headers_process(task, npart->raw_headers,
+										&npart->headers_order,
+										npart->raw_headers_str,
+										npart->raw_headers_len,
+										FALSE);
+
+			/* Preserve the natural order */
+			if (npart->headers_order) {
+				LL_REVERSE2(npart->headers_order, ord_next);
+			}
+		}
+
+		hdr = rspamd_message_get_header_from_hash(npart->raw_headers,
+												  "Content-Type", FALSE);
+	}
+	else {
+		/* No headers: the whole region is the body */
+		npart->raw_headers_str = 0;
+		npart->raw_headers_len = 0;
+		npart->raw_data.begin = start;
+		npart->raw_data.len = end - start;
+	}
+
+	/* Several Content-Type headers may be present, so each of them is parsed */
+	if (hdr != NULL) {
+
+		DL_FOREACH(hdr, cur)
+		{
+			ct = rspamd_content_type_parse(cur->value, strlen(cur->value),
+										   task->task_pool);
+
+			/*
+			 * The first parsed content-type is selected, but any later
+			 * multipart content-type overrides the selection
+			 */
+			if (ct) {
+				if (sel == NULL) {
+					sel = ct;
+				}
+				else if (ct->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+					sel = ct;
+				}
+			}
+		}
+	}
+	/* Nothing usable has been found: fall back to text/plain (no flags are set) */
+	if (sel == NULL) {
+		sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel));
+		RSPAMD_FTOK_ASSIGN(&sel->type, "text");
+		RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain");
+	}
+
+	npart->ct = sel;
+	/* Containers are pushed to the parser stack before their content is parsed */
+	if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+		st->nesting++;
+		g_ptr_array_add(st->stack, npart);
+		npart->part_type = RSPAMD_MIME_PART_MULTIPART;
+		npart->specific.mp = rspamd_mempool_alloc0(task->task_pool,
+												   sizeof(struct rspamd_mime_multipart));
+		memcpy(&npart->specific.mp->boundary, &sel->orig_boundary,
+			   sizeof(rspamd_ftok_t));
+		ret = rspamd_mime_parse_multipart_part(task, npart, st, err);
+	}
+	else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
+		st->nesting++;
+		g_ptr_array_add(st->stack, npart);
+		npart->part_type = RSPAMD_MIME_PART_MESSAGE;
+		/* The part itself is decoded first, then its payload is parsed as a message */
+		if ((ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err)) == RSPAMD_MIME_PARSE_OK) {
+			ret = rspamd_mime_parse_message(task, npart, st, err);
+		}
+	}
+	else {
+		ret = rspamd_mime_parse_normal_part(task, npart, st, sel, err);
+	}
+
+	return ret;
+}
+
+/*
+ * Handles one boundary `b` that belongs to the multipart described by `cb`.
+ *
+ * The first boundary only marks the start of the first child. Every
+ * subsequent boundary terminates the child that starts at cb->part_start:
+ * this child is parsed and, if the boundary has a positive data offset,
+ * the start of the next child is recorded.
+ *
+ * @return RSPAMD_MIME_PARSE_OK or the error returned for the child
+ */
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_multipart_cb(struct rspamd_task *task,
+							   struct rspamd_mime_part *multipart,
+							   struct rspamd_mime_parser_ctx *st,
+							   struct rspamd_mime_multipart_cbdata *cb,
+							   struct rspamd_mime_boundary *b)
+{
+	const gchar *boundary_pos = st->start + b->boundary;
+	enum rspamd_mime_parse_error rc;
+
+	task = cb->task;
+
+	if (cb->part_start == NULL) {
+		/* The very first boundary: remember where the first child begins */
+		cb->part_start = st->start + b->start;
+		st->pos = cb->part_start;
+
+		return RSPAMD_MIME_PARSE_OK;
+	}
+
+	/*
+	 * We have seen the start of the boundary,
+	 * but it might be unsuitable (e.g. in broken headers)
+	 */
+	if (cb->part_start >= boundary_pos || cb->cur_boundary == NULL) {
+		/* We have an empty boundary, do nothing */
+		return RSPAMD_MIME_PARSE_OK;
+	}
+
+	rc = rspamd_mime_process_multipart_node(task, cb->st,
+											cb->multipart, cb->part_start,
+											boundary_pos, TRUE, cb->err);
+
+	if (rc != RSPAMD_MIME_PARSE_OK) {
+		return rc;
+	}
+
+	if (b->start > 0) {
+		/* Go towards the next part */
+		cb->part_start = st->start + b->start;
+		cb->st->pos = cb->part_start;
+	}
+
+	return RSPAMD_MIME_PARSE_OK;
+}
+/*
+ * Selects the boundaries that belong to `multipart` from st->boundaries and
+ * passes them, in order, to rspamd_mime_parse_multipart_cb().
+ *
+ * If the boundary is not known in advance (cb->cur_boundary is NULL), the
+ * first boundary found at or after the start of the multipart body is taken
+ * as the current one.
+ *
+ * @return RSPAMD_MIME_PARSE_OK or the first error returned by the callback
+ */
+static enum rspamd_mime_parse_error
+rspamd_multipart_boundaries_filter(struct rspamd_task *task,
+								   struct rspamd_mime_part *multipart,
+								   struct rspamd_mime_parser_ctx *st,
+								   struct rspamd_mime_multipart_cbdata *cb)
+{
+	struct rspamd_mime_boundary *cur;
+	goffset last_offset;
+	guint i, sel = 0;
+	enum rspamd_mime_parse_error ret;
+	/* Offset of the first byte after the multipart body, relative to st->start */
+	last_offset = (multipart->raw_data.begin - st->start) +
+				  multipart->raw_data.len;
+
+	/* Find the first offset suitable for this part */
+	for (i = 0; i < st->boundaries->len; i++) {
+		cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i);
+
+		if (cur->start >= multipart->raw_data.begin - st->start) {
+			if (cb->cur_boundary) {
+				/* Check boundary */
+				msg_debug_mime("compare %L and %L (and %L)",
+							   cb->bhash, cur->hash, cur->closed_hash);
+
+				if (cb->bhash == cur->hash) {
+					sel = i;
+					break;
+				}
+				else if (cb->bhash == cur->closed_hash) {
+					/*
+					 * Not a closing element in fact: the trailing dashes
+					 * are a part of the expected boundary itself
+					 */
+					cur->flags &= ~(RSPAMD_MIME_BOUNDARY_FLAG_CLOSED);
+					cur->hash = cur->closed_hash;
+					sel = i;
+					break;
+				}
+			}
+			else {
+				/*
+				 * Set current boundary: only the hash of the guessed
+				 * boundary is used afterwards, its length is left as 0
+				 */
+				cb->cur_boundary = rspamd_mempool_alloc(task->task_pool,
+														sizeof(rspamd_ftok_t));
+				cb->cur_boundary->begin = st->start + cur->boundary;
+				cb->cur_boundary->len = 0;
+				cb->bhash = cur->hash;
+				sel = i;
+				break;
+			}
+		}
+	}
+
+	/* Now we can go forward with boundaries that are the same as the selected one */
+	for (i = sel; i < st->boundaries->len; i++) {
+		cur = &g_array_index(st->boundaries, struct rspamd_mime_boundary, i);
+
+		if (cur->boundary > last_offset) {
+			/* This boundary lies beyond the end of the multipart body */
+			break;
+		}
+
+		if (cur->hash == cb->bhash || cur->closed_hash == cb->bhash) {
+			if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st,
+													  cb, cur)) != RSPAMD_MIME_PARSE_OK) {
+				return ret;
+			}
+
+			if (cur->closed_hash == cb->bhash) {
+				/* We have again fake closed hash */
+				cur->flags &= ~(RSPAMD_MIME_BOUNDARY_FLAG_CLOSED);
+				cur->hash = cur->closed_hash;
+			}
+
+			if (RSPAMD_BOUNDARY_IS_CLOSED(cur)) {
+				/*
+				 * We also might check the next boundary: processing goes on
+				 * if it matches the current boundary, otherwise the closing
+				 * boundary terminates this multipart
+				 */
+				if (i < st->boundaries->len - 1) {
+					cur = &g_array_index(st->boundaries,
+										 struct rspamd_mime_boundary, i + 1);
+
+					if (cur->hash == cb->bhash) {
+						continue;
+					}
+					else if (cur->closed_hash == cb->bhash) {
+						/* We have again fake closed hash */
+						cur->flags &= ~(RSPAMD_MIME_BOUNDARY_FLAG_CLOSED);
+						cur->hash = cur->closed_hash;
+						continue;
+					}
+				}
+
+				break;
+			}
+		}
+	}
+
+	if (i == st->boundaries->len && cb->cur_boundary) {
+		/*
+		 * Process the last part: all boundaries have been consumed, so a
+		 * fake boundary placed at the end of the body closes the pending
+		 * child. Only the `boundary` and `start` fields are initialised as
+		 * these are the only fields read by the callback.
+		 */
+		struct rspamd_mime_boundary fb;
+
+		fb.boundary = last_offset;
+		fb.start = -1;
+
+		if ((ret = rspamd_mime_parse_multipart_cb(task, multipart, st,
+												  cb, &fb)) != RSPAMD_MIME_PARSE_OK) {
+			return ret;
+		}
+	}
+
+	return RSPAMD_MIME_PARSE_OK;
+}
+/*
+ * Parses a multipart container: registers `part` in the list of the message
+ * parts, determines the boundary to look for and extracts the children via
+ * rspamd_multipart_boundaries_filter().
+ *
+ * The last element of st->stack is removed before returning from the normal
+ * path, so `part` is expected to be pushed there by the caller.
+ *
+ * @param task current task
+ * @param part multipart part with the content type already assigned
+ * @param st parser context
+ * @param err set when the nesting limit is exceeded
+ * @return RSPAMD_MIME_PARSE_NESTING when st->nesting exceeds max_nested,
+ *         otherwise the result of the boundaries filter
+ */
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_multipart_part(struct rspamd_task *task,
+								 struct rspamd_mime_part *part,
+								 struct rspamd_mime_parser_ctx *st,
+								 GError **err)
+{
+	struct rspamd_mime_multipart_cbdata cbdata;
+	enum rspamd_mime_parse_error ret;
+
+	if (st->nesting > max_nested) {
+		g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
+					st->nesting);
+		return RSPAMD_MIME_PARSE_NESTING;
+	}
+	/* The container is registered before any of its children */
+	part->part_number = MESSAGE_FIELD(task, parts)->len;
+	part->urls = g_ptr_array_new();
+	g_ptr_array_add(MESSAGE_FIELD(task, parts), part);
+	st->nesting++; /* paired with the decrement in the cleanup below */
+	rspamd_mime_part_get_cte(task, part->raw_headers, part, FALSE);
+	/* Children are searched starting from the beginning of the body */
+	st->pos = part->raw_data.begin;
+	cbdata.multipart = part;
+	cbdata.task = task;
+	cbdata.st = st;
+	cbdata.part_start = NULL;
+	cbdata.err = err;
+
+	if (part->ct->boundary.len > 0) {
+		/* We know our boundary, so its hash is compared with the found ones */
+		cbdata.cur_boundary = &part->ct->boundary;
+		rspamd_cryptobox_siphash((guchar *) &cbdata.bhash,
+								 cbdata.cur_boundary->begin, cbdata.cur_boundary->len,
+								 lib_ctx->hkey);
+		msg_debug_mime("hash: %T -> %L", cbdata.cur_boundary, cbdata.bhash);
+	}
+	else {
+		/* Guess boundary: it is selected later by the boundaries filter */
+		cbdata.cur_boundary = NULL;
+		cbdata.bhash = 0;
+	}
+
+	ret = rspamd_multipart_boundaries_filter(task, part, st, &cbdata);
+	/* Cleanup stack */
+	st->nesting--;
+	g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1);
+
+	return ret;
+}
+
+/*
+ * Process boundary like structures in a message.
+ *
+ * Multipattern callback: for a match that ends at `match_pos` the rest of
+ * the line is treated as a boundary candidate. Its offsets and the hashes of
+ * its lowercased form are appended to st->boundaries, where `st` is the
+ * parser context passed as `context`.
+ *
+ * @return always 0
+ */
+static gint
+rspamd_mime_preprocess_cb(struct rspamd_multipattern *mp,
+						  guint strnum,
+						  gint match_start,
+						  gint match_pos,
+						  const gchar *text,
+						  gsize len,
+						  void *context)
+{
+	const gchar *end = text + len, *p = text + match_pos, *bend;
+	gsize blen;
+	gboolean closing = FALSE;
+	struct rspamd_mime_boundary b;
+	struct rspamd_mime_parser_ctx *st = context;
+	struct rspamd_task *task;
+
+	task = st->task;
+
+	if (G_LIKELY(p < end)) {
+
+		blen = 0;
+		/* Measure the candidate: everything up to the end of the line */
+		while (p < end) {
+			if (*p == '\r' || *p == '\n') {
+				break;
+			}
+
+			blen++;
+			p++;
+		}
+
+		if (blen > 0) {
+			/* We have found something like boundary */
+			p = text + match_pos;
+			bend = p + blen - 1;
+
+			if (*bend == '-') {
+				/* We need to verify last -- */
+				if (bend > p + 1 && *(bend - 1) == '-') {
+					/* The trailing dashes are excluded from the boundary length */
+					closing = TRUE;
+					bend--;
+					blen -= 2;
+				}
+				else {
+					/* Not a closing boundary somehow, e.g. if a boundary=='-' */
+					bend++;
+				}
+			}
+			else {
+				bend++;
+			}
+			/*
+			 * Skip the spaces and a single line ending that follow the
+			 * boundary. For a closing boundary `bend` points to the trailing
+			 * dashes, so the loop stops at once.
+			 */
+			while (bend < end) {
+				if (*bend == '\r') {
+					bend++;
+
+					/* \r\n */
+					if (bend < end && *bend == '\n') {
+						bend++;
+					}
+				}
+				else if (*bend == '\n') {
+					/* \n */
+					bend++;
+				}
+				else if (g_ascii_isspace(*bend)) {
+					/* Spaces in the same line, skip them */
+					bend++;
+					continue;
+				}
+
+				break;
+			}
+			/*
+			 * Both offsets are relative to st->start. The boundary offset is
+			 * placed two bytes before the candidate, presumably to cover the
+			 * leading dashes of the matched pattern (to be confirmed against
+			 * the patterns in lib_ctx->mp_boundary).
+			 */
+			b.boundary = p - st->start - 2;
+			b.start = bend - st->start;
+
+			/* Small optimisation as boundaries are usually short strings */
+			gchar *lc_copy, lc_copy_buf[128];
+
+			if (blen + 2 < sizeof(lc_copy_buf)) {
+				lc_copy = lc_copy_buf;
+			}
+			else {
+				lc_copy = g_malloc(blen + 2);
+			}
+
+			if (closing) {
+				/* Copy the trailing dashes as well: they are needed for closed_hash */
+				memcpy(lc_copy, p, blen + 2);
+				rspamd_str_lc(lc_copy, blen + 2);
+			}
+			else {
+				memcpy(lc_copy, p, blen);
+				rspamd_str_lc(lc_copy, blen);
+			}
+			/* The normal hash never covers the trailing dashes */
+			rspamd_cryptobox_siphash((guchar *) &b.hash, lc_copy, blen,
+									 lib_ctx->hkey);
+			msg_debug_mime("normal hash: %*s -> %L, %d boffset, %d data offset",
+						   (gint) blen, lc_copy, b.hash, (int) b.boundary, (int) b.start);
+			/* closed_hash covers the trailing dashes for boundaries that end with them */
+			if (closing) {
+				b.flags = RSPAMD_MIME_BOUNDARY_FLAG_CLOSED;
+				rspamd_cryptobox_siphash((guchar *) &b.closed_hash, lc_copy,
+										 blen + 2,
+										 lib_ctx->hkey);
+				msg_debug_mime("closing hash: %*s -> %L, %d boffset, %d data offset",
+							   (gint) blen + 2, lc_copy,
+							   b.closed_hash,
+							   (int) b.boundary, (int) b.start);
+			}
+			else {
+				b.flags = 0;
+				b.closed_hash = 0;
+			}
+
+			/* Check if a string has been allocated on the heap */
+			if (blen + 2 >= sizeof(lc_copy_buf)) {
+				g_free(lc_copy);
+			}
+			g_array_append_val(st->boundaries, b);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Checks whether the input looks like a single "name: value" header line.
+ *
+ * Only the first 76 bytes (or the whole input if it is shorter) are
+ * inspected: the name must consist of printable non-space characters
+ * followed by a colon, optional spaces and at least one more character.
+ *
+ * @param input buffer to inspect
+ * @param body_start if not NULL, it is set to input->len on success
+ * @return input->len if the input looks like a header, -1 otherwise
+ */
+static goffset
+rspamd_mime_parser_headers_heuristic(GString *input, goffset *body_start)
+{
+	const gsize default_max_len = 76;
+	const gchar *cur = input->str;
+	const gchar *limit = cur + MIN(input->len, default_max_len);
+
+	/* Header name: everything before the colon must be a graphic character */
+	while (cur < limit && *cur != ':') {
+		if (!g_ascii_isgraph(*cur)) {
+			return (-1);
+		}
+
+		cur++;
+	}
+
+	if (cur >= limit) {
+		/* No colon within the inspected window */
+		return (-1);
+	}
+
+	/* Step over the colon and the spaces that may follow it */
+	cur++;
+
+	while (cur < limit && g_ascii_isspace(*cur)) {
+		cur++;
+	}
+
+	if (cur >= limit) {
+		/* Nothing except the colon and optional spaces: no value */
+		return (-1);
+	}
+
+	/* We accept any value, so the whole input is treated as headers */
+	if (body_start) {
+		*body_start = input->len;
+	}
+
+	return input->len;
+}
+
+/*
+ * Collects all boundary-like strings of a message into st->boundaries.
+ *
+ * If the body of `top` starts at or after the current parser position, the
+ * body is scanned together with the byte that precedes it; otherwise the
+ * scan covers everything from the current position to the end of the input.
+ */
+static void
+rspamd_mime_preprocess_message(struct rspamd_task *task,
+							   struct rspamd_mime_part *top,
+							   struct rspamd_mime_parser_ctx *st)
+{
+	const gchar *scan_begin;
+	gsize scan_len;
+
+	if (top->raw_data.begin < st->pos) {
+		scan_begin = st->pos;
+		scan_len = st->end - st->pos;
+	}
+	else {
+		scan_begin = top->raw_data.begin - 1;
+		scan_len = top->raw_data.len + 1;
+	}
+
+	rspamd_multipattern_lookup(lib_ctx->mp_boundary,
+							   scan_begin,
+							   scan_len,
+							   rspamd_mime_preprocess_cb, st, NULL);
+}
+
+/*
+ * Releases a parser context together with its stack of parts and its array
+ * of boundaries. NULL is silently ignored.
+ */
+static void
+rspamd_mime_parse_stack_free(struct rspamd_mime_parser_ctx *st)
+{
+	if (st == NULL) {
+		return;
+	}
+
+	g_ptr_array_free(st->stack, TRUE);
+	g_array_free(st->boundaries, TRUE);
+	g_free(st);
+}
+
+/*
+ * Parses a message: either the top-level one (part == NULL), taken from
+ * task->msg, or a message embedded into `part`, taken from the decoded
+ * content of that part.
+ *
+ * Headers are separated from the body, a Content-Type is selected
+ * (text/plain if none is found) and the body is passed to the multipart,
+ * message or normal part parser. Afterwards the boundaries that have not
+ * been consumed are inspected and the data that follows them is parsed as
+ * additional parts.
+ *
+ * For an embedded message a separate parser context is allocated; it is
+ * released on every return path that follows the allocation.
+ *
+ * @param task current task
+ * @param part part that contains the message or NULL for the top level
+ * @param st parser context of the enclosing message
+ * @param err set when the nesting limit is exceeded; also forwarded to the
+ *        nested parsers
+ * @return RSPAMD_MIME_PARSE_OK on success or a parser error code
+ */
+static enum rspamd_mime_parse_error
+rspamd_mime_parse_message(struct rspamd_task *task,
+						  struct rspamd_mime_part *part,
+						  struct rspamd_mime_parser_ctx *st,
+						  GError **err)
+{
+	struct rspamd_content_type *ct, *sel = NULL;
+	struct rspamd_mime_header *hdr = NULL, *cur;
+	const gchar *pbegin, *p;
+	gsize plen, len;
+	struct rspamd_mime_part *npart;
+	goffset hdr_pos, body_pos;
+	guint i;
+	enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_OK;
+	GString str;
+	struct rspamd_mime_parser_ctx *nst = st;
+
+	if (st->nesting > max_nested) {
+		g_set_error(err, RSPAMD_MIME_QUARK, E2BIG, "Nesting level is too high: %d",
+					st->nesting);
+		return RSPAMD_MIME_PARSE_NESTING;
+	}
+
+	/* Allocate real part */
+	npart = rspamd_mempool_alloc0(task->task_pool,
+								  sizeof(struct rspamd_mime_part));
+
+	if (part == NULL) {
+		/* Top level message */
+		p = task->msg.begin;
+		len = task->msg.len;
+
+		str.str = (gchar *) p;
+		str.len = len;
+
+		hdr_pos = rspamd_string_find_eoh(&str, &body_pos);
+
+		if (hdr_pos > 0 && hdr_pos < str.len) {
+
+			MESSAGE_FIELD(task, raw_headers_content).begin = str.str;
+			MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos;
+			MESSAGE_FIELD(task, raw_headers_content).body_start = str.str + body_pos;
+
+			if (MESSAGE_FIELD(task, raw_headers_content).len > 0) {
+				rspamd_mime_headers_process(task,
+											MESSAGE_FIELD(task, raw_headers),
+											&MESSAGE_FIELD(task, headers_order),
+											MESSAGE_FIELD(task, raw_headers_content).begin,
+											MESSAGE_FIELD(task, raw_headers_content).len,
+											TRUE);
+				npart->raw_headers = rspamd_message_headers_ref(
+					MESSAGE_FIELD(task, raw_headers));
+
+				/* Preserve the natural order */
+				if (MESSAGE_FIELD(task, headers_order)) {
+					LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next);
+				}
+			}
+
+			hdr = rspamd_message_get_header_from_hash(
+				MESSAGE_FIELD(task, raw_headers),
+				"Content-Type", FALSE);
+		}
+		else {
+			/* First apply heuristic, maybe we have just headers */
+			hdr_pos = rspamd_mime_parser_headers_heuristic(&str, &body_pos);
+
+			if (hdr_pos > 0 && hdr_pos <= str.len) {
+				MESSAGE_FIELD(task, raw_headers_content).begin = str.str;
+				MESSAGE_FIELD(task, raw_headers_content).len = hdr_pos;
+				MESSAGE_FIELD(task, raw_headers_content).body_start = str.str +
+																	  body_pos;
+
+				if (MESSAGE_FIELD(task, raw_headers_content).len > 0) {
+					rspamd_mime_headers_process(task,
+												MESSAGE_FIELD(task, raw_headers),
+												&MESSAGE_FIELD(task, headers_order),
+												MESSAGE_FIELD(task, raw_headers_content).begin,
+												MESSAGE_FIELD(task, raw_headers_content).len,
+												TRUE);
+					npart->raw_headers = rspamd_message_headers_ref(
+						MESSAGE_FIELD(task, raw_headers));
+
+					/* Preserve the natural order */
+					if (MESSAGE_FIELD(task, headers_order)) {
+						LL_REVERSE2(MESSAGE_FIELD(task, headers_order), ord_next);
+					}
+				}
+
+				hdr = rspamd_message_get_header_from_hash(
+					MESSAGE_FIELD(task, raw_headers),
+					"Content-Type", FALSE);
+				task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
+			}
+			else {
+				/* No headers at all: the whole message is the body */
+				body_pos = 0;
+			}
+		}
+
+		pbegin = st->start + body_pos;
+		plen = st->end - pbegin;
+		npart->headers_order = NULL;
+	}
+	else {
+		/*
+		 * Here are dragons:
+		 * We allocate new parser context as we need to shift pointers
+		 */
+		nst = g_malloc0(sizeof(*st));
+		nst->stack = g_ptr_array_sized_new(4);
+		nst->boundaries = g_array_sized_new(FALSE, FALSE,
+											sizeof(struct rspamd_mime_boundary), 8);
+		nst->start = part->parsed_data.begin;
+		nst->end = nst->start + part->parsed_data.len;
+		nst->pos = nst->start;
+		nst->task = st->task;
+		nst->nesting = st->nesting;
+		st->nesting++;
+
+		str.str = (gchar *) part->parsed_data.begin;
+		str.len = part->parsed_data.len;
+
+		hdr_pos = rspamd_string_find_eoh(&str, &body_pos);
+		npart->raw_headers = rspamd_message_headers_new();
+		npart->headers_order = NULL;
+
+		if (hdr_pos > 0 && hdr_pos < str.len) {
+			npart->raw_headers_str = str.str;
+			npart->raw_headers_len = hdr_pos;
+			npart->raw_data.begin = str.str + body_pos;
+
+			if (npart->raw_headers_len > 0) {
+				rspamd_mime_headers_process(task,
+											npart->raw_headers,
+											&npart->headers_order,
+											npart->raw_headers_str,
+											npart->raw_headers_len,
+											FALSE);
+
+				/* Preserve the natural order */
+				if (npart->headers_order) {
+					LL_REVERSE2(npart->headers_order, ord_next);
+				}
+			}
+
+			hdr = rspamd_message_get_header_from_hash(npart->raw_headers,
+													  "Content-Type", FALSE);
+		}
+		else {
+			body_pos = 0;
+		}
+
+		pbegin = part->parsed_data.begin + body_pos;
+		plen = part->parsed_data.len - body_pos;
+	}
+
+	npart->raw_data.begin = pbegin;
+	npart->raw_data.len = plen;
+	npart->parent_part = part;
+
+	if (hdr == NULL) {
+		sel = NULL;
+	}
+	else {
+		DL_FOREACH(hdr, cur)
+		{
+			ct = rspamd_content_type_parse(cur->value, strlen(cur->value),
+										   task->task_pool);
+
+			/* Here we prefer multipart content-type or any content-type */
+			if (ct) {
+				if (sel == NULL) {
+					sel = ct;
+				}
+				else if (ct->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+					sel = ct;
+				}
+			}
+		}
+	}
+
+	if (sel == NULL) {
+		/* For messages we automatically assume plaintext */
+		msg_info_task("cannot find content-type for a message, assume text/plain");
+		sel = rspamd_mempool_alloc0(task->task_pool, sizeof(*sel));
+		sel->flags = RSPAMD_CONTENT_TYPE_TEXT | RSPAMD_CONTENT_TYPE_MISSING;
+		RSPAMD_FTOK_ASSIGN(&sel->type, "text");
+		RSPAMD_FTOK_ASSIGN(&sel->subtype, "plain");
+	}
+
+	npart->ct = sel;
+
+	if ((part == NULL || nst != st) &&
+		(sel->flags & (RSPAMD_CONTENT_TYPE_MULTIPART | RSPAMD_CONTENT_TYPE_MESSAGE))) {
+		/* Not a trivial message, need to preprocess */
+		rspamd_mime_preprocess_message(task, npart, nst);
+	}
+
+	if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
+		g_ptr_array_add(nst->stack, npart);
+		nst->nesting++;
+		npart->part_type = RSPAMD_MIME_PART_MULTIPART;
+		npart->specific.mp = rspamd_mempool_alloc0(task->task_pool,
+												   sizeof(struct rspamd_mime_multipart));
+		memcpy(&npart->specific.mp->boundary, &sel->orig_boundary,
+			   sizeof(rspamd_ftok_t));
+		ret = rspamd_mime_parse_multipart_part(task, npart, nst, err);
+	}
+	else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
+		if ((ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err)) == RSPAMD_MIME_PARSE_OK) {
+			npart->part_type = RSPAMD_MIME_PART_MESSAGE;
+			ret = rspamd_mime_parse_message(task, npart, nst, err);
+		}
+	}
+	else {
+		ret = rspamd_mime_parse_normal_part(task, npart, nst, sel, err);
+	}
+
+	if (ret != RSPAMD_MIME_PARSE_OK) {
+		/*
+		 * The context allocated for an embedded message is owned by this
+		 * call, so it must be released on failure as well
+		 */
+		if (nst != st) {
+			rspamd_mime_parse_stack_free(nst);
+		}
+
+		return ret;
+	}
+
+	if (part && st->stack->len > 0) {
+		/* Remove message part from the parent stack */
+		g_ptr_array_remove_index_fast(st->stack, st->stack->len - 1);
+		st->nesting--;
+	}
+
+	/* Process leftovers for boundaries */
+	if (nst->boundaries) {
+		struct rspamd_mime_boundary *boundary, *start_boundary = NULL,
+											   *end_boundary = NULL;
+		goffset cur_offset = nst->pos - nst->start,
+				end_offset = st->end - st->start;
+		guint sel_idx = 0;
+
+		for (;;) {
+			start_boundary = NULL;
+
+			/* Find the next open boundary located after the parsed data */
+			for (i = sel_idx; i < nst->boundaries->len; i++) {
+				boundary = &g_array_index(nst->boundaries,
+										  struct rspamd_mime_boundary, i);
+
+				if (boundary->start > cur_offset &&
+					boundary->boundary < end_offset &&
+					!RSPAMD_BOUNDARY_IS_CLOSED(boundary)) {
+					start_boundary = boundary;
+					sel_idx = i;
+					break;
+				}
+			}
+
+			if (start_boundary) {
+				const gchar *start, *end;
+
+				/* The part lasts until the next boundary or the end of input */
+				if (nst->boundaries->len > sel_idx + 1) {
+					end_boundary = &g_array_index(nst->boundaries,
+												  struct rspamd_mime_boundary, sel_idx + 1);
+					end = nst->start + end_boundary->boundary;
+				}
+				else {
+					end = nst->end;
+				}
+
+				sel_idx++;
+
+				start = nst->start + start_boundary->start;
+
+				if (end > start &&
+					(ret = rspamd_mime_process_multipart_node(task, nst,
+															  NULL, start, end, FALSE, err)) != RSPAMD_MIME_PARSE_OK) {
+
+					if (nst != st) {
+						rspamd_mime_parse_stack_free(nst);
+					}
+
+					/* Garbage after a boundary is not an error */
+					if (ret == RSPAMD_MIME_PARSE_NO_PART) {
+						return RSPAMD_MIME_PARSE_OK;
+					}
+
+					return ret;
+				}
+			}
+			else {
+				break;
+			}
+		}
+	}
+
+	if (nst != st) {
+		rspamd_mime_parse_stack_free(nst);
+	}
+
+	return ret;
+}
+
+/*
+ * Parses the message stored in task->msg into MIME parts.
+ *
+ * The parser library context is initialised on the first use, and the
+ * siphash key used for boundary hashes is regenerated once it has been used
+ * more than max_key_usages times.
+ *
+ * @param task task that holds the raw message
+ * @param err error location passed to the message parser
+ * @return result of parsing the top-level message
+ */
+enum rspamd_mime_parse_error
+rspamd_mime_parse_task(struct rspamd_task *task, GError **err)
+{
+	struct rspamd_mime_parser_ctx *ctx;
+	enum rspamd_mime_parse_error res;
+
+	if (lib_ctx == NULL) {
+		rspamd_mime_parser_init_lib();
+	}
+
+	lib_ctx->key_usages++;
+
+	if (lib_ctx->key_usages > max_key_usages) {
+		/* Regenerate siphash key */
+		ottery_rand_bytes(lib_ctx->hkey, sizeof(lib_ctx->hkey));
+		lib_ctx->key_usages = 0;
+	}
+
+	ctx = g_malloc0(sizeof(*ctx));
+	ctx->task = task;
+	ctx->stack = g_ptr_array_sized_new(4);
+	ctx->boundaries = g_array_sized_new(FALSE, FALSE,
+										sizeof(struct rspamd_mime_boundary), 8);
+	ctx->start = task->msg.begin;
+	ctx->end = task->msg.begin + task->msg.len;
+
+	/* Start from the body if its position is already known */
+	ctx->pos = MESSAGE_FIELD(task, raw_headers_content).body_start;
+
+	if (ctx->pos == NULL) {
+		ctx->pos = task->msg.begin;
+	}
+
+	res = rspamd_mime_parse_message(task, NULL, ctx, err);
+	rspamd_mime_parse_stack_free(ctx);
+
+	return res;
+}
diff --git a/src/libmime/mime_parser.h b/src/libmime/mime_parser.h
new file mode 100644
index 0000000..aa77b2b
--- /dev/null
+++ b/src/libmime/mime_parser.h
@@ -0,0 +1,46 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_MIME_PARSER_H_
+#define SRC_LIBMIME_MIME_PARSER_H_
+
+#include "config.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_mime_part;
+/* Result codes returned by the MIME parser functions */
+enum rspamd_mime_parse_error {
+	RSPAMD_MIME_PARSE_OK = 0,  /* parsing has succeeded */
+	RSPAMD_MIME_PARSE_FATAL,   /* generic failure; NOTE(review): the exact conditions are not visible here */
+	RSPAMD_MIME_PARSE_NESTING, /* the nesting limit of parts has been exceeded */
+	RSPAMD_MIME_PARSE_NO_PART, /* the inspected region contains no usable part */
+};
+/*
+ * Parses the raw message of a task into MIME parts.
+ *
+ * @param task task that holds the message to parse
+ * @param err error location, filled e.g. when parts are nested too deeply
+ * @return RSPAMD_MIME_PARSE_OK on success or another code on failure
+ */
+enum rspamd_mime_parse_error rspamd_mime_parse_task(struct rspamd_task *task,
+													GError **err);
+/* Calculates the digest of a part; NOTE(review): the definition is not visible here, confirm what is hashed and where the result is stored */
+void rspamd_mime_parser_calc_digest(struct rspamd_mime_part *part);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_MIME_PARSER_H_ */
diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx
new file mode 100644
index 0000000..e818e64
--- /dev/null
+++ b/src/libmime/mime_string.cxx
@@ -0,0 +1,167 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+#include "mime_string.hxx"
+#include "unicode/uchar.h"
+/* Unit tests for mime_string: construction, filtering, assignment and iteration */
+TEST_SUITE("mime_string")
+{
+	using namespace rspamd::mime;
+	TEST_CASE("mime_string unfiltered ctors")
+	{
+		SUBCASE("empty")
+		{
+			mime_string st;
+			CHECK(st.size() == 0);
+			CHECK(st == "");
+		}
+		SUBCASE("unfiltered valid")
+		{
+			mime_string st{std::string_view("abcd")};
+			CHECK(st == "abcd");
+		}
+		SUBCASE("unfiltered zero character")
+		{
+			mime_string st{"abc\0d", 5}; // explicit length as the literal has an embedded NUL
+			CHECK(st.has_zeroes());
+			CHECK(st == "abcd"); // the NUL byte is expected to be dropped
+		}
+		SUBCASE("unfiltered invalid character - middle")
+		{
+			mime_string st{std::string("abc\234d")}; // \234 is not valid UTF-8 here
+			CHECK(st.has_invalid());
+			CHECK(st == "abc\uFFFDd"); // expected to be replaced with U+FFFD
+		}
+		SUBCASE("unfiltered invalid character - end")
+		{
+			mime_string st{std::string("abc\234")};
+			CHECK(st.has_invalid());
+			CHECK(st == "abc\uFFFD");
+		}
+		SUBCASE("unfiltered invalid character - start")
+		{
+			mime_string st{std::string("\234abc")};
+			CHECK(st.has_invalid());
+			CHECK(st == "\uFFFDabc");
+		}
+	}
+	/* Constructors that take a filter applied to each code point */
+	TEST_CASE("mime_string filtered ctors")
+	{
+		auto print_filter = [](UChar32 inp) -> UChar32 {
+			if (!u_isprint(inp)) {
+				return 0; // the test below expects such characters to be removed
+			}
+
+			return inp;
+		};
+
+		auto tolower_filter = [](UChar32 inp) -> UChar32 {
+			return u_tolower(inp);
+		};
+
+		SUBCASE("empty")
+		{
+			mime_string st{std::string_view(""), tolower_filter};
+			CHECK(st.size() == 0);
+			CHECK(st == "");
+		}
+		SUBCASE("filtered valid")
+		{
+			mime_string st{std::string("AbCdУ"), tolower_filter}; // includes a non-ASCII letter
+			CHECK(st == "abcdу");
+		}
+		SUBCASE("filtered invalid + filtered")
+		{
+			mime_string st{std::string("abcd\234\1"), print_filter};
+			CHECK(st == "abcd\uFFFD"); // \234 is replaced, \1 is filtered out
+		}
+	}
+	TEST_CASE("mime_string assign")
+	{
+		SUBCASE("assign from valid")
+		{
+			mime_string st;
+
+			CHECK(st.assign_if_valid(std::string("test")));
+			CHECK(st == "test");
+		}
+		SUBCASE("assign from invalid")
+		{
+			mime_string st;
+
+			CHECK(!st.assign_if_valid(std::string("test\234t")));
+			CHECK(st == ""); // the string is expected to stay empty on failure
+		}
+	}
+	/* Iteration over code points (default) and over raw bytes (raw_begin/raw_end) */
+	TEST_CASE("mime_string iterators")
+	{
+
+		SUBCASE("unfiltered iterator ascii")
+		{
+			auto in = std::string("abcd");
+			mime_string st{in};
+			CHECK(st == "abcd");
+
+			int i = 0;
+			for (auto &&c: st) {
+				CHECK(c == in[i++]);
+			}
+		}
+
+		SUBCASE("unfiltered iterator utf8")
+		{
+			auto in = std::string("тест");
+			UChar32 ucs[4] = {1090, 1077, 1089, 1090}; // expected code points of the input
+			mime_string st{in};
+			CHECK(st == "тест");
+
+			int i = 0;
+			for (auto &&c: st) {
+				CHECK(c == ucs[i++]);
+			}
+			CHECK(i == sizeof(ucs) / sizeof(ucs[0])); // one iteration per code point
+		}
+
+		SUBCASE("unfiltered raw iterator ascii")
+		{
+			auto in = std::string("abcd");
+			mime_string st{in};
+			CHECK(st == "abcd");
+
+			int i = 0;
+			for (auto it = st.raw_begin(); it != st.raw_end(); ++it) {
+				CHECK(*it == in[i++]);
+			}
+		}
+
+		SUBCASE("unfiltered raw iterator utf8")
+		{
+			auto in = std::string("тест");
+			mime_string st{in};
+			CHECK(st == "тест");
+
+			int i = 0;
+			for (auto it = st.raw_begin(); it != st.raw_end(); ++it) {
+				CHECK(*it == in[i++]);
+			}
+			CHECK(i == in.size()); // one iteration per byte of the input
+		}
+	}
+}
+\ No newline at end of file
diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx
new file mode 100644
index 0000000..7476816
--- /dev/null
+++ b/src/libmime/mime_string.hxx
@@ -0,0 +1,670 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RSPAMD_MIME_STRING_HXX
+#define RSPAMD_MIME_STRING_HXX
+#pragma once
+#include <algorithm>
+#include <string>
+#include <string_view>
+#include <memory>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iosfwd>
+#include <ostream>
+#include "libutil/mem_pool.h"
+#include "function2/function2.hpp"
+#include "unicode/utf8.h"
+#include "contrib/fastutf8/fastutf8.h"
+
+namespace rspamd::mime {
+/*
+ * The motivation for another string type is to have a string that is always
+ * valid UTF-8: all bad things are replaced with the U+FFFD replacement character,
+ * and \0 and other strange stuff is filtered as defined by policies.
+ * This string always excludes \0 characters and ignores them! This is how MUAs act,
+ * and we also store a flag about bad characters.
+ * Mime string iterators are always const, so the underlying storage should not
+ * be modified externally.
+ */
+template<class T = char, class Allocator = std::allocator<T>,
+		 class Functor = fu2::function_view<UChar32(UChar32)>>
+class basic_mime_string;
+
+using mime_string = basic_mime_string<char>;
+using mime_pool_string = basic_mime_string<char, mempool_allocator<char>>;
+
+/* Helpers for type safe flags: bits recording what was observed while data was appended */
+enum class mime_string_flags : std::uint8_t {
+	MIME_STRING_DEFAULT = 0,             /* nothing unusual has been recorded */
+	MIME_STRING_SEEN_ZEROES = 0x1 << 0,  /* zero bytes/code points were met in appended data */
+	MIME_STRING_SEEN_INVALID = 0x1 << 1, /* an invalid sequence was replaced with U+FFFD */
+};
+
+constexpr mime_string_flags operator|(mime_string_flags lhs, mime_string_flags rhs)
+{
+	using raw_type = std::underlying_type_t<mime_string_flags>; /* flags are united as plain integers */
+	return mime_string_flags(raw_type(lhs) | raw_type(rhs));
+}
+
+constexpr mime_string_flags operator&(mime_string_flags lhs, mime_string_flags rhs)
+{
+	using raw_type = std::underlying_type_t<mime_string_flags>; /* flags are intersected as plain integers */
+	return mime_string_flags(raw_type(lhs) & raw_type(rhs));
+}
+
+constexpr bool operator!(mime_string_flags fl)
+{
+	return std::underlying_type_t<mime_string_flags>(fl) == 0; /* true when no flag bit is set */
+}
+
+// Codepoint iterator base class: the position is a byte offset that moves by whole code points
+template<typename Container, bool Raw = false>
+struct iterator_base {
+	template<typename, typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using codepoint_type = typename Container::codepoint_type;
+	using reference_type = codepoint_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	bool operator==(const iterator_base &it) const noexcept
+	{
+		return idx == it.idx; /* only the offsets are compared, not the containers */
+	}
+
+	bool operator!=(const iterator_base &it) const noexcept
+	{
+		return idx != it.idx;
+	}
+
+	iterator_base(difference_type index, Container *instance) noexcept
+		: idx(index), cont_instance(instance)
+	{
+	}
+	iterator_base() noexcept = default;
+	iterator_base(const iterator_base &) noexcept = default;
+
+	iterator_base &operator=(const iterator_base &) noexcept = default;
+
+	Container *get_instance() const noexcept
+	{
+		return cont_instance;
+	}
+
+	codepoint_type get_value() const noexcept
+	{
+		auto i = idx; /* decode from a copy so that the iterator itself does not move */
+		codepoint_type uc;
+		U8_NEXT_UNSAFE(cont_instance->data(), i, uc);
+		return uc;
+	}
+
+protected:
+	difference_type idx = 0; /* initialised so that default-constructed iterators can be compared */
+	Container *cont_instance = nullptr;
+
+protected:
+	void advance(difference_type n) noexcept /* moves by n code points, backwards if n < 0 */
+	{
+		if (n > 0) {
+			U8_FWD_N_UNSAFE(cont_instance->data(), idx, n);
+		}
+		else if (n < 0) {
+			U8_BACK_N_UNSAFE(cont_instance->data(), idx, (-n));
+		}
+	}
+	void increment() noexcept
+	{
+		codepoint_type uc; /* decoded value is discarded, only idx is advanced */
+		U8_NEXT_UNSAFE(cont_instance->data(), idx, uc);
+	}
+
+	void decrement() noexcept
+	{
+		codepoint_type uc; /* decoded value is discarded, only idx is moved back */
+		U8_PREV_UNSAFE(cont_instance->data(), idx, uc);
+	}
+};
+
+// Partial spec for raw Byte-based iterator base: the position is a plain byte offset
+template<typename Container>
+struct iterator_base<Container, true> {
+	template<typename, typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using reference_type = value_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	bool operator==(const iterator_base &it) const noexcept
+	{
+		return idx == it.idx; /* only the offsets are compared, not the containers */
+	}
+	bool operator!=(const iterator_base &it) const noexcept
+	{
+		return idx != it.idx;
+	}
+
+	iterator_base(difference_type index, Container *instance) noexcept
+		: idx(index), cont_instance(instance)
+	{
+	}
+
+	iterator_base() noexcept = default;
+	iterator_base(const iterator_base &) noexcept = default;
+	iterator_base &operator=(const iterator_base &) noexcept = default;
+	Container *get_instance() const noexcept
+	{
+		return cont_instance;
+	}
+
+	value_type get_value() const noexcept
+	{
+		return cont_instance->get_storage().at(idx); /* at() is bounds-checked; a throw here terminates (noexcept) */
+	}
+
+protected:
+	difference_type idx = 0; /* initialised so that default-constructed iterators can be compared */
+	Container *cont_instance = nullptr;
+
+protected:
+	//! Advance the iterator n times (negative values allowed!)
+	void advance(difference_type n) noexcept
+	{
+		idx += n;
+	}
+
+	void increment() noexcept
+	{
+		idx++;
+	}
+	void decrement() noexcept
+	{
+		idx--;
+	}
+};
+
+template<typename Container, bool Raw>
+struct iterator;
+template<typename Container, bool Raw>
+struct const_iterator;
+
+/* Public iterator: adds ++/--/+/-/* on top of iterator_base; dereferencing yields a value, not a reference */
+template<typename Container, bool Raw = false>
+struct iterator : iterator_base<Container, Raw> {
+	iterator(typename iterator_base<Container, Raw>::difference_type index, Container *instance) noexcept
+		: iterator_base<Container, Raw>(index, instance)
+	{
+	}
+	iterator() noexcept = default;
+	iterator(const iterator &) noexcept = default;
+	iterator &operator=(const iterator &) noexcept = default;
+	/* Disallow creating from const_iterator */
+	iterator(const const_iterator<Container, Raw> &) = delete;
+
+	/* Prefix */
+	iterator &operator++() noexcept
+	{
+		this->increment();
+		return *this;
+	}
+
+	/* Postfix */
+	iterator operator++(int) noexcept
+	{
+		iterator tmp{this->idx, this->cont_instance}; /* copy of the position before the step */
+		this->increment();
+		return tmp;
+	}
+
+	/* Prefix */
+	iterator &operator--() noexcept
+	{
+		this->decrement();
+		return *this;
+	}
+
+	/* Postfix */
+	iterator operator--(int) noexcept
+	{
+		iterator tmp{this->idx, this->cont_instance}; /* copy of the position before the step */
+		this->decrement();
+		return tmp;
+	}
+
+	iterator operator+(typename iterator_base<Container, Raw>::difference_type n) const noexcept
+	{
+		iterator it{*this};
+		it.advance(n); /* n is counted in the units of the base: code points or raw bytes */
+		return it;
+	}
+
+	iterator &operator+=(typename iterator_base<Container, Raw>::difference_type n) noexcept
+	{
+		this->advance(n);
+		return *this;
+	}
+
+	iterator operator-(typename iterator_base<Container, Raw>::difference_type n) const noexcept
+	{
+		iterator it{*this};
+		it.advance(-n);
+		return it;
+	}
+
+	iterator &operator-=(typename iterator_base<Container, Raw>::difference_type n) noexcept
+	{
+		this->advance(-n);
+		return *this;
+	}
+
+	typename iterator::reference_type operator*() const noexcept
+	{
+		return this->get_value(); /* returned by value: reference_type is not a C++ reference */
+	}
+};
+
+/* String kept as valid UTF-8: appended invalid sequences become U+FFFD, zero code points are dropped */
+template<class CharT, class Allocator, class Functor>
+class basic_mime_string : private Allocator {
+public:
+	using storage_type = std::basic_string<CharT, std::char_traits<CharT>, Allocator>;
+	using view_type = std::basic_string_view<CharT, std::char_traits<CharT>>;
+	using filter_type = Functor;
+	using codepoint_type = UChar32;
+	using value_type = CharT;
+	using difference_type = std::ptrdiff_t;
+	using iterator = rspamd::mime::iterator<basic_mime_string, false>;
+	using raw_iterator = rspamd::mime::iterator<basic_mime_string, true>;
+	/* Ctors */
+	basic_mime_string() noexcept
+		: Allocator()
+	{
+	}
+	explicit basic_mime_string(const Allocator &alloc) noexcept
+		: Allocator(alloc)
+	{
+	}
+	explicit basic_mime_string(filter_type &&filt, const Allocator &alloc = Allocator()) noexcept
+		: Allocator(alloc), filter_func(std::move(filt))
+	{
+	}
+
+	basic_mime_string(const CharT *str, std::size_t sz, const Allocator &alloc = Allocator()) noexcept
+		: Allocator(alloc)
+	{
+		append_c_string_unfiltered(str, sz);
+	}
+
+	basic_mime_string(const storage_type &st,
+					  const Allocator &alloc = Allocator()) noexcept
+		: basic_mime_string(st.data(), st.size(), alloc)
+	{
+	}
+
+	basic_mime_string(const view_type &st,
+					  const Allocator &alloc = Allocator()) noexcept
+		: basic_mime_string(st.data(), st.size(), alloc)
+	{
+	}
+	/* Explicit move ctor. NOTE(review): the Allocator base is default-constructed, not taken from `other` */
+	basic_mime_string(basic_mime_string &&other) noexcept
+	{
+		*this = std::move(other);
+	}
+
+	/**
+	 * Creates a string with a filter function. It is the caller's responsibility to
+	 * ensure that the filter functor survives long enough to work with a string
+	 * @param str
+	 * @param sz
+	 * @param filt
+	 * @param alloc
+	 */
+	basic_mime_string(const CharT *str, std::size_t sz,
+					  filter_type &&filt,
+					  const Allocator &alloc = Allocator()) noexcept
+		: Allocator(alloc),
+		  filter_func(std::move(filt))
+	{
+		append_c_string_filtered(str, sz);
+	}
+
+	basic_mime_string(const storage_type &st,
+					  filter_type &&filt,
+					  const Allocator &alloc = Allocator()) noexcept
+		: basic_mime_string(st.data(), st.size(), std::move(filt), alloc)
+	{
+	}
+	basic_mime_string(const view_type &st,
+					  filter_type &&filt,
+					  const Allocator &alloc = Allocator()) noexcept
+		: basic_mime_string(st.data(), st.size(), std::move(filt), alloc)
+	{
+	}
+
+	/* It seems some libc++ implementations still perform copy, this might fix them */
+	basic_mime_string &operator=(basic_mime_string &&other)
+	{
+		storage = std::move(other.storage);
+		filter_func = std::move(other.filter_func);
+		flags = other.flags; /* keep has_zeroes()/has_invalid() attached to the moved data */
+		return *this;
+	}
+
+	constexpr auto size() const noexcept -> std::size_t
+	{
+		return storage.size(); /* size in bytes, not in code points */
+	}
+
+	constexpr auto data() const noexcept -> const CharT *
+	{
+		return storage.data();
+	}
+
+	constexpr auto has_zeroes() const noexcept -> bool
+	{
+		return !!(flags & mime_string_flags::MIME_STRING_SEEN_ZEROES);
+	}
+
+	constexpr auto has_invalid() const noexcept -> bool
+	{
+		return !!(flags & mime_string_flags::MIME_STRING_SEEN_INVALID);
+	}
+
+	/**
+	 * Assign mime string from another string using move operation if a source string
+	 * is utf8 valid.
+	 * If this function returns false, then ownership has not been transferred
+	 * and the `other` string is unmodified as well as the storage
+	 * @param other
+	 * @return
+	 */
+	[[nodiscard]] auto assign_if_valid(storage_type &&other) -> bool
+	{
+		if (filter_func) {
+			/* No way */
+			return false;
+		}
+		if (rspamd_fast_utf8_validate((const unsigned char *) other.data(), other.size()) == 0) {
+			std::swap(storage, other); /* the previous contents end up in `other` */
+
+			return true;
+		}
+
+		return false;
+	}
+
+	/**
+	 * Copy to the internal storage discarding the contained value
+	 * @param other
+	 * @return
+	 */
+	auto assign_copy(const view_type &other)
+	{
+		storage.clear();
+
+		if (filter_func) {
+			append_c_string_filtered(other.data(), other.size());
+		}
+		else {
+			append_c_string_unfiltered(other.data(), other.size());
+		}
+	}
+	auto assign_copy(const storage_type &other)
+	{
+		storage.clear();
+
+		if (filter_func) {
+			append_c_string_filtered(other.data(), other.size());
+		}
+		else {
+			append_c_string_unfiltered(other.data(), other.size());
+		}
+	}
+	auto assign_copy(const basic_mime_string &other)
+	{
+		storage.clear();
+
+		if (filter_func) {
+			append_c_string_filtered(other.data(), other.size());
+		}
+		else {
+			append_c_string_unfiltered(other.data(), other.size());
+		}
+	}
+
+	/* Mutators */
+	auto append(const CharT *str, std::size_t size) -> std::size_t
+	{
+		if (filter_func) {
+			return append_c_string_filtered(str, size);
+		}
+		else {
+			return append_c_string_unfiltered(str, size);
+		}
+	}
+	auto append(const storage_type &other) -> std::size_t
+	{
+		return append(other.data(), other.size());
+	}
+	auto append(const view_type &other) -> std::size_t
+	{
+		return append(other.data(), other.size());
+	}
+
+	auto ltrim(const view_type &what) -> void
+	{
+		auto it = std::find_if(storage.begin(), storage.end(),
+							   [&what](CharT c) {
+								   return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; });
+							   });
+		storage.erase(storage.begin(), it); /* drop the leading run of bytes listed in `what` */
+	}
+
+	auto rtrim(const view_type &what) -> void
+	{
+		auto it = std::find_if(storage.rbegin(), storage.rend(),
+							   [&what](CharT c) {
+								   return !std::any_of(what.begin(), what.end(), [&c](CharT sc) { return sc == c; });
+							   });
+		storage.erase(it.base(), storage.end()); /* drop the trailing run of bytes listed in `what` */
+	}
+
+	auto trim(const view_type &what) -> void
+	{
+		ltrim(what);
+		rtrim(what);
+	}
+
+	/* Comparison: byte-wise on the storage; const so that const strings can be compared too */
+	auto operator==(const basic_mime_string &other) const
+	{
+		return other.storage == storage;
+	}
+	auto operator==(const storage_type &other) const
+	{
+		return other == storage;
+	}
+	auto operator==(const view_type &other) const
+	{
+		return other == storage;
+	}
+	auto operator==(const CharT *other) const
+	{
+		if (other == NULL) {
+			return false;
+		}
+		auto olen = strlen(other);
+		if (storage.size() == olen) {
+			return memcmp(storage.data(), other, olen) == 0;
+		}
+
+		return false;
+	}
+
+	/* Iterators */
+	inline auto begin() noexcept -> iterator
+	{
+		return {0, this};
+	}
+
+	inline auto raw_begin() noexcept -> raw_iterator
+	{
+		return {0, this};
+	}
+
+	inline auto end() noexcept -> iterator
+	{
+		return {(difference_type) size(), this};
+	}
+
+	inline auto raw_end() noexcept -> raw_iterator
+	{
+		return {(difference_type) size(), this};
+	}
+
+	/* Utility */
+	inline auto get_storage() const noexcept -> const storage_type &
+	{
+		return storage;
+	}
+
+	inline auto as_view() const noexcept -> view_type
+	{
+		return view_type{storage};
+	}
+
+	constexpr CharT operator[](std::size_t pos) const noexcept
+	{
+		return storage[pos];
+	}
+	constexpr CharT at(std::size_t pos) const
+	{
+		return storage.at(pos);
+	}
+	constexpr bool empty() const noexcept
+	{
+		return storage.empty();
+	}
+
+	/* For doctest stringify: must take the string itself to be found by argument-dependent lookup */
+	friend std::ostream &operator<<(std::ostream &os, const basic_mime_string &value)
+	{
+		os << value.storage;
+		return os;
+	}
+
+private:
+	mime_string_flags flags = mime_string_flags::MIME_STRING_DEFAULT;
+	storage_type storage;
+	filter_type filter_func;
+
+	/* Appends `len` bytes of `str`, validating in bulk; returns the number of bytes added to the storage */
+	auto append_c_string_unfiltered(const CharT *str, std::size_t len) -> std::size_t
+	{
+		/* This is fast path */
+		const auto *p = str;
+		const auto *end = str + len;
+		std::int32_t err_offset;// We have to use int32_t here as old libicu is brain-damaged
+		auto orig_size = storage.size();
+
+		storage.reserve(len + storage.size());
+
+		if (len > 0 && memchr(str, 0, len) != NULL) { /* never hand a possibly null pointer to memchr */
+			/* Fallback to slow path */
+			flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
+			return append_c_string_filtered(str, len);
+		}
+
+		while (p < end && len > 0 &&
+			   (err_offset = rspamd_fast_utf8_validate((const unsigned char *) p, len)) > 0) {
+			auto cur_offset = err_offset - 1; /* presumably err_offset is the 1-based position of the bad byte */
+			storage.append(p, cur_offset);    /* everything before it is valid and is copied as is */
+
+			while (cur_offset < len) {
+				auto tmp = cur_offset;
+				UChar32 uc;
+
+				U8_NEXT(p, cur_offset, len, uc);
+
+				if (uc < 0) {
+					storage.append("\uFFFD");
+					flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID;
+				}
+				else {
+					cur_offset = tmp; /* valid again: rewind and let the validator take over */
+					break;
+				}
+			}
+
+			p += cur_offset;
+			len = end - p;
+		}
+
+		storage.append(p, len); /* the remaining tail has passed validation (or is empty) */
+		return storage.size() - orig_size;
+	}
+
+	/* Slow path: decodes `str` code point by code point, applying filter_func when one is set */
+	auto append_c_string_filtered(const CharT *str, std::size_t len) -> std::size_t
+	{
+		std::int32_t i = 0;// We have to use int32_t here as old libicu is brain-damaged
+		UChar32 uc;
+		char tmp[4];
+		auto orig_size = storage.size();
+
+		storage.reserve(len + storage.size());
+
+		while (i < len) {
+			U8_NEXT(str, i, len, uc);
+
+			if (uc < 0) {
+				/* Replace with 0xFFFD */
+				storage.append("\uFFFD");
+				flags = flags | mime_string_flags::MIME_STRING_SEEN_INVALID;
+			}
+			else {
+				if (filter_func) {
+					uc = filter_func(uc);
+				}
+
+				if (uc == 0) {
+					/* Special case, ignore it */
+					flags = flags | mime_string_flags::MIME_STRING_SEEN_ZEROES;
+				}
+				else {
+					std::int32_t o = 0;
+					U8_APPEND_UNSAFE(tmp, o, uc);
+					storage.append(tmp, o);
+				}
+			}
+		}
+
+		return storage.size() - orig_size;
+	}
+};
+
+}// namespace rspamd::mime
+
+
+#endif//RSPAMD_MIME_STRING_HXX
diff --git a/src/libmime/received.cxx b/src/libmime/received.cxx
new file mode 100644
index 0000000..dc16d9b
--- /dev/null
+++ b/src/libmime/received.cxx
@@ -0,0 +1,1017 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libserver/url.h"
+#include "lua/lua_common.h"
+#include "libserver/cfg_file.h"
+#include "libserver/mempool_vars_internal.h"
+#include "mime_string.hxx"
+#include "smtp_parsers.h"
+#include "message.h"
+#include "received.hxx"
+#include "frozen/string.h"
+#include "frozen/unordered_map.h"
+
+namespace rspamd::mime {
+
+enum class received_part_type { /* kind of a clause found in a Received header */
+	RSPAMD_RECEIVED_PART_FROM,    /* clause introduced by the `from` keyword */
+	RSPAMD_RECEIVED_PART_BY,      /* clause introduced by the `by` keyword */
+	RSPAMD_RECEIVED_PART_FOR,     /* clause introduced by the `for` keyword */
+	RSPAMD_RECEIVED_PART_WITH,    /* clause introduced by the `with` keyword */
+	RSPAMD_RECEIVED_PART_ID,      /* clause introduced by the `id` keyword */
+	RSPAMD_RECEIVED_PART_UNKNOWN, /* anything else; its text is parsed over but not stored */
+};
+
+struct received_part { /* one clause of a Received header: its main data plus parenthesised comments */
+	received_part_type type;
+	mime_string data;
+	std::vector<mime_string> comments;
+	/* data is created with received_char_filter, so appended text is passed through that filter */
+	explicit received_part(received_part_type t)
+		: type(t),
+		  data(received_char_filter)
+	{
+	}
+};
+
+/* Appends a chunk to `dest` and strips spaces and tabs from both ends of the result */
+static inline auto
+received_part_set_or_append(const gchar *begin,
+							gsize len,
+							mime_string &dest) -> void
+{
+	if (len > 0) {
+		/* Empty chunks leave `dest` completely untouched, it is not even re-trimmed */
+		dest.append(begin, len);
+		dest.trim(" \t");
+	}
+}
+
+/* Splits one clause into data and comments stored in `npart`; on success `last` is the offset where parsing stopped */
+static auto
+received_process_part(const std::string_view &data,
+					  received_part_type type,
+					  std::ptrdiff_t &last,
+					  received_part &npart) -> bool
+{
+	auto obraces = 0, ebraces = 0;
+	auto seen_tcpinfo = false;
+	enum _parse_state {
+		skip_spaces,  /* skip whitespace, then switch to next_state */
+		in_comment,   /* inside (...); braces are counted so nested comments stay together */
+		read_data,    /* reading a token that is not a comment */
+		read_tcpinfo, /* reading a [...] chunk that follows the data of a `from` clause */
+		all_done      /* report the stop offset and return */
+	} state,
+		next_state;
+	/* In this function, we just process comments and data separately */
+	const auto *p = data.data();
+	const auto *end = p + data.size();
+	const auto *c = p; /* start of the chunk that is currently being collected */
+
+	state = skip_spaces;
+	next_state = read_data;
+
+	while (p < end) {
+		switch (state) {
+		case skip_spaces:
+			if (!g_ascii_isspace(*p)) {
+				c = p;
+				state = next_state;
+			}
+			else {
+				p++;
+			}
+			break;
+		case in_comment:
+			if (*p == '(') {
+				obraces++;
+			}
+			else if (*p == ')') {
+				ebraces++;
+
+				if (ebraces >= obraces) {
+					/* The outermost comment is closed; comments of unknown clauses are dropped */
+					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+						if (p > c) {
+							npart.comments.emplace_back(received_char_filter);
+							auto &comment = npart.comments.back();
+							received_part_set_or_append(c, p - c,
+														comment);
+						}
+					}
+
+					p++;
+					c = p;
+					state = skip_spaces;
+					next_state = read_data;
+
+					continue;
+				}
+			}
+
+			p++;
+			break;
+		case read_data:
+			if (*p == '(') {
+				/* A comment starts: flush the data collected so far */
+				if (p > c) {
+					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+						received_part_set_or_append(c, p - c,
+													npart.data);
+					}
+				}
+
+				state = in_comment;
+				obraces = 1;
+				ebraces = 0;
+				p++;
+				c = p;
+			}
+			else if (g_ascii_isspace(*p)) {
+				/* End of a token: flush it and skip the following spaces */
+				if (p > c) {
+					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+						received_part_set_or_append(c, p - c,
+													npart.data);
+					}
+				}
+
+				state = skip_spaces;
+				next_state = read_data;
+				c = p;
+			}
+			else if (*p == ';') {
+				/* It is actually delimiter of date part if not in the comments */
+				if (p > c) {
+					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+						received_part_set_or_append(c, p - c,
+													npart.data);
+					}
+				}
+
+				state = all_done;
+				continue;
+			}
+			else if (npart.data.size() > 0) {
+				/* We have already received data and find something with no ( */
+				if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
+					/* Check if we have something special here, such as TCPinfo */
+					if (*c == '[') {
+						state = read_tcpinfo;
+						p++;
+					}
+					else {
+						state = all_done;
+						continue;
+					}
+				}
+				else {
+					state = all_done;
+					continue;
+				}
+			}
+			else {
+				p++;
+			}
+			break;
+		case read_tcpinfo:
+			if (*p == ']') {
+				/* The closing bracket is stored too, hence the + 1 */
+				received_part_set_or_append(c, p - c + 1,
+											npart.data);
+				seen_tcpinfo = TRUE;
+				state = skip_spaces;
+				next_state = read_data;
+				c = p;
+			}
+			p++;
+			break;
+		case all_done:
+			if (p > data.data()) {
+				last = p - data.data();
+				return true;
+			}
+			else {
+				/* Empty element */
+				return false;
+			}
+			break;
+		}
+	}
+
+	/* Leftover */
+	switch (state) {
+	case read_data:
+		if (p > c) {
+			if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
+				received_part_set_or_append(c, p - c,
+											npart.data);
+			}
+
+			last = p - data.data();
+
+			return true;
+		}
+		break;
+	case skip_spaces:
+		if (p > data.data()) {
+			last = p - data.data();
+
+			return true;
+		}
+	default: /* the skip_spaces case above falls through to here when nothing was consumed */
+		break;
+	}
+
+	return false;
+}
+
+template<std::size_t N>
+constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
+{
+	/* Walk the first N bytes of `in`, lowercased through lc_map, while they match `lit` */
+	std::size_t matched = 0;
+
+	while (matched < N && lc_map[(unsigned char) in[matched]] == lit[matched]) {
+		matched++;
+	}
+	return matched == N;
+}
+
+/* Splits a Received header into clauses; `date_pos` is set to the offset just after the `;` date separator */
+static auto
+received_spill(const std::string_view &in,
+			   std::ptrdiff_t &date_pos) -> std::vector<received_part>
+{
+	std::vector<received_part> parts;
+	std::ptrdiff_t pos = 0;
+	auto seen_from = false, seen_by = false;
+
+	const auto *p = in.data();
+	const auto *end = p + in.size();
+
+	auto skip_spaces = [&p, end]() {
+		while (p < end && g_ascii_isspace(*p)) {
+			p++;
+		}
+	};
+	skip_spaces();
+
+	/* Skip SMTP comments; `p` may already be at `end` here, so check before dereferencing */
+	if (p < end && *p == '(') {
+		auto obraces = 0, ebraces = 0;
+
+		while (p < end) {
+			if (*p == ')') {
+				ebraces++;
+			}
+			else if (*p == '(') {
+				obraces++;
+			}
+
+			p++;
+
+			if (obraces == ebraces) {
+				/* Skip spaces after  */
+				skip_spaces();
+				break;
+			}
+		}
+	}
+
+	auto len = end - p;
+
+	if (len == 0) {
+		return parts;
+	}
+
+	/* Parses the clause starting at `p`; the part is kept only if parsing succeeds, `pos` gets its length */
+	auto maybe_process_part = [&](received_part_type what) -> bool {
+		parts.emplace_back(what);
+		auto &rcvd_part = parts.back();
+		auto chunk = std::string_view{p, (std::size_t)(end - p)};
+
+		if (!received_process_part(chunk, what, pos, rcvd_part)) {
+			parts.pop_back();
+
+			return false;
+		}
+
+		return true;
+	};
+
+	if (len > 4 && lit_compare_lowercase<4>("from", p)) {
+		p += sizeof("from") - 1;
+
+		/* We can now store from part */
+		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
+			/* Do not accept malformed from */
+			return {};
+		}
+
+		g_assert(pos != 0);
+		p += pos;
+		len = end > p ? end - p : 0;
+		seen_from = true;
+	}
+
+	if (len > 2 && lit_compare_lowercase<2>("by", p)) {
+		p += sizeof("by") - 1;
+
+		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
+			return {};
+		}
+
+		g_assert(pos != 0);
+		p += pos;
+		len = end > p ? end - p : 0;
+		seen_by = true;
+	}
+
+	if (!seen_from && !seen_by) {
+		/* Useless received */
+		return {};
+	}
+
+	while (p < end) {
+		bool got_part = false;
+		if (*p == ';') {
+			/* We are at the date separator, stop here */
+			date_pos = p - in.data() + 1;
+			break;
+		}
+		else {
+			/* NOTE(review): sizeof() counts the NUL, so these need one byte more than the from/by checks */
+			if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
+				p += sizeof("with") - 1;
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
+			}
+			else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
+				p += sizeof("for") - 1;
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
+			}
+			else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
+				p += sizeof("id") - 1;
+				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
+			}
+			else {
+				/* Unknown keyword: skip it up to the next space, comment or date separator */
+				while (p < end) {
+					if (!(g_ascii_isspace(*p) || *p == '(' || *p == ';')) {
+						p++;
+					}
+					else {
+						break;
+					}
+				}
+
+				if (p == end) {
+					return {};
+				}
+				else if (*p == ';') {
+					date_pos = p - in.data() + 1;
+					break;
+				}
+				else {
+					got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
+				}
+			}
+
+			if (!got_part) {
+				p++;
+				len = end > p ? end - p : 0;
+			}
+			else {
+				g_assert(pos != 0);
+				p += pos;
+				len = end > p ? end - p : 0;
+			}
+		}
+	}
+
+	return parts;
+}
+
+#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
+	(rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE | RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
+
+/* Stores in `dest` either the address from a bracketed `[ip]` input or its hostname-like prefix; true if stored */
+static auto
+received_process_rdns(rspamd_mempool_t *pool,
+					  const std::string_view &in,
+					  mime_string &dest) -> bool
+{
+	auto seen_dot = false;
+	const auto *p = in.data();
+	const auto *end = p + in.size();
+
+	if (in.empty()) {
+		return false;
+	}
+
+	if (*p == '[' && *(end - 1) == ']' && in.size() > 2) {
+		/* We have enclosed ip address */
+		auto *addr = rspamd_parse_inet_address_pool(p + 1,
+													(end - p) - 2,
+													pool,
+													RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+		if (addr) {
+			const gchar *addr_str;
+
+			/* The "pretty" form is used only when a port was parsed */
+			if (rspamd_inet_address_get_port(addr) != 0) {
+				addr_str = rspamd_inet_address_to_string_pretty(addr);
+			}
+			else {
+				addr_str = rspamd_inet_address_to_string(addr);
+			}
+
+			dest.assign_copy(std::string_view{addr_str});
+
+			return true;
+		}
+	}
+
+	auto hlen = 0u; /* length of the leading run of domain characters */
+
+	while (p < end) {
+		if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) {
+			if (*p == '.') {
+				seen_dot = true;
+			}
+
+			hlen++;
+		}
+		else {
+			break;
+		}
+
+		p++;
+	}
+
+	if (hlen > 0) {
+		/* Accept the whole input, or a dotted name followed by a space, '[' or '(' */
+		if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) {
+			/* All data looks like a hostname */
+			dest.assign_copy(std::string_view{in.data(), hlen});
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Fills rh.addr/rh.real_ip from an address in `in`; returns true only when rh.real_hostname was extracted too */
+static auto
+received_process_host_tcpinfo(rspamd_mempool_t *pool,
+							  received_header &rh,
+							  const std::string_view &in) -> bool
+{
+	rspamd_inet_addr_t *addr = nullptr;
+	auto ret = false;
+	if (in.empty()) {
+		return false;
+	}
+
+	if (in[0] == '[') {
+		/* Likely Exim version */
+
+		auto brace_pos = in.find(']');
+
+		if (brace_pos != std::string_view::npos) {
+			auto substr_addr = in.substr(1, brace_pos - 1);
+			addr = rspamd_parse_inet_address_pool(substr_addr.data(),
+												  substr_addr.size(),
+												  pool,
+												  RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+			if (addr) {
+				/* `ret` is left false on this path: an address alone is not reported as success */
+				rh.addr = addr;
+				rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
+			}
+		}
+	}
+	else {
+		if (g_ascii_isxdigit(in[0])) {
+			/* Try to parse IP address */
+			addr = rspamd_parse_inet_address_pool(in.data(),
+												  in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+			if (addr) {
+				rh.addr = addr;
+				rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
+			}
+		}
+
+		if (!addr) {
+			/* Try canonical Postfix version: rdns [ip] */
+			auto obrace_pos = in.find('[');
+
+			if (obrace_pos != std::string_view::npos) {
+				auto ebrace_pos = in.rfind(']');
+
+				if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) {
+					auto substr_addr = in.substr(obrace_pos + 1,
+												 ebrace_pos - obrace_pos - 1);
+					addr = rspamd_parse_inet_address_pool(substr_addr.data(),
+														  substr_addr.size(),
+														  pool,
+														  RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
+
+					if (addr) {
+						rh.addr = addr;
+						rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
+
+						/* Process with rDNS */
+						auto rdns_substr = in.substr(0, obrace_pos);
+
+						if (received_process_rdns(pool, rdns_substr, rh.real_hostname)) {
+							ret = true;
+						}
+					}
+				}
+			}
+			else {
+				/* Hostname or some crap, sigh... */
+				if (received_process_rdns(pool, in, rh.real_hostname)) {
+					ret = true;
+				}
+			}
+		}
+	}
+
+	return ret;
+}
+
+/* Fills the sender related fields of `rh` from the data and the first comment of a `from` clause */
+static void
+received_process_from(rspamd_mempool_t *pool,
+					  const received_part &rpart,
+					  received_header &rh)
+{
+	if (rpart.data.size() > 0) {
+		/* We have seen multiple cases:
+		 * - [ip] (hostname/unknown [real_ip])
+		 * - helo (hostname/unknown [real_ip])
+		 * - [ip]
+		 * - hostname
+		 * - hostname ([ip]:port helo=xxx)
+		 * Maybe more...
+		 */
+		auto seen_ip_in_data = false;
+		if (!rpart.comments.empty()) {
+			/* We can have info within comment as part of RFC */
+			received_process_host_tcpinfo(
+				pool, rh,
+				rpart.comments[0].as_view());
+		}
+
+		if (rh.real_ip.size() == 0) {
+			/* Try to do the same with data */
+			if (received_process_host_tcpinfo(
+					pool, rh,
+					rpart.data.as_view())) {
+				seen_ip_in_data = true;
+			}
+		}
+
+		if (!seen_ip_in_data) {
+			if (rh.real_ip.size() != 0) {
+				/* Get announced hostname (usually helo) */
+				received_process_rdns(pool,
+									  rpart.data.as_view(),
+									  rh.from_hostname);
+			}
+			else {
+				received_process_host_tcpinfo(pool,
+											  rh, rpart.data.as_view());
+			}
+		}
+	}
+	else {
+		/* rpart.data is empty: only the first comment can carry host information */
+		if (!rpart.comments.empty()) {
+			received_process_host_tcpinfo(
+				pool, rh,
+				rpart.comments[0].as_view());
+		}
+	}
+}
+
+/* Parses one Received header into a new entry of `chain`; returns false when no clause could be extracted */
+static auto
+received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
+					  const std::string_view &in,
+					  struct rspamd_mime_header *hdr) -> bool
+{
+	std::ptrdiff_t date_pos = -1;
+	static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({{"smtp", received_flags::SMTP},
+																										 {"esmtp", received_flags::ESMTP},
+																										 {"esmtpa", received_flags::ESMTPA |
+																														received_flags::AUTHENTICATED},
+																										 {"esmtpsa", received_flags::ESMTPSA |
+																														 received_flags::SSL |
+																														 received_flags::AUTHENTICATED},
+																										 {"esmtps", received_flags::ESMTPS |
+																														received_flags::SSL},
+																										 {"lmtp", received_flags::LMTP},
+																										 {"imap", received_flags::IMAP},
+																										 {"imaps", received_flags::IMAP |
+																													   received_flags::SSL},
+																										 {"http", received_flags::HTTP},
+																										 {"https", received_flags::HTTP |
+																													   received_flags::SSL},
+																										 {"local", received_flags::LOCAL}});
+
+	auto parts = received_spill(in, date_pos);
+
+	if (parts.empty()) {
+		return false;
+	}
+
+	auto &rh = chain.new_received();
+
+	rh.flags = received_flags::UNKNOWN;
+	rh.hdr = hdr;
+
+	for (const auto &part: parts) {
+		switch (part.type) {
+		case received_part_type::RSPAMD_RECEIVED_PART_FROM:
+			received_process_from(pool, part, rh);
+			break;
+		case received_part_type::RSPAMD_RECEIVED_PART_BY:
+			received_process_rdns(pool,
+								  part.data.as_view(),
+								  rh.by_hostname);
+			break;
+		case received_part_type::RSPAMD_RECEIVED_PART_WITH:
+			if (part.data.size() > 0) {
+				/* The lookup is an exact match against the keys of protos_map */
+				auto proto_flag_it = protos_map.find(part.data.as_view());
+
+				if (proto_flag_it != protos_map.end()) {
+					rh.flags = proto_flag_it->second;
+				}
+			}
+			break;
+		case received_part_type::RSPAMD_RECEIVED_PART_FOR:
+			rh.for_mbox.assign_copy(part.data);
+			rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
+														 rh.for_mbox.size());
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+
+	/* Without an announced hostname, fall back to the one extracted as the real hostname */
+	if (!rh.real_hostname.empty() && rh.from_hostname.empty()) {
+		rh.from_hostname.assign_copy(rh.real_hostname);
+	}
+
+	if (date_pos > 0 && date_pos < in.size()) {
+		auto date_sub = in.substr(date_pos); /* everything after the `;` separator */
+		rh.timestamp = rspamd_parse_smtp_date((const unsigned char *) date_sub.data(),
+											  date_sub.size(), nullptr);
+	}
+
+	return true;
+}
+
+/*
+ * Reconcile the topmost received header with the connection data of the task:
+ * either prepend an artificial header built from the task or, for a task
+ * flagged with RSPAMD_TASK_FLAG_NO_IP, fill the task from the topmost header.
+ * Returns true if one of these two paths has been taken.
+ */
+static auto
+received_maybe_fix_task(struct rspamd_task *task) -> bool
+{
+	auto *chain = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+
+	if (chain == nullptr) {
+		return false;
+	}
+
+	auto maybe_top = chain->get_received(0);
+
+	if (!maybe_top.has_value()) {
+		return false;
+	}
+
+	auto &top = maybe_top.value().get();
+	const auto *top_addr = top.addr;
+	const bool has_conn_addr = !(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr;
+	auto must_fix = false;
+
+	if (top.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
+		must_fix = true;
+	}
+	else if (has_conn_addr) {
+		must_fix = !top_addr || rspamd_inet_address_compare(top_addr, task->from_addr, FALSE) != 0;
+	}
+
+	if (must_fix && has_conn_addr) {
+		msg_debug_task("the first received seems to be"
+					   " not ours, prepend it with fake one");
+
+		auto &fake = chain->new_received(received_header_chain::append_type::append_head);
+		fake.flags |= received_flags::ARTIFICIAL;
+
+		if (task->flags & RSPAMD_TASK_FLAG_SSL) {
+			fake.flags |= received_flags::SSL;
+		}
+		if (task->auth_user) {
+			fake.flags |= received_flags::AUTHENTICATED;
+		}
+
+		fake.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
+
+		const auto *mta = (const char *) rspamd_mempool_get_variable(task->task_pool, RSPAMD_MEMPOOL_MTA_NAME);
+		if (mta) {
+			fake.by_hostname.assign_copy(std::string_view(mta));
+		}
+
+		fake.addr = rspamd_inet_address_copy(task->from_addr, task->task_pool);
+
+		if (task->hostname) {
+			fake.real_hostname.assign_copy(std::string_view(task->hostname));
+			fake.from_hostname.assign_copy(fake.real_hostname);
+		}
+
+		return true;
+	}
+
+	/* No client address was given: take it from the topmost header instead */
+	if (!must_fix && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
+		(task->cfg && !task->cfg->ignore_received)) {
+		if (!top.real_ip.empty()) {
+			if (!rspamd_parse_inet_address(&task->from_addr,
+										   top.real_ip.data(),
+										   top.real_ip.size(),
+										   RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
+				msg_warn_task("cannot get IP from received header: '%s'",
+							  top.real_ip.data());
+				task->from_addr = nullptr;
+			}
+		}
+		if (!top.real_hostname.empty()) {
+			task->hostname = top.real_hostname.data();
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+/* Push the chain to Lua as an array of tables, one table per received header */
+static auto
+received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
+{
+	if (chain == nullptr) {
+		return false;
+	}
+
+	/* Store in the table on the stack top whether `fl` is set in the header */
+	auto set_flag_field = [L](const received_header &rh, received_flags fl, const char *name) {
+		lua_pushboolean(L, !!(rh.flags & fl));
+		lua_setfield(L, -2, name);
+	};
+	/* Store a string in the table on the stack top, nil is used for an empty one */
+	auto set_string_field = [L](const mime_string &st, const char *field) {
+		if (!st.empty()) {
+			lua_pushlstring(L, st.data(), st.size());
+		}
+		else {
+			lua_pushnil(L);
+		}
+		lua_setfield(L, -2, field);
+	};
+
+	lua_createtable(L, chain->size(), 0);
+	auto lua_idx = 1;
+
+	for (const auto &rh: chain->as_vector()) {
+		lua_createtable(L, 0, 10);
+
+		if (rh.hdr && rh.hdr->decoded) {
+			rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
+		}
+
+		lua_createtable(L, 0, 3);
+		set_flag_field(rh, received_flags::ARTIFICIAL, "artificial");
+		set_flag_field(rh, received_flags::AUTHENTICATED, "authenticated");
+		set_flag_field(rh, received_flags::SSL, "ssl");
+		lua_setfield(L, -2, "flags");
+
+		set_string_field(rh.from_hostname, "from_hostname");
+		set_string_field(rh.real_hostname, "real_hostname");
+		set_string_field(rh.real_ip, "from_ip");
+		set_string_field(rh.by_hostname, "by_hostname");
+		set_string_field(rh.for_mbox, "for");
+
+		if (rh.addr != nullptr) {
+			rspamd_lua_ip_push(L, rh.addr);
+		}
+		else {
+			lua_pushnil(L);
+		}
+		lua_setfield(L, -2, "real_ip");
+
+		lua_pushstring(L, received_protocol_to_string(rh.flags));
+		lua_setfield(L, -2, "proto");
+		lua_pushinteger(L, rh.timestamp);
+		lua_setfield(L, -2, "timestamp");
+
+		lua_rawseti(L, -2, lua_idx++);
+	}
+
+	return true;
+}
+
+}// namespace rspamd::mime
+
+bool rspamd_received_header_parse(struct rspamd_task *task,
+								  const char *data, size_t sz,
+								  struct rspamd_mime_header *hdr)
+{
+	auto *chain = static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+
+	if (chain == nullptr) {
+		/* Created on the first header; this constructor registers the destructor in the task pool */
+		chain = new rspamd::mime::received_header_chain(task);
+		MESSAGE_FIELD(task, received_headers) = (void *) chain;
+	}
+
+	return rspamd::mime::received_header_parse(*chain, task->task_pool, std::string_view{data, sz}, hdr);
+}
+
+bool rspamd_received_maybe_fix_task(struct rspamd_task *task)
+{
+	return rspamd::mime::received_maybe_fix_task(task); /* plain forwarder to the C++ implementation */
+}
+
+bool rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
+{
+	/* The chain is kept in the message as an untyped pointer; it is null if nothing was parsed */
+	auto *chain = static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers));
+	return rspamd::mime::received_export_to_lua(chain, L);
+}
+
+/* Tests part */
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+TEST_SUITE("received")
+{
+	TEST_CASE("parse received")
+	{
+		using namespace std::string_view_literals;
+		using map_type = ankerl::unordered_dense::map<std::string_view, std::string_view>;
+		std::vector<std::pair<std::string_view, map_type>> cases{
+			// Simple received
+			{"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
+			 {{"real_ip", "185.243.30.90"},
+			  {"real_hostname", "smtp11.mailtrack.pl"},
+			  {"from_hostname", "smtp11.mailtrack.pl"}}},
+			// Real Postfix IPv6 received
+			{"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
+			 "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
+			 "\t(Client did not present a certificate)\n"
+			 "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
+			 "\tfor <test@example.com>; Mon,  6 Jul 2015 09:01:20 +0000 (UTC)\n"
+			 "\t(envelope-from upwest201diana@outlook.com)"sv,
+			 {{"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
+			  {"from_hostname", "server.chat-met-vreemden.nl"},
+			  {"by_hostname", "mx1.freebsd.org"},
+			  {"for_mbox", "<test@example.com>"}}},
+			// Exim IPv4 received
+			{"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
+			 " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
+			 " (envelope-from <exim-dev-bounces@exim.org>)\n"
+			 " id 1fZ55o-0006DP-3H\n"
+			 " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
+			 {
+				 {"from_hostname", "localhost"},
+				 {"real_ip", "127.0.0.1"},
+				 {"for_mbox", "<xxx@xxx.xxx>"},
+				 {"by_hostname", "hummus.csx.cam.ac.uk"},
+			 }},
+			// Exim IPv6 received
+			{"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
+			 " helo=mx.spodhuis.org)\n"
+			 " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
+			 " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
+			 " id 1fZ55k-0006CO-9M\n"
+			 " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
+			 {
+				 {"from_hostname", "smtp.spodhuis.org"},
+				 {"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
+				 {"for_mbox", "exim-dev@exim.org"},
+				 {"by_hostname", "hummus.csx.cam.ac.uk"},
+			 }},
+			// Haraka received
+			{"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
+			 "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
+			 "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
+			 {
+				 {"from_hostname", "aaa.cn"},
+				 {"real_ip", "1.1.1.1"},
+				 {"by_hostname", "localhost.localdomain"},
+			 }},
+			// Invalid by: no by_hostname is expected in the result
+			{"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
+			 "by guovswzqkvry051@sohu.com with gg login "
+			 "by AOL 6.0 for Windows US sub 008 SMTP  ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
+			 {
+				 {"from_hostname", "192.83.172.101"},
+				 {"real_ip", "192.83.172.101"},
+			 }},
+			// Invalid hostinfo: empty brackets must not produce real_ip
+			{"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
+			 " Thu, 26 Sep 2019 11:19:07 +0200"sv,
+			 {
+				 {"by_hostname", "example.com"},
+				 {"from_hostname", "example.com"},
+				 {"real_hostname", "example.com"},
+			 }},
+			// Different real and announced hostnames + malformed `with;` clause and garbage after the date
+			{"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
+			 "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
+			 ": <g @yi.br>"sv,
+			 {
+				 {"real_ip", "1.1.1.1"},
+				 {"from_hostname", "171-29.br"},
+				 {"real_hostname", "1-1-1-1.z.com.br"},
+				 {"by_hostname", "x.com.br"},
+			 }},
+			// Different real and announced ips + no hostname
+			{"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
+			 {
+				 {"real_ip", "127.0.0.2"},
+				 {"from_hostname", "127.0.0.1"},
+				 {"by_hostname", "smtp.gmail.com"},
+			 }},
+			// Different real and announced hostnames (an IP address is announced)
+			{"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
+			 "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
+			 {
+				 {"real_ip", "185.118.166.127"},
+				 {"from_hostname", "185.118.166.127"},
+				 {"real_hostname", "steven2.zhou01.pserver.ru"},
+				 {"by_hostname", "mail.832zsu.cn"},
+			 }},
+			// \0 in received must be filtered
+			{"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
+			 {{"real_ip", "185.243.30.90"},
+			  {"real_hostname", "smtp11.mailtrack.pl"},
+			  {"from_hostname", "smtp11.mailtrack.pl"}}},
+			// No from part
+			{"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
+			 {
+				 {"by_hostname", "mail.832zsu.cn"},
+			 }},
+			// From part is in the comment
+			{"(from asterisk@localhost)\n"
+			 "        by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
+			 "        Thu, 6 Aug 2020 11:50:04 -0500"sv,
+			 {
+				 {"by_hostname", "pbx.xxx.com"},
+			 }},
+		};
+		rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);
+
+		for (auto &&c: cases) {
+			SUBCASE(c.first.data())
+			{
+				rspamd::mime::received_header_chain chain;
+				auto ret = rspamd::mime::received_header_parse(chain, pool,
+															   c.first, nullptr);
+				CHECK(ret == true);
+				auto &&rh = chain.get_received(0);
+				CHECK(rh.has_value());
+				auto res = rh.value().get().as_map();
+
+				for (const auto &expected: c.second) { // every expected field must be parsed with the same value
+					CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
+					CHECK(res[expected.first] == expected.second);
+				}
+				for (const auto &existing: res) { // and no field besides the expected ones may be parsed
+					CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
+					CHECK(c.second[existing.first] == existing.second);
+				}
+			}
+		}
+
+		rspamd_mempool_delete(pool);
+	}
+}
+\ No newline at end of file
diff --git a/src/libmime/received.h b/src/libmime/received.h
new file mode 100644
index 0000000..46608a3
--- /dev/null
+++ b/src/libmime/received.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef RSPAMD_RECEIVED_H
+#define RSPAMD_RECEIVED_H
+
+#include "config.h"
+#include "libutil/addr.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * C bindings for C++ received code
+ */
+
+struct rspamd_email_address;
+struct rspamd_received_header_chain;
+struct rspamd_mime_header;
+
+/**
+ * Parse a received header from the input header data and append it to the task's chain
+ * @param task task whose message keeps the chain of received headers (created on demand)
+ * @param data header value to parse
+ * @param sz length of `data` in bytes
+ * @param hdr mime header the parsed element is associated with
+ * @return true if the header has been parsed and added to the chain
+ */
+bool rspamd_received_header_parse(struct rspamd_task *task,
+								  const char *data, size_t sz, struct rspamd_mime_header *hdr);
+
+
+/**
+ * Compare the task data with the topmost received header and fix either of them if needed
+ * @param task task with the (possibly absent) chain of received headers
+ * @return true if one of the correction paths has been taken, false if nothing was done
+ */
+bool rspamd_received_maybe_fix_task(struct rspamd_task *task);
+
+struct lua_State;
+/**
+ * Push the chain of received headers to Lua as an array of tables
+ * @param task task whose message keeps the chain of received headers
+ * @param L Lua state; on success the array is left on the top of its stack
+ * @return false if the task has no chain (nothing is pushed in this case)
+ */
+bool rspamd_received_export_to_lua(struct rspamd_task *task, struct lua_State *L);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif//RSPAMD_RECEIVED_H
diff --git a/src/libmime/received.hxx b/src/libmime/received.hxx
new file mode 100644
index 0000000..4f423f1
--- /dev/null
+++ b/src/libmime/received.hxx
@@ -0,0 +1,314 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef RSPAMD_RECEIVED_HXX
+#define RSPAMD_RECEIVED_HXX
+#pragma once
+
+#include "config.h"
+#include "received.h"
+#include "mime_string.hxx"
+#include "libmime/email_addr.h"
+#include "libserver/task.h"
+#include "contrib/ankerl/unordered_dense.h"
+#include <vector>
+#include <string_view>
+#include <utility>
+#include <optional>
+
+namespace rspamd::mime {
+
+/* Filter for the received fields: printable characters are lowercased, 0 is returned for the rest */
+static inline auto
+received_char_filter(UChar32 uc) -> UChar32
+{
+	if (!u_isprint(uc)) {
+		return 0;
+	}
+	return u_tolower(uc);
+}
+
+enum class received_flags {
+	DEFAULT = 0, /* no flags are set */
+	SMTP = 1u << 0u, /* protocol bits start here, see received_type_apply_protocols_mask */
+	ESMTP = 1u << 1u,
+	ESMTPA = 1u << 2u,
+	ESMTPS = 1u << 3u,
+	ESMTPSA = 1u << 4u,
+	LMTP = 1u << 5u,
+	IMAP = 1u << 6u,
+	LOCAL = 1u << 7u,
+	HTTP = 1u << 8u,
+	MAPI = 1u << 9u, /* the last protocol bit */
+	UNKNOWN = 1u << 10u, /* not a part of the protocols mask */
+	ARTIFICIAL = (1u << 11u), /* this and the following bits are combined with a protocol bit */
+	SSL = (1u << 12u),
+	AUTHENTICATED = (1u << 13u),
+};
+
+constexpr received_flags operator|(received_flags lhs, received_flags rhs)
+{
+	using raw_t = std::underlying_type_t<received_flags>; /* integer type behind the enum */
+	return static_cast<received_flags>(static_cast<raw_t>(lhs) | static_cast<raw_t>(rhs));
+}
+
+constexpr received_flags operator|=(received_flags &lhs, const received_flags rhs)
+{
+	/* Expressed through operator| so that both operators always agree */
+	lhs = lhs | rhs;
+	return lhs;
+}
+
+constexpr received_flags operator&(received_flags lhs, received_flags rhs)
+{
+	using raw_t = std::underlying_type_t<received_flags>; /* integer type behind the enum */
+	return static_cast<received_flags>(static_cast<raw_t>(lhs) & static_cast<raw_t>(rhs));
+}
+
+constexpr bool operator!(received_flags fl)
+{
+	return static_cast<std::underlying_type_t<received_flags>>(fl) == 0; /* i.e. no flag is set */
+}
+
+/* Strip everything but the protocol bits from the flags */
+constexpr received_flags received_type_apply_protocols_mask(received_flags fl)
+{
+	/* All protocol bits; UNKNOWN, ARTIFICIAL, SSL and AUTHENTICATED are not included */
+	constexpr auto protocols = received_flags::SMTP | received_flags::ESMTP |
+							   received_flags::ESMTPA | received_flags::ESMTPS |
+							   received_flags::ESMTPSA | received_flags::IMAP |
+							   received_flags::HTTP | received_flags::LOCAL |
+							   received_flags::MAPI | received_flags::LMTP;
+
+	/* Bits outside of the mask are cleared in the result */
+	return fl & protocols;
+}
+
+/*
+ * Name of the protocol encoded in the flags.
+ * Bits outside of the protocols mask are ignored, so e.g. IMAP | SSL is
+ * reported as "imap".
+ * "unknown" is returned unless exactly one protocol bit is set.
+ * The returned string is a literal, so it must not be freed or modified.
+ */
+constexpr const char *received_protocol_to_string(received_flags fl)
+{
+	const auto proto_bits = received_type_apply_protocols_mask(fl);
+
+	switch (proto_bits) {
+	/* SMTP variants */
+	case received_flags::SMTP:
+		return "smtp";
+	case received_flags::ESMTP:
+		return "esmtp";
+	case received_flags::ESMTPS:
+		return "esmtps";
+	case received_flags::ESMTPA:
+		return "esmtpa";
+	case received_flags::ESMTPSA:
+		return "esmtpsa";
+	/* Other protocols */
+	case received_flags::LMTP:
+		return "lmtp";
+	case received_flags::IMAP:
+		return "imap";
+	case received_flags::HTTP:
+		return "http";
+	case received_flags::LOCAL:
+		return "local";
+	case received_flags::MAPI:
+		return "mapi";
+	default:
+		/* No protocol bit or a combination of several ones */
+		break;
+	}
+
+	return "unknown";
+}
+
+struct received_header {
+	mime_string from_hostname;
+	mime_string real_hostname;
+	mime_string real_ip;
+	mime_string by_hostname;
+	mime_string for_mbox;
+	struct rspamd_email_address *for_addr = nullptr; /* owned: released in the destructor */
+	rspamd_inet_addr_t *addr = nullptr; /* not released by this struct */
+	struct rspamd_mime_header *hdr = nullptr; /* not released by this struct */
+	time_t timestamp = 0;
+	received_flags flags = received_flags::DEFAULT; /* See enum received_flags */
+
+	received_header() noexcept
+		: from_hostname(received_char_filter),
+		  real_hostname(received_char_filter),
+		  real_ip(received_char_filter),
+		  by_hostname(received_char_filter),
+		  for_mbox()
+	{
+	}
+	/* We have raw C pointers, so copy is explicitly disabled */
+	received_header(const received_header &other) = delete;
+	received_header(received_header &&other) noexcept
+	{
+		*this = std::move(other); /* members are default-initialized first, then taken from other */
+	}
+
+	received_header &operator=(received_header &&other) noexcept
+	{
+		if (this != &other) {
+			from_hostname = std::move(other.from_hostname);
+			real_hostname = std::move(other.real_hostname);
+			real_ip = std::move(other.real_ip);
+			by_hostname = std::move(other.by_hostname);
+			for_mbox = std::move(other.for_mbox);
+			timestamp = other.timestamp;
+			flags = other.flags;
+			std::swap(for_addr, other.for_addr); /* our previous address is released by other's destructor */
+			std::swap(addr, other.addr);
+			std::swap(hdr, other.hdr);
+		}
+		return *this;
+	}
+
+	/* Unit tests helper */
+	static auto from_map(const ankerl::unordered_dense::map<std::string_view, std::string_view> &map) -> received_header
+	{
+		using namespace std::string_view_literals;
+		received_header rh;
+
+		if (map.contains("from_hostname")) {
+			rh.from_hostname.assign_copy(map.at("from_hostname"sv));
+		}
+		if (map.contains("real_hostname")) {
+			rh.real_hostname.assign_copy(map.at("real_hostname"sv));
+		}
+		if (map.contains("by_hostname")) {
+			rh.by_hostname.assign_copy(map.at("by_hostname"sv));
+		}
+		if (map.contains("real_ip")) {
+			rh.real_ip.assign_copy(map.at("real_ip"sv));
+		}
+		if (map.contains("for_mbox")) {
+			rh.for_mbox.assign_copy(map.at("for_mbox"sv));
+		}
+
+		return rh;
+	}
+	/* Counterpart of from_map: only non-empty fields are added, the views refer to this object */
+	auto as_map() const -> ankerl::unordered_dense::map<std::string_view, std::string_view>
+	{
+		ankerl::unordered_dense::map<std::string_view, std::string_view> map;
+
+		if (!from_hostname.empty()) {
+			map["from_hostname"] = from_hostname.as_view();
+		}
+		if (!real_hostname.empty()) {
+			map["real_hostname"] = real_hostname.as_view();
+		}
+		if (!by_hostname.empty()) {
+			map["by_hostname"] = by_hostname.as_view();
+		}
+		if (!real_ip.empty()) {
+			map["real_ip"] = real_ip.as_view();
+		}
+		if (!for_mbox.empty()) {
+			map["for_mbox"] = for_mbox.as_view();
+		}
+
+		return map;
+	}
+
+	~received_header()
+	{
+		if (for_addr) {
+			rspamd_email_address_free(for_addr);
+		}
+	}
+};
+
+/* Ordered storage for the received headers of a message */
+class received_header_chain {
+public:
+	/* Ties the lifetime to the task pool: the registered destructor deletes `this` */
+	explicit received_header_chain(struct rspamd_task *task)
+	{
+		headers.reserve(2);
+		rspamd_mempool_add_destructor(task->task_pool,
+									  received_header_chain::received_header_chain_pool_dtor, this);
+	}
+	/* Standalone chain: no destructor is registered anywhere */
+	explicit received_header_chain()
+	{
+		headers.reserve(2);
+	}
+
+	enum class append_type {
+		append_tail,
+		append_head
+	};
+
+	/* Adds an empty header at the requested end and returns a reference to it */
+	auto new_received(append_type how = append_type::append_tail) -> received_header &
+	{
+		if (how != append_type::append_tail) {
+			headers.insert(headers.begin(), received_header());
+			return headers.front();
+		}
+
+		headers.emplace_back();
+		return headers.back();
+	}
+	/* Same as above, but an existing header is moved into the chain */
+	auto new_received(received_header &&hdr, append_type how = append_type::append_tail) -> received_header &
+	{
+		if (how != append_type::append_tail) {
+			headers.insert(headers.begin(), std::move(hdr));
+			return headers.front();
+		}
+
+		headers.emplace_back(std::move(hdr));
+		return headers.back();
+	}
+	/* Returns the nth header (0 is the first one) or nullopt if it is out of range */
+	auto get_received(std::size_t nth) -> std::optional<std::reference_wrapper<received_header>>
+	{
+		if (nth >= headers.size()) {
+			return std::nullopt;
+		}
+
+		return headers[nth];
+	}
+	auto size() const -> std::size_t
+	{
+		return headers.size();
+	}
+	constexpr auto as_vector() const -> const std::vector<received_header> &
+	{
+		return headers;
+	}
+
+private:
+	static auto received_header_chain_pool_dtor(void *ptr) -> void
+	{
+		delete static_cast<received_header_chain *>(ptr);
+	}
+	std::vector<received_header> headers;
+};
+
+}// namespace rspamd::mime
+
+#endif//RSPAMD_RECEIVED_HXX
diff --git a/src/libmime/scan_result.c b/src/libmime/scan_result.c
new file mode 100644
index 0000000..a6bc0cb
--- /dev/null
+++ b/src/libmime/scan_result.c
@@ -0,0 +1,1106 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "mem_pool.h"
+#include "scan_result.h"
+#include "rspamd.h"
+#include "message.h"
+#include "lua/lua_common.h"
+#include "libserver/cfg_file_private.h"
+#include "libmime/scan_result_private.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include <math.h>
+#include "contrib/uthash/utlist.h"
+/* Debug logging for the "metric" log module; a variable named `task` must be in scope at the call site */
+#define msg_debug_metric(...) rspamd_conditional_debug_fast(NULL, NULL,                                               \
+															rspamd_metric_log_id, "metric", task->task_pool->tag.uid, \
+															RSPAMD_LOG_FUNC,                                          \
+															__VA_ARGS__)
+
+INIT_LOG_MODULE(metric)
+
+/* Average symbols count to optimize hash allocation */
+static struct rspamd_counter_data symbols_count;
+
+/* Pool destructor of a scan result: drops the Lua callback reference and the hash tables */
+static void
+rspamd_scan_result_dtor(gpointer d)
+{
+	struct rspamd_scan_result *result = (struct rspamd_scan_result *) d;
+	struct rspamd_symbol_result *cur;
+
+	/* Update the average that is used to presize the symbols table of new results */
+	rspamd_set_counter_ema(&symbols_count, kh_size(result->symbols), 0.5);
+
+	if (result->symbol_cbref != -1) {
+		luaL_unref(result->task->cfg->lua_state, LUA_REGISTRYINDEX, result->symbol_cbref);
+	}
+	kh_foreach_value(result->symbols, cur, {
+		if (cur->options != NULL) {
+			kh_destroy(rspamd_options_hash, cur->options);
+		}
+	});
+	kh_destroy(rspamd_symbols_group_hash, result->sym_groups);
+	kh_destroy(rspamd_symbols_hash, result->symbols);
+}
+
+static void
+rspamd_metric_actions_foreach_cb(int i, struct rspamd_action *act, void *cbd)
+{
+	/* Slot of the scan result that mirrors the i-th configured action */
+	struct rspamd_action_config *slot = &((struct rspamd_scan_result *) cbd)->actions_config[i];
+	slot->action = act;
+	slot->flags = RSPAMD_ACTION_RESULT_DEFAULT;
+	if (act->flags & RSPAMD_ACTION_NO_THRESHOLD) {
+		slot->flags |= RSPAMD_ACTION_RESULT_NO_THRESHOLD;
+		return;
+	}
+	slot->cur_limit = act->threshold;
+}
+
+/**
+ * Create a scan result allocated from the task pool and append it to task->result
+ * @param task task that owns the result
+ * @param name optional name, copied to the task pool (NULL is allowed)
+ * @param lua_sym_cbref value stored in symbol_cbref; anything but -1 is
+ *        unreferenced from the Lua registry when the result is destroyed
+ * @return new scan result, its destructor is registered in the task pool
+ */
+struct rspamd_scan_result *
+rspamd_create_metric_result(struct rspamd_task *task,
+							const gchar *name, gint lua_sym_cbref)
+{
+	struct rspamd_scan_result *res = rspamd_mempool_alloc0(task->task_pool, sizeof(*res));
+
+	res->task = task;
+	res->symbol_cbref = lua_sym_cbref;
+	res->name = name ? rspamd_mempool_strdup(task->task_pool, name) : NULL;
+
+	/* Presize the tables: symbols follow the average seen so far, but not less than 4 */
+	res->sym_groups = kh_init(rspamd_symbols_group_hash);
+	kh_resize(rspamd_symbols_group_hash, res->sym_groups, 4);
+
+	res->symbols = kh_init(rspamd_symbols_hash);
+	if (symbols_count.mean > 4) {
+		kh_resize(rspamd_symbols_hash, res->symbols, symbols_count.mean);
+	}
+	else {
+		kh_resize(rspamd_symbols_hash, res->symbols, 4);
+	}
+
+	if (task->cfg) {
+		/* One config slot per configured action, filled by the enumeration callback */
+		size_t nact = rspamd_config_actions_size(task->cfg);
+
+		res->actions_config = rspamd_mempool_alloc0(task->task_pool,
+													sizeof(struct rspamd_action_config) * nact);
+		rspamd_config_actions_foreach_enumerate(task->cfg, rspamd_metric_actions_foreach_cb, res);
+		res->nactions = nact;
+	}
+
+	/* The hash tables are released together with the task pool */
+	rspamd_mempool_add_destructor(task->task_pool, rspamd_scan_result_dtor, res);
+	DL_APPEND(task->result, res);
+
+	return res;
+}
+
+/* Comparator for DL_SORT: descending priority; compares instead of subtracting so that the result cannot wrap */
+static inline int
+rspamd_pr_sort(const struct rspamd_passthrough_result *pra, const struct rspamd_passthrough_result *prb)
+{
+	return (prb->priority > pra->priority) - (prb->priority < pra->priority);
+}
+
+/**
+ * Add a passthrough result for an action to a scan result and log it
+ * @param task task that provides the pool and the default scan result
+ * @param scan_result result to modify, task->result is used if it is NULL
+ * @param message stored by pointer in the new element, as well as `module`
+ * @return false if the action is marked as disabled in the scan result
+ *         (nothing is added in this case), true otherwise
+ */
+bool rspamd_add_passthrough_result(struct rspamd_task *task,
+								   struct rspamd_action *action,
+								   guint priority,
+								   double target_score,
+								   const gchar *message,
+								   const gchar *module,
+								   uint flags,
+								   struct rspamd_scan_result *scan_result)
+{
+	struct rspamd_passthrough_result *pt;
+	struct rspamd_action_config *matched = NULL;
+	/* Marker printed in the log for results added with RSPAMD_PASSTHROUGH_LEAST */
+	const gchar *least_mark = flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "";
+
+	if (scan_result == NULL) {
+		scan_result = task->result;
+	}
+
+	/* Look up the per-result config of this action; action pointers are assumed to be static */
+	for (unsigned int i = 0; i < scan_result->nactions && matched == NULL; i++) {
+		if (scan_result->actions_config[i].action == action) {
+			matched = &scan_result->actions_config[i];
+		}
+	}
+
+	if (matched != NULL && (matched->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
+		msg_info_task("<%s>: NOT set pre-result to '%s' %s(%.2f): '%s' from %s(%d); action is disabled",
+					  MESSAGE_FIELD_CHECK(task, message_id), action->name,
+					  least_mark, target_score, message, module, priority);
+
+		return false;
+	}
+
+	pt = rspamd_mempool_alloc(task->task_pool, sizeof(*pt));
+	pt->flags = flags;
+	pt->target_score = target_score;
+	pt->module = module;
+	pt->message = message;
+	pt->priority = priority;
+	pt->action = action;
+
+	/* Keep the list ordered by priority after each insertion */
+	DL_APPEND(scan_result->passthrough_result, pt);
+	DL_SORT(scan_result->passthrough_result, rspamd_pr_sort);
+
+	if (isnan(target_score)) {
+		msg_info_task("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
+					  MESSAGE_FIELD_CHECK(task, message_id), action->name,
+					  least_mark, message, module, priority);
+	}
+	else {
+		msg_info_task("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
+					  MESSAGE_FIELD_CHECK(task, message_id), action->name,
+					  least_mark, target_score, message, module, priority);
+	}
+
+	scan_result->nresults++;
+
+	return true;
+}
+
+/* Cap a positive weight by the room left in the group; NAN means that the limit is already reached */
+static inline gdouble
+rspamd_check_group_score(struct rspamd_task *task,
+						 const gchar *symbol,
+						 struct rspamd_symbols_group *gr,
+						 gdouble *group_score,
+						 gdouble w)
+{
+	/* Nothing to limit: no group, no score, no positive limit or a non-positive weight */
+	if (gr == NULL || !group_score || !(gr->max_score > 0.0) || !(w > 0.0)) {
+		return w;
+	}
+
+	if (*group_score >= gr->max_score) {
+		msg_info_task("maximum group score %.2f for group %s has been reached,"
+					  " ignoring symbol %s with weight %.2f",
+					  gr->max_score, gr->name, symbol, w);
+		return NAN;
+	}
+
+	return (*group_score + w > gr->max_score) ? gr->max_score - *group_score : w;
+}
+
+#ifndef DBL_EPSILON
+#define DBL_EPSILON 2.2204460492503131e-16 /* fallback definition, equals 2^-52 */
+#endif
+
+static struct rspamd_symbol_result *
+insert_metric_result(struct rspamd_task *task,
+					 const gchar *symbol,
+					 double weight,
+					 const gchar *opt,
+					 struct rspamd_scan_result *metric_res,
+					 enum rspamd_symbol_insert_flags flags,
+					 bool *new_sym)
+{
+	struct rspamd_symbol_result *symbol_result = NULL;
+	gdouble final_score, *gr_score = NULL, next_gf = 1.0, diff;
+	struct rspamd_symbol *sdef;
+	struct rspamd_symbols_group *gr = NULL;
+	const ucl_object_t *mobj, *sobj;
+	gint max_shots = G_MAXINT, ret;
+	guint i;
+	khiter_t k;
+	gboolean single = !!(flags & RSPAMD_SYMBOL_INSERT_SINGLE);
+	gchar *sym_cpy;
+
+	if (!isfinite(weight)) {
+		msg_warn_task("detected %s score for symbol %s, replace it with zero",
+					  isnan(weight) ? "NaN" : "infinity", symbol);
+		weight = 0.0;
+	}
+
+	msg_debug_metric("want to insert symbol %s, initial weight %.2f",
+					 symbol, weight);
+
+	sdef = g_hash_table_lookup(task->cfg->symbols, symbol);
+	if (sdef == NULL) {
+		if (flags & RSPAMD_SYMBOL_INSERT_ENFORCE) {
+			final_score = 1.0 * weight; /* Enforce static weight to 1.0 */
+		}
+		else {
+			final_score = 0.0;
+		}
+
+		msg_debug_metric("no symbol definition for %s; final multiplier %.2f",
+						 symbol, final_score);
+	}
+	else {
+		if (sdef->cache_item) {
+			/* Check if we can insert this symbol at all */
+			if (!rspamd_symcache_is_item_allowed(task, sdef->cache_item, FALSE)) {
+				msg_debug_metric("symbol %s is not allowed to be inserted due to settings",
+								 symbol);
+				return NULL;
+			}
+		}
+
+		final_score = (*sdef->weight_ptr) * weight;
+
+		PTR_ARRAY_FOREACH(sdef->groups, i, gr)
+		{
+			k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
+
+			if (k == kh_end(metric_res->sym_groups)) {
+				k = kh_put(rspamd_symbols_group_hash, metric_res->sym_groups,
+						   gr, &ret);
+				kh_value(metric_res->sym_groups, k) = 0;
+			}
+		}
+
+		msg_debug_metric("metric multiplier for %s is %.2f",
+						 symbol, *sdef->weight_ptr);
+	}
+
+	if (task->settings) {
+		gdouble corr;
+		mobj = ucl_object_lookup(task->settings, "scores");
+
+		if (!mobj) {
+			/* Legacy */
+			mobj = task->settings;
+		}
+		else {
+			msg_debug_metric("found scores in the settings");
+		}
+
+		sobj = ucl_object_lookup(mobj, symbol);
+		if (sobj != NULL && ucl_object_todouble_safe(sobj, &corr)) {
+			msg_debug_metric("settings: changed weight of symbol %s from %.2f "
+							 "to %.2f * %.2f",
+							 symbol, final_score, corr, weight);
+			final_score = corr * weight;
+		}
+	}
+
+	k = kh_get(rspamd_symbols_hash, metric_res->symbols, symbol);
+	if (k != kh_end(metric_res->symbols)) {
+		/* Existing metric score */
+		symbol_result = kh_value(metric_res->symbols, k);
+		if (single) {
+			max_shots = 1;
+		}
+		else {
+			if (sdef) {
+				if (sdef->groups) {
+					PTR_ARRAY_FOREACH(sdef->groups, i, gr)
+					{
+						if (gr->flags & RSPAMD_SYMBOL_GROUP_ONE_SHOT) {
+							max_shots = 1;
+						}
+					}
+				}
+
+				max_shots = MIN(max_shots, sdef->nshots);
+			}
+			else {
+				max_shots = task->cfg->default_max_shots;
+			}
+		}
+
+		msg_debug_metric("nshots: %d for symbol %s", max_shots, symbol);
+
+		if (!single && (max_shots > 0 && (symbol_result->nshots >= max_shots))) {
+			single = TRUE;
+		}
+
+		symbol_result->nshots++;
+
+		if (opt) {
+			rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
+		}
+
+		/* Adjust diff */
+		if (!single) {
+			diff = final_score;
+			msg_debug_metric("symbol %s can be inserted multiple times: %.2f weight",
+							 symbol, diff);
+		}
+		else {
+			if (fabs(symbol_result->score) < fabs(final_score) &&
+				signbit(symbol_result->score) == signbit(final_score)) {
+				/* Replace less significant weight with a more significant one */
+				diff = final_score - symbol_result->score;
+				msg_debug_metric("symbol %s can be inserted single time;"
+								 " weight adjusted %.2f + %.2f",
+								 symbol, symbol_result->score, diff);
+			}
+			else {
+				diff = 0;
+			}
+		}
+
+		if (diff) {
+			/* Handle grow factor */
+			if (metric_res->grow_factor && diff > 0) {
+				diff *= metric_res->grow_factor;
+				next_gf *= task->cfg->grow_factor;
+			}
+			else if (diff > 0) {
+				next_gf = task->cfg->grow_factor;
+			}
+
+			msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)",
+							 next_gf, symbol, diff);
+
+			if (sdef) {
+				PTR_ARRAY_FOREACH(sdef->groups, i, gr)
+				{
+					gdouble cur_diff;
+
+					k = kh_get(rspamd_symbols_group_hash,
+							   metric_res->sym_groups, gr);
+					g_assert(k != kh_end(metric_res->sym_groups));
+					gr_score = &kh_value(metric_res->sym_groups, k);
+					cur_diff = rspamd_check_group_score(task, symbol, gr,
+														gr_score, diff);
+
+					if (isnan(cur_diff)) {
+						/* Limit reached, do not add result */
+						msg_debug_metric(
+							"group limit %.2f is reached for %s when inserting symbol %s;"
+							" drop score %.2f",
+							*gr_score, gr->name, symbol, diff);
+
+						diff = NAN;
+						break;
+					}
+					else if (gr_score) {
+						*gr_score += cur_diff;
+
+						if (cur_diff < diff) {
+							/* Reduce */
+							msg_debug_metric(
+								"group limit %.2f is reached for %s when inserting symbol %s;"
+								" reduce score %.2f - %.2f",
+								*gr_score, gr->name, symbol, diff, cur_diff);
+							diff = cur_diff;
+						}
+					}
+				}
+			}
+
+			if (!isnan(diff)) {
+				metric_res->score += diff;
+				metric_res->grow_factor = next_gf;
+
+				if (single) {
+					msg_debug_metric("final score for single symbol %s = %.2f; %.2f diff",
+									 symbol, final_score, diff);
+					symbol_result->score = final_score;
+				}
+				else {
+					msg_debug_metric("increase final score for multiple symbol %s += %.2f = %.2f",
+									 symbol, symbol_result->score, diff);
+					symbol_result->score += diff;
+				}
+			}
+		}
+	}
+	else {
+		/* New result */
+		if (new_sym) {
+			*new_sym = true;
+		}
+
+		sym_cpy = rspamd_mempool_strdup(task->task_pool, symbol);
+		k = kh_put(rspamd_symbols_hash, metric_res->symbols,
+				   sym_cpy, &ret);
+		g_assert(ret > 0);
+		symbol_result = rspamd_mempool_alloc0(task->task_pool, sizeof(*symbol_result));
+		kh_value(metric_res->symbols, k) = symbol_result;
+
+		/* Handle grow factor */
+		if (metric_res->grow_factor && final_score > 0) {
+			final_score *= metric_res->grow_factor;
+			next_gf *= task->cfg->grow_factor;
+		}
+		else if (final_score > 0) {
+			next_gf = task->cfg->grow_factor;
+		}
+
+		msg_debug_metric("adjust grow factor to %.2f for symbol %s (%.2f final)",
+						 next_gf, symbol, final_score);
+
+		symbol_result->name = sym_cpy;
+		symbol_result->sym = sdef;
+		symbol_result->nshots = 1;
+
+		if (sdef) {
+			/* Check group limits */
+			PTR_ARRAY_FOREACH(sdef->groups, i, gr)
+			{
+				gdouble cur_score;
+
+				k = kh_get(rspamd_symbols_group_hash, metric_res->sym_groups, gr);
+				g_assert(k != kh_end(metric_res->sym_groups));
+				gr_score = &kh_value(metric_res->sym_groups, k);
+				cur_score = rspamd_check_group_score(task, symbol, gr,
+													 gr_score, final_score);
+
+				if (isnan(cur_score)) {
+					/* Limit reached, do not add result */
+					msg_debug_metric(
+						"group limit %.2f is reached for %s when inserting symbol %s;"
+						" drop score %.2f",
+						*gr_score, gr->name, symbol, final_score);
+					final_score = NAN;
+					break;
+				}
+				else if (gr_score) {
+					*gr_score += cur_score;
+
+					if (cur_score < final_score) {
+						/* Reduce */
+						msg_debug_metric(
+							"group limit %.2f is reached for %s when inserting symbol %s;"
+							" reduce score %.2f - %.2f",
+							*gr_score, gr->name, symbol, final_score, cur_score);
+						final_score = cur_score;
+					}
+				}
+			}
+		}
+
+		if (!isnan(final_score)) {
+			const double epsilon = DBL_EPSILON;
+
+			metric_res->score += final_score;
+			metric_res->grow_factor = next_gf;
+			symbol_result->score = final_score;
+
+			if (final_score > epsilon) {
+				metric_res->npositive++;
+				metric_res->positive_score += final_score;
+			}
+			else if (final_score < -epsilon) {
+				metric_res->nnegative++;
+				metric_res->negative_score += fabs(final_score);
+			}
+		}
+		else {
+			symbol_result->score = 0;
+		}
+
+		if (opt) {
+			rspamd_task_add_result_option(task, symbol_result, opt, strlen(opt));
+		}
+	}
+
+	msg_debug_metric("final insertion for symbol %s, score %.2f, factor: %f",
+					 symbol,
+					 symbol_result->score,
+					 final_score);
+	metric_res->nresults++;
+
+	return symbol_result;
+}
+
+/**
+ * Insert a symbol into the scan result(s) of a task.
+ *
+ * If `result` is NULL the symbol is offered to every result of the task; a
+ * result that has a Lua `symbol_cbref` callback receives the symbol only when
+ * that callback returns a true value. Otherwise the symbol is inserted into
+ * `result` only.
+ *
+ * The symbol frequency in the symbols cache is increased only for the default
+ * (unnamed) result and only on the first shot of the symbol.
+ *
+ * @param task task being processed
+ * @param symbol symbol name
+ * @param weight weight that is multiplied by the configured symbol score
+ * @param opt optional option string to attach to the symbol (may be NULL)
+ * @param flags insertion flags
+ * @param result specific result or NULL to insert into all results
+ * @return symbol result or NULL if nothing has been inserted; when `result`
+ * is NULL this is the symbol result of the default result with the newly
+ * created shadow symbol results linked via `next`
+ */
+struct rspamd_symbol_result *
+rspamd_task_insert_result_full(struct rspamd_task *task,
+							   const gchar *symbol,
+							   double weight,
+							   const gchar *opt,
+							   enum rspamd_symbol_insert_flags flags,
+							   struct rspamd_scan_result *result)
+{
+	struct rspamd_symbol_result *symbol_result = NULL, *ret = NULL;
+	struct rspamd_scan_result *mres;
+
+	/*
+	 * We allow symbols to be inserted for skipped tasks, as it might be a
+	 * race condition before some symbol is finished and skip flag being set.
+	 */
+	if (!RSPAMD_TASK_IS_SKIPPED(task) && (task->processed_stages & (RSPAMD_TASK_STAGE_IDEMPOTENT >> 1))) {
+		msg_err_task("cannot insert symbol %s on idempotent phase",
+					 symbol);
+
+		return NULL;
+	}
+
+	if (result == NULL) {
+		/* Insert everywhere */
+		DL_FOREACH(task->result, mres)
+		{
+			if (mres->symbol_cbref != -1) {
+				/* Check if we can insert this symbol to this symbol result */
+				GError *err = NULL;
+				lua_State *L = (lua_State *) task->cfg->lua_state;
+				/* The default result has no name: never hand NULL to the logger */
+				const gchar *res_name = mres->name ? mres->name : "default";
+
+				if (!rspamd_lua_universal_pcall(L, mres->symbol_cbref,
+												G_STRLOC, 1, "uss", &err,
+												"rspamd{task}", task, symbol, res_name)) {
+					msg_warn_task("cannot call for symbol_cbref for result %s: %e",
+								  res_name, err);
+
+					if (err) {
+						g_error_free(err);
+					}
+
+					continue;
+				}
+				else {
+					if (!lua_toboolean(L, -1)) {
+						/* Skip symbol */
+						msg_debug_metric("skip symbol %s for result %s due to Lua return value",
+										 symbol, res_name);
+						lua_pop(L, 1); /* Remove result */
+
+						continue;
+					}
+
+					lua_pop(L, 1); /* Remove result */
+				}
+			}
+
+			bool new_symbol = false;
+
+			symbol_result = insert_metric_result(task,
+												 symbol,
+												 weight,
+												 opt,
+												 mres,
+												 flags,
+												 &new_symbol);
+
+			if (mres->name == NULL) {
+				/* Default result */
+				ret = symbol_result;
+
+				/* Process cache item */
+				if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
+					rspamd_symcache_inc_frequency(task->cfg->cache,
+												  symbol_result->sym->cache_item,
+												  symbol_result->sym->name);
+				}
+			}
+			else if (new_symbol) {
+				/* O(N) but we normally don't have any shadow results */
+				LL_APPEND(ret, symbol_result);
+			}
+		}
+	}
+	else {
+		/* Specific insertion */
+		symbol_result = insert_metric_result(task,
+											 symbol,
+											 weight,
+											 opt,
+											 result,
+											 flags,
+											 NULL);
+		ret = symbol_result;
+
+		if (result->name == NULL) {
+			/* Process cache item */
+			if (symbol_result && task->cfg->cache && symbol_result->sym && symbol_result->nshots == 1) {
+				rspamd_symcache_inc_frequency(task->cfg->cache,
+											  symbol_result->sym->cache_item,
+											  symbol_result->sym->name);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy `vlen` bytes of `val` into memory taken from the task pool, replacing
+ * every non-printable ASCII byte, every non-printable code point and every
+ * invalid UTF-8 sequence with U+FFFD. The copy is NUL terminated and its
+ * length (without the terminator) is stored in `outlen`.
+ */
+static gchar *
+rspamd_task_option_safe_copy(struct rspamd_task *task,
+							 const gchar *val,
+							 gsize vlen,
+							 gsize *outlen)
+{
+	/* UTF-8 encoding of U+FFFD */
+	static const gchar replacement[3] = {'\357', '\277', '\275'};
+	const gchar *cursor = val;
+	const gchar *const limit = val + vlen;
+	gsize needed = 0;
+
+	/* First pass: find out how much room the sanitised copy may need */
+	while (cursor < limit) {
+		if (!(*cursor & 0x80)) {
+			/* ASCII: kept as is or replaced by the 3 bytes of U+FFFD */
+			needed += g_ascii_isprint(*cursor) ? 1 : 3;
+			cursor++;
+			continue;
+		}
+
+		UChar32 uc;
+		gint consumed = 0;
+
+		U8_NEXT(cursor, consumed, limit - cursor, uc);
+
+		if (uc > 0 && u_isprint(uc)) {
+			needed += consumed;
+		}
+		else {
+			/* Replacement takes 3 bytes, the source may be longer */
+			needed += MAX(consumed, 3);
+		}
+
+		cursor += consumed;
+	}
+
+	gchar *sanitized = rspamd_mempool_alloc(task->task_pool, needed + 1);
+	gchar *out = sanitized;
+
+	/* Second pass: write the sanitised bytes */
+	for (cursor = val; cursor < limit;) {
+		if (!(*cursor & 0x80)) {
+			if (g_ascii_isprint(*cursor)) {
+				*out++ = *cursor;
+			}
+			else {
+				memcpy(out, replacement, sizeof(replacement));
+				out += sizeof(replacement);
+			}
+
+			cursor++;
+			continue;
+		}
+
+		UChar32 uc;
+		gint consumed = 0;
+
+		U8_NEXT(cursor, consumed, limit - cursor, uc);
+
+		if (uc > 0 && u_isprint(uc)) {
+			memcpy(out, cursor, consumed);
+			out += consumed;
+		}
+		else {
+			memcpy(out, replacement, sizeof(replacement));
+			out += sizeof(replacement);
+		}
+
+		cursor += consumed;
+	}
+
+	*out = '\0';
+	*(outlen) = out - sanitized;
+
+	return sanitized;
+}
+
+/**
+ * Add an option to a symbol result and to all shadow results chained to it
+ * via `next`.
+ *
+ * Every result in the chain is processed independently: it has its own
+ * options size budget (`task->cfg->max_opts_len`), its own duplicates check
+ * and its own truncation state. When the budget of a result is exceeded, the
+ * option is replaced by "..." for this result and its `opts_len` becomes
+ * negative, so no more options are accepted for it.
+ *
+ * @param task task object
+ * @param s symbol result of the default result (head of the shadow chain)
+ * @param val option value, `vlen` bytes long
+ * @param vlen length of the option
+ * @return TRUE if `val` is NULL or if a new option has been appended to `s`
+ * itself, FALSE otherwise (duplicate, truncated or one-param symbol, NULL `s`)
+ */
+gboolean
+rspamd_task_add_result_option(struct rspamd_task *task,
+							  struct rspamd_symbol_result *s,
+							  const gchar *val,
+							  gsize vlen)
+{
+	struct rspamd_symbol_option *opt, srch;
+	gboolean ret = FALSE;
+	gchar *opt_cpy = NULL;
+	gsize cpy_len;
+	khiter_t k;
+	gint r;
+	struct rspamd_symbol_result *cur;
+
+	if (s && val) {
+		/*
+		 * Here we assume that this function is all the time called with the
+		 * symbol from the default result, not some shadow result, or
+		 * the option insertion will be wrong
+		 */
+		LL_FOREACH(s, cur)
+		{
+			/*
+			 * Per result view of the option: truncation that happens for
+			 * one result must not affect the other results in the chain
+			 */
+			const gchar *cur_val = val;
+			gsize cur_vlen = vlen;
+			gboolean added = FALSE;
+
+			if (cur->opts_len < 0) {
+				/* Cannot add more options, give up */
+				msg_debug_task("cannot add more options to symbol %s when adding option %s",
+							   cur->name, val);
+				continue;
+			}
+
+			if (!cur->options) {
+				cur->options = kh_init(rspamd_options_hash);
+			}
+
+			if (cur_vlen + cur->opts_len > task->cfg->max_opts_len) {
+				/* Add truncated option */
+				msg_info_task("cannot add more options to symbol %s when adding option %s",
+							  cur->name, val);
+				cur_val = "...";
+				cur_vlen = 3;
+				cur->opts_len = -1;
+			}
+
+			if (!(cur->sym && (cur->sym->flags & RSPAMD_SYMBOL_FLAG_ONEPARAM))) {
+
+				srch.option = (gchar *) cur_val;
+				srch.optlen = cur_vlen;
+				k = kh_get(rspamd_options_hash, cur->options, &srch);
+
+				if (k == kh_end(cur->options)) {
+					opt_cpy = rspamd_task_option_safe_copy(task, cur_val, cur_vlen, &cpy_len);
+					if (cpy_len != cur_vlen) {
+						/* Sanitising has changed the option, check duplicates again */
+						srch.option = (gchar *) opt_cpy;
+						srch.optlen = cpy_len;
+						k = kh_get(rspamd_options_hash, cur->options, &srch);
+					}
+					/* Append new options */
+					if (k == kh_end(cur->options)) {
+						opt = rspamd_mempool_alloc0(task->task_pool, sizeof(*opt));
+						opt->optlen = cpy_len;
+						opt->option = opt_cpy;
+
+						kh_put(rspamd_options_hash, cur->options, opt, &r);
+						DL_APPEND(cur->opts_head, opt);
+						added = TRUE;
+					}
+				}
+			}
+
+			if (added) {
+				/* Account the size only for the result that got the option */
+				if (cur->opts_len >= 0) {
+					cur->opts_len += cur_vlen;
+				}
+
+				/* The return value reflects the primary result only */
+				if (s == cur) {
+					ret = TRUE;
+				}
+			}
+		}
+	}
+	else if (!val) {
+		ret = TRUE;
+	}
+
+	task->result->nresults++;
+
+	return ret;
+}
+
+/**
+ * Find the action settings of `scan_result` that belong to the action `act`.
+ * @return pointer into `scan_result->actions_config` or NULL if not found
+ */
+struct rspamd_action_config *
+rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result,
+									 struct rspamd_action *act)
+{
+	unsigned int idx = 0;
+
+	while (idx < scan_result->nactions) {
+		struct rspamd_action_config *candidate = &scan_result->actions_config[idx];
+
+		if (candidate->action == act) {
+			return candidate;
+		}
+
+		idx++;
+	}
+
+	return NULL;
+}
+
+/**
+ * Select the action for a scan result.
+ *
+ * Passthrough results are checked first: the first enabled passthrough
+ * result without the RSPAMD_PASSTHROUGH_LEAST flag wins immediately (and may
+ * override the result score with its target score). A `least` passthrough
+ * result only defines a lower bound that is merged with the score based
+ * selection afterwards.
+ *
+ * @param task task object
+ * @param ppr if not NULL, receives the passthrough result that has defined
+ * the outcome (it is left untouched when the action is selected by score)
+ * @param scan_result result to check or NULL for the default result
+ * @return selected action; NULL only if no action matches and the result has
+ * no "no action" entry configured
+ */
+struct rspamd_action *
+rspamd_check_action_metric(struct rspamd_task *task,
+						   struct rspamd_passthrough_result **ppr,
+						   struct rspamd_scan_result *scan_result)
+{
+	struct rspamd_action_config *action_lim,
+		*noaction = NULL;
+	struct rspamd_action *selected_action = NULL, *least_action = NULL;
+	struct rspamd_passthrough_result *pr, *sel_pr = NULL;
+	double max_score = -(G_MAXDOUBLE), sc;
+	gboolean seen_least = FALSE;
+
+	if (scan_result == NULL) {
+		scan_result = task->result;
+	}
+
+	if (scan_result->passthrough_result != NULL) {
+		DL_FOREACH(scan_result->passthrough_result, pr)
+		{
+			struct rspamd_action_config *act_config =
+				rspamd_find_action_config_for_action(scan_result, pr->action);
+
+			/* Skip disabled actions */
+			if (act_config && (act_config->flags & RSPAMD_ACTION_RESULT_DISABLED)) {
+				continue;
+			}
+
+			if (!seen_least || !(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
+				sc = pr->target_score;
+				selected_action = pr->action;
+
+				if (!(pr->flags & RSPAMD_PASSTHROUGH_LEAST)) {
+					if (!isnan(sc)) {
+						if (pr->action->action_type == METRIC_ACTION_NOACTION) {
+							scan_result->score = MIN(sc, scan_result->score);
+						}
+						else {
+							scan_result->score = sc;
+						}
+					}
+
+					if (ppr) {
+						*ppr = pr;
+					}
+
+					return selected_action;
+				}
+				else {
+					seen_least = TRUE;
+					least_action = selected_action;
+
+					if (isnan(sc)) {
+
+						if (selected_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
+							/*
+							 * In this case, we have a passthrough action that
+							 * is `least` action, however, there is no threshold
+							 * on it.
+							 *
+							 * Hence, we imply the following logic:
+							 *
+							 * - we leave score unchanged
+							 * - we apply passthrough no threshold action unless
+							 *   score based action *is not* reject, otherwise
+							 *   we apply reject action
+							 */
+						}
+						else {
+							sc = selected_action->threshold;
+							max_score = sc;
+							sel_pr = pr;
+						}
+					}
+					else {
+						max_score = sc;
+						sel_pr = pr;
+					}
+				}
+			}
+		}
+	}
+
+	/*
+	 * Select result by score
+	 *
+	 * Count down from nactions: `nactions` is unsigned int, so `nactions - 1`
+	 * would wrap to UINT_MAX (not to (size_t) -1) for an empty array.
+	 */
+	for (size_t i = scan_result->nactions; i-- > 0;) {
+		action_lim = &scan_result->actions_config[i];
+		sc = action_lim->cur_limit;
+
+		if (action_lim->action->action_type == METRIC_ACTION_NOACTION) {
+			noaction = action_lim;
+		}
+
+		if ((action_lim->flags & (RSPAMD_ACTION_RESULT_DISABLED | RSPAMD_ACTION_RESULT_NO_THRESHOLD))) {
+			continue;
+		}
+
+		if (isnan(sc) ||
+			(action_lim->action->flags & (RSPAMD_ACTION_NO_THRESHOLD | RSPAMD_ACTION_HAM))) {
+			continue;
+		}
+
+		if (scan_result->score >= sc && sc > max_score) {
+			selected_action = action_lim->action;
+			max_score = sc;
+		}
+	}
+
+	/* Fall back to "no action" if it is configured for this result */
+	if (selected_action == NULL && noaction != NULL) {
+		selected_action = noaction->action;
+	}
+
+	if (selected_action) {
+
+		if (seen_least) {
+			/* Adjust least action */
+			if (least_action->flags & RSPAMD_ACTION_NO_THRESHOLD) {
+				if (selected_action->action_type != METRIC_ACTION_REJECT &&
+					selected_action->action_type != METRIC_ACTION_DISCARD) {
+					/* Override score based action with least action */
+					selected_action = least_action;
+
+					if (ppr) {
+						*ppr = sel_pr;
+					}
+				}
+			}
+			else {
+				/* Adjust score if needed */
+				if (max_score > scan_result->score) {
+					if (ppr) {
+						*ppr = sel_pr;
+					}
+
+					scan_result->score = max_score;
+				}
+			}
+		}
+
+		return selected_action;
+	}
+
+	if (ppr) {
+		*ppr = sel_pr;
+	}
+
+	return noaction ? noaction->action : NULL;
+}
+
+/**
+ * Look up a symbol by name in `result` (or in the default result of the task
+ * if `result` is NULL).
+ * @return symbol result or NULL if the symbol has not been inserted
+ */
+struct rspamd_symbol_result *
+rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym,
+							   struct rspamd_scan_result *result)
+{
+	struct rspamd_scan_result *target = result ? result : task->result;
+	khiter_t it = kh_get(rspamd_symbols_hash, target->symbols, sym);
+
+	if (it == kh_end(target->symbols)) {
+		return NULL;
+	}
+
+	return kh_value(target->symbols, it);
+}
+
+/**
+ * Remove a symbol from `result` (or from the default result of the task if
+ * `result` is NULL). Unless the symbol score is NaN, it is subtracted from the
+ * result score and from the scores of all groups of the symbol.
+ * @return removed symbol result or NULL if the symbol is not in the result
+ */
+struct rspamd_symbol_result *rspamd_task_remove_symbol_result(
+	struct rspamd_task *task,
+	const gchar *symbol,
+	struct rspamd_scan_result *result)
+{
+	struct rspamd_scan_result *target = result ? result : task->result;
+	khiter_t sym_it = kh_get(rspamd_symbols_hash, target->symbols, symbol);
+
+	if (sym_it == kh_end(target->symbols)) {
+		/* Nothing to remove */
+		return NULL;
+	}
+
+	struct rspamd_symbol_result *removed = kh_value(target->symbols, sym_it);
+
+	if (!isnan(removed->score)) {
+		/* Remove score from the result */
+		target->score -= removed->score;
+
+		/* Give the score back to the groups as well */
+		if (target->sym_groups && removed->sym) {
+			struct rspamd_symbol_group *grp;
+			gint idx;
+
+			PTR_ARRAY_FOREACH(removed->sym->groups, idx, grp)
+			{
+				khiter_t grp_it = kh_get(rspamd_symbols_group_hash,
+										 target->sym_groups, grp);
+
+				if (grp_it != kh_end(target->sym_groups)) {
+					kh_value(target->sym_groups, grp_it) -= removed->score;
+				}
+			}
+		}
+	}
+
+	kh_del(rspamd_symbols_hash, target->symbols, sym_it);
+
+	return removed;
+}
+
+/**
+ * Call `func(symbol name, symbol result, ud)` for every symbol of `result`
+ * (or of the default result of the task if `result` is NULL). Does nothing if
+ * `func` is NULL.
+ */
+void rspamd_task_symbol_result_foreach(struct rspamd_task *task,
+									   struct rspamd_scan_result *result, GHFunc func,
+									   gpointer ud)
+{
+	const gchar *sym_name;
+	struct rspamd_symbol_result *sym_res;
+
+	if (result == NULL) {
+		/* Use default result */
+		result = task->result;
+	}
+
+	if (!func) {
+		return;
+	}
+
+	kh_foreach(result->symbols, sym_name, sym_res, {
+		func((gpointer) sym_name, (gpointer) sym_res, ud);
+	});
+}
+
+/**
+ * Find a scan result of the task by its name. NULL and "default" both select
+ * the default result (`task->result`).
+ * @return scan result or NULL if there is no result with such a name
+ */
+struct rspamd_scan_result *
+rspamd_find_metric_result(struct rspamd_task *task,
+						  const gchar *name)
+{
+	struct rspamd_scan_result *cursor;
+
+	if (name == NULL) {
+		return task->result;
+	}
+
+	if (strcmp(name, "default") == 0) {
+		return task->result;
+	}
+
+	DL_FOREACH(task->result, cursor)
+	{
+		if (cursor->name == NULL) {
+			/* Unnamed (default) result cannot match a non-default name */
+			continue;
+		}
+
+		if (strcmp(cursor->name, name) == 0) {
+			return cursor;
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/libmime/scan_result.h b/src/libmime/scan_result.h
new file mode 100644
index 0000000..46c2de8
--- /dev/null
+++ b/src/libmime/scan_result.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file scan_result.h
+ * Scan result holder
+ */
+
+#ifndef RSPAMD_SCAN_RESULT_H
+#define RSPAMD_SCAN_RESULT_H
+
+#include "config.h"
+#include "rspamd_symcache.h"
+#include "task.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct rspamd_settings;
+struct rspamd_classifier_config;
+
+struct rspamd_symbol_option {
+	gchar *option; /**< option data; hashed and compared over optlen bytes */
+	gsize optlen;  /**< length of the option in bytes */
+	struct rspamd_symbol_option *prev, *next; /**< links of the per symbol options list (see opts_head) */
+};
+
+enum rspamd_symbol_result_flags {
+	RSPAMD_SYMBOL_RESULT_NORMAL = 0,
+	RSPAMD_SYMBOL_RESULT_IGNORED = (1 << 0)
+};
+
+struct kh_rspamd_options_hash_s;
+
+/**
+ * Rspamd symbol result: a symbol that has been inserted into a scan result
+ */
+struct rspamd_symbol_result {
+	double score;                             /**< symbol's score							*/
+	struct kh_rspamd_options_hash_s *options; /**< hash of symbol's options (used to find duplicates)	*/
+	struct rspamd_symbol_option *opts_head;   /**< head of linked list of options			*/
+	const gchar *name;         /**< symbol name */
+	struct rspamd_symbol *sym; /**< symbol configuration (NULL if the symbol is not defined in the config) */
+	gssize opts_len;           /**< total size of all options (negative if truncated option is added) */
+	guint nshots;              /**< how many times this symbol has been inserted */
+	int flags;                 /**< presumably enum rspamd_symbol_result_flags - TODO confirm */
+	struct rspamd_symbol_result *next; /**< for shadow results */
+};
+
+
+#define RSPAMD_PASSTHROUGH_NORMAL 1
+#define RSPAMD_PASSTHROUGH_LOW 0
+#define RSPAMD_PASSTHROUGH_HIGH 2
+#define RSPAMD_PASSTHROUGH_CRITICAL 3
+
+#define RSPAMD_PASSTHROUGH_LEAST (1u << 0u)
+#define RSPAMD_PASSTHROUGH_NO_SMTP_MESSAGE (1u << 1u)
+#define RSPAMD_PASSTHROUGH_PROCESS_ALL (1u << 2u)
+
+struct rspamd_passthrough_result {
+	struct rspamd_action *action; /**< action requested by this passthrough result */
+	guint priority;               /**< presumably one of RSPAMD_PASSTHROUGH_LOW..CRITICAL - TODO confirm */
+	guint flags;                  /**< RSPAMD_PASSTHROUGH_* flags, e.g. RSPAMD_PASSTHROUGH_LEAST */
+	double target_score;          /**< score to set for the result, NaN means that no score is specified */
+	const gchar *message;
+	const gchar *module;
+	struct rspamd_passthrough_result *prev, *next; /**< double linked list of passthrough results */
+};
+
+
+enum rspamd_action_config_flags {
+	RSPAMD_ACTION_RESULT_DEFAULT = 0,
+	RSPAMD_ACTION_RESULT_NO_THRESHOLD = (1u << 0u), /**< action is ignored when selecting an action by score */
+	RSPAMD_ACTION_RESULT_DISABLED = (1u << 1u),     /**< action is ignored both for score based and passthrough selection */
+};
+struct rspamd_action_config {
+	gdouble cur_limit;            /**< score threshold of the action for this result (NaN entries are skipped) */
+	int flags;                    /**< enum rspamd_action_config_flags */
+	struct rspamd_action *action; /**< action these settings belong to */
+};
+
+struct kh_rspamd_symbols_hash_s;
+struct kh_rspamd_symbols_group_hash_s;
+
+
+struct rspamd_scan_result {
+	double score;       /**< total score							*/
+	double grow_factor; /**< current grow factor					*/
+	struct rspamd_passthrough_result *passthrough_result; /**< list of passthrough results, NULL if there are none */
+	double positive_score; /**< sum of scores of the inserted positive symbols */
+	double negative_score; /**< sum of absolute scores of the inserted negative symbols */
+	struct kh_rspamd_symbols_hash_s *symbols;          /**< symbols of metric						*/
+	struct kh_rspamd_symbols_group_hash_s *sym_groups; /**< groups of symbols						*/
+	struct rspamd_action_config *actions_config; /**< array of `nactions` per result action settings */
+	const gchar *name;        /**< for named results, NULL is the default result */
+	struct rspamd_task *task; /**< back reference */
+	gint symbol_cbref;        /**< lua function that defines if a symbol can be inserted, -1 if unused */
+	guint nactions;           /**< number of elements in `actions_config` */
+	guint npositive;          /**< number of inserted symbols with a positive score */
+	guint nnegative;          /**< number of inserted symbols with a negative score */
+	guint nresults;                         /**< all results: positive, negative, passthrough etc */
+	guint nresults_postfilters;             /**< how many results are there before postfilters stage */
+	struct rspamd_scan_result *prev, *next; /**< double linked list of results */
+};
+
+/**
+ * Create or return existing result for the specified metric name
+ * @param task task object
+ * @return metric result or NULL if metric `name` has not been found
+ */
+struct rspamd_scan_result *rspamd_create_metric_result(struct rspamd_task *task,
+													   const gchar *name, gint lua_sym_cbref);
+
+/**
+ * Find result with a specific name (NULL means the default result)
+ * @param task task object whose results are searched
+ * @param name name of the result; NULL and "default" select the default result
+ * @return found result or NULL if there is no result with this name
+ */
+struct rspamd_scan_result *rspamd_find_metric_result(struct rspamd_task *task,
+													 const gchar *name);
+
+/**
+ * Adds a new passthrough result to a task
+ * @param task
+ * @param action
+ * @param priority
+ * @param target_score
+ * @param message
+ * @param module
+ */
+bool rspamd_add_passthrough_result(struct rspamd_task *task,
+								   struct rspamd_action *action, guint priority,
+								   double target_score, const gchar *message,
+								   const gchar *module, guint flags,
+								   struct rspamd_scan_result *scan_result);
+
+enum rspamd_symbol_insert_flags {
+	RSPAMD_SYMBOL_INSERT_DEFAULT = 0,
+	RSPAMD_SYMBOL_INSERT_SINGLE = (1 << 0),  /**< presumably forces single shot insertion - TODO confirm in insert_metric_result */
+	RSPAMD_SYMBOL_INSERT_ENFORCE = (1 << 1), /**< score a symbol without a definition in the config as 1.0 * weight instead of 0 */
+};
+
+/**
+ * Insert a result to task
+ * @param task worker's task that present message from user
+ * @param symbol symbol to insert
+ * @param weight numeric weight for symbol (multiplied by the configured symbol score)
+ * @param opts optional option string to add to the symbol (may be NULL)
+ * @param flags insertion flags
+ * @param result result to insert the symbol to, NULL means all results of the task
+ * @return symbol result or NULL if the symbol has not been inserted; if `result`
+ * is NULL, this is the symbol of the default result with the newly created
+ * shadow symbol results linked via `next`
+ */
+struct rspamd_symbol_result *rspamd_task_insert_result_full(struct rspamd_task *task,
+															const gchar *symbol,
+															double weight,
+															const gchar *opts,
+															enum rspamd_symbol_insert_flags flags,
+															struct rspamd_scan_result *result);
+
+#define rspamd_task_insert_result_single(task, symbol, weight, opts) \
+	rspamd_task_insert_result_full((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_SINGLE, NULL)
+#define rspamd_task_insert_result(task, symbol, weight, opts) \
+	rspamd_task_insert_result_full((task), (symbol), (weight), (opts), RSPAMD_SYMBOL_INSERT_DEFAULT, NULL)
+
+/**
+ * Removes a symbol from a specific symbol result; unless the symbol score is
+ * NaN, it is subtracted from the result score and from the scores of the
+ * symbol's groups
+ * @param task task object
+ * @param symbol name of the symbol to remove
+ * @param result result to remove the symbol from, NULL means the default result
+ * @return removed symbol result or NULL if the symbol is not in the result
+ */
+struct rspamd_symbol_result *rspamd_task_remove_symbol_result(
+	struct rspamd_task *task,
+	const gchar *symbol,
+	struct rspamd_scan_result *result);
+/**
+ * Adds new option to symbol (and to the shadow symbol results linked to it
+ * via `next`); duplicate options are not added again
+ * @param task task object
+ * @param s symbol result, expected to belong to the default result
+ * @param opt option to add
+ * @param vlen length of `opt` in bytes
+ * @return TRUE if `opt` is NULL; otherwise TRUE is reported only when a new
+ * option has been appended to `s` itself
+ */
+gboolean rspamd_task_add_result_option(struct rspamd_task *task,
+									   struct rspamd_symbol_result *s,
+									   const gchar *opt,
+									   gsize vlen);
+
+/**
+ * Finds symbol result
+ * @param task task object
+ * @param sym name of the symbol
+ * @param result result to search in, NULL means the default result
+ * @return symbol result or NULL if the symbol is not in the result
+ */
+struct rspamd_symbol_result *
+rspamd_task_find_symbol_result(struct rspamd_task *task, const char *sym,
+							   struct rspamd_scan_result *result);
+
+/**
+ * Compatibility function to iterate on symbols hash
+ * @param task task object
+ * @param result result to iterate over, NULL means the default result
+ * @param func callback called as func(symbol name, symbol result, ud) for
+ * each symbol; nothing is done if it is NULL
+ * @param ud opaque data passed to `func`
+ */
+void rspamd_task_symbol_result_foreach(struct rspamd_task *task,
+									   struct rspamd_scan_result *result,
+									   GHFunc func,
+									   gpointer ud);
+
+/**
+ * Default consolidation function for metric, it get all symbols and multiply symbol
+ * weight by some factor that is specified in config. Default factor is 1.
+ * @param task worker's task that present message from user
+ * @param metric_name name of metric
+ * @return result metric weight
+ */
+double rspamd_factor_consolidation_func(struct rspamd_task *task,
+										const gchar *metric_name,
+										const gchar *unused);
+
+
+/**
+ * Check thresholds and return action for a task; passthrough results of the
+ * scan result are checked before the score thresholds
+ * @param task task object
+ * @param ppr if not NULL, receives the passthrough result that has defined the
+ * action (not written when the action is selected by score only)
+ * @param scan_result result to check, NULL means the default result
+ * @return selected action
+ */
+struct rspamd_action *rspamd_check_action_metric(struct rspamd_task *task,
+												 struct rspamd_passthrough_result **ppr,
+												 struct rspamd_scan_result *scan_result);
+
+struct rspamd_action_config *rspamd_find_action_config_for_action(struct rspamd_scan_result *scan_result,
+																  struct rspamd_action *act); /**< returns the element of scan_result->actions_config that refers to `act` or NULL if there is none */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libmime/scan_result_private.h b/src/libmime/scan_result_private.h
new file mode 100644
index 0000000..cf0c0c5
--- /dev/null
+++ b/src/libmime/scan_result_private.h
@@ -0,0 +1,55 @@
+//
+// Created by Vsevolod Stakhov on 2019-01-14.
+//
+
+#ifndef RSPAMD_SCAN_RESULT_PRIVATE_H
+#define RSPAMD_SCAN_RESULT_PRIVATE_H
+
+#include "scan_result.h"
+#include "contrib/libucl/khash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RSPAMD_OPTS_SEED 0x9f1f608628a4fefbULL
+#define rspamd_symopt_hash(opt) (rspamd_cryptobox_fast_hash( \
+	((struct rspamd_symbol_option *) opt)->option,           \
+	((struct rspamd_symbol_option *) opt)->optlen, RSPAMD_OPTS_SEED))
+/* Options are equal if they have the same length and the same content */
+static inline bool
+rspamd_symopt_equal(const struct rspamd_symbol_option *o1,
+					const struct rspamd_symbol_option *o2)
+{
+	return o1->optlen == o2->optlen &&
+		   memcmp(o1->option, o2->option, o1->optlen) == 0;
+}
+
+KHASH_INIT(rspamd_options_hash, struct rspamd_symbol_option *, char,
+		   0, rspamd_symopt_hash, rspamd_symopt_equal); /* set of options keyed by option content (no values are stored) */
+/**
+ * Result of metric processing: maps a symbol name to its symbol result
+ */
+KHASH_MAP_INIT_STR(rspamd_symbols_hash, struct rspamd_symbol_result *);
+#if UINTPTR_MAX <= UINT_MAX
+/* 32 bit: drop the lowest bit of the pointer */
+#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t) (key)) >> 1)
+#else
+/* likely 64 bit: drop the 3 lowest bits of the pointer */
+#define rspamd_ptr_hash_func(key) (khint32_t)(((uintptr_t) (key)) >> 3)
+#endif
+#define rspamd_ptr_equal_func(a, b) ((a) == (b))
+KHASH_INIT(rspamd_symbols_group_hash,
+		   void *,
+		   double,
+		   1,
+		   rspamd_ptr_hash_func,
+		   rspamd_ptr_equal_func); /* maps a symbols group (by pointer) to the score accumulated for this group */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_SCAN_RESULT_PRIVATE_H
diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h
new file mode 100644
index 0000000..e188b63
--- /dev/null
+++ b/src/libmime/smtp_parsers.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBMIME_SMTP_PARSERS_H_
+#define SRC_LIBMIME_SMTP_PARSERS_H_
+
+#include "config.h"
+#include "email_addr.h"
+#include "content_type.h"
+#include "task.h"
+#include "message.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int rspamd_smtp_addr_parse(const char *data, size_t len,
+						   struct rspamd_email_address *addr);
+
+gboolean rspamd_content_disposition_parser(const char *data, size_t len,
+										   struct rspamd_content_disposition *cd,
+										   rspamd_mempool_t *pool);
+
+gboolean
+rspamd_rfc2047_parser(const gchar *in, gsize len, gint *pencoding,
+					  const gchar **charset, gsize *charset_len,
+					  const gchar **encoded, gsize *encoded_len);
+
+rspamd_inet_addr_t *rspamd_parse_smtp_ip(const char *data, size_t len,
+										 rspamd_mempool_t *pool);
+
+guint64 rspamd_parse_smtp_date(const unsigned char *data, size_t len, GError **err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-10 21:30:40 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-10 21:30:40 +0000
commit	133a45c109da5310add55824db21af5239951f93 (patch)
tree	ba6ac4c0a950a0dda56451944315d66409923918 /src/libmime
parent	Initial commit. (diff)
download	rspamd-upstream.tar.xz rspamd-upstream.zip