summaryrefslogtreecommitdiffstats
path: root/src/libmime/message.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/libmime/message.h')
-rw-r--r--src/libmime/message.h239
1 files changed, 239 insertions, 0 deletions
diff --git a/src/libmime/message.h b/src/libmime/message.h
new file mode 100644
index 0000000..52dedab
--- /dev/null
+++ b/src/libmime/message.h
@@ -0,0 +1,239 @@
+/**
+ * @file message.h
+ * Message processing functions and structures
+ */
+
+#ifndef RSPAMD_MESSAGE_H
+#define RSPAMD_MESSAGE_H
+
+#include "config.h"
+
+#include "libmime/email_addr.h"
+#include "libutil/addr.h"
+#include "libcryptobox/cryptobox.h"
+#include "libmime/mime_headers.h"
+#include "libmime/content_type.h"
+#include "libserver/url.h"
+#include "libutil/ref.h"
+#include "libutil/str_util.h"
+
+#include <unicode/uchar.h>
+#include <unicode/utext.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+struct controller_session;
+struct rspamd_image;
+struct rspamd_archive;
+
+enum rspamd_mime_part_flags {
+ RSPAMD_MIME_PART_ATTACHEMENT = (1u << 1u),
+ RSPAMD_MIME_PART_BAD_CTE = (1u << 4u),
+ RSPAMD_MIME_PART_MISSING_CTE = (1u << 5u),
+ RSPAMD_MIME_PART_NO_TEXT_EXTRACTION = (1u << 6u),
+};
+
+enum rspamd_mime_part_type {
+ RSPAMD_MIME_PART_UNDEFINED = 0,
+ RSPAMD_MIME_PART_MULTIPART,
+ RSPAMD_MIME_PART_MESSAGE,
+ RSPAMD_MIME_PART_TEXT,
+ RSPAMD_MIME_PART_ARCHIVE,
+ RSPAMD_MIME_PART_IMAGE,
+ RSPAMD_MIME_PART_CUSTOM_LUA
+};
+
+#define IS_PART_MULTIPART(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MULTIPART))
+#define IS_PART_TEXT(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_TEXT))
+#define IS_PART_MESSAGE(part) ((part) && ((part)->part_type == RSPAMD_MIME_PART_MESSAGE))
+
+enum rspamd_cte {
+ RSPAMD_CTE_UNKNOWN = 0,
+ RSPAMD_CTE_7BIT = 1,
+ RSPAMD_CTE_8BIT = 2,
+ RSPAMD_CTE_QP = 3,
+ RSPAMD_CTE_B64 = 4,
+ RSPAMD_CTE_UUE = 5,
+};
+
+struct rspamd_mime_text_part;
+
+struct rspamd_mime_multipart {
+ GPtrArray *children;
+ rspamd_ftok_t boundary;
+};
+
+enum rspamd_lua_specific_type {
+ RSPAMD_LUA_PART_TEXT,
+ RSPAMD_LUA_PART_STRING,
+ RSPAMD_LUA_PART_TABLE,
+ RSPAMD_LUA_PART_FUNCTION,
+ RSPAMD_LUA_PART_UNKNOWN,
+};
+
+struct rspamd_lua_specific_part {
+ gint cbref;
+ enum rspamd_lua_specific_type type;
+};
+
+struct rspamd_mime_part {
+ struct rspamd_content_type *ct;
+ struct rspamd_content_type *detected_ct;
+ gchar *detected_type;
+ gchar *detected_ext;
+ struct rspamd_content_disposition *cd;
+ rspamd_ftok_t raw_data;
+ rspamd_ftok_t parsed_data;
+ struct rspamd_mime_part *parent_part;
+
+ struct rspamd_mime_header *headers_order;
+ struct rspamd_mime_headers_table *raw_headers;
+ GPtrArray *urls;
+
+ gchar *raw_headers_str;
+ gsize raw_headers_len;
+
+ enum rspamd_cte cte;
+ guint flags;
+ enum rspamd_mime_part_type part_type;
+ guint part_number;
+
+ union {
+ struct rspamd_mime_multipart *mp;
+ struct rspamd_mime_text_part *txt;
+ struct rspamd_image *img;
+ struct rspamd_archive *arch;
+ struct rspamd_lua_specific_part lua_specific;
+ } specific;
+
+ guchar digest[rspamd_cryptobox_HASHBYTES];
+};
+
+#define RSPAMD_MIME_TEXT_PART_FLAG_UTF (1 << 0)
+#define RSPAMD_MIME_TEXT_PART_FLAG_EMPTY (1 << 1)
+#define RSPAMD_MIME_TEXT_PART_FLAG_HTML (1 << 2)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW (1 << 3)
+#define RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED (1 << 4)
+#define RSPAMD_MIME_TEXT_PART_ATTACHMENT (1 << 5)
+
+#define IS_TEXT_PART_EMPTY(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_EMPTY)
+#define IS_TEXT_PART_UTF(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_UTF)
+#define IS_TEXT_PART_HTML(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_FLAG_HTML)
+#define IS_TEXT_PART_ATTACHMENT(part) ((part)->flags & RSPAMD_MIME_TEXT_PART_ATTACHMENT)
+
+
+struct rspamd_mime_text_part {
+ const gchar *language;
+ GPtrArray *languages;
+ const gchar *real_charset;
+
+ /* Raw data in native encoding */
+ rspamd_ftok_t raw;
+ rspamd_ftok_t parsed; /* decoded from mime encodings */
+
+ /* UTF8 content */
+ rspamd_ftok_t utf_content; /* utf8 encoded processed content */
+ GByteArray *utf_raw_content; /* utf raw content */
+ GByteArray *utf_stripped_content; /* utf content with no newlines */
+ GArray *normalized_hashes; /* Array of guint64 */
+ GArray *utf_words; /* Array of rspamd_stat_token_t */
+ UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
+
+ GPtrArray *newlines; /**< positions of newlines in text, relative to content*/
+ void *html;
+ GList *exceptions; /**< list of offsets of urls */
+ struct rspamd_mime_part *mime_part;
+
+ guint flags;
+ guint nlines;
+ guint spaces;
+ guint nwords;
+ guint non_ascii_chars;
+ guint ascii_chars;
+ guint double_spaces;
+ guint non_spaces;
+ guint empty_lines;
+ guint capital_letters;
+ guint numeric_characters;
+ guint unicode_scripts;
+};
+
+struct rspamd_message_raw_headers_content {
+ const gchar *begin;
+ gsize len;
+ const gchar *body_start;
+};
+
+struct rspamd_message {
+ const gchar *message_id;
+ gchar *subject;
+
+ GPtrArray *parts; /**< list of parsed parts */
+ GPtrArray *text_parts; /**< list of text parts */
+ struct rspamd_message_raw_headers_content raw_headers_content;
+ void *received_headers; /**< list of received headers */
+ khash_t(rspamd_url_hash) * urls;
+ struct rspamd_mime_headers_table *raw_headers; /**< list of raw headers */
+ struct rspamd_mime_header *headers_order; /**< order of raw headers */
+ struct rspamd_task *task;
+ GPtrArray *rcpt_mime;
+ GPtrArray *from_mime;
+ guchar digest[16];
+ enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers */
+ ref_entry_t ref;
+};
+
+#define MESSAGE_FIELD(task, field) ((task)->message->field)
+#define MESSAGE_FIELD_CHECK(task, field) ((task)->message ? (task)->message->field : (__typeof__((task)->message->field)) NULL)
+
+/**
+ * Parse and pre-process mime message
+ * @param task worker_task object
+ * @return
+ */
+gboolean rspamd_message_parse(struct rspamd_task *task);
+
+/**
+ * Process content in task (e.g. HTML parsing)
+ * @param task
+ */
+void rspamd_message_process(struct rspamd_task *task);
+
+
+/**
+ * Converts string to cte
+ * @param str
+ * @return
+ */
+enum rspamd_cte rspamd_cte_from_string(const gchar *str);
+
+/**
+ * Converts cte to string
+ * @param ct
+ * @return
+ */
+const gchar *rspamd_cte_to_string(enum rspamd_cte ct);
+
+struct rspamd_message *rspamd_message_new(struct rspamd_task *task);
+
+struct rspamd_message *rspamd_message_ref(struct rspamd_message *msg);
+
+void rspamd_message_unref(struct rspamd_message *msg);
+
+/**
+ * Updates digest of the message if modified
+ * @param msg
+ * @param input
+ * @param len
+ */
+void rspamd_message_update_digest(struct rspamd_message *msg,
+ const void *input, gsize len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif