summary refs log tree commit diff stats
path: root/src/libstat/stat_api.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/libstat/stat_api.h 147
1 files changed, 147 insertions, 0 deletions
diff --git a/src/libstat/stat_api.h b/src/libstat/stat_api.h
new file mode 100644
index 0000000..1badb20
--- /dev/null
+++ b/src/libstat/stat_api.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef STAT_API_H_
+#define STAT_API_H_
+
+#include "config.h"
+#include "task.h"
+#include "lua/lua_common.h"
+#include "contrib/libev/ev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file stat_api.h
+ * High level statistics API
+ */
+
+#define RSPAMD_STAT_TOKEN_FLAG_TEXT (1u << 0) /* token flags: each macro below is one distinct bit (0..13), so they can be OR-ed */
+#define RSPAMD_STAT_TOKEN_FLAG_META (1u << 1)
+#define RSPAMD_STAT_TOKEN_FLAG_LUA_META (1u << 2)
+#define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION (1u << 3)
+#define RSPAMD_STAT_TOKEN_FLAG_HEADER (1u << 4)
+#define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM (1u << 5)
+#define RSPAMD_STAT_TOKEN_FLAG_UTF (1u << 6)
+#define RSPAMD_STAT_TOKEN_FLAG_NORMALISED (1u << 7)
+#define RSPAMD_STAT_TOKEN_FLAG_STEMMED (1u << 8)
+#define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE (1u << 9)
+#define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD (1u << 10)
+#define RSPAMD_STAT_TOKEN_FLAG_SKIPPED (1u << 11)
+#define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES (1u << 12)
+#define RSPAMD_STAT_TOKEN_FLAG_EMOJI (1u << 13)
+
+typedef struct rspamd_stat_token_s { /* one token kept in several text forms plus flags */
+ rspamd_ftok_t original; /* utf8 raw */
+ rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */
+ rspamd_ftok_t normalized; /* normalized and lowercased utf8 */
+ rspamd_ftok_t stemmed; /* stemmed utf8 */
+ guint flags; /* presumably RSPAMD_STAT_TOKEN_FLAG_* bits - TODO confirm */
+} rspamd_stat_token_t;
+
+typedef struct token_node_s {
+ guint64 data;
+ guint window_idx;
+ guint flags;
+ rspamd_stat_token_t *t1; /* t1/t2 point to rspamd_stat_token_t entries; NOTE(review): ownership is not visible in this header */
+ rspamd_stat_token_t *t2;
+ float values[]; /* C99 flexible array member: must stay the last field; element count is not stored in a dedicated field here */
+} rspamd_token_t;
+
+struct rspamd_stat_ctx; /* forward declaration: incomplete type here, this header only passes it by pointer */
+
+/**
+ * The results of statistics processing:
+ * - error
+ * - need to do additional job for processing
+ * - all processed
+ */
+typedef enum rspamd_stat_result_e {
+ RSPAMD_STAT_PROCESS_ERROR = 0, /* error */
+ RSPAMD_STAT_PROCESS_DELAYED = 1, /* need to do additional job for processing */
+ RSPAMD_STAT_PROCESS_OK /* all processed; no explicit value, so it is 2 */
+} rspamd_stat_result_t;
+
+/** Initialise statistics modules
+ * @param cfg configuration
+ * @param ev_base event loop (struct ev_loop) handed to the statistics code
+ */
+void rspamd_stat_init(struct rspamd_config *cfg, struct ev_loop *ev_base);
+
+/**
+ * Finalize statistics; takes no arguments and returns nothing
+ */
+void rspamd_stat_close(void);
+
+/**
+ * Tokenize task
+ * @param st_ctx statistics context
+ * @param task task to tokenize
+ */
+void rspamd_stat_process_tokenize(struct rspamd_stat_ctx *st_ctx,
+ struct rspamd_task *task);
+
+/** Classify the task specified and insert symbols if needed
+ * @param task task to classify
+ * @param L lua state
+ * @param stage NOTE(review): meaning is not documented in this header - confirm against callers
+ * @param err error returned
+ * @return an rspamd_stat_result_t status (RSPAMD_STAT_PROCESS_ERROR/DELAYED/OK), not a boolean
+ */
+rspamd_stat_result_t rspamd_stat_classify(struct rspamd_task *task,
+ lua_State *L, guint stage, GError **err);
+
+
+/**
+ * Check if a task should be learned and set the appropriate flags for it
+ * @param task task to check
+ * @return gboolean; presumably TRUE when the task should be learned - TODO confirm
+ */
+gboolean rspamd_stat_check_autolearn(struct rspamd_task *task);
+
+/** Learn task as spam or ham, task must be processed prior to this call
+ * @param task task to learn
+ * @param spam if TRUE learn spam, otherwise learn ham
+ * @param L lua state
+ * @param classifier NULL to learn all classifiers, name to learn a specific one
+ * @param stage NOTE(review): meaning is not documented in this header - confirm against callers
+ * @param err error returned
+ * @return an rspamd_stat_result_t status (RSPAMD_STAT_PROCESS_ERROR/DELAYED/OK), not a boolean
+ */
+rspamd_stat_result_t rspamd_stat_learn(struct rspamd_task *task,
+ gboolean spam, lua_State *L, const gchar *classifier,
+ guint stage,
+ GError **err);
+
+/** Get the overall statistics for all statfile backends
+ * @param task NOTE(review): undocumented; its role is not visible in this header
+ * @param cfg configuration
+ * @param total_learns the total number of learns is stored here
+ * @param res presumably receives the array of statistical information - TODO confirm
+ * @return an rspamd_stat_result_t status; the function does not return the array itself */
+rspamd_stat_result_t rspamd_stat_statistics(struct rspamd_task *task,
+ struct rspamd_config *cfg,
+ guint64 *total_learns,
+ ucl_object_t **res);
+
+void rspamd_stat_unload(void); /* NOTE(review): undocumented; how it differs from rspamd_stat_close() is not visible here */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* STAT_API_H_ */