summary | refs | log | tree | commit | diff | stats
path: root/src/lib-fts/fts-language.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/lib-fts/fts-language.h 72
1 files changed, 72 insertions, 0 deletions
diff --git a/src/lib-fts/fts-language.h b/src/lib-fts/fts-language.h
new file mode 100644
index 0000000..884998f
--- /dev/null
+++ b/src/lib-fts/fts-language.h
@@ -0,0 +1,72 @@
+#ifndef FTS_LANGUAGE_H
+#define FTS_LANGUAGE_H
+
+struct fts_language_list;
+
+enum fts_language_result {
+ /* Provided sample is too short. */
+ FTS_LANGUAGE_RESULT_SHORT,
+ /* Language is unknown or not in the provided list. */
+ FTS_LANGUAGE_RESULT_UNKNOWN,
+ /* Text was detected to be one of the languages in the provided list. */
+ FTS_LANGUAGE_RESULT_OK,
+ /* textcat library initialization failed. */
+ FTS_LANGUAGE_RESULT_ERROR
+};
+
+struct fts_language {
+ /* Lowercase two-letter language name, e.g. "en" */
+ const char *name;
+};
+ARRAY_DEFINE_TYPE(fts_language, const struct fts_language *);
+
+/* Used for raw data that is indexed. This data shouldn't go through any
+ language-specific filters. */
+extern const struct fts_language fts_language_data;
+
+/*
+ Language module API.
+*/
+void fts_languages_init(void);
+void fts_languages_deinit(void);
+/* Add a language to the list of supported languages. */
+void fts_language_register(const char *name);
+/* Find a specified language by name. This searches the internal list of
+ supported languages. */
+const struct fts_language *fts_language_find(const char *name);
+
+/*
+ Language list API
+*/
+int fts_language_list_init(const char *const *settings,
+ struct fts_language_list **list_r,
+ const char **error_r);
+void fts_language_list_deinit(struct fts_language_list **list);
+
+/* Add a language to the list of wanted languages. */
+void fts_language_list_add(struct fts_language_list *list,
+ const struct fts_language *lang);
+/* Add wanted languages from a space-separated list of language names.
+ Duplicates are ignored. Returns TRUE if ok, or FALSE with unknown_name_r set
+ if an unknown language was found in the list. */
+bool fts_language_list_add_names(struct fts_language_list *list,
+ const char *names,
+ const char **unknown_name_r);
+
+/* Return an array of all wanted languages. */
+const ARRAY_TYPE(fts_language) *
+fts_language_list_get_all(struct fts_language_list *list);
+/* Returns the first wanted language (default language). */
+const struct fts_language *
+fts_language_list_get_first(struct fts_language_list *list);
+
+/* If text was detected to be one of the languages in the list,
+ returns FTS_LANGUAGE_RESULT_OK and lang_r points to that language in
+ the list. error_r is set for FTS_LANGUAGE_RESULT_ERROR. */
+enum fts_language_result
+fts_language_detect(struct fts_language_list *list,
+ const unsigned char *text, size_t size,
+ const struct fts_language **lang_r,
+ const char **error_r);
+
+#endif