summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/fts')
-rw-r--r--src/plugins/fts/Makefile.am74
-rw-r--r--src/plugins/fts/Makefile.in1140
-rwxr-xr-xsrc/plugins/fts/decode2text.sh105
-rw-r--r--src/plugins/fts/doveadm-dump-fts-expunge-log.c116
-rw-r--r--src/plugins/fts/doveadm-fts.c470
-rw-r--r--src/plugins/fts/doveadm-fts.h11
-rw-r--r--src/plugins/fts/fts-api-private.h139
-rw-r--r--src/plugins/fts/fts-api.c554
-rw-r--r--src/plugins/fts/fts-api.h173
-rw-r--r--src/plugins/fts/fts-build-mail.c719
-rw-r--r--src/plugins/fts/fts-build-mail.h9
-rw-r--r--src/plugins/fts/fts-expunge-log.c617
-rw-r--r--src/plugins/fts/fts-expunge-log.h58
-rw-r--r--src/plugins/fts/fts-indexer.c300
-rw-r--r--src/plugins/fts/fts-indexer.h22
-rw-r--r--src/plugins/fts/fts-parser-html.c64
-rw-r--r--src/plugins/fts/fts-parser-script.c277
-rw-r--r--src/plugins/fts/fts-parser-tika.c278
-rw-r--r--src/plugins/fts/fts-parser.c127
-rw-r--r--src/plugins/fts/fts-parser.h48
-rw-r--r--src/plugins/fts/fts-plugin.c33
-rw-r--r--src/plugins/fts/fts-plugin.h7
-rw-r--r--src/plugins/fts/fts-search-args.c258
-rw-r--r--src/plugins/fts/fts-search-args.h7
-rw-r--r--src/plugins/fts/fts-search-serialize.c99
-rw-r--r--src/plugins/fts/fts-search-serialize.h16
-rw-r--r--src/plugins/fts/fts-search.c385
-rw-r--r--src/plugins/fts/fts-storage.c981
-rw-r--r--src/plugins/fts/fts-storage.h70
-rw-r--r--src/plugins/fts/fts-user.c423
-rw-r--r--src/plugins/fts/fts-user.h27
-rw-r--r--src/plugins/fts/xml2text.c44
32 files changed, 7651 insertions, 0 deletions
diff --git a/src/plugins/fts/Makefile.am b/src/plugins/fts/Makefile.am
new file mode 100644
index 0000000..2e7753c
--- /dev/null
+++ b/src/plugins/fts/Makefile.am
@@ -0,0 +1,74 @@
+pkglibexecdir = $(libexecdir)/dovecot
+doveadm_moduledir = $(moduledir)/doveadm
+
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/lib \
+ -I$(top_srcdir)/src/lib-settings \
+ -I$(top_srcdir)/src/lib-fts \
+ -I$(top_srcdir)/src/lib-ssl-iostream \
+ -I$(top_srcdir)/src/lib-http \
+ -I$(top_srcdir)/src/lib-mail \
+ -I$(top_srcdir)/src/lib-imap \
+ -I$(top_srcdir)/src/lib-index \
+ -I$(top_srcdir)/src/lib-storage \
+ -I$(top_srcdir)/src/lib-storage/index \
+ -I$(top_srcdir)/src/doveadm
+
+NOPLUGIN_LDFLAGS =
+lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version
+lib20_fts_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+ lib20_fts_plugin.la
+
+lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la
+
+lib20_fts_plugin_la_SOURCES = \
+ fts-api.c \
+ fts-build-mail.c \
+ fts-expunge-log.c \
+ fts-indexer.c \
+ fts-parser.c \
+ fts-parser-html.c \
+ fts-parser-script.c \
+ fts-parser-tika.c \
+ fts-plugin.c \
+ fts-search.c \
+ fts-search-args.c \
+ fts-search-serialize.c \
+ fts-storage.c \
+ fts-user.c
+
+pkginc_libdir=$(pkgincludedir)
+pkginc_lib_HEADERS = \
+ fts-api.h \
+ fts-api-private.h \
+ fts-expunge-log.h \
+ fts-indexer.h \
+ fts-parser.h \
+ fts-storage.h \
+ fts-user.h
+
+noinst_HEADERS = \
+ doveadm-fts.h \
+ fts-build-mail.h \
+ fts-plugin.h \
+ fts-search-args.h \
+ fts-search-serialize.h
+
+pkglibexec_PROGRAMS = xml2text
+
+xml2text_SOURCES = xml2text.c fts-parser-html.c
+xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS)
+xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS)
+xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS)
+
+pkglibexec_SCRIPTS = decode2text.sh
+EXTRA_DIST = $(pkglibexec_SCRIPTS)
+
+doveadm_module_LTLIBRARIES = \
+ lib20_doveadm_fts_plugin.la
+
+lib20_doveadm_fts_plugin_la_SOURCES = \
+ doveadm-fts.c \
+ doveadm-dump-fts-expunge-log.c
diff --git a/src/plugins/fts/Makefile.in b/src/plugins/fts/Makefile.in
new file mode 100644
index 0000000..624f69f
--- /dev/null
+++ b/src/plugins/fts/Makefile.in
@@ -0,0 +1,1140 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+pkglibexec_PROGRAMS = xml2text$(EXEEXT)
+subdir = src/plugins/fts
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \
+ $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \
+ $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \
+ $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \
+ $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \
+ $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \
+ $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \
+ $(top_srcdir)/m4/flexible_array_member.m4 \
+ $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \
+ $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \
+ $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \
+ $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \
+ $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \
+ $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \
+ $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \
+ $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \
+ $(top_srcdir)/m4/pr_set_dumpable.m4 \
+ $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \
+ $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \
+ $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \
+ $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \
+ $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \
+ $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \
+ $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \
+ $(top_srcdir)/m4/typeof_dev_t.m4 \
+ $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \
+ $(top_srcdir)/m4/want_apparmor.m4 \
+ $(top_srcdir)/m4/want_bsdauth.m4 \
+ $(top_srcdir)/m4/want_bzlib.m4 \
+ $(top_srcdir)/m4/want_cassandra.m4 \
+ $(top_srcdir)/m4/want_cdb.m4 \
+ $(top_srcdir)/m4/want_checkpassword.m4 \
+ $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \
+ $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \
+ $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \
+ $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \
+ $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \
+ $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \
+ $(top_srcdir)/m4/want_prefetch.m4 \
+ $(top_srcdir)/m4/want_shadow.m4 \
+ $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \
+ $(top_srcdir)/m4/want_sqlite.m4 \
+ $(top_srcdir)/m4/want_stemmer.m4 \
+ $(top_srcdir)/m4/want_systemd.m4 \
+ $(top_srcdir)/m4/want_textcat.m4 \
+ $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \
+ $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(pkginc_lib_HEADERS) $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(pkglibexecdir)" \
+ "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" \
+ "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)"
+PROGRAMS = $(pkglibexec_PROGRAMS)
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES)
+lib20_doveadm_fts_plugin_la_LIBADD =
+am_lib20_doveadm_fts_plugin_la_OBJECTS = doveadm-fts.lo \
+ doveadm-dump-fts-expunge-log.lo
+lib20_doveadm_fts_plugin_la_OBJECTS = \
+ $(am_lib20_doveadm_fts_plugin_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+lib20_doveadm_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(lib20_doveadm_fts_plugin_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+lib20_fts_plugin_la_DEPENDENCIES = ../../lib-fts/libfts.la
+am_lib20_fts_plugin_la_OBJECTS = fts-api.lo fts-build-mail.lo \
+ fts-expunge-log.lo fts-indexer.lo fts-parser.lo \
+ fts-parser-html.lo fts-parser-script.lo fts-parser-tika.lo \
+ fts-plugin.lo fts-search.lo fts-search-args.lo \
+ fts-search-serialize.lo fts-storage.lo fts-user.lo
+lib20_fts_plugin_la_OBJECTS = $(am_lib20_fts_plugin_la_OBJECTS)
+lib20_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(lib20_fts_plugin_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+am_xml2text_OBJECTS = xml2text-xml2text.$(OBJEXT) \
+ xml2text-fts-parser-html.$(OBJEXT)
+xml2text_OBJECTS = $(am_xml2text_OBJECTS)
+am__DEPENDENCIES_1 =
+SCRIPTS = $(pkglibexec_SCRIPTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo \
+ ./$(DEPDIR)/doveadm-fts.Plo ./$(DEPDIR)/fts-api.Plo \
+ ./$(DEPDIR)/fts-build-mail.Plo ./$(DEPDIR)/fts-expunge-log.Plo \
+ ./$(DEPDIR)/fts-indexer.Plo ./$(DEPDIR)/fts-parser-html.Plo \
+ ./$(DEPDIR)/fts-parser-script.Plo \
+ ./$(DEPDIR)/fts-parser-tika.Plo ./$(DEPDIR)/fts-parser.Plo \
+ ./$(DEPDIR)/fts-plugin.Plo ./$(DEPDIR)/fts-search-args.Plo \
+ ./$(DEPDIR)/fts-search-serialize.Plo \
+ ./$(DEPDIR)/fts-search.Plo ./$(DEPDIR)/fts-storage.Plo \
+ ./$(DEPDIR)/fts-user.Plo \
+ ./$(DEPDIR)/xml2text-fts-parser-html.Po \
+ ./$(DEPDIR)/xml2text-xml2text.Po
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \
+ $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES)
+DIST_SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \
+ $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+HEADERS = $(noinst_HEADERS) $(pkginc_lib_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+pkglibexecdir = $(libexecdir)/dovecot
+ACLOCAL = @ACLOCAL@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+APPARMOR_LIBS = @APPARMOR_LIBS@
+AR = @AR@
+AUTH_CFLAGS = @AUTH_CFLAGS@
+AUTH_LIBS = @AUTH_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BINARY_CFLAGS = @BINARY_CFLAGS@
+BINARY_LDFLAGS = @BINARY_LDFLAGS@
+BISON = @BISON@
+CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@
+CASSANDRA_LIBS = @CASSANDRA_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CDB_LIBS = @CDB_LIBS@
+CFLAGS = @CFLAGS@
+CLUCENE_CFLAGS = @CLUCENE_CFLAGS@
+CLUCENE_LIBS = @CLUCENE_LIBS@
+COMPRESS_LIBS = @COMPRESS_LIBS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CRYPT_LIBS = @CRYPT_LIBS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DICT_LIBS = @DICT_LIBS@
+DLLIB = @DLLIB@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FLEX = @FLEX@
+FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@
+FUZZER_LDFLAGS = @FUZZER_LDFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KRB5CONFIG = @KRB5CONFIG@
+KRB5_CFLAGS = @KRB5_CFLAGS@
+KRB5_LIBS = @KRB5_LIBS@
+LD = @LD@
+LDAP_LIBS = @LDAP_LIBS@
+LDFLAGS = @LDFLAGS@
+LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@
+LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@
+LIBCAP = @LIBCAP@
+LIBDOVECOT = @LIBDOVECOT@
+LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@
+LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@
+LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@
+LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@
+LIBDOVECOT_LDA = @LIBDOVECOT_LDA@
+LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@
+LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@
+LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@
+LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@
+LIBDOVECOT_LUA = @LIBDOVECOT_LUA@
+LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@
+LIBDOVECOT_SQL = @LIBDOVECOT_SQL@
+LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@
+LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@
+LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@
+LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@
+LIBICONV = @LIBICONV@
+LIBICU_CFLAGS = @LIBICU_CFLAGS@
+LIBICU_LIBS = @LIBICU_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@
+LIBSODIUM_LIBS = @LIBSODIUM_LIBS@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
+LIBTIRPC_LIBS = @LIBTIRPC_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIBWRAP_LIBS = @LIBWRAP_LIBS@
+LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBICONV = @LTLIBICONV@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+LUA_CFLAGS = @LUA_CFLAGS@
+LUA_LIBS = @LUA_LIBS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MODULE_LIBS = @MODULE_LIBS@
+MODULE_SUFFIX = @MODULE_SUFFIX@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_CONFIG = @MYSQL_CONFIG@
+MYSQL_LIBS = @MYSQL_LIBS@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOPLUGIN_LDFLAGS =
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PANDOC = @PANDOC@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PG_CONFIG = @PG_CONFIG@
+PIE_CFLAGS = @PIE_CFLAGS@
+PIE_LDFLAGS = @PIE_LDFLAGS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+QUOTA_LIBS = @QUOTA_LIBS@
+RANLIB = @RANLIB@
+RELRO_LDFLAGS = @RELRO_LDFLAGS@
+RPCGEN = @RPCGEN@
+RUN_TEST = @RUN_TEST@
+SED = @SED@
+SETTING_FILES = @SETTING_FILES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SQLITE_CFLAGS = @SQLITE_CFLAGS@
+SQLITE_LIBS = @SQLITE_LIBS@
+SQL_CFLAGS = @SQL_CFLAGS@
+SQL_LIBS = @SQL_LIBS@
+SSL_CFLAGS = @SSL_CFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@
+SYSTEMD_LIBS = @SYSTEMD_LIBS@
+VALGRIND = @VALGRIND@
+VERSION = @VERSION@
+ZSTD_CFLAGS = @ZSTD_CFLAGS@
+ZSTD_LIBS = @ZSTD_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+dict_drivers = @dict_drivers@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+moduledir = @moduledir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+rundir = @rundir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sql_drivers = @sql_drivers@
+srcdir = @srcdir@
+ssldir = @ssldir@
+statedir = @statedir@
+sysconfdir = @sysconfdir@
+systemdservicetype = @systemdservicetype@
+systemdsystemunitdir = @systemdsystemunitdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+doveadm_moduledir = $(moduledir)/doveadm
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/lib \
+ -I$(top_srcdir)/src/lib-settings \
+ -I$(top_srcdir)/src/lib-fts \
+ -I$(top_srcdir)/src/lib-ssl-iostream \
+ -I$(top_srcdir)/src/lib-http \
+ -I$(top_srcdir)/src/lib-mail \
+ -I$(top_srcdir)/src/lib-imap \
+ -I$(top_srcdir)/src/lib-index \
+ -I$(top_srcdir)/src/lib-storage \
+ -I$(top_srcdir)/src/lib-storage/index \
+ -I$(top_srcdir)/src/doveadm
+
+lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version
+lib20_fts_plugin_la_LDFLAGS = -module -avoid-version
+module_LTLIBRARIES = \
+ lib20_fts_plugin.la
+
+lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la
+lib20_fts_plugin_la_SOURCES = \
+ fts-api.c \
+ fts-build-mail.c \
+ fts-expunge-log.c \
+ fts-indexer.c \
+ fts-parser.c \
+ fts-parser-html.c \
+ fts-parser-script.c \
+ fts-parser-tika.c \
+ fts-plugin.c \
+ fts-search.c \
+ fts-search-args.c \
+ fts-search-serialize.c \
+ fts-storage.c \
+ fts-user.c
+
+pkginc_libdir = $(pkgincludedir)
+pkginc_lib_HEADERS = \
+ fts-api.h \
+ fts-api-private.h \
+ fts-expunge-log.h \
+ fts-indexer.h \
+ fts-parser.h \
+ fts-storage.h \
+ fts-user.h
+
+noinst_HEADERS = \
+ doveadm-fts.h \
+ fts-build-mail.h \
+ fts-plugin.h \
+ fts-search-args.h \
+ fts-search-serialize.h
+
+xml2text_SOURCES = xml2text.c fts-parser-html.c
+xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS)
+xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS)
+xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS)
+pkglibexec_SCRIPTS = decode2text.sh
+EXTRA_DIST = $(pkglibexec_SCRIPTS)
+doveadm_module_LTLIBRARIES = \
+ lib20_doveadm_fts_plugin.la
+
+lib20_doveadm_fts_plugin_la_SOURCES = \
+ doveadm-fts.c \
+ doveadm-dump-fts-expunge-log.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/plugins/fts/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-pkglibexecPROGRAMS: $(pkglibexec_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \
+ fi; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p \
+ || test -f $$p1 \
+ ; then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' \
+ -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-pkglibexecPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' \
+ `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(pkglibexecdir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(pkglibexecdir)" && rm -f $$files
+
+clean-pkglibexecPROGRAMS:
+ @list='$(pkglibexec_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+
+install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \
+ }
+
+uninstall-doveadm_moduleLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \
+ done
+
+clean-doveadm_moduleLTLIBRARIES:
+ -test -z "$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES)
+ @list='$(doveadm_module_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+install-moduleLTLIBRARIES: $(module_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \
+ }
+
+uninstall-moduleLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \
+ done
+
+clean-moduleLTLIBRARIES:
+ -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES)
+ @list='$(module_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+lib20_doveadm_fts_plugin.la: $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_plugin_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(lib20_doveadm_fts_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_LIBADD) $(LIBS)
+
+lib20_fts_plugin.la: $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_fts_plugin_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(lib20_fts_plugin_la_LINK) -rpath $(moduledir) $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_LIBADD) $(LIBS)
+
+xml2text$(EXEEXT): $(xml2text_OBJECTS) $(xml2text_DEPENDENCIES) $(EXTRA_xml2text_DEPENDENCIES)
+ @rm -f xml2text$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(xml2text_OBJECTS) $(xml2text_LDADD) $(LIBS)
+install-pkglibexecSCRIPTS: $(pkglibexec_SCRIPTS)
+ @$(NORMAL_INSTALL)
+ @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n' \
+ -e 'h;s|.*|.|' \
+ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) { files[d] = files[d] " " $$1; \
+ if (++n[d] == $(am__install_max)) { \
+ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
+ else { print "f", d "/" $$4, $$1 } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \
+ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-pkglibexecSCRIPTS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || exit 0; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 's,.*/,,;$(transform)'`; \
+ dir='$(DESTDIR)$(pkglibexecdir)'; $(am__uninstall_files_from_dir)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-api.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-build-mail.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-expunge-log.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-indexer.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-html.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-script.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-tika.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-plugin.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-args.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-serialize.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-storage.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-user.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-fts-parser-html.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-xml2text.Po@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+ @$(MKDIR_P) $(@D)
+ @echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+xml2text-xml2text.o: xml2text.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.o -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c
+
+xml2text-xml2text.obj: xml2text.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.obj -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi`
+
+xml2text-fts-parser-html.o: fts-parser-html.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.o -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c
+
+xml2text-fts-parser-html.obj: fts-parser-html.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.obj -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi`
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-pkginc_libHEADERS: $(pkginc_lib_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(pkginc_libdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(pkginc_libdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkginc_libdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkginc_libdir)" || exit $$?; \
+ done
+
+uninstall-pkginc_libHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(pkginc_libdir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(SCRIPTS) $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \
+ clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo
+ -rm -f ./$(DEPDIR)/doveadm-fts.Plo
+ -rm -f ./$(DEPDIR)/fts-api.Plo
+ -rm -f ./$(DEPDIR)/fts-build-mail.Plo
+ -rm -f ./$(DEPDIR)/fts-expunge-log.Plo
+ -rm -f ./$(DEPDIR)/fts-indexer.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-html.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-script.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-tika.Plo
+ -rm -f ./$(DEPDIR)/fts-parser.Plo
+ -rm -f ./$(DEPDIR)/fts-plugin.Plo
+ -rm -f ./$(DEPDIR)/fts-search-args.Plo
+ -rm -f ./$(DEPDIR)/fts-search-serialize.Plo
+ -rm -f ./$(DEPDIR)/fts-search.Plo
+ -rm -f ./$(DEPDIR)/fts-storage.Plo
+ -rm -f ./$(DEPDIR)/fts-user.Plo
+ -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po
+ -rm -f ./$(DEPDIR)/xml2text-xml2text.Po
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-doveadm_moduleLTLIBRARIES \
+ install-moduleLTLIBRARIES install-pkginc_libHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-pkglibexecPROGRAMS install-pkglibexecSCRIPTS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo
+ -rm -f ./$(DEPDIR)/doveadm-fts.Plo
+ -rm -f ./$(DEPDIR)/fts-api.Plo
+ -rm -f ./$(DEPDIR)/fts-build-mail.Plo
+ -rm -f ./$(DEPDIR)/fts-expunge-log.Plo
+ -rm -f ./$(DEPDIR)/fts-indexer.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-html.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-script.Plo
+ -rm -f ./$(DEPDIR)/fts-parser-tika.Plo
+ -rm -f ./$(DEPDIR)/fts-parser.Plo
+ -rm -f ./$(DEPDIR)/fts-plugin.Plo
+ -rm -f ./$(DEPDIR)/fts-search-args.Plo
+ -rm -f ./$(DEPDIR)/fts-search-serialize.Plo
+ -rm -f ./$(DEPDIR)/fts-search.Plo
+ -rm -f ./$(DEPDIR)/fts-storage.Plo
+ -rm -f ./$(DEPDIR)/fts-user.Plo
+ -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po
+ -rm -f ./$(DEPDIR)/xml2text-xml2text.Po
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \
+ uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \
+ uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+ clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \
+ clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS cscopelist-am \
+ ctags ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-doveadm_moduleLTLIBRARIES install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-moduleLTLIBRARIES install-pdf install-pdf-am \
+ install-pkginc_libHEADERS install-pkglibexecPROGRAMS \
+ install-pkglibexecSCRIPTS install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+ uninstall-doveadm_moduleLTLIBRARIES \
+ uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \
+ uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/fts/decode2text.sh b/src/plugins/fts/decode2text.sh
new file mode 100755
index 0000000..1c881ff
--- /dev/null
+++ b/src/plugins/fts/decode2text.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+
+# Example attachment decoder script. The attachment comes from stdin, and
+# the script is expected to output UTF-8 data to stdout. (If the output isn't
+# UTF-8, everything except valid UTF-8 sequences are dropped from it.)
+
+# The attachment decoding is enabled by setting:
+#
+# plugin {
+# fts_decoder = decode2text
+# }
+# service decode2text {
+# executable = script /usr/local/libexec/dovecot/decode2text.sh
+# user = dovecot
+# unix_listener decode2text {
+# mode = 0666
+# }
+# }
+
+libexec_dir=`dirname $0`
+content_type=$1
+
+# The second parameter is the format's filename extension, which is used when
+# found from a filename of application/octet-stream. You can also add more
+# extensions by giving more parameters.
+formats='application/pdf pdf
+application/x-pdf pdf
+application/msword doc
+application/mspowerpoint ppt
+application/vnd.ms-powerpoint ppt
+application/ms-excel xls
+application/x-msexcel xls
+application/vnd.ms-excel xls
+application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
+application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.presentation odp
+'
+
+if [ "$content_type" = "" ]; then
+ echo "$formats"
+ exit 0
+fi
+
+fmt=`echo "$formats" | grep -w "^$content_type" | cut -d ' ' -f 2`
+if [ "$fmt" = "" ]; then
+ echo "Content-Type: $content_type not supported" >&2
+ exit 1
+fi
+
+# most decoders can't handle stdin directly, so write the attachment
+# to a temp file
+path=`mktemp`
+trap "rm -f $path" 0 1 2 3 14 15
+cat > $path
+
+xmlunzip() {
+ name=$1
+
+ tempdir=`mktemp -d`
+ if [ "$tempdir" = "" ]; then
+ exit 1
+ fi
+ trap "rm -rf $path $tempdir" 0 1 2 3 14 15
+ cd $tempdir || exit 1
+ unzip -q "$path" 2>/dev/null || exit 0
+ find . -name "$name" -print0 | xargs -0 cat |
+ $libexec_dir/xml2text
+}
+
+wait_timeout() {
+ childpid=$!
+ trap "kill -9 $childpid; rm -f $path" 1 2 3 14 15
+ wait $childpid
+}
+
+LANG=en_US.UTF-8
+export LANG
+if [ $fmt = "pdf" ]; then
+ /usr/bin/pdftotext $path - 2>/dev/null&
+ wait_timeout 2>/dev/null
+elif [ $fmt = "doc" ]; then
+ (/usr/bin/catdoc $path; true) 2>/dev/null&
+ wait_timeout 2>/dev/null
+elif [ $fmt = "ppt" ]; then
+ (/usr/bin/catppt $path; true) 2>/dev/null&
+ wait_timeout 2>/dev/null
+elif [ $fmt = "xls" ]; then
+ (/usr/bin/xls2csv $path; true) 2>/dev/null&
+ wait_timeout 2>/dev/null
+elif [ $fmt = "odt" -o $fmt = "ods" -o $fmt = "odp" ]; then
+ xmlunzip "content.xml"
+elif [ $fmt = "docx" ]; then
+ xmlunzip "document.xml"
+elif [ $fmt = "xlsx" ]; then
+ xmlunzip "sharedStrings.xml"
+elif [ $fmt = "pptx" ]; then
+ xmlunzip "slide*.xml"
+else
+ echo "Buggy decoder script: $fmt not handled" >&2
+ exit 1
+fi
+exit 0
diff --git a/src/plugins/fts/doveadm-dump-fts-expunge-log.c b/src/plugins/fts/doveadm-dump-fts-expunge-log.c
new file mode 100644
index 0000000..7438bca
--- /dev/null
+++ b/src/plugins/fts/doveadm-dump-fts-expunge-log.c
@@ -0,0 +1,116 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "hex-binary.h"
+#include "guid.h"
+#include "doveadm-dump.h"
+#include "doveadm-fts.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+struct fts_expunge_log_record {
+ uint32_t checksum;
+ uint32_t record_size;
+ guid_128_t guid;
+};
+
+static int dump_record(int fd, buffer_t *buf)
+{
+ struct fts_expunge_log_record rec;
+ off_t offset;
+ void *data;
+ const uint32_t *expunges, *uids;
+ ssize_t ret;
+ size_t data_size;
+ unsigned int i, uids_count;
+
+ offset = lseek(fd, 0, SEEK_CUR);
+
+ ret = read(fd, &rec, sizeof(rec));
+ if (ret == 0)
+ return 0;
+
+ if (ret != sizeof(rec))
+ i_fatal("rec read() %d != %d", (int)ret, (int)sizeof(rec));
+
+ if (rec.record_size < sizeof(rec) + sizeof(uint32_t) ||
+ rec.record_size > INT_MAX) {
+ i_fatal("Invalid record_size=%u at offset %"PRIuUOFF_T,
+ rec.record_size, offset);
+ }
+ data_size = rec.record_size - sizeof(rec);
+ buffer_set_used_size(buf, 0);
+ data = buffer_append_space_unsafe(buf, data_size);
+ ret = read(fd, data, data_size);
+ if (ret != (ssize_t)data_size)
+ i_fatal("rec read() %d != %d", (int)ret, (int)data_size);
+
+ printf("#%"PRIuUOFF_T":\n", offset);
+ printf(" checksum = %8x\n", rec.checksum);
+ printf(" size .... = %u\n", rec.record_size);
+ printf(" mailbox . = %s\n", guid_128_to_string(rec.guid));
+
+ expunges = CONST_PTR_OFFSET(data, data_size - sizeof(uint32_t));
+ printf(" expunges = %u\n", *expunges);
+
+ printf(" uids .... = ");
+
+ uids = data;
+ uids_count = (rec.record_size - sizeof(rec) - sizeof(uint32_t)) /
+ sizeof(uint32_t);
+ for (i = 0; i < uids_count; i += 2) {
+ if (i != 0)
+ printf(",");
+ if (uids[i] == uids[i+1])
+ printf("%u", uids[i]);
+ else
+ printf("%u-%u", uids[i], uids[i+1]);
+ }
+ printf("\n");
+ return 1;
+}
+
+static void
+cmd_dump_fts_expunge_log(const char *path, const char *const *args ATTR_UNUSED)
+{
+ buffer_t *buf;
+ int fd, ret;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ i_fatal("open(%s) failed: %m", path);
+
+ buf = buffer_create_dynamic(default_pool, 1024);
+ do {
+ T_BEGIN {
+ ret = dump_record(fd, buf);
+ } T_END;
+ } while (ret > 0);
+ buffer_free(&buf);
+ i_close_fd(&fd);
+}
+
+static bool test_dump_fts_expunge_log(const char *path)
+{
+ const char *p;
+
+ if ((p = strrchr(path, '/')) != NULL)
+ p++;
+ else
+ p = path;
+ return strcmp(p, "dovecot-expunges.log") == 0;
+}
+
+static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_expunge_log = {
+ "fts-expunge-log",
+ test_dump_fts_expunge_log,
+ cmd_dump_fts_expunge_log
+};
+
+void doveadm_dump_fts_expunge_log_init(void)
+{
+ doveadm_dump_register(&doveadm_cmd_dump_fts_expunge_log);
+}
diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c
new file mode 100644
index 0000000..1b902a1
--- /dev/null
+++ b/src/plugins/fts/doveadm-fts.c
@@ -0,0 +1,470 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "imap-util.h"
+#include "mail-namespace.h"
+#include "mail-search.h"
+#include "mailbox-list-iter.h"
+#include "fts-tokenizer.h"
+#include "fts-filter.h"
+#include "fts-language.h"
+#include "fts-storage.h"
+#include "fts-search-args.h"
+#include "fts-user.h"
+#include "doveadm-print.h"
+#include "doveadm-mail.h"
+#include "doveadm-mailbox-list-iter.h"
+#include "doveadm-fts.h"
+
+const char *doveadm_fts_plugin_version = DOVECOT_ABI_VERSION;
+
+struct fts_tokenize_cmd_context {
+ struct doveadm_mail_cmd_context ctx;
+ const char *language;
+ const char *tokens;
+};
+
+static int
+cmd_search_box(struct doveadm_mail_cmd_context *ctx,
+ const struct mailbox_info *info)
+{
+ struct mailbox *box;
+ struct fts_backend *backend;
+ struct fts_result result;
+ int ret = 0;
+
+ backend = fts_list_backend(info->ns->list);
+ if (backend == NULL) {
+ i_error("fts not enabled for %s", info->vname);
+ ctx->exit_code = EX_CONFIG;
+ return -1;
+ }
+
+ i_zero(&result);
+ i_array_init(&result.definite_uids, 16);
+ i_array_init(&result.maybe_uids, 16);
+ i_array_init(&result.scores, 16);
+
+ box = mailbox_alloc(info->ns->list, info->vname, 0);
+ if (fts_backend_lookup(backend, box, ctx->search_args->args,
+ FTS_LOOKUP_FLAG_AND_ARGS, &result) < 0) {
+ i_error("fts lookup failed");
+ doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP);
+ ret = -1;
+ } else {
+ printf("%s: ", info->vname);
+ if (array_count(&result.definite_uids) == 0)
+ printf("no results\n");
+ else T_BEGIN {
+ string_t *str = t_str_new(128);
+ imap_write_seq_range(str, &result.definite_uids);
+ printf("%s\n", str_c(str));
+ } T_END;
+ if (array_count(&result.maybe_uids) > 0) T_BEGIN {
+ string_t *str = t_str_new(128);
+ imap_write_seq_range(str, &result.maybe_uids);
+ printf(" - maybe: %s\n", str_c(str));
+ } T_END;
+ fts_backend_lookup_done(backend);
+ }
+ mailbox_free(&box);
+ array_free(&result.definite_uids);
+ array_free(&result.maybe_uids);
+ array_free(&result.scores);
+ return ret;
+}
+
+static int
+cmd_fts_lookup_run(struct doveadm_mail_cmd_context *ctx,
+ struct mail_user *user)
+{
+ const enum mailbox_list_iter_flags iter_flags =
+ MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+ MAILBOX_LIST_ITER_RETURN_NO_FLAGS;
+ struct doveadm_mailbox_list_iter *iter;
+ const struct mailbox_info *info;
+ int ret = 0;
+
+ iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args,
+ iter_flags);
+ while ((info = doveadm_mailbox_list_iter_next(iter)) != NULL) T_BEGIN {
+ if (cmd_search_box(ctx, info) < 0)
+ ret = -1;
+ } T_END;
+ if (doveadm_mailbox_list_iter_deinit(&iter) < 0)
+ ret = -1;
+ return ret;
+}
+
+static void
+cmd_fts_lookup_init(struct doveadm_mail_cmd_context *ctx,
+ const char *const args[])
+{
+ if (args[0] == NULL)
+ doveadm_mail_help_name("fts lookup");
+
+ ctx->search_args = doveadm_mail_build_search_args(args);
+}
+
+static struct doveadm_mail_cmd_context *
+cmd_fts_lookup_alloc(void)
+{
+ struct doveadm_mail_cmd_context *ctx;
+
+ ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context);
+ ctx->v.run = cmd_fts_lookup_run;
+ ctx->v.init = cmd_fts_lookup_init;
+ return ctx;
+}
+
+static int
+cmd_fts_expand_run(struct doveadm_mail_cmd_context *ctx,
+ struct mail_user *user)
+{
+ struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces);
+ struct mailbox *box;
+ struct fts_backend *backend;
+ string_t *str = t_str_new(128);
+
+ backend = fts_list_backend(ns->list);
+ if (backend == NULL) {
+ i_error("fts not enabled for INBOX");
+ ctx->exit_code = EX_CONFIG;
+ return -1;
+ }
+
+ box = mailbox_alloc(ns->list, "INBOX", 0);
+ mail_search_args_init(ctx->search_args, box, FALSE, NULL);
+
+ if (fts_search_args_expand(backend, ctx->search_args) < 0)
+ i_fatal("Couldn't expand search args");
+ mail_search_args_to_cmdline(str, ctx->search_args->args);
+ printf("%s\n", str_c(str));
+ mailbox_free(&box);
+ return 0;
+}
+
+static void
+cmd_fts_expand_init(struct doveadm_mail_cmd_context *ctx,
+ const char *const args[])
+{
+ if (args[0] == NULL)
+ doveadm_mail_help_name("fts expand");
+
+ ctx->search_args = doveadm_mail_build_search_args(args);
+}
+
+static struct doveadm_mail_cmd_context *
+cmd_fts_expand_alloc(void)
+{
+ struct doveadm_mail_cmd_context *ctx;
+
+ ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context);
+ ctx->v.run = cmd_fts_expand_run;
+ ctx->v.init = cmd_fts_expand_init;
+ return ctx;
+}
+
+static int
+cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx,
+ struct mail_user *user)
+{
+ struct fts_tokenize_cmd_context *ctx =
+ (struct fts_tokenize_cmd_context *)_ctx;
+ struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces);
+ struct fts_backend *backend;
+ struct fts_user_language *user_lang;
+ const struct fts_language *lang = NULL;
+ int ret, ret2;
+ bool final = FALSE;
+
+ backend = fts_list_backend(ns->list);
+ if (backend == NULL) {
+ i_error("fts not enabled for INBOX");
+ _ctx->exit_code = EX_CONFIG;
+ return -1;
+ }
+
+ if (ctx->language == NULL) {
+ struct fts_language_list *lang_list =
+ fts_user_get_language_list(user);
+ enum fts_language_result result;
+ const char *error;
+
+ result = fts_language_detect(lang_list,
+ (const unsigned char *)ctx->tokens, strlen(ctx->tokens),
+ &lang, &error);
+ if (lang == NULL)
+ lang = fts_language_list_get_first(lang_list);
+ switch (result) {
+ case FTS_LANGUAGE_RESULT_SHORT:
+ i_warning("Text too short, can't detect its language - assuming %s", lang->name);
+ break;
+ case FTS_LANGUAGE_RESULT_UNKNOWN:
+ i_warning("Can't detect its language - assuming %s", lang->name);
+ break;
+ case FTS_LANGUAGE_RESULT_OK:
+ break;
+ case FTS_LANGUAGE_RESULT_ERROR:
+ i_error("Language detection library initialization failed: %s", error);
+ _ctx->exit_code = EX_CONFIG;
+ return -1;
+ default:
+ i_unreached();
+ }
+ } else {
+ lang = fts_language_find(ctx->language);
+ if (lang == NULL) {
+ i_error("Unknown language: %s", ctx->language);
+ _ctx->exit_code = EX_USAGE;
+ return -1;
+ }
+ }
+ user_lang = fts_user_language_find(user, lang);
+ if (user_lang == NULL) {
+ i_error("Language not enabled for user: %s", ctx->language);
+ _ctx->exit_code = EX_USAGE;
+ return -1;
+ }
+
+ fts_tokenizer_reset(user_lang->index_tokenizer);
+ for (;;) {
+ const char *token, *error;
+
+ if (!final) {
+ ret = fts_tokenizer_next(user_lang->index_tokenizer,
+ (const unsigned char *)ctx->tokens, strlen(ctx->tokens),
+ &token, &error);
+ } else {
+ ret = fts_tokenizer_final(user_lang->index_tokenizer,
+ &token, &error);
+ }
+ if (ret < 0)
+ break;
+ if (ret > 0 && user_lang->filter != NULL) {
+ ret2 = fts_filter_filter(user_lang->filter, &token, &error);
+ if (ret2 > 0)
+ doveadm_print(token);
+ else if (ret2 < 0)
+ i_error("Couldn't create indexable tokens: %s", error);
+ }
+ if (ret == 0) {
+ if (final)
+ break;
+ final = TRUE;
+ }
+ }
+ return 0;
+}
+
+static void
+cmd_fts_tokenize_init(struct doveadm_mail_cmd_context *_ctx,
+ const char *const args[])
+{
+ struct fts_tokenize_cmd_context *ctx =
+ (struct fts_tokenize_cmd_context *)_ctx;
+
+ if (args[0] == NULL)
+ doveadm_mail_help_name("fts tokenize");
+
+ ctx->tokens = p_strdup(_ctx->pool, t_strarray_join(args, " "));
+
+ doveadm_print_init(DOVEADM_PRINT_TYPE_FLOW);
+ doveadm_print_header("token", "token", DOVEADM_PRINT_HEADER_FLAG_HIDE_TITLE);
+}
+
+static bool
+cmd_fts_tokenize_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c)
+{
+ struct fts_tokenize_cmd_context *ctx =
+ (struct fts_tokenize_cmd_context *)_ctx;
+
+ switch (c) {
+ case 'l':
+ ctx->language = p_strdup(_ctx->pool, optarg);
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static struct doveadm_mail_cmd_context *
+cmd_fts_tokenize_alloc(void)
+{
+ struct fts_tokenize_cmd_context *ctx;
+
+ ctx = doveadm_mail_cmd_alloc(struct fts_tokenize_cmd_context);
+ ctx->ctx.v.run = cmd_fts_tokenize_run;
+ ctx->ctx.v.init = cmd_fts_tokenize_init;
+ ctx->ctx.v.parse_arg = cmd_fts_tokenize_parse_arg;
+ ctx->ctx.getopt_args = "l";
+ return &ctx->ctx;
+}
+
+static int
+fts_namespace_find(struct mail_user *user, const char *ns_prefix,
+ struct mail_namespace **ns_r)
+{
+ struct mail_namespace *ns;
+
+ if (ns_prefix == NULL)
+ ns = mail_namespace_find_inbox(user->namespaces);
+ else {
+ ns = mail_namespace_find_prefix(user->namespaces, ns_prefix);
+ if (ns == NULL) {
+ i_error("Namespace prefix not found: %s", ns_prefix);
+ return -1;
+ }
+ }
+
+ if (fts_list_backend(ns->list) == NULL) {
+ i_error("fts not enabled for user's namespace %s",
+ ns_prefix != NULL ? ns_prefix : "INBOX");
+ return -1;
+ }
+ *ns_r = ns;
+ return 0;
+}
+
+static int
+cmd_fts_optimize_run(struct doveadm_mail_cmd_context *ctx,
+ struct mail_user *user)
+{
+ const char *ns_prefix = ctx->args[0];
+ struct mail_namespace *ns;
+ struct fts_backend *backend;
+
+ if (fts_namespace_find(user, ns_prefix, &ns) < 0) {
+ doveadm_mail_failed_error(ctx, MAIL_ERROR_NOTFOUND);
+ return -1;
+ }
+ backend = fts_list_backend(ns->list);
+ if (fts_backend_optimize(backend) < 0) {
+ i_error("fts optimize failed");
+ doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP);
+ return -1;
+ }
+ return 0;
+}
+
+static void
+cmd_fts_optimize_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED,
+ const char *const args[])
+{
+ if (str_array_length(args) > 1)
+ doveadm_mail_help_name("fts optimize");
+}
+
+static struct doveadm_mail_cmd_context *
+cmd_fts_optimize_alloc(void)
+{
+ struct doveadm_mail_cmd_context *ctx;
+
+ ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context);
+ ctx->v.run = cmd_fts_optimize_run;
+ ctx->v.init = cmd_fts_optimize_init;
+ return ctx;
+}
+
+static int
+cmd_fts_rescan_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user)
+{
+ const char *ns_prefix = ctx->args[0];
+ struct mail_namespace *ns;
+ struct fts_backend *backend;
+
+ if (fts_namespace_find(user, ns_prefix, &ns) < 0) {
+ doveadm_mail_failed_error(ctx, MAIL_ERROR_NOTFOUND);
+ return -1;
+ }
+ backend = fts_list_backend(ns->list);
+ if (fts_backend_rescan(backend) < 0) {
+ i_error("fts rescan failed");
+ doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP);
+ return -1;
+ }
+ return 0;
+}
+
+static void
+cmd_fts_rescan_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED,
+ const char *const args[])
+{
+ if (str_array_length(args) > 1)
+ doveadm_mail_help_name("fts rescan");
+}
+
+static struct doveadm_mail_cmd_context *
+cmd_fts_rescan_alloc(void)
+{
+ struct doveadm_mail_cmd_context *ctx;
+
+ ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context);
+ ctx->v.run = cmd_fts_rescan_run;
+ ctx->v.init = cmd_fts_rescan_init;
+ return ctx;
+}
+
+static struct doveadm_cmd_ver2 fts_commands[] = {
+{
+ .name = "fts lookup",
+ .mail_cmd = cmd_fts_lookup_alloc,
+ .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>",
+DOVEADM_CMD_PARAMS_START
+DOVEADM_CMD_MAIL_COMMON
+DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL)
+DOVEADM_CMD_PARAMS_END
+},
+{
+ .name = "fts expand",
+ .mail_cmd = cmd_fts_expand_alloc,
+ .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>",
+DOVEADM_CMD_PARAMS_START
+DOVEADM_CMD_MAIL_COMMON
+DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL)
+DOVEADM_CMD_PARAMS_END
+},
+{
+ .name = "fts tokenize",
+ .mail_cmd = cmd_fts_tokenize_alloc,
+ .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<text>",
+DOVEADM_CMD_PARAMS_START
+DOVEADM_CMD_MAIL_COMMON
+DOVEADM_CMD_PARAM('l', "language", CMD_PARAM_STR, 0)
+DOVEADM_CMD_PARAM('\0', "text", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL)
+DOVEADM_CMD_PARAMS_END
+},
+{
+ .name = "fts optimize",
+ .mail_cmd = cmd_fts_optimize_alloc,
+ .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]",
+DOVEADM_CMD_PARAMS_START
+DOVEADM_CMD_MAIL_COMMON
+DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL)
+DOVEADM_CMD_PARAMS_END
+},
+{
+ .name = "fts rescan",
+ .mail_cmd = cmd_fts_rescan_alloc,
+ .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]",
+DOVEADM_CMD_PARAMS_START
+DOVEADM_CMD_MAIL_COMMON
+DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL)
+DOVEADM_CMD_PARAMS_END
+},
+};
+
+void doveadm_fts_plugin_init(struct module *module ATTR_UNUSED)
+{
+ unsigned int i;
+
+ for (i = 0; i < N_ELEMENTS(fts_commands); i++)
+ doveadm_cmd_register_ver2(&fts_commands[i]);
+ doveadm_dump_fts_expunge_log_init();
+}
+
+void doveadm_fts_plugin_deinit(void)
+{
+}
diff --git a/src/plugins/fts/doveadm-fts.h b/src/plugins/fts/doveadm-fts.h
new file mode 100644
index 0000000..d4307fe
--- /dev/null
+++ b/src/plugins/fts/doveadm-fts.h
@@ -0,0 +1,11 @@
+#ifndef DOVEADM_FTS_H
+#define DOVEADM_FTS_H
+
+struct module;
+
+void doveadm_dump_fts_expunge_log_init(void);
+
+void doveadm_fts_plugin_init(struct module *module);
+void doveadm_fts_plugin_deinit(void);
+
+#endif
diff --git a/src/plugins/fts/fts-api-private.h b/src/plugins/fts/fts-api-private.h
new file mode 100644
index 0000000..a070564
--- /dev/null
+++ b/src/plugins/fts/fts-api-private.h
@@ -0,0 +1,139 @@
+#ifndef FTS_API_PRIVATE_H
+#define FTS_API_PRIVATE_H
+
+#include "unichar.h"
+#include "fts-api.h"
+
+struct mail_user;
+struct mailbox_list;
+
+#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2)
+
+struct fts_backend_vfuncs {
+ struct fts_backend *(*alloc)(void);
+ int (*init)(struct fts_backend *backend, const char **error_r);
+ void (*deinit)(struct fts_backend *backend);
+
+ int (*get_last_uid)(struct fts_backend *backend, struct mailbox *box,
+ uint32_t *last_uid_r);
+
+ struct fts_backend_update_context *
+ (*update_init)(struct fts_backend *backend);
+ int (*update_deinit)(struct fts_backend_update_context *ctx);
+
+ void (*update_set_mailbox)(struct fts_backend_update_context *ctx,
+ struct mailbox *box);
+ void (*update_expunge)(struct fts_backend_update_context *ctx,
+ uint32_t uid);
+
+ /* Start a build for specified key */
+ bool (*update_set_build_key)(struct fts_backend_update_context *ctx,
+ const struct fts_backend_build_key *key);
+ /* Finish a build for specified key - guaranteed to be called */
+ void (*update_unset_build_key)(struct fts_backend_update_context *ctx);
+ /* Add data for current build key */
+ int (*update_build_more)(struct fts_backend_update_context *ctx,
+ const unsigned char *data, size_t size);
+
+ int (*refresh)(struct fts_backend *backend);
+ int (*rescan)(struct fts_backend *backend);
+ int (*optimize)(struct fts_backend *backend);
+
+ bool (*can_lookup)(struct fts_backend *backend,
+ const struct mail_search_arg *args);
+ int (*lookup)(struct fts_backend *backend, struct mailbox *box,
+ struct mail_search_arg *args, enum fts_lookup_flags flags,
+ struct fts_result *result);
+ int (*lookup_multi)(struct fts_backend *backend,
+ struct mailbox *const boxes[],
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result);
+ void (*lookup_done)(struct fts_backend *backend);
+};
+
+enum fts_backend_flags {
+ /* Backend supports indexing binary MIME parts */
+ FTS_BACKEND_FLAG_BINARY_MIME_PARTS = 0x01,
+ /* Send built text to backend normalized rather than
+ preserving original case */
+ FTS_BACKEND_FLAG_NORMALIZE_INPUT = 0x02,
+ /* Send only fully indexable words rather than randomly sized blocks */
+ FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04,
+ /* Fuzzy search works */
+ FTS_BACKEND_FLAG_FUZZY_SEARCH = 0x08,
+ /* Tokenize all the input. update_build_more() will be called a single
+ directly indexable token at a time. Searching will modify the search
+ args so that lookup() sees only tokens that can be directly
+ searched. */
+ FTS_BACKEND_FLAG_TOKENIZED_INPUT = 0x10
+};
+
+struct fts_header_filters {
+ pool_t pool;
+ ARRAY_TYPE(const_string) includes;
+ ARRAY_TYPE(const_string) excludes;
+ bool loaded:1;
+ bool exclude_is_default:1;
+};
+
+struct fts_backend {
+ const char *name;
+ enum fts_backend_flags flags;
+
+ struct fts_backend_vfuncs v;
+ struct mail_namespace *ns;
+ struct fts_header_filters header_filters;
+
+ bool updating:1;
+};
+
+struct fts_backend_update_context {
+ struct fts_backend *backend;
+ normalizer_func_t *normalizer;
+
+ struct mailbox *cur_box, *backend_box;
+
+ bool build_key_open:1;
+ bool failed:1;
+};
+
+struct fts_index_header {
+ uint32_t last_indexed_uid;
+
+ /* Checksum of settings. If the settings change, the index should
+ be rebuilt. */
+ uint32_t settings_checksum;
+ uint32_t unused;
+};
+
+void fts_backend_register(const struct fts_backend *backend);
+void fts_backend_unregister(const char *name);
+
+bool fts_backend_default_can_lookup(struct fts_backend *backend,
+ const struct mail_search_arg *args);
+
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+ const ARRAY_TYPE(seq_range) *definite_filter,
+ ARRAY_TYPE(seq_range) *maybe_dest,
+ const ARRAY_TYPE(seq_range) *maybe_filter);
+
+/* Returns TRUE if ok, FALSE if no fts header */
+bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r);
+int fts_index_set_header(struct mailbox *box,
+ const struct fts_index_header *hdr);
+int ATTR_NOWARN_UNUSED_RESULT
+fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid);
+int fts_backend_reset_last_uids(struct fts_backend *backend);
+int fts_index_have_compatible_settings(struct mailbox_list *list,
+ uint32_t checksum);
+
+/* Returns TRUE if FTS backend should index the header for optimizing
+ separate lookups */
+bool fts_header_want_indexed(const char *hdr_name);
+/* Returns TRUE if header's values should be considered to have a language. */
+bool fts_header_has_language(const char *hdr_name);
+
+int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r);
+
+#endif
diff --git a/src/plugins/fts/fts-api.c b/src/plugins/fts/fts-api.c
new file mode 100644
index 0000000..a6ea716
--- /dev/null
+++ b/src/plugins/fts/fts-api.c
@@ -0,0 +1,554 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "hex-binary.h"
+#include "mail-index.h"
+#include "mail-namespace.h"
+#include "mail-storage-private.h"
+#include "mailbox-list-iter.h"
+#include "mail-search.h"
+#include "fts-api-private.h"
+
+struct event_category event_category_fts = {
+ .name = "fts",
+};
+
+static ARRAY(const struct fts_backend *) backends;
+
+void fts_backend_register(const struct fts_backend *backend)
+{
+ if (!array_is_created(&backends))
+ i_array_init(&backends, 4);
+ array_push_back(&backends, &backend);
+}
+
+void fts_backend_unregister(const char *name)
+{
+ const struct fts_backend *const *be;
+ unsigned int i, count;
+
+ be = array_get(&backends, &count);
+ for (i = 0; i < count; i++) {
+ if (strcmp(be[i]->name, name) == 0) {
+ array_delete(&backends, i, 1);
+ break;
+ }
+ }
+ if (i == count)
+ i_panic("fts_backend_unregister(%s): unknown backend", name);
+
+ if (count == 1)
+ array_free(&backends);
+}
+
+static const struct fts_backend *
+fts_backend_class_lookup(const char *backend_name)
+{
+ const struct fts_backend *const *be;
+ unsigned int i, count;
+
+ if (array_is_created(&backends)) {
+ be = array_get(&backends, &count);
+ for (i = 0; i < count; i++) {
+ if (strcmp(be[i]->name, backend_name) == 0)
+ return be[i];
+ }
+ }
+ return NULL;
+}
+
+static void
+fts_header_filters_init(struct fts_backend *backend)
+{
+ struct fts_header_filters *filters = &backend->header_filters;
+ pool_t pool = filters->pool = pool_alloconly_create(
+ MEMPOOL_GROWING"fts_header_filters", 256);
+
+ p_array_init(&filters->includes, pool, 8);
+ p_array_init(&filters->excludes, pool, 8);
+}
+
+static void
+fts_header_filters_deinit(struct fts_backend *backend)
+{
+ pool_unref(&backend->header_filters.pool);
+}
+
+int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
+ const char **error_r, struct fts_backend **backend_r)
+{
+ const struct fts_backend *be;
+ struct fts_backend *backend;
+
+ be = fts_backend_class_lookup(backend_name);
+ if (be == NULL) {
+ *error_r = "Unknown backend";
+ return -1;
+ }
+
+ backend = be->v.alloc();
+ backend->ns = ns;
+ if (backend->v.init(backend, error_r) < 0) {
+ i_free(backend);
+ return -1;
+ }
+
+ fts_header_filters_init(backend);
+ *backend_r = backend;
+ return 0;
+}
+
+void fts_backend_deinit(struct fts_backend **_backend)
+{
+ struct fts_backend *backend = *_backend;
+
+ fts_header_filters_deinit(backend);
+ *_backend = NULL;
+ backend->v.deinit(backend);
+}
+
+int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box,
+ uint32_t *last_uid_r)
+{
+ struct fts_index_header hdr;
+
+ if (box->virtual_vfuncs != NULL) {
+ /* virtual mailboxes themselves don't have any indexes,
+ so catch this call here */
+ if (!fts_index_get_header(box, &hdr))
+ *last_uid_r = 0;
+ else
+ *last_uid_r = hdr.last_indexed_uid;
+ return 0;
+ }
+
+ return backend->v.get_last_uid(backend, box, last_uid_r);
+}
+
+bool fts_backend_is_updating(struct fts_backend *backend)
+{
+ return backend->updating;
+}
+
+struct fts_backend_update_context *
+fts_backend_update_init(struct fts_backend *backend)
+{
+ struct fts_backend_update_context *ctx;
+
+ i_assert(!backend->updating);
+
+ backend->updating = TRUE;
+ ctx = backend->v.update_init(backend);
+ if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0)
+ ctx->normalizer = backend->ns->user->default_normalizer;
+ return ctx;
+}
+
+static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
+{
+ fts_backend_update_unset_build_key(ctx);
+ if (ctx->backend_box != ctx->cur_box) {
+ ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box);
+ ctx->backend_box = ctx->cur_box;
+ }
+}
+
+int fts_backend_update_deinit(struct fts_backend_update_context **_ctx)
+{
+ struct fts_backend_update_context *ctx = *_ctx;
+ struct fts_backend *backend = ctx->backend;
+ int ret;
+
+ *_ctx = NULL;
+
+ ctx->cur_box = NULL;
+ fts_backend_set_cur_mailbox(ctx);
+
+ ret = backend->v.update_deinit(ctx);
+ backend->updating = FALSE;
+ return ret;
+}
+
+void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx,
+ struct mailbox *box)
+{
+ if (ctx->backend_box != NULL && box != ctx->backend_box) {
+ /* make sure we don't reference the backend box anymore */
+ ctx->backend->v.update_set_mailbox(ctx, NULL);
+ ctx->backend_box = NULL;
+ }
+ ctx->cur_box = box;
+}
+
+void fts_backend_update_expunge(struct fts_backend_update_context *ctx,
+ uint32_t uid)
+{
+ fts_backend_set_cur_mailbox(ctx);
+ ctx->backend->v.update_expunge(ctx, uid);
+}
+
+bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx,
+ const struct fts_backend_build_key *key)
+{
+ fts_backend_set_cur_mailbox(ctx);
+
+ i_assert(ctx->cur_box != NULL);
+
+ if (!ctx->backend->v.update_set_build_key(ctx, key))
+ return FALSE;
+ ctx->build_key_open = TRUE;
+ return TRUE;
+}
+
+void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx)
+{
+ if (ctx->build_key_open) {
+ ctx->backend->v.update_unset_build_key(ctx);
+ ctx->build_key_open = FALSE;
+ }
+}
+
+int fts_backend_update_build_more(struct fts_backend_update_context *ctx,
+ const unsigned char *data, size_t size)
+{
+ i_assert(ctx->build_key_open);
+
+ return ctx->backend->v.update_build_more(ctx, data, size);
+}
+
+int fts_backend_refresh(struct fts_backend *backend)
+{
+ return backend->v.refresh(backend);
+}
+
+int fts_backend_reset_last_uids(struct fts_backend *backend)
+{
+ struct mailbox_list_iterate_context *iter;
+ const struct mailbox_info *info;
+ struct mailbox *box;
+ int ret = 0;
+
+ iter = mailbox_list_iter_init(backend->ns->list, "*",
+ MAILBOX_LIST_ITER_SKIP_ALIASES |
+ MAILBOX_LIST_ITER_NO_AUTO_BOXES);
+ while ((info = mailbox_list_iter_next(iter)) != NULL) {
+ if ((info->flags &
+ (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
+ continue;
+
+ box = mailbox_alloc(info->ns->list, info->vname, 0);
+ if (mailbox_open(box) == 0) {
+ if (fts_index_set_last_uid(box, 0) < 0)
+ ret = -1;
+ }
+ mailbox_free(&box);
+ }
+ if (mailbox_list_iter_deinit(&iter) < 0)
+ ret = -1;
+ return ret;
+}
+
+int fts_backend_rescan(struct fts_backend *backend)
+{
+ struct mailbox *box;
+ bool virtual_storage;
+
+ box = mailbox_alloc(backend->ns->list, "", 0);
+ virtual_storage = box->virtual_vfuncs != NULL;
+ mailbox_free(&box);
+
+ if (virtual_storage) {
+ /* just reset the last-uids for a virtual storage. */
+ return fts_backend_reset_last_uids(backend);
+ }
+
+ return backend->v.rescan == NULL ? 0 :
+ backend->v.rescan(backend);
+}
+
+int fts_backend_optimize(struct fts_backend *backend)
+{
+ return backend->v.optimize == NULL ? 0 :
+ backend->v.optimize(backend);
+}
+
+static void
+fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
+ const ARRAY_TYPE(seq_range) *dest_definite,
+ const ARRAY_TYPE(seq_range) *src_maybe,
+ const ARRAY_TYPE(seq_range) *src_definite)
+{
+ ARRAY_TYPE(seq_range) src_unwanted;
+ const struct seq_range *range;
+ struct seq_range new_range;
+ unsigned int i, count;
+ uint32_t seq;
+
+ /* add/leave to dest_maybe if at least one list has maybe,
+ and no lists have none */
+
+ /* create unwanted sequences list from both sources */
+ t_array_init(&src_unwanted, 128);
+ new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
+ array_push_back(&src_unwanted, &new_range);
+ seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
+ seq_range_array_remove_seq_range(&src_unwanted, src_definite);
+
+ /* drop unwanted uids */
+ seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);
+
+ /* add uids that are in dest_definite and src_maybe lists */
+ range = array_get(dest_definite, &count);
+ for (i = 0; i < count; i++) {
+ for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
+ if (seq_range_exists(src_maybe, seq))
+ seq_range_array_add(dest_maybe, seq);
+ }
+ }
+}
+
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+ const ARRAY_TYPE(seq_range) *definite_filter,
+ ARRAY_TYPE(seq_range) *maybe_dest,
+ const ARRAY_TYPE(seq_range) *maybe_filter)
+{
+ T_BEGIN {
+ fts_merge_maybies(maybe_dest, definite_dest,
+ maybe_filter, definite_filter);
+ } T_END;
+ /* keep only what exists in both lists. the rest is in
+ maybies or not wanted */
+ seq_range_array_intersect(definite_dest, definite_filter);
+}
+
+bool fts_backend_default_can_lookup(struct fts_backend *backend,
+ const struct mail_search_arg *args)
+{
+ for (; args != NULL; args = args->next) {
+ switch (args->type) {
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ case SEARCH_INTHREAD:
+ if (fts_backend_default_can_lookup(backend,
+ args->value.subargs))
+ return TRUE;
+ break;
+ case SEARCH_HEADER:
+ case SEARCH_HEADER_ADDRESS:
+ case SEARCH_HEADER_COMPRESS_LWSP:
+ case SEARCH_BODY:
+ case SEARCH_TEXT:
+ if (!args->no_fts)
+ return TRUE;
+ break;
+ default:
+ break;
+ }
+ }
+ return FALSE;
+}
+
+bool fts_backend_can_lookup(struct fts_backend *backend,
+ const struct mail_search_arg *args)
+{
+ return backend->v.can_lookup(backend, args);
+}
+
+static int fts_score_map_sort(const struct fts_score_map *m1,
+ const struct fts_score_map *m2)
+{
+ if (m1->uid < m2->uid)
+ return -1;
+ if (m1->uid > m2->uid)
+ return 1;
+ return 0;
+}
+
+int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_result *result)
+{
+ array_clear(&result->definite_uids);
+ array_clear(&result->maybe_uids);
+ array_clear(&result->scores);
+
+ if (backend->v.lookup(backend, box, args, flags, result) < 0)
+ return -1;
+
+ if (!result->scores_sorted && array_is_created(&result->scores)) {
+ array_sort(&result->scores, fts_score_map_sort);
+ result->scores_sorted = TRUE;
+ }
+ return 0;
+}
+
+int fts_backend_lookup_multi(struct fts_backend *backend,
+ struct mailbox *const boxes[],
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result)
+{
+ unsigned int i;
+
+ i_assert(boxes[0] != NULL);
+
+ if (backend->v.lookup_multi != NULL) {
+ if (backend->v.lookup_multi(backend, boxes, args,
+ flags, result) < 0)
+ return -1;
+ if (result->box_results == NULL) {
+ result->box_results = p_new(result->pool,
+ struct fts_result, 1);
+ }
+ return 0;
+ }
+
+ for (i = 0; boxes[i] != NULL; i++) ;
+ result->box_results = p_new(result->pool, struct fts_result, i+1);
+
+ for (i = 0; boxes[i] != NULL; i++) {
+ struct fts_result *box_result = &result->box_results[i];
+
+ p_array_init(&box_result->definite_uids, result->pool, 32);
+ p_array_init(&box_result->maybe_uids, result->pool, 32);
+ p_array_init(&box_result->scores, result->pool, 32);
+ if (backend->v.lookup(backend, boxes[i], args,
+ flags, box_result) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+void fts_backend_lookup_done(struct fts_backend *backend)
+{
+ if (backend->v.lookup_done != NULL)
+ backend->v.lookup_done(backend);
+}
+
+static uint32_t fts_index_get_ext_id(struct mailbox *box)
+{
+ return mail_index_ext_register(box->index, "fts",
+ sizeof(struct fts_index_header),
+ 0, 0);
+}
+
+bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r)
+{
+ struct mail_index_view *view;
+ const void *data;
+ size_t data_size;
+ bool ret;
+
+ mail_index_refresh(box->index);
+ view = mail_index_view_open(box->index);
+ mail_index_get_header_ext(view, fts_index_get_ext_id(box),
+ &data, &data_size);
+ if (data_size < sizeof(*hdr_r)) {
+ i_zero(hdr_r);
+ ret = FALSE;
+ } else {
+ memcpy(hdr_r, data, sizeof(*hdr_r));
+ ret = TRUE;
+ }
+ mail_index_view_close(&view);
+ return ret;
+}
+
+int fts_index_set_header(struct mailbox *box,
+ const struct fts_index_header *hdr)
+{
+ struct mail_index_transaction *trans;
+ uint32_t ext_id = fts_index_get_ext_id(box);
+
+ trans = mail_index_transaction_begin(box->view, 0);
+ mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr));
+ return mail_index_transaction_commit(&trans);
+}
+
+int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid)
+{
+ struct fts_index_header hdr;
+
+ (void)fts_index_get_header(box, &hdr);
+ hdr.last_indexed_uid = last_uid;
+ return fts_index_set_header(box, &hdr);
+}
+
+int fts_index_have_compatible_settings(struct mailbox_list *list,
+ uint32_t checksum)
+{
+ struct mail_namespace *ns = mailbox_list_get_namespace(list);
+ struct mailbox *box;
+ struct fts_index_header hdr;
+ const char *vname;
+ size_t len;
+ int ret;
+
+ if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0)
+ vname = "INBOX";
+ else {
+ len = strlen(ns->prefix);
+ if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns))
+ len--;
+ vname = t_strndup(ns->prefix, len);
+ }
+
+ box = mailbox_alloc(list, vname, 0);
+ if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) {
+ i_error("fts: Failed to sync mailbox %s: %s", vname,
+ mailbox_get_last_internal_error(box, NULL));
+ ret = -1;
+ } else {
+ ret = fts_index_get_header(box, &hdr) &&
+ hdr.settings_checksum == checksum ? 1 : 0;
+ }
+ mailbox_free(&box);
+ return ret;
+}
+
+static const char *indexed_headers[] = {
+ "From", "To", "Cc", "Bcc", "Subject"
+};
+
+bool fts_header_want_indexed(const char *hdr_name)
+{
+ unsigned int i;
+
+ for (i = 0; i < N_ELEMENTS(indexed_headers); i++) {
+ if (strcasecmp(hdr_name, indexed_headers[i]) == 0)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+bool fts_header_has_language(const char *hdr_name)
+{
+ /* FIXME: should email address headers be detected as different
+ languages? That mainly contains people's names.. */
+ /*if (message_header_is_address(hdr_name))
+ return TRUE;*/
+
+ /* Subject definitely contains language-specific data that can be
+ detected. Comment and Keywords headers also could contain, although
+ just about nobody uses those headers.
+
+ For now we assume that other headers contain non-language specific
+ data that we don't want to filter in special ways. For example
+ it is good to be able to search for Message-IDs. */
+ return strcasecmp(hdr_name, "Subject") == 0 ||
+ strcasecmp(hdr_name, "Comments") == 0 ||
+ strcasecmp(hdr_name, "Keywords") == 0;
+}
+
+int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r)
+{
+ struct mailbox_metadata metadata;
+
+ if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0)
+ return -1;
+
+ *guid_r = guid_128_to_string(metadata.guid);
+ return 0;
+}
diff --git a/src/plugins/fts/fts-api.h b/src/plugins/fts/fts-api.h
new file mode 100644
index 0000000..11a331f
--- /dev/null
+++ b/src/plugins/fts/fts-api.h
@@ -0,0 +1,173 @@
+#ifndef FTS_API_H
+#define FTS_API_H
+
+struct mail;
+struct mailbox;
+struct mail_namespace;
+struct mail_search_arg;
+
+struct fts_backend;
+
+#include "seq-range-array.h"
+
+enum fts_lookup_flags {
+ /* Specifies if the args should be ANDed or ORed together. */
+ FTS_LOOKUP_FLAG_AND_ARGS = 0x01,
+ /* Require exact matching for non-fuzzy search args by returning all
+ such matches as maybe_uids instead of definite_uids */
+ FTS_LOOKUP_FLAG_NO_AUTO_FUZZY = 0x02
+};
+
+enum fts_backend_build_key_type {
+ /* Header */
+ FTS_BACKEND_BUILD_KEY_HDR,
+ /* MIME part header */
+ FTS_BACKEND_BUILD_KEY_MIME_HDR,
+ /* MIME body part */
+ FTS_BACKEND_BUILD_KEY_BODY_PART,
+ /* Binary MIME body part, if backend supports binary data */
+ FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY
+};
+
+struct fts_backend_build_key {
+ uint32_t uid;
+ enum fts_backend_build_key_type type;
+ struct message_part *part;
+
+ /* for _KEY_HDR: */
+ const char *hdr_name;
+
+ /* for _KEY_BODY_PART and _KEY_BODY_PART_BINARY: */
+
+ /* Contains a valid parsed "type/subtype" string. For messages without
+ (valid) Content-Type: header, it's set to "text/plain". */
+ const char *body_content_type;
+ /* Content-Disposition: header without parsing/validation if it exists,
+ otherwise NULL. */
+ const char *body_content_disposition;
+};
+
+struct fts_score_map {
+ uint32_t uid;
+ float score;
+};
+ARRAY_DEFINE_TYPE(fts_score_map, struct fts_score_map);
+
+/* the structure is meant to be implemented by plugins that want to carry
+ some state over from a call to next ones within an fts_search_context
+ session.
+
+ The pointer to this structure is initially granted to be NULL and it
+ remains such unless the plugin itself activates it.
+
+ Any memory management for the pointer and its contents is expected to
+ be performed by the plugin itself, possibly but not necessarily using
+ the result pool propagated to plugin call by struct fts_result.pool and
+ struct fts_multi_result.pool. */
+
+struct fts_search_state;
+
+struct fts_result {
+ pool_t pool;
+ struct fts_search_state *search_state;
+
+ struct mailbox *box;
+
+ ARRAY_TYPE(seq_range) definite_uids;
+ /* The maybe_uids is useful with backends that can only filter out
+ messages, but can't definitively say if the search matched a
+ message. */
+ ARRAY_TYPE(seq_range) maybe_uids;
+ ARRAY_TYPE(fts_score_map) scores;
+ bool scores_sorted;
+};
+
+struct fts_multi_result {
+ pool_t pool;
+ struct fts_search_state *search_state;
+
+ /* box=NULL-terminated array of mailboxes and matching UIDs,
+ all allocated from the given pool. */
+ struct fts_result *box_results;
+};
+
+extern struct event_category event_category_fts;
+
+int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
+ const char **error_r, struct fts_backend **backend_r);
+void fts_backend_deinit(struct fts_backend **backend);
+
+/* Get the last_uid for the mailbox. */
+int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box,
+ uint32_t *last_uid_r);
+
+/* Returns TRUE if there exists an update context. */
+bool fts_backend_is_updating(struct fts_backend *backend);
+
+/* Start an index update. */
+struct fts_backend_update_context *
+fts_backend_update_init(struct fts_backend *backend);
+/* Finish an index update. Returns 0 if ok, -1 if some updates failed.
+ If updates failed, the index is in unspecified state. */
+int fts_backend_update_deinit(struct fts_backend_update_context **ctx);
+
+/* Switch to updating the specified mailbox. box may also be set to NULL to
+ make sure the previous mailbox won't tried to be accessed anymore. */
+void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx,
+ struct mailbox *box);
+/* Expunge the specified mail. */
+void fts_backend_update_expunge(struct fts_backend_update_context *ctx,
+ uint32_t uid);
+
+/* Switch to building index for specified key. If backend doesn't want to
+ index this key, it can return FALSE and caller will skip to next key. */
+bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx,
+ const struct fts_backend_build_key *key);
+/* Make sure that if _build_more() is called, we'll assert-crash. */
+void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx);
+/* Add more content to the index for the currently specified build key.
+ Non-BODY_PART_BINARY data must contain only full valid UTF-8 characters,
+ but it doesn't need to be NUL-terminated. size contains the data size in
+ bytes, not characters. This function may be called many times and the data
+ block sizes may be small. Backend returns 0 if ok, -1 if build should be
+ aborted. */
+int fts_backend_update_build_more(struct fts_backend_update_context *ctx,
+ const unsigned char *data, size_t size);
+
+/* Refresh index to make sure we see latest changes from lookups.
+ Returns 0 if ok, -1 if error. */
+int fts_backend_refresh(struct fts_backend *backend);
+/* Go through the entire index and make sure all mails are indexed,
+ and delete any extra mails in the index. */
+int fts_backend_rescan(struct fts_backend *backend);
+/* Optimize the index. This can be a somewhat heavy operation. */
+int fts_backend_optimize(struct fts_backend *backend);
+
+/* Returns TRUE if fts_backend_lookup() should even be tried for the
+ given args. */
+bool fts_backend_can_lookup(struct fts_backend *backend,
+ const struct mail_search_arg *args);
+/* Do a FTS lookup for the given search args. Backends can support different
+ kinds of search arguments, so match_always=TRUE must be set to all search
+ args that were actually used to produce the search results. The other args
+ are handled by the regular search code. The backends MUST ignore all args
+ that have subargs (SEARCH_OR, SEARCH_SUB), since they are looked up
+ separately.
+
+ The arrays in result must be initialized by caller. */
+int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_result *result);
+
+/* Search from multiple mailboxes. result->pool must be initialized. */
+int fts_backend_lookup_multi(struct fts_backend *backend,
+ struct mailbox *const boxes[],
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result);
+/* Called after the lookups are done. The next lookup will be preceded by a
+ refresh. */
+void fts_backend_lookup_done(struct fts_backend *backend);
+
+#endif
diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c
new file mode 100644
index 0000000..73d4f4b
--- /dev/null
+++ b/src/plugins/fts/fts-build-mail.c
@@ -0,0 +1,719 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "istream.h"
+#include "buffer.h"
+#include "str.h"
+#include "rfc822-parser.h"
+#include "message-address.h"
+#include "message-parser.h"
+#include "message-decoder.h"
+#include "mail-storage.h"
+#include "index-mail.h"
+#include "fts-parser.h"
+#include "fts-user.h"
+#include "fts-language.h"
+#include "fts-tokenizer.h"
+#include "fts-filter.h"
+#include "fts-api-private.h"
+#include "fts-build-mail.h"
+
+/* there are other characters as well, but this doesn't have to be exact */
+#define IS_WORD_WHITESPACE(c) \
+ ((c) == ' ' || (c) == '\t' || (c) == '\n')
+/* if we see a word larger than this, just go ahead and split it from
+ wherever */
+#define MAX_WORD_SIZE 1024
+
+struct fts_mail_build_context {
+ struct mail *mail;
+ struct fts_backend_update_context *update_ctx;
+
+ char *content_type, *content_disposition;
+ struct fts_parser *body_parser;
+
+ buffer_t *word_buf, *pending_input;
+ struct fts_user_language *cur_user_lang;
+};
+
+static int fts_build_data(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last);
+
+static void fts_build_parse_content_type(struct fts_mail_build_context *ctx,
+ const struct message_header_line *hdr)
+{
+ struct rfc822_parser_context parser;
+ string_t *content_type;
+
+ if (ctx->content_type != NULL)
+ return;
+
+ rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+ rfc822_skip_lwsp(&parser);
+
+ T_BEGIN {
+ content_type = t_str_new(64);
+ (void)rfc822_parse_content_type(&parser, content_type);
+ ctx->content_type = str_lcase(i_strdup(str_c(content_type)));
+ } T_END;
+ rfc822_parser_deinit(&parser);
+}
+
+static void
+fts_build_parse_content_disposition(struct fts_mail_build_context *ctx,
+ const struct message_header_line *hdr)
+{
+ /* just pass it as-is to backend. */
+ i_free(ctx->content_disposition);
+ ctx->content_disposition =
+ i_strndup(hdr->full_value, hdr->full_value_len);
+}
+
+static void fts_parse_mail_header(struct fts_mail_build_context *ctx,
+ const struct message_block *raw_block)
+{
+ const struct message_header_line *hdr = raw_block->hdr;
+
+ if (strcasecmp(hdr->name, "Content-Type") == 0)
+ fts_build_parse_content_type(ctx, hdr);
+ else if (strcasecmp(hdr->name, "Content-Disposition") == 0)
+ fts_build_parse_content_disposition(ctx, hdr);
+}
+
+static int
+fts_build_unstructured_header(struct fts_mail_build_context *ctx,
+ const struct message_header_line *hdr)
+{
+ const unsigned char *data = hdr->full_value;
+ unsigned char *buf = NULL;
+ unsigned int i;
+ int ret;
+
+ /* @UNSAFE: if there are any NULs, replace them with spaces */
+ for (i = 0; i < hdr->full_value_len; i++) {
+ if (hdr->full_value[i] == '\0') {
+ if (buf == NULL) {
+ buf = i_memdup(hdr->full_value,
+ hdr->full_value_len);
+ data = buf;
+ }
+ buf[i] = ' ';
+ }
+ }
+ ret = fts_build_data(ctx, data, hdr->full_value_len, TRUE);
+ i_free(buf);
+ return ret;
+}
+
+static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx,
+ struct fts_user_language *user_lang)
+{
+ i_assert(user_lang != NULL);
+
+ ctx->cur_user_lang = user_lang;
+ /* reset tokenizer between fields - just to be sure no state
+ leaks between fields (especially if previous indexing had
+ failed) */
+ fts_tokenizer_reset(user_lang->index_tokenizer);
+}
+
+static void
+fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx,
+ const struct message_header_line *hdr)
+{
+ /* Headers that don't contain any human language will only be
+ translated to lowercase - no stemming or other filtering. There's
+ unfortunately no pefect way of detecting which headers contain
+ human languages, so we check with fts_header_has_language if the
+ header is something that's supposed to containing human text. */
+ if (fts_header_has_language(hdr->name))
+ ctx->cur_user_lang = NULL;
+ else {
+ fts_mail_build_ctx_set_lang(ctx,
+ fts_user_get_data_lang(ctx->update_ctx->backend->ns->user));
+ }
+}
+
+static int fts_build_mail_header(struct fts_mail_build_context *ctx,
+ const struct message_block *block)
+{
+ const struct message_header_line *hdr = block->hdr;
+ struct fts_backend_build_key key;
+ int ret;
+
+ if (hdr->eoh)
+ return 0;
+
+ /* hdr->full_value is always set because we get the block from
+ message_decoder */
+ i_zero(&key);
+ key.uid = ctx->mail->uid;
+ key.type = block->part->physical_pos == 0 ?
+ FTS_BACKEND_BUILD_KEY_HDR : FTS_BACKEND_BUILD_KEY_MIME_HDR;
+ key.part = block->part;
+ key.hdr_name = hdr->name;
+
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
+ fts_build_tokenized_hdr_update_lang(ctx, hdr);
+
+ if (!fts_backend_update_set_build_key(ctx->update_ctx, &key))
+ return 0;
+
+ if (!message_header_is_address(hdr->name)) {
+ /* regular unstructured header */
+ ret = fts_build_unstructured_header(ctx, hdr);
+ } else T_BEGIN {
+ /* message address. normalize it to give better
+ search results. */
+ struct message_address *addr;
+ string_t *str;
+
+ addr = message_address_parse(pool_datastack_create(),
+ hdr->full_value,
+ hdr->full_value_len,
+ UINT_MAX, 0);
+ str = t_str_new(hdr->full_value_len);
+ message_address_write(str, addr);
+
+ ret = fts_build_data(ctx, str_data(str), str_len(str), TRUE);
+ } T_END;
+
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
+ /* index the header name itself using data-language. */
+ struct fts_user_language *prev_lang = ctx->cur_user_lang;
+
+ fts_mail_build_ctx_set_lang(ctx,
+ fts_user_get_data_lang(ctx->update_ctx->backend->ns->user));
+ key.hdr_name = "";
+ if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) {
+ if (fts_build_data(ctx, (const void *)hdr->name,
+ strlen(hdr->name), TRUE) < 0)
+ ret = -1;
+ }
+ fts_mail_build_ctx_set_lang(ctx, prev_lang);
+ }
+ return ret;
+}
+
+static bool
+fts_build_body_begin(struct fts_mail_build_context *ctx,
+ struct message_part *part, bool *binary_body_r)
+{
+ struct mail_storage *storage;
+ struct fts_parser_context parser_context;
+ struct fts_backend_build_key key;
+
+ i_assert(ctx->body_parser == NULL);
+
+ *binary_body_r = FALSE;
+ i_zero(&key);
+ key.uid = ctx->mail->uid;
+ key.part = part;
+
+ i_zero(&parser_context);
+ parser_context.content_type = ctx->content_type != NULL ?
+ ctx->content_type : "text/plain";
+ if (str_begins(parser_context.content_type, "multipart/")) {
+ /* multiparts are never indexed, only their contents */
+ return FALSE;
+ }
+ storage = mailbox_get_storage(ctx->mail->box);
+ parser_context.user = mail_storage_get_user(storage);
+ parser_context.content_disposition = ctx->content_disposition;
+
+ if (fts_parser_init(&parser_context, &ctx->body_parser)) {
+ /* extract text using the the returned parser */
+ *binary_body_r = TRUE;
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART;
+ } else if (str_begins(parser_context.content_type, "text/") ||
+ str_begins(parser_context.content_type, "message/")) {
+ /* text body parts */
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART;
+ ctx->body_parser = fts_parser_text_init();
+ } else {
+ /* possibly binary */
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_BINARY_MIME_PARTS) == 0)
+ return FALSE;
+ *binary_body_r = TRUE;
+ key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY;
+ }
+ key.body_content_type = parser_context.content_type;
+ key.body_content_disposition = ctx->content_disposition;
+ ctx->cur_user_lang = NULL;
+ if (!fts_backend_update_set_build_key(ctx->update_ctx, &key)) {
+ if (ctx->body_parser != NULL)
+ (void)fts_parser_deinit(&ctx->body_parser, NULL);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static int
+fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size)
+{
+ struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer;
+ struct fts_filter *filter = ctx->cur_user_lang->filter;
+ const char *token, *error;
+ int ret = 1, ret2;
+
+ while (ret > 0) T_BEGIN {
+ ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error);
+ if (ret2 > 0 && filter != NULL)
+ ret2 = fts_filter_filter(filter, &token, &error);
+ if (ret2 < 0) {
+ mail_set_critical(ctx->mail,
+ "fts: Couldn't create indexable tokens: %s",
+ error);
+ }
+ if (ret2 > 0) {
+ if (fts_backend_update_build_more(ctx->update_ctx,
+ (const void *)token,
+ strlen(token)) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ ret = -1;
+ }
+ }
+ } T_END;
+ return ret;
+}
+
+static int
+fts_detect_language(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last,
+ const struct fts_language **lang_r)
+{
+ struct mail_user *user = ctx->update_ctx->backend->ns->user;
+ struct fts_language_list *lang_list = fts_user_get_language_list(user);
+ const struct fts_language *lang;
+ const char *error;
+
+ switch (fts_language_detect(lang_list, data, size, &lang, &error)) {
+ case FTS_LANGUAGE_RESULT_SHORT:
+ /* save the input so far and try again later */
+ buffer_append(ctx->pending_input, data, size);
+ if (last) {
+ /* we've run out of data. use the default language. */
+ *lang_r = fts_language_list_get_first(lang_list);
+ return 1;
+ }
+ return 0;
+ case FTS_LANGUAGE_RESULT_UNKNOWN:
+ /* use the default language */
+ *lang_r = fts_language_list_get_first(lang_list);
+ return 1;
+ case FTS_LANGUAGE_RESULT_OK:
+ *lang_r = lang;
+ return 1;
+ case FTS_LANGUAGE_RESULT_ERROR:
+ /* internal language detection library failure
+ (e.g. invalid config). don't index anything. */
+ mail_set_critical(ctx->mail,
+ "Language detection library initialization failed: %s",
+ error);
+ return -1;
+ default:
+ i_unreached();
+ }
+}
+
+static int
+fts_build_tokenized(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last)
+{
+ struct mail_user *user = ctx->update_ctx->backend->ns->user;
+ const struct fts_language *lang;
+ int ret;
+
+ if (ctx->cur_user_lang != NULL) {
+ /* we already have a language */
+ } else if ((ret = fts_detect_language(ctx, data, size, last, &lang)) < 0) {
+ return -1;
+ } else if (ret == 0) {
+ /* wait for more data */
+ return 0;
+ } else {
+ fts_mail_build_ctx_set_lang(ctx, fts_user_language_find(user, lang));
+
+ if (ctx->pending_input->used > 0) {
+ if (fts_build_add_tokens_with_filter(ctx,
+ ctx->pending_input->data,
+ ctx->pending_input->used) < 0)
+ return -1;
+ buffer_set_used_size(ctx->pending_input, 0);
+ }
+ }
+ if (fts_build_add_tokens_with_filter(ctx, data, size) < 0)
+ return -1;
+ if (last) {
+ if (fts_build_add_tokens_with_filter(ctx, NULL, 0) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+fts_build_full_words(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last)
+{
+ size_t i;
+
+ /* we'll need to send only full words to the backend */
+
+ if (ctx->word_buf != NULL && ctx->word_buf->used > 0) {
+ /* continuing previous word */
+ for (i = 0; i < size; i++) {
+ if (IS_WORD_WHITESPACE(data[i]))
+ break;
+ }
+ buffer_append(ctx->word_buf, data, i);
+ data += i;
+ size -= i;
+ if (size == 0 && ctx->word_buf->used < MAX_WORD_SIZE && !last) {
+ /* word is still not finished */
+ return 0;
+ }
+ /* we have a full word, index it */
+ if (fts_backend_update_build_more(ctx->update_ctx,
+ ctx->word_buf->data,
+ ctx->word_buf->used) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+ buffer_set_used_size(ctx->word_buf, 0);
+ }
+
+ /* find the boundary for last word */
+ if (last)
+ i = size;
+ else {
+ for (i = size; i > 0; i--) {
+ if (IS_WORD_WHITESPACE(data[i-1]))
+ break;
+ }
+ }
+
+ if (fts_backend_update_build_more(ctx->update_ctx, data, i) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+
+ if (i < size) {
+ if (ctx->word_buf == NULL) {
+ ctx->word_buf =
+ buffer_create_dynamic(default_pool, 128);
+ }
+ buffer_append(ctx->word_buf, data + i, size - i);
+ }
+ return 0;
+}
+
+static int fts_build_data(struct fts_mail_build_context *ctx,
+ const unsigned char *data, size_t size, bool last)
+{
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
+ return fts_build_tokenized(ctx, data, size, last);
+ } else if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_BUILD_FULL_WORDS) != 0) {
+ return fts_build_full_words(ctx, data, size, last);
+ } else {
+ if (fts_backend_update_build_more(ctx->update_ctx, data, size) < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+ return 0;
+ }
+}
+
+static int fts_build_body_block(struct fts_mail_build_context *ctx,
+ const struct message_block *block, bool last)
+{
+ i_assert(block->hdr == NULL);
+
+ return fts_build_data(ctx, block->data, block->size, last);
+}
+
+static int fts_body_parser_finish(struct fts_mail_build_context *ctx,
+ const char **retriable_err_msg_r,
+ bool *may_need_retry_r)
+{
+ struct message_block block;
+ const char *retriable_error;
+ int ret = 0;
+ int deinit_ret;
+ *may_need_retry_r = FALSE;
+
+ do {
+ i_zero(&block);
+ fts_parser_more(ctx->body_parser, &block);
+ if (fts_build_body_block(ctx, &block, FALSE) < 0) {
+ ret = -1;
+ break;
+ }
+ } while (block.size > 0);
+
+ deinit_ret = fts_parser_deinit(&ctx->body_parser, &retriable_error);
+ if (ret < 0) {
+ /* indexing already failed - we don't want to retry
+ in any case */
+ return -1;
+ }
+
+ if (deinit_ret == 0) {
+ /* retry the parsing */
+ *may_need_retry_r = TRUE;
+ *retriable_err_msg_r = retriable_error;
+ return -1;
+ }
+ if (deinit_ret < 0) {
+ mail_storage_set_internal_error(ctx->mail->box->storage);
+ return -1;
+ }
+ return 0;
+}
+
+static void
+load_header_filter(const char *key, struct fts_backend *backend,
+ ARRAY_TYPE(const_string) list, bool *matches_all_r)
+{
+ const char *str = mail_user_plugin_getenv(backend->ns->user, key);
+
+ *matches_all_r = FALSE;
+ if (str == NULL || *str == '\0')
+ return;
+
+ char **entries = p_strsplit_spaces(backend->header_filters.pool, str, " ");
+ for (char **entry = entries; *entry != NULL; ++entry) {
+ const char *value = str_lcase(*entry);
+ array_push_back(&list, &value);
+ if (*value == '*') {
+ *matches_all_r = TRUE;
+ break;
+ }
+ }
+ array_sort(&list, i_strcmp_p);
+}
+
+static struct fts_header_filters *
+load_header_filters(struct fts_backend *backend)
+{
+ struct fts_header_filters *filters = &backend->header_filters;
+ if (!filters->loaded) {
+ bool match_all;
+
+ /* match_all return ignored in includes */
+ load_header_filter("fts_header_includes", backend,
+ filters->includes, &match_all);
+
+ load_header_filter("fts_header_excludes", backend,
+ filters->excludes, &match_all);
+ filters->loaded = TRUE;
+ filters->exclude_is_default = match_all;
+ }
+ return filters;
+}
+
+/* This performs comparison between two strings, where the second one can end
+ * with the wildcard '*'. When the match reaches a '*' on the pitem side, zero
+ * (match) is returned regardles of the remaining characters.
+ *
+ * The function obeys the same lexicographic order as i_strcmp_p() and
+ * strcmp(), which is the reason for the casts to unsigned before comparing.
+ */
+static int ATTR_PURE
+header_prefix_cmp(const char *const *pkey, const char *const *pitem)
+{
+ const char *key = *pkey;
+ const char *item = *pitem;
+
+ while (*key == *item && *key != '\0') key++, item++;
+ return item[0] == '*' && item[1] == '\0' ? 0 :
+ (unsigned char)*key - (unsigned char)*item;
+}
+
+static bool
+is_header_indexable(const char *header_name, struct fts_backend *backend)
+{
+ bool indexable;
+ T_BEGIN {
+ struct fts_header_filters *filters = load_header_filters(backend);
+ const char *hdr = t_str_lcase(header_name);
+
+ if (array_bsearch(&filters->includes, &hdr, header_prefix_cmp) != NULL)
+ indexable = TRUE;
+ else if (filters->exclude_is_default ||
+ array_bsearch(&filters->excludes, &hdr, header_prefix_cmp) != NULL)
+ indexable = FALSE;
+ else
+ indexable = TRUE;
+ } T_END;
+ return indexable;
+}
+
+static int
+fts_build_mail_real(struct fts_backend_update_context *update_ctx,
+ struct mail *mail,
+ const char **retriable_err_msg_r,
+ bool *may_need_retry_r)
+{
+ const struct message_parser_settings parser_set = {
+ .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
+ };
+ struct fts_mail_build_context ctx;
+ struct istream *input;
+ struct message_parser_ctx *parser;
+ struct message_decoder_context *decoder;
+ struct message_block raw_block, block;
+ struct message_part *prev_part, *parts;
+ bool skip_body = FALSE, body_part = FALSE, body_added = FALSE;
+ bool binary_body;
+ const char *error;
+ int ret;
+
+ *may_need_retry_r = FALSE;
+ if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) {
+ if (mail->expunged)
+ return 0;
+ mail_set_critical(mail, "Failed to read stream: %s",
+ mailbox_get_last_internal_error(mail->box, NULL));
+ return -1;
+ }
+
+ i_zero(&ctx);
+ ctx.update_ctx = update_ctx;
+ ctx.mail = mail;
+ if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
+ ctx.pending_input = buffer_create_dynamic(default_pool, 128);
+
+ prev_part = NULL;
+ parser = message_parser_init(pool_datastack_create(), input, &parser_set);
+
+ decoder = message_decoder_init(update_ctx->normalizer, 0);
+ for (;;) {
+ ret = message_parser_parse_next_block(parser, &raw_block);
+ i_assert(ret != 0);
+ if (ret < 0) {
+ if (input->stream_errno == 0)
+ ret = 0;
+ else {
+ mail_set_critical(mail, "read(%s) failed: %s",
+ i_stream_get_name(input),
+ i_stream_get_error(input));
+ }
+ break;
+ }
+
+ if (raw_block.part != prev_part) {
+ /* body part changed. we're now parsing the end of
+ boundary, possibly followed by message epilogue */
+ if (ctx.body_parser != NULL) {
+ if (fts_body_parser_finish(&ctx, retriable_err_msg_r,
+ may_need_retry_r) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+ message_decoder_set_return_binary(decoder, FALSE);
+ fts_backend_update_unset_build_key(update_ctx);
+ prev_part = raw_block.part;
+ i_free_and_null(ctx.content_type);
+ i_free_and_null(ctx.content_disposition);
+
+ if (raw_block.size != 0) {
+ /* multipart. skip until beginning of next
+ part's headers */
+ skip_body = TRUE;
+ }
+ }
+
+ if (raw_block.hdr != NULL) {
+ /* always handle headers */
+ } else if (raw_block.size == 0) {
+ /* end of headers */
+ skip_body = !fts_build_body_begin(&ctx, raw_block.part,
+ &binary_body);
+ if (binary_body)
+ message_decoder_set_return_binary(decoder, TRUE);
+ body_part = TRUE;
+ } else {
+ if (skip_body)
+ continue;
+ }
+
+ if (!message_decoder_decode_next_block(decoder, &raw_block,
+ &block))
+ continue;
+
+ if (block.hdr != NULL) {
+ fts_parse_mail_header(&ctx, &raw_block);
+ if (is_header_indexable(block.hdr->name, update_ctx->backend) &&
+ fts_build_mail_header(&ctx, &block) < 0) {
+ ret = -1;
+ break;
+ }
+ } else if (block.size == 0) {
+ /* end of headers */
+ } else {
+ i_assert(body_part);
+ if (ctx.body_parser != NULL)
+ fts_parser_more(ctx.body_parser, &block);
+ if (fts_build_body_block(&ctx, &block, FALSE) < 0) {
+ ret = -1;
+ break;
+ }
+ body_added = TRUE;
+ }
+ }
+ if (ctx.body_parser != NULL) {
+ if (ret == 0)
+ ret = fts_body_parser_finish(&ctx, retriable_err_msg_r,
+ may_need_retry_r);
+ else
+ (void)fts_parser_deinit(&ctx.body_parser, NULL);
+ }
+ if (ret == 0 && body_part && !skip_body && !body_added) {
+ /* make sure body is added even when it doesn't exist */
+ block.data = NULL; block.size = 0;
+ ret = fts_build_body_block(&ctx, &block, TRUE);
+ }
+ if (message_parser_deinit_from_parts(&parser, &parts, &error) < 0)
+ index_mail_set_message_parts_corrupted(mail, error);
+ message_decoder_deinit(&decoder);
+ i_free(ctx.content_type);
+ i_free(ctx.content_disposition);
+ buffer_free(&ctx.word_buf);
+ buffer_free(&ctx.pending_input);
+ return ret < 0 ? -1 : 1;
+}
+
+int fts_build_mail(struct fts_backend_update_context *update_ctx,
+ struct mail *mail)
+{
+ int ret;
+ /* Number of attempts to be taken if retry is needed */
+ unsigned int attempts = 2;
+ const char *retriable_err_msg;
+ bool may_need_retry;
+
+ T_BEGIN {
+ while ((ret = fts_build_mail_real(update_ctx, mail,
+ &retriable_err_msg,
+ &may_need_retry)) < 0 &&
+ may_need_retry) {
+ if (--attempts == 0) {
+ /* Log this as info instead of as error,
+ because e.g. Tika doesn't differentiate
+ between temporary errors and invalid
+ document input. */
+ i_info("%s - ignoring", retriable_err_msg);
+ ret = 0;
+ break;
+ }
+ }
+ } T_END;
+ return ret;
+}
diff --git a/src/plugins/fts/fts-build-mail.h b/src/plugins/fts/fts-build-mail.h
new file mode 100644
index 0000000..aed4413
--- /dev/null
+++ b/src/plugins/fts/fts-build-mail.h
@@ -0,0 +1,9 @@
+#ifndef FTS_BUILD_MAIL_H
+#define FTS_BUILD_MAIL_H
+
+/* Build indexes for the given mail. Returns 0 on success, -1 on error.
+ The error is set to mail's storage. */
+int fts_build_mail(struct fts_backend_update_context *update_ctx,
+ struct mail *mail);
+
+#endif
diff --git a/src/plugins/fts/fts-expunge-log.c b/src/plugins/fts/fts-expunge-log.c
new file mode 100644
index 0000000..d39ceea
--- /dev/null
+++ b/src/plugins/fts/fts-expunge-log.c
@@ -0,0 +1,617 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "crc32.h"
+#include "hash.h"
+#include "istream.h"
+#include "write-full.h"
+#include "seq-range-array.h"
+#include "mail-storage.h"
+#include "fts-expunge-log.h"
+
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+struct fts_expunge_log_record {
+ /* CRC32 of this entire record (except this checksum) */
+ uint32_t checksum;
+ /* Size of this entire record */
+ uint32_t record_size;
+
+ /* Mailbox GUID */
+ guid_128_t guid;
+ /* { uid1, uid2 } pairs */
+ /* uint32_t expunge_uid_ranges[]; */
+
+ /* Total number of messages expunged so far in this log */
+ /* uint32_t expunge_count; */
+};
+
+struct fts_expunge_log {
+ char *path;
+
+ int fd;
+ struct stat st;
+};
+
+struct fts_expunge_log_mailbox {
+ guid_128_t guid;
+ ARRAY_TYPE(seq_range) uids;
+ unsigned uids_count;
+};
+
+struct fts_expunge_log_append_ctx {
+ struct fts_expunge_log *log;
+ pool_t pool;
+
+ HASH_TABLE(uint8_t *, struct fts_expunge_log_mailbox *) mailboxes;
+ struct fts_expunge_log_mailbox *prev_mailbox;
+
+ bool failed;
+};
+
+struct fts_expunge_log_read_ctx {
+ struct fts_expunge_log *log;
+
+ struct istream *input;
+ buffer_t buffer;
+ struct fts_expunge_log_read_record read_rec;
+
+ bool failed;
+ bool corrupted;
+ bool unlink;
+};
+
+struct fts_expunge_log *fts_expunge_log_init(const char *path)
+{
+ struct fts_expunge_log *log;
+
+ log = i_new(struct fts_expunge_log, 1);
+ log->path = i_strdup(path);
+ log->fd = -1;
+ return log;
+}
+
+void fts_expunge_log_deinit(struct fts_expunge_log **_log)
+{
+ struct fts_expunge_log *log = *_log;
+
+ *_log = NULL;
+ i_close_fd(&log->fd);
+ i_free(log->path);
+ i_free(log);
+}
+
+static int fts_expunge_log_open(struct fts_expunge_log *log, bool create)
+{
+ int fd;
+
+ i_assert(log->fd == -1);
+
+ /* FIXME: use proper permissions */
+ fd = open(log->path, O_RDWR | O_APPEND | (create ? O_CREAT : 0), 0600);
+ if (fd == -1) {
+ if (errno == ENOENT && !create)
+ return 0;
+
+ i_error("open(%s) failed: %m", log->path);
+ return -1;
+ }
+ if (fstat(fd, &log->st) < 0) {
+ i_error("fstat(%s) failed: %m", log->path);
+ i_close_fd(&fd);
+ return -1;
+ }
+ log->fd = fd;
+ return 1;
+}
+
+static int
+fts_expunge_log_reopen_if_needed(struct fts_expunge_log *log, bool create)
+{
+ struct stat st;
+
+ if (log->fd == -1)
+ return fts_expunge_log_open(log, create);
+
+ if (stat(log->path, &st) == 0) {
+ if (st.st_ino == log->st.st_ino &&
+ CMP_DEV_T(st.st_dev, log->st.st_dev)) {
+ /* same file */
+ return 0;
+ }
+ /* file changed */
+ } else if (errno == ENOENT) {
+ /* recreate the file */
+ } else {
+ i_error("stat(%s) failed: %m", log->path);
+ return -1;
+ }
+ if (close(log->fd) < 0)
+ i_error("close(%s) failed: %m", log->path);
+ log->fd = -1;
+ return fts_expunge_log_open(log, create);
+}
+
+static int
+fts_expunge_log_read_expunge_count(struct fts_expunge_log *log,
+ uint32_t *expunge_count_r)
+{
+ ssize_t ret;
+
+ i_assert(log->fd != -1);
+
+ if (fstat(log->fd, &log->st) < 0) {
+ i_error("fstat(%s) failed: %m", log->path);
+ return -1;
+ }
+ if ((uoff_t)log->st.st_size < sizeof(*expunge_count_r)) {
+ *expunge_count_r = 0;
+ return 0;
+ }
+ /* we'll assume that write()s atomically grow the file size, as
+ O_APPEND almost guarantees. even if not, having a race condition
+ isn't the end of the world. the expunge count is simply read wrong
+ and fts optimize is performed earlier or later than intended. */
+ ret = pread(log->fd, expunge_count_r, sizeof(*expunge_count_r),
+ log->st.st_size - 4);
+ if (ret < 0) {
+ i_error("pread(%s) failed: %m", log->path);
+ return -1;
+ }
+ if (ret != sizeof(*expunge_count_r)) {
+ i_error("pread(%s) read only %d of %d bytes", log->path,
+ (int)ret, (int)sizeof(*expunge_count_r));
+ return -1;
+ }
+ return 0;
+}
+
+struct fts_expunge_log_append_ctx *
+fts_expunge_log_append_begin(struct fts_expunge_log *log)
+{
+ struct fts_expunge_log_append_ctx *ctx;
+ pool_t pool;
+
+ pool = pool_alloconly_create("fts expunge log append", 1024);
+ ctx = p_new(pool, struct fts_expunge_log_append_ctx, 1);
+ ctx->log = log;
+ ctx->pool = pool;
+ hash_table_create(&ctx->mailboxes, pool, 0, guid_128_hash, guid_128_cmp);
+
+ if (log != NULL && fts_expunge_log_reopen_if_needed(log, TRUE) < 0)
+ ctx->failed = TRUE;
+ return ctx;
+}
+
+static struct fts_expunge_log_mailbox *
+fts_expunge_log_mailbox_alloc(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid)
+{
+ uint8_t *guid_p;
+ struct fts_expunge_log_mailbox *mailbox;
+
+ mailbox = p_new(ctx->pool, struct fts_expunge_log_mailbox, 1);
+ guid_128_copy(mailbox->guid, mailbox_guid);
+ p_array_init(&mailbox->uids, ctx->pool, 16);
+
+ guid_p = mailbox->guid;
+ hash_table_insert(ctx->mailboxes, guid_p, mailbox);
+ return mailbox;
+}
+
+static struct fts_expunge_log_mailbox *
+fts_expunge_log_append_mailbox(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid)
+{
+ const uint8_t *guid_p = mailbox_guid;
+ struct fts_expunge_log_mailbox *mailbox;
+
+ if (ctx->prev_mailbox != NULL &&
+ guid_128_equals(mailbox_guid, ctx->prev_mailbox->guid))
+ mailbox = ctx->prev_mailbox;
+ else {
+ mailbox = hash_table_lookup(ctx->mailboxes, guid_p);
+ if (mailbox == NULL)
+ mailbox = fts_expunge_log_mailbox_alloc(ctx, mailbox_guid);
+ ctx->prev_mailbox = mailbox;
+ }
+ return mailbox;
+}
+void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid,
+ uint32_t uid)
+{
+ struct fts_expunge_log_mailbox *mailbox;
+
+ mailbox = fts_expunge_log_append_mailbox(ctx, mailbox_guid);
+ if (!seq_range_array_add(&mailbox->uids, uid))
+ mailbox->uids_count++;
+}
+void fts_expunge_log_append_range(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid,
+ const struct seq_range *uids)
+{
+ struct fts_expunge_log_mailbox *mailbox;
+
+ mailbox = fts_expunge_log_append_mailbox(ctx, mailbox_guid);
+ mailbox->uids_count += seq_range_array_add_range_count(&mailbox->uids,
+ uids->seq1, uids->seq2);
+ /* To be honest, an unbacked log doesn't need to maintain the uids_count,
+ but we don't know here if we're supporting an unbacked log or not, so we
+ have to maintain the value, just in case.
+ At the moment, the only caller of this function is for unbacked logs. */
+}
+void fts_expunge_log_append_record(struct fts_expunge_log_append_ctx *ctx,
+ const struct fts_expunge_log_read_record *record)
+{
+ const struct seq_range *range;
+ /* FIXME: Optimise with a merge */
+ array_foreach(&record->uids, range)
+ fts_expunge_log_append_range(ctx, record->mailbox_guid, range);
+}
+static void fts_expunge_log_append_mailbox_record(struct fts_expunge_log_append_ctx *ctx,
+ struct fts_expunge_log_mailbox *mailbox)
+{
+ const struct seq_range *range;
+ /* FIXME: Optimise with a merge */
+ array_foreach(&mailbox->uids, range)
+ fts_expunge_log_append_range(ctx, mailbox->guid, range);
+}
+
+static void
+fts_expunge_log_export(struct fts_expunge_log_append_ctx *ctx,
+ uint32_t expunge_count, buffer_t *output)
+{
+ struct hash_iterate_context *iter;
+ uint8_t *guid_p;
+ struct fts_expunge_log_mailbox *mailbox;
+ struct fts_expunge_log_record *rec;
+ size_t rec_offset;
+
+ iter = hash_table_iterate_init(ctx->mailboxes);
+ while (hash_table_iterate(iter, ctx->mailboxes, &guid_p, &mailbox)) {
+ rec_offset = output->used;
+ rec = buffer_append_space_unsafe(output, sizeof(*rec));
+ memcpy(rec->guid, mailbox->guid, sizeof(rec->guid));
+
+ /* uint32_t expunge_uid_ranges[]; */
+ buffer_append(output, array_front(&mailbox->uids),
+ array_count(&mailbox->uids) *
+ sizeof(struct seq_range));
+ /* uint32_t expunge_count; */
+ expunge_count += mailbox->uids_count;
+ buffer_append(output, &expunge_count, sizeof(expunge_count));
+
+ /* update the header now that we know the record contents */
+ rec = buffer_get_space_unsafe(output, rec_offset,
+ output->used - rec_offset);
+ rec->record_size = output->used - rec_offset;
+ rec->checksum = crc32_data(&rec->record_size,
+ rec->record_size -
+ sizeof(rec->checksum));
+ }
+ hash_table_iterate_deinit(&iter);
+}
+
+static int
+fts_expunge_log_write(struct fts_expunge_log_append_ctx *ctx)
+{
+ struct fts_expunge_log *log = ctx->log;
+ buffer_t *buf;
+ uint32_t expunge_count, *e;
+ int ret;
+
+ /* Unbacked expunge logs cannot be written, by definition */
+ i_assert(log != NULL);
+
+ /* try to append to the latest file */
+ if (fts_expunge_log_reopen_if_needed(log, TRUE) < 0)
+ return -1;
+
+ if (fts_expunge_log_read_expunge_count(log, &expunge_count) < 0)
+ return -1;
+
+ buf = buffer_create_dynamic(default_pool, 1024);
+ fts_expunge_log_export(ctx, expunge_count, buf);
+ /* the file was opened with O_APPEND, so this write() should be
+ appended atomically without any need for locking. */
+ for (;;) {
+ if (write_full(log->fd, buf->data, buf->used) < 0) {
+ i_error("write(%s) failed: %m", log->path);
+ if (ftruncate(log->fd, log->st.st_size) < 0)
+ i_error("ftruncate(%s) failed: %m", log->path);
+ }
+ if ((ret = fts_expunge_log_reopen_if_needed(log, TRUE)) <= 0)
+ break;
+ /* the log was unlinked, so we'll need to write again to
+ the new file. the expunge_count needs to be reset to zero
+ from here. */
+ e = buffer_get_space_unsafe(buf, buf->used - sizeof(uint32_t),
+ sizeof(uint32_t));
+ i_assert(*e > expunge_count);
+ *e -= expunge_count;
+ expunge_count = 0;
+ }
+ buffer_free(&buf);
+
+ if (ret == 0) {
+ /* finish by closing the log. this forces NFS to flush the
+ changes to disk without our having to explicitly play with
+ fsync() */
+ if (close(log->fd) < 0) {
+ /* FIXME: we should ftruncate() in case there
+ were partial writes.. */
+ i_error("close(%s) failed: %m", log->path);
+ ret = -1;
+ }
+ log->fd = -1;
+ }
+ return ret;
+}
+
+static int fts_expunge_log_append_finalize(struct fts_expunge_log_append_ctx **_ctx,
+ bool commit)
+{
+ struct fts_expunge_log_append_ctx *ctx = *_ctx;
+ int ret = ctx->failed ? -1 : 0;
+
+ *_ctx = NULL;
+ if (commit && ret == 0)
+ ret = fts_expunge_log_write(ctx);
+
+ hash_table_destroy(&ctx->mailboxes);
+ pool_unref(&ctx->pool);
+ return ret;
+}
+
+int fts_expunge_log_uid_count(struct fts_expunge_log *log,
+ unsigned int *expunges_r)
+{
+ int ret;
+
+ if ((ret = fts_expunge_log_reopen_if_needed(log, FALSE)) <= 0) {
+ *expunges_r = 0;
+ return ret;
+ }
+
+ return fts_expunge_log_read_expunge_count(log, expunges_r);
+}
+
+int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **_ctx)
+{
+ return fts_expunge_log_append_finalize(_ctx, TRUE);
+}
+
+int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **_ctx)
+{
+ return fts_expunge_log_append_finalize(_ctx, FALSE);
+}
+
+struct fts_expunge_log_read_ctx *
+fts_expunge_log_read_begin(struct fts_expunge_log *log)
+{
+ struct fts_expunge_log_read_ctx *ctx;
+
+ ctx = i_new(struct fts_expunge_log_read_ctx, 1);
+ ctx->log = log;
+ if (fts_expunge_log_reopen_if_needed(log, FALSE) < 0)
+ ctx->failed = TRUE;
+ else if (log->fd != -1)
+ ctx->input = i_stream_create_fd(log->fd, SIZE_MAX);
+ ctx->unlink = TRUE;
+ return ctx;
+}
+
+static bool
+fts_expunge_log_record_size_is_valid(const struct fts_expunge_log_record *rec,
+ unsigned int *uids_size_r)
+{
+ if (rec->record_size < sizeof(*rec) + sizeof(uint32_t)*3)
+ return FALSE;
+ *uids_size_r = rec->record_size - sizeof(*rec) - sizeof(uint32_t);
+ return *uids_size_r % sizeof(uint32_t)*2 == 0;
+}
+
+static void
+fts_expunge_log_read_failure(struct fts_expunge_log_read_ctx *ctx,
+ unsigned int wanted_size)
+{
+ size_t size;
+
+ if (ctx->input->stream_errno != 0) {
+ ctx->failed = TRUE;
+ i_error("read(%s) failed: %s", ctx->log->path,
+ i_stream_get_error(ctx->input));
+ } else {
+ size = i_stream_get_data_size(ctx->input);
+ ctx->corrupted = TRUE;
+ i_error("Corrupted fts expunge log %s: "
+ "Unexpected EOF (read %zu / %u bytes)",
+ ctx->log->path, size, wanted_size);
+ }
+}
+
+const struct fts_expunge_log_read_record *
+fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx)
+{
+ const unsigned char *data;
+ const struct fts_expunge_log_record *rec;
+ unsigned int uids_size;
+ size_t size;
+ uint32_t checksum;
+
+ if (ctx->input == NULL)
+ return NULL;
+
+ /* initial read to try to get the record */
+ (void)i_stream_read_bytes(ctx->input, &data, &size, IO_BLOCK_SIZE);
+ if (size == 0 && ctx->input->stream_errno == 0) {
+ /* expected EOF - mark the file as read by unlinking it */
+ if (ctx->unlink)
+ i_unlink_if_exists(ctx->log->path);
+
+ /* try reading again, in case something new was written */
+ i_stream_sync(ctx->input);
+ (void)i_stream_read_bytes(ctx->input, &data, &size,
+ IO_BLOCK_SIZE);
+ }
+ if (size < sizeof(*rec)) {
+ if (size == 0 && ctx->input->stream_errno == 0) {
+ /* expected EOF */
+ return NULL;
+ }
+ fts_expunge_log_read_failure(ctx, sizeof(*rec));
+ return NULL;
+ }
+ rec = (const void *)data;
+
+ if (!fts_expunge_log_record_size_is_valid(rec, &uids_size)) {
+ ctx->corrupted = TRUE;
+ i_error("Corrupted fts expunge log %s: "
+ "Invalid record size: %u",
+ ctx->log->path, rec->record_size);
+ return NULL;
+ }
+
+ /* read the entire record */
+ while (size < rec->record_size) {
+ if (i_stream_read_bytes(ctx->input, &data, &size, rec->record_size) < 0) {
+ fts_expunge_log_read_failure(ctx, rec->record_size);
+ return NULL;
+ }
+ rec = (const void *)data;
+ }
+
+ /* verify that the record checksum is valid */
+ checksum = crc32_data(&rec->record_size,
+ rec->record_size - sizeof(rec->checksum));
+ if (checksum != rec->checksum) {
+ ctx->corrupted = TRUE;
+ i_error("Corrupted fts expunge log %s: "
+ "Record checksum mismatch: %u != %u",
+ ctx->log->path, checksum, rec->checksum);
+ return NULL;
+ }
+
+ memcpy(ctx->read_rec.mailbox_guid, rec->guid,
+ sizeof(ctx->read_rec.mailbox_guid));
+ /* create the UIDs array by pointing it directly into input
+ stream's buffer */
+ buffer_create_from_const_data(&ctx->buffer, rec + 1, uids_size);
+ array_create_from_buffer(&ctx->read_rec.uids, &ctx->buffer,
+ sizeof(struct seq_range));
+
+ i_stream_skip(ctx->input, rec->record_size);
+ return &ctx->read_rec;
+}
+
+int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **_ctx)
+{
+ struct fts_expunge_log_read_ctx *ctx = *_ctx;
+ int ret = ctx->failed ? -1 : (ctx->corrupted ? 0 : 1);
+
+ *_ctx = NULL;
+
+ if (ctx->corrupted) {
+ if (ctx->unlink)
+ i_unlink_if_exists(ctx->log->path);
+ }
+
+ i_stream_unref(&ctx->input);
+ i_free(ctx);
+ return ret;
+}
+
+int fts_expunge_log_flatten(const char *path,
+ struct fts_expunge_log_append_ctx **flattened_r)
+{
+ struct fts_expunge_log *read;
+ struct fts_expunge_log_read_ctx *read_ctx;
+ const struct fts_expunge_log_read_record *record;
+ struct fts_expunge_log_append_ctx *append;
+ int ret;
+
+ i_assert(path != NULL && flattened_r != NULL);
+ read = fts_expunge_log_init(path);
+
+ read_ctx = fts_expunge_log_read_begin(read);
+ read_ctx->unlink = FALSE;
+
+ append = fts_expunge_log_append_begin(NULL);
+ while((record = fts_expunge_log_read_next(read_ctx)) != NULL) {
+ fts_expunge_log_append_record(append, record);
+ }
+
+ if ((ret = fts_expunge_log_read_end(&read_ctx)) > 0)
+ *flattened_r = append;
+ fts_expunge_log_deinit(&read);
+
+ return ret;
+}
+bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid, uint32_t uid)
+{
+ const struct fts_expunge_log_mailbox *mailbox;
+ const uint8_t *guid_p = mailbox_guid;
+
+ mailbox = hash_table_lookup(ctx->mailboxes, guid_p);
+ if (mailbox == NULL)
+ return FALSE;
+ return seq_range_exists(&mailbox->uids, uid);
+}
+int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *from,
+ const struct fts_expunge_log_read_record *record)
+{
+ const uint8_t *guid_p = record->mailbox_guid;
+ struct fts_expunge_log_mailbox *mailbox = hash_table_lookup(from->mailboxes, guid_p);
+ if (mailbox == NULL)
+ return 0; /* may only remove things that exist */
+
+ mailbox->uids_count -= seq_range_array_remove_seq_range(&mailbox->uids, &record->uids);
+ return 1;
+}
+int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from,
+ struct fts_expunge_log *subtract)
+{
+ unsigned int failures = 0;
+ struct fts_expunge_log_read_ctx *read_ctx = fts_expunge_log_read_begin(subtract);
+ read_ctx->unlink = FALSE;
+
+ const struct fts_expunge_log_read_record *record;
+ while ((record = fts_expunge_log_read_next(read_ctx)) != NULL) {
+ if (fts_expunge_log_append_remove(from, record) <= 0)
+ failures++;
+ }
+ if (failures > 0)
+ i_warning("fts: Expunge log subtract ignored %u nonexistent mailbox GUIDs",
+ failures);
+ return fts_expunge_log_read_end(&read_ctx);
+}
+/* It could be argued that somehow adding a log (file) to the append context
+ and then calling the _write() helper would be easier. But then there's the
+ _commit() vs. _abort() cleanup that would need to be addressed. Just creating
+ a copy is simpler. */
+int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *read_log,
+ const char *path)
+{
+ int ret;
+ struct fts_expunge_log *nlog = fts_expunge_log_init(path);
+ struct fts_expunge_log_append_ctx *nappend = fts_expunge_log_append_begin(nlog);
+
+ struct hash_iterate_context *iter;
+ uint8_t *guid_p;
+ struct fts_expunge_log_mailbox *mailbox;
+
+ iter = hash_table_iterate_init(read_log->mailboxes);
+ while (hash_table_iterate(iter, read_log->mailboxes, &guid_p, &mailbox))
+ fts_expunge_log_append_mailbox_record(nappend, mailbox);
+
+ hash_table_iterate_deinit(&iter);
+ ret = fts_expunge_log_append_commit(&nappend);
+ fts_expunge_log_deinit(&nlog);
+
+ return ret;
+}
diff --git a/src/plugins/fts/fts-expunge-log.h b/src/plugins/fts/fts-expunge-log.h
new file mode 100644
index 0000000..cc15f29
--- /dev/null
+++ b/src/plugins/fts/fts-expunge-log.h
@@ -0,0 +1,58 @@
+#ifndef FTS_EXPUNGE_LOG
+#define FTS_EXPUNGE_LOG
+
+#include "seq-range-array.h"
+#include "guid.h"
+
+struct fts_expunge_log_read_record {
+ guid_128_t mailbox_guid;
+ ARRAY_TYPE(seq_range) uids;
+};
+
+struct fts_expunge_log *fts_expunge_log_init(const char *path);
+void fts_expunge_log_deinit(struct fts_expunge_log **log);
+
+struct fts_expunge_log_append_ctx *
+fts_expunge_log_append_begin(struct fts_expunge_log *log);
+void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid,
+ uint32_t uid);
+void fts_expunge_log_append_range(struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid,
+ const struct seq_range *uids);
+void fts_expunge_log_append_record(struct fts_expunge_log_append_ctx *ctx,
+ const struct fts_expunge_log_read_record *record);
+/* In-memory flattened structures may have records removed from them,
+ file-backed ones may not. Non-existence of UIDs is not an error,
+ non-existence of mailbox GUID causes an error return of 0. */
+int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *ctx,
+ const struct fts_expunge_log_read_record *record);
+int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **ctx);
+/* Do not commit non-backed structures, abort them after use. */
+int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **ctx);
+
+int fts_expunge_log_uid_count(struct fts_expunge_log *log,
+ unsigned int *expunges_r);
+
+struct fts_expunge_log_read_ctx *
+fts_expunge_log_read_begin(struct fts_expunge_log *log);
+const struct fts_expunge_log_read_record *
+fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx);
+/* Returns 1 if all ok, 0 if there was corruption, -1 if I/O error.
+ If end() is called before reading all records, the log isn't unlinked. */
+int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **ctx);
+
+/* Read an entire log file, and flatten it into one hash of arrays.
+ The struct it returns cannot be written, as it has no backing store */
+int fts_expunge_log_flatten(const char *path,
+ struct fts_expunge_log_append_ctx **flattened_r);
+bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx,
+ const guid_128_t mailbox_guid, uint32_t uid);
+/* Modify in-place a flattened log. If non-existent mailbox GUIDs are
+ encountered, a warning will be logged. */
+int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from,
+ struct fts_expunge_log *subtract);
+/* Write a modified flattened log as a new file. */
+int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *flattened,
+ const char *path);
+#endif
diff --git a/src/plugins/fts/fts-indexer.c b/src/plugins/fts/fts-indexer.c
new file mode 100644
index 0000000..aca23c9
--- /dev/null
+++ b/src/plugins/fts/fts-indexer.c
@@ -0,0 +1,300 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "ioloop.h"
+#include "connection.h"
+#include "write-full.h"
+#include "istream.h"
+#include "ostream.h"
+#include "strescape.h"
+#include "time-util.h"
+#include "settings-parser.h"
+#include "mail-user.h"
+#include "mail-storage-private.h"
+#include "fts-api.h"
+#include "fts-indexer.h"
+
+#define INDEXER_NOTIFY_INTERVAL_SECS 10
+#define INDEXER_SOCKET_NAME "indexer"
+#define INDEXER_WAIT_MSECS 250
+
+struct fts_indexer_context {
+ struct connection conn;
+
+ struct mailbox *box;
+ struct ioloop *ioloop;
+
+ struct timeval search_start_time, last_notify;
+ unsigned int percentage;
+ struct connection_list *connection_list;
+
+ bool notified:1;
+ bool failed:1;
+ bool completed:1;
+};
+
+static void fts_indexer_notify(struct fts_indexer_context *ctx)
+{
+ unsigned long long elapsed_msecs, est_total_msecs;
+ unsigned int eta_secs;
+
+ if (ioloop_time - ctx->last_notify.tv_sec < INDEXER_NOTIFY_INTERVAL_SECS)
+ return;
+ ctx->last_notify = ioloop_timeval;
+
+ if (ctx->box->storage->callbacks.notify_ok == NULL ||
+ ctx->percentage == 0)
+ return;
+
+ elapsed_msecs = timeval_diff_msecs(&ioloop_timeval,
+ &ctx->search_start_time);
+ est_total_msecs = elapsed_msecs * 100 / ctx->percentage;
+ eta_secs = (est_total_msecs - elapsed_msecs) / 1000;
+
+ T_BEGIN {
+ const char *text;
+
+ text = t_strdup_printf("Indexed %d%% of the mailbox, "
+ "ETA %d:%02d", ctx->percentage,
+ eta_secs/60, eta_secs%60);
+ ctx->box->storage->callbacks.
+ notify_ok(ctx->box, text,
+ ctx->box->storage->callback_context);
+ ctx->notified = TRUE;
+ } T_END;
+}
+
+static int fts_indexer_more_int(struct fts_indexer_context *ctx)
+{
+ struct ioloop *prev_ioloop = current_ioloop;
+ struct timeout *to;
+
+ if (ctx->failed)
+ return -1;
+ if (ctx->completed)
+ return 1;
+
+ /* wait for a while for the reply. FIXME: once search API supports
+ asynchronous waits, get rid of this wait and use the mail IO loop */
+ io_loop_set_current(ctx->ioloop);
+ to = timeout_add_short(INDEXER_WAIT_MSECS, io_loop_stop, ctx->ioloop);
+ io_loop_run(ctx->ioloop);
+ timeout_remove(&to);
+ io_loop_set_current(prev_ioloop);
+
+ if (ctx->failed)
+ return -1;
+ if (ctx->completed)
+ return 1;
+ return 0;
+}
+
+int fts_indexer_more(struct fts_indexer_context *ctx)
+{
+ int ret;
+
+ if ((ret = fts_indexer_more_int(ctx)) < 0) {
+ /* If failed is already set, the code has had a chance to
+ * set an internal error already, i.e. MAIL_ERROR_INUSE. */
+ if (!ctx->failed)
+ mail_storage_set_internal_error(ctx->box->storage);
+ ctx->failed = TRUE;
+ return -1;
+ }
+
+ if (ret == 0)
+ fts_indexer_notify(ctx);
+
+ return ret;
+}
+
+static void fts_indexer_destroy(struct connection *conn)
+{
+ struct fts_indexer_context *ctx =
+ container_of(conn, struct fts_indexer_context, conn);
+ connection_deinit(conn);
+ if (!ctx->completed)
+ ctx->failed = TRUE;
+ ctx->completed = TRUE;
+}
+
+int fts_indexer_deinit(struct fts_indexer_context **_ctx)
+{
+ struct fts_indexer_context *ctx = *_ctx;
+ i_assert(ctx != NULL);
+ *_ctx = NULL;
+ if (!ctx->completed)
+ ctx->failed = TRUE;
+ int ret = ctx->failed ? -1 : 0;
+ if (ctx->notified) {
+ /* we notified at least once */
+ ctx->box->storage->callbacks.
+ notify_ok(ctx->box, "Mailbox indexing finished",
+ ctx->box->storage->callback_context);
+ }
+ connection_list_deinit(&ctx->connection_list);
+ io_loop_set_current(ctx->ioloop);
+ io_loop_destroy(&ctx->ioloop);
+ i_free(ctx);
+ return ret;
+}
+
+static int
+fts_indexer_input_args(struct connection *conn, const char *const *args)
+{
+ struct fts_indexer_context *ctx =
+ container_of(conn, struct fts_indexer_context, conn);
+ int percentage;
+ if (args[1] == NULL) {
+ e_error(conn->event, "indexer sent invalid reply");
+ return -1;
+ }
+ if (strcmp(args[0], "1") != 0) {
+ e_error(conn->event, "indexer sent invalid reply");
+ return -1;
+ }
+ if (strcmp(args[1], "OK") == 0)
+ return 1;
+ if (str_to_int(args[1], &percentage) < 0) {
+ e_error(conn->event, "indexer sent invalid progress: %s", args[1]);
+ ctx->failed = TRUE;
+ return -1;
+ }
+ if (percentage < 0) {
+ e_error(ctx->box->event, "indexer failed to index mailbox");
+ ctx->failed = TRUE;
+ return -1;
+ }
+ ctx->percentage = percentage;
+ if (ctx->percentage == 100)
+ ctx->completed = TRUE;
+ return 1;
+}
+
+static void fts_indexer_client_connected(struct connection *conn, bool success)
+{
+ struct fts_indexer_context *ctx =
+ container_of(conn, struct fts_indexer_context, conn);
+ if (!success) {
+ ctx->completed = TRUE;
+ ctx->failed = TRUE;
+ return;
+ }
+ ctx->failed = ctx->completed = FALSE;
+ const char *cmd = t_strdup_printf("PREPEND\t1\t%s\t%s\t0\t%s\n",
+ str_tabescape(ctx->box->storage->user->username),
+ str_tabescape(ctx->box->vname),
+ str_tabescape(ctx->box->storage->user->session_id));
+ o_stream_nsend_str(conn->output, cmd);
+}
+
+static void fts_indexer_idle_timeout(struct connection *conn)
+{
+ struct fts_indexer_context *ctx =
+ container_of(conn, struct fts_indexer_context, conn);
+ mail_storage_set_error(ctx->box->storage, MAIL_ERROR_INUSE,
+ "Timeout while waiting for indexing to finish");
+ ctx->failed = TRUE;
+ connection_disconnect(conn);
+}
+
+static const struct connection_settings indexer_client_set =
+{
+ .service_name_in = "indexer",
+ .service_name_out = "indexer",
+ .major_version = 1,
+ .minor_version = 0,
+ .client_connect_timeout_msecs = 2000,
+ .input_max_size = SIZE_MAX,
+ .output_max_size = IO_BLOCK_SIZE,
+ .client = TRUE,
+};
+
+static const struct connection_vfuncs indexer_client_vfuncs =
+{
+ .destroy = fts_indexer_destroy,
+ .client_connected = fts_indexer_client_connected,
+ .input_args = fts_indexer_input_args,
+ .idle_timeout = fts_indexer_idle_timeout,
+};
+
+int fts_indexer_init(struct fts_backend *backend, struct mailbox *box,
+ struct fts_indexer_context **ctx_r)
+{
+ struct ioloop *prev_ioloop = current_ioloop;
+ struct fts_indexer_context *ctx;
+ struct mailbox_status status;
+ uint32_t last_uid, seq1, seq2;
+ const char *path, *value, *error;
+ unsigned int timeout_secs = 0;
+ int ret;
+
+ value = mail_user_plugin_getenv(box->storage->user, "fts_index_timeout");
+ if (value != NULL) {
+ if (settings_get_time(value, &timeout_secs, &error) < 0) {
+ e_error(box->storage->user->event,
+ "Invalid fts_index_timeout setting: %s",
+ error);
+ return -1;
+ }
+ }
+
+ if (fts_backend_get_last_uid(backend, box, &last_uid) < 0)
+ return -1;
+
+ mailbox_get_open_status(box, STATUS_UIDNEXT, &status);
+ if (status.uidnext == last_uid+1) {
+ /* everything is already indexed */
+ return 0;
+ }
+
+ mailbox_get_seq_range(box, last_uid+1, (uint32_t)-1, &seq1, &seq2);
+ if (seq1 == 0) {
+ /* no new messages (last messages in mailbox were expunged) */
+ return 0;
+ }
+
+ path = t_strconcat(box->storage->user->set->base_dir,
+ "/"INDEXER_SOCKET_NAME, NULL);
+
+ ctx = i_new(struct fts_indexer_context, 1);
+ ctx->box = box;
+ ctx->search_start_time = ioloop_timeval;
+ ctx->conn.event_parent = box->event;
+ ctx->ioloop = io_loop_create();
+ ctx->connection_list = connection_list_init(&indexer_client_set,
+ &indexer_client_vfuncs);
+ ctx->conn.input_idle_timeout_secs = timeout_secs;
+ connection_init_client_unix(ctx->connection_list, &ctx->conn,
+ path);
+ ret = connection_client_connect(&ctx->conn);
+ io_loop_set_current(prev_ioloop);
+ *ctx_r = ctx;
+ return ctx->failed || ret < 0 ? -1 : 1;
+}
+
+#define INDEXER_HANDSHAKE "1\t0\tindexer\tindexer\n"
+
+int fts_indexer_cmd(struct mail_user *user, const char *cmd,
+ const char **path_r)
+{
+ const char *path;
+ int fd;
+
+ path = t_strconcat(user->set->base_dir,
+ "/"INDEXER_SOCKET_NAME, NULL);
+ fd = net_connect_unix_with_retries(path, 1000);
+ if (fd == -1) {
+ i_error("net_connect_unix(%s) failed: %m", path);
+ return -1;
+ }
+
+ cmd = t_strconcat(INDEXER_HANDSHAKE, cmd, NULL);
+ if (write_full(fd, cmd, strlen(cmd)) < 0) {
+ i_error("write(%s) failed: %m", path);
+ i_close_fd(&fd);
+ return -1;
+ }
+ *path_r = path;
+ return fd;
+}
diff --git a/src/plugins/fts/fts-indexer.h b/src/plugins/fts/fts-indexer.h
new file mode 100644
index 0000000..7ccbc7e
--- /dev/null
+++ b/src/plugins/fts/fts-indexer.h
@@ -0,0 +1,22 @@
+#ifndef FTS_BUILD_H
+#define FTS_BUILD_H
+
+struct fts_backend;
+struct fts_indexer_context;
+
+/* Initialize indexing the given mailbox via indexer service. Returns 1 if
+ indexing started, 0 if there was no need to index or -1 if error. */
+int fts_indexer_init(struct fts_backend *backend, struct mailbox *box,
+ struct fts_indexer_context **ctx_r);
+/* Returns 0 if ok, -1 if error. */
+int fts_indexer_deinit(struct fts_indexer_context **ctx);
+
+/* Build more. Returns 1 if finished, 0 if this function needs to be called
+ again, -1 if error. */
+int fts_indexer_more(struct fts_indexer_context *ctx);
+
+/* Returns fd, which you can either read from or close. */
+int fts_indexer_cmd(struct mail_user *user, const char *cmd,
+ const char **path_r);
+
+#endif
diff --git a/src/plugins/fts/fts-parser-html.c b/src/plugins/fts/fts-parser-html.c
new file mode 100644
index 0000000..aa2078d
--- /dev/null
+++ b/src/plugins/fts/fts-parser-html.c
@@ -0,0 +1,64 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "message-parser.h"
+#include "mail-html2text.h"
+#include "fts-parser.h"
+
+struct html_fts_parser {
+ struct fts_parser parser;
+ struct mail_html2text *html2text;
+ buffer_t *output;
+};
+
+static struct fts_parser *
+fts_parser_html_try_init(struct fts_parser_context *parser_context)
+{
+ struct html_fts_parser *parser;
+
+ if (!mail_html2text_content_type_match(parser_context->content_type))
+ return NULL;
+
+ parser = i_new(struct html_fts_parser, 1);
+ parser->parser.v = fts_parser_html;
+ parser->html2text = mail_html2text_init(0);
+ parser->output = buffer_create_dynamic(default_pool, 4096);
+ return &parser->parser;
+}
+
+static void fts_parser_html_more(struct fts_parser *_parser,
+ struct message_block *block)
+{
+ struct html_fts_parser *parser = (struct html_fts_parser *)_parser;
+
+ if (block->size == 0) {
+ /* finished */
+ return;
+ }
+
+ buffer_set_used_size(parser->output, 0);
+ mail_html2text_more(parser->html2text, block->data, block->size,
+ parser->output);
+
+ block->data = parser->output->data;
+ block->size = parser->output->used;
+}
+
+static int fts_parser_html_deinit(struct fts_parser *_parser,
+ const char **retriable_err_msg_r ATTR_UNUSED)
+{
+ struct html_fts_parser *parser = (struct html_fts_parser *)_parser;
+
+ mail_html2text_deinit(&parser->html2text);
+ buffer_free(&parser->output);
+ i_free(parser);
+ return 1;
+}
+
+struct fts_parser_vfuncs fts_parser_html = {
+ fts_parser_html_try_init,
+ fts_parser_html_more,
+ fts_parser_html_deinit,
+ NULL
+};
diff --git a/src/plugins/fts/fts-parser-script.c b/src/plugins/fts/fts-parser-script.c
new file mode 100644
index 0000000..eefbe07
--- /dev/null
+++ b/src/plugins/fts/fts-parser-script.c
@@ -0,0 +1,277 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "net.h"
+#include "istream.h"
+#include "write-full.h"
+#include "module-context.h"
+#include "rfc822-parser.h"
+#include "rfc2231-parser.h"
+#include "message-parser.h"
+#include "mail-user.h"
+#include "fts-parser.h"
+
+#define SCRIPT_USER_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_parser_script_user_module)
+
+#define SCRIPT_HANDSHAKE "VERSION\tscript\t4\t0\nalarm=10\nnoreply\n"
+
+struct content {
+ const char *content_type;
+ const char *const *extensions;
+};
+
+struct fts_parser_script_user {
+ union mail_user_module_context module_ctx;
+
+ ARRAY(struct content) content;
+};
+
+struct script_fts_parser {
+ struct fts_parser parser;
+
+ int fd;
+ char *path;
+
+ unsigned char outbuf[IO_BLOCK_SIZE];
+ bool failed;
+ bool shutdown;
+};
+
+static MODULE_CONTEXT_DEFINE_INIT(fts_parser_script_user_module,
+ &mail_user_module_register);
+
+static int script_connect(struct mail_user *user, const char **path_r)
+{
+ const char *path;
+ int fd;
+
+ path = mail_user_plugin_getenv(user, "fts_decoder");
+ if (path == NULL)
+ return -1;
+
+ if (*path != '/')
+ path = t_strconcat(user->set->base_dir, "/", path, NULL);
+ fd = net_connect_unix_with_retries(path, 1000);
+ if (fd == -1)
+ i_error("net_connect_unix(%s) failed: %m", path);
+ else
+ net_set_nonblock(fd, FALSE);
+ *path_r = path;
+ return fd;
+}
+
+static int script_contents_read(struct mail_user *user)
+{
+ struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user);
+ const char *path, *cmd, *line;
+ char **args;
+ struct istream *input;
+ struct content *content;
+ bool eof_seen = FALSE;
+ int fd, ret = 0;
+ i_assert(suser != NULL);
+
+ fd = script_connect(user, &path);
+ if (fd == -1)
+ return -1;
+
+ cmd = t_strdup_printf(SCRIPT_HANDSHAKE"\n");
+ if (write_full(fd, cmd, strlen(cmd)) < 0) {
+ i_error("write(%s) failed: %m", path);
+ i_close_fd(&fd);
+ return -1;
+ }
+ input = i_stream_create_fd_autoclose(&fd, 1024);
+ while ((line = i_stream_read_next_line(input)) != NULL) {
+ /* <content-type> <extension> [<extension> ...] */
+ args = p_strsplit_spaces(user->pool, line, " ");
+ if (args[0] == NULL) {
+ eof_seen = TRUE;
+ break;
+ }
+ if (args[0][0] == '\0' || args[1] == NULL) {
+ i_error("parser script sent invalid input: %s", line);
+ continue;
+ }
+
+ content = array_append_space(&suser->content);
+ content->content_type = str_lcase(args[0]);
+ content->extensions = (const void *)(args+1);
+ }
+ if (input->stream_errno != 0) {
+ i_error("parser script read(%s) failed: %s", path,
+ i_stream_get_error(input));
+ ret = -1;
+ } else if (!eof_seen) {
+ if (input->v_offset == 0)
+ i_error("parser script didn't send any data");
+ else
+ i_error("parser script didn't send empty EOF line");
+ }
+ i_stream_destroy(&input);
+ return ret;
+}
+
+static bool script_support_content(struct mail_user *user,
+ const char **content_type,
+ const char *filename)
+{
+ struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user);
+ const struct content *content;
+ const char *extension;
+
+ if (suser == NULL) {
+ suser = p_new(user->pool, struct fts_parser_script_user, 1);
+ p_array_init(&suser->content, user->pool, 32);
+ MODULE_CONTEXT_SET(user, fts_parser_script_user_module, suser);
+ }
+ if (array_count(&suser->content) == 0) {
+ if (script_contents_read(user) < 0)
+ return FALSE;
+ }
+
+ if (strcmp(*content_type, "application/octet-stream") == 0) {
+ if (filename == NULL)
+ return FALSE;
+ extension = strrchr(filename, '.');
+ if (extension == NULL)
+ return FALSE;
+ extension = extension + 1;
+
+ array_foreach(&suser->content, content) {
+ if (content->extensions != NULL &&
+ str_array_icase_find(content->extensions, extension)) {
+ *content_type = content->content_type;
+ return TRUE;
+ }
+ }
+ } else {
+ array_foreach(&suser->content, content) {
+ if (strcmp(content->content_type, *content_type) == 0)
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static void parse_content_disposition(const char *content_disposition,
+ const char **filename_r)
+{
+ struct rfc822_parser_context parser;
+ const char *const *results, *filename2;
+ string_t *str;
+
+ *filename_r = NULL;
+
+ if (content_disposition == NULL)
+ return;
+
+ rfc822_parser_init(&parser, (const unsigned char *)content_disposition,
+ strlen(content_disposition), NULL);
+ rfc822_skip_lwsp(&parser);
+
+ /* type; param; param; .. */
+ str = t_str_new(32);
+ if (rfc822_parse_mime_token(&parser, str) < 0) {
+ rfc822_parser_deinit(&parser);
+ return;
+ }
+
+ rfc2231_parse(&parser, &results);
+ filename2 = NULL;
+ for (; *results != NULL; results += 2) {
+ if (strcasecmp(results[0], "filename") == 0) {
+ *filename_r = results[1];
+ break;
+ }
+ if (strcasecmp(results[0], "filename*") == 0)
+ filename2 = results[1];
+ }
+ if (*filename_r == NULL) {
+ /* RFC 2231 style non-ascii filename. we don't really care
+ much about the filename actually, just about its extension */
+ *filename_r = filename2;
+ }
+ rfc822_parser_deinit(&parser);
+}
+
+static struct fts_parser *
+fts_parser_script_try_init(struct fts_parser_context *parser_context)
+{
+ struct script_fts_parser *parser;
+ const char *filename, *path, *cmd;
+ int fd;
+
+ parse_content_disposition(parser_context->content_disposition, &filename);
+ if (!script_support_content(parser_context->user, &parser_context->content_type, filename))
+ return NULL;
+
+ fd = script_connect(parser_context->user, &path);
+ if (fd == -1)
+ return NULL;
+ cmd = t_strdup_printf(SCRIPT_HANDSHAKE"%s\n\n", parser_context->content_type);
+ if (write_full(fd, cmd, strlen(cmd)) < 0) {
+ i_error("write(%s) failed: %m", path);
+ i_close_fd(&fd);
+ return NULL;
+ }
+
+ parser = i_new(struct script_fts_parser, 1);
+ parser->parser.v = fts_parser_script;
+ parser->path = i_strdup(path);
+ parser->fd = fd;
+ return &parser->parser;
+}
+
+static void fts_parser_script_more(struct fts_parser *_parser,
+ struct message_block *block)
+{
+ struct script_fts_parser *parser = (struct script_fts_parser *)_parser;
+ ssize_t ret;
+
+ if (block->size > 0) {
+ /* first we'll send everything to the script */
+ if (!parser->failed &&
+ write_full(parser->fd, block->data, block->size) < 0) {
+ i_error("write(%s) failed: %m", parser->path);
+ parser->failed = TRUE;
+ }
+ block->size = 0;
+ } else {
+ if (!parser->shutdown) {
+ if (shutdown(parser->fd, SHUT_WR) < 0)
+ i_error("shutdown(%s) failed: %m", parser->path);
+ parser->shutdown = TRUE;
+ }
+ /* read the result from the script */
+ ret = read(parser->fd, parser->outbuf, sizeof(parser->outbuf));
+ if (ret < 0)
+ i_error("read(%s) failed: %m", parser->path);
+ else {
+ block->data = parser->outbuf;
+ block->size = ret;
+ }
+ }
+}
+
+static int fts_parser_script_deinit(struct fts_parser *_parser,
+ const char **retriable_err_msg_r ATTR_UNUSED)
+{
+ struct script_fts_parser *parser = (struct script_fts_parser *)_parser;
+ int ret = parser->failed ? -1 : 1;
+
+ if (close(parser->fd) < 0)
+ i_error("close(%s) failed: %m", parser->path);
+ i_free(parser->path);
+ i_free(parser);
+ return ret;
+}
+
+struct fts_parser_vfuncs fts_parser_script = {
+ fts_parser_script_try_init,
+ fts_parser_script_more,
+ fts_parser_script_deinit,
+ NULL
+};
diff --git a/src/plugins/fts/fts-parser-tika.c b/src/plugins/fts/fts-parser-tika.c
new file mode 100644
index 0000000..bb6379c
--- /dev/null
+++ b/src/plugins/fts/fts-parser-tika.c
@@ -0,0 +1,278 @@
+/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "ioloop.h"
+#include "istream.h"
+#include "module-context.h"
+#include "iostream-ssl.h"
+#include "http-url.h"
+#include "http-client.h"
+#include "message-parser.h"
+#include "mail-user.h"
+#include "fts-parser.h"
+
+#define TIKA_USER_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_parser_tika_user_module)
+
+struct fts_parser_tika_user {
+ union mail_user_module_context module_ctx;
+ struct http_url *http_url;
+};
+
+struct tika_fts_parser {
+ struct fts_parser parser;
+ struct mail_user *user;
+ struct http_client_request *http_req;
+
+ struct ioloop *ioloop;
+ struct io *io;
+ struct istream *payload;
+
+ bool failed;
+};
+
+static struct http_client *tika_http_client = NULL;
+static MODULE_CONTEXT_DEFINE_INIT(fts_parser_tika_user_module,
+ &mail_user_module_register);
+
+static int
+tika_get_http_client_url(struct mail_user *user, struct http_url **http_url_r)
+{
+ struct fts_parser_tika_user *tuser = TIKA_USER_CONTEXT(user);
+ struct http_client_settings http_set;
+ struct ssl_iostream_settings ssl_set;
+ const char *url, *error;
+
+ url = mail_user_plugin_getenv(user, "fts_tika");
+ if (url == NULL) {
+ /* fts_tika disabled */
+ return -1;
+ }
+
+ if (tuser != NULL) {
+ *http_url_r = tuser->http_url;
+ return *http_url_r == NULL ? -1 : 0;
+ }
+
+ tuser = p_new(user->pool, struct fts_parser_tika_user, 1);
+ MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser);
+
+ if (http_url_parse(url, NULL, 0, user->pool,
+ &tuser->http_url, &error) < 0) {
+ i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error);
+ return -1;
+ }
+
+ if (tika_http_client == NULL) {
+ mail_user_init_ssl_client_settings(user, &ssl_set);
+
+ i_zero(&http_set);
+ http_set.max_idle_time_msecs = 100;
+ http_set.max_parallel_connections = 1;
+ http_set.max_pipelined_requests = 1;
+ http_set.max_redirects = 1;
+ http_set.max_attempts = 3;
+ http_set.connect_timeout_msecs = 5*1000;
+ http_set.request_timeout_msecs = 60*1000;
+ http_set.ssl = &ssl_set;
+ http_set.debug = user->mail_debug;
+ http_set.event_parent = user->event;
+
+ /* FIXME: We should initialize a shared client instead. However,
+ this is currently not possible due to an obscure bug
+ in the blocking HTTP payload API, which causes
+ conflicts with other HTTP applications like FTS Solr.
+ Using a private client will provide a quick fix for
+ now. */
+ tika_http_client = http_client_init_private(&http_set);
+ }
+ *http_url_r = tuser->http_url;
+ return 0;
+}
+
+static void
+fts_tika_parser_response(const struct http_response *response,
+ struct tika_fts_parser *parser)
+{
+ i_assert(parser->payload == NULL);
+
+ switch (response->status) {
+ case 200:
+ /* read response */
+ if (response->payload == NULL)
+ parser->payload = i_stream_create_from_data("", 0);
+ else {
+ i_stream_ref(response->payload);
+ parser->payload = response->payload;
+ }
+ break;
+ case 204: /* empty response */
+ case 415: /* Unsupported Media Type */
+ case 422: /* Unprocessable Entity */
+ e_debug(parser->user->event, "fts_tika: PUT %s failed: %s",
+ mail_user_plugin_getenv(parser->user, "fts_tika"),
+ http_response_get_message(response));
+ parser->payload = i_stream_create_from_data("", 0);
+ break;
+ default:
+ if (response->status / 100 == 5) {
+ /* Server Error - the problem could be anything (in Tika or
+ HTTP server or proxy) and might be retriable, but Tika has
+ trouble processing some documents and throws up this error
+ every time for those documents. */
+ parser->parser.may_need_retry = TRUE;
+ i_free(parser->parser.retriable_error_msg);
+ parser->parser.retriable_error_msg =
+ i_strdup_printf("fts_tika: PUT %s failed: %s",
+ mail_user_plugin_getenv(parser->user, "fts_tika"),
+ http_response_get_message(response));
+ parser->payload = i_stream_create_from_data("", 0);
+ } else {
+ i_error("fts_tika: PUT %s failed: %s",
+ mail_user_plugin_getenv(parser->user, "fts_tika"),
+ http_response_get_message(response));
+ parser->failed = TRUE;
+ }
+ break;
+ }
+ parser->http_req = NULL;
+ io_loop_stop(current_ioloop);
+}
+
+static struct fts_parser *
+fts_parser_tika_try_init(struct fts_parser_context *parser_context)
+{
+ struct tika_fts_parser *parser;
+ struct http_url *http_url;
+ struct http_client_request *http_req;
+
+ if (tika_get_http_client_url(parser_context->user, &http_url) < 0)
+ return NULL;
+ if (http_url->path == NULL)
+ http_url->path = "/";
+
+ parser = i_new(struct tika_fts_parser, 1);
+ parser->parser.v = fts_parser_tika;
+ parser->user = parser_context->user;
+
+ http_req = http_client_request(tika_http_client, "PUT",
+ http_url->host.name,
+ t_strconcat(http_url->path, http_url->enc_query, NULL),
+ fts_tika_parser_response, parser);
+ http_client_request_set_port(http_req, http_url->port);
+ http_client_request_set_ssl(http_req, http_url->have_ssl);
+ if (parser_context->content_type != NULL)
+ http_client_request_add_header(http_req, "Content-Type",
+ parser_context->content_type);
+ if (parser_context->content_disposition != NULL)
+ http_client_request_add_header(http_req, "Content-Disposition",
+ parser_context->content_disposition);
+ http_client_request_add_header(http_req, "Accept", "text/plain");
+
+ parser->http_req = http_req;
+ return &parser->parser;
+}
+
+static void fts_parser_tika_more(struct fts_parser *_parser,
+ struct message_block *block)
+{
+ struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser;
+ struct ioloop *prev_ioloop = current_ioloop;
+ const unsigned char *data;
+ size_t size;
+ ssize_t ret;
+
+ if (block->size > 0) {
+ /* first we'll send everything to Tika */
+ if (!parser->failed &&
+ http_client_request_send_payload(&parser->http_req,
+ block->data,
+ block->size) < 0)
+ parser->failed = TRUE;
+ block->size = 0;
+ return;
+ }
+
+ if (parser->payload == NULL) {
+ /* read the result from Tika */
+ if (!parser->failed &&
+ http_client_request_finish_payload(&parser->http_req) < 0)
+ parser->failed = TRUE;
+ if (!parser->failed && parser->payload == NULL)
+ http_client_wait(tika_http_client);
+ if (parser->failed)
+ return;
+ i_assert(parser->payload != NULL);
+ }
+ /* continue returning data from Tika. we'll create a new ioloop just
+ for reading this one payload. */
+ while ((ret = i_stream_read_more(parser->payload, &data, &size)) == 0) {
+ if (parser->failed)
+ break;
+ /* wait for more input from Tika */
+ if (parser->ioloop == NULL) {
+ parser->ioloop = io_loop_create();
+ parser->io = io_add_istream(parser->payload, io_loop_stop,
+ current_ioloop);
+ } else {
+ io_loop_set_current(parser->ioloop);
+ }
+ io_loop_run(current_ioloop);
+ }
+ /* switch back to original ioloop. */
+ io_loop_set_current(prev_ioloop);
+
+ if (parser->failed)
+ ;
+ else if (size > 0) {
+ i_assert(ret > 0);
+ block->data = data;
+ block->size = size;
+ i_stream_skip(parser->payload, size);
+ } else {
+ /* finished */
+ i_assert(ret == -1);
+ if (parser->payload->stream_errno != 0) {
+ i_error("read(%s) failed: %s",
+ i_stream_get_name(parser->payload),
+ i_stream_get_error(parser->payload));
+ parser->failed = TRUE;
+ }
+ }
+}
+
+static int fts_parser_tika_deinit(struct fts_parser *_parser, const char **retriable_err_msg_r)
+{
+ struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser;
+ int ret = _parser->may_need_retry ? 0: (parser->failed ? -1 : 1);
+
+ i_assert(ret != 0 || _parser->retriable_error_msg != NULL);
+ if (retriable_err_msg_r != NULL)
+ *retriable_err_msg_r = t_strdup(_parser->retriable_error_msg);
+ i_free(_parser->retriable_error_msg);
+
+ /* remove io before unrefing payload - otherwise lib-http adds another
+ timeout to ioloop unnecessarily */
+ i_stream_unref(&parser->payload);
+ io_remove(&parser->io);
+ http_client_request_abort(&parser->http_req);
+ if (parser->ioloop != NULL) {
+ io_loop_set_current(parser->ioloop);
+ io_loop_destroy(&parser->ioloop);
+ }
+ i_free(parser);
+ return ret;
+}
+
+static void fts_parser_tika_unload(void)
+{
+ if (tika_http_client != NULL)
+ http_client_deinit(&tika_http_client);
+}
+
+struct fts_parser_vfuncs fts_parser_tika = {
+ fts_parser_tika_try_init,
+ fts_parser_tika_more,
+ fts_parser_tika_deinit,
+ fts_parser_tika_unload
+};
diff --git a/src/plugins/fts/fts-parser.c b/src/plugins/fts/fts-parser.c
new file mode 100644
index 0000000..c0eac80
--- /dev/null
+++ b/src/plugins/fts/fts-parser.c
@@ -0,0 +1,127 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "unichar.h"
+#include "message-parser.h"
+#include "fts-parser.h"
+
+static const struct fts_parser_vfuncs *parsers[] = {
+ &fts_parser_html,
+ &fts_parser_script,
+ &fts_parser_tika
+};
+
+static const char *plaintext_content_types[] = {
+ "text/plain",
+ "message/delivery-status",
+ "message/disposition-notification",
+ "application/pgp-signature",
+ NULL
+};
+
+bool fts_parser_init(struct fts_parser_context *parser_context,
+ struct fts_parser **parser_r)
+{
+ unsigned int i;
+ i_assert(parser_context->user != NULL);
+ i_assert(parser_context->content_type != NULL);
+
+ if (str_array_find(plaintext_content_types, parser_context->content_type)) {
+ /* we probably don't want/need to allow parsers to handle
+ plaintext? */
+ return FALSE;
+ }
+
+ for (i = 0; i < N_ELEMENTS(parsers); i++) {
+ *parser_r = parsers[i]->try_init(parser_context);
+ if (*parser_r != NULL)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+struct fts_parser *fts_parser_text_init(void)
+{
+ return i_new(struct fts_parser, 1);
+}
+
+static bool data_has_nuls(const unsigned char *data, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (data[i] == '\0')
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static void replace_nul_bytes(buffer_t *buf)
+{
+ unsigned char *data;
+ size_t i, size;
+
+ data = buffer_get_modifiable_data(buf, &size);
+ for (i = 0; i < size; i++) {
+ if (data[i] == '\0')
+ data[i] = ' ';
+ }
+}
+
+void fts_parser_more(struct fts_parser *parser, struct message_block *block)
+{
+ if (parser->v.more != NULL)
+ parser->v.more(parser, block);
+
+ if (!uni_utf8_data_is_valid(block->data, block->size) ||
+ data_has_nuls(block->data, block->size)) {
+ /* output isn't valid UTF-8. make it. */
+ if (parser->utf8_output == NULL) {
+ parser->utf8_output =
+ buffer_create_dynamic(default_pool, 4096);
+ } else {
+ buffer_set_used_size(parser->utf8_output, 0);
+ }
+ if (uni_utf8_get_valid_data(block->data, block->size,
+ parser->utf8_output)) {
+ /* valid UTF-8, but there were NULs */
+ buffer_append(parser->utf8_output, block->data,
+ block->size);
+ }
+ replace_nul_bytes(parser->utf8_output);
+ block->data = parser->utf8_output->data;
+ block->size = parser->utf8_output->used;
+ }
+}
+
+int fts_parser_deinit(struct fts_parser **_parser, const char **retriable_err_msg_r)
+{
+ struct fts_parser *parser = *_parser;
+ int ret = 1;
+
+ *_parser = NULL;
+
+ buffer_free(&parser->utf8_output);
+ if (parser->v.deinit != NULL) {
+ const char *error = NULL;
+ ret = parser->v.deinit(parser, &error);
+ if (ret == 0) {
+ i_assert(error != NULL);
+ if (retriable_err_msg_r != NULL)
+ *retriable_err_msg_r = error;
+ }
+ } else
+ i_free(parser);
+ return ret;
+}
+
+void fts_parsers_unload(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < N_ELEMENTS(parsers); i++) {
+ if (parsers[i]->unload != NULL)
+ parsers[i]->unload();
+ }
+}
diff --git a/src/plugins/fts/fts-parser.h b/src/plugins/fts/fts-parser.h
new file mode 100644
index 0000000..0eb716e
--- /dev/null
+++ b/src/plugins/fts/fts-parser.h
@@ -0,0 +1,48 @@
+#ifndef FTS_PARSER_H
+#define FTS_PARSER_H
+
+struct message_block;
+struct mail_user;
+
+struct fts_parser_context {
+ /* Can't be NULL */
+ struct mail_user *user;
+ /* Can't be NULL */
+ const char *content_type;
+ const char *content_disposition;
+};
+
+struct fts_parser_vfuncs {
+ struct fts_parser *(*try_init)(struct fts_parser_context *parser_context);
+ void (*more)(struct fts_parser *parser, struct message_block *block);
+ int (*deinit)(struct fts_parser *parser, const char **retriable_err_msg_r);
+ void (*unload)(void);
+};
+
+struct fts_parser {
+ struct fts_parser_vfuncs v;
+ buffer_t *utf8_output;
+ bool may_need_retry;
+ char *retriable_error_msg;
+};
+
+extern struct fts_parser_vfuncs fts_parser_html;
+extern struct fts_parser_vfuncs fts_parser_script;
+extern struct fts_parser_vfuncs fts_parser_tika;
+
+bool fts_parser_init(struct fts_parser_context *parser_context,
+ struct fts_parser **parser_r);
+struct fts_parser *fts_parser_text_init(void);
+
+/* The parser is initially called with message body blocks. Once message is
+ finished, it's still called with incoming size=0 while the parser increases
+ it to non-zero. */
+void fts_parser_more(struct fts_parser *parser, struct message_block *block);
+/* Returns 1 if ok, 0 if the parsing should be retried, -1 if error.
+ If 0 is returned, the retriable_err_msg_r is set, which should be logged
+ as error if no retrying is performed. */
+int fts_parser_deinit(struct fts_parser **parser, const char **retriable_err_msg_r);
+
+void fts_parsers_unload(void);
+
+#endif
diff --git a/src/plugins/fts/fts-plugin.c b/src/plugins/fts/fts-plugin.c
new file mode 100644
index 0000000..1902cb6
--- /dev/null
+++ b/src/plugins/fts/fts-plugin.c
@@ -0,0 +1,33 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "mail-storage-hooks.h"
+#include "fts-filter.h"
+#include "fts-tokenizer.h"
+#include "fts-parser.h"
+#include "fts-storage.h"
+#include "fts-user.h"
+#include "fts-plugin.h"
+#include "fts-library.h"
+
+const char *fts_plugin_version = DOVECOT_ABI_VERSION;
+
+static struct mail_storage_hooks fts_mail_storage_hooks = {
+ .mail_namespaces_added = fts_mail_namespaces_added,
+ .mailbox_list_created = fts_mailbox_list_created,
+ .mailbox_allocated = fts_mailbox_allocated,
+ .mail_allocated = fts_mail_allocated
+};
+
+void fts_plugin_init(struct module *module)
+{
+ fts_library_init();
+ mail_storage_hooks_add(module, &fts_mail_storage_hooks);
+}
+
+void fts_plugin_deinit(void)
+{
+ fts_library_deinit();
+ fts_parsers_unload();
+ mail_storage_hooks_remove(&fts_mail_storage_hooks);
+}
diff --git a/src/plugins/fts/fts-plugin.h b/src/plugins/fts/fts-plugin.h
new file mode 100644
index 0000000..aeec68c
--- /dev/null
+++ b/src/plugins/fts/fts-plugin.h
@@ -0,0 +1,7 @@
+#ifndef FTS_PLUGIN_H
+#define FTS_PLUGIN_H
+
+void fts_plugin_init(struct module *module);
+void fts_plugin_deinit(void);
+
+#endif
diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c
new file mode 100644
index 0000000..b58b238
--- /dev/null
+++ b/src/plugins/fts/fts-search-args.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "mail-namespace.h"
+#include "mail-search.h"
+#include "fts-api-private.h"
+#include "fts-tokenizer.h"
+#include "fts-filter.h"
+#include "fts-user.h"
+#include "fts-search-args.h"
+
+static void strings_deduplicate(ARRAY_TYPE(const_string) *arr)
+{
+ const char *const *strings;
+ unsigned int i, count;
+
+ strings = array_get(arr, &count);
+ for (i = 1; i < count; ) {
+ if (strcmp(strings[i-1], strings[i]) == 0) {
+ array_delete(arr, i, 1);
+ strings = array_get(arr, &count);
+ } else {
+ i++;
+ }
+ }
+}
+
+static struct mail_search_arg *
+fts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool,
+ const ARRAY_TYPE(const_string) *tokens)
+{
+ struct mail_search_arg *arg, *or_arg, **argp;
+ const char *token;
+
+ /* create the OR arg first as the parent */
+ or_arg = p_new(pool, struct mail_search_arg, 1);
+ or_arg->type = SEARCH_OR;
+
+ /* now create all the child args for the OR */
+ argp = &or_arg->value.subargs;
+ array_foreach_elem(tokens, token) {
+ arg = p_new(pool, struct mail_search_arg, 1);
+ *arg = *orig_arg;
+ arg->match_not = FALSE; /* we copied this to the root OR */
+ arg->next = NULL;
+ arg->value.str = p_strdup(pool, token);
+
+ *argp = arg;
+ argp = &arg->next;
+ }
+ return or_arg;
+}
+
+static int
+fts_backend_dovecot_expand_tokens(struct fts_filter *filter,
+ pool_t pool,
+ struct mail_search_arg *parent_arg,
+ const struct mail_search_arg *orig_arg,
+ const char *orig_token, const char *token,
+ const char **error_r)
+{
+ struct mail_search_arg *arg;
+ ARRAY_TYPE(const_string) tokens;
+ const char *token2, *error;
+ int ret;
+
+ t_array_init(&tokens, 4);
+ /* first add the word exactly as it without any tokenization */
+ array_push_back(&tokens, &orig_token);
+ /* then add it tokenized, but without filtering */
+ array_push_back(&tokens, &token);
+
+ /* add the word filtered */
+ if (filter != NULL) {
+ token2 = t_strdup(token);
+ ret = fts_filter_filter(filter, &token2, &error);
+ if (ret > 0) {
+ token2 = t_strdup(token2);
+ array_push_back(&tokens, &token2);
+ } else if (ret < 0) {
+ *error_r = t_strdup_printf("Couldn't filter search token: %s", error);
+ return -1;
+ } else {
+ /* The filter dropped the token, which means it was
+ never even indexed. Ignore this word entirely in the
+ search query. */
+ return 0;
+ }
+ }
+ array_sort(&tokens, i_strcmp_p);
+ strings_deduplicate(&tokens);
+
+ arg = fts_search_arg_create_or(orig_arg, pool, &tokens);
+ arg->next = parent_arg->value.subargs;
+ parent_arg->value.subargs = arg;
+ return 0;
+}
+
+static int
+fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang,
+ pool_t pool, struct mail_search_arg *or_arg,
+ struct mail_search_arg *orig_arg,
+ const char *orig_token, const char **error_r)
+{
+ size_t orig_token_len = strlen(orig_token);
+ struct mail_search_arg *and_arg, *orig_or_args = or_arg->value.subargs;
+ const char *token, *error;
+ int ret;
+
+ /* we want all the tokens found from the string to be found, so create
+ a parent AND and place all the filtered token alternatives under
+ it */
+ and_arg = p_new(pool, struct mail_search_arg, 1);
+ and_arg->type = SEARCH_SUB;
+ and_arg->next = orig_or_args;
+ or_arg->value.subargs = and_arg;
+
+ /* reset tokenizer between search args in case there's any state left
+ from some previous failure */
+ fts_tokenizer_reset(user_lang->search_tokenizer);
+ while ((ret = fts_tokenizer_next(user_lang->search_tokenizer,
+ (const void *)orig_token,
+ orig_token_len, &token, &error)) > 0) {
+ if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
+ and_arg, orig_arg, orig_token,
+ token, error_r) < 0)
+ return -1;
+ }
+ while (ret >= 0 &&
+ (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) {
+ if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
+ and_arg, orig_arg, orig_token,
+ token, error_r) < 0)
+ return -1;
+ }
+ if (ret < 0) {
+ *error_r = t_strdup_printf("Couldn't tokenize search args: %s", error);
+ return -1;
+ }
+ if (and_arg->value.subargs == NULL) {
+ /* nothing was actually expanded, remove the empty and_arg */
+ or_arg->value.subargs = orig_or_args;
+ }
+ return 0;
+}
+
+static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool,
+ struct mail_search_arg **argp)
+{
+ const ARRAY_TYPE(fts_user_language) *languages;
+ struct fts_user_language *lang;
+ struct mail_search_arg *or_arg, *orig_arg = *argp;
+ const char *error, *orig_token = orig_arg->value.str;
+
+ if (((*argp)->type == SEARCH_HEADER ||
+ (*argp)->type == SEARCH_HEADER_ADDRESS ||
+ (*argp)->type == SEARCH_HEADER_COMPRESS_LWSP) &&
+ !fts_header_has_language((*argp)->hdr_field_name)) {
+ /* use only the data-language */
+ languages = fts_user_get_data_languages(backend->ns->user);
+ } else {
+ languages = fts_user_get_all_languages(backend->ns->user);
+ }
+
+ /* OR together all the different expansions for different languages.
+ it's enough for one of them to match. */
+ or_arg = p_new(pool, struct mail_search_arg, 1);
+ or_arg->type = SEARCH_OR;
+ or_arg->match_not = orig_arg->match_not;
+ or_arg->next = orig_arg->next;
+
+ array_foreach_elem(languages, lang) {
+ if (fts_backend_dovecot_tokenize_lang(lang, pool, or_arg,
+ orig_arg, orig_token, &error) < 0) {
+ i_error("fts: %s", error);
+ return -1;
+ }
+ }
+
+ if (or_arg->value.subargs == NULL) {
+ /* we couldn't parse any tokens from the input */
+ or_arg->type = SEARCH_ALL;
+ or_arg->match_not = !or_arg->match_not;
+ }
+ *argp = or_arg;
+ return 0;
+}
+
+static int
+fts_search_args_expand_tree(struct fts_backend *backend, pool_t pool,
+ struct mail_search_arg **argp)
+{
+ int ret;
+
+ for (; *argp != NULL; argp = &(*argp)->next) {
+ switch ((*argp)->type) {
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ case SEARCH_INTHREAD:
+ if (fts_search_args_expand_tree(backend, pool,
+ &(*argp)->value.subargs) < 0)
+ return -1;
+ break;
+ case SEARCH_HEADER:
+ case SEARCH_HEADER_ADDRESS:
+ case SEARCH_HEADER_COMPRESS_LWSP:
+ if ((*argp)->value.str[0] == '\0') {
+ /* we're testing for the existence of
+ the header */
+ break;
+ }
+ /* fall through */
+ case SEARCH_BODY:
+ case SEARCH_TEXT:
+ T_BEGIN {
+ ret = fts_search_arg_expand(backend, pool, argp);
+ } T_END;
+ if (ret < 0)
+ return -1;
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+int fts_search_args_expand(struct fts_backend *backend,
+ struct mail_search_args *args)
+{
+ struct mail_search_arg *args_dup, *orig_args = args->args;
+
+ /* don't keep re-expanding every time the search args are used.
+ this is especially important to avoid an assert-crash in
+ index_search_result_update_flags(). */
+ if (args->fts_expanded)
+ return 0;
+ args->fts_expanded = TRUE;
+
+ /* duplicate the args, so if expansion fails we haven't changed
+ anything */
+ args_dup = mail_search_arg_dup(args->pool, args->args);
+
+ if (fts_search_args_expand_tree(backend, args->pool, &args_dup) < 0)
+ return -1;
+
+ /* we'll need to re-simplify the args if we changed anything */
+ args->simplified = FALSE;
+ args->args = args_dup;
+ mail_search_args_simplify(args);
+
+ /* duplicated args aren't initialized */
+ i_assert(args->init_refcount > 0);
+ mail_search_arg_init(args, args_dup);
+ mail_search_arg_deinit(orig_args);
+ return 0;
+}
diff --git a/src/plugins/fts/fts-search-args.h b/src/plugins/fts/fts-search-args.h
new file mode 100644
index 0000000..9fb8923
--- /dev/null
+++ b/src/plugins/fts/fts-search-args.h
@@ -0,0 +1,7 @@
+#ifndef FTS_SEARCH_ARGS_H
+#define FTS_SEARCH_ARGS_H
+
+int fts_search_args_expand(struct fts_backend *backend,
+ struct mail_search_args *args);
+
+#endif
diff --git a/src/plugins/fts/fts-search-serialize.c b/src/plugins/fts/fts-search-serialize.c
new file mode 100644
index 0000000..e30d4ce
--- /dev/null
+++ b/src/plugins/fts/fts-search-serialize.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "mail-search.h"
+#include "fts-search-serialize.h"
+
+#define HAVE_SUBARGS(arg) \
+ ((arg)->type == SEARCH_SUB || (arg)->type == SEARCH_OR)
+
+void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args)
+{
+ char chr;
+
+ for (; args != NULL; args = args->next) {
+ chr = (args->match_always ? 1 : 0) |
+ (args->nonmatch_always ? 2 : 0);
+ buffer_append_c(buf, chr);
+
+ if (HAVE_SUBARGS(args))
+ fts_search_serialize(buf, args->value.subargs);
+ }
+}
+
+static void fts_search_deserialize_idx(struct mail_search_arg *args,
+ const buffer_t *buf, unsigned int *idx)
+{
+ const char *data = buf->data;
+
+ for (; args != NULL; args = args->next) {
+ i_assert(*idx < buf->used);
+
+ args->match_always = (data[*idx] & 1) != 0;
+ args->nonmatch_always = (data[*idx] & 2) != 0;
+ args->result = args->match_always ? 1 :
+ (args->nonmatch_always ? 0 : -1);
+ *idx += 1;
+
+ if (HAVE_SUBARGS(args)) {
+ fts_search_deserialize_idx(args->value.subargs,
+ buf, idx);
+ }
+ }
+}
+
+void fts_search_deserialize(struct mail_search_arg *args,
+ const buffer_t *buf)
+{
+ unsigned int idx = 0;
+
+ fts_search_deserialize_idx(args, buf, &idx);
+ i_assert(idx == buf->used);
+}
+
+static void
+fts_search_deserialize_add_idx(struct mail_search_arg *args,
+ const buffer_t *buf, unsigned int *idx,
+ bool matches)
+{
+ const char *data = buf->data;
+
+ for (; args != NULL; args = args->next) {
+ i_assert(*idx < buf->used);
+
+ if (data[*idx] != 0) {
+ if (matches) {
+ args->match_always = TRUE;
+ args->result = 1;
+ } else {
+ args->nonmatch_always = TRUE;
+ args->result = 0;
+ }
+ }
+ *idx += 1;
+
+ if (HAVE_SUBARGS(args)) {
+ fts_search_deserialize_add_idx(args->value.subargs,
+ buf, idx, matches);
+ }
+ }
+}
+
+void fts_search_deserialize_add_matches(struct mail_search_arg *args,
+ const buffer_t *buf)
+{
+ unsigned int idx = 0;
+
+ fts_search_deserialize_add_idx(args, buf, &idx, TRUE);
+ i_assert(idx == buf->used);
+}
+
+void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args,
+ const buffer_t *buf)
+{
+ unsigned int idx = 0;
+
+ fts_search_deserialize_add_idx(args, buf, &idx, FALSE);
+ i_assert(idx == buf->used);
+}
diff --git a/src/plugins/fts/fts-search-serialize.h b/src/plugins/fts/fts-search-serialize.h
new file mode 100644
index 0000000..c1a7d88
--- /dev/null
+++ b/src/plugins/fts/fts-search-serialize.h
@@ -0,0 +1,16 @@
+#ifndef FTS_SEARCH_SERIALIZE_H
+#define FTS_SEARCH_SERIALIZE_H
+
+/* serialize [non]match_always fields (clearing buffer) */
+void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args);
+/* add/remove [non]match_always fields in search args */
+void fts_search_deserialize(struct mail_search_arg *args,
+ const buffer_t *buf);
+/* add match_always=TRUE fields to search args */
+void fts_search_deserialize_add_matches(struct mail_search_arg *args,
+ const buffer_t *buf);
+/* add nonmatch_always=TRUE fields to search args */
+void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args,
+ const buffer_t *buf);
+
+#endif
diff --git a/src/plugins/fts/fts-search.c b/src/plugins/fts/fts-search.c
new file mode 100644
index 0000000..895ea59
--- /dev/null
+++ b/src/plugins/fts/fts-search.c
@@ -0,0 +1,385 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "seq-range-array.h"
+#include "mail-search.h"
+#include "fts-api-private.h"
+#include "fts-search-args.h"
+#include "fts-search-serialize.h"
+#include "fts-storage.h"
+#include "hash.h"
+
+static void
+uid_range_to_seqs(struct fts_search_context *fctx,
+ const ARRAY_TYPE(seq_range) *uid_range,
+ ARRAY_TYPE(seq_range) *seq_range)
+{
+ const struct seq_range *range;
+ unsigned int i, count;
+ uint32_t seq1, seq2;
+
+ range = array_get(uid_range, &count);
+ if (!array_is_created(seq_range))
+ p_array_init(seq_range, fctx->result_pool, count);
+ for (i = 0; i < count; i++) {
+ if (range[i].seq1 > range[i].seq2)
+ continue;
+ mailbox_get_seq_range(fctx->box, range[i].seq1, range[i].seq2,
+ &seq1, &seq2);
+ if (seq1 != 0)
+ seq_range_array_add_range(seq_range, seq1, seq2);
+ }
+}
+
+static int fts_search_lookup_level_single(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ enum fts_lookup_flags flags = fctx->flags |
+ (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
+ struct fts_search_level *level;
+ struct fts_result result;
+
+ i_zero(&result);
+ result.search_state = fctx->search_state;
+ result.pool = fctx->result_pool;
+ p_array_init(&result.definite_uids, fctx->result_pool, 32);
+ p_array_init(&result.maybe_uids, fctx->result_pool, 32);
+ p_array_init(&result.scores, fctx->result_pool, 32);
+
+ mail_search_args_reset(args, TRUE);
+ if (fts_backend_lookup(fctx->backend, fctx->box, args, flags,
+ &result) < 0)
+ return -1;
+
+ fctx->search_state = result.search_state;
+ level = array_append_space(&fctx->levels);
+ level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
+ fts_search_serialize(level->args_matches, args);
+
+ uid_range_to_seqs(fctx, &result.definite_uids, &level->definite_seqs);
+ uid_range_to_seqs(fctx, &result.maybe_uids, &level->maybe_seqs);
+ level->score_map = result.scores;
+ return 0;
+}
+
+static void
+level_scores_add_vuids(struct mailbox *box,
+ struct fts_search_level *level, struct fts_result *br)
+{
+ const struct fts_score_map *scores;
+ unsigned int i, count;
+ ARRAY_TYPE(seq_range) backend_uids;
+ ARRAY_TYPE(uint32_t) vuids_arr;
+ const uint32_t *vuids;
+ struct fts_score_map *score;
+
+ scores = array_get(&br->scores, &count);
+ t_array_init(&vuids_arr, count);
+ t_array_init(&backend_uids, 64);
+ for (i = 0; i < count; i++)
+ seq_range_array_add(&backend_uids, scores[i].uid);
+ box->virtual_vfuncs->get_virtual_uid_map(box, br->box,
+ &backend_uids, &vuids_arr);
+
+ i_assert(array_count(&vuids_arr) == array_count(&br->scores));
+ vuids = array_get(&vuids_arr, &count);
+ for (i = 0; i < count; i++) {
+ score = array_append_space(&level->score_map);
+ score->uid = vuids[i];
+ score->score = scores[i].score;
+ }
+}
+
+static int
+mailbox_cmp_fts_backend(struct mailbox *const *m1, struct mailbox *const *m2)
+{
+ struct fts_backend *b1, *b2;
+
+ b1 = fts_mailbox_backend(*m1);
+ b2 = fts_mailbox_backend(*m2);
+ if (b1 < b2)
+ return -1;
+ if (b1 > b2)
+ return 1;
+ return 0;
+}
+
+static int
+multi_add_lookup_result(struct fts_search_context *fctx,
+ struct fts_search_level *level,
+ struct mail_search_arg *args,
+ struct fts_multi_result *result)
+{
+ ARRAY_TYPE(seq_range) vuids;
+ size_t orig_size;
+ unsigned int i;
+
+ orig_size = level->args_matches->used;
+ fts_search_serialize(level->args_matches, args);
+ if (orig_size > 0) {
+ if (level->args_matches->used != orig_size * 2 ||
+ memcmp(level->args_matches->data,
+ CONST_PTR_OFFSET(level->args_matches->data,
+ orig_size), orig_size) != 0)
+ i_panic("incompatible fts backends for namespaces");
+ buffer_set_used_size(level->args_matches, orig_size);
+ }
+
+ t_array_init(&vuids, 64);
+ for (i = 0; result->box_results[i].box != NULL; i++) {
+ struct fts_result *br = &result->box_results[i];
+
+ array_clear(&vuids);
+ if (array_is_created(&br->definite_uids)) {
+ fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
+ br->box, &br->definite_uids, &vuids);
+ }
+ uid_range_to_seqs(fctx, &vuids, &level->definite_seqs);
+
+ array_clear(&vuids);
+ if (array_is_created(&br->maybe_uids)) {
+ fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
+ br->box, &br->maybe_uids, &vuids);
+ }
+ uid_range_to_seqs(fctx, &vuids, &level->maybe_seqs);
+
+ if (array_is_created(&br->scores))
+ level_scores_add_vuids(fctx->box, level, br);
+ }
+ return 0;
+}
+
+static int fts_search_lookup_level_multi(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ enum fts_lookup_flags flags = fctx->flags |
+ (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
+ ARRAY_TYPE(mailboxes) mailboxes_arr, tmp_mailboxes;
+ struct mailbox *const *mailboxes;
+ struct fts_backend *backend;
+ struct fts_search_level *level;
+ struct fts_multi_result result;
+ unsigned int i, j, mailbox_count;
+
+ p_array_init(&mailboxes_arr, fctx->result_pool, 8);
+ fctx->box->virtual_vfuncs->get_virtual_backend_boxes(fctx->box,
+ &mailboxes_arr, TRUE);
+ array_sort(&mailboxes_arr, mailbox_cmp_fts_backend);
+
+ i_zero(&result);
+ result.search_state = fctx->search_state;
+ result.pool = fctx->result_pool;
+
+ level = array_append_space(&fctx->levels);
+ level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
+ p_array_init(&level->score_map, fctx->result_pool, 1);
+
+ mailboxes = array_get(&mailboxes_arr, &mailbox_count);
+ t_array_init(&tmp_mailboxes, mailbox_count);
+ for (i = 0; i < mailbox_count; i = j) {
+ array_clear(&tmp_mailboxes);
+ array_push_back(&tmp_mailboxes, &mailboxes[i]);
+
+ backend = fts_mailbox_backend(mailboxes[i]);
+ for (j = i + 1; j < mailbox_count; j++) {
+ if (fts_mailbox_backend(mailboxes[j]) != backend)
+ break;
+ array_push_back(&tmp_mailboxes, &mailboxes[j]);
+ }
+ array_append_zero(&tmp_mailboxes);
+
+ mail_search_args_reset(args, TRUE);
+ if (fts_backend_lookup_multi(backend,
+ array_front(&tmp_mailboxes),
+ args, flags, &result) < 0)
+ return -1;
+
+ if (multi_add_lookup_result(fctx, level, args, &result) < 0)
+ return -1;
+ }
+ fctx->search_state = result.search_state;
+ return 0;
+}
+
+static int fts_search_lookup_level(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ bool and_args)
+{
+ int ret;
+
+ T_BEGIN {
+ ret = !fctx->virtual_mailbox ?
+ fts_search_lookup_level_single(fctx, args, and_args) :
+ fts_search_lookup_level_multi(fctx, args, and_args);
+ } T_END;
+ if (ret < 0)
+ return -1;
+
+ for (; args != NULL; args = args->next) {
+ if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
+ continue;
+
+ if (fts_search_lookup_level(fctx, args->value.subargs,
+ args->type == SEARCH_SUB) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static void
+fts_search_merge_scores_and(ARRAY_TYPE(fts_score_map) *dest,
+ const ARRAY_TYPE(fts_score_map) *src)
+{
+ struct fts_score_map *dest_map;
+ const struct fts_score_map *src_map;
+ unsigned int desti, srci, dest_count, src_count;
+
+ dest_map = array_get_modifiable(dest, &dest_count);
+ src_map = array_get(src, &src_count);
+
+ /* arg_scores are summed to current scores. we could drop UIDs that
+ don't exist in both, but that's just extra work so don't bother */
+ for (desti = srci = 0; desti < dest_count && srci < src_count;) {
+ if (dest_map[desti].uid < src_map[srci].uid)
+ desti++;
+ else if (dest_map[desti].uid > src_map[srci].uid)
+ srci++;
+ else {
+ if (dest_map[desti].score < src_map[srci].score)
+ dest_map[desti].score = src_map[srci].score;
+ desti++; srci++;
+ }
+ }
+}
+
+static void
+fts_search_merge_scores_or(ARRAY_TYPE(fts_score_map) *dest,
+ const ARRAY_TYPE(fts_score_map) *src)
+{
+ ARRAY_TYPE(fts_score_map) src2;
+ const struct fts_score_map *src_map, *src2_map;
+ unsigned int srci, src2i, src_count, src2_count;
+
+ t_array_init(&src2, array_count(dest));
+ array_append_array(&src2, dest);
+ array_clear(dest);
+
+ src_map = array_get(src, &src_count);
+ src2_map = array_get(&src2, &src2_count);
+
+ /* add any missing UIDs to current scores. if any existing UIDs have
+ lower scores than in arg_scores, increase them. */
+ for (srci = src2i = 0; srci < src_count || src2i < src2_count;) {
+ if (src2i == src2_count ||
+ src_map[srci].uid < src2_map[src2i].uid) {
+ array_push_back(dest, &src_map[srci]);
+ srci++;
+ } else if (srci == src_count ||
+ src_map[srci].uid > src2_map[src2i].uid) {
+ array_push_back(dest, &src2_map[src2i]);
+ src2i++;
+ } else {
+ i_assert(src_map[srci].uid == src2_map[src2i].uid);
+ if (src_map[srci].score > src2_map[src2i].score)
+ array_push_back(dest, &src_map[srci]);
+ else
+ array_push_back(dest, &src2_map[src2i]);
+ srci++; src2i++;
+ }
+ }
+}
+
+static void
+fts_search_merge_scores_level(struct fts_search_context *fctx,
+ struct mail_search_arg *args, unsigned int *idx,
+ bool and_args, ARRAY_TYPE(fts_score_map) *scores)
+{
+ const struct fts_search_level *level;
+ ARRAY_TYPE(fts_score_map) arg_scores;
+
+ i_assert(array_count(scores) == 0);
+
+ /*
+ The (simplified) args can look like:
+
+ A and B and (C or D) and (E or F) and ...
+ A or B or (C and D) or (E and F) or ...
+
+ The A op B part's scores are in level->scores. The child args'
+ scores are in the sub levels' scores.
+ */
+
+ level = array_idx(&fctx->levels, *idx);
+ array_append_array(scores, &level->score_map);
+
+ t_array_init(&arg_scores, 64);
+ for (; args != NULL; args = args->next) {
+ if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
+ continue;
+
+ *idx += 1;
+ array_clear(&arg_scores);
+ fts_search_merge_scores_level(fctx, args->value.subargs, idx,
+ args->type == SEARCH_OR,
+ &arg_scores);
+
+ if (and_args)
+ fts_search_merge_scores_and(scores, &arg_scores);
+ else
+ fts_search_merge_scores_or(scores, &arg_scores);
+ }
+}
+
+static void fts_search_merge_scores(struct fts_search_context *fctx)
+{
+ unsigned int idx = 0;
+
+ fts_search_merge_scores_level(fctx, fctx->args->args, &idx,
+ TRUE, &fctx->scores->score_map);
+}
+
+static void fts_search_try_lookup(struct fts_search_context *fctx)
+{
+ uint32_t last_uid, seq1, seq2;
+
+ i_assert(array_count(&fctx->levels) == 0);
+ i_assert(fctx->args->simplified);
+
+ if (fts_backend_refresh(fctx->backend) < 0)
+ return;
+ if (fts_backend_get_last_uid(fctx->backend, fctx->box, &last_uid) < 0)
+ return;
+ mailbox_get_seq_range(fctx->box, last_uid+1, (uint32_t)-1,
+ &seq1, &seq2);
+ fctx->first_unindexed_seq = seq1 != 0 ? seq1 : (uint32_t)-1;
+
+ if (fctx->virtual_mailbox) {
+ hash_table_clear(fctx->last_indexed_virtual_uids, TRUE);
+ fctx->next_unindexed_seq = fctx->first_unindexed_seq;
+ }
+
+ if ((fctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
+ if (fts_search_args_expand(fctx->backend, fctx->args) < 0)
+ return;
+ }
+ fts_search_serialize(fctx->orig_matches, fctx->args->args);
+
+ if (fts_search_lookup_level(fctx, fctx->args->args, TRUE) == 0) {
+ fctx->fts_lookup_success = TRUE;
+ fts_search_merge_scores(fctx);
+ }
+
+ fts_search_deserialize(fctx->args->args, fctx->orig_matches);
+ fts_backend_lookup_done(fctx->backend);
+}
+
+void fts_search_lookup(struct fts_search_context *fctx)
+{
+ struct event_reason *reason = event_reason_begin("fts:lookup");
+ fts_search_try_lookup(fctx);
+ event_reason_end(&reason);
+}
diff --git a/src/plugins/fts/fts-storage.c b/src/plugins/fts/fts-storage.c
new file mode 100644
index 0000000..101d52a
--- /dev/null
+++ b/src/plugins/fts/fts-storage.c
@@ -0,0 +1,981 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "net.h"
+#include "str.h"
+#include "strescape.h"
+#include "write-full.h"
+#include "mail-search-build.h"
+#include "mail-storage-private.h"
+#include "mailbox-list-private.h"
+#include "fts-api-private.h"
+#include "fts-tokenizer.h"
+#include "fts-indexer.h"
+#include "fts-build-mail.h"
+#include "fts-search-serialize.h"
+#include "fts-plugin.h"
+#include "fts-user.h"
+#include "fts-storage.h"
+#include "hash.h"
+
+
+#define FTS_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_storage_module)
+#define FTS_CONTEXT_REQUIRE(obj) \
+ MODULE_CONTEXT_REQUIRE(obj, fts_storage_module)
+#define FTS_MAIL_CONTEXT(obj) \
+ MODULE_CONTEXT_REQUIRE(obj, fts_mail_module)
+#define FTS_LIST_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_mailbox_list_module)
+#define FTS_LIST_CONTEXT_REQUIRE(obj) \
+ MODULE_CONTEXT_REQUIRE(obj, fts_mailbox_list_module)
+
+#define INDEXER_SOCKET_NAME "indexer"
+#define INDEXER_HANDSHAKE "VERSION\tindexer\t1\t0\n"
+
+struct fts_mailbox_list {
+ union mailbox_list_module_context module_ctx;
+ struct fts_backend *backend;
+
+ const char *backend_name;
+ struct fts_backend_update_context *update_ctx;
+ unsigned int update_ctx_refcount;
+
+ bool failed:1;
+};
+
+struct fts_mailbox {
+ union mailbox_module_context module_ctx;
+ struct fts_backend_update_context *sync_update_ctx;
+ bool fts_mailbox_excluded;
+};
+
+struct fts_transaction_context {
+ union mailbox_transaction_module_context module_ctx;
+
+ struct fts_scores *scores;
+ uint32_t next_index_seq;
+ uint32_t highest_virtual_uid;
+ unsigned int precache_extra_count;
+
+ bool indexing:1;
+ bool precached:1;
+ bool mails_saved:1;
+ const char *failure_reason;
+};
+
+struct fts_mail {
+ union mail_module_context module_ctx;
+ char score[30];
+
+ bool virtual_mail:1;
+};
+
+static MODULE_CONTEXT_DEFINE_INIT(fts_storage_module,
+ &mail_storage_module_register);
+static MODULE_CONTEXT_DEFINE_INIT(fts_mail_module, &mail_module_register);
+static MODULE_CONTEXT_DEFINE_INIT(fts_mailbox_list_module,
+ &mailbox_list_module_register);
+
+static int fts_mailbox_get_last_cached_seq(struct mailbox *box, uint32_t *seq_r)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list);
+ uint32_t seq1, seq2, last_uid;
+
+ if (fts_backend_get_last_uid(flist->backend, box, &last_uid) < 0) {
+ mail_storage_set_internal_error(box->storage);
+ return -1;
+ }
+
+ if (last_uid == 0)
+ *seq_r = 0;
+ else {
+ mailbox_get_seq_range(box, 1, last_uid, &seq1, &seq2);
+ *seq_r = seq2;
+ }
+ return 0;
+}
+
+static int
+fts_mailbox_get_status(struct mailbox *box, enum mailbox_status_items items,
+ struct mailbox_status *status_r)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box);
+ uint32_t seq;
+
+ if (fbox->module_ctx.super.get_status(box, items, status_r) < 0)
+ return -1;
+
+ if ((items & STATUS_LAST_CACHED_SEQ) != 0) {
+ if (fts_mailbox_get_last_cached_seq(box, &seq) < 0)
+ return -1;
+
+ /* Always use the FTS's last_cached_seq. This is because we
+ don't want to reindex all mails to FTS if .cache file is
+ deleted. */
+ status_r->last_cached_seq = seq;
+ }
+ return 0;
+}
+
+
+static void fts_scores_unref(struct fts_scores **_scores)
+{
+ struct fts_scores *scores = *_scores;
+
+ *_scores = NULL;
+ if (--scores->refcount == 0) {
+ array_free(&scores->score_map);
+ i_free(scores);
+ }
+}
+
+static void fts_try_build_init(struct mail_search_context *ctx,
+ struct fts_search_context *fctx)
+{
+ int ret;
+
+ i_assert(!fts_backend_is_updating(fctx->backend));
+
+ ret = fts_indexer_init(fctx->backend, ctx->transaction->box,
+ &fctx->indexer_ctx);
+ if (ret < 0)
+ return;
+
+ if (ret == 0) {
+ /* the index was up to date */
+ fts_search_lookup(fctx);
+ } else {
+ /* hide "searching" notifications while building index */
+ ctx->progress_hidden = TRUE;
+ }
+}
+
+static bool fts_want_build_args(const struct mail_search_arg *args)
+{
+ /* we want to update index only when searching from message body.
+ it's not worth the wait for searching only from headers, which
+ could be in cache file already */
+ for (; args != NULL; args = args->next) {
+ switch (args->type) {
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ case SEARCH_INTHREAD:
+ if (fts_want_build_args(args->value.subargs))
+ return TRUE;
+ break;
+ case SEARCH_BODY:
+ case SEARCH_TEXT:
+ if (!args->no_fts)
+ return TRUE;
+ break;
+ default:
+ break;
+ }
+ }
+ return FALSE;
+}
+
+static bool fts_args_have_fuzzy(const struct mail_search_arg *args)
+{
+ for (; args != NULL; args = args->next) {
+ if (args->fuzzy)
+ return TRUE;
+ switch (args->type) {
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ case SEARCH_INTHREAD:
+ if (fts_args_have_fuzzy(args->value.subargs))
+ return TRUE;
+ break;
+ default:
+ break;
+ }
+ }
+ return FALSE;
+}
+
+static enum fts_enforced fts_enforced_parse(const char *str)
+{
+ if (str == NULL || strcmp(str, "no") == 0)
+ return FTS_ENFORCED_NO;
+ else if (strcmp(str, "body") == 0)
+ return FTS_ENFORCED_BODY;
+ else
+ return FTS_ENFORCED_YES;
+}
+
+static struct mail_search_context *
+fts_mailbox_search_init(struct mailbox_transaction_context *t,
+ struct mail_search_args *args,
+ const enum mail_sort_type *sort_program,
+ enum mail_fetch_field wanted_fields,
+ struct mailbox_header_lookup_ctx *wanted_headers)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t);
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box);
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list);
+ struct mail_search_context *ctx;
+ struct fts_search_context *fctx;
+
+ ctx = fbox->module_ctx.super.search_init(t, args, sort_program,
+ wanted_fields, wanted_headers);
+
+ if (!fts_backend_can_lookup(flist->backend, args->args))
+ return ctx;
+
+ fctx = i_new(struct fts_search_context, 1);
+ fctx->box = t->box;
+ fctx->backend = flist->backend;
+ fctx->t = t;
+ fctx->args = args;
+ fctx->result_pool = pool_alloconly_create("fts results", 1024*64);
+ fctx->orig_matches = buffer_create_dynamic(default_pool, 64);
+ fctx->virtual_mailbox = t->box->virtual_vfuncs != NULL;
+ if (fctx->virtual_mailbox) {
+ hash_table_create(&fctx->last_indexed_virtual_uids,
+ default_pool, 0, str_hash, strcmp);
+ }
+ fctx->enforced = fts_enforced_parse(
+ mail_user_plugin_getenv(t->box->storage->user, "fts_enforced"));
+ i_array_init(&fctx->levels, 8);
+ fctx->scores = i_new(struct fts_scores, 1);
+ fctx->scores->refcount = 1;
+ i_array_init(&fctx->scores->score_map, 64);
+ MODULE_CONTEXT_SET(ctx, fts_storage_module, fctx);
+
+ /* FIXME: we'll assume that all the args are fuzzy. not good,
+ but would require much more work to fix it. */
+ if (!fts_args_have_fuzzy(args->args) &&
+ mail_user_plugin_getenv_bool(t->box->storage->user,
+ "fts_no_autofuzzy"))
+ fctx->flags |= FTS_LOOKUP_FLAG_NO_AUTO_FUZZY;
+ /* transaction contains the last search's scores. they can be
+ queried later with mail_get_special() */
+ if (ft->scores != NULL)
+ fts_scores_unref(&ft->scores);
+ ft->scores = fctx->scores;
+ ft->scores->refcount++;
+
+ if (fctx->enforced == FTS_ENFORCED_YES ||
+ fts_want_build_args(args->args))
+ fts_try_build_init(ctx, fctx);
+ else
+ fts_search_lookup(fctx);
+ return ctx;
+}
+
+static bool fts_mailbox_build_continue(struct mail_search_context *ctx)
+{
+ struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx);
+ int ret;
+
+ ret = fts_indexer_more(fctx->indexer_ctx);
+ if (ret == 0)
+ return FALSE;
+
+ /* indexing finished */
+ ctx->progress_hidden = FALSE;
+ if (fts_indexer_deinit(&fctx->indexer_ctx) < 0)
+ ret = -1;
+ if (ret > 0)
+ fts_search_lookup(fctx);
+ if (ret < 0) {
+ /* if indexing timed out, it probably means that
+ the mailbox is still being indexed, but it's a large
+ mailbox and it takes a while. in this situation
+ we'll simply abort the search.
+
+ if indexing failed for any other reason, just
+ fallback to searching the slow way. */
+ fctx->indexing_timed_out =
+ mailbox_get_last_mail_error(fctx->box) == MAIL_ERROR_INUSE;
+ }
+ return TRUE;
+}
+
+static bool
+fts_mailbox_search_next_nonblock(struct mail_search_context *ctx,
+ struct mail **mail_r, bool *tryagain_r)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+ struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+
+ if (fctx != NULL && fctx->indexer_ctx != NULL) {
+ /* this command is still building the indexes */
+ if (!fts_mailbox_build_continue(ctx)) {
+ *tryagain_r = TRUE;
+ return FALSE;
+ }
+ if (fctx->indexing_timed_out) {
+ *tryagain_r = FALSE;
+ return FALSE;
+ }
+ }
+ if (fctx != NULL && !fctx->fts_lookup_success &&
+ fctx->enforced != FTS_ENFORCED_NO)
+ return FALSE;
+
+ return fbox->module_ctx.super.
+ search_next_nonblock(ctx, mail_r, tryagain_r);
+}
+
+static void
+fts_search_apply_results_level(struct mail_search_context *ctx,
+ struct mail_search_arg *args, unsigned int *idx)
+{
+ struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx);
+ const struct fts_search_level *level;
+
+ level = array_idx(&fctx->levels, *idx);
+
+ if (array_is_created(&level->definite_seqs) &&
+ seq_range_exists(&level->definite_seqs, ctx->seq))
+ fts_search_deserialize_add_matches(args, level->args_matches);
+ else if (!array_is_created(&level->maybe_seqs) ||
+ !seq_range_exists(&level->maybe_seqs, ctx->seq))
+ fts_search_deserialize_add_nonmatches(args, level->args_matches);
+
+ for (; args != NULL; args = args->next) {
+ if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
+ continue;
+
+ *idx += 1;
+ fts_search_apply_results_level(ctx, args->value.subargs, idx);
+ }
+}
+
+static bool fts_mailbox_search_next_update_seq(struct mail_search_context *ctx)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+ struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+ unsigned int idx;
+
+ if (fctx == NULL || !fctx->fts_lookup_success) {
+ /* fts lookup not done for this search */
+ if (fctx != NULL && fctx->indexing_timed_out)
+ return FALSE;
+ return fbox->module_ctx.super.search_next_update_seq(ctx);
+ }
+
+ /* restore original [non]matches */
+ fts_search_deserialize(ctx->args->args, fctx->orig_matches);
+
+ if (!fbox->module_ctx.super.search_next_update_seq(ctx))
+ return FALSE;
+
+ if (ctx->seq >= fctx->first_unindexed_seq) {
+ /* we've not indexed this far */
+ return TRUE;
+ }
+
+ /* apply [non]matches based on the FTS lookup results */
+ idx = 0;
+ fts_search_apply_results_level(ctx, ctx->args->args, &idx);
+ return TRUE;
+}
+
+static int fts_mailbox_search_deinit(struct mail_search_context *ctx)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction);
+ struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+ int ret = 0;
+
+ if (fctx != NULL) {
+ if (fctx->virtual_mailbox)
+ hash_table_destroy(&fctx->last_indexed_virtual_uids);
+ if (fctx->indexer_ctx != NULL) {
+ if (fts_indexer_deinit(&fctx->indexer_ctx) < 0)
+ ft->failure_reason = "FTS indexing failed";
+ }
+ if (fctx->indexing_timed_out)
+ ret = -1;
+ else if (!fctx->fts_lookup_success &&
+ fctx->enforced != FTS_ENFORCED_NO) {
+ /* FTS lookup failed and we didn't want to fallback to
+ opening all the mails and searching manually */
+ mail_storage_set_internal_error(ctx->transaction->box->storage);
+ ret = -1;
+ }
+
+ buffer_free(&fctx->orig_matches);
+ array_free(&fctx->levels);
+ pool_unref(&fctx->result_pool);
+ fts_scores_unref(&fctx->scores);
+ i_free(fctx);
+ }
+ if (fbox->module_ctx.super.search_deinit(ctx) < 0)
+ ret = -1;
+ return ret;
+}
+
+static int fts_score_cmp(const uint32_t *uid, const struct fts_score_map *score)
+{
+ return *uid < score->uid ? -1 :
+ (*uid > score->uid ? 1 : 0);
+}
+
+static int fts_mail_get_special(struct mail *_mail, enum mail_fetch_field field,
+ const char **value_r)
+{
+ struct mail_private *mail = (struct mail_private *)_mail;
+ struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail);
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction);
+ const struct fts_score_map *scores;
+
+ if (field != MAIL_FETCH_SEARCH_RELEVANCY || ft->scores == NULL)
+ scores = NULL;
+ else {
+ scores = array_bsearch(&ft->scores->score_map, &_mail->uid,
+ fts_score_cmp);
+ }
+ if (scores != NULL) {
+ i_assert(scores->uid == _mail->uid);
+ (void)i_snprintf(fmail->score, sizeof(fmail->score),
+ "%f", scores->score);
+
+ *value_r = fmail->score;
+ return 0;
+ }
+
+ return fmail->module_ctx.super.get_special(_mail, field, value_r);
+}
+
+static int
+fts_mail_precache_range(struct mailbox_transaction_context *trans,
+ struct fts_backend_update_context *update_ctx,
+ uint32_t seq1, uint32_t seq2, unsigned int *extra_count)
+{
+ struct mail_search_args *search_args;
+ struct mail_search_context *ctx;
+ struct mail *mail;
+ int ret = 0;
+
+ search_args = mail_search_build_init();
+ mail_search_build_add_seqset(search_args, seq1, seq2);
+ ctx = mailbox_search_init(trans, search_args, NULL,
+ MAIL_FETCH_STREAM_HEADER |
+ MAIL_FETCH_STREAM_BODY, NULL);
+ mail_search_args_unref(&search_args);
+
+ while (mailbox_search_next(ctx, &mail)) {
+ if (fts_build_mail(update_ctx, mail) < 0) {
+ ret = -1;
+ break;
+ }
+ if (mail_precache(mail) < 0) {
+ ret = -1;
+ break;
+ }
+ *extra_count += 1;
+ }
+ if (mailbox_search_deinit(&ctx) < 0)
+ ret = -1;
+ return ret;
+}
+
+static int fts_mail_precache_init(struct mail *_mail)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction);
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list);
+ uint32_t last_seq;
+
+ if (fts_mailbox_get_last_cached_seq(_mail->box, &last_seq) < 0) {
+ ft->failure_reason = "Failed to lookup last indexed FTS mail";
+ return -1;
+ }
+
+ ft->precached = TRUE;
+ ft->next_index_seq = last_seq + 1;
+ if (flist->update_ctx == NULL)
+ flist->update_ctx = fts_backend_update_init(flist->backend);
+ flist->update_ctx_refcount++;
+ return 0;
+}
+
+static int fts_mail_index(struct mail *_mail)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction);
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list);
+ struct mail_private *pmail = (struct mail_private *)_mail;
+
+ if (ft->failure_reason != NULL)
+ return -1;
+
+ if (!ft->precached) {
+ if (fts_mail_precache_init(_mail) < 0)
+ return -1;
+ }
+ if (pmail->vmail != NULL) {
+ /* Indexing via virtual mailbox: Index all the mails in this
+ same real mailbox. */
+ uint32_t msgs_count =
+ mail_index_view_get_messages_count(_mail->box->view);
+
+ fts_backend_update_set_mailbox(flist->update_ctx, _mail->box);
+ if (ft->next_index_seq > msgs_count) {
+ /* everything indexed already */
+ return 0;
+ } else if (fts_mail_precache_range(_mail->transaction,
+ flist->update_ctx,
+ ft->next_index_seq,
+ msgs_count,
+ &ft->precache_extra_count) < 0) {
+ return -1;
+ } else {
+ ft->next_index_seq = msgs_count+1;
+ return 0;
+ }
+ }
+
+ if (ft->next_index_seq < _mail->seq) {
+ /* we'll first need to index all the missing mails up to the
+ current one. */
+ fts_backend_update_set_mailbox(flist->update_ctx, _mail->box);
+ if (fts_mail_precache_range(_mail->transaction,
+ flist->update_ctx,
+ ft->next_index_seq,
+ _mail->seq-1,
+ &ft->precache_extra_count) < 0)
+ return -1;
+ ft->next_index_seq = _mail->seq;
+ }
+
+ if (ft->next_index_seq == _mail->seq) {
+ fts_backend_update_set_mailbox(flist->update_ctx, _mail->box);
+ if (fts_build_mail(flist->update_ctx, _mail) < 0)
+ return -1;
+ ft->next_index_seq = _mail->seq + 1;
+ }
+ return 0;
+}
+
+static int fts_mail_precache(struct mail *_mail)
+{
+ struct mail_private *mail = (struct mail_private *)_mail;
+ struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail);
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction);
+ int ret = 0;
+
+ fmail->module_ctx.super.precache(_mail);
+ if (fmail->virtual_mail) {
+ if (ft->highest_virtual_uid < _mail->uid)
+ ft->highest_virtual_uid = _mail->uid;
+ } else if (!ft->indexing) T_BEGIN {
+ /* avoid recursing here from fts_mail_precache_range() */
+ struct event_reason *reason =
+ event_reason_begin("fts:index");
+ ft->indexing = TRUE;
+ ret = fts_mail_index(_mail);
+ i_assert(ft->indexing);
+ ft->indexing = FALSE;
+ event_reason_end(&reason);
+ } T_END;
+ return ret;
+}
+
+void fts_mail_allocated(struct mail *_mail)
+{
+ struct mail_private *mail = (struct mail_private *)_mail;
+ struct mail_vfuncs *v = mail->vlast;
+ struct fts_mailbox *fbox = FTS_CONTEXT(_mail->box);
+ struct fts_mail *fmail;
+
+ if (fbox == NULL)
+ return;
+
+ fmail = p_new(mail->pool, struct fts_mail, 1);
+ fmail->module_ctx.super = *v;
+ mail->vlast = &fmail->module_ctx.super;
+ fmail->virtual_mail = _mail->box->virtual_vfuncs != NULL;
+
+ v->get_special = fts_mail_get_special;
+ v->precache = fts_mail_precache;
+ MODULE_CONTEXT_SET(mail, fts_mail_module, fmail);
+}
+
+static struct mailbox_transaction_context *
+fts_transaction_begin(struct mailbox *box,
+ enum mailbox_transaction_flags flags,
+ const char *reason)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box);
+ struct mailbox_transaction_context *t;
+ struct fts_transaction_context *ft;
+
+ ft = i_new(struct fts_transaction_context, 1);
+
+ t = fbox->module_ctx.super.transaction_begin(box, flags, reason);
+ MODULE_CONTEXT_SET(t, fts_storage_module, ft);
+ return t;
+}
+
+static int fts_transaction_end(struct mailbox_transaction_context *t, const char **error_r)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t);
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list);
+ int ret = 0;
+
+ if (ft->failure_reason != NULL) {
+ *error_r = t_strdup(ft->failure_reason);
+ ret = -1;
+ }
+
+ struct event_reason *reason = event_reason_begin("fts:index");
+ if (ft->precached) {
+ i_assert(flist->update_ctx_refcount > 0);
+ if (--flist->update_ctx_refcount == 0) {
+ if (fts_backend_update_deinit(&flist->update_ctx) < 0) {
+ ret = -1;
+ *error_r = "backend deinit";
+ }
+ }
+ } else if (ft->highest_virtual_uid > 0) {
+ if (fts_index_set_last_uid(t->box, ft->highest_virtual_uid) < 0) {
+ ret = -1;
+ *error_r = "index last uid setting";
+ }
+ }
+ if (ft->scores != NULL)
+ fts_scores_unref(&ft->scores);
+ if (ft->precache_extra_count > 0) {
+ if (ret < 0) {
+ i_error("fts: Failed after indexing %u extra mails internally in %s: %s",
+ ft->precache_extra_count, t->box->vname, *error_r);
+ } else {
+ i_info("fts: Indexed %u extra mails internally in %s",
+ ft->precache_extra_count, t->box->vname);
+ }
+ }
+ event_reason_end(&reason);
+ i_free(ft);
+ return ret;
+}
+
+static void fts_transaction_rollback(struct mailbox_transaction_context *t)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box);
+ const char *error;
+
+ (void)fts_transaction_end(t, &error);
+ fbox->module_ctx.super.transaction_rollback(t);
+}
+
+static void fts_queue_index(struct mailbox *box)
+{
+ struct mail_user *user = box->storage->user;
+ string_t *str = t_str_new(256);
+ const char *path, *value;
+ unsigned int max_recent_msgs;
+ int fd;
+
+ path = t_strconcat(user->set->base_dir, "/"INDEXER_SOCKET_NAME, NULL);
+ fd = net_connect_unix(path);
+ if (fd == -1) {
+ i_error("net_connect_unix(%s) failed: %m", path);
+ return;
+ }
+
+ value = mail_user_plugin_getenv(user, "fts_autoindex_max_recent_msgs");
+ if (value == NULL || str_to_uint(value, &max_recent_msgs) < 0)
+ max_recent_msgs = 0;
+
+ str_append(str, INDEXER_HANDSHAKE);
+ str_append(str, "APPEND\t0\t");
+ str_append_tabescaped(str, user->username);
+ str_append_c(str, '\t');
+ str_append_tabescaped(str, box->vname);
+ str_printfa(str, "\t%u", max_recent_msgs);
+ str_append_c(str, '\t');
+ str_append_tabescaped(str, box->storage->user->session_id);
+ str_append_c(str, '\n');
+ if (write_full(fd, str_data(str), str_len(str)) < 0)
+ i_error("write(%s) failed: %m", path);
+ i_close_fd(&fd);
+}
+
+static int
+fts_transaction_commit(struct mailbox_transaction_context *t,
+ struct mail_transaction_commit_changes *changes_r)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t);
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box);
+ struct mailbox *box = t->box;
+ bool autoindex;
+ int ret = 0;
+ const char *error;
+
+ autoindex = ft->mails_saved && !fbox->fts_mailbox_excluded &&
+ mail_user_plugin_getenv_bool(box->storage->user,
+ "fts_autoindex");
+
+ if (fts_transaction_end(t, &error) < 0) {
+ mail_storage_set_error(t->box->storage, MAIL_ERROR_TEMP,
+ t_strdup_printf("FTS transaction commit failed: %s",
+ error));
+ ret = -1;
+ }
+ if (fbox->module_ctx.super.transaction_commit(t, changes_r) < 0)
+ ret = -1;
+ if (ret < 0)
+ return -1;
+
+ if (autoindex)
+ fts_queue_index(box);
+ return 0;
+}
+
+static void fts_mailbox_sync_notify(struct mailbox *box, uint32_t uid,
+ enum mailbox_sync_type sync_type)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list);
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box);
+
+ if (fbox->module_ctx.super.sync_notify != NULL)
+ fbox->module_ctx.super.sync_notify(box, uid, sync_type);
+
+ if (sync_type != MAILBOX_SYNC_TYPE_EXPUNGE) {
+ if (uid == 0 && fbox->sync_update_ctx != NULL) {
+ /* this sync is finished */
+ (void)fts_backend_update_deinit(&fbox->sync_update_ctx);
+ }
+ return;
+ }
+
+ if (fbox->sync_update_ctx == NULL) {
+ if (fts_backend_is_updating(flist->backend)) {
+ /* FIXME: maildir workaround - we could get here
+ because we're building an index, which doesn't find
+ some mail and starts syncing the mailbox.. */
+ return;
+ }
+ fbox->sync_update_ctx = fts_backend_update_init(flist->backend);
+ fts_backend_update_set_mailbox(fbox->sync_update_ctx, box);
+ }
+ fts_backend_update_expunge(fbox->sync_update_ctx, uid);
+}
+
+static int fts_sync_deinit(struct mailbox_sync_context *ctx,
+ struct mailbox_sync_status *status_r)
+{
+ struct mailbox *box = ctx->box;
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box);
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list);
+ bool optimize;
+ int ret = 0;
+
+ optimize = (ctx->flags & (MAILBOX_SYNC_FLAG_FORCE_RESYNC |
+ MAILBOX_SYNC_FLAG_OPTIMIZE)) != 0;
+ if (fbox->module_ctx.super.sync_deinit(ctx, status_r) < 0)
+ return -1;
+ ctx = NULL;
+
+ if (optimize) {
+ i_assert(flist != NULL);
+ if (fts_backend_optimize(flist->backend) < 0) {
+ mailbox_set_critical(box, "FTS optimize failed");
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int fts_save_finish(struct mail_save_context *ctx)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction);
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+
+ if (fbox->module_ctx.super.save_finish(ctx) < 0)
+ return -1;
+ ft->mails_saved = TRUE;
+ return 0;
+}
+
+static int fts_copy(struct mail_save_context *ctx, struct mail *mail)
+{
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction);
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+
+ if (fbox->module_ctx.super.copy(ctx, mail) < 0)
+ return -1;
+ ft->mails_saved = TRUE;
+ return 0;
+}
+
+static void fts_mailbox_virtual_match_mail(struct mail_search_context *ctx,
+ struct mail *mail)
+{
+ struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+ unsigned int idx, be_last_uid;
+
+ if (fctx == NULL || !fctx->fts_lookup_success || !fctx->virtual_mailbox ||
+ ctx->seq < fctx->first_unindexed_seq)
+ return;
+ /* Table of last indexed UID per backend mailbox */
+ HASH_TABLE_TYPE(virtual_last_indexed) hash_tbl =
+ fctx->last_indexed_virtual_uids;
+
+ struct mail *backend_mail;
+ if (mail->box->mail_vfuncs->get_backend_mail(mail, &backend_mail) < 0)
+ return;
+ const char *box_name = backend_mail->box->vname;
+ /* Get the last indexed UID in the backend mailbox */
+ void *uid_value =
+ hash_table_lookup(fctx->last_indexed_virtual_uids, box_name);
+ if (uid_value == NULL) {
+ /* This backend's last indexed uid is not yet inserted to the table */
+ struct fts_mailbox_list *flist =
+ FTS_LIST_CONTEXT(backend_mail->box->list);
+ if (flist == NULL || flist->failed ||
+ mailbox_open(backend_mail->box) < 0 ||
+ fts_backend_get_last_uid(flist->backend, backend_mail->box,
+ &be_last_uid) < 0) {
+ be_last_uid = 0;
+ } else {
+ const char *vname_copy =
+ p_strdup(fctx->result_pool, backend_mail->box->vname);
+ hash_table_insert(hash_tbl, vname_copy,
+ POINTER_CAST(be_last_uid + 1));
+ }
+ } else {
+ be_last_uid = POINTER_CAST_TO(uid_value, uint32_t) - 1;
+ }
+ if (backend_mail->uid <= be_last_uid) {
+ /* Mail was already indexed in the backend mailbox.
+ Apply [non]matches based on the FTS lookup results */
+ struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction);
+
+ if (fctx->next_unindexed_seq == mail->seq) {
+ fctx->next_unindexed_seq++;
+ ft->highest_virtual_uid = mail->uid;
+ }
+ idx = 0;
+ fts_search_apply_results_level(ctx, ctx->args->args, &idx);
+ } else {
+ fctx->virtual_seen_unindexed_gaps = TRUE;
+ }
+}
+
+static int fts_mailbox_search_next_match_mail(struct mail_search_context *ctx,
+ struct mail *mail)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box);
+
+ fts_mailbox_virtual_match_mail(ctx, mail);
+ return fbox->module_ctx.super.search_next_match_mail(ctx, mail);
+}
+
+void fts_mailbox_allocated(struct mailbox *box)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list);
+ struct mailbox_vfuncs *v = box->vlast;
+ struct fts_mailbox *fbox;
+
+ if (flist == NULL || flist->failed)
+ return;
+
+ fbox = p_new(box->pool, struct fts_mailbox, 1);
+ fbox->module_ctx.super = *v;
+ box->vlast = &fbox->module_ctx.super;
+ fbox->fts_mailbox_excluded = fts_user_autoindex_exclude(box);
+
+ v->get_status = fts_mailbox_get_status;
+ v->search_init = fts_mailbox_search_init;
+ v->search_next_nonblock = fts_mailbox_search_next_nonblock;
+ v->search_next_update_seq = fts_mailbox_search_next_update_seq;
+ v->search_deinit = fts_mailbox_search_deinit;
+ v->transaction_begin = fts_transaction_begin;
+ v->transaction_rollback = fts_transaction_rollback;
+ v->transaction_commit = fts_transaction_commit;
+ v->sync_notify = fts_mailbox_sync_notify;
+ v->sync_deinit = fts_sync_deinit;
+ v->save_finish = fts_save_finish;
+ v->copy = fts_copy;
+ v->search_next_match_mail = fts_mailbox_search_next_match_mail;
+
+ MODULE_CONTEXT_SET(box, fts_storage_module, fbox);
+}
+
+static void fts_mailbox_list_deinit(struct mailbox_list *list)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(list);
+
+ if (flist->backend != NULL)
+ fts_backend_deinit(&flist->backend);
+ flist->module_ctx.super.deinit(list);
+}
+
+static int
+fts_init_namespace(struct fts_mailbox_list *flist, struct mail_namespace *ns,
+ const char **error_r)
+{
+ struct fts_backend *backend;
+ if (fts_backend_init(flist->backend_name, ns, error_r, &backend) < 0) {
+ flist->failed = TRUE;
+ return -1;
+ }
+ flist->backend = backend;
+ if ((flist->backend->flags & FTS_BACKEND_FLAG_FUZZY_SEARCH) != 0)
+ ns->user->fuzzy_search = TRUE;
+ return 0;
+}
+
+void fts_mail_namespaces_added(struct mail_namespace *ns)
+{
+ while(ns != NULL) {
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(ns->list);
+ const char *error;
+
+ if (flist != NULL && !flist->failed && flist->backend == NULL &&
+ fts_init_namespace(flist, ns, &error) < 0) {
+ i_error("fts: Failed to initialize backend '%s': %s",
+ flist->backend_name, error);
+ }
+ ns = ns->next;
+ }
+}
+
+void
+fts_mailbox_list_created(struct mailbox_list *list)
+{
+ const char *name = mail_user_plugin_getenv(list->ns->user, "fts");
+ const char *path;
+
+ if (name == NULL || name[0] == '\0') {
+ e_debug(list->ns->user->event,
+ "fts: No fts setting - plugin disabled");
+ return;
+ }
+
+ if (!mailbox_list_get_root_path(list, MAILBOX_LIST_PATH_TYPE_INDEX, &path)) {
+ e_debug(list->ns->user->event,
+ "fts: Indexes disabled for namespace '%s'",
+ list->ns->prefix);
+ return;
+ }
+
+ struct fts_mailbox_list *flist;
+ struct mailbox_list_vfuncs *v = list->vlast;
+
+ flist = p_new(list->pool, struct fts_mailbox_list, 1);
+ flist->module_ctx.super = *v;
+ flist->backend_name = name;
+ list->vlast = &flist->module_ctx.super;
+ v->deinit = fts_mailbox_list_deinit;
+ MODULE_CONTEXT_SET(list, fts_mailbox_list_module, flist);
+}
+
+struct fts_backend *fts_mailbox_backend(struct mailbox *box)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list);
+
+ return flist->backend;
+}
+
+struct fts_backend *fts_list_backend(struct mailbox_list *list)
+{
+ struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(list);
+
+ return flist == NULL ? NULL : flist->backend;
+}
diff --git a/src/plugins/fts/fts-storage.h b/src/plugins/fts/fts-storage.h
new file mode 100644
index 0000000..ea28ed2
--- /dev/null
+++ b/src/plugins/fts/fts-storage.h
@@ -0,0 +1,70 @@
+#ifndef FTS_STORAGE_H
+#define FTS_STORAGE_H
+
+#include "mail-storage-private.h"
+#include "fts-api.h"
+
+enum fts_enforced {
+ FTS_ENFORCED_NO,
+ FTS_ENFORCED_YES,
+ FTS_ENFORCED_BODY,
+};
+
+struct fts_scores {
+ int refcount;
+ ARRAY_TYPE(fts_score_map) score_map;
+};
+
+struct fts_search_level {
+ ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs;
+ buffer_t *args_matches;
+ ARRAY_TYPE(fts_score_map) score_map;
+};
+
+HASH_TABLE_DEFINE_TYPE(virtual_last_indexed, const char *, void *);
+
+struct fts_search_context {
+ union mail_search_module_context module_ctx;
+
+ struct fts_backend *backend;
+ struct mailbox *box;
+ struct mailbox_transaction_context *t;
+ struct mail_search_args *args;
+ enum fts_lookup_flags flags;
+ enum fts_enforced enforced;
+
+ pool_t result_pool;
+ ARRAY(struct fts_search_level) levels;
+ buffer_t *orig_matches;
+
+ uint32_t first_unindexed_seq;
+ uint32_t next_unindexed_seq;
+ HASH_TABLE_TYPE(virtual_last_indexed) last_indexed_virtual_uids;
+
+ /* final scores, combined from all levels */
+ struct fts_scores *scores;
+
+ struct fts_indexer_context *indexer_ctx;
+ struct fts_search_state *search_state;
+
+ bool virtual_mailbox:1;
+ bool fts_lookup_success:1;
+ bool indexing_timed_out:1;
+ bool virtual_seen_unindexed_gaps:1;
+};
+
+/* Figure out if we want to use full text search indexes and update
+ backends in fctx accordingly. */
+void fts_search_analyze(struct fts_search_context *fctx);
+/* Perform the actual index lookup and update definite_uids and maybe_uids. */
+void fts_search_lookup(struct fts_search_context *fctx);
+/* Returns FTS backend for the given mailbox (assumes it has one). */
+struct fts_backend *fts_mailbox_backend(struct mailbox *box);
+/* Returns FTS backend for the given mailbox list, or NULL if it has none. */
+struct fts_backend *fts_list_backend(struct mailbox_list *list);
+
+void fts_mail_allocated(struct mail *mail);
+void fts_mail_namespaces_added(struct mail_namespace *ns);
+void fts_mailbox_allocated(struct mailbox *box);
+void fts_mailbox_list_created(struct mailbox_list *list);
+#endif
diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c
new file mode 100644
index 0000000..3c813cd
--- /dev/null
+++ b/src/plugins/fts/fts-user.c
@@ -0,0 +1,423 @@
+/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "module-context.h"
+#include "mail-user.h"
+#include "mail-storage-private.h"
+#include "mailbox-match-plugin.h"
+#include "fts-language.h"
+#include "fts-filter.h"
+#include "fts-tokenizer.h"
+#include "fts-user.h"
+
+#define FTS_USER_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_user_module)
+
+struct fts_user {
+ union mail_user_module_context module_ctx;
+ int refcount;
+
+ struct fts_language_list *lang_list;
+ struct fts_user_language *data_lang;
+ ARRAY_TYPE(fts_user_language) languages, data_languages;
+
+ struct mailbox_match_plugin *autoindex_exclude;
+};
+
+static MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
+ &mail_user_module_register);
+
+static const char *const *str_keyvalues_to_array(const char *str)
+{
+ const char *key, *value, *const *keyvalues;
+ ARRAY_TYPE(const_string) arr;
+ unsigned int i;
+
+ if (str == NULL)
+ return NULL;
+
+ t_array_init(&arr, 8);
+ keyvalues = t_strsplit_spaces(str, " ");
+ for (i = 0; keyvalues[i] != NULL; i++) {
+ value = strchr(keyvalues[i], '=');
+ if (value != NULL)
+ key = t_strdup_until(keyvalues[i], value++);
+ else {
+ key = keyvalues[i];
+ value = "";
+ }
+ array_push_back(&arr, &key);
+ array_push_back(&arr, &value);
+ }
+ array_append_zero(&arr);
+ return array_front(&arr);
+}
+
+static int
+fts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
+ const char **error_r)
+{
+ const char *languages, *unknown;
+ const char *lang_config[3] = {NULL, NULL, NULL};
+
+ languages = mail_user_plugin_getenv(user, "fts_languages");
+ if (languages == NULL) {
+ *error_r = "fts_languages setting is missing";
+ return -1;
+ }
+
+ lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
+ if (lang_config[1] != NULL)
+ lang_config[0] = "fts_language_config";
+ if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0)
+ return -1;
+
+ if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
+ *error_r = t_strdup_printf(
+ "fts_languages: Unknown language '%s'", unknown);
+ return -1;
+ }
+ if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
+ *error_r = "fts_languages setting is empty";
+ return -1;
+ }
+ return 0;
+}
+
+static int
+fts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
+ struct fts_filter **filter_r, const char **error_r)
+{
+ const struct fts_filter *filter_class;
+ struct fts_filter *filter = NULL, *parent = NULL;
+ const char *filters_key, *const *filters, *filter_set_name;
+ const char *str, *error, *set_key;
+ unsigned int i;
+ int ret = 0;
+
+ /* try to get the language-specific filters first */
+ filters_key = t_strconcat("fts_filters_", lang->name, NULL);
+ str = mail_user_plugin_getenv(user, filters_key);
+ if (str == NULL) {
+ /* fallback to global filters */
+ filters_key = "fts_filters";
+ str = mail_user_plugin_getenv(user, filters_key);
+ if (str == NULL) {
+ /* No filters */
+ *filter_r = NULL;
+ return 0;
+ }
+ }
+
+ filters = t_strsplit_spaces(str, " ");
+ for (i = 0; filters[i] != NULL; i++) {
+ filter_class = fts_filter_find(filters[i]);
+ if (filter_class == NULL) {
+ *error_r = t_strdup_printf("%s: Unknown filter '%s'",
+ filters_key, filters[i]);
+ ret = -1;
+ break;
+ }
+
+ /* try the language-specific setting first */
+ filter_set_name = t_str_replace(filters[i], '-', '_');
+ set_key = t_strdup_printf("fts_filter_%s_%s",
+ lang->name, filter_set_name);
+ str = mail_user_plugin_getenv(user, set_key);
+ if (str == NULL) {
+ set_key = t_strdup_printf("fts_filter_%s", filter_set_name);
+ str = mail_user_plugin_getenv(user, set_key);
+ }
+
+ if (fts_filter_create(filter_class, parent, lang,
+ str_keyvalues_to_array(str),
+ &filter, &error) < 0) {
+ *error_r = t_strdup_printf("%s: %s", set_key, error);
+ ret = -1;
+ break;
+ }
+ if (parent != NULL)
+ fts_filter_unref(&parent);
+ parent = filter;
+ }
+ if (ret < 0) {
+ if (parent != NULL)
+ fts_filter_unref(&parent);
+ return -1;
+ }
+ *filter_r = filter;
+ return 0;
+}
+
+static int
+fts_user_create_tokenizer(struct mail_user *user,
+ const struct fts_language *lang,
+ struct fts_tokenizer **tokenizer_r, bool search,
+ const char **error_r)
+{
+ const struct fts_tokenizer *tokenizer_class;
+ struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
+ const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
+ const char *str, *error, *set_key;
+ unsigned int i;
+ int ret = 0;
+
+ tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL);
+ str = mail_user_plugin_getenv(user, tokenizers_key);
+ if (str == NULL) {
+ str = mail_user_plugin_getenv(user, "fts_tokenizers");
+ if (str == NULL) {
+ *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key);
+ return -1;
+ }
+ tokenizers_key = "fts_tokenizers";
+ }
+
+ tokenizers = t_strsplit_spaces(str, " ");
+
+ for (i = 0; tokenizers[i] != NULL; i++) {
+ tokenizer_class = fts_tokenizer_find(tokenizers[i]);
+ if (tokenizer_class == NULL) {
+ *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
+ tokenizers_key, tokenizers[i]);
+ ret = -1;
+ break;
+ }
+
+ tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
+ set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name);
+ str = mail_user_plugin_getenv(user, set_key);
+ if (str == NULL) {
+ set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name);
+ str = mail_user_plugin_getenv(user, set_key);
+ }
+
+ /* tell the tokenizers that we're tokenizing a search string
+ (instead of tokenizing indexed data) */
+ if (search)
+ str = t_strconcat("search=yes ", str, NULL);
+
+ if (fts_tokenizer_create(tokenizer_class, parent,
+ str_keyvalues_to_array(str),
+ &tokenizer, &error) < 0) {
+ *error_r = t_strdup_printf("%s: %s", set_key, error);
+ ret = -1;
+ break;
+ }
+ if (parent != NULL)
+ fts_tokenizer_unref(&parent);
+ parent = tokenizer;
+ }
+ if (ret < 0) {
+ if (parent != NULL)
+ fts_tokenizer_unref(&parent);
+ return -1;
+ }
+ *tokenizer_r = tokenizer;
+ return 0;
+}
+
+static int
+fts_user_language_init_tokenizers(struct mail_user *user,
+ struct fts_user_language *user_lang,
+ const char **error_r)
+{
+ if (fts_user_create_tokenizer(user, user_lang->lang,
+ &user_lang->index_tokenizer, FALSE,
+ error_r) < 0)
+ return -1;
+
+ if (fts_user_create_tokenizer(user, user_lang->lang,
+ &user_lang->search_tokenizer, TRUE,
+ error_r) < 0)
+ return -1;
+ return 0;
+}
+
+struct fts_user_language *
+fts_user_language_find(struct mail_user *user,
+ const struct fts_language *lang)
+{
+ struct fts_user_language *user_lang;
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ i_assert(fuser != NULL);
+ array_foreach_elem(&fuser->languages, user_lang) {
+ if (strcmp(user_lang->lang->name, lang->name) == 0)
+ return user_lang;
+ }
+ return NULL;
+}
+
+static int fts_user_language_create(struct mail_user *user,
+ struct fts_user *fuser,
+ const struct fts_language *lang,
+ const char **error_r)
+{
+ struct fts_user_language *user_lang;
+
+ user_lang = p_new(user->pool, struct fts_user_language, 1);
+ user_lang->lang = lang;
+ array_push_back(&fuser->languages, &user_lang);
+
+ if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
+ return -1;
+ if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0)
+ return -1;
+ return 0;
+}
+
+static int fts_user_languages_fill_all(struct mail_user *user,
+ struct fts_user *fuser,
+ const char **error_r)
+{
+ const struct fts_language *lang;
+
+ array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) {
+ if (fts_user_language_create(user, fuser, lang, error_r) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser,
+ const char **error_r)
+{
+ struct fts_user_language *user_lang;
+ const char *error;
+
+ user_lang = p_new(user->pool, struct fts_user_language, 1);
+ user_lang->lang = &fts_language_data;
+
+ if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0)
+ return -1;
+
+ if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL,
+ &user_lang->filter, &error) < 0)
+ i_unreached();
+ i_assert(user_lang->filter != NULL);
+
+ p_array_init(&fuser->data_languages, user->pool, 1);
+ array_push_back(&fuser->data_languages, &user_lang);
+ array_push_back(&fuser->languages, &user_lang);
+
+ fuser->data_lang = user_lang;
+ return 0;
+}
+
+struct fts_language_list *fts_user_get_language_list(struct mail_user *user)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ i_assert(fuser != NULL);
+ return fuser->lang_list;
+}
+
+const ARRAY_TYPE(fts_user_language) *
+fts_user_get_all_languages(struct mail_user *user)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ i_assert(fuser != NULL);
+ return &fuser->languages;
+}
+
+const ARRAY_TYPE(fts_user_language) *
+fts_user_get_data_languages(struct mail_user *user)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ i_assert(fuser != NULL);
+ return &fuser->data_languages;
+}
+
+struct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ i_assert(fuser != NULL);
+ return fuser->data_lang;
+}
+
+bool fts_user_autoindex_exclude(struct mailbox *box)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user);
+
+ return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box);
+}
+
+static void fts_user_language_free(struct fts_user_language *user_lang)
+{
+ if (user_lang->filter != NULL)
+ fts_filter_unref(&user_lang->filter);
+ if (user_lang->index_tokenizer != NULL)
+ fts_tokenizer_unref(&user_lang->index_tokenizer);
+ if (user_lang->search_tokenizer != NULL)
+ fts_tokenizer_unref(&user_lang->search_tokenizer);
+}
+
+static void fts_user_free(struct fts_user *fuser)
+{
+ struct fts_user_language *user_lang;
+
+ if (fuser->lang_list != NULL)
+ fts_language_list_deinit(&fuser->lang_list);
+
+ if (array_is_created(&fuser->languages)) {
+ array_foreach_elem(&fuser->languages, user_lang)
+ fts_user_language_free(user_lang);
+ }
+ mailbox_match_plugin_deinit(&fuser->autoindex_exclude);
+}
+
+static int
+fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser,
+ const char **error_r)
+{
+ p_array_init(&fuser->languages, user->pool, 4);
+
+ if (fts_user_init_languages(user, fuser, error_r) < 0 ||
+ fts_user_init_data_language(user, fuser, error_r) < 0)
+ return -1;
+ if (fts_user_languages_fill_all(user, fuser, error_r) < 0)
+ return -1;
+ return 0;
+}
+
+int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
+ const char **error_r)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ if (fuser != NULL) {
+ /* multiple fts plugins are loaded */
+ fuser->refcount++;
+ return 0;
+ }
+
+ fuser = p_new(user->pool, struct fts_user, 1);
+ fuser->refcount = 1;
+ if (initialize_libfts) {
+ if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) {
+ fts_user_free(fuser);
+ return -1;
+ }
+ }
+ fuser->autoindex_exclude =
+ mailbox_match_plugin_init(user, "fts_autoindex_exclude");
+
+ MODULE_CONTEXT_SET(user, fts_user_module, fuser);
+ return 0;
+}
+
+void fts_mail_user_deinit(struct mail_user *user)
+{
+ struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+ if (fuser != NULL) {
+ i_assert(fuser->refcount > 0);
+ if (--fuser->refcount == 0)
+ fts_user_free(fuser);
+ }
+}
diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h
new file mode 100644
index 0000000..043f4e1
--- /dev/null
+++ b/src/plugins/fts/fts-user.h
@@ -0,0 +1,27 @@
+#ifndef FTS_USER_H
+#define FTS_USER_H
+
+struct fts_user_language {
+ const struct fts_language *lang;
+ struct fts_filter *filter;
+ struct fts_tokenizer *index_tokenizer, *search_tokenizer;
+};
+ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *);
+
+struct fts_user_language *
+fts_user_language_find(struct mail_user *user,
+ const struct fts_language *lang);
+struct fts_language_list *fts_user_get_language_list(struct mail_user *user);
+const ARRAY_TYPE(fts_user_language) *
+fts_user_get_all_languages(struct mail_user *user);
+struct fts_user_language *fts_user_get_data_lang(struct mail_user *user);
+const ARRAY_TYPE(fts_user_language) *
+fts_user_get_data_languages(struct mail_user *user);
+
+bool fts_user_autoindex_exclude(struct mailbox *box);
+
+int fts_mail_user_init(struct mail_user *user, bool initialize_libfts,
+ const char **error_r);
+void fts_mail_user_deinit(struct mail_user *user);
+
+#endif
diff --git a/src/plugins/fts/xml2text.c b/src/plugins/fts/xml2text.c
new file mode 100644
index 0000000..f3c573c
--- /dev/null
+++ b/src/plugins/fts/xml2text.c
@@ -0,0 +1,44 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "message-parser.h"
+#include "fts-parser.h"
+
+#include <unistd.h>
+
+int main(void)
+{
+ struct fts_parser *parser;
+ unsigned char buf[IO_BLOCK_SIZE];
+ struct message_block block;
+ ssize_t ret;
+ struct fts_parser_context parser_context = {.content_type = "text/html"};
+
+ lib_init();
+
+ parser = fts_parser_html.try_init(&parser_context);
+ i_assert(parser != NULL);
+
+ i_zero(&block);
+ while ((ret = read(STDIN_FILENO, buf, sizeof(buf))) > 0) {
+ block.data = buf;
+ block.size = ret;
+ parser->v.more(parser, &block);
+ if (write(STDOUT_FILENO, block.data, block.size) < 0)
+ i_fatal("write(stdout) failed: %m");
+ }
+ if (ret < 0)
+ i_fatal("read(stdin) failed: %m");
+
+ for (;;) {
+ block.size = 0;
+ parser->v.more(parser, &block);
+ if (block.size == 0)
+ break;
+ if (write(STDOUT_FILENO, block.data, block.size) < 0)
+ i_fatal("write(stdout) failed: %m");
+ }
+
+ lib_deinit();
+ return 0;
+}