diff options
Diffstat (limited to 'src/plugins/fts')
32 files changed, 7651 insertions, 0 deletions
diff --git a/src/plugins/fts/Makefile.am b/src/plugins/fts/Makefile.am new file mode 100644 index 0000000..2e7753c --- /dev/null +++ b/src/plugins/fts/Makefile.am @@ -0,0 +1,74 @@ +pkglibexecdir = $(libexecdir)/dovecot +doveadm_moduledir = $(moduledir)/doveadm + +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-settings \ + -I$(top_srcdir)/src/lib-fts \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/lib-storage/index \ + -I$(top_srcdir)/src/doveadm + +NOPLUGIN_LDFLAGS = +lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version +lib20_fts_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib20_fts_plugin.la + +lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la + +lib20_fts_plugin_la_SOURCES = \ + fts-api.c \ + fts-build-mail.c \ + fts-expunge-log.c \ + fts-indexer.c \ + fts-parser.c \ + fts-parser-html.c \ + fts-parser-script.c \ + fts-parser-tika.c \ + fts-plugin.c \ + fts-search.c \ + fts-search-args.c \ + fts-search-serialize.c \ + fts-storage.c \ + fts-user.c + +pkginc_libdir=$(pkgincludedir) +pkginc_lib_HEADERS = \ + fts-api.h \ + fts-api-private.h \ + fts-expunge-log.h \ + fts-indexer.h \ + fts-parser.h \ + fts-storage.h \ + fts-user.h + +noinst_HEADERS = \ + doveadm-fts.h \ + fts-build-mail.h \ + fts-plugin.h \ + fts-search-args.h \ + fts-search-serialize.h + +pkglibexec_PROGRAMS = xml2text + +xml2text_SOURCES = xml2text.c fts-parser-html.c +xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS) +xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS) +xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS) + +pkglibexec_SCRIPTS = decode2text.sh +EXTRA_DIST = $(pkglibexec_SCRIPTS) + +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_plugin.la + +lib20_doveadm_fts_plugin_la_SOURCES = \ + doveadm-fts.c \ + 
doveadm-dump-fts-expunge-log.c diff --git a/src/plugins/fts/Makefile.in b/src/plugins/fts/Makefile.in new file mode 100644 index 0000000..624f69f --- /dev/null +++ b/src/plugins/fts/Makefile.in @@ -0,0 +1,1140 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +pkglibexec_PROGRAMS = xml2text$(EXEEXT) +subdir = src/plugins/fts +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + 
$(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + 
$(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(pkginc_lib_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(pkglibexecdir)" \ + "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" \ + "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)" +PROGRAMS = $(pkglibexec_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed 
'$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES) +lib20_doveadm_fts_plugin_la_LIBADD = +am_lib20_doveadm_fts_plugin_la_OBJECTS = doveadm-fts.lo \ + doveadm-dump-fts-expunge-log.lo +lib20_doveadm_fts_plugin_la_OBJECTS = \ + $(am_lib20_doveadm_fts_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib20_doveadm_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib20_doveadm_fts_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +lib20_fts_plugin_la_DEPENDENCIES = ../../lib-fts/libfts.la +am_lib20_fts_plugin_la_OBJECTS = fts-api.lo fts-build-mail.lo \ + fts-expunge-log.lo fts-indexer.lo fts-parser.lo \ + fts-parser-html.lo fts-parser-script.lo fts-parser-tika.lo \ + fts-plugin.lo fts-search.lo fts-search-args.lo \ + fts-search-serialize.lo fts-storage.lo fts-user.lo +lib20_fts_plugin_la_OBJECTS = $(am_lib20_fts_plugin_la_OBJECTS) +lib20_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib20_fts_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_xml2text_OBJECTS = xml2text-xml2text.$(OBJEXT) \ + xml2text-fts-parser-html.$(OBJEXT) +xml2text_OBJECTS = $(am_xml2text_OBJECTS) +am__DEPENDENCIES_1 = +SCRIPTS = $(pkglibexec_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = 
$(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo \ + ./$(DEPDIR)/doveadm-fts.Plo ./$(DEPDIR)/fts-api.Plo \ + ./$(DEPDIR)/fts-build-mail.Plo ./$(DEPDIR)/fts-expunge-log.Plo \ + ./$(DEPDIR)/fts-indexer.Plo ./$(DEPDIR)/fts-parser-html.Plo \ + ./$(DEPDIR)/fts-parser-script.Plo \ + ./$(DEPDIR)/fts-parser-tika.Plo ./$(DEPDIR)/fts-parser.Plo \ + ./$(DEPDIR)/fts-plugin.Plo ./$(DEPDIR)/fts-search-args.Plo \ + ./$(DEPDIR)/fts-search-serialize.Plo \ + ./$(DEPDIR)/fts-search.Plo ./$(DEPDIR)/fts-storage.Plo \ + ./$(DEPDIR)/fts-user.Plo \ + ./$(DEPDIR)/xml2text-fts-parser-html.Po \ + ./$(DEPDIR)/xml2text-xml2text.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \ + $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES) +DIST_SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \ + $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) $(pkginc_lib_HEADERS) +am__tagged_files = $(HEADERS) 
$(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibexecdir = $(libexecdir)/dovecot +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = 
@FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = 
@MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = @PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = 
@host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +doveadm_moduledir = $(moduledir)/doveadm +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-settings \ + -I$(top_srcdir)/src/lib-fts \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/lib-storage/index \ + -I$(top_srcdir)/src/doveadm + +lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version +lib20_fts_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib20_fts_plugin.la + +lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la +lib20_fts_plugin_la_SOURCES = \ + fts-api.c \ + fts-build-mail.c \ + fts-expunge-log.c \ + fts-indexer.c \ + fts-parser.c \ + fts-parser-html.c \ + fts-parser-script.c \ + fts-parser-tika.c \ + fts-plugin.c \ + fts-search.c \ + fts-search-args.c \ + fts-search-serialize.c \ + fts-storage.c \ + fts-user.c + +pkginc_libdir = $(pkgincludedir) +pkginc_lib_HEADERS = \ + fts-api.h \ + fts-api-private.h \ + 
fts-expunge-log.h \ + fts-indexer.h \ + fts-parser.h \ + fts-storage.h \ + fts-user.h + +noinst_HEADERS = \ + doveadm-fts.h \ + fts-build-mail.h \ + fts-plugin.h \ + fts-search-args.h \ + fts-search-serialize.h + +xml2text_SOURCES = xml2text.c fts-parser-html.c +xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS) +xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS) +xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS) +pkglibexec_SCRIPTS = decode2text.sh +EXTRA_DIST = $(pkglibexec_SCRIPTS) +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_plugin.la + +lib20_doveadm_fts_plugin_la_SOURCES = \ + doveadm-fts.c \ + doveadm-dump-fts-expunge-log.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibexecPROGRAMS: $(pkglibexec_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) 
$$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-pkglibexecPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(pkglibexecdir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(pkglibexecdir)" && rm -f $$files + +clean-pkglibexecPROGRAMS: + @list='$(pkglibexec_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \ + } + +uninstall-doveadm_moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \ + done + +clean-doveadm_moduleLTLIBRARIES: + -test -z 
"$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES) + @list='$(doveadm_module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +lib20_doveadm_fts_plugin.la: $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_doveadm_fts_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_LIBADD) 
$(LIBS) + +lib20_fts_plugin.la: $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_fts_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_fts_plugin_la_LINK) -rpath $(moduledir) $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_LIBADD) $(LIBS) + +xml2text$(EXEEXT): $(xml2text_OBJECTS) $(xml2text_DEPENDENCIES) $(EXTRA_xml2text_DEPENDENCIES) + @rm -f xml2text$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xml2text_OBJECTS) $(xml2text_LDADD) $(LIBS) +install-pkglibexecSCRIPTS: $(pkglibexec_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-pkglibexecSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(pkglibexecdir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + 
+distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-api.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-build-mail.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-expunge-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-indexer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-html.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-script.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-tika.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-args.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-storage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-user.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-fts-parser-html.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-xml2text.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + 
+.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +xml2text-xml2text.o: xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.o -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c + +xml2text-xml2text.obj: xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.obj -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi` + +xml2text-fts-parser-html.o: fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.o -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c + +xml2text-fts-parser-html.obj: fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.obj -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkginc_libHEADERS: $(pkginc_lib_HEADERS) + @$(NORMAL_INSTALL) + @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkginc_libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkginc_libdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkginc_libdir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkginc_libdir)" || exit $$?; \ + done + +uninstall-pkginc_libHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(pkginc_lib_HEADERS)'; 
test -n "$(pkginc_libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkginc_libdir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(SCRIPTS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/doveadm-fts.Plo + -rm -f ./$(DEPDIR)/fts-api.Plo + -rm -f ./$(DEPDIR)/fts-build-mail.Plo + -rm -f ./$(DEPDIR)/fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/fts-indexer.Plo + -rm -f ./$(DEPDIR)/fts-parser-html.Plo + -rm -f ./$(DEPDIR)/fts-parser-script.Plo + -rm -f ./$(DEPDIR)/fts-parser-tika.Plo + -rm -f ./$(DEPDIR)/fts-parser.Plo + -rm -f ./$(DEPDIR)/fts-plugin.Plo + -rm -f ./$(DEPDIR)/fts-search-args.Plo + -rm -f ./$(DEPDIR)/fts-search-serialize.Plo + -rm -f ./$(DEPDIR)/fts-search.Plo + -rm -f ./$(DEPDIR)/fts-storage.Plo + -rm -f ./$(DEPDIR)/fts-user.Plo + -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po + -rm -f ./$(DEPDIR)/xml2text-xml2text.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-doveadm_moduleLTLIBRARIES \ + install-moduleLTLIBRARIES install-pkginc_libHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibexecPROGRAMS install-pkglibexecSCRIPTS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/doveadm-fts.Plo + -rm -f ./$(DEPDIR)/fts-api.Plo + -rm -f 
./$(DEPDIR)/fts-build-mail.Plo + -rm -f ./$(DEPDIR)/fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/fts-indexer.Plo + -rm -f ./$(DEPDIR)/fts-parser-html.Plo + -rm -f ./$(DEPDIR)/fts-parser-script.Plo + -rm -f ./$(DEPDIR)/fts-parser-tika.Plo + -rm -f ./$(DEPDIR)/fts-parser.Plo + -rm -f ./$(DEPDIR)/fts-plugin.Plo + -rm -f ./$(DEPDIR)/fts-search-args.Plo + -rm -f ./$(DEPDIR)/fts-search-serialize.Plo + -rm -f ./$(DEPDIR)/fts-search.Plo + -rm -f ./$(DEPDIR)/fts-storage.Plo + -rm -f ./$(DEPDIR)/fts-user.Plo + -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po + -rm -f ./$(DEPDIR)/xml2text-xml2text.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \ + uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-doveadm_moduleLTLIBRARIES install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-pkginc_libHEADERS install-pkglibexecPROGRAMS \ + install-pkglibexecSCRIPTS install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags 
tags-am uninstall uninstall-am \ + uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \ + uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts/decode2text.sh b/src/plugins/fts/decode2text.sh new file mode 100755 index 0000000..1c881ff --- /dev/null +++ b/src/plugins/fts/decode2text.sh @@ -0,0 +1,105 @@ +#!/bin/sh + +# Example attachment decoder script. The attachment comes from stdin, and +# the script is expected to output UTF-8 data to stdout. (If the output isn't +# UTF-8, everything except valid UTF-8 sequences are dropped from it.) + +# The attachment decoding is enabled by setting: +# +# plugin { +# fts_decoder = decode2text +# } +# service decode2text { +# executable = script /usr/local/libexec/dovecot/decode2text.sh +# user = dovecot +# unix_listener decode2text { +# mode = 0666 +# } +# } + +libexec_dir=`dirname $0` +content_type=$1 + +# The second parameter is the format's filename extension, which is used when +# found from a filename of application/octet-stream. You can also add more +# extensions by giving more parameters. 
+formats='application/pdf pdf +application/x-pdf pdf +application/msword doc +application/mspowerpoint ppt +application/vnd.ms-powerpoint ppt +application/ms-excel xls +application/x-msexcel xls +application/vnd.ms-excel xls +application/vnd.openxmlformats-officedocument.wordprocessingml.document docx +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx +application/vnd.openxmlformats-officedocument.presentationml.presentation pptx +application/vnd.oasis.opendocument.text odt +application/vnd.oasis.opendocument.spreadsheet ods +application/vnd.oasis.opendocument.presentation odp +' + +if [ "$content_type" = "" ]; then + echo "$formats" + exit 0 +fi + +fmt=`echo "$formats" | grep -w "^$content_type" | cut -d ' ' -f 2` +if [ "$fmt" = "" ]; then + echo "Content-Type: $content_type not supported" >&2 + exit 1 +fi + +# most decoders can't handle stdin directly, so write the attachment +# to a temp file +path=`mktemp` +trap "rm -f $path" 0 1 2 3 14 15 +cat > $path + +xmlunzip() { + name=$1 + + tempdir=`mktemp -d` + if [ "$tempdir" = "" ]; then + exit 1 + fi + trap "rm -rf $path $tempdir" 0 1 2 3 14 15 + cd $tempdir || exit 1 + unzip -q "$path" 2>/dev/null || exit 0 + find . -name "$name" -print0 | xargs -0 cat | + $libexec_dir/xml2text +} + +wait_timeout() { + childpid=$! 
+ trap "kill -9 $childpid; rm -f $path" 1 2 3 14 15 + wait $childpid +} + +LANG=en_US.UTF-8 +export LANG +if [ $fmt = "pdf" ]; then + /usr/bin/pdftotext $path - 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "doc" ]; then + (/usr/bin/catdoc $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "ppt" ]; then + (/usr/bin/catppt $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "xls" ]; then + (/usr/bin/xls2csv $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "odt" -o $fmt = "ods" -o $fmt = "odp" ]; then + xmlunzip "content.xml" +elif [ $fmt = "docx" ]; then + xmlunzip "document.xml" +elif [ $fmt = "xlsx" ]; then + xmlunzip "sharedStrings.xml" +elif [ $fmt = "pptx" ]; then + xmlunzip "slide*.xml" +else + echo "Buggy decoder script: $fmt not handled" >&2 + exit 1 +fi +exit 0 diff --git a/src/plugins/fts/doveadm-dump-fts-expunge-log.c b/src/plugins/fts/doveadm-dump-fts-expunge-log.c new file mode 100644 index 0000000..7438bca --- /dev/null +++ b/src/plugins/fts/doveadm-dump-fts-expunge-log.c @@ -0,0 +1,116 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "hex-binary.h" +#include "guid.h" +#include "doveadm-dump.h" +#include "doveadm-fts.h" + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> + +struct fts_expunge_log_record { + uint32_t checksum; + uint32_t record_size; + guid_128_t guid; +}; + +static int dump_record(int fd, buffer_t *buf) +{ + struct fts_expunge_log_record rec; + off_t offset; + void *data; + const uint32_t *expunges, *uids; + ssize_t ret; + size_t data_size; + unsigned int i, uids_count; + + offset = lseek(fd, 0, SEEK_CUR); + + ret = read(fd, &rec, sizeof(rec)); + if (ret == 0) + return 0; + + if (ret != sizeof(rec)) + i_fatal("rec read() %d != %d", (int)ret, (int)sizeof(rec)); + + if (rec.record_size < sizeof(rec) + sizeof(uint32_t) || + rec.record_size > INT_MAX) { + i_fatal("Invalid record_size=%u 
at offset %"PRIuUOFF_T, + rec.record_size, offset); + } + data_size = rec.record_size - sizeof(rec); + buffer_set_used_size(buf, 0); + data = buffer_append_space_unsafe(buf, data_size); + ret = read(fd, data, data_size); + if (ret != (ssize_t)data_size) + i_fatal("rec read() %d != %d", (int)ret, (int)data_size); + + printf("#%"PRIuUOFF_T":\n", offset); + printf(" checksum = %8x\n", rec.checksum); + printf(" size .... = %u\n", rec.record_size); + printf(" mailbox . = %s\n", guid_128_to_string(rec.guid)); + + expunges = CONST_PTR_OFFSET(data, data_size - sizeof(uint32_t)); + printf(" expunges = %u\n", *expunges); + + printf(" uids .... = "); + + uids = data; + uids_count = (rec.record_size - sizeof(rec) - sizeof(uint32_t)) / + sizeof(uint32_t); + for (i = 0; i < uids_count; i += 2) { + if (i != 0) + printf(","); + if (uids[i] == uids[i+1]) + printf("%u", uids[i]); + else + printf("%u-%u", uids[i], uids[i+1]); + } + printf("\n"); + return 1; +} + +static void +cmd_dump_fts_expunge_log(const char *path, const char *const *args ATTR_UNUSED) +{ + buffer_t *buf; + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + i_fatal("open(%s) failed: %m", path); + + buf = buffer_create_dynamic(default_pool, 1024); + do { + T_BEGIN { + ret = dump_record(fd, buf); + } T_END; + } while (ret > 0); + buffer_free(&buf); + i_close_fd(&fd); +} + +static bool test_dump_fts_expunge_log(const char *path) +{ + const char *p; + + if ((p = strrchr(path, '/')) != NULL) + p++; + else + p = path; + return strcmp(p, "dovecot-expunges.log") == 0; +} + +static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_expunge_log = { + "fts-expunge-log", + test_dump_fts_expunge_log, + cmd_dump_fts_expunge_log +}; + +void doveadm_dump_fts_expunge_log_init(void) +{ + doveadm_dump_register(&doveadm_cmd_dump_fts_expunge_log); +} diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c new file mode 100644 index 0000000..1b902a1 --- /dev/null +++ b/src/plugins/fts/doveadm-fts.c @@ -0,0 
+1,470 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "imap-util.h" +#include "mail-namespace.h" +#include "mail-search.h" +#include "mailbox-list-iter.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-language.h" +#include "fts-storage.h" +#include "fts-search-args.h" +#include "fts-user.h" +#include "doveadm-print.h" +#include "doveadm-mail.h" +#include "doveadm-mailbox-list-iter.h" +#include "doveadm-fts.h" + +const char *doveadm_fts_plugin_version = DOVECOT_ABI_VERSION; + +struct fts_tokenize_cmd_context { + struct doveadm_mail_cmd_context ctx; + const char *language; + const char *tokens; +}; + +static int +cmd_search_box(struct doveadm_mail_cmd_context *ctx, + const struct mailbox_info *info) +{ + struct mailbox *box; + struct fts_backend *backend; + struct fts_result result; + int ret = 0; + + backend = fts_list_backend(info->ns->list); + if (backend == NULL) { + i_error("fts not enabled for %s", info->vname); + ctx->exit_code = EX_CONFIG; + return -1; + } + + i_zero(&result); + i_array_init(&result.definite_uids, 16); + i_array_init(&result.maybe_uids, 16); + i_array_init(&result.scores, 16); + + box = mailbox_alloc(info->ns->list, info->vname, 0); + if (fts_backend_lookup(backend, box, ctx->search_args->args, + FTS_LOOKUP_FLAG_AND_ARGS, &result) < 0) { + i_error("fts lookup failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + ret = -1; + } else { + printf("%s: ", info->vname); + if (array_count(&result.definite_uids) == 0) + printf("no results\n"); + else T_BEGIN { + string_t *str = t_str_new(128); + imap_write_seq_range(str, &result.definite_uids); + printf("%s\n", str_c(str)); + } T_END; + if (array_count(&result.maybe_uids) > 0) T_BEGIN { + string_t *str = t_str_new(128); + imap_write_seq_range(str, &result.maybe_uids); + printf(" - maybe: %s\n", str_c(str)); + } T_END; + fts_backend_lookup_done(backend); + } + mailbox_free(&box); + 
array_free(&result.definite_uids); + array_free(&result.maybe_uids); + array_free(&result.scores); + return ret; +} + +static int +cmd_fts_lookup_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + const enum mailbox_list_iter_flags iter_flags = + MAILBOX_LIST_ITER_NO_AUTO_BOXES | + MAILBOX_LIST_ITER_RETURN_NO_FLAGS; + struct doveadm_mailbox_list_iter *iter; + const struct mailbox_info *info; + int ret = 0; + + iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args, + iter_flags); + while ((info = doveadm_mailbox_list_iter_next(iter)) != NULL) T_BEGIN { + if (cmd_search_box(ctx, info) < 0) + ret = -1; + } T_END; + if (doveadm_mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +static void +cmd_fts_lookup_init(struct doveadm_mail_cmd_context *ctx, + const char *const args[]) +{ + if (args[0] == NULL) + doveadm_mail_help_name("fts lookup"); + + ctx->search_args = doveadm_mail_build_search_args(args); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_lookup_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_lookup_run; + ctx->v.init = cmd_fts_lookup_init; + return ctx; +} + +static int +cmd_fts_expand_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); + struct mailbox *box; + struct fts_backend *backend; + string_t *str = t_str_new(128); + + backend = fts_list_backend(ns->list); + if (backend == NULL) { + i_error("fts not enabled for INBOX"); + ctx->exit_code = EX_CONFIG; + return -1; + } + + box = mailbox_alloc(ns->list, "INBOX", 0); + mail_search_args_init(ctx->search_args, box, FALSE, NULL); + + if (fts_search_args_expand(backend, ctx->search_args) < 0) + i_fatal("Couldn't expand search args"); + mail_search_args_to_cmdline(str, ctx->search_args->args); + printf("%s\n", str_c(str)); + mailbox_free(&box); + return 0; +} + 
+static void +cmd_fts_expand_init(struct doveadm_mail_cmd_context *ctx, + const char *const args[]) +{ + if (args[0] == NULL) + doveadm_mail_help_name("fts expand"); + + ctx->search_args = doveadm_mail_build_search_args(args); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_expand_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_expand_run; + ctx->v.init = cmd_fts_expand_init; + return ctx; +} + +static int +cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, + struct mail_user *user) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); + struct fts_backend *backend; + struct fts_user_language *user_lang; + const struct fts_language *lang = NULL; + int ret, ret2; + bool final = FALSE; + + backend = fts_list_backend(ns->list); + if (backend == NULL) { + i_error("fts not enabled for INBOX"); + _ctx->exit_code = EX_CONFIG; + return -1; + } + + if (ctx->language == NULL) { + struct fts_language_list *lang_list = + fts_user_get_language_list(user); + enum fts_language_result result; + const char *error; + + result = fts_language_detect(lang_list, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &lang, &error); + if (lang == NULL) + lang = fts_language_list_get_first(lang_list); + switch (result) { + case FTS_LANGUAGE_RESULT_SHORT: + i_warning("Text too short, can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_UNKNOWN: + i_warning("Can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_OK: + break; + case FTS_LANGUAGE_RESULT_ERROR: + i_error("Language detection library initialization failed: %s", error); + _ctx->exit_code = EX_CONFIG; + return -1; + default: + i_unreached(); + } + } else { + lang = fts_language_find(ctx->language); + if (lang == NULL) { + 
i_error("Unknown language: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + } + user_lang = fts_user_language_find(user, lang); + if (user_lang == NULL) { + i_error("Language not enabled for user: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + + fts_tokenizer_reset(user_lang->index_tokenizer); + for (;;) { + const char *token, *error; + + if (!final) { + ret = fts_tokenizer_next(user_lang->index_tokenizer, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &token, &error); + } else { + ret = fts_tokenizer_final(user_lang->index_tokenizer, + &token, &error); + } + if (ret < 0) + break; + if (ret > 0 && user_lang->filter != NULL) { + ret2 = fts_filter_filter(user_lang->filter, &token, &error); + if (ret2 > 0) + doveadm_print(token); + else if (ret2 < 0) + i_error("Couldn't create indexable tokens: %s", error); + } + if (ret == 0) { + if (final) + break; + final = TRUE; + } + } + return 0; +} + +static void +cmd_fts_tokenize_init(struct doveadm_mail_cmd_context *_ctx, + const char *const args[]) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + if (args[0] == NULL) + doveadm_mail_help_name("fts tokenize"); + + ctx->tokens = p_strdup(_ctx->pool, t_strarray_join(args, " ")); + + doveadm_print_init(DOVEADM_PRINT_TYPE_FLOW); + doveadm_print_header("token", "token", DOVEADM_PRINT_HEADER_FLAG_HIDE_TITLE); +} + +static bool +cmd_fts_tokenize_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + switch (c) { + case 'l': + ctx->language = p_strdup(_ctx->pool, optarg); + break; + default: + return FALSE; + } + return TRUE; +} + +static struct doveadm_mail_cmd_context * +cmd_fts_tokenize_alloc(void) +{ + struct fts_tokenize_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct fts_tokenize_cmd_context); + ctx->ctx.v.run = cmd_fts_tokenize_run; + ctx->ctx.v.init = cmd_fts_tokenize_init; + 
ctx->ctx.v.parse_arg = cmd_fts_tokenize_parse_arg; + ctx->ctx.getopt_args = "l"; + return &ctx->ctx; +} + +static int +fts_namespace_find(struct mail_user *user, const char *ns_prefix, + struct mail_namespace **ns_r) +{ + struct mail_namespace *ns; + + if (ns_prefix == NULL) + ns = mail_namespace_find_inbox(user->namespaces); + else { + ns = mail_namespace_find_prefix(user->namespaces, ns_prefix); + if (ns == NULL) { + i_error("Namespace prefix not found: %s", ns_prefix); + return -1; + } + } + + if (fts_list_backend(ns->list) == NULL) { + i_error("fts not enabled for user's namespace %s", + ns_prefix != NULL ? ns_prefix : "INBOX"); + return -1; + } + *ns_r = ns; + return 0; +} + +static int +cmd_fts_optimize_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + const char *ns_prefix = ctx->args[0]; + struct mail_namespace *ns; + struct fts_backend *backend; + + if (fts_namespace_find(user, ns_prefix, &ns) < 0) { + doveadm_mail_failed_error(ctx, MAIL_ERROR_NOTFOUND); + return -1; + } + backend = fts_list_backend(ns->list); + if (fts_backend_optimize(backend) < 0) { + i_error("fts optimize failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + return -1; + } + return 0; +} + +static void +cmd_fts_optimize_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED, + const char *const args[]) +{ + if (str_array_length(args) > 1) + doveadm_mail_help_name("fts optimize"); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_optimize_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_optimize_run; + ctx->v.init = cmd_fts_optimize_init; + return ctx; +} + +static int +cmd_fts_rescan_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user) +{ + const char *ns_prefix = ctx->args[0]; + struct mail_namespace *ns; + struct fts_backend *backend; + + if (fts_namespace_find(user, ns_prefix, &ns) < 0) { + doveadm_mail_failed_error(ctx, 
MAIL_ERROR_NOTFOUND); + return -1; + } + backend = fts_list_backend(ns->list); + if (fts_backend_rescan(backend) < 0) { + i_error("fts rescan failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + return -1; + } + return 0; +} + +static void +cmd_fts_rescan_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED, + const char *const args[]) +{ + if (str_array_length(args) > 1) + doveadm_mail_help_name("fts rescan"); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_rescan_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_rescan_run; + ctx->v.init = cmd_fts_rescan_init; + return ctx; +} + +static struct doveadm_cmd_ver2 fts_commands[] = { +{ + .name = "fts lookup", + .mail_cmd = cmd_fts_lookup_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts expand", + .mail_cmd = cmd_fts_expand_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts tokenize", + .mail_cmd = cmd_fts_tokenize_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<text>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('l', "language", CMD_PARAM_STR, 0) +DOVEADM_CMD_PARAM('\0', "text", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts optimize", + .mail_cmd = cmd_fts_optimize_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts rescan", + .mail_cmd = cmd_fts_rescan_alloc, + .usage = 
DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +}; + +void doveadm_fts_plugin_init(struct module *module ATTR_UNUSED) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(fts_commands); i++) + doveadm_cmd_register_ver2(&fts_commands[i]); + doveadm_dump_fts_expunge_log_init(); +} + +void doveadm_fts_plugin_deinit(void) +{ +} diff --git a/src/plugins/fts/doveadm-fts.h b/src/plugins/fts/doveadm-fts.h new file mode 100644 index 0000000..d4307fe --- /dev/null +++ b/src/plugins/fts/doveadm-fts.h @@ -0,0 +1,11 @@ +#ifndef DOVEADM_FTS_H +#define DOVEADM_FTS_H + +struct module; + +void doveadm_dump_fts_expunge_log_init(void); + +void doveadm_fts_plugin_init(struct module *module); +void doveadm_fts_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts/fts-api-private.h b/src/plugins/fts/fts-api-private.h new file mode 100644 index 0000000..a070564 --- /dev/null +++ b/src/plugins/fts/fts-api-private.h @@ -0,0 +1,139 @@ +#ifndef FTS_API_PRIVATE_H +#define FTS_API_PRIVATE_H + +#include "unichar.h" +#include "fts-api.h" + +struct mail_user; +struct mailbox_list; + +#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2) + +struct fts_backend_vfuncs { + struct fts_backend *(*alloc)(void); + int (*init)(struct fts_backend *backend, const char **error_r); + void (*deinit)(struct fts_backend *backend); + + int (*get_last_uid)(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r); + + struct fts_backend_update_context * + (*update_init)(struct fts_backend *backend); + int (*update_deinit)(struct fts_backend_update_context *ctx); + + void (*update_set_mailbox)(struct fts_backend_update_context *ctx, + struct mailbox *box); + void (*update_expunge)(struct fts_backend_update_context *ctx, + uint32_t uid); + + /* Start a build for specified key */ + bool (*update_set_build_key)(struct fts_backend_update_context 
*ctx, + const struct fts_backend_build_key *key); + /* Finish a build for specified key - guaranteed to be called */ + void (*update_unset_build_key)(struct fts_backend_update_context *ctx); + /* Add data for current build key */ + int (*update_build_more)(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size); + + int (*refresh)(struct fts_backend *backend); + int (*rescan)(struct fts_backend *backend); + int (*optimize)(struct fts_backend *backend); + + bool (*can_lookup)(struct fts_backend *backend, + const struct mail_search_arg *args); + int (*lookup)(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, enum fts_lookup_flags flags, + struct fts_result *result); + int (*lookup_multi)(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); + void (*lookup_done)(struct fts_backend *backend); +}; + +enum fts_backend_flags { + /* Backend supports indexing binary MIME parts */ + FTS_BACKEND_FLAG_BINARY_MIME_PARTS = 0x01, + /* Send built text to backend normalized rather than + preserving original case */ + FTS_BACKEND_FLAG_NORMALIZE_INPUT = 0x02, + /* Send only fully indexable words rather than randomly sized blocks */ + FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04, + /* Fuzzy search works */ + FTS_BACKEND_FLAG_FUZZY_SEARCH = 0x08, + /* Tokenize all the input. update_build_more() will be called a single + directly indexable token at a time. Searching will modify the search + args so that lookup() sees only tokens that can be directly + searched. 
*/ + FTS_BACKEND_FLAG_TOKENIZED_INPUT = 0x10 +}; + +struct fts_header_filters { + pool_t pool; + ARRAY_TYPE(const_string) includes; + ARRAY_TYPE(const_string) excludes; + bool loaded:1; + bool exclude_is_default:1; +}; + +struct fts_backend { + const char *name; + enum fts_backend_flags flags; + + struct fts_backend_vfuncs v; + struct mail_namespace *ns; + struct fts_header_filters header_filters; + + bool updating:1; +}; + +struct fts_backend_update_context { + struct fts_backend *backend; + normalizer_func_t *normalizer; + + struct mailbox *cur_box, *backend_box; + + bool build_key_open:1; + bool failed:1; +}; + +struct fts_index_header { + uint32_t last_indexed_uid; + + /* Checksum of settings. If the settings change, the index should + be rebuilt. */ + uint32_t settings_checksum; + uint32_t unused; +}; + +void fts_backend_register(const struct fts_backend *backend); +void fts_backend_unregister(const char *name); + +bool fts_backend_default_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args); + +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter); + +/* Returns TRUE if ok, FALSE if no fts header */ +bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r); +int fts_index_set_header(struct mailbox *box, + const struct fts_index_header *hdr); +int ATTR_NOWARN_UNUSED_RESULT +fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid); +int fts_backend_reset_last_uids(struct fts_backend *backend); +int fts_index_have_compatible_settings(struct mailbox_list *list, + uint32_t checksum); + +/* Returns TRUE if FTS backend should index the header for optimizing + separate lookups */ +bool fts_header_want_indexed(const char *hdr_name); +/* Returns TRUE if header's values should be considered to have a language. 
*/ +bool fts_header_has_language(const char *hdr_name); + +int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r); + +#endif diff --git a/src/plugins/fts/fts-api.c b/src/plugins/fts/fts-api.c new file mode 100644 index 0000000..a6ea716 --- /dev/null +++ b/src/plugins/fts/fts-api.c @@ -0,0 +1,554 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hex-binary.h" +#include "mail-index.h" +#include "mail-namespace.h" +#include "mail-storage-private.h" +#include "mailbox-list-iter.h" +#include "mail-search.h" +#include "fts-api-private.h" + +struct event_category event_category_fts = { + .name = "fts", +}; + +static ARRAY(const struct fts_backend *) backends; + +void fts_backend_register(const struct fts_backend *backend) +{ + if (!array_is_created(&backends)) + i_array_init(&backends, 4); + array_push_back(&backends, &backend); +} + +void fts_backend_unregister(const char *name) +{ + const struct fts_backend *const *be; + unsigned int i, count; + + be = array_get(&backends, &count); + for (i = 0; i < count; i++) { + if (strcmp(be[i]->name, name) == 0) { + array_delete(&backends, i, 1); + break; + } + } + if (i == count) + i_panic("fts_backend_unregister(%s): unknown backend", name); + + if (count == 1) + array_free(&backends); +} + +static const struct fts_backend * +fts_backend_class_lookup(const char *backend_name) +{ + const struct fts_backend *const *be; + unsigned int i, count; + + if (array_is_created(&backends)) { + be = array_get(&backends, &count); + for (i = 0; i < count; i++) { + if (strcmp(be[i]->name, backend_name) == 0) + return be[i]; + } + } + return NULL; +} + +static void +fts_header_filters_init(struct fts_backend *backend) +{ + struct fts_header_filters *filters = &backend->header_filters; + pool_t pool = filters->pool = pool_alloconly_create( + MEMPOOL_GROWING"fts_header_filters", 256); + + p_array_init(&filters->includes, pool, 8); + 
p_array_init(&filters->excludes, pool, 8); +} + +static void +fts_header_filters_deinit(struct fts_backend *backend) +{ + pool_unref(&backend->header_filters.pool); +} + +int fts_backend_init(const char *backend_name, struct mail_namespace *ns, + const char **error_r, struct fts_backend **backend_r) +{ + const struct fts_backend *be; + struct fts_backend *backend; + + be = fts_backend_class_lookup(backend_name); + if (be == NULL) { + *error_r = "Unknown backend"; + return -1; + } + + backend = be->v.alloc(); + backend->ns = ns; + if (backend->v.init(backend, error_r) < 0) { + i_free(backend); + return -1; + } + + fts_header_filters_init(backend); + *backend_r = backend; + return 0; +} + +void fts_backend_deinit(struct fts_backend **_backend) +{ + struct fts_backend *backend = *_backend; + + fts_header_filters_deinit(backend); + *_backend = NULL; + backend->v.deinit(backend); +} + +int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r) +{ + struct fts_index_header hdr; + + if (box->virtual_vfuncs != NULL) { + /* virtual mailboxes themselves don't have any indexes, + so catch this call here */ + if (!fts_index_get_header(box, &hdr)) + *last_uid_r = 0; + else + *last_uid_r = hdr.last_indexed_uid; + return 0; + } + + return backend->v.get_last_uid(backend, box, last_uid_r); +} + +bool fts_backend_is_updating(struct fts_backend *backend) +{ + return backend->updating; +} + +struct fts_backend_update_context * +fts_backend_update_init(struct fts_backend *backend) +{ + struct fts_backend_update_context *ctx; + + i_assert(!backend->updating); + + backend->updating = TRUE; + ctx = backend->v.update_init(backend); + if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0) + ctx->normalizer = backend->ns->user->default_normalizer; + return ctx; +} + +static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx) +{ + fts_backend_update_unset_build_key(ctx); + if (ctx->backend_box != ctx->cur_box) { + 
ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box); + ctx->backend_box = ctx->cur_box; + } +} + +int fts_backend_update_deinit(struct fts_backend_update_context **_ctx) +{ + struct fts_backend_update_context *ctx = *_ctx; + struct fts_backend *backend = ctx->backend; + int ret; + + *_ctx = NULL; + + ctx->cur_box = NULL; + fts_backend_set_cur_mailbox(ctx); + + ret = backend->v.update_deinit(ctx); + backend->updating = FALSE; + return ret; +} + +void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx, + struct mailbox *box) +{ + if (ctx->backend_box != NULL && box != ctx->backend_box) { + /* make sure we don't reference the backend box anymore */ + ctx->backend->v.update_set_mailbox(ctx, NULL); + ctx->backend_box = NULL; + } + ctx->cur_box = box; +} + +void fts_backend_update_expunge(struct fts_backend_update_context *ctx, + uint32_t uid) +{ + fts_backend_set_cur_mailbox(ctx); + ctx->backend->v.update_expunge(ctx, uid); +} + +bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx, + const struct fts_backend_build_key *key) +{ + fts_backend_set_cur_mailbox(ctx); + + i_assert(ctx->cur_box != NULL); + + if (!ctx->backend->v.update_set_build_key(ctx, key)) + return FALSE; + ctx->build_key_open = TRUE; + return TRUE; +} + +void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx) +{ + if (ctx->build_key_open) { + ctx->backend->v.update_unset_build_key(ctx); + ctx->build_key_open = FALSE; + } +} + +int fts_backend_update_build_more(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size) +{ + i_assert(ctx->build_key_open); + + return ctx->backend->v.update_build_more(ctx, data, size); +} + +int fts_backend_refresh(struct fts_backend *backend) +{ + return backend->v.refresh(backend); +} + +int fts_backend_reset_last_uids(struct fts_backend *backend) +{ + struct mailbox_list_iterate_context *iter; + const struct mailbox_info *info; + struct mailbox *box; + int ret = 0; + + iter = 
mailbox_list_iter_init(backend->ns->list, "*", + MAILBOX_LIST_ITER_SKIP_ALIASES | + MAILBOX_LIST_ITER_NO_AUTO_BOXES); + while ((info = mailbox_list_iter_next(iter)) != NULL) { + if ((info->flags & + (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0) + continue; + + box = mailbox_alloc(info->ns->list, info->vname, 0); + if (mailbox_open(box) == 0) { + if (fts_index_set_last_uid(box, 0) < 0) + ret = -1; + } + mailbox_free(&box); + } + if (mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +int fts_backend_rescan(struct fts_backend *backend) +{ + struct mailbox *box; + bool virtual_storage; + + box = mailbox_alloc(backend->ns->list, "", 0); + virtual_storage = box->virtual_vfuncs != NULL; + mailbox_free(&box); + + if (virtual_storage) { + /* just reset the last-uids for a virtual storage. */ + return fts_backend_reset_last_uids(backend); + } + + return backend->v.rescan == NULL ? 0 : + backend->v.rescan(backend); +} + +int fts_backend_optimize(struct fts_backend *backend) +{ + return backend->v.optimize == NULL ? 
0 : + backend->v.optimize(backend); +} + +static void +fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe, + const ARRAY_TYPE(seq_range) *dest_definite, + const ARRAY_TYPE(seq_range) *src_maybe, + const ARRAY_TYPE(seq_range) *src_definite) +{ + ARRAY_TYPE(seq_range) src_unwanted; + const struct seq_range *range; + struct seq_range new_range; + unsigned int i, count; + uint32_t seq; + + /* keep/add a uid in dest_maybe if at least one side has it as maybe, + and neither side lacks it entirely */ + + /* create unwanted sequences list from both sources: start from the full 0..(uint32_t)-1 range and remove everything src_maybe or src_definite mentions */ + t_array_init(&src_unwanted, 128); + new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1; + array_push_back(&src_unwanted, &new_range); + seq_range_array_remove_seq_range(&src_unwanted, src_maybe); + seq_range_array_remove_seq_range(&src_unwanted, src_definite); + + /* drop unwanted uids */ + seq_range_array_remove_seq_range(dest_maybe, &src_unwanted); + + /* add uids that are in dest_definite and src_maybe lists. NOTE(review): seq is uint32_t, so if a dest_definite range ends at (uint32_t)-1 the condition seq <= seq2 stays true after seq wraps to 0 and the inner loop never ends - confirm such a range can never appear here */ + range = array_get(dest_definite, &count); + for (i = 0; i < count; i++) { + for (seq = range[i].seq1; seq <= range[i].seq2; seq++) { + if (seq_range_exists(src_maybe, seq)) + seq_range_array_add(dest_maybe, seq); + } + } +} + +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter) +{ + T_BEGIN { + fts_merge_maybies(maybe_dest, definite_dest, + maybe_filter, definite_filter); + } T_END; + /* keep only what exists in both lists. 
the rest is in + maybies or not wanted */ + seq_range_array_intersect(definite_dest, definite_filter); +} + +bool fts_backend_default_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args) +{ + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_backend_default_can_lookup(backend, + args->value.subargs)) + return TRUE; + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + case SEARCH_BODY: + case SEARCH_TEXT: + if (!args->no_fts) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +bool fts_backend_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args) +{ + return backend->v.can_lookup(backend, args); +} + +static int fts_score_map_sort(const struct fts_score_map *m1, + const struct fts_score_map *m2) +{ + if (m1->uid < m2->uid) + return -1; + if (m1->uid > m2->uid) + return 1; + return 0; +} + +int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + array_clear(&result->definite_uids); + array_clear(&result->maybe_uids); + array_clear(&result->scores); + + if (backend->v.lookup(backend, box, args, flags, result) < 0) + return -1; + + if (!result->scores_sorted && array_is_created(&result->scores)) { + array_sort(&result->scores, fts_score_map_sort); + result->scores_sorted = TRUE; + } + return 0; +} + +int fts_backend_lookup_multi(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + unsigned int i; + + i_assert(boxes[0] != NULL); + + if (backend->v.lookup_multi != NULL) { + if (backend->v.lookup_multi(backend, boxes, args, + flags, result) < 0) + return -1; + if (result->box_results == NULL) { + result->box_results = p_new(result->pool, + struct 
fts_result, 1); + } + return 0; + } + + for (i = 0; boxes[i] != NULL; i++) ; + result->box_results = p_new(result->pool, struct fts_result, i+1); + + for (i = 0; boxes[i] != NULL; i++) { + struct fts_result *box_result = &result->box_results[i]; + + p_array_init(&box_result->definite_uids, result->pool, 32); + p_array_init(&box_result->maybe_uids, result->pool, 32); + p_array_init(&box_result->scores, result->pool, 32); + if (backend->v.lookup(backend, boxes[i], args, + flags, box_result) < 0) + return -1; + } + return 0; +} + +void fts_backend_lookup_done(struct fts_backend *backend) +{ + if (backend->v.lookup_done != NULL) + backend->v.lookup_done(backend); +} + +static uint32_t fts_index_get_ext_id(struct mailbox *box) +{ + return mail_index_ext_register(box->index, "fts", + sizeof(struct fts_index_header), + 0, 0); +} + +bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r) +{ + struct mail_index_view *view; + const void *data; + size_t data_size; + bool ret; + + mail_index_refresh(box->index); + view = mail_index_view_open(box->index); + mail_index_get_header_ext(view, fts_index_get_ext_id(box), + &data, &data_size); + if (data_size < sizeof(*hdr_r)) { + i_zero(hdr_r); + ret = FALSE; + } else { + memcpy(hdr_r, data, sizeof(*hdr_r)); + ret = TRUE; + } + mail_index_view_close(&view); + return ret; +} + +int fts_index_set_header(struct mailbox *box, + const struct fts_index_header *hdr) +{ + struct mail_index_transaction *trans; + uint32_t ext_id = fts_index_get_ext_id(box); + + trans = mail_index_transaction_begin(box->view, 0); + mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr)); + return mail_index_transaction_commit(&trans); +} + +int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid) +{ + struct fts_index_header hdr; + + (void)fts_index_get_header(box, &hdr); + hdr.last_indexed_uid = last_uid; + return fts_index_set_header(box, &hdr); +} + +int fts_index_have_compatible_settings(struct mailbox_list *list, 
+ uint32_t checksum) +{ + struct mail_namespace *ns = mailbox_list_get_namespace(list); + struct mailbox *box; + struct fts_index_header hdr; + const char *vname; + size_t len; + int ret; + + if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0) + vname = "INBOX"; + else { + len = strlen(ns->prefix); + if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns)) + len--; + vname = t_strndup(ns->prefix, len); + } + + box = mailbox_alloc(list, vname, 0); + if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) { + i_error("fts: Failed to sync mailbox %s: %s", vname, + mailbox_get_last_internal_error(box, NULL)); + ret = -1; + } else { + ret = fts_index_get_header(box, &hdr) && + hdr.settings_checksum == checksum ? 1 : 0; + } + mailbox_free(&box); + return ret; +} + +static const char *indexed_headers[] = { + "From", "To", "Cc", "Bcc", "Subject" +}; + +bool fts_header_want_indexed(const char *hdr_name) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(indexed_headers); i++) { + if (strcasecmp(hdr_name, indexed_headers[i]) == 0) + return TRUE; + } + return FALSE; +} + +bool fts_header_has_language(const char *hdr_name) +{ + /* FIXME: should email address headers be detected as different + languages? That mainly contains people's names.. */ + /*if (message_header_is_address(hdr_name)) + return TRUE;*/ + + /* Subject definitely contains language-specific data that can be + detected. Comment and Keywords headers also could contain, although + just about nobody uses those headers. + + For now we assume that other headers contain non-language specific + data that we don't want to filter in special ways. For example + it is good to be able to search for Message-IDs. 
*/ + return strcasecmp(hdr_name, "Subject") == 0 || + strcasecmp(hdr_name, "Comments") == 0 || + strcasecmp(hdr_name, "Keywords") == 0; +} + +int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r) +{ + struct mailbox_metadata metadata; + + if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0) + return -1; + + *guid_r = guid_128_to_string(metadata.guid); + return 0; +} diff --git a/src/plugins/fts/fts-api.h b/src/plugins/fts/fts-api.h new file mode 100644 index 0000000..11a331f --- /dev/null +++ b/src/plugins/fts/fts-api.h @@ -0,0 +1,173 @@ +#ifndef FTS_API_H +#define FTS_API_H + +struct mail; +struct mailbox; +struct mail_namespace; +struct mail_search_arg; + +struct fts_backend; + +#include "seq-range-array.h" + +enum fts_lookup_flags { + /* Specifies if the args should be ANDed or ORed together. */ + FTS_LOOKUP_FLAG_AND_ARGS = 0x01, + /* Require exact matching for non-fuzzy search args by returning all + such matches as maybe_uids instead of definite_uids */ + FTS_LOOKUP_FLAG_NO_AUTO_FUZZY = 0x02 +}; + +enum fts_backend_build_key_type { + /* Header */ + FTS_BACKEND_BUILD_KEY_HDR, + /* MIME part header */ + FTS_BACKEND_BUILD_KEY_MIME_HDR, + /* MIME body part */ + FTS_BACKEND_BUILD_KEY_BODY_PART, + /* Binary MIME body part, if backend supports binary data */ + FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY +}; + +struct fts_backend_build_key { + uint32_t uid; + enum fts_backend_build_key_type type; + struct message_part *part; + + /* for _KEY_HDR: */ + const char *hdr_name; + + /* for _KEY_BODY_PART and _KEY_BODY_PART_BINARY: */ + + /* Contains a valid parsed "type/subtype" string. For messages without + (valid) Content-Type: header, it's set to "text/plain". */ + const char *body_content_type; + /* Content-Disposition: header without parsing/validation if it exists, + otherwise NULL. 
*/ + const char *body_content_disposition; +}; + +struct fts_score_map { + uint32_t uid; + float score; +}; +ARRAY_DEFINE_TYPE(fts_score_map, struct fts_score_map); + +/* This structure is meant to be implemented by plugins that want to carry + some state over from one call to the next ones within an fts_search_context + session. + + The pointer to this structure is initially guaranteed to be NULL and it + remains such unless the plugin itself activates it. + + Any memory management for the pointer and its contents is expected to + be performed by the plugin itself, possibly but not necessarily using + the result pool propagated to the plugin call by struct fts_result.pool and + struct fts_multi_result.pool. */ + +struct fts_search_state; + +struct fts_result { + pool_t pool; + struct fts_search_state *search_state; + + struct mailbox *box; + + ARRAY_TYPE(seq_range) definite_uids; + /* The maybe_uids is useful with backends that can only filter out + messages, but can't definitively say if the search matched a + message. */ + ARRAY_TYPE(seq_range) maybe_uids; + ARRAY_TYPE(fts_score_map) scores; + bool scores_sorted; +}; + +struct fts_multi_result { + pool_t pool; + struct fts_search_state *search_state; + + /* box=NULL-terminated array of mailboxes and matching UIDs, + all allocated from the given pool. */ + struct fts_result *box_results; +}; + +extern struct event_category event_category_fts; + +int fts_backend_init(const char *backend_name, struct mail_namespace *ns, + const char **error_r, struct fts_backend **backend_r); +void fts_backend_deinit(struct fts_backend **backend); + +/* Get the last_uid for the mailbox. */ +int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r); + +/* Returns TRUE if there exists an update context. */ +bool fts_backend_is_updating(struct fts_backend *backend); + +/* Start an index update. 
*/ +struct fts_backend_update_context * +fts_backend_update_init(struct fts_backend *backend); +/* Finish an index update. Returns 0 if ok, -1 if some updates failed. + If updates failed, the index is in unspecified state. */ +int fts_backend_update_deinit(struct fts_backend_update_context **ctx); + +/* Switch to updating the specified mailbox. box may also be set to NULL to + make sure the previous mailbox won't be accessed anymore. */ +void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx, + struct mailbox *box); +/* Expunge the specified mail. */ +void fts_backend_update_expunge(struct fts_backend_update_context *ctx, + uint32_t uid); + +/* Switch to building index for specified key. If backend doesn't want to + index this key, it can return FALSE and caller will skip to next key. */ +bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx, + const struct fts_backend_build_key *key); +/* Close the currently open build key, if any. Afterwards calling + _build_more() without setting a new build key will assert-crash. */ +void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx); +/* Add more content to the index for the currently specified build key. + Non-BODY_PART_BINARY data must contain only full valid UTF-8 characters, + but it doesn't need to be NUL-terminated. size contains the data size in + bytes, not characters. This function may be called many times and the data + block sizes may be small. Backend returns 0 if ok, -1 if build should be + aborted. */ +int fts_backend_update_build_more(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size); + +/* Refresh index to make sure we see latest changes from lookups. + Returns 0 if ok, -1 if error. */ +int fts_backend_refresh(struct fts_backend *backend); +/* Go through the entire index and make sure all mails are indexed, + and delete any extra mails in the index. */ +int fts_backend_rescan(struct fts_backend *backend); +/* Optimize the index. 
This can be a somewhat heavy operation. */ +int fts_backend_optimize(struct fts_backend *backend); + +/* Returns TRUE if fts_backend_lookup() should even be tried for the + given args. */ +bool fts_backend_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args); +/* Do a FTS lookup for the given search args. Backends can support different + kinds of search arguments, so match_always=TRUE must be set to all search + args that were actually used to produce the search results. The other args + are handled by the regular search code. The backends MUST ignore all args + that have subargs (SEARCH_OR, SEARCH_SUB), since they are looked up + separately. + + The arrays in result must be initialized by caller. */ +int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result); + +/* Search from multiple mailboxes. result->pool must be initialized. */ +int fts_backend_lookup_multi(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); +/* Called after the lookups are done. The next lookup will be preceded by a + refresh. 
*/ +void fts_backend_lookup_done(struct fts_backend *backend); + +#endif diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c new file mode 100644 index 0000000..73d4f4b --- /dev/null +++ b/src/plugins/fts/fts-build-mail.c @@ -0,0 +1,719 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "buffer.h" +#include "str.h" +#include "rfc822-parser.h" +#include "message-address.h" +#include "message-parser.h" +#include "message-decoder.h" +#include "mail-storage.h" +#include "index-mail.h" +#include "fts-parser.h" +#include "fts-user.h" +#include "fts-language.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-api-private.h" +#include "fts-build-mail.h" + +/* there are other characters as well, but this doesn't have to be exact */ +#define IS_WORD_WHITESPACE(c) \ + ((c) == ' ' || (c) == '\t' || (c) == '\n') +/* if we see a word larger than this, just go ahead and split it from + wherever */ +#define MAX_WORD_SIZE 1024 + +struct fts_mail_build_context { + struct mail *mail; + struct fts_backend_update_context *update_ctx; + + char *content_type, *content_disposition; + struct fts_parser *body_parser; + + buffer_t *word_buf, *pending_input; + struct fts_user_language *cur_user_lang; +}; + +static int fts_build_data(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last); + +static void fts_build_parse_content_type(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *content_type; + + if (ctx->content_type != NULL) + return; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + T_BEGIN { + content_type = t_str_new(64); + (void)rfc822_parse_content_type(&parser, content_type); + ctx->content_type = str_lcase(i_strdup(str_c(content_type))); + } T_END; + rfc822_parser_deinit(&parser); +} 
+ +static void +fts_build_parse_content_disposition(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + /* just pass it as-is to backend, replacing any value saved from an earlier Content-Disposition header. */ + i_free(ctx->content_disposition); + ctx->content_disposition = + i_strndup(hdr->full_value, hdr->full_value_len); +} + +static void fts_parse_mail_header(struct fts_mail_build_context *ctx, + const struct message_block *raw_block) +{ + const struct message_header_line *hdr = raw_block->hdr; + + if (strcasecmp(hdr->name, "Content-Type") == 0) + fts_build_parse_content_type(ctx, hdr); + else if (strcasecmp(hdr->name, "Content-Disposition") == 0) + fts_build_parse_content_disposition(ctx, hdr); +} + +static int +fts_build_unstructured_header(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + const unsigned char *data = hdr->full_value; + unsigned char *buf = NULL; + unsigned int i; + int ret; + + /* @UNSAFE: if there are any NULs, replace them with spaces (done in a private copy that is allocated only once a NUL is actually found) */ + for (i = 0; i < hdr->full_value_len; i++) { + if (hdr->full_value[i] == '\0') { + if (buf == NULL) { + buf = i_memdup(hdr->full_value, + hdr->full_value_len); + data = buf; + } + buf[i] = ' '; + } + } + ret = fts_build_data(ctx, data, hdr->full_value_len, TRUE); + i_free(buf); + return ret; +} + +static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx, + struct fts_user_language *user_lang) +{ + i_assert(user_lang != NULL); + + ctx->cur_user_lang = user_lang; + /* reset tokenizer between fields - just to be sure no state + leaks between fields (especially if previous indexing had + failed) */ + fts_tokenizer_reset(user_lang->index_tokenizer); +} + +static void +fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + /* Headers that don't contain any human language will only be + translated to lowercase - no stemming or other filtering. 
There's + unfortunately no perfect way of detecting which headers contain + human languages, so we check with fts_header_has_language if the + header is something that's supposed to contain human text. + For such headers cur_user_lang is cleared (NOTE(review): presumably + so that the language is detected from the header value later - + confirm in fts_build_data()); every other header uses the user's + data language. */ + if (fts_header_has_language(hdr->name)) + ctx->cur_user_lang = NULL; + else { + fts_mail_build_ctx_set_lang(ctx, + fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + } +} + +static int fts_build_mail_header(struct fts_mail_build_context *ctx, + const struct message_block *block) +{ + const struct message_header_line *hdr = block->hdr; + struct fts_backend_build_key key; + int ret; + + if (hdr->eoh) + return 0; + + /* hdr->full_value is always set because we get the block from + message_decoder */ + i_zero(&key); + key.uid = ctx->mail->uid; + key.type = block->part->physical_pos == 0 ? + FTS_BACKEND_BUILD_KEY_HDR : FTS_BACKEND_BUILD_KEY_MIME_HDR; + key.part = block->part; + key.hdr_name = hdr->name; + + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) + fts_build_tokenized_hdr_update_lang(ctx, hdr); + + if (!fts_backend_update_set_build_key(ctx->update_ctx, &key)) + return 0; + + if (!message_header_is_address(hdr->name)) { + /* regular unstructured header */ + ret = fts_build_unstructured_header(ctx, hdr); + } else T_BEGIN { + /* message address. normalize it to give better + search results. 
*/ + struct message_address *addr; + string_t *str; + + addr = message_address_parse(pool_datastack_create(), + hdr->full_value, + hdr->full_value_len, + UINT_MAX, 0); + str = t_str_new(hdr->full_value_len); + message_address_write(str, addr); + + ret = fts_build_data(ctx, str_data(str), str_len(str), TRUE); + } T_END; + + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + /* index the header name itself using data-language. The name is sent under an empty hdr_name key and the previous language is restored afterwards. NOTE(review): the restore goes through fts_mail_build_ctx_set_lang(), which asserts non-NULL; for headers where cur_user_lang was cleared above this relies on fts_build_data() having selected a language - confirm. 
*/ + struct fts_user_language *prev_lang = ctx->cur_user_lang; + + fts_mail_build_ctx_set_lang(ctx, + fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + key.hdr_name = ""; + if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) { + if (fts_build_data(ctx, (const void *)hdr->name, + strlen(hdr->name), TRUE) < 0) + ret = -1; + } + fts_mail_build_ctx_set_lang(ctx, prev_lang); + } + return ret; +} + +static bool +fts_build_body_begin(struct fts_mail_build_context *ctx, + struct message_part *part, bool *binary_body_r) +{ + struct mail_storage *storage; + struct fts_parser_context parser_context; + struct fts_backend_build_key key; + + i_assert(ctx->body_parser == NULL); + + *binary_body_r = FALSE; + i_zero(&key); + key.uid = ctx->mail->uid; + key.part = part; + + i_zero(&parser_context); + parser_context.content_type = ctx->content_type != NULL ? + ctx->content_type : "text/plain"; + if (str_begins(parser_context.content_type, "multipart/")) { + /* multiparts are never indexed, only their contents */ + return FALSE; + } + storage = mailbox_get_storage(ctx->mail->box); + parser_context.user = mail_storage_get_user(storage); + parser_context.content_disposition = ctx->content_disposition; + + if (fts_parser_init(&parser_context, &ctx->body_parser)) { + /* extract text using the returned parser */ + *binary_body_r = TRUE; + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART; + } else if (str_begins(parser_context.content_type, "text/") || + str_begins(parser_context.content_type, "message/")) { + /* text body parts */ + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART; + ctx->body_parser = fts_parser_text_init(); + } else { + /* possibly binary: only indexed when the backend sets FTS_BACKEND_FLAG_BINARY_MIME_PARTS */ + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_BINARY_MIME_PARTS) == 0) + return FALSE; + *binary_body_r = TRUE; + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY; + } + key.body_content_type = parser_context.content_type; + key.body_content_disposition = ctx->content_disposition; + ctx->cur_user_lang = 
/* Tokenize the given data with the current language's index tokenizer, pass
   every token through that language's filter (when one is configured) and
   send the tokens that survive to the backend.

   ctx->cur_user_lang must already be set; it is dereferenced
   unconditionally.

   Returns the last fts_tokenizer_next() result that ended the loop (<= 0),
   or -1 if the backend failed to accept a token. */
static int
fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
				 const unsigned char *data, size_t size)
{
	struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer;
	struct fts_filter *filter = ctx->cur_user_lang->filter;
	const char *token, *error;
	/* ret drives the loop and holds the tokenizer's result;
	   ret2 holds the fate of the current token after filtering */
	int ret = 1, ret2;

	while (ret > 0) T_BEGIN {
		ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error);
		if (ret2 > 0 && filter != NULL)
			ret2 = fts_filter_filter(filter, &token, &error);
		if (ret2 < 0) {
			/* a tokenizer failure also makes ret negative and
			   ends the loop. a filter failure only changes ret2,
			   so it is logged and the loop goes on with the next
			   token. */
			mail_set_critical(ctx->mail,
				"fts: Couldn't create indexable tokens: %s",
				error);
		}
		if (ret2 > 0) {
			/* token passed the filter - hand it to the backend */
			if (fts_backend_update_build_more(ctx->update_ctx,
							  (const void *)token,
							  strlen(token)) < 0) {
				mail_storage_set_internal_error(ctx->mail->box->storage);
				ret = -1;
			}
		}
	} T_END;
	return ret;
}
*/ + *lang_r = fts_language_list_get_first(lang_list); + return 1; + } + return 0; + case FTS_LANGUAGE_RESULT_UNKNOWN: + /* use the default language */ + *lang_r = fts_language_list_get_first(lang_list); + return 1; + case FTS_LANGUAGE_RESULT_OK: + *lang_r = lang; + return 1; + case FTS_LANGUAGE_RESULT_ERROR: + /* internal language detection library failure + (e.g. invalid config). don't index anything. */ + mail_set_critical(ctx->mail, + "Language detection library initialization failed: %s", + error); + return -1; + default: + i_unreached(); + } +} + +static int +fts_build_tokenized(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + struct mail_user *user = ctx->update_ctx->backend->ns->user; + const struct fts_language *lang; + int ret; + + if (ctx->cur_user_lang != NULL) { + /* we already have a language */ + } else if ((ret = fts_detect_language(ctx, data, size, last, &lang)) < 0) { + return -1; + } else if (ret == 0) { + /* wait for more data */ + return 0; + } else { + fts_mail_build_ctx_set_lang(ctx, fts_user_language_find(user, lang)); + + if (ctx->pending_input->used > 0) { + if (fts_build_add_tokens_with_filter(ctx, + ctx->pending_input->data, + ctx->pending_input->used) < 0) + return -1; + buffer_set_used_size(ctx->pending_input, 0); + } + } + if (fts_build_add_tokens_with_filter(ctx, data, size) < 0) + return -1; + if (last) { + if (fts_build_add_tokens_with_filter(ctx, NULL, 0) < 0) + return -1; + } + return 0; +} + +static int +fts_build_full_words(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + size_t i; + + /* we'll need to send only full words to the backend */ + + if (ctx->word_buf != NULL && ctx->word_buf->used > 0) { + /* continuing previous word */ + for (i = 0; i < size; i++) { + if (IS_WORD_WHITESPACE(data[i])) + break; + } + buffer_append(ctx->word_buf, data, i); + data += i; + size -= i; + if (size == 0 && ctx->word_buf->used < MAX_WORD_SIZE && !last) 
{ + /* word is still not finished */ + return 0; + } + /* we have a full word, index it */ + if (fts_backend_update_build_more(ctx->update_ctx, + ctx->word_buf->data, + ctx->word_buf->used) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + buffer_set_used_size(ctx->word_buf, 0); + } + + /* find the boundary for last word */ + if (last) + i = size; + else { + for (i = size; i > 0; i--) { + if (IS_WORD_WHITESPACE(data[i-1])) + break; + } + } + + if (fts_backend_update_build_more(ctx->update_ctx, data, i) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + + if (i < size) { + if (ctx->word_buf == NULL) { + ctx->word_buf = + buffer_create_dynamic(default_pool, 128); + } + buffer_append(ctx->word_buf, data + i, size - i); + } + return 0; +} + +static int fts_build_data(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + return fts_build_tokenized(ctx, data, size, last); + } else if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_BUILD_FULL_WORDS) != 0) { + return fts_build_full_words(ctx, data, size, last); + } else { + if (fts_backend_update_build_more(ctx->update_ctx, data, size) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + return 0; + } +} + +static int fts_build_body_block(struct fts_mail_build_context *ctx, + const struct message_block *block, bool last) +{ + i_assert(block->hdr == NULL); + + return fts_build_data(ctx, block->data, block->size, last); +} + +static int fts_body_parser_finish(struct fts_mail_build_context *ctx, + const char **retriable_err_msg_r, + bool *may_need_retry_r) +{ + struct message_block block; + const char *retriable_error; + int ret = 0; + int deinit_ret; + *may_need_retry_r = FALSE; + + do { + i_zero(&block); + fts_parser_more(ctx->body_parser, &block); + if (fts_build_body_block(ctx, &block, 
FALSE) < 0) { + ret = -1; + break; + } + } while (block.size > 0); + + deinit_ret = fts_parser_deinit(&ctx->body_parser, &retriable_error); + if (ret < 0) { + /* indexing already failed - we don't want to retry + in any case */ + return -1; + } + + if (deinit_ret == 0) { + /* retry the parsing */ + *may_need_retry_r = TRUE; + *retriable_err_msg_r = retriable_error; + return -1; + } + if (deinit_ret < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + return 0; +} + +static void +load_header_filter(const char *key, struct fts_backend *backend, + ARRAY_TYPE(const_string) list, bool *matches_all_r) +{ + const char *str = mail_user_plugin_getenv(backend->ns->user, key); + + *matches_all_r = FALSE; + if (str == NULL || *str == '\0') + return; + + char **entries = p_strsplit_spaces(backend->header_filters.pool, str, " "); + for (char **entry = entries; *entry != NULL; ++entry) { + const char *value = str_lcase(*entry); + array_push_back(&list, &value); + if (*value == '*') { + *matches_all_r = TRUE; + break; + } + } + array_sort(&list, i_strcmp_p); +} + +static struct fts_header_filters * +load_header_filters(struct fts_backend *backend) +{ + struct fts_header_filters *filters = &backend->header_filters; + if (!filters->loaded) { + bool match_all; + + /* match_all return ignored in includes */ + load_header_filter("fts_header_includes", backend, + filters->includes, &match_all); + + load_header_filter("fts_header_excludes", backend, + filters->excludes, &match_all); + filters->loaded = TRUE; + filters->exclude_is_default = match_all; + } + return filters; +} + +/* This performs comparison between two strings, where the second one can end + * with the wildcard '*'. When the match reaches a '*' on the pitem side, zero + * (match) is returned regardles of the remaining characters. + * + * The function obeys the same lexicographic order as i_strcmp_p() and + * strcmp(), which is the reason for the casts to unsigned before comparing. 
+ */ +static int ATTR_PURE +header_prefix_cmp(const char *const *pkey, const char *const *pitem) +{ + const char *key = *pkey; + const char *item = *pitem; + + while (*key == *item && *key != '\0') key++, item++; + return item[0] == '*' && item[1] == '\0' ? 0 : + (unsigned char)*key - (unsigned char)*item; +} + +static bool +is_header_indexable(const char *header_name, struct fts_backend *backend) +{ + bool indexable; + T_BEGIN { + struct fts_header_filters *filters = load_header_filters(backend); + const char *hdr = t_str_lcase(header_name); + + if (array_bsearch(&filters->includes, &hdr, header_prefix_cmp) != NULL) + indexable = TRUE; + else if (filters->exclude_is_default || + array_bsearch(&filters->excludes, &hdr, header_prefix_cmp) != NULL) + indexable = FALSE; + else + indexable = TRUE; + } T_END; + return indexable; +} + +static int +fts_build_mail_real(struct fts_backend_update_context *update_ctx, + struct mail *mail, + const char **retriable_err_msg_r, + bool *may_need_retry_r) +{ + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; + struct fts_mail_build_context ctx; + struct istream *input; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_block raw_block, block; + struct message_part *prev_part, *parts; + bool skip_body = FALSE, body_part = FALSE, body_added = FALSE; + bool binary_body; + const char *error; + int ret; + + *may_need_retry_r = FALSE; + if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) { + if (mail->expunged) + return 0; + mail_set_critical(mail, "Failed to read stream: %s", + mailbox_get_last_internal_error(mail->box, NULL)); + return -1; + } + + i_zero(&ctx); + ctx.update_ctx = update_ctx; + ctx.mail = mail; + if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) + ctx.pending_input = buffer_create_dynamic(default_pool, 128); + + prev_part = NULL; + parser = 
message_parser_init(pool_datastack_create(), input, &parser_set); + + decoder = message_decoder_init(update_ctx->normalizer, 0); + for (;;) { + ret = message_parser_parse_next_block(parser, &raw_block); + i_assert(ret != 0); + if (ret < 0) { + if (input->stream_errno == 0) + ret = 0; + else { + mail_set_critical(mail, "read(%s) failed: %s", + i_stream_get_name(input), + i_stream_get_error(input)); + } + break; + } + + if (raw_block.part != prev_part) { + /* body part changed. we're now parsing the end of + boundary, possibly followed by message epilogue */ + if (ctx.body_parser != NULL) { + if (fts_body_parser_finish(&ctx, retriable_err_msg_r, + may_need_retry_r) < 0) { + ret = -1; + break; + } + } + message_decoder_set_return_binary(decoder, FALSE); + fts_backend_update_unset_build_key(update_ctx); + prev_part = raw_block.part; + i_free_and_null(ctx.content_type); + i_free_and_null(ctx.content_disposition); + + if (raw_block.size != 0) { + /* multipart. skip until beginning of next + part's headers */ + skip_body = TRUE; + } + } + + if (raw_block.hdr != NULL) { + /* always handle headers */ + } else if (raw_block.size == 0) { + /* end of headers */ + skip_body = !fts_build_body_begin(&ctx, raw_block.part, + &binary_body); + if (binary_body) + message_decoder_set_return_binary(decoder, TRUE); + body_part = TRUE; + } else { + if (skip_body) + continue; + } + + if (!message_decoder_decode_next_block(decoder, &raw_block, + &block)) + continue; + + if (block.hdr != NULL) { + fts_parse_mail_header(&ctx, &raw_block); + if (is_header_indexable(block.hdr->name, update_ctx->backend) && + fts_build_mail_header(&ctx, &block) < 0) { + ret = -1; + break; + } + } else if (block.size == 0) { + /* end of headers */ + } else { + i_assert(body_part); + if (ctx.body_parser != NULL) + fts_parser_more(ctx.body_parser, &block); + if (fts_build_body_block(&ctx, &block, FALSE) < 0) { + ret = -1; + break; + } + body_added = TRUE; + } + } + if (ctx.body_parser != NULL) { + if (ret == 0) + 
ret = fts_body_parser_finish(&ctx, retriable_err_msg_r, + may_need_retry_r); + else + (void)fts_parser_deinit(&ctx.body_parser, NULL); + } + if (ret == 0 && body_part && !skip_body && !body_added) { + /* make sure body is added even when it doesn't exist */ + block.data = NULL; block.size = 0; + ret = fts_build_body_block(&ctx, &block, TRUE); + } + if (message_parser_deinit_from_parts(&parser, &parts, &error) < 0) + index_mail_set_message_parts_corrupted(mail, error); + message_decoder_deinit(&decoder); + i_free(ctx.content_type); + i_free(ctx.content_disposition); + buffer_free(&ctx.word_buf); + buffer_free(&ctx.pending_input); + return ret < 0 ? -1 : 1; +} + +int fts_build_mail(struct fts_backend_update_context *update_ctx, + struct mail *mail) +{ + int ret; + /* Number of attempts to be taken if retry is needed */ + unsigned int attempts = 2; + const char *retriable_err_msg; + bool may_need_retry; + + T_BEGIN { + while ((ret = fts_build_mail_real(update_ctx, mail, + &retriable_err_msg, + &may_need_retry)) < 0 && + may_need_retry) { + if (--attempts == 0) { + /* Log this as info instead of as error, + because e.g. Tika doesn't differentiate + between temporary errors and invalid + document input. */ + i_info("%s - ignoring", retriable_err_msg); + ret = 0; + break; + } + } + } T_END; + return ret; +} diff --git a/src/plugins/fts/fts-build-mail.h b/src/plugins/fts/fts-build-mail.h new file mode 100644 index 0000000..aed4413 --- /dev/null +++ b/src/plugins/fts/fts-build-mail.h @@ -0,0 +1,9 @@ +#ifndef FTS_BUILD_MAIL_H +#define FTS_BUILD_MAIL_H + +/* Build indexes for the given mail. Returns 0 on success, -1 on error. + The error is set to mail's storage. 
*/ +int fts_build_mail(struct fts_backend_update_context *update_ctx, + struct mail *mail); + +#endif diff --git a/src/plugins/fts/fts-expunge-log.c b/src/plugins/fts/fts-expunge-log.c new file mode 100644 index 0000000..d39ceea --- /dev/null +++ b/src/plugins/fts/fts-expunge-log.c @@ -0,0 +1,617 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "crc32.h" +#include "hash.h" +#include "istream.h" +#include "write-full.h" +#include "seq-range-array.h" +#include "mail-storage.h" +#include "fts-expunge-log.h" + +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +struct fts_expunge_log_record { + /* CRC32 of this entire record (except this checksum) */ + uint32_t checksum; + /* Size of this entire record */ + uint32_t record_size; + + /* Mailbox GUID */ + guid_128_t guid; + /* { uid1, uid2 } pairs */ + /* uint32_t expunge_uid_ranges[]; */ + + /* Total number of messages expunged so far in this log */ + /* uint32_t expunge_count; */ +}; + +struct fts_expunge_log { + char *path; + + int fd; + struct stat st; +}; + +struct fts_expunge_log_mailbox { + guid_128_t guid; + ARRAY_TYPE(seq_range) uids; + unsigned uids_count; +}; + +struct fts_expunge_log_append_ctx { + struct fts_expunge_log *log; + pool_t pool; + + HASH_TABLE(uint8_t *, struct fts_expunge_log_mailbox *) mailboxes; + struct fts_expunge_log_mailbox *prev_mailbox; + + bool failed; +}; + +struct fts_expunge_log_read_ctx { + struct fts_expunge_log *log; + + struct istream *input; + buffer_t buffer; + struct fts_expunge_log_read_record read_rec; + + bool failed; + bool corrupted; + bool unlink; +}; + +struct fts_expunge_log *fts_expunge_log_init(const char *path) +{ + struct fts_expunge_log *log; + + log = i_new(struct fts_expunge_log, 1); + log->path = i_strdup(path); + log->fd = -1; + return log; +} + +void fts_expunge_log_deinit(struct fts_expunge_log **_log) +{ + struct fts_expunge_log *log = *_log; + + *_log = NULL; + 
i_close_fd(&log->fd); + i_free(log->path); + i_free(log); +} + +static int fts_expunge_log_open(struct fts_expunge_log *log, bool create) +{ + int fd; + + i_assert(log->fd == -1); + + /* FIXME: use proper permissions */ + fd = open(log->path, O_RDWR | O_APPEND | (create ? O_CREAT : 0), 0600); + if (fd == -1) { + if (errno == ENOENT && !create) + return 0; + + i_error("open(%s) failed: %m", log->path); + return -1; + } + if (fstat(fd, &log->st) < 0) { + i_error("fstat(%s) failed: %m", log->path); + i_close_fd(&fd); + return -1; + } + log->fd = fd; + return 1; +} + +static int +fts_expunge_log_reopen_if_needed(struct fts_expunge_log *log, bool create) +{ + struct stat st; + + if (log->fd == -1) + return fts_expunge_log_open(log, create); + + if (stat(log->path, &st) == 0) { + if (st.st_ino == log->st.st_ino && + CMP_DEV_T(st.st_dev, log->st.st_dev)) { + /* same file */ + return 0; + } + /* file changed */ + } else if (errno == ENOENT) { + /* recreate the file */ + } else { + i_error("stat(%s) failed: %m", log->path); + return -1; + } + if (close(log->fd) < 0) + i_error("close(%s) failed: %m", log->path); + log->fd = -1; + return fts_expunge_log_open(log, create); +} + +static int +fts_expunge_log_read_expunge_count(struct fts_expunge_log *log, + uint32_t *expunge_count_r) +{ + ssize_t ret; + + i_assert(log->fd != -1); + + if (fstat(log->fd, &log->st) < 0) { + i_error("fstat(%s) failed: %m", log->path); + return -1; + } + if ((uoff_t)log->st.st_size < sizeof(*expunge_count_r)) { + *expunge_count_r = 0; + return 0; + } + /* we'll assume that write()s atomically grow the file size, as + O_APPEND almost guarantees. even if not, having a race condition + isn't the end of the world. the expunge count is simply read wrong + and fts optimize is performed earlier or later than intended. 
*/ + ret = pread(log->fd, expunge_count_r, sizeof(*expunge_count_r), + log->st.st_size - 4); + if (ret < 0) { + i_error("pread(%s) failed: %m", log->path); + return -1; + } + if (ret != sizeof(*expunge_count_r)) { + i_error("pread(%s) read only %d of %d bytes", log->path, + (int)ret, (int)sizeof(*expunge_count_r)); + return -1; + } + return 0; +} + +struct fts_expunge_log_append_ctx * +fts_expunge_log_append_begin(struct fts_expunge_log *log) +{ + struct fts_expunge_log_append_ctx *ctx; + pool_t pool; + + pool = pool_alloconly_create("fts expunge log append", 1024); + ctx = p_new(pool, struct fts_expunge_log_append_ctx, 1); + ctx->log = log; + ctx->pool = pool; + hash_table_create(&ctx->mailboxes, pool, 0, guid_128_hash, guid_128_cmp); + + if (log != NULL && fts_expunge_log_reopen_if_needed(log, TRUE) < 0) + ctx->failed = TRUE; + return ctx; +} + +static struct fts_expunge_log_mailbox * +fts_expunge_log_mailbox_alloc(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid) +{ + uint8_t *guid_p; + struct fts_expunge_log_mailbox *mailbox; + + mailbox = p_new(ctx->pool, struct fts_expunge_log_mailbox, 1); + guid_128_copy(mailbox->guid, mailbox_guid); + p_array_init(&mailbox->uids, ctx->pool, 16); + + guid_p = mailbox->guid; + hash_table_insert(ctx->mailboxes, guid_p, mailbox); + return mailbox; +} + +static struct fts_expunge_log_mailbox * +fts_expunge_log_append_mailbox(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid) +{ + const uint8_t *guid_p = mailbox_guid; + struct fts_expunge_log_mailbox *mailbox; + + if (ctx->prev_mailbox != NULL && + guid_128_equals(mailbox_guid, ctx->prev_mailbox->guid)) + mailbox = ctx->prev_mailbox; + else { + mailbox = hash_table_lookup(ctx->mailboxes, guid_p); + if (mailbox == NULL) + mailbox = fts_expunge_log_mailbox_alloc(ctx, mailbox_guid); + ctx->prev_mailbox = mailbox; + } + return mailbox; +} +void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t 
/* Serialize every mailbox of the append context into `output`, one record
   per mailbox, in hash table iteration order.

   Each record is: struct fts_expunge_log_record header (checksum,
   record_size, guid), the mailbox's seq_range array copied as-is, and a
   trailing uint32_t expunge count.

   expunge_count is the count the log had before this append. It is
   increased by each mailbox's uids_count before being written, so every
   record carries the running total up to and including itself. */
static void
fts_expunge_log_export(struct fts_expunge_log_append_ctx *ctx,
		       uint32_t expunge_count, buffer_t *output)
{
	struct hash_iterate_context *iter;
	uint8_t *guid_p;
	struct fts_expunge_log_mailbox *mailbox;
	struct fts_expunge_log_record *rec;
	size_t rec_offset;

	iter = hash_table_iterate_init(ctx->mailboxes);
	while (hash_table_iterate(iter, ctx->mailboxes, &guid_p, &mailbox)) {
		/* remember where this record starts; the header is reserved
		   now and completed after the payload has been appended */
		rec_offset = output->used;
		rec = buffer_append_space_unsafe(output, sizeof(*rec));
		memcpy(rec->guid, mailbox->guid, sizeof(rec->guid));

		/* uint32_t expunge_uid_ranges[]; */
		buffer_append(output, array_front(&mailbox->uids),
			      array_count(&mailbox->uids) *
			      sizeof(struct seq_range));
		/* uint32_t expunge_count; */
		expunge_count += mailbox->uids_count;
		buffer_append(output, &expunge_count, sizeof(expunge_count));

		/* update the header now that we know the record contents.
		   the record pointer is looked up again by offset instead of
		   reusing the earlier one - presumably because the appends
		   above may have moved the buffer's memory (TODO confirm) */
		rec = buffer_get_space_unsafe(output, rec_offset,
					      output->used - rec_offset);
		rec->record_size = output->used - rec_offset;
		/* checksum covers the record from record_size onwards, i.e.
		   everything except the checksum field itself */
		rec->checksum = crc32_data(&rec->record_size,
					   rec->record_size -
					   sizeof(rec->checksum));
	}
	hash_table_iterate_deinit(&iter);
}
this forces NFS to flush the + changes to disk without our having to explicitly play with + fsync() */ + if (close(log->fd) < 0) { + /* FIXME: we should ftruncate() in case there + were partial writes.. */ + i_error("close(%s) failed: %m", log->path); + ret = -1; + } + log->fd = -1; + } + return ret; +} + +static int fts_expunge_log_append_finalize(struct fts_expunge_log_append_ctx **_ctx, + bool commit) +{ + struct fts_expunge_log_append_ctx *ctx = *_ctx; + int ret = ctx->failed ? -1 : 0; + + *_ctx = NULL; + if (commit && ret == 0) + ret = fts_expunge_log_write(ctx); + + hash_table_destroy(&ctx->mailboxes); + pool_unref(&ctx->pool); + return ret; +} + +int fts_expunge_log_uid_count(struct fts_expunge_log *log, + unsigned int *expunges_r) +{ + int ret; + + if ((ret = fts_expunge_log_reopen_if_needed(log, FALSE)) <= 0) { + *expunges_r = 0; + return ret; + } + + return fts_expunge_log_read_expunge_count(log, expunges_r); +} + +int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **_ctx) +{ + return fts_expunge_log_append_finalize(_ctx, TRUE); +} + +int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **_ctx) +{ + return fts_expunge_log_append_finalize(_ctx, FALSE); +} + +struct fts_expunge_log_read_ctx * +fts_expunge_log_read_begin(struct fts_expunge_log *log) +{ + struct fts_expunge_log_read_ctx *ctx; + + ctx = i_new(struct fts_expunge_log_read_ctx, 1); + ctx->log = log; + if (fts_expunge_log_reopen_if_needed(log, FALSE) < 0) + ctx->failed = TRUE; + else if (log->fd != -1) + ctx->input = i_stream_create_fd(log->fd, SIZE_MAX); + ctx->unlink = TRUE; + return ctx; +} + +static bool +fts_expunge_log_record_size_is_valid(const struct fts_expunge_log_record *rec, + unsigned int *uids_size_r) +{ + if (rec->record_size < sizeof(*rec) + sizeof(uint32_t)*3) + return FALSE; + *uids_size_r = rec->record_size - sizeof(*rec) - sizeof(uint32_t); + return *uids_size_r % sizeof(uint32_t)*2 == 0; +} + +static void +fts_expunge_log_read_failure(struct 
fts_expunge_log_read_ctx *ctx, + unsigned int wanted_size) +{ + size_t size; + + if (ctx->input->stream_errno != 0) { + ctx->failed = TRUE; + i_error("read(%s) failed: %s", ctx->log->path, + i_stream_get_error(ctx->input)); + } else { + size = i_stream_get_data_size(ctx->input); + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Unexpected EOF (read %zu / %u bytes)", + ctx->log->path, size, wanted_size); + } +} + +const struct fts_expunge_log_read_record * +fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx) +{ + const unsigned char *data; + const struct fts_expunge_log_record *rec; + unsigned int uids_size; + size_t size; + uint32_t checksum; + + if (ctx->input == NULL) + return NULL; + + /* initial read to try to get the record */ + (void)i_stream_read_bytes(ctx->input, &data, &size, IO_BLOCK_SIZE); + if (size == 0 && ctx->input->stream_errno == 0) { + /* expected EOF - mark the file as read by unlinking it */ + if (ctx->unlink) + i_unlink_if_exists(ctx->log->path); + + /* try reading again, in case something new was written */ + i_stream_sync(ctx->input); + (void)i_stream_read_bytes(ctx->input, &data, &size, + IO_BLOCK_SIZE); + } + if (size < sizeof(*rec)) { + if (size == 0 && ctx->input->stream_errno == 0) { + /* expected EOF */ + return NULL; + } + fts_expunge_log_read_failure(ctx, sizeof(*rec)); + return NULL; + } + rec = (const void *)data; + + if (!fts_expunge_log_record_size_is_valid(rec, &uids_size)) { + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Invalid record size: %u", + ctx->log->path, rec->record_size); + return NULL; + } + + /* read the entire record */ + while (size < rec->record_size) { + if (i_stream_read_bytes(ctx->input, &data, &size, rec->record_size) < 0) { + fts_expunge_log_read_failure(ctx, rec->record_size); + return NULL; + } + rec = (const void *)data; + } + + /* verify that the record checksum is valid */ + checksum = crc32_data(&rec->record_size, + rec->record_size - 
sizeof(rec->checksum)); + if (checksum != rec->checksum) { + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Record checksum mismatch: %u != %u", + ctx->log->path, checksum, rec->checksum); + return NULL; + } + + memcpy(ctx->read_rec.mailbox_guid, rec->guid, + sizeof(ctx->read_rec.mailbox_guid)); + /* create the UIDs array by pointing it directly into input + stream's buffer */ + buffer_create_from_const_data(&ctx->buffer, rec + 1, uids_size); + array_create_from_buffer(&ctx->read_rec.uids, &ctx->buffer, + sizeof(struct seq_range)); + + i_stream_skip(ctx->input, rec->record_size); + return &ctx->read_rec; +} + +int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **_ctx) +{ + struct fts_expunge_log_read_ctx *ctx = *_ctx; + int ret = ctx->failed ? -1 : (ctx->corrupted ? 0 : 1); + + *_ctx = NULL; + + if (ctx->corrupted) { + if (ctx->unlink) + i_unlink_if_exists(ctx->log->path); + } + + i_stream_unref(&ctx->input); + i_free(ctx); + return ret; +} + +int fts_expunge_log_flatten(const char *path, + struct fts_expunge_log_append_ctx **flattened_r) +{ + struct fts_expunge_log *read; + struct fts_expunge_log_read_ctx *read_ctx; + const struct fts_expunge_log_read_record *record; + struct fts_expunge_log_append_ctx *append; + int ret; + + i_assert(path != NULL && flattened_r != NULL); + read = fts_expunge_log_init(path); + + read_ctx = fts_expunge_log_read_begin(read); + read_ctx->unlink = FALSE; + + append = fts_expunge_log_append_begin(NULL); + while((record = fts_expunge_log_read_next(read_ctx)) != NULL) { + fts_expunge_log_append_record(append, record); + } + + if ((ret = fts_expunge_log_read_end(&read_ctx)) > 0) + *flattened_r = append; + fts_expunge_log_deinit(&read); + + return ret; +} +bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, uint32_t uid) +{ + const struct fts_expunge_log_mailbox *mailbox; + const uint8_t *guid_p = mailbox_guid; + + mailbox = 
hash_table_lookup(ctx->mailboxes, guid_p); + if (mailbox == NULL) + return FALSE; + return seq_range_exists(&mailbox->uids, uid); +} +int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *from, + const struct fts_expunge_log_read_record *record) +{ + const uint8_t *guid_p = record->mailbox_guid; + struct fts_expunge_log_mailbox *mailbox = hash_table_lookup(from->mailboxes, guid_p); + if (mailbox == NULL) + return 0; /* may only remove things that exist */ + + mailbox->uids_count -= seq_range_array_remove_seq_range(&mailbox->uids, &record->uids); + return 1; +} +int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from, + struct fts_expunge_log *subtract) +{ + unsigned int failures = 0; + struct fts_expunge_log_read_ctx *read_ctx = fts_expunge_log_read_begin(subtract); + read_ctx->unlink = FALSE; + + const struct fts_expunge_log_read_record *record; + while ((record = fts_expunge_log_read_next(read_ctx)) != NULL) { + if (fts_expunge_log_append_remove(from, record) <= 0) + failures++; + } + if (failures > 0) + i_warning("fts: Expunge log subtract ignored %u nonexistent mailbox GUIDs", + failures); + return fts_expunge_log_read_end(&read_ctx); +} +/* It could be argued that somehow adding a log (file) to the append context + and then calling the _write() helper would be easier. But then there's the + _commit() vs. _abort() cleanup that would need to be addressed. Just creating + a copy is simpler. 
*/ +int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *read_log, + const char *path) +{ + int ret; + struct fts_expunge_log *nlog = fts_expunge_log_init(path); + struct fts_expunge_log_append_ctx *nappend = fts_expunge_log_append_begin(nlog); + + struct hash_iterate_context *iter; + uint8_t *guid_p; + struct fts_expunge_log_mailbox *mailbox; + + iter = hash_table_iterate_init(read_log->mailboxes); + while (hash_table_iterate(iter, read_log->mailboxes, &guid_p, &mailbox)) + fts_expunge_log_append_mailbox_record(nappend, mailbox); + + hash_table_iterate_deinit(&iter); + ret = fts_expunge_log_append_commit(&nappend); + fts_expunge_log_deinit(&nlog); + + return ret; +} diff --git a/src/plugins/fts/fts-expunge-log.h b/src/plugins/fts/fts-expunge-log.h new file mode 100644 index 0000000..cc15f29 --- /dev/null +++ b/src/plugins/fts/fts-expunge-log.h @@ -0,0 +1,58 @@ +#ifndef FTS_EXPUNGE_LOG +#define FTS_EXPUNGE_LOG + +#include "seq-range-array.h" +#include "guid.h" + +struct fts_expunge_log_read_record { + guid_128_t mailbox_guid; + ARRAY_TYPE(seq_range) uids; +}; + +struct fts_expunge_log *fts_expunge_log_init(const char *path); +void fts_expunge_log_deinit(struct fts_expunge_log **log); + +struct fts_expunge_log_append_ctx * +fts_expunge_log_append_begin(struct fts_expunge_log *log); +void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + uint32_t uid); +void fts_expunge_log_append_range(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + const struct seq_range *uids); +void fts_expunge_log_append_record(struct fts_expunge_log_append_ctx *ctx, + const struct fts_expunge_log_read_record *record); +/* In-memory flattened structures may have records removed from them, + file-backed ones may not. Non-existence of UIDs is not an error, + non-existence of mailbox GUID causes an error return of 0. 
*/ +int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *ctx, + const struct fts_expunge_log_read_record *record); +int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **ctx); +/* Do not commit non-backed structures, abort them after use. */ +int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **ctx); + +int fts_expunge_log_uid_count(struct fts_expunge_log *log, + unsigned int *expunges_r); + +struct fts_expunge_log_read_ctx * +fts_expunge_log_read_begin(struct fts_expunge_log *log); +const struct fts_expunge_log_read_record * +fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx); +/* Returns 1 if all ok, 0 if there was corruption, -1 if I/O error. + If end() is called before reading all records, the log isn't unlinked. */ +int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **ctx); + +/* Read an entire log file, and flatten it into one hash of arrays. + The struct it returns cannot be written, as it has no backing store */ +int fts_expunge_log_flatten(const char *path, + struct fts_expunge_log_append_ctx **flattened_r); +bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, uint32_t uid); +/* Modify in-place a flattened log. If non-existent mailbox GUIDs are + encountered, a warning will be logged. */ +int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from, + struct fts_expunge_log *subtract); +/* Write a modified flattened log as a new file. 
*/ +int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *flattened, + const char *path); +#endif diff --git a/src/plugins/fts/fts-indexer.c b/src/plugins/fts/fts-indexer.c new file mode 100644 index 0000000..aca23c9 --- /dev/null +++ b/src/plugins/fts/fts-indexer.c @@ -0,0 +1,300 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "connection.h" +#include "write-full.h" +#include "istream.h" +#include "ostream.h" +#include "strescape.h" +#include "time-util.h" +#include "settings-parser.h" +#include "mail-user.h" +#include "mail-storage-private.h" +#include "fts-api.h" +#include "fts-indexer.h" + +#define INDEXER_NOTIFY_INTERVAL_SECS 10 +#define INDEXER_SOCKET_NAME "indexer" +#define INDEXER_WAIT_MSECS 250 + +struct fts_indexer_context { + struct connection conn; + + struct mailbox *box; + struct ioloop *ioloop; + + struct timeval search_start_time, last_notify; + unsigned int percentage; + struct connection_list *connection_list; + + bool notified:1; + bool failed:1; + bool completed:1; +}; + +static void fts_indexer_notify(struct fts_indexer_context *ctx) +{ + unsigned long long elapsed_msecs, est_total_msecs; + unsigned int eta_secs; + + if (ioloop_time - ctx->last_notify.tv_sec < INDEXER_NOTIFY_INTERVAL_SECS) + return; + ctx->last_notify = ioloop_timeval; + + if (ctx->box->storage->callbacks.notify_ok == NULL || + ctx->percentage == 0) + return; + + elapsed_msecs = timeval_diff_msecs(&ioloop_timeval, + &ctx->search_start_time); + est_total_msecs = elapsed_msecs * 100 / ctx->percentage; + eta_secs = (est_total_msecs - elapsed_msecs) / 1000; + + T_BEGIN { + const char *text; + + text = t_strdup_printf("Indexed %d%% of the mailbox, " + "ETA %d:%02d", ctx->percentage, + eta_secs/60, eta_secs%60); + ctx->box->storage->callbacks. 
+ notify_ok(ctx->box, text, + ctx->box->storage->callback_context); + ctx->notified = TRUE; + } T_END; +} + +static int fts_indexer_more_int(struct fts_indexer_context *ctx) +{ + struct ioloop *prev_ioloop = current_ioloop; + struct timeout *to; + + if (ctx->failed) + return -1; + if (ctx->completed) + return 1; + + /* wait for a while for the reply. FIXME: once search API supports + asynchronous waits, get rid of this wait and use the mail IO loop */ + io_loop_set_current(ctx->ioloop); + to = timeout_add_short(INDEXER_WAIT_MSECS, io_loop_stop, ctx->ioloop); + io_loop_run(ctx->ioloop); + timeout_remove(&to); + io_loop_set_current(prev_ioloop); + + if (ctx->failed) + return -1; + if (ctx->completed) + return 1; + return 0; +} + +int fts_indexer_more(struct fts_indexer_context *ctx) +{ + int ret; + + if ((ret = fts_indexer_more_int(ctx)) < 0) { + /* If failed is already set, the code has had a chance to + * set an internal error already, i.e. MAIL_ERROR_INUSE. */ + if (!ctx->failed) + mail_storage_set_internal_error(ctx->box->storage); + ctx->failed = TRUE; + return -1; + } + + if (ret == 0) + fts_indexer_notify(ctx); + + return ret; +} + +static void fts_indexer_destroy(struct connection *conn) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + connection_deinit(conn); + if (!ctx->completed) + ctx->failed = TRUE; + ctx->completed = TRUE; +} + +int fts_indexer_deinit(struct fts_indexer_context **_ctx) +{ + struct fts_indexer_context *ctx = *_ctx; + i_assert(ctx != NULL); + *_ctx = NULL; + if (!ctx->completed) + ctx->failed = TRUE; + int ret = ctx->failed ? -1 : 0; + if (ctx->notified) { + /* we notified at least once */ + ctx->box->storage->callbacks. 
+ notify_ok(ctx->box, "Mailbox indexing finished", + ctx->box->storage->callback_context); + } + connection_list_deinit(&ctx->connection_list); + io_loop_set_current(ctx->ioloop); + io_loop_destroy(&ctx->ioloop); + i_free(ctx); + return ret; +} + +static int +fts_indexer_input_args(struct connection *conn, const char *const *args) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + int percentage; + if (args[1] == NULL) { + e_error(conn->event, "indexer sent invalid reply"); + return -1; + } + if (strcmp(args[0], "1") != 0) { + e_error(conn->event, "indexer sent invalid reply"); + return -1; + } + if (strcmp(args[1], "OK") == 0) + return 1; + if (str_to_int(args[1], &percentage) < 0) { + e_error(conn->event, "indexer sent invalid progress: %s", args[1]); + ctx->failed = TRUE; + return -1; + } + if (percentage < 0) { + e_error(ctx->box->event, "indexer failed to index mailbox"); + ctx->failed = TRUE; + return -1; + } + ctx->percentage = percentage; + if (ctx->percentage == 100) + ctx->completed = TRUE; + return 1; +} + +static void fts_indexer_client_connected(struct connection *conn, bool success) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + if (!success) { + ctx->completed = TRUE; + ctx->failed = TRUE; + return; + } + ctx->failed = ctx->completed = FALSE; + const char *cmd = t_strdup_printf("PREPEND\t1\t%s\t%s\t0\t%s\n", + str_tabescape(ctx->box->storage->user->username), + str_tabescape(ctx->box->vname), + str_tabescape(ctx->box->storage->user->session_id)); + o_stream_nsend_str(conn->output, cmd); +} + +static void fts_indexer_idle_timeout(struct connection *conn) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + mail_storage_set_error(ctx->box->storage, MAIL_ERROR_INUSE, + "Timeout while waiting for indexing to finish"); + ctx->failed = TRUE; + connection_disconnect(conn); +} + +static const struct connection_settings 
indexer_client_set = +{ + .service_name_in = "indexer", + .service_name_out = "indexer", + .major_version = 1, + .minor_version = 0, + .client_connect_timeout_msecs = 2000, + .input_max_size = SIZE_MAX, + .output_max_size = IO_BLOCK_SIZE, + .client = TRUE, +}; + +static const struct connection_vfuncs indexer_client_vfuncs = +{ + .destroy = fts_indexer_destroy, + .client_connected = fts_indexer_client_connected, + .input_args = fts_indexer_input_args, + .idle_timeout = fts_indexer_idle_timeout, +}; + +int fts_indexer_init(struct fts_backend *backend, struct mailbox *box, + struct fts_indexer_context **ctx_r) +{ + struct ioloop *prev_ioloop = current_ioloop; + struct fts_indexer_context *ctx; + struct mailbox_status status; + uint32_t last_uid, seq1, seq2; + const char *path, *value, *error; + unsigned int timeout_secs = 0; + int ret; + + value = mail_user_plugin_getenv(box->storage->user, "fts_index_timeout"); + if (value != NULL) { + if (settings_get_time(value, &timeout_secs, &error) < 0) { + e_error(box->storage->user->event, + "Invalid fts_index_timeout setting: %s", + error); + return -1; + } + } + + if (fts_backend_get_last_uid(backend, box, &last_uid) < 0) + return -1; + + mailbox_get_open_status(box, STATUS_UIDNEXT, &status); + if (status.uidnext == last_uid+1) { + /* everything is already indexed */ + return 0; + } + + mailbox_get_seq_range(box, last_uid+1, (uint32_t)-1, &seq1, &seq2); + if (seq1 == 0) { + /* no new messages (last messages in mailbox were expunged) */ + return 0; + } + + path = t_strconcat(box->storage->user->set->base_dir, + "/"INDEXER_SOCKET_NAME, NULL); + + ctx = i_new(struct fts_indexer_context, 1); + ctx->box = box; + ctx->search_start_time = ioloop_timeval; + ctx->conn.event_parent = box->event; + ctx->ioloop = io_loop_create(); + ctx->connection_list = connection_list_init(&indexer_client_set, + &indexer_client_vfuncs); + ctx->conn.input_idle_timeout_secs = timeout_secs; + connection_init_client_unix(ctx->connection_list, &ctx->conn, 
+ path); + ret = connection_client_connect(&ctx->conn); + io_loop_set_current(prev_ioloop); + *ctx_r = ctx; + return ctx->failed || ret < 0 ? -1 : 1; +} + +#define INDEXER_HANDSHAKE "1\t0\tindexer\tindexer\n" + +int fts_indexer_cmd(struct mail_user *user, const char *cmd, + const char **path_r) +{ + const char *path; + int fd; + + path = t_strconcat(user->set->base_dir, + "/"INDEXER_SOCKET_NAME, NULL); + fd = net_connect_unix_with_retries(path, 1000); + if (fd == -1) { + i_error("net_connect_unix(%s) failed: %m", path); + return -1; + } + + cmd = t_strconcat(INDEXER_HANDSHAKE, cmd, NULL); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return -1; + } + *path_r = path; + return fd; +} diff --git a/src/plugins/fts/fts-indexer.h b/src/plugins/fts/fts-indexer.h new file mode 100644 index 0000000..7ccbc7e --- /dev/null +++ b/src/plugins/fts/fts-indexer.h @@ -0,0 +1,22 @@ +#ifndef FTS_BUILD_H +#define FTS_BUILD_H + +struct fts_backend; +struct fts_indexer_context; + +/* Initialize indexing the given mailbox via indexer service. Returns 1 if + indexing started, 0 if there was no need to index or -1 if error. */ +int fts_indexer_init(struct fts_backend *backend, struct mailbox *box, + struct fts_indexer_context **ctx_r); +/* Returns 0 if ok, -1 if error. */ +int fts_indexer_deinit(struct fts_indexer_context **ctx); + +/* Build more. Returns 1 if finished, 0 if this function needs to be called + again, -1 if error. */ +int fts_indexer_more(struct fts_indexer_context *ctx); + +/* Returns fd, which you can either read from or close. 
*/ +int fts_indexer_cmd(struct mail_user *user, const char *cmd, + const char **path_r); + +#endif diff --git a/src/plugins/fts/fts-parser-html.c b/src/plugins/fts/fts-parser-html.c new file mode 100644 index 0000000..aa2078d --- /dev/null +++ b/src/plugins/fts/fts-parser-html.c @@ -0,0 +1,64 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "message-parser.h" +#include "mail-html2text.h" +#include "fts-parser.h" + +struct html_fts_parser { + struct fts_parser parser; + struct mail_html2text *html2text; + buffer_t *output; +}; + +static struct fts_parser * +fts_parser_html_try_init(struct fts_parser_context *parser_context) +{ + struct html_fts_parser *parser; + + if (!mail_html2text_content_type_match(parser_context->content_type)) + return NULL; + + parser = i_new(struct html_fts_parser, 1); + parser->parser.v = fts_parser_html; + parser->html2text = mail_html2text_init(0); + parser->output = buffer_create_dynamic(default_pool, 4096); + return &parser->parser; +} + +static void fts_parser_html_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct html_fts_parser *parser = (struct html_fts_parser *)_parser; + + if (block->size == 0) { + /* finished */ + return; + } + + buffer_set_used_size(parser->output, 0); + mail_html2text_more(parser->html2text, block->data, block->size, + parser->output); + + block->data = parser->output->data; + block->size = parser->output->used; +} + +static int fts_parser_html_deinit(struct fts_parser *_parser, + const char **retriable_err_msg_r ATTR_UNUSED) +{ + struct html_fts_parser *parser = (struct html_fts_parser *)_parser; + + mail_html2text_deinit(&parser->html2text); + buffer_free(&parser->output); + i_free(parser); + return 1; +} + +struct fts_parser_vfuncs fts_parser_html = { + fts_parser_html_try_init, + fts_parser_html_more, + fts_parser_html_deinit, + NULL +}; diff --git a/src/plugins/fts/fts-parser-script.c 
b/src/plugins/fts/fts-parser-script.c new file mode 100644 index 0000000..eefbe07 --- /dev/null +++ b/src/plugins/fts/fts-parser-script.c @@ -0,0 +1,277 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "net.h" +#include "istream.h" +#include "write-full.h" +#include "module-context.h" +#include "rfc822-parser.h" +#include "rfc2231-parser.h" +#include "message-parser.h" +#include "mail-user.h" +#include "fts-parser.h" + +#define SCRIPT_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_parser_script_user_module) + +#define SCRIPT_HANDSHAKE "VERSION\tscript\t4\t0\nalarm=10\nnoreply\n" + +struct content { + const char *content_type; + const char *const *extensions; +}; + +struct fts_parser_script_user { + union mail_user_module_context module_ctx; + + ARRAY(struct content) content; +}; + +struct script_fts_parser { + struct fts_parser parser; + + int fd; + char *path; + + unsigned char outbuf[IO_BLOCK_SIZE]; + bool failed; + bool shutdown; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_parser_script_user_module, + &mail_user_module_register); + +static int script_connect(struct mail_user *user, const char **path_r) +{ + const char *path; + int fd; + + path = mail_user_plugin_getenv(user, "fts_decoder"); + if (path == NULL) + return -1; + + if (*path != '/') + path = t_strconcat(user->set->base_dir, "/", path, NULL); + fd = net_connect_unix_with_retries(path, 1000); + if (fd == -1) + i_error("net_connect_unix(%s) failed: %m", path); + else + net_set_nonblock(fd, FALSE); + *path_r = path; + return fd; +} + +static int script_contents_read(struct mail_user *user) +{ + struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user); + const char *path, *cmd, *line; + char **args; + struct istream *input; + struct content *content; + bool eof_seen = FALSE; + int fd, ret = 0; + i_assert(suser != NULL); + + fd = script_connect(user, &path); + if (fd == -1) + return -1; + + cmd = 
t_strdup_printf(SCRIPT_HANDSHAKE"\n"); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return -1; + } + input = i_stream_create_fd_autoclose(&fd, 1024); + while ((line = i_stream_read_next_line(input)) != NULL) { + /* <content-type> <extension> [<extension> ...] */ + args = p_strsplit_spaces(user->pool, line, " "); + if (args[0] == NULL) { + eof_seen = TRUE; + break; + } + if (args[0][0] == '\0' || args[1] == NULL) { + i_error("parser script sent invalid input: %s", line); + continue; + } + + content = array_append_space(&suser->content); + content->content_type = str_lcase(args[0]); + content->extensions = (const void *)(args+1); + } + if (input->stream_errno != 0) { + i_error("parser script read(%s) failed: %s", path, + i_stream_get_error(input)); + ret = -1; + } else if (!eof_seen) { + if (input->v_offset == 0) + i_error("parser script didn't send any data"); + else + i_error("parser script didn't send empty EOF line"); + } + i_stream_destroy(&input); + return ret; +} + +static bool script_support_content(struct mail_user *user, + const char **content_type, + const char *filename) +{ + struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user); + const struct content *content; + const char *extension; + + if (suser == NULL) { + suser = p_new(user->pool, struct fts_parser_script_user, 1); + p_array_init(&suser->content, user->pool, 32); + MODULE_CONTEXT_SET(user, fts_parser_script_user_module, suser); + } + if (array_count(&suser->content) == 0) { + if (script_contents_read(user) < 0) + return FALSE; + } + + if (strcmp(*content_type, "application/octet-stream") == 0) { + if (filename == NULL) + return FALSE; + extension = strrchr(filename, '.'); + if (extension == NULL) + return FALSE; + extension = extension + 1; + + array_foreach(&suser->content, content) { + if (content->extensions != NULL && + str_array_icase_find(content->extensions, extension)) { + *content_type = content->content_type; + 
return TRUE; + } + } + } else { + array_foreach(&suser->content, content) { + if (strcmp(content->content_type, *content_type) == 0) + return TRUE; + } + } + return FALSE; +} + +static void parse_content_disposition(const char *content_disposition, + const char **filename_r) +{ + struct rfc822_parser_context parser; + const char *const *results, *filename2; + string_t *str; + + *filename_r = NULL; + + if (content_disposition == NULL) + return; + + rfc822_parser_init(&parser, (const unsigned char *)content_disposition, + strlen(content_disposition), NULL); + rfc822_skip_lwsp(&parser); + + /* type; param; param; .. */ + str = t_str_new(32); + if (rfc822_parse_mime_token(&parser, str) < 0) { + rfc822_parser_deinit(&parser); + return; + } + + rfc2231_parse(&parser, &results); + filename2 = NULL; + for (; *results != NULL; results += 2) { + if (strcasecmp(results[0], "filename") == 0) { + *filename_r = results[1]; + break; + } + if (strcasecmp(results[0], "filename*") == 0) + filename2 = results[1]; + } + if (*filename_r == NULL) { + /* RFC 2231 style non-ascii filename. 
we don't really care + much about the filename actually, just about its extension */ + *filename_r = filename2; + } + rfc822_parser_deinit(&parser); +} + +static struct fts_parser * +fts_parser_script_try_init(struct fts_parser_context *parser_context) +{ + struct script_fts_parser *parser; + const char *filename, *path, *cmd; + int fd; + + parse_content_disposition(parser_context->content_disposition, &filename); + if (!script_support_content(parser_context->user, &parser_context->content_type, filename)) + return NULL; + + fd = script_connect(parser_context->user, &path); + if (fd == -1) + return NULL; + cmd = t_strdup_printf(SCRIPT_HANDSHAKE"%s\n\n", parser_context->content_type); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return NULL; + } + + parser = i_new(struct script_fts_parser, 1); + parser->parser.v = fts_parser_script; + parser->path = i_strdup(path); + parser->fd = fd; + return &parser->parser; +} + +static void fts_parser_script_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct script_fts_parser *parser = (struct script_fts_parser *)_parser; + ssize_t ret; + + if (block->size > 0) { + /* first we'll send everything to the script */ + if (!parser->failed && + write_full(parser->fd, block->data, block->size) < 0) { + i_error("write(%s) failed: %m", parser->path); + parser->failed = TRUE; + } + block->size = 0; + } else { + if (!parser->shutdown) { + if (shutdown(parser->fd, SHUT_WR) < 0) + i_error("shutdown(%s) failed: %m", parser->path); + parser->shutdown = TRUE; + } + /* read the result from the script */ + ret = read(parser->fd, parser->outbuf, sizeof(parser->outbuf)); + if (ret < 0) + i_error("read(%s) failed: %m", parser->path); + else { + block->data = parser->outbuf; + block->size = ret; + } + } +} + +static int fts_parser_script_deinit(struct fts_parser *_parser, + const char **retriable_err_msg_r ATTR_UNUSED) +{ + struct script_fts_parser *parser = 
(struct script_fts_parser *)_parser; + int ret = parser->failed ? -1 : 1; + + if (close(parser->fd) < 0) + i_error("close(%s) failed: %m", parser->path); + i_free(parser->path); + i_free(parser); + return ret; +} + +struct fts_parser_vfuncs fts_parser_script = { + fts_parser_script_try_init, + fts_parser_script_more, + fts_parser_script_deinit, + NULL +}; diff --git a/src/plugins/fts/fts-parser-tika.c b/src/plugins/fts/fts-parser-tika.c new file mode 100644 index 0000000..bb6379c --- /dev/null +++ b/src/plugins/fts/fts-parser-tika.c @@ -0,0 +1,278 @@ +/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "istream.h" +#include "module-context.h" +#include "iostream-ssl.h" +#include "http-url.h" +#include "http-client.h" +#include "message-parser.h" +#include "mail-user.h" +#include "fts-parser.h" + +#define TIKA_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_parser_tika_user_module) + +struct fts_parser_tika_user { + union mail_user_module_context module_ctx; + struct http_url *http_url; +}; + +struct tika_fts_parser { + struct fts_parser parser; + struct mail_user *user; + struct http_client_request *http_req; + + struct ioloop *ioloop; + struct io *io; + struct istream *payload; + + bool failed; +}; + +static struct http_client *tika_http_client = NULL; +static MODULE_CONTEXT_DEFINE_INIT(fts_parser_tika_user_module, + &mail_user_module_register); + +static int +tika_get_http_client_url(struct mail_user *user, struct http_url **http_url_r) +{ + struct fts_parser_tika_user *tuser = TIKA_USER_CONTEXT(user); + struct http_client_settings http_set; + struct ssl_iostream_settings ssl_set; + const char *url, *error; + + url = mail_user_plugin_getenv(user, "fts_tika"); + if (url == NULL) { + /* fts_tika disabled */ + return -1; + } + + if (tuser != NULL) { + *http_url_r = tuser->http_url; + return *http_url_r == NULL ? 
-1 : 0; + } + + tuser = p_new(user->pool, struct fts_parser_tika_user, 1); + MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser); + + if (http_url_parse(url, NULL, 0, user->pool, + &tuser->http_url, &error) < 0) { + i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error); + return -1; + } + + if (tika_http_client == NULL) { + mail_user_init_ssl_client_settings(user, &ssl_set); + + i_zero(&http_set); + http_set.max_idle_time_msecs = 100; + http_set.max_parallel_connections = 1; + http_set.max_pipelined_requests = 1; + http_set.max_redirects = 1; + http_set.max_attempts = 3; + http_set.connect_timeout_msecs = 5*1000; + http_set.request_timeout_msecs = 60*1000; + http_set.ssl = &ssl_set; + http_set.debug = user->mail_debug; + http_set.event_parent = user->event; + + /* FIXME: We should initialize a shared client instead. However, + this is currently not possible due to an obscure bug + in the blocking HTTP payload API, which causes + conflicts with other HTTP applications like FTS Solr. + Using a private client will provide a quick fix for + now. 
*/ + tika_http_client = http_client_init_private(&http_set); + } + *http_url_r = tuser->http_url; + return 0; +} + +static void +fts_tika_parser_response(const struct http_response *response, + struct tika_fts_parser *parser) +{ + i_assert(parser->payload == NULL); + + switch (response->status) { + case 200: + /* read response */ + if (response->payload == NULL) + parser->payload = i_stream_create_from_data("", 0); + else { + i_stream_ref(response->payload); + parser->payload = response->payload; + } + break; + case 204: /* empty response */ + case 415: /* Unsupported Media Type */ + case 422: /* Unprocessable Entity */ + e_debug(parser->user->event, "fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->payload = i_stream_create_from_data("", 0); + break; + default: + if (response->status / 100 == 5) { + /* Server Error - the problem could be anything (in Tika or + HTTP server or proxy) and might be retriable, but Tika has + trouble processing some documents and throws up this error + every time for those documents. 
*/ + parser->parser.may_need_retry = TRUE; + i_free(parser->parser.retriable_error_msg); + parser->parser.retriable_error_msg = + i_strdup_printf("fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->payload = i_stream_create_from_data("", 0); + } else { + i_error("fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->failed = TRUE; + } + break; + } + parser->http_req = NULL; + io_loop_stop(current_ioloop); +} + +static struct fts_parser * +fts_parser_tika_try_init(struct fts_parser_context *parser_context) +{ + struct tika_fts_parser *parser; + struct http_url *http_url; + struct http_client_request *http_req; + + if (tika_get_http_client_url(parser_context->user, &http_url) < 0) + return NULL; + if (http_url->path == NULL) + http_url->path = "/"; + + parser = i_new(struct tika_fts_parser, 1); + parser->parser.v = fts_parser_tika; + parser->user = parser_context->user; + + http_req = http_client_request(tika_http_client, "PUT", + http_url->host.name, + t_strconcat(http_url->path, http_url->enc_query, NULL), + fts_tika_parser_response, parser); + http_client_request_set_port(http_req, http_url->port); + http_client_request_set_ssl(http_req, http_url->have_ssl); + if (parser_context->content_type != NULL) + http_client_request_add_header(http_req, "Content-Type", + parser_context->content_type); + if (parser_context->content_disposition != NULL) + http_client_request_add_header(http_req, "Content-Disposition", + parser_context->content_disposition); + http_client_request_add_header(http_req, "Accept", "text/plain"); + + parser->http_req = http_req; + return &parser->parser; +} + +static void fts_parser_tika_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser; + struct ioloop *prev_ioloop = current_ioloop; + const unsigned char 
*data; + size_t size; + ssize_t ret; + + if (block->size > 0) { + /* first we'll send everything to Tika */ + if (!parser->failed && + http_client_request_send_payload(&parser->http_req, + block->data, + block->size) < 0) + parser->failed = TRUE; + block->size = 0; + return; + } + + if (parser->payload == NULL) { + /* read the result from Tika */ + if (!parser->failed && + http_client_request_finish_payload(&parser->http_req) < 0) + parser->failed = TRUE; + if (!parser->failed && parser->payload == NULL) + http_client_wait(tika_http_client); + if (parser->failed) + return; + i_assert(parser->payload != NULL); + } + /* continue returning data from Tika. we'll create a new ioloop just + for reading this one payload. */ + while ((ret = i_stream_read_more(parser->payload, &data, &size)) == 0) { + if (parser->failed) + break; + /* wait for more input from Tika */ + if (parser->ioloop == NULL) { + parser->ioloop = io_loop_create(); + parser->io = io_add_istream(parser->payload, io_loop_stop, + current_ioloop); + } else { + io_loop_set_current(parser->ioloop); + } + io_loop_run(current_ioloop); + } + /* switch back to original ioloop. */ + io_loop_set_current(prev_ioloop); + + if (parser->failed) + ; + else if (size > 0) { + i_assert(ret > 0); + block->data = data; + block->size = size; + i_stream_skip(parser->payload, size); + } else { + /* finished */ + i_assert(ret == -1); + if (parser->payload->stream_errno != 0) { + i_error("read(%s) failed: %s", + i_stream_get_name(parser->payload), + i_stream_get_error(parser->payload)); + parser->failed = TRUE; + } + } +} + +static int fts_parser_tika_deinit(struct fts_parser *_parser, const char **retriable_err_msg_r) +{ + struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser; + int ret = _parser->may_need_retry ? 0: (parser->failed ? 
-1 : 1); + + i_assert(ret != 0 || _parser->retriable_error_msg != NULL); + if (retriable_err_msg_r != NULL) + *retriable_err_msg_r = t_strdup(_parser->retriable_error_msg); + i_free(_parser->retriable_error_msg); + + /* remove io before unrefing payload - otherwise lib-http adds another + timeout to ioloop unnecessarily */ + i_stream_unref(&parser->payload); + io_remove(&parser->io); + http_client_request_abort(&parser->http_req); + if (parser->ioloop != NULL) { + io_loop_set_current(parser->ioloop); + io_loop_destroy(&parser->ioloop); + } + i_free(parser); + return ret; +} + +static void fts_parser_tika_unload(void) +{ + if (tika_http_client != NULL) + http_client_deinit(&tika_http_client); +} + +struct fts_parser_vfuncs fts_parser_tika = { + fts_parser_tika_try_init, + fts_parser_tika_more, + fts_parser_tika_deinit, + fts_parser_tika_unload +}; diff --git a/src/plugins/fts/fts-parser.c b/src/plugins/fts/fts-parser.c new file mode 100644 index 0000000..c0eac80 --- /dev/null +++ b/src/plugins/fts/fts-parser.c @@ -0,0 +1,127 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "unichar.h" +#include "message-parser.h" +#include "fts-parser.h" + +static const struct fts_parser_vfuncs *parsers[] = { + &fts_parser_html, + &fts_parser_script, + &fts_parser_tika +}; + +static const char *plaintext_content_types[] = { + "text/plain", + "message/delivery-status", + "message/disposition-notification", + "application/pgp-signature", + NULL +}; + +bool fts_parser_init(struct fts_parser_context *parser_context, + struct fts_parser **parser_r) +{ + unsigned int i; + i_assert(parser_context->user != NULL); + i_assert(parser_context->content_type != NULL); + + if (str_array_find(plaintext_content_types, parser_context->content_type)) { + /* we probably don't want/need to allow parsers to handle + plaintext? 
*/ + return FALSE; + } + + for (i = 0; i < N_ELEMENTS(parsers); i++) { + *parser_r = parsers[i]->try_init(parser_context); + if (*parser_r != NULL) + return TRUE; + } + return FALSE; +} + +struct fts_parser *fts_parser_text_init(void) +{ + return i_new(struct fts_parser, 1); +} + +static bool data_has_nuls(const unsigned char *data, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (data[i] == '\0') + return TRUE; + } + return FALSE; +} + +static void replace_nul_bytes(buffer_t *buf) +{ + unsigned char *data; + size_t i, size; + + data = buffer_get_modifiable_data(buf, &size); + for (i = 0; i < size; i++) { + if (data[i] == '\0') + data[i] = ' '; + } +} + +void fts_parser_more(struct fts_parser *parser, struct message_block *block) +{ + if (parser->v.more != NULL) + parser->v.more(parser, block); + + if (!uni_utf8_data_is_valid(block->data, block->size) || + data_has_nuls(block->data, block->size)) { + /* output isn't valid UTF-8. make it. */ + if (parser->utf8_output == NULL) { + parser->utf8_output = + buffer_create_dynamic(default_pool, 4096); + } else { + buffer_set_used_size(parser->utf8_output, 0); + } + if (uni_utf8_get_valid_data(block->data, block->size, + parser->utf8_output)) { + /* valid UTF-8, but there were NULs */ + buffer_append(parser->utf8_output, block->data, + block->size); + } + replace_nul_bytes(parser->utf8_output); + block->data = parser->utf8_output->data; + block->size = parser->utf8_output->used; + } +} + +int fts_parser_deinit(struct fts_parser **_parser, const char **retriable_err_msg_r) +{ + struct fts_parser *parser = *_parser; + int ret = 1; + + *_parser = NULL; + + buffer_free(&parser->utf8_output); + if (parser->v.deinit != NULL) { + const char *error = NULL; + ret = parser->v.deinit(parser, &error); + if (ret == 0) { + i_assert(error != NULL); + if (retriable_err_msg_r != NULL) + *retriable_err_msg_r = error; + } + } else + i_free(parser); + return ret; +} + +void fts_parsers_unload(void) +{ + unsigned int i; + + 
for (i = 0; i < N_ELEMENTS(parsers); i++) { + if (parsers[i]->unload != NULL) + parsers[i]->unload(); + } +} diff --git a/src/plugins/fts/fts-parser.h b/src/plugins/fts/fts-parser.h new file mode 100644 index 0000000..0eb716e --- /dev/null +++ b/src/plugins/fts/fts-parser.h @@ -0,0 +1,48 @@ +#ifndef FTS_PARSER_H +#define FTS_PARSER_H + +struct message_block; +struct mail_user; + +struct fts_parser_context { + /* Can't be NULL */ + struct mail_user *user; + /* Can't be NULL */ + const char *content_type; + const char *content_disposition; +}; + +struct fts_parser_vfuncs { + struct fts_parser *(*try_init)(struct fts_parser_context *parser_context); + void (*more)(struct fts_parser *parser, struct message_block *block); + int (*deinit)(struct fts_parser *parser, const char **retriable_err_msg_r); + void (*unload)(void); +}; + +struct fts_parser { + struct fts_parser_vfuncs v; + buffer_t *utf8_output; + bool may_need_retry; + char *retriable_error_msg; +}; + +extern struct fts_parser_vfuncs fts_parser_html; +extern struct fts_parser_vfuncs fts_parser_script; +extern struct fts_parser_vfuncs fts_parser_tika; + +bool fts_parser_init(struct fts_parser_context *parser_context, + struct fts_parser **parser_r); +struct fts_parser *fts_parser_text_init(void); + +/* The parser is initially called with message body blocks. Once message is + finished, it's still called with incoming size=0 while the parser increases + it to non-zero. */ +void fts_parser_more(struct fts_parser *parser, struct message_block *block); +/* Returns 1 if ok, 0 if the parsing should be retried, -1 if error. + If 0 is returned, the retriable_err_msg_r is set, which should be logged + as error if no retrying is performed. 
*/ +int fts_parser_deinit(struct fts_parser **parser, const char **retriable_err_msg_r); + +void fts_parsers_unload(void); + +#endif diff --git a/src/plugins/fts/fts-plugin.c b/src/plugins/fts/fts-plugin.c new file mode 100644 index 0000000..1902cb6 --- /dev/null +++ b/src/plugins/fts/fts-plugin.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "mail-storage-hooks.h" +#include "fts-filter.h" +#include "fts-tokenizer.h" +#include "fts-parser.h" +#include "fts-storage.h" +#include "fts-user.h" +#include "fts-plugin.h" +#include "fts-library.h" + +const char *fts_plugin_version = DOVECOT_ABI_VERSION; + +static struct mail_storage_hooks fts_mail_storage_hooks = { + .mail_namespaces_added = fts_mail_namespaces_added, + .mailbox_list_created = fts_mailbox_list_created, + .mailbox_allocated = fts_mailbox_allocated, + .mail_allocated = fts_mail_allocated +}; + +void fts_plugin_init(struct module *module) +{ + fts_library_init(); + mail_storage_hooks_add(module, &fts_mail_storage_hooks); +} + +void fts_plugin_deinit(void) +{ + fts_library_deinit(); + fts_parsers_unload(); + mail_storage_hooks_remove(&fts_mail_storage_hooks); +} diff --git a/src/plugins/fts/fts-plugin.h b/src/plugins/fts/fts-plugin.h new file mode 100644 index 0000000..aeec68c --- /dev/null +++ b/src/plugins/fts/fts-plugin.h @@ -0,0 +1,7 @@ +#ifndef FTS_PLUGIN_H +#define FTS_PLUGIN_H + +void fts_plugin_init(struct module *module); +void fts_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c new file mode 100644 index 0000000..b58b238 --- /dev/null +++ b/src/plugins/fts/fts-search-args.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "mail-namespace.h" +#include "mail-search.h" +#include "fts-api-private.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include 
"fts-user.h" +#include "fts-search-args.h" + +static void strings_deduplicate(ARRAY_TYPE(const_string) *arr) +{ + const char *const *strings; + unsigned int i, count; + + strings = array_get(arr, &count); + for (i = 1; i < count; ) { + if (strcmp(strings[i-1], strings[i]) == 0) { + array_delete(arr, i, 1); + strings = array_get(arr, &count); + } else { + i++; + } + } +} + +static struct mail_search_arg * +fts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool, + const ARRAY_TYPE(const_string) *tokens) +{ + struct mail_search_arg *arg, *or_arg, **argp; + const char *token; + + /* create the OR arg first as the parent */ + or_arg = p_new(pool, struct mail_search_arg, 1); + or_arg->type = SEARCH_OR; + + /* now create all the child args for the OR */ + argp = &or_arg->value.subargs; + array_foreach_elem(tokens, token) { + arg = p_new(pool, struct mail_search_arg, 1); + *arg = *orig_arg; + arg->match_not = FALSE; /* we copied this to the root OR */ + arg->next = NULL; + arg->value.str = p_strdup(pool, token); + + *argp = arg; + argp = &arg->next; + } + return or_arg; +} + +static int +fts_backend_dovecot_expand_tokens(struct fts_filter *filter, + pool_t pool, + struct mail_search_arg *parent_arg, + const struct mail_search_arg *orig_arg, + const char *orig_token, const char *token, + const char **error_r) +{ + struct mail_search_arg *arg; + ARRAY_TYPE(const_string) tokens; + const char *token2, *error; + int ret; + + t_array_init(&tokens, 4); + /* first add the word exactly as it without any tokenization */ + array_push_back(&tokens, &orig_token); + /* then add it tokenized, but without filtering */ + array_push_back(&tokens, &token); + + /* add the word filtered */ + if (filter != NULL) { + token2 = t_strdup(token); + ret = fts_filter_filter(filter, &token2, &error); + if (ret > 0) { + token2 = t_strdup(token2); + array_push_back(&tokens, &token2); + } else if (ret < 0) { + *error_r = t_strdup_printf("Couldn't filter search token: %s", error); + 
return -1; + } else { + /* The filter dropped the token, which means it was + never even indexed. Ignore this word entirely in the + search query. */ + return 0; + } + } + array_sort(&tokens, i_strcmp_p); + strings_deduplicate(&tokens); + + arg = fts_search_arg_create_or(orig_arg, pool, &tokens); + arg->next = parent_arg->value.subargs; + parent_arg->value.subargs = arg; + return 0; +} + +static int +fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang, + pool_t pool, struct mail_search_arg *or_arg, + struct mail_search_arg *orig_arg, + const char *orig_token, const char **error_r) +{ + size_t orig_token_len = strlen(orig_token); + struct mail_search_arg *and_arg, *orig_or_args = or_arg->value.subargs; + const char *token, *error; + int ret; + + /* we want all the tokens found from the string to be found, so create + a parent AND and place all the filtered token alternatives under + it */ + and_arg = p_new(pool, struct mail_search_arg, 1); + and_arg->type = SEARCH_SUB; + and_arg->next = orig_or_args; + or_arg->value.subargs = and_arg; + + /* reset tokenizer between search args in case there's any state left + from some previous failure */ + fts_tokenizer_reset(user_lang->search_tokenizer); + while ((ret = fts_tokenizer_next(user_lang->search_tokenizer, + (const void *)orig_token, + orig_token_len, &token, &error)) > 0) { + if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, + and_arg, orig_arg, orig_token, + token, error_r) < 0) + return -1; + } + while (ret >= 0 && + (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) { + if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, + and_arg, orig_arg, orig_token, + token, error_r) < 0) + return -1; + } + if (ret < 0) { + *error_r = t_strdup_printf("Couldn't tokenize search args: %s", error); + return -1; + } + if (and_arg->value.subargs == NULL) { + /* nothing was actually expanded, remove the empty and_arg */ + or_arg->value.subargs = orig_or_args; + } + 
return 0; +} + +static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool, + struct mail_search_arg **argp) +{ + const ARRAY_TYPE(fts_user_language) *languages; + struct fts_user_language *lang; + struct mail_search_arg *or_arg, *orig_arg = *argp; + const char *error, *orig_token = orig_arg->value.str; + + if (((*argp)->type == SEARCH_HEADER || + (*argp)->type == SEARCH_HEADER_ADDRESS || + (*argp)->type == SEARCH_HEADER_COMPRESS_LWSP) && + !fts_header_has_language((*argp)->hdr_field_name)) { + /* use only the data-language */ + languages = fts_user_get_data_languages(backend->ns->user); + } else { + languages = fts_user_get_all_languages(backend->ns->user); + } + + /* OR together all the different expansions for different languages. + it's enough for one of them to match. */ + or_arg = p_new(pool, struct mail_search_arg, 1); + or_arg->type = SEARCH_OR; + or_arg->match_not = orig_arg->match_not; + or_arg->next = orig_arg->next; + + array_foreach_elem(languages, lang) { + if (fts_backend_dovecot_tokenize_lang(lang, pool, or_arg, + orig_arg, orig_token, &error) < 0) { + i_error("fts: %s", error); + return -1; + } + } + + if (or_arg->value.subargs == NULL) { + /* we couldn't parse any tokens from the input */ + or_arg->type = SEARCH_ALL; + or_arg->match_not = !or_arg->match_not; + } + *argp = or_arg; + return 0; +} + +static int +fts_search_args_expand_tree(struct fts_backend *backend, pool_t pool, + struct mail_search_arg **argp) +{ + int ret; + + for (; *argp != NULL; argp = &(*argp)->next) { + switch ((*argp)->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_search_args_expand_tree(backend, pool, + &(*argp)->value.subargs) < 0) + return -1; + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if ((*argp)->value.str[0] == '\0') { + /* we're testing for the existence of + the header */ + break; + } + /* fall through */ + case SEARCH_BODY: + case SEARCH_TEXT: + T_BEGIN { + ret = 
fts_search_arg_expand(backend, pool, argp); + } T_END; + if (ret < 0) + return -1; + break; + default: + break; + } + } + return 0; +} + +int fts_search_args_expand(struct fts_backend *backend, + struct mail_search_args *args) +{ + struct mail_search_arg *args_dup, *orig_args = args->args; + + /* don't keep re-expanding every time the search args are used. + this is especially important to avoid an assert-crash in + index_search_result_update_flags(). */ + if (args->fts_expanded) + return 0; + args->fts_expanded = TRUE; + + /* duplicate the args, so if expansion fails we haven't changed + anything */ + args_dup = mail_search_arg_dup(args->pool, args->args); + + if (fts_search_args_expand_tree(backend, args->pool, &args_dup) < 0) + return -1; + + /* we'll need to re-simplify the args if we changed anything */ + args->simplified = FALSE; + args->args = args_dup; + mail_search_args_simplify(args); + + /* duplicated args aren't initialized */ + i_assert(args->init_refcount > 0); + mail_search_arg_init(args, args_dup); + mail_search_arg_deinit(orig_args); + return 0; +} diff --git a/src/plugins/fts/fts-search-args.h b/src/plugins/fts/fts-search-args.h new file mode 100644 index 0000000..9fb8923 --- /dev/null +++ b/src/plugins/fts/fts-search-args.h @@ -0,0 +1,7 @@ +#ifndef FTS_SEARCH_ARGS_H +#define FTS_SEARCH_ARGS_H + +int fts_search_args_expand(struct fts_backend *backend, + struct mail_search_args *args); + +#endif diff --git a/src/plugins/fts/fts-search-serialize.c b/src/plugins/fts/fts-search-serialize.c new file mode 100644 index 0000000..e30d4ce --- /dev/null +++ b/src/plugins/fts/fts-search-serialize.c @@ -0,0 +1,99 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "mail-search.h" +#include "fts-search-serialize.h" + +#define HAVE_SUBARGS(arg) \ + ((arg)->type == SEARCH_SUB || (arg)->type == SEARCH_OR) + +void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args) 
+{ + char chr; /* bit 0 = match_always, bit 1 = nonmatch_always */ + + for (; args != NULL; args = args->next) { + chr = (args->match_always ? 1 : 0) | + (args->nonmatch_always ? 2 : 0); + buffer_append_c(buf, chr); /* one byte per arg; buf is appended to, never cleared here */ + + if (HAVE_SUBARGS(args)) + fts_search_serialize(buf, args->value.subargs); /* subargs' bytes follow their parent's byte */ + } +} + +static void fts_search_deserialize_idx(struct mail_search_arg *args, + const buffer_t *buf, unsigned int *idx) +{ + const char *data = buf->data; + + for (; args != NULL; args = args->next) { + i_assert(*idx < buf->used); /* buf must hold a byte for every arg in the tree */ + + args->match_always = (data[*idx] & 1) != 0; + args->nonmatch_always = (data[*idx] & 2) != 0; + args->result = args->match_always ? 1 : + (args->nonmatch_always ? 0 : -1); + *idx += 1; + + if (HAVE_SUBARGS(args)) { + fts_search_deserialize_idx(args->value.subargs, + buf, idx); /* same depth-first order as fts_search_serialize() */ + } + } +} + +void fts_search_deserialize(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_idx(args, buf, &idx); + i_assert(idx == buf->used); /* buf must contain exactly one byte per arg */ +} + +static void +fts_search_deserialize_add_idx(struct mail_search_arg *args, + const buffer_t *buf, unsigned int *idx, + bool matches) +{ + const char *data = buf->data; + + for (; args != NULL; args = args->next) { + i_assert(*idx < buf->used); + + if (data[*idx] != 0) { /* any nonzero byte marks this arg; zero bytes leave it untouched */ + if (matches) { + args->match_always = TRUE; + args->result = 1; + } else { + args->nonmatch_always = TRUE; + args->result = 0; + } + } + *idx += 1; + + if (HAVE_SUBARGS(args)) { + fts_search_deserialize_add_idx(args->value.subargs, + buf, idx, matches); + } + } +} + +void fts_search_deserialize_add_matches(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_add_idx(args, buf, &idx, TRUE); /* matches=TRUE: sets match_always and result=1 */ + i_assert(idx == buf->used); +} + +void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_add_idx(args, buf, &idx, FALSE); /* matches=FALSE: sets nonmatch_always and result=0 */ + i_assert(idx == buf->used); +} diff --git a/src/plugins/fts/fts-search-serialize.h 
b/src/plugins/fts/fts-search-serialize.h new file mode 100644 index 0000000..c1a7d88 --- /dev/null +++ b/src/plugins/fts/fts-search-serialize.h @@ -0,0 +1,16 @@ +#ifndef FTS_SEARCH_SERIALIZE_H +#define FTS_SEARCH_SERIALIZE_H + +/* serialize the [non]match_always fields of args (and, depth-first, of the + subargs of SEARCH_SUB/SEARCH_OR args) by appending one byte per arg to buf. + the buffer is NOT cleared first; callers that want a fresh result must + pass an empty buffer. */ +void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args); +/* set the [non]match_always and result fields in search args from a buffer + written by fts_search_serialize(). the buffer must contain exactly one byte + per arg (asserted). */ +void fts_search_deserialize(struct mail_search_arg *args, + const buffer_t *buf); +/* set match_always=TRUE and result=1 in the search args whose byte in buf is + nonzero; other args are left untouched */ +void fts_search_deserialize_add_matches(struct mail_search_arg *args, + const buffer_t *buf); +/* set nonmatch_always=TRUE and result=0 in the search args whose byte in buf + is nonzero; other args are left untouched */ +void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args, + const buffer_t *buf); + +#endif diff --git a/src/plugins/fts/fts-search.c b/src/plugins/fts/fts-search.c new file mode 100644 index 0000000..895ea59 --- /dev/null +++ b/src/plugins/fts/fts-search.c @@ -0,0 +1,385 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "seq-range-array.h" +#include "mail-search.h" +#include "fts-api-private.h" +#include "fts-search-args.h" +#include "fts-search-serialize.h" +#include "fts-storage.h" +#include "hash.h" + +static void +uid_range_to_seqs(struct fts_search_context *fctx, + const ARRAY_TYPE(seq_range) *uid_range, + ARRAY_TYPE(seq_range) *seq_range) +{ /* map each range in uid_range through mailbox_get_seq_range() on fctx->box and add the result to seq_range */ + const struct seq_range *range; + unsigned int i, count; + uint32_t seq1, seq2; + + range = array_get(uid_range, &count); + if (!array_is_created(seq_range)) + p_array_init(seq_range, fctx->result_pool, count); /* created lazily from the result pool */ + for (i = 0; i < count; i++) { + if (range[i].seq1 > range[i].seq2) + continue; /* skip inverted ranges */ + mailbox_get_seq_range(fctx->box, range[i].seq1, range[i].seq2, + &seq1, &seq2); + if (seq1 != 0) + seq_range_array_add_range(seq_range, seq1, seq2); + } +} + +static int fts_search_lookup_level_single(struct 
fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + enum fts_lookup_flags flags = fctx->flags | + (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0); + struct fts_search_level *level; + struct fts_result result; + + i_zero(&result); + result.search_state = fctx->search_state; + result.pool = fctx->result_pool; + p_array_init(&result.definite_uids, fctx->result_pool, 32); + p_array_init(&result.maybe_uids, fctx->result_pool, 32); + p_array_init(&result.scores, fctx->result_pool, 32); + + mail_search_args_reset(args, TRUE); + if (fts_backend_lookup(fctx->backend, fctx->box, args, flags, + &result) < 0) + return -1; + + fctx->search_state = result.search_state; + level = array_append_space(&fctx->levels); + level->args_matches = buffer_create_dynamic(fctx->result_pool, 16); + fts_search_serialize(level->args_matches, args); + + uid_range_to_seqs(fctx, &result.definite_uids, &level->definite_seqs); + uid_range_to_seqs(fctx, &result.maybe_uids, &level->maybe_seqs); + level->score_map = result.scores; + return 0; +} + +static void +level_scores_add_vuids(struct mailbox *box, + struct fts_search_level *level, struct fts_result *br) +{ + const struct fts_score_map *scores; + unsigned int i, count; + ARRAY_TYPE(seq_range) backend_uids; + ARRAY_TYPE(uint32_t) vuids_arr; + const uint32_t *vuids; + struct fts_score_map *score; + + scores = array_get(&br->scores, &count); + t_array_init(&vuids_arr, count); + t_array_init(&backend_uids, 64); + for (i = 0; i < count; i++) + seq_range_array_add(&backend_uids, scores[i].uid); + box->virtual_vfuncs->get_virtual_uid_map(box, br->box, + &backend_uids, &vuids_arr); + + i_assert(array_count(&vuids_arr) == array_count(&br->scores)); + vuids = array_get(&vuids_arr, &count); + for (i = 0; i < count; i++) { + score = array_append_space(&level->score_map); + score->uid = vuids[i]; + score->score = scores[i].score; + } +} + +static int +mailbox_cmp_fts_backend(struct mailbox *const *m1, struct mailbox *const *m2) +{ + 
struct fts_backend *b1, *b2; + + b1 = fts_mailbox_backend(*m1); + b2 = fts_mailbox_backend(*m2); + if (b1 < b2) + return -1; + if (b1 > b2) + return 1; + return 0; +} + +static int +multi_add_lookup_result(struct fts_search_context *fctx, + struct fts_search_level *level, + struct mail_search_arg *args, + struct fts_multi_result *result) +{ + ARRAY_TYPE(seq_range) vuids; + size_t orig_size; + unsigned int i; + + orig_size = level->args_matches->used; + fts_search_serialize(level->args_matches, args); + if (orig_size > 0) { + if (level->args_matches->used != orig_size * 2 || + memcmp(level->args_matches->data, + CONST_PTR_OFFSET(level->args_matches->data, + orig_size), orig_size) != 0) + i_panic("incompatible fts backends for namespaces"); + buffer_set_used_size(level->args_matches, orig_size); + } + + t_array_init(&vuids, 64); + for (i = 0; result->box_results[i].box != NULL; i++) { + struct fts_result *br = &result->box_results[i]; + + array_clear(&vuids); + if (array_is_created(&br->definite_uids)) { + fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box, + br->box, &br->definite_uids, &vuids); + } + uid_range_to_seqs(fctx, &vuids, &level->definite_seqs); + + array_clear(&vuids); + if (array_is_created(&br->maybe_uids)) { + fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box, + br->box, &br->maybe_uids, &vuids); + } + uid_range_to_seqs(fctx, &vuids, &level->maybe_seqs); + + if (array_is_created(&br->scores)) + level_scores_add_vuids(fctx->box, level, br); + } + return 0; +} + +static int fts_search_lookup_level_multi(struct fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + enum fts_lookup_flags flags = fctx->flags | + (and_args ? 
FTS_LOOKUP_FLAG_AND_ARGS : 0); + ARRAY_TYPE(mailboxes) mailboxes_arr, tmp_mailboxes; + struct mailbox *const *mailboxes; + struct fts_backend *backend; + struct fts_search_level *level; + struct fts_multi_result result; + unsigned int i, j, mailbox_count; + + p_array_init(&mailboxes_arr, fctx->result_pool, 8); + fctx->box->virtual_vfuncs->get_virtual_backend_boxes(fctx->box, + &mailboxes_arr, TRUE); + array_sort(&mailboxes_arr, mailbox_cmp_fts_backend); + + i_zero(&result); + result.search_state = fctx->search_state; + result.pool = fctx->result_pool; + + level = array_append_space(&fctx->levels); + level->args_matches = buffer_create_dynamic(fctx->result_pool, 16); + p_array_init(&level->score_map, fctx->result_pool, 1); + + mailboxes = array_get(&mailboxes_arr, &mailbox_count); + t_array_init(&tmp_mailboxes, mailbox_count); + for (i = 0; i < mailbox_count; i = j) { + array_clear(&tmp_mailboxes); + array_push_back(&tmp_mailboxes, &mailboxes[i]); + + backend = fts_mailbox_backend(mailboxes[i]); + for (j = i + 1; j < mailbox_count; j++) { + if (fts_mailbox_backend(mailboxes[j]) != backend) + break; + array_push_back(&tmp_mailboxes, &mailboxes[j]); + } + array_append_zero(&tmp_mailboxes); + + mail_search_args_reset(args, TRUE); + if (fts_backend_lookup_multi(backend, + array_front(&tmp_mailboxes), + args, flags, &result) < 0) + return -1; + + if (multi_add_lookup_result(fctx, level, args, &result) < 0) + return -1; + } + fctx->search_state = result.search_state; + return 0; +} + +static int fts_search_lookup_level(struct fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + int ret; + + T_BEGIN { + ret = !fctx->virtual_mailbox ? 
+ fts_search_lookup_level_single(fctx, args, and_args) : + fts_search_lookup_level_multi(fctx, args, and_args); + } T_END; + if (ret < 0) + return -1; + + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + if (fts_search_lookup_level(fctx, args->value.subargs, + args->type == SEARCH_SUB) < 0) /* and_args is TRUE only for SEARCH_SUB children */ + return -1; + } + return 0; +} + +static void +fts_search_merge_scores_and(ARRAY_TYPE(fts_score_map) *dest, + const ARRAY_TYPE(fts_score_map) *src) +{ + struct fts_score_map *dest_map; + const struct fts_score_map *src_map; + unsigned int desti, srci, dest_count, src_count; + + dest_map = array_get_modifiable(dest, &dest_count); + src_map = array_get(src, &src_count); + + /* for UIDs that exist in both maps, raise the current score to the + arg_scores one if that is higher (the scores are not summed). the + walk advances whichever side has the lower UID, so it relies on + both maps being ordered by UID. we could drop UIDs that + don't exist in both, but that's just extra work so don't bother */ + for (desti = srci = 0; desti < dest_count && srci < src_count;) { + if (dest_map[desti].uid < src_map[srci].uid) + desti++; + else if (dest_map[desti].uid > src_map[srci].uid) + srci++; + else { + if (dest_map[desti].score < src_map[srci].score) + dest_map[desti].score = src_map[srci].score; + desti++; srci++; + } + } +} + +static void +fts_search_merge_scores_or(ARRAY_TYPE(fts_score_map) *dest, + const ARRAY_TYPE(fts_score_map) *src) +{ + ARRAY_TYPE(fts_score_map) src2; + const struct fts_score_map *src_map, *src2_map; + unsigned int srci, src2i, src_count, src2_count; + + t_array_init(&src2, array_count(dest)); + array_append_array(&src2, dest); + array_clear(dest); + + src_map = array_get(src, &src_count); + src2_map = array_get(&src2, &src2_count); + + /* add any missing UIDs to current scores. if any existing UIDs have + lower scores than in arg_scores, increase them. 
*/ + for (srci = src2i = 0; srci < src_count || src2i < src2_count;) { + /* at least one side still has entries left. the index bounds must + be checked before reading either map, because the other side + may already be exhausted: reading src_map[src_count] would be + an out-of-bounds access. */ + if (src2i == src2_count || + (srci < src_count && + src_map[srci].uid < src2_map[src2i].uid)) { + /* only src has this UID (or src2 is exhausted) */ + array_push_back(dest, &src_map[srci]); + srci++; + } else if (srci == src_count || + src_map[srci].uid > src2_map[src2i].uid) { + /* only src2 has this UID (or src is exhausted) */ + array_push_back(dest, &src2_map[src2i]); + src2i++; + } else { + /* same UID in both: keep the entry with the higher score */ + i_assert(src_map[srci].uid == src2_map[src2i].uid); + if (src_map[srci].score > src2_map[src2i].score) + array_push_back(dest, &src_map[srci]); + else + array_push_back(dest, &src2_map[src2i]); + srci++; src2i++; + } + } +} + +static void +fts_search_merge_scores_level(struct fts_search_context *fctx, + struct mail_search_arg *args, unsigned int *idx, + bool and_args, ARRAY_TYPE(fts_score_map) *scores) +{ + const struct fts_search_level *level; + ARRAY_TYPE(fts_score_map) arg_scores; + + i_assert(array_count(scores) == 0); + + /* + The (simplified) args can look like: + + A and B and (C or D) and (E or F) and ... + A or B or (C and D) or (E and F) or ... + + The A op B part's scores are in level->score_map. The child args' + scores are in the sub levels' scores. 
+ */ + + level = array_idx(&fctx->levels, *idx); + array_append_array(scores, &level->score_map); /* start from this level's own scores */ + + t_array_init(&arg_scores, 64); + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + *idx += 1; /* sub levels are visited in the same depth-first order in which fts_search_lookup_level() appended them */ + array_clear(&arg_scores); + fts_search_merge_scores_level(fctx, args->value.subargs, idx, + args->type == SEARCH_OR, /* NOTE(review): fts_search_lookup_level() passes args->type == SEARCH_SUB as and_args when recursing; here it is SEARCH_OR - confirm this inversion is intended */ + &arg_scores); + + if (and_args) + fts_search_merge_scores_and(scores, &arg_scores); + else + fts_search_merge_scores_or(scores, &arg_scores); + } +} + +static void fts_search_merge_scores(struct fts_search_context *fctx) +{ + unsigned int idx = 0; + + fts_search_merge_scores_level(fctx, fctx->args->args, &idx, + TRUE, &fctx->scores->score_map); /* top level is merged with and_args=TRUE, matching the lookup in fts_search_try_lookup() */ +} + +static void fts_search_try_lookup(struct fts_search_context *fctx) +{ + uint32_t last_uid, seq1, seq2; + + i_assert(array_count(&fctx->levels) == 0); + i_assert(fctx->args->simplified); + + if (fts_backend_refresh(fctx->backend) < 0) + return; + if (fts_backend_get_last_uid(fctx->backend, fctx->box, &last_uid) < 0) + return; + mailbox_get_seq_range(fctx->box, last_uid+1, (uint32_t)-1, + &seq1, &seq2); + fctx->first_unindexed_seq = seq1 != 0 ? 
seq1 : (uint32_t)-1; + + if (fctx->virtual_mailbox) { + hash_table_clear(fctx->last_indexed_virtual_uids, TRUE); + fctx->next_unindexed_seq = fctx->first_unindexed_seq; + } + + if ((fctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + if (fts_search_args_expand(fctx->backend, fctx->args) < 0) + return; + } + fts_search_serialize(fctx->orig_matches, fctx->args->args); + + if (fts_search_lookup_level(fctx, fctx->args->args, TRUE) == 0) { + fctx->fts_lookup_success = TRUE; + fts_search_merge_scores(fctx); + } + + fts_search_deserialize(fctx->args->args, fctx->orig_matches); + fts_backend_lookup_done(fctx->backend); +} + +void fts_search_lookup(struct fts_search_context *fctx) +{ + struct event_reason *reason = event_reason_begin("fts:lookup"); + fts_search_try_lookup(fctx); + event_reason_end(&reason); +} diff --git a/src/plugins/fts/fts-storage.c b/src/plugins/fts/fts-storage.c new file mode 100644 index 0000000..101d52a --- /dev/null +++ b/src/plugins/fts/fts-storage.c @@ -0,0 +1,981 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "net.h" +#include "str.h" +#include "strescape.h" +#include "write-full.h" +#include "mail-search-build.h" +#include "mail-storage-private.h" +#include "mailbox-list-private.h" +#include "fts-api-private.h" +#include "fts-tokenizer.h" +#include "fts-indexer.h" +#include "fts-build-mail.h" +#include "fts-search-serialize.h" +#include "fts-plugin.h" +#include "fts-user.h" +#include "fts-storage.h" +#include "hash.h" + + +#define FTS_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_storage_module) +#define FTS_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_storage_module) +#define FTS_MAIL_CONTEXT(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_mail_module) +#define FTS_LIST_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_mailbox_list_module) +#define FTS_LIST_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_mailbox_list_module) + +#define INDEXER_SOCKET_NAME "indexer" 
+#define INDEXER_HANDSHAKE "VERSION\tindexer\t1\t0\n" + +struct fts_mailbox_list { + union mailbox_list_module_context module_ctx; + struct fts_backend *backend; + + const char *backend_name; + struct fts_backend_update_context *update_ctx; + unsigned int update_ctx_refcount; + + bool failed:1; +}; + +struct fts_mailbox { + union mailbox_module_context module_ctx; + struct fts_backend_update_context *sync_update_ctx; + bool fts_mailbox_excluded; +}; + +struct fts_transaction_context { + union mailbox_transaction_module_context module_ctx; + + struct fts_scores *scores; + uint32_t next_index_seq; + uint32_t highest_virtual_uid; + unsigned int precache_extra_count; + + bool indexing:1; + bool precached:1; + bool mails_saved:1; + const char *failure_reason; +}; + +struct fts_mail { + union mail_module_context module_ctx; + char score[30]; + + bool virtual_mail:1; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_storage_module, + &mail_storage_module_register); +static MODULE_CONTEXT_DEFINE_INIT(fts_mail_module, &mail_module_register); +static MODULE_CONTEXT_DEFINE_INIT(fts_mailbox_list_module, + &mailbox_list_module_register); + +static int fts_mailbox_get_last_cached_seq(struct mailbox *box, uint32_t *seq_r) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + uint32_t seq1, seq2, last_uid; + + if (fts_backend_get_last_uid(flist->backend, box, &last_uid) < 0) { + mail_storage_set_internal_error(box->storage); + return -1; + } + + if (last_uid == 0) + *seq_r = 0; + else { + mailbox_get_seq_range(box, 1, last_uid, &seq1, &seq2); + *seq_r = seq2; + } + return 0; +} + +static int +fts_mailbox_get_status(struct mailbox *box, enum mailbox_status_items items, + struct mailbox_status *status_r) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + uint32_t seq; + + if (fbox->module_ctx.super.get_status(box, items, status_r) < 0) + return -1; + + if ((items & STATUS_LAST_CACHED_SEQ) != 0) { + if (fts_mailbox_get_last_cached_seq(box, &seq) < 0) + 
return -1; + + /* Always use the FTS's last_cached_seq. This is because we + don't want to reindex all mails to FTS if .cache file is + deleted. */ + status_r->last_cached_seq = seq; + } + return 0; +} + + +static void fts_scores_unref(struct fts_scores **_scores) +{ + struct fts_scores *scores = *_scores; + + *_scores = NULL; + if (--scores->refcount == 0) { + array_free(&scores->score_map); + i_free(scores); + } +} + +static void fts_try_build_init(struct mail_search_context *ctx, + struct fts_search_context *fctx) +{ + int ret; + + i_assert(!fts_backend_is_updating(fctx->backend)); + + ret = fts_indexer_init(fctx->backend, ctx->transaction->box, + &fctx->indexer_ctx); + if (ret < 0) + return; + + if (ret == 0) { + /* the index was up to date */ + fts_search_lookup(fctx); + } else { + /* hide "searching" notifications while building index */ + ctx->progress_hidden = TRUE; + } +} + +static bool fts_want_build_args(const struct mail_search_arg *args) +{ + /* we want to update index only when searching from message body. 
+ it's not worth the wait for searching only from headers, which + could be in cache file already */ + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_want_build_args(args->value.subargs)) + return TRUE; + break; + case SEARCH_BODY: + case SEARCH_TEXT: + if (!args->no_fts) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +static bool fts_args_have_fuzzy(const struct mail_search_arg *args) +{ + for (; args != NULL; args = args->next) { + if (args->fuzzy) + return TRUE; + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_args_have_fuzzy(args->value.subargs)) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +static enum fts_enforced fts_enforced_parse(const char *str) +{ + if (str == NULL || strcmp(str, "no") == 0) + return FTS_ENFORCED_NO; + else if (strcmp(str, "body") == 0) + return FTS_ENFORCED_BODY; + else + return FTS_ENFORCED_YES; +} + +static struct mail_search_context * +fts_mailbox_search_init(struct mailbox_transaction_context *t, + struct mail_search_args *args, + const enum mail_sort_type *sort_program, + enum mail_fetch_field wanted_fields, + struct mailbox_header_lookup_ctx *wanted_headers) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list); + struct mail_search_context *ctx; + struct fts_search_context *fctx; + + ctx = fbox->module_ctx.super.search_init(t, args, sort_program, + wanted_fields, wanted_headers); + + if (!fts_backend_can_lookup(flist->backend, args->args)) + return ctx; + + fctx = i_new(struct fts_search_context, 1); + fctx->box = t->box; + fctx->backend = flist->backend; + fctx->t = t; + fctx->args = args; + fctx->result_pool = pool_alloconly_create("fts results", 1024*64); + fctx->orig_matches = 
buffer_create_dynamic(default_pool, 64); + fctx->virtual_mailbox = t->box->virtual_vfuncs != NULL; + if (fctx->virtual_mailbox) { + hash_table_create(&fctx->last_indexed_virtual_uids, + default_pool, 0, str_hash, strcmp); + } + fctx->enforced = fts_enforced_parse( + mail_user_plugin_getenv(t->box->storage->user, "fts_enforced")); + i_array_init(&fctx->levels, 8); + fctx->scores = i_new(struct fts_scores, 1); + fctx->scores->refcount = 1; + i_array_init(&fctx->scores->score_map, 64); + MODULE_CONTEXT_SET(ctx, fts_storage_module, fctx); + + /* FIXME: we'll assume that all the args are fuzzy. not good, + but would require much more work to fix it. */ + if (!fts_args_have_fuzzy(args->args) && + mail_user_plugin_getenv_bool(t->box->storage->user, + "fts_no_autofuzzy")) + fctx->flags |= FTS_LOOKUP_FLAG_NO_AUTO_FUZZY; + /* transaction contains the last search's scores. they can be + queried later with mail_get_special() */ + if (ft->scores != NULL) + fts_scores_unref(&ft->scores); + ft->scores = fctx->scores; + ft->scores->refcount++; + + if (fctx->enforced == FTS_ENFORCED_YES || + fts_want_build_args(args->args)) + fts_try_build_init(ctx, fctx); + else + fts_search_lookup(fctx); + return ctx; +} + +static bool fts_mailbox_build_continue(struct mail_search_context *ctx) +{ + struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx); + int ret; + + ret = fts_indexer_more(fctx->indexer_ctx); + if (ret == 0) + return FALSE; + + /* indexing finished */ + ctx->progress_hidden = FALSE; + if (fts_indexer_deinit(&fctx->indexer_ctx) < 0) + ret = -1; + if (ret > 0) + fts_search_lookup(fctx); + if (ret < 0) { + /* if indexing timed out, it probably means that + the mailbox is still being indexed, but it's a large + mailbox and it takes a while. in this situation + we'll simply abort the search. + + if indexing failed for any other reason, just + fallback to searching the slow way. 
*/ + fctx->indexing_timed_out = + mailbox_get_last_mail_error(fctx->box) == MAIL_ERROR_INUSE; + } + return TRUE; +} + +static bool +fts_mailbox_search_next_nonblock(struct mail_search_context *ctx, + struct mail **mail_r, bool *tryagain_r) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + + if (fctx != NULL && fctx->indexer_ctx != NULL) { + /* this command is still building the indexes */ + if (!fts_mailbox_build_continue(ctx)) { + *tryagain_r = TRUE; + return FALSE; + } + if (fctx->indexing_timed_out) { + *tryagain_r = FALSE; + return FALSE; + } + } + if (fctx != NULL && !fctx->fts_lookup_success && + fctx->enforced != FTS_ENFORCED_NO) + return FALSE; + + return fbox->module_ctx.super. + search_next_nonblock(ctx, mail_r, tryagain_r); +} + +static void +fts_search_apply_results_level(struct mail_search_context *ctx, + struct mail_search_arg *args, unsigned int *idx) +{ + struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx); + const struct fts_search_level *level; + + level = array_idx(&fctx->levels, *idx); + + if (array_is_created(&level->definite_seqs) && + seq_range_exists(&level->definite_seqs, ctx->seq)) + fts_search_deserialize_add_matches(args, level->args_matches); + else if (!array_is_created(&level->maybe_seqs) || + !seq_range_exists(&level->maybe_seqs, ctx->seq)) + fts_search_deserialize_add_nonmatches(args, level->args_matches); + + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + *idx += 1; + fts_search_apply_results_level(ctx, args->value.subargs, idx); + } +} + +static bool fts_mailbox_search_next_update_seq(struct mail_search_context *ctx) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + unsigned int idx; + + if (fctx == NULL || !fctx->fts_lookup_success) { + /* fts lookup not done for this search */ + if (fctx != 
NULL && fctx->indexing_timed_out) + return FALSE; + return fbox->module_ctx.super.search_next_update_seq(ctx); + } + + /* restore original [non]matches */ + fts_search_deserialize(ctx->args->args, fctx->orig_matches); + + if (!fbox->module_ctx.super.search_next_update_seq(ctx)) + return FALSE; + + if (ctx->seq >= fctx->first_unindexed_seq) { + /* we've not indexed this far */ + return TRUE; + } + + /* apply [non]matches based on the FTS lookup results */ + idx = 0; + fts_search_apply_results_level(ctx, ctx->args->args, &idx); + return TRUE; +} + +static int fts_mailbox_search_deinit(struct mail_search_context *ctx) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + int ret = 0; + + if (fctx != NULL) { + if (fctx->virtual_mailbox) + hash_table_destroy(&fctx->last_indexed_virtual_uids); + if (fctx->indexer_ctx != NULL) { + if (fts_indexer_deinit(&fctx->indexer_ctx) < 0) + ft->failure_reason = "FTS indexing failed"; + } + if (fctx->indexing_timed_out) + ret = -1; + else if (!fctx->fts_lookup_success && + fctx->enforced != FTS_ENFORCED_NO) { + /* FTS lookup failed and we didn't want to fallback to + opening all the mails and searching manually */ + mail_storage_set_internal_error(ctx->transaction->box->storage); + ret = -1; + } + + buffer_free(&fctx->orig_matches); + array_free(&fctx->levels); + pool_unref(&fctx->result_pool); + fts_scores_unref(&fctx->scores); + i_free(fctx); + } + if (fbox->module_ctx.super.search_deinit(ctx) < 0) + ret = -1; + return ret; +} + +static int fts_score_cmp(const uint32_t *uid, const struct fts_score_map *score) +{ + return *uid < score->uid ? -1 : + (*uid > score->uid ? 
1 : 0); +} + +static int fts_mail_get_special(struct mail *_mail, enum mail_fetch_field field, + const char **value_r) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + const struct fts_score_map *scores; + + if (field != MAIL_FETCH_SEARCH_RELEVANCY || ft->scores == NULL) + scores = NULL; + else { + scores = array_bsearch(&ft->scores->score_map, &_mail->uid, + fts_score_cmp); + } + if (scores != NULL) { + i_assert(scores->uid == _mail->uid); + (void)i_snprintf(fmail->score, sizeof(fmail->score), + "%f", scores->score); + + *value_r = fmail->score; + return 0; + } + + return fmail->module_ctx.super.get_special(_mail, field, value_r); +} + +static int +fts_mail_precache_range(struct mailbox_transaction_context *trans, + struct fts_backend_update_context *update_ctx, + uint32_t seq1, uint32_t seq2, unsigned int *extra_count) +{ + struct mail_search_args *search_args; + struct mail_search_context *ctx; + struct mail *mail; + int ret = 0; + + search_args = mail_search_build_init(); + mail_search_build_add_seqset(search_args, seq1, seq2); + ctx = mailbox_search_init(trans, search_args, NULL, + MAIL_FETCH_STREAM_HEADER | + MAIL_FETCH_STREAM_BODY, NULL); + mail_search_args_unref(&search_args); + + while (mailbox_search_next(ctx, &mail)) { + if (fts_build_mail(update_ctx, mail) < 0) { + ret = -1; + break; + } + if (mail_precache(mail) < 0) { + ret = -1; + break; + } + *extra_count += 1; + } + if (mailbox_search_deinit(&ctx) < 0) + ret = -1; + return ret; +} + +static int fts_mail_precache_init(struct mail *_mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list); + uint32_t last_seq; + + if (fts_mailbox_get_last_cached_seq(_mail->box, &last_seq) < 0) { + ft->failure_reason = "Failed to lookup last indexed FTS mail"; + 
return -1; + } + + ft->precached = TRUE; + ft->next_index_seq = last_seq + 1; + if (flist->update_ctx == NULL) + flist->update_ctx = fts_backend_update_init(flist->backend); + flist->update_ctx_refcount++; + return 0; +} + +static int fts_mail_index(struct mail *_mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list); + struct mail_private *pmail = (struct mail_private *)_mail; + + if (ft->failure_reason != NULL) + return -1; + + if (!ft->precached) { + if (fts_mail_precache_init(_mail) < 0) + return -1; + } + if (pmail->vmail != NULL) { + /* Indexing via virtual mailbox: Index all the mails in this + same real mailbox. */ + uint32_t msgs_count = + mail_index_view_get_messages_count(_mail->box->view); + + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (ft->next_index_seq > msgs_count) { + /* everything indexed already */ + return 0; + } else if (fts_mail_precache_range(_mail->transaction, + flist->update_ctx, + ft->next_index_seq, + msgs_count, + &ft->precache_extra_count) < 0) { + return -1; + } else { + ft->next_index_seq = msgs_count+1; + return 0; + } + } + + if (ft->next_index_seq < _mail->seq) { + /* we'll first need to index all the missing mails up to the + current one. 
*/ + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (fts_mail_precache_range(_mail->transaction, + flist->update_ctx, + ft->next_index_seq, + _mail->seq-1, + &ft->precache_extra_count) < 0) + return -1; + ft->next_index_seq = _mail->seq; + } + + if (ft->next_index_seq == _mail->seq) { + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (fts_build_mail(flist->update_ctx, _mail) < 0) + return -1; + ft->next_index_seq = _mail->seq + 1; + } + return 0; +} + +static int fts_mail_precache(struct mail *_mail) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + int ret = 0; + + fmail->module_ctx.super.precache(_mail); + if (fmail->virtual_mail) { + if (ft->highest_virtual_uid < _mail->uid) + ft->highest_virtual_uid = _mail->uid; + } else if (!ft->indexing) T_BEGIN { + /* avoid recursing here from fts_mail_precache_range() */ + struct event_reason *reason = + event_reason_begin("fts:index"); + ft->indexing = TRUE; + ret = fts_mail_index(_mail); + i_assert(ft->indexing); + ft->indexing = FALSE; + event_reason_end(&reason); + } T_END; + return ret; +} + +void fts_mail_allocated(struct mail *_mail) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct mail_vfuncs *v = mail->vlast; + struct fts_mailbox *fbox = FTS_CONTEXT(_mail->box); + struct fts_mail *fmail; + + if (fbox == NULL) + return; + + fmail = p_new(mail->pool, struct fts_mail, 1); + fmail->module_ctx.super = *v; + mail->vlast = &fmail->module_ctx.super; + fmail->virtual_mail = _mail->box->virtual_vfuncs != NULL; + + v->get_special = fts_mail_get_special; + v->precache = fts_mail_precache; + MODULE_CONTEXT_SET(mail, fts_mail_module, fmail); +} + +static struct mailbox_transaction_context * +fts_transaction_begin(struct mailbox *box, + enum mailbox_transaction_flags flags, + const char *reason) +{ + struct fts_mailbox 
*fbox = FTS_CONTEXT_REQUIRE(box); + struct mailbox_transaction_context *t; + struct fts_transaction_context *ft; + + ft = i_new(struct fts_transaction_context, 1); + + t = fbox->module_ctx.super.transaction_begin(box, flags, reason); + MODULE_CONTEXT_SET(t, fts_storage_module, ft); + return t; +} + +static int fts_transaction_end(struct mailbox_transaction_context *t, const char **error_r) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list); + int ret = 0; + + if (ft->failure_reason != NULL) { + *error_r = t_strdup(ft->failure_reason); + ret = -1; + } + + struct event_reason *reason = event_reason_begin("fts:index"); + if (ft->precached) { + i_assert(flist->update_ctx_refcount > 0); + if (--flist->update_ctx_refcount == 0) { + if (fts_backend_update_deinit(&flist->update_ctx) < 0) { + ret = -1; + *error_r = "backend deinit"; + } + } + } else if (ft->highest_virtual_uid > 0) { + if (fts_index_set_last_uid(t->box, ft->highest_virtual_uid) < 0) { + ret = -1; + *error_r = "index last uid setting"; + } + } + if (ft->scores != NULL) + fts_scores_unref(&ft->scores); + if (ft->precache_extra_count > 0) { + if (ret < 0) { + i_error("fts: Failed after indexing %u extra mails internally in %s: %s", + ft->precache_extra_count, t->box->vname, *error_r); + } else { + i_info("fts: Indexed %u extra mails internally in %s", + ft->precache_extra_count, t->box->vname); + } + } + event_reason_end(&reason); + i_free(ft); + return ret; +} + +static void fts_transaction_rollback(struct mailbox_transaction_context *t) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + const char *error; + + (void)fts_transaction_end(t, &error); + fbox->module_ctx.super.transaction_rollback(t); +} + +static void fts_queue_index(struct mailbox *box) +{ + struct mail_user *user = box->storage->user; + string_t *str = t_str_new(256); + const char *path, *value; + unsigned int max_recent_msgs; + int fd; + + path 
= t_strconcat(user->set->base_dir, "/"INDEXER_SOCKET_NAME, NULL); + fd = net_connect_unix(path); + if (fd == -1) { + i_error("net_connect_unix(%s) failed: %m", path); + return; + } + + value = mail_user_plugin_getenv(user, "fts_autoindex_max_recent_msgs"); + if (value == NULL || str_to_uint(value, &max_recent_msgs) < 0) + max_recent_msgs = 0; + + str_append(str, INDEXER_HANDSHAKE); + str_append(str, "APPEND\t0\t"); + str_append_tabescaped(str, user->username); + str_append_c(str, '\t'); + str_append_tabescaped(str, box->vname); + str_printfa(str, "\t%u", max_recent_msgs); + str_append_c(str, '\t'); + str_append_tabescaped(str, box->storage->user->session_id); + str_append_c(str, '\n'); + if (write_full(fd, str_data(str), str_len(str)) < 0) + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); +} + +static int +fts_transaction_commit(struct mailbox_transaction_context *t, + struct mail_transaction_commit_changes *changes_r) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + struct mailbox *box = t->box; + bool autoindex; + int ret = 0; + const char *error; + + autoindex = ft->mails_saved && !fbox->fts_mailbox_excluded && + mail_user_plugin_getenv_bool(box->storage->user, + "fts_autoindex"); + + if (fts_transaction_end(t, &error) < 0) { + mail_storage_set_error(t->box->storage, MAIL_ERROR_TEMP, + t_strdup_printf("FTS transaction commit failed: %s", + error)); + ret = -1; + } + if (fbox->module_ctx.super.transaction_commit(t, changes_r) < 0) + ret = -1; + if (ret < 0) + return -1; + + if (autoindex) + fts_queue_index(box); + return 0; +} + +static void fts_mailbox_sync_notify(struct mailbox *box, uint32_t uid, + enum mailbox_sync_type sync_type) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + + if (fbox->module_ctx.super.sync_notify != NULL) + fbox->module_ctx.super.sync_notify(box, uid, sync_type); + + if 
(sync_type != MAILBOX_SYNC_TYPE_EXPUNGE) { + if (uid == 0 && fbox->sync_update_ctx != NULL) { + /* this sync is finished */ + (void)fts_backend_update_deinit(&fbox->sync_update_ctx); + } + return; + } + + if (fbox->sync_update_ctx == NULL) { + if (fts_backend_is_updating(flist->backend)) { + /* FIXME: maildir workaround - we could get here + because we're building an index, which doesn't find + some mail and starts syncing the mailbox.. */ + return; + } + fbox->sync_update_ctx = fts_backend_update_init(flist->backend); + fts_backend_update_set_mailbox(fbox->sync_update_ctx, box); + } + fts_backend_update_expunge(fbox->sync_update_ctx, uid); +} + +static int fts_sync_deinit(struct mailbox_sync_context *ctx, + struct mailbox_sync_status *status_r) +{ + struct mailbox *box = ctx->box; + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list); + bool optimize; + int ret = 0; + + optimize = (ctx->flags & (MAILBOX_SYNC_FLAG_FORCE_RESYNC | + MAILBOX_SYNC_FLAG_OPTIMIZE)) != 0; + if (fbox->module_ctx.super.sync_deinit(ctx, status_r) < 0) + return -1; + ctx = NULL; + + if (optimize) { + i_assert(flist != NULL); + if (fts_backend_optimize(flist->backend) < 0) { + mailbox_set_critical(box, "FTS optimize failed"); + ret = -1; + } + } + return ret; +} + +static int fts_save_finish(struct mail_save_context *ctx) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + if (fbox->module_ctx.super.save_finish(ctx) < 0) + return -1; + ft->mails_saved = TRUE; + return 0; +} + +static int fts_copy(struct mail_save_context *ctx, struct mail *mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + if (fbox->module_ctx.super.copy(ctx, mail) < 0) + return -1; + ft->mails_saved = TRUE; + return 0; +} + +static void 
fts_mailbox_virtual_match_mail(struct mail_search_context *ctx, + struct mail *mail) +{ + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + unsigned int idx, be_last_uid; + + if (fctx == NULL || !fctx->fts_lookup_success || !fctx->virtual_mailbox || + ctx->seq < fctx->first_unindexed_seq) + return; + /* Table of last indexed UID per backend mailbox */ + HASH_TABLE_TYPE(virtual_last_indexed) hash_tbl = + fctx->last_indexed_virtual_uids; + + struct mail *backend_mail; + if (mail->box->mail_vfuncs->get_backend_mail(mail, &backend_mail) < 0) + return; + const char *box_name = backend_mail->box->vname; + /* Get the last indexed UID in the backend mailbox */ + void *uid_value = + hash_table_lookup(fctx->last_indexed_virtual_uids, box_name); + if (uid_value == NULL) { + /* This backend's last indexed uid is not yet inserted to the table */ + struct fts_mailbox_list *flist = + FTS_LIST_CONTEXT(backend_mail->box->list); + if (flist == NULL || flist->failed || + mailbox_open(backend_mail->box) < 0 || + fts_backend_get_last_uid(flist->backend, backend_mail->box, + &be_last_uid) < 0) { + be_last_uid = 0; + } else { + const char *vname_copy = + p_strdup(fctx->result_pool, backend_mail->box->vname); + hash_table_insert(hash_tbl, vname_copy, + POINTER_CAST(be_last_uid + 1)); + } + } else { + be_last_uid = POINTER_CAST_TO(uid_value, uint32_t) - 1; + } + if (backend_mail->uid <= be_last_uid) { + /* Mail was already indexed in the backend mailbox. 
+ Apply [non]matches based on the FTS lookup results */ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + + if (fctx->next_unindexed_seq == mail->seq) { + fctx->next_unindexed_seq++; + ft->highest_virtual_uid = mail->uid; + } + idx = 0; + fts_search_apply_results_level(ctx, ctx->args->args, &idx); + } else { + fctx->virtual_seen_unindexed_gaps = TRUE; + } +} + +static int fts_mailbox_search_next_match_mail(struct mail_search_context *ctx, + struct mail *mail) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + fts_mailbox_virtual_match_mail(ctx, mail); + return fbox->module_ctx.super.search_next_match_mail(ctx, mail); +} + +void fts_mailbox_allocated(struct mailbox *box) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list); + struct mailbox_vfuncs *v = box->vlast; + struct fts_mailbox *fbox; + + if (flist == NULL || flist->failed) + return; + + fbox = p_new(box->pool, struct fts_mailbox, 1); + fbox->module_ctx.super = *v; + box->vlast = &fbox->module_ctx.super; + fbox->fts_mailbox_excluded = fts_user_autoindex_exclude(box); + + v->get_status = fts_mailbox_get_status; + v->search_init = fts_mailbox_search_init; + v->search_next_nonblock = fts_mailbox_search_next_nonblock; + v->search_next_update_seq = fts_mailbox_search_next_update_seq; + v->search_deinit = fts_mailbox_search_deinit; + v->transaction_begin = fts_transaction_begin; + v->transaction_rollback = fts_transaction_rollback; + v->transaction_commit = fts_transaction_commit; + v->sync_notify = fts_mailbox_sync_notify; + v->sync_deinit = fts_sync_deinit; + v->save_finish = fts_save_finish; + v->copy = fts_copy; + v->search_next_match_mail = fts_mailbox_search_next_match_mail; + + MODULE_CONTEXT_SET(box, fts_storage_module, fbox); +} + +static void fts_mailbox_list_deinit(struct mailbox_list *list) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(list); + + if (flist->backend != NULL) + fts_backend_deinit(&flist->backend); + 
flist->module_ctx.super.deinit(list); +} + +static int +fts_init_namespace(struct fts_mailbox_list *flist, struct mail_namespace *ns, + const char **error_r) +{ + struct fts_backend *backend; + if (fts_backend_init(flist->backend_name, ns, error_r, &backend) < 0) { + flist->failed = TRUE; + return -1; + } + flist->backend = backend; + if ((flist->backend->flags & FTS_BACKEND_FLAG_FUZZY_SEARCH) != 0) + ns->user->fuzzy_search = TRUE; + return 0; +} + +void fts_mail_namespaces_added(struct mail_namespace *ns) +{ + while(ns != NULL) { + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(ns->list); + const char *error; + + if (flist != NULL && !flist->failed && flist->backend == NULL && + fts_init_namespace(flist, ns, &error) < 0) { + i_error("fts: Failed to initialize backend '%s': %s", + flist->backend_name, error); + } + ns = ns->next; + } +} + +void +fts_mailbox_list_created(struct mailbox_list *list) +{ + const char *name = mail_user_plugin_getenv(list->ns->user, "fts"); + const char *path; + + if (name == NULL || name[0] == '\0') { + e_debug(list->ns->user->event, + "fts: No fts setting - plugin disabled"); + return; + } + + if (!mailbox_list_get_root_path(list, MAILBOX_LIST_PATH_TYPE_INDEX, &path)) { + e_debug(list->ns->user->event, + "fts: Indexes disabled for namespace '%s'", + list->ns->prefix); + return; + } + + struct fts_mailbox_list *flist; + struct mailbox_list_vfuncs *v = list->vlast; + + flist = p_new(list->pool, struct fts_mailbox_list, 1); + flist->module_ctx.super = *v; + flist->backend_name = name; + list->vlast = &flist->module_ctx.super; + v->deinit = fts_mailbox_list_deinit; + MODULE_CONTEXT_SET(list, fts_mailbox_list_module, flist); +} + +struct fts_backend *fts_mailbox_backend(struct mailbox *box) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + + return flist->backend; +} + +struct fts_backend *fts_list_backend(struct mailbox_list *list) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(list); + + return flist 
== NULL ? NULL : flist->backend; +} diff --git a/src/plugins/fts/fts-storage.h b/src/plugins/fts/fts-storage.h new file mode 100644 index 0000000..ea28ed2 --- /dev/null +++ b/src/plugins/fts/fts-storage.h @@ -0,0 +1,70 @@ +#ifndef FTS_STORAGE_H +#define FTS_STORAGE_H + +#include "mail-storage-private.h" +#include "fts-api.h" + +enum fts_enforced { + FTS_ENFORCED_NO, + FTS_ENFORCED_YES, + FTS_ENFORCED_BODY, +}; + +struct fts_scores { + int refcount; + ARRAY_TYPE(fts_score_map) score_map; +}; + +struct fts_search_level { + ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs; + buffer_t *args_matches; + ARRAY_TYPE(fts_score_map) score_map; +}; + +HASH_TABLE_DEFINE_TYPE(virtual_last_indexed, const char *, void *); + +struct fts_search_context { + union mail_search_module_context module_ctx; + + struct fts_backend *backend; + struct mailbox *box; + struct mailbox_transaction_context *t; + struct mail_search_args *args; + enum fts_lookup_flags flags; + enum fts_enforced enforced; + + pool_t result_pool; + ARRAY(struct fts_search_level) levels; + buffer_t *orig_matches; + + uint32_t first_unindexed_seq; + uint32_t next_unindexed_seq; + HASH_TABLE_TYPE(virtual_last_indexed) last_indexed_virtual_uids; + + /* final scores, combined from all levels */ + struct fts_scores *scores; + + struct fts_indexer_context *indexer_ctx; + struct fts_search_state *search_state; + + bool virtual_mailbox:1; + bool fts_lookup_success:1; + bool indexing_timed_out:1; + bool virtual_seen_unindexed_gaps:1; +}; + +/* Figure out if we want to use full text search indexes and update + backends in fctx accordingly. */ +void fts_search_analyze(struct fts_search_context *fctx); +/* Perform the actual index lookup and update definite_uids and maybe_uids. */ +void fts_search_lookup(struct fts_search_context *fctx); +/* Returns FTS backend for the given mailbox (assumes it has one). 
*/ +struct fts_backend *fts_mailbox_backend(struct mailbox *box); +/* Returns FTS backend for the given mailbox list, or NULL if it has none. */ +struct fts_backend *fts_list_backend(struct mailbox_list *list); + +void fts_mail_allocated(struct mail *mail); +void fts_mail_namespaces_added(struct mail_namespace *ns); +void fts_mailbox_allocated(struct mailbox *box); +void fts_mailbox_list_created(struct mailbox_list *list); +#endif diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c new file mode 100644 index 0000000..3c813cd --- /dev/null +++ b/src/plugins/fts/fts-user.c @@ -0,0 +1,423 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "module-context.h" +#include "mail-user.h" +#include "mail-storage-private.h" +#include "mailbox-match-plugin.h" +#include "fts-language.h" +#include "fts-filter.h" +#include "fts-tokenizer.h" +#include "fts-user.h" + +#define FTS_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_user_module) + +struct fts_user { + union mail_user_module_context module_ctx; + int refcount; + + struct fts_language_list *lang_list; + struct fts_user_language *data_lang; + ARRAY_TYPE(fts_user_language) languages, data_languages; + + struct mailbox_match_plugin *autoindex_exclude; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_user_module, + &mail_user_module_register); + +static const char *const *str_keyvalues_to_array(const char *str) +{ + const char *key, *value, *const *keyvalues; + ARRAY_TYPE(const_string) arr; + unsigned int i; + + if (str == NULL) + return NULL; + + t_array_init(&arr, 8); + keyvalues = t_strsplit_spaces(str, " "); + for (i = 0; keyvalues[i] != NULL; i++) { + value = strchr(keyvalues[i], '='); + if (value != NULL) + key = t_strdup_until(keyvalues[i], value++); + else { + key = keyvalues[i]; + value = ""; + } + array_push_back(&arr, &key); + array_push_back(&arr, &value); + } + array_append_zero(&arr); + return array_front(&arr); +} + +static int 
+fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + const char *languages, *unknown; + const char *lang_config[3] = {NULL, NULL, NULL}; + + languages = mail_user_plugin_getenv(user, "fts_languages"); + if (languages == NULL) { + *error_r = "fts_languages setting is missing"; + return -1; + } + + lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config"); + if (lang_config[1] != NULL) + lang_config[0] = "fts_language_config"; + if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0) + return -1; + + if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) { + *error_r = t_strdup_printf( + "fts_languages: Unknown language '%s'", unknown); + return -1; + } + if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) { + *error_r = "fts_languages setting is empty"; + return -1; + } + return 0; +} + +static int +fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, + struct fts_filter **filter_r, const char **error_r) +{ + const struct fts_filter *filter_class; + struct fts_filter *filter = NULL, *parent = NULL; + const char *filters_key, *const *filters, *filter_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + /* try to get the language-specific filters first */ + filters_key = t_strconcat("fts_filters_", lang->name, NULL); + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* fallback to global filters */ + filters_key = "fts_filters"; + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* No filters */ + *filter_r = NULL; + return 0; + } + } + + filters = t_strsplit_spaces(str, " "); + for (i = 0; filters[i] != NULL; i++) { + filter_class = fts_filter_find(filters[i]); + if (filter_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown filter '%s'", + filters_key, filters[i]); + ret = -1; + break; + } + + /* try the language-specific setting first */ + 
filter_set_name = t_str_replace(filters[i], '-', '_'); + set_key = t_strdup_printf("fts_filter_%s_%s", + lang->name, filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + if (str == NULL) { + set_key = t_strdup_printf("fts_filter_%s", filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + if (fts_filter_create(filter_class, parent, lang, + str_keyvalues_to_array(str), + &filter, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_filter_unref(&parent); + parent = filter; + } + if (ret < 0) { + if (parent != NULL) + fts_filter_unref(&parent); + return -1; + } + *filter_r = filter; + return 0; +} + +static int +fts_user_create_tokenizer(struct mail_user *user, + const struct fts_language *lang, + struct fts_tokenizer **tokenizer_r, bool search, + const char **error_r) +{ + const struct fts_tokenizer *tokenizer_class; + struct fts_tokenizer *tokenizer = NULL, *parent = NULL; + const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL); + str = mail_user_plugin_getenv(user, tokenizers_key); + if (str == NULL) { + str = mail_user_plugin_getenv(user, "fts_tokenizers"); + if (str == NULL) { + *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key); + return -1; + } + tokenizers_key = "fts_tokenizers"; + } + + tokenizers = t_strsplit_spaces(str, " "); + + for (i = 0; tokenizers[i] != NULL; i++) { + tokenizer_class = fts_tokenizer_find(tokenizers[i]); + if (tokenizer_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'", + tokenizers_key, tokenizers[i]); + ret = -1; + break; + } + + tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_'); + set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name); + str = mail_user_plugin_getenv(user, set_key); + 
if (str == NULL) { + set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + /* tell the tokenizers that we're tokenizing a search string + (instead of tokenizing indexed data) */ + if (search) + str = t_strconcat("search=yes ", str, NULL); + + if (fts_tokenizer_create(tokenizer_class, parent, + str_keyvalues_to_array(str), + &tokenizer, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_tokenizer_unref(&parent); + parent = tokenizer; + } + if (ret < 0) { + if (parent != NULL) + fts_tokenizer_unref(&parent); + return -1; + } + *tokenizer_r = tokenizer; + return 0; +} + /* Create both tokenizers of user_lang for user_lang->lang: first index_tokenizer (search flag FALSE), then search_tokenizer (search flag TRUE). Returns 0 on success, or -1 as soon as either fts_user_create_tokenizer() call fails, passing error_r through to it. Nothing is released here: if the second call fails, the index tokenizer created by the first call stays in user_lang. */ +static int +fts_user_language_init_tokenizers(struct mail_user *user, + struct fts_user_language *user_lang, + const char **error_r) +{ + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->index_tokenizer, FALSE, + error_r) < 0) + return -1; + + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->search_tokenizer, TRUE, + error_r) < 0) + return -1; + return 0; +} + /* Return the entry of the user's languages array whose lang->name equals lang->name (compared with strcmp()), or NULL when no entry matches. Asserts that the user has an fts context. */ +struct fts_user_language * +fts_user_language_find(struct mail_user *user, + const struct fts_language *lang) +{ + struct fts_user_language *user_lang; + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + array_foreach_elem(&fuser->languages, user_lang) { + if (strcmp(user_lang->lang->name, lang->name) == 0) + return user_lang; + } + return NULL; +} + /* Allocate a fts_user_language for lang from user->pool, append it to fuser->languages, then create its tokenizers and its filter. Returns 0 on success and -1 if either creation step fails. The entry is appended before those steps run, so it remains in fuser->languages even when -1 is returned. */ +static int fts_user_language_create(struct mail_user *user, + struct fts_user *fuser, + const struct fts_language *lang, + const char **error_r) +{ + struct fts_user_language *user_lang; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = lang; + array_push_back(&fuser->languages, &user_lang); + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0) + return 
-1; + return 0; +} + /* Call fts_user_language_create() for every language returned by fts_language_list_get_all(fuser->lang_list). Stops at the first failure and returns -1; returns 0 when every language was created. */ +static int fts_user_languages_fill_all(struct mail_user *user, + struct fts_user *fuser, + const char **error_r) +{ + const struct fts_language *lang; + + array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) { + if (fts_user_language_create(user, fuser, lang, error_r) < 0) + return -1; + } + return 0; +} + /* Set up the entry for fts_language_data: allocate it from user->pool, create its tokenizers and a fts_filter_lowercase filter, initialize fuser->data_languages and push the entry to both fuser->data_languages and fuser->languages, and store it as fuser->data_lang. Returns -1 if tokenizer creation fails, 0 otherwise; a failing lowercase filter creation is treated as unreachable. NOTE(review): on the -1 path the entry has not been added to any array yet, so a tokenizer that was already created is not reachable through fuser->languages - confirm whether it needs releasing here. */ +static int +fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + struct fts_user_language *user_lang; + const char *error; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = &fts_language_data; + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + + if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL, + &user_lang->filter, &error) < 0) + i_unreached(); + i_assert(user_lang->filter != NULL); + + p_array_init(&fuser->data_languages, user->pool, 1); + array_push_back(&fuser->data_languages, &user_lang); + array_push_back(&fuser->languages, &user_lang); + + fuser->data_lang = user_lang; + return 0; +} + /* Return the lang_list stored in the user's fts context. Asserts that the context exists. */ +struct fts_language_list *fts_user_get_language_list(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->lang_list; +} + /* Return a pointer to the languages array inside the user's fts context (not a copy). Asserts that the context exists. */ +const ARRAY_TYPE(fts_user_language) * +fts_user_get_all_languages(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return &fuser->languages; +} + /* Return a pointer to the data_languages array inside the user's fts context (not a copy). Asserts that the context exists. */ +const ARRAY_TYPE(fts_user_language) * +fts_user_get_data_languages(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return &fuser->data_languages; +} + /* Return the data_lang entry stored in the user's fts context. Asserts that the context exists. */ +struct fts_user_language *fts_user_get_data_lang(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->data_lang; +} + /* Return the result of mailbox_match_plugin_exclude() for box, using the autoindex_exclude matcher of the fts context of box->storage->user. NOTE(review): unlike the accessors above, fuser is dereferenced without an i_assert(fuser != NULL) - confirm callers guarantee the context exists. */ +bool fts_user_autoindex_exclude(struct mailbox *box) +{ + struct fts_user *fuser = 
FTS_USER_CONTEXT(box->storage->user); + + return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box); +} + /* Unreference the filter, the index tokenizer and the search tokenizer of user_lang, skipping whichever of them is NULL. The user_lang struct itself is not freed here. */ +static void fts_user_language_free(struct fts_user_language *user_lang) +{ + if (user_lang->filter != NULL) + fts_filter_unref(&user_lang->filter); + if (user_lang->index_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->index_tokenizer); + if (user_lang->search_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->search_tokenizer); +} + /* Release what fuser holds: deinit lang_list when it is set, run fts_user_language_free() on every entry of fuser->languages when that array has been created, and deinit the autoindex_exclude matcher. The fuser struct itself is not freed here. */ +static void fts_user_free(struct fts_user *fuser) +{ + struct fts_user_language *user_lang; + + if (fuser->lang_list != NULL) + fts_language_list_deinit(&fuser->lang_list); + + if (array_is_created(&fuser->languages)) { + array_foreach_elem(&fuser->languages, user_lang) + fts_user_language_free(user_lang); + } + mailbox_match_plugin_deinit(&fuser->autoindex_exclude); +} + /* Initialize fuser->languages (allocated from user->pool) and then run fts_user_init_languages(), fts_user_init_data_language() and fts_user_languages_fill_all() in that order. Returns -1 at the first step that fails and 0 when all succeed; nothing is cleaned up here on failure. */ +static int +fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + p_array_init(&fuser->languages, user->pool, 4); + + if (fts_user_init_languages(user, fuser, error_r) < 0 || + fts_user_init_data_language(user, fuser, error_r) < 0) + return -1; + if (fts_user_languages_fill_all(user, fuser, error_r) < 0) + return -1; + return 0; +} + /* Attach fts state to user. If the user already has an fts context, only its refcount is incremented and 0 is returned. Otherwise a fts_user is allocated from user->pool with refcount 1; when initialize_libfts is set the libfts state is initialized, and on failure the fts_user is released with fts_user_free() and -1 is returned without setting the module context. On success the "fts_autoindex_exclude" matcher is created and the context is stored on the user. NOTE(review): on the failure path fts_user_free() runs before autoindex_exclude has been assigned - confirm mailbox_match_plugin_deinit() tolerates that. */ +int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, + const char **error_r) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + /* multiple fts plugins are loaded */ + fuser->refcount++; + return 0; + } + + fuser = p_new(user->pool, struct fts_user, 1); + fuser->refcount = 1; + if (initialize_libfts) { + if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) { + fts_user_free(fuser); + return -1; + } + } + fuser->autoindex_exclude = + mailbox_match_plugin_init(user, "fts_autoindex_exclude"); + + MODULE_CONTEXT_SET(user, fts_user_module, fuser); + return 0; +} + /* Drop one reference to the user's fts context and call fts_user_free() when the count reaches zero. Does nothing when the user has no fts context. */ +void fts_mail_user_deinit(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + i_assert(fuser->refcount > 
0); + if (--fuser->refcount == 0) + fts_user_free(fuser); + } +} diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h new file mode 100644 index 0000000..043f4e1 --- /dev/null +++ b/src/plugins/fts/fts-user.h @@ -0,0 +1,27 @@ +#ifndef FTS_USER_H +#define FTS_USER_H + /* fts state kept for one language: the language itself, one filter, and two tokenizers (one named for indexing, one named for searching). The array type defined below stores pointers to these structs. */ +struct fts_user_language { + const struct fts_language *lang; + struct fts_filter *filter; + struct fts_tokenizer *index_tokenizer, *search_tokenizer; +}; +ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *); + /* Lookup and accessor functions; each takes the mail_user whose fts state is queried. */ +struct fts_user_language * +fts_user_language_find(struct mail_user *user, + const struct fts_language *lang); +struct fts_language_list *fts_user_get_language_list(struct mail_user *user); +const ARRAY_TYPE(fts_user_language) * +fts_user_get_all_languages(struct mail_user *user); +struct fts_user_language *fts_user_get_data_lang(struct mail_user *user); +const ARRAY_TYPE(fts_user_language) * +fts_user_get_data_languages(struct mail_user *user); + /* Takes a mailbox rather than a mail_user. NOTE(review): presumably TRUE means the mailbox is excluded from automatic indexing - confirm against mailbox_match_plugin_exclude(). */ +bool fts_user_autoindex_exclude(struct mailbox *box); + /* Init/deinit pair for a user's fts state; init returns int and reports errors through error_r. */ +int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, + const char **error_r); +void fts_mail_user_deinit(struct mail_user *user); + +#endif diff --git a/src/plugins/fts/xml2text.c b/src/plugins/fts/xml2text.c new file mode 100644 index 0000000..f3c573c --- /dev/null +++ b/src/plugins/fts/xml2text.c @@ -0,0 +1,44 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "message-parser.h" +#include "fts-parser.h" + +#include <unistd.h> + /* Filter program: creates the fts_parser_html parser with content type "text/html", reads stdin in IO_BLOCK_SIZE chunks, hands each chunk to the parser's more() callback and writes whatever more() leaves in the block to stdout. Read and write errors are fatal. NOTE(review): write() is only checked for a negative return, so a short write is not retried. */ +int main(void) +{ + struct fts_parser *parser; + unsigned char buf[IO_BLOCK_SIZE]; + struct message_block block; + ssize_t ret; + struct fts_parser_context parser_context = {.content_type = "text/html"}; + + lib_init(); + + parser = fts_parser_html.try_init(&parser_context); + i_assert(parser != NULL); + + i_zero(&block); + while ((ret = read(STDIN_FILENO, buf, sizeof(buf))) > 0) { + block.data = buf; + block.size = ret; + parser->v.more(parser, &block); + if 
(write(STDOUT_FILENO, block.data, block.size) < 0) + i_fatal("write(stdout) failed: %m"); + } + if (ret < 0) + i_fatal("read(stdin) failed: %m"); + /* stdin is exhausted: keep calling more() with an empty block and write what it returns, until it returns an empty block */ + for (;;) { + block.size = 0; + parser->v.more(parser, &block); + if (block.size == 0) + break; + if (write(STDOUT_FILENO, block.data, block.size) < 0) + i_fatal("write(stdout) failed: %m"); + } + + lib_deinit(); + return 0; +} |