diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/plugins/fts | |
parent | Initial commit. (diff) | |
download | dovecot-upstream.tar.xz dovecot-upstream.zip |
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
66 files changed, 21328 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/Makefile.am b/src/plugins/fts-lucene/Makefile.am new file mode 100644 index 0000000..d68e6ae --- /dev/null +++ b/src/plugins/fts-lucene/Makefile.am @@ -0,0 +1,61 @@ +doveadm_moduledir = $(moduledir)/doveadm + +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts \ + -I$(top_srcdir)/src/doveadm + +AM_CXXFLAGS = \ + $(CLUCENE_CFLAGS) \ + $(LIBEXTTEXTCAT_CFLAGS) + +NOPLUGIN_LDFLAGS = +lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib21_fts_lucene_plugin.la + +if BUILD_FTS_STEMMER +STEMMER_LIBS = -lstemmer +SHOWBALL_SOURCES = Snowball.cc +endif + +if BUILD_FTS_EXTTEXTCAT +TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS) +else +if BUILD_FTS_TEXTCAT +TEXTCAT_LIBS = -ltextcat +endif +endif + +lib21_fts_lucene_plugin_la_LIBADD = \ + $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS) + +lib21_fts_lucene_plugin_la_SOURCES = \ + fts-lucene-plugin.c \ + fts-backend-lucene.c \ + lucene-wrapper.cc \ + $(SHOWBALL_SOURCES) + +noinst_HEADERS = \ + fts-lucene-plugin.h \ + lucene-wrapper.h \ + SnowballAnalyzer.h \ + SnowballFilter.h + +if BUILD_FTS_TEXTCAT +exampledir = $(docdir)/example-config +example_DATA = \ + textcat.conf +endif +EXTRA_DIST = textcat.conf + +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_lucene_plugin.la + +lib20_doveadm_fts_lucene_plugin_la_SOURCES = \ + doveadm-fts-lucene.c diff --git a/src/plugins/fts-lucene/Makefile.in b/src/plugins/fts-lucene/Makefile.in new file mode 100644 index 0000000..323982d --- /dev/null +++ b/src/plugins/fts-lucene/Makefile.in @@ -0,0 +1,990 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/plugins/fts-lucene +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(doveadm_moduledir)" \ + "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)" +LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES) +lib20_doveadm_fts_lucene_plugin_la_LIBADD = +am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS = doveadm-fts-lucene.lo +lib20_doveadm_fts_lucene_plugin_la_OBJECTS = \ + $(am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib20_doveadm_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(lib20_doveadm_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@ +am__DEPENDENCIES_1 = +@BUILD_FTS_EXTTEXTCAT_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +lib21_fts_lucene_plugin_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1) +am__lib21_fts_lucene_plugin_la_SOURCES_DIST = fts-lucene-plugin.c \ + fts-backend-lucene.c lucene-wrapper.cc Snowball.cc +@BUILD_FTS_STEMMER_TRUE@am__objects_1 = Snowball.lo +am_lib21_fts_lucene_plugin_la_OBJECTS = fts-lucene-plugin.lo \ + fts-backend-lucene.lo lucene-wrapper.lo $(am__objects_1) +lib21_fts_lucene_plugin_la_OBJECTS = \ + $(am_lib21_fts_lucene_plugin_la_OBJECTS) +lib21_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(lib21_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/Snowball.Plo \ + ./$(DEPDIR)/doveadm-fts-lucene.Plo \ + ./$(DEPDIR)/fts-backend-lucene.Plo \ + ./$(DEPDIR)/fts-lucene-plugin.Plo \ + ./$(DEPDIR)/lucene-wrapper.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \ + $(lib21_fts_lucene_plugin_la_SOURCES) +DIST_SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \ + $(am__lib21_fts_lucene_plugin_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(example_DATA) +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = @FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = @PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +doveadm_moduledir = $(moduledir)/doveadm +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts \ + -I$(top_srcdir)/src/doveadm + +AM_CXXFLAGS = \ + $(CLUCENE_CFLAGS) \ + $(LIBEXTTEXTCAT_CFLAGS) + +lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib21_fts_lucene_plugin.la + +@BUILD_FTS_STEMMER_TRUE@STEMMER_LIBS = -lstemmer +@BUILD_FTS_STEMMER_TRUE@SHOWBALL_SOURCES = Snowball.cc +@BUILD_FTS_EXTTEXTCAT_FALSE@@BUILD_FTS_TEXTCAT_TRUE@TEXTCAT_LIBS = -ltextcat +@BUILD_FTS_EXTTEXTCAT_TRUE@TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS) +lib21_fts_lucene_plugin_la_LIBADD = \ + $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS) + +lib21_fts_lucene_plugin_la_SOURCES = \ + fts-lucene-plugin.c \ + fts-backend-lucene.c \ + lucene-wrapper.cc \ + $(SHOWBALL_SOURCES) + +noinst_HEADERS = \ + fts-lucene-plugin.h \ + lucene-wrapper.h \ + SnowballAnalyzer.h \ + SnowballFilter.h + +@BUILD_FTS_TEXTCAT_TRUE@exampledir = $(docdir)/example-config +@BUILD_FTS_TEXTCAT_TRUE@example_DATA = \ +@BUILD_FTS_TEXTCAT_TRUE@ textcat.conf + +EXTRA_DIST = textcat.conf +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_lucene_plugin.la + +lib20_doveadm_fts_lucene_plugin_la_SOURCES = \ + doveadm-fts-lucene.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \ + } + +uninstall-doveadm_moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \ + done + +clean-doveadm_moduleLTLIBRARIES: + -test -z "$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES) + @list='$(doveadm_module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +lib20_doveadm_fts_lucene_plugin.la: $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_doveadm_fts_lucene_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_LIBADD) $(LIBS) + +lib21_fts_lucene_plugin.la: $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib21_fts_lucene_plugin_la_DEPENDENCIES) + $(AM_V_CXXLD)$(lib21_fts_lucene_plugin_la_LINK) -rpath $(moduledir) $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Snowball.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts-lucene.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-lucene.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-lucene-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lucene-wrapper.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-exampleDATA: $(example_DATA) + @$(NORMAL_INSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(exampledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(exampledir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(exampledir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(exampledir)" || exit $$?; \ + done + +uninstall-exampleDATA: + @$(NORMAL_UNINSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(exampledir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/Snowball.Plo + -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo + -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo + -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo + -rm -f ./$(DEPDIR)/lucene-wrapper.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-doveadm_moduleLTLIBRARIES install-exampleDATA \ + install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/Snowball.Plo + -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo + -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo + -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo + -rm -f ./$(DEPDIR)/lucene-wrapper.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-exampleDATA uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am \ + install-doveadm_moduleLTLIBRARIES install-dvi install-dvi-am \ + install-exampleDATA install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-doveadm_moduleLTLIBRARIES uninstall-exampleDATA \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts-lucene/Snowball.cc b/src/plugins/fts-lucene/Snowball.cc new file mode 100644 index 0000000..43b54e3 --- /dev/null +++ b/src/plugins/fts-lucene/Snowball.cc @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include <CLucene.h> +#include "SnowballAnalyzer.h" +#include "SnowballFilter.h" +#include <CLucene/util/CLStreams.h> +#include <CLucene/analysis/Analyzers.h> +#include <CLucene/analysis/standard/StandardTokenizer.h> +#include <CLucene/analysis/standard/StandardFilter.h> + +extern "C" { +#include "lib.h" +#include "buffer.h" +#include "unichar.h" +#include "lucene-wrapper.h" +}; + +CL_NS_USE(analysis) +CL_NS_USE(util) +CL_NS_USE2(analysis,standard) + +CL_NS_DEF2(analysis,snowball) + + /** Builds the named analyzer with no stop words. */ + SnowballAnalyzer::SnowballAnalyzer(normalizer_func_t *_normalizer, const char* _language) + : language(i_strdup(_language)), + normalizer(_normalizer), + stopSet(NULL), + prevstream(NULL) + { + } + + SnowballAnalyzer::~SnowballAnalyzer() + { + if (prevstream) + _CLDELETE(prevstream); + i_free(language); + if ( stopSet != NULL ) + _CLDELETE(stopSet); + } + + /** Builds the named analyzer with the given stop words. + */ + SnowballAnalyzer::SnowballAnalyzer(const char* language, const TCHAR** stopWords) + : language(i_strdup(language)), + normalizer(NULL), + stopSet(_CLNEW CLTCSetList(true)), + prevstream(NULL) + { + StopFilter::fillStopTable(stopSet,stopWords); + } + + TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { + return this->tokenStream(fieldName,reader,false); + } + + /** Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ + TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader) { + BufferedReader* bufferedReader = reader->__asBufferedReader(); + TokenStream* result; + + if ( bufferedReader == NULL ) + result = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, deleteReader), true ); + else + result = _CLNEW StandardTokenizer(bufferedReader, deleteReader); + + result = _CLNEW StandardFilter(result, true); + result = _CLNEW CL_NS(analysis)::LowerCaseFilter(result, true); + if (stopSet != NULL) + result = _CLNEW CL_NS(analysis)::StopFilter(result, true, stopSet); + result = _CLNEW SnowballFilter(result, normalizer, language, true); + return result; + } + + TokenStream* SnowballAnalyzer::reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { + if (prevstream) _CLDELETE(prevstream); + prevstream = this->tokenStream(fieldName, reader); + return prevstream; + } + + + + + + + /** Construct the named stemming filter. + * + * @param in the input tokens to stem + * @param name the name of a stemmer + */ + SnowballFilter::SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS): + TokenFilter(in,deleteTS) + { + stemmer = sb_stemmer_new(language, NULL); //use utf8 encoding + this->normalizer = normalizer; + + if ( stemmer == NULL ){ + _CLTHROWA(CL_ERR_IllegalArgument, "language not available for stemming\n"); //todo: richer error + } + } + + SnowballFilter::~SnowballFilter(){ + sb_stemmer_delete(stemmer); + } + + /** Returns the next input Token, after being stemmed */ + Token* SnowballFilter::next(Token* token){ + if (input->next(token) == NULL) + return NULL; + + unsigned char utf8text[LUCENE_MAX_WORD_LEN*5+1]; + unsigned int len = I_MIN(LUCENE_MAX_WORD_LEN, token->termLength()); + + buffer_t buf = { { 0, 0 } }; + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + buffer_create_from_data(&buf, utf8text, sizeof(utf8text)); + uni_ucs4_to_utf8((const unichar_t *)token->termBuffer(), len, &buf); + + const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8text, buf.used); + if ( stemmed == NULL ) + _CLTHROWA(CL_ERR_Runtime,"Out of memory"); + + int stemmedLen=sb_stemmer_length(stemmer); + + if (normalizer == NULL) { + unsigned int tchartext_size = + uni_utf8_strlen_n(stemmed, stemmedLen) + 1; + TCHAR tchartext[tchartext_size]; + lucene_utf8_n_to_tchar(stemmed, stemmedLen, tchartext, tchartext_size); + token->set(tchartext,token->startOffset(), token->endOffset(), token->type()); + } else T_BEGIN { + buffer_t *norm_buf = t_buffer_create(stemmedLen); + normalizer(stemmed, stemmedLen, norm_buf); + + unsigned int tchartext_size = + uni_utf8_strlen_n(norm_buf->data, norm_buf->used) + 1; + TCHAR tchartext[tchartext_size]; + lucene_utf8_n_to_tchar((const unsigned char *)norm_buf->data, + norm_buf->used, tchartext, tchartext_size); + token->set(tchartext,token->startOffset(), token->endOffset(), token->type()); + } T_END; + return token; + } + + +CL_NS_END2 diff --git a/src/plugins/fts-lucene/SnowballAnalyzer.h b/src/plugins/fts-lucene/SnowballAnalyzer.h new file mode 100644 index 0000000..45455c5 --- /dev/null +++ b/src/plugins/fts-lucene/SnowballAnalyzer.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_analysis_snowball_analyser_ +#define _lucene_analysis_snowball_analyser_ + +extern "C" { +#include "lib.h" +#include "unichar.h" +}; +#include "CLucene/analysis/AnalysisHeader.h" + +CL_CLASS_DEF(util,BufferedReader) +CL_NS_DEF2(analysis,snowball) + +/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link + * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}. + * + * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a + * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in + * {@link EnglishStemmer} is named "English". + */ +class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { + char* language; + normalizer_func_t *normalizer; + CLTCSetList* stopSet; + TokenStream *prevstream; + +public: + /** Builds the named analyzer with no stop words. */ + SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english"); + + /** Builds the named analyzer with the given stop words. + */ + SnowballAnalyzer(const char* language, const TCHAR** stopWords); + + ~SnowballAnalyzer(); + + /** Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader); + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); +}; + +CL_NS_END2 +#endif + diff --git a/src/plugins/fts-lucene/SnowballFilter.h b/src/plugins/fts-lucene/SnowballFilter.h new file mode 100644 index 0000000..6a0ed12 --- /dev/null +++ b/src/plugins/fts-lucene/SnowballFilter.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_analysis_snowball_filter_ +#define _lucene_analysis_snowball_filter_ + +#include "CLucene/analysis/AnalysisHeader.h" +#include "libstemmer.h" + +CL_NS_DEF2(analysis,snowball) + +/** A filter that stems words using a Snowball-generated stemmer. + * + * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a + * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in + * {@link EnglishStemmer} is named "English". + * + * Note: todo: This is not thread safe... + */ +class CLUCENE_CONTRIBS_EXPORT SnowballFilter: public TokenFilter { + struct sb_stemmer * stemmer; + normalizer_func_t *normalizer; +public: + + /** Construct the named stemming filter. + * + * @param in the input tokens to stem + * @param name the name of a stemmer + */ + SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS); + + ~SnowballFilter(); + + /** Returns the next input Token, after being stemmed */ + Token* next(Token* token); +}; + +CL_NS_END2 +#endif diff --git a/src/plugins/fts-lucene/doveadm-fts-lucene.c b/src/plugins/fts-lucene/doveadm-fts-lucene.c new file mode 100644 index 0000000..a761907 --- /dev/null +++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c @@ -0,0 +1,70 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "doveadm-dump.h" +#include "doveadm-fts.h" +#include "lucene-wrapper.h" + +#include <stdio.h> +#include <sys/stat.h> + +const char *doveadm_fts_lucene_plugin_version = DOVECOT_ABI_VERSION; + +void doveadm_fts_lucene_plugin_init(struct module *module); +void doveadm_fts_lucene_plugin_deinit(void); + +static void +cmd_dump_fts_lucene(const char *path, const char *const *args ATTR_UNUSED) +{ + struct lucene_index *index; + struct lucene_index_iter *iter; + guid_128_t prev_guid; + const struct lucene_index_record *rec; + bool first = TRUE; + + i_zero(&prev_guid); + index = lucene_index_init(path, NULL, NULL); + iter = lucene_index_iter_init(index); + while ((rec = lucene_index_iter_next(iter)) != NULL) { + if (memcmp(prev_guid, rec->mailbox_guid, + sizeof(prev_guid)) != 0) { + if (first) + first = FALSE; + else + printf("\n"); + memcpy(prev_guid, rec->mailbox_guid, sizeof(prev_guid)); + printf("%s: ", guid_128_to_string(prev_guid)); + } + printf("%u", rec->uid); + if (rec->part_num != 0) + printf("[%u]", rec->part_num); + printf("\n"); + } + printf("\n"); + if (lucene_index_iter_deinit(&iter) < 0) + i_error("Lucene index iteration failed"); + lucene_index_deinit(index); +} + +static bool test_dump_fts_lucene(const char *path) +{ + struct stat st; + + path = t_strconcat(path, "/segments.gen", NULL); + return stat(path, &st) == 0; +} + +static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_lucene = { + "fts-lucene", + test_dump_fts_lucene, + cmd_dump_fts_lucene +}; + +void doveadm_fts_lucene_plugin_init(struct module *module ATTR_UNUSED) +{ + doveadm_dump_register(&doveadm_cmd_dump_fts_lucene); +} + +void doveadm_fts_lucene_plugin_deinit(void) +{ +} diff --git a/src/plugins/fts-lucene/fts-backend-lucene.c b/src/plugins/fts-lucene/fts-backend-lucene.c new file mode 100644 index 0000000..963dbdf --- /dev/null +++ b/src/plugins/fts-lucene/fts-backend-lucene.c @@ -0,0 +1,605 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "hex-binary.h" +#include "strescape.h" +#include "message-part.h" +#include "mail-namespace.h" +#include "mail-storage-private.h" +#include "fts-expunge-log.h" +#include "lucene-wrapper.h" +#include "fts-indexer.h" +#include "fts-lucene-plugin.h" + +#include <wchar.h> + +#define LUCENE_INDEX_DIR_NAME "lucene-indexes" +#define LUCENE_EXPUNGE_LOG_NAME "dovecot-expunges.log" +#define LUCENE_OPTIMIZE_BATCH_MSGS_COUNT 100 + +struct lucene_fts_backend { + struct fts_backend backend; + char *dir_path; + + struct lucene_index *index; + struct mailbox *selected_box; + unsigned int selected_box_generation; + guid_128_t selected_box_guid; + + struct fts_expunge_log *expunge_log; + + bool dir_created:1; + bool updating:1; +}; + +struct lucene_fts_backend_update_context { + struct fts_backend_update_context ctx; + + struct mailbox *box; + uint32_t last_uid; + uint32_t last_indexed_uid; + char *first_box_vname; + + uint32_t uid, part_num; + char *hdr_name; + + unsigned int added_msgs; + struct fts_expunge_log_append_ctx *expunge_ctx; + + bool lucene_opened; + bool last_indexed_uid_set; + bool mime_parts; +}; + +static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend) +{ + if (backend->dir_created) + return 0; + + backend->dir_created = TRUE; + if (mailbox_list_mkdir_root(backend->backend.ns->list, + backend->dir_path, + MAILBOX_LIST_PATH_TYPE_INDEX) < 0) + return -1; + return 0; +} + +static int +fts_lucene_get_mailbox_guid(struct mailbox *box, guid_128_t guid_r) +{ + struct mailbox_metadata metadata; + + if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, + &metadata) < 0) { + i_error("lucene: Couldn't get mailbox %s GUID: %s", + box->vname, mailbox_get_last_internal_error(box, NULL)); + return -1; + } + memcpy(guid_r, metadata.guid, GUID_128_SIZE); + return 0; +} + +static int +fts_backend_select(struct lucene_fts_backend *backend, struct mailbox *box) +{ + guid_128_t guid; + unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH]; + wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH]; + buffer_t buf; + unsigned int i; + + i_assert(box != NULL); + + if (backend->selected_box == box && + backend->selected_box_generation == box->generation_sequence) + return 0; + + if (fts_lucene_get_mailbox_guid(box, guid) < 0) + return -1; + buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH); + binary_to_hex_append(&buf, guid, GUID_128_SIZE); + for (i = 0; i < N_ELEMENTS(wguid_hex); i++) + wguid_hex[i] = guid_hex[i]; + + lucene_index_select_mailbox(backend->index, wguid_hex); + + backend->selected_box = box; + memcpy(backend->selected_box_guid, guid, + sizeof(backend->selected_box_guid)); + backend->selected_box_generation = box->generation_sequence; + return 0; +} + +static struct fts_backend *fts_backend_lucene_alloc(void) +{ + struct lucene_fts_backend *backend; + + backend = i_new(struct lucene_fts_backend, 1); + backend->backend = fts_backend_lucene; + return &backend->backend; +} + +static int +fts_backend_lucene_init(struct fts_backend *_backend, const char **error_r) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT(_backend->ns->user); + const char *path; + + if (fuser == NULL) { + /* invalid settings */ + *error_r = "Invalid fts_lucene settings"; + return -1; + } + /* fts already checked that index exists */ + + if (fuser->set.use_libfts) { + /* change our flags so we get proper input */ + _backend->flags &= ENUM_NEGATE(FTS_BACKEND_FLAG_FUZZY_SEARCH); + _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT; + } + path = mailbox_list_get_root_forced(_backend->ns->list, + MAILBOX_LIST_PATH_TYPE_INDEX); + + backend->dir_path = i_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL); + backend->index = lucene_index_init(backend->dir_path, + _backend->ns->list, + &fuser->set); + + path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL); + backend->expunge_log = fts_expunge_log_init(path); + return 0; +} + +static void fts_backend_lucene_deinit(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (backend->index != NULL) + lucene_index_deinit(backend->index); + if (backend->expunge_log != NULL) + fts_expunge_log_deinit(&backend->expunge_log); + i_free(backend->dir_path); + i_free(backend); +} + +static int +fts_backend_lucene_get_last_uid(struct fts_backend *_backend, + struct mailbox *box, uint32_t *last_uid_r) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user); + struct fts_index_header hdr; + uint32_t set_checksum; + int ret; + + if (fts_index_get_header(box, &hdr)) { + set_checksum = fts_lucene_settings_checksum(&fuser->set); + ret = fts_index_have_compatible_settings(_backend->ns->list, + set_checksum); + if (ret < 0) + return -1; + if (ret == 0) { + /* need to rebuild the index */ + *last_uid_r = 0; + } else { + *last_uid_r = hdr.last_indexed_uid; + } + return 0; + } + + /* either nothing has been indexed, or the index was corrupted. + do it the slow way. */ + if (fts_backend_select(backend, box) < 0) + return -1; + if (lucene_index_get_last_uid(backend->index, last_uid_r) < 0) + return -1; + + fts_index_set_last_uid(box, *last_uid_r); + return 0; +} + +static struct fts_backend_update_context * +fts_backend_lucene_update_init(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct lucene_fts_backend_update_context *ctx; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user); + + i_assert(!backend->updating); + + ctx = i_new(struct lucene_fts_backend_update_context, 1); + ctx->ctx.backend = _backend; + ctx->mime_parts = fuser->set.mime_parts; + backend->updating = TRUE; + return &ctx->ctx; +} + +static bool +fts_backend_lucene_need_optimize(struct lucene_fts_backend_update_context *ctx) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)ctx->ctx.backend; + unsigned int expunges; + uint32_t numdocs; + + if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT) + return TRUE; + if (lucene_index_get_doc_count(backend->index, &numdocs) < 0) + return FALSE; + + if (fts_expunge_log_uid_count(backend->expunge_log, &expunges) < 0) + return FALSE; + return expunges > 0 && + numdocs / expunges <= 50; /* >2% of index has been expunged */ +} + +static int +fts_backend_lucene_update_deinit(struct fts_backend_update_context *_ctx) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + int ret = _ctx->failed ? -1 : 0; + + i_assert(backend->updating); + + backend->updating = FALSE; + if (ctx->lucene_opened) { + if (lucene_index_build_deinit(backend->index) < 0) + ret = -1; + } + + if (ctx->expunge_ctx != NULL) { + if (fts_expunge_log_append_commit(&ctx->expunge_ctx) < 0) { + struct stat st; + ret = -1; + + if (stat(backend->dir_path, &st) < 0 && errno == ENOENT) { + /* lucene-indexes directory doesn't even exist, + so dovecot.index's last_index_uid is wrong. + rescan to update them. */ + (void)lucene_index_rescan(backend->index); + ret = 0; + } + } + } + + if (fts_backend_lucene_need_optimize(ctx)) { + if (ctx->lucene_opened) + (void)fts_backend_optimize(_ctx->backend); + else if (ctx->first_box_vname != NULL) { + struct mail_user *user = backend->backend.ns->user; + const char *cmd, *path; + int fd; + + /* the optimize affects all mailboxes within namespace, + so just use any mailbox name in it */ + cmd = t_strdup_printf("OPTIMIZE\t0\t%s\t%s\n", + str_tabescape(user->username), + str_tabescape(ctx->first_box_vname)); + fd = fts_indexer_cmd(user, cmd, &path); + i_close_fd(&fd); + } + } + + i_free(ctx->first_box_vname); + i_free(ctx); + return ret; +} + +static void +fts_backend_lucene_update_set_mailbox(struct fts_backend_update_context *_ctx, + struct mailbox *box) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + + if (ctx->last_uid != 0) { + fts_index_set_last_uid(ctx->box, ctx->last_uid); + ctx->last_uid = 0; + } + if (ctx->first_box_vname == NULL && box != NULL) + ctx->first_box_vname = i_strdup(box->vname); + ctx->box = box; + ctx->last_indexed_uid_set = FALSE; +} + +static void +fts_backend_lucene_update_expunge(struct fts_backend_update_context *_ctx, + uint32_t uid) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + struct fts_index_header hdr; + + if (!ctx->last_indexed_uid_set) { + if (!fts_index_get_header(ctx->box, &hdr)) + ctx->last_indexed_uid = 0; + else + ctx->last_indexed_uid = hdr.last_indexed_uid; + ctx->last_indexed_uid_set = TRUE; + } + if (ctx->last_indexed_uid == 0 || + uid > ctx->last_indexed_uid + 100) { + /* don't waste time adding expunge to log for a message that + isn't even indexed. this check is racy, because indexer may + just be in the middle of indexing this message. we'll + attempt to avoid that by skipping the expunging only if + indexing hasn't been done for a while (100 msgs). */ + return; + } + + if (ctx->expunge_ctx == NULL) { + ctx->expunge_ctx = + fts_expunge_log_append_begin(backend->expunge_log); + } + + if (fts_backend_select(backend, ctx->box) < 0) + _ctx->failed = TRUE; + + fts_expunge_log_append_next(ctx->expunge_ctx, + backend->selected_box_guid, uid); +} + +static bool +fts_backend_lucene_update_set_build_key(struct fts_backend_update_context *_ctx, + const struct fts_backend_build_key *key) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + + if (!ctx->lucene_opened) { + if (fts_backend_lucene_mkdir(backend) < 0) + ctx->ctx.failed = TRUE; + if (lucene_index_build_init(backend->index) < 0) + ctx->ctx.failed = TRUE; + ctx->lucene_opened = TRUE; + } + + if (fts_backend_select(backend, ctx->box) < 0) + _ctx->failed = TRUE; + + switch (key->type) { + case FTS_BACKEND_BUILD_KEY_HDR: + case FTS_BACKEND_BUILD_KEY_MIME_HDR: + i_assert(key->hdr_name != NULL); + + i_free(ctx->hdr_name); + ctx->hdr_name = i_strdup(key->hdr_name); + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART: + i_free_and_null(ctx->hdr_name); + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY: + i_unreached(); + } + + if (key->uid != ctx->last_uid) { + i_assert(key->uid >= ctx->last_uid); + ctx->last_uid = key->uid; + ctx->added_msgs++; + } + + ctx->uid = key->uid; + if (ctx->mime_parts) + ctx->part_num = message_part_to_idx(key->part); + return TRUE; +} + +static void +fts_backend_lucene_update_unset_build_key(struct fts_backend_update_context *_ctx) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + + ctx->uid = 0; + ctx->part_num = 0; + i_free_and_null(ctx->hdr_name); +} + +static int +fts_backend_lucene_update_build_more(struct fts_backend_update_context *_ctx, + const unsigned char *data, size_t size) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + int ret; + + i_assert(ctx->uid != 0); + + if (_ctx->failed) + return -1; + + T_BEGIN { + ret = lucene_index_build_more(backend->index, ctx->uid, + ctx->part_num, data, size, + ctx->hdr_name); + } T_END; + return ret; +} + +static int +fts_backend_lucene_refresh(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (backend->index != NULL) + lucene_index_close(backend->index); + return 0; +} + +static int fts_backend_lucene_rescan(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (lucene_index_rescan(backend->index) < 0) + return -1; + return lucene_index_optimize(backend->index); +} + +static int fts_backend_lucene_optimize(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + ret = lucene_index_expunge_from_log(backend->index, + backend->expunge_log); + if (ret == 0) { + /* log was corrupted, need to rescan */ + ret = lucene_index_rescan(backend->index); + } + if (ret >= 0) + ret = lucene_index_optimize(backend->index); + return ret; +} + +static int +fts_backend_lucene_lookup(struct fts_backend *_backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + if (fts_backend_select(backend, box) < 0) + return -1; + T_BEGIN { + ret = lucene_index_lookup(backend->index, args, flags, result); + } T_END; + return ret; +} + +/* a char* hash function from ASU -- from glib */ +static unsigned int wstr_hash(const wchar_t *s) +{ + unsigned int g, h = 0; + + while (*s != '\0') { + h = (h << 4) + *s; + if ((g = h & 0xf0000000UL) != 0) { + h = h ^ (g >> 24); + h = h ^ g; + } + s++; + } + + return h; +} + +static int +mailboxes_get_guids(struct mailbox *const boxes[], + HASH_TABLE_TYPE(wguid_result) guids, + struct fts_multi_result *result) +{ + ARRAY(struct fts_result) box_results; + struct fts_result *box_result; + const char *guid; + wchar_t *guid_dup; + unsigned int i, j; + + p_array_init(&box_results, result->pool, 32); + /* first create the box_results - we'll be using pointers to them + later on and appending to the array changes the pointers */ + for (i = 0; boxes[i] != NULL; i++) { + box_result = array_append_space(&box_results); + box_result->box = boxes[i]; + } + for (i = 0; boxes[i] != NULL; i++) { + if (fts_mailbox_get_guid(boxes[i], &guid) < 0) + return -1; + + i_assert(strlen(guid) == MAILBOX_GUID_HEX_LENGTH); + guid_dup = t_new(wchar_t, MAILBOX_GUID_HEX_LENGTH + 1); + for (j = 0; j < MAILBOX_GUID_HEX_LENGTH; j++) + guid_dup[j] = guid[j]; + + box_result = array_idx_modifiable(&box_results, i); + hash_table_insert(guids, guid_dup, box_result); + } + + array_append_zero(&box_results); + result->box_results = array_front_modifiable(&box_results); + return 0; +} + +static int +fts_backend_lucene_lookup_multi(struct fts_backend *_backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + T_BEGIN { + HASH_TABLE_TYPE(wguid_result) guids; + + hash_table_create(&guids, default_pool, 0, wstr_hash, wcscmp); + ret = mailboxes_get_guids(boxes, guids, result); + if (ret == 0) { + ret = lucene_index_lookup_multi(backend->index, + guids, args, flags, + result); + } + hash_table_destroy(&guids); + } T_END; + return ret; +} + +static void fts_backend_lucene_lookup_done(struct fts_backend *_backend) +{ + /* the next refresh is going to close the index anyway, so we might as + well do it now */ + (void)fts_backend_lucene_refresh(_backend); +} + +struct fts_backend fts_backend_lucene = { + .name = "lucene", + .flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS | + FTS_BACKEND_FLAG_FUZZY_SEARCH, + + { + fts_backend_lucene_alloc, + fts_backend_lucene_init, + fts_backend_lucene_deinit, + fts_backend_lucene_get_last_uid, + fts_backend_lucene_update_init, + fts_backend_lucene_update_deinit, + fts_backend_lucene_update_set_mailbox, + fts_backend_lucene_update_expunge, + fts_backend_lucene_update_set_build_key, + fts_backend_lucene_update_unset_build_key, + fts_backend_lucene_update_build_more, + fts_backend_lucene_refresh, + fts_backend_lucene_rescan, + fts_backend_lucene_optimize, + fts_backend_default_can_lookup, + fts_backend_lucene_lookup, + fts_backend_lucene_lookup_multi, + fts_backend_lucene_lookup_done + } +}; diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.c b/src/plugins/fts-lucene/fts-lucene-plugin.c new file mode 100644 index 0000000..7c58fa7 --- /dev/null +++ b/src/plugins/fts-lucene/fts-lucene-plugin.c @@ -0,0 +1,146 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "crc32.h" +#include "mail-storage-hooks.h" +#include "lucene-wrapper.h" +#include "fts-user.h" +#include "fts-lucene-plugin.h" + +const char *fts_lucene_plugin_version = DOVECOT_ABI_VERSION; + +struct fts_lucene_user_module fts_lucene_user_module = + MODULE_CONTEXT_INIT(&mail_user_module_register); + +static int +fts_lucene_plugin_init_settings(struct mail_user *user, + struct fts_lucene_settings *set, + const char *str) +{ + const char *const *tmp; + + for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) { + if (str_begins(*tmp, "default_language=")) { + set->default_language = + p_strdup(user->pool, *tmp + 17); + } else if (str_begins(*tmp, "textcat_conf=")) { + set->textcat_conf = p_strdup(user->pool, *tmp + 13); + } else if (str_begins(*tmp, "textcat_dir=")) { + set->textcat_dir = p_strdup(user->pool, *tmp + 12); + } else if (str_begins(*tmp, "whitespace_chars=")) { + set->whitespace_chars = p_strdup(user->pool, *tmp + 17); + } else if (strcmp(*tmp, "normalize") == 0) { + set->normalize = TRUE; + } else if (strcmp(*tmp, "no_snowball") == 0) { + set->no_snowball = TRUE; + } else if (strcmp(*tmp, "mime_parts") == 0) { + set->mime_parts = TRUE; + } else if (strcmp(*tmp, "use_libfts") == 0) { + set->use_libfts = TRUE; + } else { + i_error("fts_lucene: Invalid setting: %s", *tmp); + return -1; + } + } + if (set->textcat_conf != NULL && set->textcat_dir == NULL) { + i_error("fts_lucene: textcat_conf set, but textcat_dir unset"); + return -1; + } + if (set->textcat_conf == NULL && set->textcat_dir != NULL) { + i_error("fts_lucene: textcat_dir set, but textcat_conf unset"); + return -1; + } + if (set->whitespace_chars == NULL) + set->whitespace_chars = ""; +#ifndef HAVE_FTS_STEMMER + if (set->default_language != NULL) { + i_error("fts_lucene: default_language set, " + "but Dovecot built without stemmer support"); + return -1; + } +#else + if (set->default_language == NULL) + set->default_language = "english"; +#endif +#ifndef HAVE_FTS_TEXTCAT + if (set->textcat_conf != NULL) { + i_error("fts_lucene: textcat_dir set, " + "but Dovecot built without textcat support"); + return -1; + } +#endif + return 0; +} + +uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set) +{ + uint32_t crc; + + if (set->use_libfts) + return crc32_str("l"); + + /* checksum is always different when compiling with/without stemmer */ + crc = set->default_language == NULL ? 0 : + crc32_str(set->default_language); + crc = crc32_str_more(crc, set->whitespace_chars); + if (set->normalize) + crc = crc32_str_more(crc, "n"); + if (set->no_snowball) + crc = crc32_str_more(crc, "s"); + /* don't include mime_parts here, since changing it doesn't + necessarily need the index to be rebuilt */ + return crc; +} + +static void fts_lucene_mail_user_deinit(struct mail_user *user) +{ + struct fts_lucene_user *fuser = FTS_LUCENE_USER_CONTEXT_REQUIRE(user); + + fts_mail_user_deinit(user); + fuser->module_ctx.super.deinit(user); +} + +static void fts_lucene_mail_user_created(struct mail_user *user) +{ + struct mail_user_vfuncs *v = user->vlast; + struct fts_lucene_user *fuser; + const char *env, *error; + + fuser = p_new(user->pool, struct fts_lucene_user, 1); + env = mail_user_plugin_getenv(user, "fts_lucene"); + if (env == NULL) + env = ""; + + if (fts_lucene_plugin_init_settings(user, &fuser->set, env) < 0) { + /* invalid settings, disabling */ + return; + } + if (fts_mail_user_init(user, fuser->set.use_libfts, &error) < 0) { + i_error("fts_lucene: %s", error); + return; + } + + fuser->module_ctx.super = *v; + user->vlast = &fuser->module_ctx.super; + v->deinit = fts_lucene_mail_user_deinit; + MODULE_CONTEXT_SET(user, fts_lucene_user_module, fuser); +} + +static struct mail_storage_hooks fts_lucene_mail_storage_hooks = { + .mail_user_created = fts_lucene_mail_user_created +}; + +void fts_lucene_plugin_init(struct module *module ATTR_UNUSED) +{ + fts_backend_register(&fts_backend_lucene); + mail_storage_hooks_add(module, &fts_lucene_mail_storage_hooks); +} + +void fts_lucene_plugin_deinit(void) +{ + fts_backend_unregister(fts_backend_lucene.name); + mail_storage_hooks_remove(&fts_lucene_mail_storage_hooks); + lucene_shutdown(); +} + +const char *fts_lucene_plugin_dependencies[] = { "fts", NULL }; diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.h b/src/plugins/fts-lucene/fts-lucene-plugin.h new file mode 100644 index 0000000..69440fb --- /dev/null +++ b/src/plugins/fts-lucene/fts-lucene-plugin.h @@ -0,0 +1,36 @@ +#ifndef FTS_LUCENE_PLUGIN_H +#define FTS_LUCENE_PLUGIN_H + +#include "module-context.h" +#include "mail-user.h" +#include "fts-api-private.h" + +#define FTS_LUCENE_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_lucene_user_module) +#define FTS_LUCENE_USER_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_lucene_user_module) + +struct fts_lucene_settings { + const char *default_language; + const char *textcat_conf, *textcat_dir; + const char *whitespace_chars; + bool normalize; + bool no_snowball; + bool mime_parts; + bool use_libfts; +}; + +struct fts_lucene_user { + union mail_user_module_context module_ctx; + struct fts_lucene_settings set; +}; + +extern struct fts_backend fts_backend_lucene; +extern MODULE_CONTEXT_DEFINE(fts_lucene_user_module, &mail_user_module_register); + +uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set); + +void fts_lucene_plugin_init(struct module *module); +void fts_lucene_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts-lucene/lucene-wrapper.cc b/src/plugins/fts-lucene/lucene-wrapper.cc new file mode 100644 index 0000000..7446693 --- /dev/null +++ b/src/plugins/fts-lucene/lucene-wrapper.cc @@ -0,0 +1,1639 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +extern "C" { +#include "lib.h" +#include "array.h" +#include "unichar.h" +#include "hash.h" +#include "hex-binary.h" +#include "ioloop.h" +#include "unlink-directory.h" +#include "ioloop.h" +#include "mail-index.h" +#include "mail-search.h" +#include "mail-namespace.h" +#include "mailbox-list-private.h" +#include "mail-storage.h" +#include "fts-expunge-log.h" +#include "fts-lucene-plugin.h" +#include "lucene-wrapper.h" + +#include <sys/stat.h> +#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H +# include <libexttextcat/textcat.h> +#elif defined (HAVE_LIBTEXTCAT_TEXTCAT_H) +# include <libtextcat/textcat.h> +#elif defined (HAVE_FTS_TEXTCAT) +# include <textcat.h> +#endif +}; +#include <CLucene.h> +#include <CLucene/util/CLStreams.h> +#include <CLucene/search/MultiPhraseQuery.h> +#include "SnowballAnalyzer.h" + +/* Lucene's default is 10000. Use it here also.. */ +#define MAX_TERMS_PER_DOCUMENT 10000 +#define FTS_LUCENE_MAX_SEARCH_TERMS 1000 + +#define LUCENE_LOCK_OVERRIDE_SECS 60 +#define LUCENE_INDEX_CLOSE_TIMEOUT_MSECS (120*1000) + +using namespace lucene::document; +using namespace lucene::index; +using namespace lucene::search; +using namespace lucene::queryParser; +using namespace lucene::analysis; +using namespace lucene::analysis; +using namespace lucene::util; + +struct lucene_query { + Query *query; + BooleanClause::Occur occur; +}; +ARRAY_DEFINE_TYPE(lucene_query, struct lucene_query); + +struct lucene_analyzer { + char *lang; + Analyzer *analyzer; +}; + +struct lucene_index { + char *path; + struct mailbox_list *list; + struct fts_lucene_settings set; + normalizer_func_t *normalizer; + + wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1]; + + IndexReader *reader; + IndexWriter *writer; + IndexSearcher *searcher; + struct timeout *to_close; + + buffer_t *normalizer_buf; + Analyzer *default_analyzer, *cur_analyzer; + ARRAY(struct lucene_analyzer) analyzers; + + Document *doc; + uint32_t prev_uid, prev_part_idx; + bool no_analyzer; +}; + +struct rescan_context { + struct lucene_index *index; + + struct mailbox *box; + guid_128_t box_guid; + int box_ret; + + pool_t pool; + HASH_TABLE(uint8_t *, uint8_t *) seen_mailbox_guids; + + ARRAY_TYPE(seq_range) uids; + struct seq_range_iter uids_iter; + unsigned int uids_iter_n; + + uint32_t last_existing_uid; + bool warned; +}; + +static void *textcat = NULL; +#ifdef HAVE_FTS_TEXTCAT +static bool textcat_broken = FALSE; +#endif +static int textcat_refcount = 0; + +static void lucene_handle_error(struct lucene_index *index, CLuceneError &err, + const char *msg); +static void rescan_clear_unseen_mailboxes(struct lucene_index *index, + struct rescan_context *rescan_ctx); + +struct lucene_index *lucene_index_init(const char *path, + struct mailbox_list *list, + const struct fts_lucene_settings *set) +{ + struct lucene_index *index; + + index = i_new(struct lucene_index, 1); + index->path = i_strdup(path); + index->list = list; + if (set != NULL) { + index->set = *set; + index->normalizer = !set->normalize ? NULL : + mailbox_list_get_namespace(list)->user->default_normalizer; + } else { + /* this is valid only for doveadm dump, so it doesn't matter */ + index->set.default_language = ""; + } + if (index->set.use_libfts) { + index->default_analyzer = _CLNEW KeywordAnalyzer(); + } else +#ifdef HAVE_FTS_STEMMER + if (set == NULL || !set->no_snowball) { + index->default_analyzer = + _CLNEW snowball::SnowballAnalyzer(index->normalizer, + index->set.default_language); + } else +#endif + { + index->default_analyzer = _CLNEW standard::StandardAnalyzer(); + if (index->normalizer != NULL) { + index->normalizer_buf = + buffer_create_dynamic(default_pool, 1024); + } + } + + i_array_init(&index->analyzers, 32); + textcat_refcount++; + + return index; +} + +void lucene_index_close(struct lucene_index *index) +{ + timeout_remove(&index->to_close); + + _CLDELETE(index->searcher); + if (index->writer != NULL) { + try { + index->writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close"); + } + _CLDELETE(index->writer); + } + if (index->reader != NULL) { + try { + index->reader->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexReader::close"); + } + _CLDELETE(index->reader); + } +} + +void lucene_index_deinit(struct lucene_index *index) +{ + struct lucene_analyzer *a; + + lucene_index_close(index); + array_foreach_modifiable(&index->analyzers, a) { + i_free(a->lang); + _CLDELETE(a->analyzer); + } + array_free(&index->analyzers); + if (--textcat_refcount == 0 && textcat != NULL) { +#ifdef HAVE_FTS_TEXTCAT + textcat_Done(textcat); +#endif + textcat = NULL; + } + _CLDELETE(index->default_analyzer); + if (index->normalizer_buf != NULL) + buffer_free(&index->normalizer_buf); + i_free(index->path); + i_free(index); +} + +static void lucene_data_translate(struct lucene_index *index, + wchar_t *data, unsigned int len) +{ + const char *whitespace_chars = index->set.whitespace_chars; + unsigned int i; + + if (*whitespace_chars == '\0' || index->set.use_libfts) + return; + + for (i = 0; i < len; i++) { + if (strchr(whitespace_chars, data[i]) != NULL) + data[i] = ' '; + } +} + +void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize, + wchar_t *dest, size_t destsize) +{ + ARRAY_TYPE(unichars) dest_arr; + buffer_t buf = { { 0, 0 } }; + + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + + buffer_create_from_data(&buf, dest, sizeof(wchar_t) * destsize); + array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t)); + if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0) + i_unreached(); + i_assert(array_count(&dest_arr)+1 == destsize); + dest[destsize-1] = 0; +} + +static const wchar_t * +t_lucene_utf8_to_tchar(struct lucene_index *index, const char *str) +{ + ARRAY_TYPE(unichars) dest_arr; + const unichar_t *chars; + wchar_t *ret; + unsigned int len; + + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + + t_array_init(&dest_arr, strlen(str) + 1); + if (uni_utf8_to_ucs4(str, &dest_arr) < 0) + i_unreached(); + (void)array_append_space(&dest_arr); + + chars = array_get_modifiable(&dest_arr, &len); + ret = (wchar_t *)chars; + lucene_data_translate(index, ret, len - 1); + return ret; +} + +void lucene_index_select_mailbox(struct lucene_index *index, + const wchar_t guid[MAILBOX_GUID_HEX_LENGTH]) +{ + memcpy(index->mailbox_guid, guid, + MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t)); + index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0'; +} + +void lucene_index_unselect_mailbox(struct lucene_index *index) +{ + memset(index->mailbox_guid, 0, sizeof(index->mailbox_guid)); +} + +static void lucene_handle_error(struct lucene_index *index, CLuceneError &err, + const char *msg) +{ + const char *error, *what = err.what(); + + i_error("lucene index %s: %s failed (#%d): %s", + index->path, msg, err.number(), what); + + if (index->list != NULL && + (err.number() == CL_ERR_CorruptIndex || + err.number() == CL_ERR_IO)) { + /* delete corrupted index. most IO errors are also about + missing files and other such corruption.. */ + if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) + i_error("unlink_directory(%s) failed: %s", index->path, error); + rescan_clear_unseen_mailboxes(index, NULL); + } +} + +static int lucene_index_open(struct lucene_index *index) +{ + if (index->reader != NULL) { + i_assert(index->to_close != NULL); + timeout_reset(index->to_close); + return 1; + } + + if (!IndexReader::indexExists(index->path)) + return 0; + + try { + index->reader = IndexReader::open(index->path); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexReader::open()"); + return -1; + } + i_assert(index->to_close == NULL); + index->to_close = timeout_add(LUCENE_INDEX_CLOSE_TIMEOUT_MSECS, + lucene_index_close, index); + return 1; +} + +static int lucene_index_open_search(struct lucene_index *index) +{ + int ret; + + if (index->searcher != NULL) + return 1; + + if ((ret = lucene_index_open(index)) <= 0) + return ret; + + index->searcher = _CLNEW IndexSearcher(index->reader); + return 1; +} + +static int +lucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r) +{ + Field *field = doc->getField(_T("uid")); + const TCHAR *uid = field == NULL ? NULL : field->stringValue(); + if (uid == NULL) { + i_error("lucene: Corrupted FTS index %s: No UID for document", + index->path); + return -1; + } + + uint32_t num = 0; + while (*uid != 0) { + num = num*10 + (*uid - '0'); + uid++; + } + *uid_r = num; + return 0; +} + +static uint32_t +lucene_doc_get_part(struct lucene_index *index, Document *doc) +{ + Field *field = doc->getField(_T("part")); + const TCHAR *part = field == NULL ? NULL : field->stringValue(); + if (part == NULL) + return 0; + + uint32_t num = 0; + while (*part != 0) { + num = num*10 + (*part - '0'); + part++; + } + return num; +} + +int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r) +{ + int ret = 0; + + *last_uid_r = 0; + + if ((ret = lucene_index_open_search(index)) <= 0) + return ret; + + Term mailbox_term(_T("box"), index->mailbox_guid); + TermQuery query(&mailbox_term); + + uint32_t last_uid = 0; + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + if (uid > last_uid) + last_uid = uid; + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "last_uid search"); + ret = -1; + } + *last_uid_r = last_uid; + return ret; +} + +int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r) +{ + int ret; + + if (index->reader == NULL) { + lucene_index_close(index); + if ((ret = lucene_index_open(index)) < 0) + return -1; + if (ret == 0) { + *count_r = 0; + return 0; + } + } + *count_r = index->reader->numDocs(); + return 0; +} + +static int lucene_settings_check(struct lucene_index *index) +{ + uint32_t set_checksum; + const char *error; + int ret = 0; + + set_checksum = fts_lucene_settings_checksum(&index->set); + ret = fts_index_have_compatible_settings(index->list, set_checksum); + if (ret != 0) + return ret; + + i_warning("fts-lucene: Settings have changed, rebuilding index for mailbox"); + + /* settings changed, rebuild index */ + if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) { + i_error("unlink_directory(%s) failed: %s", index->path, error); + ret = -1; + } else { + rescan_clear_unseen_mailboxes(index, NULL); + } + return ret; +} + +int lucene_index_build_init(struct lucene_index *index) +{ + const char *lock_path; + struct stat st; + + lucene_index_close(index); + + lock_path = t_strdup_printf("%s/write.lock", index->path); + if (stat(lock_path, &st) == 0 && + st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) { + if (unlink(lock_path) < 0) + i_error("unlink(%s) failed: %m", lock_path); + } + + if (lucene_settings_check(index) < 0) + return -1; + + bool exists = IndexReader::indexExists(index->path); + try { + index->writer = _CLNEW IndexWriter(index->path, + index->default_analyzer, + !exists); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter()"); + return -1; + } + index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT); + return 0; +} + +#ifdef HAVE_FTS_TEXTCAT +static Analyzer *get_analyzer(struct lucene_index *index, const char *lang) +{ + normalizer_func_t *normalizer = index->normalizer; + const struct lucene_analyzer *a; + struct lucene_analyzer new_analyzer; + Analyzer *analyzer; + + array_foreach(&index->analyzers, a) { + if (strcmp(a->lang, lang) == 0) + return a->analyzer; + } + + memset(&new_analyzer, 0, sizeof(new_analyzer)); + new_analyzer.lang = i_strdup(lang); + new_analyzer.analyzer = + _CLNEW snowball::SnowballAnalyzer(normalizer, lang); + array_append_i(&index->analyzers.arr, &new_analyzer, 1); + return new_analyzer.analyzer; +} + +static void *textcat_init(struct lucene_index *index) +{ + const char *textcat_dir = index->set.textcat_dir; + unsigned int len; + + if (textcat_dir == NULL) + return NULL; + + /* textcat really wants the '/' suffix */ + len = strlen(textcat_dir); + if (len > 0 && textcat_dir[len-1] != '/') + textcat_dir = t_strconcat(textcat_dir, "/", NULL); + + return special_textcat_Init(index->set.textcat_conf, textcat_dir); +} + +static Analyzer * +guess_analyzer(struct lucene_index *index, const void *data, size_t size) +{ + const char *lang; + + if (textcat_broken) + return NULL; + + if (textcat == NULL) { + textcat = textcat_init(index); + if (textcat == NULL) { + textcat_broken = TRUE; + return NULL; + } + } + + /* try to guess the language */ + lang = textcat_Classify(textcat, (const char *)data, + I_MIN(size, 500)); + const char *p = strchr(lang, ']'); + if (lang[0] != '[' || p == NULL) + return NULL; + lang = t_strdup_until(lang+1, p); + if (strcmp(lang, index->set.default_language) == 0) + return index->default_analyzer; + + return get_analyzer(index, lang); +} +#else +static Analyzer * +guess_analyzer(struct lucene_index *index ATTR_UNUSED, + const void *data ATTR_UNUSED, size_t size ATTR_UNUSED) +{ + return NULL; +} +#endif + +static int lucene_index_build_flush(struct lucene_index *index) +{ + int ret = 0; + + if (index->doc == NULL) + return 0; + + try { + CL_NS(analysis)::Analyzer *analyzer = NULL; + + if (!index->set.use_libfts) { + analyzer = index->cur_analyzer != NULL ? + index->cur_analyzer : index->default_analyzer; + } + index->writer->addDocument(index->doc, analyzer); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::addDocument()"); + ret = -1; + } + + _CLDELETE(index->doc); + index->doc = NULL; + index->cur_analyzer = NULL; + return ret; +} + +int lucene_index_build_more(struct lucene_index *index, uint32_t uid, + uint32_t part_idx, const unsigned char *data, + size_t size, const char *hdr_name) +{ + wchar_t id[MAX_INT_STRLEN]; + size_t namesize, datasize; + + if (uid != index->prev_uid || part_idx != index->prev_part_idx) { + if (lucene_index_build_flush(index) < 0) + return -1; + index->prev_uid = uid; + index->prev_part_idx = part_idx; + + index->doc = _CLNEW Document(); + swprintf(id, N_ELEMENTS(id), L"%u", uid); + index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + if (part_idx != 0) { + swprintf(id, N_ELEMENTS(id), L"%u", part_idx); + index->doc->add(*_CLNEW Field(_T("part"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + } + index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + } + + if (index->normalizer_buf != NULL && !index->set.use_libfts) { + buffer_set_used_size(index->normalizer_buf, 0); + index->normalizer(data, size, index->normalizer_buf); + data = (const unsigned char *)index->normalizer_buf->data; + size = index->normalizer_buf->used; + } + + datasize = uni_utf8_strlen_n(data, size) + 1; + wchar_t *dest, *dest_free = NULL; + if (datasize < 4096) + dest = t_new(wchar_t, datasize); + else + dest = dest_free = i_new(wchar_t, datasize); + lucene_utf8_n_to_tchar(data, size, dest, datasize); + lucene_data_translate(index, dest, datasize-1); + + int token_flag = index->set.use_libfts ? + Field::INDEX_UNTOKENIZED : Field::INDEX_TOKENIZED; + if (hdr_name != NULL) { + /* hdr_name should be ASCII, but don't break in case it isn't */ + hdr_name = t_str_lcase(hdr_name); + namesize = uni_utf8_strlen(hdr_name) + 1; + wchar_t wname[namesize]; + lucene_utf8_n_to_tchar((const unsigned char *)hdr_name, + strlen(hdr_name), wname, namesize); + if (!index->set.use_libfts) + index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | token_flag)); + index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | token_flag)); + + if (fts_header_want_indexed(hdr_name)) + index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | token_flag)); + } else if (size > 0) { + if (index->cur_analyzer == NULL && !index->set.use_libfts) + index->cur_analyzer = guess_analyzer(index, data, size); + index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | token_flag)); + } + i_free(dest_free); + return 0; +} + +int lucene_index_build_deinit(struct lucene_index *index) +{ + int ret = 0; + + if (index->prev_uid == 0) { + /* no changes. */ + return 0; + } + index->prev_uid = 0; + index->prev_part_idx = 0; + + if (index->writer == NULL) { + lucene_index_close(index); + return -1; + } + + if (lucene_index_build_flush(index) < 0) + ret = -1; + + try { + index->writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close()"); + ret = -1; + } + + lucene_index_close(index); + return ret; +} + +static int +wcharguid_to_guid(guid_128_t dest, const wchar_t *src) +{ + buffer_t buf = { { 0, 0 } }; + char src_chars[GUID_128_SIZE*2 + 1]; + unsigned int i; + + for (i = 0; i < sizeof(src_chars)-1; i++) { + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'a' && src[i] <= 'f')) + src_chars[i] = src[i]; + else + return -1; + } + if (src[i] != '\0') + return -1; + src_chars[i] = '\0'; + + buffer_create_from_data(&buf, dest, GUID_128_SIZE); + return hex_to_binary(src_chars, &buf); +} + +static int +rescan_get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids) +{ + struct mailbox_status status; + + if (mailbox_get_status(box, STATUS_MESSAGES, &status) < 0) + return -1; + + if (status.messages > 0) T_BEGIN { + ARRAY_TYPE(seq_range) seqs; + + t_array_init(&seqs, 2); + seq_range_array_add_range(&seqs, 1, status.messages); + mailbox_get_uid_range(box, &seqs, uids); + } T_END; + return 0; +} + +static int rescan_finish(struct rescan_context *ctx) +{ + int ret; + + ret = fts_index_set_last_uid(ctx->box, ctx->last_existing_uid); + mailbox_free(&ctx->box); + return ret; +} + +static int +fts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc, + guid_128_t guid_r) +{ + Field *field = doc->getField(_T("box")); + const TCHAR *box_guid = field == NULL ? NULL : field->stringValue(); + if (box_guid == NULL) { + i_error("lucene: Corrupted FTS index %s: No mailbox for document", + index->path); + return -1; + } + + if (wcharguid_to_guid(guid_r, box_guid) < 0) { + i_error("lucene: Corrupted FTS index %s: " + "box field not in expected format", index->path); + return -1; + } + return 0; +} + +static int +rescan_open_mailbox(struct rescan_context *ctx, Document *doc) +{ + guid_128_t guid, *guidp; + int ret; + + if (fts_lucene_get_mailbox_guid(ctx->index, doc, guid) < 0) + return 0; + + if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) { + /* same as last one */ + return ctx->box_ret; + } + memcpy(ctx->box_guid, guid, sizeof(ctx->box_guid)); + + guidp = p_new(ctx->pool, guid_128_t, 1); + memcpy(guidp, guid, sizeof(*guidp)); + hash_table_insert(ctx->seen_mailbox_guids, guidp, guidp); + + if (ctx->box != NULL) + rescan_finish(ctx); + ctx->box = mailbox_alloc_guid(ctx->index->list, guid, + (enum mailbox_flags)0); + if (mailbox_open(ctx->box) < 0) { + enum mail_error error; + const char *errstr; + + errstr = mailbox_get_last_internal_error(ctx->box, &error); + if (error == MAIL_ERROR_NOTFOUND) + ret = 0; + else { + i_error("lucene: Couldn't open mailbox %s: %s", + mailbox_get_vname(ctx->box), errstr); + ret = -1; + } + mailbox_free(&ctx->box); + ctx->box_ret = ret; + return ret; + } + if (mailbox_sync(ctx->box, (enum mailbox_sync_flags)0) < 0) { + i_error("lucene: Failed to sync mailbox %s: %s", + mailbox_get_vname(ctx->box), + mailbox_get_last_internal_error(ctx->box, NULL)); + mailbox_free(&ctx->box); + ctx->box_ret = -1; + return -1; + } + + array_clear(&ctx->uids); + rescan_get_uids(ctx->box, &ctx->uids); + + ctx->warned = FALSE; + ctx->last_existing_uid = 0; + ctx->uids_iter_n = 0; + seq_range_array_iter_init(&ctx->uids_iter, &ctx->uids); + + ctx->box_ret = 1; + return 1; +} + +static int +rescan_next(struct rescan_context *ctx, Document *doc) +{ + uint32_t lucene_uid, idx_uid; + + if (lucene_doc_get_uid(ctx->index, doc, &lucene_uid) < 0) + return 0; + + if (seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n, + &idx_uid)) { + if (idx_uid == lucene_uid) { + ctx->uids_iter_n++; + ctx->last_existing_uid = idx_uid; + return 1; + } + if (idx_uid < lucene_uid) { + /* lucene is missing an UID from the middle. delete + the rest of the messages from this mailbox and + reindex. */ + if (!ctx->warned) { + i_warning("lucene: Mailbox %s " + "missing UIDs in the middle", + mailbox_get_vname(ctx->box)); + ctx->warned = TRUE; + } + } else { + /* UID has been expunged from index. delete from + lucene as well. */ + } + return 0; + } else { + /* the rest of the messages have been expunged from index */ + return 0; + } +} + +static void +rescan_clear_unseen_mailbox(struct lucene_index *index, + struct rescan_context *rescan_ctx, + const char *vname, + const struct fts_index_header *hdr) +{ + struct mailbox *box; + struct mailbox_metadata metadata; + + box = mailbox_alloc(index->list, vname, + (enum mailbox_flags)0); + if (mailbox_open(box) == 0 && + mailbox_get_metadata(box, MAILBOX_METADATA_GUID, + &metadata) == 0 && + (rescan_ctx == NULL || + hash_table_lookup(rescan_ctx->seen_mailbox_guids, + metadata.guid) == NULL)) { + /* this mailbox had no records in lucene index. + make sure its last indexed uid is 0 */ + (void)fts_index_set_header(box, hdr); + } + mailbox_free(&box); +} + +static void rescan_clear_unseen_mailboxes(struct lucene_index *index, + struct rescan_context *rescan_ctx) +{ + const enum mailbox_list_iter_flags iter_flags = + (enum mailbox_list_iter_flags) + (MAILBOX_LIST_ITER_NO_AUTO_BOXES | + MAILBOX_LIST_ITER_RETURN_NO_FLAGS); + struct mailbox_list_iterate_context *iter; + const struct mailbox_info *info; + struct fts_index_header hdr; + struct mail_namespace *ns = index->list->ns; + const char *vname; + + memset(&hdr, 0, sizeof(hdr)); + hdr.settings_checksum = fts_lucene_settings_checksum(&index->set); + + iter = mailbox_list_iter_init(index->list, "*", iter_flags); + while ((info = mailbox_list_iter_next(iter)) != NULL) + rescan_clear_unseen_mailbox(index, rescan_ctx, info->vname, &hdr); + (void)mailbox_list_iter_deinit(&iter); + + if (ns->prefix_len > 0 && + ns->prefix[ns->prefix_len-1] == mail_namespace_get_sep(ns)) { + /* namespace prefix itself isn't returned by the listing */ + vname = t_strndup(index->list->ns->prefix, + index->list->ns->prefix_len-1); + rescan_clear_unseen_mailbox(index, rescan_ctx, vname, &hdr); + } +} + +int lucene_index_rescan(struct lucene_index *index) +{ + static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL }; + struct rescan_context ctx; + bool failed = false; + int ret; + + i_assert(index->list != NULL); + + if ((ret = lucene_index_open_search(index)) < 0) + return ret; + + Term term(_T("box"), _T("*")); + WildcardQuery query(&term); + Sort sort(sort_fields); + + memset(&ctx, 0, sizeof(ctx)); + ctx.index = index; + ctx.pool = pool_alloconly_create("guids", 1024); + hash_table_create(&ctx.seen_mailbox_guids, ctx.pool, 0, + guid_128_hash, guid_128_cmp); + i_array_init(&ctx.uids, 128); + + if (ret > 0) try { + Hits *hits = index->searcher->search(&query, &sort); + + for (size_t i = 0; i < hits->length(); i++) { + ret = rescan_open_mailbox(&ctx, &hits->doc(i)); + if (ret > 0) + ret = rescan_next(&ctx, &hits->doc(i)); + if (ret < 0) + failed = true; + else if (ret == 0) + index->reader->deleteDocument(hits->id(i)); + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "rescan search"); + failed = true; + } + lucene_index_close(index); + if (ctx.box != NULL) + rescan_finish(&ctx); + array_free(&ctx.uids); + + rescan_clear_unseen_mailboxes(index, &ctx); + hash_table_destroy(&ctx.seen_mailbox_guids); + pool_unref(&ctx.pool); + return failed ? -1 : 0; +} + +static void guid128_to_wguid(const guid_128_t guid, + wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH + 1]) +{ + buffer_t buf = { { 0, 0 } }; + unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH]; + unsigned int i; + + buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH); + binary_to_hex_append(&buf, guid, GUID_128_SIZE); + for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++) + wguid_hex[i] = guid_hex[i]; + wguid_hex[i] = '\0'; +} + +static bool +lucene_index_add_uid_filter(BooleanQuery *query, + const struct fts_expunge_log_read_record *rec) +{ + struct seq_range_iter iter; + wchar_t wuid[MAX_INT_STRLEN]; + unsigned int n; + uint32_t uid; + + /* RangeQuery and WildcardQuery work by enumerating through all terms + that match them, and then adding TermQueries for them. So we can + simply do the same directly, and if it looks like there are too + many terms just go through everything. */ + + if (seq_range_count(&rec->uids) > FTS_LUCENE_MAX_SEARCH_TERMS) + return false; + + seq_range_array_iter_init(&iter, &rec->uids); n = 0; + while (seq_range_array_iter_nth(&iter, n++, &uid)) { + swprintf(wuid, N_ELEMENTS(wuid), L"%u", uid); + + Term *term = _CLNEW Term(_T("uid"), wuid); + query->add(_CLNEW TermQuery(term), true, BooleanClause::SHOULD); + _CLDECDELETE(term); + } + return true; +} + +static int +lucene_index_expunge_record(struct lucene_index *index, + const struct fts_expunge_log_read_record *rec) +{ + int ret; + + if ((ret = lucene_index_open_search(index)) <= 0) + return ret; + + BooleanQuery query; + BooleanQuery uids_query; + + if (lucene_index_add_uid_filter(&uids_query, rec)) + query.add(&uids_query, BooleanClause::MUST); + + wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1]; + guid128_to_wguid(rec->mailbox_guid, wguid); + Term term(_T("box"), wguid); + TermQuery mailbox_query(&term); + query.add(&mailbox_query, BooleanClause::MUST); + + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0 || + seq_range_exists(&rec->uids, uid)) + index->reader->deleteDocument(hits->id(i)); + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "expunge search"); + ret = -1; + } + return ret < 0 ? -1 : 0; +} + +int lucene_index_expunge_from_log(struct lucene_index *index, + struct fts_expunge_log *log) +{ + struct fts_expunge_log_read_ctx *ctx; + const struct fts_expunge_log_read_record *rec; + int ret = 0, ret2; + + ctx = fts_expunge_log_read_begin(log); + while ((rec = fts_expunge_log_read_next(ctx)) != NULL) { + if (lucene_index_expunge_record(index, rec) < 0) { + ret = -1; + break; + } + } + + lucene_index_close(index); + + ret2 = fts_expunge_log_read_end(&ctx); + if (ret < 0 || ret2 < 0) + return -1; + return ret2; +} + +int lucene_index_optimize(struct lucene_index *index) +{ + int ret = 0; + + if (!IndexReader::indexExists(index->path)) + return 0; + if (IndexReader::isLocked(index->path)) + IndexReader::unlock(index->path); + + IndexWriter *writer = NULL; + try { + writer = _CLNEW IndexWriter(index->path, index->default_analyzer, false); + writer->optimize(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::optimize()"); + ret = -1; + } + try { + writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close()"); + ret = -1; + } + if (writer != NULL) + _CLDELETE(writer); + return ret; +} + +// Mostly copy&pasted from CLucene's QueryParser +static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + StringReader reader(queryText); + TokenStream* source = analyzer->tokenStream(_field, &reader); + + CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v; + CL_NS(analysis)::Token* t = NULL; + int32_t positionCount = 0; + bool severalTokensAtSamePosition = false; + + while (true) { + t = _CLNEW Token(); + try { + Token* _t = source->next(t); + if (_t == NULL) _CLDELETE(t); + }_CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);,{ + t = NULL; + }); + if (t == NULL) + break; + v.push_back(t); + if (t->getPositionIncrement() != 0) + positionCount += t->getPositionIncrement(); + else + severalTokensAtSamePosition = true; + } + try { + source->close(); + } + _CLCATCH_ERR_CLEANUP(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);} ); /* cleanup */ + _CLLDELETE(source); + + if (v.size() == 0) + return NULL; + else if (v.size() == 1) { + Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer()); + Query* ret; + if (fuzzy) + ret = _CLNEW FuzzyQuery( tm ); + else + ret = _CLNEW TermQuery( tm ); + _CLDECDELETE(tm); + return ret; + } else { + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery* q = _CLNEW BooleanQuery(true); + for(size_t i=0; i<v.size(); i++ ){ + Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer()); + q->add(_CLNEW TermQuery(tm), true, BooleanClause::SHOULD); + _CLDECDELETE(tm); + } + return q; + }else { + MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery(); + CLArrayList<Term*> multiTerms; + int32_t position = -1; + for (size_t i = 0; i < v.size(); i++) { + t = v.at(i); + if (t->getPositionIncrement() > 0 && multiTerms.size() > 0) { + ValueArray<Term*> termsArray(multiTerms.size()); + multiTerms.toArray(termsArray.values); + mpq->add(&termsArray,position); + multiTerms.clear(); + } + position += t->getPositionIncrement(); + multiTerms.push_back(_CLNEW Term(_field, t->termBuffer())); + } + ValueArray<Term*> termsArray(multiTerms.size()); + multiTerms.toArray(termsArray.values); + mpq->add(&termsArray,position); + return mpq; + } + }else { + PhraseQuery* pq = _CLNEW PhraseQuery(); + int32_t position = -1; + + for (size_t i = 0; i < v.size(); i++) { + t = v.at(i); + Term* tm = _CLNEW Term(_field, t->termBuffer()); + position += t->getPositionIncrement(); + pq->add(tm,position); + _CLDECDELETE(tm); + } + return pq; + } + } +} + +static Query * +lucene_get_query_str(struct lucene_index *index, + const TCHAR *key, const char *str, bool fuzzy) +{ + const TCHAR *wvalue; + Analyzer *analyzer; + + if (index->set.use_libfts) { + const wchar_t *wstr = t_lucene_utf8_to_tchar(index, str); + Term* tm = _CLNEW Term(key, wstr); + Query* ret; + if (fuzzy) + ret = _CLNEW FuzzyQuery( tm ); + else + ret = _CLNEW TermQuery( tm ); + _CLDECDELETE(tm); + return ret; + } + + if (index->normalizer_buf != NULL) { + buffer_set_used_size(index->normalizer_buf, 0); + index->normalizer(str, strlen(str), index->normalizer_buf); + buffer_append_c(index->normalizer_buf, '\0'); + str = (const char *)index->normalizer_buf->data; + } + + wvalue = t_lucene_utf8_to_tchar(index, str); + analyzer = guess_analyzer(index, str, strlen(str)); + if (analyzer == NULL) { + analyzer = index->default_analyzer; + i_assert(analyzer != NULL); + } + + return getFieldQuery(analyzer, key, wvalue, fuzzy); +} + +static Query * +lucene_get_query(struct lucene_index *index, + const TCHAR *key, const struct mail_search_arg *arg) +{ + return lucene_get_query_str(index, key, arg->value.str, arg->fuzzy); +} + +static bool +lucene_add_definite_query(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct mail_search_arg *arg, + enum fts_lookup_flags flags) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + Query *q; + + if (arg->no_fts) + return false; + + if (arg->match_not && !and_args) { + /* FIXME: we could handle this by doing multiple queries.. */ + return false; + } + + switch (arg->type) { + case SEARCH_TEXT: { + Query *q1 = lucene_get_query(index, _T("hdr"), arg); + Query *q2 = lucene_get_query(index, _T("body"), arg); + + if (q1 == NULL && q2 == NULL) + q = NULL; + else { + BooleanQuery *bq = _CLNEW BooleanQuery(); + if (q1 != NULL) + bq->add(q1, true, BooleanClause::SHOULD); + if (q2 != NULL) + bq->add(q2, true, BooleanClause::SHOULD); + q = bq; + } + break; + } + case SEARCH_BODY: + q = lucene_get_query(index, _T("body"), arg); + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (!fts_header_want_indexed(arg->hdr_field_name) || + *arg->value.str == '\0') + return false; + + q = lucene_get_query(index, + t_lucene_utf8_to_tchar(index, t_str_lcase(arg->hdr_field_name)), + arg); + break; + default: + return false; + } + + if (q == NULL) { + /* couldn't handle this search after all (e.g. trying to search + a stop word) */ + return false; + } + + struct lucene_query *lq = array_append_space(&queries); + lq->query = q; + if (!and_args) + lq->occur = BooleanClause::SHOULD; + else if (!arg->match_not) + lq->occur = BooleanClause::MUST; + else + lq->occur = BooleanClause::MUST_NOT; + return true; +} + +static bool +lucene_add_maybe_query(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct mail_search_arg *arg, + enum fts_lookup_flags flags) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + Query *q = NULL; + + if (arg->no_fts) + return false; + + if (arg->match_not) { + /* FIXME: we could handle this by doing multiple queries.. */ + return false; + } + + switch (arg->type) { + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (*arg->value.str == '\0' && !index->set.use_libfts) { + /* checking potential existence of the header name */ + q = lucene_get_query_str(index, _T("hdr"), + t_str_lcase(arg->hdr_field_name), FALSE); + break; + } + + if (fts_header_want_indexed(arg->hdr_field_name)) + return false; + + /* we can check if the search key exists in some header and + filter out the messages that have no chance of matching */ + q = lucene_get_query(index, _T("hdr"), arg); + break; + default: + return false; + } + + if (q == NULL) { + /* couldn't handle this search after all (e.g. trying to search + a stop word) */ + return false; + } + struct lucene_query *lq = array_append_space(&queries); + lq->query = q; + if (!and_args) + lq->occur = BooleanClause::SHOULD; + else if (!arg->match_not) + lq->occur = BooleanClause::MUST; + else + lq->occur = BooleanClause::MUST_NOT; + return true; +} + +static bool queries_have_non_must_nots(ARRAY_TYPE(lucene_query) &queries) +{ + const struct lucene_query *lq; + + array_foreach(&queries, lq) { + if (lq->occur != BooleanClause::MUST_NOT) + return TRUE; + } + return FALSE; +} + +static void search_query_add(BooleanQuery &query, + ARRAY_TYPE(lucene_query) &queries) +{ + BooleanQuery *search_query = _CLNEW BooleanQuery(); + const struct lucene_query *lq; + + if (queries_have_non_must_nots(queries)) { + array_foreach(&queries, lq) + search_query->add(lq->query, true, lq->occur); + query.add(search_query, true, BooleanClause::MUST); + } else { + array_foreach(&queries, lq) + search_query->add(lq->query, true, BooleanClause::SHOULD); + query.add(search_query, true, BooleanClause::MUST_NOT); + } +} + +static int +lucene_index_search(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct fts_result *result, ARRAY_TYPE(seq_range) *uids_r) +{ + struct fts_score_map *score; + int ret = 0; + + BooleanQuery query; + search_query_add(query, queries); + + Term mailbox_term(_T("box"), index->mailbox_guid); + TermQuery mailbox_query(&mailbox_term); + query.add(&mailbox_query, BooleanClause::MUST); + + try { + Hits *hits = index->searcher->search(&query); + + uint32_t last_uid = 0; + if (result != NULL) + result->scores_sorted = true; + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + if (seq_range_array_add(uids_r, uid)) { + /* duplicate result */ + } else if (result != NULL) { + if (uid < last_uid) + result->scores_sorted = false; + last_uid = uid; + + score = array_append_space(&result->scores); + score->uid = uid; + score->score = hits->score(i); + } + } + _CLDELETE(hits); + return ret; + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "search"); + return -1; + } +} + +int lucene_index_lookup(struct lucene_index *index, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + struct mail_search_arg *arg; + + if (lucene_index_open_search(index) <= 0) + return -1; + + ARRAY_TYPE(lucene_query) def_queries; + t_array_init(&def_queries, 16); + bool have_definites = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_definite_query(index, def_queries, arg, flags)) { + arg->match_always = true; + have_definites = true; + } + } + + if (have_definites) { + ARRAY_TYPE(seq_range) *uids_arr = + (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ? + &result->definite_uids : &result->maybe_uids; + if (lucene_index_search(index, def_queries, result, + uids_arr) < 0) + return -1; + } + + if (have_definites) { + /* FIXME: mixing up definite + maybe queries is broken. if the + definite query matched, it'll just assume that the maybe + queries matched as well */ + return 0; + } + + ARRAY_TYPE(lucene_query) maybe_queries; + t_array_init(&maybe_queries, 16); + bool have_maybies = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_maybe_query(index, maybe_queries, arg, flags)) { + arg->match_always = true; + have_maybies = true; + } + } + + if (have_maybies) { + if (lucene_index_search(index, maybe_queries, NULL, + &result->maybe_uids) < 0) + return -1; + } + return 0; +} + +static int +lucene_index_search_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + ARRAY_TYPE(lucene_query) &queries, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct fts_score_map *score; + int ret = 0; + + BooleanQuery query; + search_query_add(query, queries); + + BooleanQuery mailbox_query; + struct hash_iterate_context *iter; + void *key, *value; + iter = hash_table_iterate_init(guids); + while (hash_table_iterate(iter, guids, &key, &value)) { + Term *term = _CLNEW Term(_T("box"), (wchar_t *)key); + TermQuery *q = _CLNEW TermQuery(term); + mailbox_query.add(q, true, BooleanClause::SHOULD); + } + hash_table_iterate_deinit(&iter); + + query.add(&mailbox_query, BooleanClause::MUST); + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + Field *field = hits->doc(i).getField(_T("box")); + const TCHAR *box_guid = field == NULL ? NULL : field->stringValue(); + if (box_guid == NULL) { + i_error("lucene: Corrupted FTS index %s: No mailbox for document", + index->path); + ret = -1; + break; + } + struct fts_result *br = + hash_table_lookup(guids, box_guid); + if (br == NULL) { + i_warning("lucene: Returned unexpected mailbox with GUID %ls", box_guid); + continue; + } + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + ARRAY_TYPE(seq_range) *uids_arr = + (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ? + &br->maybe_uids : &br->definite_uids; + if (!array_is_created(uids_arr)) { + p_array_init(uids_arr, result->pool, 32); + p_array_init(&br->scores, result->pool, 32); + } + if (seq_range_array_add(uids_arr, uid)) { + /* duplicate result */ + } else { + score = array_append_space(&br->scores); + score->uid = uid; + score->score = hits->score(i); + } + } + _CLDELETE(hits); + return ret; + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "multi search"); + return -1; + } +} + +int lucene_index_lookup_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct mail_search_arg *arg; + + if (lucene_index_open_search(index) <= 0) + return -1; + + ARRAY_TYPE(lucene_query) def_queries; + t_array_init(&def_queries, 16); + bool have_definites = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_definite_query(index, def_queries, arg, flags)) { + arg->match_always = true; + have_definites = true; + } + } + + if (have_definites) { + if (lucene_index_search_multi(index, guids, def_queries, flags, + result) < 0) + return -1; + } + return 0; +} + +struct lucene_index_iter { + struct lucene_index *index; + struct lucene_index_record rec; + + Term *term; + WildcardQuery *query; + Sort *sort; + + Hits *hits; + size_t i; + bool failed; +}; + +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index *index) +{ + static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL }; + struct lucene_index_iter *iter; + int ret; + + iter = i_new(struct lucene_index_iter, 1); + iter->index = index; + if ((ret = lucene_index_open_search(index)) <= 0) { + if (ret < 0) + iter->failed = true; + return iter; + } + + iter->term = _CLNEW Term(_T("box"), _T("*")); + iter->query = _CLNEW WildcardQuery(iter->term); + iter->sort = _CLNEW Sort(sort_fields); + + try { + iter->hits = index->searcher->search(iter->query, iter->sort); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "rescan search"); + iter->failed = true; + } + return iter; +} + +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter) +{ + if (iter->hits == NULL) + return NULL; + if (iter->i == iter->hits->length()) + return NULL; + + Document *doc = &iter->hits->doc(iter->i); + iter->i++; + + memset(&iter->rec, 0, sizeof(iter->rec)); + (void)fts_lucene_get_mailbox_guid(iter->index, doc, + iter->rec.mailbox_guid); + (void)lucene_doc_get_uid(iter->index, doc, &iter->rec.uid); + iter->rec.part_num = lucene_doc_get_part(iter->index, doc); + return &iter->rec; +} + +int lucene_index_iter_deinit(struct lucene_index_iter **_iter) +{ + struct lucene_index_iter *iter = *_iter; + int ret = iter->failed ? -1 : 0; + + *_iter = NULL; + if (iter->hits != NULL) + _CLDELETE(iter->hits); + if (iter->query != NULL) { + _CLDELETE(iter->query); + _CLDELETE(iter->sort); + _CLDELETE(iter->term); + } + i_free(iter); + return ret; +} + +void lucene_shutdown(void) +{ + _lucene_shutdown(); +} diff --git a/src/plugins/fts-lucene/lucene-wrapper.h b/src/plugins/fts-lucene/lucene-wrapper.h new file mode 100644 index 0000000..270e902 --- /dev/null +++ b/src/plugins/fts-lucene/lucene-wrapper.h @@ -0,0 +1,67 @@ +#ifndef LUCENE_WRAPPER_H +#define LUCENE_WRAPPER_H + +#include "fts-api-private.h" +#include "guid.h" + +struct mailbox_list; +struct fts_expunge_log; +struct fts_lucene_settings; + +#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2) + +struct lucene_index_record { + guid_128_t mailbox_guid; + uint32_t uid, part_num; +}; + +HASH_TABLE_DEFINE_TYPE(wguid_result, wchar_t *, struct fts_result *); + +struct lucene_index * +lucene_index_init(const char *path, struct mailbox_list *list, + const struct fts_lucene_settings *set) + ATTR_NULL(2, 3); +void lucene_index_deinit(struct lucene_index *index); + +void lucene_index_select_mailbox(struct lucene_index *index, + const wchar_t guid[MAILBOX_GUID_HEX_LENGTH]); +void lucene_index_unselect_mailbox(struct lucene_index *index); +int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r); +int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r); + +int lucene_index_build_init(struct lucene_index *index); +int lucene_index_build_more(struct lucene_index *index, uint32_t uid, + uint32_t part_num, const unsigned char *data, + size_t size, const char *hdr_name); +int lucene_index_build_deinit(struct lucene_index *index); + +void lucene_index_close(struct lucene_index *index); +int lucene_index_rescan(struct lucene_index *index); +int lucene_index_expunge_from_log(struct lucene_index *index, + struct fts_expunge_log *log); +int lucene_index_optimize(struct lucene_index *index); + +int lucene_index_lookup(struct lucene_index *index, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result); + +int lucene_index_lookup_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); + +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index *index); +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter); +int lucene_index_iter_deinit(struct lucene_index_iter **iter); + +/* internal: */ +void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize, + wchar_t *dest, size_t destsize); + +void lucene_shutdown(void); + +#endif diff --git a/src/plugins/fts-lucene/textcat.conf b/src/plugins/fts-lucene/textcat.conf new file mode 100644 index 0000000..d75c4fe --- /dev/null +++ b/src/plugins/fts-lucene/textcat.conf @@ -0,0 +1,25 @@ +# +# A sample config file for the language models +# provided with Gertjan van Noords language guesser +# (http://odur.let.rug.nl/~vannoord/TextCat/) +# +# Notes: +# - You may consider eliminating a couple of small languages from this +# list because they cause false positives with big languages and are +# bad for performance. (Do you really want to recognize Drents?) +# - Putting the most probable languages at the top of the list +# improves performance, because this will raise the threshold for +# likely candidates more quickly. +# +LM/english.lm english +LM/italian.lm italian +LM/danish.lm danish +LM/dutch.lm dutch +LM/finnish.lm finnish +LM/french.lm french +LM/german.lm german +LM/norwegian.lm norwegian +LM/portuguese.lm portuguese +LM/russian.lm russian +LM/spanish.lm spanish +LM/swedish.lm swedish diff --git a/src/plugins/fts-solr/Makefile.am b/src/plugins/fts-solr/Makefile.am new file mode 100644 index 0000000..cd1e17b --- /dev/null +++ b/src/plugins/fts-solr/Makefile.am @@ -0,0 +1,64 @@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts + +NOPLUGIN_LDFLAGS = +lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib21_fts_solr_plugin.la + +if DOVECOT_PLUGIN_DEPS +fts_plugin_dep = ../fts/lib20_fts_plugin.la +endif + +lib21_fts_solr_plugin_la_LIBADD = \ + $(fts_plugin_dep) \ + -lexpat + +lib21_fts_solr_plugin_la_SOURCES = \ + fts-backend-solr.c \ + fts-backend-solr-old.c \ + fts-solr-plugin.c \ + solr-response.c \ + solr-connection.c + +noinst_HEADERS = \ + fts-solr-plugin.h \ + solr-response.h \ + solr-connection.h + +test_programs = \ + test-solr-response + +test_libs = \ + ../../lib-test/libtest.la \ + ../../lib-charset/libcharset.la \ + ../../lib/liblib.la \ + $(MODULE_LIBS) + +noinst_PROGRAMS = test-solr-response + +test_solr_response_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_srcdir)/src/lib-test +test_solr_response_SOURCES = \ + solr-response.c \ + test-solr-response.c +test_solr_response_LDADD = \ + $(test_libs) -lexpat + +pkginc_libdir=$(pkgincludedir) +pkginc_lib_HEADERS = $(headers) + +check: check-am check-test +check-test: all-am + for bin in $(test_programs); do \ + if ! $(RUN_TEST) ./$$bin; then exit 1; fi; \ + done diff --git a/src/plugins/fts-solr/Makefile.in b/src/plugins/fts-solr/Makefile.in new file mode 100644 index 0000000..ee248f8 --- /dev/null +++ b/src/plugins/fts-solr/Makefile.in @@ -0,0 +1,965 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = test-solr-response$(EXEEXT) +subdir = src/plugins/fts-solr +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(pkginc_lib_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" \ + "$(DESTDIR)$(pkginc_libdir)" +LTLIBRARIES = $(module_LTLIBRARIES) +lib21_fts_solr_plugin_la_DEPENDENCIES = $(fts_plugin_dep) +am_lib21_fts_solr_plugin_la_OBJECTS = fts-backend-solr.lo \ + fts-backend-solr-old.lo fts-solr-plugin.lo solr-response.lo \ + solr-connection.lo +lib21_fts_solr_plugin_la_OBJECTS = \ + $(am_lib21_fts_solr_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib21_fts_solr_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib21_fts_solr_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_test_solr_response_OBJECTS = \ + test_solr_response-solr-response.$(OBJEXT) \ + test_solr_response-test-solr-response.$(OBJEXT) +test_solr_response_OBJECTS = $(am_test_solr_response_OBJECTS) +am__DEPENDENCIES_1 = +am__DEPENDENCIES_2 = ../../lib-test/libtest.la \ + ../../lib-charset/libcharset.la ../../lib/liblib.la \ + $(am__DEPENDENCIES_1) +test_solr_response_DEPENDENCIES = $(am__DEPENDENCIES_2) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/fts-backend-solr-old.Plo \ + ./$(DEPDIR)/fts-backend-solr.Plo \ + ./$(DEPDIR)/fts-solr-plugin.Plo \ + ./$(DEPDIR)/solr-connection.Plo ./$(DEPDIR)/solr-response.Plo \ + ./$(DEPDIR)/test_solr_response-solr-response.Po \ + ./$(DEPDIR)/test_solr_response-test-solr-response.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(lib21_fts_solr_plugin_la_SOURCES) \ + $(test_solr_response_SOURCES) +DIST_SOURCES = $(lib21_fts_solr_plugin_la_SOURCES) \ + $(test_solr_response_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) $(pkginc_lib_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = @FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = @PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts + +lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib21_fts_solr_plugin.la + +@DOVECOT_PLUGIN_DEPS_TRUE@fts_plugin_dep = ../fts/lib20_fts_plugin.la +lib21_fts_solr_plugin_la_LIBADD = \ + $(fts_plugin_dep) \ + -lexpat + +lib21_fts_solr_plugin_la_SOURCES = \ + fts-backend-solr.c \ + fts-backend-solr-old.c \ + fts-solr-plugin.c \ + solr-response.c \ + solr-connection.c + +noinst_HEADERS = \ + fts-solr-plugin.h \ + solr-response.h \ + solr-connection.h + +test_programs = \ + test-solr-response + +test_libs = \ + ../../lib-test/libtest.la \ + ../../lib-charset/libcharset.la \ + ../../lib/liblib.la \ + $(MODULE_LIBS) + +test_solr_response_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_srcdir)/src/lib-test + +test_solr_response_SOURCES = \ + solr-response.c \ + test-solr-response.c + +test_solr_response_LDADD = \ + $(test_libs) -lexpat + +pkginc_libdir = $(pkgincludedir) +pkginc_lib_HEADERS = $(headers) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts-solr/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts-solr/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +lib21_fts_solr_plugin.la: $(lib21_fts_solr_plugin_la_OBJECTS) $(lib21_fts_solr_plugin_la_DEPENDENCIES) $(EXTRA_lib21_fts_solr_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib21_fts_solr_plugin_la_LINK) -rpath $(moduledir) $(lib21_fts_solr_plugin_la_OBJECTS) $(lib21_fts_solr_plugin_la_LIBADD) $(LIBS) + +test-solr-response$(EXEEXT): $(test_solr_response_OBJECTS) $(test_solr_response_DEPENDENCIES) $(EXTRA_test_solr_response_DEPENDENCIES) + @rm -f test-solr-response$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(test_solr_response_OBJECTS) $(test_solr_response_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-solr-old.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-solr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-solr-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solr-connection.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/solr-response.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_solr_response-solr-response.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_solr_response-test-solr-response.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +test_solr_response-solr-response.o: solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_solr_response-solr-response.o -MD -MP -MF $(DEPDIR)/test_solr_response-solr-response.Tpo -c -o test_solr_response-solr-response.o `test -f 'solr-response.c' || echo '$(srcdir)/'`solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_solr_response-solr-response.Tpo $(DEPDIR)/test_solr_response-solr-response.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='solr-response.c' object='test_solr_response-solr-response.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_solr_response-solr-response.o `test -f 'solr-response.c' || echo '$(srcdir)/'`solr-response.c + +test_solr_response-solr-response.obj: solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_solr_response-solr-response.obj -MD -MP -MF $(DEPDIR)/test_solr_response-solr-response.Tpo -c -o test_solr_response-solr-response.obj `if test -f 'solr-response.c'; then $(CYGPATH_W) 'solr-response.c'; else $(CYGPATH_W) '$(srcdir)/solr-response.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_solr_response-solr-response.Tpo $(DEPDIR)/test_solr_response-solr-response.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='solr-response.c' object='test_solr_response-solr-response.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_solr_response-solr-response.obj `if test -f 'solr-response.c'; then $(CYGPATH_W) 'solr-response.c'; else $(CYGPATH_W) '$(srcdir)/solr-response.c'; fi` + +test_solr_response-test-solr-response.o: test-solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_solr_response-test-solr-response.o -MD -MP -MF $(DEPDIR)/test_solr_response-test-solr-response.Tpo -c -o test_solr_response-test-solr-response.o `test -f 'test-solr-response.c' || echo '$(srcdir)/'`test-solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_solr_response-test-solr-response.Tpo $(DEPDIR)/test_solr_response-test-solr-response.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test-solr-response.c' object='test_solr_response-test-solr-response.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_solr_response-test-solr-response.o `test -f 'test-solr-response.c' || echo '$(srcdir)/'`test-solr-response.c + +test_solr_response-test-solr-response.obj: test-solr-response.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_solr_response-test-solr-response.obj -MD -MP -MF $(DEPDIR)/test_solr_response-test-solr-response.Tpo -c -o test_solr_response-test-solr-response.obj `if test -f 'test-solr-response.c'; then $(CYGPATH_W) 'test-solr-response.c'; else $(CYGPATH_W) '$(srcdir)/test-solr-response.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_solr_response-test-solr-response.Tpo $(DEPDIR)/test_solr_response-test-solr-response.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test-solr-response.c' object='test_solr_response-test-solr-response.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_solr_response_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_solr_response-test-solr-response.obj `if test -f 'test-solr-response.c'; then $(CYGPATH_W) 'test-solr-response.c'; else $(CYGPATH_W) '$(srcdir)/test-solr-response.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkginc_libHEADERS: $(pkginc_lib_HEADERS) + @$(NORMAL_INSTALL) + @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkginc_libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkginc_libdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkginc_libdir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkginc_libdir)" || exit $$?; \ + done + +uninstall-pkginc_libHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkginc_libdir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkginc_libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/fts-backend-solr-old.Plo + -rm -f ./$(DEPDIR)/fts-backend-solr.Plo + -rm -f ./$(DEPDIR)/fts-solr-plugin.Plo + -rm -f ./$(DEPDIR)/solr-connection.Plo + -rm -f ./$(DEPDIR)/solr-response.Plo + -rm -f ./$(DEPDIR)/test_solr_response-solr-response.Po + -rm -f ./$(DEPDIR)/test_solr_response-test-solr-response.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES install-pkginc_libHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/fts-backend-solr-old.Plo + -rm -f ./$(DEPDIR)/fts-backend-solr.Plo + -rm -f ./$(DEPDIR)/fts-solr-plugin.Plo + -rm -f ./$(DEPDIR)/solr-connection.Plo + -rm -f ./$(DEPDIR)/solr-response.Plo + -rm -f ./$(DEPDIR)/test_solr_response-solr-response.Po + -rm -f ./$(DEPDIR)/test_solr_response-test-solr-response.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-moduleLTLIBRARIES \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-pkginc_libHEADERS install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS + +.PRECIOUS: Makefile + + +check: check-am check-test +check-test: all-am + for bin in $(test_programs); do \ + if ! $(RUN_TEST) ./$$bin; then exit 1; fi; \ + done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts-solr/fts-backend-solr-old.c b/src/plugins/fts-solr/fts-backend-solr-old.c new file mode 100644 index 0000000..20b891b --- /dev/null +++ b/src/plugins/fts-solr/fts-backend-solr-old.c @@ -0,0 +1,879 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "hash.h" +#include "strescape.h" +#include "unichar.h" +#include "iostream-ssl.h" +#include "http-url.h" +#include "imap-utf7.h" +#include "mail-storage-private.h" +#include "mailbox-list-private.h" +#include "mail-search.h" +#include "fts-api.h" +#include "solr-connection.h" +#include "fts-solr-plugin.h" + +#include <ctype.h> + +#define SOLR_CMDBUF_SIZE (1024*64) +#define SOLR_MAX_MULTI_ROWS 100000 + +struct solr_fts_backend { + struct fts_backend backend; + struct solr_connection *solr_conn; + char *id_username, *id_namespace; + struct mail_namespace *default_ns; +}; + +struct solr_fts_backend_update_context { + struct fts_backend_update_context ctx; + + struct mailbox *cur_box; + char *id_box_name; + + struct solr_connection_post *post; + uint32_t prev_uid, uid_validity; + string_t *cmd, *hdr; + + bool headers_open; + bool body_open; + bool documents_added; +}; + +static const char *solr_escape_chars = "+-&|!(){}[]^\"~*?:\\/ "; + +static bool is_valid_xml_char(unichar_t chr) +{ + /* Valid characters in XML: + + #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | + [#x10000-#x10FFFF] + + This function gets called only for #x80 and higher */ + if (chr > 0xd7ff && chr < 0xe000) + return FALSE; + if (chr > 0xfffd && chr < 0x10000) + return FALSE; + return chr < 0x10ffff; +} + +static void +xml_encode_data(string_t *dest, const unsigned char *data, size_t len) +{ + unichar_t chr; + size_t i; + + for (i = 0; i < len; i++) { + switch (data[i]) { + case '&': + str_append(dest, "&"); + break; + case '<': + str_append(dest, "<"); + break; + case '>': + str_append(dest, ">"); + break; + case '\t': + case '\n': + case '\r': + /* exceptions to the following control char check */ + str_append_c(dest, data[i]); + break; + default: + if (data[i] < 32) { + /* SOLR doesn't like control characters. + replace them with spaces. */ + str_append_c(dest, ' '); + } else if (data[i] >= 0x80) { + /* make sure the character is valid for XML + so we don't get XML parser errors */ + int char_len = + uni_utf8_get_char_n(data + i, len - i, &chr); + i_assert(char_len > 0); /* input is valid UTF8 */ + if (is_valid_xml_char(chr)) + str_append_data(dest, data + i, char_len); + else { + str_append_data(dest, utf8_replacement_char, + UTF8_REPLACEMENT_CHAR_LEN); + } + i += char_len - 1; + } else { + str_append_c(dest, data[i]); + } + break; + } + } +} + +static void xml_encode(string_t *dest, const char *str) +{ + xml_encode_data(dest, (const unsigned char *)str, strlen(str)); +} + +static const char *solr_escape_id_str(const char *str) +{ + string_t *tmp; + const char *p; + + for (p = str; *p != '\0'; p++) { + if (*p == '/' || *p == '!') + break; + } + if (*p == '\0') + return str; + + tmp = t_str_new(64); + for (p = str; *p != '\0'; p++) { + switch (*p) { + case '/': + str_append(tmp, "!\\"); + break; + case '!': + str_append(tmp, "!!"); + break; + default: + str_append_c(tmp, *p); + break; + } + } + return str_c(tmp); +} + +static const char *solr_escape(const char *str) +{ + string_t *ret; + unsigned int i; + + if (str[0] == '\0') + return "\"\""; + + ret = t_str_new(strlen(str) + 16); + for (i = 0; str[i] != '\0'; i++) { + if (strchr(solr_escape_chars, str[i]) != NULL) + str_append_c(ret, '\\'); + str_append_c(ret, str[i]); + } + return str_c(ret); +} + +static void solr_quote(string_t *dest, const char *str) +{ + str_append(dest, solr_escape(str)); +} + +static void solr_quote_http(string_t *dest, const char *str) +{ + http_url_escape_param(dest, solr_escape(str)); +} + +static void fts_solr_set_default_ns(struct solr_fts_backend *backend) +{ + struct mail_namespace *ns = backend->backend.ns; + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT_REQUIRE(ns->user); + const struct fts_solr_settings *set = &fuser->set; + const char *str; + + if (backend->default_ns != NULL) + return; + + if (set->default_ns_prefix != NULL) { + backend->default_ns = + mail_namespace_find_prefix(ns->user->namespaces, + set->default_ns_prefix); + if (backend->default_ns == NULL) { + i_error("fts_solr: default_ns setting points to " + "nonexistent namespace"); + } + } + if (backend->default_ns == NULL) { + backend->default_ns = + mail_namespace_find_inbox(ns->user->namespaces); + } + while (backend->default_ns->alias_for != NULL) + backend->default_ns = backend->default_ns->alias_for; + + if (ns != backend->default_ns) { + str = solr_escape_id_str(ns->prefix); + backend->id_namespace = i_strdup(str); + } +} + +static void fts_box_name_get_root(struct mail_namespace **ns, const char **name) +{ + struct mail_namespace *orig_ns = *ns; + + while ((*ns)->alias_for != NULL) + *ns = (*ns)->alias_for; + + if (**name == '\0' && *ns != orig_ns && + ((*ns)->flags & NAMESPACE_FLAG_INBOX_USER) != 0) { + /* ugly workaround to allow selecting INBOX from a Maildir/ + when it's not in the inbox=yes namespace. */ + *name = "INBOX"; + } +} + +static const char * +fts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r) +{ + struct mail_namespace *ns = mailbox_get_namespace(box); + const char *name; + + if (t_imap_utf8_to_utf7(box->name, &name) < 0) + i_unreached(); + + fts_box_name_get_root(&ns, &name); + *ns_r = ns; + return name; +} + +static struct fts_backend *fts_backend_solr_alloc(void) +{ + struct solr_fts_backend *backend; + + backend = i_new(struct solr_fts_backend, 1); + backend->backend = fts_backend_solr_old; + return &backend->backend; +} + +static int +fts_backend_solr_init(struct fts_backend *_backend, const char **error_r) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user); + struct ssl_iostream_settings ssl_set; + const char *str; + + if (fuser == NULL) { + *error_r = "Invalid fts_solr setting"; + return -1; + } + + mail_user_init_ssl_client_settings(_backend->ns->user, &ssl_set); + if (solr_connection_init(&fuser->set, &ssl_set, + _backend->ns->user->event, + &backend->solr_conn, error_r) < 0) + return -1; + + str = solr_escape_id_str(_backend->ns->user->username); + backend->id_username = i_strdup(str); + return 0; +} + +static void fts_backend_solr_deinit(struct fts_backend *_backend) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + + solr_connection_deinit(&backend->solr_conn); + i_free(backend->id_namespace); + i_free(backend->id_username); + i_free(backend); +} + +static void +solr_add_ns_query(string_t *str, struct solr_fts_backend *backend, + struct mail_namespace *ns, bool neg) +{ + while (ns->alias_for != NULL) + ns = ns->alias_for; + + if (ns == backend->default_ns || *ns->prefix == '\0') { + if (!neg) + str_append(str, " -ns:[* TO *]"); + else + str_append(str, " +ns:[* TO *]"); + } else { + if (!neg) + str_append(str, " +ns:"); + else + str_append(str, " -ns:"); + solr_quote(str, ns->prefix); + } +} + +static void +solr_add_ns_query_http(string_t *str, struct solr_fts_backend *backend, + struct mail_namespace *ns) +{ + string_t *tmp; + + tmp = t_str_new(64); + solr_add_ns_query(tmp, backend, ns, FALSE); + http_url_escape_param(str, str_c(tmp)); +} + +static int +fts_backend_solr_get_last_uid_fallback(struct solr_fts_backend *backend, + struct mailbox *box, + uint32_t *last_uid_r) +{ + struct mail_namespace *ns; + struct mailbox_status status; + struct solr_result **results; + const struct seq_range *uidvals; + const char *box_name; + unsigned int count; + string_t *str; + pool_t pool; + int ret = 0; + + str = t_str_new(256); + str_append(str, "fl=uid&rows=1&sort=uid+desc&q="); + + box_name = fts_box_get_root(box, &ns); + + mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status); + str_printfa(str, "uidv:%u+AND+box:", status.uidvalidity); + solr_quote_http(str, box_name); + solr_add_ns_query_http(str, backend, ns); + str_append(str, "+AND+user:"); + solr_quote_http(str, ns->user->username); + + pool = pool_alloconly_create("solr last uid lookup", 1024); + if (solr_connection_select(backend->solr_conn, str_c(str), + pool, &results) < 0) + ret = -1; + else if (results[0] == NULL) { + /* no UIDs */ + *last_uid_r = 0; + } else { + uidvals = array_get(&results[0]->uids, &count); + i_assert(count > 0); + if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) { + *last_uid_r = uidvals[0].seq1; + } else { + i_error("fts_solr: Last UID lookup returned multiple rows"); + ret = -1; + } + } + pool_unref(&pool); + return ret; +} + +static int +fts_backend_solr_get_last_uid(struct fts_backend *_backend, + struct mailbox *box, uint32_t *last_uid_r) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_backend; + struct fts_index_header hdr; + + if (fts_index_get_header(box, &hdr)) { + *last_uid_r = hdr.last_indexed_uid; + return 0; + } + + /* either nothing has been indexed, or the index was corrupted. + do it the slow way. */ + if (fts_backend_solr_get_last_uid_fallback(backend, box, last_uid_r) < 0) + return -1; + + fts_index_set_last_uid(box, *last_uid_r); + return 0; +} + +static struct fts_backend_update_context * +fts_backend_solr_update_init(struct fts_backend *_backend) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_backend; + struct solr_fts_backend_update_context *ctx; + + ctx = i_new(struct solr_fts_backend_update_context, 1); + ctx->ctx.backend = _backend; + ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE); + ctx->hdr = str_new(default_pool, 4096); + fts_solr_set_default_ns(backend); + return &ctx->ctx; +} + +static void xml_encode_id(struct solr_fts_backend_update_context *ctx, + string_t *str, uint32_t uid) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)ctx->ctx.backend; + + if (uid != 0) + str_printfa(str, "%u/", uid); + else + str_append(str, "L/"); + + if (backend->id_namespace != NULL) { + xml_encode(str, backend->id_namespace); + str_append_c(str, '/'); + } + str_printfa(str, "%u/", ctx->uid_validity); + xml_encode(str, backend->id_username); + str_append_c(str, '/'); + xml_encode(str, ctx->id_box_name); +} + +static void +fts_backend_solr_add_doc_prefix(struct solr_fts_backend_update_context *ctx, + uint32_t uid) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)ctx->ctx.backend; + struct mailbox *box = ctx->cur_box; + struct mail_namespace *ns; + const char *box_name; + + ctx->documents_added = TRUE; + + str_printfa(ctx->cmd, "<doc>" + "<field name=\"uid\">%u</field>" + "<field name=\"uidv\">%u</field>", + uid, ctx->uid_validity); + + box_name = fts_box_get_root(box, &ns); + + if (ns != backend->default_ns) { + str_append(ctx->cmd, "<field name=\"ns\">"); + xml_encode(ctx->cmd, ns->prefix); + str_append(ctx->cmd, "</field>"); + } + str_append(ctx->cmd, "<field name=\"box\">"); + xml_encode(ctx->cmd, box_name); + str_append(ctx->cmd, "</field><field name=\"user\">"); + xml_encode(ctx->cmd, ns->user->username); + str_append(ctx->cmd, "</field>"); +} + +static int +fts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx) +{ + if (ctx->post == NULL) + return 0; + + str_append(ctx->cmd, "</doc></add>"); + + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + return solr_connection_post_end(&ctx->post); +} + +static int +fts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_ctx->backend; + const char *str; + int ret; + + ret = fts_backed_solr_build_commit(ctx); + + /* commit and wait until the documents we just indexed are + visible to the following search */ + str = t_strdup_printf("<commit waitFlush=\"false\" " + "waitSearcher=\"%s\"/>", + ctx->documents_added ? "true" : "false"); + if (solr_connection_post(backend->solr_conn, str) < 0) + ret = -1; + + str_free(&ctx->cmd); + str_free(&ctx->hdr); + i_free(ctx->id_box_name); + i_free(ctx); + return ret; +} + +static void +fts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx, + struct mailbox *box) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + struct mailbox_status status; + struct mail_namespace *ns; + + if (ctx->prev_uid != 0) { + fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid); + ctx->prev_uid = 0; + } + + ctx->cur_box = box; + ctx->uid_validity = 0; + i_free_and_null(ctx->id_box_name); + + if (box != NULL) { + ctx->id_box_name = i_strdup(fts_box_get_root(box, &ns)); + + mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status); + ctx->uid_validity = status.uidvalidity; + } +} + +static void +fts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx, + uint32_t uid) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_ctx->backend; + + T_BEGIN { + string_t *cmd; + + cmd = t_str_new(256); + str_append(cmd, "<delete><id>"); + xml_encode_id(ctx, cmd, uid); + str_append(cmd, "</id></delete>"); + + (void)solr_connection_post(backend->solr_conn, str_c(cmd)); + } T_END; +} + +static void +fts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx, + uint32_t uid) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)ctx->ctx.backend; + + if (ctx->post == NULL) { + i_assert(ctx->prev_uid == 0); + + ctx->post = solr_connection_post_begin(backend->solr_conn); + str_append(ctx->cmd, "<add>"); + } else { + ctx->headers_open = FALSE; + if (ctx->body_open) { + ctx->body_open = FALSE; + str_append(ctx->cmd, "</field>"); + } + str_append(ctx->cmd, "<field name=\"hdr\">"); + str_append_str(ctx->cmd, ctx->hdr); + str_append(ctx->cmd, "</field>"); + str_truncate(ctx->hdr, 0); + + str_append(ctx->cmd, "</doc>"); + } + ctx->prev_uid = uid; + + fts_backend_solr_add_doc_prefix(ctx, uid); + str_printfa(ctx->cmd, "<field name=\"id\">"); + xml_encode_id(ctx, ctx->cmd, uid); + str_append(ctx->cmd, "</field>"); +} + +static bool +fts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx, + const struct fts_backend_build_key *key) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + + if (key->uid != ctx->prev_uid) + fts_backend_solr_uid_changed(ctx, key->uid); + + switch (key->type) { + case FTS_BACKEND_BUILD_KEY_HDR: + case FTS_BACKEND_BUILD_KEY_MIME_HDR: + xml_encode(ctx->hdr, key->hdr_name); + str_append(ctx->hdr, ": "); + ctx->headers_open = TRUE; + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART: + ctx->headers_open = FALSE; + if (!ctx->body_open) { + ctx->body_open = TRUE; + str_append(ctx->cmd, "<field name=\"body\">"); + } + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY: + i_unreached(); + } + return TRUE; +} + +static void +fts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + + if (ctx->headers_open) + str_append_c(ctx->hdr, '\n'); + else { + i_assert(ctx->body_open); + str_append_c(ctx->cmd, '\n'); + } +} + +static int +fts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx, + const unsigned char *data, size_t size) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + + xml_encode_data(ctx->cmd, data, size); + if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) { + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + str_truncate(ctx->cmd, 0); + } + return 0; +} + +static int fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED) +{ + return 0; +} + +static int fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED) +{ + return 0; +} + +static bool +solr_add_definite_query(string_t *str, struct mail_search_arg *arg) +{ + if (arg->no_fts) + return FALSE; + switch (arg->type) { + case SEARCH_TEXT: { + if (arg->match_not) + str_append_c(str, '-'); + str_append(str, "(hdr:"); + solr_quote_http(str, arg->value.str); + str_append(str, "+OR+body:"); + solr_quote_http(str, arg->value.str); + str_append(str, ")"); + break; + } + case SEARCH_BODY: + if (arg->match_not) + str_append_c(str, '-'); + str_append(str, "body:"); + solr_quote_http(str, arg->value.str); + break; + default: + return FALSE; + } + return TRUE; +} + +static bool +solr_add_definite_query_args(string_t *str, struct mail_search_arg *arg, + bool and_args) +{ + size_t last_len; + + last_len = str_len(str); + for (; arg != NULL; arg = arg->next) { + if (solr_add_definite_query(str, arg)) { + arg->match_always = TRUE; + last_len = str_len(str); + if (and_args) + str_append(str, "+AND+"); + else + str_append(str, "+OR+"); + } + } + if (str_len(str) == last_len) + return FALSE; + + str_truncate(str, last_len); + return TRUE; +} + +static int +fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_backend; + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + struct mail_namespace *ns; + struct mailbox_status status; + string_t *str; + const char *box_name; + pool_t pool; + struct solr_result **results; + int ret; + + fts_solr_set_default_ns(backend); + mailbox_get_open_status(box, STATUS_UIDVALIDITY | STATUS_UIDNEXT, + &status); + + str = t_str_new(256); + str_printfa(str, "fl=uid,score&rows=%u&sort=uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d", + status.uidnext); + + if (!solr_add_definite_query_args(str, args, and_args)) { + /* can't search this query */ + return 0; + } + + /* use a separate filter query for selecting the mailbox. it shouldn't + affect the score and there could be some caching benefits too. */ + str_append(str, "&fq=%2Buser:"); + solr_quote_http(str, box->storage->user->username); + box_name = fts_box_get_root(box, &ns); + str_printfa(str, "+%%2Buidv:%u+%%2Bbox:", status.uidvalidity); + solr_quote_http(str, box_name); + solr_add_ns_query_http(str, backend, ns); + + pool = pool_alloconly_create("fts solr search", 1024); + ret = solr_connection_select(backend->solr_conn, str_c(str), + pool, &results); + if (ret == 0 && results[0] != NULL) { + if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0) + array_append_array(&result->definite_uids, &results[0]->uids); + else + array_append_array(&result->maybe_uids, &results[0]->uids); + array_append_array(&result->scores, &results[0]->scores); + } + result->scores_sorted = TRUE; + pool_unref(&pool); + return ret; +} + +static char * +mailbox_get_id(struct solr_fts_backend *backend, struct mail_namespace *ns, + const char *mailbox, uint32_t uidvalidity) +{ + string_t *str = t_str_new(64); + + str_printfa(str, "%u\001", uidvalidity); + str_append(str, mailbox); + if (ns != backend->default_ns) + str_printfa(str, "\001%s", ns->prefix); + return str_c_modifiable(str); +} + +static int +solr_search_multi(struct solr_fts_backend *backend, string_t *str, + struct mailbox *const boxes[], + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct solr_result **solr_results; + struct fts_result *fts_result; + ARRAY(struct fts_result) fts_results; + struct mail_namespace *ns; + struct mailbox_status status; + HASH_TABLE(char *, struct mailbox *) mailboxes; + struct mailbox *box; + const char *box_name; + char *box_id; + unsigned int i; + size_t len; + + /* use a separate filter query for selecting the mailbox. it shouldn't + affect the score and there could be some caching benefits too. */ + str_append(str, "&fq=%2Buser:"); + if (backend->backend.ns->owner != NULL) + solr_quote_http(str, backend->backend.ns->owner->username); + else + str_append(str, "%22%22"); + + hash_table_create(&mailboxes, default_pool, 0, str_hash, strcmp); + str_append(str, "%2B("); + len = str_len(str); + for (i = 0; boxes[i] != NULL; i++) { + if (str_len(str) != len) + str_append(str, "+OR+"); + + box_name = fts_box_get_root(boxes[i], &ns); + mailbox_get_open_status(boxes[i], STATUS_UIDVALIDITY, &status); + str_printfa(str, "%%2B(%%2Buidv:%u+%%2Bbox:", status.uidvalidity); + solr_quote_http(str, box_name); + solr_add_ns_query_http(str, backend, ns); + str_append_c(str, ')'); + + box_id = mailbox_get_id(backend, ns, box_name, status.uidvalidity); + hash_table_insert(mailboxes, box_id, boxes[i]); + } + str_append_c(str, ')'); + + if (solr_connection_select(backend->solr_conn, str_c(str), + result->pool, &solr_results) < 0) { + hash_table_destroy(&mailboxes); + return -1; + } + + p_array_init(&fts_results, result->pool, 32); + for (i = 0; solr_results[i] != NULL; i++) { + box = hash_table_lookup(mailboxes, solr_results[i]->box_id); + if (box == NULL) { + i_warning("fts_solr: Lookup returned unexpected mailbox " + "with id=%s", solr_results[i]->box_id); + continue; + } + fts_result = array_append_space(&fts_results); + fts_result->box = box; + if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0) + fts_result->definite_uids = solr_results[i]->uids; + else + fts_result->maybe_uids = solr_results[i]->uids; + fts_result->scores = solr_results[i]->scores; + fts_result->scores_sorted = TRUE; + } + array_append_zero(&fts_results); + result->box_results = array_front_modifiable(&fts_results); + hash_table_destroy(&mailboxes); + return 0; +} + +static int +fts_backend_solr_lookup_multi(struct fts_backend *_backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_backend; + string_t *str; + + fts_solr_set_default_ns(backend); + + str = t_str_new(256); + str_printfa(str, "fl=ns,box,uidv,uid,score&rows=%u&sort=box+asc,uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d", + SOLR_MAX_MULTI_ROWS); + + if (solr_add_definite_query_args(str, args, and_args)) { + if (solr_search_multi(backend, str, boxes, flags, result) < 0) + return -1; + } + /* FIXME: maybe_uids could be handled also with some more work.. */ + return 0; +} + +struct fts_backend fts_backend_solr_old = { + .name = "solr_old", + .flags = 0, + + { + fts_backend_solr_alloc, + fts_backend_solr_init, + fts_backend_solr_deinit, + fts_backend_solr_get_last_uid, + fts_backend_solr_update_init, + fts_backend_solr_update_deinit, + fts_backend_solr_update_set_mailbox, + fts_backend_solr_update_expunge, + fts_backend_solr_update_set_build_key, + fts_backend_solr_update_unset_build_key, + fts_backend_solr_update_build_more, + fts_backend_solr_refresh, + NULL, + fts_backend_solr_optimize, + fts_backend_default_can_lookup, + fts_backend_solr_lookup, + fts_backend_solr_lookup_multi, + NULL + } +}; diff --git a/src/plugins/fts-solr/fts-backend-solr.c b/src/plugins/fts-solr/fts-backend-solr.c new file mode 100644 index 0000000..0ac0f18 --- /dev/null +++ b/src/plugins/fts-solr/fts-backend-solr.c @@ -0,0 +1,984 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "hash.h" +#include "strescape.h" +#include "unichar.h" +#include "iostream-ssl.h" +#include "http-url.h" +#include "mail-storage-private.h" +#include "mailbox-list-private.h" +#include "mail-search.h" +#include "fts-api.h" +#include "solr-connection.h" +#include "fts-solr-plugin.h" + +#include <ctype.h> + +#define SOLR_CMDBUF_SIZE (1024*64) +#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE-128) +#define SOLR_MAX_MULTI_ROWS 100000 + +/* If header is larger than this, truncate it. */ +#define SOLR_HEADER_MAX_SIZE (1024*1024) +/* If SOLR_HEADER_MAX_SIZE was already reached, write still to individual + header fields as long as they're smaller than this */ +#define SOLR_HEADER_LINE_MAX_TRUNC_SIZE 1024 + +#define SOLR_QUERY_MAX_MAILBOX_COUNT 10 + +struct solr_fts_backend { + struct fts_backend backend; + struct solr_connection *solr_conn; +}; + +struct solr_fts_field { + char *key; + string_t *value; +}; + +struct solr_fts_backend_update_context { + struct fts_backend_update_context ctx; + + struct mailbox *cur_box; + char box_guid[MAILBOX_GUID_HEX_LENGTH+1]; + + struct solr_connection_post *post; + uint32_t prev_uid; + string_t *cmd, *cur_value, *cur_value2; + string_t *cmd_expunge; + ARRAY(struct solr_fts_field) fields; + + uint32_t last_indexed_uid; + unsigned int mails_since_flush; + + bool tokenized_input:1; + bool last_indexed_uid_set:1; + bool body_open:1; + bool documents_added:1; + bool expunges:1; + bool truncate_header:1; +}; + +static const char *solr_escape_chars = "+-&|!(){}[]^\"~*?:\\/ "; + +static bool is_valid_xml_char(unichar_t chr) +{ + /* Valid characters in XML: + + #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | + [#x10000-#x10FFFF] + + This function gets called only for #x80 and higher */ + if (chr > 0xd7ff && chr < 0xe000) + return FALSE; + if (chr > 0xfffd && chr < 0x10000) + return FALSE; + return chr < 0x10ffff; +} + +static size_t +xml_encode_data_max(string_t *dest, const unsigned char *data, size_t len, + unsigned int max_len) +{ + unichar_t chr; + size_t i; + + i_assert(max_len > 0 || len == 0); + + if (max_len > len) + max_len = len; + for (i = 0; i < max_len; i++) { + switch (data[i]) { + case '&': + str_append(dest, "&"); + break; + case '<': + str_append(dest, "<"); + break; + case '>': + str_append(dest, ">"); + break; + case '\t': + case '\n': + case '\r': + /* exceptions to the following control char check */ + str_append_c(dest, data[i]); + break; + default: + if (data[i] < 32) { + /* SOLR doesn't like control characters. + replace them with spaces. */ + str_append_c(dest, ' '); + } else if (data[i] >= 0x80) { + /* make sure the character is valid for XML + so we don't get XML parser errors */ + int char_len = + uni_utf8_get_char_n(data + i, len - i, &chr); + i_assert(char_len > 0); /* input is valid UTF8 */ + if (is_valid_xml_char(chr)) + str_append_data(dest, data + i, char_len); + else { + str_append_data(dest, utf8_replacement_char, + UTF8_REPLACEMENT_CHAR_LEN); + } + i += char_len - 1; + } else { + str_append_c(dest, data[i]); + } + break; + } + } + return i; +} + +static void +xml_encode_data(string_t *dest, const unsigned char *data, size_t len) +{ + (void)xml_encode_data_max(dest, data, len, len); +} + +static void xml_encode(string_t *dest, const char *str) +{ + xml_encode_data(dest, (const unsigned char *)str, strlen(str)); +} + +static const char *solr_escape(const char *str) +{ + string_t *ret; + unsigned int i; + + ret = t_str_new(strlen(str) + 16); + for (i = 0; str[i] != '\0'; i++) { + if (strchr(solr_escape_chars, str[i]) != NULL) + str_append_c(ret, '\\'); + str_append_c(ret, str[i]); + } + return str_c(ret); +} + +static void solr_quote_http(string_t *dest, const char *str) +{ + if (str[0] != '\0') + http_url_escape_param(dest, solr_escape(str)); + else + str_append(dest, "%22%22"); +} + +static struct fts_backend *fts_backend_solr_alloc(void) +{ + struct solr_fts_backend *backend; + + backend = i_new(struct solr_fts_backend, 1); + backend->backend = fts_backend_solr; + return &backend->backend; +} + +static int +fts_backend_solr_init(struct fts_backend *_backend, const char **error_r) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user); + struct ssl_iostream_settings ssl_set; + + if (fuser == NULL) { + *error_r = "Invalid fts_solr setting"; + return -1; + } + if (fuser->set.use_libfts) { + /* change our flags so we get proper input */ + _backend->flags &= ENUM_NEGATE(FTS_BACKEND_FLAG_FUZZY_SEARCH); + _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT; + } + + mail_user_init_ssl_client_settings(_backend->ns->user, &ssl_set); + return solr_connection_init(&fuser->set, &ssl_set, + _backend->ns->user->event, + &backend->solr_conn, error_r); +} + +static void fts_backend_solr_deinit(struct fts_backend *_backend) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + + solr_connection_deinit(&backend->solr_conn); + i_free(backend); +} + +static int +get_last_uid_fallback(struct fts_backend *_backend, struct mailbox *box, + uint32_t *last_uid_r) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + const struct seq_range *uidvals; + const char *box_guid; + unsigned int count; + struct solr_result **results; + string_t *str; + pool_t pool; + int ret = 0; + + str = t_str_new(256); + str_append(str, "wt=xml&fl=uid&rows=1&sort=uid+desc&q="); + + if (fts_mailbox_get_guid(box, &box_guid) < 0) + return -1; + + str_printfa(str, "box:%s+AND+user:", box_guid); + if (_backend->ns->owner != NULL) + solr_quote_http(str, _backend->ns->owner->username); + else + str_append(str, "%22%22"); + + pool = pool_alloconly_create("solr last uid lookup", 1024); + if (solr_connection_select(backend->solr_conn, str_c(str), + pool, &results) < 0) + ret = -1; + else if (results[0] == NULL) { + /* no UIDs */ + *last_uid_r = 0; + } else { + uidvals = array_get(&results[0]->uids, &count); + i_assert(count > 0); + if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) { + *last_uid_r = uidvals[0].seq1; + } else { + i_error("fts_solr: Last UID lookup returned multiple rows"); + ret = -1; + } + } + pool_unref(&pool); + return ret; +} + +static int +fts_backend_solr_get_last_uid(struct fts_backend *_backend, + struct mailbox *box, uint32_t *last_uid_r) +{ + struct fts_index_header hdr; + + if (fts_index_get_header(box, &hdr)) { + *last_uid_r = hdr.last_indexed_uid; + return 0; + } + + /* either nothing has been indexed, or the index was corrupted. + do it the slow way. */ + if (get_last_uid_fallback(_backend, box, last_uid_r) < 0) + return -1; + + fts_index_set_last_uid(box, *last_uid_r); + return 0; +} + +static struct fts_backend_update_context * +fts_backend_solr_update_init(struct fts_backend *_backend) +{ + struct solr_fts_backend_update_context *ctx; + + ctx = i_new(struct solr_fts_backend_update_context, 1); + ctx->ctx.backend = _backend; + ctx->tokenized_input = + (_backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0; + i_array_init(&ctx->fields, 16); + return &ctx->ctx; +} + +static void xml_encode_id(struct solr_fts_backend_update_context *ctx, + string_t *str, uint32_t uid) +{ + str_printfa(str, "%u/%s", uid, ctx->box_guid); + if (ctx->ctx.backend->ns->owner != NULL) { + str_append_c(str, '/'); + xml_encode(str, ctx->ctx.backend->ns->owner->username); + } +} + +static void +fts_backend_solr_doc_open(struct solr_fts_backend_update_context *ctx, + uint32_t uid) +{ + ctx->documents_added = TRUE; + + str_printfa(ctx->cmd, "<doc>" + "<field name=\"uid\">%u</field>" + "<field name=\"box\">%s</field>", + uid, ctx->box_guid); + str_append(ctx->cmd, "<field name=\"user\">"); + if (ctx->ctx.backend->ns->owner != NULL) + xml_encode(ctx->cmd, ctx->ctx.backend->ns->owner->username); + str_append(ctx->cmd, "</field>"); + + str_printfa(ctx->cmd, "<field name=\"id\">"); + xml_encode_id(ctx, ctx->cmd, uid); + str_append(ctx->cmd, "</field>"); +} + +static string_t * +fts_solr_field_get(struct solr_fts_backend_update_context *ctx, const char *key) +{ + const struct solr_fts_field *field; + struct solr_fts_field new_field; + + /* there are only a few fields. this lookup is fast enough. */ + array_foreach(&ctx->fields, field) { + if (strcasecmp(field->key, key) == 0) + return field->value; + } + + i_zero(&new_field); + new_field.key = str_lcase(i_strdup(key)); + new_field.value = str_new(default_pool, 128); + array_push_back(&ctx->fields, &new_field); + return new_field.value; +} + +static void +fts_backend_solr_doc_close(struct solr_fts_backend_update_context *ctx) +{ + struct solr_fts_field *field; + + if (ctx->body_open) { + ctx->body_open = FALSE; + str_append(ctx->cmd, "</field>"); + } + array_foreach_modifiable(&ctx->fields, field) { + str_printfa(ctx->cmd, "<field name=\"%s\">", field->key); + /* the values are already xml-escaped */ + str_append_str(ctx->cmd, field->value); + str_append(ctx->cmd, "</field>"); + str_truncate(field->value, 0); + } + str_append(ctx->cmd, "</doc>"); +} + +static int +fts_backed_solr_build_flush(struct solr_fts_backend_update_context *ctx) +{ + if (ctx->post == NULL) + return 0; + + fts_backend_solr_doc_close(ctx); + str_append(ctx->cmd, "</add>"); + ctx->mails_since_flush = 0; + + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + str_truncate(ctx->cmd, 0); + return solr_connection_post_end(&ctx->post); +} + +static void +fts_backend_solr_expunge_flush(struct solr_fts_backend_update_context *ctx) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)ctx->ctx.backend; + + str_append(ctx->cmd_expunge, "</delete>"); + (void)solr_connection_post(backend->solr_conn, str_c(ctx->cmd_expunge)); + str_truncate(ctx->cmd_expunge, 0); + str_append(ctx->cmd_expunge, "<delete>"); +} + +static int +fts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + struct solr_fts_backend *backend = + (struct solr_fts_backend *)_ctx->backend; + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_ctx->backend->ns->user); + struct solr_fts_field *field; + const char *str; + int ret = _ctx->failed ? -1 : 0; + + if (fts_backed_solr_build_flush(ctx) < 0) + ret = -1; + + if (ctx->documents_added || ctx->expunges) { + /* commit and wait until the documents we just indexed are + visible to the following search */ + if (ctx->expunges) + fts_backend_solr_expunge_flush(ctx); + if (fuser->set.soft_commit) { + str = t_strdup_printf("<commit softCommit=\"true\" waitSearcher=\"%s\"/>", + ctx->documents_added ? "true" : "false"); + if (solr_connection_post(backend->solr_conn, str) < 0) + ret = -1; + } + } + + str_free(&ctx->cmd); + str_free(&ctx->cmd_expunge); + array_foreach_modifiable(&ctx->fields, field) { + str_free(&field->value); + i_free(field->key); + } + array_free(&ctx->fields); + i_free(ctx); + return ret; +} + +static void +fts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx, + struct mailbox *box) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + const char *box_guid; + + if (ctx->prev_uid != 0) { + i_assert(ctx->cur_box != NULL); + + /* flush solr between mailboxes, so we don't wrongly update + last_uid before we know it has succeeded */ + if (fts_backed_solr_build_flush(ctx) < 0) + _ctx->failed = TRUE; + else if (!_ctx->failed) + fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid); + ctx->prev_uid = 0; + } + + if (box != NULL) { + if (fts_mailbox_get_guid(box, &box_guid) < 0) + _ctx->failed = TRUE; + + i_assert(strlen(box_guid) == sizeof(ctx->box_guid)-1); + memcpy(ctx->box_guid, box_guid, sizeof(ctx->box_guid)-1); + } else { + memset(ctx->box_guid, 0, sizeof(ctx->box_guid)); + } + ctx->cur_box = box; +} + +static void +fts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx, + uint32_t uid) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + struct fts_index_header hdr; + + if (!ctx->last_indexed_uid_set) { + if (!fts_index_get_header(ctx->cur_box, &hdr)) + ctx->last_indexed_uid = 0; + else + ctx->last_indexed_uid = hdr.last_indexed_uid; + ctx->last_indexed_uid_set = TRUE; + } + if (ctx->last_indexed_uid == 0 || + uid > ctx->last_indexed_uid + 100) { + /* don't waste time asking Solr to expunge a message that is + highly unlikely to be indexed at this time. */ + return; + } + if (!ctx->expunges) { + ctx->expunges = TRUE; + ctx->cmd_expunge = str_new(default_pool, 1024); + str_append(ctx->cmd_expunge, "<delete>"); + } + + if (str_len(ctx->cmd_expunge) >= SOLR_CMDBUF_FLUSH_SIZE) + fts_backend_solr_expunge_flush(ctx); + + str_append(ctx->cmd_expunge, "<id>"); + xml_encode_id(ctx, ctx->cmd_expunge, uid); + str_append(ctx->cmd_expunge, "</id>"); +} + +static void +fts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx, + uint32_t uid) +{ + struct solr_fts_backend *backend = + (struct solr_fts_backend *)ctx->ctx.backend; + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(ctx->ctx.backend->ns->user); + + if (ctx->mails_since_flush >= fuser->set.batch_size) { + if (fts_backed_solr_build_flush(ctx) < 0) + ctx->ctx.failed = TRUE; + } + ctx->mails_since_flush++; + if (ctx->post == NULL) { + if (ctx->cmd == NULL) + ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE); + ctx->post = solr_connection_post_begin(backend->solr_conn); + str_append(ctx->cmd, "<add>"); + } else { + fts_backend_solr_doc_close(ctx); + } + ctx->prev_uid = uid; + ctx->truncate_header = FALSE; + fts_backend_solr_doc_open(ctx, uid); +} + +static bool +fts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx, + const struct fts_backend_build_key *key) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + + if (key->uid != ctx->prev_uid) + fts_backend_solr_uid_changed(ctx, key->uid); + + switch (key->type) { + case FTS_BACKEND_BUILD_KEY_HDR: + if (fts_header_want_indexed(key->hdr_name)) { + ctx->cur_value2 = + fts_solr_field_get(ctx, key->hdr_name); + } + /* fall through */ + case FTS_BACKEND_BUILD_KEY_MIME_HDR: + ctx->cur_value = fts_solr_field_get(ctx, "hdr"); + xml_encode(ctx->cur_value, key->hdr_name); + str_append(ctx->cur_value, ": "); + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART: + if (!ctx->body_open) { + ctx->body_open = TRUE; + str_append(ctx->cmd, "<field name=\"body\">"); + } + ctx->cur_value = ctx->cmd; + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY: + i_unreached(); + } + return TRUE; +} + +static void +fts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + + /* There can be multiple duplicate keys (duplicate header lines, + multiple MIME body parts). Make sure they are separated by + whitespace. */ + str_append_c(ctx->cur_value, '\n'); + ctx->cur_value = NULL; + if (ctx->cur_value2 != NULL) { + str_append_c(ctx->cur_value2, '\n'); + ctx->cur_value2 = NULL; + } +} + +static int +fts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx, + const unsigned char *data, size_t size) +{ + struct solr_fts_backend_update_context *ctx = + (struct solr_fts_backend_update_context *)_ctx; + size_t len; + + if (_ctx->failed) + return -1; + + if (ctx->cur_value2 == NULL && ctx->cur_value == ctx->cmd) { + /* we're writing to message body. if size is huge, + flush it once in a while */ + while (size >= SOLR_CMDBUF_FLUSH_SIZE) { + if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) { + solr_connection_post_more(ctx->post, + str_data(ctx->cmd), + str_len(ctx->cmd)); + str_truncate(ctx->cmd, 0); + } + len = xml_encode_data_max(ctx->cmd, data, size, + SOLR_CMDBUF_FLUSH_SIZE - + str_len(ctx->cmd)); + i_assert(len > 0); + i_assert(len <= size); + data += len; + size -= len; + } + xml_encode_data(ctx->cmd, data, size); + if (ctx->tokenized_input) + str_append_c(ctx->cmd, ' '); + } else { + if (!ctx->truncate_header) { + xml_encode_data(ctx->cur_value, data, size); + if (ctx->tokenized_input) + str_append_c(ctx->cur_value, ' '); + } + if (ctx->cur_value2 != NULL && + (!ctx->truncate_header || + str_len(ctx->cur_value2) < SOLR_HEADER_LINE_MAX_TRUNC_SIZE)) { + xml_encode_data(ctx->cur_value2, data, size); + if (ctx->tokenized_input) + str_append_c(ctx->cur_value2, ' '); + } + } + + if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) { + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + str_truncate(ctx->cmd, 0); + } + if (!ctx->truncate_header && + str_len(ctx->cur_value) >= SOLR_HEADER_MAX_SIZE) { + /* a large header */ + i_assert(ctx->cur_value != ctx->cmd); + + i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge, truncating", + ctx->cur_box->storage->user->username, + mailbox_get_vname(ctx->cur_box), ctx->prev_uid); + ctx->truncate_header = TRUE; + } + return 0; +} + +static int fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED) +{ + return 0; +} + +static int fts_backend_solr_rescan(struct fts_backend *backend) +{ + /* FIXME: proper rescan needed. for now we'll just reset the + last-uids */ + return fts_backend_reset_last_uids(backend); +} + +static int fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED) +{ + return 0; +} + +static bool solr_need_escaping(const char *str) +{ + for (; *str != '\0'; str++) { + if (strchr(solr_escape_chars, *str) != NULL) + return TRUE; + } + return FALSE; +} + +static void solr_add_str_arg(string_t *str, struct mail_search_arg *arg) +{ + /* currently we'll just disable fuzzy searching if there are any + parameters that need escaping. solr doesn't seem to give good + fuzzy results even if we did escape them.. */ + if (!arg->fuzzy || arg->value.str[0] == '\0' || + solr_need_escaping(arg->value.str)) + solr_quote_http(str, arg->value.str); + else { + http_url_escape_param(str, arg->value.str); + str_append_c(str, '~'); + } +} + +static bool +solr_add_definite_query(string_t *str, struct mail_search_arg *arg) +{ + if (arg->no_fts) + return FALSE; + switch (arg->type) { + case SEARCH_TEXT: { + if (arg->match_not) + str_append_c(str, '-'); + str_append(str, "(hdr:"); + solr_add_str_arg(str, arg); + str_append(str, "+OR+body:"); + solr_add_str_arg(str, arg); + str_append(str, ")"); + break; + } + case SEARCH_BODY: + if (arg->match_not) + str_append_c(str, '-'); + str_append(str, "body:"); + solr_add_str_arg(str, arg); + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (!fts_header_want_indexed(arg->hdr_field_name)) + return FALSE; + + if (arg->match_not) + str_append_c(str, '-'); + str_append(str, t_str_lcase(arg->hdr_field_name)); + str_append_c(str, ':'); + solr_add_str_arg(str, arg); + break; + default: + return FALSE; + } + return TRUE; +} + +static bool +solr_add_definite_query_args(string_t *str, struct mail_search_arg *arg, + bool and_args) +{ + size_t last_len; + + last_len = str_len(str); + for (; arg != NULL; arg = arg->next) { + if (solr_add_definite_query(str, arg)) { + arg->match_always = TRUE; + last_len = str_len(str); + if (and_args) + str_append(str, "+AND+"); + else + str_append(str, "+OR+"); + } + } + if (str_len(str) == last_len) + return FALSE; + + str_truncate(str, last_len); + return TRUE; +} + +static bool +solr_add_maybe_query(string_t *str, struct mail_search_arg *arg) +{ + if (arg->no_fts) + return FALSE; + switch (arg->type) { + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (fts_header_want_indexed(arg->hdr_field_name)) + return FALSE; + if (arg->match_not) { + /* all matches would be definite, but all non-matches + would be maybies. too much trouble to optimize. */ + return FALSE; + } + + /* we can check if the search key exists in some header and + filter out the messages that have no chance of matching */ + str_append(str, "hdr:"); + if (*arg->value.str != '\0') + solr_quote_http(str, arg->value.str); + else { + /* checking potential existence of the header name */ + solr_quote_http(str, t_str_lcase(arg->hdr_field_name)); + } + break; + default: + return FALSE; + } + return TRUE; +} + +static bool +solr_add_maybe_query_args(string_t *str, struct mail_search_arg *arg, + bool and_args) +{ + size_t last_len; + + last_len = str_len(str); + for (; arg != NULL; arg = arg->next) { + if (solr_add_maybe_query(str, arg)) { + arg->match_always = TRUE; + last_len = str_len(str); + if (and_args) + str_append(str, "+AND+"); + else + str_append(str, "+OR+"); + } + } + if (str_len(str) == last_len) + return FALSE; + + str_truncate(str, last_len); + return TRUE; +} + +static int solr_search(struct fts_backend *_backend, string_t *str, + const char *box_guid, ARRAY_TYPE(seq_range) *uids_r, + ARRAY_TYPE(fts_score_map) *scores_r) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + pool_t pool = pool_alloconly_create("fts solr search", 1024); + struct solr_result **results; + int ret; + + /* use a separate filter query for selecting the mailbox. it shouldn't + affect the score and there could be some caching benefits too. */ + str_printfa(str, "&fq=%%2Bbox:%s+%%2Buser:", box_guid); + if (_backend->ns->owner != NULL) + solr_quote_http(str, _backend->ns->owner->username); + else + str_append(str, "%22%22"); + + ret = solr_connection_select(backend->solr_conn, str_c(str), + pool, &results); + if (ret == 0 && results[0] != NULL) { + array_append_array(uids_r, &results[0]->uids); + array_append_array(scores_r, &results[0]->scores); + } + pool_unref(&pool); + return ret; +} + +static int +fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + struct mailbox_status status; + string_t *str; + const char *box_guid; + size_t prefix_len; + + if (fts_mailbox_get_guid(box, &box_guid) < 0) + return -1; + mailbox_get_open_status(box, STATUS_UIDNEXT, &status); + + str = t_str_new(256); + str_printfa(str, "wt=xml&fl=uid,score&rows=%u&sort=uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d", + status.uidnext); + prefix_len = str_len(str); + + if (solr_add_definite_query_args(str, args, and_args)) { + ARRAY_TYPE(seq_range) *uids_arr = + (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ? + &result->definite_uids : &result->maybe_uids; + if (solr_search(_backend, str, box_guid, + uids_arr, &result->scores) < 0) + return -1; + } + str_truncate(str, prefix_len); + if (solr_add_maybe_query_args(str, args, and_args)) { + if (solr_search(_backend, str, box_guid, + &result->maybe_uids, &result->scores) < 0) + return -1; + } + result->scores_sorted = TRUE; + return 0; +} + +static int +solr_search_multi(struct fts_backend *_backend, string_t *str, + struct mailbox *const boxes[], enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend; + struct solr_result **solr_results; + struct fts_result *fts_result; + ARRAY(struct fts_result) fts_results; + HASH_TABLE(char *, struct mailbox *) mailboxes; + struct mailbox *box; + const char *box_guid; + unsigned int i; + size_t len; + bool search_all_mailboxes; + + /* use a separate filter query for selecting the mailbox. it shouldn't + affect the score and there could be some caching benefits too. */ + str_append(str, "&fq=%2Buser:"); + if (_backend->ns->owner != NULL) + solr_quote_http(str, _backend->ns->owner->username); + else + str_append(str, "%22%22"); + + hash_table_create(&mailboxes, default_pool, 0, str_hash, strcmp); + for (i = 0; boxes[i] != NULL; i++) ; + search_all_mailboxes = i > SOLR_QUERY_MAX_MAILBOX_COUNT; + if (!search_all_mailboxes) + str_append(str, "+%2B("); + len = str_len(str); + + for (i = 0; boxes[i] != NULL; i++) { + if (fts_mailbox_get_guid(boxes[i], &box_guid) < 0) + continue; + + if (!search_all_mailboxes) { + if (str_len(str) != len) + str_append(str, "+OR+"); + str_printfa(str, "box:%s", box_guid); + } + hash_table_insert(mailboxes, t_strdup_noconst(box_guid), + boxes[i]); + } + if (!search_all_mailboxes) + str_append_c(str, ')'); + + if (solr_connection_select(backend->solr_conn, str_c(str), + result->pool, &solr_results) < 0) { + hash_table_destroy(&mailboxes); + return -1; + } + + p_array_init(&fts_results, result->pool, 32); + for (i = 0; solr_results[i] != NULL; i++) { + box = hash_table_lookup(mailboxes, solr_results[i]->box_id); + if (box == NULL) { + if (!search_all_mailboxes) { + i_warning("fts_solr: Lookup returned unexpected mailbox " + "with guid=%s", solr_results[i]->box_id); + } + continue; + } + fts_result = array_append_space(&fts_results); + fts_result->box = box; + if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0) + fts_result->definite_uids = solr_results[i]->uids; + else + fts_result->maybe_uids = solr_results[i]->uids; + fts_result->scores = solr_results[i]->scores; + fts_result->scores_sorted = TRUE; + } + array_append_zero(&fts_results); + result->box_results = array_front_modifiable(&fts_results); + hash_table_destroy(&mailboxes); + return 0; +} + +static int +fts_backend_solr_lookup_multi(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + string_t *str; + + str = t_str_new(256); + str_printfa(str, "wt=xml&fl=box,uid,score&rows=%u&sort=box+asc,uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d", + SOLR_MAX_MULTI_ROWS); + + if (solr_add_definite_query_args(str, args, and_args)) { + if (solr_search_multi(backend, str, boxes, flags, result) < 0) + return -1; + } + /* FIXME: maybe_uids could be handled also with some more work.. */ + return 0; +} + +struct fts_backend fts_backend_solr = { + .name = "solr", + .flags = FTS_BACKEND_FLAG_FUZZY_SEARCH, + + { + fts_backend_solr_alloc, + fts_backend_solr_init, + fts_backend_solr_deinit, + fts_backend_solr_get_last_uid, + fts_backend_solr_update_init, + fts_backend_solr_update_deinit, + fts_backend_solr_update_set_mailbox, + fts_backend_solr_update_expunge, + fts_backend_solr_update_set_build_key, + fts_backend_solr_update_unset_build_key, + fts_backend_solr_update_build_more, + fts_backend_solr_refresh, + fts_backend_solr_rescan, + fts_backend_solr_optimize, + fts_backend_default_can_lookup, + fts_backend_solr_lookup, + fts_backend_solr_lookup_multi, + NULL + } +}; diff --git a/src/plugins/fts-solr/fts-solr-plugin.c b/src/plugins/fts-solr/fts-solr-plugin.c new file mode 100644 index 0000000..5899784 --- /dev/null +++ b/src/plugins/fts-solr/fts-solr-plugin.c @@ -0,0 +1,131 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "http-client.h" +#include "mail-user.h" +#include "mail-storage-hooks.h" +#include "solr-connection.h" +#include "fts-user.h" +#include "fts-solr-plugin.h" + +#define DEFAULT_SOLR_BATCH_SIZE 1000 + +const char *fts_solr_plugin_version = DOVECOT_ABI_VERSION; +struct http_client *solr_http_client = NULL; + +struct fts_solr_user_module fts_solr_user_module = + MODULE_CONTEXT_INIT(&mail_user_module_register); + +static int +fts_solr_plugin_init_settings(struct mail_user *user, + struct fts_solr_settings *set, const char *str) +{ + const char *const *tmp; + + if (str == NULL) + str = ""; + + set->batch_size = DEFAULT_SOLR_BATCH_SIZE; + set->soft_commit = TRUE; + + for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) { + if (str_begins(*tmp, "url=")) { + set->url = p_strdup(user->pool, *tmp + 4); + } else if (strcmp(*tmp, "debug") == 0) { + set->debug = TRUE; + } else if (strcmp(*tmp, "use_libfts") == 0) { + set->use_libfts = TRUE; + } else if (str_begins(*tmp, "default_ns=")) { + set->default_ns_prefix = + p_strdup(user->pool, *tmp + 11); + } else if (str_begins(*tmp, "rawlog_dir=")) { + set->rawlog_dir = p_strdup(user->pool, *tmp + 11); + } else if (str_begins(*tmp, "batch_size=")) { + if (str_to_uint(*tmp+11, &set->batch_size) < 0 || + set->batch_size == 0) { + i_error("fts_solr: batch_size must be a positive integer"); + return -1; + } + } else if (str_begins(*tmp, "soft_commit=")) { + if (strcmp(*tmp + 12, "yes") == 0) { + set->soft_commit = TRUE; + } else if (strcmp(*tmp + 12, "no") == 0) { + set->soft_commit = FALSE; + } else { + i_error("fts_solr: Invalid setting for soft_commit: %s", *tmp+12); + return -1; + } + } else { + i_error("fts_solr: Invalid setting: %s", *tmp); + return -1; + } + } + if (set->url == NULL) { + i_error("fts_solr: url setting missing"); + return -1; + } + return 0; +} + +static void fts_solr_mail_user_deinit(struct mail_user *user) +{ + struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT_REQUIRE(user); + + fts_mail_user_deinit(user); + fuser->module_ctx.super.deinit(user); +} + +static void fts_solr_mail_user_create(struct mail_user *user, const char *env) +{ + struct mail_user_vfuncs *v = user->vlast; + struct fts_solr_user *fuser; + const char *error; + + fuser = p_new(user->pool, struct fts_solr_user, 1); + if (fts_solr_plugin_init_settings(user, &fuser->set, env) < 0) { + /* invalid settings, disabling */ + return; + } + if (fts_mail_user_init(user, fuser->set.use_libfts, &error) < 0) { + i_error("fts-solr: %s", error); + return; + } + + fuser->module_ctx.super = *v; + user->vlast = &fuser->module_ctx.super; + v->deinit = fts_solr_mail_user_deinit; + MODULE_CONTEXT_SET(user, fts_solr_user_module, fuser); +} + +static void fts_solr_mail_user_created(struct mail_user *user) +{ + const char *env; + + env = mail_user_plugin_getenv(user, "fts_solr"); + if (env != NULL) + fts_solr_mail_user_create(user, env); +} + +static struct mail_storage_hooks fts_solr_mail_storage_hooks = { + .mail_user_created = fts_solr_mail_user_created +}; + +void fts_solr_plugin_init(struct module *module) +{ + fts_backend_register(&fts_backend_solr); + fts_backend_register(&fts_backend_solr_old); + mail_storage_hooks_add(module, &fts_solr_mail_storage_hooks); +} + +void fts_solr_plugin_deinit(void) +{ + fts_backend_unregister(fts_backend_solr.name); + fts_backend_unregister(fts_backend_solr_old.name); + mail_storage_hooks_remove(&fts_solr_mail_storage_hooks); + if (solr_http_client != NULL) + http_client_deinit(&solr_http_client); + +} + +const char *fts_solr_plugin_dependencies[] = { "fts", NULL }; diff --git a/src/plugins/fts-solr/fts-solr-plugin.h b/src/plugins/fts-solr/fts-solr-plugin.h new file mode 100644 index 0000000..abc1b66 --- /dev/null +++ b/src/plugins/fts-solr/fts-solr-plugin.h @@ -0,0 +1,35 @@ +#ifndef FTS_SOLR_PLUGIN_H +#define FTS_SOLR_PLUGIN_H + +#include "module-context.h" +#include "mail-user.h" +#include "fts-api-private.h" + +#define FTS_SOLR_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_solr_user_module) +#define FTS_SOLR_USER_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_solr_user_module) + +struct fts_solr_settings { + const char *url, *default_ns_prefix, *rawlog_dir; + unsigned int batch_size; + bool use_libfts; + bool debug; + bool soft_commit; +}; + +struct fts_solr_user { + union mail_user_module_context module_ctx; + struct fts_solr_settings set; +}; + +extern const char *fts_solr_plugin_dependencies[]; +extern struct fts_backend fts_backend_solr; +extern struct fts_backend fts_backend_solr_old; +extern MODULE_CONTEXT_DEFINE(fts_solr_user_module, &mail_user_module_register); +extern struct http_client *solr_http_client; + +void fts_solr_plugin_init(struct module *module); +void fts_solr_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts-solr/solr-connection.c b/src/plugins/fts-solr/solr-connection.c new file mode 100644 index 0000000..41a4fee --- /dev/null +++ b/src/plugins/fts-solr/solr-connection.c @@ -0,0 +1,327 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "str.h" +#include "strescape.h" +#include "ioloop.h" +#include "istream.h" +#include "http-url.h" +#include "http-client.h" +#include "fts-solr-plugin.h" +#include "solr-connection.h" + +#include <expat.h> + +struct solr_lookup_context { + pool_t result_pool; + struct istream *payload; + struct io *io; + + int request_status; + + struct solr_response_parser *parser; + struct solr_result **results; +}; + +struct solr_connection_post { + struct solr_connection *conn; + + struct http_client_request *http_req; + int request_status; + + bool failed:1; +}; + +struct solr_connection { + struct event *event; + char *http_host; + in_port_t http_port; + char *http_base_url; + char *http_failure; + char *http_user; + char *http_password; + + bool debug:1; + bool posting:1; + bool http_ssl:1; +}; + +/* Regardless of the specified URL, make sure path ends in '/' */ +static char *solr_connection_create_http_base_url(struct http_url *http_url) +{ + if (http_url->path == NULL) + return i_strconcat("/", http_url->enc_query, NULL); + size_t len = strlen(http_url->path); + if (len > 0 && http_url->path[len-1] != '/') + return i_strconcat(http_url->path, "/", + http_url->enc_query, NULL); + /* http_url->path is NULL on empty path, so this is impossible. */ + i_assert(len != 0); + return i_strconcat(http_url->path, http_url->enc_query, NULL); +} + +int solr_connection_init(const struct fts_solr_settings *solr_set, + const struct ssl_iostream_settings *ssl_client_set, + struct event *event_parent, + struct solr_connection **conn_r, const char **error_r) +{ + struct http_client_settings http_set; + struct solr_connection *conn; + struct http_url *http_url; + const char *error; + + if (http_url_parse(solr_set->url, NULL, HTTP_URL_ALLOW_USERINFO_PART, + pool_datastack_create(), &http_url, &error) < 0) { + *error_r = t_strdup_printf( + "fts_solr: Failed to parse HTTP url: %s", error); + return -1; + } + + conn = i_new(struct solr_connection, 1); + conn->event = event_create(event_parent); + conn->http_host = i_strdup(http_url->host.name); + conn->http_port = http_url->port; + conn->http_base_url = solr_connection_create_http_base_url(http_url); + conn->http_ssl = http_url->have_ssl; + if (http_url->user != NULL) { + conn->http_user = i_strdup(http_url->user); + /* allow empty password */ + conn->http_password = i_strdup(http_url->password != NULL ? + http_url->password : ""); + } + + conn->debug = solr_set->debug; + + if (solr_http_client == NULL) { + i_zero(&http_set); + http_set.max_idle_time_msecs = 5*1000; + http_set.max_parallel_connections = 1; + http_set.max_pipelined_requests = 1; + http_set.max_redirects = 1; + http_set.max_attempts = 3; + http_set.connect_timeout_msecs = 5*1000; + http_set.request_timeout_msecs = 60*1000; + http_set.ssl = ssl_client_set; + http_set.debug = solr_set->debug; + http_set.rawlog_dir = solr_set->rawlog_dir; + http_set.event_parent = conn->event; + + /* FIXME: We should initialize a shared client instead. However, + this is currently not possible due to an obscure bug + in the blocking HTTP payload API, which causes + conflicts with other HTTP applications like FTS Tika. + Using a private client will provide a quick fix for + now. */ + solr_http_client = http_client_init_private(&http_set); + } + + *conn_r = conn; + return 0; +} + +void solr_connection_deinit(struct solr_connection **_conn) +{ + struct solr_connection *conn = *_conn; + + *_conn = NULL; + event_unref(&conn->event); + i_free(conn->http_host); + i_free(conn->http_base_url); + i_free(conn->http_user); + i_free(conn->http_password); + i_free(conn); +} + +static void solr_connection_payload_input(struct solr_lookup_context *lctx) +{ + int ret; + + /* read payload */ + ret = solr_response_parse(lctx->parser, &lctx->results); + + if (ret == 0) { + /* we will be called again for more data */ + } else { + if (lctx->payload->stream_errno != 0) { + i_assert(ret < 0); + i_error("fts_solr: " + "failed to read payload from HTTP server: %s", + i_stream_get_error(lctx->payload)); + } + if (ret < 0) + lctx->request_status = -1; + solr_response_parser_deinit(&lctx->parser); + io_remove(&lctx->io); + } +} + +static void +solr_connection_select_response(const struct http_response *response, + struct solr_lookup_context *lctx) +{ + if (response->status / 100 != 2) { + i_error("fts_solr: Lookup failed: %s", + http_response_get_message(response)); + lctx->request_status = -1; + return; + } + + if (response->payload == NULL) { + i_error("fts_solr: Lookup failed: Empty response payload"); + lctx->request_status = -1; + return; + } + + lctx->parser = solr_response_parser_init(lctx->result_pool, + response->payload); + lctx->payload = response->payload; + lctx->io = io_add_istream(response->payload, + solr_connection_payload_input, lctx); + solr_connection_payload_input(lctx); +} + +int solr_connection_select(struct solr_connection *conn, const char *query, + pool_t pool, struct solr_result ***box_results_r) +{ + struct solr_lookup_context lctx; + struct http_client_request *http_req; + const char *url; + + i_zero(&lctx); + lctx.result_pool = pool; + + i_free_and_null(conn->http_failure); + url = t_strconcat(conn->http_base_url, "select?", query, NULL); + + http_req = http_client_request(solr_http_client, "GET", + conn->http_host, url, + solr_connection_select_response, + &lctx); + if (conn->http_user != NULL) { + http_client_request_set_auth_simple( + http_req, conn->http_user, conn->http_password); + } + http_client_request_set_port(http_req, conn->http_port); + http_client_request_set_ssl(http_req, conn->http_ssl); + http_client_request_submit(http_req); + + lctx.request_status = 0; + http_client_wait(solr_http_client); + + if (lctx.request_status < 0) + return -1; + + *box_results_r = lctx.results; + return 0; +} + +static void +solr_connection_update_response(const struct http_response *response, + struct solr_connection_post *post) +{ + if (response->status / 100 != 2) { + i_error("fts_solr: Indexing failed: %s", + http_response_get_message(response)); + post->request_status = -1; + } +} + +static struct http_client_request * +solr_connection_post_request(struct solr_connection_post *post) +{ + struct solr_connection *conn = post->conn; + struct http_client_request *http_req; + const char *url; + + url = t_strconcat(conn->http_base_url, "update", NULL); + + http_req = http_client_request(solr_http_client, "POST", + conn->http_host, url, + solr_connection_update_response, post); + if (conn->http_user != NULL) { + http_client_request_set_auth_simple( + http_req, conn->http_user, conn->http_password); + } + http_client_request_set_port(http_req, conn->http_port); + http_client_request_set_ssl(http_req, conn->http_ssl); + http_client_request_add_header(http_req, "Content-Type", "text/xml"); + return http_req; +} + +struct solr_connection_post * +solr_connection_post_begin(struct solr_connection *conn) +{ + struct solr_connection_post *post; + + i_assert(!conn->posting); + conn->posting = TRUE; + + post = i_new(struct solr_connection_post, 1); + post->conn = conn; + post->http_req = solr_connection_post_request(post); + return post; +} + +void solr_connection_post_more(struct solr_connection_post *post, + const unsigned char *data, size_t size) +{ + i_assert(post->conn->posting); + + if (post->failed) + return; + + if (post->request_status == 0) { + (void)http_client_request_send_payload( + &post->http_req, data, size); + } + if (post->request_status < 0) + post->failed = TRUE; +} + +int solr_connection_post_end(struct solr_connection_post **_post) +{ + struct solr_connection_post *post = *_post; + struct solr_connection *conn = post->conn; + int ret = post->failed ? -1 : 0; + + i_assert(conn->posting); + + *_post = NULL; + + if (!post->failed) { + if (http_client_request_finish_payload(&post->http_req) < 0 || + post->request_status < 0) { + ret = -1; + } + } else { + http_client_request_abort(&post->http_req); + } + i_free(post); + + conn->posting = FALSE; + return ret; +} + +int solr_connection_post(struct solr_connection *conn, const char *cmd) +{ + struct istream *post_payload; + struct solr_connection_post post; + + i_assert(!conn->posting); + + i_zero(&post); + post.conn = conn; + + post.http_req = solr_connection_post_request(&post); + post_payload = i_stream_create_from_data(cmd, strlen(cmd)); + http_client_request_set_payload(post.http_req, post_payload, TRUE); + i_stream_unref(&post_payload); + http_client_request_submit(post.http_req); + + post.request_status = 0; + http_client_wait(solr_http_client); + + return post.request_status; +} diff --git a/src/plugins/fts-solr/solr-connection.h b/src/plugins/fts-solr/solr-connection.h new file mode 100644 index 0000000..ebad8be --- /dev/null +++ b/src/plugins/fts-solr/solr-connection.h @@ -0,0 +1,26 @@ +#ifndef SOLR_CONNECTION_H +#define SOLR_CONNECTION_H + +#include "solr-response.h" + +struct solr_connection; +struct fts_solr_settings; + +int solr_connection_init(const struct fts_solr_settings *solr_set, + const struct ssl_iostream_settings *ssl_client_set, + struct event *event_parent, + struct solr_connection **conn_r, + const char **error_r); +void solr_connection_deinit(struct solr_connection **conn); + +int solr_connection_select(struct solr_connection *conn, const char *query, + pool_t pool, struct solr_result ***box_results_r); +int solr_connection_post(struct solr_connection *conn, const char *cmd); + +struct solr_connection_post * +solr_connection_post_begin(struct solr_connection *conn); +void solr_connection_post_more(struct solr_connection_post *post, + const unsigned char *data, size_t size); +int solr_connection_post_end(struct solr_connection_post **post); + +#endif diff --git a/src/plugins/fts-solr/solr-response.c b/src/plugins/fts-solr/solr-response.c new file mode 100644 index 0000000..65a6a1f --- /dev/null +++ b/src/plugins/fts-solr/solr-response.c @@ -0,0 +1,372 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "str.h" +#include "istream.h" +#include "solr-response.h" + +#include <expat.h> + +#define MAX_VALUE_LEN 2048 + +enum solr_xml_response_state { + SOLR_XML_RESPONSE_STATE_ROOT, + SOLR_XML_RESPONSE_STATE_RESPONSE, + SOLR_XML_RESPONSE_STATE_RESULT, + SOLR_XML_RESPONSE_STATE_DOC, + SOLR_XML_RESPONSE_STATE_CONTENT +}; + +enum solr_xml_content_state { + SOLR_XML_CONTENT_STATE_NONE = 0, + SOLR_XML_CONTENT_STATE_UID, + SOLR_XML_CONTENT_STATE_SCORE, + SOLR_XML_CONTENT_STATE_MAILBOX, + SOLR_XML_CONTENT_STATE_NAMESPACE, + SOLR_XML_CONTENT_STATE_UIDVALIDITY, + SOLR_XML_CONTENT_STATE_ERROR +}; + +struct solr_response_parser { + XML_Parser xml_parser; + struct istream *input; + + enum solr_xml_response_state state; + enum solr_xml_content_state content_state; + int depth; + string_t *buffer; + + uint32_t uid, uidvalidity; + float score; + char *mailbox, *ns; + + pool_t result_pool; + /* box_id -> solr_result */ + HASH_TABLE(char *, struct solr_result *) mailboxes; + ARRAY(struct solr_result *) results; + + bool xml_failed:1; +}; + +static int +solr_xml_parse(struct solr_response_parser *parser, + const void *data, size_t size, bool done) +{ + enum XML_Error err; + int line, col; + + if (parser->xml_failed) + return -1; + + if (XML_Parse(parser->xml_parser, data, size, done ? 1 : 0) != 0) + return 0; + + err = XML_GetErrorCode(parser->xml_parser); + if (err != XML_ERROR_FINISHED) { + line = XML_GetCurrentLineNumber(parser->xml_parser); + col = XML_GetCurrentColumnNumber(parser->xml_parser); + i_error("fts_solr: Invalid XML input at %d:%d: %s " + "(near: %.*s)", line, col, XML_ErrorString(err), + (int)I_MIN(size, 128), (const char *)data); + parser->xml_failed = TRUE; + return -1; + } + return 0; +} + +static const char *attrs_get_name(const char **attrs) +{ + for (; *attrs != NULL; attrs += 2) { + if (strcmp(attrs[0], "name") == 0) + return attrs[1]; + } + return ""; +} + +static void +solr_lookup_xml_start(void *context, const char *name, const char **attrs) +{ + struct solr_response_parser *parser = context; + const char *name_attr; + + i_assert(parser->depth >= (int)parser->state); + + parser->depth++; + if (parser->depth - 1 > (int)parser->state) { + /* skipping over unwanted elements */ + return; + } + + str_truncate(parser->buffer, 0); + + /* response -> result -> doc */ + switch (parser->state) { + case SOLR_XML_RESPONSE_STATE_ROOT: + if (strcmp(name, "response") == 0) + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESPONSE: + if (strcmp(name, "result") == 0) + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESULT: + if (strcmp(name, "doc") == 0) { + parser->state++; + parser->uid = 0; + parser->score = 0; + i_free_and_null(parser->mailbox); + i_free_and_null(parser->ns); + parser->uidvalidity = 0; + } + break; + case SOLR_XML_RESPONSE_STATE_DOC: + name_attr = attrs_get_name(attrs); + if (strcmp(name_attr, "uid") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_UID; + else if (strcmp(name_attr, "score") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_SCORE; + else if (strcmp(name_attr, "box") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_MAILBOX; + else if (strcmp(name_attr, "ns") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_NAMESPACE; + else if (strcmp(name_attr, "uidv") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY; + else + break; + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_CONTENT: + break; + } +} + +static struct solr_result * +solr_result_get(struct solr_response_parser *parser, const char *box_id) +{ + struct solr_result *result; + char *box_id_dup; + + result = hash_table_lookup(parser->mailboxes, box_id); + if (result != NULL) + return result; + + box_id_dup = p_strdup(parser->result_pool, box_id); + result = p_new(parser->result_pool, struct solr_result, 1); + result->box_id = box_id_dup; + p_array_init(&result->uids, parser->result_pool, 32); + p_array_init(&result->scores, parser->result_pool, 32); + hash_table_insert(parser->mailboxes, box_id_dup, result); + array_push_back(&parser->results, &result); + return result; +} + +static int solr_lookup_add_doc(struct solr_response_parser *parser) +{ + struct fts_score_map *score; + struct solr_result *result; + const char *box_id; + + if (parser->uid == 0) { + i_error("fts_solr: uid missing from inside doc"); + return -1; + } + + if (parser->mailbox == NULL) { + /* looking up from a single mailbox only */ + box_id = ""; + } else if (parser->uidvalidity != 0) { + /* old style lookup */ + string_t *str = t_str_new(64); + str_printfa(str, "%u\001", parser->uidvalidity); + str_append(str, parser->mailbox); + if (parser->ns != NULL) + str_printfa(str, "\001%s", parser->ns); + box_id = str_c(str); + } else { + /* new style lookup */ + box_id = parser->mailbox; + } + result = solr_result_get(parser, box_id); + + if (seq_range_array_add(&result->uids, parser->uid)) { + /* duplicate result */ + } else if (parser->score != 0) { + score = array_append_space(&result->scores); + score->uid = parser->uid; + score->score = parser->score; + } + return 0; +} + +static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED) +{ + struct solr_response_parser *parser = context; + string_t *buf = parser->buffer; + int ret; + + switch (parser->content_state) { + case SOLR_XML_CONTENT_STATE_NONE: + break; + case SOLR_XML_CONTENT_STATE_UID: + if (str_to_uint32(str_c(buf), &parser->uid) < 0 || + parser->uid == 0) { + i_error("fts_solr: received invalid uid '%s'", + str_c(buf)); + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + } + break; + case SOLR_XML_CONTENT_STATE_SCORE: + parser->score = strtod(str_c(buf), NULL); + break; + case SOLR_XML_CONTENT_STATE_MAILBOX: + parser->mailbox = i_strdup(str_c(buf)); + break; + case SOLR_XML_CONTENT_STATE_NAMESPACE: + parser->ns = i_strdup(str_c(buf)); + break; + case SOLR_XML_CONTENT_STATE_UIDVALIDITY: + if (str_to_uint32(str_c(buf), &parser->uidvalidity) < 0) + i_error("fts_solr: received invalid uidvalidity"); + break; + case SOLR_XML_CONTENT_STATE_ERROR: + return; + } + + i_assert(parser->depth >= (int)parser->state); + + if (parser->state == SOLR_XML_RESPONSE_STATE_CONTENT && + parser->content_state == SOLR_XML_CONTENT_STATE_MAILBOX && + parser->mailbox == NULL) { + /* mailbox is namespace prefix */ + parser->mailbox = i_strdup(""); + } + + if (parser->depth == (int)parser->state) { + ret = 0; + if (parser->state == SOLR_XML_RESPONSE_STATE_DOC) { + T_BEGIN { + ret = solr_lookup_add_doc(parser); + } T_END; + } + parser->state--; + if (ret < 0) + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + else + parser->content_state = SOLR_XML_CONTENT_STATE_NONE; + } + parser->depth--; +} + +static void solr_lookup_xml_data(void *context, const char *str, int len) +{ + struct solr_response_parser *parser = context; + + switch (parser->content_state) { + case SOLR_XML_CONTENT_STATE_NONE: + case SOLR_XML_CONTENT_STATE_ERROR: + /* ignore element data */ + return; + case SOLR_XML_CONTENT_STATE_UID: + case SOLR_XML_CONTENT_STATE_SCORE: + case SOLR_XML_CONTENT_STATE_MAILBOX: + case SOLR_XML_CONTENT_STATE_NAMESPACE: + case SOLR_XML_CONTENT_STATE_UIDVALIDITY: + break; + } + + if (str_len(parser->buffer) + len > MAX_VALUE_LEN) { + i_error("fts_solr: XML element data length out of range"); + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + return; + } + + str_append_data(parser->buffer, str, len); +} + +struct solr_response_parser * +solr_response_parser_init(pool_t result_pool, struct istream *input) +{ + struct solr_response_parser *parser; + + parser = i_new(struct solr_response_parser, 1); + + parser->xml_parser = XML_ParserCreate("UTF-8"); + if (parser->xml_parser == NULL) { + i_fatal_status(FATAL_OUTOFMEM, + "fts_solr: Failed to allocate XML parser"); + } + + parser->buffer = str_new(default_pool, 256); + hash_table_create(&parser->mailboxes, default_pool, 0, + str_hash, strcmp); + + parser->result_pool = result_pool; + pool_ref(result_pool); + p_array_init(&parser->results, result_pool, 32); + + parser->input = input; + i_stream_ref(input); + + parser->xml_failed = FALSE; + XML_SetElementHandler(parser->xml_parser, + solr_lookup_xml_start, solr_lookup_xml_end); + XML_SetCharacterDataHandler(parser->xml_parser, solr_lookup_xml_data); + XML_SetUserData(parser->xml_parser, parser); + + return parser; +} + +void solr_response_parser_deinit(struct solr_response_parser **_parser) +{ + struct solr_response_parser *parser = *_parser; + + *_parser = NULL; + + if (parser == NULL) + return; + + str_free(&parser->buffer); + hash_table_destroy(&parser->mailboxes); + XML_ParserFree(parser->xml_parser); + i_stream_unref(&parser->input); + pool_unref(&parser->result_pool); + i_free(parser); +} + +int solr_response_parse(struct solr_response_parser *parser, + struct solr_result ***box_results_r) +{ + const unsigned char *data; + size_t size; + int stream_errno, ret; + + i_assert(parser->input != NULL); + i_zero(box_results_r); + + /* read payload */ + while ((ret = i_stream_read_more(parser->input, &data, &size)) > 0) { + (void)solr_xml_parse(parser, data, size, FALSE); + i_stream_skip(parser->input, size); + } + + if (ret == 0) { + /* we will be called again for more data */ + return 0; + } + + stream_errno = parser->input->stream_errno; + i_stream_unref(&parser->input); + + if (parser->content_state == SOLR_XML_CONTENT_STATE_ERROR) + return -1; + if (stream_errno != 0) + return -1; + + ret = solr_xml_parse(parser, "", 0, TRUE); + + array_append_zero(&parser->results); + *box_results_r = array_front_modifiable(&parser->results); + return (ret == 0 ? 1 : -1); +} diff --git a/src/plugins/fts-solr/solr-response.h b/src/plugins/fts-solr/solr-response.h new file mode 100644 index 0000000..1d5cdd5 --- /dev/null +++ b/src/plugins/fts-solr/solr-response.h @@ -0,0 +1,23 @@ +#ifndef SOLR_RESPONSE_H +#define SOLR_RESPONSE_H + +#include "seq-range-array.h" +#include "fts-api.h" + +struct solr_response_parser; + +struct solr_result { + const char *box_id; + + ARRAY_TYPE(seq_range) uids; + ARRAY_TYPE(fts_score_map) scores; +}; + +struct solr_response_parser * +solr_response_parser_init(pool_t result_pool, struct istream *input); +void solr_response_parser_deinit(struct solr_response_parser **_parser); + +int solr_response_parse(struct solr_response_parser *parser, + struct solr_result ***box_results_r); + +#endif diff --git a/src/plugins/fts-solr/test-solr-response.c b/src/plugins/fts-solr/test-solr-response.c new file mode 100644 index 0000000..8add6db --- /dev/null +++ b/src/plugins/fts-solr/test-solr-response.c @@ -0,0 +1,295 @@ +/* Copyright (c) 2019 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "array.h" +#include "istream.h" +#include "solr-response.h" +#include "test-common.h" + +#include <unistd.h> + +static bool debug = FALSE; + +struct solr_response_test_result { + const char *box_id; + struct fts_score_map *scores; +}; + +struct solr_response_test { + const char *input; + + struct solr_response_test_result *results; +}; + +struct fts_score_map test_results1_scores[] = { + { .score = 0.042314477, .uid = 1 }, + { .score = 0.06996078, .uid = 2, }, + { .score = 0.020381179, .uid = 3 }, + { .score = 0.020381179, .uid = 4 }, + { .score = 5.510487E-4, .uid = 6 }, + { .score = 0.0424253, .uid = 7 }, + { .score = 0.04215967, .uid = 8 }, + { .score = 0.02470572, .uid = 9 }, + { .score = 0.05936369, .uid = 10 }, + { .score = 0.048221838, .uid = 11 }, + { .score = 7.793006E-4, .uid = 12 }, + { .score = 2.7900032E-4, .uid = 13 }, + { .score = 0.02088323, .uid = 14 }, + { .score = 0.011646388, .uid = 15 }, + { .score = 1.3776218E-4, .uid = 17 }, + { .score = 2.386111E-4, .uid = 19 }, + { .score = 2.7552436E-4, .uid = 20 }, + { .score = 4.772222E-4, .uid = 23 }, + { .score = 4.772222E-4, .uid = 24 }, + { .score = 5.965277E-4, .uid = 25 }, + { .score = 0.0471366, .uid = 26 }, + { .score = 0.0471366, .uid = 50 }, + { .score = 0.047274362, .uid = 51 }, + { .score = 0.053303234, .uid = 56 }, + { .score = 5.445528E-4, .uid = 62 }, + { .score = 2.922377E-4, .uid = 66 }, + { .score = 0.02623833, .uid = 68 }, + { .score = 3.4440547E-4, .uid = 70 }, + { .score = 2.922377E-4, .uid = 74 }, + { .score = 2.7552436E-4, .uid = 76 }, + { .score = 1.3776218E-4, .uid = 77 }, + { .score = 0, .uid = 0 }, +}; + +struct solr_response_test_result test_results1[] = { + { + .box_id = "", + .scores = test_results1_scores, + }, + { + .box_id = NULL + } +}; + +static const struct solr_response_test tests[] = { + { + .input = + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<response>\n" + "<lst name=\"responseHeader\"><int name=\"status\"" + ">0</int><int name=\"QTime\">3</int><lst name=\"pa" + "rams\"><str name=\"wt\">xml</str><str name=\"fl\"" + ">uid,score</str><str name=\"rows\">4023</str><str" + " name=\"sort\">uid asc</str><str name=\"q\">{!luc" + "ene q.op=AND}subject:pierreserveur OR from:pierre" + "serveur OR to:pierreserveur OR cc:pierreserveur O" + "R bcc:pierreserveur OR body:pierreserveur</str><s" + "tr name=\"fq\">+box:fa74101044cb607d5f0900001de14" + "712 +user:jpierreserveur</str></lst></lst><result" + " name=\"response\" numFound=\"31\" start=\"0\" ma" + "xScore=\"0.06996078\"><doc><float name=\"score\">" + "0.042314477</float><long name=\"uid\">1</long></d" + "oc><doc><float name=\"score\">0.06996078</float><" + "long name=\"uid\">2</long></doc><doc><float name=" + "\"score\">0.020381179</float><long name=\"uid\">3" + "</long></doc><doc><float name=\"score\">0.0203811" + "79</float><long name=\"uid\">4</long></doc><doc><" + "float name=\"score\">5.510487E-4</float><long nam" + "e=\"uid\">6</long></doc><doc><float name=\"score\"" + ">0.0424253</float><long name=\"uid\">7</long></do" + "c><doc><float name=\"score\">0.04215967</float><l" + "ong name=\"uid\">8</long></doc><doc><float name=\"" + "score\">0.02470572</float><long name=\"uid\">9</l" + "ong></doc><doc><float name=\"score\">0.05936369</" + "float><long name=\"uid\">10</long></doc><doc><flo" + "at name=\"score\">0.048221838</float><long name=\"" + "uid\">11</long></doc><doc><float name=\"score\">7" + ".793006E-4</float><long name=\"uid\">12</long></d" + "oc><doc><float name=\"score\">2.7900032E-4</float" + "><long name=\"uid\">13</long></doc><doc><float na" + "me=\"score\">0.02088323</float><long name=\"uid\"" + ">14</long></doc><doc><float name=\"score\">0.0116" + "46388</float><long name=\"uid\">15</long></doc><d" + "oc><float name=\"score\">1.3776218E-4</float><lon" + "g name=\"uid\">17</long></doc><doc><float name=\"" + "score\">2.386111E-4</float><long name=\"uid\">19<" + "/long></doc><doc><float name=\"score\">2.7552436E" + "-4</float><long name=\"uid\">20</long></doc><doc>" + "<float name=\"score\">4.772222E-4</float><long na" + "me=\"uid\">23</long></doc><doc><float name=\"scor" + "e\">4.772222E-4</float><long name=\"uid\">24</lon" + "g></doc><doc><float name=\"score\">5.965277E-4</f" + "loat><long name=\"uid\">25</long></doc><doc><floa" + "t name=\"score\">0.0471366</float><long name=\"ui" + "d\">26</long></doc><doc><float name=\"score\">0.0" + "471366</float><long name=\"uid\">50</long></doc><" + "doc><float name=\"score\">0.047274362</float><lon" + "g name=\"uid\">51</long></doc><doc><float name=\"" + "score\">0.053303234</float><long name=\"uid\">56<" + "/long></doc><doc><float name=\"score\">5.445528E-" + "4</float><long name=\"uid\">62</long></doc><doc><" + "float name=\"score\">2.922377E-4</float><long nam" + "e=\"uid\">66</long></doc><doc><float name=\"score" + "\">0.02623833</float><long name=\"uid\">68</long>" + "</doc><doc><float name=\"score\">3.4440547E-4</fl" + "oat><long name=\"uid\">70</long></doc><doc><float" + " name=\"score\">2.922377E-4</float><long name=\"u" + "id\">74</long></doc><doc><float name=\"score\">2." + "7552436E-4</float><long name=\"uid\">76</long></d" + "oc><doc><float name=\"score\">1.3776218E-4</float" + "><long name=\"uid\">77</long></doc></result>\n" + "</response>\n", + .results = test_results1, + }, +}; + +static const unsigned tests_count = N_ELEMENTS(tests); + +static void +test_solr_result(const struct solr_response_test_result *test_results, + struct solr_result **parse_results) +{ + unsigned int rcount, i; + + for (i = 0; test_results[i].box_id != NULL; i++); + rcount = i; + + for (i = 0; parse_results[i] != NULL; i++); + + test_out_quiet("result count equal", i == rcount); + if (test_has_failed()) + return; + + for (i = 0; i < rcount && parse_results[i] != NULL; i++) { + unsigned int scount, j; + const struct fts_score_map *tscores = test_results[i].scores; + const struct fts_score_map *pscores = + array_get(&parse_results[i]->scores, &scount); + + test_out_quiet(t_strdup_printf("box id equal[%u]", i), + strcmp(test_results[i].box_id, + parse_results[i]->box_id) == 0); + + for (j = 0; tscores[j].uid != 0; j++); + test_out_quiet(t_strdup_printf("scores count equal[%u]", i), + j == scount); + if (j != scount) + continue; + + for (j = 0; j < scount; j++) { + test_out_quiet( + t_strdup_printf("score uid equal[%u/%u]", i, j), + pscores[j].uid == tscores[j].uid); + test_out_quiet( + t_strdup_printf("score value equal[%u/%u]", i, j), + pscores[j].score == tscores[j].score); + } + } +} + +static void test_solr_response_parser(void) +{ + unsigned int i; + + for (i = 0; i < tests_count; i++) T_BEGIN { + const struct solr_response_test *test; + const char *text; + unsigned int pos, text_len; + struct istream *input; + struct solr_response_parser *parser; + struct solr_result **box_results; + const char *error = NULL; + pool_t pool; + int ret = 0; + + test = &tests[i]; + text = test->input; + text_len = strlen(text); + + test_begin(t_strdup_printf("solr response [%d]", i)); + + input = test_istream_create_data(text, text_len); + pool = pool_alloconly_create("solr response", 4096); + parser = solr_response_parser_init(pool, input); + + ret = solr_response_parse(parser, &box_results); + + test_out_reason("parse ok (buffer)", ret > 0, error); + if (ret > 0) + test_solr_result(test->results, box_results); + + solr_response_parser_deinit(&parser); + pool_unref(&pool); + i_stream_unref(&input); + + input = test_istream_create_data(text, text_len); + pool = pool_alloconly_create("solr response", 4096); + parser = solr_response_parser_init(pool, input); + + ret = 0; + for (pos = 0; pos <= text_len && ret == 0; pos++) { + test_istream_set_size(input, pos); + ret = solr_response_parse(parser, &box_results); + } + + test_out_reason("parse ok (trickle)", ret > 0, error); + if (ret > 0) + test_solr_result(test->results, box_results); + + solr_response_parser_deinit(&parser); + pool_unref(&pool); + i_stream_unref(&input); + + test_end(); + + } T_END; +} + +static void test_solr_response_file(const char *file) +{ + pool_t pool; + struct istream *input; + struct solr_response_parser *parser; + struct solr_result **box_results; + int ret = 0; + + pool = pool_alloconly_create("solr response", 4096); + input = i_stream_create_file(file, 1024); + parser = solr_response_parser_init(pool, input); + + while ((ret = solr_response_parse(parser, &box_results)) == 0); + + if (ret < 0) + i_fatal("Failed to read response"); + + solr_response_parser_deinit(&parser); + i_stream_unref(&input); + pool_unref(&pool); +} + +int main(int argc, char *argv[]) +{ + int c; + + static void (*test_functions[])(void) = { + test_solr_response_parser, + NULL + }; + + while ((c = getopt(argc, argv, "D")) > 0) { + switch (c) { + case 'D': + debug = TRUE; + break; + default: + i_fatal("Usage: %s [-D]", argv[0]); + } + } + argc -= optind; + argv += optind; + + if (argc > 0) { + test_solr_response_file(argv[0]); + return 0; + } + + return test_run(test_functions); +} + + diff --git a/src/plugins/fts-squat/Makefile.am b/src/plugins/fts-squat/Makefile.am new file mode 100644 index 0000000..6c8181c --- /dev/null +++ b/src/plugins/fts-squat/Makefile.am @@ -0,0 +1,47 @@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts + +NOPLUGIN_LDFLAGS = +lib21_fts_squat_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib21_fts_squat_plugin.la + +if DOVECOT_PLUGIN_DEPS +lib21_fts_squat_plugin_la_LIBADD = \ + ../fts/lib20_fts_plugin.la +endif + +lib21_fts_squat_plugin_la_SOURCES = \ + fts-squat-plugin.c \ + fts-backend-squat.c \ + squat-trie.c \ + squat-uidlist.c + +noinst_HEADERS = \ + fts-squat-plugin.h \ + squat-trie.h \ + squat-trie-private.h \ + squat-uidlist.h + +noinst_PROGRAMS = squat-test + +squat_test_SOURCES = \ + squat-test.c + +common_objects = \ + squat-trie.lo \ + squat-uidlist.lo + +squat_test_LDADD = \ + $(common_objects) \ + $(LIBDOVECOT_STORAGE) \ + $(LIBDOVECOT) +squat_test_DEPENDENCIES = \ + $(common_objects) \ + $(LIBDOVECOT_STORAGE_DEPS) \ + $(LIBDOVECOT_DEPS) diff --git a/src/plugins/fts-squat/Makefile.in b/src/plugins/fts-squat/Makefile.in new file mode 100644 index 0000000..443a7d0 --- /dev/null +++ b/src/plugins/fts-squat/Makefile.in @@ -0,0 +1,883 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = squat-test$(EXEEXT) +subdir = src/plugins/fts-squat +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@DOVECOT_PLUGIN_DEPS_TRUE@lib21_fts_squat_plugin_la_DEPENDENCIES = \ +@DOVECOT_PLUGIN_DEPS_TRUE@ ../fts/lib20_fts_plugin.la +am_lib21_fts_squat_plugin_la_OBJECTS = fts-squat-plugin.lo \ + fts-backend-squat.lo squat-trie.lo squat-uidlist.lo +lib21_fts_squat_plugin_la_OBJECTS = \ + $(am_lib21_fts_squat_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib21_fts_squat_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib21_fts_squat_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_squat_test_OBJECTS = squat-test.$(OBJEXT) +squat_test_OBJECTS = $(am_squat_test_OBJECTS) +am__DEPENDENCIES_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/fts-backend-squat.Plo \ + ./$(DEPDIR)/fts-squat-plugin.Plo ./$(DEPDIR)/squat-test.Po \ + ./$(DEPDIR)/squat-trie.Plo ./$(DEPDIR)/squat-uidlist.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(lib21_fts_squat_plugin_la_SOURCES) $(squat_test_SOURCES) +DIST_SOURCES = $(lib21_fts_squat_plugin_la_SOURCES) \ + $(squat_test_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = @FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = @PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts + +lib21_fts_squat_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib21_fts_squat_plugin.la + +@DOVECOT_PLUGIN_DEPS_TRUE@lib21_fts_squat_plugin_la_LIBADD = \ +@DOVECOT_PLUGIN_DEPS_TRUE@ ../fts/lib20_fts_plugin.la + +lib21_fts_squat_plugin_la_SOURCES = \ + fts-squat-plugin.c \ + fts-backend-squat.c \ + squat-trie.c \ + squat-uidlist.c + +noinst_HEADERS = \ + fts-squat-plugin.h \ + squat-trie.h \ + squat-trie-private.h \ + squat-uidlist.h + +squat_test_SOURCES = \ + squat-test.c + +common_objects = \ + squat-trie.lo \ + squat-uidlist.lo + +squat_test_LDADD = \ + $(common_objects) \ + $(LIBDOVECOT_STORAGE) \ + $(LIBDOVECOT) + +squat_test_DEPENDENCIES = \ + $(common_objects) \ + $(LIBDOVECOT_STORAGE_DEPS) \ + $(LIBDOVECOT_DEPS) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts-squat/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts-squat/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +lib21_fts_squat_plugin.la: $(lib21_fts_squat_plugin_la_OBJECTS) $(lib21_fts_squat_plugin_la_DEPENDENCIES) $(EXTRA_lib21_fts_squat_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib21_fts_squat_plugin_la_LINK) -rpath $(moduledir) $(lib21_fts_squat_plugin_la_OBJECTS) $(lib21_fts_squat_plugin_la_LIBADD) $(LIBS) + +squat-test$(EXEEXT): $(squat_test_OBJECTS) $(squat_test_DEPENDENCIES) $(EXTRA_squat_test_DEPENDENCIES) + @rm -f squat-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(squat_test_OBJECTS) $(squat_test_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-squat.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-squat-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/squat-test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/squat-trie.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/squat-uidlist.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/fts-backend-squat.Plo + -rm -f ./$(DEPDIR)/fts-squat-plugin.Plo + -rm -f ./$(DEPDIR)/squat-test.Po + -rm -f ./$(DEPDIR)/squat-trie.Plo + -rm -f ./$(DEPDIR)/squat-uidlist.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/fts-backend-squat.Plo + -rm -f ./$(DEPDIR)/fts-squat-plugin.Plo + -rm -f ./$(DEPDIR)/squat-test.Po + -rm -f ./$(DEPDIR)/squat-trie.Plo + -rm -f ./$(DEPDIR)/squat-uidlist.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-moduleLTLIBRARIES \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts-squat/fts-backend-squat.c b/src/plugins/fts-squat/fts-backend-squat.c new file mode 100644 index 0000000..fbd7bbe --- /dev/null +++ b/src/plugins/fts-squat/fts-backend-squat.c @@ -0,0 +1,497 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "unichar.h" +#include "mail-user.h" +#include "mail-namespace.h" +#include "mail-storage-private.h" +#include "mail-search-build.h" +#include "squat-trie.h" +#include "fts-squat-plugin.h" + + +#define SQUAT_FILE_PREFIX "dovecot.index.search" + +struct squat_fts_backend { + struct fts_backend backend; + + struct mailbox *box; + struct squat_trie *trie; + + unsigned int partial_len, full_len; + bool refresh; +}; + +struct squat_fts_backend_update_context { + struct fts_backend_update_context ctx; + struct squat_trie_build_context *build_ctx; + + enum squat_index_type squat_type; + uint32_t uid; + string_t *hdr; + + bool failed; +}; + +static struct fts_backend *fts_backend_squat_alloc(void) +{ + struct squat_fts_backend *backend; + + backend = i_new(struct squat_fts_backend, 1); + backend->backend = fts_backend_squat; + return &backend->backend; +} + +static int +fts_backend_squat_init(struct fts_backend *_backend, const char **error_r) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + const char *const *tmp, *env; + unsigned int len; + + env = mail_user_plugin_getenv(_backend->ns->user, "fts_squat"); + if (env == NULL) + return 0; + + for (tmp = t_strsplit_spaces(env, " "); *tmp != NULL; tmp++) { + if (str_begins(*tmp, "partial=")) { + if (str_to_uint(*tmp + 8, &len) < 0 || len == 0) { + *error_r = t_strdup_printf( + "Invalid partial length: %s", *tmp + 8); + return -1; + } + backend->partial_len = len; + } else if (str_begins(*tmp, "full=")) { + if (str_to_uint(*tmp + 5, &len) < 0 || len == 0) { + *error_r = t_strdup_printf( + "Invalid full length: %s", *tmp + 5); + return -1; + } + backend->full_len = len; + } else { + *error_r = t_strdup_printf("Invalid setting: %s", *tmp); + return -1; + } + } + return 0; +} + +static void +fts_backend_squat_unset_box(struct squat_fts_backend *backend) +{ + if (backend->trie != NULL) + squat_trie_deinit(&backend->trie); + backend->box = NULL; +} + +static void fts_backend_squat_deinit(struct fts_backend *_backend) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + + fts_backend_squat_unset_box(backend); + i_free(backend); +} + +static int +fts_backend_squat_set_box(struct squat_fts_backend *backend, + struct mailbox *box) +{ + const struct mailbox_permissions *perm; + struct mail_storage *storage; + struct mailbox_status status; + const char *path; + enum squat_index_flags flags = 0; + int ret; + + if (backend->box == box) + { + if (backend->refresh) { + ret = squat_trie_refresh(backend->trie); + if (ret < 0) + return ret; + backend->refresh = FALSE; + } + return 0; + } + fts_backend_squat_unset_box(backend); + backend->refresh = FALSE; + if (box == NULL) + return 0; + + perm = mailbox_get_permissions(box); + storage = mailbox_get_storage(box); + if (mailbox_get_path_to(box, MAILBOX_LIST_PATH_TYPE_INDEX, &path) <= 0) + i_unreached(); /* fts already checked this */ + + mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status); + if (storage->set->mmap_disable) + flags |= SQUAT_INDEX_FLAG_MMAP_DISABLE; + if (storage->set->mail_nfs_index) + flags |= SQUAT_INDEX_FLAG_NFS_FLUSH; + if (storage->set->dotlock_use_excl) + flags |= SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL; + + backend->trie = + squat_trie_init(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL), + status.uidvalidity, + storage->set->parsed_lock_method, + flags, perm->file_create_mode, + perm->file_create_gid); + + if (backend->partial_len != 0) + squat_trie_set_partial_len(backend->trie, backend->partial_len); + if (backend->full_len != 0) + squat_trie_set_full_len(backend->trie, backend->full_len); + backend->box = box; + return squat_trie_open(backend->trie); +} + +static int +fts_backend_squat_get_last_uid(struct fts_backend *_backend, + struct mailbox *box, uint32_t *last_uid_r) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + + int ret = fts_backend_squat_set_box(backend, box); + if (ret < 0) + return -1; + return squat_trie_get_last_uid(backend->trie, last_uid_r); +} + +static struct fts_backend_update_context * +fts_backend_squat_update_init(struct fts_backend *_backend) +{ + struct squat_fts_backend_update_context *ctx; + + ctx = i_new(struct squat_fts_backend_update_context, 1); + ctx->ctx.backend = _backend; + ctx->hdr = str_new(default_pool, 1024*32); + return &ctx->ctx; +} + +static int get_all_msg_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids) +{ + struct mailbox_transaction_context *t; + struct mail_search_context *search_ctx; + struct mail_search_args *search_args; + struct mail *mail; + int ret; + + t = mailbox_transaction_begin(box, 0, __func__); + + search_args = mail_search_build_init(); + mail_search_build_add_all(search_args); + search_ctx = mailbox_search_init(t, search_args, NULL, 0, NULL); + mail_search_args_unref(&search_args); + + while (mailbox_search_next(search_ctx, &mail)) { + /* *2 because even/odd is for body/header */ + seq_range_array_add_range(uids, mail->uid * 2, + mail->uid * 2 + 1); + } + ret = mailbox_search_deinit(&search_ctx); + (void)mailbox_transaction_commit(&t); + return ret; +} + +static int +fts_backend_squat_update_uid_changed(struct squat_fts_backend_update_context *ctx) +{ + int ret = 0; + + if (ctx->uid == 0) + return 0; + + if (squat_trie_build_more(ctx->build_ctx, ctx->uid, + SQUAT_INDEX_TYPE_HEADER, + str_data(ctx->hdr), str_len(ctx->hdr)) < 0) + ret = -1; + str_truncate(ctx->hdr, 0); + return ret; +} + +static int +fts_backend_squat_build_deinit(struct squat_fts_backend_update_context *ctx) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)ctx->ctx.backend; + ARRAY_TYPE(seq_range) uids; + int ret = 0; + + if (ctx->build_ctx == NULL) + return 0; + + if (fts_backend_squat_update_uid_changed(ctx) < 0) + ret = -1; + + i_array_init(&uids, 1024); + if (get_all_msg_uids(backend->box, &uids) < 0) { + (void)squat_trie_build_deinit(&ctx->build_ctx, NULL); + ret = -1; + } else { + seq_range_array_invert(&uids, 2, (uint32_t)-2); + if (squat_trie_build_deinit(&ctx->build_ctx, &uids) < 0) + ret = -1; + } + array_free(&uids); + return ret; +} + +static int +fts_backend_squat_update_deinit(struct fts_backend_update_context *_ctx) +{ + struct squat_fts_backend_update_context *ctx = + (struct squat_fts_backend_update_context *)_ctx; + int ret = ctx->failed ? -1 : 0; + + if (fts_backend_squat_build_deinit(ctx) < 0) + ret = -1; + str_free(&ctx->hdr); + i_free(ctx); + return ret; +} + +static void +fts_backend_squat_update_set_mailbox(struct fts_backend_update_context *_ctx, + struct mailbox *box) +{ + struct squat_fts_backend_update_context *ctx = + (struct squat_fts_backend_update_context *)_ctx; + struct squat_fts_backend *backend = + (struct squat_fts_backend *)ctx->ctx.backend; + + if (fts_backend_squat_build_deinit(ctx) < 0) + ctx->failed = TRUE; + if (fts_backend_squat_set_box(backend, box) < 0) + ctx->failed = TRUE; + else if (box != NULL) { + if (squat_trie_build_init(backend->trie, &ctx->build_ctx) < 0) + ctx->failed = TRUE; + } +} + +static void +fts_backend_squat_update_expunge(struct fts_backend_update_context *_ctx ATTR_UNUSED, + uint32_t last_uid ATTR_UNUSED) +{ + /* FIXME */ +} + +static bool +fts_backend_squat_update_set_build_key(struct fts_backend_update_context *_ctx, + const struct fts_backend_build_key *key) +{ + struct squat_fts_backend_update_context *ctx = + (struct squat_fts_backend_update_context *)_ctx; + + if (ctx->failed) + return FALSE; + + if (key->uid != ctx->uid) { + if (fts_backend_squat_update_uid_changed(ctx) < 0) + ctx->failed = TRUE; + } + + switch (key->type) { + case FTS_BACKEND_BUILD_KEY_HDR: + case FTS_BACKEND_BUILD_KEY_MIME_HDR: + str_printfa(ctx->hdr, "%s: ", key->hdr_name); + ctx->squat_type = SQUAT_INDEX_TYPE_HEADER; + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART: + ctx->squat_type = SQUAT_INDEX_TYPE_BODY; + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY: + i_unreached(); + } + ctx->uid = key->uid; + return TRUE; +} + +static void +fts_backend_squat_update_unset_build_key(struct fts_backend_update_context *_ctx) +{ + struct squat_fts_backend_update_context *ctx = + (struct squat_fts_backend_update_context *)_ctx; + + if (ctx->squat_type == SQUAT_INDEX_TYPE_HEADER) + str_append_c(ctx->hdr, '\n'); +} + +static int +fts_backend_squat_update_build_more(struct fts_backend_update_context *_ctx, + const unsigned char *data, size_t size) +{ + struct squat_fts_backend_update_context *ctx = + (struct squat_fts_backend_update_context *)_ctx; + + if (ctx->squat_type == SQUAT_INDEX_TYPE_HEADER) { + str_append_data(ctx->hdr, data, size); + return 0; + } + return squat_trie_build_more(ctx->build_ctx, ctx->uid, ctx->squat_type, + data, size); +} + +static int fts_backend_squat_refresh(struct fts_backend *_backend) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + + backend->refresh = TRUE; + return 0; +} + +static int fts_backend_squat_optimize(struct fts_backend *_backend ATTR_UNUSED) +{ + /* FIXME: drop expunged messages */ + return 0; +} + +static int squat_lookup_arg(struct squat_fts_backend *backend, + const struct mail_search_arg *arg, bool and_args, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + enum squat_index_type squat_type; + ARRAY_TYPE(seq_range) tmp_definite_uids, tmp_maybe_uids; + string_t *dtc; + uint32_t last_uid; + int ret; + + switch (arg->type) { + case SEARCH_TEXT: + squat_type = SQUAT_INDEX_TYPE_HEADER | + SQUAT_INDEX_TYPE_BODY; + break; + case SEARCH_BODY: + squat_type = SQUAT_INDEX_TYPE_BODY; + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + squat_type = SQUAT_INDEX_TYPE_HEADER; + break; + default: + return 0; + } + + i_array_init(&tmp_definite_uids, 128); + i_array_init(&tmp_maybe_uids, 128); + + dtc = t_str_new(128); + if (backend->backend.ns->user-> + default_normalizer(arg->value.str, strlen(arg->value.str), dtc) < 0) + i_panic("squat: search key not utf8"); + + ret = squat_trie_lookup(backend->trie, str_c(dtc), squat_type, + &tmp_definite_uids, &tmp_maybe_uids); + if (arg->match_not) { + /* definite -> non-match + maybe -> maybe + non-match -> maybe */ + array_clear(&tmp_maybe_uids); + + if (squat_trie_get_last_uid(backend->trie, &last_uid) < 0) + i_unreached(); + seq_range_array_add_range(&tmp_maybe_uids, 1, last_uid); + seq_range_array_remove_seq_range(&tmp_maybe_uids, + &tmp_definite_uids); + array_clear(&tmp_definite_uids); + } + + if (and_args) { + /* AND: + definite && definite -> definite + definite && maybe -> maybe + maybe && maybe -> maybe */ + + /* put definites among maybies, so they can be intersected */ + seq_range_array_merge(maybe_uids, definite_uids); + seq_range_array_merge(&tmp_maybe_uids, &tmp_definite_uids); + + seq_range_array_intersect(maybe_uids, &tmp_maybe_uids); + seq_range_array_intersect(definite_uids, &tmp_definite_uids); + /* remove duplicate maybies that are also definites */ + seq_range_array_remove_seq_range(maybe_uids, definite_uids); + } else { + /* OR: + definite || definite -> definite + definite || maybe -> definite + maybe || maybe -> maybe */ + + /* remove maybies that are now definites */ + seq_range_array_remove_seq_range(&tmp_maybe_uids, + definite_uids); + seq_range_array_remove_seq_range(maybe_uids, + &tmp_definite_uids); + + seq_range_array_merge(definite_uids, &tmp_definite_uids); + seq_range_array_merge(maybe_uids, &tmp_maybe_uids); + } + + array_free(&tmp_definite_uids); + array_free(&tmp_maybe_uids); + return ret < 0 ? -1 : 1; +} + +static int +fts_backend_squat_lookup(struct fts_backend *_backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + bool first = TRUE; + int ret; + + ret = fts_backend_squat_set_box(backend, box); + if (ret < 0) + return -1; + + for (; args != NULL; args = args->next) { + ret = squat_lookup_arg(backend, args, first ? FALSE : and_args, + &result->definite_uids, + &result->maybe_uids); + if (ret < 0) + return -1; + if (ret > 0) { + args->match_always = TRUE; + first = FALSE; + } + } + return 0; +} + +struct fts_backend fts_backend_squat = { + .name = "squat", + .flags = FTS_BACKEND_FLAG_NORMALIZE_INPUT, + + { + fts_backend_squat_alloc, + fts_backend_squat_init, + fts_backend_squat_deinit, + fts_backend_squat_get_last_uid, + fts_backend_squat_update_init, + fts_backend_squat_update_deinit, + fts_backend_squat_update_set_mailbox, + fts_backend_squat_update_expunge, + fts_backend_squat_update_set_build_key, + fts_backend_squat_update_unset_build_key, + fts_backend_squat_update_build_more, + fts_backend_squat_refresh, + NULL, + fts_backend_squat_optimize, + fts_backend_default_can_lookup, + fts_backend_squat_lookup, + NULL, + NULL + } +}; diff --git a/src/plugins/fts-squat/fts-squat-plugin.c b/src/plugins/fts-squat/fts-squat-plugin.c new file mode 100644 index 0000000..59d9383 --- /dev/null +++ b/src/plugins/fts-squat/fts-squat-plugin.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "fts-squat-plugin.h" + +const char *fts_squat_plugin_version = DOVECOT_ABI_VERSION; + +void fts_squat_plugin_init(struct module *module ATTR_UNUSED) +{ + fts_backend_register(&fts_backend_squat); +} + +void fts_squat_plugin_deinit(void) +{ + fts_backend_unregister(fts_backend_squat.name); +} + +const char *fts_squat_plugin_dependencies[] = { "fts", NULL }; diff --git a/src/plugins/fts-squat/fts-squat-plugin.h b/src/plugins/fts-squat/fts-squat-plugin.h new file mode 100644 index 0000000..0d6bfcb --- /dev/null +++ b/src/plugins/fts-squat/fts-squat-plugin.h @@ -0,0 +1,14 @@ +#ifndef FTS_SQUAT_PLUGIN_H +#define FTS_SQUAT_PLUGIN_H + +#include "fts-api-private.h" + +struct module; + +extern const char *fts_squat_plugin_dependencies[]; +extern struct fts_backend fts_backend_squat; + +void fts_squat_plugin_init(struct module *module); +void fts_squat_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts-squat/squat-test.c b/src/plugins/fts-squat/squat-test.c new file mode 100644 index 0000000..b55646c --- /dev/null +++ b/src/plugins/fts-squat/squat-test.c @@ -0,0 +1,197 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "file-lock.h" +#include "istream.h" +#include "time-util.h" +#include "unichar.h" +#include "squat-trie.h" +#include "squat-uidlist.h" + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <time.h> +#include <sys/time.h> + +static void result_print(ARRAY_TYPE(seq_range) *result) +{ + const struct seq_range *range; + unsigned int i, count; + + range = array_get(result, &count); + for (i = 0; i < count; i++) { + if (i != 0) + printf(","); + printf("%u", range[i].seq1); + if (range[i].seq1 != range[i].seq2) + printf("-%u", range[i].seq2); + } + printf("\n"); +} + +int main(int argc ATTR_UNUSED, char *argv[]) +{ + const char *trie_path = "/tmp/squat-test-index.search"; + const char *uidlist_path = "/tmp/squat-test-index.search.uids"; + struct squat_trie *trie; + struct squat_trie_build_context *build_ctx; + struct istream *input; + struct stat trie_st, uidlist_st; + ARRAY_TYPE(seq_range) definite_uids, maybe_uids; + char *line, *str, buf[4096]; + buffer_t *valid; + int ret, fd; + unsigned int last = 0, seq = 1, node_count, uidlist_count; + size_t len; + enum squat_index_type index_type; + bool data_header = TRUE, first = TRUE, skip_body = FALSE; + bool mime_header = TRUE; + size_t trie_mem, uidlist_mem; + clock_t clock_start, clock_end; + struct timeval tv_start, tv_end; + double cputime; + + lib_init(); + i_unlink_if_exists(trie_path); + i_unlink_if_exists(uidlist_path); + trie = squat_trie_init(trie_path, time(NULL), + FILE_LOCK_METHOD_FCNTL, 0, 0600, (gid_t)-1); + + clock_start = clock(); + i_gettimeofday(&tv_start); + + fd = open(argv[1], O_RDONLY); + if (fd == -1) + return 1; + + if (squat_trie_build_init(trie, &build_ctx) < 0) + return 1; + + valid = buffer_create_dynamic(default_pool, 4096); + input = i_stream_create_fd(fd, SIZE_MAX); + ret = 0; + while (ret == 0 && (line = i_stream_read_next_line(input)) != NULL) { + if (last != input->v_offset/(1024*100)) { + fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024)); + fflush(stderr); + last = input->v_offset/(1024*100); + } + if (str_begins(line, "From ")) { + if (!first) + seq++; + data_header = TRUE; + skip_body = FALSE; + mime_header = TRUE; + continue; + } + first = FALSE; + + if (str_begins(line, "--")) { + skip_body = FALSE; + mime_header = TRUE; + } + + if (mime_header) { + if (*line == '\0') { + data_header = FALSE; + mime_header = FALSE; + continue; + } + + if (strncasecmp(line, "Content-Type:", 13) == 0 && + strncasecmp(line, "Content-Type: text/", 19) != 0 && + strncasecmp(line, "Content-Type: message/", 22) != 0) + skip_body = TRUE; + else if (strncasecmp(line, "Content-Transfer-Encoding: base64", 33) == 0) + skip_body = TRUE; + } else if (skip_body) + continue; + if (*line == '\0') + continue; + + /* we're actually indexing here headers as bodies and bodies + as headers. it doesn't really matter in this test, and + fixing it would require storing headers temporarily + elsewhere and index them only after the body */ + index_type = !data_header ? SQUAT_INDEX_TYPE_HEADER : + SQUAT_INDEX_TYPE_BODY; + + buffer_set_used_size(valid, 0); + len = strlen(line); + if (uni_utf8_get_valid_data((const unsigned char *)line, + len, valid)) { + ret = squat_trie_build_more(build_ctx, seq, index_type, + (const void *)line, len); + } else if (valid->used > 0) { + ret = squat_trie_build_more(build_ctx, seq, index_type, + valid->data, valid->used); + } + } + buffer_free(&valid); + if (squat_trie_build_deinit(&build_ctx, NULL) < 0) + ret = -1; + if (ret < 0) { + printf("build broken\n"); + return 1; + } + + clock_end = clock(); + i_gettimeofday(&tv_end); + + cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC; + fprintf(stderr, "\n - Index time: %.2f CPU seconds, " + "%.2f real seconds (%.02fMB/CPUs)\n", cputime, + timeval_diff_msecs(&tv_end, &tv_start)/1000.0, + input->v_offset / cputime / (1024*1024)); + + if (stat(trie_path, &trie_st) < 0) + i_error("stat(%s) failed: %m", trie_path); + if (stat(uidlist_path, &uidlist_st) < 0) + i_error("stat(%s) failed: %m", uidlist_path); + + trie_mem = squat_trie_mem_used(trie, &node_count); + uidlist_mem = squat_uidlist_mem_used(squat_trie_get_uidlist(trie), + &uidlist_count); + fprintf(stderr, " - memory: %uk for trie, %uk for uidlist\n", + (unsigned)(trie_mem/1024), (unsigned)(uidlist_mem/1024)); + fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u nodes (%.02f%%)\n", + trie_st.st_size, node_count, + trie_st.st_size / (float)input->v_offset * 100.0); + fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u UID lists (%.02f%%)\n", + uidlist_st.st_size, uidlist_count, + uidlist_st.st_size / (float)input->v_offset * 100.0); + fprintf(stderr, " - %"PRIuUOFF_T" bytes total of %" + PRIuUOFF_T" (%.02f%%)\n", + (trie_st.st_size + uidlist_st.st_size), input->v_offset, + (trie_st.st_size + uidlist_st.st_size) / + (float)input->v_offset * 100.0); + + i_stream_unref(&input); + i_close_fd(&fd); + + i_array_init(&definite_uids, 128); + i_array_init(&maybe_uids, 128); + while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) { + ret = strlen(str)-1; + str[ret] = 0; + + i_gettimeofday(&tv_start); + ret = squat_trie_lookup(trie, str, SQUAT_INDEX_TYPE_HEADER | + SQUAT_INDEX_TYPE_BODY, + &definite_uids, &maybe_uids); + if (ret < 0) + printf("error\n"); + else { + i_gettimeofday(&tv_end); + printf(" - Search took %.05f CPU seconds\n", + timeval_diff_usecs(&tv_end, &tv_start)/1000000.0); + printf(" - definite uids: "); + result_print(&definite_uids); + printf(" - maybe uids: "); + result_print(&maybe_uids); + } + } + return 0; +} diff --git a/src/plugins/fts-squat/squat-trie-private.h b/src/plugins/fts-squat/squat-trie-private.h new file mode 100644 index 0000000..b079554 --- /dev/null +++ b/src/plugins/fts-squat/squat-trie-private.h @@ -0,0 +1,192 @@ +#ifndef SQUAT_TRIE_PRIVATE_H +#define SQUAT_TRIE_PRIVATE_H + +#include "file-dotlock.h" +#include "squat-trie.h" + +#define SQUAT_TRIE_VERSION 2 +#define SQUAT_TRIE_LOCK_TIMEOUT 60 +#define SQUAT_TRIE_DOTLOCK_STALE_TIMEOUT (15*60) + +struct squat_file_header { + uint8_t version; + uint8_t unused[3]; + + uint32_t indexid; + uint32_t uidvalidity; + uint32_t used_file_size; + uint32_t deleted_space; + uint32_t node_count; + + uint32_t root_offset; + uint32_t root_unused_uids; + uint32_t root_next_uid; + uint32_t root_uidlist_idx; + + uint8_t partial_len; + uint8_t full_len; + uint8_t normalize_map[256]; +}; + +/* + node file: FIXME: no up-to-date + + struct squat_file_header; + + // children are written before their parents + node[] { + uint8_t child_count; + unsigned char chars[child_count]; + packed neg_diff_to_first_child_offset; // relative to node + packed diff_to_prev_offset[child_count-1]; + packed[child_count] { + // unused_uids_count == uid if have_uid_offset bit is zero + (unused_uids_count << 1) | (have_uid_offset); + [diff_to_prev_uid_offset;] // first one is relative to zero + } + } +*/ + +struct squat_node { + unsigned int child_count:8; + + /* children.leaf_string contains this many bytes */ + unsigned int leaf_string_length:16; + + /* TRUE = children.data contains our children. + FALSE = children.offset contains offset to our children in the + index file. */ + bool children_not_mapped:1; + /* When allocating our children, use a sequential array. */ + bool want_sequential:1; + /* This node's children are in a sequential array, meaning that the + first SEQUENTIAL_COUNT children have chars[n] = n. */ + bool have_sequential:1; + + /* Number of UIDs that exists in parent node but not in this one + (i.e. number of UIDs [0..next_uid-1] not in this node's uidlist). + This is mainly used when adding new UIDs to our children to set + the UID to be relative to this node's UID list. */ + uint32_t unused_uids; + + /* next_uid=0 means there are no UIDs in this node, otherwise + next_uid-1 is the last UID added to this node. */ + uint32_t next_uid; + uint32_t uid_list_idx; + + /* + struct { + unsigned char chars[child_count]; + struct squat_node[child_count]; + } *children; + */ + union { + /* children_not_mapped determines if data or offset should + be used. */ + void *data; + unsigned char *leaf_string; + unsigned char static_leaf_string[sizeof(void *)]; + uint32_t offset; + } children; +}; +/* Return pointer to node.children.chars[] */ +#define NODE_CHILDREN_CHARS(node) \ + ((unsigned char *)(node)->children.data) +/* Return pointer to node.children.node[] */ +#define NODE_CHILDREN_NODES(_node) \ + ((struct squat_node *)(NODE_CHILDREN_CHARS(_node) + \ + MEM_ALIGN((_node)->child_count))) +/* Return number of bytes allocated in node.children.data */ +#define NODE_CHILDREN_ALLOC_SIZE(child_count) \ + (MEM_ALIGN(child_count) + \ + ((child_count) / 8 + 1) * 8 * sizeof(struct squat_node)) +/* Return TRUE if children.leaf_string is set. */ +#define NODE_IS_DYNAMIC_LEAF(node) \ + ((node)->leaf_string_length > \ + sizeof((node)->children.static_leaf_string)) +/* Return node's leaf string. Assumes that it is set. */ +#define NODE_LEAF_STRING(node) \ + (NODE_IS_DYNAMIC_LEAF(node) ? \ + (node)->children.leaf_string : (node)->children.static_leaf_string) +struct squat_trie { + struct squat_node root; + struct squat_uidlist *uidlist; + + struct squat_file_header hdr; + size_t node_alloc_size; + unsigned int unmapped_child_count; + + enum squat_index_flags flags; + enum file_lock_method lock_method; + mode_t create_mode; + gid_t create_gid; + uint32_t uidvalidity; + + char *path; + int fd; + struct file_cache *file_cache; + struct dotlock_settings dotlock_set; + + uoff_t locked_file_size; + const void *data; + size_t data_size; + + void *mmap_base; + size_t mmap_size; + + unsigned char default_normalize_map[256]; + unsigned int default_partial_len; + unsigned int default_full_len; + + bool corrupted:1; +}; + +#define SQUAT_PACK_MAX_SIZE ((sizeof(uint32_t) * 8 + 7) / 7) + +static inline void squat_pack_num(uint8_t **p, uint32_t num) +{ + /* number continues as long as the highest bit is set */ + while (num >= 0x80) { + **p = (num & 0x7f) | 0x80; + *p += 1; + num >>= 7; + } + + **p = num; + *p += 1; +} + +static inline uint32_t squat_unpack_num(const uint8_t **p, const uint8_t *end) +{ + const uint8_t *c = *p; + uint32_t value = 0; + unsigned int bits = 0; + + for (;;) { + if (unlikely(c == end)) { + /* we should never see EOF */ + return 0; + } + + value |= (*c & 0x7f) << bits; + if (*c < 0x80) + break; + + bits += 7; + c++; + } + + if (unlikely(bits >= 32)) { + /* broken input */ + *p = end; + return 0; + } + + *p = c + 1; + return value; +} + +int squat_trie_create_fd(struct squat_trie *trie, const char *path, int flags); +void squat_trie_delete(struct squat_trie *trie); + +#endif diff --git a/src/plugins/fts-squat/squat-trie.c b/src/plugins/fts-squat/squat-trie.c new file mode 100644 index 0000000..d006817 --- /dev/null +++ b/src/plugins/fts-squat/squat-trie.c @@ -0,0 +1,2096 @@ +/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "read-full.h" +#include "istream.h" +#include "ostream.h" +#include "unichar.h" +#include "nfs-workarounds.h" +#include "file-cache.h" +#include "seq-range-array.h" +#include "squat-uidlist.h" +#include "squat-trie-private.h" + +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> + +#define DEFAULT_NORMALIZE_MAP_CHARS \ + "EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&" +#define DEFAULT_PARTIAL_LEN 4 +#define DEFAULT_FULL_LEN 4 + +#define MAX_FAST_LEVEL 3 +#define SEQUENTIAL_COUNT 46 + +#define TRIE_BYTES_LEFT(n) \ + ((n) * SQUAT_PACK_MAX_SIZE) +#define TRIE_READAHEAD_SIZE \ + I_MAX(4096, 1 + 256 + TRIE_BYTES_LEFT(256)) + +struct squat_trie_build_context { + struct squat_trie *trie; + struct ostream *output; + struct squat_uidlist_build_context *uidlist_build_ctx; + + struct file_lock *file_lock; + struct dotlock *dotlock; + + uint32_t first_uid; + bool compress_nodes:1; +}; + +struct squat_trie_iterate_node { + struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; + unsigned int idx; +}; + +struct squat_trie_iterate_context { + struct squat_trie *trie; + struct squat_trie_iterate_node cur; + ARRAY(struct squat_trie_iterate_node) parents; + bool failed; +}; + +static int squat_trie_map(struct squat_trie *trie, bool building); + +void squat_trie_delete(struct squat_trie *trie) +{ + i_unlink_if_exists(trie->path); + squat_uidlist_delete(trie->uidlist); +} + +static void squat_trie_set_corrupted(struct squat_trie *trie) +{ + trie->corrupted = TRUE; + i_error("Corrupted file %s", trie->path); + squat_trie_delete(trie); +} + +static void squat_trie_normalize_map_build(struct squat_trie *trie) +{ + static unsigned char valid_chars[] = + DEFAULT_NORMALIZE_MAP_CHARS; + unsigned int i, j; + + memset(trie->default_normalize_map, 0, + sizeof(trie->default_normalize_map)); + +#if 1 + for (i = 0, j = 1; i < sizeof(valid_chars)-1; i++) { + unsigned char chr = valid_chars[i]; + + if (chr >= 'A' && chr <= 'Z') + trie->default_normalize_map[chr-'A'+'a'] = j; + trie->default_normalize_map[chr] = j++; + } + i_assert(j <= SEQUENTIAL_COUNT); + + for (i = 128; i < 256; i++) + trie->default_normalize_map[i] = j++; +#else + for (i = 0; i < sizeof(valid_chars)-1; i++) { + unsigned char chr = valid_chars[i]; + + if (chr >= 'A' && chr <= 'Z') + trie->default_normalize_map[chr-'A'+'a'] = chr; + trie->default_normalize_map[chr] = chr; + } + for (i = 128; i < 256; i++) + trie->default_normalize_map[i] = i_toupper(i); +#endif +} + +static void node_free(struct squat_trie *trie, struct squat_node *node) +{ + struct squat_node *children; + unsigned int i; + + if (node->leaf_string_length > 0) { + if (NODE_IS_DYNAMIC_LEAF(node)) + i_free(node->children.leaf_string); + } else if (!node->children_not_mapped) { + children = NODE_CHILDREN_NODES(node); + + trie->node_alloc_size -= + NODE_CHILDREN_ALLOC_SIZE(node->child_count); + for (i = 0; i < node->child_count; i++) + node_free(trie, &children[i]); + + i_free(node->children.data); + } +} + +struct squat_trie * +squat_trie_init(const char *path, uint32_t uidvalidity, + enum file_lock_method lock_method, enum squat_index_flags flags, + mode_t mode, gid_t gid) +{ + struct squat_trie *trie; + + trie = i_new(struct squat_trie, 1); + trie->path = i_strdup(path); + trie->uidlist = squat_uidlist_init(trie); + trie->fd = -1; + trie->lock_method = lock_method; + trie->uidvalidity = uidvalidity; + trie->flags = flags; + trie->create_mode = mode; + trie->create_gid = gid; + squat_trie_normalize_map_build(trie); + + trie->dotlock_set.use_excl_lock = + (flags & SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL) != 0; + trie->dotlock_set.nfs_flush = (flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0; + trie->dotlock_set.timeout = SQUAT_TRIE_LOCK_TIMEOUT; + trie->dotlock_set.stale_timeout = SQUAT_TRIE_DOTLOCK_STALE_TIMEOUT; + trie->default_partial_len = DEFAULT_PARTIAL_LEN; + trie->default_full_len = DEFAULT_FULL_LEN; + return trie; +} + +static void squat_trie_close_fd(struct squat_trie *trie) +{ + trie->data = NULL; + trie->data_size = 0; + + if (trie->mmap_size != 0) { + if (munmap(trie->mmap_base, trie->mmap_size) < 0) + i_error("munmap(%s) failed: %m", trie->path); + trie->mmap_base = NULL; + trie->mmap_size = 0; + } + i_close_fd_path(&trie->fd, trie->path); +} + +static void squat_trie_close(struct squat_trie *trie) +{ + trie->corrupted = FALSE; + node_free(trie, &trie->root); + i_zero(&trie->root); + i_zero(&trie->hdr); + + squat_trie_close_fd(trie); + if (trie->file_cache != NULL) + file_cache_free(&trie->file_cache); + trie->locked_file_size = 0; +} + +void squat_trie_deinit(struct squat_trie **_trie) +{ + struct squat_trie *trie = *_trie; + + *_trie = NULL; + squat_trie_close(trie); + squat_uidlist_deinit(trie->uidlist); + i_free(trie->path); + i_free(trie); +} + +void squat_trie_set_partial_len(struct squat_trie *trie, unsigned int len) +{ + trie->default_partial_len = len; +} + +void squat_trie_set_full_len(struct squat_trie *trie, unsigned int len) +{ + trie->default_full_len = len; +} + +static void squat_trie_header_init(struct squat_trie *trie) +{ + i_zero(&trie->hdr); + trie->hdr.version = SQUAT_TRIE_VERSION; + trie->hdr.indexid = time(NULL); + trie->hdr.uidvalidity = trie->uidvalidity; + trie->hdr.partial_len = trie->default_partial_len; + trie->hdr.full_len = trie->default_full_len; + + i_assert(sizeof(trie->hdr.normalize_map) == + sizeof(trie->default_normalize_map)); + memcpy(trie->hdr.normalize_map, trie->default_normalize_map, + sizeof(trie->hdr.normalize_map)); +} + +static int squat_trie_open_fd(struct squat_trie *trie) +{ + trie->fd = open(trie->path, O_RDWR); + if (trie->fd == -1) { + if (errno == ENOENT) { + squat_trie_header_init(trie); + return 0; + } + i_error("open(%s) failed: %m", trie->path); + return -1; + } + if (trie->file_cache != NULL) + file_cache_set_fd(trie->file_cache, trie->fd); + return 0; +} + +int squat_trie_open(struct squat_trie *trie) +{ + squat_trie_close(trie); + + if (squat_trie_open_fd(trie) < 0) + return -1; + return squat_trie_map(trie, FALSE); +} + +static int squat_trie_is_file_stale(struct squat_trie *trie) +{ + struct stat st, st2; + + if ((trie->flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0) + nfs_flush_file_handle_cache(trie->path); + if (nfs_safe_stat(trie->path, &st) < 0) { + if (errno == ENOENT) + return 1; + + i_error("stat(%s) failed: %m", trie->path); + return -1; + } + if (fstat(trie->fd, &st2) < 0) { + if (errno == ESTALE) + return 1; + i_error("fstat(%s) failed: %m", trie->path); + return -1; + } + trie->locked_file_size = st2.st_size; + + if (st.st_ino == st2.st_ino && CMP_DEV_T(st.st_dev, st2.st_dev)) { + i_assert(trie->locked_file_size >= trie->data_size); + return 0; + } + return 1; +} + +int squat_trie_refresh(struct squat_trie *trie) +{ + int ret; + + ret = squat_trie_is_file_stale(trie); + if (ret > 0) + ret = squat_trie_open(trie); + return ret; +} + +static int squat_trie_lock(struct squat_trie *trie, int lock_type, + struct file_lock **file_lock_r, + struct dotlock **dotlock_r) +{ + const char *error; + int ret; + + i_assert(trie->fd != -1); + + *file_lock_r = NULL; + *dotlock_r = NULL; + + for (;;) { + if (trie->lock_method != FILE_LOCK_METHOD_DOTLOCK) { + struct file_lock_settings lock_set = { + .lock_method = trie->lock_method, + }; + ret = file_wait_lock(trie->fd, trie->path, lock_type, + &lock_set, SQUAT_TRIE_LOCK_TIMEOUT, + file_lock_r, &error); + if (ret < 0) { + i_error("squat trie %s: %s", + trie->path, error); + } + } else { + ret = file_dotlock_create(&trie->dotlock_set, + trie->path, 0, dotlock_r); + } + if (ret == 0) { + i_error("squat trie %s: Locking timed out", trie->path); + return 0; + } + if (ret < 0) + return -1; + + /* if the trie has been compressed, we need to reopen the + file and try to lock again */ + ret = squat_trie_is_file_stale(trie); + if (ret == 0) + break; + + if (*file_lock_r != NULL) + file_unlock(file_lock_r); + else + file_dotlock_delete(dotlock_r); + if (ret < 0) + return -1; + + squat_trie_close(trie); + if (squat_trie_open_fd(trie) < 0) + return -1; + if (trie->fd == -1) + return 0; + } + + if ((trie->flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0) + nfs_flush_read_cache_locked(trie->path, trie->fd); + return 1; +} + +static void +node_make_sequential(struct squat_trie *trie, struct squat_node *node, int level) +{ + const unsigned int alloc_size = + NODE_CHILDREN_ALLOC_SIZE(SEQUENTIAL_COUNT); + struct squat_node *children; + unsigned char *chars; + unsigned int i; + + i_assert(node->child_count == 0); + + trie->node_alloc_size += alloc_size; + + node->want_sequential = FALSE; + node->have_sequential = TRUE; + + node->child_count = SEQUENTIAL_COUNT; + node->children.data = i_malloc(alloc_size); + + chars = NODE_CHILDREN_CHARS(node); + for (i = 0; i < SEQUENTIAL_COUNT; i++) + chars[i] = i; + + if (level < MAX_FAST_LEVEL) { + children = NODE_CHILDREN_NODES(node); + for (i = 0; i < SEQUENTIAL_COUNT; i++) + children[i].want_sequential = TRUE; + } +} + +static unsigned int +node_add_child(struct squat_trie *trie, struct squat_node *node, + unsigned char chr, int level) +{ + unsigned int old_child_count = node->child_count; + struct squat_node *children, *old_children; + unsigned char *chars; + size_t old_size, new_size; + + i_assert(node->leaf_string_length == 0); + + if (node->want_sequential) { + node_make_sequential(trie, node, level); + + if (chr < SEQUENTIAL_COUNT) + return chr; + old_child_count = SEQUENTIAL_COUNT; + } + + node->child_count++; + new_size = NODE_CHILDREN_ALLOC_SIZE(node->child_count); + + if (old_child_count == 0) { + /* first child */ + node->children.data = i_malloc(new_size); + trie->node_alloc_size += new_size; + } else { + old_size = NODE_CHILDREN_ALLOC_SIZE(old_child_count); + if (old_size != new_size) { + trie->node_alloc_size += new_size - old_size; + node->children.data = i_realloc(node->children.data, + old_size, new_size); + } + + children = NODE_CHILDREN_NODES(node); + old_children = (void *)(NODE_CHILDREN_CHARS(node) + + MEM_ALIGN(old_child_count)); + if (children != old_children) { + memmove(children, old_children, + old_child_count * sizeof(struct squat_node)); + } + } + + chars = NODE_CHILDREN_CHARS(node); + i_assert(chars != NULL); + chars[node->child_count - 1] = chr; + return node->child_count - 1; +} + +static int +trie_file_cache_read(struct squat_trie *trie, size_t offset, size_t size) +{ + if (trie->file_cache == NULL) + return 0; + + if (file_cache_read(trie->file_cache, offset, size) < 0) { + i_error("read(%s) failed: %m", trie->path); + return -1; + } + trie->data = file_cache_get_map(trie->file_cache, &trie->data_size); + return 0; +} + +static int +node_read_children(struct squat_trie *trie, struct squat_node *node, int level) +{ + const uint8_t *data, *end; + const unsigned char *child_chars; + struct squat_node *child, *children = NULL; + uoff_t node_offset; + unsigned int i, child_idx, child_count; + uoff_t base_offset; + uint32_t num; + + i_assert(node->children_not_mapped); + i_assert(!node->have_sequential); + i_assert(trie->unmapped_child_count > 0); + i_assert(trie->data_size <= trie->locked_file_size); + + trie->unmapped_child_count--; + node_offset = node->children.offset; + node->children_not_mapped = FALSE; + node->children.data = NULL; + + if (trie_file_cache_read(trie, node_offset, TRIE_READAHEAD_SIZE) < 0) + return -1; + if (unlikely(node_offset >= trie->data_size)) { + squat_trie_set_corrupted(trie); + return -1; + } + + data = CONST_PTR_OFFSET(trie->data, node_offset); + end = CONST_PTR_OFFSET(trie->data, trie->data_size); + child_count = *data++; + if (unlikely(node_offset + child_count >= trie->data_size)) { + squat_trie_set_corrupted(trie); + return -1; + } + + if (child_count == 0) + return 0; + + child_chars = data; + data += child_count; + + /* get child offsets */ + base_offset = node_offset; + for (i = 0; i < child_count; i++) { + /* we always start with !have_sequential, so at i=0 this + check always goes to add the first child */ + if (node->have_sequential && child_chars[i] < SEQUENTIAL_COUNT) + child_idx = child_chars[i]; + else { + child_idx = node_add_child(trie, node, child_chars[i], + level); + children = NODE_CHILDREN_NODES(node); + } + + i_assert(children != NULL); + + child = &children[child_idx]; + + /* 1) child offset */ + num = squat_unpack_num(&data, end); + if (num == 0) { + /* no children */ + } else { + if ((num & 1) != 0) { + base_offset += num >> 1; + } else { + base_offset -= num >> 1; + } + if (base_offset >= trie->locked_file_size) { + squat_trie_set_corrupted(trie); + return -1; + } + trie->unmapped_child_count++; + child->children_not_mapped = TRUE; + child->children.offset = base_offset; + } + + /* 2) uidlist */ + child->uid_list_idx = squat_unpack_num(&data, end); + if (child->uid_list_idx == 0) { + /* we don't write nodes with empty uidlists */ + squat_trie_set_corrupted(trie); + return -1; + } + if (!UIDLIST_IS_SINGLETON(child->uid_list_idx)) { + /* 3) next uid */ + child->next_uid = squat_unpack_num(&data, end) + 1; + } else { + uint32_t idx = child->uid_list_idx; + + child->next_uid = 1 + + squat_uidlist_singleton_last_uid(idx); + } + + /* 4) unused uids + leaf string flag */ + num = squat_unpack_num(&data, end); + child->unused_uids = num >> 1; + if ((num & 1) != 0) { + /* leaf string */ + unsigned int len; + unsigned char *dest; + + /* 5) leaf string length */ + len = child->leaf_string_length = + squat_unpack_num(&data, end) + 1; + if (!NODE_IS_DYNAMIC_LEAF(child)) + dest = child->children.static_leaf_string; + else { + dest = child->children.leaf_string = + i_malloc(len); + } + + if (trie->file_cache != NULL) { + /* the string may be long - + recalculate the end pos */ + size_t offset, size; + + offset = (const char *)data - + (const char *)trie->data; + size = len + TRIE_BYTES_LEFT(child_count - i); + + if (trie_file_cache_read(trie, offset, + size) < 0) + return -1; + data = CONST_PTR_OFFSET(trie->data, offset); + end = CONST_PTR_OFFSET(trie->data, + trie->data_size); + child_chars = CONST_PTR_OFFSET(trie->data, + node_offset + 1); + } + + if ((size_t)(end - data) < len) { + squat_trie_set_corrupted(trie); + return -1; + } + memcpy(dest, data, len); + data += len; + } + } + if (unlikely(data == end)) { + /* we should never get this far */ + squat_trie_set_corrupted(trie); + return -1; + } + return 0; +} + +static void +node_write_children(struct squat_trie_build_context *ctx, + struct squat_node *node, const uoff_t *node_offsets) +{ + struct squat_node *children; + const unsigned char *chars; + uint8_t child_count, buf[SQUAT_PACK_MAX_SIZE * 5], *bufp; + uoff_t base_offset; + unsigned int i; + + chars = NODE_CHILDREN_CHARS(node); + children = NODE_CHILDREN_NODES(node); + + base_offset = ctx->output->offset; + child_count = node->child_count; + o_stream_nsend(ctx->output, &child_count, 1); + o_stream_nsend(ctx->output, chars, child_count); + + for (i = 0; i < child_count; i++) { + bufp = buf; + /* 1) child offset */ + if (node_offsets[i] == 0) + *bufp++ = 0; + else if (node_offsets[i] >= base_offset) { + squat_pack_num(&bufp, + ((node_offsets[i] - base_offset) << 1) | 1); + base_offset = node_offsets[i]; + } else { + squat_pack_num(&bufp, + (base_offset - node_offsets[i]) << 1); + base_offset = node_offsets[i]; + } + + /* 2) uidlist */ + squat_pack_num(&bufp, children[i].uid_list_idx); + if (!UIDLIST_IS_SINGLETON(children[i].uid_list_idx)) { + /* 3) next uid */ + squat_pack_num(&bufp, children[i].next_uid - 1); + } + + if (children[i].leaf_string_length == 0) { + /* 4a) unused uids */ + squat_pack_num(&bufp, children[i].unused_uids << 1); + o_stream_nsend(ctx->output, buf, bufp - buf); + } else { + i_assert(node_offsets[i] == 0); + /* 4b) unused uids + flag */ + squat_pack_num(&bufp, (children[i].unused_uids << 1) | 1); + /* 5) leaf string length */ + squat_pack_num(&bufp, children[i].leaf_string_length - 1); + o_stream_nsend(ctx->output, buf, bufp - buf); + o_stream_nsend(ctx->output, + NODE_LEAF_STRING(&children[i]), + children[i].leaf_string_length); + } + } +} + +static inline void +node_add_uid(struct squat_trie_build_context *ctx, uint32_t uid, + struct squat_node *node) +{ + if (uid < node->next_uid) { + /* duplicate */ + return; + } + node->unused_uids += uid - node->next_uid; + node->next_uid = uid + 1; + + node->uid_list_idx = + squat_uidlist_build_add_uid(ctx->uidlist_build_ctx, + node->uid_list_idx, uid); +} + +static void +node_split_string(struct squat_trie_build_context *ctx, struct squat_node *node) +{ + struct squat_node *child; + unsigned char *str; + unsigned int uid, idx, leafstr_len = node->leaf_string_length; + + i_assert(leafstr_len > 0); + + /* make a copy of the leaf string and convert to normal node by + removing it. */ + str = t_malloc_no0(leafstr_len); + if (!NODE_IS_DYNAMIC_LEAF(node)) + memcpy(str, node->children.static_leaf_string, leafstr_len); + else { + memcpy(str, node->children.leaf_string, leafstr_len); + i_free(node->children.leaf_string); + } + node->leaf_string_length = 0; + + /* create a new child node for the rest of the string */ + idx = node_add_child(ctx->trie, node, str[0], MAX_FAST_LEVEL); + child = NODE_CHILDREN_NODES(node) + idx; + + /* update uidlist to contain all of parent's UIDs */ + child->next_uid = node->next_uid - node->unused_uids; + for (uid = 0; uid < child->next_uid; uid++) { + child->uid_list_idx = + squat_uidlist_build_add_uid(ctx->uidlist_build_ctx, + child->uid_list_idx, uid); + } + + i_assert(!child->have_sequential && child->children.data == NULL); + if (leafstr_len > 1) { + /* make the child a leaf string */ + leafstr_len--; + child->leaf_string_length = leafstr_len; + if (!NODE_IS_DYNAMIC_LEAF(child)) { + memcpy(child->children.static_leaf_string, + str + 1, leafstr_len); + } else { + child->children.leaf_string = i_malloc(leafstr_len); + memcpy(child->children.leaf_string, + str + 1, leafstr_len); + } + } +} + +static bool +node_leaf_string_add_or_split(struct squat_trie_build_context *ctx, + struct squat_node *node, + const unsigned char *data, unsigned int data_len) +{ + const unsigned char *str = NODE_LEAF_STRING(node); + const unsigned int leafstr_len = node->leaf_string_length; + unsigned int i; + + if (data_len != leafstr_len) { + /* different lengths, can't match */ + T_BEGIN { + node_split_string(ctx, node); + } T_END; + return FALSE; + } + + for (i = 0; i < data_len; i++) { + if (data[i] != str[i]) { + /* non-match */ + T_BEGIN { + node_split_string(ctx, node); + } T_END; + return FALSE; + } + } + return TRUE; +} + +static int squat_build_add(struct squat_trie_build_context *ctx, uint32_t uid, + const unsigned char *data, unsigned int size) +{ + struct squat_trie *trie = ctx->trie; + struct squat_node *node = &trie->root; + const unsigned char *end = data + size; + unsigned char *chars; + unsigned int idx; + int level = 0; + + for (;;) { + if (node->children_not_mapped) { + if (unlikely(node_read_children(trie, node, level) < 0)) + return -1; + } + + if (node->leaf_string_length != 0) { + /* the whole string must match or we need to split + the node */ + if (node_leaf_string_add_or_split(ctx, node, data, + end - data)) { + node_add_uid(ctx, uid, node); + return 0; + } + } + + node_add_uid(ctx, uid, node); + + if (unlikely(uid < node->unused_uids)) { + squat_trie_set_corrupted(trie); + return -1; + } + /* child node's UIDs are relative to ours. so for example if + we're adding UID 4 and this node now has [2,4] UIDs, + unused_uids=3 and so the child node will be adding + UID 4-3 = 1. */ + uid -= node->unused_uids; + + if (data == end) + return 0; + level++; + + if (node->have_sequential) { + i_assert(node->child_count >= SEQUENTIAL_COUNT); + if (*data < SEQUENTIAL_COUNT) { + idx = *data; + goto found; + } + idx = SEQUENTIAL_COUNT; + } else { + idx = 0; + } + chars = NODE_CHILDREN_CHARS(node); + for (; idx < node->child_count; idx++) { + if (chars[idx] == *data) + goto found; + } + break; + found: + data++; + node = NODE_CHILDREN_NODES(node) + idx; + } + + /* create new children */ + i_assert(node->leaf_string_length == 0); + + for (;;) { + idx = node_add_child(trie, node, *data, + size - (end - data) + 1); + node = NODE_CHILDREN_NODES(node) + idx; + + node_add_uid(ctx, uid, node); + uid = 0; + + if (++data == end) + break; + + if (!node->have_sequential) { + /* convert the node into a leaf string */ + unsigned int len = end - data; + + i_assert(node->children.data == NULL); + node->leaf_string_length = len; + if (!NODE_IS_DYNAMIC_LEAF(node)) { + memcpy(node->children.static_leaf_string, + data, len); + } else { + node->children.leaf_string = i_malloc(len); + memcpy(node->children.leaf_string, data, len); + } + break; + } + } + return 0; +} + +static int +squat_build_word_bytes(struct squat_trie_build_context *ctx, uint32_t uid, + const unsigned char *data, unsigned int size) +{ + struct squat_trie *trie = ctx->trie; + unsigned int i; + + if (trie->hdr.full_len <= trie->hdr.partial_len) + i = 0; + else { + /* the first word is longer than others */ + if (squat_build_add(ctx, uid, data, + I_MIN(size, trie->hdr.full_len)) < 0) + return -1; + i = 1; + } + + for (; i < size; i++) { + if (squat_build_add(ctx, uid, data + i, + I_MIN(trie->hdr.partial_len, size-i)) < 0) + return -1; + } + return 0; +} + +static int +squat_build_word(struct squat_trie_build_context *ctx, uint32_t uid, + const unsigned char *data, const uint8_t *char_lengths, + unsigned int size) +{ + struct squat_trie *trie = ctx->trie; + unsigned int i, j, bytelen; + + if (char_lengths == NULL) { + /* optimization path: all characters are bytes */ + return squat_build_word_bytes(ctx, uid, data, size); + } + + if (trie->hdr.full_len <= trie->hdr.partial_len) + i = 0; + else { + /* the first word is longer than others */ + bytelen = 0; + for (j = 0; j < trie->hdr.full_len && bytelen < size; j++) + bytelen += char_lengths[bytelen]; + i_assert(bytelen <= size); + + if (squat_build_add(ctx, uid, data, bytelen) < 0) + return -1; + i = char_lengths[0]; + } + + for (; i < size; i += char_lengths[i]) { + bytelen = 0; + for (j = 0; j < trie->hdr.partial_len && i+bytelen < size; j++) + bytelen += char_lengths[i + bytelen]; + i_assert(i + bytelen <= size); + + if (squat_build_add(ctx, uid, data + i, bytelen) < 0) + return -1; + } + return 0; +} + +static unsigned char * +squat_data_normalize(struct squat_trie *trie, const unsigned char *data, + unsigned int size) +{ + static const unsigned char replacement_utf8[] = { 0xef, 0xbf, 0xbd }; + unsigned char *dest; + unsigned int i; + + dest = t_malloc_no0(size); + for (i = 0; i < size; i++) { + if (data[i] == replacement_utf8[0] && i + 2 < size && + data[i+1] == replacement_utf8[1] && + data[i+2] == replacement_utf8[2]) { + /* Don't index replacement character */ + dest[i++] = 0; + dest[i++] = 0; + dest[i] = 0; + } else { + dest[i] = trie->hdr.normalize_map[data[i]]; + } + } + return dest; +} + +static int +squat_trie_build_more_real(struct squat_trie_build_context *ctx, + uint32_t uid, enum squat_index_type type, + const unsigned char *input, unsigned int size) +{ + struct squat_trie *trie = ctx->trie; + const unsigned char *data; + uint8_t *char_lengths; + unsigned int i, start = 0; + bool multibyte_chars = FALSE; + int ret = 0; + + uid = uid * 2 + (type == SQUAT_INDEX_TYPE_HEADER ? 1 : 0); + + char_lengths = t_malloc_no0(size); + data = squat_data_normalize(trie, input, size); + for (i = 0; i < size; i++) { + char_lengths[i] = uni_utf8_char_bytes(input[i]); + if (char_lengths[i] != 1) + multibyte_chars = TRUE; + if (data[i] != '\0') + continue; + + while (start < i && data[start] == '\0') + start++; + if (i != start) { + if (squat_build_word(ctx, uid, data + start, + !multibyte_chars ? NULL : + char_lengths + start, + i - start) < 0) { + ret = -1; + start = i; + break; + } + } + start = i + 1; + } + while (start < i && data[start] == '\0') + start++; + if (i != start) { + if (squat_build_word(ctx, uid, data + start, + !multibyte_chars ? NULL : + char_lengths + start, i - start) < 0) + ret = -1; + } + return ret; +} + +int squat_trie_build_more(struct squat_trie_build_context *ctx, + uint32_t uid, enum squat_index_type type, + const unsigned char *input, unsigned int size) +{ + int ret = 0; + + if (size != 0) T_BEGIN { + ret = squat_trie_build_more_real(ctx, uid, type, input, size); + } T_END; + return ret; +} + +static void +node_drop_unused_children(struct squat_trie *trie, struct squat_node *node) +{ + unsigned char *chars; + struct squat_node *children_src, *children_dest; + unsigned int i, j, orig_child_count = node->child_count; + + chars = NODE_CHILDREN_CHARS(node); + children_src = NODE_CHILDREN_NODES(node); + + /* move chars */ + for (i = j = 0; i < orig_child_count; i++) { + if (children_src[i].next_uid != 0) + chars[j++] = chars[i]; + } + node->child_count = j; + + /* move children. note that children_dest may point to different + location than children_src, although they both point to the + same node. */ + children_dest = NODE_CHILDREN_NODES(node); + for (i = j = 0; i < orig_child_count; i++) { + if (children_src[i].next_uid != 0) + children_dest[j++] = children_src[i]; + else + node_free(trie, &children_src[i]); + } +} + +static int +squat_write_node(struct squat_trie_build_context *ctx, struct squat_node *node, + uoff_t *node_offset_r, int level) +{ + struct squat_trie *trie = ctx->trie; + struct squat_node *children; + unsigned int i; + uoff_t *node_offsets; + uint8_t child_count; + int ret; + + i_assert(node->next_uid != 0); + + if (node->children_not_mapped && ctx->compress_nodes) { + if (node_read_children(trie, node, MAX_FAST_LEVEL) < 0) + return -1; + } + + node->have_sequential = FALSE; + node_drop_unused_children(trie, node); + + child_count = node->child_count; + if (child_count == 0) { + i_assert(!node->children_not_mapped || + node->leaf_string_length == 0); + *node_offset_r = !node->children_not_mapped ? 0 : + node->children.offset; + return 0; + } + i_assert(!node->children_not_mapped); + + trie->hdr.node_count++; + + children = NODE_CHILDREN_NODES(node); + node_offsets = t_new(uoff_t, child_count); + for (i = 0; i < child_count; i++) { + T_BEGIN { + ret = squat_write_node(ctx, &children[i], + &node_offsets[i], level + 1); + } T_END; + if (ret < 0) + return -1; + } + + *node_offset_r = ctx->output->offset; + node_write_children(ctx, node, node_offsets); + return 0; +} + +static int squat_write_nodes(struct squat_trie_build_context *ctx) +{ + struct squat_trie *trie = ctx->trie; + uoff_t node_offset; + int ret; + + if (ctx->trie->root.next_uid == 0) + return 0; + + T_BEGIN { + ret = squat_write_node(ctx, &ctx->trie->root, &node_offset, 0); + } T_END; + if (ret < 0) + return -1; + + trie->hdr.root_offset = node_offset; + trie->hdr.root_unused_uids = trie->root.unused_uids; + trie->hdr.root_next_uid = trie->root.next_uid; + trie->hdr.root_uidlist_idx = trie->root.uid_list_idx; + return 0; +} + +static struct squat_trie_iterate_context * +squat_trie_iterate_init(struct squat_trie *trie) +{ + struct squat_trie_iterate_context *ctx; + + ctx = i_new(struct squat_trie_iterate_context, 1); + ctx->trie = trie; + ctx->cur.node = &trie->root; + i_array_init(&ctx->parents, trie->hdr.partial_len*2); + return ctx; +} + +static int +squat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx) +{ + struct squat_trie_iterate_node *node; + int ret = ctx->failed ? -1 : 0; + + if (array_is_created(&ctx->cur.shifts)) { + array_foreach_modifiable(&ctx->parents, node) + array_free(&node->shifts); + array_free(&ctx->cur.shifts); + } + array_free(&ctx->parents); + i_free(ctx); + return ret; +} + +static struct squat_node * +squat_trie_iterate_first(struct squat_trie_iterate_context *ctx) +{ + if (ctx->cur.node->children_not_mapped) { + if (node_read_children(ctx->trie, ctx->cur.node, 1) < 0) { + ctx->failed = TRUE; + return NULL; + } + } + return ctx->cur.node; +} + +static struct squat_node * +squat_trie_iterate_next(struct squat_trie_iterate_context *ctx, + ARRAY_TYPE(seq_range) *shifts_r) +{ + struct squat_trie_iterate_node *iter_nodes; + struct squat_node *children; + unsigned int count, shift_count = 0; + + while (ctx->cur.idx == ctx->cur.node->child_count || + ctx->cur.node->uid_list_idx == 0) + { + iter_nodes = array_get_modifiable(&ctx->parents, &count); + if (count == 0) + return NULL; + + if (array_is_created(&ctx->cur.shifts)) + array_free(&ctx->cur.shifts); + ctx->cur = iter_nodes[count-1]; + array_delete(&ctx->parents, count-1, 1); + } + + *shifts_r = ctx->cur.shifts; + if (array_is_created(&ctx->cur.shifts)) + shift_count = array_count(&ctx->cur.shifts); + + children = NODE_CHILDREN_NODES(ctx->cur.node); + while (children[ctx->cur.idx++].uid_list_idx == 0) { + if (ctx->cur.idx == ctx->cur.node->child_count) { + /* no more non-empty children in this node */ + return squat_trie_iterate_next(ctx, shifts_r); + } + } + array_push_back(&ctx->parents, &ctx->cur); + ctx->cur.node = &children[ctx->cur.idx-1]; + ctx->cur.idx = 0; + if (shift_count != 0) + i_array_init(&ctx->cur.shifts, shift_count); + else + i_zero(&ctx->cur.shifts); + return squat_trie_iterate_first(ctx); +} + +static void +squat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr, + ARRAY_TYPE(seq_range) *child_shifts, + ARRAY_TYPE(seq_range) *uids_arr, + struct squat_trie *trie, + struct squat_node *node, bool do_shifts) +{ + const struct seq_range *shifts; + struct seq_range *uids, shift; + unsigned int i, uid_idx, uid_count, shift_count; + uint32_t child_shift_seq1, child_shift_count, seq_high; + unsigned int shift_sum = 0, child_sum = 0; + + if (!array_is_created(shifts_arr)) { + i_assert(node->uid_list_idx != 0 || node->child_count == 0); + return; + } + + /* we'll recalculate this */ + node->unused_uids = 0; + + uids = array_get_modifiable(uids_arr, &uid_count); + shifts = array_get(shifts_arr, &shift_count); + for (i = 0, uid_idx = 0, seq_high = 0;; ) { + /* skip UID ranges until we skip/overlap shifts */ + while (uid_idx < uid_count && + (i == shift_count || + I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2)) + { + i_assert(uids[uid_idx].seq1 >= shift_sum); + uids[uid_idx].seq1 -= shift_sum; + uids[uid_idx].seq2 -= shift_sum; + child_sum += uids[uid_idx].seq2 - + uids[uid_idx].seq1 + 1; + + if (uid_idx > 0 && + uids[uid_idx-1].seq2 >= uids[uid_idx].seq1 - 1) { + /* we can merge this and the previous range */ + i_assert(uids[uid_idx-1].seq2 == + uids[uid_idx].seq1 - 1); + uids[uid_idx-1].seq2 = uids[uid_idx].seq2; + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else { + if (uid_idx == 0) + node->unused_uids += uids[0].seq1; + else { + node->unused_uids += + uids[uid_idx].seq1 - + uids[uid_idx-1].seq2 - 1; + } + uid_idx++; + } + } + if (uid_idx == uid_count) + break; + + shift.seq1 = I_MAX(shifts[i].seq1, seq_high); + shift.seq2 = shifts[i].seq2; + if (shift.seq2 < uids[uid_idx].seq1) { + /* shift is entirely before UID range */ + shift_sum += shift.seq2 - shift.seq1 + 1; + i++; + } else { + /* handle shifts before UID range */ + if (shift.seq1 < uids[uid_idx].seq1) { + shift_sum += uids[uid_idx].seq1 - shift.seq1; + shift.seq1 = uids[uid_idx].seq1; + } + /* update child shifts */ + child_shift_seq1 = child_sum + + shift.seq1 - uids[uid_idx].seq1; + child_shift_count = + I_MIN(shift.seq2, uids[uid_idx].seq2) - + shift.seq1 + 1; + seq_range_array_add_range(child_shifts, + child_shift_seq1, + child_shift_seq1 + + child_shift_count - 1); + child_sum += child_shift_count; + + /* if the shifts continue after the UID range, + treat it in the next loop iteration */ + if (shift.seq2 <= uids[uid_idx].seq2) + i++; + else + seq_high = uids[uid_idx].seq2 + 1; + + /* update UIDs - no matter where within the UID range + the shifts happened, the result is the same: + shift number of UIDs are removed, and the rest + are decreased by shift_sum. + + 123 uids child_shifts + a) s -> 12 1 + b) s -> 12 2 + c) s -> 12 3 + */ + if (uids[uid_idx].seq1 + + child_shift_count > uids[uid_idx].seq2) { + /* removed completely */ + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else if (do_shifts) { + /* the next loop iteration fixes the UIDs */ + uids[uid_idx].seq1 += child_shift_count; + } else { + seq_range_array_remove_range(uids_arr, + shift.seq1, + I_MIN(shift.seq2, uids[uid_idx].seq2)); + uids = array_get_modifiable(uids_arr, + &uid_count); + } + shift_sum += child_shift_count; + } + if (!do_shifts) { + /* root node - UIDs are only removed, not shifted */ + shift_sum = 0; + } + } + + if (uid_count == 0) { + /* no UIDs left, delete the node's children and mark it + unused */ + if (!NODE_IS_DYNAMIC_LEAF(node)) + node_free(trie, node); + + node->child_count = 0; + node->have_sequential = FALSE; + node->next_uid = 0; + } else { + if (do_shifts) + node->next_uid = uids[uid_count-1].seq2 + 1; + else { + node->unused_uids += (node->next_uid - 1) - + uids[uid_count-1].seq2; + } + } +} + +static int +squat_trie_expunge_uidlists(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter, + const ARRAY_TYPE(seq_range) *expunged_uids) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) uid_range, root_shifts, shifts; + bool shift = FALSE; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uid_range, 1024); + i_array_init(&root_shifts, array_count(expunged_uids)); + array_append_array(&root_shifts, expunged_uids); + + if (array_count(expunged_uids) > 0) + i_array_init(&iter->cur.shifts, array_count(expunged_uids)); + + shifts = root_shifts; + do { + i_assert(node->uid_list_idx != 0); + array_clear(&uid_range); + if (squat_uidlist_get_seqrange(ctx->trie->uidlist, + node->uid_list_idx, + &uid_range) < 0) { + ret = -1; + break; + } + squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts, + &uid_range, ctx->trie, node, + shift); + node->uid_list_idx = + squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range); + i_assert(node->uid_list_idx != 0 || node->next_uid == 0); + + node = squat_trie_iterate_next(iter, &shifts); + shift = TRUE; + } while (node != NULL); + array_free(&uid_range); + array_free(&root_shifts); + return ret; +} + +static int +squat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; + ARRAY_TYPE(uint32_t) uids; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uids, 1024); + while (node != NULL) { + i_assert(node->uid_list_idx != 0); + if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) { + /* rebuild the uidlist */ + array_clear(&uids); + if (squat_uidlist_get(ctx->trie->uidlist, + node->uid_list_idx, &uids) < 0) { + ret = -1; + break; + } + node->uid_list_idx = + squat_uidlist_rebuild_next(rebuild_ctx, &uids); + } + node = squat_trie_iterate_next(iter, &shifts); + } + array_free(&uids); + return ret; +} + +static int +squat_trie_renumber_uidlists(struct squat_trie_build_context *ctx, + const ARRAY_TYPE(seq_range) *expunged_uids, + bool compress) +{ + struct squat_trie_iterate_context *iter; + struct squat_uidlist_rebuild_context *rebuild_ctx; + time_t now; + int ret = 0; + + if ((ret = squat_uidlist_rebuild_init(ctx->uidlist_build_ctx, + compress, &rebuild_ctx)) <= 0) + return ret; + + now = time(NULL); + ctx->trie->hdr.indexid = + I_MAX((unsigned int)now, ctx->trie->hdr.indexid + 1); + + iter = squat_trie_iterate_init(ctx->trie); + if (expunged_uids != NULL) { + ret = squat_trie_expunge_uidlists(ctx, rebuild_ctx, iter, + expunged_uids); + } else { + ret = squat_trie_renumber_uidlists2(ctx, rebuild_ctx, iter); + } + if (squat_trie_iterate_deinit(iter) < 0) + ret = -1; + + /* lock the trie before we rename uidlist */ + i_assert(ctx->file_lock == NULL && ctx->dotlock == NULL); + if (squat_trie_lock(ctx->trie, F_WRLCK, + &ctx->file_lock, &ctx->dotlock) <= 0) + ret = -1; + return squat_uidlist_rebuild_finish(rebuild_ctx, ret < 0); +} + +static bool squat_trie_check_header(struct squat_trie *trie) +{ + if (trie->hdr.version != SQUAT_TRIE_VERSION || + trie->hdr.uidvalidity != trie->uidvalidity) + return FALSE; + + if (trie->hdr.partial_len > trie->hdr.full_len) { + i_error("Corrupted %s: partial len > full len", trie->path); + return FALSE; + } + if (trie->hdr.full_len == 0) { + i_error("Corrupted %s: full len=0", trie->path); + return FALSE; + } + return TRUE; +} + +static int squat_trie_map_header(struct squat_trie *trie) +{ + int ret; + + if (trie->locked_file_size == 0) { + /* newly created file */ + squat_trie_header_init(trie); + return 1; + } + i_assert(trie->fd != -1); + + if ((trie->flags & SQUAT_INDEX_FLAG_MMAP_DISABLE) != 0) { + ret = pread_full(trie->fd, &trie->hdr, sizeof(trie->hdr), 0); + if (ret <= 0) { + if (ret < 0) { + i_error("pread(%s) failed: %m", trie->path); + return -1; + } + i_error("Corrupted %s: File too small", trie->path); + return 0; + } + trie->data = NULL; + trie->data_size = 0; + } else { + if (trie->locked_file_size < sizeof(trie->hdr)) { + i_error("Corrupted %s: File too small", trie->path); + return 0; + } + if (trie->mmap_size != 0) { + if (munmap(trie->mmap_base, trie->mmap_size) < 0) + i_error("munmap(%s) failed: %m", trie->path); + } + + trie->mmap_size = trie->locked_file_size; + trie->mmap_base = mmap(NULL, trie->mmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, trie->fd, 0); + if (trie->mmap_base == MAP_FAILED) { + trie->data = trie->mmap_base = NULL; + trie->data_size = trie->mmap_size = 0; + i_error("mmap(%s) failed: %m", trie->path); + return -1; + } + memcpy(&trie->hdr, trie->mmap_base, sizeof(trie->hdr)); + trie->data = trie->mmap_base; + trie->data_size = trie->mmap_size; + } + + return squat_trie_check_header(trie) ? 1 : 0; +} + +static int squat_trie_map(struct squat_trie *trie, bool building) +{ + struct file_lock *file_lock = NULL; + struct dotlock *dotlock = NULL; + bool changed; + int ret; + + if (trie->fd != -1) { + if (squat_trie_lock(trie, F_RDLCK, &file_lock, &dotlock) <= 0) + return -1; + if ((trie->flags & SQUAT_INDEX_FLAG_MMAP_DISABLE) != 0 && + trie->file_cache == NULL) + trie->file_cache = file_cache_new_path(trie->fd, trie->path); + } + + ret = squat_trie_map_header(trie); + if (ret == 0) { + if (file_lock != NULL) + file_unlock(&file_lock); + else + file_dotlock_delete(&dotlock); + squat_trie_delete(trie); + squat_trie_close(trie); + squat_trie_header_init(trie); + } + changed = trie->root.children.offset != trie->hdr.root_offset; + + if (changed || trie->hdr.root_offset == 0) { + node_free(trie, &trie->root); + i_zero(&trie->root); + trie->root.want_sequential = TRUE; + trie->root.unused_uids = trie->hdr.root_unused_uids; + trie->root.next_uid = trie->hdr.root_next_uid; + trie->root.uid_list_idx = trie->hdr.root_uidlist_idx; + trie->root.children.offset = trie->hdr.root_offset; + + if (trie->hdr.root_offset == 0) { + trie->unmapped_child_count = 0; + trie->root.children_not_mapped = FALSE; + } else { + trie->unmapped_child_count = 1; + trie->root.children_not_mapped = TRUE; + } + } + + if (ret >= 0 && !building) { + /* do this while we're still locked */ + ret = squat_uidlist_refresh(trie->uidlist); + } + + if (file_lock != NULL) + file_unlock(&file_lock); + if (dotlock != NULL) + file_dotlock_delete(&dotlock); + if (ret < 0) + return -1; + + return trie->hdr.root_offset == 0 || !changed ? 0 : + node_read_children(trie, &trie->root, 1); +} + +int squat_trie_create_fd(struct squat_trie *trie, const char *path, int flags) +{ + mode_t old_mask; + int fd; + + old_mask = umask(0); + fd = open(path, O_RDWR | O_CREAT | flags, trie->create_mode); + umask(old_mask); + if (fd == -1) { + i_error("creat(%s) failed: %m", path); + return -1; + } + if (trie->create_gid != (gid_t)-1) { + if (fchown(fd, (uid_t)-1, trie->create_gid) < 0) { + i_error("fchown(%s, -1, %ld) failed: %m", + path, (long)trie->create_gid); + i_close_fd(&fd); + return -1; + } + } + return fd; +} + +int squat_trie_build_init(struct squat_trie *trie, + struct squat_trie_build_context **ctx_r) +{ + struct squat_trie_build_context *ctx; + struct squat_uidlist_build_context *uidlist_build_ctx; + + if (trie->fd == -1) { + trie->fd = squat_trie_create_fd(trie, trie->path, 0); + if (trie->fd == -1) + return -1; + + if (trie->file_cache != NULL) + file_cache_set_fd(trie->file_cache, trie->fd); + i_assert(trie->locked_file_size == 0); + } + + /* uidlist locks building */ + if (squat_uidlist_build_init(trie->uidlist, &uidlist_build_ctx) < 0) + return -1; + + if (squat_trie_map(trie, TRUE) < 0) { + squat_uidlist_build_deinit(&uidlist_build_ctx); + return -1; + } + + ctx = i_new(struct squat_trie_build_context, 1); + ctx->trie = trie; + ctx->uidlist_build_ctx = uidlist_build_ctx; + ctx->first_uid = trie->root.next_uid; + + *ctx_r = ctx; + return 0; +} + +static int squat_trie_write_lock(struct squat_trie_build_context *ctx) +{ + if (ctx->file_lock != NULL || ctx->dotlock != NULL) + return 0; + + if (squat_trie_lock(ctx->trie, F_WRLCK, + &ctx->file_lock, &ctx->dotlock) <= 0) + return -1; + return 0; +} + +static int squat_trie_write(struct squat_trie_build_context *ctx) +{ + struct squat_trie *trie = ctx->trie; + struct file_lock *file_lock = NULL; + struct ostream *output; + const char *path, *error; + int fd = -1, ret = 0; + + if ((trie->hdr.used_file_size > sizeof(trie->hdr) && + trie->unmapped_child_count < trie->hdr.node_count/4) || 1) { + /* we might as well recreate the file */ + ctx->compress_nodes = TRUE; + + path = t_strconcat(trie->path, ".tmp", NULL); + fd = squat_trie_create_fd(trie, path, O_TRUNC); + if (fd == -1) + return -1; + + if (trie->lock_method != FILE_LOCK_METHOD_DOTLOCK) { + struct file_lock_settings lock_set = { + .lock_method = trie->lock_method, + }; + ret = file_wait_lock(fd, path, F_WRLCK, &lock_set, + SQUAT_TRIE_LOCK_TIMEOUT, + &file_lock, &error); + if (ret <= 0) { + i_error("file_wait_lock(%s) failed: %s", + path, error); + i_close_fd(&fd); + return -1; + } + } + + output = o_stream_create_fd(fd, 0); + o_stream_cork(output); + o_stream_nsend(output, &trie->hdr, sizeof(trie->hdr)); + } else { + /* we need to lock only while header is being written */ + path = trie->path; + ctx->compress_nodes = + trie->hdr.used_file_size == sizeof(trie->hdr); + + if (trie->hdr.used_file_size == 0) { + /* lock before opening the file, in case we reopen it */ + if (squat_trie_write_lock(ctx) < 0) + return -1; + } + output = o_stream_create_fd(trie->fd, 0); + o_stream_cork(output); + + if (trie->hdr.used_file_size != 0) + (void)o_stream_seek(output, trie->hdr.used_file_size); + else + o_stream_nsend(output, &trie->hdr, sizeof(trie->hdr)); + } + + ctx->output = output; + ret = squat_write_nodes(ctx); + ctx->output = NULL; + + /* write 1 byte guard at the end of file, so that we can verify broken + squat_unpack_num() input by checking if data==end */ + o_stream_nsend(output, "", 1); + + if (trie->corrupted) + ret = -1; + if (ret == 0) + ret = squat_trie_write_lock(ctx); + if (ret == 0) { + trie->hdr.used_file_size = output->offset; + (void)o_stream_seek(output, 0); + o_stream_nsend(output, &trie->hdr, sizeof(trie->hdr)); + } + if (o_stream_finish(output) < 0) { + i_error("write(%s) failed: %s", path, + o_stream_get_error(output)); + ret = -1; + } + o_stream_destroy(&output); + + if (fd == -1) { + /* appended to the existing file */ + i_assert(file_lock == NULL); + return ret; + } + + /* recreating the trie file */ + if (ret < 0) { + if (close(fd) < 0) + i_error("close(%s) failed: %m", path); + fd = -1; + } else if (rename(path, trie->path) < 0) { + i_error("rename(%s, %s) failed: %m", path, trie->path); + ret = -1; + } + + if (ret < 0) { + i_unlink_if_exists(path); + file_lock_free(&file_lock); + } else { + squat_trie_close_fd(trie); + trie->fd = fd; + trie->locked_file_size = trie->hdr.used_file_size; + if (trie->file_cache != NULL) + file_cache_set_fd(trie->file_cache, trie->fd); + + file_lock_free(&ctx->file_lock); + ctx->file_lock = file_lock; + } + return ret; +} + +int squat_trie_build_deinit(struct squat_trie_build_context **_ctx, + const ARRAY_TYPE(seq_range) *expunged_uids) +{ + struct squat_trie_build_context *ctx = *_ctx; + bool compress, unlock = TRUE; + int ret; + + *_ctx = NULL; + + compress = (ctx->trie->root.next_uid - ctx->first_uid) > 10; + + /* keep trie locked while header is being written and when files are + being renamed, so that while trie is read locked, uidlist can't + change under. */ + squat_uidlist_build_flush(ctx->uidlist_build_ctx); + ret = squat_trie_renumber_uidlists(ctx, expunged_uids, compress); + if (ret == 0) { + ret = squat_trie_write(ctx); + if (ret < 0) + unlock = FALSE; + } + + if (ret == 0) + ret = squat_uidlist_build_finish(ctx->uidlist_build_ctx); + if (ctx->file_lock != NULL) { + if (unlock) + file_unlock(&ctx->file_lock); + else + file_lock_free(&ctx->file_lock); + } + if (ctx->dotlock != NULL) + file_dotlock_delete(&ctx->dotlock); + squat_uidlist_build_deinit(&ctx->uidlist_build_ctx); + + i_free(ctx); + return ret; +} + +int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *last_uid_r) +{ + if (trie->fd == -1) { + if (squat_trie_open(trie) < 0) + return -1; + } + + *last_uid_r = I_MAX((trie->root.next_uid+1)/2, 1) - 1; + return 0; +} + +static int +squat_trie_lookup_data(struct squat_trie *trie, const unsigned char *data, + unsigned int size, ARRAY_TYPE(seq_range) *uids) +{ + struct squat_node *node = &trie->root; + unsigned char *chars; + unsigned int idx; + int level = 0; + + array_clear(uids); + + for (;;) { + if (node->children_not_mapped) { + if (node_read_children(trie, node, level) < 0) + return -1; + } + if (node->leaf_string_length != 0) { + unsigned int len = node->leaf_string_length; + const unsigned char *str; + + if (len > sizeof(node->children.static_leaf_string)) + str = node->children.leaf_string; + else + str = node->children.static_leaf_string; + + if (size > len || memcmp(data, str, size) != 0) + return 0; + + /* match */ + break; + } + + if (size == 0) + break; + level++; + + if (node->have_sequential) { + if (*data < SEQUENTIAL_COUNT) { + idx = *data; + goto found; + } + idx = SEQUENTIAL_COUNT; + } else { + idx = 0; + } + chars = NODE_CHILDREN_CHARS(node); + for (; idx < node->child_count; idx++) { + if (chars[idx] == *data) + goto found; + } + return 0; + found: + /* follow to children */ + if (level == 1) { + /* root level, add all UIDs */ + if (squat_uidlist_get_seqrange(trie->uidlist, + node->uid_list_idx, + uids) < 0) + return -1; + } else { + if (squat_uidlist_filter(trie->uidlist, + node->uid_list_idx, uids) < 0) + return -1; + } + data++; + size--; + node = NODE_CHILDREN_NODES(node) + idx; + } + + if (squat_uidlist_filter(trie->uidlist, node->uid_list_idx, uids) < 0) + return -1; + return 1; +} + +static void +squat_trie_filter_type(enum squat_index_type type, + const ARRAY_TYPE(seq_range) *src, + ARRAY_TYPE(seq_range) *dest) +{ + const struct seq_range *src_range; + struct seq_range new_range; + unsigned int i, count, mask; + uint32_t next_seq, uid; + + array_clear(dest); + src_range = array_get(src, &count); + if (count == 0) + return; + + if ((type & SQUAT_INDEX_TYPE_HEADER) != 0 && + (type & SQUAT_INDEX_TYPE_BODY) != 0) { + /* everything is fine, just fix the UIDs */ + new_range.seq1 = src_range[0].seq1 / 2; + new_range.seq2 = src_range[0].seq2 / 2; + for (i = 1; i < count; i++) { + next_seq = src_range[i].seq1 / 2; + if (next_seq == new_range.seq2 + 1) { + /* we can continue the previous range */ + } else { + array_push_back(dest, &new_range); + new_range.seq1 = src_range[i].seq1 / 2; + } + new_range.seq2 = src_range[i].seq2 / 2; + } + array_push_back(dest, &new_range); + return; + } + + /* we'll have to drop either header or body UIDs */ + mask = (type & SQUAT_INDEX_TYPE_HEADER) != 0 ? 1 : 0; + for (i = 0; i < count; i++) { + for (uid = src_range[i].seq1; uid <= src_range[i].seq2; uid++) { + if ((uid & 1) == mask) + seq_range_array_add(dest, uid/2); + } + } +} + +struct squat_trie_lookup_context { + struct squat_trie *trie; + enum squat_index_type type; + + ARRAY_TYPE(seq_range) *definite_uids, *maybe_uids; + ARRAY_TYPE(seq_range) tmp_uids, tmp_uids2; + bool first; +}; + +static int +squat_trie_lookup_partial(struct squat_trie_lookup_context *ctx, + const unsigned char *data, uint8_t *char_lengths, + unsigned int size) +{ + const unsigned int partial_len = ctx->trie->hdr.partial_len; + unsigned int char_idx, max_chars, i, j, bytelen; + int ret; + + for (i = 0, max_chars = 0; i < size; max_chars++) + i += char_lengths[i]; + i_assert(max_chars > 0); + + i = 0; char_idx = 0; + do { + bytelen = 0; + for (j = 0; j < partial_len && i+bytelen < size; j++) + bytelen += char_lengths[i + bytelen]; + + ret = squat_trie_lookup_data(ctx->trie, data + i, bytelen, + &ctx->tmp_uids); + if (ret <= 0) { + array_clear(ctx->maybe_uids); + return ret; + } + + if (ctx->first) { + squat_trie_filter_type(ctx->type, &ctx->tmp_uids, + ctx->maybe_uids); + ctx->first = FALSE; + } else { + squat_trie_filter_type(ctx->type, &ctx->tmp_uids, + &ctx->tmp_uids2); + seq_range_array_intersect(ctx->maybe_uids, + &ctx->tmp_uids2); + } + i += char_lengths[i]; + char_idx++; + } while (max_chars - char_idx >= partial_len); + return 1; +} + +static void squat_trie_add_unknown(struct squat_trie *trie, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + struct seq_range *range, new_range; + unsigned int count; + uint32_t last_uid; + + last_uid = I_MAX((trie->root.next_uid+1)/2, 1) - 1; + + range = array_get_modifiable(maybe_uids, &count); + if (count > 0 && range[count-1].seq2 == last_uid) { + /* increase the range */ + range[count-1].seq2 = (uint32_t)-1; + } else { + new_range.seq1 = last_uid + 1; + new_range.seq2 = (uint32_t)-1; + array_push_back(maybe_uids, &new_range); + } +} + +static int +squat_trie_lookup_real(struct squat_trie *trie, const char *str, + enum squat_index_type type, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + struct squat_trie_lookup_context ctx; + unsigned char *data; + uint8_t *char_lengths; + unsigned int i, start, bytes, str_bytelen, str_charlen; + bool searched = FALSE; + int ret = 0; + + array_clear(definite_uids); + array_clear(maybe_uids); + + i_zero(&ctx); + ctx.trie = trie; + ctx.type = type; + ctx.definite_uids = definite_uids; + ctx.maybe_uids = maybe_uids; + i_array_init(&ctx.tmp_uids, 128); + i_array_init(&ctx.tmp_uids2, 128); + ctx.first = TRUE; + + str_bytelen = strlen(str); + char_lengths = str_bytelen == 0 ? NULL : t_malloc0(str_bytelen); + for (i = 0, str_charlen = 0; i < str_bytelen; str_charlen++) { + bytes = uni_utf8_char_bytes(str[i]); + char_lengths[i] = bytes; + i += bytes; + } + data = squat_data_normalize(trie, (const unsigned char *)str, + str_bytelen); + + for (i = start = 0; i < str_bytelen && ret >= 0; i += char_lengths[i]) { + if (data[i] != '\0') + continue; + + /* string has nonindexed characters. + search it in parts. */ + if (i != start) { + ret = squat_trie_lookup_partial(&ctx, data + start, + char_lengths + start, + i - start); + searched = TRUE; + } + start = i + char_lengths[i]; + } + + if (start == 0) { + if (str_charlen <= trie->hdr.partial_len || + trie->hdr.full_len > trie->hdr.partial_len) { + ret = squat_trie_lookup_data(trie, data, str_bytelen, + &ctx.tmp_uids); + if (ret > 0) { + squat_trie_filter_type(type, &ctx.tmp_uids, + definite_uids); + } + } else { + array_clear(definite_uids); + } + + if (str_charlen <= trie->hdr.partial_len || + trie->hdr.partial_len == 0) { + /* we have the result */ + array_clear(maybe_uids); + } else { + ret = squat_trie_lookup_partial(&ctx, data + start, + char_lengths + start, + i - start); + } + } else if (str_bytelen > 0) { + /* string has nonindexed characters. finish the search. */ + array_clear(definite_uids); + if (i != start && ret >= 0) { + ret = squat_trie_lookup_partial(&ctx, data + start, + char_lengths + start, + i - start); + } else if (!searched) { + /* string has only nonindexed chars, + list all root UIDs as maybes */ + ret = squat_uidlist_get_seqrange(trie->uidlist, + trie->root.uid_list_idx, + &ctx.tmp_uids); + squat_trie_filter_type(type, &ctx.tmp_uids, + maybe_uids); + } + } else { + /* zero string length - list all root UIDs as definite + answers */ +#if 0 /* FIXME: this code is never actually reached now. */ + ret = squat_uidlist_get_seqrange(trie->uidlist, + trie->root.uid_list_idx, + &ctx.tmp_uids); + squat_trie_filter_type(type, &ctx.tmp_uids, + definite_uids); +#else + i_unreached(); +#endif + } + seq_range_array_remove_seq_range(maybe_uids, definite_uids); + squat_trie_add_unknown(trie, maybe_uids); + array_free(&ctx.tmp_uids); + array_free(&ctx.tmp_uids2); + return ret < 0 ? -1 : 0; +} + +int squat_trie_lookup(struct squat_trie *trie, const char *str, + enum squat_index_type type, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + int ret; + + T_BEGIN { + ret = squat_trie_lookup_real(trie, str, type, + definite_uids, maybe_uids); + } T_END; + return ret; +} + +struct squat_uidlist *squat_trie_get_uidlist(struct squat_trie *trie) +{ + return trie->uidlist; +} + +size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r) +{ + *count_r = trie->hdr.node_count; + return trie->node_alloc_size; +} diff --git a/src/plugins/fts-squat/squat-trie.h b/src/plugins/fts-squat/squat-trie.h new file mode 100644 index 0000000..91530b8 --- /dev/null +++ b/src/plugins/fts-squat/squat-trie.h @@ -0,0 +1,54 @@ +#ifndef SQUAT_TRIE_H +#define SQUAT_TRIE_H + +#include "file-lock.h" +#include "seq-range-array.h" + +enum squat_index_flags { + SQUAT_INDEX_FLAG_MMAP_DISABLE = 0x01, + SQUAT_INDEX_FLAG_NFS_FLUSH = 0x02, + SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL = 0x04 +}; + +enum squat_index_type { + SQUAT_INDEX_TYPE_HEADER = 0x01, + SQUAT_INDEX_TYPE_BODY = 0x02 +}; + +struct squat_trie_build_context; + +struct squat_trie * +squat_trie_init(const char *path, uint32_t uidvalidity, + enum file_lock_method lock_method, + enum squat_index_flags flags, mode_t mode, gid_t gid); +void squat_trie_deinit(struct squat_trie **trie); + +void squat_trie_set_partial_len(struct squat_trie *trie, unsigned int len); +void squat_trie_set_full_len(struct squat_trie *trie, unsigned int len); + +int squat_trie_open(struct squat_trie *trie); +int squat_trie_refresh(struct squat_trie *trie); + +int squat_trie_build_init(struct squat_trie *trie, + struct squat_trie_build_context **ctx_r); +/* bodies must be added before headers */ +int squat_trie_build_more(struct squat_trie_build_context *ctx, + uint32_t uid, enum squat_index_type type, + const unsigned char *data, unsigned int size); +/* if expunged_uids is non-NULL, they may be removed from the index if they + still exist. */ +int squat_trie_build_deinit(struct squat_trie_build_context **ctx, + const ARRAY_TYPE(seq_range) *expunged_uids) + ATTR_NULL(2); + +int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *last_uid_r); +/* type specifies if we're looking at header, body or both */ +int squat_trie_lookup(struct squat_trie *trie, const char *str, + enum squat_index_type type, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids); + +struct squat_uidlist *squat_trie_get_uidlist(struct squat_trie *trie); +size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r); + +#endif diff --git a/src/plugins/fts-squat/squat-uidlist.c b/src/plugins/fts-squat/squat-uidlist.c new file mode 100644 index 0000000..facb8d0 --- /dev/null +++ b/src/plugins/fts-squat/squat-uidlist.c @@ -0,0 +1,1624 @@ +/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "sort.h" +#include "bsearch-insert-pos.h" +#include "file-cache.h" +#include "file-lock.h" +#include "read-full.h" +#include "write-full.h" +#include "ostream.h" +#include "mmap-util.h" +#include "squat-trie-private.h" +#include "squat-uidlist.h" + +#include <stdio.h> +#include <sys/stat.h> + +#define UIDLIST_LIST_SIZE 31 +#define UIDLIST_BLOCK_LIST_COUNT 100 +#define UID_LIST_MASK_RANGE 0x80000000U + +/* set = points to uidlist index number, unset = points to uidlist offset */ +#define UID_LIST_POINTER_MASK_LIST_IDX 0x80000000U + +#define UIDLIST_PACKED_FLAG_BITMASK 1 +#define UIDLIST_PACKED_FLAG_BEGINS_WITH_POINTER 2 + +struct uidlist_list { + unsigned int uid_count:31; + bool uid_begins_with_pointer:1; + uint32_t uid_list[UIDLIST_LIST_SIZE]; +}; + +struct squat_uidlist { + struct squat_trie *trie; + + char *path; + int fd; + struct file_cache *file_cache; + + struct file_lock *file_lock; + struct dotlock *dotlock; + uoff_t locked_file_size; + + void *mmap_base; + size_t mmap_size; + struct squat_uidlist_file_header hdr; + + const void *data; + size_t data_size; + + unsigned int cur_block_count; + const uint32_t *cur_block_offsets; + const uint32_t *cur_block_end_indexes; + + size_t max_size; + bool corrupted:1; + bool building:1; +}; + +struct squat_uidlist_build_context { + struct squat_uidlist *uidlist; + struct ostream *output; + + ARRAY_TYPE(uint32_t) block_offsets; + ARRAY_TYPE(uint32_t) block_end_indexes; + + ARRAY(struct uidlist_list) lists; + uint32_t list_start_idx; + + struct squat_uidlist_file_header build_hdr; + bool need_reopen:1; +}; + +struct squat_uidlist_rebuild_context { + struct squat_uidlist *uidlist; + struct squat_uidlist_build_context *build_ctx; + + int fd; + struct ostream *output; + + ARRAY_TYPE(uint32_t) new_block_offsets, new_block_end_indexes; + uoff_t cur_block_start_offset; + + uint32_t list_sizes[UIDLIST_BLOCK_LIST_COUNT]; + uint32_t next_uid_list_idx; + unsigned int list_idx; + unsigned int new_count; +}; + +static void squat_uidlist_close(struct squat_uidlist *uidlist); + +void squat_uidlist_delete(struct squat_uidlist *uidlist) +{ + i_unlink_if_exists(uidlist->path); +} + +static void squat_uidlist_set_corrupted(struct squat_uidlist *uidlist, + const char *reason) +{ + if (uidlist->corrupted) + return; + uidlist->corrupted = TRUE; + + i_error("Corrupted squat uidlist file %s: %s", uidlist->path, reason); + squat_trie_delete(uidlist->trie); +} + +static int +uidlist_write_array(struct ostream *output, const uint32_t *uid_list, + unsigned int uid_count, uint32_t packed_flags, + uint32_t offset, bool write_size, uint32_t *size_r) +{ + uint8_t *uidbuf, *bufp, sizebuf[SQUAT_PACK_MAX_SIZE], *sizebufp; + uint8_t listbuf[SQUAT_PACK_MAX_SIZE], *listbufp = listbuf; + uint32_t uid, uid2, prev, base_uid, size_value; + unsigned int i, bitmask_len, uid_list_len; + unsigned int idx, max_idx, mask; + bool datastack; + int num; + + if ((packed_flags & UIDLIST_PACKED_FLAG_BEGINS_WITH_POINTER) != 0) + squat_pack_num(&listbufp, offset); + + /* @UNSAFE */ + base_uid = uid_list[0] & ~UID_LIST_MASK_RANGE; + datastack = uid_count < 1024*8/SQUAT_PACK_MAX_SIZE; + if (datastack) + uidbuf = t_malloc_no0(SQUAT_PACK_MAX_SIZE * uid_count); + else + uidbuf = i_malloc(SQUAT_PACK_MAX_SIZE * uid_count); + bufp = uidbuf; + squat_pack_num(&bufp, base_uid); + + bitmask_len = (uid_list[uid_count-1] - base_uid + 7) / 8 + + (bufp - uidbuf); + if (bitmask_len < uid_count) { + bitmask_build: + i_assert(bitmask_len < SQUAT_PACK_MAX_SIZE*uid_count); + + memset(bufp, 0, bitmask_len - (bufp - uidbuf)); + if ((uid_list[0] & UID_LIST_MASK_RANGE) == 0) { + i = 1; + uid = i == uid_count ? 0 : uid_list[i]; + } else { + i = 0; + uid = uid_list[0] + 1; + } + base_uid++; + + for (; i < uid_count; i++) { + i_assert((uid & ~UID_LIST_MASK_RANGE) >= base_uid); + if ((uid & UID_LIST_MASK_RANGE) == 0) { + uid -= base_uid; + uid2 = uid; + } else { + uid &= ~UID_LIST_MASK_RANGE; + uid -= base_uid; + uid2 = uid_list[i+1] - base_uid; + i++; + } + + if (uid2 - uid < 3*8) { + for (; uid <= uid2; uid++) + bufp[uid / 8] |= 1 << (uid % 8); + } else { + /* first byte */ + idx = uid / 8; + num = uid % 8; + if (num != 0) { + uid += 8 - num; + for (mask = 0; num < 8; num++) + mask |= 1 << num; + bufp[idx++] |= mask; + } + + /* middle bytes */ + num = uid2 % 8; + max_idx = idx + (uid2 - num - uid)/8; + for (; idx < max_idx; idx++, uid += 8) + bufp[idx] = 0xff; + + /* last byte */ + for (mask = 0; num >= 0; num--) + mask |= 1 << num; + bufp[idx] |= mask; + } + uid = i+1 == uid_count ? 0 : uid_list[i+1]; + } + uid_list_len = bitmask_len; + packed_flags |= UIDLIST_PACKED_FLAG_BITMASK; + } else { + bufp = uidbuf; + prev = 0; + for (i = 0; i < uid_count; i++) { + uid = uid_list[i]; + if (unlikely((uid & ~UID_LIST_MASK_RANGE) < prev)) { + if (!datastack) + i_free(uidbuf); + return -1; + } + if ((uid & UID_LIST_MASK_RANGE) == 0) { + squat_pack_num(&bufp, (uid - prev) << 1); + prev = uid + 1; + } else { + uid &= ~UID_LIST_MASK_RANGE; + squat_pack_num(&bufp, 1 | (uid - prev) << 1); + squat_pack_num(&bufp, uid_list[i+1] - uid - 1); + prev = uid_list[i+1] + 1; + i++; + } + } + uid_list_len = bufp - uidbuf; + if (uid_list_len > bitmask_len) { + bufp = uidbuf; + squat_pack_num(&bufp, base_uid); + goto bitmask_build; + } + } + + size_value = ((uid_list_len + + (listbufp - listbuf)) << 2) | packed_flags; + if (write_size) { + sizebufp = sizebuf; + squat_pack_num(&sizebufp, size_value); + o_stream_nsend(output, sizebuf, sizebufp - sizebuf); + } + o_stream_nsend(output, listbuf, listbufp - listbuf); + o_stream_nsend(output, uidbuf, uid_list_len); + if (!datastack) + i_free(uidbuf); + + *size_r = size_value; + return 0; +} + +static int +uidlist_write(struct ostream *output, const struct uidlist_list *list, + bool write_size, uint32_t *size_r) +{ + const uint32_t *uid_list = list->uid_list; + uint8_t buf[SQUAT_PACK_MAX_SIZE], *bufp; + uint32_t uid_count = list->uid_count; + uint32_t packed_flags = 0; + uint32_t offset = 0; + int ret; + + if (list->uid_begins_with_pointer) { + /* continued UID list */ + packed_flags |= UIDLIST_PACKED_FLAG_BEGINS_WITH_POINTER; + if ((uid_list[0] & UID_LIST_POINTER_MASK_LIST_IDX) != 0) { + offset = ((uid_list[0] & ~UID_LIST_POINTER_MASK_LIST_IDX) << 1) | 1; + if (list->uid_count == 1) { + bufp = buf; + squat_pack_num(&bufp, offset); + o_stream_nsend(output, buf, bufp - buf); + *size_r = (bufp - buf) << 2 | packed_flags; + return 0; + } + } else if (unlikely(output->offset <= uid_list[0])) { + i_assert(output->closed); + return -1; + } else { + i_assert(list->uid_count > 1); + offset = (output->offset - uid_list[0]) << 1; + } + uid_list++; + uid_count--; + } + + T_BEGIN { + ret = uidlist_write_array(output, uid_list, uid_count, + packed_flags, offset, + write_size, size_r); + } T_END; + return ret; +} + +static void squat_uidlist_map_blocks_set_pointers(struct squat_uidlist *uidlist) +{ + const void *base; + size_t end_index_size, end_size; + + base = CONST_PTR_OFFSET(uidlist->data, uidlist->hdr.block_list_offset + + sizeof(uint32_t)); + + end_index_size = uidlist->cur_block_count * sizeof(uint32_t); + end_size = end_index_size + uidlist->cur_block_count * sizeof(uint32_t); + if (end_size <= uidlist->data_size) { + uidlist->cur_block_end_indexes = base; + uidlist->cur_block_offsets = + CONST_PTR_OFFSET(base, end_index_size); + } else { + uidlist->cur_block_end_indexes = NULL; + uidlist->cur_block_offsets = NULL; + } +} + +static int uidlist_file_cache_read(struct squat_uidlist *uidlist, + size_t offset, size_t size) +{ + if (uidlist->file_cache == NULL) + return 0; + + if (file_cache_read(uidlist->file_cache, offset, size) < 0) { + i_error("read(%s) failed: %m", uidlist->path); + return -1; + } + uidlist->data = file_cache_get_map(uidlist->file_cache, + &uidlist->data_size); + squat_uidlist_map_blocks_set_pointers(uidlist); + return 0; +} + +static int squat_uidlist_map_blocks(struct squat_uidlist *uidlist) +{ + const struct squat_uidlist_file_header *hdr = &uidlist->hdr; + const void *base; + uint32_t block_count, blocks_offset, blocks_size, i, verify_count; + + if (hdr->block_list_offset == 0) { + /* empty file */ + uidlist->cur_block_count = 0; + return 1; + } + + /* get number of blocks */ + if (uidlist_file_cache_read(uidlist, hdr->block_list_offset, + sizeof(block_count)) < 0) + return -1; + blocks_offset = hdr->block_list_offset + sizeof(block_count); + if (blocks_offset > uidlist->data_size) { + squat_uidlist_set_corrupted(uidlist, "block list outside file"); + return 0; + } + + i_assert(uidlist->data != NULL); + base = CONST_PTR_OFFSET(uidlist->data, hdr->block_list_offset); + memcpy(&block_count, base, sizeof(block_count)); + + /* map the blocks */ + blocks_size = block_count * sizeof(uint32_t)*2; + if (uidlist_file_cache_read(uidlist, blocks_offset, blocks_size) < 0) + return -1; + if (blocks_offset + blocks_size > uidlist->data_size) { + squat_uidlist_set_corrupted(uidlist, "block list outside file"); + return 0; + } + + uidlist->cur_block_count = block_count; + squat_uidlist_map_blocks_set_pointers(uidlist); + + i_assert(uidlist->cur_block_end_indexes != NULL); + + /* verify just a couple of the end indexes to make sure they + look correct */ + verify_count = I_MIN(block_count, 8); + for (i = 1; i < verify_count; i++) { + if (unlikely(uidlist->cur_block_end_indexes[i-1] >= + uidlist->cur_block_end_indexes[i])) { + squat_uidlist_set_corrupted(uidlist, + "block list corrupted"); + return 0; + } + } + return 1; +} + +static int squat_uidlist_map_header(struct squat_uidlist *uidlist) +{ + if (uidlist->hdr.indexid == 0) { + /* still being built */ + return 1; + } + if (uidlist->hdr.indexid != uidlist->trie->hdr.indexid) { + /* see if trie was recreated */ + (void)squat_trie_open(uidlist->trie); + } + if (uidlist->hdr.indexid != uidlist->trie->hdr.indexid) { + squat_uidlist_set_corrupted(uidlist, "wrong indexid"); + return 0; + } + if (uidlist->hdr.used_file_size < sizeof(uidlist->hdr) || + (uidlist->hdr.used_file_size > uidlist->mmap_size && + uidlist->mmap_base != NULL)) { + squat_uidlist_set_corrupted(uidlist, "broken used_file_size"); + return 0; + } + return squat_uidlist_map_blocks(uidlist); +} + +static void squat_uidlist_unmap(struct squat_uidlist *uidlist) +{ + if (uidlist->mmap_size != 0) { + if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0) + i_error("munmap(%s) failed: %m", uidlist->path); + uidlist->mmap_base = NULL; + uidlist->mmap_size = 0; + } + uidlist->cur_block_count = 0; + uidlist->cur_block_end_indexes = NULL; + uidlist->cur_block_offsets = NULL; +} + +static int squat_uidlist_mmap(struct squat_uidlist *uidlist) +{ + struct stat st; + + if (fstat(uidlist->fd, &st) < 0) { + i_error("fstat(%s) failed: %m", uidlist->path); + return -1; + } + if (st.st_size < (off_t)sizeof(uidlist->hdr)) { + squat_uidlist_set_corrupted(uidlist, "File too small"); + return -1; + } + + squat_uidlist_unmap(uidlist); + uidlist->mmap_size = st.st_size; + uidlist->mmap_base = mmap(NULL, uidlist->mmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, uidlist->fd, 0); + if (uidlist->mmap_base == MAP_FAILED) { + uidlist->data = uidlist->mmap_base = NULL; + uidlist->data_size = uidlist->mmap_size = 0; + i_error("mmap(%s) failed: %m", uidlist->path); + return -1; + } + uidlist->data = uidlist->mmap_base; + uidlist->data_size = uidlist->mmap_size; + return 0; +} + +static int squat_uidlist_map(struct squat_uidlist *uidlist) +{ + const struct squat_uidlist_file_header *mmap_hdr = uidlist->mmap_base; + int ret; + + if (mmap_hdr != NULL && !uidlist->building && + uidlist->hdr.block_list_offset == mmap_hdr->block_list_offset) { + /* file hasn't changed */ + return 1; + } + + if ((uidlist->trie->flags & SQUAT_INDEX_FLAG_MMAP_DISABLE) == 0) { + if (mmap_hdr == NULL || uidlist->building || + uidlist->mmap_size < mmap_hdr->used_file_size) { + if (squat_uidlist_mmap(uidlist) < 0) + return -1; + } + + if (!uidlist->building) { + memcpy(&uidlist->hdr, uidlist->mmap_base, + sizeof(uidlist->hdr)); + } + } else if (uidlist->building) { + /* we want to update blocks mapping, but using the header + in memory */ + } else { + ret = pread_full(uidlist->fd, &uidlist->hdr, + sizeof(uidlist->hdr), 0); + if (ret <= 0) { + if (ret < 0) { + i_error("pread(%s) failed: %m", uidlist->path); + return -1; + } + i_error("Corrupted %s: File too small", uidlist->path); + return 0; + } + uidlist->data = NULL; + uidlist->data_size = 0; + } + if (uidlist->file_cache == NULL && + (uidlist->trie->flags & SQUAT_INDEX_FLAG_MMAP_DISABLE) != 0) + uidlist->file_cache = file_cache_new_path(uidlist->fd, uidlist->path); + return squat_uidlist_map_header(uidlist); +} + +static int squat_uidlist_read_to_memory(struct squat_uidlist *uidlist) +{ + size_t i, page_size = mmap_get_page_size(); + + if (uidlist->file_cache != NULL) { + return uidlist_file_cache_read(uidlist, 0, + uidlist->hdr.used_file_size); + } + /* Tell the kernel we're going to use the uidlist data, so it loads + it into memory and keeps it there. */ + (void)madvise(uidlist->mmap_base, uidlist->mmap_size, MADV_WILLNEED); + /* It also speeds up a bit for us to sequentially load everything + into memory, although at least Linux catches up quite fast even + without this code. Compiler can quite easily optimize away this + entire for loop, but volatile seems to help with gcc 4.2. */ + for (i = 0; i < uidlist->mmap_size; i += page_size) + ((const volatile char *)uidlist->data)[i]; + return 0; +} + +static void squat_uidlist_free_from_memory(struct squat_uidlist *uidlist) +{ + size_t page_size = mmap_get_page_size(); + + if (uidlist->file_cache != NULL) { + file_cache_invalidate(uidlist->file_cache, + page_size, UOFF_T_MAX); + } else { + (void)madvise(uidlist->mmap_base, uidlist->mmap_size, + MADV_DONTNEED); + } +} + +struct squat_uidlist *squat_uidlist_init(struct squat_trie *trie) +{ + struct squat_uidlist *uidlist; + + uidlist = i_new(struct squat_uidlist, 1); + uidlist->trie = trie; + uidlist->path = i_strconcat(trie->path, ".uids", NULL); + uidlist->fd = -1; + + return uidlist; +} + +void squat_uidlist_deinit(struct squat_uidlist *uidlist) +{ + squat_uidlist_close(uidlist); + + i_free(uidlist->path); + i_free(uidlist); +} + +static int squat_uidlist_open(struct squat_uidlist *uidlist) +{ + squat_uidlist_close(uidlist); + + uidlist->fd = open(uidlist->path, O_RDWR); + if (uidlist->fd == -1) { + if (errno == ENOENT) { + i_zero(&uidlist->hdr); + return 0; + } + i_error("open(%s) failed: %m", uidlist->path); + return -1; + } + return squat_uidlist_map(uidlist) <= 0 ? -1 : 0; +} + +static void squat_uidlist_close(struct squat_uidlist *uidlist) +{ + i_assert(!uidlist->building); + + squat_uidlist_unmap(uidlist); + if (uidlist->file_cache != NULL) + file_cache_free(&uidlist->file_cache); + file_lock_free(&uidlist->file_lock); + if (uidlist->dotlock != NULL) + file_dotlock_delete(&uidlist->dotlock); + i_close_fd_path(&uidlist->fd, uidlist->path); + uidlist->corrupted = FALSE; +} + +int squat_uidlist_refresh(struct squat_uidlist *uidlist) +{ + /* we assume here that trie is locked, so that we don't need to worry + about it when reading the header */ + if (uidlist->fd == -1 || + uidlist->hdr.indexid != uidlist->trie->hdr.indexid) { + if (squat_uidlist_open(uidlist) < 0) + return -1; + } else { + if (squat_uidlist_map(uidlist) <= 0) + return -1; + } + return 0; +} + +static int squat_uidlist_is_file_stale(struct squat_uidlist *uidlist) +{ + struct stat st, st2; + + i_assert(uidlist->fd != -1); + + if (stat(uidlist->path, &st) < 0) { + if (errno == ENOENT) + return 1; + + i_error("stat(%s) failed: %m", uidlist->path); + return -1; + } + if (fstat(uidlist->fd, &st2) < 0) { + i_error("fstat(%s) failed: %m", uidlist->path); + return -1; + } + uidlist->locked_file_size = st2.st_size; + + return st.st_ino == st2.st_ino && + CMP_DEV_T(st.st_dev, st2.st_dev) ? 0 : 1; +} + +static int squat_uidlist_lock(struct squat_uidlist *uidlist) +{ + const char *error; + int ret; + + for (;;) { + i_assert(uidlist->fd != -1); + i_assert(uidlist->file_lock == NULL); + i_assert(uidlist->dotlock == NULL); + + if (uidlist->trie->lock_method != FILE_LOCK_METHOD_DOTLOCK) { + struct file_lock_settings lock_set = { + .lock_method = uidlist->trie->lock_method, + }; + ret = file_wait_lock(uidlist->fd, uidlist->path, + F_WRLCK, &lock_set, + SQUAT_TRIE_LOCK_TIMEOUT, + &uidlist->file_lock, &error); + if (ret < 0) { + i_error("squat uidlist %s: %s", + uidlist->path, error); + } + } else { + ret = file_dotlock_create(&uidlist->trie->dotlock_set, + uidlist->path, 0, + &uidlist->dotlock); + } + if (ret == 0) { + i_error("squat uidlist %s: Locking timed out", + uidlist->path); + return 0; + } + if (ret < 0) + return -1; + + ret = squat_uidlist_is_file_stale(uidlist); + if (ret == 0) + break; + + if (uidlist->file_lock != NULL) + file_unlock(&uidlist->file_lock); + else + file_dotlock_delete(&uidlist->dotlock); + if (ret < 0) + return -1; + + squat_uidlist_close(uidlist); + uidlist->fd = squat_trie_create_fd(uidlist->trie, + uidlist->path, 0); + if (uidlist->fd == -1) + return -1; + } + return 1; +} + +static int squat_uidlist_open_or_create(struct squat_uidlist *uidlist) +{ + int ret; + + if (uidlist->fd == -1) { + uidlist->fd = squat_trie_create_fd(uidlist->trie, + uidlist->path, 0); + if (uidlist->fd == -1) + return -1; + } + if (squat_uidlist_lock(uidlist) <= 0) + return -1; + + if (uidlist->locked_file_size != 0) { + if ((ret = squat_uidlist_map(uidlist)) < 0) + return -1; + if (ret == 0) { + /* broken file, truncate */ + if (ftruncate(uidlist->fd, 0) < 0) { + i_error("ftruncate(%s) failed: %m", + uidlist->path); + return -1; + } + uidlist->locked_file_size = 0; + } + } + if (uidlist->locked_file_size == 0) { + /* write using 0 until we're finished */ + i_zero(&uidlist->hdr); + if (write_full(uidlist->fd, &uidlist->hdr, + sizeof(uidlist->hdr)) < 0) { + i_error("write(%s) failed: %m", uidlist->path); + return -1; + } + } + return 0; +} + +int squat_uidlist_build_init(struct squat_uidlist *uidlist, + struct squat_uidlist_build_context **ctx_r) +{ + struct squat_uidlist_build_context *ctx; + int ret; + + i_assert(!uidlist->building); + + ret = squat_uidlist_open_or_create(uidlist); + if (ret == 0 && + lseek(uidlist->fd, uidlist->hdr.used_file_size, SEEK_SET) < 0) { + i_error("lseek(%s) failed: %m", uidlist->path); + ret = -1; + } + + if (ret < 0) { + if (uidlist->file_lock != NULL) + file_unlock(&uidlist->file_lock); + if (uidlist->dotlock != NULL) + file_dotlock_delete(&uidlist->dotlock); + return -1; + } + + ctx = i_new(struct squat_uidlist_build_context, 1); + ctx->uidlist = uidlist; + ctx->output = o_stream_create_fd(uidlist->fd, 0); + if (ctx->output->offset == 0) { + struct squat_uidlist_file_header hdr; + + i_zero(&hdr); + o_stream_nsend(ctx->output, &hdr, sizeof(hdr)); + } + o_stream_cork(ctx->output); + i_array_init(&ctx->lists, 10240); + i_array_init(&ctx->block_offsets, 128); + i_array_init(&ctx->block_end_indexes, 128); + ctx->list_start_idx = uidlist->hdr.count; + ctx->build_hdr = uidlist->hdr; + + uidlist->building = TRUE; + *ctx_r = ctx; + return 0; +} + +static void +uidlist_write_block_list_and_header(struct squat_uidlist_build_context *ctx, + struct ostream *output, + ARRAY_TYPE(uint32_t) *block_offsets, + ARRAY_TYPE(uint32_t) *block_end_indexes, + bool write_old_blocks) +{ + struct squat_uidlist *uidlist = ctx->uidlist; + unsigned int align, old_block_count, new_block_count; + uint32_t block_offset_count; + uoff_t block_list_offset; + + i_assert(uidlist->trie->hdr.indexid != 0); + ctx->build_hdr.indexid = uidlist->trie->hdr.indexid; + + if (array_count(block_end_indexes) == 0) { + ctx->build_hdr.used_file_size = output->offset; + ctx->build_hdr.block_list_offset = 0; + uidlist->hdr = ctx->build_hdr; + return; + } + + align = output->offset % sizeof(uint32_t); + if (align != 0) { + static char null[sizeof(uint32_t)-1] = { 0, }; + + o_stream_nsend(output, null, sizeof(uint32_t) - align); + } + block_list_offset = output->offset; + + new_block_count = array_count(block_offsets); + old_block_count = write_old_blocks ? uidlist->cur_block_count : 0; + + block_offset_count = new_block_count + old_block_count; + o_stream_nsend(output, &block_offset_count, sizeof(block_offset_count)); + /* write end indexes */ + o_stream_nsend(output, uidlist->cur_block_end_indexes, + old_block_count * sizeof(uint32_t)); + o_stream_nsend(output, array_front(block_end_indexes), + new_block_count * sizeof(uint32_t)); + /* write offsets */ + o_stream_nsend(output, uidlist->cur_block_offsets, + old_block_count * sizeof(uint32_t)); + o_stream_nsend(output, array_front(block_offsets), + new_block_count * sizeof(uint32_t)); + (void)o_stream_flush(output); + + /* update header - it's written later when trie is locked */ + ctx->build_hdr.block_list_offset = block_list_offset; + ctx->build_hdr.used_file_size = output->offset; + uidlist->hdr = ctx->build_hdr; +} + +void squat_uidlist_build_flush(struct squat_uidlist_build_context *ctx) +{ + struct uidlist_list *lists; + uint8_t buf[SQUAT_PACK_MAX_SIZE], *bufp; + unsigned int i, j, count, max; + uint32_t block_offset, block_end_idx, start_offset; + uint32_t list_sizes[UIDLIST_BLOCK_LIST_COUNT]; + size_t mem_size; + + if (ctx->uidlist->corrupted) + return; + + lists = array_get_modifiable(&ctx->lists, &count); + if (count == 0) + return; + + /* write the lists and save the written sizes to uid_list[0] */ + for (i = 0; i < count; i += UIDLIST_BLOCK_LIST_COUNT) { + start_offset = ctx->output->offset; + max = I_MIN(count - i, UIDLIST_BLOCK_LIST_COUNT); + for (j = 0; j < max; j++) { + if (uidlist_write(ctx->output, &lists[i+j], + FALSE, &list_sizes[j]) < 0) { + squat_uidlist_set_corrupted(ctx->uidlist, + "Broken uidlists"); + return; + } + } + + block_offset = ctx->output->offset; + block_end_idx = ctx->list_start_idx + i + max; + array_push_back(&ctx->block_offsets, &block_offset); + array_push_back(&ctx->block_end_indexes, &block_end_idx); + + /* write the full size of the uidlists */ + bufp = buf; + squat_pack_num(&bufp, block_offset - start_offset); + o_stream_nsend(ctx->output, buf, bufp - buf); + + /* write the sizes/flags */ + for (j = 0; j < max; j++) { + bufp = buf; + squat_pack_num(&bufp, list_sizes[j]); + o_stream_nsend(ctx->output, buf, bufp - buf); + } + } + + mem_size = ctx->lists.arr.buffer->used + + ctx->block_offsets.arr.buffer->used + + ctx->block_end_indexes.arr.buffer->used; + if (ctx->uidlist->max_size < mem_size) + ctx->uidlist->max_size = mem_size; + + ctx->list_start_idx += count; + array_clear(&ctx->lists); + + uidlist_write_block_list_and_header(ctx, ctx->output, + &ctx->block_offsets, + &ctx->block_end_indexes, TRUE); + + (void)squat_uidlist_map(ctx->uidlist); + + array_clear(&ctx->block_offsets); + array_clear(&ctx->block_end_indexes); +} + +int squat_uidlist_build_finish(struct squat_uidlist_build_context *ctx) +{ + if (ctx->uidlist->corrupted) + return -1; + + if (!ctx->output->closed) { + (void)o_stream_seek(ctx->output, 0); + o_stream_nsend(ctx->output, + &ctx->build_hdr, sizeof(ctx->build_hdr)); + (void)o_stream_seek(ctx->output, ctx->build_hdr.used_file_size); + } + + if (o_stream_finish(ctx->output) < 0) { + i_error("write() to %s failed: %s", ctx->uidlist->path, + o_stream_get_error(ctx->output)); + return -1; + } + return 0; +} + +void squat_uidlist_build_deinit(struct squat_uidlist_build_context **_ctx) +{ + struct squat_uidlist_build_context *ctx = *_ctx; + + *_ctx = NULL; + + i_assert(array_count(&ctx->lists) == 0 || ctx->uidlist->corrupted); + i_assert(ctx->uidlist->building); + ctx->uidlist->building = FALSE; + + if (ctx->uidlist->file_lock != NULL) + file_unlock(&ctx->uidlist->file_lock); + else + file_dotlock_delete(&ctx->uidlist->dotlock); + + if (ctx->need_reopen) + (void)squat_uidlist_open(ctx->uidlist); + + array_free(&ctx->block_offsets); + array_free(&ctx->block_end_indexes); + array_free(&ctx->lists); + o_stream_ignore_last_errors(ctx->output); + o_stream_unref(&ctx->output); + i_free(ctx); +} + +int squat_uidlist_rebuild_init(struct squat_uidlist_build_context *build_ctx, + bool compress, + struct squat_uidlist_rebuild_context **ctx_r) +{ + struct squat_uidlist_rebuild_context *ctx; + struct squat_uidlist_file_header hdr; + const char *temp_path; + int fd; + + if (build_ctx->build_hdr.link_count == 0) + return 0; + + if (!compress) { + if (build_ctx->build_hdr.link_count < + build_ctx->build_hdr.count*2/3) + return 0; + } + + /* make sure the entire uidlist is in memory before beginning, + otherwise the pages are faulted to memory in random order which + takes forever. */ + if (squat_uidlist_read_to_memory(build_ctx->uidlist) < 0) + return -1; + + temp_path = t_strconcat(build_ctx->uidlist->path, ".tmp", NULL); + fd = squat_trie_create_fd(build_ctx->uidlist->trie, temp_path, O_TRUNC); + if (fd == -1) + return -1; + + ctx = i_new(struct squat_uidlist_rebuild_context, 1); + ctx->uidlist = build_ctx->uidlist; + ctx->build_ctx = build_ctx; + ctx->fd = fd; + ctx->output = o_stream_create_fd(ctx->fd, 0); + ctx->next_uid_list_idx = 0x100; + o_stream_cork(ctx->output); + + i_zero(&hdr); + o_stream_nsend(ctx->output, &hdr, sizeof(hdr)); + + ctx->cur_block_start_offset = ctx->output->offset; + i_array_init(&ctx->new_block_offsets, + build_ctx->build_hdr.count / UIDLIST_BLOCK_LIST_COUNT); + i_array_init(&ctx->new_block_end_indexes, + build_ctx->build_hdr.count / UIDLIST_BLOCK_LIST_COUNT); + *ctx_r = ctx; + return 1; +} + +static void +uidlist_rebuild_flush_block(struct squat_uidlist_rebuild_context *ctx) +{ + uint8_t buf[SQUAT_PACK_MAX_SIZE], *bufp; + uint32_t block_offset, block_end_idx; + unsigned int i; + + ctx->new_count += ctx->list_idx; + + block_offset = ctx->output->offset; + block_end_idx = ctx->new_count; + array_push_back(&ctx->new_block_offsets, &block_offset); + array_push_back(&ctx->new_block_end_indexes, &block_end_idx); + + /* this block's contents started from cur_block_start_offset and + ended to current offset. write the size of this area. */ + bufp = buf; + squat_pack_num(&bufp, block_offset - ctx->cur_block_start_offset); + o_stream_nsend(ctx->output, buf, bufp - buf); + + /* write the sizes/flags */ + for (i = 0; i < ctx->list_idx; i++) { + bufp = buf; + squat_pack_num(&bufp, ctx->list_sizes[i]); + o_stream_nsend(ctx->output, buf, bufp - buf); + } + ctx->cur_block_start_offset = ctx->output->offset; +} + +uint32_t squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(uint32_t) *uids) +{ + int ret; + + T_BEGIN { + ret = uidlist_write_array(ctx->output, array_front(uids), + array_count(uids), 0, 0, FALSE, + &ctx->list_sizes[ctx->list_idx]); + } T_END; + if (ret < 0) + squat_uidlist_set_corrupted(ctx->uidlist, "Broken uidlists"); + + if (++ctx->list_idx == UIDLIST_BLOCK_LIST_COUNT) { + uidlist_rebuild_flush_block(ctx); + ctx->list_idx = 0; + } + return ctx->next_uid_list_idx++ << 1; +} + +uint32_t squat_uidlist_rebuild_nextu(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(seq_range) *uids) +{ + const struct seq_range *range; + ARRAY_TYPE(uint32_t) tmp_uids; + uint32_t seq, uid1, ret; + unsigned int i, count; + + range = array_get(uids, &count); + if (count == 0) + return 0; + + if (range[count-1].seq2 < 8) { + /* we can use a singleton bitmask */ + ret = 0; + for (i = 0; i < count; i++) { + for (seq = range[i].seq1; seq <= range[i].seq2; seq++) + ret |= 1 << (seq+1); + } + return ret; + } + if (count == 1 && range[0].seq1 == range[0].seq2) { + /* single UID */ + return (range[0].seq1 << 1) | 1; + } + + /* convert seq range to our internal representation and use the + normal _rebuild_next() to write it */ + i_array_init(&tmp_uids, 128); + for (i = 0; i < count; i++) { + if (range[i].seq1 == range[i].seq2) + array_push_back(&tmp_uids, &range[i].seq1); + else { + uid1 = range[i].seq1 | UID_LIST_MASK_RANGE; + array_push_back(&tmp_uids, &uid1); + array_push_back(&tmp_uids, &range[i].seq2); + } + } + ret = squat_uidlist_rebuild_next(ctx, &tmp_uids); + array_free(&tmp_uids); + return ret; +} + +int squat_uidlist_rebuild_finish(struct squat_uidlist_rebuild_context *ctx, + bool cancel) +{ + const char *temp_path; + int ret = 1; + + if (ctx->list_idx != 0) + uidlist_rebuild_flush_block(ctx); + if (cancel || ctx->uidlist->corrupted) + ret = 0; + + temp_path = t_strconcat(ctx->uidlist->path, ".tmp", NULL); + if (ret > 0) { + ctx->build_ctx->build_hdr.indexid = + ctx->uidlist->trie->hdr.indexid; + ctx->build_ctx->build_hdr.count = ctx->new_count; + ctx->build_ctx->build_hdr.link_count = 0; + uidlist_write_block_list_and_header(ctx->build_ctx, ctx->output, + &ctx->new_block_offsets, + &ctx->new_block_end_indexes, + FALSE); + (void)o_stream_seek(ctx->output, 0); + o_stream_nsend(ctx->output, &ctx->build_ctx->build_hdr, + sizeof(ctx->build_ctx->build_hdr)); + (void)o_stream_seek(ctx->output, + ctx->build_ctx->build_hdr.used_file_size); + + if (ctx->uidlist->corrupted) + ret = -1; + else if (o_stream_finish(ctx->output) < 0) { + i_error("write(%s) failed: %s", temp_path, + o_stream_get_error(ctx->output)); + ret = -1; + } else if (rename(temp_path, ctx->uidlist->path) < 0) { + i_error("rename(%s, %s) failed: %m", + temp_path, ctx->uidlist->path); + ret = -1; + } + ctx->build_ctx->need_reopen = TRUE; + } else { + o_stream_abort(ctx->output); + } + + /* we no longer require the entire uidlist to be in memory, + let it be used for something more useful. */ + squat_uidlist_free_from_memory(ctx->uidlist); + + o_stream_unref(&ctx->output); + if (close(ctx->fd) < 0) + i_error("close(%s) failed: %m", temp_path); + + if (ret <= 0) + i_unlink(temp_path); + array_free(&ctx->new_block_offsets); + array_free(&ctx->new_block_end_indexes); + i_free(ctx); + return ret < 0 ? -1 : 0; +} + +static void +uidlist_flush(struct squat_uidlist_build_context *ctx, + struct uidlist_list *list, uint32_t uid) +{ + uint32_t size, offset = ctx->output->offset; + + ctx->build_hdr.link_count++; + if (uidlist_write(ctx->output, list, TRUE, &size) < 0) + squat_uidlist_set_corrupted(ctx->uidlist, "Broken uidlists"); + + list->uid_count = 2; + list->uid_begins_with_pointer = TRUE; + + list->uid_list[0] = offset; + list->uid_list[1] = uid; +} + +static struct uidlist_list * +uidlist_add_new(struct squat_uidlist_build_context *ctx, unsigned int count, + uint32_t *uid_list_idx_r) +{ + struct uidlist_list *list; + + i_assert(array_count(&ctx->lists) + + ctx->list_start_idx == ctx->build_hdr.count); + *uid_list_idx_r = (ctx->build_hdr.count + 0x100) << 1; + list = array_append_space(&ctx->lists); + ctx->build_hdr.count++; + + list->uid_count = count; + return list; +} + +uint32_t squat_uidlist_build_add_uid(struct squat_uidlist_build_context *ctx, + uint32_t uid_list_idx, uint32_t uid) +{ + struct uidlist_list *list; + unsigned int idx, mask; + uint32_t *p; + + if ((uid_list_idx & 1) != 0) { + /* adding second UID */ + uint32_t prev_uid = uid_list_idx >> 1; + + i_assert(prev_uid != uid); + list = uidlist_add_new(ctx, 2, &uid_list_idx); + list->uid_list[0] = prev_uid; + if (prev_uid + 1 == uid) + list->uid_list[0] |= UID_LIST_MASK_RANGE; + list->uid_list[1] = uid; + return uid_list_idx; + } else if (uid_list_idx < (0x100 << 1)) { + uint32_t old_list_idx; + + if (uid < 8) { + /* UID lists containing only UIDs 0-7 are saved as + uidlist values 2..511. think of it as a bitmask. */ + uid_list_idx |= 1 << (uid + 1); + i_assert((uid_list_idx & 1) == 0); + return uid_list_idx; + } + + if (uid_list_idx == 0) { + /* first UID */ + return (uid << 1) | 1; + } + + /* create a new list */ + old_list_idx = uid_list_idx >> 1; + list = uidlist_add_new(ctx, 1, &uid_list_idx); + /* add the first UID ourself */ + idx = 0; + i_assert((old_list_idx & 0xff) != 0); + for (mask = 1; mask <= 128; mask <<= 1, idx++) { + if ((old_list_idx & mask) != 0) { + list->uid_list[0] = idx; + idx++; mask <<= 1; + break; + } + } + for (; mask <= 128; mask <<= 1, idx++) { + if ((old_list_idx & mask) != 0) { + (void)squat_uidlist_build_add_uid(ctx, + uid_list_idx, idx); + } + } + } + + /* add to existing list */ + idx = (uid_list_idx >> 1) - 0x100; + if (idx < ctx->list_start_idx) { + list = uidlist_add_new(ctx, 2, &uid_list_idx); + list->uid_list[0] = UID_LIST_POINTER_MASK_LIST_IDX | idx; + list->uid_list[1] = uid; + list->uid_begins_with_pointer = TRUE; + ctx->build_hdr.link_count++; + return uid_list_idx; + } + + idx -= ctx->list_start_idx; + if (idx >= array_count(&ctx->lists)) { + squat_uidlist_set_corrupted(ctx->uidlist, + "missing/broken uidlist"); + return 0; + } + list = array_idx_modifiable(&ctx->lists, idx); + i_assert(list->uid_count > 0); + + p = &list->uid_list[list->uid_count-1]; + i_assert(uid != *p || ctx->uidlist->corrupted || + (list->uid_count == 1 && list->uid_begins_with_pointer)); + if (uid == *p + 1 && + (list->uid_count > 1 || !list->uid_begins_with_pointer)) { + /* use a range */ + if (list->uid_count > 1 && (p[-1] & UID_LIST_MASK_RANGE) != 0 && + (list->uid_count > 2 || !list->uid_begins_with_pointer)) { + /* increase the existing range */ + *p += 1; + return uid_list_idx; + } + + if (list->uid_count == UIDLIST_LIST_SIZE) { + uidlist_flush(ctx, list, uid); + return uid_list_idx; + } + /* create a new range */ + *p |= UID_LIST_MASK_RANGE; + } else { + if (list->uid_count == UIDLIST_LIST_SIZE) { + uidlist_flush(ctx, list, uid); + return uid_list_idx; + } + } + + p++; + list->uid_count++; + + *p = uid; + return uid_list_idx; +} + +static void uidlist_array_append(ARRAY_TYPE(uint32_t) *uids, uint32_t uid) +{ + uint32_t *uidlist; + unsigned int count; + + uidlist = array_get_modifiable(uids, &count); + if (count == 0) { + array_push_back(uids, &uid); + return; + } + if (uidlist[count-1] + 1 == uid) { + if (count > 1 && (uidlist[count-2] & + UID_LIST_MASK_RANGE) != 0) { + uidlist[count-1]++; + return; + } + uidlist[count-1] |= UID_LIST_MASK_RANGE; + } + array_push_back(uids, &uid); +} + +static void uidlist_array_append_range(ARRAY_TYPE(uint32_t) *uids, + uint32_t uid1, uint32_t uid2) +{ + uint32_t *uidlist; + unsigned int count; + + i_assert(uid1 < uid2); + + uidlist = array_get_modifiable(uids, &count); + if (count == 0) { + uid1 |= UID_LIST_MASK_RANGE; + array_push_back(uids, &uid1); + array_push_back(uids, &uid2); + return; + } + if (uidlist[count-1] + 1 == uid1) { + if (count > 1 && (uidlist[count-2] & + UID_LIST_MASK_RANGE) != 0) { + uidlist[count-1] = uid2; + return; + } + uidlist[count-1] |= UID_LIST_MASK_RANGE; + } else { + uid1 |= UID_LIST_MASK_RANGE; + array_push_back(uids, &uid1); + } + array_push_back(uids, &uid2); +} + +static int +squat_uidlist_get_at_offset(struct squat_uidlist *uidlist, uoff_t offset, + uint32_t num, ARRAY_TYPE(uint32_t) *uids) +{ + const uint32_t *uid_list; + const uint8_t *p, *end; + uint32_t size, base_uid, next_uid, flags, prev; + uoff_t uidlist_data_offset; + unsigned int i, j, count; + + if (num != 0) + uidlist_data_offset = offset; + else { + /* not given, read it */ + if (uidlist_file_cache_read(uidlist, offset, + SQUAT_PACK_MAX_SIZE) < 0) + return -1; + + p = CONST_PTR_OFFSET(uidlist->data, offset); + end = CONST_PTR_OFFSET(uidlist->data, uidlist->data_size); + num = squat_unpack_num(&p, end); + uidlist_data_offset = p - (const uint8_t *)uidlist->data; + } + size = num >> 2; + + if (uidlist_file_cache_read(uidlist, uidlist_data_offset, size) < 0) + return -1; + if (uidlist_data_offset + size > uidlist->data_size) { + squat_uidlist_set_corrupted(uidlist, + "size points outside file"); + return -1; + } + + p = CONST_PTR_OFFSET(uidlist->data, uidlist_data_offset); + end = p + size; + + flags = num; + if ((flags & UIDLIST_PACKED_FLAG_BEGINS_WITH_POINTER) != 0) { + /* link to the file */ + prev = squat_unpack_num(&p, end); + + if ((prev & 1) != 0) { + /* pointer to uidlist */ + prev = ((prev >> 1) + 0x100) << 1; + if (squat_uidlist_get(uidlist, prev, uids) < 0) + return -1; + } else { + prev = offset - (prev >> 1); + if (squat_uidlist_get_at_offset(uidlist, prev, + 0, uids) < 0) + return -1; + } + uid_list = array_get(uids, &count); + next_uid = count == 0 ? 0 : uid_list[count-1] + 1; + } else { + next_uid = 0; + } + + num = base_uid = squat_unpack_num(&p, end); + if ((flags & UIDLIST_PACKED_FLAG_BITMASK) == 0) + base_uid >>= 1; + if (base_uid < next_uid) { + squat_uidlist_set_corrupted(uidlist, + "broken continued uidlist"); + return -1; + } + + if ((flags & UIDLIST_PACKED_FLAG_BITMASK) != 0) { + /* bitmask */ + size = end - p; + + uidlist_array_append(uids, base_uid++); + for (i = 0; i < size; i++) { + for (j = 0; j < 8; j++, base_uid++) { + if ((p[i] & (1 << j)) != 0) + uidlist_array_append(uids, base_uid); + } + } + } else { + /* range */ + for (;;) { + if ((num & 1) == 0) { + uidlist_array_append(uids, base_uid); + } else { + /* range */ + uint32_t seq1 = base_uid; + base_uid += squat_unpack_num(&p, end) + 1; + uidlist_array_append_range(uids, seq1, + base_uid); + } + if (p == end) + break; + + num = squat_unpack_num(&p, end); + base_uid += (num >> 1) + 1; + } + } + return 0; +} + +static int +squat_uidlist_get_offset(struct squat_uidlist *uidlist, uint32_t uid_list_idx, + uint32_t *offset_r, uint32_t *num_r) +{ + const uint8_t *p, *end; + unsigned int idx; + uint32_t num, skip_bytes, uidlists_offset; + size_t max_map_size; + + if (uidlist->fd == -1) { + squat_uidlist_set_corrupted(uidlist, "no uidlists"); + return -1; + } + + if (bsearch_insert_pos(&uid_list_idx, uidlist->cur_block_end_indexes, + uidlist->cur_block_count, + sizeof(uint32_t), uint32_cmp, &idx)) + idx++; + if (unlikely(idx == uidlist->cur_block_count)) { + squat_uidlist_set_corrupted(uidlist, "uidlist not found"); + return -1; + } + i_assert(uidlist->cur_block_end_indexes != NULL); + if (unlikely(idx > 0 && + uidlist->cur_block_end_indexes[idx-1] > uid_list_idx)) { + squat_uidlist_set_corrupted(uidlist, "broken block list"); + return -1; + } + + /* make sure everything is mapped */ + uid_list_idx -= idx == 0 ? 0 : uidlist->cur_block_end_indexes[idx-1]; + max_map_size = SQUAT_PACK_MAX_SIZE * (1+uid_list_idx); + if (uidlist_file_cache_read(uidlist, uidlist->cur_block_offsets[idx], + max_map_size) < 0) + return -1; + + /* find the uidlist inside the block */ + i_assert(uidlist->cur_block_offsets != NULL); + p = CONST_PTR_OFFSET(uidlist->data, uidlist->cur_block_offsets[idx]); + end = CONST_PTR_OFFSET(uidlist->data, uidlist->data_size); + + uidlists_offset = uidlist->cur_block_offsets[idx] - + squat_unpack_num(&p, end); + for (skip_bytes = 0; uid_list_idx > 0; uid_list_idx--) { + num = squat_unpack_num(&p, end); + skip_bytes += num >> 2; + } + *offset_r = uidlists_offset + skip_bytes; + *num_r = squat_unpack_num(&p, end); + + if (unlikely(p == end)) { + squat_uidlist_set_corrupted(uidlist, "broken file"); + return -1; + } + if (unlikely(*offset_r > uidlist->mmap_size && + uidlist->mmap_base != NULL)) { + squat_uidlist_set_corrupted(uidlist, "broken offset"); + return -1; + } + return 0; +} + +int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx, + ARRAY_TYPE(uint32_t) *uids) +{ + unsigned int mask; + uint32_t uid, offset, num; + + if ((uid_list_idx & 1) != 0) { + /* single UID */ + uid = uid_list_idx >> 1; + uidlist_array_append(uids, uid); + return 0; + } else if (uid_list_idx < (0x100 << 1)) { + /* bitmask */ + for (uid = 0, mask = 2; mask <= 256; mask <<= 1, uid++) { + if ((uid_list_idx & mask) != 0) + uidlist_array_append(uids, uid); + } + return 0; + } + + uid_list_idx = (uid_list_idx >> 1) - 0x100; + if (squat_uidlist_get_offset(uidlist, uid_list_idx, &offset, &num) < 0) + return -1; + return squat_uidlist_get_at_offset(uidlist, offset, num, uids); +} + +uint32_t squat_uidlist_singleton_last_uid(uint32_t uid_list_idx) +{ + unsigned int idx, mask; + + if ((uid_list_idx & 1) != 0) { + /* single UID */ + return uid_list_idx >> 1; + } else if (uid_list_idx < (0x100 << 1)) { + /* bitmask */ + if (uid_list_idx == 2) { + /* just a quick optimization */ + return 0; + } + for (idx = 7, mask = 256; mask > 2; mask >>= 1, idx--) { + if ((uid_list_idx & mask) != 0) + return idx; + } + } + + i_unreached(); + return 0; +} + +int squat_uidlist_get_seqrange(struct squat_uidlist *uidlist, + uint32_t uid_list_idx, + ARRAY_TYPE(seq_range) *seq_range_arr) +{ + ARRAY_TYPE(uint32_t) tmp_uid_arr; + struct seq_range range; + const uint32_t *tmp_uids; + unsigned int i, count; + int ret; + + i_array_init(&tmp_uid_arr, 128); + ret = squat_uidlist_get(uidlist, uid_list_idx, &tmp_uid_arr); + if (ret == 0) { + tmp_uids = array_get(&tmp_uid_arr, &count); + for (i = 0; i < count; i++) { + if ((tmp_uids[i] & UID_LIST_MASK_RANGE) == 0) + range.seq1 = range.seq2 = tmp_uids[i]; + else { + range.seq1 = tmp_uids[i] & ~UID_LIST_MASK_RANGE; + range.seq2 = tmp_uids[++i]; + } + array_push_back(seq_range_arr, &range); + } + } + array_free(&tmp_uid_arr); + return ret; +} + +int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx, + ARRAY_TYPE(seq_range) *uids) +{ + const struct seq_range *parent_range; + ARRAY_TYPE(seq_range) dest_uids; + ARRAY_TYPE(uint32_t) relative_uids; + const uint32_t *rel_range; + unsigned int i, rel_count, parent_idx, parent_count, diff, parent_uid; + uint32_t prev_seq, seq1, seq2; + int ret = 0; + + parent_range = array_get(uids, &parent_count); + if (parent_count == 0) + return 0; + + i_array_init(&relative_uids, 128); + i_array_init(&dest_uids, 128); + if (squat_uidlist_get(uidlist, uid_list_idx, &relative_uids) < 0) + ret = -1; + + parent_idx = 0; + rel_range = array_get(&relative_uids, &rel_count); + prev_seq = 0; parent_uid = parent_range[0].seq1; + for (i = 0; i < rel_count; i++) { + if (unlikely(parent_uid == (uint32_t)-1)) { + i_error("broken UID ranges"); + ret = -1; + break; + } + if ((rel_range[i] & UID_LIST_MASK_RANGE) == 0) + seq1 = seq2 = rel_range[i]; + else { + seq1 = (rel_range[i] & ~UID_LIST_MASK_RANGE); + seq2 = rel_range[++i]; + } + i_assert(seq1 >= prev_seq); + diff = seq1 - prev_seq; + while (diff > 0) { + if (unlikely(parent_uid == (uint32_t)-1)) { + i_error("broken UID ranges"); + ret = -1; + break; + } + + for (; parent_idx < parent_count; parent_idx++) { + if (parent_range[parent_idx].seq2 <= parent_uid) + continue; + if (parent_uid < parent_range[parent_idx].seq1) + parent_uid = parent_range[parent_idx].seq1; + else + parent_uid++; + break; + } + diff--; + } + diff = seq2 - seq1 + 1; + while (diff > 0) { + if (unlikely(parent_uid == (uint32_t)-1)) { + i_error("broken UID ranges"); + ret = -1; + break; + } + seq_range_array_add(&dest_uids, parent_uid); + for (; parent_idx < parent_count; parent_idx++) { + if (parent_range[parent_idx].seq2 <= parent_uid) + continue; + if (parent_uid < parent_range[parent_idx].seq1) + parent_uid = parent_range[parent_idx].seq1; + else + parent_uid++; + break; + } + diff--; + } + + prev_seq = seq2 + 1; + } + + buffer_set_used_size(uids->arr.buffer, 0); + array_append_array(uids, &dest_uids); + + array_free(&relative_uids); + array_free(&dest_uids); + return ret; +} + +size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist, + unsigned int *count_r) +{ + *count_r = uidlist->hdr.count; + return uidlist->max_size; +} diff --git a/src/plugins/fts-squat/squat-uidlist.h b/src/plugins/fts-squat/squat-uidlist.h new file mode 100644 index 0000000..79ed791 --- /dev/null +++ b/src/plugins/fts-squat/squat-uidlist.h @@ -0,0 +1,71 @@ +#ifndef SQUAT_UIDLIST_H +#define SQUAT_UIDLIST_H + +struct squat_trie; +struct squat_uidlist_build_context; +struct squat_uidlist_rebuild_context; + +struct squat_uidlist_file_header { + uint32_t indexid; + uint32_t used_file_size; + uint32_t block_list_offset; + uint32_t count, link_count; +}; + +/* + uidlist file: + + struct uidlist_header; + + // size includes both prev_offset and uidlist + packed (size << 2) | packed_flags; // UIDLIST_PACKED_FLAG_* + [packed prev_offset;] // If UIDLIST_PACKED_FLAG_BEGINS_WITH_OFFSET is set + if (UIDLIST_PACKED_FLAG_BITMASK) { + packed base_uid; // first UID in uidlist + uint8_t bitmask[]; // first bit is base_uid+1 + } else { + // FIXME: packed range + } +*/ + +#define UIDLIST_IS_SINGLETON(idx) \ + (((idx) & 1) != 0 || (idx) < (0x100 << 1)) + +struct squat_uidlist *squat_uidlist_init(struct squat_trie *trie); +void squat_uidlist_deinit(struct squat_uidlist *uidlist); + +int squat_uidlist_refresh(struct squat_uidlist *uidlist); + +int squat_uidlist_build_init(struct squat_uidlist *uidlist, + struct squat_uidlist_build_context **ctx_r); +uint32_t squat_uidlist_build_add_uid(struct squat_uidlist_build_context *ctx, + uint32_t uid_list_idx, uint32_t uid); +void squat_uidlist_build_flush(struct squat_uidlist_build_context *ctx); +int squat_uidlist_build_finish(struct squat_uidlist_build_context *ctx); +void squat_uidlist_build_deinit(struct squat_uidlist_build_context **ctx); + +int squat_uidlist_rebuild_init(struct squat_uidlist_build_context *build_ctx, + bool compress, + struct squat_uidlist_rebuild_context **ctx_r); +uint32_t squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(uint32_t) *uids); +uint32_t squat_uidlist_rebuild_nextu(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(seq_range) *uids); +int squat_uidlist_rebuild_finish(struct squat_uidlist_rebuild_context *ctx, + bool cancel); + +int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx, + ARRAY_TYPE(uint32_t) *uids); +uint32_t squat_uidlist_singleton_last_uid(uint32_t uid_list_idx); + +int squat_uidlist_get_seqrange(struct squat_uidlist *uidlist, + uint32_t uid_list_idx, + ARRAY_TYPE(seq_range) *seq_range_arr); +int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx, + ARRAY_TYPE(seq_range) *uids); + +void squat_uidlist_delete(struct squat_uidlist *uidlist); +size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist, + unsigned int *count_r); + +#endif diff --git a/src/plugins/fts/Makefile.am b/src/plugins/fts/Makefile.am new file mode 100644 index 0000000..2e7753c --- /dev/null +++ b/src/plugins/fts/Makefile.am @@ -0,0 +1,74 @@ +pkglibexecdir = $(libexecdir)/dovecot +doveadm_moduledir = $(moduledir)/doveadm + +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-settings \ + -I$(top_srcdir)/src/lib-fts \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/lib-storage/index \ + -I$(top_srcdir)/src/doveadm + +NOPLUGIN_LDFLAGS = +lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version +lib20_fts_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib20_fts_plugin.la + +lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la + +lib20_fts_plugin_la_SOURCES = \ + fts-api.c \ + fts-build-mail.c \ + fts-expunge-log.c \ + fts-indexer.c \ + fts-parser.c \ + fts-parser-html.c \ + fts-parser-script.c \ + fts-parser-tika.c \ + fts-plugin.c \ + fts-search.c \ + fts-search-args.c \ + fts-search-serialize.c \ + fts-storage.c \ + fts-user.c + +pkginc_libdir=$(pkgincludedir) +pkginc_lib_HEADERS = \ + fts-api.h \ + fts-api-private.h \ + fts-expunge-log.h \ + fts-indexer.h \ + fts-parser.h \ + fts-storage.h \ + fts-user.h + +noinst_HEADERS = \ + doveadm-fts.h \ + fts-build-mail.h \ + fts-plugin.h \ + fts-search-args.h \ + fts-search-serialize.h + +pkglibexec_PROGRAMS = xml2text + +xml2text_SOURCES = xml2text.c fts-parser-html.c +xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS) +xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS) +xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS) + +pkglibexec_SCRIPTS = decode2text.sh +EXTRA_DIST = $(pkglibexec_SCRIPTS) + +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_plugin.la + +lib20_doveadm_fts_plugin_la_SOURCES = \ + doveadm-fts.c \ + doveadm-dump-fts-expunge-log.c diff --git a/src/plugins/fts/Makefile.in b/src/plugins/fts/Makefile.in new file mode 100644 index 0000000..624f69f --- /dev/null +++ b/src/plugins/fts/Makefile.in @@ -0,0 +1,1140 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +pkglibexec_PROGRAMS = xml2text$(EXEEXT) +subdir = src/plugins/fts +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(pkginc_lib_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(pkglibexecdir)" \ + "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" \ + "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)" +PROGRAMS = $(pkglibexec_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES) +lib20_doveadm_fts_plugin_la_LIBADD = +am_lib20_doveadm_fts_plugin_la_OBJECTS = doveadm-fts.lo \ + doveadm-dump-fts-expunge-log.lo +lib20_doveadm_fts_plugin_la_OBJECTS = \ + $(am_lib20_doveadm_fts_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib20_doveadm_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib20_doveadm_fts_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +lib20_fts_plugin_la_DEPENDENCIES = ../../lib-fts/libfts.la +am_lib20_fts_plugin_la_OBJECTS = fts-api.lo fts-build-mail.lo \ + fts-expunge-log.lo fts-indexer.lo fts-parser.lo \ + fts-parser-html.lo fts-parser-script.lo fts-parser-tika.lo \ + fts-plugin.lo fts-search.lo fts-search-args.lo \ + fts-search-serialize.lo fts-storage.lo fts-user.lo +lib20_fts_plugin_la_OBJECTS = $(am_lib20_fts_plugin_la_OBJECTS) +lib20_fts_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(lib20_fts_plugin_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_xml2text_OBJECTS = xml2text-xml2text.$(OBJEXT) \ + xml2text-fts-parser-html.$(OBJEXT) +xml2text_OBJECTS = $(am_xml2text_OBJECTS) +am__DEPENDENCIES_1 = +SCRIPTS = $(pkglibexec_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo \ + ./$(DEPDIR)/doveadm-fts.Plo ./$(DEPDIR)/fts-api.Plo \ + ./$(DEPDIR)/fts-build-mail.Plo ./$(DEPDIR)/fts-expunge-log.Plo \ + ./$(DEPDIR)/fts-indexer.Plo ./$(DEPDIR)/fts-parser-html.Plo \ + ./$(DEPDIR)/fts-parser-script.Plo \ + ./$(DEPDIR)/fts-parser-tika.Plo ./$(DEPDIR)/fts-parser.Plo \ + ./$(DEPDIR)/fts-plugin.Plo ./$(DEPDIR)/fts-search-args.Plo \ + ./$(DEPDIR)/fts-search-serialize.Plo \ + ./$(DEPDIR)/fts-search.Plo ./$(DEPDIR)/fts-storage.Plo \ + ./$(DEPDIR)/fts-user.Plo \ + ./$(DEPDIR)/xml2text-fts-parser-html.Po \ + ./$(DEPDIR)/xml2text-xml2text.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \ + $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES) +DIST_SOURCES = $(lib20_doveadm_fts_plugin_la_SOURCES) \ + $(lib20_fts_plugin_la_SOURCES) $(xml2text_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) $(pkginc_lib_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibexecdir = $(libexecdir)/dovecot +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = @FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = @PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +doveadm_moduledir = $(moduledir)/doveadm +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-settings \ + -I$(top_srcdir)/src/lib-fts \ + -I$(top_srcdir)/src/lib-ssl-iostream \ + -I$(top_srcdir)/src/lib-http \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-imap \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/lib-storage/index \ + -I$(top_srcdir)/src/doveadm + +lib20_doveadm_fts_plugin_la_LDFLAGS = -module -avoid-version +lib20_fts_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib20_fts_plugin.la + +lib20_fts_plugin_la_LIBADD = ../../lib-fts/libfts.la +lib20_fts_plugin_la_SOURCES = \ + fts-api.c \ + fts-build-mail.c \ + fts-expunge-log.c \ + fts-indexer.c \ + fts-parser.c \ + fts-parser-html.c \ + fts-parser-script.c \ + fts-parser-tika.c \ + fts-plugin.c \ + fts-search.c \ + fts-search-args.c \ + fts-search-serialize.c \ + fts-storage.c \ + fts-user.c + +pkginc_libdir = $(pkgincludedir) +pkginc_lib_HEADERS = \ + fts-api.h \ + fts-api-private.h \ + fts-expunge-log.h \ + fts-indexer.h \ + fts-parser.h \ + fts-storage.h \ + fts-user.h + +noinst_HEADERS = \ + doveadm-fts.h \ + fts-build-mail.h \ + fts-plugin.h \ + fts-search-args.h \ + fts-search-serialize.h + +xml2text_SOURCES = xml2text.c fts-parser-html.c +xml2text_CPPFLAGS = $(AM_CPPFLAGS) $(BINARY_CFLAGS) +xml2text_LDADD = $(LIBDOVECOT) $(BINARY_LDFLAGS) +xml2text_DEPENDENCIES = $(module_LTLIBRARIES) $(LIBDOVECOT_DEPS) +pkglibexec_SCRIPTS = decode2text.sh +EXTRA_DIST = $(pkglibexec_SCRIPTS) +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_plugin.la + +lib20_doveadm_fts_plugin_la_SOURCES = \ + doveadm-fts.c \ + doveadm-dump-fts-expunge-log.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-pkglibexecPROGRAMS: $(pkglibexec_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-pkglibexecPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(pkglibexec_PROGRAMS)'; test -n "$(pkglibexecdir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(pkglibexecdir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(pkglibexecdir)" && rm -f $$files + +clean-pkglibexecPROGRAMS: + @list='$(pkglibexec_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \ + } + +uninstall-doveadm_moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \ + done + +clean-doveadm_moduleLTLIBRARIES: + -test -z "$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES) + @list='$(doveadm_module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +lib20_doveadm_fts_plugin.la: $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_doveadm_fts_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_plugin_la_OBJECTS) $(lib20_doveadm_fts_plugin_la_LIBADD) $(LIBS) + +lib20_fts_plugin.la: $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_DEPENDENCIES) $(EXTRA_lib20_fts_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_fts_plugin_la_LINK) -rpath $(moduledir) $(lib20_fts_plugin_la_OBJECTS) $(lib20_fts_plugin_la_LIBADD) $(LIBS) + +xml2text$(EXEEXT): $(xml2text_OBJECTS) $(xml2text_DEPENDENCIES) $(EXTRA_xml2text_DEPENDENCIES) + @rm -f xml2text$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xml2text_OBJECTS) $(xml2text_LDADD) $(LIBS) +install-pkglibexecSCRIPTS: $(pkglibexec_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkglibexecdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkglibexecdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(pkglibexecdir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(pkglibexecdir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-pkglibexecSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(pkglibexec_SCRIPTS)'; test -n "$(pkglibexecdir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(pkglibexecdir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-api.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-build-mail.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-expunge-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-indexer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-html.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-script.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser-tika.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-parser.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-args.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search-serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-search.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-storage.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-user.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-fts-parser-html.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2text-xml2text.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +xml2text-xml2text.o: xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.o -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.o `test -f 'xml2text.c' || echo '$(srcdir)/'`xml2text.c + +xml2text-xml2text.obj: xml2text.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-xml2text.obj -MD -MP -MF $(DEPDIR)/xml2text-xml2text.Tpo -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-xml2text.Tpo $(DEPDIR)/xml2text-xml2text.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xml2text.c' object='xml2text-xml2text.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-xml2text.obj `if test -f 'xml2text.c'; then $(CYGPATH_W) 'xml2text.c'; else $(CYGPATH_W) '$(srcdir)/xml2text.c'; fi` + +xml2text-fts-parser-html.o: fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.o -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.o `test -f 'fts-parser-html.c' || echo '$(srcdir)/'`fts-parser-html.c + +xml2text-fts-parser-html.obj: fts-parser-html.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xml2text-fts-parser-html.obj -MD -MP -MF $(DEPDIR)/xml2text-fts-parser-html.Tpo -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xml2text-fts-parser-html.Tpo $(DEPDIR)/xml2text-fts-parser-html.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='fts-parser-html.c' object='xml2text-fts-parser-html.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xml2text_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml2text-fts-parser-html.obj `if test -f 'fts-parser-html.c'; then $(CYGPATH_W) 'fts-parser-html.c'; else $(CYGPATH_W) '$(srcdir)/fts-parser-html.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-pkginc_libHEADERS: $(pkginc_lib_HEADERS) + @$(NORMAL_INSTALL) + @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkginc_libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkginc_libdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkginc_libdir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkginc_libdir)" || exit $$?; \ + done + +uninstall-pkginc_libHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(pkginc_lib_HEADERS)'; test -n "$(pkginc_libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkginc_libdir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(SCRIPTS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(pkginc_libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/doveadm-fts.Plo + -rm -f ./$(DEPDIR)/fts-api.Plo + -rm -f ./$(DEPDIR)/fts-build-mail.Plo + -rm -f ./$(DEPDIR)/fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/fts-indexer.Plo + -rm -f ./$(DEPDIR)/fts-parser-html.Plo + -rm -f ./$(DEPDIR)/fts-parser-script.Plo + -rm -f ./$(DEPDIR)/fts-parser-tika.Plo + -rm -f ./$(DEPDIR)/fts-parser.Plo + -rm -f ./$(DEPDIR)/fts-plugin.Plo + -rm -f ./$(DEPDIR)/fts-search-args.Plo + -rm -f ./$(DEPDIR)/fts-search-serialize.Plo + -rm -f ./$(DEPDIR)/fts-search.Plo + -rm -f ./$(DEPDIR)/fts-storage.Plo + -rm -f ./$(DEPDIR)/fts-user.Plo + -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po + -rm -f ./$(DEPDIR)/xml2text-xml2text.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-doveadm_moduleLTLIBRARIES \ + install-moduleLTLIBRARIES install-pkginc_libHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-pkglibexecPROGRAMS install-pkglibexecSCRIPTS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/doveadm-dump-fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/doveadm-fts.Plo + -rm -f ./$(DEPDIR)/fts-api.Plo + -rm -f ./$(DEPDIR)/fts-build-mail.Plo + -rm -f ./$(DEPDIR)/fts-expunge-log.Plo + -rm -f ./$(DEPDIR)/fts-indexer.Plo + -rm -f ./$(DEPDIR)/fts-parser-html.Plo + -rm -f ./$(DEPDIR)/fts-parser-script.Plo + -rm -f ./$(DEPDIR)/fts-parser-tika.Plo + -rm -f ./$(DEPDIR)/fts-parser.Plo + -rm -f ./$(DEPDIR)/fts-plugin.Plo + -rm -f ./$(DEPDIR)/fts-search-args.Plo + -rm -f ./$(DEPDIR)/fts-search-serialize.Plo + -rm -f ./$(DEPDIR)/fts-search.Plo + -rm -f ./$(DEPDIR)/fts-storage.Plo + -rm -f ./$(DEPDIR)/fts-user.Plo + -rm -f ./$(DEPDIR)/xml2text-fts-parser-html.Po + -rm -f ./$(DEPDIR)/xml2text-xml2text.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \ + uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES clean-pkglibexecPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-doveadm_moduleLTLIBRARIES install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-pkginc_libHEADERS install-pkglibexecPROGRAMS \ + install-pkglibexecSCRIPTS install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-moduleLTLIBRARIES uninstall-pkginc_libHEADERS \ + uninstall-pkglibexecPROGRAMS uninstall-pkglibexecSCRIPTS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts/decode2text.sh b/src/plugins/fts/decode2text.sh new file mode 100755 index 0000000..1c881ff --- /dev/null +++ b/src/plugins/fts/decode2text.sh @@ -0,0 +1,105 @@ +#!/bin/sh + +# Example attachment decoder script. The attachment comes from stdin, and +# the script is expected to output UTF-8 data to stdout. (If the output isn't +# UTF-8, everything except valid UTF-8 sequences are dropped from it.) + +# The attachment decoding is enabled by setting: +# +# plugin { +# fts_decoder = decode2text +# } +# service decode2text { +# executable = script /usr/local/libexec/dovecot/decode2text.sh +# user = dovecot +# unix_listener decode2text { +# mode = 0666 +# } +# } + +libexec_dir=`dirname $0` +content_type=$1 + +# The second parameter is the format's filename extension, which is used when +# found from a filename of application/octet-stream. You can also add more +# extensions by giving more parameters. +formats='application/pdf pdf +application/x-pdf pdf +application/msword doc +application/mspowerpoint ppt +application/vnd.ms-powerpoint ppt +application/ms-excel xls +application/x-msexcel xls +application/vnd.ms-excel xls +application/vnd.openxmlformats-officedocument.wordprocessingml.document docx +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx +application/vnd.openxmlformats-officedocument.presentationml.presentation pptx +application/vnd.oasis.opendocument.text odt +application/vnd.oasis.opendocument.spreadsheet ods +application/vnd.oasis.opendocument.presentation odp +' + +if [ "$content_type" = "" ]; then + echo "$formats" + exit 0 +fi + +fmt=`echo "$formats" | grep -w "^$content_type" | cut -d ' ' -f 2` +if [ "$fmt" = "" ]; then + echo "Content-Type: $content_type not supported" >&2 + exit 1 +fi + +# most decoders can't handle stdin directly, so write the attachment +# to a temp file +path=`mktemp` +trap "rm -f $path" 0 1 2 3 14 15 +cat > $path + +xmlunzip() { + name=$1 + + tempdir=`mktemp -d` + if [ "$tempdir" = "" ]; then + exit 1 + fi + trap "rm -rf $path $tempdir" 0 1 2 3 14 15 + cd $tempdir || exit 1 + unzip -q "$path" 2>/dev/null || exit 0 + find . -name "$name" -print0 | xargs -0 cat | + $libexec_dir/xml2text +} + +wait_timeout() { + childpid=$! + trap "kill -9 $childpid; rm -f $path" 1 2 3 14 15 + wait $childpid +} + +LANG=en_US.UTF-8 +export LANG +if [ $fmt = "pdf" ]; then + /usr/bin/pdftotext $path - 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "doc" ]; then + (/usr/bin/catdoc $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "ppt" ]; then + (/usr/bin/catppt $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "xls" ]; then + (/usr/bin/xls2csv $path; true) 2>/dev/null& + wait_timeout 2>/dev/null +elif [ $fmt = "odt" -o $fmt = "ods" -o $fmt = "odp" ]; then + xmlunzip "content.xml" +elif [ $fmt = "docx" ]; then + xmlunzip "document.xml" +elif [ $fmt = "xlsx" ]; then + xmlunzip "sharedStrings.xml" +elif [ $fmt = "pptx" ]; then + xmlunzip "slide*.xml" +else + echo "Buggy decoder script: $fmt not handled" >&2 + exit 1 +fi +exit 0 diff --git a/src/plugins/fts/doveadm-dump-fts-expunge-log.c b/src/plugins/fts/doveadm-dump-fts-expunge-log.c new file mode 100644 index 0000000..7438bca --- /dev/null +++ b/src/plugins/fts/doveadm-dump-fts-expunge-log.c @@ -0,0 +1,116 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "hex-binary.h" +#include "guid.h" +#include "doveadm-dump.h" +#include "doveadm-fts.h" + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> + +struct fts_expunge_log_record { + uint32_t checksum; + uint32_t record_size; + guid_128_t guid; +}; + +static int dump_record(int fd, buffer_t *buf) +{ + struct fts_expunge_log_record rec; + off_t offset; + void *data; + const uint32_t *expunges, *uids; + ssize_t ret; + size_t data_size; + unsigned int i, uids_count; + + offset = lseek(fd, 0, SEEK_CUR); + + ret = read(fd, &rec, sizeof(rec)); + if (ret == 0) + return 0; + + if (ret != sizeof(rec)) + i_fatal("rec read() %d != %d", (int)ret, (int)sizeof(rec)); + + if (rec.record_size < sizeof(rec) + sizeof(uint32_t) || + rec.record_size > INT_MAX) { + i_fatal("Invalid record_size=%u at offset %"PRIuUOFF_T, + rec.record_size, offset); + } + data_size = rec.record_size - sizeof(rec); + buffer_set_used_size(buf, 0); + data = buffer_append_space_unsafe(buf, data_size); + ret = read(fd, data, data_size); + if (ret != (ssize_t)data_size) + i_fatal("rec read() %d != %d", (int)ret, (int)data_size); + + printf("#%"PRIuUOFF_T":\n", offset); + printf(" checksum = %8x\n", rec.checksum); + printf(" size .... = %u\n", rec.record_size); + printf(" mailbox . = %s\n", guid_128_to_string(rec.guid)); + + expunges = CONST_PTR_OFFSET(data, data_size - sizeof(uint32_t)); + printf(" expunges = %u\n", *expunges); + + printf(" uids .... = "); + + uids = data; + uids_count = (rec.record_size - sizeof(rec) - sizeof(uint32_t)) / + sizeof(uint32_t); + for (i = 0; i < uids_count; i += 2) { + if (i != 0) + printf(","); + if (uids[i] == uids[i+1]) + printf("%u", uids[i]); + else + printf("%u-%u", uids[i], uids[i+1]); + } + printf("\n"); + return 1; +} + +static void +cmd_dump_fts_expunge_log(const char *path, const char *const *args ATTR_UNUSED) +{ + buffer_t *buf; + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + i_fatal("open(%s) failed: %m", path); + + buf = buffer_create_dynamic(default_pool, 1024); + do { + T_BEGIN { + ret = dump_record(fd, buf); + } T_END; + } while (ret > 0); + buffer_free(&buf); + i_close_fd(&fd); +} + +static bool test_dump_fts_expunge_log(const char *path) +{ + const char *p; + + if ((p = strrchr(path, '/')) != NULL) + p++; + else + p = path; + return strcmp(p, "dovecot-expunges.log") == 0; +} + +static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_expunge_log = { + "fts-expunge-log", + test_dump_fts_expunge_log, + cmd_dump_fts_expunge_log +}; + +void doveadm_dump_fts_expunge_log_init(void) +{ + doveadm_dump_register(&doveadm_cmd_dump_fts_expunge_log); +} diff --git a/src/plugins/fts/doveadm-fts.c b/src/plugins/fts/doveadm-fts.c new file mode 100644 index 0000000..1b902a1 --- /dev/null +++ b/src/plugins/fts/doveadm-fts.c @@ -0,0 +1,470 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "imap-util.h" +#include "mail-namespace.h" +#include "mail-search.h" +#include "mailbox-list-iter.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-language.h" +#include "fts-storage.h" +#include "fts-search-args.h" +#include "fts-user.h" +#include "doveadm-print.h" +#include "doveadm-mail.h" +#include "doveadm-mailbox-list-iter.h" +#include "doveadm-fts.h" + +const char *doveadm_fts_plugin_version = DOVECOT_ABI_VERSION; + +struct fts_tokenize_cmd_context { + struct doveadm_mail_cmd_context ctx; + const char *language; + const char *tokens; +}; + +static int +cmd_search_box(struct doveadm_mail_cmd_context *ctx, + const struct mailbox_info *info) +{ + struct mailbox *box; + struct fts_backend *backend; + struct fts_result result; + int ret = 0; + + backend = fts_list_backend(info->ns->list); + if (backend == NULL) { + i_error("fts not enabled for %s", info->vname); + ctx->exit_code = EX_CONFIG; + return -1; + } + + i_zero(&result); + i_array_init(&result.definite_uids, 16); + i_array_init(&result.maybe_uids, 16); + i_array_init(&result.scores, 16); + + box = mailbox_alloc(info->ns->list, info->vname, 0); + if (fts_backend_lookup(backend, box, ctx->search_args->args, + FTS_LOOKUP_FLAG_AND_ARGS, &result) < 0) { + i_error("fts lookup failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + ret = -1; + } else { + printf("%s: ", info->vname); + if (array_count(&result.definite_uids) == 0) + printf("no results\n"); + else T_BEGIN { + string_t *str = t_str_new(128); + imap_write_seq_range(str, &result.definite_uids); + printf("%s\n", str_c(str)); + } T_END; + if (array_count(&result.maybe_uids) > 0) T_BEGIN { + string_t *str = t_str_new(128); + imap_write_seq_range(str, &result.maybe_uids); + printf(" - maybe: %s\n", str_c(str)); + } T_END; + fts_backend_lookup_done(backend); + } + mailbox_free(&box); + array_free(&result.definite_uids); + array_free(&result.maybe_uids); + array_free(&result.scores); + return ret; +} + +static int +cmd_fts_lookup_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + const enum mailbox_list_iter_flags iter_flags = + MAILBOX_LIST_ITER_NO_AUTO_BOXES | + MAILBOX_LIST_ITER_RETURN_NO_FLAGS; + struct doveadm_mailbox_list_iter *iter; + const struct mailbox_info *info; + int ret = 0; + + iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args, + iter_flags); + while ((info = doveadm_mailbox_list_iter_next(iter)) != NULL) T_BEGIN { + if (cmd_search_box(ctx, info) < 0) + ret = -1; + } T_END; + if (doveadm_mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +static void +cmd_fts_lookup_init(struct doveadm_mail_cmd_context *ctx, + const char *const args[]) +{ + if (args[0] == NULL) + doveadm_mail_help_name("fts lookup"); + + ctx->search_args = doveadm_mail_build_search_args(args); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_lookup_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_lookup_run; + ctx->v.init = cmd_fts_lookup_init; + return ctx; +} + +static int +cmd_fts_expand_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); + struct mailbox *box; + struct fts_backend *backend; + string_t *str = t_str_new(128); + + backend = fts_list_backend(ns->list); + if (backend == NULL) { + i_error("fts not enabled for INBOX"); + ctx->exit_code = EX_CONFIG; + return -1; + } + + box = mailbox_alloc(ns->list, "INBOX", 0); + mail_search_args_init(ctx->search_args, box, FALSE, NULL); + + if (fts_search_args_expand(backend, ctx->search_args) < 0) + i_fatal("Couldn't expand search args"); + mail_search_args_to_cmdline(str, ctx->search_args->args); + printf("%s\n", str_c(str)); + mailbox_free(&box); + return 0; +} + +static void +cmd_fts_expand_init(struct doveadm_mail_cmd_context *ctx, + const char *const args[]) +{ + if (args[0] == NULL) + doveadm_mail_help_name("fts expand"); + + ctx->search_args = doveadm_mail_build_search_args(args); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_expand_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_expand_run; + ctx->v.init = cmd_fts_expand_init; + return ctx; +} + +static int +cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, + struct mail_user *user) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); + struct fts_backend *backend; + struct fts_user_language *user_lang; + const struct fts_language *lang = NULL; + int ret, ret2; + bool final = FALSE; + + backend = fts_list_backend(ns->list); + if (backend == NULL) { + i_error("fts not enabled for INBOX"); + _ctx->exit_code = EX_CONFIG; + return -1; + } + + if (ctx->language == NULL) { + struct fts_language_list *lang_list = + fts_user_get_language_list(user); + enum fts_language_result result; + const char *error; + + result = fts_language_detect(lang_list, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &lang, &error); + if (lang == NULL) + lang = fts_language_list_get_first(lang_list); + switch (result) { + case FTS_LANGUAGE_RESULT_SHORT: + i_warning("Text too short, can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_UNKNOWN: + i_warning("Can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_OK: + break; + case FTS_LANGUAGE_RESULT_ERROR: + i_error("Language detection library initialization failed: %s", error); + _ctx->exit_code = EX_CONFIG; + return -1; + default: + i_unreached(); + } + } else { + lang = fts_language_find(ctx->language); + if (lang == NULL) { + i_error("Unknown language: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + } + user_lang = fts_user_language_find(user, lang); + if (user_lang == NULL) { + i_error("Language not enabled for user: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + + fts_tokenizer_reset(user_lang->index_tokenizer); + for (;;) { + const char *token, *error; + + if (!final) { + ret = fts_tokenizer_next(user_lang->index_tokenizer, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &token, &error); + } else { + ret = fts_tokenizer_final(user_lang->index_tokenizer, + &token, &error); + } + if (ret < 0) + break; + if (ret > 0 && user_lang->filter != NULL) { + ret2 = fts_filter_filter(user_lang->filter, &token, &error); + if (ret2 > 0) + doveadm_print(token); + else if (ret2 < 0) + i_error("Couldn't create indexable tokens: %s", error); + } + if (ret == 0) { + if (final) + break; + final = TRUE; + } + } + return 0; +} + +static void +cmd_fts_tokenize_init(struct doveadm_mail_cmd_context *_ctx, + const char *const args[]) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + if (args[0] == NULL) + doveadm_mail_help_name("fts tokenize"); + + ctx->tokens = p_strdup(_ctx->pool, t_strarray_join(args, " ")); + + doveadm_print_init(DOVEADM_PRINT_TYPE_FLOW); + doveadm_print_header("token", "token", DOVEADM_PRINT_HEADER_FLAG_HIDE_TITLE); +} + +static bool +cmd_fts_tokenize_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + switch (c) { + case 'l': + ctx->language = p_strdup(_ctx->pool, optarg); + break; + default: + return FALSE; + } + return TRUE; +} + +static struct doveadm_mail_cmd_context * +cmd_fts_tokenize_alloc(void) +{ + struct fts_tokenize_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct fts_tokenize_cmd_context); + ctx->ctx.v.run = cmd_fts_tokenize_run; + ctx->ctx.v.init = cmd_fts_tokenize_init; + ctx->ctx.v.parse_arg = cmd_fts_tokenize_parse_arg; + ctx->ctx.getopt_args = "l"; + return &ctx->ctx; +} + +static int +fts_namespace_find(struct mail_user *user, const char *ns_prefix, + struct mail_namespace **ns_r) +{ + struct mail_namespace *ns; + + if (ns_prefix == NULL) + ns = mail_namespace_find_inbox(user->namespaces); + else { + ns = mail_namespace_find_prefix(user->namespaces, ns_prefix); + if (ns == NULL) { + i_error("Namespace prefix not found: %s", ns_prefix); + return -1; + } + } + + if (fts_list_backend(ns->list) == NULL) { + i_error("fts not enabled for user's namespace %s", + ns_prefix != NULL ? ns_prefix : "INBOX"); + return -1; + } + *ns_r = ns; + return 0; +} + +static int +cmd_fts_optimize_run(struct doveadm_mail_cmd_context *ctx, + struct mail_user *user) +{ + const char *ns_prefix = ctx->args[0]; + struct mail_namespace *ns; + struct fts_backend *backend; + + if (fts_namespace_find(user, ns_prefix, &ns) < 0) { + doveadm_mail_failed_error(ctx, MAIL_ERROR_NOTFOUND); + return -1; + } + backend = fts_list_backend(ns->list); + if (fts_backend_optimize(backend) < 0) { + i_error("fts optimize failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + return -1; + } + return 0; +} + +static void +cmd_fts_optimize_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED, + const char *const args[]) +{ + if (str_array_length(args) > 1) + doveadm_mail_help_name("fts optimize"); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_optimize_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_optimize_run; + ctx->v.init = cmd_fts_optimize_init; + return ctx; +} + +static int +cmd_fts_rescan_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user) +{ + const char *ns_prefix = ctx->args[0]; + struct mail_namespace *ns; + struct fts_backend *backend; + + if (fts_namespace_find(user, ns_prefix, &ns) < 0) { + doveadm_mail_failed_error(ctx, MAIL_ERROR_NOTFOUND); + return -1; + } + backend = fts_list_backend(ns->list); + if (fts_backend_rescan(backend) < 0) { + i_error("fts rescan failed"); + doveadm_mail_failed_error(ctx, MAIL_ERROR_TEMP); + return -1; + } + return 0; +} + +static void +cmd_fts_rescan_init(struct doveadm_mail_cmd_context *ctx ATTR_UNUSED, + const char *const args[]) +{ + if (str_array_length(args) > 1) + doveadm_mail_help_name("fts rescan"); +} + +static struct doveadm_mail_cmd_context * +cmd_fts_rescan_alloc(void) +{ + struct doveadm_mail_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct doveadm_mail_cmd_context); + ctx->v.run = cmd_fts_rescan_run; + ctx->v.init = cmd_fts_rescan_init; + return ctx; +} + +static struct doveadm_cmd_ver2 fts_commands[] = { +{ + .name = "fts lookup", + .mail_cmd = cmd_fts_lookup_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts expand", + .mail_cmd = cmd_fts_expand_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<search query>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "query", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts tokenize", + .mail_cmd = cmd_fts_tokenize_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<text>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('l', "language", CMD_PARAM_STR, 0) +DOVEADM_CMD_PARAM('\0', "text", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts optimize", + .mail_cmd = cmd_fts_optimize_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ + .name = "fts rescan", + .mail_cmd = cmd_fts_rescan_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('\0', "namespace", CMD_PARAM_STR, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +}; + +void doveadm_fts_plugin_init(struct module *module ATTR_UNUSED) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(fts_commands); i++) + doveadm_cmd_register_ver2(&fts_commands[i]); + doveadm_dump_fts_expunge_log_init(); +} + +void doveadm_fts_plugin_deinit(void) +{ +} diff --git a/src/plugins/fts/doveadm-fts.h b/src/plugins/fts/doveadm-fts.h new file mode 100644 index 0000000..d4307fe --- /dev/null +++ b/src/plugins/fts/doveadm-fts.h @@ -0,0 +1,11 @@ +#ifndef DOVEADM_FTS_H +#define DOVEADM_FTS_H + +struct module; + +void doveadm_dump_fts_expunge_log_init(void); + +void doveadm_fts_plugin_init(struct module *module); +void doveadm_fts_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts/fts-api-private.h b/src/plugins/fts/fts-api-private.h new file mode 100644 index 0000000..a070564 --- /dev/null +++ b/src/plugins/fts/fts-api-private.h @@ -0,0 +1,139 @@ +#ifndef FTS_API_PRIVATE_H +#define FTS_API_PRIVATE_H + +#include "unichar.h" +#include "fts-api.h" + +struct mail_user; +struct mailbox_list; + +#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2) + +struct fts_backend_vfuncs { + struct fts_backend *(*alloc)(void); + int (*init)(struct fts_backend *backend, const char **error_r); + void (*deinit)(struct fts_backend *backend); + + int (*get_last_uid)(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r); + + struct fts_backend_update_context * + (*update_init)(struct fts_backend *backend); + int (*update_deinit)(struct fts_backend_update_context *ctx); + + void (*update_set_mailbox)(struct fts_backend_update_context *ctx, + struct mailbox *box); + void (*update_expunge)(struct fts_backend_update_context *ctx, + uint32_t uid); + + /* Start a build for specified key */ + bool (*update_set_build_key)(struct fts_backend_update_context *ctx, + const struct fts_backend_build_key *key); + /* Finish a build for specified key - guaranteed to be called */ + void (*update_unset_build_key)(struct fts_backend_update_context *ctx); + /* Add data for current build key */ + int (*update_build_more)(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size); + + int (*refresh)(struct fts_backend *backend); + int (*rescan)(struct fts_backend *backend); + int (*optimize)(struct fts_backend *backend); + + bool (*can_lookup)(struct fts_backend *backend, + const struct mail_search_arg *args); + int (*lookup)(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, enum fts_lookup_flags flags, + struct fts_result *result); + int (*lookup_multi)(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); + void (*lookup_done)(struct fts_backend *backend); +}; + +enum fts_backend_flags { + /* Backend supports indexing binary MIME parts */ + FTS_BACKEND_FLAG_BINARY_MIME_PARTS = 0x01, + /* Send built text to backend normalized rather than + preserving original case */ + FTS_BACKEND_FLAG_NORMALIZE_INPUT = 0x02, + /* Send only fully indexable words rather than randomly sized blocks */ + FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04, + /* Fuzzy search works */ + FTS_BACKEND_FLAG_FUZZY_SEARCH = 0x08, + /* Tokenize all the input. update_build_more() will be called a single + directly indexable token at a time. Searching will modify the search + args so that lookup() sees only tokens that can be directly + searched. */ + FTS_BACKEND_FLAG_TOKENIZED_INPUT = 0x10 +}; + +struct fts_header_filters { + pool_t pool; + ARRAY_TYPE(const_string) includes; + ARRAY_TYPE(const_string) excludes; + bool loaded:1; + bool exclude_is_default:1; +}; + +struct fts_backend { + const char *name; + enum fts_backend_flags flags; + + struct fts_backend_vfuncs v; + struct mail_namespace *ns; + struct fts_header_filters header_filters; + + bool updating:1; +}; + +struct fts_backend_update_context { + struct fts_backend *backend; + normalizer_func_t *normalizer; + + struct mailbox *cur_box, *backend_box; + + bool build_key_open:1; + bool failed:1; +}; + +struct fts_index_header { + uint32_t last_indexed_uid; + + /* Checksum of settings. If the settings change, the index should + be rebuilt. */ + uint32_t settings_checksum; + uint32_t unused; +}; + +void fts_backend_register(const struct fts_backend *backend); +void fts_backend_unregister(const char *name); + +bool fts_backend_default_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args); + +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter); + +/* Returns TRUE if ok, FALSE if no fts header */ +bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r); +int fts_index_set_header(struct mailbox *box, + const struct fts_index_header *hdr); +int ATTR_NOWARN_UNUSED_RESULT +fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid); +int fts_backend_reset_last_uids(struct fts_backend *backend); +int fts_index_have_compatible_settings(struct mailbox_list *list, + uint32_t checksum); + +/* Returns TRUE if FTS backend should index the header for optimizing + separate lookups */ +bool fts_header_want_indexed(const char *hdr_name); +/* Returns TRUE if header's values should be considered to have a language. */ +bool fts_header_has_language(const char *hdr_name); + +int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r); + +#endif diff --git a/src/plugins/fts/fts-api.c b/src/plugins/fts/fts-api.c new file mode 100644 index 0000000..a6ea716 --- /dev/null +++ b/src/plugins/fts/fts-api.c @@ -0,0 +1,554 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hex-binary.h" +#include "mail-index.h" +#include "mail-namespace.h" +#include "mail-storage-private.h" +#include "mailbox-list-iter.h" +#include "mail-search.h" +#include "fts-api-private.h" + +struct event_category event_category_fts = { + .name = "fts", +}; + +static ARRAY(const struct fts_backend *) backends; + +void fts_backend_register(const struct fts_backend *backend) +{ + if (!array_is_created(&backends)) + i_array_init(&backends, 4); + array_push_back(&backends, &backend); +} + +void fts_backend_unregister(const char *name) +{ + const struct fts_backend *const *be; + unsigned int i, count; + + be = array_get(&backends, &count); + for (i = 0; i < count; i++) { + if (strcmp(be[i]->name, name) == 0) { + array_delete(&backends, i, 1); + break; + } + } + if (i == count) + i_panic("fts_backend_unregister(%s): unknown backend", name); + + if (count == 1) + array_free(&backends); +} + +static const struct fts_backend * +fts_backend_class_lookup(const char *backend_name) +{ + const struct fts_backend *const *be; + unsigned int i, count; + + if (array_is_created(&backends)) { + be = array_get(&backends, &count); + for (i = 0; i < count; i++) { + if (strcmp(be[i]->name, backend_name) == 0) + return be[i]; + } + } + return NULL; +} + +static void +fts_header_filters_init(struct fts_backend *backend) +{ + struct fts_header_filters *filters = &backend->header_filters; + pool_t pool = filters->pool = pool_alloconly_create( + MEMPOOL_GROWING"fts_header_filters", 256); + + p_array_init(&filters->includes, pool, 8); + p_array_init(&filters->excludes, pool, 8); +} + +static void +fts_header_filters_deinit(struct fts_backend *backend) +{ + pool_unref(&backend->header_filters.pool); +} + +int fts_backend_init(const char *backend_name, struct mail_namespace *ns, + const char **error_r, struct fts_backend **backend_r) +{ + const struct fts_backend *be; + struct fts_backend *backend; + + be = fts_backend_class_lookup(backend_name); + if (be == NULL) { + *error_r = "Unknown backend"; + return -1; + } + + backend = be->v.alloc(); + backend->ns = ns; + if (backend->v.init(backend, error_r) < 0) { + i_free(backend); + return -1; + } + + fts_header_filters_init(backend); + *backend_r = backend; + return 0; +} + +void fts_backend_deinit(struct fts_backend **_backend) +{ + struct fts_backend *backend = *_backend; + + fts_header_filters_deinit(backend); + *_backend = NULL; + backend->v.deinit(backend); +} + +int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r) +{ + struct fts_index_header hdr; + + if (box->virtual_vfuncs != NULL) { + /* virtual mailboxes themselves don't have any indexes, + so catch this call here */ + if (!fts_index_get_header(box, &hdr)) + *last_uid_r = 0; + else + *last_uid_r = hdr.last_indexed_uid; + return 0; + } + + return backend->v.get_last_uid(backend, box, last_uid_r); +} + +bool fts_backend_is_updating(struct fts_backend *backend) +{ + return backend->updating; +} + +struct fts_backend_update_context * +fts_backend_update_init(struct fts_backend *backend) +{ + struct fts_backend_update_context *ctx; + + i_assert(!backend->updating); + + backend->updating = TRUE; + ctx = backend->v.update_init(backend); + if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0) + ctx->normalizer = backend->ns->user->default_normalizer; + return ctx; +} + +static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx) +{ + fts_backend_update_unset_build_key(ctx); + if (ctx->backend_box != ctx->cur_box) { + ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box); + ctx->backend_box = ctx->cur_box; + } +} + +int fts_backend_update_deinit(struct fts_backend_update_context **_ctx) +{ + struct fts_backend_update_context *ctx = *_ctx; + struct fts_backend *backend = ctx->backend; + int ret; + + *_ctx = NULL; + + ctx->cur_box = NULL; + fts_backend_set_cur_mailbox(ctx); + + ret = backend->v.update_deinit(ctx); + backend->updating = FALSE; + return ret; +} + +void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx, + struct mailbox *box) +{ + if (ctx->backend_box != NULL && box != ctx->backend_box) { + /* make sure we don't reference the backend box anymore */ + ctx->backend->v.update_set_mailbox(ctx, NULL); + ctx->backend_box = NULL; + } + ctx->cur_box = box; +} + +void fts_backend_update_expunge(struct fts_backend_update_context *ctx, + uint32_t uid) +{ + fts_backend_set_cur_mailbox(ctx); + ctx->backend->v.update_expunge(ctx, uid); +} + +bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx, + const struct fts_backend_build_key *key) +{ + fts_backend_set_cur_mailbox(ctx); + + i_assert(ctx->cur_box != NULL); + + if (!ctx->backend->v.update_set_build_key(ctx, key)) + return FALSE; + ctx->build_key_open = TRUE; + return TRUE; +} + +void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx) +{ + if (ctx->build_key_open) { + ctx->backend->v.update_unset_build_key(ctx); + ctx->build_key_open = FALSE; + } +} + +int fts_backend_update_build_more(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size) +{ + i_assert(ctx->build_key_open); + + return ctx->backend->v.update_build_more(ctx, data, size); +} + +int fts_backend_refresh(struct fts_backend *backend) +{ + return backend->v.refresh(backend); +} + +int fts_backend_reset_last_uids(struct fts_backend *backend) +{ + struct mailbox_list_iterate_context *iter; + const struct mailbox_info *info; + struct mailbox *box; + int ret = 0; + + iter = mailbox_list_iter_init(backend->ns->list, "*", + MAILBOX_LIST_ITER_SKIP_ALIASES | + MAILBOX_LIST_ITER_NO_AUTO_BOXES); + while ((info = mailbox_list_iter_next(iter)) != NULL) { + if ((info->flags & + (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0) + continue; + + box = mailbox_alloc(info->ns->list, info->vname, 0); + if (mailbox_open(box) == 0) { + if (fts_index_set_last_uid(box, 0) < 0) + ret = -1; + } + mailbox_free(&box); + } + if (mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +int fts_backend_rescan(struct fts_backend *backend) +{ + struct mailbox *box; + bool virtual_storage; + + box = mailbox_alloc(backend->ns->list, "", 0); + virtual_storage = box->virtual_vfuncs != NULL; + mailbox_free(&box); + + if (virtual_storage) { + /* just reset the last-uids for a virtual storage. */ + return fts_backend_reset_last_uids(backend); + } + + return backend->v.rescan == NULL ? 0 : + backend->v.rescan(backend); +} + +int fts_backend_optimize(struct fts_backend *backend) +{ + return backend->v.optimize == NULL ? 0 : + backend->v.optimize(backend); +} + +static void +fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe, + const ARRAY_TYPE(seq_range) *dest_definite, + const ARRAY_TYPE(seq_range) *src_maybe, + const ARRAY_TYPE(seq_range) *src_definite) +{ + ARRAY_TYPE(seq_range) src_unwanted; + const struct seq_range *range; + struct seq_range new_range; + unsigned int i, count; + uint32_t seq; + + /* add/leave to dest_maybe if at least one list has maybe, + and no lists have none */ + + /* create unwanted sequences list from both sources */ + t_array_init(&src_unwanted, 128); + new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1; + array_push_back(&src_unwanted, &new_range); + seq_range_array_remove_seq_range(&src_unwanted, src_maybe); + seq_range_array_remove_seq_range(&src_unwanted, src_definite); + + /* drop unwanted uids */ + seq_range_array_remove_seq_range(dest_maybe, &src_unwanted); + + /* add uids that are in dest_definite and src_maybe lists */ + range = array_get(dest_definite, &count); + for (i = 0; i < count; i++) { + for (seq = range[i].seq1; seq <= range[i].seq2; seq++) { + if (seq_range_exists(src_maybe, seq)) + seq_range_array_add(dest_maybe, seq); + } + } +} + +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter) +{ + T_BEGIN { + fts_merge_maybies(maybe_dest, definite_dest, + maybe_filter, definite_filter); + } T_END; + /* keep only what exists in both lists. the rest is in + maybies or not wanted */ + seq_range_array_intersect(definite_dest, definite_filter); +} + +bool fts_backend_default_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args) +{ + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_backend_default_can_lookup(backend, + args->value.subargs)) + return TRUE; + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + case SEARCH_BODY: + case SEARCH_TEXT: + if (!args->no_fts) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +bool fts_backend_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args) +{ + return backend->v.can_lookup(backend, args); +} + +static int fts_score_map_sort(const struct fts_score_map *m1, + const struct fts_score_map *m2) +{ + if (m1->uid < m2->uid) + return -1; + if (m1->uid > m2->uid) + return 1; + return 0; +} + +int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + array_clear(&result->definite_uids); + array_clear(&result->maybe_uids); + array_clear(&result->scores); + + if (backend->v.lookup(backend, box, args, flags, result) < 0) + return -1; + + if (!result->scores_sorted && array_is_created(&result->scores)) { + array_sort(&result->scores, fts_score_map_sort); + result->scores_sorted = TRUE; + } + return 0; +} + +int fts_backend_lookup_multi(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + unsigned int i; + + i_assert(boxes[0] != NULL); + + if (backend->v.lookup_multi != NULL) { + if (backend->v.lookup_multi(backend, boxes, args, + flags, result) < 0) + return -1; + if (result->box_results == NULL) { + result->box_results = p_new(result->pool, + struct fts_result, 1); + } + return 0; + } + + for (i = 0; boxes[i] != NULL; i++) ; + result->box_results = p_new(result->pool, struct fts_result, i+1); + + for (i = 0; boxes[i] != NULL; i++) { + struct fts_result *box_result = &result->box_results[i]; + + p_array_init(&box_result->definite_uids, result->pool, 32); + p_array_init(&box_result->maybe_uids, result->pool, 32); + p_array_init(&box_result->scores, result->pool, 32); + if (backend->v.lookup(backend, boxes[i], args, + flags, box_result) < 0) + return -1; + } + return 0; +} + +void fts_backend_lookup_done(struct fts_backend *backend) +{ + if (backend->v.lookup_done != NULL) + backend->v.lookup_done(backend); +} + +static uint32_t fts_index_get_ext_id(struct mailbox *box) +{ + return mail_index_ext_register(box->index, "fts", + sizeof(struct fts_index_header), + 0, 0); +} + +bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r) +{ + struct mail_index_view *view; + const void *data; + size_t data_size; + bool ret; + + mail_index_refresh(box->index); + view = mail_index_view_open(box->index); + mail_index_get_header_ext(view, fts_index_get_ext_id(box), + &data, &data_size); + if (data_size < sizeof(*hdr_r)) { + i_zero(hdr_r); + ret = FALSE; + } else { + memcpy(hdr_r, data, sizeof(*hdr_r)); + ret = TRUE; + } + mail_index_view_close(&view); + return ret; +} + +int fts_index_set_header(struct mailbox *box, + const struct fts_index_header *hdr) +{ + struct mail_index_transaction *trans; + uint32_t ext_id = fts_index_get_ext_id(box); + + trans = mail_index_transaction_begin(box->view, 0); + mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr)); + return mail_index_transaction_commit(&trans); +} + +int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid) +{ + struct fts_index_header hdr; + + (void)fts_index_get_header(box, &hdr); + hdr.last_indexed_uid = last_uid; + return fts_index_set_header(box, &hdr); +} + +int fts_index_have_compatible_settings(struct mailbox_list *list, + uint32_t checksum) +{ + struct mail_namespace *ns = mailbox_list_get_namespace(list); + struct mailbox *box; + struct fts_index_header hdr; + const char *vname; + size_t len; + int ret; + + if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0) + vname = "INBOX"; + else { + len = strlen(ns->prefix); + if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns)) + len--; + vname = t_strndup(ns->prefix, len); + } + + box = mailbox_alloc(list, vname, 0); + if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) { + i_error("fts: Failed to sync mailbox %s: %s", vname, + mailbox_get_last_internal_error(box, NULL)); + ret = -1; + } else { + ret = fts_index_get_header(box, &hdr) && + hdr.settings_checksum == checksum ? 1 : 0; + } + mailbox_free(&box); + return ret; +} + +static const char *indexed_headers[] = { + "From", "To", "Cc", "Bcc", "Subject" +}; + +bool fts_header_want_indexed(const char *hdr_name) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(indexed_headers); i++) { + if (strcasecmp(hdr_name, indexed_headers[i]) == 0) + return TRUE; + } + return FALSE; +} + +bool fts_header_has_language(const char *hdr_name) +{ + /* FIXME: should email address headers be detected as different + languages? That mainly contains people's names.. */ + /*if (message_header_is_address(hdr_name)) + return TRUE;*/ + + /* Subject definitely contains language-specific data that can be + detected. Comment and Keywords headers also could contain, although + just about nobody uses those headers. + + For now we assume that other headers contain non-language specific + data that we don't want to filter in special ways. For example + it is good to be able to search for Message-IDs. */ + return strcasecmp(hdr_name, "Subject") == 0 || + strcasecmp(hdr_name, "Comments") == 0 || + strcasecmp(hdr_name, "Keywords") == 0; +} + +int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r) +{ + struct mailbox_metadata metadata; + + if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0) + return -1; + + *guid_r = guid_128_to_string(metadata.guid); + return 0; +} diff --git a/src/plugins/fts/fts-api.h b/src/plugins/fts/fts-api.h new file mode 100644 index 0000000..11a331f --- /dev/null +++ b/src/plugins/fts/fts-api.h @@ -0,0 +1,173 @@ +#ifndef FTS_API_H +#define FTS_API_H + +struct mail; +struct mailbox; +struct mail_namespace; +struct mail_search_arg; + +struct fts_backend; + +#include "seq-range-array.h" + +enum fts_lookup_flags { + /* Specifies if the args should be ANDed or ORed together. */ + FTS_LOOKUP_FLAG_AND_ARGS = 0x01, + /* Require exact matching for non-fuzzy search args by returning all + such matches as maybe_uids instead of definite_uids */ + FTS_LOOKUP_FLAG_NO_AUTO_FUZZY = 0x02 +}; + +enum fts_backend_build_key_type { + /* Header */ + FTS_BACKEND_BUILD_KEY_HDR, + /* MIME part header */ + FTS_BACKEND_BUILD_KEY_MIME_HDR, + /* MIME body part */ + FTS_BACKEND_BUILD_KEY_BODY_PART, + /* Binary MIME body part, if backend supports binary data */ + FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY +}; + +struct fts_backend_build_key { + uint32_t uid; + enum fts_backend_build_key_type type; + struct message_part *part; + + /* for _KEY_HDR: */ + const char *hdr_name; + + /* for _KEY_BODY_PART and _KEY_BODY_PART_BINARY: */ + + /* Contains a valid parsed "type/subtype" string. For messages without + (valid) Content-Type: header, it's set to "text/plain". */ + const char *body_content_type; + /* Content-Disposition: header without parsing/validation if it exists, + otherwise NULL. */ + const char *body_content_disposition; +}; + +struct fts_score_map { + uint32_t uid; + float score; +}; +ARRAY_DEFINE_TYPE(fts_score_map, struct fts_score_map); + +/* the structure is meant to be implemented by plugins that want to carry + some state over from a call to next ones within an fts_search_context + session. + + The pointer to this structure is initially granted to be NULL and it + remains such unless the plugin itself activates it. + + Any memory management for the pointer and its contents is expected to + be performed by the plugin itself, possibly but not necessarily using + the result pool propagated to plugin call by struct fts_result.pool and + struct fts_multi_result.pool. */ + +struct fts_search_state; + +struct fts_result { + pool_t pool; + struct fts_search_state *search_state; + + struct mailbox *box; + + ARRAY_TYPE(seq_range) definite_uids; + /* The maybe_uids is useful with backends that can only filter out + messages, but can't definitively say if the search matched a + message. */ + ARRAY_TYPE(seq_range) maybe_uids; + ARRAY_TYPE(fts_score_map) scores; + bool scores_sorted; +}; + +struct fts_multi_result { + pool_t pool; + struct fts_search_state *search_state; + + /* box=NULL-terminated array of mailboxes and matching UIDs, + all allocated from the given pool. */ + struct fts_result *box_results; +}; + +extern struct event_category event_category_fts; + +int fts_backend_init(const char *backend_name, struct mail_namespace *ns, + const char **error_r, struct fts_backend **backend_r); +void fts_backend_deinit(struct fts_backend **backend); + +/* Get the last_uid for the mailbox. */ +int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box, + uint32_t *last_uid_r); + +/* Returns TRUE if there exists an update context. */ +bool fts_backend_is_updating(struct fts_backend *backend); + +/* Start an index update. */ +struct fts_backend_update_context * +fts_backend_update_init(struct fts_backend *backend); +/* Finish an index update. Returns 0 if ok, -1 if some updates failed. + If updates failed, the index is in unspecified state. */ +int fts_backend_update_deinit(struct fts_backend_update_context **ctx); + +/* Switch to updating the specified mailbox. box may also be set to NULL to + make sure the previous mailbox won't tried to be accessed anymore. */ +void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx, + struct mailbox *box); +/* Expunge the specified mail. */ +void fts_backend_update_expunge(struct fts_backend_update_context *ctx, + uint32_t uid); + +/* Switch to building index for specified key. If backend doesn't want to + index this key, it can return FALSE and caller will skip to next key. */ +bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx, + const struct fts_backend_build_key *key); +/* Make sure that if _build_more() is called, we'll assert-crash. */ +void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx); +/* Add more content to the index for the currently specified build key. + Non-BODY_PART_BINARY data must contain only full valid UTF-8 characters, + but it doesn't need to be NUL-terminated. size contains the data size in + bytes, not characters. This function may be called many times and the data + block sizes may be small. Backend returns 0 if ok, -1 if build should be + aborted. */ +int fts_backend_update_build_more(struct fts_backend_update_context *ctx, + const unsigned char *data, size_t size); + +/* Refresh index to make sure we see latest changes from lookups. + Returns 0 if ok, -1 if error. */ +int fts_backend_refresh(struct fts_backend *backend); +/* Go through the entire index and make sure all mails are indexed, + and delete any extra mails in the index. */ +int fts_backend_rescan(struct fts_backend *backend); +/* Optimize the index. This can be a somewhat heavy operation. */ +int fts_backend_optimize(struct fts_backend *backend); + +/* Returns TRUE if fts_backend_lookup() should even be tried for the + given args. */ +bool fts_backend_can_lookup(struct fts_backend *backend, + const struct mail_search_arg *args); +/* Do a FTS lookup for the given search args. Backends can support different + kinds of search arguments, so match_always=TRUE must be set to all search + args that were actually used to produce the search results. The other args + are handled by the regular search code. The backends MUST ignore all args + that have subargs (SEARCH_OR, SEARCH_SUB), since they are looked up + separately. + + The arrays in result must be initialized by caller. */ +int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result); + +/* Search from multiple mailboxes. result->pool must be initialized. */ +int fts_backend_lookup_multi(struct fts_backend *backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); +/* Called after the lookups are done. The next lookup will be preceded by a + refresh. */ +void fts_backend_lookup_done(struct fts_backend *backend); + +#endif diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c new file mode 100644 index 0000000..73d4f4b --- /dev/null +++ b/src/plugins/fts/fts-build-mail.c @@ -0,0 +1,719 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "buffer.h" +#include "str.h" +#include "rfc822-parser.h" +#include "message-address.h" +#include "message-parser.h" +#include "message-decoder.h" +#include "mail-storage.h" +#include "index-mail.h" +#include "fts-parser.h" +#include "fts-user.h" +#include "fts-language.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-api-private.h" +#include "fts-build-mail.h" + +/* there are other characters as well, but this doesn't have to be exact */ +#define IS_WORD_WHITESPACE(c) \ + ((c) == ' ' || (c) == '\t' || (c) == '\n') +/* if we see a word larger than this, just go ahead and split it from + wherever */ +#define MAX_WORD_SIZE 1024 + +struct fts_mail_build_context { + struct mail *mail; + struct fts_backend_update_context *update_ctx; + + char *content_type, *content_disposition; + struct fts_parser *body_parser; + + buffer_t *word_buf, *pending_input; + struct fts_user_language *cur_user_lang; +}; + +static int fts_build_data(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last); + +static void fts_build_parse_content_type(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *content_type; + + if (ctx->content_type != NULL) + return; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + T_BEGIN { + content_type = t_str_new(64); + (void)rfc822_parse_content_type(&parser, content_type); + ctx->content_type = str_lcase(i_strdup(str_c(content_type))); + } T_END; + rfc822_parser_deinit(&parser); +} + +static void +fts_build_parse_content_disposition(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + /* just pass it as-is to backend. */ + i_free(ctx->content_disposition); + ctx->content_disposition = + i_strndup(hdr->full_value, hdr->full_value_len); +} + +static void fts_parse_mail_header(struct fts_mail_build_context *ctx, + const struct message_block *raw_block) +{ + const struct message_header_line *hdr = raw_block->hdr; + + if (strcasecmp(hdr->name, "Content-Type") == 0) + fts_build_parse_content_type(ctx, hdr); + else if (strcasecmp(hdr->name, "Content-Disposition") == 0) + fts_build_parse_content_disposition(ctx, hdr); +} + +static int +fts_build_unstructured_header(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + const unsigned char *data = hdr->full_value; + unsigned char *buf = NULL; + unsigned int i; + int ret; + + /* @UNSAFE: if there are any NULs, replace them with spaces */ + for (i = 0; i < hdr->full_value_len; i++) { + if (hdr->full_value[i] == '\0') { + if (buf == NULL) { + buf = i_memdup(hdr->full_value, + hdr->full_value_len); + data = buf; + } + buf[i] = ' '; + } + } + ret = fts_build_data(ctx, data, hdr->full_value_len, TRUE); + i_free(buf); + return ret; +} + +static void fts_mail_build_ctx_set_lang(struct fts_mail_build_context *ctx, + struct fts_user_language *user_lang) +{ + i_assert(user_lang != NULL); + + ctx->cur_user_lang = user_lang; + /* reset tokenizer between fields - just to be sure no state + leaks between fields (especially if previous indexing had + failed) */ + fts_tokenizer_reset(user_lang->index_tokenizer); +} + +static void +fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx, + const struct message_header_line *hdr) +{ + /* Headers that don't contain any human language will only be + translated to lowercase - no stemming or other filtering. There's + unfortunately no pefect way of detecting which headers contain + human languages, so we check with fts_header_has_language if the + header is something that's supposed to containing human text. */ + if (fts_header_has_language(hdr->name)) + ctx->cur_user_lang = NULL; + else { + fts_mail_build_ctx_set_lang(ctx, + fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + } +} + +static int fts_build_mail_header(struct fts_mail_build_context *ctx, + const struct message_block *block) +{ + const struct message_header_line *hdr = block->hdr; + struct fts_backend_build_key key; + int ret; + + if (hdr->eoh) + return 0; + + /* hdr->full_value is always set because we get the block from + message_decoder */ + i_zero(&key); + key.uid = ctx->mail->uid; + key.type = block->part->physical_pos == 0 ? + FTS_BACKEND_BUILD_KEY_HDR : FTS_BACKEND_BUILD_KEY_MIME_HDR; + key.part = block->part; + key.hdr_name = hdr->name; + + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) + fts_build_tokenized_hdr_update_lang(ctx, hdr); + + if (!fts_backend_update_set_build_key(ctx->update_ctx, &key)) + return 0; + + if (!message_header_is_address(hdr->name)) { + /* regular unstructured header */ + ret = fts_build_unstructured_header(ctx, hdr); + } else T_BEGIN { + /* message address. normalize it to give better + search results. */ + struct message_address *addr; + string_t *str; + + addr = message_address_parse(pool_datastack_create(), + hdr->full_value, + hdr->full_value_len, + UINT_MAX, 0); + str = t_str_new(hdr->full_value_len); + message_address_write(str, addr); + + ret = fts_build_data(ctx, str_data(str), str_len(str), TRUE); + } T_END; + + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + /* index the header name itself using data-language. */ + struct fts_user_language *prev_lang = ctx->cur_user_lang; + + fts_mail_build_ctx_set_lang(ctx, + fts_user_get_data_lang(ctx->update_ctx->backend->ns->user)); + key.hdr_name = ""; + if (fts_backend_update_set_build_key(ctx->update_ctx, &key)) { + if (fts_build_data(ctx, (const void *)hdr->name, + strlen(hdr->name), TRUE) < 0) + ret = -1; + } + fts_mail_build_ctx_set_lang(ctx, prev_lang); + } + return ret; +} + +static bool +fts_build_body_begin(struct fts_mail_build_context *ctx, + struct message_part *part, bool *binary_body_r) +{ + struct mail_storage *storage; + struct fts_parser_context parser_context; + struct fts_backend_build_key key; + + i_assert(ctx->body_parser == NULL); + + *binary_body_r = FALSE; + i_zero(&key); + key.uid = ctx->mail->uid; + key.part = part; + + i_zero(&parser_context); + parser_context.content_type = ctx->content_type != NULL ? + ctx->content_type : "text/plain"; + if (str_begins(parser_context.content_type, "multipart/")) { + /* multiparts are never indexed, only their contents */ + return FALSE; + } + storage = mailbox_get_storage(ctx->mail->box); + parser_context.user = mail_storage_get_user(storage); + parser_context.content_disposition = ctx->content_disposition; + + if (fts_parser_init(&parser_context, &ctx->body_parser)) { + /* extract text using the the returned parser */ + *binary_body_r = TRUE; + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART; + } else if (str_begins(parser_context.content_type, "text/") || + str_begins(parser_context.content_type, "message/")) { + /* text body parts */ + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART; + ctx->body_parser = fts_parser_text_init(); + } else { + /* possibly binary */ + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_BINARY_MIME_PARTS) == 0) + return FALSE; + *binary_body_r = TRUE; + key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY; + } + key.body_content_type = parser_context.content_type; + key.body_content_disposition = ctx->content_disposition; + ctx->cur_user_lang = NULL; + if (!fts_backend_update_set_build_key(ctx->update_ctx, &key)) { + if (ctx->body_parser != NULL) + (void)fts_parser_deinit(&ctx->body_parser, NULL); + return FALSE; + } + return TRUE; +} + +static int +fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size) +{ + struct fts_tokenizer *tokenizer = ctx->cur_user_lang->index_tokenizer; + struct fts_filter *filter = ctx->cur_user_lang->filter; + const char *token, *error; + int ret = 1, ret2; + + while (ret > 0) T_BEGIN { + ret = ret2 = fts_tokenizer_next(tokenizer, data, size, &token, &error); + if (ret2 > 0 && filter != NULL) + ret2 = fts_filter_filter(filter, &token, &error); + if (ret2 < 0) { + mail_set_critical(ctx->mail, + "fts: Couldn't create indexable tokens: %s", + error); + } + if (ret2 > 0) { + if (fts_backend_update_build_more(ctx->update_ctx, + (const void *)token, + strlen(token)) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + ret = -1; + } + } + } T_END; + return ret; +} + +static int +fts_detect_language(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last, + const struct fts_language **lang_r) +{ + struct mail_user *user = ctx->update_ctx->backend->ns->user; + struct fts_language_list *lang_list = fts_user_get_language_list(user); + const struct fts_language *lang; + const char *error; + + switch (fts_language_detect(lang_list, data, size, &lang, &error)) { + case FTS_LANGUAGE_RESULT_SHORT: + /* save the input so far and try again later */ + buffer_append(ctx->pending_input, data, size); + if (last) { + /* we've run out of data. use the default language. */ + *lang_r = fts_language_list_get_first(lang_list); + return 1; + } + return 0; + case FTS_LANGUAGE_RESULT_UNKNOWN: + /* use the default language */ + *lang_r = fts_language_list_get_first(lang_list); + return 1; + case FTS_LANGUAGE_RESULT_OK: + *lang_r = lang; + return 1; + case FTS_LANGUAGE_RESULT_ERROR: + /* internal language detection library failure + (e.g. invalid config). don't index anything. */ + mail_set_critical(ctx->mail, + "Language detection library initialization failed: %s", + error); + return -1; + default: + i_unreached(); + } +} + +static int +fts_build_tokenized(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + struct mail_user *user = ctx->update_ctx->backend->ns->user; + const struct fts_language *lang; + int ret; + + if (ctx->cur_user_lang != NULL) { + /* we already have a language */ + } else if ((ret = fts_detect_language(ctx, data, size, last, &lang)) < 0) { + return -1; + } else if (ret == 0) { + /* wait for more data */ + return 0; + } else { + fts_mail_build_ctx_set_lang(ctx, fts_user_language_find(user, lang)); + + if (ctx->pending_input->used > 0) { + if (fts_build_add_tokens_with_filter(ctx, + ctx->pending_input->data, + ctx->pending_input->used) < 0) + return -1; + buffer_set_used_size(ctx->pending_input, 0); + } + } + if (fts_build_add_tokens_with_filter(ctx, data, size) < 0) + return -1; + if (last) { + if (fts_build_add_tokens_with_filter(ctx, NULL, 0) < 0) + return -1; + } + return 0; +} + +static int +fts_build_full_words(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + size_t i; + + /* we'll need to send only full words to the backend */ + + if (ctx->word_buf != NULL && ctx->word_buf->used > 0) { + /* continuing previous word */ + for (i = 0; i < size; i++) { + if (IS_WORD_WHITESPACE(data[i])) + break; + } + buffer_append(ctx->word_buf, data, i); + data += i; + size -= i; + if (size == 0 && ctx->word_buf->used < MAX_WORD_SIZE && !last) { + /* word is still not finished */ + return 0; + } + /* we have a full word, index it */ + if (fts_backend_update_build_more(ctx->update_ctx, + ctx->word_buf->data, + ctx->word_buf->used) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + buffer_set_used_size(ctx->word_buf, 0); + } + + /* find the boundary for last word */ + if (last) + i = size; + else { + for (i = size; i > 0; i--) { + if (IS_WORD_WHITESPACE(data[i-1])) + break; + } + } + + if (fts_backend_update_build_more(ctx->update_ctx, data, i) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + + if (i < size) { + if (ctx->word_buf == NULL) { + ctx->word_buf = + buffer_create_dynamic(default_pool, 128); + } + buffer_append(ctx->word_buf, data + i, size - i); + } + return 0; +} + +static int fts_build_data(struct fts_mail_build_context *ctx, + const unsigned char *data, size_t size, bool last) +{ + if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + return fts_build_tokenized(ctx, data, size, last); + } else if ((ctx->update_ctx->backend->flags & + FTS_BACKEND_FLAG_BUILD_FULL_WORDS) != 0) { + return fts_build_full_words(ctx, data, size, last); + } else { + if (fts_backend_update_build_more(ctx->update_ctx, data, size) < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + return 0; + } +} + +static int fts_build_body_block(struct fts_mail_build_context *ctx, + const struct message_block *block, bool last) +{ + i_assert(block->hdr == NULL); + + return fts_build_data(ctx, block->data, block->size, last); +} + +static int fts_body_parser_finish(struct fts_mail_build_context *ctx, + const char **retriable_err_msg_r, + bool *may_need_retry_r) +{ + struct message_block block; + const char *retriable_error; + int ret = 0; + int deinit_ret; + *may_need_retry_r = FALSE; + + do { + i_zero(&block); + fts_parser_more(ctx->body_parser, &block); + if (fts_build_body_block(ctx, &block, FALSE) < 0) { + ret = -1; + break; + } + } while (block.size > 0); + + deinit_ret = fts_parser_deinit(&ctx->body_parser, &retriable_error); + if (ret < 0) { + /* indexing already failed - we don't want to retry + in any case */ + return -1; + } + + if (deinit_ret == 0) { + /* retry the parsing */ + *may_need_retry_r = TRUE; + *retriable_err_msg_r = retriable_error; + return -1; + } + if (deinit_ret < 0) { + mail_storage_set_internal_error(ctx->mail->box->storage); + return -1; + } + return 0; +} + +static void +load_header_filter(const char *key, struct fts_backend *backend, + ARRAY_TYPE(const_string) list, bool *matches_all_r) +{ + const char *str = mail_user_plugin_getenv(backend->ns->user, key); + + *matches_all_r = FALSE; + if (str == NULL || *str == '\0') + return; + + char **entries = p_strsplit_spaces(backend->header_filters.pool, str, " "); + for (char **entry = entries; *entry != NULL; ++entry) { + const char *value = str_lcase(*entry); + array_push_back(&list, &value); + if (*value == '*') { + *matches_all_r = TRUE; + break; + } + } + array_sort(&list, i_strcmp_p); +} + +static struct fts_header_filters * +load_header_filters(struct fts_backend *backend) +{ + struct fts_header_filters *filters = &backend->header_filters; + if (!filters->loaded) { + bool match_all; + + /* match_all return ignored in includes */ + load_header_filter("fts_header_includes", backend, + filters->includes, &match_all); + + load_header_filter("fts_header_excludes", backend, + filters->excludes, &match_all); + filters->loaded = TRUE; + filters->exclude_is_default = match_all; + } + return filters; +} + +/* This performs comparison between two strings, where the second one can end + * with the wildcard '*'. When the match reaches a '*' on the pitem side, zero + * (match) is returned regardles of the remaining characters. + * + * The function obeys the same lexicographic order as i_strcmp_p() and + * strcmp(), which is the reason for the casts to unsigned before comparing. + */ +static int ATTR_PURE +header_prefix_cmp(const char *const *pkey, const char *const *pitem) +{ + const char *key = *pkey; + const char *item = *pitem; + + while (*key == *item && *key != '\0') key++, item++; + return item[0] == '*' && item[1] == '\0' ? 0 : + (unsigned char)*key - (unsigned char)*item; +} + +static bool +is_header_indexable(const char *header_name, struct fts_backend *backend) +{ + bool indexable; + T_BEGIN { + struct fts_header_filters *filters = load_header_filters(backend); + const char *hdr = t_str_lcase(header_name); + + if (array_bsearch(&filters->includes, &hdr, header_prefix_cmp) != NULL) + indexable = TRUE; + else if (filters->exclude_is_default || + array_bsearch(&filters->excludes, &hdr, header_prefix_cmp) != NULL) + indexable = FALSE; + else + indexable = TRUE; + } T_END; + return indexable; +} + +static int +fts_build_mail_real(struct fts_backend_update_context *update_ctx, + struct mail *mail, + const char **retriable_err_msg_r, + bool *may_need_retry_r) +{ + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; + struct fts_mail_build_context ctx; + struct istream *input; + struct message_parser_ctx *parser; + struct message_decoder_context *decoder; + struct message_block raw_block, block; + struct message_part *prev_part, *parts; + bool skip_body = FALSE, body_part = FALSE, body_added = FALSE; + bool binary_body; + const char *error; + int ret; + + *may_need_retry_r = FALSE; + if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) { + if (mail->expunged) + return 0; + mail_set_critical(mail, "Failed to read stream: %s", + mailbox_get_last_internal_error(mail->box, NULL)); + return -1; + } + + i_zero(&ctx); + ctx.update_ctx = update_ctx; + ctx.mail = mail; + if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) + ctx.pending_input = buffer_create_dynamic(default_pool, 128); + + prev_part = NULL; + parser = message_parser_init(pool_datastack_create(), input, &parser_set); + + decoder = message_decoder_init(update_ctx->normalizer, 0); + for (;;) { + ret = message_parser_parse_next_block(parser, &raw_block); + i_assert(ret != 0); + if (ret < 0) { + if (input->stream_errno == 0) + ret = 0; + else { + mail_set_critical(mail, "read(%s) failed: %s", + i_stream_get_name(input), + i_stream_get_error(input)); + } + break; + } + + if (raw_block.part != prev_part) { + /* body part changed. we're now parsing the end of + boundary, possibly followed by message epilogue */ + if (ctx.body_parser != NULL) { + if (fts_body_parser_finish(&ctx, retriable_err_msg_r, + may_need_retry_r) < 0) { + ret = -1; + break; + } + } + message_decoder_set_return_binary(decoder, FALSE); + fts_backend_update_unset_build_key(update_ctx); + prev_part = raw_block.part; + i_free_and_null(ctx.content_type); + i_free_and_null(ctx.content_disposition); + + if (raw_block.size != 0) { + /* multipart. skip until beginning of next + part's headers */ + skip_body = TRUE; + } + } + + if (raw_block.hdr != NULL) { + /* always handle headers */ + } else if (raw_block.size == 0) { + /* end of headers */ + skip_body = !fts_build_body_begin(&ctx, raw_block.part, + &binary_body); + if (binary_body) + message_decoder_set_return_binary(decoder, TRUE); + body_part = TRUE; + } else { + if (skip_body) + continue; + } + + if (!message_decoder_decode_next_block(decoder, &raw_block, + &block)) + continue; + + if (block.hdr != NULL) { + fts_parse_mail_header(&ctx, &raw_block); + if (is_header_indexable(block.hdr->name, update_ctx->backend) && + fts_build_mail_header(&ctx, &block) < 0) { + ret = -1; + break; + } + } else if (block.size == 0) { + /* end of headers */ + } else { + i_assert(body_part); + if (ctx.body_parser != NULL) + fts_parser_more(ctx.body_parser, &block); + if (fts_build_body_block(&ctx, &block, FALSE) < 0) { + ret = -1; + break; + } + body_added = TRUE; + } + } + if (ctx.body_parser != NULL) { + if (ret == 0) + ret = fts_body_parser_finish(&ctx, retriable_err_msg_r, + may_need_retry_r); + else + (void)fts_parser_deinit(&ctx.body_parser, NULL); + } + if (ret == 0 && body_part && !skip_body && !body_added) { + /* make sure body is added even when it doesn't exist */ + block.data = NULL; block.size = 0; + ret = fts_build_body_block(&ctx, &block, TRUE); + } + if (message_parser_deinit_from_parts(&parser, &parts, &error) < 0) + index_mail_set_message_parts_corrupted(mail, error); + message_decoder_deinit(&decoder); + i_free(ctx.content_type); + i_free(ctx.content_disposition); + buffer_free(&ctx.word_buf); + buffer_free(&ctx.pending_input); + return ret < 0 ? -1 : 1; +} + +int fts_build_mail(struct fts_backend_update_context *update_ctx, + struct mail *mail) +{ + int ret; + /* Number of attempts to be taken if retry is needed */ + unsigned int attempts = 2; + const char *retriable_err_msg; + bool may_need_retry; + + T_BEGIN { + while ((ret = fts_build_mail_real(update_ctx, mail, + &retriable_err_msg, + &may_need_retry)) < 0 && + may_need_retry) { + if (--attempts == 0) { + /* Log this as info instead of as error, + because e.g. Tika doesn't differentiate + between temporary errors and invalid + document input. */ + i_info("%s - ignoring", retriable_err_msg); + ret = 0; + break; + } + } + } T_END; + return ret; +} diff --git a/src/plugins/fts/fts-build-mail.h b/src/plugins/fts/fts-build-mail.h new file mode 100644 index 0000000..aed4413 --- /dev/null +++ b/src/plugins/fts/fts-build-mail.h @@ -0,0 +1,9 @@ +#ifndef FTS_BUILD_MAIL_H +#define FTS_BUILD_MAIL_H + +/* Build indexes for the given mail. Returns 0 on success, -1 on error. + The error is set to mail's storage. */ +int fts_build_mail(struct fts_backend_update_context *update_ctx, + struct mail *mail); + +#endif diff --git a/src/plugins/fts/fts-expunge-log.c b/src/plugins/fts/fts-expunge-log.c new file mode 100644 index 0000000..d39ceea --- /dev/null +++ b/src/plugins/fts/fts-expunge-log.c @@ -0,0 +1,617 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "crc32.h" +#include "hash.h" +#include "istream.h" +#include "write-full.h" +#include "seq-range-array.h" +#include "mail-storage.h" +#include "fts-expunge-log.h" + +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +struct fts_expunge_log_record { + /* CRC32 of this entire record (except this checksum) */ + uint32_t checksum; + /* Size of this entire record */ + uint32_t record_size; + + /* Mailbox GUID */ + guid_128_t guid; + /* { uid1, uid2 } pairs */ + /* uint32_t expunge_uid_ranges[]; */ + + /* Total number of messages expunged so far in this log */ + /* uint32_t expunge_count; */ +}; + +struct fts_expunge_log { + char *path; + + int fd; + struct stat st; +}; + +struct fts_expunge_log_mailbox { + guid_128_t guid; + ARRAY_TYPE(seq_range) uids; + unsigned uids_count; +}; + +struct fts_expunge_log_append_ctx { + struct fts_expunge_log *log; + pool_t pool; + + HASH_TABLE(uint8_t *, struct fts_expunge_log_mailbox *) mailboxes; + struct fts_expunge_log_mailbox *prev_mailbox; + + bool failed; +}; + +struct fts_expunge_log_read_ctx { + struct fts_expunge_log *log; + + struct istream *input; + buffer_t buffer; + struct fts_expunge_log_read_record read_rec; + + bool failed; + bool corrupted; + bool unlink; +}; + +struct fts_expunge_log *fts_expunge_log_init(const char *path) +{ + struct fts_expunge_log *log; + + log = i_new(struct fts_expunge_log, 1); + log->path = i_strdup(path); + log->fd = -1; + return log; +} + +void fts_expunge_log_deinit(struct fts_expunge_log **_log) +{ + struct fts_expunge_log *log = *_log; + + *_log = NULL; + i_close_fd(&log->fd); + i_free(log->path); + i_free(log); +} + +static int fts_expunge_log_open(struct fts_expunge_log *log, bool create) +{ + int fd; + + i_assert(log->fd == -1); + + /* FIXME: use proper permissions */ + fd = open(log->path, O_RDWR | O_APPEND | (create ? O_CREAT : 0), 0600); + if (fd == -1) { + if (errno == ENOENT && !create) + return 0; + + i_error("open(%s) failed: %m", log->path); + return -1; + } + if (fstat(fd, &log->st) < 0) { + i_error("fstat(%s) failed: %m", log->path); + i_close_fd(&fd); + return -1; + } + log->fd = fd; + return 1; +} + +static int +fts_expunge_log_reopen_if_needed(struct fts_expunge_log *log, bool create) +{ + struct stat st; + + if (log->fd == -1) + return fts_expunge_log_open(log, create); + + if (stat(log->path, &st) == 0) { + if (st.st_ino == log->st.st_ino && + CMP_DEV_T(st.st_dev, log->st.st_dev)) { + /* same file */ + return 0; + } + /* file changed */ + } else if (errno == ENOENT) { + /* recreate the file */ + } else { + i_error("stat(%s) failed: %m", log->path); + return -1; + } + if (close(log->fd) < 0) + i_error("close(%s) failed: %m", log->path); + log->fd = -1; + return fts_expunge_log_open(log, create); +} + +static int +fts_expunge_log_read_expunge_count(struct fts_expunge_log *log, + uint32_t *expunge_count_r) +{ + ssize_t ret; + + i_assert(log->fd != -1); + + if (fstat(log->fd, &log->st) < 0) { + i_error("fstat(%s) failed: %m", log->path); + return -1; + } + if ((uoff_t)log->st.st_size < sizeof(*expunge_count_r)) { + *expunge_count_r = 0; + return 0; + } + /* we'll assume that write()s atomically grow the file size, as + O_APPEND almost guarantees. even if not, having a race condition + isn't the end of the world. the expunge count is simply read wrong + and fts optimize is performed earlier or later than intended. */ + ret = pread(log->fd, expunge_count_r, sizeof(*expunge_count_r), + log->st.st_size - 4); + if (ret < 0) { + i_error("pread(%s) failed: %m", log->path); + return -1; + } + if (ret != sizeof(*expunge_count_r)) { + i_error("pread(%s) read only %d of %d bytes", log->path, + (int)ret, (int)sizeof(*expunge_count_r)); + return -1; + } + return 0; +} + +struct fts_expunge_log_append_ctx * +fts_expunge_log_append_begin(struct fts_expunge_log *log) +{ + struct fts_expunge_log_append_ctx *ctx; + pool_t pool; + + pool = pool_alloconly_create("fts expunge log append", 1024); + ctx = p_new(pool, struct fts_expunge_log_append_ctx, 1); + ctx->log = log; + ctx->pool = pool; + hash_table_create(&ctx->mailboxes, pool, 0, guid_128_hash, guid_128_cmp); + + if (log != NULL && fts_expunge_log_reopen_if_needed(log, TRUE) < 0) + ctx->failed = TRUE; + return ctx; +} + +static struct fts_expunge_log_mailbox * +fts_expunge_log_mailbox_alloc(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid) +{ + uint8_t *guid_p; + struct fts_expunge_log_mailbox *mailbox; + + mailbox = p_new(ctx->pool, struct fts_expunge_log_mailbox, 1); + guid_128_copy(mailbox->guid, mailbox_guid); + p_array_init(&mailbox->uids, ctx->pool, 16); + + guid_p = mailbox->guid; + hash_table_insert(ctx->mailboxes, guid_p, mailbox); + return mailbox; +} + +static struct fts_expunge_log_mailbox * +fts_expunge_log_append_mailbox(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid) +{ + const uint8_t *guid_p = mailbox_guid; + struct fts_expunge_log_mailbox *mailbox; + + if (ctx->prev_mailbox != NULL && + guid_128_equals(mailbox_guid, ctx->prev_mailbox->guid)) + mailbox = ctx->prev_mailbox; + else { + mailbox = hash_table_lookup(ctx->mailboxes, guid_p); + if (mailbox == NULL) + mailbox = fts_expunge_log_mailbox_alloc(ctx, mailbox_guid); + ctx->prev_mailbox = mailbox; + } + return mailbox; +} +void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + uint32_t uid) +{ + struct fts_expunge_log_mailbox *mailbox; + + mailbox = fts_expunge_log_append_mailbox(ctx, mailbox_guid); + if (!seq_range_array_add(&mailbox->uids, uid)) + mailbox->uids_count++; +} +void fts_expunge_log_append_range(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + const struct seq_range *uids) +{ + struct fts_expunge_log_mailbox *mailbox; + + mailbox = fts_expunge_log_append_mailbox(ctx, mailbox_guid); + mailbox->uids_count += seq_range_array_add_range_count(&mailbox->uids, + uids->seq1, uids->seq2); + /* To be honest, an unbacked log doesn't need to maintain the uids_count, + but we don't know here if we're supporting an unbacked log or not, so we + have to maintain the value, just in case. + At the moment, the only caller of this function is for unbacked logs. */ +} +void fts_expunge_log_append_record(struct fts_expunge_log_append_ctx *ctx, + const struct fts_expunge_log_read_record *record) +{ + const struct seq_range *range; + /* FIXME: Optimise with a merge */ + array_foreach(&record->uids, range) + fts_expunge_log_append_range(ctx, record->mailbox_guid, range); +} +static void fts_expunge_log_append_mailbox_record(struct fts_expunge_log_append_ctx *ctx, + struct fts_expunge_log_mailbox *mailbox) +{ + const struct seq_range *range; + /* FIXME: Optimise with a merge */ + array_foreach(&mailbox->uids, range) + fts_expunge_log_append_range(ctx, mailbox->guid, range); +} + +static void +fts_expunge_log_export(struct fts_expunge_log_append_ctx *ctx, + uint32_t expunge_count, buffer_t *output) +{ + struct hash_iterate_context *iter; + uint8_t *guid_p; + struct fts_expunge_log_mailbox *mailbox; + struct fts_expunge_log_record *rec; + size_t rec_offset; + + iter = hash_table_iterate_init(ctx->mailboxes); + while (hash_table_iterate(iter, ctx->mailboxes, &guid_p, &mailbox)) { + rec_offset = output->used; + rec = buffer_append_space_unsafe(output, sizeof(*rec)); + memcpy(rec->guid, mailbox->guid, sizeof(rec->guid)); + + /* uint32_t expunge_uid_ranges[]; */ + buffer_append(output, array_front(&mailbox->uids), + array_count(&mailbox->uids) * + sizeof(struct seq_range)); + /* uint32_t expunge_count; */ + expunge_count += mailbox->uids_count; + buffer_append(output, &expunge_count, sizeof(expunge_count)); + + /* update the header now that we know the record contents */ + rec = buffer_get_space_unsafe(output, rec_offset, + output->used - rec_offset); + rec->record_size = output->used - rec_offset; + rec->checksum = crc32_data(&rec->record_size, + rec->record_size - + sizeof(rec->checksum)); + } + hash_table_iterate_deinit(&iter); +} + +static int +fts_expunge_log_write(struct fts_expunge_log_append_ctx *ctx) +{ + struct fts_expunge_log *log = ctx->log; + buffer_t *buf; + uint32_t expunge_count, *e; + int ret; + + /* Unbacked expunge logs cannot be written, by definition */ + i_assert(log != NULL); + + /* try to append to the latest file */ + if (fts_expunge_log_reopen_if_needed(log, TRUE) < 0) + return -1; + + if (fts_expunge_log_read_expunge_count(log, &expunge_count) < 0) + return -1; + + buf = buffer_create_dynamic(default_pool, 1024); + fts_expunge_log_export(ctx, expunge_count, buf); + /* the file was opened with O_APPEND, so this write() should be + appended atomically without any need for locking. */ + for (;;) { + if (write_full(log->fd, buf->data, buf->used) < 0) { + i_error("write(%s) failed: %m", log->path); + if (ftruncate(log->fd, log->st.st_size) < 0) + i_error("ftruncate(%s) failed: %m", log->path); + } + if ((ret = fts_expunge_log_reopen_if_needed(log, TRUE)) <= 0) + break; + /* the log was unlinked, so we'll need to write again to + the new file. the expunge_count needs to be reset to zero + from here. */ + e = buffer_get_space_unsafe(buf, buf->used - sizeof(uint32_t), + sizeof(uint32_t)); + i_assert(*e > expunge_count); + *e -= expunge_count; + expunge_count = 0; + } + buffer_free(&buf); + + if (ret == 0) { + /* finish by closing the log. this forces NFS to flush the + changes to disk without our having to explicitly play with + fsync() */ + if (close(log->fd) < 0) { + /* FIXME: we should ftruncate() in case there + were partial writes.. */ + i_error("close(%s) failed: %m", log->path); + ret = -1; + } + log->fd = -1; + } + return ret; +} + +static int fts_expunge_log_append_finalize(struct fts_expunge_log_append_ctx **_ctx, + bool commit) +{ + struct fts_expunge_log_append_ctx *ctx = *_ctx; + int ret = ctx->failed ? -1 : 0; + + *_ctx = NULL; + if (commit && ret == 0) + ret = fts_expunge_log_write(ctx); + + hash_table_destroy(&ctx->mailboxes); + pool_unref(&ctx->pool); + return ret; +} + +int fts_expunge_log_uid_count(struct fts_expunge_log *log, + unsigned int *expunges_r) +{ + int ret; + + if ((ret = fts_expunge_log_reopen_if_needed(log, FALSE)) <= 0) { + *expunges_r = 0; + return ret; + } + + return fts_expunge_log_read_expunge_count(log, expunges_r); +} + +int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **_ctx) +{ + return fts_expunge_log_append_finalize(_ctx, TRUE); +} + +int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **_ctx) +{ + return fts_expunge_log_append_finalize(_ctx, FALSE); +} + +struct fts_expunge_log_read_ctx * +fts_expunge_log_read_begin(struct fts_expunge_log *log) +{ + struct fts_expunge_log_read_ctx *ctx; + + ctx = i_new(struct fts_expunge_log_read_ctx, 1); + ctx->log = log; + if (fts_expunge_log_reopen_if_needed(log, FALSE) < 0) + ctx->failed = TRUE; + else if (log->fd != -1) + ctx->input = i_stream_create_fd(log->fd, SIZE_MAX); + ctx->unlink = TRUE; + return ctx; +} + +static bool +fts_expunge_log_record_size_is_valid(const struct fts_expunge_log_record *rec, + unsigned int *uids_size_r) +{ + if (rec->record_size < sizeof(*rec) + sizeof(uint32_t)*3) + return FALSE; + *uids_size_r = rec->record_size - sizeof(*rec) - sizeof(uint32_t); + return *uids_size_r % sizeof(uint32_t)*2 == 0; +} + +static void +fts_expunge_log_read_failure(struct fts_expunge_log_read_ctx *ctx, + unsigned int wanted_size) +{ + size_t size; + + if (ctx->input->stream_errno != 0) { + ctx->failed = TRUE; + i_error("read(%s) failed: %s", ctx->log->path, + i_stream_get_error(ctx->input)); + } else { + size = i_stream_get_data_size(ctx->input); + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Unexpected EOF (read %zu / %u bytes)", + ctx->log->path, size, wanted_size); + } +} + +const struct fts_expunge_log_read_record * +fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx) +{ + const unsigned char *data; + const struct fts_expunge_log_record *rec; + unsigned int uids_size; + size_t size; + uint32_t checksum; + + if (ctx->input == NULL) + return NULL; + + /* initial read to try to get the record */ + (void)i_stream_read_bytes(ctx->input, &data, &size, IO_BLOCK_SIZE); + if (size == 0 && ctx->input->stream_errno == 0) { + /* expected EOF - mark the file as read by unlinking it */ + if (ctx->unlink) + i_unlink_if_exists(ctx->log->path); + + /* try reading again, in case something new was written */ + i_stream_sync(ctx->input); + (void)i_stream_read_bytes(ctx->input, &data, &size, + IO_BLOCK_SIZE); + } + if (size < sizeof(*rec)) { + if (size == 0 && ctx->input->stream_errno == 0) { + /* expected EOF */ + return NULL; + } + fts_expunge_log_read_failure(ctx, sizeof(*rec)); + return NULL; + } + rec = (const void *)data; + + if (!fts_expunge_log_record_size_is_valid(rec, &uids_size)) { + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Invalid record size: %u", + ctx->log->path, rec->record_size); + return NULL; + } + + /* read the entire record */ + while (size < rec->record_size) { + if (i_stream_read_bytes(ctx->input, &data, &size, rec->record_size) < 0) { + fts_expunge_log_read_failure(ctx, rec->record_size); + return NULL; + } + rec = (const void *)data; + } + + /* verify that the record checksum is valid */ + checksum = crc32_data(&rec->record_size, + rec->record_size - sizeof(rec->checksum)); + if (checksum != rec->checksum) { + ctx->corrupted = TRUE; + i_error("Corrupted fts expunge log %s: " + "Record checksum mismatch: %u != %u", + ctx->log->path, checksum, rec->checksum); + return NULL; + } + + memcpy(ctx->read_rec.mailbox_guid, rec->guid, + sizeof(ctx->read_rec.mailbox_guid)); + /* create the UIDs array by pointing it directly into input + stream's buffer */ + buffer_create_from_const_data(&ctx->buffer, rec + 1, uids_size); + array_create_from_buffer(&ctx->read_rec.uids, &ctx->buffer, + sizeof(struct seq_range)); + + i_stream_skip(ctx->input, rec->record_size); + return &ctx->read_rec; +} + +int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **_ctx) +{ + struct fts_expunge_log_read_ctx *ctx = *_ctx; + int ret = ctx->failed ? -1 : (ctx->corrupted ? 0 : 1); + + *_ctx = NULL; + + if (ctx->corrupted) { + if (ctx->unlink) + i_unlink_if_exists(ctx->log->path); + } + + i_stream_unref(&ctx->input); + i_free(ctx); + return ret; +} + +int fts_expunge_log_flatten(const char *path, + struct fts_expunge_log_append_ctx **flattened_r) +{ + struct fts_expunge_log *read; + struct fts_expunge_log_read_ctx *read_ctx; + const struct fts_expunge_log_read_record *record; + struct fts_expunge_log_append_ctx *append; + int ret; + + i_assert(path != NULL && flattened_r != NULL); + read = fts_expunge_log_init(path); + + read_ctx = fts_expunge_log_read_begin(read); + read_ctx->unlink = FALSE; + + append = fts_expunge_log_append_begin(NULL); + while((record = fts_expunge_log_read_next(read_ctx)) != NULL) { + fts_expunge_log_append_record(append, record); + } + + if ((ret = fts_expunge_log_read_end(&read_ctx)) > 0) + *flattened_r = append; + fts_expunge_log_deinit(&read); + + return ret; +} +bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, uint32_t uid) +{ + const struct fts_expunge_log_mailbox *mailbox; + const uint8_t *guid_p = mailbox_guid; + + mailbox = hash_table_lookup(ctx->mailboxes, guid_p); + if (mailbox == NULL) + return FALSE; + return seq_range_exists(&mailbox->uids, uid); +} +int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *from, + const struct fts_expunge_log_read_record *record) +{ + const uint8_t *guid_p = record->mailbox_guid; + struct fts_expunge_log_mailbox *mailbox = hash_table_lookup(from->mailboxes, guid_p); + if (mailbox == NULL) + return 0; /* may only remove things that exist */ + + mailbox->uids_count -= seq_range_array_remove_seq_range(&mailbox->uids, &record->uids); + return 1; +} +int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from, + struct fts_expunge_log *subtract) +{ + unsigned int failures = 0; + struct fts_expunge_log_read_ctx *read_ctx = fts_expunge_log_read_begin(subtract); + read_ctx->unlink = FALSE; + + const struct fts_expunge_log_read_record *record; + while ((record = fts_expunge_log_read_next(read_ctx)) != NULL) { + if (fts_expunge_log_append_remove(from, record) <= 0) + failures++; + } + if (failures > 0) + i_warning("fts: Expunge log subtract ignored %u nonexistent mailbox GUIDs", + failures); + return fts_expunge_log_read_end(&read_ctx); +} +/* It could be argued that somehow adding a log (file) to the append context + and then calling the _write() helper would be easier. But then there's the + _commit() vs. _abort() cleanup that would need to be addressed. Just creating + a copy is simpler. */ +int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *read_log, + const char *path) +{ + int ret; + struct fts_expunge_log *nlog = fts_expunge_log_init(path); + struct fts_expunge_log_append_ctx *nappend = fts_expunge_log_append_begin(nlog); + + struct hash_iterate_context *iter; + uint8_t *guid_p; + struct fts_expunge_log_mailbox *mailbox; + + iter = hash_table_iterate_init(read_log->mailboxes); + while (hash_table_iterate(iter, read_log->mailboxes, &guid_p, &mailbox)) + fts_expunge_log_append_mailbox_record(nappend, mailbox); + + hash_table_iterate_deinit(&iter); + ret = fts_expunge_log_append_commit(&nappend); + fts_expunge_log_deinit(&nlog); + + return ret; +} diff --git a/src/plugins/fts/fts-expunge-log.h b/src/plugins/fts/fts-expunge-log.h new file mode 100644 index 0000000..cc15f29 --- /dev/null +++ b/src/plugins/fts/fts-expunge-log.h @@ -0,0 +1,58 @@ +#ifndef FTS_EXPUNGE_LOG +#define FTS_EXPUNGE_LOG + +#include "seq-range-array.h" +#include "guid.h" + +struct fts_expunge_log_read_record { + guid_128_t mailbox_guid; + ARRAY_TYPE(seq_range) uids; +}; + +struct fts_expunge_log *fts_expunge_log_init(const char *path); +void fts_expunge_log_deinit(struct fts_expunge_log **log); + +struct fts_expunge_log_append_ctx * +fts_expunge_log_append_begin(struct fts_expunge_log *log); +void fts_expunge_log_append_next(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + uint32_t uid); +void fts_expunge_log_append_range(struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, + const struct seq_range *uids); +void fts_expunge_log_append_record(struct fts_expunge_log_append_ctx *ctx, + const struct fts_expunge_log_read_record *record); +/* In-memory flattened structures may have records removed from them, + file-backed ones may not. Non-existence of UIDs is not an error, + non-existence of mailbox GUID causes an error return of 0. */ +int fts_expunge_log_append_remove(struct fts_expunge_log_append_ctx *ctx, + const struct fts_expunge_log_read_record *record); +int fts_expunge_log_append_commit(struct fts_expunge_log_append_ctx **ctx); +/* Do not commit non-backed structures, abort them after use. */ +int fts_expunge_log_append_abort(struct fts_expunge_log_append_ctx **ctx); + +int fts_expunge_log_uid_count(struct fts_expunge_log *log, + unsigned int *expunges_r); + +struct fts_expunge_log_read_ctx * +fts_expunge_log_read_begin(struct fts_expunge_log *log); +const struct fts_expunge_log_read_record * +fts_expunge_log_read_next(struct fts_expunge_log_read_ctx *ctx); +/* Returns 1 if all ok, 0 if there was corruption, -1 if I/O error. + If end() is called before reading all records, the log isn't unlinked. */ +int fts_expunge_log_read_end(struct fts_expunge_log_read_ctx **ctx); + +/* Read an entire log file, and flatten it into one hash of arrays. + The struct it returns cannot be written, as it has no backing store */ +int fts_expunge_log_flatten(const char *path, + struct fts_expunge_log_append_ctx **flattened_r); +bool fts_expunge_log_contains(const struct fts_expunge_log_append_ctx *ctx, + const guid_128_t mailbox_guid, uint32_t uid); +/* Modify in-place a flattened log. If non-existent mailbox GUIDs are + encountered, a warning will be logged. */ +int fts_expunge_log_subtract(struct fts_expunge_log_append_ctx *from, + struct fts_expunge_log *subtract); +/* Write a modified flattened log as a new file. */ +int fts_expunge_log_flat_write(const struct fts_expunge_log_append_ctx *flattened, + const char *path); +#endif diff --git a/src/plugins/fts/fts-indexer.c b/src/plugins/fts/fts-indexer.c new file mode 100644 index 0000000..aca23c9 --- /dev/null +++ b/src/plugins/fts/fts-indexer.c @@ -0,0 +1,300 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "connection.h" +#include "write-full.h" +#include "istream.h" +#include "ostream.h" +#include "strescape.h" +#include "time-util.h" +#include "settings-parser.h" +#include "mail-user.h" +#include "mail-storage-private.h" +#include "fts-api.h" +#include "fts-indexer.h" + +#define INDEXER_NOTIFY_INTERVAL_SECS 10 +#define INDEXER_SOCKET_NAME "indexer" +#define INDEXER_WAIT_MSECS 250 + +struct fts_indexer_context { + struct connection conn; + + struct mailbox *box; + struct ioloop *ioloop; + + struct timeval search_start_time, last_notify; + unsigned int percentage; + struct connection_list *connection_list; + + bool notified:1; + bool failed:1; + bool completed:1; +}; + +static void fts_indexer_notify(struct fts_indexer_context *ctx) +{ + unsigned long long elapsed_msecs, est_total_msecs; + unsigned int eta_secs; + + if (ioloop_time - ctx->last_notify.tv_sec < INDEXER_NOTIFY_INTERVAL_SECS) + return; + ctx->last_notify = ioloop_timeval; + + if (ctx->box->storage->callbacks.notify_ok == NULL || + ctx->percentage == 0) + return; + + elapsed_msecs = timeval_diff_msecs(&ioloop_timeval, + &ctx->search_start_time); + est_total_msecs = elapsed_msecs * 100 / ctx->percentage; + eta_secs = (est_total_msecs - elapsed_msecs) / 1000; + + T_BEGIN { + const char *text; + + text = t_strdup_printf("Indexed %d%% of the mailbox, " + "ETA %d:%02d", ctx->percentage, + eta_secs/60, eta_secs%60); + ctx->box->storage->callbacks. + notify_ok(ctx->box, text, + ctx->box->storage->callback_context); + ctx->notified = TRUE; + } T_END; +} + +static int fts_indexer_more_int(struct fts_indexer_context *ctx) +{ + struct ioloop *prev_ioloop = current_ioloop; + struct timeout *to; + + if (ctx->failed) + return -1; + if (ctx->completed) + return 1; + + /* wait for a while for the reply. FIXME: once search API supports + asynchronous waits, get rid of this wait and use the mail IO loop */ + io_loop_set_current(ctx->ioloop); + to = timeout_add_short(INDEXER_WAIT_MSECS, io_loop_stop, ctx->ioloop); + io_loop_run(ctx->ioloop); + timeout_remove(&to); + io_loop_set_current(prev_ioloop); + + if (ctx->failed) + return -1; + if (ctx->completed) + return 1; + return 0; +} + +int fts_indexer_more(struct fts_indexer_context *ctx) +{ + int ret; + + if ((ret = fts_indexer_more_int(ctx)) < 0) { + /* If failed is already set, the code has had a chance to + * set an internal error already, i.e. MAIL_ERROR_INUSE. */ + if (!ctx->failed) + mail_storage_set_internal_error(ctx->box->storage); + ctx->failed = TRUE; + return -1; + } + + if (ret == 0) + fts_indexer_notify(ctx); + + return ret; +} + +static void fts_indexer_destroy(struct connection *conn) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + connection_deinit(conn); + if (!ctx->completed) + ctx->failed = TRUE; + ctx->completed = TRUE; +} + +int fts_indexer_deinit(struct fts_indexer_context **_ctx) +{ + struct fts_indexer_context *ctx = *_ctx; + i_assert(ctx != NULL); + *_ctx = NULL; + if (!ctx->completed) + ctx->failed = TRUE; + int ret = ctx->failed ? -1 : 0; + if (ctx->notified) { + /* we notified at least once */ + ctx->box->storage->callbacks. + notify_ok(ctx->box, "Mailbox indexing finished", + ctx->box->storage->callback_context); + } + connection_list_deinit(&ctx->connection_list); + io_loop_set_current(ctx->ioloop); + io_loop_destroy(&ctx->ioloop); + i_free(ctx); + return ret; +} + +static int +fts_indexer_input_args(struct connection *conn, const char *const *args) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + int percentage; + if (args[1] == NULL) { + e_error(conn->event, "indexer sent invalid reply"); + return -1; + } + if (strcmp(args[0], "1") != 0) { + e_error(conn->event, "indexer sent invalid reply"); + return -1; + } + if (strcmp(args[1], "OK") == 0) + return 1; + if (str_to_int(args[1], &percentage) < 0) { + e_error(conn->event, "indexer sent invalid progress: %s", args[1]); + ctx->failed = TRUE; + return -1; + } + if (percentage < 0) { + e_error(ctx->box->event, "indexer failed to index mailbox"); + ctx->failed = TRUE; + return -1; + } + ctx->percentage = percentage; + if (ctx->percentage == 100) + ctx->completed = TRUE; + return 1; +} + +static void fts_indexer_client_connected(struct connection *conn, bool success) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + if (!success) { + ctx->completed = TRUE; + ctx->failed = TRUE; + return; + } + ctx->failed = ctx->completed = FALSE; + const char *cmd = t_strdup_printf("PREPEND\t1\t%s\t%s\t0\t%s\n", + str_tabescape(ctx->box->storage->user->username), + str_tabescape(ctx->box->vname), + str_tabescape(ctx->box->storage->user->session_id)); + o_stream_nsend_str(conn->output, cmd); +} + +static void fts_indexer_idle_timeout(struct connection *conn) +{ + struct fts_indexer_context *ctx = + container_of(conn, struct fts_indexer_context, conn); + mail_storage_set_error(ctx->box->storage, MAIL_ERROR_INUSE, + "Timeout while waiting for indexing to finish"); + ctx->failed = TRUE; + connection_disconnect(conn); +} + +static const struct connection_settings indexer_client_set = +{ + .service_name_in = "indexer", + .service_name_out = "indexer", + .major_version = 1, + .minor_version = 0, + .client_connect_timeout_msecs = 2000, + .input_max_size = SIZE_MAX, + .output_max_size = IO_BLOCK_SIZE, + .client = TRUE, +}; + +static const struct connection_vfuncs indexer_client_vfuncs = +{ + .destroy = fts_indexer_destroy, + .client_connected = fts_indexer_client_connected, + .input_args = fts_indexer_input_args, + .idle_timeout = fts_indexer_idle_timeout, +}; + +int fts_indexer_init(struct fts_backend *backend, struct mailbox *box, + struct fts_indexer_context **ctx_r) +{ + struct ioloop *prev_ioloop = current_ioloop; + struct fts_indexer_context *ctx; + struct mailbox_status status; + uint32_t last_uid, seq1, seq2; + const char *path, *value, *error; + unsigned int timeout_secs = 0; + int ret; + + value = mail_user_plugin_getenv(box->storage->user, "fts_index_timeout"); + if (value != NULL) { + if (settings_get_time(value, &timeout_secs, &error) < 0) { + e_error(box->storage->user->event, + "Invalid fts_index_timeout setting: %s", + error); + return -1; + } + } + + if (fts_backend_get_last_uid(backend, box, &last_uid) < 0) + return -1; + + mailbox_get_open_status(box, STATUS_UIDNEXT, &status); + if (status.uidnext == last_uid+1) { + /* everything is already indexed */ + return 0; + } + + mailbox_get_seq_range(box, last_uid+1, (uint32_t)-1, &seq1, &seq2); + if (seq1 == 0) { + /* no new messages (last messages in mailbox were expunged) */ + return 0; + } + + path = t_strconcat(box->storage->user->set->base_dir, + "/"INDEXER_SOCKET_NAME, NULL); + + ctx = i_new(struct fts_indexer_context, 1); + ctx->box = box; + ctx->search_start_time = ioloop_timeval; + ctx->conn.event_parent = box->event; + ctx->ioloop = io_loop_create(); + ctx->connection_list = connection_list_init(&indexer_client_set, + &indexer_client_vfuncs); + ctx->conn.input_idle_timeout_secs = timeout_secs; + connection_init_client_unix(ctx->connection_list, &ctx->conn, + path); + ret = connection_client_connect(&ctx->conn); + io_loop_set_current(prev_ioloop); + *ctx_r = ctx; + return ctx->failed || ret < 0 ? -1 : 1; +} + +#define INDEXER_HANDSHAKE "1\t0\tindexer\tindexer\n" + +int fts_indexer_cmd(struct mail_user *user, const char *cmd, + const char **path_r) +{ + const char *path; + int fd; + + path = t_strconcat(user->set->base_dir, + "/"INDEXER_SOCKET_NAME, NULL); + fd = net_connect_unix_with_retries(path, 1000); + if (fd == -1) { + i_error("net_connect_unix(%s) failed: %m", path); + return -1; + } + + cmd = t_strconcat(INDEXER_HANDSHAKE, cmd, NULL); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return -1; + } + *path_r = path; + return fd; +} diff --git a/src/plugins/fts/fts-indexer.h b/src/plugins/fts/fts-indexer.h new file mode 100644 index 0000000..7ccbc7e --- /dev/null +++ b/src/plugins/fts/fts-indexer.h @@ -0,0 +1,22 @@ +#ifndef FTS_BUILD_H +#define FTS_BUILD_H + +struct fts_backend; +struct fts_indexer_context; + +/* Initialize indexing the given mailbox via indexer service. Returns 1 if + indexing started, 0 if there was no need to index or -1 if error. */ +int fts_indexer_init(struct fts_backend *backend, struct mailbox *box, + struct fts_indexer_context **ctx_r); +/* Returns 0 if ok, -1 if error. */ +int fts_indexer_deinit(struct fts_indexer_context **ctx); + +/* Build more. Returns 1 if finished, 0 if this function needs to be called + again, -1 if error. */ +int fts_indexer_more(struct fts_indexer_context *ctx); + +/* Returns fd, which you can either read from or close. */ +int fts_indexer_cmd(struct mail_user *user, const char *cmd, + const char **path_r); + +#endif diff --git a/src/plugins/fts/fts-parser-html.c b/src/plugins/fts/fts-parser-html.c new file mode 100644 index 0000000..aa2078d --- /dev/null +++ b/src/plugins/fts/fts-parser-html.c @@ -0,0 +1,64 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "message-parser.h" +#include "mail-html2text.h" +#include "fts-parser.h" + +struct html_fts_parser { + struct fts_parser parser; + struct mail_html2text *html2text; + buffer_t *output; +}; + +static struct fts_parser * +fts_parser_html_try_init(struct fts_parser_context *parser_context) +{ + struct html_fts_parser *parser; + + if (!mail_html2text_content_type_match(parser_context->content_type)) + return NULL; + + parser = i_new(struct html_fts_parser, 1); + parser->parser.v = fts_parser_html; + parser->html2text = mail_html2text_init(0); + parser->output = buffer_create_dynamic(default_pool, 4096); + return &parser->parser; +} + +static void fts_parser_html_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct html_fts_parser *parser = (struct html_fts_parser *)_parser; + + if (block->size == 0) { + /* finished */ + return; + } + + buffer_set_used_size(parser->output, 0); + mail_html2text_more(parser->html2text, block->data, block->size, + parser->output); + + block->data = parser->output->data; + block->size = parser->output->used; +} + +static int fts_parser_html_deinit(struct fts_parser *_parser, + const char **retriable_err_msg_r ATTR_UNUSED) +{ + struct html_fts_parser *parser = (struct html_fts_parser *)_parser; + + mail_html2text_deinit(&parser->html2text); + buffer_free(&parser->output); + i_free(parser); + return 1; +} + +struct fts_parser_vfuncs fts_parser_html = { + fts_parser_html_try_init, + fts_parser_html_more, + fts_parser_html_deinit, + NULL +}; diff --git a/src/plugins/fts/fts-parser-script.c b/src/plugins/fts/fts-parser-script.c new file mode 100644 index 0000000..eefbe07 --- /dev/null +++ b/src/plugins/fts/fts-parser-script.c @@ -0,0 +1,277 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "net.h" +#include "istream.h" +#include "write-full.h" +#include "module-context.h" +#include "rfc822-parser.h" +#include "rfc2231-parser.h" +#include "message-parser.h" +#include "mail-user.h" +#include "fts-parser.h" + +#define SCRIPT_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_parser_script_user_module) + +#define SCRIPT_HANDSHAKE "VERSION\tscript\t4\t0\nalarm=10\nnoreply\n" + +struct content { + const char *content_type; + const char *const *extensions; +}; + +struct fts_parser_script_user { + union mail_user_module_context module_ctx; + + ARRAY(struct content) content; +}; + +struct script_fts_parser { + struct fts_parser parser; + + int fd; + char *path; + + unsigned char outbuf[IO_BLOCK_SIZE]; + bool failed; + bool shutdown; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_parser_script_user_module, + &mail_user_module_register); + +static int script_connect(struct mail_user *user, const char **path_r) +{ + const char *path; + int fd; + + path = mail_user_plugin_getenv(user, "fts_decoder"); + if (path == NULL) + return -1; + + if (*path != '/') + path = t_strconcat(user->set->base_dir, "/", path, NULL); + fd = net_connect_unix_with_retries(path, 1000); + if (fd == -1) + i_error("net_connect_unix(%s) failed: %m", path); + else + net_set_nonblock(fd, FALSE); + *path_r = path; + return fd; +} + +static int script_contents_read(struct mail_user *user) +{ + struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user); + const char *path, *cmd, *line; + char **args; + struct istream *input; + struct content *content; + bool eof_seen = FALSE; + int fd, ret = 0; + i_assert(suser != NULL); + + fd = script_connect(user, &path); + if (fd == -1) + return -1; + + cmd = t_strdup_printf(SCRIPT_HANDSHAKE"\n"); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return -1; + } + input = i_stream_create_fd_autoclose(&fd, 1024); + while ((line = i_stream_read_next_line(input)) != NULL) { + /* <content-type> <extension> [<extension> ...] */ + args = p_strsplit_spaces(user->pool, line, " "); + if (args[0] == NULL) { + eof_seen = TRUE; + break; + } + if (args[0][0] == '\0' || args[1] == NULL) { + i_error("parser script sent invalid input: %s", line); + continue; + } + + content = array_append_space(&suser->content); + content->content_type = str_lcase(args[0]); + content->extensions = (const void *)(args+1); + } + if (input->stream_errno != 0) { + i_error("parser script read(%s) failed: %s", path, + i_stream_get_error(input)); + ret = -1; + } else if (!eof_seen) { + if (input->v_offset == 0) + i_error("parser script didn't send any data"); + else + i_error("parser script didn't send empty EOF line"); + } + i_stream_destroy(&input); + return ret; +} + +static bool script_support_content(struct mail_user *user, + const char **content_type, + const char *filename) +{ + struct fts_parser_script_user *suser = SCRIPT_USER_CONTEXT(user); + const struct content *content; + const char *extension; + + if (suser == NULL) { + suser = p_new(user->pool, struct fts_parser_script_user, 1); + p_array_init(&suser->content, user->pool, 32); + MODULE_CONTEXT_SET(user, fts_parser_script_user_module, suser); + } + if (array_count(&suser->content) == 0) { + if (script_contents_read(user) < 0) + return FALSE; + } + + if (strcmp(*content_type, "application/octet-stream") == 0) { + if (filename == NULL) + return FALSE; + extension = strrchr(filename, '.'); + if (extension == NULL) + return FALSE; + extension = extension + 1; + + array_foreach(&suser->content, content) { + if (content->extensions != NULL && + str_array_icase_find(content->extensions, extension)) { + *content_type = content->content_type; + return TRUE; + } + } + } else { + array_foreach(&suser->content, content) { + if (strcmp(content->content_type, *content_type) == 0) + return TRUE; + } + } + return FALSE; +} + +static void parse_content_disposition(const char *content_disposition, + const char **filename_r) +{ + struct rfc822_parser_context parser; + const char *const *results, *filename2; + string_t *str; + + *filename_r = NULL; + + if (content_disposition == NULL) + return; + + rfc822_parser_init(&parser, (const unsigned char *)content_disposition, + strlen(content_disposition), NULL); + rfc822_skip_lwsp(&parser); + + /* type; param; param; .. */ + str = t_str_new(32); + if (rfc822_parse_mime_token(&parser, str) < 0) { + rfc822_parser_deinit(&parser); + return; + } + + rfc2231_parse(&parser, &results); + filename2 = NULL; + for (; *results != NULL; results += 2) { + if (strcasecmp(results[0], "filename") == 0) { + *filename_r = results[1]; + break; + } + if (strcasecmp(results[0], "filename*") == 0) + filename2 = results[1]; + } + if (*filename_r == NULL) { + /* RFC 2231 style non-ascii filename. we don't really care + much about the filename actually, just about its extension */ + *filename_r = filename2; + } + rfc822_parser_deinit(&parser); +} + +static struct fts_parser * +fts_parser_script_try_init(struct fts_parser_context *parser_context) +{ + struct script_fts_parser *parser; + const char *filename, *path, *cmd; + int fd; + + parse_content_disposition(parser_context->content_disposition, &filename); + if (!script_support_content(parser_context->user, &parser_context->content_type, filename)) + return NULL; + + fd = script_connect(parser_context->user, &path); + if (fd == -1) + return NULL; + cmd = t_strdup_printf(SCRIPT_HANDSHAKE"%s\n\n", parser_context->content_type); + if (write_full(fd, cmd, strlen(cmd)) < 0) { + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); + return NULL; + } + + parser = i_new(struct script_fts_parser, 1); + parser->parser.v = fts_parser_script; + parser->path = i_strdup(path); + parser->fd = fd; + return &parser->parser; +} + +static void fts_parser_script_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct script_fts_parser *parser = (struct script_fts_parser *)_parser; + ssize_t ret; + + if (block->size > 0) { + /* first we'll send everything to the script */ + if (!parser->failed && + write_full(parser->fd, block->data, block->size) < 0) { + i_error("write(%s) failed: %m", parser->path); + parser->failed = TRUE; + } + block->size = 0; + } else { + if (!parser->shutdown) { + if (shutdown(parser->fd, SHUT_WR) < 0) + i_error("shutdown(%s) failed: %m", parser->path); + parser->shutdown = TRUE; + } + /* read the result from the script */ + ret = read(parser->fd, parser->outbuf, sizeof(parser->outbuf)); + if (ret < 0) + i_error("read(%s) failed: %m", parser->path); + else { + block->data = parser->outbuf; + block->size = ret; + } + } +} + +static int fts_parser_script_deinit(struct fts_parser *_parser, + const char **retriable_err_msg_r ATTR_UNUSED) +{ + struct script_fts_parser *parser = (struct script_fts_parser *)_parser; + int ret = parser->failed ? -1 : 1; + + if (close(parser->fd) < 0) + i_error("close(%s) failed: %m", parser->path); + i_free(parser->path); + i_free(parser); + return ret; +} + +struct fts_parser_vfuncs fts_parser_script = { + fts_parser_script_try_init, + fts_parser_script_more, + fts_parser_script_deinit, + NULL +}; diff --git a/src/plugins/fts/fts-parser-tika.c b/src/plugins/fts/fts-parser-tika.c new file mode 100644 index 0000000..bb6379c --- /dev/null +++ b/src/plugins/fts/fts-parser-tika.c @@ -0,0 +1,278 @@ +/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "istream.h" +#include "module-context.h" +#include "iostream-ssl.h" +#include "http-url.h" +#include "http-client.h" +#include "message-parser.h" +#include "mail-user.h" +#include "fts-parser.h" + +#define TIKA_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_parser_tika_user_module) + +struct fts_parser_tika_user { + union mail_user_module_context module_ctx; + struct http_url *http_url; +}; + +struct tika_fts_parser { + struct fts_parser parser; + struct mail_user *user; + struct http_client_request *http_req; + + struct ioloop *ioloop; + struct io *io; + struct istream *payload; + + bool failed; +}; + +static struct http_client *tika_http_client = NULL; +static MODULE_CONTEXT_DEFINE_INIT(fts_parser_tika_user_module, + &mail_user_module_register); + +static int +tika_get_http_client_url(struct mail_user *user, struct http_url **http_url_r) +{ + struct fts_parser_tika_user *tuser = TIKA_USER_CONTEXT(user); + struct http_client_settings http_set; + struct ssl_iostream_settings ssl_set; + const char *url, *error; + + url = mail_user_plugin_getenv(user, "fts_tika"); + if (url == NULL) { + /* fts_tika disabled */ + return -1; + } + + if (tuser != NULL) { + *http_url_r = tuser->http_url; + return *http_url_r == NULL ? -1 : 0; + } + + tuser = p_new(user->pool, struct fts_parser_tika_user, 1); + MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser); + + if (http_url_parse(url, NULL, 0, user->pool, + &tuser->http_url, &error) < 0) { + i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error); + return -1; + } + + if (tika_http_client == NULL) { + mail_user_init_ssl_client_settings(user, &ssl_set); + + i_zero(&http_set); + http_set.max_idle_time_msecs = 100; + http_set.max_parallel_connections = 1; + http_set.max_pipelined_requests = 1; + http_set.max_redirects = 1; + http_set.max_attempts = 3; + http_set.connect_timeout_msecs = 5*1000; + http_set.request_timeout_msecs = 60*1000; + http_set.ssl = &ssl_set; + http_set.debug = user->mail_debug; + http_set.event_parent = user->event; + + /* FIXME: We should initialize a shared client instead. However, + this is currently not possible due to an obscure bug + in the blocking HTTP payload API, which causes + conflicts with other HTTP applications like FTS Solr. + Using a private client will provide a quick fix for + now. */ + tika_http_client = http_client_init_private(&http_set); + } + *http_url_r = tuser->http_url; + return 0; +} + +static void +fts_tika_parser_response(const struct http_response *response, + struct tika_fts_parser *parser) +{ + i_assert(parser->payload == NULL); + + switch (response->status) { + case 200: + /* read response */ + if (response->payload == NULL) + parser->payload = i_stream_create_from_data("", 0); + else { + i_stream_ref(response->payload); + parser->payload = response->payload; + } + break; + case 204: /* empty response */ + case 415: /* Unsupported Media Type */ + case 422: /* Unprocessable Entity */ + e_debug(parser->user->event, "fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->payload = i_stream_create_from_data("", 0); + break; + default: + if (response->status / 100 == 5) { + /* Server Error - the problem could be anything (in Tika or + HTTP server or proxy) and might be retriable, but Tika has + trouble processing some documents and throws up this error + every time for those documents. */ + parser->parser.may_need_retry = TRUE; + i_free(parser->parser.retriable_error_msg); + parser->parser.retriable_error_msg = + i_strdup_printf("fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->payload = i_stream_create_from_data("", 0); + } else { + i_error("fts_tika: PUT %s failed: %s", + mail_user_plugin_getenv(parser->user, "fts_tika"), + http_response_get_message(response)); + parser->failed = TRUE; + } + break; + } + parser->http_req = NULL; + io_loop_stop(current_ioloop); +} + +static struct fts_parser * +fts_parser_tika_try_init(struct fts_parser_context *parser_context) +{ + struct tika_fts_parser *parser; + struct http_url *http_url; + struct http_client_request *http_req; + + if (tika_get_http_client_url(parser_context->user, &http_url) < 0) + return NULL; + if (http_url->path == NULL) + http_url->path = "/"; + + parser = i_new(struct tika_fts_parser, 1); + parser->parser.v = fts_parser_tika; + parser->user = parser_context->user; + + http_req = http_client_request(tika_http_client, "PUT", + http_url->host.name, + t_strconcat(http_url->path, http_url->enc_query, NULL), + fts_tika_parser_response, parser); + http_client_request_set_port(http_req, http_url->port); + http_client_request_set_ssl(http_req, http_url->have_ssl); + if (parser_context->content_type != NULL) + http_client_request_add_header(http_req, "Content-Type", + parser_context->content_type); + if (parser_context->content_disposition != NULL) + http_client_request_add_header(http_req, "Content-Disposition", + parser_context->content_disposition); + http_client_request_add_header(http_req, "Accept", "text/plain"); + + parser->http_req = http_req; + return &parser->parser; +} + +static void fts_parser_tika_more(struct fts_parser *_parser, + struct message_block *block) +{ + struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser; + struct ioloop *prev_ioloop = current_ioloop; + const unsigned char *data; + size_t size; + ssize_t ret; + + if (block->size > 0) { + /* first we'll send everything to Tika */ + if (!parser->failed && + http_client_request_send_payload(&parser->http_req, + block->data, + block->size) < 0) + parser->failed = TRUE; + block->size = 0; + return; + } + + if (parser->payload == NULL) { + /* read the result from Tika */ + if (!parser->failed && + http_client_request_finish_payload(&parser->http_req) < 0) + parser->failed = TRUE; + if (!parser->failed && parser->payload == NULL) + http_client_wait(tika_http_client); + if (parser->failed) + return; + i_assert(parser->payload != NULL); + } + /* continue returning data from Tika. we'll create a new ioloop just + for reading this one payload. */ + while ((ret = i_stream_read_more(parser->payload, &data, &size)) == 0) { + if (parser->failed) + break; + /* wait for more input from Tika */ + if (parser->ioloop == NULL) { + parser->ioloop = io_loop_create(); + parser->io = io_add_istream(parser->payload, io_loop_stop, + current_ioloop); + } else { + io_loop_set_current(parser->ioloop); + } + io_loop_run(current_ioloop); + } + /* switch back to original ioloop. */ + io_loop_set_current(prev_ioloop); + + if (parser->failed) + ; + else if (size > 0) { + i_assert(ret > 0); + block->data = data; + block->size = size; + i_stream_skip(parser->payload, size); + } else { + /* finished */ + i_assert(ret == -1); + if (parser->payload->stream_errno != 0) { + i_error("read(%s) failed: %s", + i_stream_get_name(parser->payload), + i_stream_get_error(parser->payload)); + parser->failed = TRUE; + } + } +} + +static int fts_parser_tika_deinit(struct fts_parser *_parser, const char **retriable_err_msg_r) +{ + struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser; + int ret = _parser->may_need_retry ? 0: (parser->failed ? -1 : 1); + + i_assert(ret != 0 || _parser->retriable_error_msg != NULL); + if (retriable_err_msg_r != NULL) + *retriable_err_msg_r = t_strdup(_parser->retriable_error_msg); + i_free(_parser->retriable_error_msg); + + /* remove io before unrefing payload - otherwise lib-http adds another + timeout to ioloop unnecessarily */ + i_stream_unref(&parser->payload); + io_remove(&parser->io); + http_client_request_abort(&parser->http_req); + if (parser->ioloop != NULL) { + io_loop_set_current(parser->ioloop); + io_loop_destroy(&parser->ioloop); + } + i_free(parser); + return ret; +} + +static void fts_parser_tika_unload(void) +{ + if (tika_http_client != NULL) + http_client_deinit(&tika_http_client); +} + +struct fts_parser_vfuncs fts_parser_tika = { + fts_parser_tika_try_init, + fts_parser_tika_more, + fts_parser_tika_deinit, + fts_parser_tika_unload +}; diff --git a/src/plugins/fts/fts-parser.c b/src/plugins/fts/fts-parser.c new file mode 100644 index 0000000..c0eac80 --- /dev/null +++ b/src/plugins/fts/fts-parser.c @@ -0,0 +1,127 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "unichar.h" +#include "message-parser.h" +#include "fts-parser.h" + +static const struct fts_parser_vfuncs *parsers[] = { + &fts_parser_html, + &fts_parser_script, + &fts_parser_tika +}; + +static const char *plaintext_content_types[] = { + "text/plain", + "message/delivery-status", + "message/disposition-notification", + "application/pgp-signature", + NULL +}; + +bool fts_parser_init(struct fts_parser_context *parser_context, + struct fts_parser **parser_r) +{ + unsigned int i; + i_assert(parser_context->user != NULL); + i_assert(parser_context->content_type != NULL); + + if (str_array_find(plaintext_content_types, parser_context->content_type)) { + /* we probably don't want/need to allow parsers to handle + plaintext? */ + return FALSE; + } + + for (i = 0; i < N_ELEMENTS(parsers); i++) { + *parser_r = parsers[i]->try_init(parser_context); + if (*parser_r != NULL) + return TRUE; + } + return FALSE; +} + +struct fts_parser *fts_parser_text_init(void) +{ + return i_new(struct fts_parser, 1); +} + +static bool data_has_nuls(const unsigned char *data, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (data[i] == '\0') + return TRUE; + } + return FALSE; +} + +static void replace_nul_bytes(buffer_t *buf) +{ + unsigned char *data; + size_t i, size; + + data = buffer_get_modifiable_data(buf, &size); + for (i = 0; i < size; i++) { + if (data[i] == '\0') + data[i] = ' '; + } +} + +void fts_parser_more(struct fts_parser *parser, struct message_block *block) +{ + if (parser->v.more != NULL) + parser->v.more(parser, block); + + if (!uni_utf8_data_is_valid(block->data, block->size) || + data_has_nuls(block->data, block->size)) { + /* output isn't valid UTF-8. make it. */ + if (parser->utf8_output == NULL) { + parser->utf8_output = + buffer_create_dynamic(default_pool, 4096); + } else { + buffer_set_used_size(parser->utf8_output, 0); + } + if (uni_utf8_get_valid_data(block->data, block->size, + parser->utf8_output)) { + /* valid UTF-8, but there were NULs */ + buffer_append(parser->utf8_output, block->data, + block->size); + } + replace_nul_bytes(parser->utf8_output); + block->data = parser->utf8_output->data; + block->size = parser->utf8_output->used; + } +} + +int fts_parser_deinit(struct fts_parser **_parser, const char **retriable_err_msg_r) +{ + struct fts_parser *parser = *_parser; + int ret = 1; + + *_parser = NULL; + + buffer_free(&parser->utf8_output); + if (parser->v.deinit != NULL) { + const char *error = NULL; + ret = parser->v.deinit(parser, &error); + if (ret == 0) { + i_assert(error != NULL); + if (retriable_err_msg_r != NULL) + *retriable_err_msg_r = error; + } + } else + i_free(parser); + return ret; +} + +void fts_parsers_unload(void) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(parsers); i++) { + if (parsers[i]->unload != NULL) + parsers[i]->unload(); + } +} diff --git a/src/plugins/fts/fts-parser.h b/src/plugins/fts/fts-parser.h new file mode 100644 index 0000000..0eb716e --- /dev/null +++ b/src/plugins/fts/fts-parser.h @@ -0,0 +1,48 @@ +#ifndef FTS_PARSER_H +#define FTS_PARSER_H + +struct message_block; +struct mail_user; + +struct fts_parser_context { + /* Can't be NULL */ + struct mail_user *user; + /* Can't be NULL */ + const char *content_type; + const char *content_disposition; +}; + +struct fts_parser_vfuncs { + struct fts_parser *(*try_init)(struct fts_parser_context *parser_context); + void (*more)(struct fts_parser *parser, struct message_block *block); + int (*deinit)(struct fts_parser *parser, const char **retriable_err_msg_r); + void (*unload)(void); +}; + +struct fts_parser { + struct fts_parser_vfuncs v; + buffer_t *utf8_output; + bool may_need_retry; + char *retriable_error_msg; +}; + +extern struct fts_parser_vfuncs fts_parser_html; +extern struct fts_parser_vfuncs fts_parser_script; +extern struct fts_parser_vfuncs fts_parser_tika; + +bool fts_parser_init(struct fts_parser_context *parser_context, + struct fts_parser **parser_r); +struct fts_parser *fts_parser_text_init(void); + +/* The parser is initially called with message body blocks. Once message is + finished, it's still called with incoming size=0 while the parser increases + it to non-zero. */ +void fts_parser_more(struct fts_parser *parser, struct message_block *block); +/* Returns 1 if ok, 0 if the parsing should be retried, -1 if error. + If 0 is returned, the retriable_err_msg_r is set, which should be logged + as error if no retrying is performed. */ +int fts_parser_deinit(struct fts_parser **parser, const char **retriable_err_msg_r); + +void fts_parsers_unload(void); + +#endif diff --git a/src/plugins/fts/fts-plugin.c b/src/plugins/fts/fts-plugin.c new file mode 100644 index 0000000..1902cb6 --- /dev/null +++ b/src/plugins/fts/fts-plugin.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "mail-storage-hooks.h" +#include "fts-filter.h" +#include "fts-tokenizer.h" +#include "fts-parser.h" +#include "fts-storage.h" +#include "fts-user.h" +#include "fts-plugin.h" +#include "fts-library.h" + +const char *fts_plugin_version = DOVECOT_ABI_VERSION; + +static struct mail_storage_hooks fts_mail_storage_hooks = { + .mail_namespaces_added = fts_mail_namespaces_added, + .mailbox_list_created = fts_mailbox_list_created, + .mailbox_allocated = fts_mailbox_allocated, + .mail_allocated = fts_mail_allocated +}; + +void fts_plugin_init(struct module *module) +{ + fts_library_init(); + mail_storage_hooks_add(module, &fts_mail_storage_hooks); +} + +void fts_plugin_deinit(void) +{ + fts_library_deinit(); + fts_parsers_unload(); + mail_storage_hooks_remove(&fts_mail_storage_hooks); +} diff --git a/src/plugins/fts/fts-plugin.h b/src/plugins/fts/fts-plugin.h new file mode 100644 index 0000000..aeec68c --- /dev/null +++ b/src/plugins/fts/fts-plugin.h @@ -0,0 +1,7 @@ +#ifndef FTS_PLUGIN_H +#define FTS_PLUGIN_H + +void fts_plugin_init(struct module *module); +void fts_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts/fts-search-args.c b/src/plugins/fts/fts-search-args.c new file mode 100644 index 0000000..b58b238 --- /dev/null +++ b/src/plugins/fts/fts-search-args.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "mail-namespace.h" +#include "mail-search.h" +#include "fts-api-private.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-user.h" +#include "fts-search-args.h" + +static void strings_deduplicate(ARRAY_TYPE(const_string) *arr) +{ + const char *const *strings; + unsigned int i, count; + + strings = array_get(arr, &count); + for (i = 1; i < count; ) { + if (strcmp(strings[i-1], strings[i]) == 0) { + array_delete(arr, i, 1); + strings = array_get(arr, &count); + } else { + i++; + } + } +} + +static struct mail_search_arg * +fts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool, + const ARRAY_TYPE(const_string) *tokens) +{ + struct mail_search_arg *arg, *or_arg, **argp; + const char *token; + + /* create the OR arg first as the parent */ + or_arg = p_new(pool, struct mail_search_arg, 1); + or_arg->type = SEARCH_OR; + + /* now create all the child args for the OR */ + argp = &or_arg->value.subargs; + array_foreach_elem(tokens, token) { + arg = p_new(pool, struct mail_search_arg, 1); + *arg = *orig_arg; + arg->match_not = FALSE; /* we copied this to the root OR */ + arg->next = NULL; + arg->value.str = p_strdup(pool, token); + + *argp = arg; + argp = &arg->next; + } + return or_arg; +} + +static int +fts_backend_dovecot_expand_tokens(struct fts_filter *filter, + pool_t pool, + struct mail_search_arg *parent_arg, + const struct mail_search_arg *orig_arg, + const char *orig_token, const char *token, + const char **error_r) +{ + struct mail_search_arg *arg; + ARRAY_TYPE(const_string) tokens; + const char *token2, *error; + int ret; + + t_array_init(&tokens, 4); + /* first add the word exactly as it without any tokenization */ + array_push_back(&tokens, &orig_token); + /* then add it tokenized, but without filtering */ + array_push_back(&tokens, &token); + + /* add the word filtered */ + if (filter != NULL) { + token2 = t_strdup(token); + ret = fts_filter_filter(filter, &token2, &error); + if (ret > 0) { + token2 = t_strdup(token2); + array_push_back(&tokens, &token2); + } else if (ret < 0) { + *error_r = t_strdup_printf("Couldn't filter search token: %s", error); + return -1; + } else { + /* The filter dropped the token, which means it was + never even indexed. Ignore this word entirely in the + search query. */ + return 0; + } + } + array_sort(&tokens, i_strcmp_p); + strings_deduplicate(&tokens); + + arg = fts_search_arg_create_or(orig_arg, pool, &tokens); + arg->next = parent_arg->value.subargs; + parent_arg->value.subargs = arg; + return 0; +} + +static int +fts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang, + pool_t pool, struct mail_search_arg *or_arg, + struct mail_search_arg *orig_arg, + const char *orig_token, const char **error_r) +{ + size_t orig_token_len = strlen(orig_token); + struct mail_search_arg *and_arg, *orig_or_args = or_arg->value.subargs; + const char *token, *error; + int ret; + + /* we want all the tokens found from the string to be found, so create + a parent AND and place all the filtered token alternatives under + it */ + and_arg = p_new(pool, struct mail_search_arg, 1); + and_arg->type = SEARCH_SUB; + and_arg->next = orig_or_args; + or_arg->value.subargs = and_arg; + + /* reset tokenizer between search args in case there's any state left + from some previous failure */ + fts_tokenizer_reset(user_lang->search_tokenizer); + while ((ret = fts_tokenizer_next(user_lang->search_tokenizer, + (const void *)orig_token, + orig_token_len, &token, &error)) > 0) { + if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, + and_arg, orig_arg, orig_token, + token, error_r) < 0) + return -1; + } + while (ret >= 0 && + (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) { + if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool, + and_arg, orig_arg, orig_token, + token, error_r) < 0) + return -1; + } + if (ret < 0) { + *error_r = t_strdup_printf("Couldn't tokenize search args: %s", error); + return -1; + } + if (and_arg->value.subargs == NULL) { + /* nothing was actually expanded, remove the empty and_arg */ + or_arg->value.subargs = orig_or_args; + } + return 0; +} + +static int fts_search_arg_expand(struct fts_backend *backend, pool_t pool, + struct mail_search_arg **argp) +{ + const ARRAY_TYPE(fts_user_language) *languages; + struct fts_user_language *lang; + struct mail_search_arg *or_arg, *orig_arg = *argp; + const char *error, *orig_token = orig_arg->value.str; + + if (((*argp)->type == SEARCH_HEADER || + (*argp)->type == SEARCH_HEADER_ADDRESS || + (*argp)->type == SEARCH_HEADER_COMPRESS_LWSP) && + !fts_header_has_language((*argp)->hdr_field_name)) { + /* use only the data-language */ + languages = fts_user_get_data_languages(backend->ns->user); + } else { + languages = fts_user_get_all_languages(backend->ns->user); + } + + /* OR together all the different expansions for different languages. + it's enough for one of them to match. */ + or_arg = p_new(pool, struct mail_search_arg, 1); + or_arg->type = SEARCH_OR; + or_arg->match_not = orig_arg->match_not; + or_arg->next = orig_arg->next; + + array_foreach_elem(languages, lang) { + if (fts_backend_dovecot_tokenize_lang(lang, pool, or_arg, + orig_arg, orig_token, &error) < 0) { + i_error("fts: %s", error); + return -1; + } + } + + if (or_arg->value.subargs == NULL) { + /* we couldn't parse any tokens from the input */ + or_arg->type = SEARCH_ALL; + or_arg->match_not = !or_arg->match_not; + } + *argp = or_arg; + return 0; +} + +static int +fts_search_args_expand_tree(struct fts_backend *backend, pool_t pool, + struct mail_search_arg **argp) +{ + int ret; + + for (; *argp != NULL; argp = &(*argp)->next) { + switch ((*argp)->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_search_args_expand_tree(backend, pool, + &(*argp)->value.subargs) < 0) + return -1; + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if ((*argp)->value.str[0] == '\0') { + /* we're testing for the existence of + the header */ + break; + } + /* fall through */ + case SEARCH_BODY: + case SEARCH_TEXT: + T_BEGIN { + ret = fts_search_arg_expand(backend, pool, argp); + } T_END; + if (ret < 0) + return -1; + break; + default: + break; + } + } + return 0; +} + +int fts_search_args_expand(struct fts_backend *backend, + struct mail_search_args *args) +{ + struct mail_search_arg *args_dup, *orig_args = args->args; + + /* don't keep re-expanding every time the search args are used. + this is especially important to avoid an assert-crash in + index_search_result_update_flags(). */ + if (args->fts_expanded) + return 0; + args->fts_expanded = TRUE; + + /* duplicate the args, so if expansion fails we haven't changed + anything */ + args_dup = mail_search_arg_dup(args->pool, args->args); + + if (fts_search_args_expand_tree(backend, args->pool, &args_dup) < 0) + return -1; + + /* we'll need to re-simplify the args if we changed anything */ + args->simplified = FALSE; + args->args = args_dup; + mail_search_args_simplify(args); + + /* duplicated args aren't initialized */ + i_assert(args->init_refcount > 0); + mail_search_arg_init(args, args_dup); + mail_search_arg_deinit(orig_args); + return 0; +} diff --git a/src/plugins/fts/fts-search-args.h b/src/plugins/fts/fts-search-args.h new file mode 100644 index 0000000..9fb8923 --- /dev/null +++ b/src/plugins/fts/fts-search-args.h @@ -0,0 +1,7 @@ +#ifndef FTS_SEARCH_ARGS_H +#define FTS_SEARCH_ARGS_H + +int fts_search_args_expand(struct fts_backend *backend, + struct mail_search_args *args); + +#endif diff --git a/src/plugins/fts/fts-search-serialize.c b/src/plugins/fts/fts-search-serialize.c new file mode 100644 index 0000000..e30d4ce --- /dev/null +++ b/src/plugins/fts/fts-search-serialize.c @@ -0,0 +1,99 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "mail-search.h" +#include "fts-search-serialize.h" + +#define HAVE_SUBARGS(arg) \ + ((arg)->type == SEARCH_SUB || (arg)->type == SEARCH_OR) + +void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args) +{ + char chr; + + for (; args != NULL; args = args->next) { + chr = (args->match_always ? 1 : 0) | + (args->nonmatch_always ? 2 : 0); + buffer_append_c(buf, chr); + + if (HAVE_SUBARGS(args)) + fts_search_serialize(buf, args->value.subargs); + } +} + +static void fts_search_deserialize_idx(struct mail_search_arg *args, + const buffer_t *buf, unsigned int *idx) +{ + const char *data = buf->data; + + for (; args != NULL; args = args->next) { + i_assert(*idx < buf->used); + + args->match_always = (data[*idx] & 1) != 0; + args->nonmatch_always = (data[*idx] & 2) != 0; + args->result = args->match_always ? 1 : + (args->nonmatch_always ? 0 : -1); + *idx += 1; + + if (HAVE_SUBARGS(args)) { + fts_search_deserialize_idx(args->value.subargs, + buf, idx); + } + } +} + +void fts_search_deserialize(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_idx(args, buf, &idx); + i_assert(idx == buf->used); +} + +static void +fts_search_deserialize_add_idx(struct mail_search_arg *args, + const buffer_t *buf, unsigned int *idx, + bool matches) +{ + const char *data = buf->data; + + for (; args != NULL; args = args->next) { + i_assert(*idx < buf->used); + + if (data[*idx] != 0) { + if (matches) { + args->match_always = TRUE; + args->result = 1; + } else { + args->nonmatch_always = TRUE; + args->result = 0; + } + } + *idx += 1; + + if (HAVE_SUBARGS(args)) { + fts_search_deserialize_add_idx(args->value.subargs, + buf, idx, matches); + } + } +} + +void fts_search_deserialize_add_matches(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_add_idx(args, buf, &idx, TRUE); + i_assert(idx == buf->used); +} + +void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args, + const buffer_t *buf) +{ + unsigned int idx = 0; + + fts_search_deserialize_add_idx(args, buf, &idx, FALSE); + i_assert(idx == buf->used); +} diff --git a/src/plugins/fts/fts-search-serialize.h b/src/plugins/fts/fts-search-serialize.h new file mode 100644 index 0000000..c1a7d88 --- /dev/null +++ b/src/plugins/fts/fts-search-serialize.h @@ -0,0 +1,16 @@ +#ifndef FTS_SEARCH_SERIALIZE_H +#define FTS_SEARCH_SERIALIZE_H + +/* serialize [non]match_always fields (clearing buffer) */ +void fts_search_serialize(buffer_t *buf, const struct mail_search_arg *args); +/* add/remove [non]match_always fields in search args */ +void fts_search_deserialize(struct mail_search_arg *args, + const buffer_t *buf); +/* add match_always=TRUE fields to search args */ +void fts_search_deserialize_add_matches(struct mail_search_arg *args, + const buffer_t *buf); +/* add nonmatch_always=TRUE fields to search args */ +void fts_search_deserialize_add_nonmatches(struct mail_search_arg *args, + const buffer_t *buf); + +#endif diff --git a/src/plugins/fts/fts-search.c b/src/plugins/fts/fts-search.c new file mode 100644 index 0000000..895ea59 --- /dev/null +++ b/src/plugins/fts/fts-search.c @@ -0,0 +1,385 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "seq-range-array.h" +#include "mail-search.h" +#include "fts-api-private.h" +#include "fts-search-args.h" +#include "fts-search-serialize.h" +#include "fts-storage.h" +#include "hash.h" + +static void +uid_range_to_seqs(struct fts_search_context *fctx, + const ARRAY_TYPE(seq_range) *uid_range, + ARRAY_TYPE(seq_range) *seq_range) +{ + const struct seq_range *range; + unsigned int i, count; + uint32_t seq1, seq2; + + range = array_get(uid_range, &count); + if (!array_is_created(seq_range)) + p_array_init(seq_range, fctx->result_pool, count); + for (i = 0; i < count; i++) { + if (range[i].seq1 > range[i].seq2) + continue; + mailbox_get_seq_range(fctx->box, range[i].seq1, range[i].seq2, + &seq1, &seq2); + if (seq1 != 0) + seq_range_array_add_range(seq_range, seq1, seq2); + } +} + +static int fts_search_lookup_level_single(struct fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + enum fts_lookup_flags flags = fctx->flags | + (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0); + struct fts_search_level *level; + struct fts_result result; + + i_zero(&result); + result.search_state = fctx->search_state; + result.pool = fctx->result_pool; + p_array_init(&result.definite_uids, fctx->result_pool, 32); + p_array_init(&result.maybe_uids, fctx->result_pool, 32); + p_array_init(&result.scores, fctx->result_pool, 32); + + mail_search_args_reset(args, TRUE); + if (fts_backend_lookup(fctx->backend, fctx->box, args, flags, + &result) < 0) + return -1; + + fctx->search_state = result.search_state; + level = array_append_space(&fctx->levels); + level->args_matches = buffer_create_dynamic(fctx->result_pool, 16); + fts_search_serialize(level->args_matches, args); + + uid_range_to_seqs(fctx, &result.definite_uids, &level->definite_seqs); + uid_range_to_seqs(fctx, &result.maybe_uids, &level->maybe_seqs); + level->score_map = result.scores; + return 0; +} + +static void +level_scores_add_vuids(struct mailbox *box, + struct fts_search_level *level, struct fts_result *br) +{ + const struct fts_score_map *scores; + unsigned int i, count; + ARRAY_TYPE(seq_range) backend_uids; + ARRAY_TYPE(uint32_t) vuids_arr; + const uint32_t *vuids; + struct fts_score_map *score; + + scores = array_get(&br->scores, &count); + t_array_init(&vuids_arr, count); + t_array_init(&backend_uids, 64); + for (i = 0; i < count; i++) + seq_range_array_add(&backend_uids, scores[i].uid); + box->virtual_vfuncs->get_virtual_uid_map(box, br->box, + &backend_uids, &vuids_arr); + + i_assert(array_count(&vuids_arr) == array_count(&br->scores)); + vuids = array_get(&vuids_arr, &count); + for (i = 0; i < count; i++) { + score = array_append_space(&level->score_map); + score->uid = vuids[i]; + score->score = scores[i].score; + } +} + +static int +mailbox_cmp_fts_backend(struct mailbox *const *m1, struct mailbox *const *m2) +{ + struct fts_backend *b1, *b2; + + b1 = fts_mailbox_backend(*m1); + b2 = fts_mailbox_backend(*m2); + if (b1 < b2) + return -1; + if (b1 > b2) + return 1; + return 0; +} + +static int +multi_add_lookup_result(struct fts_search_context *fctx, + struct fts_search_level *level, + struct mail_search_arg *args, + struct fts_multi_result *result) +{ + ARRAY_TYPE(seq_range) vuids; + size_t orig_size; + unsigned int i; + + orig_size = level->args_matches->used; + fts_search_serialize(level->args_matches, args); + if (orig_size > 0) { + if (level->args_matches->used != orig_size * 2 || + memcmp(level->args_matches->data, + CONST_PTR_OFFSET(level->args_matches->data, + orig_size), orig_size) != 0) + i_panic("incompatible fts backends for namespaces"); + buffer_set_used_size(level->args_matches, orig_size); + } + + t_array_init(&vuids, 64); + for (i = 0; result->box_results[i].box != NULL; i++) { + struct fts_result *br = &result->box_results[i]; + + array_clear(&vuids); + if (array_is_created(&br->definite_uids)) { + fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box, + br->box, &br->definite_uids, &vuids); + } + uid_range_to_seqs(fctx, &vuids, &level->definite_seqs); + + array_clear(&vuids); + if (array_is_created(&br->maybe_uids)) { + fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box, + br->box, &br->maybe_uids, &vuids); + } + uid_range_to_seqs(fctx, &vuids, &level->maybe_seqs); + + if (array_is_created(&br->scores)) + level_scores_add_vuids(fctx->box, level, br); + } + return 0; +} + +static int fts_search_lookup_level_multi(struct fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + enum fts_lookup_flags flags = fctx->flags | + (and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0); + ARRAY_TYPE(mailboxes) mailboxes_arr, tmp_mailboxes; + struct mailbox *const *mailboxes; + struct fts_backend *backend; + struct fts_search_level *level; + struct fts_multi_result result; + unsigned int i, j, mailbox_count; + + p_array_init(&mailboxes_arr, fctx->result_pool, 8); + fctx->box->virtual_vfuncs->get_virtual_backend_boxes(fctx->box, + &mailboxes_arr, TRUE); + array_sort(&mailboxes_arr, mailbox_cmp_fts_backend); + + i_zero(&result); + result.search_state = fctx->search_state; + result.pool = fctx->result_pool; + + level = array_append_space(&fctx->levels); + level->args_matches = buffer_create_dynamic(fctx->result_pool, 16); + p_array_init(&level->score_map, fctx->result_pool, 1); + + mailboxes = array_get(&mailboxes_arr, &mailbox_count); + t_array_init(&tmp_mailboxes, mailbox_count); + for (i = 0; i < mailbox_count; i = j) { + array_clear(&tmp_mailboxes); + array_push_back(&tmp_mailboxes, &mailboxes[i]); + + backend = fts_mailbox_backend(mailboxes[i]); + for (j = i + 1; j < mailbox_count; j++) { + if (fts_mailbox_backend(mailboxes[j]) != backend) + break; + array_push_back(&tmp_mailboxes, &mailboxes[j]); + } + array_append_zero(&tmp_mailboxes); + + mail_search_args_reset(args, TRUE); + if (fts_backend_lookup_multi(backend, + array_front(&tmp_mailboxes), + args, flags, &result) < 0) + return -1; + + if (multi_add_lookup_result(fctx, level, args, &result) < 0) + return -1; + } + fctx->search_state = result.search_state; + return 0; +} + +static int fts_search_lookup_level(struct fts_search_context *fctx, + struct mail_search_arg *args, + bool and_args) +{ + int ret; + + T_BEGIN { + ret = !fctx->virtual_mailbox ? + fts_search_lookup_level_single(fctx, args, and_args) : + fts_search_lookup_level_multi(fctx, args, and_args); + } T_END; + if (ret < 0) + return -1; + + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + if (fts_search_lookup_level(fctx, args->value.subargs, + args->type == SEARCH_SUB) < 0) + return -1; + } + return 0; +} + +static void +fts_search_merge_scores_and(ARRAY_TYPE(fts_score_map) *dest, + const ARRAY_TYPE(fts_score_map) *src) +{ + struct fts_score_map *dest_map; + const struct fts_score_map *src_map; + unsigned int desti, srci, dest_count, src_count; + + dest_map = array_get_modifiable(dest, &dest_count); + src_map = array_get(src, &src_count); + + /* arg_scores are summed to current scores. we could drop UIDs that + don't exist in both, but that's just extra work so don't bother */ + for (desti = srci = 0; desti < dest_count && srci < src_count;) { + if (dest_map[desti].uid < src_map[srci].uid) + desti++; + else if (dest_map[desti].uid > src_map[srci].uid) + srci++; + else { + if (dest_map[desti].score < src_map[srci].score) + dest_map[desti].score = src_map[srci].score; + desti++; srci++; + } + } +} + +static void +fts_search_merge_scores_or(ARRAY_TYPE(fts_score_map) *dest, + const ARRAY_TYPE(fts_score_map) *src) +{ + ARRAY_TYPE(fts_score_map) src2; + const struct fts_score_map *src_map, *src2_map; + unsigned int srci, src2i, src_count, src2_count; + + t_array_init(&src2, array_count(dest)); + array_append_array(&src2, dest); + array_clear(dest); + + src_map = array_get(src, &src_count); + src2_map = array_get(&src2, &src2_count); + + /* add any missing UIDs to current scores. if any existing UIDs have + lower scores than in arg_scores, increase them. */ + for (srci = src2i = 0; srci < src_count || src2i < src2_count;) { + if (src2i == src2_count || + src_map[srci].uid < src2_map[src2i].uid) { + array_push_back(dest, &src_map[srci]); + srci++; + } else if (srci == src_count || + src_map[srci].uid > src2_map[src2i].uid) { + array_push_back(dest, &src2_map[src2i]); + src2i++; + } else { + i_assert(src_map[srci].uid == src2_map[src2i].uid); + if (src_map[srci].score > src2_map[src2i].score) + array_push_back(dest, &src_map[srci]); + else + array_push_back(dest, &src2_map[src2i]); + srci++; src2i++; + } + } +} + +static void +fts_search_merge_scores_level(struct fts_search_context *fctx, + struct mail_search_arg *args, unsigned int *idx, + bool and_args, ARRAY_TYPE(fts_score_map) *scores) +{ + const struct fts_search_level *level; + ARRAY_TYPE(fts_score_map) arg_scores; + + i_assert(array_count(scores) == 0); + + /* + The (simplified) args can look like: + + A and B and (C or D) and (E or F) and ... + A or B or (C and D) or (E and F) or ... + + The A op B part's scores are in level->scores. The child args' + scores are in the sub levels' scores. + */ + + level = array_idx(&fctx->levels, *idx); + array_append_array(scores, &level->score_map); + + t_array_init(&arg_scores, 64); + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + *idx += 1; + array_clear(&arg_scores); + fts_search_merge_scores_level(fctx, args->value.subargs, idx, + args->type == SEARCH_OR, + &arg_scores); + + if (and_args) + fts_search_merge_scores_and(scores, &arg_scores); + else + fts_search_merge_scores_or(scores, &arg_scores); + } +} + +static void fts_search_merge_scores(struct fts_search_context *fctx) +{ + unsigned int idx = 0; + + fts_search_merge_scores_level(fctx, fctx->args->args, &idx, + TRUE, &fctx->scores->score_map); +} + +static void fts_search_try_lookup(struct fts_search_context *fctx) +{ + uint32_t last_uid, seq1, seq2; + + i_assert(array_count(&fctx->levels) == 0); + i_assert(fctx->args->simplified); + + if (fts_backend_refresh(fctx->backend) < 0) + return; + if (fts_backend_get_last_uid(fctx->backend, fctx->box, &last_uid) < 0) + return; + mailbox_get_seq_range(fctx->box, last_uid+1, (uint32_t)-1, + &seq1, &seq2); + fctx->first_unindexed_seq = seq1 != 0 ? seq1 : (uint32_t)-1; + + if (fctx->virtual_mailbox) { + hash_table_clear(fctx->last_indexed_virtual_uids, TRUE); + fctx->next_unindexed_seq = fctx->first_unindexed_seq; + } + + if ((fctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) { + if (fts_search_args_expand(fctx->backend, fctx->args) < 0) + return; + } + fts_search_serialize(fctx->orig_matches, fctx->args->args); + + if (fts_search_lookup_level(fctx, fctx->args->args, TRUE) == 0) { + fctx->fts_lookup_success = TRUE; + fts_search_merge_scores(fctx); + } + + fts_search_deserialize(fctx->args->args, fctx->orig_matches); + fts_backend_lookup_done(fctx->backend); +} + +void fts_search_lookup(struct fts_search_context *fctx) +{ + struct event_reason *reason = event_reason_begin("fts:lookup"); + fts_search_try_lookup(fctx); + event_reason_end(&reason); +} diff --git a/src/plugins/fts/fts-storage.c b/src/plugins/fts/fts-storage.c new file mode 100644 index 0000000..101d52a --- /dev/null +++ b/src/plugins/fts/fts-storage.c @@ -0,0 +1,981 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "net.h" +#include "str.h" +#include "strescape.h" +#include "write-full.h" +#include "mail-search-build.h" +#include "mail-storage-private.h" +#include "mailbox-list-private.h" +#include "fts-api-private.h" +#include "fts-tokenizer.h" +#include "fts-indexer.h" +#include "fts-build-mail.h" +#include "fts-search-serialize.h" +#include "fts-plugin.h" +#include "fts-user.h" +#include "fts-storage.h" +#include "hash.h" + + +#define FTS_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_storage_module) +#define FTS_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_storage_module) +#define FTS_MAIL_CONTEXT(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_mail_module) +#define FTS_LIST_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_mailbox_list_module) +#define FTS_LIST_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_mailbox_list_module) + +#define INDEXER_SOCKET_NAME "indexer" +#define INDEXER_HANDSHAKE "VERSION\tindexer\t1\t0\n" + +struct fts_mailbox_list { + union mailbox_list_module_context module_ctx; + struct fts_backend *backend; + + const char *backend_name; + struct fts_backend_update_context *update_ctx; + unsigned int update_ctx_refcount; + + bool failed:1; +}; + +struct fts_mailbox { + union mailbox_module_context module_ctx; + struct fts_backend_update_context *sync_update_ctx; + bool fts_mailbox_excluded; +}; + +struct fts_transaction_context { + union mailbox_transaction_module_context module_ctx; + + struct fts_scores *scores; + uint32_t next_index_seq; + uint32_t highest_virtual_uid; + unsigned int precache_extra_count; + + bool indexing:1; + bool precached:1; + bool mails_saved:1; + const char *failure_reason; +}; + +struct fts_mail { + union mail_module_context module_ctx; + char score[30]; + + bool virtual_mail:1; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_storage_module, + &mail_storage_module_register); +static MODULE_CONTEXT_DEFINE_INIT(fts_mail_module, &mail_module_register); +static MODULE_CONTEXT_DEFINE_INIT(fts_mailbox_list_module, + &mailbox_list_module_register); + +static int fts_mailbox_get_last_cached_seq(struct mailbox *box, uint32_t *seq_r) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + uint32_t seq1, seq2, last_uid; + + if (fts_backend_get_last_uid(flist->backend, box, &last_uid) < 0) { + mail_storage_set_internal_error(box->storage); + return -1; + } + + if (last_uid == 0) + *seq_r = 0; + else { + mailbox_get_seq_range(box, 1, last_uid, &seq1, &seq2); + *seq_r = seq2; + } + return 0; +} + +static int +fts_mailbox_get_status(struct mailbox *box, enum mailbox_status_items items, + struct mailbox_status *status_r) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + uint32_t seq; + + if (fbox->module_ctx.super.get_status(box, items, status_r) < 0) + return -1; + + if ((items & STATUS_LAST_CACHED_SEQ) != 0) { + if (fts_mailbox_get_last_cached_seq(box, &seq) < 0) + return -1; + + /* Always use the FTS's last_cached_seq. This is because we + don't want to reindex all mails to FTS if .cache file is + deleted. */ + status_r->last_cached_seq = seq; + } + return 0; +} + + +static void fts_scores_unref(struct fts_scores **_scores) +{ + struct fts_scores *scores = *_scores; + + *_scores = NULL; + if (--scores->refcount == 0) { + array_free(&scores->score_map); + i_free(scores); + } +} + +static void fts_try_build_init(struct mail_search_context *ctx, + struct fts_search_context *fctx) +{ + int ret; + + i_assert(!fts_backend_is_updating(fctx->backend)); + + ret = fts_indexer_init(fctx->backend, ctx->transaction->box, + &fctx->indexer_ctx); + if (ret < 0) + return; + + if (ret == 0) { + /* the index was up to date */ + fts_search_lookup(fctx); + } else { + /* hide "searching" notifications while building index */ + ctx->progress_hidden = TRUE; + } +} + +static bool fts_want_build_args(const struct mail_search_arg *args) +{ + /* we want to update index only when searching from message body. + it's not worth the wait for searching only from headers, which + could be in cache file already */ + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_want_build_args(args->value.subargs)) + return TRUE; + break; + case SEARCH_BODY: + case SEARCH_TEXT: + if (!args->no_fts) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +static bool fts_args_have_fuzzy(const struct mail_search_arg *args) +{ + for (; args != NULL; args = args->next) { + if (args->fuzzy) + return TRUE; + switch (args->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + if (fts_args_have_fuzzy(args->value.subargs)) + return TRUE; + break; + default: + break; + } + } + return FALSE; +} + +static enum fts_enforced fts_enforced_parse(const char *str) +{ + if (str == NULL || strcmp(str, "no") == 0) + return FTS_ENFORCED_NO; + else if (strcmp(str, "body") == 0) + return FTS_ENFORCED_BODY; + else + return FTS_ENFORCED_YES; +} + +static struct mail_search_context * +fts_mailbox_search_init(struct mailbox_transaction_context *t, + struct mail_search_args *args, + const enum mail_sort_type *sort_program, + enum mail_fetch_field wanted_fields, + struct mailbox_header_lookup_ctx *wanted_headers) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list); + struct mail_search_context *ctx; + struct fts_search_context *fctx; + + ctx = fbox->module_ctx.super.search_init(t, args, sort_program, + wanted_fields, wanted_headers); + + if (!fts_backend_can_lookup(flist->backend, args->args)) + return ctx; + + fctx = i_new(struct fts_search_context, 1); + fctx->box = t->box; + fctx->backend = flist->backend; + fctx->t = t; + fctx->args = args; + fctx->result_pool = pool_alloconly_create("fts results", 1024*64); + fctx->orig_matches = buffer_create_dynamic(default_pool, 64); + fctx->virtual_mailbox = t->box->virtual_vfuncs != NULL; + if (fctx->virtual_mailbox) { + hash_table_create(&fctx->last_indexed_virtual_uids, + default_pool, 0, str_hash, strcmp); + } + fctx->enforced = fts_enforced_parse( + mail_user_plugin_getenv(t->box->storage->user, "fts_enforced")); + i_array_init(&fctx->levels, 8); + fctx->scores = i_new(struct fts_scores, 1); + fctx->scores->refcount = 1; + i_array_init(&fctx->scores->score_map, 64); + MODULE_CONTEXT_SET(ctx, fts_storage_module, fctx); + + /* FIXME: we'll assume that all the args are fuzzy. not good, + but would require much more work to fix it. */ + if (!fts_args_have_fuzzy(args->args) && + mail_user_plugin_getenv_bool(t->box->storage->user, + "fts_no_autofuzzy")) + fctx->flags |= FTS_LOOKUP_FLAG_NO_AUTO_FUZZY; + /* transaction contains the last search's scores. they can be + queried later with mail_get_special() */ + if (ft->scores != NULL) + fts_scores_unref(&ft->scores); + ft->scores = fctx->scores; + ft->scores->refcount++; + + if (fctx->enforced == FTS_ENFORCED_YES || + fts_want_build_args(args->args)) + fts_try_build_init(ctx, fctx); + else + fts_search_lookup(fctx); + return ctx; +} + +static bool fts_mailbox_build_continue(struct mail_search_context *ctx) +{ + struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx); + int ret; + + ret = fts_indexer_more(fctx->indexer_ctx); + if (ret == 0) + return FALSE; + + /* indexing finished */ + ctx->progress_hidden = FALSE; + if (fts_indexer_deinit(&fctx->indexer_ctx) < 0) + ret = -1; + if (ret > 0) + fts_search_lookup(fctx); + if (ret < 0) { + /* if indexing timed out, it probably means that + the mailbox is still being indexed, but it's a large + mailbox and it takes a while. in this situation + we'll simply abort the search. + + if indexing failed for any other reason, just + fallback to searching the slow way. */ + fctx->indexing_timed_out = + mailbox_get_last_mail_error(fctx->box) == MAIL_ERROR_INUSE; + } + return TRUE; +} + +static bool +fts_mailbox_search_next_nonblock(struct mail_search_context *ctx, + struct mail **mail_r, bool *tryagain_r) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + + if (fctx != NULL && fctx->indexer_ctx != NULL) { + /* this command is still building the indexes */ + if (!fts_mailbox_build_continue(ctx)) { + *tryagain_r = TRUE; + return FALSE; + } + if (fctx->indexing_timed_out) { + *tryagain_r = FALSE; + return FALSE; + } + } + if (fctx != NULL && !fctx->fts_lookup_success && + fctx->enforced != FTS_ENFORCED_NO) + return FALSE; + + return fbox->module_ctx.super. + search_next_nonblock(ctx, mail_r, tryagain_r); +} + +static void +fts_search_apply_results_level(struct mail_search_context *ctx, + struct mail_search_arg *args, unsigned int *idx) +{ + struct fts_search_context *fctx = FTS_CONTEXT_REQUIRE(ctx); + const struct fts_search_level *level; + + level = array_idx(&fctx->levels, *idx); + + if (array_is_created(&level->definite_seqs) && + seq_range_exists(&level->definite_seqs, ctx->seq)) + fts_search_deserialize_add_matches(args, level->args_matches); + else if (!array_is_created(&level->maybe_seqs) || + !seq_range_exists(&level->maybe_seqs, ctx->seq)) + fts_search_deserialize_add_nonmatches(args, level->args_matches); + + for (; args != NULL; args = args->next) { + if (args->type != SEARCH_OR && args->type != SEARCH_SUB) + continue; + + *idx += 1; + fts_search_apply_results_level(ctx, args->value.subargs, idx); + } +} + +static bool fts_mailbox_search_next_update_seq(struct mail_search_context *ctx) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + unsigned int idx; + + if (fctx == NULL || !fctx->fts_lookup_success) { + /* fts lookup not done for this search */ + if (fctx != NULL && fctx->indexing_timed_out) + return FALSE; + return fbox->module_ctx.super.search_next_update_seq(ctx); + } + + /* restore original [non]matches */ + fts_search_deserialize(ctx->args->args, fctx->orig_matches); + + if (!fbox->module_ctx.super.search_next_update_seq(ctx)) + return FALSE; + + if (ctx->seq >= fctx->first_unindexed_seq) { + /* we've not indexed this far */ + return TRUE; + } + + /* apply [non]matches based on the FTS lookup results */ + idx = 0; + fts_search_apply_results_level(ctx, ctx->args->args, &idx); + return TRUE; +} + +static int fts_mailbox_search_deinit(struct mail_search_context *ctx) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + int ret = 0; + + if (fctx != NULL) { + if (fctx->virtual_mailbox) + hash_table_destroy(&fctx->last_indexed_virtual_uids); + if (fctx->indexer_ctx != NULL) { + if (fts_indexer_deinit(&fctx->indexer_ctx) < 0) + ft->failure_reason = "FTS indexing failed"; + } + if (fctx->indexing_timed_out) + ret = -1; + else if (!fctx->fts_lookup_success && + fctx->enforced != FTS_ENFORCED_NO) { + /* FTS lookup failed and we didn't want to fallback to + opening all the mails and searching manually */ + mail_storage_set_internal_error(ctx->transaction->box->storage); + ret = -1; + } + + buffer_free(&fctx->orig_matches); + array_free(&fctx->levels); + pool_unref(&fctx->result_pool); + fts_scores_unref(&fctx->scores); + i_free(fctx); + } + if (fbox->module_ctx.super.search_deinit(ctx) < 0) + ret = -1; + return ret; +} + +static int fts_score_cmp(const uint32_t *uid, const struct fts_score_map *score) +{ + return *uid < score->uid ? -1 : + (*uid > score->uid ? 1 : 0); +} + +static int fts_mail_get_special(struct mail *_mail, enum mail_fetch_field field, + const char **value_r) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + const struct fts_score_map *scores; + + if (field != MAIL_FETCH_SEARCH_RELEVANCY || ft->scores == NULL) + scores = NULL; + else { + scores = array_bsearch(&ft->scores->score_map, &_mail->uid, + fts_score_cmp); + } + if (scores != NULL) { + i_assert(scores->uid == _mail->uid); + (void)i_snprintf(fmail->score, sizeof(fmail->score), + "%f", scores->score); + + *value_r = fmail->score; + return 0; + } + + return fmail->module_ctx.super.get_special(_mail, field, value_r); +} + +static int +fts_mail_precache_range(struct mailbox_transaction_context *trans, + struct fts_backend_update_context *update_ctx, + uint32_t seq1, uint32_t seq2, unsigned int *extra_count) +{ + struct mail_search_args *search_args; + struct mail_search_context *ctx; + struct mail *mail; + int ret = 0; + + search_args = mail_search_build_init(); + mail_search_build_add_seqset(search_args, seq1, seq2); + ctx = mailbox_search_init(trans, search_args, NULL, + MAIL_FETCH_STREAM_HEADER | + MAIL_FETCH_STREAM_BODY, NULL); + mail_search_args_unref(&search_args); + + while (mailbox_search_next(ctx, &mail)) { + if (fts_build_mail(update_ctx, mail) < 0) { + ret = -1; + break; + } + if (mail_precache(mail) < 0) { + ret = -1; + break; + } + *extra_count += 1; + } + if (mailbox_search_deinit(&ctx) < 0) + ret = -1; + return ret; +} + +static int fts_mail_precache_init(struct mail *_mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list); + uint32_t last_seq; + + if (fts_mailbox_get_last_cached_seq(_mail->box, &last_seq) < 0) { + ft->failure_reason = "Failed to lookup last indexed FTS mail"; + return -1; + } + + ft->precached = TRUE; + ft->next_index_seq = last_seq + 1; + if (flist->update_ctx == NULL) + flist->update_ctx = fts_backend_update_init(flist->backend); + flist->update_ctx_refcount++; + return 0; +} + +static int fts_mail_index(struct mail *_mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(_mail->box->list); + struct mail_private *pmail = (struct mail_private *)_mail; + + if (ft->failure_reason != NULL) + return -1; + + if (!ft->precached) { + if (fts_mail_precache_init(_mail) < 0) + return -1; + } + if (pmail->vmail != NULL) { + /* Indexing via virtual mailbox: Index all the mails in this + same real mailbox. */ + uint32_t msgs_count = + mail_index_view_get_messages_count(_mail->box->view); + + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (ft->next_index_seq > msgs_count) { + /* everything indexed already */ + return 0; + } else if (fts_mail_precache_range(_mail->transaction, + flist->update_ctx, + ft->next_index_seq, + msgs_count, + &ft->precache_extra_count) < 0) { + return -1; + } else { + ft->next_index_seq = msgs_count+1; + return 0; + } + } + + if (ft->next_index_seq < _mail->seq) { + /* we'll first need to index all the missing mails up to the + current one. */ + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (fts_mail_precache_range(_mail->transaction, + flist->update_ctx, + ft->next_index_seq, + _mail->seq-1, + &ft->precache_extra_count) < 0) + return -1; + ft->next_index_seq = _mail->seq; + } + + if (ft->next_index_seq == _mail->seq) { + fts_backend_update_set_mailbox(flist->update_ctx, _mail->box); + if (fts_build_mail(flist->update_ctx, _mail) < 0) + return -1; + ft->next_index_seq = _mail->seq + 1; + } + return 0; +} + +static int fts_mail_precache(struct mail *_mail) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(_mail->transaction); + int ret = 0; + + fmail->module_ctx.super.precache(_mail); + if (fmail->virtual_mail) { + if (ft->highest_virtual_uid < _mail->uid) + ft->highest_virtual_uid = _mail->uid; + } else if (!ft->indexing) T_BEGIN { + /* avoid recursing here from fts_mail_precache_range() */ + struct event_reason *reason = + event_reason_begin("fts:index"); + ft->indexing = TRUE; + ret = fts_mail_index(_mail); + i_assert(ft->indexing); + ft->indexing = FALSE; + event_reason_end(&reason); + } T_END; + return ret; +} + +void fts_mail_allocated(struct mail *_mail) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct mail_vfuncs *v = mail->vlast; + struct fts_mailbox *fbox = FTS_CONTEXT(_mail->box); + struct fts_mail *fmail; + + if (fbox == NULL) + return; + + fmail = p_new(mail->pool, struct fts_mail, 1); + fmail->module_ctx.super = *v; + mail->vlast = &fmail->module_ctx.super; + fmail->virtual_mail = _mail->box->virtual_vfuncs != NULL; + + v->get_special = fts_mail_get_special; + v->precache = fts_mail_precache; + MODULE_CONTEXT_SET(mail, fts_mail_module, fmail); +} + +static struct mailbox_transaction_context * +fts_transaction_begin(struct mailbox *box, + enum mailbox_transaction_flags flags, + const char *reason) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + struct mailbox_transaction_context *t; + struct fts_transaction_context *ft; + + ft = i_new(struct fts_transaction_context, 1); + + t = fbox->module_ctx.super.transaction_begin(box, flags, reason); + MODULE_CONTEXT_SET(t, fts_storage_module, ft); + return t; +} + +static int fts_transaction_end(struct mailbox_transaction_context *t, const char **error_r) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(t->box->list); + int ret = 0; + + if (ft->failure_reason != NULL) { + *error_r = t_strdup(ft->failure_reason); + ret = -1; + } + + struct event_reason *reason = event_reason_begin("fts:index"); + if (ft->precached) { + i_assert(flist->update_ctx_refcount > 0); + if (--flist->update_ctx_refcount == 0) { + if (fts_backend_update_deinit(&flist->update_ctx) < 0) { + ret = -1; + *error_r = "backend deinit"; + } + } + } else if (ft->highest_virtual_uid > 0) { + if (fts_index_set_last_uid(t->box, ft->highest_virtual_uid) < 0) { + ret = -1; + *error_r = "index last uid setting"; + } + } + if (ft->scores != NULL) + fts_scores_unref(&ft->scores); + if (ft->precache_extra_count > 0) { + if (ret < 0) { + i_error("fts: Failed after indexing %u extra mails internally in %s: %s", + ft->precache_extra_count, t->box->vname, *error_r); + } else { + i_info("fts: Indexed %u extra mails internally in %s", + ft->precache_extra_count, t->box->vname); + } + } + event_reason_end(&reason); + i_free(ft); + return ret; +} + +static void fts_transaction_rollback(struct mailbox_transaction_context *t) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + const char *error; + + (void)fts_transaction_end(t, &error); + fbox->module_ctx.super.transaction_rollback(t); +} + +static void fts_queue_index(struct mailbox *box) +{ + struct mail_user *user = box->storage->user; + string_t *str = t_str_new(256); + const char *path, *value; + unsigned int max_recent_msgs; + int fd; + + path = t_strconcat(user->set->base_dir, "/"INDEXER_SOCKET_NAME, NULL); + fd = net_connect_unix(path); + if (fd == -1) { + i_error("net_connect_unix(%s) failed: %m", path); + return; + } + + value = mail_user_plugin_getenv(user, "fts_autoindex_max_recent_msgs"); + if (value == NULL || str_to_uint(value, &max_recent_msgs) < 0) + max_recent_msgs = 0; + + str_append(str, INDEXER_HANDSHAKE); + str_append(str, "APPEND\t0\t"); + str_append_tabescaped(str, user->username); + str_append_c(str, '\t'); + str_append_tabescaped(str, box->vname); + str_printfa(str, "\t%u", max_recent_msgs); + str_append_c(str, '\t'); + str_append_tabescaped(str, box->storage->user->session_id); + str_append_c(str, '\n'); + if (write_full(fd, str_data(str), str_len(str)) < 0) + i_error("write(%s) failed: %m", path); + i_close_fd(&fd); +} + +static int +fts_transaction_commit(struct mailbox_transaction_context *t, + struct mail_transaction_commit_changes *changes_r) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(t); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(t->box); + struct mailbox *box = t->box; + bool autoindex; + int ret = 0; + const char *error; + + autoindex = ft->mails_saved && !fbox->fts_mailbox_excluded && + mail_user_plugin_getenv_bool(box->storage->user, + "fts_autoindex"); + + if (fts_transaction_end(t, &error) < 0) { + mail_storage_set_error(t->box->storage, MAIL_ERROR_TEMP, + t_strdup_printf("FTS transaction commit failed: %s", + error)); + ret = -1; + } + if (fbox->module_ctx.super.transaction_commit(t, changes_r) < 0) + ret = -1; + if (ret < 0) + return -1; + + if (autoindex) + fts_queue_index(box); + return 0; +} + +static void fts_mailbox_sync_notify(struct mailbox *box, uint32_t uid, + enum mailbox_sync_type sync_type) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + + if (fbox->module_ctx.super.sync_notify != NULL) + fbox->module_ctx.super.sync_notify(box, uid, sync_type); + + if (sync_type != MAILBOX_SYNC_TYPE_EXPUNGE) { + if (uid == 0 && fbox->sync_update_ctx != NULL) { + /* this sync is finished */ + (void)fts_backend_update_deinit(&fbox->sync_update_ctx); + } + return; + } + + if (fbox->sync_update_ctx == NULL) { + if (fts_backend_is_updating(flist->backend)) { + /* FIXME: maildir workaround - we could get here + because we're building an index, which doesn't find + some mail and starts syncing the mailbox.. */ + return; + } + fbox->sync_update_ctx = fts_backend_update_init(flist->backend); + fts_backend_update_set_mailbox(fbox->sync_update_ctx, box); + } + fts_backend_update_expunge(fbox->sync_update_ctx, uid); +} + +static int fts_sync_deinit(struct mailbox_sync_context *ctx, + struct mailbox_sync_status *status_r) +{ + struct mailbox *box = ctx->box; + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(box); + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list); + bool optimize; + int ret = 0; + + optimize = (ctx->flags & (MAILBOX_SYNC_FLAG_FORCE_RESYNC | + MAILBOX_SYNC_FLAG_OPTIMIZE)) != 0; + if (fbox->module_ctx.super.sync_deinit(ctx, status_r) < 0) + return -1; + ctx = NULL; + + if (optimize) { + i_assert(flist != NULL); + if (fts_backend_optimize(flist->backend) < 0) { + mailbox_set_critical(box, "FTS optimize failed"); + ret = -1; + } + } + return ret; +} + +static int fts_save_finish(struct mail_save_context *ctx) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + if (fbox->module_ctx.super.save_finish(ctx) < 0) + return -1; + ft->mails_saved = TRUE; + return 0; +} + +static int fts_copy(struct mail_save_context *ctx, struct mail *mail) +{ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + if (fbox->module_ctx.super.copy(ctx, mail) < 0) + return -1; + ft->mails_saved = TRUE; + return 0; +} + +static void fts_mailbox_virtual_match_mail(struct mail_search_context *ctx, + struct mail *mail) +{ + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + unsigned int idx, be_last_uid; + + if (fctx == NULL || !fctx->fts_lookup_success || !fctx->virtual_mailbox || + ctx->seq < fctx->first_unindexed_seq) + return; + /* Table of last indexed UID per backend mailbox */ + HASH_TABLE_TYPE(virtual_last_indexed) hash_tbl = + fctx->last_indexed_virtual_uids; + + struct mail *backend_mail; + if (mail->box->mail_vfuncs->get_backend_mail(mail, &backend_mail) < 0) + return; + const char *box_name = backend_mail->box->vname; + /* Get the last indexed UID in the backend mailbox */ + void *uid_value = + hash_table_lookup(fctx->last_indexed_virtual_uids, box_name); + if (uid_value == NULL) { + /* This backend's last indexed uid is not yet inserted to the table */ + struct fts_mailbox_list *flist = + FTS_LIST_CONTEXT(backend_mail->box->list); + if (flist == NULL || flist->failed || + mailbox_open(backend_mail->box) < 0 || + fts_backend_get_last_uid(flist->backend, backend_mail->box, + &be_last_uid) < 0) { + be_last_uid = 0; + } else { + const char *vname_copy = + p_strdup(fctx->result_pool, backend_mail->box->vname); + hash_table_insert(hash_tbl, vname_copy, + POINTER_CAST(be_last_uid + 1)); + } + } else { + be_last_uid = POINTER_CAST_TO(uid_value, uint32_t) - 1; + } + if (backend_mail->uid <= be_last_uid) { + /* Mail was already indexed in the backend mailbox. + Apply [non]matches based on the FTS lookup results */ + struct fts_transaction_context *ft = FTS_CONTEXT_REQUIRE(ctx->transaction); + + if (fctx->next_unindexed_seq == mail->seq) { + fctx->next_unindexed_seq++; + ft->highest_virtual_uid = mail->uid; + } + idx = 0; + fts_search_apply_results_level(ctx, ctx->args->args, &idx); + } else { + fctx->virtual_seen_unindexed_gaps = TRUE; + } +} + +static int fts_mailbox_search_next_match_mail(struct mail_search_context *ctx, + struct mail *mail) +{ + struct fts_mailbox *fbox = FTS_CONTEXT_REQUIRE(ctx->transaction->box); + + fts_mailbox_virtual_match_mail(ctx, mail); + return fbox->module_ctx.super.search_next_match_mail(ctx, mail); +} + +void fts_mailbox_allocated(struct mailbox *box) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(box->list); + struct mailbox_vfuncs *v = box->vlast; + struct fts_mailbox *fbox; + + if (flist == NULL || flist->failed) + return; + + fbox = p_new(box->pool, struct fts_mailbox, 1); + fbox->module_ctx.super = *v; + box->vlast = &fbox->module_ctx.super; + fbox->fts_mailbox_excluded = fts_user_autoindex_exclude(box); + + v->get_status = fts_mailbox_get_status; + v->search_init = fts_mailbox_search_init; + v->search_next_nonblock = fts_mailbox_search_next_nonblock; + v->search_next_update_seq = fts_mailbox_search_next_update_seq; + v->search_deinit = fts_mailbox_search_deinit; + v->transaction_begin = fts_transaction_begin; + v->transaction_rollback = fts_transaction_rollback; + v->transaction_commit = fts_transaction_commit; + v->sync_notify = fts_mailbox_sync_notify; + v->sync_deinit = fts_sync_deinit; + v->save_finish = fts_save_finish; + v->copy = fts_copy; + v->search_next_match_mail = fts_mailbox_search_next_match_mail; + + MODULE_CONTEXT_SET(box, fts_storage_module, fbox); +} + +static void fts_mailbox_list_deinit(struct mailbox_list *list) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(list); + + if (flist->backend != NULL) + fts_backend_deinit(&flist->backend); + flist->module_ctx.super.deinit(list); +} + +static int +fts_init_namespace(struct fts_mailbox_list *flist, struct mail_namespace *ns, + const char **error_r) +{ + struct fts_backend *backend; + if (fts_backend_init(flist->backend_name, ns, error_r, &backend) < 0) { + flist->failed = TRUE; + return -1; + } + flist->backend = backend; + if ((flist->backend->flags & FTS_BACKEND_FLAG_FUZZY_SEARCH) != 0) + ns->user->fuzzy_search = TRUE; + return 0; +} + +void fts_mail_namespaces_added(struct mail_namespace *ns) +{ + while(ns != NULL) { + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(ns->list); + const char *error; + + if (flist != NULL && !flist->failed && flist->backend == NULL && + fts_init_namespace(flist, ns, &error) < 0) { + i_error("fts: Failed to initialize backend '%s': %s", + flist->backend_name, error); + } + ns = ns->next; + } +} + +void +fts_mailbox_list_created(struct mailbox_list *list) +{ + const char *name = mail_user_plugin_getenv(list->ns->user, "fts"); + const char *path; + + if (name == NULL || name[0] == '\0') { + e_debug(list->ns->user->event, + "fts: No fts setting - plugin disabled"); + return; + } + + if (!mailbox_list_get_root_path(list, MAILBOX_LIST_PATH_TYPE_INDEX, &path)) { + e_debug(list->ns->user->event, + "fts: Indexes disabled for namespace '%s'", + list->ns->prefix); + return; + } + + struct fts_mailbox_list *flist; + struct mailbox_list_vfuncs *v = list->vlast; + + flist = p_new(list->pool, struct fts_mailbox_list, 1); + flist->module_ctx.super = *v; + flist->backend_name = name; + list->vlast = &flist->module_ctx.super; + v->deinit = fts_mailbox_list_deinit; + MODULE_CONTEXT_SET(list, fts_mailbox_list_module, flist); +} + +struct fts_backend *fts_mailbox_backend(struct mailbox *box) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT_REQUIRE(box->list); + + return flist->backend; +} + +struct fts_backend *fts_list_backend(struct mailbox_list *list) +{ + struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(list); + + return flist == NULL ? NULL : flist->backend; +} diff --git a/src/plugins/fts/fts-storage.h b/src/plugins/fts/fts-storage.h new file mode 100644 index 0000000..ea28ed2 --- /dev/null +++ b/src/plugins/fts/fts-storage.h @@ -0,0 +1,70 @@ +#ifndef FTS_STORAGE_H +#define FTS_STORAGE_H + +#include "mail-storage-private.h" +#include "fts-api.h" + +enum fts_enforced { + FTS_ENFORCED_NO, + FTS_ENFORCED_YES, + FTS_ENFORCED_BODY, +}; + +struct fts_scores { + int refcount; + ARRAY_TYPE(fts_score_map) score_map; +}; + +struct fts_search_level { + ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs; + buffer_t *args_matches; + ARRAY_TYPE(fts_score_map) score_map; +}; + +HASH_TABLE_DEFINE_TYPE(virtual_last_indexed, const char *, void *); + +struct fts_search_context { + union mail_search_module_context module_ctx; + + struct fts_backend *backend; + struct mailbox *box; + struct mailbox_transaction_context *t; + struct mail_search_args *args; + enum fts_lookup_flags flags; + enum fts_enforced enforced; + + pool_t result_pool; + ARRAY(struct fts_search_level) levels; + buffer_t *orig_matches; + + uint32_t first_unindexed_seq; + uint32_t next_unindexed_seq; + HASH_TABLE_TYPE(virtual_last_indexed) last_indexed_virtual_uids; + + /* final scores, combined from all levels */ + struct fts_scores *scores; + + struct fts_indexer_context *indexer_ctx; + struct fts_search_state *search_state; + + bool virtual_mailbox:1; + bool fts_lookup_success:1; + bool indexing_timed_out:1; + bool virtual_seen_unindexed_gaps:1; +}; + +/* Figure out if we want to use full text search indexes and update + backends in fctx accordingly. */ +void fts_search_analyze(struct fts_search_context *fctx); +/* Perform the actual index lookup and update definite_uids and maybe_uids. */ +void fts_search_lookup(struct fts_search_context *fctx); +/* Returns FTS backend for the given mailbox (assumes it has one). */ +struct fts_backend *fts_mailbox_backend(struct mailbox *box); +/* Returns FTS backend for the given mailbox list, or NULL if it has none. */ +struct fts_backend *fts_list_backend(struct mailbox_list *list); + +void fts_mail_allocated(struct mail *mail); +void fts_mail_namespaces_added(struct mail_namespace *ns); +void fts_mailbox_allocated(struct mailbox *box); +void fts_mailbox_list_created(struct mailbox_list *list); +#endif diff --git a/src/plugins/fts/fts-user.c b/src/plugins/fts/fts-user.c new file mode 100644 index 0000000..3c813cd --- /dev/null +++ b/src/plugins/fts/fts-user.c @@ -0,0 +1,423 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "module-context.h" +#include "mail-user.h" +#include "mail-storage-private.h" +#include "mailbox-match-plugin.h" +#include "fts-language.h" +#include "fts-filter.h" +#include "fts-tokenizer.h" +#include "fts-user.h" + +#define FTS_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_user_module) + +struct fts_user { + union mail_user_module_context module_ctx; + int refcount; + + struct fts_language_list *lang_list; + struct fts_user_language *data_lang; + ARRAY_TYPE(fts_user_language) languages, data_languages; + + struct mailbox_match_plugin *autoindex_exclude; +}; + +static MODULE_CONTEXT_DEFINE_INIT(fts_user_module, + &mail_user_module_register); + +static const char *const *str_keyvalues_to_array(const char *str) +{ + const char *key, *value, *const *keyvalues; + ARRAY_TYPE(const_string) arr; + unsigned int i; + + if (str == NULL) + return NULL; + + t_array_init(&arr, 8); + keyvalues = t_strsplit_spaces(str, " "); + for (i = 0; keyvalues[i] != NULL; i++) { + value = strchr(keyvalues[i], '='); + if (value != NULL) + key = t_strdup_until(keyvalues[i], value++); + else { + key = keyvalues[i]; + value = ""; + } + array_push_back(&arr, &key); + array_push_back(&arr, &value); + } + array_append_zero(&arr); + return array_front(&arr); +} + +static int +fts_user_init_languages(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + const char *languages, *unknown; + const char *lang_config[3] = {NULL, NULL, NULL}; + + languages = mail_user_plugin_getenv(user, "fts_languages"); + if (languages == NULL) { + *error_r = "fts_languages setting is missing"; + return -1; + } + + lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config"); + if (lang_config[1] != NULL) + lang_config[0] = "fts_language_config"; + if (fts_language_list_init(lang_config, &fuser->lang_list, error_r) < 0) + return -1; + + if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) { + *error_r = t_strdup_printf( + "fts_languages: Unknown language '%s'", unknown); + return -1; + } + if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) { + *error_r = "fts_languages setting is empty"; + return -1; + } + return 0; +} + +static int +fts_user_create_filters(struct mail_user *user, const struct fts_language *lang, + struct fts_filter **filter_r, const char **error_r) +{ + const struct fts_filter *filter_class; + struct fts_filter *filter = NULL, *parent = NULL; + const char *filters_key, *const *filters, *filter_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + /* try to get the language-specific filters first */ + filters_key = t_strconcat("fts_filters_", lang->name, NULL); + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* fallback to global filters */ + filters_key = "fts_filters"; + str = mail_user_plugin_getenv(user, filters_key); + if (str == NULL) { + /* No filters */ + *filter_r = NULL; + return 0; + } + } + + filters = t_strsplit_spaces(str, " "); + for (i = 0; filters[i] != NULL; i++) { + filter_class = fts_filter_find(filters[i]); + if (filter_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown filter '%s'", + filters_key, filters[i]); + ret = -1; + break; + } + + /* try the language-specific setting first */ + filter_set_name = t_str_replace(filters[i], '-', '_'); + set_key = t_strdup_printf("fts_filter_%s_%s", + lang->name, filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + if (str == NULL) { + set_key = t_strdup_printf("fts_filter_%s", filter_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + if (fts_filter_create(filter_class, parent, lang, + str_keyvalues_to_array(str), + &filter, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_filter_unref(&parent); + parent = filter; + } + if (ret < 0) { + if (parent != NULL) + fts_filter_unref(&parent); + return -1; + } + *filter_r = filter; + return 0; +} + +static int +fts_user_create_tokenizer(struct mail_user *user, + const struct fts_language *lang, + struct fts_tokenizer **tokenizer_r, bool search, + const char **error_r) +{ + const struct fts_tokenizer *tokenizer_class; + struct fts_tokenizer *tokenizer = NULL, *parent = NULL; + const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name; + const char *str, *error, *set_key; + unsigned int i; + int ret = 0; + + tokenizers_key = t_strconcat("fts_tokenizers_", lang->name, NULL); + str = mail_user_plugin_getenv(user, tokenizers_key); + if (str == NULL) { + str = mail_user_plugin_getenv(user, "fts_tokenizers"); + if (str == NULL) { + *error_r = t_strdup_printf("%s or fts_tokenizers setting must exist", tokenizers_key); + return -1; + } + tokenizers_key = "fts_tokenizers"; + } + + tokenizers = t_strsplit_spaces(str, " "); + + for (i = 0; tokenizers[i] != NULL; i++) { + tokenizer_class = fts_tokenizer_find(tokenizers[i]); + if (tokenizer_class == NULL) { + *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'", + tokenizers_key, tokenizers[i]); + ret = -1; + break; + } + + tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_'); + set_key = t_strdup_printf("fts_tokenizer_%s_%s", tokenizer_set_name, lang->name); + str = mail_user_plugin_getenv(user, set_key); + if (str == NULL) { + set_key = t_strdup_printf("fts_tokenizer_%s", tokenizer_set_name); + str = mail_user_plugin_getenv(user, set_key); + } + + /* tell the tokenizers that we're tokenizing a search string + (instead of tokenizing indexed data) */ + if (search) + str = t_strconcat("search=yes ", str, NULL); + + if (fts_tokenizer_create(tokenizer_class, parent, + str_keyvalues_to_array(str), + &tokenizer, &error) < 0) { + *error_r = t_strdup_printf("%s: %s", set_key, error); + ret = -1; + break; + } + if (parent != NULL) + fts_tokenizer_unref(&parent); + parent = tokenizer; + } + if (ret < 0) { + if (parent != NULL) + fts_tokenizer_unref(&parent); + return -1; + } + *tokenizer_r = tokenizer; + return 0; +} + +static int +fts_user_language_init_tokenizers(struct mail_user *user, + struct fts_user_language *user_lang, + const char **error_r) +{ + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->index_tokenizer, FALSE, + error_r) < 0) + return -1; + + if (fts_user_create_tokenizer(user, user_lang->lang, + &user_lang->search_tokenizer, TRUE, + error_r) < 0) + return -1; + return 0; +} + +struct fts_user_language * +fts_user_language_find(struct mail_user *user, + const struct fts_language *lang) +{ + struct fts_user_language *user_lang; + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + array_foreach_elem(&fuser->languages, user_lang) { + if (strcmp(user_lang->lang->name, lang->name) == 0) + return user_lang; + } + return NULL; +} + +static int fts_user_language_create(struct mail_user *user, + struct fts_user *fuser, + const struct fts_language *lang, + const char **error_r) +{ + struct fts_user_language *user_lang; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = lang; + array_push_back(&fuser->languages, &user_lang); + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + if (fts_user_create_filters(user, lang, &user_lang->filter, error_r) < 0) + return -1; + return 0; +} + +static int fts_user_languages_fill_all(struct mail_user *user, + struct fts_user *fuser, + const char **error_r) +{ + const struct fts_language *lang; + + array_foreach_elem(fts_language_list_get_all(fuser->lang_list), lang) { + if (fts_user_language_create(user, fuser, lang, error_r) < 0) + return -1; + } + return 0; +} + +static int +fts_user_init_data_language(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + struct fts_user_language *user_lang; + const char *error; + + user_lang = p_new(user->pool, struct fts_user_language, 1); + user_lang->lang = &fts_language_data; + + if (fts_user_language_init_tokenizers(user, user_lang, error_r) < 0) + return -1; + + if (fts_filter_create(fts_filter_lowercase, NULL, user_lang->lang, NULL, + &user_lang->filter, &error) < 0) + i_unreached(); + i_assert(user_lang->filter != NULL); + + p_array_init(&fuser->data_languages, user->pool, 1); + array_push_back(&fuser->data_languages, &user_lang); + array_push_back(&fuser->languages, &user_lang); + + fuser->data_lang = user_lang; + return 0; +} + +struct fts_language_list *fts_user_get_language_list(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->lang_list; +} + +const ARRAY_TYPE(fts_user_language) * +fts_user_get_all_languages(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return &fuser->languages; +} + +const ARRAY_TYPE(fts_user_language) * +fts_user_get_data_languages(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return &fuser->data_languages; +} + +struct fts_user_language *fts_user_get_data_lang(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + i_assert(fuser != NULL); + return fuser->data_lang; +} + +bool fts_user_autoindex_exclude(struct mailbox *box) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(box->storage->user); + + return mailbox_match_plugin_exclude(fuser->autoindex_exclude, box); +} + +static void fts_user_language_free(struct fts_user_language *user_lang) +{ + if (user_lang->filter != NULL) + fts_filter_unref(&user_lang->filter); + if (user_lang->index_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->index_tokenizer); + if (user_lang->search_tokenizer != NULL) + fts_tokenizer_unref(&user_lang->search_tokenizer); +} + +static void fts_user_free(struct fts_user *fuser) +{ + struct fts_user_language *user_lang; + + if (fuser->lang_list != NULL) + fts_language_list_deinit(&fuser->lang_list); + + if (array_is_created(&fuser->languages)) { + array_foreach_elem(&fuser->languages, user_lang) + fts_user_language_free(user_lang); + } + mailbox_match_plugin_deinit(&fuser->autoindex_exclude); +} + +static int +fts_mail_user_init_libfts(struct mail_user *user, struct fts_user *fuser, + const char **error_r) +{ + p_array_init(&fuser->languages, user->pool, 4); + + if (fts_user_init_languages(user, fuser, error_r) < 0 || + fts_user_init_data_language(user, fuser, error_r) < 0) + return -1; + if (fts_user_languages_fill_all(user, fuser, error_r) < 0) + return -1; + return 0; +} + +int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, + const char **error_r) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + /* multiple fts plugins are loaded */ + fuser->refcount++; + return 0; + } + + fuser = p_new(user->pool, struct fts_user, 1); + fuser->refcount = 1; + if (initialize_libfts) { + if (fts_mail_user_init_libfts(user, fuser, error_r) < 0) { + fts_user_free(fuser); + return -1; + } + } + fuser->autoindex_exclude = + mailbox_match_plugin_init(user, "fts_autoindex_exclude"); + + MODULE_CONTEXT_SET(user, fts_user_module, fuser); + return 0; +} + +void fts_mail_user_deinit(struct mail_user *user) +{ + struct fts_user *fuser = FTS_USER_CONTEXT(user); + + if (fuser != NULL) { + i_assert(fuser->refcount > 0); + if (--fuser->refcount == 0) + fts_user_free(fuser); + } +} diff --git a/src/plugins/fts/fts-user.h b/src/plugins/fts/fts-user.h new file mode 100644 index 0000000..043f4e1 --- /dev/null +++ b/src/plugins/fts/fts-user.h @@ -0,0 +1,27 @@ +#ifndef FTS_USER_H +#define FTS_USER_H + +struct fts_user_language { + const struct fts_language *lang; + struct fts_filter *filter; + struct fts_tokenizer *index_tokenizer, *search_tokenizer; +}; +ARRAY_DEFINE_TYPE(fts_user_language, struct fts_user_language *); + +struct fts_user_language * +fts_user_language_find(struct mail_user *user, + const struct fts_language *lang); +struct fts_language_list *fts_user_get_language_list(struct mail_user *user); +const ARRAY_TYPE(fts_user_language) * +fts_user_get_all_languages(struct mail_user *user); +struct fts_user_language *fts_user_get_data_lang(struct mail_user *user); +const ARRAY_TYPE(fts_user_language) * +fts_user_get_data_languages(struct mail_user *user); + +bool fts_user_autoindex_exclude(struct mailbox *box); + +int fts_mail_user_init(struct mail_user *user, bool initialize_libfts, + const char **error_r); +void fts_mail_user_deinit(struct mail_user *user); + +#endif diff --git a/src/plugins/fts/xml2text.c b/src/plugins/fts/xml2text.c new file mode 100644 index 0000000..f3c573c --- /dev/null +++ b/src/plugins/fts/xml2text.c @@ -0,0 +1,44 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "message-parser.h" +#include "fts-parser.h" + +#include <unistd.h> + +int main(void) +{ + struct fts_parser *parser; + unsigned char buf[IO_BLOCK_SIZE]; + struct message_block block; + ssize_t ret; + struct fts_parser_context parser_context = {.content_type = "text/html"}; + + lib_init(); + + parser = fts_parser_html.try_init(&parser_context); + i_assert(parser != NULL); + + i_zero(&block); + while ((ret = read(STDIN_FILENO, buf, sizeof(buf))) > 0) { + block.data = buf; + block.size = ret; + parser->v.more(parser, &block); + if (write(STDOUT_FILENO, block.data, block.size) < 0) + i_fatal("write(stdout) failed: %m"); + } + if (ret < 0) + i_fatal("read(stdin) failed: %m"); + + for (;;) { + block.size = 0; + parser->v.more(parser, &block); + if (block.size == 0) + break; + if (write(STDOUT_FILENO, block.data, block.size) < 0) + i_fatal("write(stdout) failed: %m"); + } + + lib_deinit(); + return 0; +} |