diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 17:36:47 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 17:36:47 +0000 |
commit | 0441d265f2bb9da249c7abf333f0f771fadb4ab5 (patch) | |
tree | 3f3789daa2f6db22da6e55e92bee0062a7d613fe /src/plugins/fts-lucene | |
parent | Initial commit. (diff) | |
download | dovecot-0441d265f2bb9da249c7abf333f0f771fadb4ab5.tar.xz dovecot-0441d265f2bb9da249c7abf333f0f771fadb4ab5.zip |
Adding upstream version 1:2.3.21+dfsg1.upstream/1%2.3.21+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/plugins/fts-lucene')
-rw-r--r-- | src/plugins/fts-lucene/Makefile.am | 61 | ||||
-rw-r--r-- | src/plugins/fts-lucene/Makefile.in | 990 | ||||
-rw-r--r-- | src/plugins/fts-lucene/Snowball.cc | 151 | ||||
-rw-r--r-- | src/plugins/fts-lucene/SnowballAnalyzer.h | 51 | ||||
-rw-r--r-- | src/plugins/fts-lucene/SnowballFilter.h | 42 | ||||
-rw-r--r-- | src/plugins/fts-lucene/doveadm-fts-lucene.c | 70 | ||||
-rw-r--r-- | src/plugins/fts-lucene/fts-backend-lucene.c | 605 | ||||
-rw-r--r-- | src/plugins/fts-lucene/fts-lucene-plugin.c | 146 | ||||
-rw-r--r-- | src/plugins/fts-lucene/fts-lucene-plugin.h | 36 | ||||
-rw-r--r-- | src/plugins/fts-lucene/lucene-wrapper.cc | 1639 | ||||
-rw-r--r-- | src/plugins/fts-lucene/lucene-wrapper.h | 67 | ||||
-rw-r--r-- | src/plugins/fts-lucene/textcat.conf | 25 |
12 files changed, 3883 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/Makefile.am b/src/plugins/fts-lucene/Makefile.am new file mode 100644 index 0000000..d68e6ae --- /dev/null +++ b/src/plugins/fts-lucene/Makefile.am @@ -0,0 +1,61 @@ +doveadm_moduledir = $(moduledir)/doveadm + +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts \ + -I$(top_srcdir)/src/doveadm + +AM_CXXFLAGS = \ + $(CLUCENE_CFLAGS) \ + $(LIBEXTTEXTCAT_CFLAGS) + +NOPLUGIN_LDFLAGS = +lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib21_fts_lucene_plugin.la + +if BUILD_FTS_STEMMER +STEMMER_LIBS = -lstemmer +SHOWBALL_SOURCES = Snowball.cc +endif + +if BUILD_FTS_EXTTEXTCAT +TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS) +else +if BUILD_FTS_TEXTCAT +TEXTCAT_LIBS = -ltextcat +endif +endif + +lib21_fts_lucene_plugin_la_LIBADD = \ + $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS) + +lib21_fts_lucene_plugin_la_SOURCES = \ + fts-lucene-plugin.c \ + fts-backend-lucene.c \ + lucene-wrapper.cc \ + $(SHOWBALL_SOURCES) + +noinst_HEADERS = \ + fts-lucene-plugin.h \ + lucene-wrapper.h \ + SnowballAnalyzer.h \ + SnowballFilter.h + +if BUILD_FTS_TEXTCAT +exampledir = $(docdir)/example-config +example_DATA = \ + textcat.conf +endif +EXTRA_DIST = textcat.conf + +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_lucene_plugin.la + +lib20_doveadm_fts_lucene_plugin_la_SOURCES = \ + doveadm-fts-lucene.c diff --git a/src/plugins/fts-lucene/Makefile.in b/src/plugins/fts-lucene/Makefile.in new file mode 100644 index 0000000..3359f08 --- /dev/null +++ b/src/plugins/fts-lucene/Makefile.in @@ -0,0 +1,990 @@ +# Makefile.in generated by automake 1.16.3 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2020 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = 
$(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/plugins/fts-lucene +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \ + $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \ + $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \ + $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \ + $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \ + $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \ + $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \ + $(top_srcdir)/m4/flexible_array_member.m4 \ + $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \ + $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \ + $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \ + $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \ + $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \ + $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \ + $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \ + $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \ + $(top_srcdir)/m4/pr_set_dumpable.m4 \ + $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \ + $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \ + $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \ + 
$(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \ + $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \ + $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \ + $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \ + $(top_srcdir)/m4/typeof_dev_t.m4 \ + $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \ + $(top_srcdir)/m4/want_apparmor.m4 \ + $(top_srcdir)/m4/want_bsdauth.m4 \ + $(top_srcdir)/m4/want_bzlib.m4 \ + $(top_srcdir)/m4/want_cassandra.m4 \ + $(top_srcdir)/m4/want_cdb.m4 \ + $(top_srcdir)/m4/want_checkpassword.m4 \ + $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \ + $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \ + $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \ + $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \ + $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \ + $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \ + $(top_srcdir)/m4/want_prefetch.m4 \ + $(top_srcdir)/m4/want_shadow.m4 \ + $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \ + $(top_srcdir)/m4/want_sqlite.m4 \ + $(top_srcdir)/m4/want_stemmer.m4 \ + $(top_srcdir)/m4/want_systemd.m4 \ + $(top_srcdir)/m4/want_textcat.m4 \ + $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \ + $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(doveadm_moduledir)" \ + "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)" +LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES) +lib20_doveadm_fts_lucene_plugin_la_LIBADD = +am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS = doveadm-fts-lucene.lo +lib20_doveadm_fts_lucene_plugin_la_OBJECTS = \ + $(am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +lib20_doveadm_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) \ + --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ + $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(lib20_doveadm_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@ +am__DEPENDENCIES_1 = +@BUILD_FTS_EXTTEXTCAT_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +lib21_fts_lucene_plugin_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1) +am__lib21_fts_lucene_plugin_la_SOURCES_DIST = fts-lucene-plugin.c \ + fts-backend-lucene.c lucene-wrapper.cc Snowball.cc +@BUILD_FTS_STEMMER_TRUE@am__objects_1 = Snowball.lo +am_lib21_fts_lucene_plugin_la_OBJECTS 
= fts-lucene-plugin.lo \ + fts-backend-lucene.lo lucene-wrapper.lo $(am__objects_1) +lib21_fts_lucene_plugin_la_OBJECTS = \ + $(am_lib21_fts_lucene_plugin_la_OBJECTS) +lib21_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(lib21_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/Snowball.Plo \ + ./$(DEPDIR)/doveadm-fts-lucene.Plo \ + ./$(DEPDIR)/fts-backend-lucene.Plo \ + ./$(DEPDIR)/fts-lucene-plugin.Plo \ + ./$(DEPDIR)/lucene-wrapper.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) 
--mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \ + $(lib21_fts_lucene_plugin_la_SOURCES) +DIST_SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \ + $(am__lib21_fts_lucene_plugin_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(example_DATA) +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +APPARMOR_LIBS = @APPARMOR_LIBS@ +AR = @AR@ +AUTH_CFLAGS = @AUTH_CFLAGS@ +AUTH_LIBS = @AUTH_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BINARY_CFLAGS = @BINARY_CFLAGS@ +BINARY_LDFLAGS = @BINARY_LDFLAGS@ +BISON = @BISON@ +CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@ +CASSANDRA_LIBS = @CASSANDRA_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CDB_LIBS = @CDB_LIBS@ +CFLAGS = @CFLAGS@ +CLUCENE_CFLAGS = @CLUCENE_CFLAGS@ +CLUCENE_LIBS = @CLUCENE_LIBS@ +COMPRESS_LIBS = @COMPRESS_LIBS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRYPT_LIBS = @CRYPT_LIBS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DICT_LIBS = @DICT_LIBS@ +DLLIB = @DLLIB@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FLEX = @FLEX@ +FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@ +FUZZER_LDFLAGS = @FUZZER_LDFLAGS@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KRB5CONFIG = @KRB5CONFIG@ +KRB5_CFLAGS = @KRB5_CFLAGS@ +KRB5_LIBS = @KRB5_LIBS@ +LD = @LD@ +LDAP_LIBS = @LDAP_LIBS@ +LDFLAGS = @LDFLAGS@ +LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@ +LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@ +LIBCAP = @LIBCAP@ +LIBDOVECOT = @LIBDOVECOT@ +LIBDOVECOT_COMPRESS = 
@LIBDOVECOT_COMPRESS@ +LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@ +LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@ +LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@ +LIBDOVECOT_LDA = @LIBDOVECOT_LDA@ +LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@ +LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@ +LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@ +LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@ +LIBDOVECOT_LUA = @LIBDOVECOT_LUA@ +LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@ +LIBDOVECOT_SQL = @LIBDOVECOT_SQL@ +LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@ +LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@ +LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@ +LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@ +LIBICONV = @LIBICONV@ +LIBICU_CFLAGS = @LIBICU_CFLAGS@ +LIBICU_LIBS = @LIBICU_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@ +LIBSODIUM_LIBS = @LIBSODIUM_LIBS@ +LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@ +LIBTIRPC_LIBS = @LIBTIRPC_LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ +LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ +LIBWRAP_LIBS = @LIBWRAP_LIBS@ +LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LUA_CFLAGS = @LUA_CFLAGS@ +LUA_LIBS = @LUA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_LIBS = @MODULE_LIBS@ +MODULE_SUFFIX = @MODULE_SUFFIX@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_CONFIG = @MYSQL_CONFIG@ +MYSQL_LIBS = @MYSQL_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOPLUGIN_LDFLAGS = +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PANDOC = @PANDOC@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PG_CONFIG = 
@PG_CONFIG@ +PIE_CFLAGS = @PIE_CFLAGS@ +PIE_LDFLAGS = @PIE_LDFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +QUOTA_LIBS = @QUOTA_LIBS@ +RANLIB = @RANLIB@ +RELRO_LDFLAGS = @RELRO_LDFLAGS@ +RPCGEN = @RPCGEN@ +RUN_TEST = @RUN_TEST@ +SED = @SED@ +SETTING_FILES = @SETTING_FILES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SQLITE_CFLAGS = @SQLITE_CFLAGS@ +SQLITE_LIBS = @SQLITE_LIBS@ +SQL_CFLAGS = @SQL_CFLAGS@ +SQL_LIBS = @SQL_LIBS@ +SSL_CFLAGS = @SSL_CFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND = @VALGRIND@ +VERSION = @VERSION@ +ZSTD_CFLAGS = @ZSTD_CFLAGS@ +ZSTD_LIBS = @ZSTD_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dict_drivers = @dict_drivers@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +rundir = @rundir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ 
+sharedstatedir = @sharedstatedir@ +sql_drivers = @sql_drivers@ +srcdir = @srcdir@ +ssldir = @ssldir@ +statedir = @statedir@ +sysconfdir = @sysconfdir@ +systemdservicetype = @systemdservicetype@ +systemdsystemunitdir = @systemdsystemunitdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +doveadm_moduledir = $(moduledir)/doveadm +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts \ + -I$(top_srcdir)/src/doveadm + +AM_CXXFLAGS = \ + $(CLUCENE_CFLAGS) \ + $(LIBEXTTEXTCAT_CFLAGS) + +lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +module_LTLIBRARIES = \ + lib21_fts_lucene_plugin.la + +@BUILD_FTS_STEMMER_TRUE@STEMMER_LIBS = -lstemmer +@BUILD_FTS_STEMMER_TRUE@SHOWBALL_SOURCES = Snowball.cc +@BUILD_FTS_EXTTEXTCAT_FALSE@@BUILD_FTS_TEXTCAT_TRUE@TEXTCAT_LIBS = -ltextcat +@BUILD_FTS_EXTTEXTCAT_TRUE@TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS) +lib21_fts_lucene_plugin_la_LIBADD = \ + $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS) + +lib21_fts_lucene_plugin_la_SOURCES = \ + fts-lucene-plugin.c \ + fts-backend-lucene.c \ + lucene-wrapper.cc \ + $(SHOWBALL_SOURCES) + +noinst_HEADERS = \ + fts-lucene-plugin.h \ + lucene-wrapper.h \ + SnowballAnalyzer.h \ + SnowballFilter.h + +@BUILD_FTS_TEXTCAT_TRUE@exampledir = $(docdir)/example-config +@BUILD_FTS_TEXTCAT_TRUE@example_DATA = \ +@BUILD_FTS_TEXTCAT_TRUE@ textcat.conf + +EXTRA_DIST = textcat.conf +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_lucene_plugin.la + +lib20_doveadm_fts_lucene_plugin_la_SOURCES = \ + doveadm-fts-lucene.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) 
\ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \ + } + +uninstall-doveadm_moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \ 
+ for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \ + done + +clean-doveadm_moduleLTLIBRARIES: + -test -z "$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES) + @list='$(doveadm_module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f 
$${locs}; \ + } + +lib20_doveadm_fts_lucene_plugin.la: $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES) + $(AM_V_CCLD)$(lib20_doveadm_fts_lucene_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_LIBADD) $(LIBS) + +lib21_fts_lucene_plugin.la: $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib21_fts_lucene_plugin_la_DEPENDENCIES) + $(AM_V_CXXLD)$(lib21_fts_lucene_plugin_la_LINK) -rpath $(moduledir) $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Snowball.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts-lucene.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-lucene.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-lucene-plugin.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lucene-wrapper.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-exampleDATA: $(example_DATA) + @$(NORMAL_INSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(exampledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(exampledir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(exampledir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(exampledir)" || exit $$?; \ + done + +uninstall-exampleDATA: + @$(NORMAL_UNINSTALL) + @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(exampledir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: 
$(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/Snowball.Plo + -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo + -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo + -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo + -rm -f ./$(DEPDIR)/lucene-wrapper.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-doveadm_moduleLTLIBRARIES install-exampleDATA \ + install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/Snowball.Plo + -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo + -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo + -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo + -rm -f ./$(DEPDIR)/lucene-wrapper.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \ + uninstall-exampleDATA uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am \ + 
install-doveadm_moduleLTLIBRARIES install-dvi install-dvi-am \ + install-exampleDATA install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-doveadm_moduleLTLIBRARIES uninstall-exampleDATA \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/plugins/fts-lucene/Snowball.cc b/src/plugins/fts-lucene/Snowball.cc new file mode 100644 index 0000000..43b54e3 --- /dev/null +++ b/src/plugins/fts-lucene/Snowball.cc @@ -0,0 +1,151 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include <CLucene.h> +#include "SnowballAnalyzer.h" +#include "SnowballFilter.h" +#include <CLucene/util/CLStreams.h> +#include <CLucene/analysis/Analyzers.h> +#include <CLucene/analysis/standard/StandardTokenizer.h> +#include <CLucene/analysis/standard/StandardFilter.h> + +extern "C" { +#include "lib.h" +#include "buffer.h" +#include "unichar.h" +#include "lucene-wrapper.h" +}; + +CL_NS_USE(analysis) +CL_NS_USE(util) +CL_NS_USE2(analysis,standard) + +CL_NS_DEF2(analysis,snowball) + + /** Builds the named analyzer with no stop words. 
*/ + SnowballAnalyzer::SnowballAnalyzer(normalizer_func_t *_normalizer, const char* _language) + : language(i_strdup(_language)), + normalizer(_normalizer), + stopSet(NULL), + prevstream(NULL) + { + } + + SnowballAnalyzer::~SnowballAnalyzer() + { + if (prevstream) + _CLDELETE(prevstream); + i_free(language); + if ( stopSet != NULL ) + _CLDELETE(stopSet); + } + + /** Builds the named analyzer with the given stop words. + */ + SnowballAnalyzer::SnowballAnalyzer(const char* language, const TCHAR** stopWords) + : language(i_strdup(language)), + normalizer(NULL), + stopSet(_CLNEW CLTCSetList(true)), + prevstream(NULL) + { + StopFilter::fillStopTable(stopSet,stopWords); + } + + TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { + return this->tokenStream(fieldName,reader,false); + } + + /** Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ + TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader) { + BufferedReader* bufferedReader = reader->__asBufferedReader(); + TokenStream* result; + + if ( bufferedReader == NULL ) + result = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, deleteReader), true ); + else + result = _CLNEW StandardTokenizer(bufferedReader, deleteReader); + + result = _CLNEW StandardFilter(result, true); + result = _CLNEW CL_NS(analysis)::LowerCaseFilter(result, true); + if (stopSet != NULL) + result = _CLNEW CL_NS(analysis)::StopFilter(result, true, stopSet); + result = _CLNEW SnowballFilter(result, normalizer, language, true); + return result; + } + + TokenStream* SnowballAnalyzer::reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { + if (prevstream) _CLDELETE(prevstream); + prevstream = this->tokenStream(fieldName, reader); + return prevstream; + } + + + + + + + /** Construct the named stemming filter. 
+ * + * @param in the input tokens to stem + * @param name the name of a stemmer + */ + SnowballFilter::SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS): + TokenFilter(in,deleteTS) + { + stemmer = sb_stemmer_new(language, NULL); //use utf8 encoding + this->normalizer = normalizer; + + if ( stemmer == NULL ){ + _CLTHROWA(CL_ERR_IllegalArgument, "language not available for stemming\n"); //todo: richer error + } + } + + SnowballFilter::~SnowballFilter(){ + sb_stemmer_delete(stemmer); + } + + /** Returns the next input Token, after being stemmed */ + Token* SnowballFilter::next(Token* token){ + if (input->next(token) == NULL) + return NULL; + + unsigned char utf8text[LUCENE_MAX_WORD_LEN*5+1]; + unsigned int len = I_MIN(LUCENE_MAX_WORD_LEN, token->termLength()); + + buffer_t buf = { { 0, 0 } }; + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + buffer_create_from_data(&buf, utf8text, sizeof(utf8text)); + uni_ucs4_to_utf8((const unichar_t *)token->termBuffer(), len, &buf); + + const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8text, buf.used); + if ( stemmed == NULL ) + _CLTHROWA(CL_ERR_Runtime,"Out of memory"); + + int stemmedLen=sb_stemmer_length(stemmer); + + if (normalizer == NULL) { + unsigned int tchartext_size = + uni_utf8_strlen_n(stemmed, stemmedLen) + 1; + TCHAR tchartext[tchartext_size]; + lucene_utf8_n_to_tchar(stemmed, stemmedLen, tchartext, tchartext_size); + token->set(tchartext,token->startOffset(), token->endOffset(), token->type()); + } else T_BEGIN { + buffer_t *norm_buf = t_buffer_create(stemmedLen); + normalizer(stemmed, stemmedLen, norm_buf); + + unsigned int tchartext_size = + uni_utf8_strlen_n(norm_buf->data, norm_buf->used) + 1; + TCHAR tchartext[tchartext_size]; + lucene_utf8_n_to_tchar((const unsigned char *)norm_buf->data, + norm_buf->used, tchartext, tchartext_size); + token->set(tchartext,token->startOffset(), token->endOffset(), token->type()); + } T_END; + return token; + } + + 
+CL_NS_END2 diff --git a/src/plugins/fts-lucene/SnowballAnalyzer.h b/src/plugins/fts-lucene/SnowballAnalyzer.h new file mode 100644 index 0000000..45455c5 --- /dev/null +++ b/src/plugins/fts-lucene/SnowballAnalyzer.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_analysis_snowball_analyser_ +#define _lucene_analysis_snowball_analyser_ + +extern "C" { +#include "lib.h" +#include "unichar.h" +}; +#include "CLucene/analysis/AnalysisHeader.h" + +CL_CLASS_DEF(util,BufferedReader) +CL_NS_DEF2(analysis,snowball) + +/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link + * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}. + * + * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a + * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in + * {@link EnglishStemmer} is named "English". + */ +class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { + char* language; + normalizer_func_t *normalizer; + CLTCSetList* stopSet; + TokenStream *prevstream; + +public: + /** Builds the named analyzer with no stop words. */ + SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english"); + + /** Builds the named analyzer with the given stop words. + */ + SnowballAnalyzer(const char* language, const TCHAR** stopWords); + + ~SnowballAnalyzer(); + + /** Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. 
*/ + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader); + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); +}; + +CL_NS_END2 +#endif + diff --git a/src/plugins/fts-lucene/SnowballFilter.h b/src/plugins/fts-lucene/SnowballFilter.h new file mode 100644 index 0000000..6a0ed12 --- /dev/null +++ b/src/plugins/fts-lucene/SnowballFilter.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_analysis_snowball_filter_ +#define _lucene_analysis_snowball_filter_ + +#include "CLucene/analysis/AnalysisHeader.h" +#include "libstemmer.h" + +CL_NS_DEF2(analysis,snowball) + +/** A filter that stems words using a Snowball-generated stemmer. + * + * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a + * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in + * {@link EnglishStemmer} is named "English". + * + * Note: todo: This is not thread safe... + */ +class CLUCENE_CONTRIBS_EXPORT SnowballFilter: public TokenFilter { + struct sb_stemmer * stemmer; + normalizer_func_t *normalizer; +public: + + /** Construct the named stemming filter. 
+ * + * @param in the input tokens to stem + * @param name the name of a stemmer + */ + SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS); + + ~SnowballFilter(); + + /** Returns the next input Token, after being stemmed */ + Token* next(Token* token); +}; + +CL_NS_END2 +#endif diff --git a/src/plugins/fts-lucene/doveadm-fts-lucene.c b/src/plugins/fts-lucene/doveadm-fts-lucene.c new file mode 100644 index 0000000..a761907 --- /dev/null +++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c @@ -0,0 +1,70 @@ +/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "doveadm-dump.h" +#include "doveadm-fts.h" +#include "lucene-wrapper.h" + +#include <stdio.h> +#include <sys/stat.h> + +const char *doveadm_fts_lucene_plugin_version = DOVECOT_ABI_VERSION; + +void doveadm_fts_lucene_plugin_init(struct module *module); +void doveadm_fts_lucene_plugin_deinit(void); + +static void +cmd_dump_fts_lucene(const char *path, const char *const *args ATTR_UNUSED) +{ + struct lucene_index *index; + struct lucene_index_iter *iter; + guid_128_t prev_guid; + const struct lucene_index_record *rec; + bool first = TRUE; + + i_zero(&prev_guid); + index = lucene_index_init(path, NULL, NULL); + iter = lucene_index_iter_init(index); + while ((rec = lucene_index_iter_next(iter)) != NULL) { + if (memcmp(prev_guid, rec->mailbox_guid, + sizeof(prev_guid)) != 0) { + if (first) + first = FALSE; + else + printf("\n"); + memcpy(prev_guid, rec->mailbox_guid, sizeof(prev_guid)); + printf("%s: ", guid_128_to_string(prev_guid)); + } + printf("%u", rec->uid); + if (rec->part_num != 0) + printf("[%u]", rec->part_num); + printf("\n"); + } + printf("\n"); + if (lucene_index_iter_deinit(&iter) < 0) + i_error("Lucene index iteration failed"); + lucene_index_deinit(index); +} + +static bool test_dump_fts_lucene(const char *path) +{ + struct stat st; + + path = t_strconcat(path, "/segments.gen", NULL); + return stat(path, 
&st) == 0; +} + +static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_lucene = { + "fts-lucene", + test_dump_fts_lucene, + cmd_dump_fts_lucene +}; + +void doveadm_fts_lucene_plugin_init(struct module *module ATTR_UNUSED) +{ + doveadm_dump_register(&doveadm_cmd_dump_fts_lucene); +} + +void doveadm_fts_lucene_plugin_deinit(void) +{ +} diff --git a/src/plugins/fts-lucene/fts-backend-lucene.c b/src/plugins/fts-lucene/fts-backend-lucene.c new file mode 100644 index 0000000..963dbdf --- /dev/null +++ b/src/plugins/fts-lucene/fts-backend-lucene.c @@ -0,0 +1,605 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "hex-binary.h" +#include "strescape.h" +#include "message-part.h" +#include "mail-namespace.h" +#include "mail-storage-private.h" +#include "fts-expunge-log.h" +#include "lucene-wrapper.h" +#include "fts-indexer.h" +#include "fts-lucene-plugin.h" + +#include <wchar.h> + +#define LUCENE_INDEX_DIR_NAME "lucene-indexes" +#define LUCENE_EXPUNGE_LOG_NAME "dovecot-expunges.log" +#define LUCENE_OPTIMIZE_BATCH_MSGS_COUNT 100 + +struct lucene_fts_backend { + struct fts_backend backend; + char *dir_path; + + struct lucene_index *index; + struct mailbox *selected_box; + unsigned int selected_box_generation; + guid_128_t selected_box_guid; + + struct fts_expunge_log *expunge_log; + + bool dir_created:1; + bool updating:1; +}; + +struct lucene_fts_backend_update_context { + struct fts_backend_update_context ctx; + + struct mailbox *box; + uint32_t last_uid; + uint32_t last_indexed_uid; + char *first_box_vname; + + uint32_t uid, part_num; + char *hdr_name; + + unsigned int added_msgs; + struct fts_expunge_log_append_ctx *expunge_ctx; + + bool lucene_opened; + bool last_indexed_uid_set; + bool mime_parts; +}; + +static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend) +{ + if (backend->dir_created) + return 0; + + backend->dir_created = TRUE; + if 
(mailbox_list_mkdir_root(backend->backend.ns->list, + backend->dir_path, + MAILBOX_LIST_PATH_TYPE_INDEX) < 0) + return -1; + return 0; +} + +static int +fts_lucene_get_mailbox_guid(struct mailbox *box, guid_128_t guid_r) +{ + struct mailbox_metadata metadata; + + if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, + &metadata) < 0) { + i_error("lucene: Couldn't get mailbox %s GUID: %s", + box->vname, mailbox_get_last_internal_error(box, NULL)); + return -1; + } + memcpy(guid_r, metadata.guid, GUID_128_SIZE); + return 0; +} + +static int +fts_backend_select(struct lucene_fts_backend *backend, struct mailbox *box) +{ + guid_128_t guid; + unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH]; + wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH]; + buffer_t buf; + unsigned int i; + + i_assert(box != NULL); + + if (backend->selected_box == box && + backend->selected_box_generation == box->generation_sequence) + return 0; + + if (fts_lucene_get_mailbox_guid(box, guid) < 0) + return -1; + buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH); + binary_to_hex_append(&buf, guid, GUID_128_SIZE); + for (i = 0; i < N_ELEMENTS(wguid_hex); i++) + wguid_hex[i] = guid_hex[i]; + + lucene_index_select_mailbox(backend->index, wguid_hex); + + backend->selected_box = box; + memcpy(backend->selected_box_guid, guid, + sizeof(backend->selected_box_guid)); + backend->selected_box_generation = box->generation_sequence; + return 0; +} + +static struct fts_backend *fts_backend_lucene_alloc(void) +{ + struct lucene_fts_backend *backend; + + backend = i_new(struct lucene_fts_backend, 1); + backend->backend = fts_backend_lucene; + return &backend->backend; +} + +static int +fts_backend_lucene_init(struct fts_backend *_backend, const char **error_r) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT(_backend->ns->user); + const char *path; + + if (fuser == NULL) { + /* invalid settings */ + *error_r = 
"Invalid fts_lucene settings"; + return -1; + } + /* fts already checked that index exists */ + + if (fuser->set.use_libfts) { + /* change our flags so we get proper input */ + _backend->flags &= ENUM_NEGATE(FTS_BACKEND_FLAG_FUZZY_SEARCH); + _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT; + } + path = mailbox_list_get_root_forced(_backend->ns->list, + MAILBOX_LIST_PATH_TYPE_INDEX); + + backend->dir_path = i_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL); + backend->index = lucene_index_init(backend->dir_path, + _backend->ns->list, + &fuser->set); + + path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL); + backend->expunge_log = fts_expunge_log_init(path); + return 0; +} + +static void fts_backend_lucene_deinit(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (backend->index != NULL) + lucene_index_deinit(backend->index); + if (backend->expunge_log != NULL) + fts_expunge_log_deinit(&backend->expunge_log); + i_free(backend->dir_path); + i_free(backend); +} + +static int +fts_backend_lucene_get_last_uid(struct fts_backend *_backend, + struct mailbox *box, uint32_t *last_uid_r) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user); + struct fts_index_header hdr; + uint32_t set_checksum; + int ret; + + if (fts_index_get_header(box, &hdr)) { + set_checksum = fts_lucene_settings_checksum(&fuser->set); + ret = fts_index_have_compatible_settings(_backend->ns->list, + set_checksum); + if (ret < 0) + return -1; + if (ret == 0) { + /* need to rebuild the index */ + *last_uid_r = 0; + } else { + *last_uid_r = hdr.last_indexed_uid; + } + return 0; + } + + /* either nothing has been indexed, or the index was corrupted. + do it the slow way. 
*/ + if (fts_backend_select(backend, box) < 0) + return -1; + if (lucene_index_get_last_uid(backend->index, last_uid_r) < 0) + return -1; + + fts_index_set_last_uid(box, *last_uid_r); + return 0; +} + +static struct fts_backend_update_context * +fts_backend_lucene_update_init(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + struct lucene_fts_backend_update_context *ctx; + struct fts_lucene_user *fuser = + FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user); + + i_assert(!backend->updating); + + ctx = i_new(struct lucene_fts_backend_update_context, 1); + ctx->ctx.backend = _backend; + ctx->mime_parts = fuser->set.mime_parts; + backend->updating = TRUE; + return &ctx->ctx; +} + +static bool +fts_backend_lucene_need_optimize(struct lucene_fts_backend_update_context *ctx) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)ctx->ctx.backend; + unsigned int expunges; + uint32_t numdocs; + + if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT) + return TRUE; + if (lucene_index_get_doc_count(backend->index, &numdocs) < 0) + return FALSE; + + if (fts_expunge_log_uid_count(backend->expunge_log, &expunges) < 0) + return FALSE; + return expunges > 0 && + numdocs / expunges <= 50; /* >2% of index has been expunged */ +} + +static int +fts_backend_lucene_update_deinit(struct fts_backend_update_context *_ctx) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + int ret = _ctx->failed ? 
-1 : 0; + + i_assert(backend->updating); + + backend->updating = FALSE; + if (ctx->lucene_opened) { + if (lucene_index_build_deinit(backend->index) < 0) + ret = -1; + } + + if (ctx->expunge_ctx != NULL) { + if (fts_expunge_log_append_commit(&ctx->expunge_ctx) < 0) { + struct stat st; + ret = -1; + + if (stat(backend->dir_path, &st) < 0 && errno == ENOENT) { + /* lucene-indexes directory doesn't even exist, + so dovecot.index's last_index_uid is wrong. + rescan to update them. */ + (void)lucene_index_rescan(backend->index); + ret = 0; + } + } + } + + if (fts_backend_lucene_need_optimize(ctx)) { + if (ctx->lucene_opened) + (void)fts_backend_optimize(_ctx->backend); + else if (ctx->first_box_vname != NULL) { + struct mail_user *user = backend->backend.ns->user; + const char *cmd, *path; + int fd; + + /* the optimize affects all mailboxes within namespace, + so just use any mailbox name in it */ + cmd = t_strdup_printf("OPTIMIZE\t0\t%s\t%s\n", + str_tabescape(user->username), + str_tabescape(ctx->first_box_vname)); + fd = fts_indexer_cmd(user, cmd, &path); + i_close_fd(&fd); + } + } + + /* hdr_name is still allocated if the update ends while a header + build key is set; release it before freeing the context */ + i_free(ctx->hdr_name); + i_free(ctx->first_box_vname); + i_free(ctx); + return ret; +} + +static void +fts_backend_lucene_update_set_mailbox(struct fts_backend_update_context *_ctx, + struct mailbox *box) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + + if (ctx->last_uid != 0) { + fts_index_set_last_uid(ctx->box, ctx->last_uid); + ctx->last_uid = 0; + } + if (ctx->first_box_vname == NULL && box != NULL) + ctx->first_box_vname = i_strdup(box->vname); + ctx->box = box; + ctx->last_indexed_uid_set = FALSE; +} + +static void +fts_backend_lucene_update_expunge(struct fts_backend_update_context *_ctx, + uint32_t uid) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + struct fts_index_header hdr; + + if 
(!ctx->last_indexed_uid_set) { + if (!fts_index_get_header(ctx->box, &hdr)) + ctx->last_indexed_uid = 0; + else + ctx->last_indexed_uid = hdr.last_indexed_uid; + ctx->last_indexed_uid_set = TRUE; + } + if (ctx->last_indexed_uid == 0 || + uid > ctx->last_indexed_uid + 100) { + /* don't waste time adding expunge to log for a message that + isn't even indexed. this check is racy, because indexer may + just be in the middle of indexing this message. we'll + attempt to avoid that by skipping the expunging only if + indexing hasn't been done for a while (100 msgs). */ + return; + } + + if (ctx->expunge_ctx == NULL) { + ctx->expunge_ctx = + fts_expunge_log_append_begin(backend->expunge_log); + } + + if (fts_backend_select(backend, ctx->box) < 0) { + /* selected_box_guid was not updated, so it still refers to + the previously selected mailbox (or to none at all). + don't log this expunge under the wrong GUID. */ + _ctx->failed = TRUE; + return; + } + + fts_expunge_log_append_next(ctx->expunge_ctx, + backend->selected_box_guid, uid); +} + +static bool +fts_backend_lucene_update_set_build_key(struct fts_backend_update_context *_ctx, + const struct fts_backend_build_key *key) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + + if (!ctx->lucene_opened) { + if (fts_backend_lucene_mkdir(backend) < 0) + ctx->ctx.failed = TRUE; + if (lucene_index_build_init(backend->index) < 0) + ctx->ctx.failed = TRUE; + ctx->lucene_opened = TRUE; + } + + if (fts_backend_select(backend, ctx->box) < 0) + _ctx->failed = TRUE; + + switch (key->type) { + case FTS_BACKEND_BUILD_KEY_HDR: + case FTS_BACKEND_BUILD_KEY_MIME_HDR: + i_assert(key->hdr_name != NULL); + + i_free(ctx->hdr_name); + ctx->hdr_name = i_strdup(key->hdr_name); + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART: + i_free_and_null(ctx->hdr_name); + break; + case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY: + i_unreached(); + } + + if (key->uid != ctx->last_uid) { + i_assert(key->uid >= ctx->last_uid); + ctx->last_uid = key->uid; + ctx->added_msgs++; + } + + ctx->uid = key->uid; + if 
(ctx->mime_parts) + ctx->part_num = message_part_to_idx(key->part); + return TRUE; +} + +static void +fts_backend_lucene_update_unset_build_key(struct fts_backend_update_context *_ctx) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + + ctx->uid = 0; + ctx->part_num = 0; + i_free_and_null(ctx->hdr_name); +} + +static int +fts_backend_lucene_update_build_more(struct fts_backend_update_context *_ctx, + const unsigned char *data, size_t size) +{ + struct lucene_fts_backend_update_context *ctx = + (struct lucene_fts_backend_update_context *)_ctx; + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_ctx->backend; + int ret; + + i_assert(ctx->uid != 0); + + if (_ctx->failed) + return -1; + + T_BEGIN { + ret = lucene_index_build_more(backend->index, ctx->uid, + ctx->part_num, data, size, + ctx->hdr_name); + } T_END; + return ret; +} + +static int +fts_backend_lucene_refresh(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (backend->index != NULL) + lucene_index_close(backend->index); + return 0; +} + +static int fts_backend_lucene_rescan(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + + if (lucene_index_rescan(backend->index) < 0) + return -1; + return lucene_index_optimize(backend->index); +} + +static int fts_backend_lucene_optimize(struct fts_backend *_backend) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + ret = lucene_index_expunge_from_log(backend->index, + backend->expunge_log); + if (ret == 0) { + /* log was corrupted, need to rescan */ + ret = lucene_index_rescan(backend->index); + } + if (ret >= 0) + ret = lucene_index_optimize(backend->index); + return ret; +} + +static int +fts_backend_lucene_lookup(struct fts_backend *_backend, struct mailbox *box, + struct mail_search_arg *args, + enum 
fts_lookup_flags flags, + struct fts_result *result) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + if (fts_backend_select(backend, box) < 0) + return -1; + T_BEGIN { + ret = lucene_index_lookup(backend->index, args, flags, result); + } T_END; + return ret; +} + +/* a char* hash function from ASU -- from glib */ +static unsigned int wstr_hash(const wchar_t *s) +{ + unsigned int g, h = 0; + + while (*s != '\0') { + h = (h << 4) + *s; + if ((g = h & 0xf0000000UL) != 0) { + h = h ^ (g >> 24); + h = h ^ g; + } + s++; + } + + return h; +} + +static int +mailboxes_get_guids(struct mailbox *const boxes[], + HASH_TABLE_TYPE(wguid_result) guids, + struct fts_multi_result *result) +{ + ARRAY(struct fts_result) box_results; + struct fts_result *box_result; + const char *guid; + wchar_t *guid_dup; + unsigned int i, j; + + p_array_init(&box_results, result->pool, 32); + /* first create the box_results - we'll be using pointers to them + later on and appending to the array changes the pointers */ + for (i = 0; boxes[i] != NULL; i++) { + box_result = array_append_space(&box_results); + box_result->box = boxes[i]; + } + for (i = 0; boxes[i] != NULL; i++) { + if (fts_mailbox_get_guid(boxes[i], &guid) < 0) + return -1; + + i_assert(strlen(guid) == MAILBOX_GUID_HEX_LENGTH); + guid_dup = t_new(wchar_t, MAILBOX_GUID_HEX_LENGTH + 1); + for (j = 0; j < MAILBOX_GUID_HEX_LENGTH; j++) + guid_dup[j] = guid[j]; + + box_result = array_idx_modifiable(&box_results, i); + hash_table_insert(guids, guid_dup, box_result); + } + + array_append_zero(&box_results); + result->box_results = array_front_modifiable(&box_results); + return 0; +} + +static int +fts_backend_lucene_lookup_multi(struct fts_backend *_backend, + struct mailbox *const boxes[], + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct lucene_fts_backend *backend = + (struct lucene_fts_backend *)_backend; + int ret; + + T_BEGIN { + 
HASH_TABLE_TYPE(wguid_result) guids; + + hash_table_create(&guids, default_pool, 0, wstr_hash, wcscmp); + ret = mailboxes_get_guids(boxes, guids, result); + if (ret == 0) { + ret = lucene_index_lookup_multi(backend->index, + guids, args, flags, + result); + } + hash_table_destroy(&guids); + } T_END; + return ret; +} + +static void fts_backend_lucene_lookup_done(struct fts_backend *_backend) +{ + /* the next refresh is going to close the index anyway, so we might as + well do it now */ + (void)fts_backend_lucene_refresh(_backend); +} + +struct fts_backend fts_backend_lucene = { + .name = "lucene", + .flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS | + FTS_BACKEND_FLAG_FUZZY_SEARCH, + + { + fts_backend_lucene_alloc, + fts_backend_lucene_init, + fts_backend_lucene_deinit, + fts_backend_lucene_get_last_uid, + fts_backend_lucene_update_init, + fts_backend_lucene_update_deinit, + fts_backend_lucene_update_set_mailbox, + fts_backend_lucene_update_expunge, + fts_backend_lucene_update_set_build_key, + fts_backend_lucene_update_unset_build_key, + fts_backend_lucene_update_build_more, + fts_backend_lucene_refresh, + fts_backend_lucene_rescan, + fts_backend_lucene_optimize, + fts_backend_default_can_lookup, + fts_backend_lucene_lookup, + fts_backend_lucene_lookup_multi, + fts_backend_lucene_lookup_done + } +}; diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.c b/src/plugins/fts-lucene/fts-lucene-plugin.c new file mode 100644 index 0000000..7c58fa7 --- /dev/null +++ b/src/plugins/fts-lucene/fts-lucene-plugin.c @@ -0,0 +1,146 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "crc32.h" +#include "mail-storage-hooks.h" +#include "lucene-wrapper.h" +#include "fts-user.h" +#include "fts-lucene-plugin.h" + +const char *fts_lucene_plugin_version = DOVECOT_ABI_VERSION; + +struct fts_lucene_user_module fts_lucene_user_module = + MODULE_CONTEXT_INIT(&mail_user_module_register); + +static int 
+fts_lucene_plugin_init_settings(struct mail_user *user, + struct fts_lucene_settings *set, + const char *str) +{ + const char *const *tmp; + + for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) { + if (str_begins(*tmp, "default_language=")) { + set->default_language = + p_strdup(user->pool, *tmp + 17); + } else if (str_begins(*tmp, "textcat_conf=")) { + set->textcat_conf = p_strdup(user->pool, *tmp + 13); + } else if (str_begins(*tmp, "textcat_dir=")) { + set->textcat_dir = p_strdup(user->pool, *tmp + 12); + } else if (str_begins(*tmp, "whitespace_chars=")) { + set->whitespace_chars = p_strdup(user->pool, *tmp + 17); + } else if (strcmp(*tmp, "normalize") == 0) { + set->normalize = TRUE; + } else if (strcmp(*tmp, "no_snowball") == 0) { + set->no_snowball = TRUE; + } else if (strcmp(*tmp, "mime_parts") == 0) { + set->mime_parts = TRUE; + } else if (strcmp(*tmp, "use_libfts") == 0) { + set->use_libfts = TRUE; + } else { + i_error("fts_lucene: Invalid setting: %s", *tmp); + return -1; + } + } + if (set->textcat_conf != NULL && set->textcat_dir == NULL) { + i_error("fts_lucene: textcat_conf set, but textcat_dir unset"); + return -1; + } + if (set->textcat_conf == NULL && set->textcat_dir != NULL) { + i_error("fts_lucene: textcat_dir set, but textcat_conf unset"); + return -1; + } + if (set->whitespace_chars == NULL) + set->whitespace_chars = ""; +#ifndef HAVE_FTS_STEMMER + if (set->default_language != NULL) { + i_error("fts_lucene: default_language set, " + "but Dovecot built without stemmer support"); + return -1; + } +#else + if (set->default_language == NULL) + set->default_language = "english"; +#endif +#ifndef HAVE_FTS_TEXTCAT + if (set->textcat_conf != NULL) { + i_error("fts_lucene: textcat_dir set, " + "but Dovecot built without textcat support"); + return -1; + } +#endif + return 0; +} + +uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set) +{ + uint32_t crc; + + if (set->use_libfts) + return crc32_str("l"); + + /* checksum is 
always different when compiling with/without stemmer */ + crc = set->default_language == NULL ? 0 : + crc32_str(set->default_language); + crc = crc32_str_more(crc, set->whitespace_chars); + if (set->normalize) + crc = crc32_str_more(crc, "n"); + if (set->no_snowball) + crc = crc32_str_more(crc, "s"); + /* don't include mime_parts here, since changing it doesn't + necessarily need the index to be rebuilt */ + return crc; +} + +static void fts_lucene_mail_user_deinit(struct mail_user *user) +{ + struct fts_lucene_user *fuser = FTS_LUCENE_USER_CONTEXT_REQUIRE(user); + + fts_mail_user_deinit(user); + fuser->module_ctx.super.deinit(user); +} + +static void fts_lucene_mail_user_created(struct mail_user *user) +{ + struct mail_user_vfuncs *v = user->vlast; + struct fts_lucene_user *fuser; + const char *env, *error; + + fuser = p_new(user->pool, struct fts_lucene_user, 1); + env = mail_user_plugin_getenv(user, "fts_lucene"); + if (env == NULL) + env = ""; + + if (fts_lucene_plugin_init_settings(user, &fuser->set, env) < 0) { + /* invalid settings, disabling */ + return; + } + if (fts_mail_user_init(user, fuser->set.use_libfts, &error) < 0) { + i_error("fts_lucene: %s", error); + return; + } + + fuser->module_ctx.super = *v; + user->vlast = &fuser->module_ctx.super; + v->deinit = fts_lucene_mail_user_deinit; + MODULE_CONTEXT_SET(user, fts_lucene_user_module, fuser); +} + +static struct mail_storage_hooks fts_lucene_mail_storage_hooks = { + .mail_user_created = fts_lucene_mail_user_created +}; + +void fts_lucene_plugin_init(struct module *module ATTR_UNUSED) +{ + fts_backend_register(&fts_backend_lucene); + mail_storage_hooks_add(module, &fts_lucene_mail_storage_hooks); +} + +void fts_lucene_plugin_deinit(void) +{ + fts_backend_unregister(fts_backend_lucene.name); + mail_storage_hooks_remove(&fts_lucene_mail_storage_hooks); + lucene_shutdown(); +} + +const char *fts_lucene_plugin_dependencies[] = { "fts", NULL }; diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.h 
b/src/plugins/fts-lucene/fts-lucene-plugin.h new file mode 100644 index 0000000..69440fb --- /dev/null +++ b/src/plugins/fts-lucene/fts-lucene-plugin.h @@ -0,0 +1,36 @@ +#ifndef FTS_LUCENE_PLUGIN_H +#define FTS_LUCENE_PLUGIN_H + +#include "module-context.h" +#include "mail-user.h" +#include "fts-api-private.h" + +#define FTS_LUCENE_USER_CONTEXT(obj) \ + MODULE_CONTEXT(obj, fts_lucene_user_module) +#define FTS_LUCENE_USER_CONTEXT_REQUIRE(obj) \ + MODULE_CONTEXT_REQUIRE(obj, fts_lucene_user_module) + +struct fts_lucene_settings { + const char *default_language; + const char *textcat_conf, *textcat_dir; + const char *whitespace_chars; + bool normalize; + bool no_snowball; + bool mime_parts; + bool use_libfts; +}; + +struct fts_lucene_user { + union mail_user_module_context module_ctx; + struct fts_lucene_settings set; +}; + +extern struct fts_backend fts_backend_lucene; +extern MODULE_CONTEXT_DEFINE(fts_lucene_user_module, &mail_user_module_register); + +uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set); + +void fts_lucene_plugin_init(struct module *module); +void fts_lucene_plugin_deinit(void); + +#endif diff --git a/src/plugins/fts-lucene/lucene-wrapper.cc b/src/plugins/fts-lucene/lucene-wrapper.cc new file mode 100644 index 0000000..7446693 --- /dev/null +++ b/src/plugins/fts-lucene/lucene-wrapper.cc @@ -0,0 +1,1639 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +extern "C" { +#include "lib.h" +#include "array.h" +#include "unichar.h" +#include "hash.h" +#include "hex-binary.h" +#include "ioloop.h" +#include "unlink-directory.h" +#include "ioloop.h" +#include "mail-index.h" +#include "mail-search.h" +#include "mail-namespace.h" +#include "mailbox-list-private.h" +#include "mail-storage.h" +#include "fts-expunge-log.h" +#include "fts-lucene-plugin.h" +#include "lucene-wrapper.h" + +#include <sys/stat.h> +#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H +# include <libexttextcat/textcat.h> +#elif defined 
(HAVE_LIBTEXTCAT_TEXTCAT_H) +# include <libtextcat/textcat.h> +#elif defined (HAVE_FTS_TEXTCAT) +# include <textcat.h> +#endif +}; +#include <CLucene.h> +#include <CLucene/util/CLStreams.h> +#include <CLucene/search/MultiPhraseQuery.h> +#include "SnowballAnalyzer.h" + +/* Lucene's default is 10000. Use it here also.. */ +#define MAX_TERMS_PER_DOCUMENT 10000 +#define FTS_LUCENE_MAX_SEARCH_TERMS 1000 + +#define LUCENE_LOCK_OVERRIDE_SECS 60 +#define LUCENE_INDEX_CLOSE_TIMEOUT_MSECS (120*1000) + +using namespace lucene::document; +using namespace lucene::index; +using namespace lucene::search; +using namespace lucene::queryParser; +using namespace lucene::analysis; +using namespace lucene::analysis; +using namespace lucene::util; + +struct lucene_query { + Query *query; + BooleanClause::Occur occur; +}; +ARRAY_DEFINE_TYPE(lucene_query, struct lucene_query); + +struct lucene_analyzer { + char *lang; + Analyzer *analyzer; +}; + +struct lucene_index { + char *path; + struct mailbox_list *list; + struct fts_lucene_settings set; + normalizer_func_t *normalizer; + + wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1]; + + IndexReader *reader; + IndexWriter *writer; + IndexSearcher *searcher; + struct timeout *to_close; + + buffer_t *normalizer_buf; + Analyzer *default_analyzer, *cur_analyzer; + ARRAY(struct lucene_analyzer) analyzers; + + Document *doc; + uint32_t prev_uid, prev_part_idx; + bool no_analyzer; +}; + +struct rescan_context { + struct lucene_index *index; + + struct mailbox *box; + guid_128_t box_guid; + int box_ret; + + pool_t pool; + HASH_TABLE(uint8_t *, uint8_t *) seen_mailbox_guids; + + ARRAY_TYPE(seq_range) uids; + struct seq_range_iter uids_iter; + unsigned int uids_iter_n; + + uint32_t last_existing_uid; + bool warned; +}; + +static void *textcat = NULL; +#ifdef HAVE_FTS_TEXTCAT +static bool textcat_broken = FALSE; +#endif +static int textcat_refcount = 0; + +static void lucene_handle_error(struct lucene_index *index, CLuceneError &err, + const char *msg); 
+static void rescan_clear_unseen_mailboxes(struct lucene_index *index, + struct rescan_context *rescan_ctx); + +struct lucene_index *lucene_index_init(const char *path, + struct mailbox_list *list, + const struct fts_lucene_settings *set) +{ + struct lucene_index *index; + + index = i_new(struct lucene_index, 1); + index->path = i_strdup(path); + index->list = list; + if (set != NULL) { + index->set = *set; + index->normalizer = !set->normalize ? NULL : + mailbox_list_get_namespace(list)->user->default_normalizer; + } else { + /* this is valid only for doveadm dump, so it doesn't matter */ + index->set.default_language = ""; + } + if (index->set.use_libfts) { + index->default_analyzer = _CLNEW KeywordAnalyzer(); + } else +#ifdef HAVE_FTS_STEMMER + if (set == NULL || !set->no_snowball) { + index->default_analyzer = + _CLNEW snowball::SnowballAnalyzer(index->normalizer, + index->set.default_language); + } else +#endif + { + index->default_analyzer = _CLNEW standard::StandardAnalyzer(); + if (index->normalizer != NULL) { + index->normalizer_buf = + buffer_create_dynamic(default_pool, 1024); + } + } + + i_array_init(&index->analyzers, 32); + textcat_refcount++; + + return index; +} + +void lucene_index_close(struct lucene_index *index) +{ + timeout_remove(&index->to_close); + + _CLDELETE(index->searcher); + if (index->writer != NULL) { + try { + index->writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close"); + } + _CLDELETE(index->writer); + } + if (index->reader != NULL) { + try { + index->reader->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexReader::close"); + } + _CLDELETE(index->reader); + } +} + +void lucene_index_deinit(struct lucene_index *index) +{ + struct lucene_analyzer *a; + + lucene_index_close(index); + array_foreach_modifiable(&index->analyzers, a) { + i_free(a->lang); + _CLDELETE(a->analyzer); + } + array_free(&index->analyzers); + if (--textcat_refcount == 0 && textcat 
!= NULL) { +#ifdef HAVE_FTS_TEXTCAT + textcat_Done(textcat); +#endif + textcat = NULL; + } + _CLDELETE(index->default_analyzer); + if (index->normalizer_buf != NULL) + buffer_free(&index->normalizer_buf); + i_free(index->path); + i_free(index); +} + +/* Replace every character in data[0..len-1] that is listed in the + whitespace_chars setting with a space. Does nothing when the setting is + empty or when use_libfts is set. */ +static void lucene_data_translate(struct lucene_index *index, + wchar_t *data, unsigned int len) +{ + const char *whitespace_chars = index->set.whitespace_chars; + unsigned int i; + + if (*whitespace_chars == '\0' || index->set.use_libfts) + return; + + for (i = 0; i < len; i++) { + /* strchr() converts its argument to char, so only non-NUL + 7-bit characters can be looked up safely. Without the range + check a wide character whose low byte is 0 (e.g. U+0100) + would match the terminator of whitespace_chars, and other + wide characters could alias an ASCII entry. */ + if (data[i] > 0 && data[i] < 0x80 && + strchr(whitespace_chars, data[i]) != NULL) + data[i] = ' '; + } +} + +void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize, + wchar_t *dest, size_t destsize) +{ + ARRAY_TYPE(unichars) dest_arr; + buffer_t buf = { { 0, 0 } }; + + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + + buffer_create_from_data(&buf, dest, sizeof(wchar_t) * destsize); + array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t)); + if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0) + i_unreached(); + i_assert(array_count(&dest_arr)+1 == destsize); + dest[destsize-1] = 0; +} + +static const wchar_t * +t_lucene_utf8_to_tchar(struct lucene_index *index, const char *str) +{ + ARRAY_TYPE(unichars) dest_arr; + const unichar_t *chars; + wchar_t *ret; + unsigned int len; + + i_assert(sizeof(wchar_t) == sizeof(unichar_t)); + + t_array_init(&dest_arr, strlen(str) + 1); + if (uni_utf8_to_ucs4(str, &dest_arr) < 0) + i_unreached(); + (void)array_append_space(&dest_arr); + + chars = array_get_modifiable(&dest_arr, &len); + ret = (wchar_t *)chars; + lucene_data_translate(index, ret, len - 1); + return ret; +} + +void lucene_index_select_mailbox(struct lucene_index *index, + const wchar_t guid[MAILBOX_GUID_HEX_LENGTH]) +{ + memcpy(index->mailbox_guid, guid, + MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t)); + index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0'; +} + +void lucene_index_unselect_mailbox(struct lucene_index *index) +{ + memset(index->mailbox_guid, 0, 
sizeof(index->mailbox_guid)); +} + +static void lucene_handle_error(struct lucene_index *index, CLuceneError &err, + const char *msg) +{ + const char *error, *what = err.what(); + + i_error("lucene index %s: %s failed (#%d): %s", + index->path, msg, err.number(), what); + + if (index->list != NULL && + (err.number() == CL_ERR_CorruptIndex || + err.number() == CL_ERR_IO)) { + /* delete corrupted index. most IO errors are also about + missing files and other such corruption.. */ + if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) + i_error("unlink_directory(%s) failed: %s", index->path, error); + rescan_clear_unseen_mailboxes(index, NULL); + } +} + +static int lucene_index_open(struct lucene_index *index) +{ + if (index->reader != NULL) { + i_assert(index->to_close != NULL); + timeout_reset(index->to_close); + return 1; + } + + if (!IndexReader::indexExists(index->path)) + return 0; + + try { + index->reader = IndexReader::open(index->path); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexReader::open()"); + return -1; + } + i_assert(index->to_close == NULL); + index->to_close = timeout_add(LUCENE_INDEX_CLOSE_TIMEOUT_MSECS, + lucene_index_close, index); + return 1; +} + +static int lucene_index_open_search(struct lucene_index *index) +{ + int ret; + + if (index->searcher != NULL) + return 1; + + if ((ret = lucene_index_open(index)) <= 0) + return ret; + + index->searcher = _CLNEW IndexSearcher(index->reader); + return 1; +} + +static int +lucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r) +{ + Field *field = doc->getField(_T("uid")); + const TCHAR *uid = field == NULL ? 
NULL : field->stringValue(); + if (uid == NULL) { + i_error("lucene: Corrupted FTS index %s: No UID for document", + index->path); + return -1; + } + + uint32_t num = 0; + while (*uid != 0) { + num = num*10 + (*uid - '0'); + uid++; + } + *uid_r = num; + return 0; +} + +static uint32_t +lucene_doc_get_part(struct lucene_index *index, Document *doc) +{ + Field *field = doc->getField(_T("part")); + const TCHAR *part = field == NULL ? NULL : field->stringValue(); + if (part == NULL) + return 0; + + uint32_t num = 0; + while (*part != 0) { + num = num*10 + (*part - '0'); + part++; + } + return num; +} + +int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r) +{ + int ret = 0; + + *last_uid_r = 0; + + if ((ret = lucene_index_open_search(index)) <= 0) + return ret; + + Term mailbox_term(_T("box"), index->mailbox_guid); + TermQuery query(&mailbox_term); + + uint32_t last_uid = 0; + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + if (uid > last_uid) + last_uid = uid; + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "last_uid search"); + ret = -1; + } + *last_uid_r = last_uid; + return ret; +} + +int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r) +{ + int ret; + + if (index->reader == NULL) { + lucene_index_close(index); + if ((ret = lucene_index_open(index)) < 0) + return -1; + if (ret == 0) { + *count_r = 0; + return 0; + } + } + *count_r = index->reader->numDocs(); + return 0; +} + +static int lucene_settings_check(struct lucene_index *index) +{ + uint32_t set_checksum; + const char *error; + int ret = 0; + + set_checksum = fts_lucene_settings_checksum(&index->set); + ret = fts_index_have_compatible_settings(index->list, set_checksum); + if (ret != 0) + return ret; + + i_warning("fts-lucene: Settings have changed, 
rebuilding index for mailbox"); + + /* settings changed, rebuild index */ + if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) { + i_error("unlink_directory(%s) failed: %s", index->path, error); + ret = -1; + } else { + rescan_clear_unseen_mailboxes(index, NULL); + } + return ret; +} + +int lucene_index_build_init(struct lucene_index *index) +{ + const char *lock_path; + struct stat st; + + lucene_index_close(index); + + lock_path = t_strdup_printf("%s/write.lock", index->path); + if (stat(lock_path, &st) == 0 && + st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) { + if (unlink(lock_path) < 0) + i_error("unlink(%s) failed: %m", lock_path); + } + + if (lucene_settings_check(index) < 0) + return -1; + + bool exists = IndexReader::indexExists(index->path); + try { + index->writer = _CLNEW IndexWriter(index->path, + index->default_analyzer, + !exists); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter()"); + return -1; + } + index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT); + return 0; +} + +#ifdef HAVE_FTS_TEXTCAT +static Analyzer *get_analyzer(struct lucene_index *index, const char *lang) +{ + normalizer_func_t *normalizer = index->normalizer; + const struct lucene_analyzer *a; + struct lucene_analyzer new_analyzer; + Analyzer *analyzer; + + array_foreach(&index->analyzers, a) { + if (strcmp(a->lang, lang) == 0) + return a->analyzer; + } + + memset(&new_analyzer, 0, sizeof(new_analyzer)); + new_analyzer.lang = i_strdup(lang); + new_analyzer.analyzer = + _CLNEW snowball::SnowballAnalyzer(normalizer, lang); + array_append_i(&index->analyzers.arr, &new_analyzer, 1); + return new_analyzer.analyzer; +} + +static void *textcat_init(struct lucene_index *index) +{ + const char *textcat_dir = index->set.textcat_dir; + unsigned int len; + + if (textcat_dir == NULL) + return NULL; + + /* textcat really wants the '/' suffix */ + len = strlen(textcat_dir); + if (len > 0 && textcat_dir[len-1] != '/') + 
textcat_dir = t_strconcat(textcat_dir, "/", NULL); + + return special_textcat_Init(index->set.textcat_conf, textcat_dir); +} + +static Analyzer * +guess_analyzer(struct lucene_index *index, const void *data, size_t size) +{ + const char *lang; + + if (textcat_broken) + return NULL; + + if (textcat == NULL) { + textcat = textcat_init(index); + if (textcat == NULL) { + textcat_broken = TRUE; + return NULL; + } + } + + /* try to guess the language */ + lang = textcat_Classify(textcat, (const char *)data, + I_MIN(size, 500)); + const char *p = strchr(lang, ']'); + if (lang[0] != '[' || p == NULL) + return NULL; + lang = t_strdup_until(lang+1, p); + if (strcmp(lang, index->set.default_language) == 0) + return index->default_analyzer; + + return get_analyzer(index, lang); +} +#else +static Analyzer * +guess_analyzer(struct lucene_index *index ATTR_UNUSED, + const void *data ATTR_UNUSED, size_t size ATTR_UNUSED) +{ + return NULL; +} +#endif + +static int lucene_index_build_flush(struct lucene_index *index) +{ + int ret = 0; + + if (index->doc == NULL) + return 0; + + try { + CL_NS(analysis)::Analyzer *analyzer = NULL; + + if (!index->set.use_libfts) { + analyzer = index->cur_analyzer != NULL ? 
+ index->cur_analyzer : index->default_analyzer; + } + index->writer->addDocument(index->doc, analyzer); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::addDocument()"); + ret = -1; + } + + _CLDELETE(index->doc); + index->doc = NULL; + index->cur_analyzer = NULL; + return ret; +} + +int lucene_index_build_more(struct lucene_index *index, uint32_t uid, + uint32_t part_idx, const unsigned char *data, + size_t size, const char *hdr_name) +{ + wchar_t id[MAX_INT_STRLEN]; + size_t namesize, datasize; + + if (uid != index->prev_uid || part_idx != index->prev_part_idx) { + if (lucene_index_build_flush(index) < 0) + return -1; + index->prev_uid = uid; + index->prev_part_idx = part_idx; + + index->doc = _CLNEW Document(); + swprintf(id, N_ELEMENTS(id), L"%u", uid); + index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + if (part_idx != 0) { + swprintf(id, N_ELEMENTS(id), L"%u", part_idx); + index->doc->add(*_CLNEW Field(_T("part"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + } + index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + } + + if (index->normalizer_buf != NULL && !index->set.use_libfts) { + buffer_set_used_size(index->normalizer_buf, 0); + index->normalizer(data, size, index->normalizer_buf); + data = (const unsigned char *)index->normalizer_buf->data; + size = index->normalizer_buf->used; + } + + datasize = uni_utf8_strlen_n(data, size) + 1; + wchar_t *dest, *dest_free = NULL; + if (datasize < 4096) + dest = t_new(wchar_t, datasize); + else + dest = dest_free = i_new(wchar_t, datasize); + lucene_utf8_n_to_tchar(data, size, dest, datasize); + lucene_data_translate(index, dest, datasize-1); + + int token_flag = index->set.use_libfts ? 
+ Field::INDEX_UNTOKENIZED : Field::INDEX_TOKENIZED; + if (hdr_name != NULL) { + /* hdr_name should be ASCII, but don't break in case it isn't */ + hdr_name = t_str_lcase(hdr_name); + namesize = uni_utf8_strlen(hdr_name) + 1; + wchar_t wname[namesize]; + lucene_utf8_n_to_tchar((const unsigned char *)hdr_name, + strlen(hdr_name), wname, namesize); + if (!index->set.use_libfts) + index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | token_flag)); + index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | token_flag)); + + if (fts_header_want_indexed(hdr_name)) + index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | token_flag)); + } else if (size > 0) { + if (index->cur_analyzer == NULL && !index->set.use_libfts) + index->cur_analyzer = guess_analyzer(index, data, size); + index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | token_flag)); + } + i_free(dest_free); + return 0; +} + +int lucene_index_build_deinit(struct lucene_index *index) +{ + int ret = 0; + + if (index->prev_uid == 0) { + /* no changes. 
*/ + return 0; + } + index->prev_uid = 0; + index->prev_part_idx = 0; + + if (index->writer == NULL) { + lucene_index_close(index); + return -1; + } + + if (lucene_index_build_flush(index) < 0) + ret = -1; + + try { + index->writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close()"); + ret = -1; + } + + lucene_index_close(index); + return ret; +} + +static int +wcharguid_to_guid(guid_128_t dest, const wchar_t *src) +{ + buffer_t buf = { { 0, 0 } }; + char src_chars[GUID_128_SIZE*2 + 1]; + unsigned int i; + + for (i = 0; i < sizeof(src_chars)-1; i++) { + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'a' && src[i] <= 'f')) + src_chars[i] = src[i]; + else + return -1; + } + if (src[i] != '\0') + return -1; + src_chars[i] = '\0'; + + buffer_create_from_data(&buf, dest, GUID_128_SIZE); + return hex_to_binary(src_chars, &buf); +} + +static int +rescan_get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids) +{ + struct mailbox_status status; + + if (mailbox_get_status(box, STATUS_MESSAGES, &status) < 0) + return -1; + + if (status.messages > 0) T_BEGIN { + ARRAY_TYPE(seq_range) seqs; + + t_array_init(&seqs, 2); + seq_range_array_add_range(&seqs, 1, status.messages); + mailbox_get_uid_range(box, &seqs, uids); + } T_END; + return 0; +} + +static int rescan_finish(struct rescan_context *ctx) +{ + int ret; + + ret = fts_index_set_last_uid(ctx->box, ctx->last_existing_uid); + mailbox_free(&ctx->box); + return ret; +} + +static int +fts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc, + guid_128_t guid_r) +{ + Field *field = doc->getField(_T("box")); + const TCHAR *box_guid = field == NULL ? 
NULL : field->stringValue(); + if (box_guid == NULL) { + i_error("lucene: Corrupted FTS index %s: No mailbox for document", + index->path); + return -1; + } + + if (wcharguid_to_guid(guid_r, box_guid) < 0) { + i_error("lucene: Corrupted FTS index %s: " + "box field not in expected format", index->path); + return -1; + } + return 0; +} + +static int +rescan_open_mailbox(struct rescan_context *ctx, Document *doc) +{ + guid_128_t guid, *guidp; + int ret; + + if (fts_lucene_get_mailbox_guid(ctx->index, doc, guid) < 0) + return 0; + + if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) { + /* same as last one */ + return ctx->box_ret; + } + memcpy(ctx->box_guid, guid, sizeof(ctx->box_guid)); + + guidp = p_new(ctx->pool, guid_128_t, 1); + memcpy(guidp, guid, sizeof(*guidp)); + hash_table_insert(ctx->seen_mailbox_guids, guidp, guidp); + + if (ctx->box != NULL) + rescan_finish(ctx); + ctx->box = mailbox_alloc_guid(ctx->index->list, guid, + (enum mailbox_flags)0); + if (mailbox_open(ctx->box) < 0) { + enum mail_error error; + const char *errstr; + + errstr = mailbox_get_last_internal_error(ctx->box, &error); + if (error == MAIL_ERROR_NOTFOUND) + ret = 0; + else { + i_error("lucene: Couldn't open mailbox %s: %s", + mailbox_get_vname(ctx->box), errstr); + ret = -1; + } + mailbox_free(&ctx->box); + ctx->box_ret = ret; + return ret; + } + if (mailbox_sync(ctx->box, (enum mailbox_sync_flags)0) < 0) { + i_error("lucene: Failed to sync mailbox %s: %s", + mailbox_get_vname(ctx->box), + mailbox_get_last_internal_error(ctx->box, NULL)); + mailbox_free(&ctx->box); + ctx->box_ret = -1; + return -1; + } + + array_clear(&ctx->uids); + rescan_get_uids(ctx->box, &ctx->uids); + + ctx->warned = FALSE; + ctx->last_existing_uid = 0; + ctx->uids_iter_n = 0; + seq_range_array_iter_init(&ctx->uids_iter, &ctx->uids); + + ctx->box_ret = 1; + return 1; +} + +static int +rescan_next(struct rescan_context *ctx, Document *doc) +{ + uint32_t lucene_uid, idx_uid; + + if (lucene_doc_get_uid(ctx->index, 
doc, &lucene_uid) < 0) + return 0; + + if (seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n, + &idx_uid)) { + if (idx_uid == lucene_uid) { + ctx->uids_iter_n++; + ctx->last_existing_uid = idx_uid; + return 1; + } + if (idx_uid < lucene_uid) { + /* lucene is missing an UID from the middle. delete + the rest of the messages from this mailbox and + reindex. */ + if (!ctx->warned) { + i_warning("lucene: Mailbox %s " + "missing UIDs in the middle", + mailbox_get_vname(ctx->box)); + ctx->warned = TRUE; + } + } else { + /* UID has been expunged from index. delete from + lucene as well. */ + } + return 0; + } else { + /* the rest of the messages have been expunged from index */ + return 0; + } +} + +static void +rescan_clear_unseen_mailbox(struct lucene_index *index, + struct rescan_context *rescan_ctx, + const char *vname, + const struct fts_index_header *hdr) +{ + struct mailbox *box; + struct mailbox_metadata metadata; + + box = mailbox_alloc(index->list, vname, + (enum mailbox_flags)0); + if (mailbox_open(box) == 0 && + mailbox_get_metadata(box, MAILBOX_METADATA_GUID, + &metadata) == 0 && + (rescan_ctx == NULL || + hash_table_lookup(rescan_ctx->seen_mailbox_guids, + metadata.guid) == NULL)) { + /* this mailbox had no records in lucene index. 
+ make sure its last indexed uid is 0 */ + (void)fts_index_set_header(box, hdr); + } + mailbox_free(&box); +} + +static void rescan_clear_unseen_mailboxes(struct lucene_index *index, + struct rescan_context *rescan_ctx) +{ + const enum mailbox_list_iter_flags iter_flags = + (enum mailbox_list_iter_flags) + (MAILBOX_LIST_ITER_NO_AUTO_BOXES | + MAILBOX_LIST_ITER_RETURN_NO_FLAGS); + struct mailbox_list_iterate_context *iter; + const struct mailbox_info *info; + struct fts_index_header hdr; + struct mail_namespace *ns = index->list->ns; + const char *vname; + + memset(&hdr, 0, sizeof(hdr)); + hdr.settings_checksum = fts_lucene_settings_checksum(&index->set); + + iter = mailbox_list_iter_init(index->list, "*", iter_flags); + while ((info = mailbox_list_iter_next(iter)) != NULL) + rescan_clear_unseen_mailbox(index, rescan_ctx, info->vname, &hdr); + (void)mailbox_list_iter_deinit(&iter); + + if (ns->prefix_len > 0 && + ns->prefix[ns->prefix_len-1] == mail_namespace_get_sep(ns)) { + /* namespace prefix itself isn't returned by the listing */ + vname = t_strndup(index->list->ns->prefix, + index->list->ns->prefix_len-1); + rescan_clear_unseen_mailbox(index, rescan_ctx, vname, &hdr); + } +} + +int lucene_index_rescan(struct lucene_index *index) +{ + static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL }; + struct rescan_context ctx; + bool failed = false; + int ret; + + i_assert(index->list != NULL); + + if ((ret = lucene_index_open_search(index)) < 0) + return ret; + + Term term(_T("box"), _T("*")); + WildcardQuery query(&term); + Sort sort(sort_fields); + + memset(&ctx, 0, sizeof(ctx)); + ctx.index = index; + ctx.pool = pool_alloconly_create("guids", 1024); + hash_table_create(&ctx.seen_mailbox_guids, ctx.pool, 0, + guid_128_hash, guid_128_cmp); + i_array_init(&ctx.uids, 128); + + if (ret > 0) try { + Hits *hits = index->searcher->search(&query, &sort); + + for (size_t i = 0; i < hits->length(); i++) { + ret = rescan_open_mailbox(&ctx, &hits->doc(i)); + if (ret 
> 0) + ret = rescan_next(&ctx, &hits->doc(i)); + if (ret < 0) + failed = true; + else if (ret == 0) + index->reader->deleteDocument(hits->id(i)); + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "rescan search"); + failed = true; + } + lucene_index_close(index); + if (ctx.box != NULL) + rescan_finish(&ctx); + array_free(&ctx.uids); + + rescan_clear_unseen_mailboxes(index, &ctx); + hash_table_destroy(&ctx.seen_mailbox_guids); + pool_unref(&ctx.pool); + return failed ? -1 : 0; +} + +static void guid128_to_wguid(const guid_128_t guid, + wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH + 1]) +{ + buffer_t buf = { { 0, 0 } }; + unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH]; + unsigned int i; + + buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH); + binary_to_hex_append(&buf, guid, GUID_128_SIZE); + for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++) + wguid_hex[i] = guid_hex[i]; + wguid_hex[i] = '\0'; +} + +static bool +lucene_index_add_uid_filter(BooleanQuery *query, + const struct fts_expunge_log_read_record *rec) +{ + struct seq_range_iter iter; + wchar_t wuid[MAX_INT_STRLEN]; + unsigned int n; + uint32_t uid; + + /* RangeQuery and WildcardQuery work by enumerating through all terms + that match them, and then adding TermQueries for them. So we can + simply do the same directly, and if it looks like there are too + many terms just go through everything. 
*/ + + if (seq_range_count(&rec->uids) > FTS_LUCENE_MAX_SEARCH_TERMS) + return false; + + seq_range_array_iter_init(&iter, &rec->uids); n = 0; + while (seq_range_array_iter_nth(&iter, n++, &uid)) { + swprintf(wuid, N_ELEMENTS(wuid), L"%u", uid); + + Term *term = _CLNEW Term(_T("uid"), wuid); + query->add(_CLNEW TermQuery(term), true, BooleanClause::SHOULD); + /* drop the local reference to the Term now that the TermQuery has been created from it */ + _CLDECDELETE(term); + } + return true; +} + +/* Delete the documents that belong to one expunge log record. The search is restricted to rec->mailbox_guid and, when lucene_index_add_uid_filter() accepts the record, to its UIDs. Every hit whose UID is in rec->uids, or whose UID cannot be read, is deleted from the reader. Returns the lucene_index_open_search() result unchanged when it is <= 0, -1 on a CLucene error and 0 otherwise. */ +static int +lucene_index_expunge_record(struct lucene_index *index, + const struct fts_expunge_log_read_record *rec) +{ + int ret; + + if ((ret = lucene_index_open_search(index)) <= 0) + return ret; + + BooleanQuery query; + BooleanQuery uids_query; + + /* without the UID filter all documents of the mailbox are returned and filtered in the loop below */ + if (lucene_index_add_uid_filter(&uids_query, rec)) + query.add(&uids_query, BooleanClause::MUST); + + wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1]; + guid128_to_wguid(rec->mailbox_guid, wguid); + Term term(_T("box"), wguid); + TermQuery mailbox_query(&term); + query.add(&mailbox_query, BooleanClause::MUST); + + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0 || + seq_range_exists(&rec->uids, uid)) + index->reader->deleteDocument(hits->id(i)); + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "expunge search"); + ret = -1; + } + return ret < 0 ?
-1 : 0; +} + +int lucene_index_expunge_from_log(struct lucene_index *index, + struct fts_expunge_log *log) +{ + struct fts_expunge_log_read_ctx *ctx; + const struct fts_expunge_log_read_record *rec; + int ret = 0, ret2; + + ctx = fts_expunge_log_read_begin(log); + while ((rec = fts_expunge_log_read_next(ctx)) != NULL) { + if (lucene_index_expunge_record(index, rec) < 0) { + ret = -1; + break; + } + } + + lucene_index_close(index); + + ret2 = fts_expunge_log_read_end(&ctx); + if (ret < 0 || ret2 < 0) + return -1; + return ret2; +} + +int lucene_index_optimize(struct lucene_index *index) +{ + int ret = 0; + + if (!IndexReader::indexExists(index->path)) + return 0; + if (IndexReader::isLocked(index->path)) + IndexReader::unlock(index->path); + + IndexWriter *writer = NULL; + try { + writer = _CLNEW IndexWriter(index->path, index->default_analyzer, false); + writer->optimize(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::optimize()"); + ret = -1; + } + try { + writer->close(); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "IndexWriter::close()"); + ret = -1; + } + if (writer != NULL) + _CLDELETE(writer); + return ret; +} + +// Mostly copy&pasted from CLucene's QueryParser +static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + StringReader reader(queryText); + TokenStream* source = analyzer->tokenStream(_field, &reader); + + CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v; + CL_NS(analysis)::Token* t = NULL; + int32_t positionCount = 0; + bool severalTokensAtSamePosition = false; + + while (true) { + t = _CLNEW Token(); + try { + Token* _t = source->next(t); + if (_t == NULL) _CLDELETE(t); + }_CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);,{ + t = NULL; + }); + if (t == NULL) + break; + 
v.push_back(t); + if (t->getPositionIncrement() != 0) + positionCount += t->getPositionIncrement(); + else + severalTokensAtSamePosition = true; + } + try { + source->close(); + } + _CLCATCH_ERR_CLEANUP(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);} ); /* cleanup */ + _CLLDELETE(source); + + /* no tokens: no query. one token: FuzzyQuery or TermQuery. several tokens: BooleanQuery, MultiPhraseQuery or PhraseQuery; the fuzzy flag is only consulted in the single token case */ + if (v.size() == 0) + return NULL; + else if (v.size() == 1) { + Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer()); + Query* ret; + if (fuzzy) + ret = _CLNEW FuzzyQuery( tm ); + else + ret = _CLNEW TermQuery( tm ); + _CLDECDELETE(tm); + return ret; + } else { + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery* q = _CLNEW BooleanQuery(true); + for(size_t i=0; i<v.size(); i++ ){ + Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer()); + q->add(_CLNEW TermQuery(tm), true, BooleanClause::SHOULD); + _CLDECDELETE(tm); + } + return q; + }else { + MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery(); + CLArrayList<Term*> multiTerms; + int32_t position = -1; + for (size_t i = 0; i < v.size(); i++) { + t = v.at(i); + if (t->getPositionIncrement() > 0 && multiTerms.size() > 0) { + ValueArray<Term*> termsArray(multiTerms.size()); + multiTerms.toArray(termsArray.values); + mpq->add(&termsArray,position); + multiTerms.clear(); + } + position += t->getPositionIncrement(); + multiTerms.push_back(_CLNEW Term(_field, t->termBuffer())); + } + ValueArray<Term*> termsArray(multiTerms.size()); + multiTerms.toArray(termsArray.values); + mpq->add(&termsArray,position); + return mpq; + } + }else { + PhraseQuery* pq = _CLNEW PhraseQuery(); + int32_t position = -1; + + for (size_t i = 0; i < v.size(); i++) { + t = v.at(i); + Term* tm = _CLNEW Term(_field, t->termBuffer()); + position += t->getPositionIncrement(); + pq->add(tm,position); + _CLDECDELETE(tm); + } + return pq; + } + } +} + +/* Build a query for the UTF-8 string str on the field key. With the use_libfts setting the whole string becomes a single term (FuzzyQuery when fuzzy is set, TermQuery otherwise). Without it the string is normalized when a normalizer buffer exists, an analyzer is picked by guess_analyzer() with the default analyzer as fallback, and getFieldQuery() builds the query, which may be NULL. */ +static Query * +lucene_get_query_str(struct lucene_index *index, + const TCHAR *key, const char *str, bool fuzzy) +{ + const TCHAR *wvalue; + Analyzer *analyzer; + + if
(index->set.use_libfts) { + /* the string is used as one term without running an analyzer over it */ + const wchar_t *wstr = t_lucene_utf8_to_tchar(index, str); + Term* tm = _CLNEW Term(key, wstr); + Query* ret; + if (fuzzy) + ret = _CLNEW FuzzyQuery( tm ); + else + ret = _CLNEW TermQuery( tm ); + _CLDECDELETE(tm); + return ret; + } + + if (index->normalizer_buf != NULL) { + /* from here on str points into normalizer_buf, which is reused by the next call */ + buffer_set_used_size(index->normalizer_buf, 0); + index->normalizer(str, strlen(str), index->normalizer_buf); + buffer_append_c(index->normalizer_buf, '\0'); + str = (const char *)index->normalizer_buf->data; + } + + wvalue = t_lucene_utf8_to_tchar(index, str); + analyzer = guess_analyzer(index, str, strlen(str)); + if (analyzer == NULL) { + analyzer = index->default_analyzer; + i_assert(analyzer != NULL); + } + + return getFieldQuery(analyzer, key, wvalue, fuzzy); +} + +/* Shorthand for lucene_get_query_str() using the value string and the fuzzy flag of a search argument. */ +static Query * +lucene_get_query(struct lucene_index *index, + const TCHAR *key, const struct mail_search_arg *arg) +{ + return lucene_get_query_str(index, key, arg->value.str, arg->fuzzy); +} + +/* Append a query for arg to queries when this function can express it: TEXT searches the hdr or the body field, BODY searches the body field, and header searches use the lowercased header name as the field. Returns true when a query was appended. Returns false for arguments flagged no_fts, for negated arguments unless FTS_LOOKUP_FLAG_AND_ARGS is set, for other argument types, for headers rejected by fts_header_want_indexed(), for empty header values and when no query could be built. */ +static bool +lucene_add_definite_query(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct mail_search_arg *arg, + enum fts_lookup_flags flags) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + Query *q; + + if (arg->no_fts) + return false; + + if (arg->match_not && !and_args) { + /* FIXME: we could handle this by doing multiple queries..
*/ + return false; + } + + switch (arg->type) { + case SEARCH_TEXT: { + /* a text search matches when either the hdr field or the body field matches */ + Query *q1 = lucene_get_query(index, _T("hdr"), arg); + Query *q2 = lucene_get_query(index, _T("body"), arg); + + if (q1 == NULL && q2 == NULL) + q = NULL; + else { + BooleanQuery *bq = _CLNEW BooleanQuery(); + if (q1 != NULL) + bq->add(q1, true, BooleanClause::SHOULD); + if (q2 != NULL) + bq->add(q2, true, BooleanClause::SHOULD); + q = bq; + } + break; + } + case SEARCH_BODY: + q = lucene_get_query(index, _T("body"), arg); + break; + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (!fts_header_want_indexed(arg->hdr_field_name) || + *arg->value.str == '\0') + return false; + + /* the lowercased header name is the field that is searched */ + q = lucene_get_query(index, + t_lucene_utf8_to_tchar(index, t_str_lcase(arg->hdr_field_name)), + arg); + break; + default: + return false; + } + + if (q == NULL) { + /* couldn't handle this search after all (e.g. trying to search + a stop word) */ + return false; + } + + /* OR mode uses SHOULD; AND mode uses MUST, or MUST_NOT for a negated argument */ + struct lucene_query *lq = array_append_space(&queries); + lq->query = q; + if (!and_args) + lq->occur = BooleanClause::SHOULD; + else if (!arg->match_not) + lq->occur = BooleanClause::MUST; + else + lq->occur = BooleanClause::MUST_NOT; + return true; +} + +/* Append a query on the hdr field for a header search argument. An empty search value is handled, unless the use_libfts setting is on, by looking for the lowercased header name. Otherwise the search value is looked up in the hdr field, but only for headers for which fts_header_want_indexed() is false. Arguments flagged no_fts, negated arguments and all other argument types return false. */ +static bool +lucene_add_maybe_query(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct mail_search_arg *arg, + enum fts_lookup_flags flags) +{ + bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0; + Query *q = NULL; + + if (arg->no_fts) + return false; + + if (arg->match_not) { + /* FIXME: we could handle this by doing multiple queries..
*/ + return false; + } + + switch (arg->type) { + case SEARCH_HEADER: + case SEARCH_HEADER_ADDRESS: + case SEARCH_HEADER_COMPRESS_LWSP: + if (*arg->value.str == '\0' && !index->set.use_libfts) { + /* checking potential existence of the header name */ + q = lucene_get_query_str(index, _T("hdr"), + t_str_lcase(arg->hdr_field_name), FALSE); + break; + } + + if (fts_header_want_indexed(arg->hdr_field_name)) + return false; + + /* we can check if the search key exists in some header and + filter out the messages that have no chance of matching */ + q = lucene_get_query(index, _T("hdr"), arg); + break; + default: + return false; + } + + if (q == NULL) { + /* couldn't handle this search after all (e.g. trying to search + a stop word) */ + return false; + } + /* NOTE(review): a set arg->match_not already returned false near the top of this function, so the MUST_NOT branch below looks unreachable */ + struct lucene_query *lq = array_append_space(&queries); + lq->query = q; + if (!and_args) + lq->occur = BooleanClause::SHOULD; + else if (!arg->match_not) + lq->occur = BooleanClause::MUST; + else + lq->occur = BooleanClause::MUST_NOT; + return true; +} + +/* Return TRUE when at least one entry of queries has an occur value other than MUST_NOT. */ +static bool queries_have_non_must_nots(ARRAY_TYPE(lucene_query) &queries) +{ + const struct lucene_query *lq; + + array_foreach(&queries, lq) { + if (lq->occur != BooleanClause::MUST_NOT) + return TRUE; + } + return FALSE; +} + +/* Wrap all entries of queries into one new BooleanQuery and add it to query. Normally every entry keeps its own occur value and the wrapper is added as MUST. When every entry is MUST_NOT, the entries are added as SHOULD and the wrapper itself is added as MUST_NOT. */ +static void search_query_add(BooleanQuery &query, + ARRAY_TYPE(lucene_query) &queries) +{ + BooleanQuery *search_query = _CLNEW BooleanQuery(); + const struct lucene_query *lq; + + if (queries_have_non_must_nots(queries)) { + array_foreach(&queries, lq) + search_query->add(lq->query, true, lq->occur); + query.add(search_query, true, BooleanClause::MUST); + } else { + array_foreach(&queries, lq) + search_query->add(lq->query, true, BooleanClause::SHOULD); + query.add(search_query, true, BooleanClause::MUST_NOT); + } +} + +/* Run queries restricted to the mailbox in index->mailbox_guid and add the UID of every hit to uids_r. When result is not NULL, a score entry is appended for each UID that seq_range_array_add() does not report as a duplicate, and result->scores_sorted is cleared if a UID is smaller than the one before it. Returns 0 on success and -1 when a UID cannot be read or CLucene reports an error. */ +static int +lucene_index_search(struct lucene_index *index, + ARRAY_TYPE(lucene_query) &queries, + struct fts_result *result, ARRAY_TYPE(seq_range) *uids_r) +{ + struct fts_score_map *score; + int ret = 0; + + BooleanQuery
query; + search_query_add(query, queries); + + /* only documents of the currently selected mailbox may match */ + Term mailbox_term(_T("box"), index->mailbox_guid); + TermQuery mailbox_query(&mailbox_term); + query.add(&mailbox_query, BooleanClause::MUST); + + try { + Hits *hits = index->searcher->search(&query); + + uint32_t last_uid = 0; + if (result != NULL) + result->scores_sorted = true; + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + if (seq_range_array_add(uids_r, uid)) { + /* duplicate result */ + } else if (result != NULL) { + if (uid < last_uid) + result->scores_sorted = false; + last_uid = uid; + + score = array_append_space(&result->scores); + score->uid = uid; + score->score = hits->score(i); + } + } + _CLDELETE(hits); + return ret; + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "search"); + return -1; + } +} + +/* Search the selected mailbox for the argument list args. Arguments accepted by lucene_add_definite_query() are searched first; only when there are none are the arguments offered to lucene_add_maybe_query(), whose hits go to result->maybe_uids. Every argument that was turned into a query gets match_always set. Returns 0 on success and -1 on a search error or when lucene_index_open_search() does not return a positive value. */ +int lucene_index_lookup(struct lucene_index *index, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result) +{ + struct mail_search_arg *arg; + + if (lucene_index_open_search(index) <= 0) + return -1; + + ARRAY_TYPE(lucene_query) def_queries; + t_array_init(&def_queries, 16); + bool have_definites = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_definite_query(index, def_queries, arg, flags)) { + arg->match_always = true; + have_definites = true; + } + } + + if (have_definites) { + /* hits go to definite_uids unless FTS_LOOKUP_FLAG_NO_AUTO_FUZZY is set, in which case they go to maybe_uids */ + ARRAY_TYPE(seq_range) *uids_arr = + (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ? + &result->definite_uids : &result->maybe_uids; + if (lucene_index_search(index, def_queries, result, + uids_arr) < 0) + return -1; + } + + if (have_definites) { + /* FIXME: mixing up definite + maybe queries is broken.
if the + definite query matched, it'll just assume that the maybe + queries matched as well */ + return 0; + } + + ARRAY_TYPE(lucene_query) maybe_queries; + t_array_init(&maybe_queries, 16); + bool have_maybies = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_maybe_query(index, maybe_queries, arg, flags)) { + arg->match_always = true; + have_maybies = true; + } + } + + if (have_maybies) { + if (lucene_index_search(index, maybe_queries, NULL, + &result->maybe_uids) < 0) + return -1; + } + return 0; +} + +static int +lucene_index_search_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + ARRAY_TYPE(lucene_query) &queries, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct fts_score_map *score; + int ret = 0; + + BooleanQuery query; + search_query_add(query, queries); + + BooleanQuery mailbox_query; + struct hash_iterate_context *iter; + void *key, *value; + iter = hash_table_iterate_init(guids); + while (hash_table_iterate(iter, guids, &key, &value)) { + Term *term = _CLNEW Term(_T("box"), (wchar_t *)key); + TermQuery *q = _CLNEW TermQuery(term); + mailbox_query.add(q, true, BooleanClause::SHOULD); + } + hash_table_iterate_deinit(&iter); + + query.add(&mailbox_query, BooleanClause::MUST); + try { + Hits *hits = index->searcher->search(&query); + + for (size_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + Field *field = hits->doc(i).getField(_T("box")); + const TCHAR *box_guid = field == NULL ? 
NULL : field->stringValue(); + if (box_guid == NULL) { + i_error("lucene: Corrupted FTS index %s: No mailbox for document", + index->path); + ret = -1; + break; + } + struct fts_result *br = + hash_table_lookup(guids, box_guid); + if (br == NULL) { + i_warning("lucene: Returned unexpected mailbox with GUID %ls", box_guid); + continue; + } + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + /* NOTE(review): lucene_index_lookup() selects definite_uids when FTS_LOOKUP_FLAG_NO_AUTO_FUZZY is unset, which is the opposite of the selection made here - confirm which mapping is intended */ + ARRAY_TYPE(seq_range) *uids_arr = + (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ? + &br->maybe_uids : &br->definite_uids; + /* the arrays of a mailbox result are created from result->pool on its first hit */ + if (!array_is_created(uids_arr)) { + p_array_init(uids_arr, result->pool, 32); + p_array_init(&br->scores, result->pool, 32); + } + if (seq_range_array_add(uids_arr, uid)) { + /* duplicate result */ + } else { + score = array_append_space(&br->scores); + score->uid = uid; + score->score = hits->score(i); + } + } + _CLDELETE(hits); + return ret; + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "multi search"); + return -1; + } +} + +/* Search the mailboxes listed in guids for the argument list args. Only arguments accepted by lucene_add_definite_query() are searched; each of them gets match_always set. Returns 0 on success and -1 on a search error or when lucene_index_open_search() does not return a positive value. */ +int lucene_index_lookup_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result) +{ + struct mail_search_arg *arg; + + if (lucene_index_open_search(index) <= 0) + return -1; + + ARRAY_TYPE(lucene_query) def_queries; + t_array_init(&def_queries, 16); + bool have_definites = false; + + for (arg = args; arg != NULL; arg = arg->next) { + if (lucene_add_definite_query(index, def_queries, arg, flags)) { + arg->match_always = true; + have_definites = true; + } + } + + if (have_definites) { + if (lucene_index_search_multi(index, guids, def_queries, flags, + result) < 0) + return -1; + } + return 0; +} + +/* State of one iteration over all documents of an index: the query objects that produced hits, the position i of the next hit, the record handed out by lucene_index_iter_next() and a flag that remembers a failure until deinit. */ +struct lucene_index_iter { + struct lucene_index *index; + struct lucene_index_record rec; + + Term *term; + WildcardQuery *query; + Sort *sort; + + Hits *hits; + size_t i; + bool failed; +}; + +/* Start iterating over all documents of the index, sorted by box and uid. An iterator is always returned; failures are reported by lucene_index_iter_deinit(). */ +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index
*index) +{ + static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL }; + struct lucene_index_iter *iter; + int ret; + + iter = i_new(struct lucene_index_iter, 1); + iter->index = index; + /* a result of 0 yields an iterator without hits that is not marked as failed */ + if ((ret = lucene_index_open_search(index)) <= 0) { + if (ret < 0) + iter->failed = true; + return iter; + } + + iter->term = _CLNEW Term(_T("box"), _T("*")); + iter->query = _CLNEW WildcardQuery(iter->term); + iter->sort = _CLNEW Sort(sort_fields); + + try { + iter->hits = index->searcher->search(iter->query, iter->sort); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "rescan search"); + iter->failed = true; + } + return iter; +} + +/* Return the record of the next document, or NULL when the iterator has no hits or all hits were returned. The returned pointer refers to storage inside iter that is overwritten by the next call. The results of the mailbox GUID and UID lookups are ignored; the record is zeroed before they run. */ +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter) +{ + if (iter->hits == NULL) + return NULL; + if (iter->i == iter->hits->length()) + return NULL; + + Document *doc = &iter->hits->doc(iter->i); + iter->i++; + + memset(&iter->rec, 0, sizeof(iter->rec)); + (void)fts_lucene_get_mailbox_guid(iter->index, doc, + iter->rec.mailbox_guid); + (void)lucene_doc_get_uid(iter->index, doc, &iter->rec.uid); + iter->rec.part_num = lucene_doc_get_part(iter->index, doc); + return &iter->rec; +} + +/* Free the iterator and everything it owns and set *_iter to NULL. Returns -1 when the iterator was marked as failed and 0 otherwise. */ +int lucene_index_iter_deinit(struct lucene_index_iter **_iter) +{ + struct lucene_index_iter *iter = *_iter; + int ret = iter->failed ?
-1 : 0; + + *_iter = NULL; + if (iter->hits != NULL) + _CLDELETE(iter->hits); + /* query, sort and term are created together in lucene_index_iter_init(), so a non-NULL query implies the other two */ + if (iter->query != NULL) { + _CLDELETE(iter->query); + _CLDELETE(iter->sort); + _CLDELETE(iter->term); + } + i_free(iter); + return ret; +} + +/* Plain C entry point that forwards to _lucene_shutdown(). */ +void lucene_shutdown(void) +{ + _lucene_shutdown(); +} diff --git a/src/plugins/fts-lucene/lucene-wrapper.h b/src/plugins/fts-lucene/lucene-wrapper.h new file mode 100644 index 0000000..270e902 --- /dev/null +++ b/src/plugins/fts-lucene/lucene-wrapper.h @@ -0,0 +1,67 @@ +#ifndef LUCENE_WRAPPER_H +#define LUCENE_WRAPPER_H + +#include "fts-api-private.h" +#include "guid.h" + +struct mailbox_list; +struct fts_expunge_log; +struct fts_lucene_settings; + +/* number of hex digits in the text form of a 128-bit GUID: two per byte, without a terminating NUL */ +#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2) + +/* one indexed document as handed out by lucene_index_iter_next() */ +struct lucene_index_record { + guid_128_t mailbox_guid; + uint32_t uid, part_num; +}; + +/* maps a mailbox GUID in wide-character hex form to the fts_result of that mailbox */ +HASH_TABLE_DEFINE_TYPE(wguid_result, wchar_t *, struct fts_result *); + +struct lucene_index * +lucene_index_init(const char *path, struct mailbox_list *list, + const struct fts_lucene_settings *set) + ATTR_NULL(2, 3); +void lucene_index_deinit(struct lucene_index *index); + +void lucene_index_select_mailbox(struct lucene_index *index, + const wchar_t guid[MAILBOX_GUID_HEX_LENGTH]); +void lucene_index_unselect_mailbox(struct lucene_index *index); +int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r); +int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r); + +int lucene_index_build_init(struct lucene_index *index); +int lucene_index_build_more(struct lucene_index *index, uint32_t uid, + uint32_t part_num, const unsigned char *data, + size_t size, const char *hdr_name); +int lucene_index_build_deinit(struct lucene_index *index); + +void lucene_index_close(struct lucene_index *index); +/* requires the index to have been created with a mailbox list; returns 0 on success and a negative value on failure */ +int lucene_index_rescan(struct lucene_index *index); +int lucene_index_expunge_from_log(struct lucene_index *index, + struct fts_expunge_log *log); +int lucene_index_optimize(struct lucene_index *index); + +int
lucene_index_lookup(struct lucene_index *index, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_result *result); + +/* Search all mailboxes whose GUIDs are keys of guids; hits are stored in the fts_result mapped to each GUID. */ +int lucene_index_lookup_multi(struct lucene_index *index, + HASH_TABLE_TYPE(wguid_result) guids, + struct mail_search_arg *args, + enum fts_lookup_flags flags, + struct fts_multi_result *result); + +/* Iterate over every document of the index. init always returns an iterator, next returns NULL at the end, and deinit returns -1 if the iteration failed. */ +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index *index); +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter); +int lucene_index_iter_deinit(struct lucene_index_iter **iter); + +/* internal: */ +void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize, + wchar_t *dest, size_t destsize); + +void lucene_shutdown(void); + +#endif diff --git a/src/plugins/fts-lucene/textcat.conf b/src/plugins/fts-lucene/textcat.conf new file mode 100644 index 0000000..d75c4fe --- /dev/null +++ b/src/plugins/fts-lucene/textcat.conf @@ -0,0 +1,25 @@ +# +# A sample config file for the language models +# provided with Gertjan van Noords language guesser +# (http://odur.let.rug.nl/~vannoord/TextCat/) +# +# Notes: +# - You may consider eliminating a couple of small languages from this +# list because they cause false positives with big languages and are +# bad for performance. (Do you really want to recognize Drents?) +# - Putting the most probable languages at the top of the list +# improves performance, because this will raise the threshold for +# likely candidates more quickly. +# +LM/english.lm english +LM/italian.lm italian +LM/danish.lm danish +LM/dutch.lm dutch +LM/finnish.lm finnish +LM/french.lm french +LM/german.lm german +LM/norwegian.lm norwegian +LM/portuguese.lm portuguese +LM/russian.lm russian +LM/spanish.lm spanish +LM/swedish.lm swedish |