summary refs log tree commit diff stats
path: root/src/plugins/fts-lucene
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 09:51:24 +0000
commit f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
tree a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/plugins/fts-lucene
parent Initial commit. (diff)
download dovecot-upstream.tar.xz
download dovecot-upstream.zip
Adding upstream version 1:2.3.19.1+dfsg1. (refs: upstream/1%2.3.19.1+dfsg1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- src/plugins/fts-lucene/Makefile.am 61
-rw-r--r-- src/plugins/fts-lucene/Makefile.in 990
-rw-r--r-- src/plugins/fts-lucene/Snowball.cc 151
-rw-r--r-- src/plugins/fts-lucene/SnowballAnalyzer.h 51
-rw-r--r-- src/plugins/fts-lucene/SnowballFilter.h 42
-rw-r--r-- src/plugins/fts-lucene/doveadm-fts-lucene.c 70
-rw-r--r-- src/plugins/fts-lucene/fts-backend-lucene.c 605
-rw-r--r-- src/plugins/fts-lucene/fts-lucene-plugin.c 146
-rw-r--r-- src/plugins/fts-lucene/fts-lucene-plugin.h 36
-rw-r--r-- src/plugins/fts-lucene/lucene-wrapper.cc 1639
-rw-r--r-- src/plugins/fts-lucene/lucene-wrapper.h 67
-rw-r--r-- src/plugins/fts-lucene/textcat.conf 25
12 files changed, 3883 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/Makefile.am b/src/plugins/fts-lucene/Makefile.am
new file mode 100644
index 0000000..d68e6ae
--- /dev/null
+++ b/src/plugins/fts-lucene/Makefile.am
@@ -0,0 +1,61 @@
+doveadm_moduledir = $(moduledir)/doveadm
+
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/lib \
+ -I$(top_srcdir)/src/lib-mail \
+ -I$(top_srcdir)/src/lib-index \
+ -I$(top_srcdir)/src/lib-storage \
+ -I$(top_srcdir)/src/plugins/fts \
+ -I$(top_srcdir)/src/doveadm
+
+AM_CXXFLAGS = \
+ $(CLUCENE_CFLAGS) \
+ $(LIBEXTTEXTCAT_CFLAGS)
+
+NOPLUGIN_LDFLAGS =
+lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
+lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+ lib21_fts_lucene_plugin.la
+
+if BUILD_FTS_STEMMER
+STEMMER_LIBS = -lstemmer
+SHOWBALL_SOURCES = Snowball.cc
+endif
+
+if BUILD_FTS_EXTTEXTCAT
+TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS)
+else
+if BUILD_FTS_TEXTCAT
+TEXTCAT_LIBS = -ltextcat
+endif
+endif
+
+lib21_fts_lucene_plugin_la_LIBADD = \
+ $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS)
+
+lib21_fts_lucene_plugin_la_SOURCES = \
+ fts-lucene-plugin.c \
+ fts-backend-lucene.c \
+ lucene-wrapper.cc \
+ $(SHOWBALL_SOURCES)
+
+noinst_HEADERS = \
+ fts-lucene-plugin.h \
+ lucene-wrapper.h \
+ SnowballAnalyzer.h \
+ SnowballFilter.h
+
+if BUILD_FTS_TEXTCAT
+exampledir = $(docdir)/example-config
+example_DATA = \
+ textcat.conf
+endif
+EXTRA_DIST = textcat.conf
+
+doveadm_module_LTLIBRARIES = \
+ lib20_doveadm_fts_lucene_plugin.la
+
+lib20_doveadm_fts_lucene_plugin_la_SOURCES = \
+ doveadm-fts-lucene.c
diff --git a/src/plugins/fts-lucene/Makefile.in b/src/plugins/fts-lucene/Makefile.in
new file mode 100644
index 0000000..323982d
--- /dev/null
+++ b/src/plugins/fts-lucene/Makefile.in
@@ -0,0 +1,990 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/plugins/fts-lucene
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ac_checktype2.m4 \
+ $(top_srcdir)/m4/ac_typeof.m4 $(top_srcdir)/m4/arc4random.m4 \
+ $(top_srcdir)/m4/blockdev.m4 $(top_srcdir)/m4/c99_vsnprintf.m4 \
+ $(top_srcdir)/m4/clock_gettime.m4 $(top_srcdir)/m4/crypt.m4 \
+ $(top_srcdir)/m4/crypt_xpg6.m4 $(top_srcdir)/m4/dbqlk.m4 \
+ $(top_srcdir)/m4/dirent_dtype.m4 $(top_srcdir)/m4/dovecot.m4 \
+ $(top_srcdir)/m4/fd_passing.m4 $(top_srcdir)/m4/fdatasync.m4 \
+ $(top_srcdir)/m4/flexible_array_member.m4 \
+ $(top_srcdir)/m4/glibc.m4 $(top_srcdir)/m4/gmtime_max.m4 \
+ $(top_srcdir)/m4/gmtime_tm_gmtoff.m4 \
+ $(top_srcdir)/m4/ioloop.m4 $(top_srcdir)/m4/iovec.m4 \
+ $(top_srcdir)/m4/ipv6.m4 $(top_srcdir)/m4/libcap.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libwrap.m4 \
+ $(top_srcdir)/m4/linux_mremap.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/mmap_write.m4 \
+ $(top_srcdir)/m4/mntctl.m4 $(top_srcdir)/m4/modules.m4 \
+ $(top_srcdir)/m4/notify.m4 $(top_srcdir)/m4/nsl.m4 \
+ $(top_srcdir)/m4/off_t_max.m4 $(top_srcdir)/m4/pkg.m4 \
+ $(top_srcdir)/m4/pr_set_dumpable.m4 \
+ $(top_srcdir)/m4/q_quotactl.m4 $(top_srcdir)/m4/quota.m4 \
+ $(top_srcdir)/m4/random.m4 $(top_srcdir)/m4/rlimit.m4 \
+ $(top_srcdir)/m4/sendfile.m4 $(top_srcdir)/m4/size_t_signed.m4 \
+ $(top_srcdir)/m4/sockpeercred.m4 $(top_srcdir)/m4/sql.m4 \
+ $(top_srcdir)/m4/ssl.m4 $(top_srcdir)/m4/st_tim.m4 \
+ $(top_srcdir)/m4/static_array.m4 $(top_srcdir)/m4/test_with.m4 \
+ $(top_srcdir)/m4/time_t.m4 $(top_srcdir)/m4/typeof.m4 \
+ $(top_srcdir)/m4/typeof_dev_t.m4 \
+ $(top_srcdir)/m4/uoff_t_max.m4 $(top_srcdir)/m4/vararg.m4 \
+ $(top_srcdir)/m4/want_apparmor.m4 \
+ $(top_srcdir)/m4/want_bsdauth.m4 \
+ $(top_srcdir)/m4/want_bzlib.m4 \
+ $(top_srcdir)/m4/want_cassandra.m4 \
+ $(top_srcdir)/m4/want_cdb.m4 \
+ $(top_srcdir)/m4/want_checkpassword.m4 \
+ $(top_srcdir)/m4/want_clucene.m4 $(top_srcdir)/m4/want_db.m4 \
+ $(top_srcdir)/m4/want_gssapi.m4 $(top_srcdir)/m4/want_icu.m4 \
+ $(top_srcdir)/m4/want_ldap.m4 $(top_srcdir)/m4/want_lua.m4 \
+ $(top_srcdir)/m4/want_lz4.m4 $(top_srcdir)/m4/want_lzma.m4 \
+ $(top_srcdir)/m4/want_mysql.m4 $(top_srcdir)/m4/want_pam.m4 \
+ $(top_srcdir)/m4/want_passwd.m4 $(top_srcdir)/m4/want_pgsql.m4 \
+ $(top_srcdir)/m4/want_prefetch.m4 \
+ $(top_srcdir)/m4/want_shadow.m4 \
+ $(top_srcdir)/m4/want_sodium.m4 $(top_srcdir)/m4/want_solr.m4 \
+ $(top_srcdir)/m4/want_sqlite.m4 \
+ $(top_srcdir)/m4/want_stemmer.m4 \
+ $(top_srcdir)/m4/want_systemd.m4 \
+ $(top_srcdir)/m4/want_textcat.m4 \
+ $(top_srcdir)/m4/want_unwind.m4 $(top_srcdir)/m4/want_zlib.m4 \
+ $(top_srcdir)/m4/want_zstd.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(doveadm_moduledir)" \
+ "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)"
+LTLIBRARIES = $(doveadm_module_LTLIBRARIES) $(module_LTLIBRARIES)
+lib20_doveadm_fts_lucene_plugin_la_LIBADD =
+am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS = doveadm-fts-lucene.lo
+lib20_doveadm_fts_lucene_plugin_la_OBJECTS = \
+ $(am_lib20_doveadm_fts_lucene_plugin_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+lib20_doveadm_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) \
+ --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
+ $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(lib20_doveadm_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@
+am__DEPENDENCIES_1 =
+@BUILD_FTS_EXTTEXTCAT_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1)
+lib21_fts_lucene_plugin_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1)
+am__lib21_fts_lucene_plugin_la_SOURCES_DIST = fts-lucene-plugin.c \
+ fts-backend-lucene.c lucene-wrapper.cc Snowball.cc
+@BUILD_FTS_STEMMER_TRUE@am__objects_1 = Snowball.lo
+am_lib21_fts_lucene_plugin_la_OBJECTS = fts-lucene-plugin.lo \
+ fts-backend-lucene.lo lucene-wrapper.lo $(am__objects_1)
+lib21_fts_lucene_plugin_la_OBJECTS = \
+ $(am_lib21_fts_lucene_plugin_la_OBJECTS)
+lib21_fts_lucene_plugin_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+ $(AM_CXXFLAGS) $(CXXFLAGS) \
+ $(lib21_fts_lucene_plugin_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = ./$(DEPDIR)/Snowball.Plo \
+ ./$(DEPDIR)/doveadm-fts-lucene.Plo \
+ ./$(DEPDIR)/fts-backend-lucene.Plo \
+ ./$(DEPDIR)/fts-lucene-plugin.Plo \
+ ./$(DEPDIR)/lucene-wrapper.Plo
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo " CXX " $@;
+am__v_CXX_1 =
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo " CXXLD " $@;
+am__v_CXXLD_1 =
+SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \
+ $(lib21_fts_lucene_plugin_la_SOURCES)
+DIST_SOURCES = $(lib20_doveadm_fts_lucene_plugin_la_SOURCES) \
+ $(am__lib21_fts_lucene_plugin_la_SOURCES_DIST)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+DATA = $(example_DATA)
+HEADERS = $(noinst_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ACLOCAL_AMFLAGS = @ACLOCAL_AMFLAGS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+APPARMOR_LIBS = @APPARMOR_LIBS@
+AR = @AR@
+AUTH_CFLAGS = @AUTH_CFLAGS@
+AUTH_LIBS = @AUTH_LIBS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BINARY_CFLAGS = @BINARY_CFLAGS@
+BINARY_LDFLAGS = @BINARY_LDFLAGS@
+BISON = @BISON@
+CASSANDRA_CFLAGS = @CASSANDRA_CFLAGS@
+CASSANDRA_LIBS = @CASSANDRA_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CDB_LIBS = @CDB_LIBS@
+CFLAGS = @CFLAGS@
+CLUCENE_CFLAGS = @CLUCENE_CFLAGS@
+CLUCENE_LIBS = @CLUCENE_LIBS@
+COMPRESS_LIBS = @COMPRESS_LIBS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CRYPT_LIBS = @CRYPT_LIBS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DICT_LIBS = @DICT_LIBS@
+DLLIB = @DLLIB@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FLEX = @FLEX@
+FUZZER_CPPFLAGS = @FUZZER_CPPFLAGS@
+FUZZER_LDFLAGS = @FUZZER_LDFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KRB5CONFIG = @KRB5CONFIG@
+KRB5_CFLAGS = @KRB5_CFLAGS@
+KRB5_LIBS = @KRB5_LIBS@
+LD = @LD@
+LDAP_LIBS = @LDAP_LIBS@
+LDFLAGS = @LDFLAGS@
+LD_NO_WHOLE_ARCHIVE = @LD_NO_WHOLE_ARCHIVE@
+LD_WHOLE_ARCHIVE = @LD_WHOLE_ARCHIVE@
+LIBCAP = @LIBCAP@
+LIBDOVECOT = @LIBDOVECOT@
+LIBDOVECOT_COMPRESS = @LIBDOVECOT_COMPRESS@
+LIBDOVECOT_DEPS = @LIBDOVECOT_DEPS@
+LIBDOVECOT_DSYNC = @LIBDOVECOT_DSYNC@
+LIBDOVECOT_LA_LIBS = @LIBDOVECOT_LA_LIBS@
+LIBDOVECOT_LDA = @LIBDOVECOT_LDA@
+LIBDOVECOT_LDAP = @LIBDOVECOT_LDAP@
+LIBDOVECOT_LIBFTS = @LIBDOVECOT_LIBFTS@
+LIBDOVECOT_LIBFTS_DEPS = @LIBDOVECOT_LIBFTS_DEPS@
+LIBDOVECOT_LOGIN = @LIBDOVECOT_LOGIN@
+LIBDOVECOT_LUA = @LIBDOVECOT_LUA@
+LIBDOVECOT_LUA_DEPS = @LIBDOVECOT_LUA_DEPS@
+LIBDOVECOT_SQL = @LIBDOVECOT_SQL@
+LIBDOVECOT_STORAGE = @LIBDOVECOT_STORAGE@
+LIBDOVECOT_STORAGE_DEPS = @LIBDOVECOT_STORAGE_DEPS@
+LIBEXTTEXTCAT_CFLAGS = @LIBEXTTEXTCAT_CFLAGS@
+LIBEXTTEXTCAT_LIBS = @LIBEXTTEXTCAT_LIBS@
+LIBICONV = @LIBICONV@
+LIBICU_CFLAGS = @LIBICU_CFLAGS@
+LIBICU_LIBS = @LIBICU_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSODIUM_CFLAGS = @LIBSODIUM_CFLAGS@
+LIBSODIUM_LIBS = @LIBSODIUM_LIBS@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
+LIBTIRPC_LIBS = @LIBTIRPC_LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
+LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
+LIBWRAP_LIBS = @LIBWRAP_LIBS@
+LINKED_STORAGE_LDADD = @LINKED_STORAGE_LDADD@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBICONV = @LTLIBICONV@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+LUA_CFLAGS = @LUA_CFLAGS@
+LUA_LIBS = @LUA_LIBS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MODULE_LIBS = @MODULE_LIBS@
+MODULE_SUFFIX = @MODULE_SUFFIX@
+MYSQL_CFLAGS = @MYSQL_CFLAGS@
+MYSQL_CONFIG = @MYSQL_CONFIG@
+MYSQL_LIBS = @MYSQL_LIBS@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOPLUGIN_LDFLAGS =
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PANDOC = @PANDOC@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PGSQL_CFLAGS = @PGSQL_CFLAGS@
+PGSQL_LIBS = @PGSQL_LIBS@
+PG_CONFIG = @PG_CONFIG@
+PIE_CFLAGS = @PIE_CFLAGS@
+PIE_LDFLAGS = @PIE_LDFLAGS@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+QUOTA_LIBS = @QUOTA_LIBS@
+RANLIB = @RANLIB@
+RELRO_LDFLAGS = @RELRO_LDFLAGS@
+RPCGEN = @RPCGEN@
+RUN_TEST = @RUN_TEST@
+SED = @SED@
+SETTING_FILES = @SETTING_FILES@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SQLITE_CFLAGS = @SQLITE_CFLAGS@
+SQLITE_LIBS = @SQLITE_LIBS@
+SQL_CFLAGS = @SQL_CFLAGS@
+SQL_LIBS = @SQL_LIBS@
+SSL_CFLAGS = @SSL_CFLAGS@
+SSL_LIBS = @SSL_LIBS@
+STRIP = @STRIP@
+SYSTEMD_CFLAGS = @SYSTEMD_CFLAGS@
+SYSTEMD_LIBS = @SYSTEMD_LIBS@
+VALGRIND = @VALGRIND@
+VERSION = @VERSION@
+ZSTD_CFLAGS = @ZSTD_CFLAGS@
+ZSTD_LIBS = @ZSTD_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+dict_drivers = @dict_drivers@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+moduledir = @moduledir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+rundir = @rundir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sql_drivers = @sql_drivers@
+srcdir = @srcdir@
+ssldir = @ssldir@
+statedir = @statedir@
+sysconfdir = @sysconfdir@
+systemdservicetype = @systemdservicetype@
+systemdsystemunitdir = @systemdsystemunitdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+doveadm_moduledir = $(moduledir)/doveadm
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/src/lib \
+ -I$(top_srcdir)/src/lib-mail \
+ -I$(top_srcdir)/src/lib-index \
+ -I$(top_srcdir)/src/lib-storage \
+ -I$(top_srcdir)/src/plugins/fts \
+ -I$(top_srcdir)/src/doveadm
+
+AM_CXXFLAGS = \
+ $(CLUCENE_CFLAGS) \
+ $(LIBEXTTEXTCAT_CFLAGS)
+
+lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
+lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
+module_LTLIBRARIES = \
+ lib21_fts_lucene_plugin.la
+
+@BUILD_FTS_STEMMER_TRUE@STEMMER_LIBS = -lstemmer
+@BUILD_FTS_STEMMER_TRUE@SHOWBALL_SOURCES = Snowball.cc
+@BUILD_FTS_EXTTEXTCAT_FALSE@@BUILD_FTS_TEXTCAT_TRUE@TEXTCAT_LIBS = -ltextcat
+@BUILD_FTS_EXTTEXTCAT_TRUE@TEXTCAT_LIBS = $(LIBEXTTEXTCAT_LIBS)
+lib21_fts_lucene_plugin_la_LIBADD = \
+ $(CLUCENE_LIBS) $(TEXTCAT_LIBS) $(STEMMER_LIBS)
+
+lib21_fts_lucene_plugin_la_SOURCES = \
+ fts-lucene-plugin.c \
+ fts-backend-lucene.c \
+ lucene-wrapper.cc \
+ $(SHOWBALL_SOURCES)
+
+noinst_HEADERS = \
+ fts-lucene-plugin.h \
+ lucene-wrapper.h \
+ SnowballAnalyzer.h \
+ SnowballFilter.h
+
+@BUILD_FTS_TEXTCAT_TRUE@exampledir = $(docdir)/example-config
+@BUILD_FTS_TEXTCAT_TRUE@example_DATA = \
+@BUILD_FTS_TEXTCAT_TRUE@ textcat.conf
+
+EXTRA_DIST = textcat.conf
+doveadm_module_LTLIBRARIES = \
+ lib20_doveadm_fts_lucene_plugin.la
+
+lib20_doveadm_fts_lucene_plugin_la_SOURCES = \
+ doveadm-fts-lucene.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cc .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/plugins/fts-lucene/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+install-doveadm_moduleLTLIBRARIES: $(doveadm_module_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(doveadm_moduledir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(doveadm_moduledir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(doveadm_moduledir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(doveadm_moduledir)"; \
+ }
+
+uninstall-doveadm_moduleLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(doveadm_module_LTLIBRARIES)'; test -n "$(doveadm_moduledir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(doveadm_moduledir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(doveadm_moduledir)/$$f"; \
+ done
+
+clean-doveadm_moduleLTLIBRARIES:
+ -test -z "$(doveadm_module_LTLIBRARIES)" || rm -f $(doveadm_module_LTLIBRARIES)
+ @list='$(doveadm_module_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+install-moduleLTLIBRARIES: $(module_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \
+ }
+
+uninstall-moduleLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \
+ done
+
+clean-moduleLTLIBRARIES:
+ -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES)
+ @list='$(module_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+lib20_doveadm_fts_lucene_plugin.la: $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib20_doveadm_fts_lucene_plugin_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(lib20_doveadm_fts_lucene_plugin_la_LINK) -rpath $(doveadm_moduledir) $(lib20_doveadm_fts_lucene_plugin_la_OBJECTS) $(lib20_doveadm_fts_lucene_plugin_la_LIBADD) $(LIBS)
+
+lib21_fts_lucene_plugin.la: $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_DEPENDENCIES) $(EXTRA_lib21_fts_lucene_plugin_la_DEPENDENCIES)
+ $(AM_V_CXXLD)$(lib21_fts_lucene_plugin_la_LINK) -rpath $(moduledir) $(lib21_fts_lucene_plugin_la_OBJECTS) $(lib21_fts_lucene_plugin_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Snowball.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doveadm-fts-lucene.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-backend-lucene.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fts-lucene-plugin.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lucene-wrapper.Plo@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+ @$(MKDIR_P) $(@D)
+ @echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-exampleDATA: $(example_DATA)
+ @$(NORMAL_INSTALL)
+ @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(exampledir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(exampledir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(exampledir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(exampledir)" || exit $$?; \
+ done
+
+uninstall-exampleDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(example_DATA)'; test -n "$(exampledir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(exampledir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(doveadm_moduledir)" "$(DESTDIR)$(moduledir)" "$(DESTDIR)$(exampledir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \
+ clean-moduleLTLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+ -rm -f ./$(DEPDIR)/Snowball.Plo
+ -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo
+ -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo
+ -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo
+ -rm -f ./$(DEPDIR)/lucene-wrapper.Plo
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-doveadm_moduleLTLIBRARIES install-exampleDATA \
+ install-moduleLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f ./$(DEPDIR)/Snowball.Plo
+ -rm -f ./$(DEPDIR)/doveadm-fts-lucene.Plo
+ -rm -f ./$(DEPDIR)/fts-backend-lucene.Plo
+ -rm -f ./$(DEPDIR)/fts-lucene-plugin.Plo
+ -rm -f ./$(DEPDIR)/lucene-wrapper.Plo
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-doveadm_moduleLTLIBRARIES \
+ uninstall-exampleDATA uninstall-moduleLTLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+ clean-doveadm_moduleLTLIBRARIES clean-generic clean-libtool \
+ clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am \
+ install-doveadm_moduleLTLIBRARIES install-dvi install-dvi-am \
+ install-exampleDATA install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-moduleLTLIBRARIES install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am \
+ uninstall-doveadm_moduleLTLIBRARIES uninstall-exampleDATA \
+ uninstall-moduleLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/plugins/fts-lucene/Snowball.cc b/src/plugins/fts-lucene/Snowball.cc
new file mode 100644
index 0000000..43b54e3
--- /dev/null
+++ b/src/plugins/fts-lucene/Snowball.cc
@@ -0,0 +1,151 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include <CLucene.h>
+#include "SnowballAnalyzer.h"
+#include "SnowballFilter.h"
+#include <CLucene/util/CLStreams.h>
+#include <CLucene/analysis/Analyzers.h>
+#include <CLucene/analysis/standard/StandardTokenizer.h>
+#include <CLucene/analysis/standard/StandardFilter.h>
+
+extern "C" {
+#include "lib.h"
+#include "buffer.h"
+#include "unichar.h"
+#include "lucene-wrapper.h"
+};
+
+CL_NS_USE(analysis)
+CL_NS_USE(util)
+CL_NS_USE2(analysis,standard)
+
+CL_NS_DEF2(analysis,snowball)
+
+ /** Builds the named analyzer with no stop words.
+  *
+  * @param _normalizer callback handed to every SnowballFilter created by
+  *        tokenStream(); NULL means stems are stored un-normalized
+  * @param _language stemmer language name; duplicated with i_strdup() and
+  *        released in the destructor
+  */
+ SnowballAnalyzer::SnowballAnalyzer(normalizer_func_t *_normalizer, const char* _language)
+ : language(i_strdup(_language)),
+ normalizer(_normalizer),
+ stopSet(NULL),
+ prevstream(NULL)
+ {
+ }
+
+ /** Releases the stream cached by reusableTokenStream(), the duplicated
+  * language string and, when present, the stop word set. */
+ SnowballAnalyzer::~SnowballAnalyzer()
+ {
+ if (prevstream)
+ _CLDELETE(prevstream);
+ i_free(language);
+ if ( stopSet != NULL )
+ _CLDELETE(stopSet);
+ }
+
+ /** Builds the named analyzer with the given stop words.
+  *
+  * No normalizer is installed by this constructor.
+  *
+  * @param language stemmer language name; duplicated with i_strdup()
+  * @param stopWords stop word list passed to StopFilter::fillStopTable()
+  */
+ SnowballAnalyzer::SnowballAnalyzer(const char* language, const TCHAR** stopWords)
+ : language(i_strdup(language)),
+ normalizer(NULL),
+ stopSet(_CLNEW CLTCSetList(true)),
+ prevstream(NULL)
+ {
+ StopFilter::fillStopTable(stopSet,stopWords);
+ }
+
+ /** Convenience overload: same as the three-argument tokenStream() with
+  * deleteReader set to false. */
+ TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) {
+ return this->tokenStream(fieldName,reader,false);
+ }
+
+ /** Constructs a {@link StandardTokenizer} filtered by a {@link
+ StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter} (only
+ when a stop word set was configured) and finally a {@link SnowballFilter}.
+
+ fieldName is not used by this implementation. deleteReader is forwarded
+ to the object that wraps the reader. The caller receives a newly
+ allocated filter chain; each filter is created with its delete-input
+ flag set to true. */
+ TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader) {
+ BufferedReader* bufferedReader = reader->__asBufferedReader();
+ TokenStream* result;
+
+ /* readers that are not already buffered get wrapped first */
+ if ( bufferedReader == NULL )
+ result = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, deleteReader), true );
+ else
+ result = _CLNEW StandardTokenizer(bufferedReader, deleteReader);
+
+ result = _CLNEW StandardFilter(result, true);
+ result = _CLNEW CL_NS(analysis)::LowerCaseFilter(result, true);
+ if (stopSet != NULL)
+ result = _CLNEW CL_NS(analysis)::StopFilter(result, true, stopSet);
+ result = _CLNEW SnowballFilter(result, normalizer, language, true);
+ return result;
+ }
+
+ /** Returns a token stream that stays owned by the analyzer.
+  *
+  * The stream returned by the previous call is deleted first, so a
+  * returned pointer is only usable until the next call or until the
+  * analyzer is destroyed. */
+ TokenStream* SnowballAnalyzer::reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) {
+ if (prevstream) _CLDELETE(prevstream);
+ prevstream = this->tokenStream(fieldName, reader);
+ return prevstream;
+ }
+
+
+
+
+
+
+ /** Construct the named stemming filter.
+  *
+  * @param in the input tokens to stem
+  * @param normalizer optional callback applied to each stem in next();
+  *        may be NULL
+  * @param language the name of a stemmer, passed to sb_stemmer_new()
+  * @param deleteTS forwarded to the TokenFilter base class
+  *
+  * Throws CL_ERR_IllegalArgument when no stemmer exists for language.
+  */
+ SnowballFilter::SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS):
+ TokenFilter(in,deleteTS)
+ {
+ stemmer = sb_stemmer_new(language, NULL); //use utf8 encoding
+ this->normalizer = normalizer;
+
+ if ( stemmer == NULL ){
+ _CLTHROWA(CL_ERR_IllegalArgument, "language not available for stemming\n"); //todo: richer error
+ }
+ }
+
+ /** Frees the stemmer created in the constructor. */
+ SnowballFilter::~SnowballFilter(){
+ sb_stemmer_delete(stemmer);
+ }
+
+ /** Returns the next input Token, after being stemmed.
+  *
+  * Reads one token from the wrapped stream, converts at most
+  * LUCENE_MAX_WORD_LEN characters of its term to UTF-8, stems that text
+  * and, when a normalizer was supplied, runs the stem through it. The
+  * result replaces the token's term; start/end offsets and type are kept.
+  *
+  * @return token, or NULL once the input stream returns NULL
+  */
+ Token* SnowballFilter::next(Token* token){
+ if (input->next(token) == NULL)
+ return NULL;
+
+ /* room for 5 bytes per kept character plus one extra byte */
+ unsigned char utf8text[LUCENE_MAX_WORD_LEN*5+1];
+ /* longer terms are truncated before stemming */
+ unsigned int len = I_MIN(LUCENE_MAX_WORD_LEN, token->termLength());
+
+ buffer_t buf = { { 0, 0 } };
+ /* the cast of termBuffer() to unichar_t* below relies on this */
+ i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+ buffer_create_from_data(&buf, utf8text, sizeof(utf8text));
+ uni_ucs4_to_utf8((const unichar_t *)token->termBuffer(), len, &buf);
+
+ const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8text, buf.used);
+ if ( stemmed == NULL )
+ _CLTHROWA(CL_ERR_Runtime,"Out of memory");
+
+ int stemmedLen=sb_stemmer_length(stemmer);
+
+ if (normalizer == NULL) {
+ /* +1 leaves room for the terminator written by the conversion;
+    NOTE(review): presumably lucene_utf8_n_to_tchar() NUL-terminates - confirm */
+ unsigned int tchartext_size =
+ uni_utf8_strlen_n(stemmed, stemmedLen) + 1;
+ TCHAR tchartext[tchartext_size];
+ lucene_utf8_n_to_tchar(stemmed, stemmedLen, tchartext, tchartext_size);
+ token->set(tchartext,token->startOffset(), token->endOffset(), token->type());
+ } else T_BEGIN {
+ /* norm_buf comes from t_buffer_create(), hence the T_BEGIN/T_END frame */
+ buffer_t *norm_buf = t_buffer_create(stemmedLen);
+ normalizer(stemmed, stemmedLen, norm_buf);
+
+ unsigned int tchartext_size =
+ uni_utf8_strlen_n(norm_buf->data, norm_buf->used) + 1;
+ TCHAR tchartext[tchartext_size];
+ lucene_utf8_n_to_tchar((const unsigned char *)norm_buf->data,
+ norm_buf->used, tchartext, tchartext_size);
+ token->set(tchartext,token->startOffset(), token->endOffset(), token->type());
+ } T_END;
+ return token;
+ }
+
+
+CL_NS_END2
diff --git a/src/plugins/fts-lucene/SnowballAnalyzer.h b/src/plugins/fts-lucene/SnowballAnalyzer.h
new file mode 100644
index 0000000..45455c5
--- /dev/null
+++ b/src/plugins/fts-lucene/SnowballAnalyzer.h
@@ -0,0 +1,51 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_analysis_snowball_analyser_
+#define _lucene_analysis_snowball_analyser_
+
+extern "C" {
+#include "lib.h"
+#include "unichar.h"
+};
+#include "CLucene/analysis/AnalysisHeader.h"
+
+CL_CLASS_DEF(util,BufferedReader)
+CL_NS_DEF2(analysis,snowball)
+
+/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
+ *
+ * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
+ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+ * {@link EnglishStemmer} is named "English".
+ */
+class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer {
+ /* stemmer language name; a private copy owned by the analyzer */
+ char* language;
+ /* optional callback passed on to each SnowballFilter; may be NULL */
+ normalizer_func_t *normalizer;
+ /* stop word set, or NULL when built without stop words */
+ CLTCSetList* stopSet;
+ /* stream handed out by the last reusableTokenStream() call */
+ TokenStream *prevstream;
+
+public:
+ /** Builds the named analyzer with no stop words. */
+ SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english");
+
+ /** Builds the named analyzer with the given stop words.
+ */
+ SnowballAnalyzer(const char* language, const TCHAR** stopWords);
+
+ ~SnowballAnalyzer();
+
+ /** Constructs a {@link StandardTokenizer} filtered by a {@link
+ StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter} (when
+ stop words were given) and a {@link SnowballFilter}. */
+ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader);
+ /** Like tokenStream(), but the returned stream is owned by the analyzer
+ and is deleted on the next call. */
+ TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+};
+
+CL_NS_END2
+#endif
+
diff --git a/src/plugins/fts-lucene/SnowballFilter.h b/src/plugins/fts-lucene/SnowballFilter.h
new file mode 100644
index 0000000..6a0ed12
--- /dev/null
+++ b/src/plugins/fts-lucene/SnowballFilter.h
@@ -0,0 +1,42 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_analysis_snowball_filter_
+#define _lucene_analysis_snowball_filter_
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "libstemmer.h"
+
+CL_NS_DEF2(analysis,snowball)
+
+/** A filter that stems words using a Snowball-generated stemmer.
+ *
+ * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
+ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+ * {@link EnglishStemmer} is named "English".
+ *
+ * Note: TODO: this class is not thread safe.
+ */
+class CLUCENE_CONTRIBS_EXPORT SnowballFilter: public TokenFilter {
+ /* libstemmer handle; created in the constructor, freed in the destructor */
+ struct sb_stemmer * stemmer;
+ /* optional callback applied to each stem; may be NULL */
+ normalizer_func_t *normalizer;
+public:
+
+ /** Construct the named stemming filter.
+ *
+ * @param in the input tokens to stem
+ * @param normalizer optional callback applied to each stem
+ * @param language the name of a stemmer
+ * @param deleteTS forwarded to the TokenFilter base class
+ */
+ SnowballFilter(TokenStream* in, normalizer_func_t *normalizer, const char* language, bool deleteTS);
+
+ ~SnowballFilter();
+
+ /** Returns the next input Token, after being stemmed */
+ Token* next(Token* token);
+};
+
+CL_NS_END2
+#endif
diff --git a/src/plugins/fts-lucene/doveadm-fts-lucene.c b/src/plugins/fts-lucene/doveadm-fts-lucene.c
new file mode 100644
index 0000000..a761907
--- /dev/null
+++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c
@@ -0,0 +1,70 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "doveadm-dump.h"
+#include "doveadm-fts.h"
+#include "lucene-wrapper.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+/* ABI version string exported by this plugin, taken from the
+   DOVECOT_ABI_VERSION it was compiled against. */
+const char *doveadm_fts_lucene_plugin_version = DOVECOT_ABI_VERSION;
+
+/* Plugin entry points; both are defined at the end of this file. */
+void doveadm_fts_lucene_plugin_init(struct module *module);
+void doveadm_fts_lucene_plugin_deinit(void);
+
+/* Dump the records of the Lucene index directory at `path` to stdout.
+
+   Output is one line per mailbox: the mailbox GUID followed by ": " and a
+   comma-terminated list of UIDs, each optionally followed by "[part_num]"
+   when the record's part number is non-zero. A new line is started each
+   time the mailbox GUID differs from the previous record's GUID.
+
+   `args` is unused. Iteration failure is reported with i_error(). */
+static void
+cmd_dump_fts_lucene(const char *path, const char *const *args ATTR_UNUSED)
+{
+	struct lucene_index *index;
+	struct lucene_index_iter *iter;
+	guid_128_t prev_guid;
+	const struct lucene_index_record *rec;
+	bool first = TRUE;
+
+	i_zero(&prev_guid);
+	index = lucene_index_init(path, NULL, NULL);
+	iter = lucene_index_iter_init(index);
+	while ((rec = lucene_index_iter_next(iter)) != NULL) {
+		if (memcmp(prev_guid, rec->mailbox_guid,
+			   sizeof(prev_guid)) != 0) {
+			/* finish the previous mailbox's line, except before
+			   the very first mailbox */
+			if (first)
+				first = FALSE;
+			else
+				printf("\n");
+			memcpy(prev_guid, rec->mailbox_guid, sizeof(prev_guid));
+			printf("%s: ", guid_128_to_string(prev_guid));
+		}
+		printf("%u", rec->uid);
+		if (rec->part_num != 0)
+			printf("[%u]", rec->part_num);
+		/* keep all UIDs of a mailbox on the line that carries its
+		   GUID; the line break is emitted on GUID change / at end */
+		printf(",");
+	}
+	printf("\n");
+	if (lucene_index_iter_deinit(&iter) < 0)
+		i_error("Lucene index iteration failed");
+	lucene_index_deinit(index);
+}
+
+/* Returns TRUE when `path` looks like a Lucene index directory, i.e. when
+   "<path>/segments.gen" can be stat()ed. */
+static bool test_dump_fts_lucene(const char *path)
+{
+	const char *gen_path = t_strconcat(path, "/segments.gen", NULL);
+	struct stat st;
+
+	if (stat(gen_path, &st) != 0)
+		return FALSE;
+	return TRUE;
+}
+
+/* Dump handler descriptor registered by doveadm_fts_lucene_plugin_init():
+   handler name, the function that recognizes a Lucene index path and the
+   function that dumps it. */
+static const struct doveadm_cmd_dump doveadm_cmd_dump_fts_lucene = {
+ "fts-lucene",
+ test_dump_fts_lucene,
+ cmd_dump_fts_lucene
+};
+
+/* Plugin init: registers the "fts-lucene" dump handler. `module` is unused. */
+void doveadm_fts_lucene_plugin_init(struct module *module ATTR_UNUSED)
+{
+ doveadm_dump_register(&doveadm_cmd_dump_fts_lucene);
+}
+
+/* Plugin deinit: intentionally empty, nothing is undone here. */
+void doveadm_fts_lucene_plugin_deinit(void)
+{
+}
diff --git a/src/plugins/fts-lucene/fts-backend-lucene.c b/src/plugins/fts-lucene/fts-backend-lucene.c
new file mode 100644
index 0000000..963dbdf
--- /dev/null
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c
@@ -0,0 +1,605 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "hash.h"
+#include "hex-binary.h"
+#include "strescape.h"
+#include "message-part.h"
+#include "mail-namespace.h"
+#include "mail-storage-private.h"
+#include "fts-expunge-log.h"
+#include "lucene-wrapper.h"
+#include "fts-indexer.h"
+#include "fts-lucene-plugin.h"
+
+#include <wchar.h>
+
+/* directory created under the mailbox list's index root */
+#define LUCENE_INDEX_DIR_NAME "lucene-indexes"
+/* expunge log file kept inside LUCENE_INDEX_DIR_NAME */
+#define LUCENE_EXPUNGE_LOG_NAME "dovecot-expunges.log"
+/* an update that added at least this many messages requests an optimize */
+#define LUCENE_OPTIMIZE_BATCH_MSGS_COUNT 100
+
+/* Lucene FTS backend instance. `backend` must stay the first member: the
+   functions in this file cast struct fts_backend* to this type. */
+struct lucene_fts_backend {
+ struct fts_backend backend;
+ /* "<index root>/lucene-indexes"; allocated in init, freed in deinit */
+ char *dir_path;
+
+ struct lucene_index *index;
+ /* mailbox last passed to lucene_index_select_mailbox(), with the
+    generation_sequence and GUID it had at that time */
+ struct mailbox *selected_box;
+ unsigned int selected_box_generation;
+ guid_128_t selected_box_guid;
+
+ struct fts_expunge_log *expunge_log;
+
+ /* set once creating dir_path has been attempted */
+ bool dir_created:1;
+ /* TRUE between update_init() and update_deinit() */
+ bool updating:1;
+};
+
+/* State of one index update. `ctx` must stay the first member: the
+   functions in this file cast struct fts_backend_update_context* to this
+   type. */
+struct lucene_fts_backend_update_context {
+ struct fts_backend_update_context ctx;
+
+ /* mailbox set by the latest update_set_mailbox() call; may be NULL */
+ struct mailbox *box;
+ /* UID of the latest build key for `box`; 0 when nothing is pending */
+ uint32_t last_uid;
+ /* cached index header value, valid when last_indexed_uid_set is TRUE */
+ uint32_t last_indexed_uid;
+ /* vname of the first non-NULL mailbox seen; used for the OPTIMIZE
+    indexer command */
+ char *first_box_vname;
+
+ /* current build key: message UID, MIME part index and header name
+    (NULL for body parts) */
+ uint32_t uid, part_num;
+ char *hdr_name;
+
+ /* number of distinct UIDs seen by update_set_build_key() */
+ unsigned int added_msgs;
+ /* created on the first expunge that gets logged */
+ struct fts_expunge_log_append_ctx *expunge_ctx;
+
+ /* lucene_index_build_init() has been called for this update */
+ bool lucene_opened;
+ bool last_indexed_uid_set;
+ /* copy of the fts_lucene mime_parts setting */
+ bool mime_parts;
+};
+
+/* Creates the backend's index directory the first time it is called.
+   The attempt is remembered even when it fails, so later calls return 0
+   without retrying. Returns 0 on success, -1 if the mkdir failed. */
+static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend)
+{
+	int mkdir_ret;
+
+	if (backend->dir_created)
+		return 0;
+	backend->dir_created = TRUE;
+
+	mkdir_ret = mailbox_list_mkdir_root(backend->backend.ns->list,
+					    backend->dir_path,
+					    MAILBOX_LIST_PATH_TYPE_INDEX);
+	return mkdir_ret < 0 ? -1 : 0;
+}
+
+/* Copies the GUID of `box` into guid_r.
+   Returns 0 on success; on metadata lookup failure logs an error and
+   returns -1 without touching guid_r. */
+static int
+fts_lucene_get_mailbox_guid(struct mailbox *box, guid_128_t guid_r)
+{
+	struct mailbox_metadata metadata;
+
+	if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID,
+				 &metadata) >= 0) {
+		memcpy(guid_r, metadata.guid, GUID_128_SIZE);
+		return 0;
+	}
+
+	i_error("lucene: Couldn't get mailbox %s GUID: %s",
+		box->vname, mailbox_get_last_internal_error(box, NULL));
+	return -1;
+}
+
+/* Makes `box` the mailbox selected in the Lucene index.
+
+   Does nothing when `box` is already selected and its generation_sequence
+   is unchanged. Otherwise looks up the mailbox GUID, hands its hex form
+   (as wide characters) to lucene_index_select_mailbox() and caches
+   box/GUID/generation in the backend.
+
+   Returns 0 on success, -1 if the GUID lookup failed (the cached
+   selection is left untouched in that case). */
+static int
+fts_backend_select(struct lucene_fts_backend *backend, struct mailbox *box)
+{
+ guid_128_t guid;
+ unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH];
+ wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH];
+ buffer_t buf;
+ unsigned int i;
+
+ i_assert(box != NULL);
+
+ if (backend->selected_box == box &&
+ backend->selected_box_generation == box->generation_sequence)
+ return 0;
+
+ if (fts_lucene_get_mailbox_guid(box, guid) < 0)
+ return -1;
+ /* hex-encode into the fixed-size guid_hex array; neither guid_hex nor
+    wguid_hex has room for a terminator */
+ buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH);
+ binary_to_hex_append(&buf, guid, GUID_128_SIZE);
+ /* widen each hex character to wchar_t */
+ for (i = 0; i < N_ELEMENTS(wguid_hex); i++)
+ wguid_hex[i] = guid_hex[i];
+
+ lucene_index_select_mailbox(backend->index, wguid_hex);
+
+ backend->selected_box = box;
+ memcpy(backend->selected_box_guid, guid,
+ sizeof(backend->selected_box_guid));
+ backend->selected_box_generation = box->generation_sequence;
+ return 0;
+}
+
+/* Allocates a new backend instance initialized as a copy of the
+   fts_backend_lucene template and returns its embedded fts_backend. */
+static struct fts_backend *fts_backend_lucene_alloc(void)
+{
+ struct lucene_fts_backend *backend;
+
+ backend = i_new(struct lucene_fts_backend, 1);
+ backend->backend = fts_backend_lucene;
+ return &backend->backend;
+}
+
+/* Initializes the backend for its namespace: computes the index
+   directory path, opens the Lucene index wrapper and the expunge log.
+
+   Returns 0 on success. Returns -1 with *error_r set when the user has no
+   fts_lucene context (i.e. the plugin settings were invalid). */
+static int
+fts_backend_lucene_init(struct fts_backend *_backend, const char **error_r)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+ struct fts_lucene_user *fuser =
+ FTS_LUCENE_USER_CONTEXT(_backend->ns->user);
+ const char *path;
+
+ if (fuser == NULL) {
+ /* invalid settings */
+ *error_r = "Invalid fts_lucene settings";
+ return -1;
+ }
+ /* fts already checked that index exists */
+
+ if (fuser->set.use_libfts) {
+ /* change our flags so we get proper input */
+ _backend->flags &= ENUM_NEGATE(FTS_BACKEND_FLAG_FUZZY_SEARCH);
+ _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT;
+ }
+ path = mailbox_list_get_root_forced(_backend->ns->list,
+ MAILBOX_LIST_PATH_TYPE_INDEX);
+
+ /* dir_path is owned by the backend and freed in deinit */
+ backend->dir_path = i_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
+ backend->index = lucene_index_init(backend->dir_path,
+ _backend->ns->list,
+ &fuser->set);
+
+ path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL);
+ backend->expunge_log = fts_expunge_log_init(path);
+ return 0;
+}
+
+/* Releases everything owned by the backend and frees the backend itself.
+   The NULL checks allow this to run on a backend whose init failed before
+   the index or expunge log were opened. */
+static void fts_backend_lucene_deinit(struct fts_backend *_backend)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+
+ if (backend->index != NULL)
+ lucene_index_deinit(backend->index);
+ if (backend->expunge_log != NULL)
+ fts_expunge_log_deinit(&backend->expunge_log);
+ i_free(backend->dir_path);
+ i_free(backend);
+}
+
+/* Returns in *last_uid_r the last UID of `box` that has been indexed.
+
+   When the mailbox has an FTS index header, its last_indexed_uid is
+   returned, or 0 if the stored settings are incompatible with the current
+   ones (which forces a rebuild). Without a header the UID is looked up
+   from the Lucene index itself and then stored with
+   fts_index_set_last_uid().
+
+   Returns 0 on success, -1 on error. */
+static int
+fts_backend_lucene_get_last_uid(struct fts_backend *_backend,
+				struct mailbox *box, uint32_t *last_uid_r)
+{
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_backend;
+	struct fts_lucene_user *fuser =
+		FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user);
+	struct fts_index_header hdr;
+	uint32_t set_checksum;
+	int ret;
+
+	if (!fts_index_get_header(box, &hdr)) {
+		/* either nothing has been indexed, or the index was
+		   corrupted. do it the slow way. */
+		if (fts_backend_select(backend, box) < 0 ||
+		    lucene_index_get_last_uid(backend->index, last_uid_r) < 0)
+			return -1;
+
+		fts_index_set_last_uid(box, *last_uid_r);
+		return 0;
+	}
+
+	set_checksum = fts_lucene_settings_checksum(&fuser->set);
+	ret = fts_index_have_compatible_settings(_backend->ns->list,
+						 set_checksum);
+	if (ret < 0)
+		return -1;
+
+	/* ret == 0: settings changed, need to rebuild the index */
+	*last_uid_r = ret == 0 ? 0 : hdr.last_indexed_uid;
+	return 0;
+}
+
+/* Starts an index update and returns its context.
+   Only one update may be active per backend at a time (asserted); the
+   context is freed by fts_backend_lucene_update_deinit(). */
+static struct fts_backend_update_context *
+fts_backend_lucene_update_init(struct fts_backend *_backend)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+ struct lucene_fts_backend_update_context *ctx;
+ struct fts_lucene_user *fuser =
+ FTS_LUCENE_USER_CONTEXT_REQUIRE(_backend->ns->user);
+
+ i_assert(!backend->updating);
+
+ ctx = i_new(struct lucene_fts_backend_update_context, 1);
+ ctx->ctx.backend = _backend;
+ ctx->mime_parts = fuser->set.mime_parts;
+ backend->updating = TRUE;
+ return &ctx->ctx;
+}
+
+/* Decides whether the index should be optimized after this update.
+
+   TRUE when the update added at least LUCENE_OPTIMIZE_BATCH_MSGS_COUNT
+   messages, or when the expunge log is non-empty and holds at least one
+   entry per 50 indexed documents. Failures to read either count yield
+   FALSE. */
+static bool
+fts_backend_lucene_need_optimize(struct lucene_fts_backend_update_context *ctx)
+{
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)ctx->ctx.backend;
+	uint32_t numdocs;
+	unsigned int expunges;
+
+	if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT)
+		return TRUE;
+
+	if (lucene_index_get_doc_count(backend->index, &numdocs) < 0)
+		return FALSE;
+	if (fts_expunge_log_uid_count(backend->expunge_log, &expunges) < 0)
+		return FALSE;
+
+	if (expunges == 0)
+		return FALSE;
+	/* >2% of index has been expunged */
+	return numdocs / expunges <= 50;
+}
+
+/* Finishes an index update and frees its context.
+
+   Closes the Lucene build (if one was opened), commits pending expunge
+   log entries and, when fts_backend_lucene_need_optimize() says so,
+   either optimizes directly (build was opened in this process) or asks
+   the indexer service to do it with an OPTIMIZE command.
+
+   Returns 0 on success, -1 if the update had failed earlier, if closing
+   the build failed, or if committing the expunge log failed for a reason
+   other than the index directory not existing. */
+static int
+fts_backend_lucene_update_deinit(struct fts_backend_update_context *_ctx)
+{
+	struct lucene_fts_backend_update_context *ctx =
+		(struct lucene_fts_backend_update_context *)_ctx;
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_ctx->backend;
+	int ret = _ctx->failed ? -1 : 0;
+
+	i_assert(backend->updating);
+
+	backend->updating = FALSE;
+	if (ctx->lucene_opened) {
+		if (lucene_index_build_deinit(backend->index) < 0)
+			ret = -1;
+	}
+
+	if (ctx->expunge_ctx != NULL) {
+		if (fts_expunge_log_append_commit(&ctx->expunge_ctx) < 0) {
+			struct stat st;
+			ret = -1;
+
+			if (stat(backend->dir_path, &st) < 0 && errno == ENOENT) {
+				/* lucene-indexes directory doesn't even exist,
+				   so dovecot.index's last_index_uid is wrong.
+				   rescan to update them. */
+				(void)lucene_index_rescan(backend->index);
+				ret = 0;
+			}
+		}
+	}
+
+	if (fts_backend_lucene_need_optimize(ctx)) {
+		if (ctx->lucene_opened)
+			(void)fts_backend_optimize(_ctx->backend);
+		else if (ctx->first_box_vname != NULL) {
+			struct mail_user *user = backend->backend.ns->user;
+			const char *cmd, *path;
+			int fd;
+
+			/* the optimize affects all mailboxes within namespace,
+			   so just use any mailbox name in it */
+			cmd = t_strdup_printf("OPTIMIZE\t0\t%s\t%s\n",
+					      str_tabescape(user->username),
+					      str_tabescape(ctx->first_box_vname));
+			/* fire and forget: the reply isn't read */
+			fd = fts_indexer_cmd(user, cmd, &path);
+			i_close_fd(&fd);
+		}
+	}
+
+	/* hdr_name is normally released by update_unset_build_key(); free it
+	   here too so it can't leak if the context is destroyed while a
+	   build key is still set */
+	i_free(ctx->hdr_name);
+	i_free(ctx->first_box_vname);
+	i_free(ctx);
+	return ret;
+}
+
+/* Switches the update to another mailbox (`box` may be NULL).
+
+   Before switching, a pending last_uid is written to the previous
+   mailbox with fts_index_set_last_uid(). The vname of the first non-NULL
+   mailbox is remembered for a possible OPTIMIZE command, and the cached
+   last_indexed_uid is invalidated. */
+static void
+fts_backend_lucene_update_set_mailbox(struct fts_backend_update_context *_ctx,
+ struct mailbox *box)
+{
+ struct lucene_fts_backend_update_context *ctx =
+ (struct lucene_fts_backend_update_context *)_ctx;
+
+ if (ctx->last_uid != 0) {
+ fts_index_set_last_uid(ctx->box, ctx->last_uid);
+ ctx->last_uid = 0;
+ }
+ if (ctx->first_box_vname == NULL && box != NULL)
+ ctx->first_box_vname = i_strdup(box->vname);
+ ctx->box = box;
+ ctx->last_indexed_uid_set = FALSE;
+}
+
+/* Records that message `uid` of the current mailbox was expunged.
+
+   The expunge is appended to the backend's expunge log under the current
+   mailbox's GUID. It is skipped when nothing has been indexed for the
+   mailbox or when `uid` is more than 100 above the last indexed UID.
+   If the mailbox can't be selected (its GUID is unknown), the update is
+   marked failed and nothing is logged. */
+static void
+fts_backend_lucene_update_expunge(struct fts_backend_update_context *_ctx,
+				  uint32_t uid)
+{
+	struct lucene_fts_backend_update_context *ctx =
+		(struct lucene_fts_backend_update_context *)_ctx;
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_ctx->backend;
+	struct fts_index_header hdr;
+
+	if (!ctx->last_indexed_uid_set) {
+		if (!fts_index_get_header(ctx->box, &hdr))
+			ctx->last_indexed_uid = 0;
+		else
+			ctx->last_indexed_uid = hdr.last_indexed_uid;
+		ctx->last_indexed_uid_set = TRUE;
+	}
+	if (ctx->last_indexed_uid == 0 ||
+	    uid > ctx->last_indexed_uid + 100) {
+		/* don't waste time adding expunge to log for a message that
+		   isn't even indexed. this check is racy, because indexer may
+		   just be in the middle of indexing this message. we'll
+		   attempt to avoid that by skipping the expunging only if
+		   indexing hasn't been done for a while (100 msgs). */
+		return;
+	}
+
+	if (ctx->expunge_ctx == NULL) {
+		ctx->expunge_ctx =
+			fts_expunge_log_append_begin(backend->expunge_log);
+	}
+
+	if (fts_backend_select(backend, ctx->box) < 0) {
+		/* selected_box_guid wasn't updated, so it still refers to
+		   some other mailbox (or is all zeros). logging the expunge
+		   under that GUID could remove the wrong message from the
+		   index, so don't log anything. */
+		_ctx->failed = TRUE;
+		return;
+	}
+
+	fts_expunge_log_append_next(ctx->expunge_ctx,
+				    backend->selected_box_guid, uid);
+}
+
+/* Sets what the following update_build_more() calls will index: message
+   UID, MIME part and either a header name or the body.
+
+   On first use within an update this also creates the index directory and
+   opens the Lucene build. Errors are recorded in the context's `failed`
+   flag rather than returned; the function always returns TRUE.
+   UIDs must be given in non-decreasing order (asserted). */
+static bool
+fts_backend_lucene_update_set_build_key(struct fts_backend_update_context *_ctx,
+ const struct fts_backend_build_key *key)
+{
+ struct lucene_fts_backend_update_context *ctx =
+ (struct lucene_fts_backend_update_context *)_ctx;
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_ctx->backend;
+
+ if (!ctx->lucene_opened) {
+ if (fts_backend_lucene_mkdir(backend) < 0)
+ ctx->ctx.failed = TRUE;
+ if (lucene_index_build_init(backend->index) < 0)
+ ctx->ctx.failed = TRUE;
+ /* set even on failure, so update_deinit() still calls
+    lucene_index_build_deinit() */
+ ctx->lucene_opened = TRUE;
+ }
+
+ if (fts_backend_select(backend, ctx->box) < 0)
+ _ctx->failed = TRUE;
+
+ switch (key->type) {
+ case FTS_BACKEND_BUILD_KEY_HDR:
+ case FTS_BACKEND_BUILD_KEY_MIME_HDR:
+ i_assert(key->hdr_name != NULL);
+
+ i_free(ctx->hdr_name);
+ ctx->hdr_name = i_strdup(key->hdr_name);
+ break;
+ case FTS_BACKEND_BUILD_KEY_BODY_PART:
+ /* NULL hdr_name marks body content */
+ i_free_and_null(ctx->hdr_name);
+ break;
+ case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
+ i_unreached();
+ }
+
+ /* count each message once, however many keys it produces */
+ if (key->uid != ctx->last_uid) {
+ i_assert(key->uid >= ctx->last_uid);
+ ctx->last_uid = key->uid;
+ ctx->added_msgs++;
+ }
+
+ ctx->uid = key->uid;
+ if (ctx->mime_parts)
+ ctx->part_num = message_part_to_idx(key->part);
+ return TRUE;
+}
+
+/* Clears the current build key: resets UID and part number and frees the
+   header name. */
+static void
+fts_backend_lucene_update_unset_build_key(struct fts_backend_update_context *_ctx)
+{
+ struct lucene_fts_backend_update_context *ctx =
+ (struct lucene_fts_backend_update_context *)_ctx;
+
+ ctx->uid = 0;
+ ctx->part_num = 0;
+ i_free_and_null(ctx->hdr_name);
+}
+
+/* Feeds `size` bytes of `data` to the index for the current build key.
+   A build key must be set (asserted via ctx->uid != 0).
+   Returns -1 immediately when the update has already failed, otherwise
+   the result of lucene_index_build_more(). */
+static int
+fts_backend_lucene_update_build_more(struct fts_backend_update_context *_ctx,
+ const unsigned char *data, size_t size)
+{
+ struct lucene_fts_backend_update_context *ctx =
+ (struct lucene_fts_backend_update_context *)_ctx;
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_ctx->backend;
+ int ret;
+
+ i_assert(ctx->uid != 0);
+
+ if (_ctx->failed)
+ return -1;
+
+ /* run the call in its own data stack frame */
+ T_BEGIN {
+ ret = lucene_index_build_more(backend->index, ctx->uid,
+ ctx->part_num, data, size,
+ ctx->hdr_name);
+ } T_END;
+ return ret;
+}
+
+/* Refreshes the backend by closing the Lucene index, if there is one.
+   Always returns 0. */
+static int
+fts_backend_lucene_refresh(struct fts_backend *_backend)
+{
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_backend;
+
+	if (backend->index == NULL)
+		return 0;
+
+	lucene_index_close(backend->index);
+	return 0;
+}
+
+/* Rescans the index and, if that succeeds, optimizes it.
+   Returns -1 when the rescan fails, otherwise the optimize result. */
+static int fts_backend_lucene_rescan(struct fts_backend *_backend)
+{
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_backend;
+	int rescan_ret = lucene_index_rescan(backend->index);
+
+	return rescan_ret < 0 ? -1 : lucene_index_optimize(backend->index);
+}
+
+/* Applies the expunge log to the index and then optimizes the index.
+   If applying the log returns 0 the index is rescanned first. The
+   optimize step is skipped when an earlier step returned a negative
+   value, which is then returned as-is. */
+static int fts_backend_lucene_optimize(struct fts_backend *_backend)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+ int ret;
+
+ ret = lucene_index_expunge_from_log(backend->index,
+ backend->expunge_log);
+ if (ret == 0) {
+ /* log was corrupted, need to rescan */
+ ret = lucene_index_rescan(backend->index);
+ }
+ if (ret >= 0)
+ ret = lucene_index_optimize(backend->index);
+ return ret;
+}
+
+/* Searches a single mailbox: selects `box` in the index and runs
+   lucene_index_lookup() with the given search args and flags, filling
+   `result`. Returns -1 if the mailbox can't be selected, otherwise the
+   lookup's return value. */
+static int
+fts_backend_lucene_lookup(struct fts_backend *_backend, struct mailbox *box,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_result *result)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+ int ret;
+
+ if (fts_backend_select(backend, box) < 0)
+ return -1;
+ /* run the lookup in its own data stack frame */
+ T_BEGIN {
+ ret = lucene_index_lookup(backend->index, args, flags, result);
+ } T_END;
+ return ret;
+}
+
+/* a char* hash function from ASU -- from glib
+
+   Adapted here to NUL-terminated wide strings; used as the hash callback
+   of the GUID hash table in fts_backend_lucene_lookup_multi(). */
+static unsigned int wstr_hash(const wchar_t *s)
+{
+ unsigned int g, h = 0;
+
+ while (*s != '\0') {
+ h = (h << 4) + *s;
+ /* fold the top four bits back into the lower bits and clear them */
+ if ((g = h & 0xf0000000UL) != 0) {
+ h = h ^ (g >> 24);
+ h = h ^ g;
+ }
+ s++;
+ }
+
+ return h;
+}
+
+/* Prepares a multi-mailbox lookup.
+
+   Creates one fts_result per entry of the NULL-terminated `boxes` array
+   (allocated from result->pool and stored, zero-terminated, in
+   result->box_results) and fills `guids` with a mapping from each
+   mailbox's hex GUID, as a wide string, to its fts_result.
+
+   Returns 0 on success, -1 if a mailbox GUID can't be looked up. */
+static int
+mailboxes_get_guids(struct mailbox *const boxes[],
+ HASH_TABLE_TYPE(wguid_result) guids,
+ struct fts_multi_result *result)
+{
+ ARRAY(struct fts_result) box_results;
+ struct fts_result *box_result;
+ const char *guid;
+ wchar_t *guid_dup;
+ unsigned int i, j;
+
+ p_array_init(&box_results, result->pool, 32);
+ /* first create the box_results - we'll be using pointers to them
+ later on and appending to the array changes the pointers */
+ for (i = 0; boxes[i] != NULL; i++) {
+ box_result = array_append_space(&box_results);
+ box_result->box = boxes[i];
+ }
+ for (i = 0; boxes[i] != NULL; i++) {
+ if (fts_mailbox_get_guid(boxes[i], &guid) < 0)
+ return -1;
+
+ i_assert(strlen(guid) == MAILBOX_GUID_HEX_LENGTH);
+ /* one extra element is allocated beyond the copied characters;
+    NOTE(review): presumably t_new() zero-fills it so it acts as
+    the terminator for wstr_hash()/wcscmp() - confirm */
+ guid_dup = t_new(wchar_t, MAILBOX_GUID_HEX_LENGTH + 1);
+ for (j = 0; j < MAILBOX_GUID_HEX_LENGTH; j++)
+ guid_dup[j] = guid[j];
+
+ box_result = array_idx_modifiable(&box_results, i);
+ hash_table_insert(guids, guid_dup, box_result);
+ }
+
+ array_append_zero(&box_results);
+ result->box_results = array_front_modifiable(&box_results);
+ return 0;
+}
+
+/* Searches several mailboxes at once.
+
+   Builds a GUID -> fts_result table for the NULL-terminated `boxes`
+   array and, if that succeeds, runs lucene_index_lookup_multi() with it.
+   Returns the result of whichever step ran last (-1 if the GUIDs could
+   not be collected). */
+static int
+fts_backend_lucene_lookup_multi(struct fts_backend *_backend,
+ struct mailbox *const boxes[],
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result)
+{
+ struct lucene_fts_backend *backend =
+ (struct lucene_fts_backend *)_backend;
+ int ret;
+
+ /* the hash table keys are allocated with t_new() in
+    mailboxes_get_guids(), so the table is created and destroyed
+    within this T_BEGIN/T_END frame */
+ T_BEGIN {
+ HASH_TABLE_TYPE(wguid_result) guids;
+
+ hash_table_create(&guids, default_pool, 0, wstr_hash, wcscmp);
+ ret = mailboxes_get_guids(boxes, guids, result);
+ if (ret == 0) {
+ ret = lucene_index_lookup_multi(backend->index,
+ guids, args, flags,
+ result);
+ }
+ hash_table_destroy(&guids);
+ } T_END;
+ return ret;
+}
+
+/* Called when lookups are finished; closes the index via refresh. */
+static void fts_backend_lucene_lookup_done(struct fts_backend *_backend)
+{
+ /* the next refresh is going to close the index anyway, so we might as
+ well do it now */
+ (void)fts_backend_lucene_refresh(_backend);
+}
+
+/* Backend template for "lucene". fts_backend_lucene_alloc() copies it
+   into every new instance; fts_backend_lucene_init() may then adjust the
+   instance's flags when use_libfts is set. The function table is
+   positional, so the order of the entries below matters. */
+struct fts_backend fts_backend_lucene = {
+ .name = "lucene",
+ .flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS |
+ FTS_BACKEND_FLAG_FUZZY_SEARCH,
+
+ {
+ fts_backend_lucene_alloc,
+ fts_backend_lucene_init,
+ fts_backend_lucene_deinit,
+ fts_backend_lucene_get_last_uid,
+ fts_backend_lucene_update_init,
+ fts_backend_lucene_update_deinit,
+ fts_backend_lucene_update_set_mailbox,
+ fts_backend_lucene_update_expunge,
+ fts_backend_lucene_update_set_build_key,
+ fts_backend_lucene_update_unset_build_key,
+ fts_backend_lucene_update_build_more,
+ fts_backend_lucene_refresh,
+ fts_backend_lucene_rescan,
+ fts_backend_lucene_optimize,
+ fts_backend_default_can_lookup,
+ fts_backend_lucene_lookup,
+ fts_backend_lucene_lookup_multi,
+ fts_backend_lucene_lookup_done
+ }
+};
diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.c b/src/plugins/fts-lucene/fts-lucene-plugin.c
new file mode 100644
index 0000000..7c58fa7
--- /dev/null
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.c
@@ -0,0 +1,146 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "crc32.h"
+#include "mail-storage-hooks.h"
+#include "lucene-wrapper.h"
+#include "fts-user.h"
+#include "fts-lucene-plugin.h"
+
+const char *fts_lucene_plugin_version = DOVECOT_ABI_VERSION;
+
+struct fts_lucene_user_module fts_lucene_user_module =
+ MODULE_CONTEXT_INIT(&mail_user_module_register);
+/* Parse the space-separated fts_lucene setting string `str` into `set`.
+ * String values are duplicated into user->pool. An unknown token, a
+ * textcat_conf/textcat_dir pair with only one half set, or an option the
+ * build does not support logs an error and returns -1. Returns 0 on success,
+ * with whitespace_chars never left NULL. */
+static int
+fts_lucene_plugin_init_settings(struct mail_user *user,
+ struct fts_lucene_settings *set,
+ const char *str)
+{
+ const char *const *tmp;
+
+ for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) {
+ if (str_begins(*tmp, "default_language=")) {
+ set->default_language =
+ p_strdup(user->pool, *tmp + 17); /* 17 = strlen("default_language=") */
+ } else if (str_begins(*tmp, "textcat_conf=")) {
+ set->textcat_conf = p_strdup(user->pool, *tmp + 13); /* 13 = strlen("textcat_conf=") */
+ } else if (str_begins(*tmp, "textcat_dir=")) {
+ set->textcat_dir = p_strdup(user->pool, *tmp + 12); /* 12 = strlen("textcat_dir=") */
+ } else if (str_begins(*tmp, "whitespace_chars=")) {
+ set->whitespace_chars = p_strdup(user->pool, *tmp + 17); /* 17 = strlen("whitespace_chars=") */
+ } else if (strcmp(*tmp, "normalize") == 0) {
+ set->normalize = TRUE;
+ } else if (strcmp(*tmp, "no_snowball") == 0) {
+ set->no_snowball = TRUE;
+ } else if (strcmp(*tmp, "mime_parts") == 0) {
+ set->mime_parts = TRUE;
+ } else if (strcmp(*tmp, "use_libfts") == 0) {
+ set->use_libfts = TRUE;
+ } else {
+ i_error("fts_lucene: Invalid setting: %s", *tmp);
+ return -1;
+ }
+ }
+ if (set->textcat_conf != NULL && set->textcat_dir == NULL) {
+ i_error("fts_lucene: textcat_conf set, but textcat_dir unset");
+ return -1;
+ }
+ if (set->textcat_conf == NULL && set->textcat_dir != NULL) {
+ i_error("fts_lucene: textcat_dir set, but textcat_conf unset");
+ return -1;
+ }
+ if (set->whitespace_chars == NULL)
+ set->whitespace_chars = "";
+#ifndef HAVE_FTS_STEMMER
+ if (set->default_language != NULL) {
+ i_error("fts_lucene: default_language set, "
+ "but Dovecot built without stemmer support");
+ return -1;
+ }
+#else
+ if (set->default_language == NULL)
+ set->default_language = "english";
+#endif
+#ifndef HAVE_FTS_TEXTCAT
+ if (set->textcat_conf != NULL) { /* conf and dir are set together (checked above), so testing one is enough */
+ i_error("fts_lucene: textcat_dir set, "
+ "but Dovecot built without textcat support");
+ return -1;
+ }
+#endif
+ return 0;
+}
+
+/* Compute a CRC32 over the settings that affect how text is indexed, so a
+   stored value can be compared against the current configuration. */
+uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set)
+{
+	uint32_t sum = 0;
+
+	/* with libfts none of the settings below are taken into account */
+	if (set->use_libfts)
+		return crc32_str("l");
+
+	/* default_language is NULL only in builds without the stemmer, so
+	   the checksum always differs between builds with and without it */
+	if (set->default_language != NULL)
+		sum = crc32_str(set->default_language);
+	sum = crc32_str_more(sum, set->whitespace_chars);
+	if (set->normalize)
+		sum = crc32_str_more(sum, "n");
+	if (set->no_snowball)
+		sum = crc32_str_more(sum, "s");
+
+	/* mime_parts is intentionally left out: changing it doesn't
+	   necessarily require the index to be rebuilt */
+	return sum;
+}
+/* deinit vfunc installed by fts_lucene_mail_user_created(): releases the
+ * user's fts state first, then chains to the deinit that was saved in
+ * module_ctx.super when the hook was installed. */
+static void fts_lucene_mail_user_deinit(struct mail_user *user)
+{
+ struct fts_lucene_user *fuser = FTS_LUCENE_USER_CONTEXT_REQUIRE(user);
+
+ fts_mail_user_deinit(user);
+ fuser->module_ctx.super.deinit(user);
+}
+/* mail_user_created hook: parses the "fts_lucene" plugin setting (an unset
+ * setting is treated as ""), initializes fts for the user and hooks the
+ * user's deinit vfunc. If settings parsing or fts_mail_user_init() fails it
+ * returns early, so no fts_lucene context is attached to the user. */
+static void fts_lucene_mail_user_created(struct mail_user *user)
+{
+ struct mail_user_vfuncs *v = user->vlast;
+ struct fts_lucene_user *fuser;
+ const char *env, *error;
+
+ fuser = p_new(user->pool, struct fts_lucene_user, 1);
+ env = mail_user_plugin_getenv(user, "fts_lucene");
+ if (env == NULL)
+ env = "";
+
+ if (fts_lucene_plugin_init_settings(user, &fuser->set, env) < 0) {
+ /* invalid settings, disabling */
+ return;
+ }
+ if (fts_mail_user_init(user, fuser->set.use_libfts, &error) < 0) {
+ i_error("fts_lucene: %s", error);
+ return;
+ }
+
+ fuser->module_ctx.super = *v; /* save the current vfuncs before overriding deinit */
+ user->vlast = &fuser->module_ctx.super;
+ v->deinit = fts_lucene_mail_user_deinit;
+ MODULE_CONTEXT_SET(user, fts_lucene_user_module, fuser);
+}
+/* Hook table added in fts_lucene_plugin_init() and removed in
+ * fts_lucene_plugin_deinit(); only the mail_user_created hook is set. */
+static struct mail_storage_hooks fts_lucene_mail_storage_hooks = {
+ .mail_user_created = fts_lucene_mail_user_created
+};
+
+/* Plugin entry point: registers the lucene FTS backend and installs the
+   mail storage hooks on behalf of `module`. The parameter is used, so it
+   no longer carries the ATTR_UNUSED annotation. */
+void fts_lucene_plugin_init(struct module *module)
+{
+	fts_backend_register(&fts_backend_lucene);
+	mail_storage_hooks_add(module, &fts_lucene_mail_storage_hooks);
+}
+/* Plugin teardown: unregisters the backend by name, removes the storage
+ * hooks and then calls lucene_shutdown(). */
+void fts_lucene_plugin_deinit(void)
+{
+ fts_backend_unregister(fts_backend_lucene.name);
+ mail_storage_hooks_remove(&fts_lucene_mail_storage_hooks);
+ lucene_shutdown();
+}
+
+const char *fts_lucene_plugin_dependencies[] = { "fts", NULL };
diff --git a/src/plugins/fts-lucene/fts-lucene-plugin.h b/src/plugins/fts-lucene/fts-lucene-plugin.h
new file mode 100644
index 0000000..69440fb
--- /dev/null
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.h
@@ -0,0 +1,36 @@
+#ifndef FTS_LUCENE_PLUGIN_H
+#define FTS_LUCENE_PLUGIN_H
+
+#include "module-context.h"
+#include "mail-user.h"
+#include "fts-api-private.h"
+
+#define FTS_LUCENE_USER_CONTEXT(obj) \
+ MODULE_CONTEXT(obj, fts_lucene_user_module)
+#define FTS_LUCENE_USER_CONTEXT_REQUIRE(obj) \
+ MODULE_CONTEXT_REQUIRE(obj, fts_lucene_user_module)
+/* Parsed form of the "fts_lucene" plugin setting string. */
+struct fts_lucene_settings {
+ const char *default_language; /* "default_language=" value */
+ const char *textcat_conf, *textcat_dir; /* "textcat_conf=" / "textcat_dir=" values */
+ const char *whitespace_chars; /* "whitespace_chars=" value */
+ bool normalize; /* "normalize" token present */
+ bool no_snowball; /* "no_snowball" token present */
+ bool mime_parts; /* "mime_parts" token present */
+ bool use_libfts; /* "use_libfts" token present */
+};
+/* Per-user plugin context, accessed through the FTS_LUCENE_USER_CONTEXT
+ * macros defined above. */
+struct fts_lucene_user {
+ union mail_user_module_context module_ctx;
+ struct fts_lucene_settings set;
+};
+
+extern struct fts_backend fts_backend_lucene;
+extern MODULE_CONTEXT_DEFINE(fts_lucene_user_module, &mail_user_module_register);
+
+uint32_t fts_lucene_settings_checksum(const struct fts_lucene_settings *set);
+
+void fts_lucene_plugin_init(struct module *module);
+void fts_lucene_plugin_deinit(void);
+
+#endif
diff --git a/src/plugins/fts-lucene/lucene-wrapper.cc b/src/plugins/fts-lucene/lucene-wrapper.cc
new file mode 100644
index 0000000..7446693
--- /dev/null
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc
@@ -0,0 +1,1639 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+extern "C" {
+#include "lib.h"
+#include "array.h"
+#include "unichar.h"
+#include "hash.h"
+#include "hex-binary.h"
+#include "ioloop.h"
+#include "unlink-directory.h"
+#include "ioloop.h"
+#include "mail-index.h"
+#include "mail-search.h"
+#include "mail-namespace.h"
+#include "mailbox-list-private.h"
+#include "mail-storage.h"
+#include "fts-expunge-log.h"
+#include "fts-lucene-plugin.h"
+#include "lucene-wrapper.h"
+
+#include <sys/stat.h>
+#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
+# include <libexttextcat/textcat.h>
+#elif defined (HAVE_LIBTEXTCAT_TEXTCAT_H)
+# include <libtextcat/textcat.h>
+#elif defined (HAVE_FTS_TEXTCAT)
+# include <textcat.h>
+#endif
+};
+#include <CLucene.h>
+#include <CLucene/util/CLStreams.h>
+#include <CLucene/search/MultiPhraseQuery.h>
+#include "SnowballAnalyzer.h"
+
+/* Lucene's default is 10000. Use it here also.. */
+#define MAX_TERMS_PER_DOCUMENT 10000
+#define FTS_LUCENE_MAX_SEARCH_TERMS 1000
+
+#define LUCENE_LOCK_OVERRIDE_SECS 60
+#define LUCENE_INDEX_CLOSE_TIMEOUT_MSECS (120*1000)
+
+using namespace lucene::document;
+using namespace lucene::index;
+using namespace lucene::search;
+using namespace lucene::queryParser;
+using namespace lucene::analysis;
+using namespace lucene::analysis;
+using namespace lucene::util;
+/* A Query paired with the BooleanClause occurrence type to use for it. */
+struct lucene_query {
+ Query *query;
+ BooleanClause::Occur occur;
+};
+ARRAY_DEFINE_TYPE(lucene_query, struct lucene_query);
+/* Entry of the per-language analyzer cache (lucene_index.analyzers). */
+struct lucene_analyzer {
+ char *lang; /* i_strdup()ed language name; freed in lucene_index_deinit() */
+ Analyzer *analyzer; /* deleted in lucene_index_deinit() */
+};
+/* State of one Lucene index directory. */
+struct lucene_index {
+ char *path; /* index directory; copy owned by this struct */
+ struct mailbox_list *list; /* lucene_handle_error() checks this for NULL before wiping the index */
+ struct fts_lucene_settings set; /* copy of the settings given to lucene_index_init() */
+ normalizer_func_t *normalizer; /* NULL unless the "normalize" setting is on */
+
+ wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1]; /* selected mailbox GUID as hex, NUL-terminated */
+
+ IndexReader *reader; /* opened on demand by lucene_index_open() */
+ IndexWriter *writer; /* created by lucene_index_build_init() */
+ IndexSearcher *searcher; /* created on demand by lucene_index_open_search() */
+ struct timeout *to_close; /* calls lucene_index_close() after LUCENE_INDEX_CLOSE_TIMEOUT_MSECS */
+
+ buffer_t *normalizer_buf; /* normalizer output buffer; only allocated for the StandardAnalyzer case */
+ Analyzer *default_analyzer, *cur_analyzer; /* cur_analyzer == NULL means "use default_analyzer" */
+ ARRAY(struct lucene_analyzer) analyzers; /* analyzers created by get_analyzer() */
+
+ Document *doc; /* document being built, NULL if none */
+ uint32_t prev_uid, prev_part_idx; /* uid/part of the document in `doc` */
+ bool no_analyzer;
+};
+/* Working state of lucene_index_rescan(). */
+struct rescan_context {
+ struct lucene_index *index;
+
+ struct mailbox *box; /* mailbox currently being compared, NULL if none is open */
+ guid_128_t box_guid; /* GUID last handled by rescan_open_mailbox() */
+ int box_ret; /* result cached for box_guid and returned again for the same GUID */
+
+ pool_t pool; /* backs seen_mailbox_guids and its keys */
+ HASH_TABLE(uint8_t *, uint8_t *) seen_mailbox_guids; /* GUIDs found in the Lucene index */
+
+ ARRAY_TYPE(seq_range) uids; /* UIDs of `box` as returned by rescan_get_uids() */
+ struct seq_range_iter uids_iter;
+ unsigned int uids_iter_n; /* position of the next expected UID in `uids` */
+
+ uint32_t last_existing_uid; /* last UID found both in the mailbox and in Lucene */
+ bool warned; /* "missing UIDs" warning already logged for this mailbox */
+};
+
+static void *textcat = NULL;
+#ifdef HAVE_FTS_TEXTCAT
+static bool textcat_broken = FALSE;
+#endif
+static int textcat_refcount = 0;
+
+static void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
+ const char *msg);
+static void rescan_clear_unseen_mailboxes(struct lucene_index *index,
+ struct rescan_context *rescan_ctx);
+/* Allocate a lucene_index for the index directory `path`.
+ * With set != NULL the settings are copied and the user's default normalizer
+ * is used when set->normalize is on. The default analyzer is chosen as:
+ * KeywordAnalyzer when use_libfts is set; otherwise, in HAVE_FTS_STEMMER
+ * builds, a SnowballAnalyzer unless no_snowball is set; otherwise a
+ * StandardAnalyzer, which also gets a normalizer buffer if a normalizer is
+ * in use. Increments textcat_refcount; lucene_index_deinit() undoes all of
+ * this. */
+struct lucene_index *lucene_index_init(const char *path,
+ struct mailbox_list *list,
+ const struct fts_lucene_settings *set)
+{
+ struct lucene_index *index;
+
+ index = i_new(struct lucene_index, 1);
+ index->path = i_strdup(path);
+ index->list = list;
+ if (set != NULL) {
+ index->set = *set;
+ index->normalizer = !set->normalize ? NULL :
+ mailbox_list_get_namespace(list)->user->default_normalizer;
+ } else {
+ /* this is valid only for doveadm dump, so it doesn't matter */
+ index->set.default_language = "";
+ }
+ if (index->set.use_libfts) {
+ index->default_analyzer = _CLNEW KeywordAnalyzer();
+ } else /* continues with the #ifdef'd "if" below, or directly with the final block */
+#ifdef HAVE_FTS_STEMMER
+ if (set == NULL || !set->no_snowball) {
+ index->default_analyzer =
+ _CLNEW snowball::SnowballAnalyzer(index->normalizer,
+ index->set.default_language);
+ } else
+#endif
+ {
+ index->default_analyzer = _CLNEW standard::StandardAnalyzer();
+ if (index->normalizer != NULL) {
+ index->normalizer_buf =
+ buffer_create_dynamic(default_pool, 1024);
+ }
+ }
+
+ i_array_init(&index->analyzers, 32);
+ textcat_refcount++;
+
+ return index;
+}
+/* Close everything that is open on the index: removes the idle-close
+ * timeout, deletes the searcher, then closes and deletes the writer and the
+ * reader. Errors thrown by close() are reported through
+ * lucene_handle_error() and the object is deleted regardless. */
+void lucene_index_close(struct lucene_index *index)
+{
+ timeout_remove(&index->to_close);
+
+ _CLDELETE(index->searcher);
+ if (index->writer != NULL) {
+ try {
+ index->writer->close();
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexWriter::close");
+ }
+ _CLDELETE(index->writer);
+ }
+ if (index->reader != NULL) {
+ try {
+ index->reader->close();
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexReader::close");
+ }
+ _CLDELETE(index->reader);
+ }
+}
+/* Free an index created by lucene_index_init(): closes it, frees the
+ * per-language analyzers, drops one textcat reference (the shared textcat
+ * handle is released with the last one) and frees the remaining members. */
+void lucene_index_deinit(struct lucene_index *index)
+{
+ struct lucene_analyzer *a;
+
+ lucene_index_close(index);
+ array_foreach_modifiable(&index->analyzers, a) {
+ i_free(a->lang);
+ _CLDELETE(a->analyzer);
+ }
+ array_free(&index->analyzers);
+ if (--textcat_refcount == 0 && textcat != NULL) {
+#ifdef HAVE_FTS_TEXTCAT
+ textcat_Done(textcat);
+#endif
+ textcat = NULL;
+ }
+ _CLDELETE(index->default_analyzer);
+ if (index->normalizer_buf != NULL)
+ buffer_free(&index->normalizer_buf);
+ i_free(index->path);
+ i_free(index);
+}
+
+/* Replace every character of data[0..len-1] that is listed in the
+   whitespace_chars setting with a space. Does nothing when the setting is
+   empty or when libfts is in use. */
+static void lucene_data_translate(struct lucene_index *index,
+				  wchar_t *data, unsigned int len)
+{
+	const char *whitespace_chars = index->set.whitespace_chars;
+	unsigned int i;
+
+	if (*whitespace_chars == '\0' || index->set.use_libfts)
+		return;
+
+	for (i = 0; i < len; i++) {
+		/* strchr() converts its int argument to char, so a code
+		   point >= 0x80 used to be compared by its low byte only
+		   (e.g. U+012C matched ','). Only code points below 0x80
+		   can correspond to a single byte of the setting string. */
+		if ((uint32_t)data[i] < 0x80 &&
+		    strchr(whitespace_chars, (char)data[i]) != NULL)
+			data[i] = ' ';
+	}
+}
+/* Decode srcsize bytes of UTF-8 from src into the caller's dest array, which
+ * holds destsize wchar_t elements. destsize must be exactly the number of
+ * decoded characters plus one (asserted); the last element is set to 0.
+ * Input that fails to decode ends in i_unreached(). */
+void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
+ wchar_t *dest, size_t destsize)
+{
+ ARRAY_TYPE(unichars) dest_arr;
+ buffer_t buf = { { 0, 0 } };
+
+ i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+
+ buffer_create_from_data(&buf, dest, sizeof(wchar_t) * destsize);
+ array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
+ if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0)
+ i_unreached();
+ i_assert(array_count(&dest_arr)+1 == destsize);
+ dest[destsize-1] = 0;
+}
+/* Convert the NUL-terminated UTF-8 string `str` to a wide string held in an
+ * array from t_array_init(), then apply lucene_data_translate() to it
+ * (terminator excluded). Input that fails to decode ends in i_unreached(). */
+static const wchar_t *
+t_lucene_utf8_to_tchar(struct lucene_index *index, const char *str)
+{
+ ARRAY_TYPE(unichars) dest_arr;
+ const unichar_t *chars;
+ wchar_t *ret;
+ unsigned int len;
+
+ i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+
+ t_array_init(&dest_arr, strlen(str) + 1);
+ if (uni_utf8_to_ucs4(str, &dest_arr) < 0)
+ i_unreached();
+ (void)array_append_space(&dest_arr); /* extra element serves as the terminator */
+
+ chars = array_get_modifiable(&dest_arr, &len);
+ ret = (wchar_t *)chars;
+ lucene_data_translate(index, ret, len - 1);
+ return ret;
+}
+/* Remember `guid` (MAILBOX_GUID_HEX_LENGTH wide characters, no terminator
+ * required) as the selected mailbox; the stored copy is NUL-terminated. */
+void lucene_index_select_mailbox(struct lucene_index *index,
+ const wchar_t guid[MAILBOX_GUID_HEX_LENGTH])
+{
+ memcpy(index->mailbox_guid, guid,
+ MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
+ index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0';
+}
+/* Forget the selected mailbox by zeroing the stored GUID string. */
+void lucene_index_unselect_mailbox(struct lucene_index *index)
+{
+ memset(index->mailbox_guid, 0, sizeof(index->mailbox_guid));
+}
+/* Log a CLucene error raised by the operation named in `msg`. If the index
+ * has a mailbox list and the error is CL_ERR_CorruptIndex or CL_ERR_IO, the
+ * index directory is also deleted and rescan_clear_unseen_mailboxes() is
+ * run with a NULL rescan context. */
+static void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
+ const char *msg)
+{
+ const char *error, *what = err.what();
+
+ i_error("lucene index %s: %s failed (#%d): %s",
+ index->path, msg, err.number(), what);
+
+ if (index->list != NULL &&
+ (err.number() == CL_ERR_CorruptIndex ||
+ err.number() == CL_ERR_IO)) {
+ /* delete corrupted index. most IO errors are also about
+ missing files and other such corruption.. */
+ if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0)
+ i_error("unlink_directory(%s) failed: %s", index->path, error);
+ rescan_clear_unseen_mailboxes(index, NULL);
+ }
+}
+/* Make sure index->reader is open.
+ * Returns 1 if the reader is open (an already open reader only gets its
+ * idle-close timeout reset), 0 if no index exists at index->path, and -1 if
+ * opening failed (reported through lucene_handle_error()). */
+static int lucene_index_open(struct lucene_index *index)
+{
+ if (index->reader != NULL) {
+ i_assert(index->to_close != NULL);
+ timeout_reset(index->to_close);
+ return 1;
+ }
+
+ if (!IndexReader::indexExists(index->path))
+ return 0;
+
+ try {
+ index->reader = IndexReader::open(index->path);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexReader::open()");
+ return -1;
+ }
+ i_assert(index->to_close == NULL);
+ index->to_close = timeout_add(LUCENE_INDEX_CLOSE_TIMEOUT_MSECS,
+ lucene_index_close, index);
+ return 1;
+}
+
+/* Make sure index->searcher exists, opening the index reader first when
+   needed. Returns 1 when a searcher is available; otherwise passes on
+   lucene_index_open()'s 0 (no index on disk) or -1 (open failed). */
+static int lucene_index_open_search(struct lucene_index *index)
+{
+	if (index->searcher == NULL) {
+		int open_ret = lucene_index_open(index);
+
+		if (open_ret <= 0)
+			return open_ret;
+		index->searcher = _CLNEW IndexSearcher(index->reader);
+	}
+	return 1;
+}
+
+/* Read the stored "uid" field of `doc` into *uid_r.
+   Returns 0 on success, or -1 (after logging an error) when the document
+   has no "uid" value. The text is taken to be decimal digits; it is not
+   validated. */
+static int
+lucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r)
+{
+	const TCHAR *p = NULL;
+	Field *uid_field = doc->getField(_T("uid"));
+
+	if (uid_field != NULL)
+		p = uid_field->stringValue();
+	if (p == NULL) {
+		i_error("lucene: Corrupted FTS index %s: No UID for document",
+			index->path);
+		return -1;
+	}
+
+	uint32_t value = 0;
+	for (; *p != 0; p++)
+		value = value*10 + (*p - '0');
+	*uid_r = value;
+	return 0;
+}
+
+/* Return the number stored in the "part" field of `doc`, or 0 when the
+   document has no such value. The text is taken to be decimal digits; it
+   is not validated. */
+static uint32_t
+lucene_doc_get_part(struct lucene_index *index, Document *doc)
+{
+	const TCHAR *p = NULL;
+	Field *part_field = doc->getField(_T("part"));
+
+	if (part_field != NULL)
+		p = part_field->stringValue();
+	if (p == NULL)
+		return 0;
+
+	uint32_t value = 0;
+	for (; *p != 0; p++)
+		value = value*10 + (*p - '0');
+	return value;
+}
+/* Find the highest UID indexed for the currently selected mailbox by
+ * searching for all documents whose "box" field equals index->mailbox_guid.
+ * *last_uid_r is 0 when the index cannot be opened, otherwise the highest UID
+ * seen before the loop ended. Returns lucene_index_open_search()'s value
+ * (1 on success, 0 when there is no index), or -1 on failure.
+ * NOTE(review): if an exception is thrown after `hits` has been assigned,
+ * the _CLDELETE(hits) at the end of the try block is skipped. */
+int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
+{
+ int ret = 0;
+
+ *last_uid_r = 0;
+
+ if ((ret = lucene_index_open_search(index)) <= 0)
+ return ret;
+
+ Term mailbox_term(_T("box"), index->mailbox_guid);
+ TermQuery query(&mailbox_term);
+
+ uint32_t last_uid = 0;
+ try {
+ Hits *hits = index->searcher->search(&query);
+
+ for (size_t i = 0; i < hits->length(); i++) {
+ uint32_t uid;
+
+ if (lucene_doc_get_uid(index, &hits->doc(i),
+ &uid) < 0) {
+ ret = -1;
+ break;
+ }
+
+ if (uid > last_uid)
+ last_uid = uid;
+ }
+ _CLDELETE(hits);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "last_uid search");
+ ret = -1;
+ }
+ *last_uid_r = last_uid;
+ return ret;
+}
+/* Store the reader's document count in *count_r, opening the reader first if
+ * it is not open. A missing index counts as 0 documents. Returns 0 on
+ * success and -1 if the index could not be opened. */
+int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r)
+{
+ int ret;
+
+ if (index->reader == NULL) {
+ lucene_index_close(index); /* also closes a writer/searcher that may still be open */
+ if ((ret = lucene_index_open(index)) < 0)
+ return -1;
+ if (ret == 0) {
+ *count_r = 0;
+ return 0;
+ }
+ }
+ *count_r = index->reader->numDocs();
+ return 0;
+}
+/* Compare the current settings checksum with what
+ * fts_index_have_compatible_settings() has on record. A nonzero result of
+ * that call is returned unchanged. On 0 a warning is logged, the index
+ * directory is deleted and rescan_clear_unseen_mailboxes() is run; the
+ * return value is then 0, or -1 if deleting the directory failed. */
+static int lucene_settings_check(struct lucene_index *index)
+{
+ uint32_t set_checksum;
+ const char *error;
+ int ret = 0;
+
+ set_checksum = fts_lucene_settings_checksum(&index->set);
+ ret = fts_index_have_compatible_settings(index->list, set_checksum);
+ if (ret != 0)
+ return ret;
+
+ i_warning("fts-lucene: Settings have changed, rebuilding index for mailbox");
+
+ /* settings changed, rebuild index */
+ if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) {
+ i_error("unlink_directory(%s) failed: %s", index->path, error);
+ ret = -1;
+ } else {
+ rescan_clear_unseen_mailboxes(index, NULL);
+ }
+ return ret;
+}
+/* Prepare the index for adding documents: closes whatever is open, removes a
+ * write.lock file older than LUCENE_LOCK_OVERRIDE_SECS, runs the settings
+ * check and creates the IndexWriter (creating a new index when none exists).
+ * Returns 0 on success and -1 on failure. */
+int lucene_index_build_init(struct lucene_index *index)
+{
+ const char *lock_path;
+ struct stat st;
+
+ lucene_index_close(index);
+
+ lock_path = t_strdup_printf("%s/write.lock", index->path);
+ if (stat(lock_path, &st) == 0 &&
+ st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) {
+ if (unlink(lock_path) < 0)
+ i_error("unlink(%s) failed: %m", lock_path);
+ }
+
+ if (lucene_settings_check(index) < 0)
+ return -1;
+
+ bool exists = IndexReader::indexExists(index->path);
+ try {
+ index->writer = _CLNEW IndexWriter(index->path,
+ index->default_analyzer,
+ !exists);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexWriter()");
+ return -1;
+ }
+ index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
+ return 0;
+}
+/* The functions up to the matching #else are built only with textcat.
+ * get_analyzer(): return the SnowballAnalyzer cached for `lang` in
+ * index->analyzers, creating and caching a new one on first use. */
+#ifdef HAVE_FTS_TEXTCAT
+static Analyzer *get_analyzer(struct lucene_index *index, const char *lang)
+{
+ normalizer_func_t *normalizer = index->normalizer;
+ const struct lucene_analyzer *a;
+ struct lucene_analyzer new_analyzer;
+ Analyzer *analyzer;
+
+ array_foreach(&index->analyzers, a) {
+ if (strcmp(a->lang, lang) == 0)
+ return a->analyzer;
+ }
+
+ memset(&new_analyzer, 0, sizeof(new_analyzer));
+ new_analyzer.lang = i_strdup(lang);
+ new_analyzer.analyzer =
+ _CLNEW snowball::SnowballAnalyzer(normalizer, lang);
+ array_append_i(&index->analyzers.arr, &new_analyzer, 1);
+ return new_analyzer.analyzer;
+}
+/* Create a textcat handle from the textcat_conf/textcat_dir settings.
+ * Returns NULL when textcat_dir is unset; otherwise returns whatever
+ * special_textcat_Init() returns. */
+static void *textcat_init(struct lucene_index *index)
+{
+ const char *textcat_dir = index->set.textcat_dir;
+ unsigned int len;
+
+ if (textcat_dir == NULL)
+ return NULL;
+
+ /* textcat really wants the '/' suffix */
+ len = strlen(textcat_dir);
+ if (len > 0 && textcat_dir[len-1] != '/')
+ textcat_dir = t_strconcat(textcat_dir, "/", NULL);
+
+ return special_textcat_Init(index->set.textcat_conf, textcat_dir);
+}
+/* Pick an analyzer by letting textcat classify at most the first 500 bytes
+ * of `data`. The shared textcat handle is created on first use; if that
+ * fails, textcat_broken is set and later calls return NULL immediately.
+ * Returns NULL when the classification result does not start with a
+ * "[lang]" token, the default analyzer when lang equals default_language,
+ * and a per-language analyzer otherwise. */
+static Analyzer *
+guess_analyzer(struct lucene_index *index, const void *data, size_t size)
+{
+ const char *lang;
+
+ if (textcat_broken)
+ return NULL;
+
+ if (textcat == NULL) {
+ textcat = textcat_init(index);
+ if (textcat == NULL) {
+ textcat_broken = TRUE;
+ return NULL;
+ }
+ }
+
+ /* try to guess the language */
+ lang = textcat_Classify(textcat, (const char *)data,
+ I_MIN(size, 500));
+ const char *p = strchr(lang, ']');
+ if (lang[0] != '[' || p == NULL)
+ return NULL;
+ lang = t_strdup_until(lang+1, p); /* text between '[' and the first ']' */
+ if (strcmp(lang, index->set.default_language) == 0)
+ return index->default_analyzer;
+
+ return get_analyzer(index, lang);
+}
+#else
+static Analyzer *
+guess_analyzer(struct lucene_index *index ATTR_UNUSED,
+ const void *data ATTR_UNUSED, size_t size ATTR_UNUSED)
+{ /* build without textcat: never guesses, so cur_analyzer stays NULL */
+ return NULL;
+}
+#endif
+/* Write the pending document, if any, to the index writer and discard it.
+ * Without libfts the document is analyzed with cur_analyzer, or with the
+ * default analyzer when none was guessed; with libfts a NULL analyzer is
+ * passed to addDocument(). The document and cur_analyzer are reset even if
+ * adding fails. Returns 0 on success (or when nothing was pending) and -1
+ * on failure. */
+static int lucene_index_build_flush(struct lucene_index *index)
+{
+ int ret = 0;
+
+ if (index->doc == NULL)
+ return 0;
+
+ try {
+ CL_NS(analysis)::Analyzer *analyzer = NULL;
+
+ if (!index->set.use_libfts) {
+ analyzer = index->cur_analyzer != NULL ?
+ index->cur_analyzer : index->default_analyzer;
+ }
+ index->writer->addDocument(index->doc, analyzer);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexWriter::addDocument()");
+ ret = -1;
+ }
+
+ _CLDELETE(index->doc);
+ index->doc = NULL;
+ index->cur_analyzer = NULL;
+ return ret;
+}
+/* Add `data` (UTF-8, `size` bytes) to the document for (uid, part_idx).
+ * When uid or part_idx differs from the previous call, the pending document
+ * is flushed and a new one is started with stored "uid", "part" (only if
+ * nonzero) and "box" fields. With hdr_name != NULL the data is a header
+ * value: it goes into the "hdr" field (preceded by the lowercased header
+ * name unless libfts is used) and, if fts_header_want_indexed() says so,
+ * also into a field named after the header. Otherwise non-empty data goes
+ * into the "body" field and, without libfts, is used to guess the
+ * document's analyzer if none has been guessed yet. Returns 0 on success
+ * and -1 if flushing the previous document failed. */
+int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
+ uint32_t part_idx, const unsigned char *data,
+ size_t size, const char *hdr_name)
+{
+ wchar_t id[MAX_INT_STRLEN];
+ size_t namesize, datasize;
+
+ if (uid != index->prev_uid || part_idx != index->prev_part_idx) {
+ if (lucene_index_build_flush(index) < 0)
+ return -1;
+ index->prev_uid = uid;
+ index->prev_part_idx = part_idx;
+
+ index->doc = _CLNEW Document();
+ swprintf(id, N_ELEMENTS(id), L"%u", uid);
+ index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+ if (part_idx != 0) {
+ swprintf(id, N_ELEMENTS(id), L"%u", part_idx);
+ index->doc->add(*_CLNEW Field(_T("part"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+ }
+ index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+ }
+
+ if (index->normalizer_buf != NULL && !index->set.use_libfts) {
+ buffer_set_used_size(index->normalizer_buf, 0);
+ index->normalizer(data, size, index->normalizer_buf);
+ data = (const unsigned char *)index->normalizer_buf->data;
+ size = index->normalizer_buf->used;
+ }
+
+ datasize = uni_utf8_strlen_n(data, size) + 1;
+ wchar_t *dest, *dest_free = NULL;
+ if (datasize < 4096) /* small conversions use t_new(), larger ones i_new() + i_free() below */
+ dest = t_new(wchar_t, datasize);
+ else
+ dest = dest_free = i_new(wchar_t, datasize);
+ lucene_utf8_n_to_tchar(data, size, dest, datasize);
+ lucene_data_translate(index, dest, datasize-1);
+
+ int token_flag = index->set.use_libfts ?
+ Field::INDEX_UNTOKENIZED : Field::INDEX_TOKENIZED;
+ if (hdr_name != NULL) {
+ /* hdr_name should be ASCII, but don't break in case it isn't */
+ hdr_name = t_str_lcase(hdr_name);
+ namesize = uni_utf8_strlen(hdr_name) + 1;
+ wchar_t wname[namesize];
+ lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
+ strlen(hdr_name), wname, namesize);
+ if (!index->set.use_libfts)
+ index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | token_flag));
+ index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | token_flag));
+
+ if (fts_header_want_indexed(hdr_name))
+ index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | token_flag));
+ } else if (size > 0) {
+ if (index->cur_analyzer == NULL && !index->set.use_libfts)
+ index->cur_analyzer = guess_analyzer(index, data, size);
+ index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | token_flag));
+ }
+ i_free(dest_free);
+ return 0;
+}
+/* Finish a build session. If nothing was added (prev_uid == 0) it returns 0
+ * right away. Otherwise the pending document is flushed, the writer is
+ * closed and the whole index is closed. Returns -1 if there is no writer or
+ * if flushing or closing failed, else 0. */
+int lucene_index_build_deinit(struct lucene_index *index)
+{
+ int ret = 0;
+
+ if (index->prev_uid == 0) {
+ /* no changes. */
+ return 0;
+ }
+ index->prev_uid = 0;
+ index->prev_part_idx = 0;
+
+ if (index->writer == NULL) {
+ lucene_index_close(index);
+ return -1;
+ }
+
+ if (lucene_index_build_flush(index) < 0)
+ ret = -1;
+
+ try {
+ index->writer->close();
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "IndexWriter::close()");
+ ret = -1;
+ }
+
+ lucene_index_close(index);
+ return ret;
+}
+
+/* Convert a wide-character GUID made of exactly GUID_128_SIZE*2 lowercase
+   hex digits into its binary form in `dest`. Returns -1 if any character
+   is not [0-9a-f] or the string is longer than that; otherwise returns the
+   result of hex_to_binary(). */
+static int
+wcharguid_to_guid(guid_128_t dest, const wchar_t *src)
+{
+	char hex[GUID_128_SIZE*2 + 1];
+	buffer_t out = { { 0, 0 } };
+	unsigned int pos = 0;
+
+	while (pos < sizeof(hex)-1) {
+		wchar_t c = src[pos];
+		bool is_digit = c >= '0' && c <= '9';
+		bool is_lower_hex = c >= 'a' && c <= 'f';
+
+		if (!is_digit && !is_lower_hex)
+			return -1;
+		hex[pos++] = c;
+	}
+	if (src[pos] != '\0')
+		return -1;
+	hex[pos] = '\0';
+
+	buffer_create_from_data(&out, dest, GUID_128_SIZE);
+	return hex_to_binary(hex, &out);
+}
+/* Fill `uids` with the UIDs of all messages in `box` (sequences
+ * 1..status.messages). Returns -1 if the mailbox status cannot be read,
+ * else 0; an empty mailbox leaves `uids` untouched. */
+static int
+rescan_get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids)
+{
+ struct mailbox_status status;
+
+ if (mailbox_get_status(box, STATUS_MESSAGES, &status) < 0)
+ return -1;
+
+ if (status.messages > 0) T_BEGIN {
+ ARRAY_TYPE(seq_range) seqs;
+
+ t_array_init(&seqs, 2);
+ seq_range_array_add_range(&seqs, 1, status.messages);
+ mailbox_get_uid_range(box, &seqs, uids);
+ } T_END;
+ return 0;
+}
+/* Finish with the mailbox in ctx->box: records ctx->last_existing_uid via
+ * fts_index_set_last_uid() and frees the mailbox. Returns that call's
+ * result. */
+static int rescan_finish(struct rescan_context *ctx)
+{
+ int ret;
+
+ ret = fts_index_set_last_uid(ctx->box, ctx->last_existing_uid);
+ mailbox_free(&ctx->box);
+ return ret;
+}
+/* Read the "box" field of `doc` and convert it to a binary GUID in guid_r.
+ * Returns 0 on success; logs an error and returns -1 when the field is
+ * missing or is not in the format wcharguid_to_guid() accepts. */
+static int
+fts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc,
+ guid_128_t guid_r)
+{
+ Field *field = doc->getField(_T("box"));
+ const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
+ if (box_guid == NULL) {
+ i_error("lucene: Corrupted FTS index %s: No mailbox for document",
+ index->path);
+ return -1;
+ }
+
+ if (wcharguid_to_guid(guid_r, box_guid) < 0) {
+ i_error("lucene: Corrupted FTS index %s: "
+ "box field not in expected format", index->path);
+ return -1;
+ }
+ return 0;
+}
+/* Make ctx->box the mailbox that `doc` belongs to.
+ * Returns 1 when the mailbox is open and its UID list is loaded, 0 when the
+ * document has no usable "box" GUID or the mailbox no longer exists, and -1
+ * when opening or syncing failed. For a document with the same GUID as the
+ * previous one the cached result (ctx->box_ret) is returned. When the GUID
+ * changes, the previously open mailbox is finished with rescan_finish().
+ * NOTE(review): the result of rescan_get_uids() is ignored here; if it
+ * fails, ctx->uids stays empty. */
+static int
+rescan_open_mailbox(struct rescan_context *ctx, Document *doc)
+{
+ guid_128_t guid, *guidp;
+ int ret;
+
+ if (fts_lucene_get_mailbox_guid(ctx->index, doc, guid) < 0)
+ return 0;
+
+ if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) {
+ /* same as last one */
+ return ctx->box_ret;
+ }
+ memcpy(ctx->box_guid, guid, sizeof(ctx->box_guid));
+
+ guidp = p_new(ctx->pool, guid_128_t, 1);
+ memcpy(guidp, guid, sizeof(*guidp));
+ hash_table_insert(ctx->seen_mailbox_guids, guidp, guidp);
+
+ if (ctx->box != NULL)
+ rescan_finish(ctx);
+ ctx->box = mailbox_alloc_guid(ctx->index->list, guid,
+ (enum mailbox_flags)0);
+ if (mailbox_open(ctx->box) < 0) {
+ enum mail_error error;
+ const char *errstr;
+
+ errstr = mailbox_get_last_internal_error(ctx->box, &error);
+ if (error == MAIL_ERROR_NOTFOUND)
+ ret = 0;
+ else {
+ i_error("lucene: Couldn't open mailbox %s: %s",
+ mailbox_get_vname(ctx->box), errstr);
+ ret = -1;
+ }
+ mailbox_free(&ctx->box);
+ ctx->box_ret = ret;
+ return ret;
+ }
+ if (mailbox_sync(ctx->box, (enum mailbox_sync_flags)0) < 0) {
+ i_error("lucene: Failed to sync mailbox %s: %s",
+ mailbox_get_vname(ctx->box),
+ mailbox_get_last_internal_error(ctx->box, NULL));
+ mailbox_free(&ctx->box);
+ ctx->box_ret = -1;
+ return -1;
+ }
+
+ array_clear(&ctx->uids);
+ rescan_get_uids(ctx->box, &ctx->uids);
+
+ ctx->warned = FALSE;
+ ctx->last_existing_uid = 0;
+ ctx->uids_iter_n = 0;
+ seq_range_array_iter_init(&ctx->uids_iter, &ctx->uids);
+
+ ctx->box_ret = 1;
+ return 1;
+}
+
+/* Compare the UID of `doc` with the next UID expected from the mailbox.
+   Returns 1 when they match (the document is kept and the expected
+   position advances) and 0 in every other case, which makes the caller
+   delete the document. */
+static int
+rescan_next(struct rescan_context *ctx, Document *doc)
+{
+	uint32_t lucene_uid, idx_uid;
+
+	if (lucene_doc_get_uid(ctx->index, doc, &lucene_uid) < 0)
+		return 0;
+
+	if (!seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n,
+				      &idx_uid)) {
+		/* the rest of the messages have been expunged from index */
+		return 0;
+	}
+
+	if (idx_uid == lucene_uid) {
+		ctx->uids_iter_n++;
+		ctx->last_existing_uid = idx_uid;
+		return 1;
+	}
+
+	if (idx_uid < lucene_uid && !ctx->warned) {
+		/* lucene is missing an UID from the middle. delete
+		   the rest of the messages from this mailbox and
+		   reindex. */
+		i_warning("lucene: Mailbox %s "
+			  "missing UIDs in the middle",
+			  mailbox_get_vname(ctx->box));
+		ctx->warned = TRUE;
+	}
+	/* when idx_uid > lucene_uid the UID has been expunged from index,
+	   so it is deleted from lucene as well. */
+	return 0;
+}
+/* Write `hdr` as the fts index header of mailbox `vname` if the mailbox can
+ * be opened, its GUID can be read, and either no rescan context is given or
+ * the GUID is absent from rescan_ctx->seen_mailbox_guids. Failures are
+ * silently ignored; the mailbox is always freed. */
+static void
+rescan_clear_unseen_mailbox(struct lucene_index *index,
+ struct rescan_context *rescan_ctx,
+ const char *vname,
+ const struct fts_index_header *hdr)
+{
+ struct mailbox *box;
+ struct mailbox_metadata metadata;
+
+ box = mailbox_alloc(index->list, vname,
+ (enum mailbox_flags)0);
+ if (mailbox_open(box) == 0 &&
+ mailbox_get_metadata(box, MAILBOX_METADATA_GUID,
+ &metadata) == 0 &&
+ (rescan_ctx == NULL ||
+ hash_table_lookup(rescan_ctx->seen_mailbox_guids,
+ metadata.guid) == NULL)) {
+ /* this mailbox had no records in lucene index.
+ make sure its last indexed uid is 0 */
+ (void)fts_index_set_header(box, hdr);
+ }
+ mailbox_free(&box);
+}
+/* Run rescan_clear_unseen_mailbox() for every mailbox listed by "*" in
+ * index->list, plus the namespace prefix itself when it ends with the
+ * hierarchy separator. The header written is zeroed except for the current
+ * settings checksum. With rescan_ctx == NULL every mailbox is reset. */
+static void rescan_clear_unseen_mailboxes(struct lucene_index *index,
+ struct rescan_context *rescan_ctx)
+{
+ const enum mailbox_list_iter_flags iter_flags =
+ (enum mailbox_list_iter_flags)
+ (MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+ MAILBOX_LIST_ITER_RETURN_NO_FLAGS);
+ struct mailbox_list_iterate_context *iter;
+ const struct mailbox_info *info;
+ struct fts_index_header hdr;
+ struct mail_namespace *ns = index->list->ns;
+ const char *vname;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.settings_checksum = fts_lucene_settings_checksum(&index->set);
+
+ iter = mailbox_list_iter_init(index->list, "*", iter_flags);
+ while ((info = mailbox_list_iter_next(iter)) != NULL)
+ rescan_clear_unseen_mailbox(index, rescan_ctx, info->vname, &hdr);
+ (void)mailbox_list_iter_deinit(&iter);
+
+ if (ns->prefix_len > 0 &&
+ ns->prefix[ns->prefix_len-1] == mail_namespace_get_sep(ns)) {
+ /* namespace prefix itself isn't returned by the listing */
+ vname = t_strndup(index->list->ns->prefix,
+ index->list->ns->prefix_len-1);
+ rescan_clear_unseen_mailbox(index, rescan_ctx, vname, &hdr);
+ }
+}
+/* Reconcile the Lucene index with the mailboxes.
+ * All documents are fetched sorted by "box" and "uid". For each one the
+ * owning mailbox is opened and the UID compared with the mailbox's UIDs;
+ * documents for which this yields 0 are deleted through the reader.
+ * Afterwards the index is closed, the last mailbox is finished, and every
+ * mailbox whose GUID never appeared in the index gets its fts header reset.
+ * Returns -1 if opening the index, the search, or any per-document step
+ * failed, else 0.
+ * NOTE(review): if an exception is thrown inside the loop, the
+ * _CLDELETE(hits) at the end of the try block is skipped. */
+int lucene_index_rescan(struct lucene_index *index)
+{
+ static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL };
+ struct rescan_context ctx;
+ bool failed = false;
+ int ret;
+
+ i_assert(index->list != NULL);
+
+ if ((ret = lucene_index_open_search(index)) < 0)
+ return ret;
+
+ Term term(_T("box"), _T("*"));
+ WildcardQuery query(&term);
+ Sort sort(sort_fields);
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.index = index;
+ ctx.pool = pool_alloconly_create("guids", 1024);
+ hash_table_create(&ctx.seen_mailbox_guids, ctx.pool, 0,
+ guid_128_hash, guid_128_cmp);
+ i_array_init(&ctx.uids, 128);
+
+ if (ret > 0) try { /* ret == 0: no index on disk, nothing to search */
+ Hits *hits = index->searcher->search(&query, &sort);
+
+ for (size_t i = 0; i < hits->length(); i++) {
+ ret = rescan_open_mailbox(&ctx, &hits->doc(i));
+ if (ret > 0)
+ ret = rescan_next(&ctx, &hits->doc(i));
+ if (ret < 0)
+ failed = true;
+ else if (ret == 0)
+ index->reader->deleteDocument(hits->id(i));
+ }
+ _CLDELETE(hits);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "rescan search");
+ failed = true;
+ }
+ lucene_index_close(index);
+ if (ctx.box != NULL)
+ rescan_finish(&ctx);
+ array_free(&ctx.uids);
+
+ rescan_clear_unseen_mailboxes(index, &ctx);
+ hash_table_destroy(&ctx.seen_mailbox_guids);
+ pool_unref(&ctx.pool);
+ return failed ? -1 : 0;
+}
+
+/* Write `guid` as MAILBOX_GUID_HEX_LENGTH hex digits into wguid_hex,
+   widening each character, and NUL-terminate the result. */
+static void guid128_to_wguid(const guid_128_t guid,
+			     wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH + 1])
+{
+	unsigned char hex[MAILBOX_GUID_HEX_LENGTH];
+	buffer_t hexbuf = { { 0, 0 } };
+
+	buffer_create_from_data(&hexbuf, hex, MAILBOX_GUID_HEX_LENGTH);
+	binary_to_hex_append(&hexbuf, guid, GUID_128_SIZE);
+
+	wchar_t *out = wguid_hex;
+	for (const unsigned char *in = hex;
+	     in != hex + MAILBOX_GUID_HEX_LENGTH; in++)
+		*out++ = *in;
+	*out = '\0';
+}
+/* Add one SHOULD TermQuery on "uid" to `query` for every UID in rec->uids.
+ * Returns false without touching `query` when there are more than
+ * FTS_LUCENE_MAX_SEARCH_TERMS UIDs, true otherwise. */
+static bool
+lucene_index_add_uid_filter(BooleanQuery *query,
+ const struct fts_expunge_log_read_record *rec)
+{
+ struct seq_range_iter iter;
+ wchar_t wuid[MAX_INT_STRLEN];
+ unsigned int n;
+ uint32_t uid;
+
+ /* RangeQuery and WildcardQuery work by enumerating through all terms
+ that match them, and then adding TermQueries for them. So we can
+ simply do the same directly, and if it looks like there are too
+ many terms just go through everything. */
+
+ if (seq_range_count(&rec->uids) > FTS_LUCENE_MAX_SEARCH_TERMS)
+ return false;
+
+ seq_range_array_iter_init(&iter, &rec->uids); n = 0;
+ while (seq_range_array_iter_nth(&iter, n++, &uid)) {
+ swprintf(wuid, N_ELEMENTS(wuid), L"%u", uid);
+
+ Term *term = _CLNEW Term(_T("uid"), wuid);
+ query->add(_CLNEW TermQuery(term), true, BooleanClause::SHOULD);
+ _CLDECDELETE(term);
+ }
+ return true;
+}
+/* Delete the documents described by one expunge log record.
+ * The search requires the record's mailbox GUID in "box" and, when the UID
+ * list is small enough for lucene_index_add_uid_filter(), one of the
+ * record's UIDs. Each hit is checked again with seq_range_exists() before
+ * it is deleted; hits whose UID cannot be read are deleted as well.
+ * Returns lucene_index_open_search()'s value if that is <= 0 (0 meaning
+ * there is no index), otherwise 0 on success and -1 on a search error. */
+static int
+lucene_index_expunge_record(struct lucene_index *index,
+ const struct fts_expunge_log_read_record *rec)
+{
+ int ret;
+
+ if ((ret = lucene_index_open_search(index)) <= 0)
+ return ret;
+
+ BooleanQuery query;
+ BooleanQuery uids_query;
+
+ if (lucene_index_add_uid_filter(&uids_query, rec))
+ query.add(&uids_query, BooleanClause::MUST);
+
+ wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1];
+ guid128_to_wguid(rec->mailbox_guid, wguid);
+ Term term(_T("box"), wguid);
+ TermQuery mailbox_query(&term);
+ query.add(&mailbox_query, BooleanClause::MUST);
+
+ try {
+ Hits *hits = index->searcher->search(&query);
+
+ for (size_t i = 0; i < hits->length(); i++) {
+ uint32_t uid;
+
+ if (lucene_doc_get_uid(index, &hits->doc(i),
+ &uid) < 0 ||
+ seq_range_exists(&rec->uids, uid))
+ index->reader->deleteDocument(hits->id(i));
+ }
+ _CLDELETE(hits);
+ } catch (CLuceneError &err) {
+ lucene_handle_error(index, err, "expunge search");
+ ret = -1;
+ }
+ return ret < 0 ? -1 : 0;
+}
+/* Apply every record of the expunge log to the index, stopping at the first
+ * record that fails, then close the index. Returns -1 if a record failed or
+ * fts_expunge_log_read_end() failed; otherwise returns
+ * fts_expunge_log_read_end()'s result. */
+int lucene_index_expunge_from_log(struct lucene_index *index,
+ struct fts_expunge_log *log)
+{
+ struct fts_expunge_log_read_ctx *ctx;
+ const struct fts_expunge_log_read_record *rec;
+ int ret = 0, ret2;
+
+ ctx = fts_expunge_log_read_begin(log);
+ while ((rec = fts_expunge_log_read_next(ctx)) != NULL) {
+ if (lucene_index_expunge_record(index, rec) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+
+ lucene_index_close(index);
+
+ ret2 = fts_expunge_log_read_end(&ctx);
+ if (ret < 0 || ret2 < 0)
+ return -1;
+ return ret2;
+}
+
+/* Optimize the on-disk index at index->path.
+
+   Does nothing and returns 0 when no index exists at that path. A lock on
+   the index, if present, is removed before the writer is opened.
+
+   Returns 0 on success, -1 if CLucene reported an error while opening,
+   optimizing or closing the writer. */
+int lucene_index_optimize(struct lucene_index *index)
+{
+	int ret = 0;
+
+	if (!IndexReader::indexExists(index->path))
+		return 0;
+	if (IndexReader::isLocked(index->path))
+		IndexReader::unlock(index->path);
+
+	IndexWriter *writer = NULL;
+	try {
+		writer = _CLNEW IndexWriter(index->path, index->default_analyzer, false);
+		writer->optimize();
+	} catch (CLuceneError &err) {
+		lucene_handle_error(index, err, "IndexWriter::optimize()");
+		ret = -1;
+	}
+	if (writer == NULL) {
+		/* The IndexWriter constructor threw, so there is nothing to
+		   close or delete. Calling close() here would dereference
+		   a NULL pointer. */
+		return ret;
+	}
+	try {
+		writer->close();
+	} catch (CLuceneError &err) {
+		lucene_handle_error(index, err, "IndexWriter::close()");
+		ret = -1;
+	}
+	_CLDELETE(writer);
+	return ret;
+}
+
+// Mostly copy&pasted from CLucene's QueryParser
+//
+// Tokenizes queryText with the given analyzer and builds a query on _field:
+//  - no tokens: returns NULL
+//  - one token: TermQuery, or FuzzyQuery when `fuzzy` is set
+//  - several tokens, some sharing a position: a BooleanQuery of SHOULD
+//    TermQueries when all tokens sit at one position, otherwise a
+//    MultiPhraseQuery
+//  - several tokens at distinct positions: PhraseQuery
+// `fuzzy` is only consulted in the single-token case.
+static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
+ // Use the analyzer to get all the tokens, and then build a TermQuery,
+ // PhraseQuery, or nothing based on the term count
+
+ StringReader reader(queryText);
+ TokenStream* source = analyzer->tokenStream(_field, &reader);
+
+ // Collected tokens, in stream order.
+ CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v;
+ CL_NS(analysis)::Token* t = NULL;
+ int32_t positionCount = 0;
+ bool severalTokensAtSamePosition = false;
+
+ while (true) {
+ t = _CLNEW Token();
+ try {
+ Token* _t = source->next(t);
+ // End of stream: the loop exit below relies on t being NULL after
+ // _CLDELETE (presumably the macro resets the pointer - confirm).
+ if (_t == NULL) _CLDELETE(t);
+ }_CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);,{
+ t = NULL;
+ });
+ if (t == NULL)
+ break;
+ v.push_back(t);
+ // A zero position increment means this token shares its position
+ // with the previous one.
+ if (t->getPositionIncrement() != 0)
+ positionCount += t->getPositionIncrement();
+ else
+ severalTokensAtSamePosition = true;
+ }
+ try {
+ source->close();
+ }
+ _CLCATCH_ERR_CLEANUP(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);} ); /* cleanup */
+ _CLLDELETE(source);
+
+ if (v.size() == 0)
+ return NULL;
+ else if (v.size() == 1) {
+ Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer());
+ Query* ret;
+ if (fuzzy)
+ ret = _CLNEW FuzzyQuery( tm );
+ else
+ ret = _CLNEW TermQuery( tm );
+ _CLDECDELETE(tm);
+ return ret;
+ } else {
+ if (severalTokensAtSamePosition) {
+ if (positionCount == 1) {
+ // no phrase query:
+ BooleanQuery* q = _CLNEW BooleanQuery(true);
+ for(size_t i=0; i<v.size(); i++ ){
+ Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer());
+ q->add(_CLNEW TermQuery(tm), true, BooleanClause::SHOULD);
+ _CLDECDELETE(tm);
+ }
+ return q;
+ }else {
+ MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery();
+ // Terms gathered for the current position; flushed into mpq
+ // each time a token advances the position.
+ CLArrayList<Term*> multiTerms;
+ int32_t position = -1;
+ for (size_t i = 0; i < v.size(); i++) {
+ t = v.at(i);
+ if (t->getPositionIncrement() > 0 && multiTerms.size() > 0) {
+ ValueArray<Term*> termsArray(multiTerms.size());
+ multiTerms.toArray(termsArray.values);
+ mpq->add(&termsArray,position);
+ multiTerms.clear();
+ }
+ position += t->getPositionIncrement();
+ multiTerms.push_back(_CLNEW Term(_field, t->termBuffer()));
+ }
+ // Flush the terms of the last position.
+ ValueArray<Term*> termsArray(multiTerms.size());
+ multiTerms.toArray(termsArray.values);
+ mpq->add(&termsArray,position);
+ return mpq;
+ }
+ }else {
+ PhraseQuery* pq = _CLNEW PhraseQuery();
+ int32_t position = -1;
+
+ for (size_t i = 0; i < v.size(); i++) {
+ t = v.at(i);
+ Term* tm = _CLNEW Term(_field, t->termBuffer());
+ position += t->getPositionIncrement();
+ pq->add(tm,position);
+ _CLDECDELETE(tm);
+ }
+ return pq;
+ }
+ }
+}
+
+/* Build a query that looks for `str` in the field `key`.
+
+   With index->set.use_libfts the string is used verbatim as a single term
+   (FuzzyQuery when `fuzzy`, TermQuery otherwise). Otherwise the string is
+   run through the index normalizer when one is configured, then tokenized
+   by the guessed analyzer (falling back to the default analyzer) via
+   getFieldQuery(), which may return NULL. */
+static Query *
+lucene_get_query_str(struct lucene_index *index,
+		     const TCHAR *key, const char *str, bool fuzzy)
+{
+	if (index->set.use_libfts) {
+		const wchar_t *wide_str = t_lucene_utf8_to_tchar(index, str);
+		Term *term = _CLNEW Term(key, wide_str);
+		Query *term_query = fuzzy ?
+			static_cast<Query *>(_CLNEW FuzzyQuery(term)) :
+			static_cast<Query *>(_CLNEW TermQuery(term));
+		_CLDECDELETE(term);
+		return term_query;
+	}
+
+	if (index->normalizer_buf != NULL) {
+		/* normalize into the reusable buffer and search for the
+		   NUL-terminated result instead of the raw input */
+		buffer_set_used_size(index->normalizer_buf, 0);
+		index->normalizer(str, strlen(str), index->normalizer_buf);
+		buffer_append_c(index->normalizer_buf, '\0');
+		str = (const char *)index->normalizer_buf->data;
+	}
+
+	const TCHAR *wide_value = t_lucene_utf8_to_tchar(index, str);
+	Analyzer *chosen = guess_analyzer(index, str, strlen(str));
+	if (chosen == NULL) {
+		chosen = index->default_analyzer;
+		i_assert(chosen != NULL);
+	}
+	return getFieldQuery(chosen, key, wide_value, fuzzy);
+}
+
+/* Convenience wrapper: build the query for a search argument's string value
+   on field `key`, honouring the argument's fuzzy setting. */
+static Query *
+lucene_get_query(struct lucene_index *index,
+		 const TCHAR *key, const struct mail_search_arg *arg)
+{
+	const char *search_text = arg->value.str;
+	bool want_fuzzy = arg->fuzzy;
+
+	return lucene_get_query_str(index, key, search_text, want_fuzzy);
+}
+
+/* Try to turn one search argument into a query and append it to `queries`.
+
+   Handles TEXT (header OR body), BODY, and header searches whose header
+   passes fts_header_want_indexed() and whose value is non-empty. Returns
+   false without appending when the argument can't be handled (no_fts set,
+   a negated argument in OR mode, unsupported type, or no query could be
+   built), true when a query was appended. */
+static bool
+lucene_add_definite_query(struct lucene_index *index,
+			  ARRAY_TYPE(lucene_query) &queries,
+			  struct mail_search_arg *arg,
+			  enum fts_lookup_flags flags)
+{
+	const bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
+
+	if (arg->no_fts)
+		return false;
+
+	/* FIXME: we could handle this by doing multiple queries.. */
+	if (arg->match_not && !and_args)
+		return false;
+
+	Query *built = NULL;
+	switch (arg->type) {
+	case SEARCH_TEXT: {
+		Query *hdr_q = lucene_get_query(index, _T("hdr"), arg);
+		Query *body_q = lucene_get_query(index, _T("body"), arg);
+
+		if (hdr_q != NULL || body_q != NULL) {
+			/* match in either the headers or the body */
+			BooleanQuery *either = _CLNEW BooleanQuery();
+			if (hdr_q != NULL)
+				either->add(hdr_q, true, BooleanClause::SHOULD);
+			if (body_q != NULL)
+				either->add(body_q, true, BooleanClause::SHOULD);
+			built = either;
+		}
+		break;
+	}
+	case SEARCH_BODY:
+		built = lucene_get_query(index, _T("body"), arg);
+		break;
+	case SEARCH_HEADER:
+	case SEARCH_HEADER_ADDRESS:
+	case SEARCH_HEADER_COMPRESS_LWSP: {
+		if (!fts_header_want_indexed(arg->hdr_field_name))
+			return false;
+		if (*arg->value.str == '\0')
+			return false;
+
+		/* the lowercased header name is used as the field name */
+		const TCHAR *hdr_field = t_lucene_utf8_to_tchar(index,
+			t_str_lcase(arg->hdr_field_name));
+		built = lucene_get_query(index, hdr_field, arg);
+		break;
+	}
+	default:
+		return false;
+	}
+
+	if (built == NULL) {
+		/* couldn't handle this search after all (e.g. trying to search
+		   a stop word) */
+		return false;
+	}
+
+	struct lucene_query *lq = array_append_space(&queries);
+	lq->query = built;
+	if (and_args) {
+		lq->occur = arg->match_not ?
+			BooleanClause::MUST_NOT : BooleanClause::MUST;
+	} else {
+		lq->occur = BooleanClause::SHOULD;
+	}
+	return true;
+}
+
+/* Try to turn a header search argument into a "maybe" query on the generic
+   "hdr" field and append it to `queries`.
+
+   Only header searches are considered. An empty value (when libfts isn't
+   used) searches for the lowercased header name itself; otherwise headers
+   that pass fts_header_want_indexed() are rejected here and the remaining
+   ones are searched by value. Returns true when a query was appended. */
+static bool
+lucene_add_maybe_query(struct lucene_index *index,
+		       ARRAY_TYPE(lucene_query) &queries,
+		       struct mail_search_arg *arg,
+		       enum fts_lookup_flags flags)
+{
+	const bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
+
+	if (arg->no_fts)
+		return false;
+
+	/* FIXME: we could handle this by doing multiple queries.. */
+	if (arg->match_not)
+		return false;
+
+	Query *hdr_query = NULL;
+	switch (arg->type) {
+	case SEARCH_HEADER:
+	case SEARCH_HEADER_ADDRESS:
+	case SEARCH_HEADER_COMPRESS_LWSP:
+		if (*arg->value.str == '\0' && !index->set.use_libfts) {
+			/* checking potential existence of the header name */
+			hdr_query = lucene_get_query_str(index, _T("hdr"),
+				t_str_lcase(arg->hdr_field_name), FALSE);
+		} else if (fts_header_want_indexed(arg->hdr_field_name)) {
+			return false;
+		} else {
+			/* we can check if the search key exists in some
+			   header and filter out the messages that have no
+			   chance of matching */
+			hdr_query = lucene_get_query(index, _T("hdr"), arg);
+		}
+		break;
+	default:
+		return false;
+	}
+
+	if (hdr_query == NULL) {
+		/* couldn't handle this search after all (e.g. trying to search
+		   a stop word) */
+		return false;
+	}
+
+	struct lucene_query *lq = array_append_space(&queries);
+	lq->query = hdr_query;
+	if (and_args) {
+		lq->occur = arg->match_not ?
+			BooleanClause::MUST_NOT : BooleanClause::MUST;
+	} else {
+		lq->occur = BooleanClause::SHOULD;
+	}
+	return true;
+}
+
+/* Returns TRUE if at least one query in the array has an occur value other
+   than MUST_NOT, FALSE if all of them are MUST_NOT (or the array is empty). */
+static bool queries_have_non_must_nots(ARRAY_TYPE(lucene_query) &queries)
+{
+	const struct lucene_query *lq;
+	bool found = FALSE;
+
+	array_foreach(&queries, lq) {
+		if (lq->occur != BooleanClause::MUST_NOT) {
+			found = TRUE;
+			break;
+		}
+	}
+	return found;
+}
+
+/* Combine `queries` into one sub-query and add it to `query`.
+
+   Normally each query keeps its own occur value and the sub-query is added
+   as MUST. When every query is MUST_NOT, they are instead added as SHOULD
+   and the whole sub-query is added as MUST_NOT. */
+static void search_query_add(BooleanQuery &query,
+			     ARRAY_TYPE(lucene_query) &queries)
+{
+	BooleanQuery *search_query = _CLNEW BooleanQuery();
+	const bool only_must_nots = !queries_have_non_must_nots(queries);
+	const struct lucene_query *lq;
+
+	array_foreach(&queries, lq) {
+		search_query->add(lq->query, true,
+				  only_must_nots ? BooleanClause::SHOULD :
+				  lq->occur);
+	}
+	query.add(search_query, true,
+		  only_must_nots ? BooleanClause::MUST_NOT :
+		  BooleanClause::MUST);
+}
+
+/* Run `queries` against the currently selected mailbox (index->mailbox_guid)
+   and add the UID of every hit to uids_r.
+
+   When `result` is non-NULL, a score entry is appended to result->scores for
+   each newly added UID and result->scores_sorted records whether those UIDs
+   arrived in non-decreasing order.
+
+   Returns 0 on success, -1 if a document's UID couldn't be read or CLucene
+   threw an error. */
+static int
+lucene_index_search(struct lucene_index *index,
+ ARRAY_TYPE(lucene_query) &queries,
+ struct fts_result *result, ARRAY_TYPE(seq_range) *uids_r)
+{
+ struct fts_score_map *score;
+ int ret = 0;
+
+ BooleanQuery query;
+ search_query_add(query, queries);
+
+ /* restrict the search to the selected mailbox */
+ Term mailbox_term(_T("box"), index->mailbox_guid);
+ TermQuery mailbox_query(&mailbox_term);
+ query.add(&mailbox_query, BooleanClause::MUST);
+
+ try {
+ Hits *hits = index->searcher->search(&query);
+
+ uint32_t last_uid = 0;
+ if (result != NULL)
+ result->scores_sorted = true;
+
+ for (size_t i = 0; i < hits->length(); i++) {
+ uint32_t uid;
+
+ if (lucene_doc_get_uid(index, &hits->doc(i),
+ &uid) < 0) {
+ ret = -1;
+ break;
+ }
+
+ if (seq_range_array_add(uids_r, uid)) {
+ /* duplicate result */
+ } else if (result != NULL) {
+ /* a UID lower than the previous one means the scores
+ are not in UID order */
+ if (uid < last_uid)
+ result->scores_sorted = false;
+ last_uid = uid;
+
+ score = array_append_space(&result->scores);
+ score->uid = uid;
+ score->score = hits->score(i);
+ }
+ }
+ _CLDELETE(hits);
+ return ret;
+ } catch (CLuceneError &err) {
+ /* NOTE(review): `hits` is not deleted if the exception is thrown
+ after the search returned - confirm whether that matters. */
+ lucene_handle_error(index, err, "search");
+ return -1;
+ }
+}
+
+/* Look up `args` in the currently selected mailbox.
+
+   Arguments that can be answered definitely are searched first; their hits
+   go to result->definite_uids, or to result->maybe_uids when
+   FTS_LOOKUP_FLAG_NO_AUTO_FUZZY is set. Only when there are no definite
+   arguments are the "maybe" header queries tried, filling
+   result->maybe_uids. Every argument that produced a query gets
+   match_always set.
+
+   Returns 0 on success, -1 if the index can't be opened for searching or a
+   search fails. */
+int lucene_index_lookup(struct lucene_index *index,
+			struct mail_search_arg *args,
+			enum fts_lookup_flags flags,
+			struct fts_result *result)
+{
+	if (lucene_index_open_search(index) <= 0)
+		return -1;
+
+	ARRAY_TYPE(lucene_query) def_queries;
+	t_array_init(&def_queries, 16);
+	unsigned int definite_count = 0;
+
+	for (struct mail_search_arg *arg = args; arg != NULL; arg = arg->next) {
+		if (!lucene_add_definite_query(index, def_queries, arg, flags))
+			continue;
+		arg->match_always = true;
+		definite_count++;
+	}
+
+	if (definite_count > 0) {
+		ARRAY_TYPE(seq_range) *uids_arr;
+
+		if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0)
+			uids_arr = &result->definite_uids;
+		else
+			uids_arr = &result->maybe_uids;
+		if (lucene_index_search(index, def_queries, result,
+					uids_arr) < 0)
+			return -1;
+
+		/* FIXME: mixing up definite + maybe queries is broken. if the
+		   definite query matched, it'll just assume that the maybe
+		   queries matched as well */
+		return 0;
+	}
+
+	ARRAY_TYPE(lucene_query) maybe_queries;
+	t_array_init(&maybe_queries, 16);
+	unsigned int maybe_count = 0;
+
+	for (struct mail_search_arg *arg = args; arg != NULL; arg = arg->next) {
+		if (!lucene_add_maybe_query(index, maybe_queries, arg, flags))
+			continue;
+		arg->match_always = true;
+		maybe_count++;
+	}
+
+	if (maybe_count > 0 &&
+	    lucene_index_search(index, maybe_queries, NULL,
+				&result->maybe_uids) < 0)
+		return -1;
+	return 0;
+}
+
+/* Run `queries` against every mailbox whose wide-char GUID is a key of
+   `guids`, and store each hit in the fts_result that `guids` maps the
+   hit's mailbox to.
+
+   Hits go to definite_uids, or to maybe_uids when
+   FTS_LOOKUP_FLAG_NO_AUTO_FUZZY is set, matching lucene_index_lookup().
+   The UID and score arrays are allocated from result->pool on first use.
+   A hit for a mailbox that isn't in `guids` is logged and skipped.
+
+   Returns 0 on success, -1 if a document lacks its "box" field, its UID
+   can't be read, or CLucene threw an error. */
+static int
+lucene_index_search_multi(struct lucene_index *index,
+			  HASH_TABLE_TYPE(wguid_result) guids,
+			  ARRAY_TYPE(lucene_query) &queries,
+			  enum fts_lookup_flags flags,
+			  struct fts_multi_result *result)
+{
+	struct fts_score_map *score;
+	int ret = 0;
+
+	BooleanQuery query;
+	search_query_add(query, queries);
+
+	/* match any of the requested mailboxes */
+	BooleanQuery mailbox_query;
+	struct hash_iterate_context *iter;
+	void *key, *value;
+	iter = hash_table_iterate_init(guids);
+	while (hash_table_iterate(iter, guids, &key, &value)) {
+		Term *term = _CLNEW Term(_T("box"), (wchar_t *)key);
+		TermQuery *q = _CLNEW TermQuery(term);
+		mailbox_query.add(q, true, BooleanClause::SHOULD);
+		/* release the local reference, as done for every other
+		   Term/TermQuery pair in this file; otherwise the term
+		   is leaked */
+		_CLDECDELETE(term);
+	}
+	hash_table_iterate_deinit(&iter);
+
+	query.add(&mailbox_query, BooleanClause::MUST);
+	try {
+		Hits *hits = index->searcher->search(&query);
+
+		for (size_t i = 0; i < hits->length(); i++) {
+			uint32_t uid;
+
+			Field *field = hits->doc(i).getField(_T("box"));
+			const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
+			if (box_guid == NULL) {
+				i_error("lucene: Corrupted FTS index %s: No mailbox for document",
+					index->path);
+				ret = -1;
+				break;
+			}
+			struct fts_result *br =
+				hash_table_lookup(guids, box_guid);
+			if (br == NULL) {
+				i_warning("lucene: Returned unexpected mailbox with GUID %ls", box_guid);
+				continue;
+			}
+
+			if (lucene_doc_get_uid(index, &hits->doc(i),
+					       &uid) < 0) {
+				ret = -1;
+				break;
+			}
+
+			/* same selection as lucene_index_lookup(): definite
+			   results unless NO_AUTO_FUZZY asks for maybe */
+			ARRAY_TYPE(seq_range) *uids_arr =
+				(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
+				&br->definite_uids : &br->maybe_uids;
+			if (!array_is_created(uids_arr)) {
+				p_array_init(uids_arr, result->pool, 32);
+				p_array_init(&br->scores, result->pool, 32);
+			}
+			if (seq_range_array_add(uids_arr, uid)) {
+				/* duplicate result */
+			} else {
+				score = array_append_space(&br->scores);
+				score->uid = uid;
+				score->score = hits->score(i);
+			}
+		}
+		_CLDELETE(hits);
+		return ret;
+	} catch (CLuceneError &err) {
+		lucene_handle_error(index, err, "multi search");
+		return -1;
+	}
+}
+
+/* Look up `args` across all mailboxes listed in `guids`.
+
+   Only arguments that yield a definite query are searched; each of them gets
+   match_always set. Returns 0 on success (including when no argument could
+   be turned into a query), -1 if the index can't be opened for searching or
+   the search fails. */
+int lucene_index_lookup_multi(struct lucene_index *index,
+			      HASH_TABLE_TYPE(wguid_result) guids,
+			      struct mail_search_arg *args,
+			      enum fts_lookup_flags flags,
+			      struct fts_multi_result *result)
+{
+	if (lucene_index_open_search(index) <= 0)
+		return -1;
+
+	ARRAY_TYPE(lucene_query) def_queries;
+	t_array_init(&def_queries, 16);
+	unsigned int definite_count = 0;
+
+	for (struct mail_search_arg *arg = args; arg != NULL; arg = arg->next) {
+		if (!lucene_add_definite_query(index, def_queries, arg, flags))
+			continue;
+		arg->match_always = true;
+		definite_count++;
+	}
+
+	if (definite_count == 0)
+		return 0;
+	if (lucene_index_search_multi(index, guids, def_queries, flags,
+				      result) < 0)
+		return -1;
+	return 0;
+}
+
+/* State of an iteration over all documents in the index, created by
+   lucene_index_iter_init() and freed by lucene_index_iter_deinit(). */
+struct lucene_index_iter {
+ struct lucene_index *index;
+ /* storage for the record returned by lucene_index_iter_next();
+ overwritten on every call */
+ struct lucene_index_record rec;
+
+ /* wildcard query over the "box" field and its sort order; these stay
+ NULL when the index couldn't be opened for searching */
+ Term *term;
+ WildcardQuery *query;
+ Sort *sort;
+
+ /* search result, NULL if no search was run or it failed */
+ Hits *hits;
+ /* index of the next hit to return */
+ size_t i;
+ /* set when opening the index or searching failed; makes
+ lucene_index_iter_deinit() return -1 */
+ bool failed;
+};
+
+/* Start iterating over every document in the index, sorted by the "box"
+   field and then the "uid" field.
+
+   An iterator is always returned. If the index can't be opened for
+   searching, the iterator yields no records, and it is marked failed when
+   opening or searching reported an error. */
+struct lucene_index_iter *
+lucene_index_iter_init(struct lucene_index *index)
+{
+	static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL };
+	struct lucene_index_iter *iter = i_new(struct lucene_index_iter, 1);
+
+	iter->index = index;
+
+	int open_ret = lucene_index_open_search(index);
+	if (open_ret < 0)
+		iter->failed = true;
+	if (open_ret <= 0)
+		return iter;
+
+	/* match every document that has a "box" field */
+	iter->term = _CLNEW Term(_T("box"), _T("*"));
+	iter->query = _CLNEW WildcardQuery(iter->term);
+	iter->sort = _CLNEW Sort(sort_fields);
+
+	try {
+		iter->hits = index->searcher->search(iter->query, iter->sort);
+	} catch (CLuceneError &err) {
+		lucene_handle_error(index, err, "rescan search");
+		iter->failed = true;
+	}
+	return iter;
+}
+
+/* Return the next document's record, or NULL when there are no more hits
+   (or no search result exists). The returned pointer refers to storage
+   inside the iterator and is overwritten by the next call.
+
+   Failures to read the mailbox GUID or UID are ignored; the corresponding
+   fields are left as they were after zeroing unless the helpers set them. */
+const struct lucene_index_record *
+lucene_index_iter_next(struct lucene_index_iter *iter)
+{
+	Hits *hits = iter->hits;
+
+	if (hits == NULL || iter->i == hits->length())
+		return NULL;
+
+	Document *doc = &hits->doc(iter->i);
+	iter->i++;
+
+	struct lucene_index_record *rec = &iter->rec;
+	memset(rec, 0, sizeof(*rec));
+	(void)fts_lucene_get_mailbox_guid(iter->index, doc,
+					  rec->mailbox_guid);
+	(void)lucene_doc_get_uid(iter->index, doc, &rec->uid);
+	rec->part_num = lucene_doc_get_part(iter->index, doc);
+	return rec;
+}
+
+/* Free the iterator and everything it owns, and set *_iter to NULL.
+   Returns -1 if the iteration was marked failed, 0 otherwise. */
+int lucene_index_iter_deinit(struct lucene_index_iter **_iter)
+{
+	struct lucene_index_iter *iter = *_iter;
+
+	*_iter = NULL;
+
+	const int ret = iter->failed ? -1 : 0;
+
+	if (iter->hits != NULL)
+		_CLDELETE(iter->hits);
+	/* query, sort and term are created together in iter_init, so the
+	   query pointer tells whether any of them exist */
+	if (iter->query != NULL) {
+		_CLDELETE(iter->query);
+		_CLDELETE(iter->sort);
+		_CLDELETE(iter->term);
+	}
+	i_free(iter);
+	return ret;
+}
+
+/* Public entry point that forwards to CLucene's _lucene_shutdown(). */
+void lucene_shutdown(void)
+{
+ _lucene_shutdown();
+}
diff --git a/src/plugins/fts-lucene/lucene-wrapper.h b/src/plugins/fts-lucene/lucene-wrapper.h
new file mode 100644
index 0000000..270e902
--- /dev/null
+++ b/src/plugins/fts-lucene/lucene-wrapper.h
@@ -0,0 +1,67 @@
+#ifndef LUCENE_WRAPPER_H
+#define LUCENE_WRAPPER_H
+
+#include "fts-api-private.h"
+#include "guid.h"
+
+struct mailbox_list;
+struct fts_expunge_log;
+struct fts_lucene_settings;
+
+/* Number of hex characters in a 128-bit GUID (two per byte), not counting
+   any terminator. */
+#define MAILBOX_GUID_HEX_LENGTH (GUID_128_SIZE*2)
+
+/* One indexed document as reported by lucene_index_iter_next(). */
+struct lucene_index_record {
+ guid_128_t mailbox_guid;
+ uint32_t uid, part_num;
+};
+
+/* Maps a wide-char mailbox GUID string to the fts_result that receives the
+   hits found for that mailbox in lucene_index_lookup_multi(). */
+HASH_TABLE_DEFINE_TYPE(wguid_result, wchar_t *, struct fts_result *);
+
+/* Create/destroy an index handle for the given path. `list` and `set` are
+   annotated with ATTR_NULL (presumably meaning they may be NULL - confirm). */
+struct lucene_index *
+lucene_index_init(const char *path, struct mailbox_list *list,
+ const struct fts_lucene_settings *set)
+ ATTR_NULL(2, 3);
+void lucene_index_deinit(struct lucene_index *index);
+
+/* Select the mailbox, by its wide-char hex GUID, that subsequent
+   single-mailbox operations apply to; lucene_index_lookup() restricts its
+   search to the selected GUID. */
+void lucene_index_select_mailbox(struct lucene_index *index,
+ const wchar_t guid[MAILBOX_GUID_HEX_LENGTH]);
+void lucene_index_unselect_mailbox(struct lucene_index *index);
+int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r);
+int lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r);
+
+/* Indexing: begin, feed data, finish. */
+int lucene_index_build_init(struct lucene_index *index);
+int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
+ uint32_t part_num, const unsigned char *data,
+ size_t size, const char *hdr_name);
+int lucene_index_build_deinit(struct lucene_index *index);
+
+void lucene_index_close(struct lucene_index *index);
+int lucene_index_rescan(struct lucene_index *index);
+/* Delete the documents listed in the expunge log from the index and close
+   the index afterwards. Returns -1 on failure. */
+int lucene_index_expunge_from_log(struct lucene_index *index,
+ struct fts_expunge_log *log);
+/* Optimize the index. Returns 0 if no index exists or on success, -1 on
+   error. */
+int lucene_index_optimize(struct lucene_index *index);
+
+/* Search the selected mailbox for `args`, filling result->definite_uids
+   and/or result->maybe_uids. Arguments that were turned into a query get
+   match_always set. Returns 0 on success, -1 on error. */
+int lucene_index_lookup(struct lucene_index *index,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_result *result);
+
+/* Like lucene_index_lookup(), but searches every mailbox whose GUID is a key
+   in `guids` and stores each mailbox's hits in the mapped fts_result. */
+int lucene_index_lookup_multi(struct lucene_index *index,
+ HASH_TABLE_TYPE(wguid_result) guids,
+ struct mail_search_arg *args,
+ enum fts_lookup_flags flags,
+ struct fts_multi_result *result);
+
+/* Iterate over all documents in the index. _next() returns NULL at the end;
+   the returned record is only valid until the next call. _deinit() frees the
+   iterator, sets *iter to NULL and returns -1 if the iteration failed. */
+struct lucene_index_iter *
+lucene_index_iter_init(struct lucene_index *index);
+const struct lucene_index_record *
+lucene_index_iter_next(struct lucene_index_iter *iter);
+int lucene_index_iter_deinit(struct lucene_index_iter **iter);
+
+/* internal: */
+void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
+ wchar_t *dest, size_t destsize);
+
+void lucene_shutdown(void);
+
+#endif
diff --git a/src/plugins/fts-lucene/textcat.conf b/src/plugins/fts-lucene/textcat.conf
new file mode 100644
index 0000000..d75c4fe
--- /dev/null
+++ b/src/plugins/fts-lucene/textcat.conf
@@ -0,0 +1,25 @@
+#
+# A sample config file for the language models
+# provided with Gertjan van Noord's language guesser
+# (http://odur.let.rug.nl/~vannoord/TextCat/)
+#
+# Notes:
+# - You may consider eliminating a couple of small languages from this
+# list because they cause false positives with big languages and are
+# bad for performance. (Do you really want to recognize Drents?)
+# - Putting the most probable languages at the top of the list
+# improves performance, because this will raise the threshold for
+# likely candidates more quickly.
+#
+LM/english.lm english
+LM/italian.lm italian
+LM/danish.lm danish
+LM/dutch.lm dutch
+LM/finnish.lm finnish
+LM/french.lm french
+LM/german.lm german
+LM/norwegian.lm norwegian
+LM/portuguese.lm portuguese
+LM/russian.lm russian
+LM/spanish.lm spanish
+LM/swedish.lm swedish