summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts-lucene/SnowballAnalyzer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/fts-lucene/SnowballAnalyzer.h')
-rw-r--r--src/plugins/fts-lucene/SnowballAnalyzer.h51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/SnowballAnalyzer.h b/src/plugins/fts-lucene/SnowballAnalyzer.h
new file mode 100644
index 0000000..45455c5
--- /dev/null
+++ b/src/plugins/fts-lucene/SnowballAnalyzer.h
@@ -0,0 +1,51 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_analysis_snowball_analyser_
+#define _lucene_analysis_snowball_analyser_
+
+extern "C" {
+#include "lib.h"
+#include "unichar.h"
+};
+#include "CLucene/analysis/AnalysisHeader.h"
+
+CL_CLASS_DEF(util,BufferedReader)
+CL_NS_DEF2(analysis,snowball)
+
+/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
+ *
+ * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
+ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+ * {@link EnglishStemmer} is named "English".
+ */
+class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer {
+ char* language;
+ normalizer_func_t *normalizer;
+ CLTCSetList* stopSet;
+ TokenStream *prevstream;
+
+public:
+ /** Builds the named analyzer with no stop words. */
+ SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english");
+
+ /** Builds the named analyzer with the given stop words.
+ */
+ SnowballAnalyzer(const char* language, const TCHAR** stopWords);
+
+ ~SnowballAnalyzer();
+
+ /** Constructs a {@link StandardTokenizer} filtered by a {@link
+ StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
+ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader);
+ TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+};
+
+CL_NS_END2
+#endif
+