diff options
Diffstat (limited to 'src/plugins/fts-lucene/SnowballAnalyzer.h')
-rw-r--r-- | src/plugins/fts-lucene/SnowballAnalyzer.h | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/SnowballAnalyzer.h b/src/plugins/fts-lucene/SnowballAnalyzer.h new file mode 100644 index 0000000..45455c5 --- /dev/null +++ b/src/plugins/fts-lucene/SnowballAnalyzer.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_analysis_snowball_analyser_ +#define _lucene_analysis_snowball_analyser_ + +extern "C" { +#include "lib.h" +#include "unichar.h" +}; +#include "CLucene/analysis/AnalysisHeader.h" + +CL_CLASS_DEF(util,BufferedReader) +CL_NS_DEF2(analysis,snowball) + +/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link + * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}. + * + * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a + * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in + * {@link EnglishStemmer} is named "English". + */ +class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { + char* language; + normalizer_func_t *normalizer; + CLTCSetList* stopSet; + TokenStream *prevstream; + +public: + /** Builds the named analyzer with no stop words. */ + SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english"); + + /** Builds the named analyzer with the given stop words. + */ + SnowballAnalyzer(const char* language, const TCHAR** stopWords); + + ~SnowballAnalyzer(); + + /** Constructs a {@link StandardTokenizer} filtered by a {@link + StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader); + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); +}; + +CL_NS_END2 +#endif + |