summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts-lucene/SnowballAnalyzer.h
blob: 45455c50d1e631b2a21293282d23f57f5e1df0f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_analysis_snowball_analyser_
#define _lucene_analysis_snowball_analyser_

extern "C" {
#include "lib.h"
#include "unichar.h"
};
#include "CLucene/analysis/AnalysisHeader.h"

CL_CLASS_DEF(util,BufferedReader)
CL_NS_DEF2(analysis,snowball)

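/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
 * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
 *
 * NOTE: the rest of this comment appears to be carried over from the Java
 * documentation, where the available stemmers are listed in
 * {@link net.sf.snowball.ext} and the name of a stemmer is the part of the
 * class name before "Stemmer", e.g., the stemmer in {@link EnglishStemmer} is
 * named "English". In this class the stemmer is selected through the
 * `language` constructor argument, whose default value is the lowercase
 * string "english".
 */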
class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer : public Analyzer {
private:
  /* Stemmer language name supplied at construction.
     NOTE(review): presumably a private copy owned by this object -- confirm
     against the constructor/destructor in the implementation file. */
  char *language;

  /* Optional text normalizer callback received by the first constructor.
     NOTE(review): how (and whether) it is applied is decided in the
     implementation file, which is not visible from this header. */
  normalizer_func_t *normalizer;

  /* Stop-word set.
     NOTE(review): presumably populated only by the stop-word constructor --
     confirm in the implementation file. */
  CLTCSetList *stopSet;

  /* NOTE(review): looks like the stream returned by an earlier
     reusableTokenStream() call, kept for reuse -- confirm. */
  TokenStream *prevstream;

public:
  /**
   * Builds the named analyzer with no stop words.
   *
   * @param normalizer  normalizer callback stored in this analyzer
   * @param language    stemmer language name; defaults to "english"
   */
  SnowballAnalyzer(normalizer_func_t *normalizer, const char *language = "english");

  /**
   * Builds the named analyzer with the given stop words.
   *
   * @param language   stemmer language name
   * @param stopWords  array of stop-word strings
   *                   (NOTE(review): presumably NULL-terminated -- confirm)
   */
  SnowballAnalyzer(const char *language, const TCHAR **stopWords);

  ~SnowballAnalyzer();

  /**
   * Constructs a {@link StandardTokenizer} filtered by a {@link
   * StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
   *
   * @param fieldName  name of the field being analyzed
   * @param reader     source of the text to tokenize
   * @return the resulting token stream
   */
  TokenStream *tokenStream(const TCHAR *fieldName, CL_NS(util)::Reader *reader);

  /**
   * Overload of tokenStream() taking an additional flag.
   *
   * @param deleteReader  NOTE(review): presumably lets the returned stream
   *                      dispose of `reader` when it is done -- confirm
   */
  TokenStream *tokenStream(const TCHAR *fieldName, CL_NS(util)::Reader *reader, bool deleteReader);

  /**
   * Reusable-stream counterpart of tokenStream().
   * NOTE(review): presumably recycles the stream remembered in `prevstream`
   * instead of building a new one on every call -- confirm.
   */
  TokenStream *reusableTokenStream(const TCHAR *fieldName, CL_NS(util)::Reader *reader);
};

CL_NS_END2
#endif