summaryrefslogtreecommitdiffstats
path: root/contrib/snowball/libstemmer/modules_utf8.txt
blob: 60a0e1d0c17d6c9a6a6f2dd8ad9d62b536e725b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# This file contains a list of stemmers to include in the distribution.
# The format is a set of lines of space-separated items - on each line:
#  First item is name of stemmer.
#  Second item is comma separated list of character sets.
#  Third item is comma separated list of names to refer to the stemmer by.
#
# Lines starting with a #, or blank lines, are ignored.

# List all the main algorithms for each language, in UTF-8.

danish          UTF_8                   danish,da,dan
dutch           UTF_8                   dutch,nl,dut,nld
english         UTF_8                   english,en,eng
finnish         UTF_8                   finnish,fi,fin
french          UTF_8                   french,fr,fre,fra
german          UTF_8                   german,de,ger,deu
hungarian       UTF_8                   hungarian,hu,hun
italian         UTF_8                   italian,it,ita
norwegian       UTF_8                   norwegian,no,nor
portuguese      UTF_8                   portuguese,pt,por
romanian        UTF_8                   romanian,ro,rum,ron
russian         UTF_8                   russian,ru,rus
spanish         UTF_8                   spanish,es,esl,spa
swedish         UTF_8                   swedish,sv,swe
turkish         UTF_8                   turkish,tr,tur

# Also include the traditional porter algorithm for english.
# The porter algorithm is included in the libstemmer distribution to assist
# with backwards compatibility, but for new systems the english algorithm
# should be used in preference.
porter          UTF_8                   porter

# Some other stemmers in the snowball project are not included in the standard
# distribution. To compile a libstemmer with them in, add them to this list,
# and regenerate the distribution. (You will need a full source checkout for
# this.) They are included in the snowball website as curiosities, but are not
# intended for general use, and use of them is not fully supported.  These
# algorithms are:
#
# german2          - This is a slight modification of the german stemmer.
#german2          UTF_8                   german2
#
# kraaij_pohlmann  - This is a different dutch stemmer.
#kraaij_pohlmann  UTF_8                   kraaij_pohlmann
#
# lovins           - This is an english stemmer, but fairly outdated, and
#                    only really applicable to a restricted type of input text
#                    (keywords in academic publications).
#lovins           UTF_8                   lovins