diff options
Diffstat (limited to 'src/plugins/fts-lucene/textcat.conf')
-rw-r--r-- | src/plugins/fts-lucene/textcat.conf | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/textcat.conf b/src/plugins/fts-lucene/textcat.conf new file mode 100644 index 0000000..d75c4fe --- /dev/null +++ b/src/plugins/fts-lucene/textcat.conf @@ -0,0 +1,25 @@ +# +# A sample config file for the language models +# provided with Gertjan van Noord's language guesser +# (http://odur.let.rug.nl/~vannoord/TextCat/) +# +# Notes: +# - You may consider eliminating a couple of small languages from this +# list because they cause false positives with big languages and are +# bad for performance. (Do you really want to recognize Drents?) +# - Putting the most probable languages at the top of the list +# improves performance, because this will raise the threshold for +# likely candidates more quickly. +# +LM/english.lm english +LM/italian.lm italian +LM/danish.lm danish +LM/dutch.lm dutch +LM/finnish.lm finnish +LM/french.lm french +LM/german.lm german +LM/norwegian.lm norwegian +LM/portuguese.lm portuguese +LM/russian.lm russian +LM/spanish.lm spanish +LM/swedish.lm swedish |