summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts-lucene/textcat.conf
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/fts-lucene/textcat.conf')
-rw-r--r--src/plugins/fts-lucene/textcat.conf25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/plugins/fts-lucene/textcat.conf b/src/plugins/fts-lucene/textcat.conf
new file mode 100644
index 0000000..d75c4fe
--- /dev/null
+++ b/src/plugins/fts-lucene/textcat.conf
@@ -0,0 +1,25 @@
+#
+# A sample config file for the language models
+# provided with Gertjan van Noord's language guesser
+# (http://odur.let.rug.nl/~vannoord/TextCat/)
+#
+# Notes:
+# - You may consider eliminating a couple of small languages from this
+# list because they cause false positives with big languages and are
+# bad for performance. (Do you really want to recognize Drents?)
+# - Putting the most probable languages at the top of the list
+# improves performance, because this will raise the threshold for
+# likely candidates more quickly.
+#
+LM/english.lm english
+LM/italian.lm italian
+LM/danish.lm danish
+LM/dutch.lm dutch
+LM/finnish.lm finnish
+LM/french.lm french
+LM/german.lm german
+LM/norwegian.lm norwegian
+LM/portuguese.lm portuguese
+LM/russian.lm russian
+LM/spanish.lm spanish
+LM/swedish.lm swedish