summaryrefslogtreecommitdiffstats
path: root/sphinx/search/nl.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/search/nl.py')
-rw-r--r--sphinx/search/nl.py127
1 files changed, 127 insertions, 0 deletions
diff --git a/sphinx/search/nl.py b/sphinx/search/nl.py
new file mode 100644
index 0000000..a610b12
--- /dev/null
+++ b/sphinx/search/nl.py
@@ -0,0 +1,127 @@
+"""Dutch search language: includes the JS porter stemmer."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Dict
+
+import snowballstemmer
+
+from sphinx.search import SearchLanguage, parse_stop_word
+
+dutch_stopwords = parse_stop_word('''
+| source: http://snowball.tartarus.org/algorithms/dutch/stop.txt
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were' (2) to wander, (3) wares, (3)
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
+''')
+
+
+class SearchDutch(SearchLanguage):
+ lang = 'nl'
+ language_name = 'Dutch'
+ js_stemmer_rawcode = 'dutch-stemmer.js'
+ stopwords = dutch_stopwords
+
+ def init(self, options: dict) -> None:
+ self.stemmer = snowballstemmer.stemmer('dutch')
+
+ def stem(self, word: str) -> str:
+ return self.stemmer.stemWord(word.lower())