diff options
Diffstat (limited to 'vendor/elasticlunr-rs/src/lang/du.rs')
-rw-r--r-- | vendor/elasticlunr-rs/src/lang/du.rs | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/vendor/elasticlunr-rs/src/lang/du.rs b/vendor/elasticlunr-rs/src/lang/du.rs new file mode 100644 index 000000000..73a6d3cf7 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/du.rs @@ -0,0 +1,50 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Dutch {} + +impl Dutch { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Dutch { + fn name(&self) -> String { + "Dutch".into() + } + fn code(&self) -> String { + "du".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-du", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-du", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-du", Algorithm::Dutch)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", "aan", "al", "alles", "als", "altijd", "andere", "ben", "bij", "daar", "dan", "dat", "de", + "der", "deze", "die", "dit", "doch", "doen", "door", "dus", "een", "eens", "en", "er", "ge", + "geen", "geweest", "haar", "had", "heb", "hebben", "heeft", "hem", "het", "hier", "hij", "hoe", + "hun", "iemand", "iets", "ik", "in", "is", "ja", "je", "kan", "kon", "kunnen", "maar", "me", + "meer", "men", "met", "mij", "mijn", "moet", "na", "naar", "niet", "niets", "nog", "nu", "of", + "om", "omdat", "onder", "ons", "ook", "op", "over", "reeds", "te", "tegen", "toch", "toen", + "tot", "u", "uit", "uw", "van", "veel", "voor", "want", "waren", "was", "wat", "werd", "wezen", + "wie", "wil", "worden", "wordt", "zal", "ze", "zelf", "zich", "zij", "zijn", "zo", "zonder", + "zou", +]; |