summaryrefslogtreecommitdiffstats
path: root/vendor/elasticlunr-rs/src/lang/du.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/elasticlunr-rs/src/lang/du.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/elasticlunr-rs/src/lang/du.rs')
-rw-r--r--vendor/elasticlunr-rs/src/lang/du.rs50
1 files changed, 50 insertions, 0 deletions
diff --git a/vendor/elasticlunr-rs/src/lang/du.rs b/vendor/elasticlunr-rs/src/lang/du.rs
new file mode 100644
index 000000000..73a6d3cf7
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/du.rs
@@ -0,0 +1,50 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+/// Dutch language support.
+///
+/// A field-less type: everything language-specific is supplied by its
+/// `Language` implementation in this file (tokenizer and processing pipeline).
+/// `Default` is derived so the type can be built either through `Dutch::new()`
+/// or through `Default::default()`.
+#[derive(Clone, Default)]
+pub struct Dutch {}
+
+impl Dutch {
+    /// Creates the Dutch language handle.
+    ///
+    /// The struct has no fields, so there is nothing to configure.
+    pub fn new() -> Self {
+        Dutch {}
+    }
+}
+
+/// `Language` implementation for [`Dutch`].
+impl Language for Dutch {
+    /// Returns the display name of the language, `"Dutch"`.
+    fn name(&self) -> String {
+        String::from("Dutch")
+    }
+
+    /// Returns the language code, `"du"`.
+    ///
+    /// The same `du` suffix appears in the pipeline stage names built by
+    /// `make_pipeline`.
+    fn code(&self) -> String {
+        String::from("du")
+    }
+
+    /// Splits `text` into tokens by delegating to the shared
+    /// `tokenize_whitespace` helper of the parent module.
+    fn tokenize(&self, text: &str) -> Vec<String> {
+        super::tokenize_whitespace(text)
+    }
+
+    /// Builds the processing pipeline for Dutch.
+    ///
+    /// The queue holds three stages, in this order:
+    /// 1. `trimmer-du` - a `RegexTrimmer` configured with the `\p{Latin}` character class,
+    /// 2. `stopWordFilter-du` - a `StopWordFilter` over `STOP_WORDS`,
+    /// 3. `stemmer-du` - a `RustStemmer` using `Algorithm::Dutch`.
+    fn make_pipeline(&self) -> Pipeline {
+        // Stages are constructed in the same order in which they are queued.
+        let trimmer = RegexTrimmer::new("trimmer-du", r"\p{Latin}");
+        let stop_word_filter = StopWordFilter::new("stopWordFilter-du", STOP_WORDS);
+        let stemmer = RustStemmer::new("stemmer-du", Algorithm::Dutch);
+
+        Pipeline {
+            queue: vec![
+                Box::new(trimmer),
+                Box::new(stop_word_filter),
+                Box::new(stemmer),
+            ],
+        }
+    }
+}
+
+/// Dutch stop words handed to the `stopWordFilter-du` pipeline stage.
+///
+/// The list starts with the empty string and is otherwise in alphabetical
+/// order; each row below holds the words sharing one initial letter.
+/// NOTE(review): presumably the filter drops tokens matching these entries -
+/// confirm against `StopWordFilter`.
+const STOP_WORDS: &[&str] = &[
+    "",
+    "aan", "al", "alles", "als", "altijd", "andere",
+    "ben", "bij",
+    "daar", "dan", "dat", "de", "der", "deze", "die", "dit", "doch", "doen", "door", "dus",
+    "een", "eens", "en", "er",
+    "ge", "geen", "geweest",
+    "haar", "had", "heb", "hebben", "heeft", "hem", "het", "hier", "hij", "hoe", "hun",
+    "iemand", "iets", "ik", "in", "is",
+    "ja", "je",
+    "kan", "kon", "kunnen",
+    "maar", "me", "meer", "men", "met", "mij", "mijn", "moet",
+    "na", "naar", "niet", "niets", "nog", "nu",
+    "of", "om", "omdat", "onder", "ons", "ook", "op", "over",
+    "reeds",
+    "te", "tegen", "toch", "toen", "tot",
+    "u", "uit", "uw",
+    "van", "veel", "voor",
+    "want", "waren", "was", "wat", "werd", "wezen", "wie", "wil", "worden", "wordt",
+    "zal", "ze", "zelf", "zich", "zij", "zijn", "zo", "zonder", "zou",
+];