From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/elasticlunr-rs/src/lang/de.rs | 273 +++++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 vendor/elasticlunr-rs/src/lang/de.rs (limited to 'vendor/elasticlunr-rs/src/lang/de.rs') diff --git a/vendor/elasticlunr-rs/src/lang/de.rs b/vendor/elasticlunr-rs/src/lang/de.rs new file mode 100644 index 000000000..244685ae9 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/de.rs @@ -0,0 +1,273 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct German {} + +impl German { + pub fn new() -> Self { + Self {} + } +} + +impl Language for German { + fn name(&self) -> String { + "German".into() + } + fn code(&self) -> String { + "de".into() + } + + fn tokenize(&self, text: &str) -> Vec { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-de", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-de", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-de", Algorithm::German)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "aber", + "alle", + "allem", + "allen", + "aller", + "alles", + "als", + "also", + "am", + "an", + "ander", + "andere", + "anderem", + "anderen", + "anderer", + "anderes", + "anderm", + "andern", + "anderr", + "anders", + "auch", + "auf", + "aus", + "bei", + "bin", + "bis", + "bist", + "da", + "damit", + "dann", + "das", + "dasselbe", + "dazu", + "daß", + "dein", + "deine", + "deinem", + "deinen", + "deiner", + "deines", + "dem", + "demselben", + "den", + "denn", + "denselben", + "der", + "derer", + "derselbe", + "derselben", + "des", + "desselben", + "dessen", + "dich", + "die", + "dies", + "diese", + "dieselbe", + "dieselben", + "diesem", + "diesen", + "dieser", + "dieses", + "dir", + "doch", + "dort", + "du", + "durch", + "ein", + "eine", + "einem", + "einen", + "einer", + "eines", + "einig", + "einige", + "einigem", + "einigen", + "einiger", + "einiges", + "einmal", + "er", + "es", + "etwas", + "euch", + "euer", + "eure", + "eurem", + "euren", + "eurer", + "eures", + "für", + "gegen", + "gewesen", + "hab", + "habe", + "haben", + "hat", + "hatte", + "hatten", + "hier", + "hin", + "hinter", + "ich", + "ihm", + "ihn", + "ihnen", + "ihr", + "ihre", + "ihrem", + "ihren", + "ihrer", + "ihres", + "im", + "in", + "indem", + "ins", + "ist", + "jede", + "jedem", + "jeden", + "jeder", + "jedes", + "jene", + "jenem", + "jenen", + "jener", + "jenes", + "jetzt", + "kann", + "kein", + "keine", + "keinem", + "keinen", + "keiner", + "keines", + "können", + "könnte", + "machen", + "man", + "manche", + "manchem", + "manchen", + "mancher", + "manches", + "mein", + "meine", + "meinem", + "meinen", + "meiner", + "meines", + "mich", + "mir", + "mit", + "muss", + "musste", + "nach", + "nicht", + "nichts", + "noch", + "nun", + "nur", + "ob", + "oder", + "ohne", + "sehr", + "sein", + "seine", + "seinem", + "seinen", + "seiner", + "seines", + "selbst", + "sich", + "sie", + "sind", + "so", + "solche", + "solchem", + "solchen", + "solcher", + "solches", + "soll", + "sollte", + "sondern", + "sonst", + "um", + "und", + "uns", + "unse", + "unsem", + "unsen", + "unser", + "unses", + "unter", + "viel", + "vom", + "von", + "vor", + "war", + "waren", + "warst", + "was", + "weg", + "weil", + "weiter", + "welche", + "welchem", + "welchen", + "welcher", + "welches", + "wenn", + "werde", + "werden", + "wie", + "wieder", + "will", + "wir", + "wird", + "wirst", + "wo", + "wollen", + "wollte", + "während", + "würde", + "würden", + "zu", + "zum", + "zur", + "zwar", + "zwischen", + "über", +]; -- cgit v1.2.3