Diffstat (limited to 'vendor/elasticlunr-rs/src/lang')
-rw-r--r--  vendor/elasticlunr-rs/src/lang/ar.rs       66
-rw-r--r--  vendor/elasticlunr-rs/src/lang/common.rs   97
-rw-r--r--  vendor/elasticlunr-rs/src/lang/da.rs       49
-rw-r--r--  vendor/elasticlunr-rs/src/lang/de.rs      273
-rw-r--r--  vendor/elasticlunr-rs/src/lang/du.rs       50
-rw-r--r--  vendor/elasticlunr-rs/src/lang/en.rs      458
-rw-r--r--  vendor/elasticlunr-rs/src/lang/es.rs      350
-rw-r--r--  vendor/elasticlunr-rs/src/lang/fi.rs      277
-rw-r--r--  vendor/elasticlunr-rs/src/lang/fr.rs       56
-rw-r--r--  vendor/elasticlunr-rs/src/lang/it.rs      321
-rw-r--r--  vendor/elasticlunr-rs/src/lang/ja.rs       76
-rw-r--r--  vendor/elasticlunr-rs/src/lang/mod.rs     138
-rw-r--r--  vendor/elasticlunr-rs/src/lang/no.rs      218
-rw-r--r--  vendor/elasticlunr-rs/src/lang/pt.rs      245
-rw-r--r--  vendor/elasticlunr-rs/src/lang/ro.rs      323
-rw-r--r--  vendor/elasticlunr-rs/src/lang/ru.rs      463
-rw-r--r--  vendor/elasticlunr-rs/src/lang/sv.rs       51
-rw-r--r--  vendor/elasticlunr-rs/src/lang/tr.rs      251
-rw-r--r--  vendor/elasticlunr-rs/src/lang/zh.rs       55
19 files changed, 3817 insertions, 0 deletions
diff --git a/vendor/elasticlunr-rs/src/lang/ar.rs b/vendor/elasticlunr-rs/src/lang/ar.rs
new file mode 100644
index 000000000..d0a640edf
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/ar.rs
@@ -0,0 +1,66 @@
+use super::Language;
+use crate::pipeline::{Pipeline, PipelineFn};
+use regex::Regex;
+
+/// Arabic Language
+///
+/// Designed to be compatible with the included JavaScript implementation. See `js/lunr.ar.js`.
+pub struct Arabic {}
+
+impl Arabic {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Arabic {
+ fn name(&self) -> String {
+ "Arabic".into()
+ }
+ fn code(&self) -> String {
+ "ar".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![Box::new(Stemmer::new())],
+ }
+ }
+}
+
+struct Stemmer {
+ diacritics: Regex,
+ alefs: Regex,
+}
+
+impl Stemmer {
+ pub fn new() -> Self {
+ let diacritics = Regex::new("[\u{0640}\u{064b}-\u{065b}]").unwrap();
+ let alefs = Regex::new("[\u{0622}\u{0623}\u{0625}\u{0671}\u{0649}]").unwrap();
+ Self { diacritics, alefs }
+ }
+}
+
+impl PipelineFn for Stemmer {
+ fn name(&self) -> String {
+ "stemmer-ar".into()
+ }
+
+ fn filter(&self, token: String) -> Option<String> {
+ // remove diacritics and elongating character
+ let result = self.diacritics.replace(&token, "");
+ // replace all variations of alef (آأإٱى) to a plain alef (ا)
+ let result = self.alefs.replace(&result, "\u{0627}");
+ if result.is_empty() {
+ None
+ } else if result == token {
+ Some(token)
+ } else {
+ Some(result.into())
+ }
+ }
+}
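A hedged sketch of what the normalization above does in practice — a hypothetical test that could live as a child module of this file (the Arabic words are illustrative; `Stemmer` is private, so this only compiles inside the module):

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::PipelineFn;

    #[test]
    fn folds_alef_and_drops_empty_tokens() {
        let stemmer = Stemmer::new();
        // U+0622 (alef with madda) is folded to a plain alef U+0627.
        assert_eq!(stemmer.filter("آمن".to_string()), Some("امن".to_string()));
        // A token made only of removable characters (here U+0640, tatweel) is dropped.
        assert_eq!(stemmer.filter("\u{0640}".to_string()), None);
    }
}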
diff --git a/vendor/elasticlunr-rs/src/lang/common.rs b/vendor/elasticlunr-rs/src/lang/common.rs
new file mode 100644
index 000000000..5616f0138
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/common.rs
@@ -0,0 +1,97 @@
+use crate::pipeline::PipelineFn;
+use regex::Regex;
+use std::collections::HashSet;
+
+#[derive(Clone)]
+pub struct StopWordFilter {
+ name: String,
+ stop_words: HashSet<String>,
+}
+
+impl StopWordFilter {
+ pub fn new(name: &str, stop_words: &[&str]) -> Self {
+ Self {
+ name: name.into(),
+ stop_words: stop_words.iter().map(|s| s.to_string()).collect(),
+ }
+ }
+}
+
+impl PipelineFn for StopWordFilter {
+ fn name(&self) -> String {
+ self.name.clone()
+ }
+
+ fn filter(&self, token: String) -> Option<String> {
+ if self.stop_words.contains(&token) {
+ None
+ } else {
+ Some(token)
+ }
+ }
+}
+
+#[derive(Clone)]
+pub struct RegexTrimmer {
+ name: String,
+ trimmer: Regex,
+}
+
+impl RegexTrimmer {
+ pub fn new(name: &str, word_chars: &str) -> Self {
+ let name = name.into();
+ let trimmer = Regex::new(&format!("^[^{0}]+|[^{0}]+$", word_chars)).unwrap();
+ Self { name, trimmer }
+ }
+}
+
+impl PipelineFn for RegexTrimmer {
+ fn name(&self) -> String {
+ self.name.clone()
+ }
+
+ fn filter(&self, token: String) -> Option<String> {
+ let result = self.trimmer.replace_all(&token, "");
+ if result.is_empty() {
+ None
+ } else if result == token {
+ Some(token)
+ } else {
+ Some(result.into())
+ }
+ }
+}
+
+#[cfg(feature = "rust-stemmers")]
+pub struct RustStemmer {
+ name: String,
+ stemmer: rust_stemmers::Stemmer,
+}
+
+#[cfg(feature = "rust-stemmers")]
+impl RustStemmer {
+ pub fn new(name: &str, algo: rust_stemmers::Algorithm) -> Self {
+ Self {
+ name: name.into(),
+ stemmer: rust_stemmers::Stemmer::create(algo),
+ }
+ }
+}
+
+#[cfg(feature = "rust-stemmers")]
+impl PipelineFn for RustStemmer {
+ fn name(&self) -> String {
+ self.name.clone()
+ }
+
+ fn filter(&self, token: String) -> Option<String> {
+ let result = self.stemmer.stem(&token);
+ if result.is_empty() {
+ None
+ } else if result == token {
+ Some(token)
+ } else {
+ Some(result.into())
+ }
+ }
+}
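As a hedged illustration of the two reusable filters above (values are made up; written as a test that could be added to this module):

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::PipelineFn;

    #[test]
    fn trimmer_and_stop_word_filter() {
        // RegexTrimmer strips leading/trailing characters outside the given class
        // and drops the token entirely if nothing is left.
        let trimmer = RegexTrimmer::new("trimmer-example", r"\p{Latin}");
        assert_eq!(trimmer.filter("«hello»".to_string()), Some("hello".to_string()));
        assert_eq!(trimmer.filter("¡¿".to_string()), None);

        // StopWordFilter drops exact matches and passes everything else through.
        let stop = StopWordFilter::new("stopWordFilter-example", &["the", "a"]);
        assert_eq!(stop.filter("the".to_string()), None);
        assert_eq!(stop.filter("fox".to_string()), Some("fox".to_string()));
    }
}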
diff --git a/vendor/elasticlunr-rs/src/lang/da.rs b/vendor/elasticlunr-rs/src/lang/da.rs
new file mode 100644
index 000000000..ab3b7dffe
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/da.rs
@@ -0,0 +1,49 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Danish {}
+
+impl Danish {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Danish {
+ fn name(&self) -> String {
+ "Danish".into()
+ }
+ fn code(&self) -> String {
+ "da".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-da", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-da", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-da", Algorithm::Danish)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "", "ad", "af", "alle", "alt", "anden", "at", "blev", "blive", "bliver", "da", "de", "dem",
+ "den", "denne", "der", "deres", "det", "dette", "dig", "din", "disse", "dog", "du", "efter",
+ "eller", "en", "end", "er", "et", "for", "fra", "ham", "han", "hans", "har", "havde", "have",
+ "hende", "hendes", "her", "hos", "hun", "hvad", "hvis", "hvor", "i", "ikke", "ind", "jeg",
+ "jer", "jo", "kunne", "man", "mange", "med", "meget", "men", "mig", "min", "mine", "mit",
+ "mod", "ned", "noget", "nogle", "nu", "når", "og", "også", "om", "op", "os", "over", "på",
+ "selv", "sig", "sin", "sine", "sit", "skal", "skulle", "som", "sådan", "thi", "til", "ud",
+ "under", "var", "vi", "vil", "ville", "vor", "være", "været",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/de.rs b/vendor/elasticlunr-rs/src/lang/de.rs
new file mode 100644
index 000000000..244685ae9
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/de.rs
@@ -0,0 +1,273 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct German {}
+
+impl German {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for German {
+ fn name(&self) -> String {
+ "German".into()
+ }
+ fn code(&self) -> String {
+ "de".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-de", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-de", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-de", Algorithm::German)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "aber",
+ "alle",
+ "allem",
+ "allen",
+ "aller",
+ "alles",
+ "als",
+ "also",
+ "am",
+ "an",
+ "ander",
+ "andere",
+ "anderem",
+ "anderen",
+ "anderer",
+ "anderes",
+ "anderm",
+ "andern",
+ "anderr",
+ "anders",
+ "auch",
+ "auf",
+ "aus",
+ "bei",
+ "bin",
+ "bis",
+ "bist",
+ "da",
+ "damit",
+ "dann",
+ "das",
+ "dasselbe",
+ "dazu",
+ "daß",
+ "dein",
+ "deine",
+ "deinem",
+ "deinen",
+ "deiner",
+ "deines",
+ "dem",
+ "demselben",
+ "den",
+ "denn",
+ "denselben",
+ "der",
+ "derer",
+ "derselbe",
+ "derselben",
+ "des",
+ "desselben",
+ "dessen",
+ "dich",
+ "die",
+ "dies",
+ "diese",
+ "dieselbe",
+ "dieselben",
+ "diesem",
+ "diesen",
+ "dieser",
+ "dieses",
+ "dir",
+ "doch",
+ "dort",
+ "du",
+ "durch",
+ "ein",
+ "eine",
+ "einem",
+ "einen",
+ "einer",
+ "eines",
+ "einig",
+ "einige",
+ "einigem",
+ "einigen",
+ "einiger",
+ "einiges",
+ "einmal",
+ "er",
+ "es",
+ "etwas",
+ "euch",
+ "euer",
+ "eure",
+ "eurem",
+ "euren",
+ "eurer",
+ "eures",
+ "für",
+ "gegen",
+ "gewesen",
+ "hab",
+ "habe",
+ "haben",
+ "hat",
+ "hatte",
+ "hatten",
+ "hier",
+ "hin",
+ "hinter",
+ "ich",
+ "ihm",
+ "ihn",
+ "ihnen",
+ "ihr",
+ "ihre",
+ "ihrem",
+ "ihren",
+ "ihrer",
+ "ihres",
+ "im",
+ "in",
+ "indem",
+ "ins",
+ "ist",
+ "jede",
+ "jedem",
+ "jeden",
+ "jeder",
+ "jedes",
+ "jene",
+ "jenem",
+ "jenen",
+ "jener",
+ "jenes",
+ "jetzt",
+ "kann",
+ "kein",
+ "keine",
+ "keinem",
+ "keinen",
+ "keiner",
+ "keines",
+ "können",
+ "könnte",
+ "machen",
+ "man",
+ "manche",
+ "manchem",
+ "manchen",
+ "mancher",
+ "manches",
+ "mein",
+ "meine",
+ "meinem",
+ "meinen",
+ "meiner",
+ "meines",
+ "mich",
+ "mir",
+ "mit",
+ "muss",
+ "musste",
+ "nach",
+ "nicht",
+ "nichts",
+ "noch",
+ "nun",
+ "nur",
+ "ob",
+ "oder",
+ "ohne",
+ "sehr",
+ "sein",
+ "seine",
+ "seinem",
+ "seinen",
+ "seiner",
+ "seines",
+ "selbst",
+ "sich",
+ "sie",
+ "sind",
+ "so",
+ "solche",
+ "solchem",
+ "solchen",
+ "solcher",
+ "solches",
+ "soll",
+ "sollte",
+ "sondern",
+ "sonst",
+ "um",
+ "und",
+ "uns",
+ "unse",
+ "unsem",
+ "unsen",
+ "unser",
+ "unses",
+ "unter",
+ "viel",
+ "vom",
+ "von",
+ "vor",
+ "war",
+ "waren",
+ "warst",
+ "was",
+ "weg",
+ "weil",
+ "weiter",
+ "welche",
+ "welchem",
+ "welchen",
+ "welcher",
+ "welches",
+ "wenn",
+ "werde",
+ "werden",
+ "wie",
+ "wieder",
+ "will",
+ "wir",
+ "wird",
+ "wirst",
+ "wo",
+ "wollen",
+ "wollte",
+ "während",
+ "würde",
+ "würden",
+ "zu",
+ "zum",
+ "zur",
+ "zwar",
+ "zwischen",
+ "über",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/du.rs b/vendor/elasticlunr-rs/src/lang/du.rs
new file mode 100644
index 000000000..73a6d3cf7
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/du.rs
@@ -0,0 +1,50 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Dutch {}
+
+impl Dutch {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Dutch {
+ fn name(&self) -> String {
+ "Dutch".into()
+ }
+ fn code(&self) -> String {
+ "du".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-du", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-du", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-du", Algorithm::Dutch)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "", "aan", "al", "alles", "als", "altijd", "andere", "ben", "bij", "daar", "dan", "dat", "de",
+ "der", "deze", "die", "dit", "doch", "doen", "door", "dus", "een", "eens", "en", "er", "ge",
+ "geen", "geweest", "haar", "had", "heb", "hebben", "heeft", "hem", "het", "hier", "hij", "hoe",
+ "hun", "iemand", "iets", "ik", "in", "is", "ja", "je", "kan", "kon", "kunnen", "maar", "me",
+ "meer", "men", "met", "mij", "mijn", "moet", "na", "naar", "niet", "niets", "nog", "nu", "of",
+ "om", "omdat", "onder", "ons", "ook", "op", "over", "reeds", "te", "tegen", "toch", "toen",
+ "tot", "u", "uit", "uw", "van", "veel", "voor", "want", "waren", "was", "wat", "werd", "wezen",
+ "wie", "wil", "worden", "wordt", "zal", "ze", "zelf", "zich", "zij", "zijn", "zo", "zonder",
+ "zou",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/en.rs b/vendor/elasticlunr-rs/src/lang/en.rs
new file mode 100644
index 000000000..f133ed7c9
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/en.rs
@@ -0,0 +1,458 @@
+use super::{common::StopWordFilter, Language};
+use crate::pipeline::{FnWrapper, Pipeline, PipelineFn};
+use regex::Regex;
+
+const WORDS: &[&str] = &[
+ "", "a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an",
+ "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot",
+ "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get",
+ "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if",
+ "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me",
+ "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on",
+ "only", "or", "other", "our", "own", "rather", "said", "say", "says", "she", "should", "since",
+ "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this",
+ "tis", "to", "too", "twas", "us", "wants", "was", "we", "were", "what", "when", "where",
+ "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your",
+];
+
+#[derive(Clone)]
+pub struct English {
+ stemmer: Stemmer,
+}
+
+impl English {
+ pub fn new() -> Self {
+ let stemmer = Stemmer::new();
+ Self { stemmer }
+ }
+}
+
+impl Language for English {
+ fn name(&self) -> String {
+ "English".into()
+ }
+ fn code(&self) -> String {
+ "en".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(FnWrapper("trimmer".into(), trimmer)),
+ Box::new(StopWordFilter::new("stopWordFilter", WORDS)),
+ Box::new(self.stemmer.clone()),
+ ],
+ }
+ }
+}
+
+fn trimmer(token: String) -> Option<String> {
+ Some(
+ token
+ .trim_matches(|c: char| !c.is_digit(36) && c != '_')
+ .into(),
+ )
+}
+
+static STEP_2: &[(&str, &str)] = &[
+ ("ational", "ate"),
+ ("tional", "tion"),
+ ("enci", "ence"),
+ ("anci", "ance"),
+ ("izer", "ize"),
+ ("bli", "ble"),
+ ("alli", "al"),
+ ("entli", "ent"),
+ ("eli", "e"),
+ ("ousli", "ous"),
+ ("ization", "ize"),
+ ("ation", "ate"),
+ ("ator", "ate"),
+ ("alism", "al"),
+ ("iveness", "ive"),
+ ("fulness", "ful"),
+ ("ousness", "ous"),
+ ("aliti", "al"),
+ ("iviti", "ive"),
+ ("biliti", "ble"),
+ ("logi", "log"),
+];
+
+static STEP_3: &[(&str, &str)] = &[
+ ("icate", "ic"),
+ ("ative", ""),
+ ("alize", "al"),
+ ("iciti", "ic"),
+ ("ical", "ic"),
+ ("ful", ""),
+ ("ness", ""),
+];
+
+// This is a direct port of the stemmer from elasticlunr.js
+// It's not very efficient and very not-rusty, but it
+// generates identical output.
+
+#[derive(Clone)]
+struct Stemmer {
+ re_mgr0: Regex,
+ re_mgr1: Regex,
+ re_meq1: Regex,
+ re_s_v: Regex,
+
+ re_1a: Regex,
+ re2_1a: Regex,
+ re_1b: Regex,
+ re2_1b: Regex,
+ re2_1b_2: Regex,
+ re3_1b_2: Regex,
+ re4_1b_2: Regex,
+
+ re_1c: Regex,
+ re_2: Regex,
+
+ re_3: Regex,
+
+ re_4: Regex,
+ re2_4: Regex,
+
+ re_5: Regex,
+ re3_5: Regex,
+}
+
+impl PipelineFn for Stemmer {
+ fn name(&self) -> String {
+ "stemmer".into()
+ }
+
+ fn filter(&self, token: String) -> Option<String> {
+ Some(self.stem(token))
+ }
+}
+
+// vowel
+macro_rules! V {
+ () => {
+ "[aeiouy]"
+ };
+}
+
+// consonant sequence
+macro_rules! CS {
+ () => {
+ "[^aeiou][^aeiouy]*"
+ };
+}
+
+// vowel sequence
+macro_rules! VS {
+ () => {
+ "[aeiouy][aeiou]*"
+ };
+}
+
+#[inline]
+fn concat_string(strs: &[&str]) -> String {
+ strs.iter().cloned().collect()
+}
+
+impl Stemmer {
+ fn new() -> Self {
+ let mgr0 = concat!("^(", CS!(), ")?", VS!(), CS!());
+ let meq1 = concat!("^(", CS!(), ")?", VS!(), CS!(), "(", VS!(), ")?$");
+ let mgr1 = concat!("^(", CS!(), ")?", VS!(), CS!(), VS!(), CS!());
+ let s_v = concat!("^(", CS!(), ")?", V!());
+
+ let re_mgr0 = Regex::new(mgr0).unwrap();
+ let re_mgr1 = Regex::new(mgr1).unwrap();
+ let re_meq1 = Regex::new(meq1).unwrap();
+ let re_s_v = Regex::new(s_v).unwrap();
+
+ let re_1a = Regex::new("^(.+?)(ss|i)es$").unwrap();
+ let re2_1a = Regex::new("^(.+?)([^s])s$").unwrap();
+ let re_1b = Regex::new("^(.+?)eed$").unwrap();
+ let re2_1b = Regex::new("^(.+?)(ed|ing)$").unwrap();
+ let re2_1b_2 = Regex::new("(at|bl|iz)$").unwrap();
+ let re3_1b_2 = Regex::new("([^aeiouylsz]{2})$").unwrap();
+ let re4_1b_2 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap();
+
+ let re_1c = Regex::new("^(.+?[^aeiou])y$").unwrap();
+ let re_2 = Regex::new(
+ "^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|\
+ ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$",
+ )
+ .unwrap();
+
+ let re_3 = Regex::new("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$").unwrap();
+
+ let re_4 = Regex::new(
+ "^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$",
+ )
+ .unwrap();
+ let re2_4 = Regex::new("^(.+?)(s|t)(ion)$").unwrap();
+
+ let re_5 = Regex::new("^(.+?)e$").unwrap();
+ let re3_5 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap();
+
+ Stemmer {
+ re_mgr0,
+ re_mgr1,
+ re_meq1,
+ re_s_v,
+ re_1a,
+ re2_1a,
+ re_1b,
+ re2_1b,
+ re2_1b_2,
+ re3_1b_2,
+ re4_1b_2,
+ re_1c,
+ re_2,
+ re_3,
+ re_4,
+ re2_4,
+ re_5,
+ re3_5,
+ }
+ }
+
+ /// Implements the Porter stemming algorithm
+ pub fn stem(&self, mut w: String) -> String {
+ if w.len() < 3 {
+ return w;
+ }
+
+ let starts_with_y = w.as_bytes()[0] == b'y';
+ if starts_with_y {
+ w.remove(0);
+ w.insert(0, 'Y');
+ }
+
+ // TODO: There's probably a better way to handle the
+    // borrow checker than cloning w a million times
+
+ // Step 1a
+ if let Some(caps) = self.re_1a.captures(&w.clone()) {
+ w = concat_string(&[&caps[1], &caps[2]]);
+ }
+ if let Some(caps) = self.re2_1a.captures(&w.clone()) {
+ w = concat_string(&[&caps[1], &caps[2]]);
+ }
+
+ // Step 1b
+ if let Some(caps) = self.re_1b.captures(&w.clone()) {
+ let stem = &caps[1];
+ if self.re_mgr0.is_match(stem) {
+ w.pop();
+ }
+ } else if let Some(caps) = self.re2_1b.captures(&w.clone()) {
+ let stem = &caps[1];
+ if self.re_s_v.is_match(stem) {
+ w = stem.into();
+
+ let mut re3_1b_2_matched = false;
+
+ if self.re2_1b_2.is_match(&w) {
+ w.push('e');
+ } else if let Some(m) = self.re3_1b_2.find(&w.clone()) {
+ let mut suffix = m.as_str().chars();
+ // Make sure the two characters are the same since we can't use backreferences
+ if suffix.next() == suffix.next() {
+ re3_1b_2_matched = true;
+ w.pop();
+ }
+ }
+
+ // re4_1b_2 still runs if re3_1b_2 matches but
+            // the matched characters are not the same
+ if !re3_1b_2_matched && self.re4_1b_2.is_match(&w) {
+ w.push('e');
+ }
+ }
+ }
+
+ // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first
+ // letter of the word (so cry -> cri, by -> by, say -> say)
+ if let Some(caps) = self.re_1c.captures(&w.clone()) {
+ let stem = &caps[1];
+ w = concat_string(&[stem, "i"]);
+ }
+
+ // Step 2
+ if let Some(caps) = self.re_2.captures(&w.clone()) {
+ let stem = &caps[1];
+ let suffix = &caps[2];
+ if self.re_mgr0.is_match(stem) {
+ w = concat_string(&[stem, STEP_2.iter().find(|&&(k, _)| k == suffix).unwrap().1]);
+ }
+ }
+
+ // Step 3
+ if let Some(caps) = self.re_3.captures(&w.clone()) {
+ let stem = &caps[1];
+ let suffix = &caps[2];
+ if self.re_mgr0.is_match(stem) {
+ w = concat_string(&[stem, STEP_3.iter().find(|&&(k, _)| k == suffix).unwrap().1]);
+ }
+ }
+
+ // Step 4
+ if let Some(caps) = self.re_4.captures(&w.clone()) {
+ let stem = &caps[1];
+ if self.re_mgr1.is_match(stem) {
+ w = stem.into();
+ }
+ } else if let Some(caps) = self.re2_4.captures(&w.clone()) {
+ let stem = concat_string(&[&caps[1], &caps[2]]);
+ if self.re_mgr1.is_match(&stem) {
+ w = stem;
+ }
+ }
+
+ // Step 5
+ if let Some(caps) = self.re_5.captures(&w.clone()) {
+ let stem = &caps[1];
+ if self.re_mgr1.is_match(stem)
+ || (self.re_meq1.is_match(stem) && !(self.re3_5.is_match(stem)))
+ {
+ w = stem.into();
+ }
+ }
+
+ if w.ends_with("ll") && self.re_mgr1.is_match(&w) {
+ w.pop();
+ }
+
+ // replace the original 'y'
+ if starts_with_y {
+ w.remove(0);
+ w.insert(0, 'y');
+ }
+
+ w
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ macro_rules! pipeline_eq {
+ ($func:expr, $input:expr, $output:expr) => {
+ assert_eq!(&$func($input.to_string()).unwrap(), $output);
+ };
+ }
+
+ #[test]
+ fn latin_characters() {
+ pipeline_eq!(trimmer, "hello", "hello");
+ }
+
+ #[test]
+ fn removing_punctuation() {
+ pipeline_eq!(trimmer, "hello.", "hello");
+ pipeline_eq!(trimmer, "it's", "it's");
+ pipeline_eq!(trimmer, "james'", "james");
+ pipeline_eq!(trimmer, "stop!", "stop");
+ pipeline_eq!(trimmer, "first,", "first");
+ pipeline_eq!(trimmer, "", "");
+ pipeline_eq!(trimmer, "[tag]", "tag");
+ pipeline_eq!(trimmer, "[[[tag]]]", "tag");
+ pipeline_eq!(trimmer, "[[!@#@!hello]]]}}}", "hello");
+ pipeline_eq!(trimmer, "~!@@@hello***()()()]]", "hello");
+ }
+
+ #[test]
+ fn test_stemmer() {
+ let cases = [
+ ("consign", "consign"),
+ ("consigned", "consign"),
+ ("consigning", "consign"),
+ ("consignment", "consign"),
+ ("consist", "consist"),
+ ("consisted", "consist"),
+ ("consistency", "consist"),
+ ("consistent", "consist"),
+ ("consistently", "consist"),
+ ("consisting", "consist"),
+ ("consists", "consist"),
+ ("consolation", "consol"),
+ ("consolations", "consol"),
+ ("consolatory", "consolatori"),
+ ("console", "consol"),
+ ("consoled", "consol"),
+ ("consoles", "consol"),
+ ("consolidate", "consolid"),
+ ("consolidated", "consolid"),
+ ("consolidating", "consolid"),
+ ("consoling", "consol"),
+ ("consols", "consol"),
+ ("consonant", "conson"),
+ ("consort", "consort"),
+ ("consorted", "consort"),
+ ("consorting", "consort"),
+ ("conspicuous", "conspicu"),
+ ("conspicuously", "conspicu"),
+ ("conspiracy", "conspiraci"),
+ ("conspirator", "conspir"),
+ ("conspirators", "conspir"),
+ ("conspire", "conspir"),
+ ("conspired", "conspir"),
+ ("conspiring", "conspir"),
+ ("constable", "constabl"),
+ ("constables", "constabl"),
+ ("constance", "constanc"),
+ ("constancy", "constanc"),
+ ("constant", "constant"),
+ ("knack", "knack"),
+ ("knackeries", "knackeri"),
+ ("knacks", "knack"),
+ ("knag", "knag"),
+ ("knave", "knave"),
+ ("knaves", "knave"),
+ ("knavish", "knavish"),
+ ("kneaded", "knead"),
+ ("kneading", "knead"),
+ ("knee", "knee"),
+ ("kneel", "kneel"),
+ ("kneeled", "kneel"),
+ ("kneeling", "kneel"),
+ ("kneels", "kneel"),
+ ("knees", "knee"),
+ ("knell", "knell"),
+ ("knelt", "knelt"),
+ ("knew", "knew"),
+ ("knick", "knick"),
+ ("knif", "knif"),
+ ("knife", "knife"),
+ ("knight", "knight"),
+ ("knights", "knight"),
+ ("knit", "knit"),
+ ("knits", "knit"),
+ ("knitted", "knit"),
+ ("knitting", "knit"),
+ ("knives", "knive"),
+ ("knob", "knob"),
+ ("knobs", "knob"),
+ ("knock", "knock"),
+ ("knocked", "knock"),
+ ("knocker", "knocker"),
+ ("knockers", "knocker"),
+ ("knocking", "knock"),
+ ("knocks", "knock"),
+ ("knopp", "knopp"),
+ ("knot", "knot"),
+ ("knots", "knot"),
+ ("lay", "lay"),
+ ("try", "tri"),
+ ];
+
+ let stemmer = Stemmer::new();
+ for &(input, output) in cases.iter() {
+ assert_eq!(&stemmer.stem(input.into()), output);
+ }
+ }
+}
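For orientation when reading Stemmer::new above, the four "measure" regexes correspond to the Porter-measure tests (naming inferred from the lunr.js source this is ported from), where C and V are the consonant/vowel sequences produced by the CS!/VS!/V! macros:

    re_mgr0  ~  ^(C)?VC...       measure m > 0
    re_meq1  ~  ^(C)?VC(V)?$     measure m == 1
    re_mgr1  ~  ^(C)?VCVC...     measure m > 1
    re_s_v   ~  ^(C)?V           the stem contains a vowel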
diff --git a/vendor/elasticlunr-rs/src/lang/es.rs b/vendor/elasticlunr-rs/src/lang/es.rs
new file mode 100644
index 000000000..b6c4b5bcf
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/es.rs
@@ -0,0 +1,350 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Spanish {}
+
+impl Spanish {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Spanish {
+ fn name(&self) -> String {
+ "Spanish".into()
+ }
+ fn code(&self) -> String {
+ "es".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-es", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-es", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-es", Algorithm::Spanish)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "a",
+ "al",
+ "algo",
+ "algunas",
+ "algunos",
+ "ante",
+ "antes",
+ "como",
+ "con",
+ "contra",
+ "cual",
+ "cuando",
+ "de",
+ "del",
+ "desde",
+ "donde",
+ "durante",
+ "e",
+ "el",
+ "ella",
+ "ellas",
+ "ellos",
+ "en",
+ "entre",
+ "era",
+ "erais",
+ "eran",
+ "eras",
+ "eres",
+ "es",
+ "esa",
+ "esas",
+ "ese",
+ "eso",
+ "esos",
+ "esta",
+ "estaba",
+ "estabais",
+ "estaban",
+ "estabas",
+ "estad",
+ "estada",
+ "estadas",
+ "estado",
+ "estados",
+ "estamos",
+ "estando",
+ "estar",
+ "estaremos",
+ "estará",
+ "estarán",
+ "estarás",
+ "estaré",
+ "estaréis",
+ "estaría",
+ "estaríais",
+ "estaríamos",
+ "estarían",
+ "estarías",
+ "estas",
+ "este",
+ "estemos",
+ "esto",
+ "estos",
+ "estoy",
+ "estuve",
+ "estuviera",
+ "estuvierais",
+ "estuvieran",
+ "estuvieras",
+ "estuvieron",
+ "estuviese",
+ "estuvieseis",
+ "estuviesen",
+ "estuvieses",
+ "estuvimos",
+ "estuviste",
+ "estuvisteis",
+ "estuviéramos",
+ "estuviésemos",
+ "estuvo",
+ "está",
+ "estábamos",
+ "estáis",
+ "están",
+ "estás",
+ "esté",
+ "estéis",
+ "estén",
+ "estés",
+ "fue",
+ "fuera",
+ "fuerais",
+ "fueran",
+ "fueras",
+ "fueron",
+ "fuese",
+ "fueseis",
+ "fuesen",
+ "fueses",
+ "fui",
+ "fuimos",
+ "fuiste",
+ "fuisteis",
+ "fuéramos",
+ "fuésemos",
+ "ha",
+ "habida",
+ "habidas",
+ "habido",
+ "habidos",
+ "habiendo",
+ "habremos",
+ "habrá",
+ "habrán",
+ "habrás",
+ "habré",
+ "habréis",
+ "habría",
+ "habríais",
+ "habríamos",
+ "habrían",
+ "habrías",
+ "habéis",
+ "había",
+ "habíais",
+ "habíamos",
+ "habían",
+ "habías",
+ "han",
+ "has",
+ "hasta",
+ "hay",
+ "haya",
+ "hayamos",
+ "hayan",
+ "hayas",
+ "hayáis",
+ "he",
+ "hemos",
+ "hube",
+ "hubiera",
+ "hubierais",
+ "hubieran",
+ "hubieras",
+ "hubieron",
+ "hubiese",
+ "hubieseis",
+ "hubiesen",
+ "hubieses",
+ "hubimos",
+ "hubiste",
+ "hubisteis",
+ "hubiéramos",
+ "hubiésemos",
+ "hubo",
+ "la",
+ "las",
+ "le",
+ "les",
+ "lo",
+ "los",
+ "me",
+ "mi",
+ "mis",
+ "mucho",
+ "muchos",
+ "muy",
+ "más",
+ "mí",
+ "mía",
+ "mías",
+ "mío",
+ "míos",
+ "nada",
+ "ni",
+ "no",
+ "nos",
+ "nosotras",
+ "nosotros",
+ "nuestra",
+ "nuestras",
+ "nuestro",
+ "nuestros",
+ "o",
+ "os",
+ "otra",
+ "otras",
+ "otro",
+ "otros",
+ "para",
+ "pero",
+ "poco",
+ "por",
+ "porque",
+ "que",
+ "quien",
+ "quienes",
+ "qué",
+ "se",
+ "sea",
+ "seamos",
+ "sean",
+ "seas",
+ "seremos",
+ "será",
+ "serán",
+ "serás",
+ "seré",
+ "seréis",
+ "sería",
+ "seríais",
+ "seríamos",
+ "serían",
+ "serías",
+ "seáis",
+ "sido",
+ "siendo",
+ "sin",
+ "sobre",
+ "sois",
+ "somos",
+ "son",
+ "soy",
+ "su",
+ "sus",
+ "suya",
+ "suyas",
+ "suyo",
+ "suyos",
+ "sí",
+ "también",
+ "tanto",
+ "te",
+ "tendremos",
+ "tendrá",
+ "tendrán",
+ "tendrás",
+ "tendré",
+ "tendréis",
+ "tendría",
+ "tendríais",
+ "tendríamos",
+ "tendrían",
+ "tendrías",
+ "tened",
+ "tenemos",
+ "tenga",
+ "tengamos",
+ "tengan",
+ "tengas",
+ "tengo",
+ "tengáis",
+ "tenida",
+ "tenidas",
+ "tenido",
+ "tenidos",
+ "teniendo",
+ "tenéis",
+ "tenía",
+ "teníais",
+ "teníamos",
+ "tenían",
+ "tenías",
+ "ti",
+ "tiene",
+ "tienen",
+ "tienes",
+ "todo",
+ "todos",
+ "tu",
+ "tus",
+ "tuve",
+ "tuviera",
+ "tuvierais",
+ "tuvieran",
+ "tuvieras",
+ "tuvieron",
+ "tuviese",
+ "tuvieseis",
+ "tuviesen",
+ "tuvieses",
+ "tuvimos",
+ "tuviste",
+ "tuvisteis",
+ "tuviéramos",
+ "tuviésemos",
+ "tuvo",
+ "tuya",
+ "tuyas",
+ "tuyo",
+ "tuyos",
+ "tú",
+ "un",
+ "una",
+ "uno",
+ "unos",
+ "vosotras",
+ "vosotros",
+ "vuestra",
+ "vuestras",
+ "vuestro",
+ "vuestros",
+ "y",
+ "ya",
+ "yo",
+ "él",
+ "éramos",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/fi.rs b/vendor/elasticlunr-rs/src/lang/fi.rs
new file mode 100644
index 000000000..91cfaa571
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/fi.rs
@@ -0,0 +1,277 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Finnish {}
+
+impl Finnish {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Finnish {
+ fn name(&self) -> String {
+ "Finnish".into()
+ }
+ fn code(&self) -> String {
+ "fi".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-fi", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-fi", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-fi", Algorithm::Finnish)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "ei",
+ "eivät",
+ "emme",
+ "en",
+ "et",
+ "ette",
+ "että",
+ "he",
+ "heidän",
+ "heidät",
+ "heihin",
+ "heille",
+ "heillä",
+ "heiltä",
+ "heissä",
+ "heistä",
+ "heitä",
+ "hän",
+ "häneen",
+ "hänelle",
+ "hänellä",
+ "häneltä",
+ "hänen",
+ "hänessä",
+ "hänestä",
+ "hänet",
+ "häntä",
+ "itse",
+ "ja",
+ "johon",
+ "joiden",
+ "joihin",
+ "joiksi",
+ "joilla",
+ "joille",
+ "joilta",
+ "joina",
+ "joissa",
+ "joista",
+ "joita",
+ "joka",
+ "joksi",
+ "jolla",
+ "jolle",
+ "jolta",
+ "jona",
+ "jonka",
+ "jos",
+ "jossa",
+ "josta",
+ "jota",
+ "jotka",
+ "kanssa",
+ "keiden",
+ "keihin",
+ "keiksi",
+ "keille",
+ "keillä",
+ "keiltä",
+ "keinä",
+ "keissä",
+ "keistä",
+ "keitä",
+ "keneen",
+ "keneksi",
+ "kenelle",
+ "kenellä",
+ "keneltä",
+ "kenen",
+ "kenenä",
+ "kenessä",
+ "kenestä",
+ "kenet",
+ "ketkä",
+ "ketkä",
+ "ketä",
+ "koska",
+ "kuin",
+ "kuka",
+ "kun",
+ "me",
+ "meidän",
+ "meidät",
+ "meihin",
+ "meille",
+ "meillä",
+ "meiltä",
+ "meissä",
+ "meistä",
+ "meitä",
+ "mihin",
+ "miksi",
+ "mikä",
+ "mille",
+ "millä",
+ "miltä",
+ "minkä",
+ "minkä",
+ "minua",
+ "minulla",
+ "minulle",
+ "minulta",
+ "minun",
+ "minussa",
+ "minusta",
+ "minut",
+ "minuun",
+ "minä",
+ "minä",
+ "missä",
+ "mistä",
+ "mitkä",
+ "mitä",
+ "mukaan",
+ "mutta",
+ "ne",
+ "niiden",
+ "niihin",
+ "niiksi",
+ "niille",
+ "niillä",
+ "niiltä",
+ "niin",
+ "niin",
+ "niinä",
+ "niissä",
+ "niistä",
+ "niitä",
+ "noiden",
+ "noihin",
+ "noiksi",
+ "noilla",
+ "noille",
+ "noilta",
+ "noin",
+ "noina",
+ "noissa",
+ "noista",
+ "noita",
+ "nuo",
+ "nyt",
+ "näiden",
+ "näihin",
+ "näiksi",
+ "näille",
+ "näillä",
+ "näiltä",
+ "näinä",
+ "näissä",
+ "näistä",
+ "näitä",
+ "nämä",
+ "ole",
+ "olemme",
+ "olen",
+ "olet",
+ "olette",
+ "oli",
+ "olimme",
+ "olin",
+ "olisi",
+ "olisimme",
+ "olisin",
+ "olisit",
+ "olisitte",
+ "olisivat",
+ "olit",
+ "olitte",
+ "olivat",
+ "olla",
+ "olleet",
+ "ollut",
+ "on",
+ "ovat",
+ "poikki",
+ "se",
+ "sekä",
+ "sen",
+ "siihen",
+ "siinä",
+ "siitä",
+ "siksi",
+ "sille",
+ "sillä",
+ "sillä",
+ "siltä",
+ "sinua",
+ "sinulla",
+ "sinulle",
+ "sinulta",
+ "sinun",
+ "sinussa",
+ "sinusta",
+ "sinut",
+ "sinuun",
+ "sinä",
+ "sinä",
+ "sitä",
+ "tai",
+ "te",
+ "teidän",
+ "teidät",
+ "teihin",
+ "teille",
+ "teillä",
+ "teiltä",
+ "teissä",
+ "teistä",
+ "teitä",
+ "tuo",
+ "tuohon",
+ "tuoksi",
+ "tuolla",
+ "tuolle",
+ "tuolta",
+ "tuon",
+ "tuona",
+ "tuossa",
+ "tuosta",
+ "tuota",
+ "tähän",
+ "täksi",
+ "tälle",
+ "tällä",
+ "tältä",
+ "tämä",
+ "tämän",
+ "tänä",
+ "tässä",
+ "tästä",
+ "tätä",
+ "vaan",
+ "vai",
+ "vaikka",
+ "yli",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/fr.rs b/vendor/elasticlunr-rs/src/lang/fr.rs
new file mode 100644
index 000000000..ec41f307a
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/fr.rs
@@ -0,0 +1,56 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct French {}
+
+impl French {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for French {
+ fn name(&self) -> String {
+ "French".into()
+ }
+ fn code(&self) -> String {
+ "fr".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-fr", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-fr", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-fr", Algorithm::French)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "", "ai", "aie", "aient", "aies", "ait", "as", "au", "aura", "aurai", "auraient", "aurais",
+ "aurait", "auras", "aurez", "auriez", "aurions", "aurons", "auront", "aux", "avaient", "avais",
+ "avait", "avec", "avez", "aviez", "avions", "avons", "ayant", "ayez", "ayons", "c", "ce",
+ "ceci", "celà", "ces", "cet", "cette", "d", "dans", "de", "des", "du", "elle", "en", "es",
+ "est", "et", "eu", "eue", "eues", "eurent", "eus", "eusse", "eussent", "eusses", "eussiez",
+ "eussions", "eut", "eux", "eûmes", "eût", "eûtes", "furent", "fus", "fusse", "fussent",
+ "fusses", "fussiez", "fussions", "fut", "fûmes", "fût", "fûtes", "ici", "il", "ils", "j", "je",
+ "l", "la", "le", "les", "leur", "leurs", "lui", "m", "ma", "mais", "me", "mes", "moi", "mon",
+ "même", "n", "ne", "nos", "notre", "nous", "on", "ont", "ou", "par", "pas", "pour", "qu",
+ "que", "quel", "quelle", "quelles", "quels", "qui", "s", "sa", "sans", "se", "sera", "serai",
+ "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront",
+ "ses", "soi", "soient", "sois", "soit", "sommes", "son", "sont", "soyez", "soyons", "suis",
+ "sur", "t", "ta", "te", "tes", "toi", "ton", "tu", "un", "une", "vos", "votre", "vous", "y",
+ "à", "étaient", "étais", "était", "étant", "étiez", "étions", "été", "étée", "étées", "étés",
+ "êtes",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/it.rs b/vendor/elasticlunr-rs/src/lang/it.rs
new file mode 100644
index 000000000..78d7e4454
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/it.rs
@@ -0,0 +1,321 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Italian {}
+
+impl Italian {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Italian {
+ fn name(&self) -> String {
+ "Italian".into()
+ }
+ fn code(&self) -> String {
+ "it".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-it", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-it", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-it", Algorithm::Italian)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "a",
+ "abbia",
+ "abbiamo",
+ "abbiano",
+ "abbiate",
+ "ad",
+ "agl",
+ "agli",
+ "ai",
+ "al",
+ "all",
+ "alla",
+ "alle",
+ "allo",
+ "anche",
+ "avemmo",
+ "avendo",
+ "avesse",
+ "avessero",
+ "avessi",
+ "avessimo",
+ "aveste",
+ "avesti",
+ "avete",
+ "aveva",
+ "avevamo",
+ "avevano",
+ "avevate",
+ "avevi",
+ "avevo",
+ "avrai",
+ "avranno",
+ "avrebbe",
+ "avrebbero",
+ "avrei",
+ "avremmo",
+ "avremo",
+ "avreste",
+ "avresti",
+ "avrete",
+ "avrà",
+ "avrò",
+ "avuta",
+ "avute",
+ "avuti",
+ "avuto",
+ "c",
+ "che",
+ "chi",
+ "ci",
+ "coi",
+ "col",
+ "come",
+ "con",
+ "contro",
+ "cui",
+ "da",
+ "dagl",
+ "dagli",
+ "dai",
+ "dal",
+ "dall",
+ "dalla",
+ "dalle",
+ "dallo",
+ "degl",
+ "degli",
+ "dei",
+ "del",
+ "dell",
+ "della",
+ "delle",
+ "dello",
+ "di",
+ "dov",
+ "dove",
+ "e",
+ "ebbe",
+ "ebbero",
+ "ebbi",
+ "ed",
+ "era",
+ "erano",
+ "eravamo",
+ "eravate",
+ "eri",
+ "ero",
+ "essendo",
+ "faccia",
+ "facciamo",
+ "facciano",
+ "facciate",
+ "faccio",
+ "facemmo",
+ "facendo",
+ "facesse",
+ "facessero",
+ "facessi",
+ "facessimo",
+ "faceste",
+ "facesti",
+ "faceva",
+ "facevamo",
+ "facevano",
+ "facevate",
+ "facevi",
+ "facevo",
+ "fai",
+ "fanno",
+ "farai",
+ "faranno",
+ "farebbe",
+ "farebbero",
+ "farei",
+ "faremmo",
+ "faremo",
+ "fareste",
+ "faresti",
+ "farete",
+ "farà",
+ "farò",
+ "fece",
+ "fecero",
+ "feci",
+ "fosse",
+ "fossero",
+ "fossi",
+ "fossimo",
+ "foste",
+ "fosti",
+ "fu",
+ "fui",
+ "fummo",
+ "furono",
+ "gli",
+ "ha",
+ "hai",
+ "hanno",
+ "ho",
+ "i",
+ "il",
+ "in",
+ "io",
+ "l",
+ "la",
+ "le",
+ "lei",
+ "li",
+ "lo",
+ "loro",
+ "lui",
+ "ma",
+ "mi",
+ "mia",
+ "mie",
+ "miei",
+ "mio",
+ "ne",
+ "negl",
+ "negli",
+ "nei",
+ "nel",
+ "nell",
+ "nella",
+ "nelle",
+ "nello",
+ "noi",
+ "non",
+ "nostra",
+ "nostre",
+ "nostri",
+ "nostro",
+ "o",
+ "per",
+ "perché",
+ "più",
+ "quale",
+ "quanta",
+ "quante",
+ "quanti",
+ "quanto",
+ "quella",
+ "quelle",
+ "quelli",
+ "quello",
+ "questa",
+ "queste",
+ "questi",
+ "questo",
+ "sarai",
+ "saranno",
+ "sarebbe",
+ "sarebbero",
+ "sarei",
+ "saremmo",
+ "saremo",
+ "sareste",
+ "saresti",
+ "sarete",
+ "sarà",
+ "sarò",
+ "se",
+ "sei",
+ "si",
+ "sia",
+ "siamo",
+ "siano",
+ "siate",
+ "siete",
+ "sono",
+ "sta",
+ "stai",
+ "stando",
+ "stanno",
+ "starai",
+ "staranno",
+ "starebbe",
+ "starebbero",
+ "starei",
+ "staremmo",
+ "staremo",
+ "stareste",
+ "staresti",
+ "starete",
+ "starà",
+ "starò",
+ "stava",
+ "stavamo",
+ "stavano",
+ "stavate",
+ "stavi",
+ "stavo",
+ "stemmo",
+ "stesse",
+ "stessero",
+ "stessi",
+ "stessimo",
+ "steste",
+ "stesti",
+ "stette",
+ "stettero",
+ "stetti",
+ "stia",
+ "stiamo",
+ "stiano",
+ "stiate",
+ "sto",
+ "su",
+ "sua",
+ "sue",
+ "sugl",
+ "sugli",
+ "sui",
+ "sul",
+ "sull",
+ "sulla",
+ "sulle",
+ "sullo",
+ "suo",
+ "suoi",
+ "ti",
+ "tra",
+ "tu",
+ "tua",
+ "tue",
+ "tuo",
+ "tuoi",
+ "tutti",
+ "tutto",
+ "un",
+ "una",
+ "uno",
+ "vi",
+ "voi",
+ "vostra",
+ "vostre",
+ "vostri",
+ "vostro",
+ "è",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/ja.rs b/vendor/elasticlunr-rs/src/lang/ja.rs
new file mode 100644
index 000000000..e38fcde9f
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/ja.rs
@@ -0,0 +1,76 @@
+use super::{common::RegexTrimmer, Language};
+use crate::pipeline::{FnWrapper, Pipeline};
+use lindera::tokenizer::{Tokenizer, TokenizerConfig};
+use lindera_core::viterbi::Mode;
+
+#[derive(Clone)]
+pub struct Japanese {
+ tokenizer: Tokenizer,
+}
+
+impl Japanese {
+ pub fn new() -> Self {
+ let config = TokenizerConfig {
+ mode: Mode::Decompose(Default::default()),
+ ..Default::default()
+ };
+ Self::with_config(config)
+ }
+
+ pub fn with_config(config: TokenizerConfig) -> Self {
+ // NB: unwrap() is okay since the errors are only related to user-supplied dictionaries.
+ let tokenizer = Tokenizer::with_config(config).unwrap();
+ Self { tokenizer }
+ }
+}
+
+impl Language for Japanese {
+ fn name(&self) -> String {
+ "Japanese".into()
+ }
+ fn code(&self) -> String {
+ "ja".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ self.tokenizer
+ .tokenize(text)
+ .unwrap()
+ .into_iter()
+ .filter_map(|tok| match tok.detail.get(0).map(|d| d.as_str()) {
+ Some("助詞") | Some("助動詞") | Some("記号") | Some("UNK") => None,
+ _ => Some(tok.text.to_string()),
+ })
+ .collect()
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-ja", WORD_CHARS)),
+ Box::new(FnWrapper("stemmer-ja".into(), stemmer)),
+ ],
+ }
+ }
+}
+
+const WORD_CHARS: &str = r"0-9A-Za-z\p{Hiragana}\p{Katakana}\p{Unified_Ideograph}";
+
+fn stemmer(token: String) -> Option<String> {
+ Some(token)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::pipeline::PipelineFn;
+ use super::*;
+
+ #[test]
+ fn test_trimmer() {
+ let trimmer = RegexTrimmer::new("trimmer-ja".into(), WORD_CHARS);
+ assert_eq!(
+ trimmer.filter(" こんにちは、世界!".to_string()),
+ Some("こんにちは、世界".to_string())
+ );
+ }
+}
diff --git a/vendor/elasticlunr-rs/src/lang/mod.rs b/vendor/elasticlunr-rs/src/lang/mod.rs
new file mode 100644
index 000000000..81966e1b2
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/mod.rs
@@ -0,0 +1,138 @@
+//! Intended to be compatible with <https://github.com/MihaiValentin/lunr-languages>. Each supported
+//! language has a trimmer, a stop word filter, and a stemmer. Most users will not need to use
+//! these modules directly.
+
+pub mod common;
+
+use crate::Pipeline;
+
+pub trait Language {
+ /// The name of the language in English
+ fn name(&self) -> String;
+
+ /// The ISO 639-1 language code of the language
+ fn code(&self) -> String;
+
+ /// Separates the input text into individual tokens. In most languages a token is a word, separated by whitespace.
+ fn tokenize(&self, text: &str) -> Vec<String>;
+
+ /// Returns the [`Pipeline`] to process the tokens with
+ fn make_pipeline(&self) -> Pipeline;
+}
+
+/// Splits a text string into a vector of individual tokens.
+pub fn tokenize_whitespace(text: &str) -> Vec<String> {
+ text.split(|c: char| c.is_whitespace() || c == '-')
+ .filter(|s| !s.is_empty())
+ .map(|s| s.trim().to_lowercase())
+ .collect()
+}
+
+macro_rules! impl_language {
+ ($( ( $name:ident, $code:ident $(, #[$cfgs:meta] )? ), )+) => {
+ /// Returns a list of all the [`Language`] implementations in the crate
+ pub fn languages() -> Vec<Box<dyn Language>> {
+ vec![
+ $(
+ $(#[$cfgs])?
+ Box::new($code::$name::new()),
+ )+
+ ]
+ }
+
+ /// Returns the [`Language`] for the given two-character [ISO 639-1][iso] language code if the
+ /// language is supported. Returns `None` if not supported.
+ ///
+ /// *Note:*
+ ///
+ /// The ISO 639-1 code for Dutch is "nl". However "du" is used for the module name
+ /// and pipeline suffix in order to match lunr-languages.
+ ///
+ /// [iso]: https://en.wikipedia.org/wiki/ISO_639-1
+ pub fn from_code(code: &str) -> Option<Box<dyn Language>> {
+ match code.to_ascii_lowercase().as_str() {
+ $(
+ $(#[$cfgs])?
+ stringify!($code) => Some(Box::new($code::$name::new())),
+ )+
+ _ => None,
+ }
+ }
+
+ /// Returns the [`Language`] for the given English language name if the
+ /// language is supported. Returns `None` if not supported. The first letter must
+ /// be capitalized.
+ pub fn from_name(name: &str) -> Option<Box<dyn Language>> {
+ match name {
+ $(
+ $(#[$cfgs])?
+ stringify!($name) => Some(Box::new($code::$name::new())),
+ )+
+ _ => None,
+ }
+ }
+
+ $(
+ $(#[$cfgs])?
+ mod $code;
+
+ $(#[$cfgs])?
+ pub use $code::$name;
+ )+
+ };
+}
+
+impl_language! {
+ (English, en),
+ (Arabic, ar, #[cfg(feature = "ar")]),
+ (Chinese, zh, #[cfg(feature = "zh")]),
+ (Danish, da, #[cfg(feature = "da")]),
+ (Dutch, du, #[cfg(feature = "du")]),
+ (Finnish, fi, #[cfg(feature = "fi")]),
+ (French, fr, #[cfg(feature = "fr")]),
+ (German, de, #[cfg(feature = "de")]),
+ (Italian, it, #[cfg(feature = "it")]),
+ (Japanese, ja, #[cfg(feature = "ja")]),
+ (Norwegian, no, #[cfg(feature = "no")]),
+ (Portuguese, pt, #[cfg(feature = "pt")]),
+ (Romanian, ro, #[cfg(feature = "ro")]),
+ (Russian, ru, #[cfg(feature = "ru")]),
+ (Spanish, es, #[cfg(feature = "es")]),
+ (Swedish, sv, #[cfg(feature = "sv")]),
+ (Turkish, tr, #[cfg(feature = "tr")]),
+}
+
+#[cfg(test)]
+mod tests {
+ use super::tokenize_whitespace;
+
+ #[test]
+ fn split_simple_strings() {
+ let string = "this is a simple string";
+ assert_eq!(
+ &tokenize_whitespace(string),
+ &["this", "is", "a", "simple", "string"]
+ );
+ }
+
+ #[test]
+ fn multiple_white_space() {
+ let string = " foo bar ";
+ assert_eq!(&tokenize_whitespace(string), &["foo", "bar"]);
+ }
+
+ #[test]
+ fn hyphens() {
+ let string = "take the New York-San Francisco flight";
+ assert_eq!(
+ &tokenize_whitespace(string),
+ &["take", "the", "new", "york", "san", "francisco", "flight"]
+ );
+ }
+
+ #[test]
+ fn splitting_strings_with_hyphens() {
+ let string = "Solve for A - B";
+ assert_eq!(&tokenize_whitespace(string), &["solve", "for", "a", "b"]);
+ }
+}
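To see how these pieces fit together, here is a hedged usage sketch — a hypothetical helper, not part of the vendored file, assumed to live in this module so that `from_code` and the crate-internal `queue` field are in scope. It picks a Language by ISO code, tokenizes the text, and feeds each token through the pipeline built by make_pipeline:

fn index_tokens(code: &str, text: &str) -> Vec<String> {
    use crate::pipeline::PipelineFn;

    let lang = from_code(code).expect("language code not compiled in");
    let pipeline = lang.make_pipeline();
    lang.tokenize(text)
        .into_iter()
        .filter_map(|token| {
            // Each PipelineFn may rewrite the token or drop it by returning None.
            pipeline.queue.iter().try_fold(token, |tok, f| f.filter(tok))
        })
        .collect()
}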
diff --git a/vendor/elasticlunr-rs/src/lang/no.rs b/vendor/elasticlunr-rs/src/lang/no.rs
new file mode 100644
index 000000000..710346fde
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/no.rs
@@ -0,0 +1,218 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Norwegian {}
+
+impl Norwegian {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Norwegian {
+ fn name(&self) -> String {
+ "Norwegian".into()
+ }
+ fn code(&self) -> String {
+ "no".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-no", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-no", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-no", Algorithm::Norwegian)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "alle",
+ "at",
+ "av",
+ "bare",
+ "begge",
+ "ble",
+ "blei",
+ "bli",
+ "blir",
+ "blitt",
+ "både",
+ "båe",
+ "da",
+ "de",
+ "deg",
+ "dei",
+ "deim",
+ "deira",
+ "deires",
+ "dem",
+ "den",
+ "denne",
+ "der",
+ "dere",
+ "deres",
+ "det",
+ "dette",
+ "di",
+ "din",
+ "disse",
+ "ditt",
+ "du",
+ "dykk",
+ "dykkar",
+ "då",
+ "eg",
+ "ein",
+ "eit",
+ "eitt",
+ "eller",
+ "elles",
+ "en",
+ "enn",
+ "er",
+ "et",
+ "ett",
+ "etter",
+ "for",
+ "fordi",
+ "fra",
+ "før",
+ "ha",
+ "hadde",
+ "han",
+ "hans",
+ "har",
+ "hennar",
+ "henne",
+ "hennes",
+ "her",
+ "hjå",
+ "ho",
+ "hoe",
+ "honom",
+ "hoss",
+ "hossen",
+ "hun",
+ "hva",
+ "hvem",
+ "hver",
+ "hvilke",
+ "hvilken",
+ "hvis",
+ "hvor",
+ "hvordan",
+ "hvorfor",
+ "i",
+ "ikke",
+ "ikkje",
+ "ikkje",
+ "ingen",
+ "ingi",
+ "inkje",
+ "inn",
+ "inni",
+ "ja",
+ "jeg",
+ "kan",
+ "kom",
+ "korleis",
+ "korso",
+ "kun",
+ "kunne",
+ "kva",
+ "kvar",
+ "kvarhelst",
+ "kven",
+ "kvi",
+ "kvifor",
+ "man",
+ "mange",
+ "me",
+ "med",
+ "medan",
+ "meg",
+ "meget",
+ "mellom",
+ "men",
+ "mi",
+ "min",
+ "mine",
+ "mitt",
+ "mot",
+ "mykje",
+ "ned",
+ "no",
+ "noe",
+ "noen",
+ "noka",
+ "noko",
+ "nokon",
+ "nokor",
+ "nokre",
+ "nå",
+ "når",
+ "og",
+ "også",
+ "om",
+ "opp",
+ "oss",
+ "over",
+ "på",
+ "samme",
+ "seg",
+ "selv",
+ "si",
+ "si",
+ "sia",
+ "sidan",
+ "siden",
+ "sin",
+ "sine",
+ "sitt",
+ "sjøl",
+ "skal",
+ "skulle",
+ "slik",
+ "so",
+ "som",
+ "som",
+ "somme",
+ "somt",
+ "så",
+ "sånn",
+ "til",
+ "um",
+ "upp",
+ "ut",
+ "uten",
+ "var",
+ "vart",
+ "varte",
+ "ved",
+ "vere",
+ "verte",
+ "vi",
+ "vil",
+ "ville",
+ "vore",
+ "vors",
+ "vort",
+ "vår",
+ "være",
+ "være",
+ "vært",
+ "å",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/pt.rs b/vendor/elasticlunr-rs/src/lang/pt.rs
new file mode 100644
index 000000000..5f36f4280
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/pt.rs
@@ -0,0 +1,245 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Portuguese {}
+
+impl Portuguese {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Portuguese {
+ fn name(&self) -> String {
+ "Portuguese".into()
+ }
+ fn code(&self) -> String {
+ "pt".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-pt", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-pt", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-pt", Algorithm::Portuguese)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "a",
+ "ao",
+ "aos",
+ "aquela",
+ "aquelas",
+ "aquele",
+ "aqueles",
+ "aquilo",
+ "as",
+ "até",
+ "com",
+ "como",
+ "da",
+ "das",
+ "de",
+ "dela",
+ "delas",
+ "dele",
+ "deles",
+ "depois",
+ "do",
+ "dos",
+ "e",
+ "ela",
+ "elas",
+ "ele",
+ "eles",
+ "em",
+ "entre",
+ "era",
+ "eram",
+ "essa",
+ "essas",
+ "esse",
+ "esses",
+ "esta",
+ "estamos",
+ "estas",
+ "estava",
+ "estavam",
+ "este",
+ "esteja",
+ "estejam",
+ "estejamos",
+ "estes",
+ "esteve",
+ "estive",
+ "estivemos",
+ "estiver",
+ "estivera",
+ "estiveram",
+ "estiverem",
+ "estivermos",
+ "estivesse",
+ "estivessem",
+ "estivéramos",
+ "estivéssemos",
+ "estou",
+ "está",
+ "estávamos",
+ "estão",
+ "eu",
+ "foi",
+ "fomos",
+ "for",
+ "fora",
+ "foram",
+ "forem",
+ "formos",
+ "fosse",
+ "fossem",
+ "fui",
+ "fôramos",
+ "fôssemos",
+ "haja",
+ "hajam",
+ "hajamos",
+ "havemos",
+ "hei",
+ "houve",
+ "houvemos",
+ "houver",
+ "houvera",
+ "houveram",
+ "houverei",
+ "houverem",
+ "houveremos",
+ "houveria",
+ "houveriam",
+ "houvermos",
+ "houverá",
+ "houverão",
+ "houveríamos",
+ "houvesse",
+ "houvessem",
+ "houvéramos",
+ "houvéssemos",
+ "há",
+ "hão",
+ "isso",
+ "isto",
+ "já",
+ "lhe",
+ "lhes",
+ "mais",
+ "mas",
+ "me",
+ "mesmo",
+ "meu",
+ "meus",
+ "minha",
+ "minhas",
+ "muito",
+ "na",
+ "nas",
+ "nem",
+ "no",
+ "nos",
+ "nossa",
+ "nossas",
+ "nosso",
+ "nossos",
+ "num",
+ "numa",
+ "não",
+ "nós",
+ "o",
+ "os",
+ "ou",
+ "para",
+ "pela",
+ "pelas",
+ "pelo",
+ "pelos",
+ "por",
+ "qual",
+ "quando",
+ "que",
+ "quem",
+ "se",
+ "seja",
+ "sejam",
+ "sejamos",
+ "sem",
+ "serei",
+ "seremos",
+ "seria",
+ "seriam",
+ "será",
+ "serão",
+ "seríamos",
+ "seu",
+ "seus",
+ "somos",
+ "sou",
+ "sua",
+ "suas",
+ "são",
+ "só",
+ "também",
+ "te",
+ "tem",
+ "temos",
+ "tenha",
+ "tenham",
+ "tenhamos",
+ "tenho",
+ "terei",
+ "teremos",
+ "teria",
+ "teriam",
+ "terá",
+ "terão",
+ "teríamos",
+ "teu",
+ "teus",
+ "teve",
+ "tinha",
+ "tinham",
+ "tive",
+ "tivemos",
+ "tiver",
+ "tivera",
+ "tiveram",
+ "tiverem",
+ "tivermos",
+ "tivesse",
+ "tivessem",
+ "tivéramos",
+ "tivéssemos",
+ "tu",
+ "tua",
+ "tuas",
+ "tém",
+ "tínhamos",
+ "um",
+ "uma",
+ "você",
+ "vocês",
+ "vos",
+ "à",
+ "às",
+ "éramos",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/ro.rs b/vendor/elasticlunr-rs/src/lang/ro.rs
new file mode 100644
index 000000000..8244fe967
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/ro.rs
@@ -0,0 +1,323 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Romanian {}
+
+impl Romanian {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Romanian {
+ fn name(&self) -> String {
+ "Romanian".into()
+ }
+ fn code(&self) -> String {
+ "ro".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-ro", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-ro", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-ro", Algorithm::Romanian)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "acea",
+ "aceasta",
+ "această",
+ "aceea",
+ "acei",
+ "aceia",
+ "acel",
+ "acela",
+ "acele",
+ "acelea",
+ "acest",
+ "acesta",
+ "aceste",
+ "acestea",
+ "aceşti",
+ "aceştia",
+ "acolo",
+ "acord",
+ "acum",
+ "ai",
+ "aia",
+ "aibă",
+ "aici",
+ "al",
+ "ale",
+ "alea",
+ "altceva",
+ "altcineva",
+ "am",
+ "ar",
+ "are",
+ "asemenea",
+ "asta",
+ "astea",
+ "astăzi",
+ "asupra",
+ "au",
+ "avea",
+ "avem",
+ "aveţi",
+ "azi",
+ "aş",
+ "aşadar",
+ "aţi",
+ "bine",
+ "bucur",
+ "bună",
+ "ca",
+ "care",
+ "caut",
+ "ce",
+ "cel",
+ "ceva",
+ "chiar",
+ "cinci",
+ "cine",
+ "cineva",
+ "contra",
+ "cu",
+ "cum",
+ "cumva",
+ "curând",
+ "curînd",
+ "când",
+ "cât",
+ "câte",
+ "câtva",
+ "câţi",
+ "cînd",
+ "cît",
+ "cîte",
+ "cîtva",
+ "cîţi",
+ "că",
+ "căci",
+ "cărei",
+ "căror",
+ "cărui",
+ "către",
+ "da",
+ "dacă",
+ "dar",
+ "datorită",
+ "dată",
+ "dau",
+ "de",
+ "deci",
+ "deja",
+ "deoarece",
+ "departe",
+ "deşi",
+ "din",
+ "dinaintea",
+ "dintr-",
+ "dintre",
+ "doi",
+ "doilea",
+ "două",
+ "drept",
+ "după",
+ "dă",
+ "ea",
+ "ei",
+ "el",
+ "ele",
+ "eram",
+ "este",
+ "eu",
+ "eşti",
+ "face",
+ "fata",
+ "fi",
+ "fie",
+ "fiecare",
+ "fii",
+ "fim",
+ "fiu",
+ "fiţi",
+ "frumos",
+ "fără",
+ "graţie",
+ "halbă",
+ "iar",
+ "ieri",
+ "la",
+ "le",
+ "li",
+ "lor",
+ "lui",
+ "lângă",
+ "lîngă",
+ "mai",
+ "mea",
+ "mei",
+ "mele",
+ "mereu",
+ "meu",
+ "mi",
+ "mie",
+ "mine",
+ "mult",
+ "multă",
+ "mulţi",
+ "mulţumesc",
+ "mâine",
+ "mîine",
+ "mă",
+ "ne",
+ "nevoie",
+ "nici",
+ "nicăieri",
+ "nimeni",
+ "nimeri",
+ "nimic",
+ "nişte",
+ "noastre",
+ "noastră",
+ "noi",
+ "noroc",
+ "nostru",
+ "nouă",
+ "noştri",
+ "nu",
+ "opt",
+ "ori",
+ "oricare",
+ "orice",
+ "oricine",
+ "oricum",
+ "oricând",
+ "oricât",
+ "oricînd",
+ "oricît",
+ "oriunde",
+ "patra",
+ "patru",
+ "patrulea",
+ "pe",
+ "pentru",
+ "peste",
+ "pic",
+ "poate",
+ "pot",
+ "prea",
+ "prima",
+ "primul",
+ "prin",
+ "puţin",
+ "puţina",
+ "puţină",
+ "până",
+ "pînă",
+ "rog",
+ "sa",
+ "sale",
+ "sau",
+ "se",
+ "spate",
+ "spre",
+ "sub",
+ "sunt",
+ "suntem",
+ "sunteţi",
+ "sută",
+ "sînt",
+ "sîntem",
+ "sînteţi",
+ "să",
+ "săi",
+ "său",
+ "ta",
+ "tale",
+ "te",
+ "timp",
+ "tine",
+ "toate",
+ "toată",
+ "tot",
+ "totuşi",
+ "toţi",
+ "trei",
+ "treia",
+ "treilea",
+ "tu",
+ "tăi",
+ "tău",
+ "un",
+ "una",
+ "unde",
+ "undeva",
+ "unei",
+ "uneia",
+ "unele",
+ "uneori",
+ "unii",
+ "unor",
+ "unora",
+ "unu",
+ "unui",
+ "unuia",
+ "unul",
+ "vi",
+ "voastre",
+ "voastră",
+ "voi",
+ "vostru",
+ "vouă",
+ "voştri",
+ "vreme",
+ "vreo",
+ "vreun",
+ "vă",
+ "zece",
+ "zero",
+ "zi",
+ "zice",
+ "îi",
+ "îl",
+ "îmi",
+ "împotriva",
+ "în",
+ "înainte",
+ "înaintea",
+ "încotro",
+ "încât",
+ "încît",
+ "între",
+ "întrucât",
+ "întrucît",
+ "îţi",
+ "ăla",
+ "ălea",
+ "ăsta",
+ "ăstea",
+ "ăştia",
+ "şapte",
+ "şase",
+ "şi",
+ "ştiu",
+ "ţi",
+ "ţie",
+];
diff --git a/vendor/elasticlunr-rs/src/lang/ru.rs b/vendor/elasticlunr-rs/src/lang/ru.rs
new file mode 100644
index 000000000..6b210d540
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/ru.rs
@@ -0,0 +1,463 @@
+use super::{
+ common::{RustStemmer, StopWordFilter, RegexTrimmer},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Russian {}
+
+impl Russian {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Russian {
+ fn name(&self) -> String {
+ "Russian".into()
+ }
+ fn code(&self) -> String {
+ "ru".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-ru", r"\p{Cyrillic}")),
+ Box::new(StopWordFilter::new("stopWordFilter-ru", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-ru", Algorithm::Russian)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "алло",
+ "без",
+ "близко",
+ "более",
+ "больше",
+ "будем",
+ "будет",
+ "будете",
+ "будешь",
+ "будто",
+ "буду",
+ "будут",
+ "будь",
+ "бы",
+ "бывает",
+ "бывь",
+ "был",
+ "была",
+ "были",
+ "было",
+ "быть",
+ "в",
+ "важная",
+ "важное",
+ "важные",
+ "важный",
+ "вам",
+ "вами",
+ "вас",
+ "ваш",
+ "ваша",
+ "ваше",
+ "ваши",
+ "вверх",
+ "вдали",
+ "вдруг",
+ "ведь",
+ "везде",
+ "весь",
+ "вниз",
+ "внизу",
+ "во",
+ "вокруг",
+ "вон",
+ "восемнадцатый",
+ "восемнадцать",
+ "восемь",
+ "восьмой",
+ "вот",
+ "впрочем",
+ "времени",
+ "время",
+ "все",
+ "всегда",
+ "всего",
+ "всем",
+ "всеми",
+ "всему",
+ "всех",
+ "всею",
+ "всю",
+ "всюду",
+ "вся",
+ "всё",
+ "второй",
+ "вы",
+ "г",
+ "где",
+ "говорил",
+ "говорит",
+ "год",
+ "года",
+ "году",
+ "да",
+ "давно",
+ "даже",
+ "далеко",
+ "дальше",
+ "даром",
+ "два",
+ "двадцатый",
+ "двадцать",
+ "две",
+ "двенадцатый",
+ "двенадцать",
+ "двух",
+ "девятнадцатый",
+ "девятнадцать",
+ "девятый",
+ "девять",
+ "действительно",
+ "дел",
+ "день",
+ "десятый",
+ "десять",
+ "для",
+ "до",
+ "довольно",
+ "долго",
+ "должно",
+ "другая",
+ "другие",
+ "других",
+ "друго",
+ "другое",
+ "другой",
+ "е",
+ "его",
+ "ее",
+ "ей",
+ "ему",
+ "если",
+ "есть",
+ "еще",
+ "ещё",
+ "ею",
+ "её",
+ "ж",
+ "же",
+ "жизнь",
+ "за",
+ "занят",
+ "занята",
+ "занято",
+ "заняты",
+ "затем",
+ "зато",
+ "зачем",
+ "здесь",
+ "значит",
+ "и",
+ "из",
+ "или",
+ "им",
+ "именно",
+ "иметь",
+ "ими",
+ "имя",
+ "иногда",
+ "их",
+ "к",
+ "каждая",
+ "каждое",
+ "каждые",
+ "каждый",
+ "кажется",
+ "как",
+ "какая",
+ "какой",
+ "кем",
+ "когда",
+ "кого",
+ "ком",
+ "кому",
+ "конечно",
+ "которая",
+ "которого",
+ "которой",
+ "которые",
+ "который",
+ "которых",
+ "кроме",
+ "кругом",
+ "кто",
+ "куда",
+ "лет",
+ "ли",
+ "лишь",
+ "лучше",
+ "люди",
+ "м",
+ "мало",
+ "между",
+ "меля",
+ "менее",
+ "меньше",
+ "меня",
+ "миллионов",
+ "мимо",
+ "мира",
+ "мне",
+ "много",
+ "многочисленная",
+ "многочисленное",
+ "многочисленные",
+ "многочисленный",
+ "мной",
+ "мною",
+ "мог",
+ "могут",
+ "мож",
+ "может",
+ "можно",
+ "можхо",
+ "мои",
+ "мой",
+ "мор",
+ "мочь",
+ "моя",
+ "моё",
+ "мы",
+ "на",
+ "наверху",
+ "над",
+ "надо",
+ "назад",
+ "наиболее",
+ "наконец",
+ "нам",
+ "нами",
+ "нас",
+ "начала",
+ "наш",
+ "наша",
+ "наше",
+ "наши",
+ "не",
+ "него",
+ "недавно",
+ "недалеко",
+ "нее",
+ "ней",
+ "нельзя",
+ "нем",
+ "немного",
+ "нему",
+ "непрерывно",
+ "нередко",
+ "несколько",
+ "нет",
+ "нею",
+ "неё",
+ "ни",
+ "нибудь",
+ "ниже",
+ "низко",
+ "никогда",
+ "никуда",
+ "ними",
+ "них",
+ "ничего",
+ "но",
+ "ну",
+ "нужно",
+ "нх",
+ "о",
+ "об",
+ "оба",
+ "обычно",
+ "один",
+ "одиннадцатый",
+ "одиннадцать",
+ "однажды",
+ "однако",
+ "одного",
+ "одной",
+ "около",
+ "он",
+ "она",
+ "они",
+ "оно",
+ "опять",
+ "особенно",
+ "от",
+ "отовсюду",
+ "отсюда",
+ "очень",
+ "первый",
+ "перед",
+ "по",
+ "под",
+ "пожалуйста",
+ "позже",
+ "пока",
+ "пор",
+ "пора",
+ "после",
+ "посреди",
+ "потом",
+ "потому",
+ "почему",
+ "почти",
+ "прекрасно",
+ "при",
+ "про",
+ "просто",
+ "против",
+ "процентов",
+ "пятнадцатый",
+ "пятнадцать",
+ "пятый",
+ "пять",
+ "раз",
+ "разве",
+ "рано",
+ "раньше",
+ "рядом",
+ "с",
+ "сам",
+ "сама",
+ "сами",
+ "самим",
+ "самими",
+ "самих",
+ "само",
+ "самого",
+ "самой",
+ "самом",
+ "самому",
+ "саму",
+ "свое",
+ "своего",
+ "своей",
+ "свои",
+ "своих",
+ "свою",
+ "сеаой",
+ "себе",
+ "себя",
+ "сегодня",
+ "седьмой",
+ "сейчас",
+ "семнадцатый",
+ "семнадцать",
+ "семь",
+ "сих",
+ "сказал",
+ "сказала",
+ "сказать",
+ "сколько",
+ "слишком",
+ "сначала",
+ "снова",
+ "со",
+ "собой",
+ "собою",
+ "совсем",
+ "спасибо",
+ "стал",
+ "суть",
+ "т",
+ "та",
+ "так",
+ "такая",
+ "также",
+ "такие",
+ "такое",
+ "такой",
+ "там",
+ "твой",
+ "твоя",
+ "твоё",
+ "те",
+ "тебе",
+ "тебя",
+ "тем",
+ "теми",
+ "теперь",
+ "тех",
+ "то",
+ "тобой",
+ "тобою",
+ "тогда",
+ "того",
+ "тоже",
+ "только",
+ "том",
+ "тому",
+ "тот",
+ "тою",
+ "третий",
+ "три",
+ "тринадцатый",
+ "тринадцать",
+ "ту",
+ "туда",
+ "тут",
+ "ты",
+ "тысяч",
+ "у",
+ "уж",
+ "уже",
+ "уметь",
+ "хорошо",
+ "хотеть",
+ "хоть",
+ "хотя",
+ "хочешь",
+ "часто",
+ "чаще",
+ "чего",
+ "человек",
+ "чем",
+ "чему",
+ "через",
+ "четвертый",
+ "четыре",
+ "четырнадцатый",
+ "четырнадцать",
+ "что",
+ "чтоб",
+ "чтобы",
+ "чуть",
+ "шестнадцатый",
+ "шестнадцать",
+ "шестой",
+ "шесть",
+ "эта",
+ "эти",
+ "этим",
+ "этими",
+ "этих",
+ "это",
+ "этого",
+ "этой",
+ "этом",
+ "этому",
+ "этот",
+ "эту",
+ "я",
+ "а",
+];
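The Russian pipeline above applies three stages to every token, in order: the Cyrillic trimmer, this stop-word list, and the Snowball Russian stemmer. Below is a minimal sketch of pushing tokens through that queue by hand, relying only on the PipelineFn::filter contract (a None result drops the token). It assumes the crate is consumed as elasticlunr, that Language and Russian are re-exported from its lang module, that the pipeline module and its PipelineFn trait are public, and that Pipeline::queue is public, as its struct-literal construction above suggests; the sample text is illustrative.

use elasticlunr::lang::{Language, Russian};
use elasticlunr::pipeline::PipelineFn;

fn main() {
    let lang = Russian::new();
    let pipeline = lang.make_pipeline();

    // tokenize_whitespace splits the text into word tokens; each token then
    // passes through the trimmer, the stop-word filter, and the stemmer.
    let tokens = lang.tokenize("поиск и индексация");

    let processed: Vec<String> = tokens
        .into_iter()
        .filter_map(|tok| {
            // Apply every stage in the queue; any stage may remove the token
            // by returning None (e.g. the stop word "и").
            pipeline.queue.iter().try_fold(tok, |t, f| f.filter(t))
        })
        .collect();

    println!("{:?}", processed);
}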
diff --git a/vendor/elasticlunr-rs/src/lang/sv.rs b/vendor/elasticlunr-rs/src/lang/sv.rs
new file mode 100644
index 000000000..29beeb7b1
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/sv.rs
@@ -0,0 +1,51 @@
+use super::{
+ common::{RegexTrimmer, RustStemmer, StopWordFilter},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Swedish {}
+
+impl Swedish {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Swedish {
+ fn name(&self) -> String {
+ "Swedish".into()
+ }
+ fn code(&self) -> String {
+ "sv".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-sv", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-sv", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-sv", Algorithm::Swedish)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "", "alla", "allt", "att", "av", "blev", "bli", "blir", "blivit", "de", "dem", "den", "denna",
+ "deras", "dess", "dessa", "det", "detta", "dig", "din", "dina", "ditt", "du", "där", "då",
+ "efter", "ej", "eller", "en", "er", "era", "ert", "ett", "från", "för", "ha", "hade", "han",
+ "hans", "har", "henne", "hennes", "hon", "honom", "hur", "här", "i", "icke", "ingen", "inom",
+ "inte", "jag", "ju", "kan", "kunde", "man", "med", "mellan", "men", "mig", "min", "mina",
+ "mitt", "mot", "mycket", "ni", "nu", "när", "någon", "något", "några", "och", "om", "oss",
+ "på", "samma", "sedan", "sig", "sin", "sina", "sitta", "själv", "skulle", "som", "så", "sådan",
+ "sådana", "sådant", "till", "under", "upp", "ut", "utan", "vad", "var", "vara", "varför",
+ "varit", "varje", "vars", "vart", "vem", "vi", "vid", "vilka", "vilkas", "vilken", "vilket",
+ "vår", "våra", "vårt", "än", "är", "åt", "över",
+];
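The Swedish module follows the same three-stage layout. Its stop-word stage can be exercised on its own; the following is a hypothetical unit-test sketch (not part of the vendored file) that could sit at the bottom of sv.rs, using only StopWordFilter::new and PipelineFn::filter and relying on the module's imports being visible through "use super::*".

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::PipelineFn;

    #[test]
    fn swedish_stop_words_are_dropped() {
        let stop = StopWordFilter::new("stopWordFilter-sv", STOP_WORDS);
        // "och" appears in STOP_WORDS above, so the filter removes it...
        assert_eq!(stop.filter("och".to_string()), None);
        // ...while an ordinary content word passes through unchanged.
        assert_eq!(
            stop.filter("sökning".to_string()),
            Some("sökning".to_string())
        );
    }
}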
diff --git a/vendor/elasticlunr-rs/src/lang/tr.rs b/vendor/elasticlunr-rs/src/lang/tr.rs
new file mode 100644
index 000000000..1aea580fa
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/tr.rs
@@ -0,0 +1,251 @@
+use super::{
+ common::{RegexTrimmer, RustStemmer, StopWordFilter},
+ Language,
+};
+use crate::pipeline::Pipeline;
+use rust_stemmers::Algorithm;
+
+#[derive(Clone)]
+pub struct Turkish {}
+
+impl Turkish {
+ pub fn new() -> Self {
+ Self {}
+ }
+}
+
+impl Language for Turkish {
+ fn name(&self) -> String {
+ "Turkish".into()
+ }
+ fn code(&self) -> String {
+ "tr".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ super::tokenize_whitespace(text)
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-tr", r"\p{Latin}")),
+ Box::new(StopWordFilter::new("stopWordFilter-tr", STOP_WORDS)),
+ Box::new(RustStemmer::new("stemmer-tr", Algorithm::Turkish)),
+ ],
+ }
+ }
+}
+
+const STOP_WORDS: &[&str] = &[
+ "",
+ "acaba",
+ "altmış",
+ "altı",
+ "ama",
+ "ancak",
+ "arada",
+ "aslında",
+ "ayrıca",
+ "bana",
+ "bazı",
+ "belki",
+ "ben",
+ "benden",
+ "beni",
+ "benim",
+ "beri",
+ "beş",
+ "bile",
+ "bin",
+ "bir",
+ "biri",
+ "birkaç",
+ "birkez",
+ "birçok",
+ "birşey",
+ "birşeyi",
+ "biz",
+ "bizden",
+ "bize",
+ "bizi",
+ "bizim",
+ "bu",
+ "buna",
+ "bunda",
+ "bundan",
+ "bunlar",
+ "bunları",
+ "bunların",
+ "bunu",
+ "bunun",
+ "burada",
+ "böyle",
+ "böylece",
+ "da",
+ "daha",
+ "dahi",
+ "de",
+ "defa",
+ "değil",
+ "diye",
+ "diğer",
+ "doksan",
+ "dokuz",
+ "dolayı",
+ "dolayısıyla",
+ "dört",
+ "edecek",
+ "eden",
+ "ederek",
+ "edilecek",
+ "ediliyor",
+ "edilmesi",
+ "ediyor",
+ "elli",
+ "en",
+ "etmesi",
+ "etti",
+ "ettiği",
+ "ettiğini",
+ "eğer",
+ "gibi",
+ "göre",
+ "halen",
+ "hangi",
+ "hatta",
+ "hem",
+ "henüz",
+ "hep",
+ "hepsi",
+ "her",
+ "herhangi",
+ "herkesin",
+ "hiç",
+ "hiçbir",
+ "iki",
+ "ile",
+ "ilgili",
+ "ise",
+ "itibaren",
+ "itibariyle",
+ "için",
+ "işte",
+ "kadar",
+ "karşın",
+ "katrilyon",
+ "kendi",
+ "kendilerine",
+ "kendini",
+ "kendisi",
+ "kendisine",
+ "kendisini",
+ "kez",
+ "ki",
+ "kim",
+ "kimden",
+ "kime",
+ "kimi",
+ "kimse",
+ "kırk",
+ "milyar",
+ "milyon",
+ "mu",
+ "mü",
+ "mı",
+ "nasıl",
+ "ne",
+ "neden",
+ "nedenle",
+ "nerde",
+ "nerede",
+ "nereye",
+ "niye",
+ "niçin",
+ "o",
+ "olan",
+ "olarak",
+ "oldu",
+ "olduklarını",
+ "olduğu",
+ "olduğunu",
+ "olmadı",
+ "olmadığı",
+ "olmak",
+ "olması",
+ "olmayan",
+ "olmaz",
+ "olsa",
+ "olsun",
+ "olup",
+ "olur",
+ "olursa",
+ "oluyor",
+ "on",
+ "ona",
+ "ondan",
+ "onlar",
+ "onlardan",
+ "onları",
+ "onların",
+ "onu",
+ "onun",
+ "otuz",
+ "oysa",
+ "pek",
+ "rağmen",
+ "sadece",
+ "sanki",
+ "sekiz",
+ "seksen",
+ "sen",
+ "senden",
+ "seni",
+ "senin",
+ "siz",
+ "sizden",
+ "sizi",
+ "sizin",
+ "tarafından",
+ "trilyon",
+ "tüm",
+ "var",
+ "vardı",
+ "ve",
+ "veya",
+ "ya",
+ "yani",
+ "yapacak",
+ "yapmak",
+ "yaptı",
+ "yaptıkları",
+ "yaptığı",
+ "yaptığını",
+ "yapılan",
+ "yapılması",
+ "yapıyor",
+ "yedi",
+ "yerine",
+ "yetmiş",
+ "yine",
+ "yirmi",
+ "yoksa",
+ "yüz",
+ "zaten",
+ "çok",
+ "çünkü",
+ "öyle",
+ "üzere",
+ "üç",
+ "şey",
+ "şeyden",
+ "şeyi",
+ "şeyler",
+ "şu",
+ "şuna",
+ "şunda",
+ "şundan",
+ "şunları",
+ "şunu",
+ "şöyle",
+];
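The stemmer-tr stage wraps the Snowball Turkish algorithm from the rust_stemmers crate (RustStemmer::new("stemmer-tr", Algorithm::Turkish) above). A small sketch of calling rust_stemmers directly to see what that stage does to suffix-heavy Turkish forms; the sample words are illustrative and the outputs are printed rather than asserted.

use rust_stemmers::{Algorithm, Stemmer};

fn main() {
    // The same algorithm the pipeline's RustStemmer wrapper delegates to.
    let stemmer = Stemmer::create(Algorithm::Turkish);

    for word in ["kitaplar", "kitaplarımız", "evlerden"] {
        // stem() returns a Cow<str> holding the reduced form.
        println!("{} -> {}", word, stemmer.stem(word));
    }
}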
diff --git a/vendor/elasticlunr-rs/src/lang/zh.rs b/vendor/elasticlunr-rs/src/lang/zh.rs
new file mode 100644
index 000000000..aa10d758f
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/zh.rs
@@ -0,0 +1,55 @@
+use super::{common::RegexTrimmer, Language};
+use crate::pipeline::{FnWrapper, Pipeline};
+
+#[derive(Clone)]
+pub struct Chinese {
+ jieba: jieba_rs::Jieba,
+}
+
+impl Chinese {
+ pub fn new() -> Self {
+ Self {
+ jieba: jieba_rs::Jieba::new(),
+ }
+ }
+}
+
+impl Language for Chinese {
+ fn name(&self) -> String {
+ "Chinese".into()
+ }
+ fn code(&self) -> String {
+ "zh".into()
+ }
+
+ fn tokenize(&self, text: &str) -> Vec<String> {
+ self.jieba
+ .cut_for_search(text, false)
+ .iter()
+ .map(|s| s.to_string())
+ .collect()
+ }
+
+ fn make_pipeline(&self) -> Pipeline {
+ Pipeline {
+ queue: vec![
+ Box::new(RegexTrimmer::new("trimmer-zh", r"\p{Unified_Ideograph}\p{Latin}")),
+ Box::new(FnWrapper("stopWordFilter-zh".into(), stop_word_filter)),
+ Box::new(FnWrapper("stemmer-zh".into(), stemmer)),
+ ],
+ }
+ }
+}
+
+// TODO: lunr.zh.js has a much larger set of stop words
+fn stop_word_filter(token: String) -> Option<String> {
+ match token.as_str() {
+ "的" | "了" => None,
+ _ => Some(token),
+ }
+}
+
+// lunr.zh.js also uses a no-op stemmer, so this stage passes tokens through unchanged
+fn stemmer(token: String) -> Option<String> {
+ Some(token)
+}
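Unlike the whitespace tokenizers used by the other languages, Chinese::tokenize hands the text to jieba's search-engine segmentation before the trimmer and stop-word filter run. A minimal sketch of that segmentation step using jieba_rs directly; the sample phrase is illustrative.

use jieba_rs::Jieba;

fn main() {
    let jieba = Jieba::new();

    // Mirrors the call in Chinese::tokenize above: search-engine style
    // segmentation with the HMM for unknown words disabled.
    let tokens = jieba.cut_for_search("全文搜索引擎", false);

    // cut_for_search returns Vec<&str>; Chinese::tokenize converts these
    // to owned Strings before the pipeline runs.
    println!("{:?}", tokens);
}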