summary | refs | log | tree | commit | diff | stats
path: root/vendor/elasticlunr-rs/src/lang/common.rs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree 173a775858bd501c378080a10dca74132f05bc50 /vendor/elasticlunr-rs/src/lang/common.rs
parent Initial commit. (diff)
download rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
download rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/elasticlunr-rs/src/lang/common.rs')
-rw-r--r-- vendor/elasticlunr-rs/src/lang/common.rs 97
1 files changed, 97 insertions, 0 deletions
diff --git a/vendor/elasticlunr-rs/src/lang/common.rs b/vendor/elasticlunr-rs/src/lang/common.rs
new file mode 100644
index 000000000..5616f0138
--- /dev/null
+++ b/vendor/elasticlunr-rs/src/lang/common.rs
@@ -0,0 +1,97 @@
+use crate::pipeline::PipelineFn;
+use regex::Regex;
+use std::collections::HashSet;
+
+/// Pipeline stage that rejects any token present in a fixed set of stop words.
+#[derive(Clone)]
+pub struct StopWordFilter {
+    /// Identifier handed back by the `PipelineFn::name` implementation.
+    name: String,
+    /// Owned copies of the words that `filter` rejects.
+    stop_words: HashSet<String>,
+}
+
+impl StopWordFilter {
+    /// Creates a filter labelled `name` that owns a copy of every entry in `stop_words`.
+    ///
+    /// Duplicate entries collapse into one because the words are stored in a `HashSet`.
+    pub fn new(name: &str, stop_words: &[&str]) -> Self {
+        let words: HashSet<String> = stop_words.iter().copied().map(String::from).collect();
+        Self {
+            name: name.to_owned(),
+            stop_words: words,
+        }
+    }
+}
+
+impl PipelineFn for StopWordFilter {
+    /// Returns an owned copy of the name supplied at construction.
+    fn name(&self) -> String {
+        self.name.to_owned()
+    }
+
+    /// Returns `None` when `token` is one of the stop words; otherwise hands the
+    /// token back untouched.
+    fn filter(&self, token: String) -> Option<String> {
+        if self.stop_words.contains(&token) {
+            return None;
+        }
+        Some(token)
+    }
+}
+
+/// Pipeline stage that strips leading and trailing runs of non-word characters
+/// from a token using a compiled regular expression.
+#[derive(Clone)]
+pub struct RegexTrimmer {
+    /// Identifier handed back by the `PipelineFn::name` implementation.
+    name: String,
+    /// Matches a run of non-word characters anchored at the start or the end of the input.
+    trimmer: Regex,
+}
+
+impl RegexTrimmer {
+    /// Creates a trimmer labelled `name`.
+    ///
+    /// `word_chars` is spliced verbatim into a negated character class
+    /// (`[^...]`), so it must already be valid inside regex brackets.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the pattern assembled from `word_chars` fails to compile.
+    pub fn new(name: &str, word_chars: &str) -> Self {
+        let pattern = format!("^[^{0}]+|[^{0}]+$", word_chars);
+        Self {
+            name: name.to_owned(),
+            trimmer: Regex::new(&pattern).unwrap(),
+        }
+    }
+}
+
+impl PipelineFn for RegexTrimmer {
+    /// Returns an owned copy of the name supplied at construction.
+    fn name(&self) -> String {
+        self.name.to_owned()
+    }
+
+    /// Removes every match of the trimming regex from `token`.
+    ///
+    /// Returns `None` when nothing is left, and reuses the original `String`
+    /// when the regex changed nothing.
+    fn filter(&self, token: String) -> Option<String> {
+        let trimmed = self.trimmer.replace_all(&token, "");
+        if trimmed.is_empty() {
+            return None;
+        }
+        if trimmed == token {
+            // Nothing was stripped: give the caller its own buffer back.
+            return Some(token);
+        }
+        Some(trimmed.into())
+    }
+}
+
+/// Pipeline stage that wraps a `rust_stemmers::Stemmer`.
+///
+/// Only compiled when the `rust-stemmers` feature is enabled.
+#[cfg(feature = "rust-stemmers")]
+pub struct RustStemmer {
+    /// Identifier handed back by the `PipelineFn::name` implementation.
+    name: String,
+    /// Stemmer created for the algorithm passed to `new`.
+    stemmer: rust_stemmers::Stemmer,
+}
+
+#[cfg(feature = "rust-stemmers")]
+impl RustStemmer {
+    /// Creates a stage labelled `name` backed by a stemmer for `algo`.
+    pub fn new(name: &str, algo: rust_stemmers::Algorithm) -> Self {
+        let stemmer = rust_stemmers::Stemmer::create(algo);
+        Self {
+            name: name.to_owned(),
+            stemmer,
+        }
+    }
+}
+
+#[cfg(feature = "rust-stemmers")]
+impl PipelineFn for RustStemmer {
+    /// Returns an owned copy of the name supplied at construction.
+    fn name(&self) -> String {
+        self.name.to_owned()
+    }
+
+    /// Runs `token` through the wrapped stemmer.
+    ///
+    /// Returns `None` when the stemmed form is empty, and reuses the original
+    /// `String` when stemming changed nothing.
+    fn filter(&self, token: String) -> Option<String> {
+        let stemmed = self.stemmer.stem(&token);
+        if stemmed.is_empty() {
+            return None;
+        }
+        if stemmed == token {
+            // Already in stemmed form: give the caller its own buffer back.
+            return Some(token);
+        }
+        Some(stemmed.into())
+    }
+}