Merging upstream version 126.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-15 03:35:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-15 03:35:49 +0000
commit: d8bbc7858622b6d9c278469aab701ca0b609cddf (patch)
tree: eff41dc61d9f714852212739e6b3738b82a2af87 /third_party/rust/textwrap/src/word_splitters.rs
parent: Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff)
download: firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz
firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip
1 files changed, 314 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/src/word_splitters.rs b/third_party/rust/textwrap/src/word_splitters.rs
new file mode 100644
index 0000000000..e2dc6aa01f
--- /dev/null
+++ b/third_party/rust/textwrap/src/word_splitters.rs
@@ -0,0 +1,314 @@
+//! Word splitting functionality.
+//!
+//! To wrap text into lines, long words sometimes need to be split
+//! across lines. The [`WordSplitter`] enum defines this
+//! functionality.
+
+use crate::core::{display_width, Word};
+
+/// The `WordSplitter` enum describes where words can be split.
+///
+/// If the textwrap crate has been compiled with the `hyphenation`
+/// Cargo feature enabled, you will find a
+/// [`WordSplitter::Hyphenation`] variant. Use this variant for
+/// language-aware hyphenation:
+///
+/// ```
+/// #[cfg(feature = "hyphenation")] {
+///     use hyphenation::{Language, Load, Standard};
+///     use textwrap::{wrap, Options, WordSplitter};
+///
+///     let text = "Oxidation is the loss of electrons.";
+///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+///     let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
+///     assert_eq!(wrap(text, &options), vec!["Oxida-",
+///                                           "tion is",
+///                                           "the loss",
+///                                           "of elec-",
+///                                           "trons."]);
+/// }
+/// ```
+///
+/// Please see the documentation for the [hyphenation] crate for more
+/// details.
+///
+/// [hyphenation]: https://docs.rs/hyphenation/
+#[derive(Clone)]
+pub enum WordSplitter {
+    /// Use this as a [`Options.word_splitter`] to avoid any kind of
+    /// hyphenation:
+    ///
+    /// ```
+    /// use textwrap::{wrap, Options, WordSplitter};
+    ///
+    /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
+    /// assert_eq!(wrap("foo bar-baz", &options),
+    ///            vec!["foo", "bar-baz"]);
+    /// ```
+    ///
+    /// [`Options.word_splitter`]: super::Options::word_splitter
+    NoHyphenation,
+
+    /// `HyphenSplitter` is the default `WordSplitter` used by
+    /// [`Options::new`](super::Options::new). It will split words on
+    /// existing hyphens in the word.
+    ///
+    /// It will only use hyphens that are surrounded by alphanumeric
+    /// characters, which prevents a word like `"--foo-bar"` from
+    /// being split into `"--"` and `"foo-bar"`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use textwrap::WordSplitter;
+    ///
+    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
+    ///            vec![6]);
+    /// ```
+    HyphenSplitter,
+
+    /// Use a custom function as the word splitter.
+    ///
+    /// This variant lets you implement a custom word splitter using
+    /// your own function. The function is called with the word and
+    /// [`WordSplitter::split_points`] returns its result unchanged.
+    ///
+    /// Note that the `PartialEq` implementation never treats two
+    /// `Custom` splitters as equal, even when both wrap the same
+    /// function.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use textwrap::WordSplitter;
+    ///
+    /// fn split_at_underscore(word: &str) -> Vec<usize> {
+    ///     word.match_indices('_').map(|(idx, _)| idx + 1).collect()
+    /// }
+    ///
+    /// let word_splitter = WordSplitter::Custom(split_at_underscore);
+    /// assert_eq!(word_splitter.split_points("a_long_identifier"),
+    ///            vec![2, 7]);
+    /// ```
+    Custom(fn(word: &str) -> Vec<usize>),
+
+    /// A hyphenation dictionary can be used to do language-specific
+    /// hyphenation using patterns from the [hyphenation] crate.
+    ///
+    /// Two `Hyphenation` splitters compare equal when their
+    /// dictionaries report the same language.
+    ///
+    /// **Note:** Only available when the `hyphenation` Cargo feature is
+    /// enabled.
+    ///
+    /// [hyphenation]: https://docs.rs/hyphenation/
+    #[cfg(feature = "hyphenation")]
+    Hyphenation(hyphenation::Standard),
+}
+
+// Compact `Debug` output: the variant name, with the function pointer
+// elided for `Custom` and the dictionary reduced to its language for
+// `Hyphenation`.
+impl std::fmt::Debug for WordSplitter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Most variants render as a fixed label; only the
+        // dictionary-backed variant needs real formatting, so it
+        // returns early.
+        let label = match self {
+            Self::NoHyphenation => "NoHyphenation",
+            Self::HyphenSplitter => "HyphenSplitter",
+            Self::Custom(_) => "Custom(...)",
+            #[cfg(feature = "hyphenation")]
+            Self::Hyphenation(dict) => return write!(f, "Hyphenation({})", dict.language()),
+        };
+        f.write_str(label)
+    }
+}
+
+// Equality is decided per variant: the payload-free variants equal
+// themselves, `Hyphenation` compares dictionary languages, and every
+// other combination (including `Custom` against `Custom`) is unequal.
+impl PartialEq<WordSplitter> for WordSplitter {
+    fn eq(&self, other: &WordSplitter) -> bool {
+        match self {
+            Self::NoHyphenation => matches!(other, Self::NoHyphenation),
+            Self::HyphenSplitter => matches!(other, Self::HyphenSplitter),
+            // Custom splitters are never reported as equal, not even
+            // to themselves.
+            Self::Custom(_) => false,
+            #[cfg(feature = "hyphenation")]
+            Self::Hyphenation(this_dict) => match other {
+                Self::Hyphenation(other_dict) => this_dict.language() == other_dict.language(),
+                _ => false,
+            },
+        }
+    }
+}
+
+impl WordSplitter {
+    /// Compute every index at which `word` may be broken.
+    ///
+    /// Each index lies in the range `0..word.len()` and refers to
+    /// the position _after_ the split point, i.e., just past the `-`
+    /// when splitting on hyphens. As a result, `word.split_at(idx)`
+    /// yields two well-formed pieces.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use textwrap::WordSplitter;
+    /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
+    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
+    /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
+    /// ```
+    pub fn split_points(&self, word: &str) -> Vec<usize> {
+        match self {
+            Self::NoHyphenation => Vec::new(),
+            Self::HyphenSplitter => {
+                // A hyphen is only usable when it has an alphanumeric
+                // character on both sides. This avoids splitting on
+                // repeated hyphens, such as those found in --foo-bar.
+                let is_alnum = |ch: Option<char>| ch.map_or(false, char::is_alphanumeric);
+
+                word.match_indices('-')
+                    .filter(|&(pos, _)| {
+                        is_alnum(word[..pos].chars().next_back())
+                            && is_alnum(word[pos + 1..].chars().next())
+                    })
+                    .map(|(pos, _)| pos + 1) // +1 due to width of '-'.
+                    .collect()
+            }
+            Self::Custom(custom_fn) => custom_fn(word),
+            #[cfg(feature = "hyphenation")]
+            Self::Hyphenation(dict) => {
+                use hyphenation::Hyphenator;
+                dict.hyphenate(word).breaks
+            }
+        }
+    }
+}
+
+/// Break each word into fragments at the split points reported by
+/// `word_splitter`.
+///
+/// Every word is split, whether or not it is long. This keeps the
+/// business of splitting (including automatic hyphenation) cleanly
+/// apart from the business of word wrapping.
+pub fn split_words<'a, I>(
+    words: I,
+    word_splitter: &'a WordSplitter,
+) -> impl Iterator<Item = Word<'a>>
+where
+    I: IntoIterator<Item = Word<'a>>,
+{
+    words.into_iter().flat_map(move |word| {
+        // Byte offset at which the next fragment begins. After the
+        // final fragment has been produced it is moved past the end
+        // of the word so that nothing further is yielded.
+        let mut start = 0;
+        let mut points = word_splitter.split_points(&word).into_iter();
+        std::iter::from_fn(move || match points.next() {
+            // Interior fragment: it carries no trailing whitespace,
+            // and gets a "-" penalty unless the text up to the split
+            // point already ends with a hyphen.
+            Some(end) => {
+                let penalty = if word[..end].ends_with('-') { "" } else { "-" };
+                let fragment = Word {
+                    word: &word.word[start..end],
+                    width: display_width(&word[start..end]),
+                    whitespace: "",
+                    penalty,
+                };
+                start = end;
+                Some(fragment)
+            }
+            // Final fragment: it inherits the whitespace and penalty
+            // of the original word. The `start == 0` case makes sure
+            // a word whose text is empty is still yielded once.
+            None if start < word.word.len() || start == 0 => {
+                let tail = Word {
+                    word: &word.word[start..],
+                    width: display_width(&word[start..]),
+                    whitespace: word.whitespace,
+                    penalty: word.penalty,
+                };
+                start = word.word.len() + 1;
+                Some(tail)
+            }
+            None => None,
+        })
+    })
+}
+
+// Unit tests for `split_words` using the different `WordSplitter`
+// variants.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Like assert_eq!, but the left expression is an iterator. The
+    // iterator is collected into a `Vec` before the comparison.
+    macro_rules! assert_iter_eq {
+        ($left:expr, $right:expr) => {
+            assert_eq!($left.collect::<Vec<_>>(), $right);
+        };
+    }
+
+    // No input words means no output words.
+    #[test]
+    fn split_words_no_words() {
+        assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
+    }
+
+    // A word built from whitespace only comes back unchanged.
+    #[test]
+    fn split_words_empty_word() {
+        assert_iter_eq!(
+            split_words(vec![Word::from("   ")], &WordSplitter::HyphenSplitter),
+            vec![Word::from("   ")]
+        );
+    }
+
+    // A word without any hyphen passes through as a single word.
+    #[test]
+    fn split_words_single_word() {
+        assert_iter_eq!(
+            split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
+            vec![Word::from("foobar")]
+        );
+    }
+
+    // The hyphen stays attached to the first fragment.
+    #[test]
+    fn split_words_hyphen_splitter() {
+        assert_iter_eq!(
+            split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
+            vec![Word::from("foo-"), Word::from("bar")]
+        );
+    }
+
+    // With `NoHyphenation`, a hyphenated word is left intact.
+    #[test]
+    fn split_words_no_hyphenation() {
+        assert_iter_eq!(
+            split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
+            vec![Word::from("foo-bar")]
+        );
+    }
+
+    // Checks when a "-" penalty is attached to a split-off fragment.
+    #[test]
+    fn split_words_adds_penalty() {
+        // Custom splitter that always splits at byte index 3.
+        let fixed_split_point = |_: &str| vec![3];
+
+        // "foo" does not end with a hyphen, so it gets a "-" penalty.
+        assert_iter_eq!(
+            split_words(
+                vec![Word::from("foobar")].into_iter(),
+                &WordSplitter::Custom(fixed_split_point)
+            ),
+            vec![
+                Word {
+                    word: "foo",
+                    width: 3,
+                    whitespace: "",
+                    penalty: "-"
+                },
+                Word {
+                    word: "bar",
+                    width: 3,
+                    whitespace: "",
+                    penalty: ""
+                }
+            ]
+        );
+
+        // "fo-" already ends with a hyphen, so no penalty is added.
+        assert_iter_eq!(
+            split_words(
+                vec![Word::from("fo-bar")].into_iter(),
+                &WordSplitter::Custom(fixed_split_point)
+            ),
+            vec![
+                Word {
+                    word: "fo-",
+                    width: 3,
+                    whitespace: "",
+                    penalty: ""
+                },
+                Word {
+                    word: "bar",
+                    width: 3,
+                    whitespace: "",
+                    penalty: ""
+                }
+            ]
+        );
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-15 03:35:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-15 03:35:49 +0000
commit	d8bbc7858622b6d9c278469aab701ca0b609cddf (patch)
tree	eff41dc61d9f714852212739e6b3738b82a2af87 /third_party/rust/textwrap/src/word_splitters.rs
parent	Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff)
download	firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip