summaryrefslogtreecommitdiffstats
path: root/third_party/rust/textwrap/src/wrap.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/textwrap/src/wrap.rs')
-rw-r--r--third_party/rust/textwrap/src/wrap.rs686
1 files changed, 686 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/src/wrap.rs b/third_party/rust/textwrap/src/wrap.rs
new file mode 100644
index 0000000000..a7f2ccf298
--- /dev/null
+++ b/third_party/rust/textwrap/src/wrap.rs
@@ -0,0 +1,686 @@
+//! Functions for wrapping text.
+
+use std::borrow::Cow;
+
+use crate::core::{break_words, display_width, Word};
+use crate::word_splitters::split_words;
+use crate::Options;
+
+/// Wrap a line of text at a given width.
+///
+/// The result is a vector of lines, each line is of type [`Cow<'_,
+/// str>`](Cow), which means that the line will borrow from the input
+/// `&str` if possible. The lines do not have trailing whitespace,
+/// including a final `'\n'`. Please use [`fill()`](crate::fill()) if
+/// you need a [`String`] instead.
+///
+/// The easiest way to use this function is to pass an integer for
+/// `width_or_options`:
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// let lines = wrap("Memory safety without garbage collection.", 15);
+/// assert_eq!(lines, &[
+/// "Memory safety",
+/// "without garbage",
+/// "collection.",
+/// ]);
+/// ```
+///
+/// If you need to customize the wrapping, you can pass an [`Options`]
+/// instead of a `usize`:
+///
+/// ```
+/// use textwrap::{wrap, Options};
+///
+/// let options = Options::new(15)
+/// .initial_indent("- ")
+/// .subsequent_indent(" ");
+/// let lines = wrap("Memory safety without garbage collection.", &options);
+/// assert_eq!(lines, &[
+/// "- Memory safety",
+/// " without",
+/// " garbage",
+/// " collection.",
+/// ]);
+/// ```
+///
+/// # Optimal-Fit Wrapping
+///
+/// By default, `wrap` will try to ensure an even right margin by
+/// finding breaks which avoid short lines. We call this an
+/// “optimal-fit algorithm” since the line breaks are computed by
+/// considering all possible line breaks. The alternative is a
+/// “first-fit algorithm” which simply accumulates words until they no
+/// longer fit on the line.
+///
+/// As an example, using the first-fit algorithm to wrap the famous
+/// Hamlet quote “To be, or not to be: that is the question” in a
+/// narrow column with room for only 10 characters looks like this:
+///
+/// ```
+/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap};
+/// #
+/// # let lines = wrap("To be, or not to be: that is the question",
+/// # Options::new(10).wrap_algorithm(FirstFit));
+/// # assert_eq!(lines.join("\n") + "\n", "\
+/// To be, or
+/// not to be:
+/// that is
+/// the
+/// question
+/// # ");
+/// ```
+///
+/// Notice how the second to last line is quite narrow because
+/// “question” was too large to fit? The greedy first-fit algorithm
+/// doesn’t look ahead, so it has no other option than to put
+/// “question” onto its own line.
+///
+/// With the optimal-fit wrapping algorithm, the previous lines are
+/// shortened slightly in order to make the word “is” go into the
+/// second last line:
+///
+/// ```
+/// # #[cfg(feature = "smawk")] {
+/// # use textwrap::{Options, WrapAlgorithm, wrap};
+/// #
+/// # let lines = wrap(
+/// # "To be, or not to be: that is the question",
+/// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit())
+/// # );
+/// # assert_eq!(lines.join("\n") + "\n", "\
+/// To be,
+/// or not to
+/// be: that
+/// is the
+/// question
+/// # "); }
+/// ```
+///
+/// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on
+/// the choices.
+///
+/// # Examples
+///
+/// The returned vector contains lines of type `Cow<'_, str>`. If
+/// possible, the wrapped lines will borrow from the input string. As
+/// an example, with a hanging indentation, the first line can borrow
+/// from the input, but the subsequent lines become owned strings:
+///
+/// ```
+/// use std::borrow::Cow::{Borrowed, Owned};
+/// use textwrap::{wrap, Options};
+///
+/// let options = Options::new(15).subsequent_indent("....");
+/// let lines = wrap("Wrapping text all day long.", &options);
+/// let annotated = lines
+/// .iter()
+/// .map(|line| match line {
+/// Borrowed(text) => format!("[Borrowed] {}", text),
+/// Owned(text) => format!("[Owned] {}", text),
+/// })
+/// .collect::<Vec<_>>();
+/// assert_eq!(
+/// annotated,
+/// &[
+/// "[Borrowed] Wrapping text",
+/// "[Owned] ....all day",
+/// "[Owned] ....long.",
+/// ]
+/// );
+/// ```
+///
+/// ## Leading and Trailing Whitespace
+///
+/// As a rule, leading whitespace (indentation) is preserved and
+/// trailing whitespace is discarded.
+///
+/// In more detail, when wrapping words into lines, words are found
+/// by splitting the input text on space characters. One or more
+/// spaces (shown here as “␣”) are attached to the end of each word:
+///
+/// ```text
+/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"]
+/// ```
+///
+/// These words are then put into lines. The interword whitespace is
+/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"`
+/// word falls at the end of a line:
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// assert_eq!(wrap("Foo   bar baz", 10), vec!["Foo   bar", "baz"]);
+/// assert_eq!(wrap("Foo   bar baz", 8), vec!["Foo", "bar baz"]);
+/// ```
+///
+/// Notice how the trailing whitespace is removed in both cases: in the
+/// first example, `"bar␣"` becomes `"bar"` and in the second case
+/// `"Foo␣␣␣"` becomes `"Foo"`.
+///
+/// Leading whitespace is preserved when the following word fits on
+/// the first line. To understand this, consider how words are found
+/// in a text with leading spaces:
+///
+/// ```text
+/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"]
+/// ```
+///
+/// When put into lines, the indentation is preserved if `"foo"` fits
+/// on the first line, otherwise you end up with an empty line:
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// assert_eq!(wrap("  foo bar", 8), vec!["  foo", "bar"]);
+/// assert_eq!(wrap("  foo bar", 4), vec!["", "foo", "bar"]);
+/// ```
+pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
+where
+    Opt: Into<Options<'a>>,
+{
+    // Whatever the caller passed is converted into a full `Options` value.
+    let options: Options = width_or_options.into();
+
+    // Each piece between two line endings is wrapped on its own; the
+    // resulting lines are appended to one shared accumulator.
+    let mut wrapped = Vec::new();
+    text.split(options.line_ending.as_str())
+        .for_each(|segment| wrap_single_line(segment, &options, &mut wrapped));
+
+    wrapped
+}
+
+/// Wrap one line of text and append the resulting lines to `lines`.
+///
+/// A line that needs no indentation and whose byte length is below
+/// `options.width` is pushed directly, with trailing spaces removed.
+/// Every other line is handed to [`wrap_single_line_slow_path`].
+pub(crate) fn wrap_single_line<'a>(
+    line: &'a str,
+    options: &Options<'_>,
+    lines: &mut Vec<Cow<'a, str>>,
+) {
+    // The very first output line uses the initial indentation, all
+    // later lines use the subsequent indentation.
+    let needs_indent = if lines.is_empty() {
+        !options.initial_indent.is_empty()
+    } else {
+        !options.subsequent_indent.is_empty()
+    };
+
+    if needs_indent || line.len() >= options.width {
+        wrap_single_line_slow_path(line, options, lines);
+        return;
+    }
+
+    // Fast path: the line can be borrowed unchanged from the input.
+    lines.push(Cow::Borrowed(line.trim_end_matches(' ')));
+}
+
+/// Wrap a single line of text using the full wrapping machinery.
+///
+/// [`wrap_single_line`] falls back to this function when the line has
+/// to be indented or when its byte length is not below
+/// `options.width`. The line is split into words, the words are
+/// distributed over lines by `options.wrap_algorithm`, and every
+/// resulting line is appended to `lines`.
+pub(crate) fn wrap_single_line_slow_path<'a>(
+    line: &'a str,
+    options: &Options<'_>,
+    lines: &mut Vec<Cow<'a, str>>,
+) {
+    // Room left for text once the indentation has been subtracted:
+    // the first entry is for the first line, the second entry for the
+    // lines after it.
+    let line_widths = [
+        options
+            .width
+            .saturating_sub(display_width(options.initial_indent)),
+        options
+            .width
+            .saturating_sub(display_width(options.subsequent_indent)),
+    ];
+
+    let found_words = options.word_separator.find_words(line);
+    let fragments = split_words(found_words, &options.word_splitter);
+    let words = if options.break_words {
+        let mut pieces = break_words(fragments, line_widths[1]);
+        if !options.initial_indent.is_empty() {
+            // Words are broken according to the width of the
+            // _second_ line, so the first fragment is not guaranteed
+            // to fit on the first line. A leading zero-width word
+            // lets the wrapping algorithm leave the first line empty
+            // instead of always putting the first fragment there.
+            pieces.insert(0, Word::from(""));
+        }
+        pieces
+    } else {
+        fragments.collect::<Vec<_>>()
+    };
+
+    // Byte offset into `line` where the next output line starts.
+    let mut start = 0;
+    for words_on_line in options.wrap_algorithm.wrap(&words, &line_widths) {
+        let last = match words_on_line.last() {
+            Some(word) => word,
+            None => {
+                lines.push(Cow::from(""));
+                continue;
+            }
+        };
+
+        // The words are assumed to be contiguous in `line`: their
+        // lengths (text plus trailing whitespace) add up to the
+        // number of bytes this output line consumes from `line`.
+        let consumed: usize = words_on_line
+            .iter()
+            .map(|word| word.len() + word.whitespace.len())
+            .sum();
+        // The whitespace after the final word is never emitted.
+        let end = start + consumed - last.whitespace.len();
+
+        let indent = if lines.is_empty() {
+            options.initial_indent
+        } else {
+            options.subsequent_indent
+        };
+        // Without indentation we start from an empty borrowed string:
+        // appending to an empty `Cow` keeps the appended slice
+        // borrowed. With indentation the line has to be owned.
+        let mut result: Cow<'a, str> = if indent.is_empty() {
+            Cow::Borrowed("")
+        } else {
+            Cow::Owned(indent.to_owned())
+        };
+
+        result += &line[start..end];
+
+        if !last.penalty.is_empty() {
+            result.to_mut().push_str(last.penalty);
+        }
+
+        lines.push(result);
+
+        // Skip past the emitted text and the whitespace that followed
+        // the last word -- also when a penalty was appended.
+        start += consumed;
+    }
+}
+
+// Unit tests for `wrap`. Full-width characters are written as `\u{..}`
+// escapes and significant runs of spaces are spelled out on purpose:
+// both are easily lost when this file passes through tools that
+// normalize text, which silently turns the assertions into wrong or
+// vacuous ones.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{WordSeparator, WordSplitter, WrapAlgorithm};
+
+    #[cfg(feature = "hyphenation")]
+    use hyphenation::{Language, Load, Standard};
+
+    #[test]
+    fn no_wrap() {
+        assert_eq!(wrap("foo", 10), vec!["foo"]);
+    }
+
+    #[test]
+    fn wrap_simple() {
+        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn to_be_or_not() {
+        assert_eq!(
+            wrap(
+                "To be, or not to be, that is the question.",
+                Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
+            ),
+            vec!["To be, or", "not to be,", "that is", "the", "question."]
+        );
+    }
+
+    #[test]
+    fn multiple_words_on_first_line() {
+        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
+    }
+
+    #[test]
+    fn long_word() {
+        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
+    }
+
+    #[test]
+    fn long_words() {
+        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
+    }
+
+    #[test]
+    fn max_width() {
+        assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);
+
+        let text = "Hello there! This is some English text. \
+                    It should not be wrapped given the extents below.";
+        assert_eq!(wrap(text, usize::MAX), vec![text]);
+    }
+
+    #[test]
+    fn leading_whitespace() {
+        assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
+    }
+
+    #[test]
+    fn leading_whitespace_empty_first_line() {
+        // If there is no space for the first word, the first line
+        // will be empty. This is because the string is split into
+        // words like [" ", "foobar ", "baz"], which puts "foobar " on
+        // the second line. We never output trailing whitespace
+        assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
+    }
+
+    #[test]
+    fn trailing_whitespace() {
+        // Whitespace is only significant inside a line. After a line
+        // gets too long and is broken, the first word starts in
+        // column zero and is not indented.
+        assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn issue_99() {
+        // We did not reset the in_whitespace flag correctly and did
+        // not handle single-character words after a line break.
+        assert_eq!(
+            wrap("aaabbbccc x yyyzzzwww", 9),
+            vec!["aaabbbccc", "x", "yyyzzzwww"]
+        );
+    }
+
+    #[test]
+    fn issue_129() {
+        // The dash is an en-dash (U+2013), a multi-byte character in
+        // UTF-8. We used to panic since we tried to index into the
+        // character.
+        let options = Options::new(1).word_separator(WordSeparator::AsciiSpace);
+        assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
+    }
+
+    #[test]
+    fn wide_character_handling() {
+        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
+
+        // Same text, but with full-width letters (U+FF21..U+FF5A)
+        // which each occupy two columns. The two words no longer fit
+        // next to each other in 15 columns.
+        let wide = "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!";
+        assert_eq!(
+            wrap(
+                wide,
+                Options::new(15).word_separator(WordSeparator::AsciiSpace)
+            ),
+            vec![
+                "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f},",
+                "\u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"
+            ]
+        );
+
+        // Wide characters are allowed to break if the
+        // unicode-linebreak feature is enabled.
+        #[cfg(feature = "unicode-linebreak")]
+        assert_eq!(
+            wrap(
+                wide,
+                Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
+            ),
+            vec![
+                "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \u{ff37}",
+                "\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"
+            ]
+        );
+    }
+
+    #[test]
+    fn indent_empty_line() {
+        // Previously, indentation was not applied to empty lines.
+        // However, this is somewhat inconsistent and undesirable if
+        // the indentation is something like a border ("| ") which you
+        // want to apply to all lines, empty or not.
+        let options = Options::new(10).initial_indent("!!!");
+        assert_eq!(wrap("", &options), vec!["!!!"]);
+    }
+
+    #[test]
+    fn indent_single_line() {
+        let options = Options::new(10).initial_indent(">>>"); // No trailing space
+        assert_eq!(wrap("foo", &options), vec![">>>foo"]);
+    }
+
+    #[test]
+    fn indent_first_emoji() {
+        let options = Options::new(10).initial_indent("👉👉");
+        assert_eq!(
+            wrap("x x x x x x x x x x x x x", &options),
+            vec!["👉👉x x x", "x x x x x", "x x x x x"]
+        );
+    }
+
+    #[test]
+    fn indent_multiple_lines() {
+        let options = Options::new(6).initial_indent("* ").subsequent_indent(" ");
+        assert_eq!(
+            wrap("foo bar baz", &options),
+            vec!["* foo", " bar", " baz"]
+        );
+    }
+
+    #[test]
+    fn only_initial_indent_multiple_lines() {
+        let options = Options::new(10).initial_indent(" ");
+        assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn only_subsequent_indent_multiple_lines() {
+        let options = Options::new(10).subsequent_indent(" ");
+        assert_eq!(
+            wrap("foo\nbar\nbaz", &options),
+            vec!["foo", " bar", " baz"]
+        );
+    }
+
+    #[test]
+    fn indent_break_words() {
+        // Both indentations are two columns wide, which leaves room
+        // for exactly three characters per line.
+        let options = Options::new(5).initial_indent("* ").subsequent_indent("  ");
+        assert_eq!(
+            wrap("foobarbaz", &options),
+            vec!["* foo", "  bar", "  baz"]
+        );
+    }
+
+    #[test]
+    fn initial_indent_break_words() {
+        // This is a corner-case showing how the long word is broken
+        // according to the width of the subsequent lines. The first
+        // fragment of the word no longer fits on the first line,
+        // which ends up being pure indentation.
+        let options = Options::new(5).initial_indent("-->");
+        assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]);
+    }
+
+    #[test]
+    fn hyphens() {
+        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
+    }
+
+    #[test]
+    fn trailing_hyphen() {
+        let options = Options::new(5).break_words(false);
+        assert_eq!(wrap("foobar-", &options), vec!["foobar-"]);
+    }
+
+    #[test]
+    fn multiple_hyphens() {
+        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
+    }
+
+    #[test]
+    fn hyphens_flag() {
+        let options = Options::new(5).break_words(false);
+        assert_eq!(
+            wrap("The --foo-bar flag.", &options),
+            vec!["The", "--foo-", "bar", "flag."]
+        );
+    }
+
+    #[test]
+    fn repeated_hyphens() {
+        let options = Options::new(4).break_words(false);
+        assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]);
+    }
+
+    #[test]
+    fn hyphens_alphanumeric() {
+        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
+    }
+
+    #[test]
+    fn hyphens_non_alphanumeric() {
+        let options = Options::new(5).break_words(false);
+        assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]);
+    }
+
+    #[test]
+    fn multiple_splits() {
+        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
+    }
+
+    #[test]
+    fn forced_split() {
+        let options = Options::new(5).break_words(false);
+        assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]);
+    }
+
+    #[test]
+    fn multiple_unbroken_words_issue_193() {
+        let options = Options::new(3).break_words(false);
+        assert_eq!(
+            wrap("small large tiny", &options),
+            vec!["small", "large", "tiny"]
+        );
+        // Runs of several spaces between the words must not change
+        // the result.
+        assert_eq!(
+            wrap("small  large   tiny", &options),
+            vec!["small", "large", "tiny"]
+        );
+    }
+
+    #[test]
+    fn very_narrow_lines_issue_193() {
+        let options = Options::new(1).break_words(false);
+        assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
+        // Runs of several spaces between the words must not change
+        // the result.
+        assert_eq!(wrap("fooo   x     y", &options), vec!["fooo", "x", "y"]);
+    }
+
+    #[test]
+    fn simple_hyphens() {
+        let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter);
+        assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
+    }
+
+    #[test]
+    fn no_hyphenation() {
+        let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
+        assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn auto_hyphenation_double_hyphenation() {
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let options = Options::new(10);
+        assert_eq!(
+            wrap("Internationalization", &options),
+            vec!["Internatio", "nalization"]
+        );
+
+        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
+        assert_eq!(
+            wrap("Internationalization", &options),
+            vec!["Interna-", "tionaliza-", "tion"]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn auto_hyphenation_issue_158() {
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let options = Options::new(10);
+        assert_eq!(
+            wrap("participation is the key to success", &options),
+            vec!["participat", "ion is", "the key to", "success"]
+        );
+
+        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
+        assert_eq!(
+            wrap("participation is the key to success", &options),
+            vec!["partici-", "pation is", "the key to", "success"]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn split_len_hyphenation() {
+        // Test that hyphenation takes the width of the whitespace
+        // into account.
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary));
+        assert_eq!(
+            wrap("garbage collection", &options),
+            vec!["garbage col-", "lection"]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn borrowed_lines() {
+        // Lines that end with an extra hyphen are owned, the final
+        // line is borrowed.
+        use std::borrow::Cow::{Borrowed, Owned};
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
+        let lines = wrap("Internationalization", &options);
+        assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]);
+        assert!(
+            matches!(lines[0], Owned(_)),
+            "should not have been borrowed: {:?}",
+            lines[0]
+        );
+        assert!(
+            matches!(lines[1], Owned(_)),
+            "should not have been borrowed: {:?}",
+            lines[1]
+        );
+        assert!(
+            matches!(lines[2], Borrowed(_)),
+            "should not have been owned: {:?}",
+            lines[2]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn auto_hyphenation_with_hyphen() {
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let options = Options::new(8).break_words(false);
+        assert_eq!(
+            wrap("over-caffinated", &options),
+            vec!["over-", "caffinated"]
+        );
+
+        let options = options.word_splitter(WordSplitter::Hyphenation(dictionary));
+        assert_eq!(
+            wrap("over-caffinated", &options),
+            vec!["over-", "caffi-", "nated"]
+        );
+    }
+
+    #[test]
+    fn break_words() {
+        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn break_words_wide_characters() {
+        // Even the poor man's version of `ch_width` counts these
+        // characters as wide. The input is "Hello" spelled with
+        // full-width letters, each of which occupies two columns, so
+        // only two of them fit into a line of width 5.
+        let options = Options::new(5).word_separator(WordSeparator::AsciiSpace);
+        assert_eq!(
+            wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}", options),
+            vec!["\u{ff28}\u{ff45}", "\u{ff4c}\u{ff4c}", "\u{ff4f}"]
+        );
+    }
+
+    #[test]
+    fn break_words_zero_width() {
+        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
+    }
+
+    #[test]
+    fn break_long_first_word() {
+        assert_eq!(wrap("testx y", 4), vec!["test", "x y"]);
+    }
+
+    #[test]
+    fn wrap_preserves_line_breaks_trims_whitespace() {
+        assert_eq!(wrap(" ", 80), vec![""]);
+        assert_eq!(wrap(" \n ", 80), vec!["", ""]);
+        assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]);
+    }
+
+    #[test]
+    fn wrap_colored_text() {
+        // The words are much longer than 6 bytes, but they remain
+        // intact after filling the text.
+        let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m";
+        let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m";
+        assert_eq!(
+            wrap(&format!("{} {}", green_hello, blue_world), 6),
+            vec![green_hello, blue_world],
+        );
+    }
+}