diff options
Diffstat (limited to 'third_party/rust/textwrap/src/wrap.rs')
-rw-r--r-- | third_party/rust/textwrap/src/wrap.rs | 686 |
1 files changed, 686 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/src/wrap.rs b/third_party/rust/textwrap/src/wrap.rs new file mode 100644 index 0000000000..a7f2ccf298 --- /dev/null +++ b/third_party/rust/textwrap/src/wrap.rs @@ -0,0 +1,686 @@ +//! Functions for wrapping text. + +use std::borrow::Cow; + +use crate::core::{break_words, display_width, Word}; +use crate::word_splitters::split_words; +use crate::Options; + +/// Wrap a line of text at a given width. +/// +/// The result is a vector of lines, each line is of type [`Cow<'_, +/// str>`](Cow), which means that the line will borrow from the input +/// `&str` if possible. The lines do not have trailing whitespace, +/// including a final `'\n'`. Please use [`fill()`](crate::fill()) if +/// you need a [`String`] instead. +/// +/// The easiest way to use this function is to pass an integer for +/// `width_or_options`: +/// +/// ``` +/// use textwrap::wrap; +/// +/// let lines = wrap("Memory safety without garbage collection.", 15); +/// assert_eq!(lines, &[ +/// "Memory safety", +/// "without garbage", +/// "collection.", +/// ]); +/// ``` +/// +/// If you need to customize the wrapping, you can pass an [`Options`] +/// instead of an `usize`: +/// +/// ``` +/// use textwrap::{wrap, Options}; +/// +/// let options = Options::new(15) +/// .initial_indent("- ") +/// .subsequent_indent(" "); +/// let lines = wrap("Memory safety without garbage collection.", &options); +/// assert_eq!(lines, &[ +/// "- Memory safety", +/// " without", +/// " garbage", +/// " collection.", +/// ]); +/// ``` +/// +/// # Optimal-Fit Wrapping +/// +/// By default, `wrap` will try to ensure an even right margin by +/// finding breaks which avoid short lines. We call this an +/// “optimal-fit algorithm” since the line breaks are computed by +/// considering all possible line breaks. The alternative is a +/// “first-fit algorithm” which simply accumulates words until they no +/// longer fit on the line. +/// +/// As an example, using the first-fit algorithm to wrap the famous +/// Hamlet quote “To be, or not to be: that is the question” in a +/// narrow column with room for only 10 characters looks like this: +/// +/// ``` +/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap}; +/// # +/// # let lines = wrap("To be, or not to be: that is the question", +/// # Options::new(10).wrap_algorithm(FirstFit)); +/// # assert_eq!(lines.join("\n") + "\n", "\ +/// To be, or +/// not to be: +/// that is +/// the +/// question +/// # "); +/// ``` +/// +/// Notice how the second to last line is quite narrow because +/// “question” was too large to fit? The greedy first-fit algorithm +/// doesn’t look ahead, so it has no other option than to put +/// “question” onto its own line. +/// +/// With the optimal-fit wrapping algorithm, the previous lines are +/// shortened slightly in order to make the word “is” go into the +/// second last line: +/// +/// ``` +/// # #[cfg(feature = "smawk")] { +/// # use textwrap::{Options, WrapAlgorithm, wrap}; +/// # +/// # let lines = wrap( +/// # "To be, or not to be: that is the question", +/// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit()) +/// # ); +/// # assert_eq!(lines.join("\n") + "\n", "\ +/// To be, +/// or not to +/// be: that +/// is the +/// question +/// # "); } +/// ``` +/// +/// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on +/// the choices. +/// +/// # Examples +/// +/// The returned iterator yields lines of type `Cow<'_, str>`. If +/// possible, the wrapped lines will borrow from the input string. As +/// an example, a hanging indentation, the first line can borrow from +/// the input, but the subsequent lines become owned strings: +/// +/// ``` +/// use std::borrow::Cow::{Borrowed, Owned}; +/// use textwrap::{wrap, Options}; +/// +/// let options = Options::new(15).subsequent_indent("...."); +/// let lines = wrap("Wrapping text all day long.", &options); +/// let annotated = lines +/// .iter() +/// .map(|line| match line { +/// Borrowed(text) => format!("[Borrowed] {}", text), +/// Owned(text) => format!("[Owned] {}", text), +/// }) +/// .collect::<Vec<_>>(); +/// assert_eq!( +/// annotated, +/// &[ +/// "[Borrowed] Wrapping text", +/// "[Owned] ....all day", +/// "[Owned] ....long.", +/// ] +/// ); +/// ``` +/// +/// ## Leading and Trailing Whitespace +/// +/// As a rule, leading whitespace (indentation) is preserved and +/// trailing whitespace is discarded. +/// +/// In more details, when wrapping words into lines, words are found +/// by splitting the input text on space characters. One or more +/// spaces (shown here as “␣”) are attached to the end of each word: +/// +/// ```text +/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"] +/// ``` +/// +/// These words are then put into lines. The interword whitespace is +/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"` +/// word falls at the end of a line: +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]); +/// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]); +/// ``` +/// +/// Notice how the trailing whitespace is removed in both case: in the +/// first example, `"bar␣"` becomes `"bar"` and in the second case +/// `"Foo␣␣␣"` becomes `"Foo"`. +/// +/// Leading whitespace is preserved when the following word fits on +/// the first line. To understand this, consider how words are found +/// in a text with leading spaces: +/// +/// ```text +/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"] +/// ``` +/// +/// When put into lines, the indentation is preserved if `"foo"` fits +/// on the first line, otherwise you end up with an empty line: +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]); +/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]); +/// ``` +pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>> +where + Opt: Into<Options<'a>>, +{ + let options: Options = width_or_options.into(); + let line_ending_str = options.line_ending.as_str(); + + let mut lines = Vec::new(); + for line in text.split(line_ending_str) { + wrap_single_line(line, &options, &mut lines); + } + + lines +} + +pub(crate) fn wrap_single_line<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { + let indent = if lines.is_empty() { + options.initial_indent + } else { + options.subsequent_indent + }; + if line.len() < options.width && indent.is_empty() { + lines.push(Cow::from(line.trim_end_matches(' '))); + } else { + wrap_single_line_slow_path(line, options, lines) + } +} + +/// Wrap a single line of text. +/// +/// This is taken when `line` is longer than `options.width`. +pub(crate) fn wrap_single_line_slow_path<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { + let initial_width = options + .width + .saturating_sub(display_width(options.initial_indent)); + let subsequent_width = options + .width + .saturating_sub(display_width(options.subsequent_indent)); + let line_widths = [initial_width, subsequent_width]; + + let words = options.word_separator.find_words(line); + let split_words = split_words(words, &options.word_splitter); + let broken_words = if options.break_words { + let mut broken_words = break_words(split_words, line_widths[1]); + if !options.initial_indent.is_empty() { + // Without this, the first word will always go into the + // first line. However, since we break words based on the + // _second_ line width, it can be wrong to unconditionally + // put the first word onto the first line. An empty + // zero-width word fixed this. + broken_words.insert(0, Word::from("")); + } + broken_words + } else { + split_words.collect::<Vec<_>>() + }; + + let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); + + let mut idx = 0; + for words in wrapped_words { + let last_word = match words.last() { + None => { + lines.push(Cow::from("")); + continue; + } + Some(word) => word, + }; + + // We assume here that all words are contiguous in `line`. + // That is, the sum of their lengths should add up to the + // length of `line`. + let len = words + .iter() + .map(|word| word.len() + word.whitespace.len()) + .sum::<usize>() + - last_word.whitespace.len(); + + // The result is owned if we have indentation, otherwise we + // can simply borrow an empty string. + let mut result = if lines.is_empty() && !options.initial_indent.is_empty() { + Cow::Owned(options.initial_indent.to_owned()) + } else if !lines.is_empty() && !options.subsequent_indent.is_empty() { + Cow::Owned(options.subsequent_indent.to_owned()) + } else { + // We can use an empty string here since string + // concatenation for `Cow` preserves a borrowed value when + // either side is empty. + Cow::from("") + }; + + result += &line[idx..idx + len]; + + if !last_word.penalty.is_empty() { + result.to_mut().push_str(last_word.penalty); + } + + lines.push(result); + + // Advance by the length of `result`, plus the length of + // `last_word.whitespace` -- even if we had a penalty, we need + // to skip over the whitespace. + idx += len + last_word.whitespace.len(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{WordSeparator, WordSplitter, WrapAlgorithm}; + + #[cfg(feature = "hyphenation")] + use hyphenation::{Language, Load, Standard}; + + #[test] + fn no_wrap() { + assert_eq!(wrap("foo", 10), vec!["foo"]); + } + + #[test] + fn wrap_simple() { + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn to_be_or_not() { + assert_eq!( + wrap( + "To be, or not to be, that is the question.", + Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit) + ), + vec!["To be, or", "not to be,", "that is", "the", "question."] + ); + } + + #[test] + fn multiple_words_on_first_line() { + assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); + } + + #[test] + fn long_word() { + assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); + } + + #[test] + fn long_words() { + assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn max_width() { + assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]); + + let text = "Hello there! This is some English text. \ + It should not be wrapped given the extents below."; + assert_eq!(wrap(text, usize::MAX), vec![text]); + } + + #[test] + fn leading_whitespace() { + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + } + + #[test] + fn leading_whitespace_empty_first_line() { + // If there is no space for the first word, the first line + // will be empty. This is because the string is split into + // words like [" ", "foobar ", "baz"], which puts "foobar " on + // the second line. We never output trailing whitespace + assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]); + } + + #[test] + fn trailing_whitespace() { + // Whitespace is only significant inside a line. After a line + // gets too long and is broken, the first word starts in + // column zero and is not indented. + assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn issue_99() { + // We did not reset the in_whitespace flag correctly and did + // not handle single-character words after a line break. + assert_eq!( + wrap("aaabbbccc x yyyzzzwww", 9), + vec!["aaabbbccc", "x", "yyyzzzwww"] + ); + } + + #[test] + fn issue_129() { + // The dash is an em-dash which takes up four bytes. We used + // to panic since we tried to index into the character. + let options = Options::new(1).word_separator(WordSeparator::AsciiSpace); + assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); + } + + #[test] + fn wide_character_handling() { + assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); + assert_eq!( + wrap( + "Hello, World!", + Options::new(15).word_separator(WordSeparator::AsciiSpace) + ), + vec!["Hello,", "World!"] + ); + + // Wide characters are allowed to break if the + // unicode-linebreak feature is enabled. + #[cfg(feature = "unicode-linebreak")] + assert_eq!( + wrap( + "Hello, World!", + Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties), + ), + vec!["Hello, W", "orld!"] + ); + } + + #[test] + fn indent_empty_line() { + // Previously, indentation was not applied to empty lines. + // However, this is somewhat inconsistent and undesirable if + // the indentation is something like a border ("| ") which you + // want to apply to all lines, empty or not. + let options = Options::new(10).initial_indent("!!!"); + assert_eq!(wrap("", &options), vec!["!!!"]); + } + + #[test] + fn indent_single_line() { + let options = Options::new(10).initial_indent(">>>"); // No trailing space + assert_eq!(wrap("foo", &options), vec![">>>foo"]); + } + + #[test] + fn indent_first_emoji() { + let options = Options::new(10).initial_indent("👉👉"); + assert_eq!( + wrap("x x x x x x x x x x x x x", &options), + vec!["👉👉x x x", "x x x x x", "x x x x x"] + ); + } + + #[test] + fn indent_multiple_lines() { + let options = Options::new(6).initial_indent("* ").subsequent_indent(" "); + assert_eq!( + wrap("foo bar baz", &options), + vec!["* foo", " bar", " baz"] + ); + } + + #[test] + fn only_initial_indent_multiple_lines() { + let options = Options::new(10).initial_indent(" "); + assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]); + } + + #[test] + fn only_subsequent_indent_multiple_lines() { + let options = Options::new(10).subsequent_indent(" "); + assert_eq!( + wrap("foo\nbar\nbaz", &options), + vec!["foo", " bar", " baz"] + ); + } + + #[test] + fn indent_break_words() { + let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn initial_indent_break_words() { + // This is a corner-case showing how the long word is broken + // according to the width of the subsequent lines. The first + // fragment of the word no longer fits on the first line, + // which ends up being pure indentation. + let options = Options::new(5).initial_indent("-->"); + assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]); + } + + #[test] + fn hyphens() { + assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); + } + + #[test] + fn trailing_hyphen() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foobar-", &options), vec!["foobar-"]); + } + + #[test] + fn multiple_hyphens() { + assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); + } + + #[test] + fn hyphens_flag() { + let options = Options::new(5).break_words(false); + assert_eq!( + wrap("The --foo-bar flag.", &options), + vec!["The", "--foo-", "bar", "flag."] + ); + } + + #[test] + fn repeated_hyphens() { + let options = Options::new(4).break_words(false); + assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); + } + + #[test] + fn hyphens_alphanumeric() { + assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); + } + + #[test] + fn hyphens_non_alphanumeric() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); + } + + #[test] + fn multiple_splits() { + assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); + } + + #[test] + fn forced_split() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); + } + + #[test] + fn multiple_unbroken_words_issue_193() { + let options = Options::new(3).break_words(false); + assert_eq!( + wrap("small large tiny", &options), + vec!["small", "large", "tiny"] + ); + assert_eq!( + wrap("small large tiny", &options), + vec!["small", "large", "tiny"] + ); + } + + #[test] + fn very_narrow_lines_issue_193() { + let options = Options::new(1).break_words(false); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + } + + #[test] + fn simple_hyphens() { + let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter); + assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); + } + + #[test] + fn no_hyphenation() { + let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); + assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_double_hyphenation() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10); + assert_eq!( + wrap("Internationalization", &options), + vec!["Internatio", "nalization"] + ); + + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("Internationalization", &options), + vec!["Interna-", "tionaliza-", "tion"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_issue_158() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10); + assert_eq!( + wrap("participation is the key to success", &options), + vec!["participat", "ion is", "the key to", "success"] + ); + + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("participation is the key to success", &options), + vec!["partici-", "pation is", "the key to", "success"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn split_len_hyphenation() { + // Test that hyphenation takes the width of the whitespace + // into account. + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("garbage collection", &options), + vec!["garbage col-", "lection"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn borrowed_lines() { + // Lines that end with an extra hyphen are owned, the final + // line is borrowed. + use std::borrow::Cow::{Borrowed, Owned}; + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + let lines = wrap("Internationalization", &options); + assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]); + if let Borrowed(s) = lines[0] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Borrowed(s) = lines[1] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Owned(ref s) = lines[2] { + assert!(false, "should not have been owned: {:?}", s); + } + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_with_hyphen() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(8).break_words(false); + assert_eq!( + wrap("over-caffinated", &options), + vec!["over-", "caffinated"] + ); + + let options = options.word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("over-caffinated", &options), + vec!["over-", "caffi-", "nated"] + ); + } + + #[test] + fn break_words() { + assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); + } + + #[test] + fn break_words_wide_characters() { + // Even the poor man's version of `ch_width` counts these + // characters as wide. + let options = Options::new(5).word_separator(WordSeparator::AsciiSpace); + assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); + } + + #[test] + fn break_words_zero_width() { + assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn break_long_first_word() { + assert_eq!(wrap("testx y", 4), vec!["test", "x y"]); + } + + #[test] + fn wrap_preserves_line_breaks_trims_whitespace() { + assert_eq!(wrap(" ", 80), vec![""]); + assert_eq!(wrap(" \n ", 80), vec!["", ""]); + assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]); + } + + #[test] + fn wrap_colored_text() { + // The words are much longer than 6 bytes, but they remain + // intact after filling the text. + let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m"; + let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m"; + assert_eq!( + wrap(&format!("{} {}", green_hello, blue_world), 6), + vec![green_hello, blue_world], + ); + } +} |