//! Functions for wrapping text. use std::borrow::Cow; use crate::core::{break_words, display_width, Word}; use crate::word_splitters::split_words; use crate::Options; /// Wrap a line of text at a given width. /// /// The result is a vector of lines, each line is of type [`Cow<'_, /// str>`](Cow), which means that the line will borrow from the input /// `&str` if possible. The lines do not have trailing whitespace, /// including a final `'\n'`. Please use [`fill()`](crate::fill()) if /// you need a [`String`] instead. /// /// The easiest way to use this function is to pass an integer for /// `width_or_options`: /// /// ``` /// use textwrap::wrap; /// /// let lines = wrap("Memory safety without garbage collection.", 15); /// assert_eq!(lines, &[ /// "Memory safety", /// "without garbage", /// "collection.", /// ]); /// ``` /// /// If you need to customize the wrapping, you can pass an [`Options`] /// instead of an `usize`: /// /// ``` /// use textwrap::{wrap, Options}; /// /// let options = Options::new(15) /// .initial_indent("- ") /// .subsequent_indent(" "); /// let lines = wrap("Memory safety without garbage collection.", &options); /// assert_eq!(lines, &[ /// "- Memory safety", /// " without", /// " garbage", /// " collection.", /// ]); /// ``` /// /// # Optimal-Fit Wrapping /// /// By default, `wrap` will try to ensure an even right margin by /// finding breaks which avoid short lines. We call this an /// “optimal-fit algorithm” since the line breaks are computed by /// considering all possible line breaks. The alternative is a /// “first-fit algorithm” which simply accumulates words until they no /// longer fit on the line. /// /// As an example, using the first-fit algorithm to wrap the famous /// Hamlet quote “To be, or not to be: that is the question” in a /// narrow column with room for only 10 characters looks like this: /// /// ``` /// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap}; /// # /// # let lines = wrap("To be, or not to be: that is the question", /// # Options::new(10).wrap_algorithm(FirstFit)); /// # assert_eq!(lines.join("\n") + "\n", "\ /// To be, or /// not to be: /// that is /// the /// question /// # "); /// ``` /// /// Notice how the second to last line is quite narrow because /// “question” was too large to fit? The greedy first-fit algorithm /// doesn’t look ahead, so it has no other option than to put /// “question” onto its own line. /// /// With the optimal-fit wrapping algorithm, the previous lines are /// shortened slightly in order to make the word “is” go into the /// second last line: /// /// ``` /// # #[cfg(feature = "smawk")] { /// # use textwrap::{Options, WrapAlgorithm, wrap}; /// # /// # let lines = wrap( /// # "To be, or not to be: that is the question", /// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit()) /// # ); /// # assert_eq!(lines.join("\n") + "\n", "\ /// To be, /// or not to /// be: that /// is the /// question /// # "); } /// ``` /// /// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on /// the choices. /// /// # Examples /// /// The returned iterator yields lines of type `Cow<'_, str>`. If /// possible, the wrapped lines will borrow from the input string. As /// an example, a hanging indentation, the first line can borrow from /// the input, but the subsequent lines become owned strings: /// /// ``` /// use std::borrow::Cow::{Borrowed, Owned}; /// use textwrap::{wrap, Options}; /// /// let options = Options::new(15).subsequent_indent("...."); /// let lines = wrap("Wrapping text all day long.", &options); /// let annotated = lines /// .iter() /// .map(|line| match line { /// Borrowed(text) => format!("[Borrowed] {}", text), /// Owned(text) => format!("[Owned] {}", text), /// }) /// .collect::>(); /// assert_eq!( /// annotated, /// &[ /// "[Borrowed] Wrapping text", /// "[Owned] ....all day", /// "[Owned] ....long.", /// ] /// ); /// ``` /// /// ## Leading and Trailing Whitespace /// /// As a rule, leading whitespace (indentation) is preserved and /// trailing whitespace is discarded. /// /// In more details, when wrapping words into lines, words are found /// by splitting the input text on space characters. One or more /// spaces (shown here as “␣”) are attached to the end of each word: /// /// ```text /// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"] /// ``` /// /// These words are then put into lines. The interword whitespace is /// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"` /// word falls at the end of a line: /// /// ``` /// use textwrap::wrap; /// /// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]); /// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]); /// ``` /// /// Notice how the trailing whitespace is removed in both case: in the /// first example, `"bar␣"` becomes `"bar"` and in the second case /// `"Foo␣␣␣"` becomes `"Foo"`. /// /// Leading whitespace is preserved when the following word fits on /// the first line. To understand this, consider how words are found /// in a text with leading spaces: /// /// ```text /// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"] /// ``` /// /// When put into lines, the indentation is preserved if `"foo"` fits /// on the first line, otherwise you end up with an empty line: /// /// ``` /// use textwrap::wrap; /// /// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]); /// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]); /// ``` pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec> where Opt: Into>, { let options: Options = width_or_options.into(); let line_ending_str = options.line_ending.as_str(); let mut lines = Vec::new(); for line in text.split(line_ending_str) { wrap_single_line(line, &options, &mut lines); } lines } pub(crate) fn wrap_single_line<'a>( line: &'a str, options: &Options<'_>, lines: &mut Vec>, ) { let indent = if lines.is_empty() { options.initial_indent } else { options.subsequent_indent }; if line.len() < options.width && indent.is_empty() { lines.push(Cow::from(line.trim_end_matches(' '))); } else { wrap_single_line_slow_path(line, options, lines) } } /// Wrap a single line of text. /// /// This is taken when `line` is longer than `options.width`. pub(crate) fn wrap_single_line_slow_path<'a>( line: &'a str, options: &Options<'_>, lines: &mut Vec>, ) { let initial_width = options .width .saturating_sub(display_width(options.initial_indent)); let subsequent_width = options .width .saturating_sub(display_width(options.subsequent_indent)); let line_widths = [initial_width, subsequent_width]; let words = options.word_separator.find_words(line); let split_words = split_words(words, &options.word_splitter); let broken_words = if options.break_words { let mut broken_words = break_words(split_words, line_widths[1]); if !options.initial_indent.is_empty() { // Without this, the first word will always go into the // first line. However, since we break words based on the // _second_ line width, it can be wrong to unconditionally // put the first word onto the first line. An empty // zero-width word fixed this. broken_words.insert(0, Word::from("")); } broken_words } else { split_words.collect::>() }; let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); let mut idx = 0; for words in wrapped_words { let last_word = match words.last() { None => { lines.push(Cow::from("")); continue; } Some(word) => word, }; // We assume here that all words are contiguous in `line`. // That is, the sum of their lengths should add up to the // length of `line`. let len = words .iter() .map(|word| word.len() + word.whitespace.len()) .sum::() - last_word.whitespace.len(); // The result is owned if we have indentation, otherwise we // can simply borrow an empty string. let mut result = if lines.is_empty() && !options.initial_indent.is_empty() { Cow::Owned(options.initial_indent.to_owned()) } else if !lines.is_empty() && !options.subsequent_indent.is_empty() { Cow::Owned(options.subsequent_indent.to_owned()) } else { // We can use an empty string here since string // concatenation for `Cow` preserves a borrowed value when // either side is empty. Cow::from("") }; result += &line[idx..idx + len]; if !last_word.penalty.is_empty() { result.to_mut().push_str(last_word.penalty); } lines.push(result); // Advance by the length of `result`, plus the length of // `last_word.whitespace` -- even if we had a penalty, we need // to skip over the whitespace. idx += len + last_word.whitespace.len(); } } #[cfg(test)] mod tests { use super::*; use crate::{WordSeparator, WordSplitter, WrapAlgorithm}; #[cfg(feature = "hyphenation")] use hyphenation::{Language, Load, Standard}; #[test] fn no_wrap() { assert_eq!(wrap("foo", 10), vec!["foo"]); } #[test] fn wrap_simple() { assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); } #[test] fn to_be_or_not() { assert_eq!( wrap( "To be, or not to be, that is the question.", Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit) ), vec!["To be, or", "not to be,", "that is", "the", "question."] ); } #[test] fn multiple_words_on_first_line() { assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); } #[test] fn long_word() { assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); } #[test] fn long_words() { assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); } #[test] fn max_width() { assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]); let text = "Hello there! This is some English text. \ It should not be wrapped given the extents below."; assert_eq!(wrap(text, usize::MAX), vec![text]); } #[test] fn leading_whitespace() { assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); } #[test] fn leading_whitespace_empty_first_line() { // If there is no space for the first word, the first line // will be empty. This is because the string is split into // words like [" ", "foobar ", "baz"], which puts "foobar " on // the second line. We never output trailing whitespace assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]); } #[test] fn trailing_whitespace() { // Whitespace is only significant inside a line. After a line // gets too long and is broken, the first word starts in // column zero and is not indented. assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]); } #[test] fn issue_99() { // We did not reset the in_whitespace flag correctly and did // not handle single-character words after a line break. assert_eq!( wrap("aaabbbccc x yyyzzzwww", 9), vec!["aaabbbccc", "x", "yyyzzzwww"] ); } #[test] fn issue_129() { // The dash is an em-dash which takes up four bytes. We used // to panic since we tried to index into the character. let options = Options::new(1).word_separator(WordSeparator::AsciiSpace); assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); } #[test] fn wide_character_handling() { assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); assert_eq!( wrap( "Hello, World!", Options::new(15).word_separator(WordSeparator::AsciiSpace) ), vec!["Hello,", "World!"] ); // Wide characters are allowed to break if the // unicode-linebreak feature is enabled. #[cfg(feature = "unicode-linebreak")] assert_eq!( wrap( "Hello, World!", Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties), ), vec!["Hello, W", "orld!"] ); } #[test] fn indent_empty_line() { // Previously, indentation was not applied to empty lines. // However, this is somewhat inconsistent and undesirable if // the indentation is something like a border ("| ") which you // want to apply to all lines, empty or not. let options = Options::new(10).initial_indent("!!!"); assert_eq!(wrap("", &options), vec!["!!!"]); } #[test] fn indent_single_line() { let options = Options::new(10).initial_indent(">>>"); // No trailing space assert_eq!(wrap("foo", &options), vec![">>>foo"]); } #[test] fn indent_first_emoji() { let options = Options::new(10).initial_indent("👉👉"); assert_eq!( wrap("x x x x x x x x x x x x x", &options), vec!["👉👉x x x", "x x x x x", "x x x x x"] ); } #[test] fn indent_multiple_lines() { let options = Options::new(6).initial_indent("* ").subsequent_indent(" "); assert_eq!( wrap("foo bar baz", &options), vec!["* foo", " bar", " baz"] ); } #[test] fn only_initial_indent_multiple_lines() { let options = Options::new(10).initial_indent(" "); assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]); } #[test] fn only_subsequent_indent_multiple_lines() { let options = Options::new(10).subsequent_indent(" "); assert_eq!( wrap("foo\nbar\nbaz", &options), vec!["foo", " bar", " baz"] ); } #[test] fn indent_break_words() { let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); } #[test] fn initial_indent_break_words() { // This is a corner-case showing how the long word is broken // according to the width of the subsequent lines. The first // fragment of the word no longer fits on the first line, // which ends up being pure indentation. let options = Options::new(5).initial_indent("-->"); assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]); } #[test] fn hyphens() { assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); } #[test] fn trailing_hyphen() { let options = Options::new(5).break_words(false); assert_eq!(wrap("foobar-", &options), vec!["foobar-"]); } #[test] fn multiple_hyphens() { assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); } #[test] fn hyphens_flag() { let options = Options::new(5).break_words(false); assert_eq!( wrap("The --foo-bar flag.", &options), vec!["The", "--foo-", "bar", "flag."] ); } #[test] fn repeated_hyphens() { let options = Options::new(4).break_words(false); assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); } #[test] fn hyphens_alphanumeric() { assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); } #[test] fn hyphens_non_alphanumeric() { let options = Options::new(5).break_words(false); assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); } #[test] fn multiple_splits() { assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); } #[test] fn forced_split() { let options = Options::new(5).break_words(false); assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); } #[test] fn multiple_unbroken_words_issue_193() { let options = Options::new(3).break_words(false); assert_eq!( wrap("small large tiny", &options), vec!["small", "large", "tiny"] ); assert_eq!( wrap("small large tiny", &options), vec!["small", "large", "tiny"] ); } #[test] fn very_narrow_lines_issue_193() { let options = Options::new(1).break_words(false); assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); } #[test] fn simple_hyphens() { let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter); assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); } #[test] fn no_hyphenation() { let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); } #[test] #[cfg(feature = "hyphenation")] fn auto_hyphenation_double_hyphenation() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10); assert_eq!( wrap("Internationalization", &options), vec!["Internatio", "nalization"] ); let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); assert_eq!( wrap("Internationalization", &options), vec!["Interna-", "tionaliza-", "tion"] ); } #[test] #[cfg(feature = "hyphenation")] fn auto_hyphenation_issue_158() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10); assert_eq!( wrap("participation is the key to success", &options), vec!["participat", "ion is", "the key to", "success"] ); let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); assert_eq!( wrap("participation is the key to success", &options), vec!["partici-", "pation is", "the key to", "success"] ); } #[test] #[cfg(feature = "hyphenation")] fn split_len_hyphenation() { // Test that hyphenation takes the width of the whitespace // into account. let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary)); assert_eq!( wrap("garbage collection", &options), vec!["garbage col-", "lection"] ); } #[test] #[cfg(feature = "hyphenation")] fn borrowed_lines() { // Lines that end with an extra hyphen are owned, the final // line is borrowed. use std::borrow::Cow::{Borrowed, Owned}; let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); let lines = wrap("Internationalization", &options); assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]); if let Borrowed(s) = lines[0] { assert!(false, "should not have been borrowed: {:?}", s); } if let Borrowed(s) = lines[1] { assert!(false, "should not have been borrowed: {:?}", s); } if let Owned(ref s) = lines[2] { assert!(false, "should not have been owned: {:?}", s); } } #[test] #[cfg(feature = "hyphenation")] fn auto_hyphenation_with_hyphen() { let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); let options = Options::new(8).break_words(false); assert_eq!( wrap("over-caffinated", &options), vec!["over-", "caffinated"] ); let options = options.word_splitter(WordSplitter::Hyphenation(dictionary)); assert_eq!( wrap("over-caffinated", &options), vec!["over-", "caffi-", "nated"] ); } #[test] fn break_words() { assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); } #[test] fn break_words_wide_characters() { // Even the poor man's version of `ch_width` counts these // characters as wide. let options = Options::new(5).word_separator(WordSeparator::AsciiSpace); assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); } #[test] fn break_words_zero_width() { assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); } #[test] fn break_long_first_word() { assert_eq!(wrap("testx y", 4), vec!["test", "x y"]); } #[test] fn wrap_preserves_line_breaks_trims_whitespace() { assert_eq!(wrap(" ", 80), vec![""]); assert_eq!(wrap(" \n ", 80), vec!["", ""]); assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]); } #[test] fn wrap_colored_text() { // The words are much longer than 6 bytes, but they remain // intact after filling the text. let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m"; let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m"; assert_eq!( wrap(&format!("{} {}", green_hello, blue_world), 6), vec![green_hello, blue_world], ); } }