//! Word wrapping algorithms. //! //! After a text has been broken into words (or [`Fragment`]s), one //! now has to decide how to break the fragments into lines. The //! simplest algorithm for this is implemented by //! [`wrap_first_fit()`]: it uses no look-ahead and simply adds //! fragments to the line as long as they fit. However, this can lead //! to poor line breaks if a large fragment almost-but-not-quite fits //! on a line. When that happens, the fragment is moved to the next //! line and it will leave behind a large gap. //! //! A more advanced algorithm, implemented by [`wrap_optimal_fit()`], //! will take this into account. The optimal-fit algorithm considers //! all possible line breaks and will attempt to minimize the gaps //! left behind by overly short lines. //! //! While both algorithms run in linear time, the first-fit algorithm //! is about 4 times faster than the optimal-fit algorithm. #[cfg(feature = "smawk")] mod optimal_fit; #[cfg(feature = "smawk")] pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties}; use crate::core::{Fragment, Word}; /// Describes how to wrap words into lines. /// /// The simplest approach is to wrap words one word at a time and /// accept the first way of wrapping which fit /// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is /// enabled, a more complex algorithm is available which will look at /// an entire paragraph at a time in order to find optimal line breaks /// ([`WrapAlgorithm::OptimalFit`]). #[derive(Clone, Copy)] pub enum WrapAlgorithm { /// Wrap words using a fast and simple algorithm. /// /// This algorithm uses no look-ahead when finding line breaks. /// Implemented by [`wrap_first_fit()`], please see that function /// for details and examples. FirstFit, /// Wrap words using an advanced algorithm with look-ahead. /// /// This wrapping algorithm considers the entire paragraph to find /// optimal line breaks. When wrapping text, "penalties" are /// assigned to line breaks based on the gaps left at the end of /// lines. See [`Penalties`] for details. /// /// The underlying wrapping algorithm is implemented by /// [`wrap_optimal_fit()`], please see that function for examples. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. #[cfg(feature = "smawk")] OptimalFit(Penalties), /// Custom wrapping function. /// /// Use this if you want to implement your own wrapping algorithm. /// The function can freely decide how to turn a slice of /// [`Word`]s into lines. /// /// # Example /// /// ``` /// use textwrap::core::Word; /// use textwrap::{wrap, Options, WrapAlgorithm}; /// /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> { /// let mut lines = Vec::new(); /// let mut step = 1; /// let mut start_idx = 0; /// while start_idx + step <= words.len() { /// lines.push(&words[start_idx .. start_idx+step]); /// start_idx += step; /// step += 1; /// } /// lines /// } /// /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair)); /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options), /// vec!["First,", /// "second, third,", /// "fourth, fifth, sixth"]); /// ``` Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>), } impl PartialEq for WrapAlgorithm { /// Compare two wrap algorithms. /// /// ``` /// use textwrap::WrapAlgorithm; /// /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit); /// #[cfg(feature = "smawk")] { /// assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit()); /// } /// ``` /// /// Note that `WrapAlgorithm::Custom` values never compare equal: /// /// ``` /// use textwrap::WrapAlgorithm; /// /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]), /// WrapAlgorithm::Custom(|words, line_widths| vec![words])); /// ``` fn eq(&self, other: &Self) -> bool { match (self, other) { (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true, #[cfg(feature = "smawk")] (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b, (_, _) => false, } } } impl std::fmt::Debug for WrapAlgorithm { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { WrapAlgorithm::FirstFit => f.write_str("FirstFit"), #[cfg(feature = "smawk")] WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties), WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"), } } } impl WrapAlgorithm { /// Create new wrap algorithm. /// /// The best wrapping algorithm is used by default, i.e., /// [`WrapAlgorithm::OptimalFit`] if available, otherwise /// [`WrapAlgorithm::FirstFit`]. pub const fn new() -> Self { #[cfg(not(feature = "smawk"))] { WrapAlgorithm::FirstFit } #[cfg(feature = "smawk")] { WrapAlgorithm::new_optimal_fit() } } /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This /// works well for monospace text. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. #[cfg(feature = "smawk")] pub const fn new_optimal_fit() -> Self { WrapAlgorithm::OptimalFit(Penalties::new()) } /// Wrap words according to line widths. /// /// The `line_widths` slice gives the target line width for each /// line (the last slice element is repeated as necessary). This /// can be used to implement hanging indentation. #[inline] pub fn wrap<'a, 'b>( &self, words: &'b [Word<'a>], line_widths: &'b [usize], ) -> Vec<&'b [Word<'a>]> { // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 // = 9_007_199_254_740_992 can be represented without loss by // a f64. Larger line widths will be rounded to the nearest // representable number. let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::>(); match self { WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), #[cfg(feature = "smawk")] WrapAlgorithm::OptimalFit(penalties) => { // The computation cannot overflow when the line // widths are restricted to usize. wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() } WrapAlgorithm::Custom(func) => func(words, line_widths), } } } impl Default for WrapAlgorithm { fn default() -> Self { WrapAlgorithm::new() } } /// Wrap abstract fragments into lines with a first-fit algorithm. /// /// The `line_widths` slice gives the target line width for each line /// (the last slice element is repeated as necessary). This can be /// used to implement hanging indentation. /// /// The fragments must already have been split into the desired /// widths, this function will not (and cannot) attempt to split them /// further when arranging them into lines. /// /// # First-Fit Algorithm /// /// This implements a simple “greedy” algorithm: accumulate fragments /// one by one and when a fragment no longer fits, start a new line. /// There is no look-ahead, we simply take first fit of the fragments /// we find. /// /// While fast and predictable, this algorithm can produce poor line /// breaks when a long fragment is moved to a new line, leaving behind /// a large gap: /// /// ``` /// use textwrap::core::Word; /// use textwrap::wrap_algorithms::wrap_first_fit; /// use textwrap::WordSeparator; /// /// // Helper to convert wrapped lines to a Vec. /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec { /// lines.iter().map(|line| { /// line.iter().map(|word| &**word).collect::>().join(" ") /// }).collect::>() /// } /// /// let text = "These few words will unfortunately not wrap nicely."; /// let words = WordSeparator::AsciiSpace.find_words(text).collect::>(); /// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), /// vec!["These few words", /// "will", // <-- short line /// "unfortunately", /// "not wrap", /// "nicely."]); /// /// // We can avoid the short line if we look ahead: /// #[cfg(feature = "smawk")] /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; /// #[cfg(feature = "smawk")] /// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), /// vec!["These few", /// "words will", /// "unfortunately", /// "not wrap", /// "nicely."]); /// ``` /// /// The [`wrap_optimal_fit()`] function was used above to get better /// line breaks. It uses an advanced algorithm which tries to avoid /// short lines. This function is about 4 times faster than /// [`wrap_optimal_fit()`]. /// /// # Examples /// /// Imagine you're building a house site and you have a number of /// tasks you need to execute. Things like pour foundation, complete /// framing, install plumbing, electric cabling, install insulation. /// /// The construction workers can only work during daytime, so they /// need to pack up everything at night. Because they need to secure /// their tools and move machines back to the garage, this process /// takes much more time than the time it would take them to simply /// switch to another task. /// /// You would like to make a list of tasks to execute every day based /// on your estimates. You can model this with a program like this: /// /// ``` /// use textwrap::core::{Fragment, Word}; /// use textwrap::wrap_algorithms::wrap_first_fit; /// /// #[derive(Debug)] /// struct Task<'a> { /// name: &'a str, /// hours: f64, // Time needed to complete task. /// sweep: f64, // Time needed for a quick sweep after task during the day. /// cleanup: f64, // Time needed for full cleanup if day ends with this task. /// } /// /// impl Fragment for Task<'_> { /// fn width(&self) -> f64 { self.hours } /// fn whitespace_width(&self) -> f64 { self.sweep } /// fn penalty_width(&self) -> f64 { self.cleanup } /// } /// /// // The morning tasks /// let tasks = vec![ /// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 }, /// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 }, /// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 }, /// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, /// ]; /// /// // Fill tasks into days, taking `day_length` into account. The /// // output shows the hours worked per day along with the names of /// // the tasks for that day. /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> { /// let mut days = Vec::new(); /// // Assign tasks to days. The assignment is a vector of slices, /// // with a slice per day. /// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); /// for day in assigned_days.iter() { /// let last = day.last().unwrap(); /// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); /// let names = day.iter().map(|t| t.name).collect::>(); /// days.push((work_hours - last.sweep + last.cleanup, names)); /// } /// days /// } /// /// // With a single crew working 8 hours a day: /// assert_eq!( /// assign_days(&tasks, 8.0), /// [ /// (7.0, vec!["Foundation"]), /// (8.0, vec!["Framing", "Plumbing"]), /// (7.0, vec!["Electrical", "Insulation"]), /// (5.0, vec!["Drywall"]), /// (7.0, vec!["Floors", "Countertops"]), /// (4.0, vec!["Bathrooms"]), /// ] /// ); /// /// // With two crews working in shifts, 16 hours a day: /// assert_eq!( /// assign_days(&tasks, 16.0), /// [ /// (14.0, vec!["Foundation", "Framing", "Plumbing"]), /// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]), /// (6.0, vec!["Countertops", "Bathrooms"]), /// ] /// ); /// ``` /// /// Apologies to anyone who actually knows how to build a house and /// knows how long each step takes :-) pub fn wrap_first_fit<'a, T: Fragment>( fragments: &'a [T], line_widths: &[f64], ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0.0); let mut lines = Vec::new(); let mut start = 0; let mut width = 0.0; for (idx, fragment) in fragments.iter().enumerate() { let line_width = line_widths .get(lines.len()) .copied() .unwrap_or(default_line_width); if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { lines.push(&fragments[start..idx]); start = idx; width = 0.0; } width += fragment.width() + fragment.whitespace_width(); } lines.push(&fragments[start..]); lines } #[cfg(test)] mod tests { use super::*; #[derive(Debug, PartialEq)] struct Word(f64); #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } fn whitespace_width(&self) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } #[test] fn wrap_string_longer_than_f64() { let words = vec![ Word(1e307), Word(2e307), Word(3e307), Word(4e307), Word(5e307), Word(6e307), ]; // Wrap at just under f64::MAX (~19e307). The tiny // whitespace_widths disappear because of loss of precision. assert_eq!( wrap_first_fit(&words, &[15e307]), &[ vec![ Word(1e307), Word(2e307), Word(3e307), Word(4e307), Word(5e307) ], vec![Word(6e307)] ] ); } }