diff options
Diffstat (limited to 'third_party/rust/textwrap/src/wrap_algorithms.rs')
-rw-r--r-- | third_party/rust/textwrap/src/wrap_algorithms.rs | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/src/wrap_algorithms.rs b/third_party/rust/textwrap/src/wrap_algorithms.rs new file mode 100644 index 0000000000..5ca49c3352 --- /dev/null +++ b/third_party/rust/textwrap/src/wrap_algorithms.rs @@ -0,0 +1,381 @@ +//! Word wrapping algorithms. +//! +//! After a text has been broken into words (or [`Fragment`]s), one +//! now has to decide how to break the fragments into lines. The +//! simplest algorithm for this is implemented by [`wrap_first_fit`]: +//! it uses no look-ahead and simply adds fragments to the line as +//! long as they fit. However, this can lead to poor line breaks if a +//! large fragment almost-but-not-quite fits on a line. When that +//! happens, the fragment is moved to the next line and it will leave +//! behind a large gap. A more advanced algorithm, implemented by +//! [`wrap_optimal_fit`], will take this into account. The optimal-fit +//! algorithm considers all possible line breaks and will attempt to +//! minimize the gaps left behind by overly short lines. +//! +//! While both algorithms run in linear time, the first-fit algorithm +//! is about 4 times faster than the optimal-fit algorithm. + +#[cfg(feature = "smawk")] +mod optimal_fit; +#[cfg(feature = "smawk")] +pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties}; + +use crate::core::{Fragment, Word}; + +/// Describes how to wrap words into lines. +/// +/// The simplest approach is to wrap words one word at a time and +/// accept the first way of wrapping which fit +/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is +/// enabled, a more complex algorithm is available which will look at +/// an entire paragraph at a time in order to find optimal line breaks +/// ([`WrapAlgorithm::OptimalFit`]). +#[derive(Clone, Copy)] +pub enum WrapAlgorithm { + /// Wrap words using a fast and simple algorithm. + /// + /// This algorithm uses no look-ahead when finding line breaks. + /// Implemented by [`wrap_first_fit`], please see that function for + /// details and examples. + FirstFit, + + /// Wrap words using an advanced algorithm with look-ahead. + /// + /// This wrapping algorithm considers the entire paragraph to find + /// optimal line breaks. When wrapping text, "penalties" are + /// assigned to line breaks based on the gaps left at the end of + /// lines. See [`Penalties`] for details. + /// + /// The underlying wrapping algorithm is implemented by + /// [`wrap_optimal_fit`], please see that function for examples. + /// + /// **Note:** Only available when the `smawk` Cargo feature is + /// enabled. + #[cfg(feature = "smawk")] + OptimalFit(Penalties), + + /// Custom wrapping function. + /// + /// Use this if you want to implement your own wrapping algorithm. + /// The function can freely decide how to turn a slice of + /// [`Word`]s into lines. + /// + /// # Example + /// + /// ``` + /// use textwrap::core::Word; + /// use textwrap::{wrap, Options, WrapAlgorithm}; + /// + /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> { + /// let mut lines = Vec::new(); + /// let mut step = 1; + /// let mut start_idx = 0; + /// while start_idx + step <= words.len() { + /// lines.push(&words[start_idx .. start_idx+step]); + /// start_idx += step; + /// step += 1; + /// } + /// lines + /// } + /// + /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair)); + /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options), + /// vec!["First,", + /// "second, third,", + /// "fourth, fifth, sixth"]); + /// ``` + Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>), +} + +impl std::fmt::Debug for WrapAlgorithm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WrapAlgorithm::FirstFit => f.write_str("FirstFit"), + #[cfg(feature = "smawk")] + WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties), + WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"), + } + } +} + +impl WrapAlgorithm { + /// Create new wrap algorithm. + /// + /// The best wrapping algorithm is used by default, i.e., + /// [`WrapAlgorithm::OptimalFit`] if available, otherwise + /// [`WrapAlgorithm::FirstFit`]. + pub const fn new() -> Self { + #[cfg(not(feature = "smawk"))] + { + WrapAlgorithm::FirstFit + } + + #[cfg(feature = "smawk")] + { + WrapAlgorithm::new_optimal_fit() + } + } + + /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This + /// works well for monospace text. + /// + /// **Note:** Only available when the `smawk` Cargo feature is + /// enabled. + #[cfg(feature = "smawk")] + pub const fn new_optimal_fit() -> Self { + WrapAlgorithm::OptimalFit(Penalties::new()) + } + + /// Wrap words according to line widths. + /// + /// The `line_widths` slice gives the target line width for each + /// line (the last slice element is repeated as necessary). This + /// can be used to implement hanging indentation. + #[inline] + pub fn wrap<'a, 'b>( + &self, + words: &'b [Word<'a>], + line_widths: &'b [usize], + ) -> Vec<&'b [Word<'a>]> { + // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 + // = 9_007_199_254_740_992 can be represented without loss by + // a f64. Larger line widths will be rounded to the nearest + // representable number. + let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>(); + + match self { + WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), + + #[cfg(feature = "smawk")] + WrapAlgorithm::OptimalFit(penalties) => { + // The computation cannnot overflow when the line + // widths are restricted to usize. + wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() + } + + WrapAlgorithm::Custom(func) => func(words, line_widths), + } + } +} + +impl Default for WrapAlgorithm { + fn default() -> Self { + WrapAlgorithm::new() + } +} + +/// Wrap abstract fragments into lines with a first-fit algorithm. +/// +/// The `line_widths` slice gives the target line width for each line +/// (the last slice element is repeated as necessary). This can be +/// used to implement hanging indentation. +/// +/// The fragments must already have been split into the desired +/// widths, this function will not (and cannot) attempt to split them +/// further when arranging them into lines. +/// +/// # First-Fit Algorithm +/// +/// This implements a simple “greedy” algorithm: accumulate fragments +/// one by one and when a fragment no longer fits, start a new line. +/// There is no look-ahead, we simply take first fit of the fragments +/// we find. +/// +/// While fast and predictable, this algorithm can produce poor line +/// breaks when a long fragment is moved to a new line, leaving behind +/// a large gap: +/// +/// ``` +/// use textwrap::core::Word; +/// use textwrap::wrap_algorithms::wrap_first_fit; +/// use textwrap::WordSeparator; +/// +/// // Helper to convert wrapped lines to a Vec<String>. +/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> { +/// lines.iter().map(|line| { +/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ") +/// }).collect::<Vec<_>>() +/// } +/// +/// let text = "These few words will unfortunately not wrap nicely."; +/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>(); +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), +/// vec!["These few words", +/// "will", // <-- short line +/// "unfortunately", +/// "not wrap", +/// "nicely."]); +/// +/// // We can avoid the short line if we look ahead: +/// #[cfg(feature = "smawk")] +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; +/// #[cfg(feature = "smawk")] +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), +/// vec!["These few", +/// "words will", +/// "unfortunately", +/// "not wrap", +/// "nicely."]); +/// ``` +/// +/// The [`wrap_optimal_fit`] function was used above to get better +/// line breaks. It uses an advanced algorithm which tries to avoid +/// short lines. This function is about 4 times faster than +/// [`wrap_optimal_fit`]. +/// +/// # Examples +/// +/// Imagine you're building a house site and you have a number of +/// tasks you need to execute. Things like pour foundation, complete +/// framing, install plumbing, electric cabling, install insulation. +/// +/// The construction workers can only work during daytime, so they +/// need to pack up everything at night. Because they need to secure +/// their tools and move machines back to the garage, this process +/// takes much more time than the time it would take them to simply +/// switch to another task. +/// +/// You would like to make a list of tasks to execute every day based +/// on your estimates. You can model this with a program like this: +/// +/// ``` +/// use textwrap::core::{Fragment, Word}; +/// use textwrap::wrap_algorithms::wrap_first_fit; +/// +/// #[derive(Debug)] +/// struct Task<'a> { +/// name: &'a str, +/// hours: f64, // Time needed to complete task. +/// sweep: f64, // Time needed for a quick sweep after task during the day. +/// cleanup: f64, // Time needed for full cleanup if day ends with this task. +/// } +/// +/// impl Fragment for Task<'_> { +/// fn width(&self) -> f64 { self.hours } +/// fn whitespace_width(&self) -> f64 { self.sweep } +/// fn penalty_width(&self) -> f64 { self.cleanup } +/// } +/// +/// // The morning tasks +/// let tasks = vec![ +/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 }, +/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 }, +/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// ]; +/// +/// // Fill tasks into days, taking `day_length` into account. The +/// // output shows the hours worked per day along with the names of +/// // the tasks for that day. +/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> { +/// let mut days = Vec::new(); +/// // Assign tasks to days. The assignment is a vector of slices, +/// // with a slice per day. +/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); +/// for day in assigned_days.iter() { +/// let last = day.last().unwrap(); +/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); +/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>(); +/// days.push((work_hours - last.sweep + last.cleanup, names)); +/// } +/// days +/// } +/// +/// // With a single crew working 8 hours a day: +/// assert_eq!( +/// assign_days(&tasks, 8.0), +/// [ +/// (7.0, vec!["Foundation"]), +/// (8.0, vec!["Framing", "Plumbing"]), +/// (7.0, vec!["Electrical", "Insulation"]), +/// (5.0, vec!["Drywall"]), +/// (7.0, vec!["Floors", "Countertops"]), +/// (4.0, vec!["Bathrooms"]), +/// ] +/// ); +/// +/// // With two crews working in shifts, 16 hours a day: +/// assert_eq!( +/// assign_days(&tasks, 16.0), +/// [ +/// (14.0, vec!["Foundation", "Framing", "Plumbing"]), +/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]), +/// (6.0, vec!["Countertops", "Bathrooms"]), +/// ] +/// ); +/// ``` +/// +/// Apologies to anyone who actually knows how to build a house and +/// knows how long each step takes :-) +pub fn wrap_first_fit<'a, 'b, T: Fragment>( + fragments: &'a [T], + line_widths: &'b [f64], +) -> Vec<&'a [T]> { + // The final line width is used for all remaining lines. + let default_line_width = line_widths.last().copied().unwrap_or(0.0); + let mut lines = Vec::new(); + let mut start = 0; + let mut width = 0.0; + + for (idx, fragment) in fragments.iter().enumerate() { + let line_width = line_widths + .get(lines.len()) + .copied() + .unwrap_or(default_line_width); + if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { + lines.push(&fragments[start..idx]); + start = idx; + width = 0.0; + } + width += fragment.width() + fragment.whitespace_width(); + } + lines.push(&fragments[start..]); + lines +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_string_longer_than_f64() { + let words = vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307), + Word(6e307), + ]; + // Wrap at just under f64::MAX (~19e307). The tiny + // whitespace_widths disappear because of loss of precision. + assert_eq!( + wrap_first_fit(&words, &[15e307]), + &[ + vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307) + ], + vec![Word(6e307)] + ] + ); + } +} |