diff options
Diffstat (limited to 'third_party/rust/textwrap')
24 files changed, 6349 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/.cargo-checksum.json b/third_party/rust/textwrap/.cargo-checksum.json new file mode 100644 index 0000000000..0948683828 --- /dev/null +++ b/third_party/rust/textwrap/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"2b49672ca15da27844abb6643660faa3a517b1b731c7c5023a55de2e570f35fb","Cargo.lock":"c60631b9ccb2798984507f9e074ce15d9b6e59f34d4dc3418ef40c222ac55725","Cargo.toml":"bccd38e75da62b89a7bb2d741926688f1822ecd5165a703ffaadf9177a34e919","LICENSE":"ce93600c49fbb3e14df32efe752264644f6a2f8e08a735ba981725799e5309ef","README.md":"5dd8128a4e9057aeb6133a073d30a819230243907e717349101b41a11ec23234","rustfmt.toml":"02637ad90caa19885b25b1ce8230657b3703402775d9db83687a3f55de567509","src/columns.rs":"73432251f95ac0b84d5e971989ebc5f867d8b8ca82d5e3fc67fe3a66216fbc38","src/core.rs":"e2cc6b1e5978df0db9b6d0425e7d0ebf65dd188aff90df800f1f2dda7b1c53f2","src/fill.rs":"1fe773dad2d0bb67a7739b3931c1ee3269d677b71a0716dcdb5b01fe2539d7c2","src/fuzzing.rs":"0a77010a555a244ac5e907754b2104912299815009922cfdc0f6b48d92135295","src/indentation.rs":"f41ee8be41e01620c7d88b76f81a01ce6a619939505eaf3fcfe6c8021fae022b","src/lib.rs":"d5d39085faa4527bf6c16a91c5a44b9b894e3f3a2606763bceac22038528c28c","src/line_ending.rs":"bf416f683ab952d4df75d5dc3c199e7ae7740db2c5982ac1a20c3f4b186ded76","src/options.rs":"0d3aec6ab238f3aa14aa57e736384ec208cd3013373941c76d66c0125ca0630f","src/refill.rs":"33ce98ef31c4791893fc2136edd8f8d95cdd38fa54daa59aaf078b359c43d913","src/termwidth.rs":"2e7854e822c435341bc4d467d13614d417df4f2f530cea3c5e49e3b44e754943","src/word_separators.rs":"d3b2b5faf224bf414bf9da48be02eaffb41aec3a91674bedab02ad5748344143","src/word_splitters.rs":"8de2b92eff6d752e321f219136b45b9812267b5be7ace57602a3bb9d3b5cf332","src/wrap.rs":"52c48e2e5155100e4067363e56b180785684bca3109c95c3425ef8051738ff0e","src/wrap_algorithms.rs":"c99498f2e58634f707545ba73c3a99025086d1afb8c12aeceff2ced2887bb8ae","src/wrap_algorithms/optimal_fit.rs":"a9ce8bad61d4fa81df9e292a557fbf5303df
78391d63610ec512b9b06f9193b1","tests/indent.rs":"51f977db11632a32fafecf86af88413d51238fe6efcf18ec52fac89133714278","tests/version-numbers.rs":"9e964f58dbdf051fc6fe0d6542ab312d3e95f26c3fd14bce84449bb625e45761"},"package":"23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"}
\ No newline at end of file diff --git a/third_party/rust/textwrap/CHANGELOG.md b/third_party/rust/textwrap/CHANGELOG.md new file mode 100644 index 0000000000..362c0c0b9d --- /dev/null +++ b/third_party/rust/textwrap/CHANGELOG.md @@ -0,0 +1,616 @@ +# Changelog + +This file lists the most important changes made in each release of +`textwrap`. + +## Version 0.16.1 (2024-02-17) + +This release fixes `display_width` to ignore inline-hyperlinks. The minimum +supported version of Rust is now documented to be 1.56. + +* [#526](https://github.com/mgeisler/textwrap/pull/526): Ignore ANSI hyperlinks + in `display_width`: calculations. +* [#521](https://github.com/mgeisler/textwrap/pull/521): Add `Options::width` + setter method. +* [#520](https://github.com/mgeisler/textwrap/pull/520): Clarify that + `WordSeparator` is an enum rather than a trait. +* [#518](https://github.com/mgeisler/textwrap/pull/518): Test with latest stable + and nightly Rust, but check that we can build with Rust 1.56. + +## Version 0.16.0 (2022-10-23) + +This release marks `Options` as `non_exhaustive` and extends it to +make line endings configurable, it adds new fast paths to `fill` and +`wrap`, and it fixes crashes in `unfill` and `refill`. + +* [#480](https://github.com/mgeisler/textwrap/pull/480): Mark + `Options` as `non_exhaustive`. This will allow us to extend the + struct in the future without breaking backwards compatibility. +* [#478](https://github.com/mgeisler/textwrap/pull/478): Add fast + paths to `fill` and `wrap`. This makes the functions 10-25 times + faster when the no wrapping is needed. +* [#468](https://github.com/mgeisler/textwrap/pull/468): Fix `refill` + to add back correct line ending. +* [#467](https://github.com/mgeisler/textwrap/pull/467): Fix crashes + in `unfill` and `refill`. +* [#458](https://github.com/mgeisler/textwrap/pull/458): Test with + Rust 1.56 (first compiler release with support for Rust 2021). 
+* [#454](https://github.com/mgeisler/textwrap/pull/454): Make line + endings configurable. +* [#448](https://github.com/mgeisler/textwrap/pull/448): Migrate to + the Rust 2021 edition. + +## Version 0.15.2 (2022-10-24) + +This release is identical to 0.15.0 and is only there to give people a +way to install crates which depend on the yanked 0.15.1 release. See +[#484](https://github.com/mgeisler/textwrap/issues/484) for details. + +## Version 0.15.1 (2022-09-15) + +This release was yanked since it accidentally broke backwards +compatibility with 0.15.0. + +## Version 0.15.0 (2022-02-27) + +This is a major feature release with two main changes: + +* [#421](https://github.com/mgeisler/textwrap/pull/421): Use `f64` + instead of `usize` for fragment widths. + + This fixes problems with overflows in the internal computations of + `wrap_optimal_fit` when fragments (words) or line lengths had + extreme values, such as `usize::MAX`. + +* [#438](https://github.com/mgeisler/textwrap/pull/438): Simplify + `Options` by removing generic type parameters. + + This change removes the new generic parameters introduced in version + 0.14, as well as the original `WrapSplitter` parameter which has + been present since very early versions. + + The result is a simplification of function and struct signatures + across the board. So what used to be + + ```rust + let options: Options< + wrap_algorithms::FirstFit, + word_separators::AsciiSpace, + word_splitters::HyphenSplitter, + > = Options::new(80); + ``` + + if types are fully written out, is now simply + + ```rust + let options: Options<'_> = Options::new(80); + ``` + + The anonymous lifetime represent the lifetime of the + `initial_indent` and `subsequent_indent` strings. The change is + nearly performance neutral (a 1-2% regression). + +Smaller improvements and changes: + +* [#404](https://github.com/mgeisler/textwrap/pull/404): Make + documentation for short last-line penalty more precise. 
+* [#405](https://github.com/mgeisler/textwrap/pull/405): Cleanup and + simplify `Options` docstring. +* [#411](https://github.com/mgeisler/textwrap/pull/411): Default to + `OptimalFit` in interactive example. +* [#415](https://github.com/mgeisler/textwrap/pull/415): Add demo + program to help compute binary sizes. +* [#423](https://github.com/mgeisler/textwrap/pull/423): Add fuzz + tests with fully arbitrary fragments. +* [#424](https://github.com/mgeisler/textwrap/pull/424): Change + `wrap_optimal_fit` penalties to non-negative numbers. +* [#430](https://github.com/mgeisler/textwrap/pull/430): Add + `debug-words` example. +* [#432](https://github.com/mgeisler/textwrap/pull/432): Use precise + dependency versions in Cargo.toml. + +## Version 0.14.2 (2021-06-27) + +The 0.14.1 release included more changes than intended and has been +yanked. The change intended for 0.14.1 is now included in 0.14.2. + +## Version 0.14.1 (2021-06-26) + +This release fixes a panic reported by @Makoto, thanks! + +* [#391](https://github.com/mgeisler/textwrap/pull/391): Fix panic in + `find_words` due to string access outside of a character boundary. + +## Version 0.14.0 (2021-06-05) + +This is a major feature release which makes Textwrap more configurable +and flexible. The high-level API of `textwrap::wrap` and +`textwrap::fill` remains unchanged, but low-level structs have moved +around. + +The biggest change is the introduction of new generic type parameters +to the `Options` struct. These parameters lets you statically +configure the wrapping algorithm, the word separator, and the word +splitter. If you previously spelled out the full type for `Options`, +you now need to take the extra type parameters into account. 
This +means that + +```rust +let options: Options<HyphenSplitter> = Options::new(80); +``` + +changes to + +```rust +let options: Options< + wrap_algorithms::FirstFit, + word_separators::AsciiSpace, + word_splitters::HyphenSplitter, +> = Options::new(80); +``` + +This is quite a mouthful, so we suggest using type inference where +possible. You won’t see any change if you call `wrap` directly with a +width or with an `Options` value constructed on the fly. Please open +an issue if this causes problems for you! + +### New `WordSeparator` Trait + +* [#332](https://github.com/mgeisler/textwrap/pull/332): Add + `WordSeparator` trait to allow customizing how words are found in a + line of text. Until now, Textwrap would always assume that words are + separated by ASCII space characters. You can now customize this as + needed. + +* [#313](https://github.com/mgeisler/textwrap/pull/313): Add support + for using the Unicode line breaking algorithm to find words. This is + done by adding a second implementation of the new `WordSeparator` + trait. The implementation uses the unicode-linebreak crate, which is + a new optional dependency. + + With this, Textwrap can be used with East-Asian languages such as + Chinese or Japanese where there are no spaces between words. + Breaking a long sequence of emojis is another example where line + breaks might be wanted even if there is no whitespace to be found. + Feedback would be appreciated for this feature. + + +### Indent + +* [#353](https://github.com/mgeisler/textwrap/pull/353): Trim trailing + whitespace from `prefix` in `indent`. + + Before, empty lines would get no prefix added. Now, empty lines have + a trimmed prefix added. This little trick makes `indent` much more + useful since you can now safely indent with `"# "` without creating + trailing whitespace in the output due to the trailing whitespace in + your prefix. 
+ +* [#354](https://github.com/mgeisler/textwrap/pull/354): Make `indent` + about 20% faster by preallocating the output string. + + +### Documentation + +* [#308](https://github.com/mgeisler/textwrap/pull/308): Document + handling of leading and trailing whitespace when wrapping text. + +### WebAssembly Demo + +* [#310](https://github.com/mgeisler/textwrap/pull/310): Thanks to + WebAssembly, you can now try out Textwrap directly in your browser. + Please try it out: https://mgeisler.github.io/textwrap/. + +### New Generic Parameters + +* [#331](https://github.com/mgeisler/textwrap/pull/331): Remove outer + boxing from `Options`. + +* [#357](https://github.com/mgeisler/textwrap/pull/357): Replace + `core::WrapAlgorithm` enum with a `wrap_algorithms::WrapAlgorithm` + trait. This allows for arbitrary wrapping algorithms to be plugged + into the library. + +* [#358](https://github.com/mgeisler/textwrap/pull/358): Switch + wrapping functions to use a slice for `line_widths`. + +* [#368](https://github.com/mgeisler/textwrap/pull/368): Move + `WordSeparator` and `WordSplitter` traits to separate modules. + Before, Textwrap had several top-level structs such as + `NoHyphenation` and `HyphenSplitter`. These implementations of + `WordSplitter` now live in a dedicated `word_splitters` module. + Similarly, we have a new `word_separators` module for + implementations of `WordSeparator`. + +* [#369](https://github.com/mgeisler/textwrap/pull/369): Rename + `Options::splitter` to `Options::word_splitter` for consistency with + the other fields backed by traits. + +## Version 0.13.4 (2021-02-23) + +This release removes `println!` statements which were left behind in +`unfill` by mistake. + +* [#296](https://github.com/mgeisler/textwrap/pull/296): Improve house + building example with more comments. +* [#297](https://github.com/mgeisler/textwrap/pull/297): Remove debug + prints in the new `unfill` function. 
+ +## Version 0.13.3 (2021-02-20) + +This release contains a bugfix for `indent` and improved handling of +emojis. We’ve also added a new function for formatting text in columns +and functions for reformatting already wrapped text. + +* [#276](https://github.com/mgeisler/textwrap/pull/276): Extend + `core::display_width` to handle emojis when the unicode-width Cargo + feature is disabled. +* [#279](https://github.com/mgeisler/textwrap/pull/279): Make `indent` + preserve existing newlines in the input string. Before, + `indent("foo", "")` would return `"foo\n"` by mistake. It now + returns `"foo"` instead. +* [#281](https://github.com/mgeisler/textwrap/pull/281): Ensure all + `Options` fields have examples. +* [#282](https://github.com/mgeisler/textwrap/pull/282): Add a + `wrap_columns` function. +* [#294](https://github.com/mgeisler/textwrap/pull/294): Add new + `unfill` and `refill` functions. + +## Version 0.13.2 (2020-12-30) + +This release primarily makes all dependencies optional. This makes it +possible to slim down textwrap as needed. + +* [#254](https://github.com/mgeisler/textwrap/pull/254): `impl + WordSplitter` for `Box<T> where T: WordSplitter`. +* [#255](https://github.com/mgeisler/textwrap/pull/255): Use command + line arguments as initial text in interactive example. +* [#256](https://github.com/mgeisler/textwrap/pull/256): Introduce + fuzz tests for `wrap_optimal_fit` and `wrap_first_fit`. +* [#260](https://github.com/mgeisler/textwrap/pull/260): Make the + unicode-width dependency optional. +* [#261](https://github.com/mgeisler/textwrap/pull/261): Make the + smawk dependency optional. + +## Version 0.13.1 (2020-12-10) + +This is a bugfix release which fixes a regression in 0.13.0. The bug +meant that colored text was wrapped incorrectly. + +* [#245](https://github.com/mgeisler/textwrap/pull/245): Support + deleting a word with Ctrl-Backspace in the interactive demo. 
+* [#246](https://github.com/mgeisler/textwrap/pull/246): Show build + type (debug/release) in interactive demo. +* [#249](https://github.com/mgeisler/textwrap/pull/249): Correctly + compute width while skipping over ANSI escape sequences. + +## Version 0.13.0 (2020-12-05) + +This is a major release which rewrites the core logic, adds many new +features, and fixes a couple of bugs. Most programs which use +`textwrap` stays the same, incompatibilities and upgrade notes are +given below. + +Clone the repository and run the following to explore the new features +in an interactive demo (Linux only): + +```sh +$ cargo run --example interactive --all-features +``` + +### Bug Fixes + +#### Rewritten core wrapping algorithm + +* [#221](https://github.com/mgeisler/textwrap/pull/221): Reformulate + wrapping in terms of words with whitespace and penalties. + +The core wrapping algorithm has been completely rewritten. This fixed +bugs and simplified the code, while also making it possible to use +`textwrap` outside the context of the terminal. + +As part of this, trailing whitespace is now discarded consistently +from wrapped lines. Before we would inconsistently remove whitespace +at the end of wrapped lines, except for the last. Leading whitespace +is still preserved. + +### New Features + +#### Optimal-fit wrapping + +* [#234](https://github.com/mgeisler/textwrap/pull/234): Introduce + wrapping using an optimal-fit algorithm. + +This release adds support for new wrapping algorithm which finds a +globally optimal set of line breaks, taking certain penalties into +account. As an example, the old algorithm would produce + + "To be, or" + "not to be:" + "that is" + "the" + "question" + +Notice how the fourth line with “the” is very short. 
The new algorithm +shortens the previous lines slightly to produce fewer short lines: + + "To be," + "or not to" + "be: that" + "is the" + "question" + +Use the new `textwrap::core::WrapAlgorithm` enum to select between the +new and old algorithm. By default, the new algorithm is used. + +The optimal-fit algorithm is inspired by the line breaking algorithm +used in TeX, described in the 1981 article [_Breaking Paragraphs into +Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) by +Knuth and Plass. + +#### In-place wrapping + +* [#226](https://github.com/mgeisler/textwrap/pull/226): Add a + `fill_inplace` function. + +When the text you want to fill is already a temporary `String`, you +can now mutate it in-place with `fill_inplace`: + +```rust +let mut greeting = format!("Greetings {}, welcome to the game! You have {} lives left.", + player.name, player.lives); +fill_inplace(&mut greeting, line_width); +``` + +This is faster than calling `fill` and it will reuse the memory +already allocated for the string. + +### Changed Features + +#### `Wrapper` is replaced with `Options` + +* [#213](https://github.com/mgeisler/textwrap/pull/213): Simplify API + with only top-level functions. +* [#215](https://github.com/mgeisler/textwrap/pull/215): Reintroducing + the type parameter on `Options` (previously known as `Wrapper`). +* [#219](https://github.com/mgeisler/textwrap/pull/219): Allow using + trait objects with `fill` & `wrap`. +* [#227](https://github.com/mgeisler/textwrap/pull/227): Replace + `WrapOptions` with `Into<Options>`. + +The `Wrapper` struct held the options (line width, indentation, etc) +for wrapping text. It was also the entry point for actually wrapping +the text via its methods such as `wrap`, `wrap_iter`, +`into_wrap_iter`, and `fill` methods. + +The struct has been replaced by a simpler `Options` struct which only +holds options. The `Wrapper` methods are gone, their job has been +taken over by the top-level `wrap` and `fill` functions. 
The signatures +of these functions have changed from + +```rust +fn fill(s: &str, width: usize) -> String; + +fn wrap(s: &str, width: usize) -> Vec<Cow<'_, str>>; +``` + +to the more general + +```rust +fn fill<'a, S, Opt>(text: &str, options: Opt) -> String +where + S: WordSplitter, + Opt: Into<Options<'a, S>>; + +fn wrap<'a, S, Opt>(text: &str, options: Opt) -> Vec<Cow<'_, str>> +where + S: WordSplitter, + Opt: Into<Options<'a, S>>; +``` + +The `Into<Options<'a, S>>` bound allows you to pass a `usize` (which +is interpreted as the line width) *and* a full `Options` object. This +allows the new functions to work like the old, plus you can now fully +customize the behavior of the wrapping via `Options` when needed. + +Code that calls `textwrap::wrap` or `textwrap::fill` can remain +unchanged. Code that calls into `Wrapper::wrap` or `Wrapper::fill` +will need to be updated. This is a mechanical change, please see +[#213](https://github.com/mgeisler/textwrap/pull/213) for examples. + +Thanks to @CryptJar and @Koxiat for their support in the PRs above! + +### Removed Features + +* The `wrap_iter` and `into_wrap_iter` methods are gone. This means + that lazy iteration is no longer supported: you always get all + wrapped lines back as a `Vec`. This was done to simplify the code + and to support the optimal-fit algorithm. + + The first-fit algorithm could still be implemented in an incremental + fashion. Please let us know if this is important to you. + +### Other Changes + +* [#206](https://github.com/mgeisler/textwrap/pull/206): Change + `Wrapper.splitter` from `T: WordSplitter` to `Box<dyn + WordSplitter>`. +* [#216](https://github.com/mgeisler/textwrap/pull/216): Forbid the + use of unsafe code. + +## Version 0.12.1 (2020-07-03) + +This is a bugfix release. + +* Fixed [#176][issue-176]: Mention compile-time wrapping by linking to + the [`textwrap-macros` crate]. 
+* Fixed [#193][issue-193]: Wrapping with `break_words(false)` was + broken and would cause extra whitespace to be inserted when words + were longer than the line width. + +## Version 0.12.0 (2020-06-26) + +The code has been updated to the [Rust 2018 edition][rust-2018] and +each new release of `textwrap` will only support the latest stable +version of Rust. Trying to support older Rust versions is a fool's +errand: our dependencies keep releasing new patch versions that +require newer and newer versions of Rust. + +The `term_size` feature has been replaced by `terminal_size`. The API +is unchanged, it is just the name of the Cargo feature that changed. + +The `hyphenation` feature now only embeds the hyphenation patterns for +US-English. This slims down the dependency. + +* Fixed [#140][issue-140]: Ignore ANSI escape sequences. +* Fixed [#158][issue-158]: Unintended wrapping when using external splitter. +* Fixed [#177][issue-177]: Update examples to the 2018 edition. + +## Version 0.11.0 (2018-12-09) + +Due to our dependencies bumping their minimum supported version of +Rust, the minimum version of Rust we test against is now 1.22.0. + +* Merged [#141][issue-141]: Fix `dedent` handling of empty lines and + trailing newlines. Thanks @bbqsrc! +* Fixed [#151][issue-151]: Release of version with hyphenation 0.7. + +## Version 0.10.0 (2018-04-28) + +Due to our dependencies bumping their minimum supported version of +Rust, the minimum version of Rust we test against is now 1.17.0. + +* Fixed [#99][issue-99]: Word broken even though it would fit on line. +* Fixed [#107][issue-107]: Automatic hyphenation is off by one. +* Fixed [#122][issue-122]: Take newlines into account when wrapping. +* Fixed [#129][issue-129]: Panic on string with em-dash. + +## Version 0.9.0 (2017-10-05) + +The dependency on `term_size` is now optional, and by default this +feature is not enabled. This is a *breaking change* for users of +`Wrapper::with_termwidth`. 
Enable the `term_size` feature to restore +the old functionality. + +Added a regression test for the case where `width` is set to +`usize::MAX`, thanks @Fraser999! All public structs now implement +`Debug`, thanks @hcpl! + +* Fixed [#101][issue-101]: Make `term_size` an optional dependency. + +## Version 0.8.0 (2017-09-04) + +The `Wrapper` struct is now generic over the type of word splitter +being used. This means less boxing and a nicer API. The +`Wrapper::word_splitter` method has been removed. This is a *breaking +API change* if you used the method to change the word splitter. + +The `Wrapper` struct has two new methods that will wrap the input text +lazily: `Wrapper::wrap_iter` and `Wrapper::into_wrap_iter`. Use those +if you will be iterating over the wrapped lines one by one. + +* Fixed [#59][issue-59]: `wrap` could return an iterator. Thanks + @hcpl! +* Fixed [#81][issue-81]: Set `html_root_url`. + +## Version 0.7.0 (2017-07-20) + +Version 0.7.0 changes the return type of `Wrapper::wrap` from +`Vec<String>` to `Vec<Cow<'a, str>>`. This means that the output lines +borrow data from the input string. This is a *breaking API change* if +you relied on the exact return type of `Wrapper::wrap`. Callers of the +`textwrap::fill` convenience function will see no breakage. + +The above change and other optimizations makes version 0.7.0 roughly +15-30% faster than version 0.6.0. + +The `squeeze_whitespace` option has been removed since it was +complicating the above optimization. Let us know if this option is +important for you so we can provide a work around. + +* Fixed [#58][issue-58]: Add a "fast_wrap" function. +* Fixed [#61][issue-61]: Documentation errors. 
+ +## Version 0.6.0 (2017-05-22) + +Version 0.6.0 adds builder methods to `Wrapper` for easy one-line +initialization and configuration: + +```rust +let wrapper = Wrapper::new(60).break_words(false); +``` + +It also adds a new `NoHyphenation` word splitter that will never split +words, not even at existing hyphens. + +* Fixed [#28][issue-28]: Support not squeezing whitespace. + +## Version 0.5.0 (2017-05-15) + +Version 0.5.0 has *breaking API changes*. However, this only affects +code using the hyphenation feature. The feature is now optional, so +you will first need to enable the `hyphenation` feature as described +above. Afterwards, please change your code from +```rust +wrapper.corpus = Some(&corpus); +``` +to +```rust +wrapper.splitter = Box::new(corpus); +``` + +Other changes include optimizations, so version 0.5.0 is roughly +10-15% faster than version 0.4.0. + +* Fixed [#19][issue-19]: Add support for finding terminal size. +* Fixed [#25][issue-25]: Handle words longer than `self.width`. +* Fixed [#26][issue-26]: Support custom indentation. +* Fixed [#36][issue-36]: Support building without `hyphenation`. +* Fixed [#39][issue-39]: Respect non-breaking spaces. + +## Version 0.4.0 (2017-01-24) + +Documented complexities and tested these via `cargo bench`. + +* Fixed [#13][issue-13]: Immediately add word if it fits. +* Fixed [#14][issue-14]: Avoid splitting on initial hyphens. + +## Version 0.3.0 (2017-01-07) + +Added support for automatic hyphenation. + +## Version 0.2.0 (2016-12-28) + +Introduced `Wrapper` struct. Added support for wrapping on hyphens. + +## Version 0.1.0 (2016-12-17) + +First public release with support for wrapping strings on whitespace. 
+ +[rust-2018]: https://doc.rust-lang.org/edition-guide/rust-2018/ +[`textwrap-macros` crate]: https://crates.io/crates/textwrap-macros + +[issue-13]: https://github.com/mgeisler/textwrap/issues/13 +[issue-14]: https://github.com/mgeisler/textwrap/issues/14 +[issue-19]: https://github.com/mgeisler/textwrap/issues/19 +[issue-25]: https://github.com/mgeisler/textwrap/issues/25 +[issue-26]: https://github.com/mgeisler/textwrap/issues/26 +[issue-28]: https://github.com/mgeisler/textwrap/issues/28 +[issue-36]: https://github.com/mgeisler/textwrap/issues/36 +[issue-39]: https://github.com/mgeisler/textwrap/issues/39 +[issue-58]: https://github.com/mgeisler/textwrap/issues/58 +[issue-59]: https://github.com/mgeisler/textwrap/issues/59 +[issue-61]: https://github.com/mgeisler/textwrap/issues/61 +[issue-81]: https://github.com/mgeisler/textwrap/issues/81 +[issue-99]: https://github.com/mgeisler/textwrap/issues/99 +[issue-101]: https://github.com/mgeisler/textwrap/issues/101 +[issue-107]: https://github.com/mgeisler/textwrap/issues/107 +[issue-122]: https://github.com/mgeisler/textwrap/issues/122 +[issue-129]: https://github.com/mgeisler/textwrap/issues/129 +[issue-140]: https://github.com/mgeisler/textwrap/issues/140 +[issue-141]: https://github.com/mgeisler/textwrap/issues/141 +[issue-151]: https://github.com/mgeisler/textwrap/issues/151 +[issue-158]: https://github.com/mgeisler/textwrap/issues/158 +[issue-176]: https://github.com/mgeisler/textwrap/issues/176 +[issue-177]: https://github.com/mgeisler/textwrap/issues/177 +[issue-193]: https://github.com/mgeisler/textwrap/issues/193 diff --git a/third_party/rust/textwrap/Cargo.lock b/third_party/rust/textwrap/Cargo.lock new file mode 100644 index 0000000000..98b642073e --- /dev/null +++ b/third_party/rust/textwrap/Cargo.lock @@ -0,0 +1,657 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "hermit-abi" 
+version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" + +[[package]] +name = "hyphenation" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf4dd4c44ae85155502a52c48739c8a48185d1449fff1963cffee63c28a50f0" +dependencies = [ + "bincode", + "fst", + "hyphenation_commons", + "pocket-resources", + "serde", +] + +[[package]] +name = "hyphenation_commons" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5febe7a2ade5c7d98eb8b75f946c046b335324b06a14ea0998271504134c05bf" +dependencies = [ + "fst", + "serde", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libredox" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3af92c55d7d839293953fcd0fda5ecfe93297cfde6ffbdec13b41d99c0ba6607" +dependencies = [ + "bitflags 2.4.2", + "libc", + "redox_syscall", +] + +[[package]] +name = 
"linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "numtoa" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pocket-resources" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c135f38778ad324d9e9ee68690bac2c1a51f340fdf96ca13e2ab3914eb2e51d8" + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.4.2", + "memchr", + "unicase", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = 
"redox_termios" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20145670ba436b55d91fc92d25e71160fbfbdd57831631c8d7d36377a476f1cb" + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustix" +version = "0.37.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "semver" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" + +[[package]] +name = "serde" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" 
+version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "syn" +version = "2.0.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "terminal_size" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "termion" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4648c7def6f2043b2568617b9f9b75eae88ca185dbc1f1fda30e95a85d49d7d" +dependencies = [ + "libc", + "libredox", + "numtoa", + "redox_termios", +] + +[[package]] +name = "textwrap" +version = "0.16.1" +dependencies = [ + "hyphenation", + "smawk", + "terminal_size", + "termion", + "unic-emoji-char", + "unicode-linebreak", + "unicode-width", + "version-sync", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unic-char-property" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" +dependencies = [ + "unic-char-range", +] + +[[package]] +name = "unic-char-range" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" + +[[package]] +name = "unic-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" + +[[package]] +name = "unic-emoji-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-version" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +dependencies = [ + "unic-common", +] + +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "version-sync" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835169da0173ea373ddf5987632aac1f918967fbbe58195e304342282efa6089" +dependencies = [ + "proc-macro2", + "pulldown-cmark", + "regex", + "semver", + "syn", + "toml", + "url", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = 
"windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" 
+version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] diff --git a/third_party/rust/textwrap/Cargo.toml b/third_party/rust/textwrap/Cargo.toml new file mode 100644 index 0000000000..1493f0a8a9 --- /dev/null +++ b/third_party/rust/textwrap/Cargo.toml @@ -0,0 +1,91 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.56" +name = "textwrap" +version = "0.16.1" +authors = ["Martin Geisler <martin@geisler.net>"] +exclude = [ + ".github/", + ".gitignore", + "benchmarks/", + "examples/", + "fuzz/", + "images/", +] +description = "Library for word wrapping, indenting, and dedenting strings. Has optional support for Unicode and emojis as well as machine hyphenation." 
+documentation = "https://docs.rs/textwrap/" +readme = "README.md" +keywords = [ + "text", + "formatting", + "wrap", + "typesetting", + "hyphenation", +] +categories = [ + "text-processing", + "command-line-interface", +] +license = "MIT" +repository = "https://github.com/mgeisler/textwrap" + +[package.metadata.docs.rs] +all-features = true + +[[example]] +name = "hyphenation" +path = "examples/hyphenation.rs" +required-features = ["hyphenation"] + +[[example]] +name = "termwidth" +path = "examples/termwidth.rs" +required-features = ["terminal_size"] + +[dependencies.hyphenation] +version = "0.8.4" +features = ["embed_en-us"] +optional = true + +[dependencies.smawk] +version = "0.3.1" +optional = true + +[dependencies.terminal_size] +version = "0.2.1" +optional = true + +[dependencies.unicode-linebreak] +version = "0.1.4" +optional = true + +[dependencies.unicode-width] +version = "0.1.10" +optional = true + +[dev-dependencies.unic-emoji-char] +version = "0.9.0" + +[dev-dependencies.version-sync] +version = "0.9.4" + +[features] +default = [ + "unicode-linebreak", + "unicode-width", + "smawk", +] + +[target."cfg(unix)".dev-dependencies.termion] +version = "2.0.1" diff --git a/third_party/rust/textwrap/LICENSE b/third_party/rust/textwrap/LICENSE new file mode 100644 index 0000000000..0d37ec3891 --- /dev/null +++ b/third_party/rust/textwrap/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Martin Geisler + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the 
Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/textwrap/README.md b/third_party/rust/textwrap/README.md new file mode 100644 index 0000000000..bf13cd0957 --- /dev/null +++ b/third_party/rust/textwrap/README.md @@ -0,0 +1,176 @@ +# Textwrap + +[![](https://github.com/mgeisler/textwrap/workflows/build/badge.svg)][build-status] +[![](https://codecov.io/gh/mgeisler/textwrap/branch/master/graph/badge.svg)][codecov] +[![](https://img.shields.io/crates/v/textwrap.svg)][crates-io] +[![](https://docs.rs/textwrap/badge.svg)][api-docs] + +Textwrap is a library for wrapping and indenting text. It is most +often used by command-line programs to format dynamic output nicely so +it looks good in a terminal. You can also use Textwrap to wrap text +set in a proportional font—such as text used to generate PDF files, or +drawn on an [HTML5 canvas using WebAssembly][wasm-demo]. + +## Usage + +To use the textwrap crate, add this to your `Cargo.toml` file: +```toml +[dependencies] +textwrap = "0.16" +``` + +By default, this enables word wrapping with support for Unicode +strings. Extra features can be enabled with Cargo features—and the +Unicode support can be disabled if needed. This allows you to slim down +the library so that you only pay for the features you actually +use. + +Please see the [_Cargo Features_ in the crate +documentation](https://docs.rs/textwrap/#cargo-features) for a full +list of the available features as well as their impact on the size of +your binary. 
+ +## Documentation + +**[API documentation][api-docs]** + +## Getting Started + +Word wrapping is easy using the `wrap` and `fill` functions: + +```rust +#[cfg(feature = "smawk")] { +let text = "textwrap: an efficient and powerful library for wrapping text."; +assert_eq!( + textwrap::wrap(text, 28), + vec![ + "textwrap: an efficient", + "and powerful library for", + "wrapping text.", + ] +); +} +``` + +Sharp-eyed readers will notice that the first line is 22 columns wide. +So why is the word “and” put in the second line when there is space +for it in the first line? + +The explanation is that textwrap does not just wrap text one line at a +time. Instead, it uses an optimal-fit algorithm which looks ahead and +chooses line breaks which minimize the gaps left at ends of lines. +This is controlled with the `smawk` Cargo feature, which is why the +example is wrapped in the `cfg`-block. + +Without look-ahead, the first line would be longer and the text would +look like this: + +```rust +#[cfg(not(feature = "smawk"))] { +let text = "textwrap: an efficient and powerful library for wrapping text."; +assert_eq!( + textwrap::wrap(text, 28), + vec![ + "textwrap: an efficient and", + "powerful library for", + "wrapping text.", + ] +); +} +``` + +The second line is now shorter and the text is more ragged. The kind +of wrapping can be configured via `Options::wrap_algorithm`. + +If you enable the `hyphenation` Cargo feature, you get support for +automatic hyphenation for [about 70 languages][patterns] via +high-quality TeX hyphenation patterns. 
+ +Your program must load the hyphenation pattern and configure +`Options::word_splitter` to use it: + +```rust +#[cfg(feature = "hyphenation")] { +use hyphenation::{Language, Load, Standard}; +use textwrap::{fill, Options, WordSplitter}; + +let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +let options = textwrap::Options::new(28).word_splitter(WordSplitter::Hyphenation(dictionary)); +let text = "textwrap: an efficient and powerful library for wrapping text."; + +assert_eq!( + textwrap::wrap(text, &options), + vec![ + "textwrap: an efficient and", + "powerful library for wrap-", + "ping text." + ] +); +} +``` + +The US-English hyphenation patterns are embedded when you enable the +`hyphenation` feature. They are licensed under a [permissive +license][en-us license] and take up about 88 KB in your binary. If you +need hyphenation for other languages, you need to download a +[precompiled `.bincode` file][bincode] and load it yourself. Please +see the [`hyphenation` documentation] for details. + +## Wrapping Strings at Compile Time + +If your strings are known at compile time, please take a look at the +procedural macros from the [`textwrap-macros` crate]. + +## Examples + +The library comes with [a +collection](https://github.com/mgeisler/textwrap/tree/master/examples) +of small example programs that show various features. + +If you want to see Textwrap in action right away, then take a look at +[`examples/wasm/`], which shows how to wrap sans-serif, serif, and +monospace text. It uses WebAssembly and is automatically deployed to +https://mgeisler.github.io/textwrap/. + +For the command-line examples, you’re invited to clone the repository +and try them out for yourself! Of special note is +[`examples/interactive.rs`]. This is a demo program which demonstrates +most of the available features: you can enter text and adjust the +width at which it is wrapped interactively. 
You can also adjust the +`Options` used to see the effect of different `WordSplitter`s and wrap +algorithms. + +Run the demo with + +```sh +$ cargo run --example interactive +``` + +The demo needs a Linux terminal to function. + +## Release History + +Please see the [CHANGELOG file] for details on the changes made in +each release. + +## License + +Textwrap can be distributed according to the [MIT license][mit]. +Contributions will be accepted under the same license. + +[crates-io]: https://crates.io/crates/textwrap +[build-status]: https://github.com/mgeisler/textwrap/actions?query=workflow%3Abuild+branch%3Amaster +[codecov]: https://codecov.io/gh/mgeisler/textwrap +[wasm-demo]: https://mgeisler.github.io/textwrap/ +[`textwrap-macros` crate]: https://crates.io/crates/textwrap-macros +[`hyphenation` example]: https://github.com/mgeisler/textwrap/blob/master/examples/hyphenation.rs +[`termwidth` example]: https://github.com/mgeisler/textwrap/blob/master/examples/termwidth.rs +[patterns]: https://github.com/tapeinosyne/hyphenation/tree/master/patterns +[en-us license]: https://github.com/hyphenation/tex-hyphen/blob/master/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-en-us.tex +[bincode]: https://github.com/tapeinosyne/hyphenation/tree/master/dictionaries +[`hyphenation` documentation]: http://docs.rs/hyphenation +[`examples/wasm/`]: https://github.com/mgeisler/textwrap/tree/master/examples/wasm +[`examples/interactive.rs`]: https://github.com/mgeisler/textwrap/tree/master/examples/interactive.rs +[api-docs]: https://docs.rs/textwrap/ +[CHANGELOG file]: https://github.com/mgeisler/textwrap/blob/master/CHANGELOG.md +[mit]: LICENSE diff --git a/third_party/rust/textwrap/rustfmt.toml b/third_party/rust/textwrap/rustfmt.toml new file mode 100644 index 0000000000..c1578aafbc --- /dev/null +++ b/third_party/rust/textwrap/rustfmt.toml @@ -0,0 +1 @@ +imports_granularity = "Module" diff --git a/third_party/rust/textwrap/src/columns.rs 
b/third_party/rust/textwrap/src/columns.rs new file mode 100644 index 0000000000..d14d5588fa --- /dev/null +++ b/third_party/rust/textwrap/src/columns.rs @@ -0,0 +1,193 @@ +//! Functionality for wrapping text into columns. + +use crate::core::display_width; +use crate::{wrap, Options}; + +/// Wrap text into columns with a given total width. +/// +/// The `left_gap`, `middle_gap` and `right_gap` arguments specify the +/// strings to insert before, between, and after the columns. The +/// total width of all columns and all gaps is specified using the +/// `total_width_or_options` argument. This argument can simply be an +/// integer if you want to use default settings when wrapping, or it +/// can be a [`Options`] value if you want to customize the wrapping. +/// +/// If the columns are narrow, it is recommended to set +/// [`Options::break_words`] to `true` to prevent words from +/// protruding into the margins. +/// +/// The per-column width is computed like this: +/// +/// ``` +/// # let (left_gap, middle_gap, right_gap) = ("", "", ""); +/// # let columns = 2; +/// # let options = textwrap::Options::new(80); +/// let inner_width = options.width +/// - textwrap::core::display_width(left_gap) +/// - textwrap::core::display_width(right_gap) +/// - textwrap::core::display_width(middle_gap) * (columns - 1); +/// let column_width = inner_width / columns; +/// ``` +/// +/// The `text` is wrapped using [`wrap()`] and the given `options` +/// argument, but the width is overwritten to the computed +/// `column_width`. +/// +/// # Panics +/// +/// Panics if `columns` is zero. +/// +/// # Examples +/// +/// ``` +/// use textwrap::wrap_columns; +/// +/// let text = "\ +/// This is an example text, which is wrapped into three columns. 
\ +/// Notice how the final column can be shorter than the others."; +/// +/// #[cfg(feature = "smawk")] +/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"), +/// vec!["| This is | into three | column can be |", +/// "| an example | columns. | shorter than |", +/// "| text, which | Notice how | the others. |", +/// "| is wrapped | the final | |"]); +/// +/// // Without the `smawk` feature, the middle column is a little more uneven: +/// #[cfg(not(feature = "smawk"))] +/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"), +/// vec!["| This is an | three | column can be |", +/// "| example text, | columns. | shorter than |", +/// "| which is | Notice how | the others. |", +/// "| wrapped into | the final | |"]); +pub fn wrap_columns<'a, Opt>( + text: &str, + columns: usize, + total_width_or_options: Opt, + left_gap: &str, + middle_gap: &str, + right_gap: &str, +) -> Vec<String> +where + Opt: Into<Options<'a>>, +{ + assert!(columns > 0); + + let mut options: Options = total_width_or_options.into(); + + let inner_width = options + .width + .saturating_sub(display_width(left_gap)) + .saturating_sub(display_width(right_gap)) + .saturating_sub(display_width(middle_gap) * (columns - 1)); + + let column_width = std::cmp::max(inner_width / columns, 1); + options.width = column_width; + let last_column_padding = " ".repeat(inner_width % column_width); + let wrapped_lines = wrap(text, options); + let lines_per_column = + wrapped_lines.len() / columns + usize::from(wrapped_lines.len() % columns > 0); + let mut lines = Vec::new(); + for line_no in 0..lines_per_column { + let mut line = String::from(left_gap); + for column_no in 0..columns { + match wrapped_lines.get(line_no + column_no * lines_per_column) { + Some(column_line) => { + line.push_str(column_line); + line.push_str(&" ".repeat(column_width - display_width(column_line))); + } + None => { + line.push_str(&" ".repeat(column_width)); + } + } + if column_no == columns - 1 { + 
line.push_str(&last_column_padding); + } else { + line.push_str(middle_gap); + } + } + line.push_str(right_gap); + lines.push(line); + } + + lines +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn wrap_columns_empty_text() { + assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]); + } + + #[test] + fn wrap_columns_single_column() { + assert_eq!( + wrap_columns("Foo", 3, 30, "| ", " | ", " |"), + vec!["| Foo | | |"] + ); + } + + #[test] + fn wrap_columns_uneven_columns() { + // The gaps take up a total of 5 columns, so the columns are + // (21 - 5)/4 = 4 columns wide: + assert_eq!( + wrap_columns("Foo Bar Baz Quux", 4, 21, "|", "|", "|"), + vec!["|Foo |Bar |Baz |Quux|"] + ); + // As the total width increases, the last column absorbs the + // excess width: + assert_eq!( + wrap_columns("Foo Bar Baz Quux", 4, 24, "|", "|", "|"), + vec!["|Foo |Bar |Baz |Quux |"] + ); + // Finally, when the width is 25, the columns can be resized + // to a width of (25 - 5)/4 = 5 columns: + assert_eq!( + wrap_columns("Foo Bar Baz Quux", 4, 25, "|", "|", "|"), + vec!["|Foo |Bar |Baz |Quux |"] + ); + } + + #[test] + #[cfg(feature = "unicode-width")] + fn wrap_columns_with_emojis() { + assert_eq!( + wrap_columns( + "Words and a few emojis 😍 wrapped in ⓶ columns", + 2, + 30, + "✨ ", + " ⚽ ", + " 👀" + ), + vec![ + "✨ Words ⚽ wrapped in 👀", + "✨ and a few ⚽ ⓶ columns 👀", + "✨ emojis 😍 ⚽ 👀" + ] + ); + } + + #[test] + fn wrap_columns_big_gaps() { + // The column width shrinks to 1 because the gaps take up all + // the space. + assert_eq!( + wrap_columns("xyz", 2, 10, "----> ", " !!! ", " <----"), + vec![ + "----> x !!! z <----", // + "----> y !!! 
<----" + ] + ); + } + + #[test] + #[should_panic] + fn wrap_columns_panic_with_zero_columns() { + wrap_columns("", 0, 10, "", "", ""); + } +} diff --git a/third_party/rust/textwrap/src/core.rs b/third_party/rust/textwrap/src/core.rs new file mode 100644 index 0000000000..6b07f763c8 --- /dev/null +++ b/third_party/rust/textwrap/src/core.rs @@ -0,0 +1,461 @@ +//! Building blocks for advanced wrapping functionality. +//! +//! The functions and structs in this module can be used to implement +//! advanced wrapping functionality when [`wrap()`](crate::wrap()) +//! [`fill()`](crate::fill()) don't do what you want. +//! +//! In general, you want to follow these steps when wrapping +//! something: +//! +//! 1. Split your input into [`Fragment`]s. These are abstract blocks +//! of text or content which can be wrapped into lines. See +//! [`WordSeparator`](crate::word_separators::WordSeparator) for +//! how to do this for text. +//! +//! 2. Potentially split your fragments into smaller pieces. This +//! allows you to implement things like hyphenation. If you use the +//! `Word` type, you can use [`WordSplitter`](crate::WordSplitter) +//! enum for this. +//! +//! 3. Potentially break apart fragments that are still too large to +//! fit on a single line. This is implemented in [`break_words`]. +//! +//! 4. Finally take your fragments and put them into lines. There are +//! two algorithms for this in the +//! [`wrap_algorithms`](crate::wrap_algorithms) module: +//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit) +//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit). +//! The former produces better line breaks, the latter is faster. +//! +//! 5. Iterate through the slices returned by the wrapping functions +//! and construct your lines of output. +//! +//! Please [open an issue](https://github.com/mgeisler/textwrap/) if +//! the functionality here is not sufficient or if you have ideas for +//! improving it. We would love to hear from you! 
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
const CSI: (char, char) = ('\x1b', '[');
/// The final bytes of an ANSI escape sequence must be in this range.
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';

/// Skip ANSI escape sequences.
///
/// `ch` is the char which was just read and `chars` yields the chars
/// which follow it. When `ch` is an escape character (`\x1b`), the
/// rest of the sequence is consumed from `chars`; otherwise `chars`
/// is left untouched.
///
/// Returns `true` if one or more chars were skipped.
#[inline]
pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    if ch != CSI.0 {
        // Not an escape character: there is nothing to skip.
        return false;
    }

    match chars.next() {
        Some(introducer) if introducer == CSI.1 => {
            // Start of an ANSI escape code, typically used for
            // colored terminal text. Consume everything up to and
            // including the first "final byte" in the range
            // 0x40–0x7E.
            let _ = chars.find(|c| ANSI_FINAL_BYTE.contains(c));
        }
        Some(']') => {
            // Start of an Operating System Command, which extends
            // until the next "\x1b\\" (the String Terminator
            // sequence) or the BEL character. The BEL character is
            // non-standard, but it is still used quite often, for
            // example, by GNU ls.
            let mut previous = ']';
            let _ = chars.find(|&current| {
                let terminated = current == '\x07' || (current == '\\' && previous == CSI.0);
                previous = current;
                terminated
            });
        }
        // Any other char (or the end of the input) after the escape
        // character: the char read above is all that is consumed.
        _ => {}
    }

    // The escape character itself was skipped.
    true
}

/// Width of `ch` as reported by the `unicode-width` crate. Chars for
/// which the crate reports no width count as zero columns.
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
}
+#[cfg(not(feature = "unicode-width"))] +const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}'; + +#[cfg(not(feature = "unicode-width"))] +#[inline] +fn ch_width(ch: char) -> usize { + if ch < DOUBLE_WIDTH_CUTOFF { + 1 + } else { + 2 + } +} + +/// Compute the display width of `text` while skipping over ANSI +/// escape sequences. +/// +/// # Examples +/// +/// ``` +/// use textwrap::core::display_width; +/// +/// assert_eq!(display_width("Café Plain"), 10); +/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); +/// assert_eq!(display_width("\x1b]8;;http://example.com\x1b\\This is a link\x1b]8;;\x1b\\"), 14); +/// ``` +/// +/// **Note:** When the `unicode-width` Cargo feature is disabled, the +/// width of a `char` is determined by a crude approximation which +/// simply counts chars below U+1100 as 1 column wide, and all other +/// characters as 2 columns wide. With the feature enabled, function +/// will correctly deal with [combining characters] in their +/// decomposed form (see [Unicode equivalence]). +/// +/// An example of a decomposed character is “é”, which can be +/// decomposed into: “e” followed by a combining acute accent: “◌́”. +/// Without the `unicode-width` Cargo feature, every `char` below +/// U+1100 has a width of 1. 
This includes the combining accent: +/// +/// ``` +/// use textwrap::core::display_width; +/// +/// assert_eq!(display_width("Cafe Plain"), 10); +/// #[cfg(feature = "unicode-width")] +/// assert_eq!(display_width("Cafe\u{301} Plain"), 10); +/// #[cfg(not(feature = "unicode-width"))] +/// assert_eq!(display_width("Cafe\u{301} Plain"), 11); +/// ``` +/// +/// ## Emojis and CJK Characters +/// +/// Characters such as emojis and [CJK characters] used in the +/// Chinese, Japanese, and Korean languages are seen as double-width, +/// even if the `unicode-width` feature is disabled: +/// +/// ``` +/// use textwrap::core::display_width; +/// +/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); +/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese +/// ``` +/// +/// # Limitations +/// +/// The displayed width of a string cannot always be computed from the +/// string alone. This is because the width depends on the rendering +/// engine used. This is particularly visible with [emoji modifier +/// sequences] where a base emoji is modified with, e.g., skin tone or +/// hair color modifiers. It is up to the rendering engine to detect +/// this and to produce a suitable emoji. +/// +/// A simple example is “❤️”, which consists of “❤” (U+2764: Black +/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By +/// itself, “❤” is a black heart, but if you follow it with the +/// variant selector, you may get a wider red heart. +/// +/// A more complex example would be “👨🦰” which should depict a man +/// with red hair. 
Here the computed width is too large — and the +/// width differs depending on the use of the `unicode-width` feature: +/// +/// ``` +/// use textwrap::core::display_width; +/// +/// assert_eq!("👨🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']); +/// #[cfg(feature = "unicode-width")] +/// assert_eq!(display_width("👨🦰"), 4); +/// #[cfg(not(feature = "unicode-width"))] +/// assert_eq!(display_width("👨🦰"), 6); +/// ``` +/// +/// This happens because the grapheme consists of three code points: +/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰” +/// (U+1F9B0: Red Hair). You can see them above in the test. With +/// `unicode-width` enabled, the ZWJ is correctly seen as having zero +/// width, without it is counted as a double-width character. +/// +/// ## Terminal Support +/// +/// Modern browsers typically do a great job at combining characters +/// as shown above, but terminals often struggle more. As an example, +/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but +/// shows "👨🦰" as “👨🦰”. +/// +/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character +/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence +/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters +/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html +pub fn display_width(text: &str) -> usize { + let mut chars = text.chars(); + let mut width = 0; + while let Some(ch) = chars.next() { + if skip_ansi_escape_sequence(ch, &mut chars) { + continue; + } + width += ch_width(ch); + } + width +} + +/// A (text) fragment denotes the unit which we wrap into lines. +/// +/// Fragments represent an abstract _word_ plus the _whitespace_ +/// following the word. In case the word falls at the end of the line, +/// the whitespace is dropped and a so-called _penalty_ is inserted +/// instead (typically `"-"` if the word was hyphenated). 
+/// +/// For wrapping purposes, the precise content of the word, the +/// whitespace, and the penalty is irrelevant. All we need to know is +/// the displayed width of each part, which this trait provides. +pub trait Fragment: std::fmt::Debug { + /// Displayed width of word represented by this fragment. + fn width(&self) -> f64; + + /// Displayed width of the whitespace that must follow the word + /// when the word is not at the end of a line. + fn whitespace_width(&self) -> f64; + + /// Displayed width of the penalty that must be inserted if the + /// word falls at the end of a line. + fn penalty_width(&self) -> f64; +} + +/// A piece of wrappable text, including any trailing whitespace. +/// +/// A `Word` is an example of a [`Fragment`], so it has a width, +/// trailing whitespace, and potentially a penalty item. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Word<'a> { + /// Word content. + pub word: &'a str, + /// Whitespace to insert if the word does not fall at the end of a line. + pub whitespace: &'a str, + /// Penalty string to insert if the word falls at the end of a line. + pub penalty: &'a str, + // Cached width in columns. + pub(crate) width: usize, +} + +impl std::ops::Deref for Word<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.word + } +} + +impl<'a> Word<'a> { + /// Construct a `Word` from a string. + /// + /// A trailing stretch of `' '` is automatically taken to be the + /// whitespace part of the word. + pub fn from(word: &str) -> Word<'_> { + let trimmed = word.trim_end_matches(' '); + Word { + word: trimmed, + width: display_width(trimmed), + whitespace: &word[trimmed.len()..], + penalty: "", + } + } + + /// Break this word into smaller words with a width of at most + /// `line_width`. The whitespace and penalty from this `Word` is + /// added to the last piece. + /// + /// # Examples + /// + /// ``` + /// use textwrap::core::Word; + /// assert_eq!( + /// Word::from("Hello! 
").break_apart(3).collect::<Vec<_>>(), + /// vec![Word::from("Hel"), Word::from("lo! ")] + /// ); + /// ``` + pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b { + let mut char_indices = self.word.char_indices(); + let mut offset = 0; + let mut width = 0; + + std::iter::from_fn(move || { + while let Some((idx, ch)) = char_indices.next() { + if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { + continue; + } + + if width > 0 && width + ch_width(ch) > line_width { + let word = Word { + word: &self.word[offset..idx], + width: width, + whitespace: "", + penalty: "", + }; + offset = idx; + width = ch_width(ch); + return Some(word); + } + + width += ch_width(ch); + } + + if offset < self.word.len() { + let word = Word { + word: &self.word[offset..], + width: width, + whitespace: self.whitespace, + penalty: self.penalty, + }; + offset = self.word.len(); + return Some(word); + } + + None + }) + } +} + +impl Fragment for Word<'_> { + #[inline] + fn width(&self) -> f64 { + self.width as f64 + } + + // We assume the whitespace consist of ' ' only. This allows us to + // compute the display width in constant time. + #[inline] + fn whitespace_width(&self) -> f64 { + self.whitespace.len() as f64 + } + + // We assume the penalty is `""` or `"-"`. This allows us to + // compute the display width in constant time. + #[inline] + fn penalty_width(&self) -> f64 { + self.penalty.len() as f64 + } +} + +/// Forcibly break words wider than `line_width` into smaller words. +/// +/// This simply calls [`Word::break_apart`] on words that are too +/// wide. This means that no extra `'-'` is inserted, the word is +/// simply broken into smaller pieces. 
+pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>> +where + I: IntoIterator<Item = Word<'a>>, +{ + let mut shortened_words = Vec::new(); + for word in words { + if word.width() > line_width as f64 { + shortened_words.extend(word.break_apart(line_width)); + } else { + shortened_words.push(word); + } + } + shortened_words +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "unicode-width")] + use unicode_width::UnicodeWidthChar; + + #[test] + fn skip_ansi_escape_sequence_works() { + let blue_text = "\u{1b}[34mHello\u{1b}[0m"; + let mut chars = blue_text.chars(); + let ch = chars.next().unwrap(); + assert!(skip_ansi_escape_sequence(ch, &mut chars)); + assert_eq!(chars.next(), Some('H')); + } + + #[test] + fn emojis_have_correct_width() { + use unic_emoji_char::is_emoji; + + // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement + // blocks all have a width of 1 column. This includes + // characters such as '#' and '©'. + for ch in '\u{1}'..'\u{FF}' { + if is_emoji(ch) { + let desc = format!("{:?} U+{:04X}", ch, ch as u32); + + #[cfg(feature = "unicode-width")] + assert_eq!(ch.width().unwrap(), 1, "char: {}", desc); + + #[cfg(not(feature = "unicode-width"))] + assert_eq!(ch_width(ch), 1, "char: {}", desc); + } + } + + // Emojis in the remaining blocks of the Basic Multilingual + // Plane (BMP), in the Supplementary Multilingual Plane (SMP), + // and in the Supplementary Ideographic Plane (SIP), are all 1 + // or 2 columns wide when unicode-width is used, and always 2 + // columns wide otherwise. This includes all of our favorite + // emojis such as 😊. 
+ for ch in '\u{FF}'..'\u{2FFFF}' { + if is_emoji(ch) { + let desc = format!("{:?} U+{:04X}", ch, ch as u32); + + #[cfg(feature = "unicode-width")] + assert!(ch.width().unwrap() <= 2, "char: {}", desc); + + #[cfg(not(feature = "unicode-width"))] + assert_eq!(ch_width(ch), 2, "char: {}", desc); + } + } + + // The remaining planes contain almost no assigned code points + // and thus also no emojis. + } + + #[test] + fn display_width_works() { + assert_eq!("Café Plain".len(), 11); // “é” is two bytes + assert_eq!(display_width("Café Plain"), 10); + assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); + assert_eq!( + display_width("\x1b]8;;http://example.com\x1b\\This is a link\x1b]8;;\x1b\\"), + 14 + ); + } + + #[test] + fn display_width_narrow_emojis() { + #[cfg(feature = "unicode-width")] + assert_eq!(display_width("⁉"), 1); + + // The ⁉ character is above DOUBLE_WIDTH_CUTOFF. + #[cfg(not(feature = "unicode-width"))] + assert_eq!(display_width("⁉"), 2); + } + + #[test] + fn display_width_narrow_emojis_variant_selector() { + #[cfg(feature = "unicode-width")] + assert_eq!(display_width("⁉\u{fe0f}"), 1); + + // The variant selector-16 is also counted. + #[cfg(not(feature = "unicode-width"))] + assert_eq!(display_width("⁉\u{fe0f}"), 4); + } + + #[test] + fn display_width_emojis() { + assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); + } +} diff --git a/third_party/rust/textwrap/src/fill.rs b/third_party/rust/textwrap/src/fill.rs new file mode 100644 index 0000000000..fbcaab9e21 --- /dev/null +++ b/third_party/rust/textwrap/src/fill.rs @@ -0,0 +1,298 @@ +//! Functions for filling text. + +use crate::{wrap, wrap_algorithms, Options, WordSeparator}; + +/// Fill a line of text at a given width. +/// +/// The result is a [`String`], complete with newlines between each +/// line. Use [`wrap()`] if you need access to the individual lines. 
+/// +/// The easiest way to use this function is to pass an integer for +/// `width_or_options`: +/// +/// ``` +/// use textwrap::fill; +/// +/// assert_eq!( +/// fill("Memory safety without garbage collection.", 15), +/// "Memory safety\nwithout garbage\ncollection." +/// ); +/// ``` +/// +/// If you need to customize the wrapping, you can pass an [`Options`] +/// instead of a `usize`: +/// +/// ``` +/// use textwrap::{fill, Options}; +/// +/// let options = Options::new(15) +/// .initial_indent("- ") +/// .subsequent_indent(" "); +/// assert_eq!( +/// fill("Memory safety without garbage collection.", &options), +/// "- Memory safety\n without\n garbage\n collection." +/// ); +/// ``` +pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String +where + Opt: Into<Options<'a>>, +{ + let options = width_or_options.into(); + + // Fast path: the text is returned with only its trailing spaces + // trimmed, without going through `fill_slow_path`. + if text.len() < options.width && !text.contains('\n') && options.initial_indent.is_empty() { + String::from(text.trim_end_matches(' ')) + } else { + fill_slow_path(text, options) + } +} + +/// Slow path for fill. +/// +/// This is taken by [`fill()`] unless `text` is shorter than +/// `options.width` bytes, contains no `'\n'`, and +/// `options.initial_indent` is empty. The lines returned by +/// [`wrap()`] are joined with `options.line_ending`. +pub(crate) fn fill_slow_path(text: &str, options: Options<'_>) -> String { + // This will avoid reallocation in simple cases (no + // indentation, no hyphenation). + let mut result = String::with_capacity(text.len()); + + let line_ending_str = options.line_ending.as_str(); + for (i, line) in wrap(text, options).iter().enumerate() { + if i > 0 { + result.push_str(line_ending_str); + } + result.push_str(line); + } + + result +} + +/// Fill `text` in-place without reallocating the input string. +/// +/// This function works by modifying the input string: some `' '` +/// characters will be replaced by `'\n'` characters. The rest of the +/// text remains untouched. +/// +/// Since we can only replace existing whitespace in the input with +/// `'\n'` (there is no space for `"\r\n"`), we cannot do hyphenation +/// nor can we split words longer than the line width.
We also need to +/// use `AsciiSpace` as the word separator since we need `' '` +/// characters between words in order to replace some of them with a +/// `'\n'`. Indentation is also ruled out. In other words, +/// `fill_inplace(width)` behaves as if you had called [`fill()`] with +/// these options: +/// +/// ``` +/// # use textwrap::{core, LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm}; +/// # let width = 80; +/// Options::new(width) +/// .break_words(false) +/// .line_ending(LineEnding::LF) +/// .word_separator(WordSeparator::AsciiSpace) +/// .wrap_algorithm(WrapAlgorithm::FirstFit) +/// .word_splitter(WordSplitter::NoHyphenation); +/// ``` +/// +/// The wrap algorithm is +/// [`WrapAlgorithm::FirstFit`](crate::WrapAlgorithm::FirstFit) since +/// this is the fastest algorithm — and the main reason to use +/// `fill_inplace` is to get the string broken into newlines as fast +/// as possible. +/// +/// A last difference is that (unlike [`fill()`]) `fill_inplace` can +/// leave trailing whitespace on lines. This is because we wrap by +/// inserting a `'\n'` at the final whitespace in the input string: +/// +/// ``` +/// let mut text = String::from("Hello World!"); +/// textwrap::fill_inplace(&mut text, 10); +/// assert_eq!(text, "Hello \nWorld!"); +/// ``` +/// +/// If we didn't do this, the word `World!` would end up being +/// indented. You can avoid this if you make sure that your input text +/// has no double spaces. +/// +/// # Performance +/// +/// In benchmarks, `fill_inplace` is about twice as fast as +/// [`fill()`]. Please see the [`linear` +/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benchmarks/linear.rs) +/// for details. 
+pub fn fill_inplace(text: &mut String, width: usize) { + let mut indices = Vec::new(); + + let mut offset = 0; + for line in text.split('\n') { + let words = WordSeparator::AsciiSpace + .find_words(line) + .collect::<Vec<_>>(); + let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width as f64]); + + let mut line_offset = offset; + for words in &wrapped_words[..wrapped_words.len() - 1] { + let line_len = words + .iter() + .map(|word| word.len() + word.whitespace.len()) + .sum::<usize>(); + + line_offset += line_len; + // We've advanced past all ' ' characters -- want to move + // one ' ' backwards and insert our '\n' there. + indices.push(line_offset - 1); + } + + // Advance past entire line, plus the '\n' which was removed + // by the split call above. + offset += line.len() + 1; + } + + let mut bytes = std::mem::take(text).into_bytes(); + for idx in indices { + bytes[idx] = b'\n'; + } + *text = String::from_utf8(bytes).unwrap(); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::WrapAlgorithm; + + #[test] + fn fill_simple() { + assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz"); + } + + #[test] + fn fill_unicode_boundary() { + // https://github.com/mgeisler/textwrap/issues/390 + fill("\u{1b}!Ͽ", 10); + } + + #[test] + fn non_breaking_space() { + let options = Options::new(5).break_words(false); + assert_eq!(fill("foo bar baz", &options), "foo bar baz"); + } + + #[test] + fn non_breaking_hyphen() { + let options = Options::new(5).break_words(false); + assert_eq!(fill("foo‑bar‑baz", &options), "foo‑bar‑baz"); + } + + #[test] + fn fill_preserves_line_breaks_trims_whitespace() { + assert_eq!(fill(" ", 80), ""); + assert_eq!(fill(" \n ", 80), "\n"); + assert_eq!(fill(" \n \n \n ", 80), "\n\n\n"); + } + + #[test] + fn preserve_line_breaks() { + assert_eq!(fill("", 80), ""); + assert_eq!(fill("\n", 80), "\n"); + assert_eq!(fill("\n\n\n", 80), "\n\n\n"); + assert_eq!(fill("test\n", 80), "test\n"); + assert_eq!(fill("test\n\na\n\n", 80), 
"test\n\na\n\n"); + assert_eq!( + fill( + "1 3 5 7\n1 3 5 7", + Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit) + ), + "1 3 5 7\n1 3 5 7" + ); + assert_eq!( + fill( + "1 3 5 7\n1 3 5 7", + Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit) + ), + "1 3 5\n7\n1 3 5\n7" + ); + } + + #[test] + fn break_words_line_breaks() { + assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl"); + assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl"); + } + + #[test] + fn break_words_empty_lines() { + assert_eq!( + fill("foo\nbar", &Options::new(2).break_words(false)), + "foo\nbar" + ); + } + + #[test] + fn fill_inplace_empty() { + let mut text = String::from(""); + fill_inplace(&mut text, 80); + assert_eq!(text, ""); + } + + #[test] + fn fill_inplace_simple() { + let mut text = String::from("foo bar baz"); + fill_inplace(&mut text, 10); + assert_eq!(text, "foo bar\nbaz"); + } + + #[test] + fn fill_inplace_multiple_lines() { + let mut text = String::from("Some text to wrap over multiple lines"); + fill_inplace(&mut text, 12); + assert_eq!(text, "Some text to\nwrap over\nmultiple\nlines"); + } + + #[test] + fn fill_inplace_long_word() { + let mut text = String::from("Internationalization is hard"); + fill_inplace(&mut text, 10); + assert_eq!(text, "Internationalization\nis hard"); + } + + #[test] + fn fill_inplace_no_hyphen_splitting() { + let mut text = String::from("A well-chosen example"); + fill_inplace(&mut text, 10); + assert_eq!(text, "A\nwell-chosen\nexample"); + } + + #[test] + fn fill_inplace_newlines() { + let mut text = String::from("foo bar\n\nbaz\n\n\n"); + fill_inplace(&mut text, 10); + assert_eq!(text, "foo bar\n\nbaz\n\n\n"); + } + + #[test] + fn fill_inplace_newlines_reset_line_width() { + let mut text = String::from("1 3 5\n1 3 5 7 9\n1 3 5 7 9 1 3"); + fill_inplace(&mut text, 10); + assert_eq!(text, "1 3 5\n1 3 5 7 9\n1 3 5 7 9\n1 3"); + } + + #[test] + fn fill_inplace_leading_whitespace() { + let mut text = String::from(" foo bar baz"); + 
fill_inplace(&mut text, 10); + assert_eq!(text, " foo bar\nbaz"); + } + + #[test] + fn fill_inplace_trailing_whitespace() { + let mut text = String::from("foo bar baz "); + fill_inplace(&mut text, 10); + assert_eq!(text, "foo bar\nbaz "); + } + + #[test] + fn fill_inplace_interior_whitespace() { + // To avoid an unwanted indentation of "baz", it is important + // to replace the final ' ' with '\n'. + let mut text = String::from("foo bar baz"); + fill_inplace(&mut text, 10); + assert_eq!(text, "foo bar \nbaz"); + } +} diff --git a/third_party/rust/textwrap/src/fuzzing.rs b/third_party/rust/textwrap/src/fuzzing.rs new file mode 100644 index 0000000000..b7ad4812a2 --- /dev/null +++ b/third_party/rust/textwrap/src/fuzzing.rs @@ -0,0 +1,23 @@ +//! Fuzzing helpers. + +use super::Options; +use std::borrow::Cow; + +/// Exposed for fuzzing so we can check the slow path is correct. +pub fn fill_slow_path<'a>(text: &str, options: Options<'_>) -> String { + crate::fill::fill_slow_path(text, options) +} + +/// Exposed for fuzzing so we can check the slow path is correct. +pub fn wrap_single_line<'a>(line: &'a str, options: &Options<'_>, lines: &mut Vec<Cow<'a, str>>) { + crate::wrap::wrap_single_line(line, options, lines); +} + +/// Exposed for fuzzing so we can check the slow path is correct. +pub fn wrap_single_line_slow_path<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { + crate::wrap::wrap_single_line_slow_path(line, options, lines) +} diff --git a/third_party/rust/textwrap/src/indentation.rs b/third_party/rust/textwrap/src/indentation.rs new file mode 100644 index 0000000000..2f3a853b3c --- /dev/null +++ b/third_party/rust/textwrap/src/indentation.rs @@ -0,0 +1,347 @@ +//! Functions related to adding and removing indentation from lines of +//! text. +//! +//! The functions here can be used to uniformly indent or dedent +//! (unindent) word wrapped lines of text. + +/// Indent each line by the given prefix. 
+/// +/// # Examples +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent("First line.\nSecond line.\n", " "), +/// " First line.\n Second line.\n"); +/// ``` +/// +/// When indenting, trailing whitespace is stripped from the prefix. +/// This means that empty lines remain empty afterwards: +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent("First line.\n\n\nSecond line.\n", " "), +/// " First line.\n\n\n Second line.\n"); +/// ``` +/// +/// Notice how `"\n\n\n"` remained as `"\n\n\n"`. +/// +/// This feature is useful when you want to indent text and have a +/// space between your prefix and the text. In this case, you _don't_ +/// want a trailing space on empty lines: +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "), +/// "# foo = 123\n#\n# print(foo)\n"); +/// ``` +/// +/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which +/// would have trailing whitespace. +/// +/// Leading and trailing whitespace coming from the text itself is +/// kept unchanged: +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo "); +/// ``` +pub fn indent(s: &str, prefix: &str) -> String { + // We know we'll need more than s.len() bytes for the output, but + // without counting '\n' characters (which is somewhat slow), we + // don't know exactly how much. However, we can preemptively do + // the first doubling of the output size. + let mut result = String::with_capacity(2 * s.len()); + let trimmed_prefix = prefix.trim_end(); + for (idx, line) in s.split_terminator('\n').enumerate() { + if idx > 0 { + result.push('\n'); + } + if line.trim().is_empty() { + result.push_str(trimmed_prefix); + } else { + result.push_str(prefix); + } + result.push_str(line); + } + if s.ends_with('\n') { + // split_terminator will have eaten the final '\n'. + result.push('\n'); + } + result +} + +/// Removes common leading whitespace from each line. 
+/// +/// This function will look at each non-empty line and determine the +/// maximum amount of whitespace that can be removed from all lines: +/// +/// ``` +/// use textwrap::dedent; +/// +/// assert_eq!(dedent(" +/// 1st line +/// 2nd line +/// 3rd line +/// "), " +/// 1st line +/// 2nd line +/// 3rd line +/// "); +/// ``` +pub fn dedent(s: &str) -> String { + let mut prefix = ""; + let mut lines = s.lines(); + + // We first search for a non-empty line to find a prefix. + for line in &mut lines { + let mut whitespace_idx = line.len(); + for (idx, ch) in line.char_indices() { + if !ch.is_whitespace() { + whitespace_idx = idx; + break; + } + } + + // Check if the line had anything but whitespace + if whitespace_idx < line.len() { + prefix = &line[..whitespace_idx]; + break; + } + } + + // We then continue looking through the remaining lines to + // possibly shorten the prefix. + for line in &mut lines { + let mut whitespace_idx = line.len(); + for ((idx, a), b) in line.char_indices().zip(prefix.chars()) { + if a != b { + whitespace_idx = idx; + break; + } + } + + // Check if the line had anything but whitespace and if we + // have found a shorter prefix + if whitespace_idx < line.len() && whitespace_idx < prefix.len() { + prefix = &line[..whitespace_idx]; + } + } + + // We now go over the lines a second time to build the result. 
+ let mut result = String::new(); + for line in s.lines() { + if line.starts_with(prefix) && line.chars().any(|c| !c.is_whitespace()) { + let (_, tail) = line.split_at(prefix.len()); + result.push_str(tail); + } + result.push('\n'); + } + + if result.ends_with('\n') && !s.ends_with('\n') { + let new_len = result.len() - 1; + result.truncate(new_len); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn indent_empty() { + assert_eq!(indent("\n", " "), "\n"); + } + + #[test] + #[rustfmt::skip] + fn indent_nonempty() { + let text = [ + " foo\n", + "bar\n", + " baz\n", + ].join(""); + let expected = [ + "// foo\n", + "// bar\n", + "// baz\n", + ].join(""); + assert_eq!(indent(&text, "// "), expected); + } + + #[test] + #[rustfmt::skip] + fn indent_empty_line() { + let text = [ + " foo", + "bar", + "", + " baz", + ].join("\n"); + let expected = [ + "// foo", + "// bar", + "//", + "// baz", + ].join("\n"); + assert_eq!(indent(&text, "// "), expected); + } + + #[test] + fn dedent_empty() { + assert_eq!(dedent(""), ""); + } + + #[test] + #[rustfmt::skip] + fn dedent_multi_line() { + let x = [ + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + " foo", + "bar", + " baz" + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_empty_line() { + let x = [ + " foo", + " bar", + " ", + " baz" + ].join("\n"); + let y = [ + " foo", + "bar", + "", + " baz" + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_blank_line() { + let x = [ + " foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + "foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_whitespace_line() { + let x = [ + " foo", + " ", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + "foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + assert_eq!(dedent(&x), y); 
+ } + + #[test] + #[rustfmt::skip] + fn dedent_mixed_whitespace() { + let x = [ + "\tfoo", + " bar", + ].join("\n"); + let y = [ + "\tfoo", + " bar", + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_tabbed_whitespace() { + let x = [ + "\t\tfoo", + "\t\t\tbar", + ].join("\n"); + let y = [ + "foo", + "\tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_mixed_tabbed_whitespace() { + let x = [ + "\t \tfoo", + "\t \t\tbar", + ].join("\n"); + let y = [ + "foo", + "\tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_mixed_tabbed_whitespace2() { + let x = [ + "\t \tfoo", + "\t \tbar", + ].join("\n"); + let y = [ + "\tfoo", + " \tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); + } + + #[test] + #[rustfmt::skip] + fn dedent_preserve_no_terminating_newline() { + let x = [ + " foo", + " bar", + ].join("\n"); + let y = [ + "foo", + " bar", + ].join("\n"); + assert_eq!(dedent(&x), y); + } +} diff --git a/third_party/rust/textwrap/src/lib.rs b/third_party/rust/textwrap/src/lib.rs new file mode 100644 index 0000000000..32611c0938 --- /dev/null +++ b/third_party/rust/textwrap/src/lib.rs @@ -0,0 +1,235 @@ +//! The textwrap library provides functions for word wrapping and +//! indenting text. +//! +//! # Wrapping Text +//! +//! Wrapping text can be very useful in command-line programs where +//! you want to format dynamic output nicely so it looks good in a +//! terminal. A quick example: +//! +//! ``` +//! # #[cfg(feature = "smawk")] { +//! let text = "textwrap: a small library for wrapping text."; +//! assert_eq!(textwrap::wrap(text, 18), +//! vec!["textwrap: a", +//! "small library for", +//! "wrapping text."]); +//! # } +//! ``` +//! +//! The [`wrap()`] function returns the individual lines, use +//! [`fill()`] if you want the lines joined with `'\n'` to form a +//! `String`. +//! +//!
If you enable the `hyphenation` Cargo feature, you can get +//! automatic hyphenation for a number of languages: +//! +//! ``` +//! #[cfg(feature = "hyphenation")] { +//! use hyphenation::{Language, Load, Standard}; +//! use textwrap::{wrap, Options, WordSplitter}; +//! +//! let text = "textwrap: a small library for wrapping text."; +//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +//! let options = Options::new(18).word_splitter(WordSplitter::Hyphenation(dictionary)); +//! assert_eq!(wrap(text, &options), +//! vec!["textwrap: a small", +//! "library for wrap-", +//! "ping text."]); +//! } +//! ``` +//! +//! See also the [`unfill()`] and [`refill()`] functions which allow +//! you to manipulate already wrapped text. +//! +//! ## Wrapping Strings at Compile Time +//! +//! If your strings are known at compile time, please take a look at +//! the procedural macros from the [textwrap-macros] crate. +//! +//! ## Displayed Width vs Byte Size +//! +//! To word wrap text, one must know the width of each word so one can +//! know when to break lines. This library will by default measure the +//! width of text using the _displayed width_, not the size in bytes. +//! The `unicode-width` Cargo feature controls this. +//! +//! This is important for non-ASCII text. ASCII characters such as `a` +//! and `!` are simple and take up one column each. This means that +//! the displayed width is equal to the string length in bytes. +//! However, non-ASCII characters and symbols take up more than one +//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is +//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively. +//! +//! This is why we take care to use the displayed width instead of the +//! byte count when computing line lengths. All functions in this +//! library handle Unicode characters like this when the +//! `unicode-width` Cargo feature is enabled (it is enabled by +//! default). +//! +//! # Indentation and Dedentation +//! +//! 
The textwrap library also offers functions for adding a prefix to +//! every line of a string and for removing leading whitespace. As an +//! example, [`indent()`] allows you to turn lines of text into a +//! bullet list: +//! +//! ``` +//! let before = "\ +//! foo +//! bar +//! baz +//! "; +//! let after = "\ +//! * foo +//! * bar +//! * baz +//! "; +//! assert_eq!(textwrap::indent(before, "* "), after); +//! ``` +//! +//! Removing leading whitespace is done with [`dedent()`]: +//! +//! ``` +//! let before = " +//! Some +//! indented +//! text +//! "; +//! let after = " +//! Some +//! indented +//! text +//! "; +//! assert_eq!(textwrap::dedent(before), after); +//! ``` +//! +//! # Cargo Features +//! +//! The textwrap library can be slimmed down as needed via a number of +//! Cargo features. This means you only pay for the features you +//! actually use. +//! +//! The full dependency graph, where dashed lines indicate optional +//! dependencies, is shown below: +//! +//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.16.1.svg"> +//! +//! ## Default Features +//! +//! These features are enabled by default: +//! +//! * `unicode-linebreak`: enables finding words using the +//! [unicode-linebreak] crate, which implements the line breaking +//! algorithm described in [Unicode Standard Annex +//! #14](https://www.unicode.org/reports/tr14/). +//! +//! This feature can be disabled if you are happy to find words +//! separated by ASCII space characters only. People wrapping text +//! with emojis or East-Asian characters will most likely want +//! to enable this feature. See [`WordSeparator`] for details. +//! +//! * `unicode-width`: enables correct width computation of non-ASCII +//! characters via the [unicode-width] crate. Without this feature, +//! every [`char`] below U+1100 is 1 column wide and every other +//! [`char`] is 2 columns wide. See [`core::display_width()`] for +//! details. +//! +//!
This feature can be disabled if you only need to wrap ASCII +//! text, or if the functions in [`core`] are used directly with +//! [`core::Fragment`]s for which the widths have been computed in +//! other ways. +//! +//! * `smawk`: enables linear-time wrapping of the whole paragraph via +//! the [smawk] crate. See [`wrap_algorithms::wrap_optimal_fit()`] +//! for details on the optimal-fit algorithm. +//! +//! This feature can be disabled if you only ever intend to use +//! [`wrap_algorithms::wrap_first_fit()`]. +//! +//! <!-- begin binary-sizes --> +//! +//! With Rust 1.64.0, the size impact of the above features on your +//! binary is as follows: +//! +//! | Configuration | Binary Size | Delta | +//! | :--- | ---: | ---: | +//! | quick-and-dirty implementation | 289 KB | — KB | +//! | textwrap without default features | 305 KB | 16 KB | +//! | textwrap with smawk | 317 KB | 28 KB | +//! | textwrap with unicode-width | 309 KB | 20 KB | +//! | textwrap with unicode-linebreak | 342 KB | 53 KB | +//! +//! <!-- end binary-sizes --> +//! +//! The above sizes are the stripped sizes and the binary is compiled +//! in release mode with this profile: +//! +//! ```toml +//! [profile.release] +//! lto = true +//! codegen-units = 1 +//! ``` +//! +//! See the [binary-sizes demo] if you want to reproduce these +//! results. +//! +//! ## Optional Features +//! +//! These Cargo features enable new functionality: +//! +//! * `terminal_size`: enables automatic detection of the terminal +//! width via the [terminal_size] crate. See +//! [`Options::with_termwidth()`] for details. +//! +//! * `hyphenation`: enables language-sensitive hyphenation via the +//! [hyphenation] crate. See the [`word_splitters::WordSplitter`] +//! trait for details. +//! +//! [unicode-linebreak]: https://docs.rs/unicode-linebreak/ +//! [unicode-width]: https://docs.rs/unicode-width/ +//! [smawk]: https://docs.rs/smawk/ +//! 
[binary-sizes demo]: https://github.com/mgeisler/textwrap/tree/master/examples/binary-sizes +//! [textwrap-macros]: https://docs.rs/textwrap-macros/ +//! [terminal_size]: https://docs.rs/terminal_size/ +//! [hyphenation]: https://docs.rs/hyphenation/ + +#![doc(html_root_url = "https://docs.rs/textwrap/0.16.1")] +#![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210 +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![allow(clippy::redundant_field_names)] + +// Make `cargo test` execute the README doctests. +#[cfg(doctest)] +#[doc = include_str!("../README.md")] +mod readme_doctest {} + +pub mod core; +#[cfg(fuzzing)] +pub mod fuzzing; +pub mod word_splitters; +pub mod wrap_algorithms; + +mod columns; +mod fill; +mod indentation; +mod line_ending; +mod options; +mod refill; +#[cfg(feature = "terminal_size")] +mod termwidth; +mod word_separators; +mod wrap; + +pub use columns::wrap_columns; +pub use fill::{fill, fill_inplace}; +pub use indentation::{dedent, indent}; +pub use line_ending::LineEnding; +pub use options::Options; +pub use refill::{refill, unfill}; +#[cfg(feature = "terminal_size")] +pub use termwidth::termwidth; +pub use word_separators::WordSeparator; +pub use word_splitters::WordSplitter; +pub use wrap::wrap; +pub use wrap_algorithms::WrapAlgorithm; diff --git a/third_party/rust/textwrap/src/line_ending.rs b/third_party/rust/textwrap/src/line_ending.rs new file mode 100644 index 0000000000..0514fe5fc0 --- /dev/null +++ b/third_party/rust/textwrap/src/line_ending.rs @@ -0,0 +1,88 @@ +//! Line ending detection and conversion. + +use std::fmt::Debug; + +/// Supported line endings. Like in the Rust standard library, two line +/// endings are supported: `\r\n` and `\n` +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LineEnding { + /// _Carriage return and line feed_ – a line ending sequence + /// historically used in Windows. 
Corresponds to the sequence + /// of ASCII control characters `0x0D 0x0A` or `\r\n` + CRLF, + /// _Line feed_ – a line ending historically used in Unix. + /// Corresponds to the ASCII control character `0x0A` or `\n` + LF, +} + +impl LineEnding { + /// Turns this [`LineEnding`] value into its ASCII representation. + #[inline] + pub const fn as_str(&self) -> &'static str { + match self { + Self::CRLF => "\r\n", + Self::LF => "\n", + } + } +} + +/// An iterator over the lines of a string, as tuples of string slice +/// and [`LineEnding`] value; it only emits non-empty lines (i.e. having +/// some content before the terminating `\r\n` or `\n`). +/// +/// This struct is used internally by the library. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NonEmptyLines<'a>(pub &'a str); + +impl<'a> Iterator for NonEmptyLines<'a> { + type Item = (&'a str, Option<LineEnding>); + + fn next(&mut self) -> Option<Self::Item> { + while let Some(lf) = self.0.find('\n') { + if lf == 0 || (lf == 1 && self.0.as_bytes()[lf - 1] == b'\r') { + self.0 = &self.0[(lf + 1)..]; + continue; + } + let trimmed = match self.0.as_bytes()[lf - 1] { + b'\r' => (&self.0[..(lf - 1)], Some(LineEnding::CRLF)), + _ => (&self.0[..lf], Some(LineEnding::LF)), + }; + self.0 = &self.0[(lf + 1)..]; + return Some(trimmed); + } + if self.0.is_empty() { + None + } else { + let line = std::mem::take(&mut self.0); + Some((line, None)) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn non_empty_lines_full_case() { + assert_eq!( + NonEmptyLines("LF\nCRLF\r\n\r\n\nunterminated") + .collect::<Vec<(&str, Option<LineEnding>)>>(), + vec![ + ("LF", Some(LineEnding::LF)), + ("CRLF", Some(LineEnding::CRLF)), + ("unterminated", None), + ] + ); + } + + #[test] + fn non_empty_lines_new_lines_only() { + assert_eq!(NonEmptyLines("\r\n\n\n\r\n").next(), None); + } + + #[test] + fn non_empty_lines_no_input() { + assert_eq!(NonEmptyLines("").next(), None); + } +} diff --git 
a/third_party/rust/textwrap/src/options.rs b/third_party/rust/textwrap/src/options.rs new file mode 100644 index 0000000000..80e420d195 --- /dev/null +++ b/third_party/rust/textwrap/src/options.rs @@ -0,0 +1,300 @@ +//! Options for wrapping text. + +use crate::{LineEnding, WordSeparator, WordSplitter, WrapAlgorithm}; + +/// Holds configuration options for wrapping and filling text. +#[non_exhaustive] +#[derive(Debug, Clone)] +pub struct Options<'a> { + /// The width in columns at which the text will be wrapped. + pub width: usize, + /// Line ending used for breaking lines. + pub line_ending: LineEnding, + /// Indentation used for the first line of output. See the + /// [`Options::initial_indent`] method. + pub initial_indent: &'a str, + /// Indentation used for subsequent lines of output. See the + /// [`Options::subsequent_indent`] method. + pub subsequent_indent: &'a str, + /// Allow long words to be broken if they cannot fit on a line. + /// When set to `false`, some lines may be longer than + /// `self.width`. See the [`Options::break_words`] method. + pub break_words: bool, + /// Wrapping algorithm to use, see the implementations of the + /// [`WrapAlgorithm`] trait for details. + pub wrap_algorithm: WrapAlgorithm, + /// The line breaking algorithm to use, see the [`WordSeparator`] + /// trait for an overview and possible implementations. + pub word_separator: WordSeparator, + /// The method for splitting words. This can be used to prohibit + /// splitting words on hyphens, or it can be used to implement + /// language-aware machine hyphenation. 
+ pub word_splitter: WordSplitter, +} + +impl<'a> From<&'a Options<'a>> for Options<'a> { + fn from(options: &'a Options<'a>) -> Self { + Self { + width: options.width, + line_ending: options.line_ending, + initial_indent: options.initial_indent, + subsequent_indent: options.subsequent_indent, + break_words: options.break_words, + word_separator: options.word_separator, + wrap_algorithm: options.wrap_algorithm, + word_splitter: options.word_splitter.clone(), + } + } +} + +impl<'a> From<usize> for Options<'a> { + fn from(width: usize) -> Self { + Options::new(width) + } +} + +impl<'a> Options<'a> { + /// Creates a new [`Options`] with the specified width. + /// + /// The other fields are given default values as follows: + /// + /// ``` + /// # use textwrap::{LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm}; + /// # let width = 80; + /// let options = Options::new(width); + /// assert_eq!(options.line_ending, LineEnding::LF); + /// assert_eq!(options.initial_indent, ""); + /// assert_eq!(options.subsequent_indent, ""); + /// assert_eq!(options.break_words, true); + /// + /// #[cfg(feature = "unicode-linebreak")] + /// assert_eq!(options.word_separator, WordSeparator::UnicodeBreakProperties); + /// #[cfg(not(feature = "unicode-linebreak"))] + /// assert_eq!(options.word_separator, WordSeparator::AsciiSpace); + /// + /// #[cfg(feature = "smawk")] + /// assert_eq!(options.wrap_algorithm, WrapAlgorithm::new_optimal_fit()); + /// #[cfg(not(feature = "smawk"))] + /// assert_eq!(options.wrap_algorithm, WrapAlgorithm::FirstFit); + /// + /// assert_eq!(options.word_splitter, WordSplitter::HyphenSplitter); + /// ``` + /// + /// Note that the default word separator and wrap algorithms + /// changes based on the available Cargo features. The best + /// available algorithms are used by default. 
+ pub const fn new(width: usize) -> Self { + Options { + width, + line_ending: LineEnding::LF, + initial_indent: "", + subsequent_indent: "", + break_words: true, + word_separator: WordSeparator::new(), + wrap_algorithm: WrapAlgorithm::new(), + word_splitter: WordSplitter::HyphenSplitter, + } + } + + /// Change [`self.line_ending`]. This specifies which of the + /// supported line endings should be used to break the lines of the + /// input text. + /// + /// # Examples + /// + /// ``` + /// use textwrap::{refill, LineEnding, Options}; + /// + /// let options = Options::new(15).line_ending(LineEnding::CRLF); + /// assert_eq!(refill("This is a little example.", options), + /// "This is a\r\nlittle example."); + /// ``` + /// + /// [`self.line_ending`]: #structfield.line_ending + pub fn line_ending(self, line_ending: LineEnding) -> Self { + Options { + line_ending, + ..self + } + } + + /// Set [`self.width`] to the given value. + /// + /// [`self.width`]: #structfield.width + pub fn width(self, width: usize) -> Self { + Options { width, ..self } + } + + /// Change [`self.initial_indent`]. The initial indentation is + /// used on the very first line of output. + /// + /// # Examples + /// + /// Classic paragraph indentation can be achieved by specifying an + /// initial indentation and wrapping each paragraph by itself: + /// + /// ``` + /// use textwrap::{wrap, Options}; + /// + /// let options = Options::new(16).initial_indent(" "); + /// assert_eq!(wrap("This is a little example.", options), + /// vec![" This is a", + /// "little example."]); + /// ``` + /// + /// [`self.initial_indent`]: #structfield.initial_indent + pub fn initial_indent(self, initial_indent: &'a str) -> Self { + Options { + initial_indent, + ..self + } + } + + /// Change [`self.subsequent_indent`]. The subsequent indentation + /// is used on lines following the first line of output. 
/// Change [`self.word_separator`]. The word separator decides where
/// the words are in a line of text.
///
/// All other settings are carried over unchanged.
///
/// See the [`WordSeparator`] enum for details on the choices.
///
/// [`self.word_separator`]: #structfield.word_separator
pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> {
    Options {
        word_separator,
        ..self
    }
}
/// Change [`self.word_splitter`]. The [`WordSplitter`] is used to
/// fit part of a word into the current line when wrapping text.
///
/// All other settings are carried over unchanged.
///
/// See [`Options::break_words`] instead if you want to control the
/// handling of words longer than the line width.
///
/// # Examples
///
/// ```
/// use textwrap::{wrap, Options, WordSplitter};
///
/// // The default is WordSplitter::HyphenSplitter.
/// let options = Options::new(5);
/// assert_eq!(wrap("foo-bar-baz", &options),
///            vec!["foo-", "bar-", "baz"]);
///
/// // The word is now so long that break_words kicks in:
/// let options = Options::new(5)
///     .word_splitter(WordSplitter::NoHyphenation);
/// assert_eq!(wrap("foo-bar-baz", &options),
///            vec!["foo-b", "ar-ba", "z"]);
///
/// // If you want no breaks at all, disable both:
/// let options = Options::new(5)
///     .break_words(false)
///     .word_splitter(WordSplitter::NoHyphenation);
/// assert_eq!(wrap("foo-bar-baz", &options),
///            vec!["foo-bar-baz"]);
/// ```
///
/// [`self.word_splitter`]: #structfield.word_splitter
pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> {
    Options {
        word_splitter,
        ..self
    }
}
/// Unpack a paragraph of already-wrapped text.
///
/// This function attempts to recover the original text from a single
/// paragraph of wrapped text, such as what [`fill()`] would produce.
/// This means that it turns
///
/// ```text
/// textwrap: a small
/// library for
/// wrapping text.
/// ```
///
/// back into
///
/// ```text
/// textwrap: a small library for wrapping text.
/// ```
///
/// In addition, it will recognize a common prefix and a common line
/// ending among the lines.
///
/// The prefix of the first line is returned in
/// [`Options::initial_indent`] and the prefix (if any) of the
/// other lines is returned in [`Options::subsequent_indent`].
///
/// The width of the widest input line is returned in
/// [`Options::width`].
///
/// Line ending is returned in [`Options::line_ending`]. If line ending
/// can not be confidently detected (mixed or no line endings in the
/// input), [`LineEnding::LF`] will be returned.
///
/// In addition to `' '`, the prefixes can consist of characters used
/// for unordered lists (`'-'`, `'+'`, and `'*'`) and block quotes
/// (`'>'`) in Markdown as well as characters often used for inline
/// comments (`'#'` and `'/'`).
///
/// The text must come from a single wrapped paragraph. This means
/// that there can be no empty lines (`"\n\n"` or `"\r\n\r\n"`) within
/// the text. It is unspecified what happens if `unfill` is called on
/// more than one paragraph of text.
///
/// # Examples
///
/// ```
/// use textwrap::{LineEnding, unfill};
///
/// let (text, options) = unfill("\
/// * This is an
///   example of
///   a list item.
/// ");
///
/// assert_eq!(text, "This is an example of a list item.\n");
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, "  ");
/// assert_eq!(options.line_ending, LineEnding::LF);
/// ```
pub fn unfill(text: &str) -> (String, Options<'_>) {
    let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];

    // First pass: measure the widest line and work out the prefixes.
    let mut options = Options::new(0);
    for (idx, line) in text.lines().enumerate() {
        options.width = std::cmp::max(options.width, display_width(line));
        let without_prefix = line.trim_start_matches(prefix_chars);
        let prefix = &line[..line.len() - without_prefix.len()];

        if idx == 0 {
            options.initial_indent = prefix;
        } else if idx == 1 {
            options.subsequent_indent = prefix;
        } else if idx > 1 {
            // Shrink `subsequent_indent` to the longest prefix shared
            // with this line: cut at the first differing character...
            for ((idx, x), y) in prefix.char_indices().zip(options.subsequent_indent.chars()) {
                if x != y {
                    options.subsequent_indent = &prefix[..idx];
                    break;
                }
            }
            // ...and never let it be longer than this line's own prefix.
            if prefix.len() < options.subsequent_indent.len() {
                options.subsequent_indent = prefix;
            }
        }
    }

    let mut unfilled = String::with_capacity(text.len());
    let mut detected_line_ending = None;

    // Second pass: join the non-empty lines with single spaces, with
    // the detected prefixes removed.
    for (idx, (line, ending)) in NonEmptyLines(text).enumerate() {
        if idx == 0 {
            unfilled.push_str(&line[options.initial_indent.len()..]);
        } else {
            unfilled.push(' ');
            unfilled.push_str(&line[options.subsequent_indent.len()..]);
        }
        // The first line ending seen is remembered. A later `\n` after
        // a remembered `\r\n` means the input is mixed, in which case
        // we settle on `\n`.
        match (detected_line_ending, ending) {
            (None, Some(_)) => detected_line_ending = ending,
            (Some(LineEnding::CRLF), Some(LineEnding::LF)) => detected_line_ending = ending,
            _ => (),
        }
    }

    // Add back a line ending if `text` ends with the one we detect.
    if let Some(line_ending) = detected_line_ending {
        if text.ends_with(line_ending.as_str()) {
            unfilled.push_str(line_ending.as_str());
        }
    }

    options.line_ending = detected_line_ending.unwrap_or(LineEnding::LF);
    (unfilled, options)
}

/// Refill a paragraph of wrapped text with a new width.
///
/// This function will first use [`unfill()`] to remove newlines from
/// the text. Afterwards the text is filled again using [`fill()`].
///
/// The `new_width_or_options` argument specifies the new width and can
/// specify other options as well — except for
/// [`Options::initial_indent`] and [`Options::subsequent_indent`],
/// which are deduced from `filled_text`.
///
/// If `filled_text` ends with a line ending, the result ends with the
/// line ending selected by `new_width_or_options`.
///
/// # Examples
///
/// ```
/// use textwrap::refill;
///
/// // Some loosely wrapped text. The "> " prefix is recognized automatically.
/// let text = "\
/// > Memory
/// > safety without garbage
/// > collection.
/// ";
///
/// assert_eq!(refill(text, 20), "\
/// > Memory safety
/// > without garbage
/// > collection.
/// ");
///
/// assert_eq!(refill(text, 40), "\
/// > Memory safety without garbage
/// > collection.
/// ");
///
/// assert_eq!(refill(text, 60), "\
/// > Memory safety without garbage collection.
/// ");
/// ```
///
/// You can also reshape bullet points:
///
/// ```
/// use textwrap::refill;
///
/// let text = "\
/// - This is my
///   list item.
/// ";
///
/// assert_eq!(refill(text, 20), "\
/// - This is my list
///   item.
/// ");
/// ```
pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
where
    Opt: Into<Options<'a>>,
{
    let mut new_options = new_width_or_options.into();
    let (text, options) = unfill(filled_text);
    // The original line ending is kept by `unfill`.
    let stripped = text.strip_suffix(options.line_ending.as_str());
    // Read the requested line ending now: `new_options` is moved into
    // `fill` below.
    let new_line_ending = new_options.line_ending.as_str();

    // The indentation always comes from the input text, overriding
    // whatever the caller put in their options.
    new_options.initial_indent = options.initial_indent;
    new_options.subsequent_indent = options.subsequent_indent;
    let mut refilled = fill(stripped.unwrap_or(&text), new_options);

    // Add back right line ending if we stripped one off above.
    if stripped.is_some() {
        refilled.push_str(new_line_ending);
    }
    refilled
}
options) = unfill("foo bar\r\n"); + assert_eq!(text, "foo bar\r\n"); + assert_eq!(options.width, 7); + assert_eq!(options.line_ending, LineEnding::CRLF); + } + + #[test] + fn unfill_initial_indent() { + let (text, options) = unfill(" foo\nbar\nbaz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 5); + assert_eq!(options.initial_indent, " "); + } + + #[test] + fn unfill_differing_indents() { + let (text, options) = unfill(" foo\n bar\n baz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 7); + assert_eq!(options.initial_indent, " "); + assert_eq!(options.subsequent_indent, " "); + } + + #[test] + fn unfill_list_item() { + let (text, options) = unfill("* foo\n bar\n baz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 5); + assert_eq!(options.initial_indent, "* "); + assert_eq!(options.subsequent_indent, " "); + } + + #[test] + fn unfill_multiple_char_prefix() { + let (text, options) = unfill(" // foo bar\n // baz\n // quux"); + assert_eq!(text, "foo bar baz quux"); + assert_eq!(options.width, 14); + assert_eq!(options.initial_indent, " // "); + assert_eq!(options.subsequent_indent, " // "); + } + + #[test] + fn unfill_block_quote() { + let (text, options) = unfill("> foo\n> bar\n> baz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 5); + assert_eq!(options.initial_indent, "> "); + assert_eq!(options.subsequent_indent, "> "); + } + + #[test] + fn unfill_only_prefixes_issue_466() { + // Test that we don't crash if the first line has only prefix + // chars *and* the second line is shorter than the first line. + let (text, options) = unfill("######\nfoo"); + assert_eq!(text, " foo"); + assert_eq!(options.width, 6); + assert_eq!(options.initial_indent, "######"); + assert_eq!(options.subsequent_indent, ""); + } + + #[test] + fn unfill_trailing_newlines_issue_466() { + // Test that we don't crash on a '\r' following a string of + // '\n'. 
/// Return the current terminal width.
///
/// If the terminal width cannot be determined (typically because the
/// standard output is not connected to a terminal), a default width
/// of 80 characters will be used.
///
/// # Examples
///
/// Create an [`Options`] for wrapping at the current terminal width
/// with a two column margin to the left and the right:
///
/// ```no_run
/// use textwrap::{termwidth, Options};
///
/// // Two columns on each side. `saturating_sub` avoids an integer
/// // underflow should the terminal be narrower than four columns.
/// let width = termwidth().saturating_sub(4);
/// let options = Options::new(width)
///     .initial_indent("  ")
///     .subsequent_indent("  ");
/// ```
///
/// **Note:** Only available when the `terminal_size` Cargo feature is
/// enabled.
pub fn termwidth() -> usize {
    // Only the width half of the reported size is used; 80 is the
    // fallback when no size is reported.
    terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| w.into())
}

impl<'a> Options<'a> {
    /// Creates a new [`Options`] with `width` set to the current
    /// terminal width. If the terminal width cannot be determined
    /// (typically because the standard input and output is not
    /// connected to a terminal), a width of 80 characters will be
    /// used. Other settings use the same defaults as
    /// [`Options::new`].
    ///
    /// Equivalent to:
    ///
    /// ```no_run
    /// use textwrap::{termwidth, Options};
    ///
    /// let options = Options::new(termwidth());
    /// ```
    ///
    /// **Note:** Only available when the `terminal_size` feature is
    /// enabled.
    pub fn with_termwidth() -> Self {
        Self::new(termwidth())
    }
}
/// Describes where words occur in a line of text.
///
/// The simplest approach is to say that words are separated by one or
/// more ASCII spaces (`' '`). This works for Western languages
/// without emojis. A more complex approach is to use the Unicode line
/// breaking algorithm, which finds break points in non-ASCII text.
///
/// The line breaks occur between words, please see
/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
/// hyphenation of individual words.
///
/// # Examples
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::WordSeparator::AsciiSpace;
///
/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
/// ```
#[derive(Clone, Copy)]
pub enum WordSeparator {
    /// Find words by splitting on runs of `' '` characters.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::AsciiSpace;
    ///
    /// let words = AsciiSpace.find_words("Hello   World!").collect::<Vec<_>>();
    /// assert_eq!(words, vec![Word::from("Hello   "),
    ///                        Word::from("World!")]);
    /// ```
    AsciiSpace,

    /// Split `line` into words using Unicode break properties.
    ///
    /// This word separator uses the Unicode line breaking algorithm
    /// described in [Unicode Standard Annex
    /// #14](https://www.unicode.org/reports/tr14/) to find legal places
    /// to break lines. There is a small difference in that the U+002D
    /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
    /// to allow a line break at a hyphen, use
    /// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
    /// Soft hyphens are not currently supported.
    ///
    /// # Examples
    ///
    /// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
    /// breaking algorithm will find line break opportunities between
    /// some characters with no intervening whitespace:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
    ///            vec![Word::from("Emojis: "),
    ///                 Word::from("😂"),
    ///                 Word::from("😍")]);
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
    ///            vec![Word::from("CJK: "),
    ///                 Word::from("你"),
    ///                 Word::from("好")]);
    /// }
    /// ```
    ///
    /// A U+2060 (Word Joiner) character can be inserted if you want to
    /// manually override the defaults and keep the characters together:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
    ///            vec![Word::from("Emojis: "),
    ///                 Word::from("😂\u{2060}😍")]);
    /// }
    /// ```
    ///
    /// The Unicode line breaking algorithm will also automatically
    /// suppress line breaks around certain punctuation characters:
    ///
    /// ```
    /// #[cfg(feature = "unicode-linebreak")] {
    /// use textwrap::core::Word;
    /// use textwrap::WordSeparator::UnicodeBreakProperties;
    ///
    /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
    ///            vec![Word::from("[ foo ] "),
    ///                 Word::from("bar !")]);
    /// }
    /// ```
    #[cfg(feature = "unicode-linebreak")]
    UnicodeBreakProperties,

    /// Find words using a custom word separator.
    ///
    /// The wrapped function is given a line of text and returns a
    /// boxed iterator over the words it found in that line.
    Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
}
+ /// + /// ``` + /// use textwrap::WordSeparator; + /// + /// assert_eq!(WordSeparator::AsciiSpace, WordSeparator::AsciiSpace); + /// #[cfg(feature = "unicode-linebreak")] { + /// assert_eq!(WordSeparator::UnicodeBreakProperties, + /// WordSeparator::UnicodeBreakProperties); + /// } + /// ``` + /// + /// Note that `WordSeparator::Custom` values never compare equal: + /// + /// ``` + /// use textwrap::WordSeparator; + /// use textwrap::core::Word; + /// fn word_separator(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_> { + /// Box::new(line.split_inclusive(' ').map(Word::from)) + /// } + /// assert_ne!(WordSeparator::Custom(word_separator), + /// WordSeparator::Custom(word_separator)); + /// ``` + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (WordSeparator::AsciiSpace, WordSeparator::AsciiSpace) => true, + #[cfg(feature = "unicode-linebreak")] + (WordSeparator::UnicodeBreakProperties, WordSeparator::UnicodeBreakProperties) => true, + (_, _) => false, + } + } +} + +impl std::fmt::Debug for WordSeparator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WordSeparator::AsciiSpace => f.write_str("AsciiSpace"), + #[cfg(feature = "unicode-linebreak")] + WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"), + WordSeparator::Custom(_) => f.write_str("Custom(...)"), + } + } +} + +impl WordSeparator { + /// Create a new word separator. + /// + /// The best available algorithm is used by default, i.e., + /// [`WordSeparator::UnicodeBreakProperties`] if available, + /// otherwise [`WordSeparator::AsciiSpace`]. 
+ pub const fn new() -> Self { + #[cfg(feature = "unicode-linebreak")] + { + WordSeparator::UnicodeBreakProperties + } + + #[cfg(not(feature = "unicode-linebreak"))] + { + WordSeparator::AsciiSpace + } + } + + // This function should really return impl Iterator<Item = Word>, but + // this isn't possible until Rust supports higher-kinded types: + // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md + /// Find all words in `line`. + pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> { + match self { + WordSeparator::AsciiSpace => find_words_ascii_space(line), + #[cfg(feature = "unicode-linebreak")] + WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line), + WordSeparator::Custom(func) => func(line), + } + } +} + +fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> { + let mut start = 0; + let mut in_whitespace = false; + let mut char_indices = line.char_indices(); + + Box::new(std::iter::from_fn(move || { + for (idx, ch) in char_indices.by_ref() { + if in_whitespace && ch != ' ' { + let word = Word::from(&line[start..idx]); + start = idx; + in_whitespace = ch == ' '; + return Some(word); + } + + in_whitespace = ch == ' '; + } + + if start < line.len() { + let word = Word::from(&line[start..]); + start = line.len(); + return Some(word); + } + + None + })) +} + +// Strip all ANSI escape sequences from `text`. +#[cfg(feature = "unicode-linebreak")] +fn strip_ansi_escape_sequences(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + + let mut chars = text.chars(); + while let Some(ch) = chars.next() { + if skip_ansi_escape_sequence(ch, &mut chars) { + continue; + } + result.push(ch); + } + + result +} + +/// Soft hyphen, also knows as a “shy hyphen”. Should show up as ‘-’ +/// if a line is broken at this point, and otherwise be invisible. 
/// Find words in `line` using the Unicode line breaking algorithm.
///
/// ANSI escape sequences are ignored when looking for break
/// opportunities: the opportunities are computed on a copy of `line`
/// with the escape sequences stripped out. The returned words are
/// slices of the original `line`, so they still contain any escape
/// sequences.
///
/// Break opportunities directly after a `'-'` or a soft hyphen are
/// suppressed.
#[cfg(feature = "unicode-linebreak")]
fn find_words_unicode_break_properties<'a>(
    line: &'a str,
) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
    // Construct an iterator over (original index, stripped index)
    // tuples. We find the Unicode linebreaks on a stripped string,
    // but we need the original indices so we can form words based on
    // the original string.
    let mut last_stripped_idx = 0;
    let mut char_indices = line.char_indices();
    let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
        Some((orig_idx, ch)) => {
            let stripped_idx = last_stripped_idx;
            // The stripped index only advances for characters that
            // are not the start of an escape sequence.
            if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
                last_stripped_idx += ch.len_utf8();
            }
            Some((orig_idx, stripped_idx))
        }
        None => None,
    });

    let stripped = strip_ansi_escape_sequences(line);
    // The opportunities are collected into a vector before use. This
    // presumably keeps the returned iterator from borrowing the local
    // `stripped` string, and it makes `next_back` available below.
    let mut opportunities = unicode_linebreak::linebreaks(&stripped)
        .filter(|(idx, _)| {
            // The `match` is kept (instead of `matches!`) so that each
            // suppressed character can carry its own explanation.
            #[allow(clippy::match_like_matches_macro)]
            match &stripped[..*idx].chars().next_back() {
                // We suppress breaks at ‘-’ since we want to control
                // this via the WordSplitter.
                Some('-') => false,
                // Soft hyphens are currently not supported since we
                // require all `Word` fragments to be continuous in
                // the input string.
                Some(SHY) => false,
                // Other breaks should be fine!
                _ => true,
            }
        })
        .collect::<Vec<_>>()
        .into_iter();

    // Remove final break opportunity, we will add it below using
    // &line[start..]; This ensures that we correctly include a
    // trailing ANSI escape sequence.
    opportunities.next_back();

    let mut start = 0;
    Box::new(std::iter::from_fn(move || {
        for (idx, _) in opportunities.by_ref() {
            // Translate the break position in the stripped string back
            // to a position in the original string. `idx_map` is only
            // ever advanced, never restarted.
            if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) {
                let word = Word::from(&line[start..orig_idx]);
                start = orig_idx;
                return Some(word);
            }
        }

        // No break opportunities left: the rest of the line (if any)
        // is the final word. Setting `start` to the end of the line
        // makes sure it is only returned once.
        if start < line.len() {
            let word = Word::from(&line[start..]);
            start = line.len();
            return Some(word);
        }

        None
    }))
}
", "z"]] + ); + + test_find_words!( + ascii_only_whitespace, + unicode_only_whitespace, + [" ", [" "], [" "]], + [" ", [" "], [" "]] + ); + + test_find_words!( + ascii_inter_word_whitespace, + unicode_inter_word_whitespace, + ["foo bar", ["foo ", "bar"], ["foo ", "bar"]] + ); + + test_find_words!( + ascii_trailing_whitespace, + unicode_trailing_whitespace, + ["foo ", ["foo "], ["foo "]] + ); + + test_find_words!( + ascii_leading_whitespace, + unicode_leading_whitespace, + [" foo", [" ", "foo"], [" ", "foo"]] + ); + + test_find_words!( + ascii_multi_column_char, + unicode_multi_column_char, + ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠 + ); + + test_find_words!( + ascii_hyphens, + unicode_hyphens, + ["foo-bar", ["foo-bar"], ["foo-bar"]], + ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]], + ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]], + ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]] + ); + + test_find_words!( + ascii_newline, + unicode_newline, + ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]] + ); + + test_find_words!( + ascii_tab, + unicode_tab, + ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]] + ); + + test_find_words!( + ascii_non_breaking_space, + unicode_non_breaking_space, + ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]] + ); + + #[test] + #[cfg(unix)] + fn find_words_colored_text() { + use termion::color::{Blue, Fg, Green, Reset}; + + let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset)); + let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset)); + assert_iter_eq!( + AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)), + vec![Word::from(&green_hello), Word::from(&blue_world)] + ); + + #[cfg(feature = "unicode-linebreak")] + assert_iter_eq!( + UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)), + vec![Word::from(&green_hello), Word::from(&blue_world)] + ); + } + + #[test] + fn find_words_color_inside_word() { + let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz"; + 
assert_iter_eq!(AsciiSpace.find_words(text), vec![Word::from(text)]); + + #[cfg(feature = "unicode-linebreak")] + assert_iter_eq!( + UnicodeBreakProperties.find_words(text), + vec![Word::from(text)] + ); + } + + #[test] + fn word_separator_new() { + #[cfg(feature = "unicode-linebreak")] + assert!(matches!(WordSeparator::new(), UnicodeBreakProperties)); + + #[cfg(not(feature = "unicode-linebreak"))] + assert!(matches!(WordSeparator::new(), AsciiSpace)); + } +} diff --git a/third_party/rust/textwrap/src/word_splitters.rs b/third_party/rust/textwrap/src/word_splitters.rs new file mode 100644 index 0000000000..e2dc6aa01f --- /dev/null +++ b/third_party/rust/textwrap/src/word_splitters.rs @@ -0,0 +1,314 @@ +//! Word splitting functionality. +//! +//! To wrap text into lines, long words sometimes need to be split +//! across lines. The [`WordSplitter`] enum defines this +//! functionality. + +use crate::core::{display_width, Word}; + +/// The `WordSplitter` enum describes where words can be split. +/// +/// If the textwrap crate has been compiled with the `hyphenation` +/// Cargo feature enabled, you will find a +/// [`WordSplitter::Hyphenation`] variant. Use this struct for +/// language-aware hyphenation: +/// +/// ``` +/// #[cfg(feature = "hyphenation")] { +/// use hyphenation::{Language, Load, Standard}; +/// use textwrap::{wrap, Options, WordSplitter}; +/// +/// let text = "Oxidation is the loss of electrons."; +/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary)); +/// assert_eq!(wrap(text, &options), vec!["Oxida-", +/// "tion is", +/// "the loss", +/// "of elec-", +/// "trons."]); +/// } +/// ``` +/// +/// Please see the documentation for the [hyphenation] crate for more +/// details. 
+/// +/// [hyphenation]: https://docs.rs/hyphenation/ +#[derive(Clone)] +pub enum WordSplitter { + /// Use this as a [`Options.word_splitter`] to avoid any kind of + /// hyphenation: + /// + /// ``` + /// use textwrap::{wrap, Options, WordSplitter}; + /// + /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); + /// assert_eq!(wrap("foo bar-baz", &options), + /// vec!["foo", "bar-baz"]); + /// ``` + /// + /// [`Options.word_splitter`]: super::Options::word_splitter + NoHyphenation, + + /// `HyphenSplitter` is the default `WordSplitter` used by + /// [`Options::new`](super::Options::new). It will split words on + /// existing hyphens in the word. + /// + /// It will only use hyphens that are surrounded by alphanumeric + /// characters, which prevents a word like `"--foo-bar"` from + /// being split into `"--"` and `"foo-bar"`. + /// + /// # Examples + /// + /// ``` + /// use textwrap::WordSplitter; + /// + /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"), + /// vec![6]); + /// ``` + HyphenSplitter, + + /// Use a custom function as the word splitter. + /// + /// This variant lets you implement a custom word splitter using + /// your own function. + /// + /// # Examples + /// + /// ``` + /// use textwrap::WordSplitter; + /// + /// fn split_at_underscore(word: &str) -> Vec<usize> { + /// word.match_indices('_').map(|(idx, _)| idx + 1).collect() + /// } + /// + /// let word_splitter = WordSplitter::Custom(split_at_underscore); + /// assert_eq!(word_splitter.split_points("a_long_identifier"), + /// vec![2, 7]); + /// ``` + Custom(fn(word: &str) -> Vec<usize>), + + /// A hyphenation dictionary can be used to do language-specific + /// hyphenation using patterns from the [hyphenation] crate. + /// + /// **Note:** Only available when the `hyphenation` Cargo feature is + /// enabled. 
+ /// + /// [hyphenation]: https://docs.rs/hyphenation/ + #[cfg(feature = "hyphenation")] + Hyphenation(hyphenation::Standard), +} + +impl std::fmt::Debug for WordSplitter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WordSplitter::NoHyphenation => f.write_str("NoHyphenation"), + WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"), + WordSplitter::Custom(_) => f.write_str("Custom(...)"), + #[cfg(feature = "hyphenation")] + WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()), + } + } +} + +impl PartialEq<WordSplitter> for WordSplitter { + fn eq(&self, other: &WordSplitter) -> bool { + match (self, other) { + (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true, + (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true, + #[cfg(feature = "hyphenation")] + (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => { + this_dict.language() == other_dict.language() + } + (_, _) => false, + } + } +} + +impl WordSplitter { + /// Return all possible indices where `word` can be split. + /// + /// The indices are in the range `0..word.len()`. They point to + /// the index _after_ the split point, i.e., after `-` if + /// splitting on hyphens. This way, `word.split_at(idx)` will + /// break the word into two well-formed pieces. 
+ /// + /// # Examples + /// + /// ``` + /// use textwrap::WordSplitter; + /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]); + /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]); + /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]); + /// ``` + pub fn split_points(&self, word: &str) -> Vec<usize> { + match self { + WordSplitter::NoHyphenation => Vec::new(), + WordSplitter::HyphenSplitter => { + let mut splits = Vec::new(); + + for (idx, _) in word.match_indices('-') { + // We only use hyphens that are surrounded by alphanumeric + // characters. This is to avoid splitting on repeated hyphens, + // such as those found in --foo-bar. + let prev = word[..idx].chars().next_back(); + let next = word[idx + 1..].chars().next(); + + if prev.filter(|ch| ch.is_alphanumeric()).is_some() + && next.filter(|ch| ch.is_alphanumeric()).is_some() + { + splits.push(idx + 1); // +1 due to width of '-'. + } + } + + splits + } + WordSplitter::Custom(splitter_func) => splitter_func(word), + #[cfg(feature = "hyphenation")] + WordSplitter::Hyphenation(dictionary) => { + use hyphenation::Hyphenator; + dictionary.hyphenate(word).breaks + } + } + } +} + +/// Split words into smaller words according to the split points given +/// by `word_splitter`. +/// +/// Note that we split all words, regardless of their length. This is +/// to more cleanly separate the business of splitting (including +/// automatic hyphenation) from the business of word wrapping. 
+pub fn split_words<'a, I>( + words: I, + word_splitter: &'a WordSplitter, +) -> impl Iterator<Item = Word<'a>> +where + I: IntoIterator<Item = Word<'a>>, +{ + words.into_iter().flat_map(move |word| { + let mut prev = 0; + let mut split_points = word_splitter.split_points(&word).into_iter(); + std::iter::from_fn(move || { + if let Some(idx) = split_points.next() { + let need_hyphen = !word[..idx].ends_with('-'); + let w = Word { + word: &word.word[prev..idx], + width: display_width(&word[prev..idx]), + whitespace: "", + penalty: if need_hyphen { "-" } else { "" }, + }; + prev = idx; + return Some(w); + } + + if prev < word.word.len() || prev == 0 { + let w = Word { + word: &word.word[prev..], + width: display_width(&word[prev..]), + whitespace: word.whitespace, + penalty: word.penalty, + }; + prev = word.word.len() + 1; + return Some(w); + } + + None + }) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Like assert_eq!, but the left expression is an iterator. + macro_rules! assert_iter_eq { + ($left:expr, $right:expr) => { + assert_eq!($left.collect::<Vec<_>>(), $right); + }; + } + + #[test] + fn split_words_no_words() { + assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]); + } + + #[test] + fn split_words_empty_word() { + assert_iter_eq!( + split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter), + vec![Word::from(" ")] + ); + } + + #[test] + fn split_words_single_word() { + assert_iter_eq!( + split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter), + vec![Word::from("foobar")] + ); + } + + #[test] + fn split_words_hyphen_splitter() { + assert_iter_eq!( + split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter), + vec![Word::from("foo-"), Word::from("bar")] + ); + } + + #[test] + fn split_words_no_hyphenation() { + assert_iter_eq!( + split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation), + vec![Word::from("foo-bar")] + ); + } + + #[test] + fn split_words_adds_penalty() { + 
let fixed_split_point = |_: &str| vec![3]; + + assert_iter_eq!( + split_words( + vec![Word::from("foobar")].into_iter(), + &WordSplitter::Custom(fixed_split_point) + ), + vec![ + Word { + word: "foo", + width: 3, + whitespace: "", + penalty: "-" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + + assert_iter_eq!( + split_words( + vec![Word::from("fo-bar")].into_iter(), + &WordSplitter::Custom(fixed_split_point) + ), + vec![ + Word { + word: "fo-", + width: 3, + whitespace: "", + penalty: "" + }, + Word { + word: "bar", + width: 3, + whitespace: "", + penalty: "" + } + ] + ); + } +} diff --git a/third_party/rust/textwrap/src/wrap.rs b/third_party/rust/textwrap/src/wrap.rs new file mode 100644 index 0000000000..a7f2ccf298 --- /dev/null +++ b/third_party/rust/textwrap/src/wrap.rs @@ -0,0 +1,686 @@ +//! Functions for wrapping text. + +use std::borrow::Cow; + +use crate::core::{break_words, display_width, Word}; +use crate::word_splitters::split_words; +use crate::Options; + +/// Wrap a line of text at a given width. +/// +/// The result is a vector of lines, each line is of type [`Cow<'_, +/// str>`](Cow), which means that the line will borrow from the input +/// `&str` if possible. The lines do not have trailing whitespace, +/// including a final `'\n'`. Please use [`fill()`](crate::fill()) if +/// you need a [`String`] instead. 
+/// +/// The easiest way to use this function is to pass an integer for +/// `width_or_options`: +/// +/// ``` +/// use textwrap::wrap; +/// +/// let lines = wrap("Memory safety without garbage collection.", 15); +/// assert_eq!(lines, &[ +/// "Memory safety", +/// "without garbage", +/// "collection.", +/// ]); +/// ``` +/// +/// If you need to customize the wrapping, you can pass an [`Options`] +/// instead of an `usize`: +/// +/// ``` +/// use textwrap::{wrap, Options}; +/// +/// let options = Options::new(15) +/// .initial_indent("- ") +/// .subsequent_indent(" "); +/// let lines = wrap("Memory safety without garbage collection.", &options); +/// assert_eq!(lines, &[ +/// "- Memory safety", +/// " without", +/// " garbage", +/// " collection.", +/// ]); +/// ``` +/// +/// # Optimal-Fit Wrapping +/// +/// By default, `wrap` will try to ensure an even right margin by +/// finding breaks which avoid short lines. We call this an +/// “optimal-fit algorithm” since the line breaks are computed by +/// considering all possible line breaks. The alternative is a +/// “first-fit algorithm” which simply accumulates words until they no +/// longer fit on the line. +/// +/// As an example, using the first-fit algorithm to wrap the famous +/// Hamlet quote “To be, or not to be: that is the question” in a +/// narrow column with room for only 10 characters looks like this: +/// +/// ``` +/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap}; +/// # +/// # let lines = wrap("To be, or not to be: that is the question", +/// # Options::new(10).wrap_algorithm(FirstFit)); +/// # assert_eq!(lines.join("\n") + "\n", "\ +/// To be, or +/// not to be: +/// that is +/// the +/// question +/// # "); +/// ``` +/// +/// Notice how the second to last line is quite narrow because +/// “question” was too large to fit? The greedy first-fit algorithm +/// doesn’t look ahead, so it has no other option than to put +/// “question” onto its own line. 
+/// +/// With the optimal-fit wrapping algorithm, the previous lines are +/// shortened slightly in order to make the word “is” go into the +/// second last line: +/// +/// ``` +/// # #[cfg(feature = "smawk")] { +/// # use textwrap::{Options, WrapAlgorithm, wrap}; +/// # +/// # let lines = wrap( +/// # "To be, or not to be: that is the question", +/// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit()) +/// # ); +/// # assert_eq!(lines.join("\n") + "\n", "\ +/// To be, +/// or not to +/// be: that +/// is the +/// question +/// # "); } +/// ``` +/// +/// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on +/// the choices. +/// +/// # Examples +/// +/// The returned iterator yields lines of type `Cow<'_, str>`. If +/// possible, the wrapped lines will borrow from the input string. As +/// an example, a hanging indentation, the first line can borrow from +/// the input, but the subsequent lines become owned strings: +/// +/// ``` +/// use std::borrow::Cow::{Borrowed, Owned}; +/// use textwrap::{wrap, Options}; +/// +/// let options = Options::new(15).subsequent_indent("...."); +/// let lines = wrap("Wrapping text all day long.", &options); +/// let annotated = lines +/// .iter() +/// .map(|line| match line { +/// Borrowed(text) => format!("[Borrowed] {}", text), +/// Owned(text) => format!("[Owned] {}", text), +/// }) +/// .collect::<Vec<_>>(); +/// assert_eq!( +/// annotated, +/// &[ +/// "[Borrowed] Wrapping text", +/// "[Owned] ....all day", +/// "[Owned] ....long.", +/// ] +/// ); +/// ``` +/// +/// ## Leading and Trailing Whitespace +/// +/// As a rule, leading whitespace (indentation) is preserved and +/// trailing whitespace is discarded. +/// +/// In more details, when wrapping words into lines, words are found +/// by splitting the input text on space characters. 
One or more +/// spaces (shown here as “␣”) are attached to the end of each word: +/// +/// ```text +/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"] +/// ``` +/// +/// These words are then put into lines. The interword whitespace is +/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"` +/// word falls at the end of a line: +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]); +/// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]); +/// ``` +/// +/// Notice how the trailing whitespace is removed in both case: in the +/// first example, `"bar␣"` becomes `"bar"` and in the second case +/// `"Foo␣␣␣"` becomes `"Foo"`. +/// +/// Leading whitespace is preserved when the following word fits on +/// the first line. To understand this, consider how words are found +/// in a text with leading spaces: +/// +/// ```text +/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"] +/// ``` +/// +/// When put into lines, the indentation is preserved if `"foo"` fits +/// on the first line, otherwise you end up with an empty line: +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]); +/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]); +/// ``` +pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>> +where + Opt: Into<Options<'a>>, +{ + let options: Options = width_or_options.into(); + let line_ending_str = options.line_ending.as_str(); + + let mut lines = Vec::new(); + for line in text.split(line_ending_str) { + wrap_single_line(line, &options, &mut lines); + } + + lines +} + +pub(crate) fn wrap_single_line<'a>( + line: &'a str, + options: &Options<'_>, + lines: &mut Vec<Cow<'a, str>>, +) { + let indent = if lines.is_empty() { + options.initial_indent + } else { + options.subsequent_indent + }; + if line.len() < options.width && indent.is_empty() { + lines.push(Cow::from(line.trim_end_matches(' '))); + } else { + 
/// Wrap a single line of text and append the resulting lines to
/// `lines`.
///
/// This is the general path. `wrap_single_line` calls it whenever its
/// shortcut does not apply, that is, when the byte length of `line`
/// is not below `options.width` or when the line must be indented.
///
/// The first line pushed onto an empty `lines` is prefixed with
/// `options.initial_indent`, all other lines with
/// `options.subsequent_indent`. Lines borrow from `line` when no
/// indentation and no penalty has to be added, otherwise they are
/// owned strings.
pub(crate) fn wrap_single_line_slow_path<'a>(
    line: &'a str,
    options: &Options<'_>,
    lines: &mut Vec<Cow<'a, str>>,
) {
    // Width left for text after the indentation. `saturating_sub`
    // gives a width of zero when the indentation is wider than
    // `options.width`.
    let initial_width = options
        .width
        .saturating_sub(display_width(options.initial_indent));
    let subsequent_width = options
        .width
        .saturating_sub(display_width(options.subsequent_indent));
    let line_widths = [initial_width, subsequent_width];

    let words = options.word_separator.find_words(line);
    let split_words = split_words(words, &options.word_splitter);
    let broken_words = if options.break_words {
        let mut broken_words = break_words(split_words, line_widths[1]);
        if !options.initial_indent.is_empty() {
            // Without this, the first word will always go into the
            // first line. However, since we break words based on the
            // _second_ line width, it can be wrong to unconditionally
            // put the first word onto the first line. An empty
            // zero-width word fixes this.
            broken_words.insert(0, Word::from(""));
        }
        broken_words
    } else {
        split_words.collect::<Vec<_>>()
    };

    let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);

    // Byte offset into `line` where the next output line starts.
    let mut idx = 0;
    for words in wrapped_words {
        let last_word = match words.last() {
            // A line without any words becomes an empty output line.
            None => {
                lines.push(Cow::from(""));
                continue;
            }
            Some(word) => word,
        };

        // We assume here that all words are contiguous in `line`.
        // That is, the sum of their lengths should add up to the
        // length of `line`. The whitespace after the last word is
        // left out since lines have no trailing whitespace.
        let len = words
            .iter()
            .map(|word| word.len() + word.whitespace.len())
            .sum::<usize>()
            - last_word.whitespace.len();

        // The result is owned if we have indentation, otherwise we
        // can simply borrow an empty string.
        let mut result = if lines.is_empty() && !options.initial_indent.is_empty() {
            Cow::Owned(options.initial_indent.to_owned())
        } else if !lines.is_empty() && !options.subsequent_indent.is_empty() {
            Cow::Owned(options.subsequent_indent.to_owned())
        } else {
            // We can use an empty string here since string
            // concatenation for `Cow` preserves a borrowed value when
            // either side is empty.
            Cow::from("")
        };

        result += &line[idx..idx + len];

        // A penalty (such as the "-" added when a word was split) is
        // not part of `line`, so it is appended separately. This
        // turns a borrowed line into an owned one.
        if !last_word.penalty.is_empty() {
            result.to_mut().push_str(last_word.penalty);
        }

        lines.push(result);

        // Advance by the length of `result`, plus the length of
        // `last_word.whitespace` -- even if we had a penalty, we need
        // to skip over the whitespace.
        idx += len + last_word.whitespace.len();
    }
}
\ + It should not be wrapped given the extents below."; + assert_eq!(wrap(text, usize::MAX), vec![text]); + } + + #[test] + fn leading_whitespace() { + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + } + + #[test] + fn leading_whitespace_empty_first_line() { + // If there is no space for the first word, the first line + // will be empty. This is because the string is split into + // words like [" ", "foobar ", "baz"], which puts "foobar " on + // the second line. We never output trailing whitespace + assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]); + } + + #[test] + fn trailing_whitespace() { + // Whitespace is only significant inside a line. After a line + // gets too long and is broken, the first word starts in + // column zero and is not indented. + assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn issue_99() { + // We did not reset the in_whitespace flag correctly and did + // not handle single-character words after a line break. + assert_eq!( + wrap("aaabbbccc x yyyzzzwww", 9), + vec!["aaabbbccc", "x", "yyyzzzwww"] + ); + } + + #[test] + fn issue_129() { + // The dash is an em-dash which takes up four bytes. We used + // to panic since we tried to index into the character. + let options = Options::new(1).word_separator(WordSeparator::AsciiSpace); + assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); + } + + #[test] + fn wide_character_handling() { + assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); + assert_eq!( + wrap( + "Hello, World!", + Options::new(15).word_separator(WordSeparator::AsciiSpace) + ), + vec!["Hello,", "World!"] + ); + + // Wide characters are allowed to break if the + // unicode-linebreak feature is enabled. 
+ #[cfg(feature = "unicode-linebreak")] + assert_eq!( + wrap( + "Hello, World!", + Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties), + ), + vec!["Hello, W", "orld!"] + ); + } + + #[test] + fn indent_empty_line() { + // Previously, indentation was not applied to empty lines. + // However, this is somewhat inconsistent and undesirable if + // the indentation is something like a border ("| ") which you + // want to apply to all lines, empty or not. + let options = Options::new(10).initial_indent("!!!"); + assert_eq!(wrap("", &options), vec!["!!!"]); + } + + #[test] + fn indent_single_line() { + let options = Options::new(10).initial_indent(">>>"); // No trailing space + assert_eq!(wrap("foo", &options), vec![">>>foo"]); + } + + #[test] + fn indent_first_emoji() { + let options = Options::new(10).initial_indent("👉👉"); + assert_eq!( + wrap("x x x x x x x x x x x x x", &options), + vec!["👉👉x x x", "x x x x x", "x x x x x"] + ); + } + + #[test] + fn indent_multiple_lines() { + let options = Options::new(6).initial_indent("* ").subsequent_indent(" "); + assert_eq!( + wrap("foo bar baz", &options), + vec!["* foo", " bar", " baz"] + ); + } + + #[test] + fn only_initial_indent_multiple_lines() { + let options = Options::new(10).initial_indent(" "); + assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]); + } + + #[test] + fn only_subsequent_indent_multiple_lines() { + let options = Options::new(10).subsequent_indent(" "); + assert_eq!( + wrap("foo\nbar\nbaz", &options), + vec!["foo", " bar", " baz"] + ); + } + + #[test] + fn indent_break_words() { + let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn initial_indent_break_words() { + // This is a corner-case showing how the long word is broken + // according to the width of the subsequent lines. 
The first + // fragment of the word no longer fits on the first line, + // which ends up being pure indentation. + let options = Options::new(5).initial_indent("-->"); + assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]); + } + + #[test] + fn hyphens() { + assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); + } + + #[test] + fn trailing_hyphen() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foobar-", &options), vec!["foobar-"]); + } + + #[test] + fn multiple_hyphens() { + assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); + } + + #[test] + fn hyphens_flag() { + let options = Options::new(5).break_words(false); + assert_eq!( + wrap("The --foo-bar flag.", &options), + vec!["The", "--foo-", "bar", "flag."] + ); + } + + #[test] + fn repeated_hyphens() { + let options = Options::new(4).break_words(false); + assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); + } + + #[test] + fn hyphens_alphanumeric() { + assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); + } + + #[test] + fn hyphens_non_alphanumeric() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); + } + + #[test] + fn multiple_splits() { + assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); + } + + #[test] + fn forced_split() { + let options = Options::new(5).break_words(false); + assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); + } + + #[test] + fn multiple_unbroken_words_issue_193() { + let options = Options::new(3).break_words(false); + assert_eq!( + wrap("small large tiny", &options), + vec!["small", "large", "tiny"] + ); + assert_eq!( + wrap("small large tiny", &options), + vec!["small", "large", "tiny"] + ); + } + + #[test] + fn very_narrow_lines_issue_193() { + let options = Options::new(1).break_words(false); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); + } + + 
#[test] + fn simple_hyphens() { + let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter); + assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); + } + + #[test] + fn no_hyphenation() { + let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); + assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_double_hyphenation() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10); + assert_eq!( + wrap("Internationalization", &options), + vec!["Internatio", "nalization"] + ); + + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("Internationalization", &options), + vec!["Interna-", "tionaliza-", "tion"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_issue_158() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10); + assert_eq!( + wrap("participation is the key to success", &options), + vec!["participat", "ion is", "the key to", "success"] + ); + + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("participation is the key to success", &options), + vec!["partici-", "pation is", "the key to", "success"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn split_len_hyphenation() { + // Test that hyphenation takes the width of the whitespace + // into account. + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("garbage collection", &options), + vec!["garbage col-", "lection"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn borrowed_lines() { + // Lines that end with an extra hyphen are owned, the final + // line is borrowed. 
+ use std::borrow::Cow::{Borrowed, Owned}; + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); + let lines = wrap("Internationalization", &options); + assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]); + if let Borrowed(s) = lines[0] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Borrowed(s) = lines[1] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Owned(ref s) = lines[2] { + assert!(false, "should not have been owned: {:?}", s); + } + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_with_hyphen() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let options = Options::new(8).break_words(false); + assert_eq!( + wrap("over-caffinated", &options), + vec!["over-", "caffinated"] + ); + + let options = options.word_splitter(WordSplitter::Hyphenation(dictionary)); + assert_eq!( + wrap("over-caffinated", &options), + vec!["over-", "caffi-", "nated"] + ); + } + + #[test] + fn break_words() { + assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); + } + + #[test] + fn break_words_wide_characters() { + // Even the poor man's version of `ch_width` counts these + // characters as wide. 
+ let options = Options::new(5).word_separator(WordSeparator::AsciiSpace); + assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); + } + + #[test] + fn break_words_zero_width() { + assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn break_long_first_word() { + assert_eq!(wrap("testx y", 4), vec!["test", "x y"]); + } + + #[test] + fn wrap_preserves_line_breaks_trims_whitespace() { + assert_eq!(wrap(" ", 80), vec![""]); + assert_eq!(wrap(" \n ", 80), vec!["", ""]); + assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]); + } + + #[test] + fn wrap_colored_text() { + // The words are much longer than 6 bytes, but they remain + // intact after filling the text. + let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m"; + let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m"; + assert_eq!( + wrap(&format!("{} {}", green_hello, blue_world), 6), + vec![green_hello, blue_world], + ); + } +} diff --git a/third_party/rust/textwrap/src/wrap_algorithms.rs b/third_party/rust/textwrap/src/wrap_algorithms.rs new file mode 100644 index 0000000000..7737e08f99 --- /dev/null +++ b/third_party/rust/textwrap/src/wrap_algorithms.rs @@ -0,0 +1,413 @@ +//! Word wrapping algorithms. +//! +//! After a text has been broken into words (or [`Fragment`]s), one +//! now has to decide how to break the fragments into lines. The +//! simplest algorithm for this is implemented by +//! [`wrap_first_fit()`]: it uses no look-ahead and simply adds +//! fragments to the line as long as they fit. However, this can lead +//! to poor line breaks if a large fragment almost-but-not-quite fits +//! on a line. When that happens, the fragment is moved to the next +//! line and it will leave behind a large gap. +//! +//! A more advanced algorithm, implemented by [`wrap_optimal_fit()`], +//! will take this into account. The optimal-fit algorithm considers +//! all possible line breaks and will attempt to minimize the gaps +//! left behind by overly short lines. +//! +//! 
While both algorithms run in linear time, the first-fit algorithm +//! is about 4 times faster than the optimal-fit algorithm. + +#[cfg(feature = "smawk")] +mod optimal_fit; +#[cfg(feature = "smawk")] +pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties}; + +use crate::core::{Fragment, Word}; + +/// Describes how to wrap words into lines. +/// +/// The simplest approach is to wrap words one word at a time and +/// accept the first way of wrapping which fits +/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is +/// enabled, a more complex algorithm is available which will look at +/// an entire paragraph at a time in order to find optimal line breaks +/// ([`WrapAlgorithm::OptimalFit`]). +#[derive(Clone, Copy)] +pub enum WrapAlgorithm { + /// Wrap words using a fast and simple algorithm. + /// + /// This algorithm uses no look-ahead when finding line breaks. + /// Implemented by [`wrap_first_fit()`], please see that function + /// for details and examples. + FirstFit, + + /// Wrap words using an advanced algorithm with look-ahead. + /// + /// This wrapping algorithm considers the entire paragraph to find + /// optimal line breaks. When wrapping text, "penalties" are + /// assigned to line breaks based on the gaps left at the end of + /// lines. See [`Penalties`] for details. + /// + /// The underlying wrapping algorithm is implemented by + /// [`wrap_optimal_fit()`], please see that function for examples. + /// + /// **Note:** Only available when the `smawk` Cargo feature is + /// enabled. + #[cfg(feature = "smawk")] + OptimalFit(Penalties), + + /// Custom wrapping function. + /// + /// Use this if you want to implement your own wrapping algorithm. + /// The function can freely decide how to turn a slice of + /// [`Word`]s into lines. 
+ /// + /// # Example + /// + /// ``` + /// use textwrap::core::Word; + /// use textwrap::{wrap, Options, WrapAlgorithm}; + /// + /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> { + /// let mut lines = Vec::new(); + /// let mut step = 1; + /// let mut start_idx = 0; + /// while start_idx + step <= words.len() { + /// lines.push(&words[start_idx .. start_idx+step]); + /// start_idx += step; + /// step += 1; + /// } + /// lines + /// } + /// + /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair)); + /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options), + /// vec!["First,", + /// "second, third,", + /// "fourth, fifth, sixth"]); + /// ``` + Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>), +} + +impl PartialEq for WrapAlgorithm { + /// Compare two wrap algorithms. + /// + /// ``` + /// use textwrap::WrapAlgorithm; + /// + /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit); + /// #[cfg(feature = "smawk")] { + /// assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit()); + /// } + /// ``` + /// + /// Note that `WrapAlgorithm::Custom` values never compare equal: + /// + /// ``` + /// use textwrap::WrapAlgorithm; + /// + /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]), + /// WrapAlgorithm::Custom(|words, line_widths| vec![words])); + /// ``` + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true, + #[cfg(feature = "smawk")] + (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b, + (_, _) => false, + } + } +} + +impl std::fmt::Debug for WrapAlgorithm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + WrapAlgorithm::FirstFit => f.write_str("FirstFit"), + #[cfg(feature = "smawk")] + WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties), + 
WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"), + } + } +} + +impl WrapAlgorithm { + /// Create new wrap algorithm. + /// + /// The best wrapping algorithm is used by default, i.e., + /// [`WrapAlgorithm::OptimalFit`] if available, otherwise + /// [`WrapAlgorithm::FirstFit`]. + pub const fn new() -> Self { + #[cfg(not(feature = "smawk"))] + { + WrapAlgorithm::FirstFit + } + + #[cfg(feature = "smawk")] + { + WrapAlgorithm::new_optimal_fit() + } + } + + /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This + /// works well for monospace text. + /// + /// **Note:** Only available when the `smawk` Cargo feature is + /// enabled. + #[cfg(feature = "smawk")] + pub const fn new_optimal_fit() -> Self { + WrapAlgorithm::OptimalFit(Penalties::new()) + } + + /// Wrap words according to line widths. + /// + /// The `line_widths` slice gives the target line width for each + /// line (the last slice element is repeated as necessary). This + /// can be used to implement hanging indentation. + #[inline] + pub fn wrap<'a, 'b>( + &self, + words: &'b [Word<'a>], + line_widths: &'b [usize], + ) -> Vec<&'b [Word<'a>]> { + // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 + // = 9_007_199_254_740_992 can be represented without loss by + // a f64. Larger line widths will be rounded to the nearest + // representable number. + let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>(); + + match self { + WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), + + #[cfg(feature = "smawk")] + WrapAlgorithm::OptimalFit(penalties) => { + // The computation cannot overflow when the line + // widths are restricted to usize. + wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() + } + + WrapAlgorithm::Custom(func) => func(words, line_widths), + } + } +} + +impl Default for WrapAlgorithm { + fn default() -> Self { + WrapAlgorithm::new() + } +} + +/// Wrap abstract fragments into lines with a first-fit algorithm. 
+/// +/// The `line_widths` slice gives the target line width for each line +/// (the last slice element is repeated as necessary). This can be +/// used to implement hanging indentation. +/// +/// The fragments must already have been split into the desired +/// widths, this function will not (and cannot) attempt to split them +/// further when arranging them into lines. +/// +/// # First-Fit Algorithm +/// +/// This implements a simple “greedy” algorithm: accumulate fragments +/// one by one and when a fragment no longer fits, start a new line. +/// There is no look-ahead, we simply take first fit of the fragments +/// we find. +/// +/// While fast and predictable, this algorithm can produce poor line +/// breaks when a long fragment is moved to a new line, leaving behind +/// a large gap: +/// +/// ``` +/// use textwrap::core::Word; +/// use textwrap::wrap_algorithms::wrap_first_fit; +/// use textwrap::WordSeparator; +/// +/// // Helper to convert wrapped lines to a Vec<String>. +/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> { +/// lines.iter().map(|line| { +/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ") +/// }).collect::<Vec<_>>() +/// } +/// +/// let text = "These few words will unfortunately not wrap nicely."; +/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>(); +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), +/// vec!["These few words", +/// "will", // <-- short line +/// "unfortunately", +/// "not wrap", +/// "nicely."]); +/// +/// // We can avoid the short line if we look ahead: +/// #[cfg(feature = "smawk")] +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; +/// #[cfg(feature = "smawk")] +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), +/// vec!["These few", +/// "words will", +/// "unfortunately", +/// "not wrap", +/// "nicely."]); +/// ``` +/// +/// The [`wrap_optimal_fit()`] function was used above to get 
better +/// line breaks. It uses an advanced algorithm which tries to avoid +/// short lines. This function is about 4 times faster than +/// [`wrap_optimal_fit()`]. +/// +/// # Examples +/// +/// Imagine you're building a house site and you have a number of +/// tasks you need to execute. Things like pour foundation, complete +/// framing, install plumbing, electric cabling, install insulation. +/// +/// The construction workers can only work during daytime, so they +/// need to pack up everything at night. Because they need to secure +/// their tools and move machines back to the garage, this process +/// takes much more time than the time it would take them to simply +/// switch to another task. +/// +/// You would like to make a list of tasks to execute every day based +/// on your estimates. You can model this with a program like this: +/// +/// ``` +/// use textwrap::core::{Fragment, Word}; +/// use textwrap::wrap_algorithms::wrap_first_fit; +/// +/// #[derive(Debug)] +/// struct Task<'a> { +/// name: &'a str, +/// hours: f64, // Time needed to complete task. +/// sweep: f64, // Time needed for a quick sweep after task during the day. +/// cleanup: f64, // Time needed for full cleanup if day ends with this task. 
+/// } +/// +/// impl Fragment for Task<'_> { +/// fn width(&self) -> f64 { self.hours } +/// fn whitespace_width(&self) -> f64 { self.sweep } +/// fn penalty_width(&self) -> f64 { self.cleanup } +/// } +/// +/// // The morning tasks +/// let tasks = vec![ +/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 }, +/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 }, +/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 }, +/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, +/// ]; +/// +/// // Fill tasks into days, taking `day_length` into account. The +/// // output shows the hours worked per day along with the names of +/// // the tasks for that day. +/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> { +/// let mut days = Vec::new(); +/// // Assign tasks to days. The assignment is a vector of slices, +/// // with a slice per day. 
+/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); +/// for day in assigned_days.iter() { +/// let last = day.last().unwrap(); +/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); +/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>(); +/// days.push((work_hours - last.sweep + last.cleanup, names)); +/// } +/// days +/// } +/// +/// // With a single crew working 8 hours a day: +/// assert_eq!( +/// assign_days(&tasks, 8.0), +/// [ +/// (7.0, vec!["Foundation"]), +/// (8.0, vec!["Framing", "Plumbing"]), +/// (7.0, vec!["Electrical", "Insulation"]), +/// (5.0, vec!["Drywall"]), +/// (7.0, vec!["Floors", "Countertops"]), +/// (4.0, vec!["Bathrooms"]), +/// ] +/// ); +/// +/// // With two crews working in shifts, 16 hours a day: +/// assert_eq!( +/// assign_days(&tasks, 16.0), +/// [ +/// (14.0, vec!["Foundation", "Framing", "Plumbing"]), +/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]), +/// (6.0, vec!["Countertops", "Bathrooms"]), +/// ] +/// ); +/// ``` +/// +/// Apologies to anyone who actually knows how to build a house and +/// knows how long each step takes :-) +pub fn wrap_first_fit<'a, T: Fragment>( + fragments: &'a [T], + line_widths: &[f64], +) -> Vec<&'a [T]> { + // The final line width is used for all remaining lines. 
+ let default_line_width = line_widths.last().copied().unwrap_or(0.0); + let mut lines = Vec::new(); + let mut start = 0; + let mut width = 0.0; + + for (idx, fragment) in fragments.iter().enumerate() { + let line_width = line_widths + .get(lines.len()) + .copied() + .unwrap_or(default_line_width); + if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { + lines.push(&fragments[start..idx]); + start = idx; + width = 0.0; + } + width += fragment.width() + fragment.whitespace_width(); + } + lines.push(&fragments[start..]); + lines +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_string_longer_than_f64() { + let words = vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307), + Word(6e307), + ]; + // Wrap at just under f64::MAX (~19e307). The tiny + // whitespace_widths disappear because of loss of precision. + assert_eq!( + wrap_first_fit(&words, &[15e307]), + &[ + vec![ + Word(1e307), + Word(2e307), + Word(3e307), + Word(4e307), + Word(5e307) + ], + vec![Word(6e307)] + ] + ); + } +} diff --git a/third_party/rust/textwrap/src/wrap_algorithms/optimal_fit.rs b/third_party/rust/textwrap/src/wrap_algorithms/optimal_fit.rs new file mode 100644 index 0000000000..bdc0334539 --- /dev/null +++ b/third_party/rust/textwrap/src/wrap_algorithms/optimal_fit.rs @@ -0,0 +1,433 @@ +use std::cell::RefCell; + +use crate::core::Fragment; + +/// Penalties for +/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit) +/// and [`wrap_optimal_fit`]. +/// +/// This wrapping algorithm in [`wrap_optimal_fit`] considers the +/// entire paragraph to find optimal line breaks. When wrapping text, +/// "penalties" are assigned to line breaks based on the gaps left at +/// the end of lines. 
The penalties are given by this struct, with +/// [`Penalties::default`] assigning penalties that work well for +/// monospace text. +/// +/// If you are wrapping proportional text, you are advised to assign +/// your own penalties according to your font size. See the individual +/// penalties below for details. +/// +/// **Note:** Only available when the `smawk` Cargo feature is +/// enabled. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Penalties { + /// Per-line penalty. This is added for every line, which makes it + /// expensive to output more lines than the minimum required. + pub nline_penalty: usize, + + /// Per-character cost for lines that overflow the target line width. + /// + /// With a default value of 50², every single character costs as + /// much as leaving a gap of 50 characters behind. This is because + /// we assign a cost of `gap * gap` to a short line. When + /// wrapping monospace text, we can overflow the line by 1 + /// character in extreme cases: + /// + /// ``` + /// use textwrap::core::Word; + /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; + /// + /// let short = "foo "; + /// let long = "x".repeat(50); + /// let length = (short.len() + long.len()) as f64; + /// let fragments = vec![Word::from(short), Word::from(&long)]; + /// let penalties = Penalties::new(); + /// + /// // Perfect fit, both words are on a single line with no overflow. + /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The words no longer fit, yet we get a single line back. While + /// // the cost of overflow (`1 * 2500`) is the same as the cost of the + /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` + /// // which makes it cheaper to overflow than to use two lines. 
+ /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The cost of overflow would be 2 * 2500, whereas the cost of + /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = + /// // 3401`. We therefore get two lines. + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); + /// assert_eq!(wrapped, vec![&[Word::from(short)], + /// &[Word::from(&long)]]); + /// ``` + /// + /// This only happens if the overflowing word is 50 characters + /// long _and_ if the word overflows the line by exactly one + /// character. If it overflows by more than one character, the + /// overflow penalty will quickly outgrow the cost of the gap, as + /// seen above. + pub overflow_penalty: usize, + + /// When should a single word on the last line be considered + /// "too short"? + /// + /// If the last line of the text consists of a single word and if + /// this word is shorter than `1 / short_last_line_fraction` of + /// the line width, then the final line will be considered "short" + /// and `short_last_line_penalty` is added as an extra penalty. + /// + /// The effect of this is to avoid a final line consisting of a + /// single small word. For example, with a + /// `short_last_line_penalty` of 25 (the default), a gap of up to + /// 5 columns will be seen as more desirable than having a final + /// short line. 
+ /// + /// ## Examples + /// + /// ``` + /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm}; + /// + /// let text = "This is a demo of the short last line penalty."; + /// + /// // The first-fit algorithm leaves a single short word on the last line: + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// + /// #[cfg(feature = "smawk")] { + /// let mut penalties = wrap_algorithms::Penalties::new(); + /// + /// // Since "penalty." is shorter than 25% of the line width, the + /// // optimal-fit algorithm adds a penalty of 25. This is enough + /// // to move "line " down: + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last", + /// "line penalty."]); + /// + /// // We can change the meaning of "short" lines. Here, only words + /// // shorter than 1/10th of the line width will be considered short: + /// penalties.short_last_line_fraction = 10; + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// + /// // If desired, the penalty can also be disabled: + /// penalties.short_last_line_fraction = 4; + /// penalties.short_last_line_penalty = 0; + /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), + /// vec!["This is a demo of the short last line", + /// "penalty."]); + /// } + /// ``` + pub short_last_line_fraction: usize, + + /// Penalty for a last line with a single short word. + /// + /// Set this to zero if you do not want to penalize short last lines. + pub short_last_line_penalty: usize, + + /// Penalty for lines ending with a hyphen. + pub hyphen_penalty: usize, +} + +impl Penalties { + /// Default penalties for monospace text. + /// + /// The penalties here work well for monospace text. 
This is + /// because they expect the gaps at the end of lines to be roughly + /// in the range `0..100`. If the gaps are larger, the + /// `overflow_penalty` and `hyphen_penalty` become insignificant. + pub const fn new() -> Self { + Penalties { + nline_penalty: 1000, + overflow_penalty: 50 * 50, + short_last_line_fraction: 4, + short_last_line_penalty: 25, + hyphen_penalty: 25, + } + } +} + +impl Default for Penalties { + fn default() -> Self { + Self::new() + } +} + +/// Cache for line numbers. This is necessary to avoid a O(n**2) +/// behavior when computing line numbers in [`wrap_optimal_fit`]. +struct LineNumbers { + line_numbers: RefCell<Vec<usize>>, +} + +impl LineNumbers { + fn new(size: usize) -> Self { + let mut line_numbers = Vec::with_capacity(size); + line_numbers.push(0); + LineNumbers { + line_numbers: RefCell::new(line_numbers), + } + } + + fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize { + while self.line_numbers.borrow_mut().len() < i + 1 { + let pos = self.line_numbers.borrow().len(); + let line_number = 1 + self.get(minima[pos].0, minima); + self.line_numbers.borrow_mut().push(line_number); + } + + self.line_numbers.borrow()[i] + } +} + +/// Overflow error during the [`wrap_optimal_fit`] computation. +#[derive(Debug, PartialEq, Eq)] +pub struct OverflowError; + +impl std::fmt::Display for OverflowError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "wrap_optimal_fit cost computation overflowed") + } +} + +impl std::error::Error for OverflowError {} + +/// Wrap abstract fragments into lines with an optimal-fit algorithm. +/// +/// The `line_widths` slice gives the target line width for each line +/// (the last slice element is repeated as necessary). This can be +/// used to implement hanging indentation. +/// +/// The fragments must already have been split into the desired +/// widths, this function will not (and cannot) attempt to split them +/// further when arranging them into lines. 
+/// +/// # Optimal-Fit Algorithm +/// +/// The algorithm considers all possible break points and picks the +/// breaks which minimize the gaps at the end of each line. More +/// precisely, the algorithm assigns a cost or penalty to each break +/// point, determined by `cost = gap * gap` where `gap = target_width - +/// line_width`. Shorter lines are thus penalized more heavily since +/// they leave behind a larger gap. +/// +/// We can illustrate this with the text “To be, or not to be: that is +/// the question”. We will be wrapping it in a narrow column with room +/// for only 10 characters. The [greedy +/// algorithm](super::wrap_first_fit) will produce these lines, each +/// annotated with the corresponding penalty: +/// +/// ```text +/// "To be, or" 1² = 1 +/// "not to be:" 0² = 0 +/// "that is" 3² = 9 +/// "the" 7² = 49 +/// "question" 2² = 4 +/// ``` +/// +/// We see that line four with “the” leaves a gap of 7 columns, which +/// gives it a penalty of 49. The sum of the penalties is 63. +/// +/// There are 10 words, which means that there are `2_u32.pow(9)` or +/// 512 different ways to typeset it. We can compute +/// the sum of the penalties for each possible set of line breaks and search +/// for the one with the lowest sum: +/// +/// ```text +/// "To be," 4² = 16 +/// "or not to" 1² = 1 +/// "be: that" 2² = 4 +/// "is the" 4² = 16 +/// "question" 2² = 4 +/// ``` +/// +/// The sum of the penalties is 41, which is better than what the +/// greedy algorithm produced. +/// +/// Searching through all possible combinations would normally be +/// prohibitively slow. However, it turns out that the problem can be +/// formulated as the task of finding column minima in a cost matrix. +/// This matrix has a special form (totally monotone) which lets us +/// use a [linear-time algorithm called +/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break +/// points. +/// +/// This means that the time complexity remains O(_n_) where _n_ is +/// the number of words. 
Compared to +/// [`wrap_first_fit()`](super::wrap_first_fit), this function is +/// about 4 times slower. +/// +/// The optimization of per-line costs over the entire paragraph is +/// inspired by the line breaking algorithm used in TeX, as described +/// in the 1981 article [_Breaking Paragraphs into +/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) +/// by Knuth and Plass. The implementation here is based on [Python +/// code by David +/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). +/// +/// # Errors +/// +/// In case of an overflow during the cost computation, an `Err` is +/// returned. Overflows happen when fragments or lines have infinite +/// widths (`f64::INFINITY`) or if the widths are so large that the +/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` +/// (approximately 1e154): +/// +/// ``` +/// use textwrap::core::Fragment; +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties}; +/// +/// #[derive(Debug, PartialEq)] +/// struct Word(f64); +/// +/// impl Fragment for Word { +/// fn width(&self) -> f64 { self.0 } +/// fn whitespace_width(&self) -> f64 { 1.0 } +/// fn penalty_width(&self) -> f64 { 0.0 } +/// } +/// +/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is +/// // larger than f64::MAX: +/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), +/// Err(OverflowError)); +/// ``` +/// +/// When using fragment widths and line widths which fit inside a +/// `u64`, overflows cannot happen. This means that fragments derived +/// from a `&str` cannot cause overflows. +/// +/// **Note:** Only available when the `smawk` Cargo feature is +/// enabled. +pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( + fragments: &'a [T], + line_widths: &'b [f64], + penalties: &'b Penalties, +) -> Result<Vec<&'a [T]>, OverflowError> { + // The final line width is used for all remaining lines. 
+ let default_line_width = line_widths.last().copied().unwrap_or(0.0); + let mut widths = Vec::with_capacity(fragments.len() + 1); + let mut width = 0.0; + widths.push(width); + for fragment in fragments { + width += fragment.width() + fragment.whitespace_width(); + widths.push(width); + } + + let line_numbers = LineNumbers::new(fragments.len()); + + let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { + // Line number for fragment `i`. + let line_number = line_numbers.get(i, minima); + let line_width = line_widths + .get(line_number) + .copied() + .unwrap_or(default_line_width); + let target_width = line_width.max(1.0); + + // Compute the width of a line spanning fragments[i..j] in + // constant time. We need to adjust widths[j] by subtracting + // the whitespace of fragment[j-1] and then add the penalty. + let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + + fragments[j - 1].penalty_width(); + + // We compute cost of the line containing fragments[i..j]. We + // start with values[i].1, which is the optimal cost for + // breaking before fragments[i]. + // + // First, every extra line cost NLINE_PENALTY. + let mut cost = minima[i].1 + penalties.nline_penalty as f64; + + // Next, we add a penalty depending on the line length. + if line_width > target_width { + // Lines that overflow get a hefty penalty. + let overflow = line_width - target_width; + cost += overflow * penalties.overflow_penalty as f64; + } else if j < fragments.len() { + // Other lines (except for the last line) get a milder + // penalty which depend on the size of the gap. + let gap = target_width - line_width; + cost += gap * gap; + } else if i + 1 == j + && line_width < target_width / penalties.short_last_line_fraction as f64 + { + // The last line can have any size gap, but we do add a + // penalty if the line is very short (typically because it + // contains just a single word). 
+ cost += penalties.short_last_line_penalty as f64; + } + + // Finally, we discourage hyphens. + if fragments[j - 1].penalty_width() > 0.0 { + // TODO: this should use a penalty value from the fragment + // instead. + cost += penalties.hyphen_penalty as f64; + } + + cost + }); + + for (_, cost) in &minima { + if cost.is_infinite() { + return Err(OverflowError); + } + } + + let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); + let mut pos = fragments.len(); + loop { + let prev = minima[pos].0; + lines.push(&fragments[prev..pos]); + pos = prev; + if pos == 0 { + break; + } + } + + lines.reverse(); + Ok(lines) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq)] + struct Word(f64); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> f64 { self.0 } + fn whitespace_width(&self) -> f64 { 1.0 } + fn penalty_width(&self) -> f64 { 0.0 } + } + + #[test] + fn wrap_fragments_with_infinite_widths() { + let words = vec![Word(f64::INFINITY)]; + assert_eq!( + wrap_optimal_fit(&words, &[0.0], &Penalties::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_huge_widths() { + let words = vec![Word(1e200), Word(1e250), Word(1e300)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e300], &Penalties::default()), + Err(OverflowError) + ); + } + + #[test] + fn wrap_fragments_with_large_widths() { + // The gaps will be of the sizes between 1e25 and 1e75. This + // makes the `gap * gap` cost fit comfortably in a f64. 
+ let words = vec![Word(1e25), Word(1e50), Word(1e75)]; + assert_eq!( + wrap_optimal_fit(&words, &[1e100], &Penalties::default()), + Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) + ); + } +} diff --git a/third_party/rust/textwrap/tests/indent.rs b/third_party/rust/textwrap/tests/indent.rs new file mode 100644 index 0000000000..9dd5ad2642 --- /dev/null +++ b/third_party/rust/textwrap/tests/indent.rs @@ -0,0 +1,88 @@ +/// Test cases ported over from the Python standard library +use textwrap::{dedent, indent}; + +const ROUNDTRIP_CASES: [&str; 3] = [ + // basic test case + "Hi.\nThis is a test.\nTesting.", + // include a blank line + "Hi.\nThis is a test.\n\nTesting.", + // include leading and trailing blank lines + "\nHi.\nThis is a test.\nTesting.\n", +]; + +const WINDOWS_CASES: [&str; 2] = [ + // use windows line endings + "Hi.\r\nThis is a test.\r\nTesting.", + // pathological case + "Hi.\r\nThis is a test.\n\r\nTesting.\r\n\n", +]; + +#[test] +fn test_indent_nomargin_default() { + // indent should do nothing if 'prefix' is empty. 
+ for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&indent(text, ""), text); + } + for text in WINDOWS_CASES.iter() { + assert_eq!(&indent(text, ""), text); + } +} + +#[test] +fn test_roundtrip_spaces() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, " ")), text); + } +} + +#[test] +fn test_roundtrip_tabs() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, "\t\t")), text); + } +} + +#[test] +fn test_roundtrip_mixed() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, " \t \t ")), text); + } +} + +#[test] +fn test_indent_default() { + // Test default indenting of lines that are not whitespace only + let prefix = " "; + let expected = [ + // Basic test case + " Hi.\n This is a test.\n Testing.", + // Include a blank line + " Hi.\n This is a test.\n\n Testing.", + // Include leading and trailing blank lines + "\n Hi.\n This is a test.\n Testing.\n", + ]; + for (text, expect) in ROUNDTRIP_CASES.iter().zip(expected.iter()) { + assert_eq!(&indent(text, prefix), expect) + } + let expected = [ + // Use Windows line endings + " Hi.\r\n This is a test.\r\n Testing.", + // Pathological case + " Hi.\r\n This is a test.\n\r\n Testing.\r\n\n", + ]; + for (text, expect) in WINDOWS_CASES.iter().zip(expected.iter()) { + assert_eq!(&indent(text, prefix), expect) + } +} + +#[test] +fn indented_text_should_have_the_same_number_of_lines_as_the_original_text() { + let texts = ["foo\nbar", "foo\nbar\n", "foo\nbar\nbaz"]; + for original in texts.iter() { + let indented = indent(original, ""); + assert_eq!(&indented, original); + } +} diff --git a/third_party/rust/textwrap/tests/version-numbers.rs b/third_party/rust/textwrap/tests/version-numbers.rs new file mode 100644 index 0000000000..3f429b187a --- /dev/null +++ 
b/third_party/rust/textwrap/tests/version-numbers.rs @@ -0,0 +1,22 @@ +#[test] +fn test_readme_deps() { + version_sync::assert_markdown_deps_updated!("README.md"); +} + +#[test] +fn test_changelog() { + version_sync::assert_contains_regex!( + "CHANGELOG.md", + r"^## Version {version} \(20\d\d-\d\d-\d\d\)" + ); +} + +#[test] +fn test_html_root_url() { + version_sync::assert_html_root_url_updated!("src/lib.rs"); +} + +#[test] +fn test_dependency_graph() { + version_sync::assert_contains_regex!("src/lib.rs", "master/images/textwrap-{version}.svg"); +} |