diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/textwrap | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/textwrap')
-rw-r--r-- | third_party/rust/textwrap/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/textwrap/Cargo.toml | 49 | ||||
-rw-r--r-- | third_party/rust/textwrap/LICENSE | 21 | ||||
-rw-r--r-- | third_party/rust/textwrap/README.md | 323 | ||||
-rw-r--r-- | third_party/rust/textwrap/benches/linear.rs | 122 | ||||
-rw-r--r-- | third_party/rust/textwrap/examples/layout.rs | 39 | ||||
-rw-r--r-- | third_party/rust/textwrap/examples/termwidth.rs | 21 | ||||
-rw-r--r-- | third_party/rust/textwrap/src/lib.rs | 1216 | ||||
-rw-r--r-- | third_party/rust/textwrap/tests/version-numbers.rs | 12 |
9 files changed, 1804 insertions, 0 deletions
diff --git a/third_party/rust/textwrap/.cargo-checksum.json b/third_party/rust/textwrap/.cargo-checksum.json new file mode 100644 index 0000000000..fef68eef83 --- /dev/null +++ b/third_party/rust/textwrap/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"cb72f8ac90e87d794252acfe2e957fbe0effe937ab2eded759af1daad4bc052d","LICENSE":"ce93600c49fbb3e14df32efe752264644f6a2f8e08a735ba981725799e5309ef","README.md":"9cd343e0b056f823677521bfe96b3ff1440d6a5e4455179ae3fe28cd6c4038da","benches/linear.rs":"0484029d32f3f0e8e0d61085db9cdae610019241a73f05485f9b9a9a7a5e1c53","examples/layout.rs":"7758e5526a8b1718c7a69748a4f63703545b6ac13042d753a6d14de0a1b3edcf","examples/termwidth.rs":"f969e869802d21daf59d1ae23c668539bfb493d5b9ef9c7cbc9de30ce315a529","src/lib.rs":"2c42a300a12236d36d855c2799dae69b1a9de69de97e655d4ced409e814d5c7b","tests/version-numbers.rs":"0d75532e72e9054cb8c8c12b8851e55fad7319ed14482b562f6806872f18dd0f"},"package":"c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693"}
\ No newline at end of file diff --git a/third_party/rust/textwrap/Cargo.toml b/third_party/rust/textwrap/Cargo.toml new file mode 100644 index 0000000000..73e8d0f8c1 --- /dev/null +++ b/third_party/rust/textwrap/Cargo.toml @@ -0,0 +1,49 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g. crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "textwrap" +version = "0.9.0" +authors = ["Martin Geisler <martin@geisler.net>"] +exclude = [".dir-locals.el"] +description = "Textwrap is a small library for word wrapping, indenting, and\ndedenting strings.\n\nYou can use it to format strings (such as help and error messages) for\ndisplay in commandline applications. It is designed to be efficient\nand handle Unicode characters correctly.\n" +documentation = "https://docs.rs/textwrap/" +readme = "README.md" +keywords = ["text", "formatting", "wrap", "typesetting", "hyphenation"] +categories = ["text-processing", "command-line-interface"] +license = "MIT" +repository = "https://github.com/mgeisler/textwrap" +[package.metadata.docs.rs] +all-features = true +[dependencies.unicode-width] +version = "0.1.3" + +[dependencies.hyphenation] +version = "0.6.1" +optional = true + +[dependencies.term_size] +version = "0.3.0" +optional = true +[dev-dependencies.lipsum] +version = "0.3" + +[dev-dependencies.version-sync] +version = "0.2" + +[dev-dependencies.rand] +version = "0.3" +[badges.appveyor] +repository = "mgeisler/textwrap" + +[badges.travis-ci] +repository = "mgeisler/textwrap" diff --git a/third_party/rust/textwrap/LICENSE b/third_party/rust/textwrap/LICENSE new file mode 100644 index 0000000000..0d37ec3891 --- /dev/null +++ b/third_party/rust/textwrap/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Martin Geisler + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/textwrap/README.md b/third_party/rust/textwrap/README.md new file mode 100644 index 0000000000..7cdfb6006e --- /dev/null +++ b/third_party/rust/textwrap/README.md @@ -0,0 +1,323 @@ +# Textwrap + +[![](https://img.shields.io/crates/v/textwrap.svg)][crates-io] +[![](https://docs.rs/textwrap/badge.svg)][api-docs] +[![](https://travis-ci.org/mgeisler/textwrap.svg?branch=master)][travis-ci] +[![](https://ci.appveyor.com/api/projects/status/yo6iak55nraupjw3/branch/master?svg=true)][appveyor] + +Textwrap is a small Rust crate for word wrapping text. You can use it +to format strings for display in commandline applications. The crate +name and interface is inspired by +the [Python textwrap module][py-textwrap]. + +## Usage + +Add this to your `Cargo.toml`: +```toml +[dependencies] +textwrap = "0.9" +``` + +and this to your crate root: +```rust +extern crate textwrap; +``` + +If you would like to have automatic hyphenation, specify the +dependency as: +```toml +[dependencies] +textwrap = { version = "0.9", features = ["hyphenation"] } +``` + +To conveniently wrap text at the current terminal width, enable the +`term_size` feature: + +```toml +[dependencies] +textwrap = { version = "0.9", features = ["term_size"] } +``` + +## Documentation + +**[API documentation][api-docs]** + +## Getting Started + +Word wrapping single strings is easy using the `fill` function: +```rust +extern crate textwrap; +use textwrap::fill; + +fn main() { + let text = "textwrap: a small library for wrapping text."; + println!("{}", fill(text, 18)); +} +``` +The output is +``` +textwrap: a small +library for +wrapping text. +``` + +With the `hyphenation` feature, you can get automatic hyphenation +for [about 70 languages][patterns]. Your program must load and +configure the hyphenation patterns to use: +```rust +extern crate hyphenation; +extern crate textwrap; + +use hyphenation::Language; +use textwrap::Wrapper; + +fn main() { + let corpus = hyphenation::load(Language::English_US).unwrap(); + let wrapper = Wrapper::with_splitter(18, corpus); + let text = "textwrap: a small library for wrapping text."; + println!("{}", wrapper.fill(text)) +} +``` + +The output now looks like this: +``` +textwrap: a small +library for wrap- +ping text. +``` + +The hyphenation uses high-quality TeX hyphenation patterns. + +## Examples + +The library comes with some small example programs that shows various +features. + +### Layout Example + +The `layout` example shows how a fixed example string is wrapped at +different widths. Run the example with: + +```shell +$ cargo run --features hyphenation --example layout +``` + +The program will use the following string: + +> Memory safety without garbage collection. Concurrency without data +> races. Zero-cost abstractions. + +The string is wrapped at all widths between 15 and 60 columns. With +narrow columns the output looks like this: + +``` +.--- Width: 15 ---. +| Memory safety | +| without garbage | +| collection. | +| Concurrency | +| without data | +| races. Zero- | +| cost abstrac- | +| tions. | +.--- Width: 16 ----. +| Memory safety | +| without garbage | +| collection. Con- | +| currency without | +| data races. Ze- | +| ro-cost abstrac- | +| tions. | +``` + +Later, longer lines are used and the output now looks like this: + +``` +.-------------------- Width: 49 --------------------. +| Memory safety without garbage collection. Concur- | +| rency without data races. Zero-cost abstractions. | +.---------------------- Width: 53 ----------------------. +| Memory safety without garbage collection. Concurrency | +| without data races. Zero-cost abstractions. | +.------------------------- Width: 59 -------------------------. +| Memory safety without garbage collection. Concurrency with- | +| out data races. Zero-cost abstractions. | +``` + +Notice how words are split at hyphens (such as "zero-cost") but also +how words are hyphenated using automatic/machine hyphenation. + +### Terminal Width Example + +The `termwidth` example simply shows how the width can be set +automatically to the current terminal width. Run it with this command: + +``` +$ cargo run --example termwidth +``` + +If you run it in a narrow terminal, you'll see output like this: +``` +Formatted in within 60 columns: +---- +Memory safety without garbage collection. Concurrency +without data races. Zero-cost abstractions. +---- +``` + +If `stdout` is not connected to the terminal, the program will use a +default of 80 columns for the width: + +``` +$ cargo run --example termwidth | cat +Formatted in within 80 columns: +---- +Memory safety without garbage collection. Concurrency without data races. Zero- +cost abstractions. +---- +``` + +## Release History + +This section lists the largest changes per release. + +### Version 0.9.0 — October 5th, 2017 + +The dependency on `term_size` is now optional, and by default this +feature is not enabled. This is a *breaking change* for users of +`Wrapper::with_termwidth`. Enable the `term_size` feature to restore +the old functionality. + +Added a regression test for case where width is set to usize::MAX. +Thanks @Fraser999! All public structs now implement `Debug`. + +Issues closed: + +* Fixed [#101][issue-101]: Remove `term_size` as a (hard required) + dependency. + +### Version 0.8.0 — September 4th, 2017 + +The `Wrapper` stuct is now generic over the type of word splitter +being used. This means less boxing and a nicer API. The +`Wrapper::word_splitter` method has been removed. This is a *breaking +API change* if you used the method to change the word splitter. + +The `Wrapper` struct has two new methods that will wrap the input text +lazily: `Wrapper::wrap_iter` and `Wrapper::into_wrap_iter`. Use those +if you will be iterating over the wrapped lines one by one. + +Issues closed: + +* Fixed [#59][issue-59]: `wrap` could return an iterator. Thanks + @hcpl! + +* Fixed [#81][issue-81]: Set `html_root_url` + +### Version 0.7.0 — July 20th, 2017 + +Version 0.7.0 changes the return type of `Wrapper::wrap` from +`Vec<String>` to `Vec<Cow<'a, str>>`. This means that the output lines +borrow data from the input string. This is a *breaking API change* if +you relied on the exact return type of `Wrapper::wrap`. Callers of the +`textwrap::fill` convenience function will see no breakage. + +The above change and other optimizations makes version 0.7.0 roughly +15-30% faster than version 0.6.0. + +The `squeeze_whitespace` option has been removed since it was +complicating the above optimization. Let us know if this option is +important for you so we can provide a work around. + +Issues closed: + +* Fixed [#58][issue-58]: Add a "fast_wrap" function that reuses the + input string + +* Fixed [#61][issue-61]: Documentation errors + +### Version 0.6.0 — May 22nd, 2017 + +Version 0.6.0 adds builder methods to `Wrapper` for easy one-line +initialization and configuration: + +```rust +let wrapper = Wrapper::new(60).break_words(false); +``` + +It also add a new `NoHyphenation` word splitter that will never split +words, not even at existing hyphens. + +Issues closed: + +* Fixed [#28][issue-28]: Support not squeezing whitespace + +### Version 0.5.0 — May 15th, 2017 + +Version 0.5.0 has *breaking API changes*. However, this only affects +code using the hyphenation feature. The feature is now optional, so +you will first need to enable the `hyphenation` feature as described +above. Afterwards, please change your code from +```rust +wrapper.corpus = Some(&corpus); +``` +to +```rust +wrapper.splitter = Box::new(corpus); +``` + +Other changes include optimizations, so version 0.5.0 is roughly +10-15% faster than version 0.4.0. + +Issues closed: + +* Fixed [#19][issue-19]: Add support for finding terminal size +* Fixed [#25][issue-25]: Handle words longer than `self.width` +* Fixed [#26][issue-26]: Support custom indentation +* Fixed [#36][issue-36]: Support building without `hyphenation` +* Fixed [#39][issue-39]: Respect non-breaking spaces + +### Version 0.4.0 — January 24th, 2017 + +Documented complexities and tested these via `cargo bench`. + +* Fixed [#13][issue-13]: Immediatedly add word if it fits +* Fixed [#14][issue-14]: Avoid splitting on initial hyphens in `--foo-bar` + +### Version 0.3.0 — January 7th, 2017 + +Added support for automatic hyphenation. + +### Version 0.2.0 — December 28th, 2016 + +Introduced `Wrapper` struct. Added support for wrapping on hyphens. + +### Version 0.1.0 — December 17th, 2016 + +First public release with support for wrapping strings on whitespace. + +## License + +Textwrap can be distributed according to the [MIT license][mit]. +Contributions will be accepted under the same license. + +[crates-io]: https://crates.io/crates/textwrap +[travis-ci]: https://travis-ci.org/mgeisler/textwrap +[appveyor]: https://ci.appveyor.com/project/mgeisler/textwrap +[py-textwrap]: https://docs.python.org/library/textwrap +[patterns]: https://github.com/tapeinosyne/hyphenation/tree/master/patterns-tex +[api-docs]: https://docs.rs/textwrap/ +[issue-13]: https://github.com/mgeisler/textwrap/issues/13 +[issue-14]: https://github.com/mgeisler/textwrap/issues/14 +[issue-19]: https://github.com/mgeisler/textwrap/issues/19 +[issue-25]: https://github.com/mgeisler/textwrap/issues/25 +[issue-26]: https://github.com/mgeisler/textwrap/issues/26 +[issue-28]: https://github.com/mgeisler/textwrap/issues/28 +[issue-36]: https://github.com/mgeisler/textwrap/issues/36 +[issue-39]: https://github.com/mgeisler/textwrap/issues/39 +[issue-58]: https://github.com/mgeisler/textwrap/issues/58 +[issue-59]: https://github.com/mgeisler/textwrap/issues/59 +[issue-61]: https://github.com/mgeisler/textwrap/issues/61 +[issue-81]: https://github.com/mgeisler/textwrap/issues/81 +[issue-101]: https://github.com/mgeisler/textwrap/issues/101 +[mit]: LICENSE diff --git a/third_party/rust/textwrap/benches/linear.rs b/third_party/rust/textwrap/benches/linear.rs new file mode 100644 index 0000000000..cc805474d8 --- /dev/null +++ b/third_party/rust/textwrap/benches/linear.rs @@ -0,0 +1,122 @@ +#![feature(test)] + +// The benchmarks here verify that the complexity grows as O(*n*) +// where *n* is the number of characters in the text to be wrapped. + +extern crate test; +extern crate rand; +extern crate lipsum; +extern crate textwrap; +#[cfg(feature = "hyphenation")] +extern crate hyphenation; + +use test::Bencher; +#[cfg(feature = "hyphenation")] +use hyphenation::Language; +#[cfg(feature = "hyphenation")] +use textwrap::Wrapper; +use rand::XorShiftRng; +use lipsum::MarkovChain; + +const LINE_LENGTH: usize = 60; + +/// Generate a lorem ipsum text with the given number of characters. +fn lorem_ipsum(length: usize) -> String { + // The average word length in the lorem ipsum text is somewhere + // between 6 and 7. So we conservatively divide by 5 to have a + // long enough text that we can truncate below. + let rng = XorShiftRng::new_unseeded(); + let mut chain = MarkovChain::new_with_rng(rng); + chain.learn(lipsum::LOREM_IPSUM); + chain.learn(lipsum::LIBER_PRIMUS); + + let mut text = chain.generate_from(length / 5, ("Lorem", "ipsum")); + text.truncate(length); + text +} + +#[bench] +fn fill_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + + +#[bench] +fn wrap_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + let corpus = hyphenation::load(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, corpus); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + let corpus = hyphenation::load(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, corpus); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + let corpus = hyphenation::load(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, corpus); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + let corpus = hyphenation::load(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, corpus); + b.iter(|| wrapper.fill(text)) +} diff --git a/third_party/rust/textwrap/examples/layout.rs b/third_party/rust/textwrap/examples/layout.rs new file mode 100644 index 0000000000..6d3d10242d --- /dev/null +++ b/third_party/rust/textwrap/examples/layout.rs @@ -0,0 +1,39 @@ +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +extern crate textwrap; + +#[cfg(feature = "hyphenation")] +use hyphenation::Language; +use textwrap::Wrapper; + + +#[cfg(not(feature = "hyphenation"))] +fn new_wrapper<'a>() -> Wrapper<'a, textwrap::HyphenSplitter> { + Wrapper::new(0) +} + +#[cfg(feature = "hyphenation")] +fn new_wrapper<'a>() -> Wrapper<'a, hyphenation::Corpus> { + let corpus = hyphenation::load(Language::English_US).unwrap(); + Wrapper::with_splitter(0, corpus) +} + +fn main() { + let example = "Memory safety without garbage collection. \ + Concurrency without data races. \ + Zero-cost abstractions."; + let mut prev_lines = vec![]; + let mut wrapper = new_wrapper(); + for width in 15..60 { + wrapper.width = width; + let lines = wrapper.wrap(example); + if lines != prev_lines { + let title = format!(" Width: {} ", width); + println!(".{:-^1$}.", title, width + 2); + for line in &lines { + println!("| {:1$} |", line, width); + } + prev_lines = lines; + } + } +} diff --git a/third_party/rust/textwrap/examples/termwidth.rs b/third_party/rust/textwrap/examples/termwidth.rs new file mode 100644 index 0000000000..7d81a5c654 --- /dev/null +++ b/third_party/rust/textwrap/examples/termwidth.rs @@ -0,0 +1,21 @@ +#[cfg(feature = "term_size")] +extern crate textwrap; + +#[cfg(not(feature = "term_size"))] +fn main() { + println!("Please enable the term_size feature to run this example."); +} + +#[cfg(feature = "term_size")] +fn main() { + let example = "Memory safety without garbage collection. \ + Concurrency without data races. \ + Zero-cost abstractions."; + // Create a new Wrapper -- automatically set the width to the + // current terminal width. + let wrapper = textwrap::Wrapper::with_termwidth(); + println!("Formatted in within {} columns:", wrapper.width); + println!("----"); + println!("{}", wrapper.fill(example)); + println!("----"); +} diff --git a/third_party/rust/textwrap/src/lib.rs b/third_party/rust/textwrap/src/lib.rs new file mode 100644 index 0000000000..14476b36cf --- /dev/null +++ b/third_party/rust/textwrap/src/lib.rs @@ -0,0 +1,1216 @@ +//! `textwrap` provides functions for word wrapping and filling text. +//! +//! Wrapping text can be very useful in commandline programs where you +//! want to format dynamic output nicely so it looks good in a +//! terminal. A quick example: +//! +//! ```no_run +//! extern crate textwrap; +//! use textwrap::fill; +//! +//! fn main() { +//! let text = "textwrap: a small library for wrapping text."; +//! println!("{}", fill(text, 18)); +//! } +//! ``` +//! +//! This will display the following output: +//! +//! ```text +//! textwrap: a small +//! library for +//! wrapping text. +//! ``` +//! +//! # Displayed Width vs Byte Size +//! +//! To word wrap text, one must know the width of each word so one can +//! know when to break lines. This library measures the width of text +//! using the [displayed width][unicode-width], not the size in bytes. +//! +//! This is important for non-ASCII text. ASCII characters such as `a` +//! and `!` are simple and take up one column each. This means that +//! the displayed width is equal to the string length in bytes. +//! However, non-ASCII characters and symbols take up more than one +//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is +//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively. +//! +//! This is why we take care to use the displayed width instead of the +//! byte count when computing line lengths. All functions in this +//! library handle Unicode characters like this. +//! +//! [unicode-width]: https://docs.rs/unicode-width/ + +#![doc(html_root_url = "https://docs.rs/textwrap/0.9.0")] +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] + +extern crate unicode_width; +#[cfg(feature = "term_size")] +extern crate term_size; +#[cfg(feature = "hyphenation")] +extern crate hyphenation; + +use std::fmt; +use std::borrow::Cow; +use std::str::CharIndices; + +use unicode_width::UnicodeWidthStr; +use unicode_width::UnicodeWidthChar; +#[cfg(feature = "hyphenation")] +use hyphenation::{Hyphenation, Corpus}; + +/// A non-breaking space. +const NBSP: char = '\u{a0}'; + +/// An interface for splitting words. +/// +/// When the [`wrap_iter`] method will try to fit text into a line, it +/// will eventually find a word that it too large the current text +/// width. It will then call the currently configured `WordSplitter` to +/// have it attempt to split the word into smaller parts. This trait +/// describes that functionality via the [`split`] method. +/// +/// If the `textwrap` crate has been compiled with the `hyphenation` +/// feature enabled, you will find an implementation of `WordSplitter` +/// by the `hyphenation::language::Corpus` struct. Use this struct for +/// language-aware hyphenation. See the [`hyphenation` documentation] +/// for details. +/// +/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter +/// [`split`]: #tymethod.split +/// [`hyphenation` documentation]: https://docs.rs/hyphenation/ +pub trait WordSplitter { + /// Return all possible splits of word. Each split is a triple + /// with a head, a hyphen, and a tail where `head + &hyphen + + /// &tail == word`. The hyphen can be empty if there is already a + /// hyphen in the head. + /// + /// The splits should go from smallest to longest and should + /// include no split at all. So the word "technology" could be + /// split into + /// + /// ```no_run + /// vec![("tech", "-", "nology"), + /// ("technol", "-", "ogy"), + /// ("technolo", "-", "gy"), + /// ("technology", "", "")]; + /// ``` + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>; +} + +/// Use this as a [`Wrapper.splitter`] to avoid any kind of +/// hyphenation: +/// +/// ``` +/// use textwrap::{Wrapper, NoHyphenation}; +/// +/// let wrapper = Wrapper::with_splitter(8, NoHyphenation); +/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); +/// ``` +/// +/// [`Wrapper.splitter`]: struct.Wrapper.html#structfield.splitter +#[derive(Clone, Debug)] +pub struct NoHyphenation; + +/// `NoHyphenation` implements `WordSplitter` by not splitting the +/// word at all. +impl WordSplitter for NoHyphenation { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + vec![(word, "", "")] + } +} + +/// Simple and default way to split words: splitting on existing +/// hyphens only. +/// +/// You probably don't need to use this type since it's already used +/// by default by `Wrapper::new`. +#[derive(Clone, Debug)] +pub struct HyphenSplitter; + +/// `HyphenSplitter` is the default `WordSplitter` used by +/// `Wrapper::new`. It will split words on any existing hyphens in the +/// word. +/// +/// It will only use hyphens that are surrounded by alphanumeric +/// characters, which prevents a word like "--foo-bar" from being +/// split on the first or second hyphen. +impl WordSplitter for HyphenSplitter { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + let mut triples = Vec::new(); + // Split on hyphens, smallest split first. We only use hyphens + // that are surrounded by alphanumeric characters. This is to + // avoid splitting on repeated hyphens, such as those found in + // --foo-bar. + let mut char_indices = word.char_indices(); + // Early return if the word is empty. + let mut prev = match char_indices.next() { + None => return vec![(word, "", "")], + Some((_, ch)) => ch, + }; + + // Find current word, or return early if the word only has a + // single character. + let (mut idx, mut cur) = match char_indices.next() { + None => return vec![(word, "", "")], + Some((idx, cur)) => (idx, cur), + }; + + for (i, next) in char_indices { + if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() { + let (head, tail) = word.split_at(idx + 1); + triples.push((head, "", tail)); + } + prev = cur; + idx = i; + cur = next; + } + + // Finally option is no split at all. + triples.push((word, "", "")); + + triples + } +} + +/// A hyphenation Corpus can be used to do language-specific +/// hyphenation using patterns from the hyphenation crate. +#[cfg(feature = "hyphenation")] +impl WordSplitter for Corpus { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + // Find splits based on language corpus. + let mut triples = Vec::new(); + for n in word.opportunities(self) { + let (head, tail) = word.split_at(n); + let hyphen = if head.ends_with('-') { "" } else { "-" }; + triples.push((head, hyphen, tail)); + } + // Finally option is no split at all. + triples.push((word, "", "")); + + triples + } +} + +/// Backport of the `AddAssign` trait implementation from Rust 1.14. +fn cow_add_assign<'a>(lhs: &mut Cow<'a, str>, rhs: &'a str) { + if lhs.is_empty() { + *lhs = Cow::Borrowed(rhs) + } else if rhs.is_empty() { + return; + } else { + if let Cow::Borrowed(inner) = *lhs { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(inner); + *lhs = Cow::Owned(s); + } + lhs.to_mut().push_str(rhs); + } +} + + +/// A Wrapper holds settings for wrapping and filling text. Use it +/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions +/// are not flexible enough. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap`]: fn.wrap.html +/// [`fill`]: fn.fill.html +/// +/// The algorithm used by the `WrapIter` iterator (returned from the +/// `wrap_iter` method) works by doing successive partial scans over +/// words in the input string (where each single scan yields a single +/// line) so that the overall time and memory complexity is O(*n*) where +/// *n* is the length of the input string. +#[derive(Clone, Debug)] +pub struct Wrapper<'a, S: WordSplitter> { + /// The width in columns at which the text will be wrapped. + pub width: usize, + /// Indentation used for the first line of output. + pub initial_indent: &'a str, + /// Indentation used for subsequent lines of output. + pub subsequent_indent: &'a str, + /// Allow long words to be broken if they cannot fit on a line. + /// When set to `false`, some lines may be longer than + /// `self.width`. + pub break_words: bool, + /// The method for splitting words. If the `hyphenation` feature + /// is enabled, you can use a `hyphenation::language::Corpus` here + /// to get language-aware hyphenation. + pub splitter: S, +} + +impl<'a> Wrapper<'a, HyphenSplitter> { + /// Create a new Wrapper for wrapping at the specified width. By + /// default, we allow words longer than `width` to be broken. A + /// [`HyphenSplitter`] will be used by default for splitting + /// words. See the [`WordSplitter`] trait for other options. + /// + /// [`HyphenSplitter`]: struct.HyphenSplitter.html + /// [`WordSplitter`]: trait.WordSplitter.html + pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> { + Wrapper::with_splitter(width, HyphenSplitter) + } + + /// Create a new Wrapper for wrapping text at the current terminal + /// width. If the terminal width cannot be determined (typically + /// because the standard input and output is not connected to a + /// terminal), a width of 80 characters will be used. Other + /// settings use the same defaults as `Wrapper::new`. + /// + /// Equivalent to: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::{Wrapper, termwidth}; + /// + /// let wrapper = Wrapper::new(termwidth()); + /// ``` + #[cfg(feature = "term_size")] + pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> { + Wrapper::new(termwidth()) + } +} + +impl<'w, 'a: 'w, S: WordSplitter> Wrapper<'a, S> { + /// Use the given [`WordSplitter`] to create a new Wrapper for + /// wrapping at the specified width. By default, we allow words + /// longer than `width` to be broken. + /// + /// [`WordSplitter`]: trait.WordSplitter.html + pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> { + Wrapper { + width: width, + initial_indent: "", + subsequent_indent: "", + break_words: true, + splitter: splitter, + } + } + + /// Change [`self.initial_indent`]. The initial indentation is + /// used on the very first line of output. + /// + /// # Examples + /// + /// Classic paragraph indentation can be achieved by specifying an + /// initial indentation and wrapping each paragraph by itself: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15).initial_indent(" "); + /// ``` + /// + /// [`self.initial_indent`]: #structfield.initial_indent + pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> { + Wrapper { initial_indent: indent, ..self } + } + + /// Change [`self.subsequent_indent`]. The subsequent indentation + /// is used on lines following the first line of output. + /// + /// # Examples + /// + /// Combining initial and subsequent indentation lets you format a + /// single paragraph as a bullet list: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15) + /// .initial_indent("* ") + /// .subsequent_indent(" "); + /// ``` + /// + /// [`self.subsequent_indent`]: #structfield.subsequent_indent + pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> { + Wrapper { subsequent_indent: indent, ..self } + } + + /// Change [`self.break_words`]. This controls if words longer + /// than `self.width` can be broken, or if they will be left + /// sticking out into the right margin. + /// + /// [`self.break_words`]: #structfield.break_words + pub fn break_words(self, setting: bool) -> Wrapper<'a, S> { + Wrapper { break_words: setting, ..self } + } + + /// Fill a line of text at `self.width` characters. Strings are + /// wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The result is a string with newlines between each line. Use + /// the `wrap` method if you need access to the individual lines. + /// + /// # Complexities + /// + /// This method simply joins the lines produced by `wrap_iter`. As + /// such, it inherits the O(*n*) time and memory complexity where + /// *n* is the input string length. + /// + /// # Examples + /// + /// ``` + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15); + /// assert_eq!(wrapper.fill("Memory safety without garbage collection."), + /// "Memory safety\nwithout garbage\ncollection."); + /// ``` + pub fn fill(&self, s: &str) -> String { + // This will avoid reallocation in simple cases (no + // indentation, no hyphenation). + let mut result = String::with_capacity(s.len()); + + for (i, line) in self.wrap_iter(s).enumerate() { + if i > 0 { + result.push('\n'); + } + result.push_str(&line); + } + + result + } + + /// Wrap a line of text at `self.width` characters. Strings are + /// wrapped based on their displayed width, not their size in + /// bytes. + /// + /// # Complexities + /// + /// This method simply collects the lines produced by `wrap_iter`. + /// As such, it inherits the O(*n*) overall time and memory + /// complexity where *n* is the input string length. + /// + /// # Examples + /// + /// ``` + /// use textwrap::Wrapper; + /// + /// let wrap15 = Wrapper::new(15); + /// assert_eq!(wrap15.wrap("Concurrency without data races."), + /// vec!["Concurrency", + /// "without data", + /// "races."]); + /// + /// let wrap20 = Wrapper::new(20); + /// assert_eq!(wrap20.wrap("Concurrency without data races."), + /// vec!["Concurrency without", + /// "data races."]); + /// ``` + pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> { + self.wrap_iter(s).collect::<Vec<_>>() + } + + /// Lazily wrap a line of text at `self.width` characters. Strings + /// are wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The [`WordSplitter`] stored in [`self.splitter`] is used + /// whenever when a word is too large to fit on the current line. + /// By changing the field, different hyphenation strategies can be + /// implemented. + /// + /// # Complexities + /// + /// This method returns a [`WrapIter`] iterator which borrows this + /// `Wrapper`. The algorithm used has a linear complexity, so + /// getting the next line from the iterator will take O(*w*) time, + /// where *w* is the wrapping width. Fully processing the iterator + /// will take O(*n*) time for an input string of length *n*. + /// + /// When no indentation is used, each line returned is a slice of + /// the input string and the memory overhead is thus constant. + /// Otherwise new memory is allocated for each line returned. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// use textwrap::Wrapper; + /// + /// let wrap20 = Wrapper::new(20); + /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); + /// assert_eq!(wrap20_iter.next(), None); + /// + /// let wrap25 = Wrapper::new(25); + /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); + /// assert_eq!(wrap25_iter.next(), None); + /// ``` + /// + /// [`self.splitter`]: #structfield.splitter + /// [`WordSplitter`]: trait.WordSplitter.html + /// [`WrapIter`]: struct.WrapIter.html + pub fn wrap_iter(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> { + WrapIter { + wrapper: self, + wrap_iter_impl: WrapIterImpl::new(self, s), + } + } + + /// Lazily wrap a line of text at `self.width` characters. Strings + /// are wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The [`WordSplitter`] stored in [`self.splitter`] is used + /// whenever when a word is too large to fit on the current line. + /// By changing the field, different hyphenation strategies can be + /// implemented. + /// + /// # Complexities + /// + /// This method consumes the `Wrapper` and returns a + /// [`IntoWrapIter`] iterator. Fully processing the iterator has + /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is + /// the length of the input string. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// use textwrap::Wrapper; + /// + /// let wrap20 = Wrapper::new(20); + /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); + /// assert_eq!(wrap20_iter.next(), None); + /// ``` + /// + /// [`self.splitter`]: #structfield.splitter + /// [`WordSplitter`]: trait.WordSplitter.html + /// [`IntoWrapIter`]: struct.IntoWrapIter.html + /// [`wrap_iter`]: #method.wrap_iter + pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> { + let wrap_iter_impl = WrapIterImpl::new(&self, s); + + IntoWrapIter { + wrapper: self, + wrap_iter_impl: wrap_iter_impl, + } + } +} + + +/// An iterator over the lines of the input string which owns a +/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained +/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`]. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. Otherwise it returns `None`. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +#[derive(Debug)] +pub struct IntoWrapIter<'a, S: WordSplitter> { + wrapper: Wrapper<'a, S>, + wrap_iter_impl: WrapIterImpl<'a>, +} + +impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> { + type Item = Cow<'a, str>; + + fn next(&mut self) -> Option<Cow<'a, str>> { + self.wrap_iter_impl.impl_next(&self.wrapper) + } +} + +/// An iterator over the lines of the input string which borrows a +/// `Wrapper`. An instance of `WrapIter` is typically obtained +/// through the [`Wrapper::wrap_iter`] method. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. Otherwise it returns `None`. +/// +/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter +#[derive(Debug)] +pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> { + wrapper: &'w Wrapper<'a, S>, + wrap_iter_impl: WrapIterImpl<'a>, +} + +impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> { + type Item = Cow<'a, str>; + + fn next(&mut self) -> Option<Cow<'a, str>> { + self.wrap_iter_impl.impl_next(self.wrapper) + } +} + +struct WrapIterImpl<'a> { + // String to wrap. + source: &'a str, + // CharIndices iterator over self.source. + char_indices: CharIndices<'a>, + // Is the next element the first one ever produced? + is_next_first: bool, + // Byte index where the current line starts. + start: usize, + // Byte index of the last place where the string can be split. + split: usize, + // Size in bytes of the character at self.source[self.split]. + split_len: usize, + // Width of self.source[self.start..idx]. + line_width: usize, + // Width of self.source[self.start..self.split]. + line_width_at_split: usize, + // Tracking runs of whitespace characters. + in_whitespace: bool, + // Has iterator finished producing elements? + finished: bool, +} + +impl<'a> fmt::Debug for WrapIterImpl<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("WrapIterImpl") + .field("source", &self.source) + .field("char_indices", &"CharIndices { ... }") + .field("is_next_first", &self.is_next_first) + .field("start", &self.start) + .field("split", &self.split) + .field("split_len", &self.split_len) + .field("line_width", &self.line_width) + .field("line_width_at_split", &self.line_width_at_split) + .field("in_whitespace", &self.in_whitespace) + .field("finished", &self.finished) + .finish() + } +} + +impl<'a> WrapIterImpl<'a> { + fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> { + WrapIterImpl { + source: s, + char_indices: s.char_indices(), + is_next_first: true, + start: 0, + split: 0, + split_len: 0, + line_width: wrapper.initial_indent.width(), + line_width_at_split: wrapper.initial_indent.width(), + in_whitespace: false, + finished: false, + } + } + + fn create_result_line<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> { + if self.is_next_first { + self.is_next_first = false; + Cow::from(wrapper.initial_indent) + } else { + Cow::from(wrapper.subsequent_indent) + } + } + + fn impl_next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> { + if self.finished { + return None; + } + + while let Some((idx, ch)) = self.char_indices.next() { + let char_width = ch.width().unwrap_or(0); + let char_len = ch.len_utf8(); + if ch.is_whitespace() && ch != NBSP { + // Extend the previous split or create a new one. + if self.in_whitespace { + self.split_len += char_len; + } else { + self.split = idx; + self.split_len = char_len; + } + self.line_width_at_split = self.line_width + char_width; + self.in_whitespace = true; + } else if self.line_width + char_width > wrapper.width { + // There is no room for this character on the current + // line. Try to split the final word. + let remaining_text = &self.source[self.split + self.split_len..]; + let final_word = match remaining_text + .find(|ch: char| ch.is_whitespace() && ch != NBSP) { + Some(i) => &remaining_text[..i], + None => remaining_text, + }; + + let mut hyphen = ""; + let splits = wrapper.splitter.split(final_word); + for &(head, hyp, _) in splits.iter().rev() { + if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width { + self.split += head.len(); + self.split_len = 0; + hyphen = hyp; + break; + } + } + + if self.start >= self.split { + // The word is too big to fit on a single line, so we + // need to split it at the current index. + if wrapper.break_words { + // Break work at current index. + self.split = idx; + self.split_len = 0; + self.line_width_at_split = self.line_width; + } else { + // Add smallest split. + self.split = self.start + splits[0].0.len(); + self.split_len = 0; + self.line_width_at_split = self.line_width; + } + } + + if self.start < self.split { + let mut result_line = self.create_result_line(wrapper); + cow_add_assign(&mut result_line, &self.source[self.start..self.split]); + cow_add_assign(&mut result_line, hyphen); + + self.start = self.split + self.split_len; + self.line_width += wrapper.subsequent_indent.width(); + self.line_width -= self.line_width_at_split; + self.line_width += char_width; + + return Some(result_line); + } + } else { + self.in_whitespace = false; + } + self.line_width += char_width; + } + + // Add final line. + let final_line = if self.start < self.source.len() { + let mut result_line = self.create_result_line(wrapper); + cow_add_assign(&mut result_line, &self.source[self.start..]); + + Some(result_line) + } else { + None + }; + + self.finished = true; + + final_line + } +} + + +/// Return the current terminal width. If the terminal width cannot be +/// determined (typically because the standard output is not connected +/// to a terminal), a default width of 80 characters will be used. +/// +/// # Examples +/// +/// Create a `Wrapper` for the current terminal with a two column +/// margin: +/// +/// ```no_run +/// # #![allow(unused_variables)] +/// use textwrap::{Wrapper, NoHyphenation, termwidth}; +/// +/// let width = termwidth() - 4; // Two columns on each side. +/// let wrapper = Wrapper::with_splitter(width, NoHyphenation) +/// .initial_indent(" ") +/// .subsequent_indent(" "); +/// ``` +#[cfg(feature = "term_size")] +pub fn termwidth() -> usize { + term_size::dimensions_stdout().map_or(80, |(w, _)| w) +} + +/// Fill a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// The result is a string with newlines between each line. Use +/// [`wrap`] if you need access to the individual lines or +/// [`wrap_iter`] for its iterator counterpart. +/// +/// ``` +/// use textwrap::fill; +/// +/// assert_eq!(fill("Memory safety without garbage collection.", 15), +/// "Memory safety\nwithout garbage\ncollection."); +/// ``` +/// +/// This function creates a Wrapper on the fly with default settings. +/// If you need to set a language corpus for automatic hyphenation, or +/// need to fill many strings, then it is suggested to create a Wrapper +/// and call its [`fill` method]. +/// +/// [`wrap`]: fn.wrap.html +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`fill` method]: struct.Wrapper.html#method.fill +pub fn fill(s: &str, width: usize) -> String { + Wrapper::new(width).fill(s) +} + +/// Wrap a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. +/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap` method]. +/// +/// The result is a vector of strings. Use [`wrap_iter`] if you need an +/// iterator version. +/// +/// # Examples +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap("Concurrency without data races.", 15), +/// vec!["Concurrency", +/// "without data", +/// "races."]); +/// +/// assert_eq!(wrap("Concurrency without data races.", 20), +/// vec!["Concurrency without", +/// "data races."]); +/// ``` +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap` method]: struct.Wrapper.html#method.wrap +pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> { + Wrapper::new(width).wrap(s) +} + +/// Lazily wrap a line of text at `self.width` characters. Strings are +/// wrapped based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. +/// It then calls the [`into_wrap_iter`] method. Hence, the return +/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function +/// name would otherwise suggest. +/// +/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// use textwrap::wrap_iter; +/// +/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); +/// assert_eq!(wrap20_iter.next(), None); +/// +/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25); +/// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); +/// assert_eq!(wrap25_iter.next(), None); +/// ``` +/// +/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter +/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +/// [`IntoWrapIter`]: struct.IntoWrapIter.html +/// [`WrapIter`]: struct.WrapIter.html +pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> { + Wrapper::new(width).into_wrap_iter(s) +} + +/// Add prefix to each non-empty line. +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent("Foo\nBar\n", " "), " Foo\n Bar\n"); +/// ``` +/// +/// Empty lines (lines consisting only of whitespace) are not indented +/// and the whitespace is replaced by a single newline (`\n`): +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent("Foo\n\nBar\n \t \nBaz\n", " "), +/// " Foo\n\n Bar\n\n Baz\n"); +/// ``` +/// +/// Leading and trailing whitespace on non-empty lines is kept +/// unchanged: +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent(" \t Foo ", " "), " \t Foo \n"); +/// ``` +pub fn indent(s: &str, prefix: &str) -> String { + let mut result = String::new(); + for line in s.lines() { + if line.chars().any(|c| !c.is_whitespace()) { + result.push_str(prefix); + result.push_str(line); + } + result.push('\n'); + } + result +} + +/// Removes common leading whitespace from each line. +/// +/// This will look at each non-empty line and determine the maximum +/// amount of whitespace that can be removed from the line. +/// +/// ``` +/// use textwrap::dedent; +/// +/// assert_eq!(dedent(" 1st line\n 2nd line\n"), +/// "1st line\n2nd line\n"); +/// ``` +pub fn dedent(s: &str) -> String { + let mut prefix = String::new(); + let mut lines = s.lines(); + + // We first search for a non-empty line to find a prefix. + for line in &mut lines { + let whitespace = line.chars() + .take_while(|c| c.is_whitespace()) + .collect::<String>(); + // Check if the line had anything but whitespace + if whitespace.len() < line.len() { + prefix = whitespace; + break; + } + } + + // We then continue looking through the remaining lines to + // possibly shorten the prefix. + for line in &mut lines { + let whitespace = line.chars() + .zip(prefix.chars()) + .take_while(|&(a, b)| a == b) + .map(|(_, b)| b) + .collect::<String>(); + // Check if we have found a shorter prefix + if whitespace.len() < prefix.len() { + prefix = whitespace; + } + } + + // We now go over the lines a second time to build the result. + let mut result = String::new(); + for line in s.lines() { + if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) { + let (_, tail) = line.split_at(prefix.len()); + result.push_str(tail); + } + result.push('\n'); + } + result +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "hyphenation")] + extern crate hyphenation; + + #[cfg(feature = "hyphenation")] + use hyphenation::Language; + use super::*; + + /// Add newlines. Ensures that the final line in the vector also + /// has a newline. + fn add_nl(lines: &[&str]) -> String { + lines.join("\n") + "\n" + } + + #[test] + fn no_wrap() { + assert_eq!(wrap("foo", 10), vec!["foo"]); + } + + #[test] + fn simple() { + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn multi_word_on_line() { + assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); + } + + #[test] + fn long_word() { + assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); + } + + #[test] + fn long_words() { + assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn max_width() { + assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); + } + + #[test] + fn leading_whitespace() { + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + } + + #[test] + fn trailing_whitespace() { + assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]); + } + + #[test] + fn interior_whitespace() { + assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]); + } + + #[test] + fn extra_whitespace_start_of_line() { + // Whitespace is only significant inside a line. After a line + // gets too long and is broken, the first word starts in + // column zero and is not indented. The line before might end + // up with trailing whitespace. + assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]); + } + + #[test] + fn wide_character_handling() { + assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); + assert_eq!(wrap("Hello, World!", 15), + vec!["Hello,", "World!"]); + } + + #[test] + fn indent_empty() { + let wrapper = Wrapper::new(10).initial_indent("!!!"); + assert_eq!(wrapper.fill(""), ""); + } + + #[test] + fn indent_single_line() { + let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space + assert_eq!(wrapper.fill("foo"), ">>>foo"); + } + + #[test] + fn indent_multiple_lines() { + let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn indent_break_words() { + let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn hyphens() { + assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); + } + + #[test] + fn trailing_hyphen() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]); + } + + #[test] + fn multiple_hyphens() { + assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); + } + + #[test] + fn hyphens_flag() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("The --foo-bar flag."), + vec!["The", "--foo-", "bar", "flag."]); + } + + #[test] + fn repeated_hyphens() { + let wrapper = Wrapper::new(4).break_words(false); + assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]); + } + + #[test] + fn hyphens_alphanumeric() { + assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); + } + + #[test] + fn hyphens_non_alphanumeric() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]); + } + + #[test] + fn multiple_splits() { + assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); + } + + #[test] + fn forced_split() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]); + } + + #[test] + fn no_hyphenation() { + let wrapper = Wrapper::with_splitter(8, NoHyphenation); + assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation() { + let corpus = hyphenation::load(Language::English_US).unwrap(); + let wrapper = Wrapper::new(10); + assert_eq!(wrapper.wrap("Internationalization"), + vec!["Internatio", "nalization"]); + + let wrapper = Wrapper::with_splitter(10, corpus); + assert_eq!(wrapper.wrap("Internationalization"), + vec!["Interna-", "tionaliza-", "tion"]); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn borrowed_lines() { + // Lines that end with an extra hyphen are owned, the final + // line is borrowed. + use std::borrow::Cow::{Borrowed, Owned}; + let corpus = hyphenation::load(Language::English_US).unwrap(); + let wrapper = Wrapper::with_splitter(10, corpus); + let lines = wrapper.wrap("Internationalization"); + if let Borrowed(s) = lines[0] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Borrowed(s) = lines[1] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Owned(ref s) = lines[2] { + assert!(false, "should not have been owned: {:?}", s); + } + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_with_hyphen() { + let corpus = hyphenation::load(Language::English_US).unwrap(); + let wrapper = Wrapper::new(8).break_words(false); + assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]); + + let wrapper = Wrapper::with_splitter(8, corpus).break_words(false); + assert_eq!(wrapper.wrap("over-caffinated"), + vec!["over-", "caffi-", "nated"]); + } + + #[test] + fn break_words() { + assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); + } + + #[test] + fn break_words_wide_characters() { + assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]); + } + + #[test] + fn break_words_zero_width() { + assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn test_non_breaking_space() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.fill("foo bar baz"), "foo bar baz"); + } + + #[test] + fn test_non_breaking_hyphen() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.fill("foo‑bar‑baz"), "foo‑bar‑baz"); + } + + #[test] + fn test_fill() { + assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz"); + } + + #[test] + fn test_indent_empty() { + assert_eq!(indent("\n", " "), "\n"); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn test_indent_nonempty() { + let x = vec![" foo", + "bar", + " baz"]; + let y = vec!["// foo", + "//bar", + "// baz"]; + assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn test_indent_empty_line() { + let x = vec![" foo", + "bar", + "", + " baz"]; + let y = vec!["// foo", + "//bar", + "", + "// baz"]; + assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + } + + #[test] + fn test_dedent_empty() { + assert_eq!(dedent(""), ""); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn test_dedent_multi_line() { + let x = vec![" foo", + " bar", + " baz"]; + let y = vec![" foo", + "bar", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn test_dedent_empty_line() { + let x = vec![" foo", + " bar", + " ", + " baz"]; + let y = vec![" foo", + "bar", + "", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn test_dedent_mixed_whitespace() { + let x = vec!["\tfoo", + " bar"]; + let y = vec!["\tfoo", + " bar"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } +} diff --git a/third_party/rust/textwrap/tests/version-numbers.rs b/third_party/rust/textwrap/tests/version-numbers.rs new file mode 100644 index 0000000000..411eea548e --- /dev/null +++ b/third_party/rust/textwrap/tests/version-numbers.rs @@ -0,0 +1,12 @@ +#[macro_use] +extern crate version_sync; + +#[test] +fn test_readme_deps() { + assert_markdown_deps_updated!("README.md"); +} + +#[test] +fn test_html_root_url() { + assert_html_root_url_updated!("src/lib.rs"); +} |