diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/mdbook/src/utils | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1. (ref: upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/mdbook/src/utils')
-rw-r--r-- | vendor/mdbook/src/utils/fs.rs | 275 | ||||
-rw-r--r-- | vendor/mdbook/src/utils/mod.rs | 494 | ||||
-rw-r--r-- | vendor/mdbook/src/utils/string.rs | 255 | ||||
-rw-r--r-- | vendor/mdbook/src/utils/toml_ext.rs | 130 |
4 files changed, 1154 insertions, 0 deletions
diff --git a/vendor/mdbook/src/utils/fs.rs b/vendor/mdbook/src/utils/fs.rs new file mode 100644 index 000000000..a933d548a --- /dev/null +++ b/vendor/mdbook/src/utils/fs.rs @@ -0,0 +1,275 @@ +use crate::errors::*; +use std::convert::Into; +use std::fs::{self, File}; +use std::io::Write; +use std::path::{Component, Path, PathBuf}; + +/// Naively replaces any path separator with a forward-slash '/' +pub fn normalize_path(path: &str) -> String { + use std::path::is_separator; + path.chars() + .map(|ch| if is_separator(ch) { '/' } else { ch }) + .collect::<String>() +} + +/// Write the given data to a file, creating it first if necessary +pub fn write_file<P: AsRef<Path>>(build_dir: &Path, filename: P, content: &[u8]) -> Result<()> { + let path = build_dir.join(filename); + + create_file(&path)?.write_all(content).map_err(Into::into) +} + +/// Takes a path and returns a path containing just enough `../` to point to +/// the root of the given path. +/// +/// This is mostly interesting for a relative path to point back to the +/// directory from where the path starts. +/// +/// ```rust +/// # use std::path::Path; +/// # use mdbook::utils::fs::path_to_root; +/// let path = Path::new("some/relative/path"); +/// assert_eq!(path_to_root(path), "../../"); +/// ``` +/// +/// **note:** it's not very fool-proof, if you find a situation where +/// it doesn't return the correct path. +/// Consider [submitting a new issue](https://github.com/rust-lang/mdBook/issues) +/// or a [pull-request](https://github.com/rust-lang/mdBook/pulls) to improve it. +pub fn path_to_root<P: Into<PathBuf>>(path: P) -> String { + debug!("path_to_root"); + // Remove filename and add "../" for every directory + + path.into() + .parent() + .expect("") + .components() + .fold(String::new(), |mut s, c| { + match c { + Component::Normal(_) => s.push_str("../"), + _ => { + debug!("Other path component... {:?}", c); + } + } + s + }) +} + +/// This function creates a file and returns it. 
But before creating the file +/// it checks every directory in the path to see if it exists, +/// and if it does not it will be created. +pub fn create_file(path: &Path) -> Result<File> { + debug!("Creating {}", path.display()); + + // Construct path + if let Some(p) = path.parent() { + trace!("Parent directory is: {:?}", p); + + fs::create_dir_all(p)?; + } + + File::create(path).map_err(Into::into) +} + +/// Removes all the content of a directory but not the directory itself +pub fn remove_dir_content(dir: &Path) -> Result<()> { + for item in fs::read_dir(dir)? { + if let Ok(item) = item { + let item = item.path(); + if item.is_dir() { + fs::remove_dir_all(item)?; + } else { + fs::remove_file(item)?; + } + } + } + Ok(()) +} + +/// Copies all files of a directory to another one except the files +/// with the extensions given in the `ext_blacklist` array +pub fn copy_files_except_ext( + from: &Path, + to: &Path, + recursive: bool, + avoid_dir: Option<&PathBuf>, + ext_blacklist: &[&str], +) -> Result<()> { + debug!( + "Copying all files from {} to {} (blacklist: {:?}), avoiding {:?}", + from.display(), + to.display(), + ext_blacklist, + avoid_dir + ); + + // Check that from and to are different + if from == to { + return Ok(()); + } + + for entry in fs::read_dir(from)? 
{ + let entry = entry?; + let metadata = entry + .path() + .metadata() + .with_context(|| format!("Failed to read {:?}", entry.path()))?; + + // If the entry is a dir and the recursive option is enabled, call itself + if metadata.is_dir() && recursive { + if entry.path() == to.to_path_buf() { + continue; + } + + if let Some(avoid) = avoid_dir { + if entry.path() == *avoid { + continue; + } + } + + // check if output dir already exists + if !to.join(entry.file_name()).exists() { + fs::create_dir(&to.join(entry.file_name()))?; + } + + copy_files_except_ext( + &from.join(entry.file_name()), + &to.join(entry.file_name()), + true, + avoid_dir, + ext_blacklist, + )?; + } else if metadata.is_file() { + // Check if it is in the blacklist + if let Some(ext) = entry.path().extension() { + if ext_blacklist.contains(&ext.to_str().unwrap()) { + continue; + } + } + debug!( + "creating path for file: {:?}", + &to.join( + entry + .path() + .file_name() + .expect("a file should have a file name...") + ) + ); + + debug!( + "Copying {:?} to {:?}", + entry.path(), + &to.join( + entry + .path() + .file_name() + .expect("a file should have a file name...") + ) + ); + fs::copy( + entry.path(), + &to.join( + entry + .path() + .file_name() + .expect("a file should have a file name..."), + ), + )?; + } + } + Ok(()) +} + +pub fn get_404_output_file(input_404: &Option<String>) -> String { + input_404 + .as_ref() + .unwrap_or(&"404.md".to_string()) + .replace(".md", ".html") +} + +#[cfg(test)] +mod tests { + use super::copy_files_except_ext; + use std::{fs, io::Result, path::Path}; + + #[cfg(target_os = "windows")] + fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) -> Result<()> { + std::os::windows::fs::symlink_file(src, dst) + } + + #[cfg(not(target_os = "windows"))] + fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) -> Result<()> { + std::os::unix::fs::symlink(src, dst) + } + + #[test] + fn copy_files_except_ext_test() { + let tmp = match tempfile::TempDir::new() { + 
Ok(t) => t, + Err(e) => panic!("Could not create a temp dir: {}", e), + }; + + // Create a couple of files + if let Err(err) = fs::File::create(&tmp.path().join("file.txt")) { + panic!("Could not create file.txt: {}", err); + } + if let Err(err) = fs::File::create(&tmp.path().join("file.md")) { + panic!("Could not create file.md: {}", err); + } + if let Err(err) = fs::File::create(&tmp.path().join("file.png")) { + panic!("Could not create file.png: {}", err); + } + if let Err(err) = fs::create_dir(&tmp.path().join("sub_dir")) { + panic!("Could not create sub_dir: {}", err); + } + if let Err(err) = fs::File::create(&tmp.path().join("sub_dir/file.png")) { + panic!("Could not create sub_dir/file.png: {}", err); + } + if let Err(err) = fs::create_dir(&tmp.path().join("sub_dir_exists")) { + panic!("Could not create sub_dir_exists: {}", err); + } + if let Err(err) = fs::File::create(&tmp.path().join("sub_dir_exists/file.txt")) { + panic!("Could not create sub_dir_exists/file.txt: {}", err); + } + if let Err(err) = symlink( + &tmp.path().join("file.png"), + &tmp.path().join("symlink.png"), + ) { + panic!("Could not symlink file.png: {}", err); + } + + // Create output dir + if let Err(err) = fs::create_dir(&tmp.path().join("output")) { + panic!("Could not create output: {}", err); + } + if let Err(err) = fs::create_dir(&tmp.path().join("output/sub_dir_exists")) { + panic!("Could not create output/sub_dir_exists: {}", err); + } + + if let Err(e) = + copy_files_except_ext(tmp.path(), &tmp.path().join("output"), true, None, &["md"]) + { + panic!("Error while executing the function:\n{:?}", e); + } + + // Check if the correct files where created + if !(&tmp.path().join("output/file.txt")).exists() { + panic!("output/file.txt should exist") + } + if (&tmp.path().join("output/file.md")).exists() { + panic!("output/file.md should not exist") + } + if !(&tmp.path().join("output/file.png")).exists() { + panic!("output/file.png should exist") + } + if 
!(&tmp.path().join("output/sub_dir/file.png")).exists() { + panic!("output/sub_dir/file.png should exist") + } + if !(&tmp.path().join("output/sub_dir_exists/file.txt")).exists() { + panic!("output/sub_dir/file.png should exist") + } + if !(&tmp.path().join("output/symlink.png")).exists() { + panic!("output/symlink.png should exist") + } + } +} diff --git a/vendor/mdbook/src/utils/mod.rs b/vendor/mdbook/src/utils/mod.rs new file mode 100644 index 000000000..a205633f9 --- /dev/null +++ b/vendor/mdbook/src/utils/mod.rs @@ -0,0 +1,494 @@ +#![allow(missing_docs)] // FIXME: Document this + +pub mod fs; +mod string; +pub(crate) mod toml_ext; +use crate::errors::Error; +use regex::Regex; + +use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag}; + +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::Write; +use std::path::Path; + +pub use self::string::{ + take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, + take_rustdoc_include_lines, +}; + +/// Replaces multiple consecutive whitespace characters with a single space character. +pub fn collapse_whitespace(text: &str) -> Cow<'_, str> { + lazy_static! { + static ref RE: Regex = Regex::new(r"\s\s+").unwrap(); + } + RE.replace_all(text, " ") +} + +/// Convert the given string to a valid HTML element ID. +/// The only restriction is that the ID must not contain any ASCII whitespace. +pub fn normalize_id(content: &str) -> String { + content + .chars() + .filter_map(|ch| { + if ch.is_alphanumeric() || ch == '_' || ch == '-' { + Some(ch.to_ascii_lowercase()) + } else if ch.is_whitespace() { + Some('-') + } else { + None + } + }) + .collect::<String>() +} + +/// Generate an ID for use with anchors which is derived from a "normalised" +/// string. +// This function should be made private when the deprecation expires. 
+#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")] +pub fn id_from_content(content: &str) -> String { + let mut content = content.to_string(); + + // Skip any tags or html-encoded stuff + lazy_static! { + static ref HTML: Regex = Regex::new(r"(<.*?>)").unwrap(); + } + content = HTML.replace_all(&content, "").into(); + const REPL_SUB: &[&str] = &["<", ">", "&", "'", """]; + for sub in REPL_SUB { + content = content.replace(sub, ""); + } + + // Remove spaces and hashes indicating a header + let trimmed = content.trim().trim_start_matches('#').trim(); + normalize_id(trimmed) +} + +/// Generate an ID for use with anchors which is derived from a "normalised" +/// string. +/// +/// Each ID returned will be unique, if the same `id_counter` is provided on +/// each call. +pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String { + let id = { + #[allow(deprecated)] + id_from_content(content) + }; + + // If we have headers with the same normalized id, append an incrementing counter + let id_count = id_counter.entry(id.clone()).or_insert(0); + let unique_id = match *id_count { + 0 => id, + id_count => format!("{}-{}", id, id_count), + }; + *id_count += 1; + unique_id +} + +/// Fix links to the correct location. +/// +/// This adjusts links, such as turning `.md` extensions to `.html`. +/// +/// `path` is the path to the page being rendered relative to the root of the +/// book. This is used for the `print.html` page so that links on the print +/// page go to the original location. Normal page rendering sets `path` to +/// None. Ideally, print page links would link to anchors on the print page, +/// but that is very difficult. +fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { + lazy_static! 
{ + static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap(); + static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap(); + } + + fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + if dest.starts_with('#') { + // Fragment-only link. + if let Some(path) = path { + let mut base = path.display().to_string(); + if base.ends_with(".md") { + base.replace_range(base.len() - 3.., ".html"); + } + return format!("{}{}", base, dest).into(); + } else { + return dest; + } + } + // Don't modify links with schemes like `https`. + if !SCHEME_LINK.is_match(&dest) { + // This is a relative link, adjust it as necessary. + let mut fixed_link = String::new(); + if let Some(path) = path { + let base = path + .parent() + .expect("path can't be empty") + .to_str() + .expect("utf-8 paths only"); + if !base.is_empty() { + write!(fixed_link, "{}/", base).unwrap(); + } + } + + if let Some(caps) = MD_LINK.captures(&dest) { + fixed_link.push_str(&caps["link"]); + fixed_link.push_str(".html"); + if let Some(anchor) = caps.name("anchor") { + fixed_link.push_str(anchor.as_str()); + } + } else { + fixed_link.push_str(&dest); + }; + return CowStr::from(fixed_link); + } + dest + } + + fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { + // This is a terrible hack, but should be reasonably reliable. Nobody + // should ever parse a tag with a regex. However, there isn't anything + // in Rust that I know of that is suitable for handling partial html + // fragments like those generated by pulldown_cmark. + // + // There are dozens of HTML tags/attributes that contain paths, so + // feel free to add more tags if desired; these are the only ones I + // care about right now. + lazy_static! 
{ + static ref HTML_LINK: Regex = + Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap(); + } + + HTML_LINK + .replace_all(&html, |caps: ®ex::Captures<'_>| { + let fixed = fix(caps[2].into(), path); + format!("{}{}\"", &caps[1], fixed) + }) + .into_owned() + .into() + } + + match event { + Event::Start(Tag::Link(link_type, dest, title)) => { + Event::Start(Tag::Link(link_type, fix(dest, path), title)) + } + Event::Start(Tag::Image(link_type, dest, title)) => { + Event::Start(Tag::Image(link_type, fix(dest, path), title)) + } + Event::Html(html) => Event::Html(fix_html(html, path)), + _ => event, + } +} + +/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML. +pub fn render_markdown(text: &str, curly_quotes: bool) -> String { + render_markdown_with_path(text, curly_quotes, None) +} + +pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> { + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + opts.insert(Options::ENABLE_FOOTNOTES); + opts.insert(Options::ENABLE_STRIKETHROUGH); + opts.insert(Options::ENABLE_TASKLISTS); + if curly_quotes { + opts.insert(Options::ENABLE_SMART_PUNCTUATION); + } + Parser::new_ext(text, opts) +} + +pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String { + let mut s = String::with_capacity(text.len() * 3 / 2); + let p = new_cmark_parser(text, curly_quotes); + let events = p + .map(clean_codeblock_headers) + .map(|event| adjust_links(event, path)) + .flat_map(|event| { + let (a, b) = wrap_tables(event); + a.into_iter().chain(b) + }); + + html::push_html(&mut s, events); + s +} + +/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to. 
+fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) { + match event { + Event::Start(Tag::Table(_)) => ( + Some(Event::Html(r#"<div class="table-wrapper">"#.into())), + Some(event), + ), + Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"#.into()))), + _ => (Some(event), None), + } +} + +fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> { + match event { + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => { + let info: String = info + .chars() + .map(|x| match x { + ' ' | '\t' => ',', + _ => x, + }) + .filter(|ch| !ch.is_whitespace()) + .collect(); + + Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info)))) + } + _ => event, + } +} + +/// Prints a "backtrace" of some `Error`. +pub fn log_backtrace(e: &Error) { + error!("Error: {}", e); + + for cause in e.chain().skip(1) { + error!("\tCaused By: {}", cause); + } +} + +pub(crate) fn bracket_escape(mut s: &str) -> String { + let mut escaped = String::with_capacity(s.len()); + let needs_escape: &[char] = &['<', '>']; + while let Some(next) = s.find(needs_escape) { + escaped.push_str(&s[..next]); + match s.as_bytes()[next] { + b'<' => escaped.push_str("<"), + b'>' => escaped.push_str(">"), + _ => unreachable!(), + } + s = &s[next + 1..]; + } + escaped.push_str(s); + escaped +} + +#[cfg(test)] +mod tests { + use super::bracket_escape; + + mod render_markdown { + use super::super::render_markdown; + + #[test] + fn preserves_external_links() { + assert_eq!( + render_markdown("[example](https://www.rust-lang.org/)", false), + "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n" + ); + } + + #[test] + fn it_can_adjust_markdown_links() { + assert_eq!( + render_markdown("[example](example.md)", false), + "<p><a href=\"example.html\">example</a></p>\n" + ); + assert_eq!( + render_markdown("[example_anchor](example.md#anchor)", false), + "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n" + ); + + // this anchor contains 'md' 
inside of it + assert_eq!( + render_markdown("[phantom data](foo.html#phantomdata)", false), + "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n" + ); + } + + #[test] + fn it_can_wrap_tables() { + let src = r#" +| Original | Punycode | Punycode + Encoding | +|-----------------|-----------------|---------------------| +| føø | f-5gaa | f_5gaa | +"#; + let out = r#" +<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody> +<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr> +</tbody></table> +</div> +"#.trim(); + assert_eq!(render_markdown(src, false), out); + } + + #[test] + fn it_can_keep_quotes_straight() { + assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n"); + } + + #[test] + fn it_can_make_quotes_curly_except_when_they_are_in_code() { + let input = r#" +'one' +``` +'two' +``` +`'three'` 'four'"#; + let expected = r#"<p>‘one’</p> +<pre><code>'two' +</code></pre> +<p><code>'three'</code> ‘four’</p> +"#; + assert_eq!(render_markdown(input, true), expected); + } + + #[test] + fn whitespace_outside_of_codeblock_header_is_preserved() { + let input = r#" +some text with spaces +```rust +fn main() { +// code inside is unchanged +} +``` +more text with spaces +"#; + + let expected = r#"<p>some text with spaces</p> +<pre><code class="language-rust">fn main() { +// code inside is unchanged +} +</code></pre> +<p>more text with spaces</p> +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + } + + #[test] + fn rust_code_block_properties_are_passed_as_space_delimited_class() { + let input = r#" +```rust,no_run,should_panic,property_3 +``` +"#; + + let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre> +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + } + + #[test] + fn 
rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() { + let input = r#" +```rust, no_run,,,should_panic , ,property_3 +``` +"#; + + let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre> +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + } + + #[test] + fn rust_code_block_without_properties_has_proper_html_class() { + let input = r#" +```rust +``` +"#; + + let expected = r#"<pre><code class="language-rust"></code></pre> +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + + let input = r#" +```rust +``` +"#; + assert_eq!(render_markdown(input, false), expected); + assert_eq!(render_markdown(input, true), expected); + } + } + + #[allow(deprecated)] + mod id_from_content { + use super::super::id_from_content; + + #[test] + fn it_generates_anchors() { + assert_eq!( + id_from_content("## Method-call expressions"), + "method-call-expressions" + ); + assert_eq!(id_from_content("## **Bold** title"), "bold-title"); + assert_eq!(id_from_content("## `Code` title"), "code-title"); + assert_eq!( + id_from_content("## title <span dir=rtl>foo</span>"), + "title-foo" + ); + } + + #[test] + fn it_generates_anchors_from_non_ascii_initial() { + assert_eq!( + id_from_content("## `--passes`: add more rustdoc passes"), + "--passes-add-more-rustdoc-passes" + ); + assert_eq!( + id_from_content("## 中文標題 CJK title"), + "中文標題-cjk-title" + ); + assert_eq!(id_from_content("## Über"), "Über"); + } + } + + mod html_munging { + use super::super::{normalize_id, unique_id_from_content}; + + #[test] + fn it_normalizes_ids() { + assert_eq!( + normalize_id("`--passes`: add more rustdoc passes"), + "--passes-add-more-rustdoc-passes" + ); + assert_eq!( + normalize_id("Method-call 🐙 expressions \u{1f47c}"), + "method-call--expressions-" + ); + assert_eq!(normalize_id("_-_12345"), "_-_12345"); + 
assert_eq!(normalize_id("12345"), "12345"); + assert_eq!(normalize_id("中文"), "中文"); + assert_eq!(normalize_id("にほんご"), "にほんご"); + assert_eq!(normalize_id("한국어"), "한국어"); + assert_eq!(normalize_id(""), ""); + } + + #[test] + fn it_generates_unique_ids_from_content() { + // Same id if not given shared state + assert_eq!( + unique_id_from_content("## 中文標題 CJK title", &mut Default::default()), + "中文標題-cjk-title" + ); + assert_eq!( + unique_id_from_content("## 中文標題 CJK title", &mut Default::default()), + "中文標題-cjk-title" + ); + + // Different id if given shared state + let mut id_counter = Default::default(); + assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über"); + assert_eq!( + unique_id_from_content("## 中文標題 CJK title", &mut id_counter), + "中文標題-cjk-title" + ); + assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1"); + assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2"); + } + } + + #[test] + fn escaped_brackets() { + assert_eq!(bracket_escape(""), ""); + assert_eq!(bracket_escape("<"), "<"); + assert_eq!(bracket_escape(">"), ">"); + assert_eq!(bracket_escape("<>"), "<>"); + assert_eq!(bracket_escape("<test>"), "<test>"); + assert_eq!(bracket_escape("a<test>b"), "a<test>b"); + } +} diff --git a/vendor/mdbook/src/utils/string.rs b/vendor/mdbook/src/utils/string.rs new file mode 100644 index 000000000..97485d7b6 --- /dev/null +++ b/vendor/mdbook/src/utils/string.rs @@ -0,0 +1,255 @@ +use regex::Regex; +use std::ops::Bound::{Excluded, Included, Unbounded}; +use std::ops::RangeBounds; + +/// Take a range of lines from a string. 
+pub fn take_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String { + let start = match range.start_bound() { + Excluded(&n) => n + 1, + Included(&n) => n, + Unbounded => 0, + }; + let lines = s.lines().skip(start); + match range.end_bound() { + Excluded(end) => lines + .take(end.saturating_sub(start)) + .collect::<Vec<_>>() + .join("\n"), + Included(end) => lines + .take((end + 1).saturating_sub(start)) + .collect::<Vec<_>>() + .join("\n"), + Unbounded => lines.collect::<Vec<_>>().join("\n"), + } +} + +lazy_static! { + static ref ANCHOR_START: Regex = Regex::new(r"ANCHOR:\s*(?P<anchor_name>[\w_-]+)").unwrap(); + static ref ANCHOR_END: Regex = Regex::new(r"ANCHOR_END:\s*(?P<anchor_name>[\w_-]+)").unwrap(); +} + +/// Take anchored lines from a string. +/// Lines containing anchor are ignored. +pub fn take_anchored_lines(s: &str, anchor: &str) -> String { + let mut retained = Vec::<&str>::new(); + let mut anchor_found = false; + + for l in s.lines() { + if anchor_found { + match ANCHOR_END.captures(l) { + Some(cap) => { + if &cap["anchor_name"] == anchor { + break; + } + } + None => { + if !ANCHOR_START.is_match(l) { + retained.push(l); + } + } + } + } else if let Some(cap) = ANCHOR_START.captures(l) { + if &cap["anchor_name"] == anchor { + anchor_found = true; + } + } + } + + retained.join("\n") +} + +/// Keep lines contained within the range specified as-is. +/// For any lines not in the range, include them but use `#` at the beginning. This will hide the +/// lines from initial display but include them when expanding the code snippet or testing with +/// rustdoc. +pub fn take_rustdoc_include_lines<R: RangeBounds<usize>>(s: &str, range: R) -> String { + let mut output = String::with_capacity(s.len()); + + for (index, line) in s.lines().enumerate() { + if !range.contains(&index) { + output.push_str("# "); + } + output.push_str(line); + output.push('\n'); + } + output.pop(); + output +} + +/// Keep lines between the anchor comments specified as-is. 
+/// For any lines not between the anchors, include them but use `#` at the beginning. This will +/// hide the lines from initial display but include them when expanding the code snippet or testing +/// with rustdoc. +pub fn take_rustdoc_include_anchored_lines(s: &str, anchor: &str) -> String { + let mut output = String::with_capacity(s.len()); + let mut within_anchored_section = false; + + for l in s.lines() { + if within_anchored_section { + match ANCHOR_END.captures(l) { + Some(cap) => { + if &cap["anchor_name"] == anchor { + within_anchored_section = false; + } + } + None => { + if !ANCHOR_START.is_match(l) { + output.push_str(l); + output.push('\n'); + } + } + } + } else if let Some(cap) = ANCHOR_START.captures(l) { + if &cap["anchor_name"] == anchor { + within_anchored_section = true; + } + } else if !ANCHOR_END.is_match(l) { + output.push_str("# "); + output.push_str(l); + output.push('\n'); + } + } + + output.pop(); + output +} + +#[cfg(test)] +mod tests { + use super::{ + take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, + take_rustdoc_include_lines, + }; + + #[test] + #[allow(clippy::reversed_empty_ranges)] // Intentionally checking that those are correctly handled + fn take_lines_test() { + let s = "Lorem\nipsum\ndolor\nsit\namet"; + assert_eq!(take_lines(s, 1..3), "ipsum\ndolor"); + assert_eq!(take_lines(s, 3..), "sit\namet"); + assert_eq!(take_lines(s, ..3), "Lorem\nipsum\ndolor"); + assert_eq!(take_lines(s, ..), s); + // corner cases + assert_eq!(take_lines(s, 4..3), ""); + assert_eq!(take_lines(s, ..100), s); + } + + #[test] + fn take_anchored_lines_test() { + let s = "Lorem\nipsum\ndolor\nsit\namet"; + assert_eq!(take_anchored_lines(s, "test"), ""); + + let s = "Lorem\nipsum\ndolor\nANCHOR_END: test\nsit\namet"; + assert_eq!(take_anchored_lines(s, "test"), ""); + + let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet"; + assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); + assert_eq!(take_anchored_lines(s, 
"something"), ""); + + let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum"; + assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); + assert_eq!(take_anchored_lines(s, "something"), ""); + + let s = "Lorem\nANCHOR: test\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum"; + assert_eq!(take_anchored_lines(s, "test"), "ipsum\ndolor\nsit\namet"); + assert_eq!(take_anchored_lines(s, "something"), ""); + + let s = "Lorem\nANCHOR: test2\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nANCHOR_END:test2\nipsum"; + assert_eq!( + take_anchored_lines(s, "test2"), + "ipsum\ndolor\nsit\namet\nlorem" + ); + assert_eq!(take_anchored_lines(s, "test"), "dolor\nsit\namet"); + assert_eq!(take_anchored_lines(s, "something"), ""); + } + + #[test] + #[allow(clippy::reversed_empty_ranges)] // Intentionally checking that those are correctly handled + fn take_rustdoc_include_lines_test() { + let s = "Lorem\nipsum\ndolor\nsit\namet"; + assert_eq!( + take_rustdoc_include_lines(s, 1..3), + "# Lorem\nipsum\ndolor\n# sit\n# amet" + ); + assert_eq!( + take_rustdoc_include_lines(s, 3..), + "# Lorem\n# ipsum\n# dolor\nsit\namet" + ); + assert_eq!( + take_rustdoc_include_lines(s, ..3), + "Lorem\nipsum\ndolor\n# sit\n# amet" + ); + assert_eq!(take_rustdoc_include_lines(s, ..), s); + // corner cases + assert_eq!( + take_rustdoc_include_lines(s, 4..3), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet" + ); + assert_eq!(take_rustdoc_include_lines(s, ..100), s); + } + + #[test] + fn take_rustdoc_include_anchored_lines_test() { + let s = "Lorem\nipsum\ndolor\nsit\namet"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet" + ); + + let s = "Lorem\nipsum\ndolor\nANCHOR_END: test\nsit\namet"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet" + ); + + let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet"; + assert_eq!( 
+ take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\n# ipsum\ndolor\nsit\namet" + ); + assert_eq!( + take_rustdoc_include_anchored_lines(s, "something"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet" + ); + + let s = "Lorem\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\n# ipsum\ndolor\nsit\namet\n# lorem\n# ipsum" + ); + assert_eq!( + take_rustdoc_include_anchored_lines(s, "something"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet\n# lorem\n# ipsum" + ); + + let s = "Lorem\nANCHOR: test\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nipsum"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\nipsum\ndolor\nsit\namet\n# lorem\n# ipsum" + ); + assert_eq!( + take_rustdoc_include_anchored_lines(s, "something"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet\n# lorem\n# ipsum" + ); + + let s = "Lorem\nANCHOR: test2\nipsum\nANCHOR: test\ndolor\nsit\namet\nANCHOR_END: test\nlorem\nANCHOR_END:test2\nipsum"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test2"), + "# Lorem\nipsum\ndolor\nsit\namet\nlorem\n# ipsum" + ); + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\n# ipsum\ndolor\nsit\namet\n# lorem\n# ipsum" + ); + assert_eq!( + take_rustdoc_include_anchored_lines(s, "something"), + "# Lorem\n# ipsum\n# dolor\n# sit\n# amet\n# lorem\n# ipsum" + ); + + let s = "Lorem\nANCHOR: test\nipsum\nANCHOR_END: test\ndolor\nANCHOR: test\nsit\nANCHOR_END: test\namet"; + assert_eq!( + take_rustdoc_include_anchored_lines(s, "test"), + "# Lorem\nipsum\n# dolor\nsit\n# amet" + ); + } +} diff --git a/vendor/mdbook/src/utils/toml_ext.rs b/vendor/mdbook/src/utils/toml_ext.rs new file mode 100644 index 000000000..bf25ad11b --- /dev/null +++ b/vendor/mdbook/src/utils/toml_ext.rs @@ -0,0 +1,130 @@ +use toml::value::{Table, Value}; + +pub(crate) trait TomlExt { + fn read(&self, key: &str) -> Option<&Value>; + fn 
read_mut(&mut self, key: &str) -> Option<&mut Value>; + fn insert(&mut self, key: &str, value: Value); + fn delete(&mut self, key: &str) -> Option<Value>; +} + +impl TomlExt for Value { + fn read(&self, key: &str) -> Option<&Value> { + if let Some((head, tail)) = split(key) { + self.get(head)?.read(tail) + } else { + self.get(key) + } + } + + fn read_mut(&mut self, key: &str) -> Option<&mut Value> { + if let Some((head, tail)) = split(key) { + self.get_mut(head)?.read_mut(tail) + } else { + self.get_mut(key) + } + } + + fn insert(&mut self, key: &str, value: Value) { + if !self.is_table() { + *self = Value::Table(Table::new()); + } + + let table = self.as_table_mut().expect("unreachable"); + + if let Some((head, tail)) = split(key) { + table + .entry(head) + .or_insert_with(|| Value::Table(Table::new())) + .insert(tail, value); + } else { + table.insert(key.to_string(), value); + } + } + + fn delete(&mut self, key: &str) -> Option<Value> { + if let Some((head, tail)) = split(key) { + self.get_mut(head)?.delete(tail) + } else if let Some(table) = self.as_table_mut() { + table.remove(key) + } else { + None + } + } +} + +fn split(key: &str) -> Option<(&str, &str)> { + let ix = key.find('.')?; + + let (head, tail) = key.split_at(ix); + // splitting will leave the "." 
+ let tail = &tail[1..]; + + Some((head, tail)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::str::FromStr; + + #[test] + fn read_simple_table() { + let src = "[table]"; + let value = Value::from_str(src).unwrap(); + + let got = value.read("table").unwrap(); + + assert!(got.is_table()); + } + + #[test] + fn read_nested_item() { + let src = "[table]\nnested=true"; + let value = Value::from_str(src).unwrap(); + + let got = value.read("table.nested").unwrap(); + + assert_eq!(got, &Value::Boolean(true)); + } + + #[test] + fn insert_item_at_top_level() { + let mut value = Value::Table(Table::default()); + let item = Value::Boolean(true); + + value.insert("first", item.clone()); + + assert_eq!(value.get("first").unwrap(), &item); + } + + #[test] + fn insert_nested_item() { + let mut value = Value::Table(Table::default()); + let item = Value::Boolean(true); + + value.insert("first.second", item.clone()); + + let inserted = value.read("first.second").unwrap(); + assert_eq!(inserted, &item); + } + + #[test] + fn delete_a_top_level_item() { + let src = "top = true"; + let mut value = Value::from_str(src).unwrap(); + + let got = value.delete("top").unwrap(); + + assert_eq!(got, Value::Boolean(true)); + } + + #[test] + fn delete_a_nested_item() { + let src = "[table]\n nested = true"; + let mut value = Value::from_str(src).unwrap(); + + let got = value.delete("table.nested").unwrap(); + + assert_eq!(got, Value::Boolean(true)); + } +} |