diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:57:31 +0000 |
commit | dc0db358abe19481e475e10c32149b53370f1a1c (patch) | |
tree | ab8ce99c4b255ce46f99ef402c27916055b899ee /compiler/rustc_errors/src/markdown | |
parent | Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff) | |
download | rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip |
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_errors/src/markdown')
-rw-r--r-- | compiler/rustc_errors/src/markdown/mod.rs | 76 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/parse.rs | 588 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/term.rs | 189 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/tests/input.md | 50 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/tests/output.stdout | 35 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/tests/parse.rs | 312 | ||||
-rw-r--r-- | compiler/rustc_errors/src/markdown/tests/term.rs | 90 |
7 files changed, 1340 insertions, 0 deletions
diff --git a/compiler/rustc_errors/src/markdown/mod.rs b/compiler/rustc_errors/src/markdown/mod.rs new file mode 100644 index 000000000..53b766dfc --- /dev/null +++ b/compiler/rustc_errors/src/markdown/mod.rs @@ -0,0 +1,76 @@ +//! A simple markdown parser that can write formatted text to the terminal +//! +//! Entrypoint is `MdStream::parse_str(...)` +use std::io; + +use termcolor::{Buffer, BufferWriter, ColorChoice}; +mod parse; +mod term; + +/// An AST representation of a Markdown document +#[derive(Clone, Debug, Default, PartialEq)] +pub struct MdStream<'a>(Vec<MdTree<'a>>); + +impl<'a> MdStream<'a> { + /// Parse a markdown string to a tokenstream + #[must_use] + pub fn parse_str(s: &str) -> MdStream<'_> { + parse::entrypoint(s) + } + + /// Write formatted output to a termcolor buffer + pub fn write_termcolor_buf(&self, buf: &mut Buffer) -> io::Result<()> { + term::entrypoint(self, buf) + } +} + +/// Create a termcolor buffer with the `Always` color choice +pub fn create_stdout_bufwtr() -> BufferWriter { + BufferWriter::stdout(ColorChoice::Always) +} + +/// A single tokentree within a Markdown document +#[derive(Clone, Debug, PartialEq)] +pub enum MdTree<'a> { + /// Leaf types + Comment(&'a str), + CodeBlock { + txt: &'a str, + lang: Option<&'a str>, + }, + CodeInline(&'a str), + Strong(&'a str), + Emphasis(&'a str), + Strikethrough(&'a str), + PlainText(&'a str), + /// [Foo](www.foo.com) or simple anchor <www.foo.com> + Link { + disp: &'a str, + link: &'a str, + }, + /// `[Foo link][ref]` + RefLink { + disp: &'a str, + id: Option<&'a str>, + }, + /// [ref]: www.foo.com + LinkDef { + id: &'a str, + link: &'a str, + }, + /// Break bewtween two paragraphs (double `\n`), not directly parsed but + /// added later + ParagraphBreak, + /// Break bewtween two lines (single `\n`) + LineBreak, + HorizontalRule, + Heading(u8, MdStream<'a>), + OrderedListItem(u16, MdStream<'a>), + UnorderedListItem(MdStream<'a>), +} + +impl<'a> From<Vec<MdTree<'a>>> for MdStream<'a> { + fn from(value: Vec<MdTree<'a>>) -> Self { + Self(value) + } +} diff --git a/compiler/rustc_errors/src/markdown/parse.rs b/compiler/rustc_errors/src/markdown/parse.rs new file mode 100644 index 000000000..362a451fd --- /dev/null +++ b/compiler/rustc_errors/src/markdown/parse.rs @@ -0,0 +1,588 @@ +use crate::markdown::{MdStream, MdTree}; +use std::{iter, mem, str}; + +/// Short aliases that we can use in match patterns. If an end pattern is not +/// included, this type may be variable +const ANC_E: &[u8] = b">"; +const ANC_S: &[u8] = b"<"; +const BRK: &[u8] = b"---"; +const CBK: &[u8] = b"```"; +const CIL: &[u8] = b"`"; +const CMT_E: &[u8] = b"-->"; +const CMT_S: &[u8] = b"<!--"; +const EMP: &[u8] = b"_"; +const HDG: &[u8] = b"#"; +const LNK_CHARS: &str = "$-_.+!*'()/&?=:%"; +const LNK_E: &[u8] = b"]"; +const LNK_S: &[u8] = b"["; +const STG: &[u8] = b"**"; +const STK: &[u8] = b"~~"; +const UL1: &[u8] = b"* "; +const UL2: &[u8] = b"- "; + +/// Pattern replacements +const REPLACEMENTS: &[(&str, &str)] = &[ + ("(c)", "©"), + ("(C)", "©"), + ("(r)", "®"), + ("(R)", "®"), + ("(tm)", "™"), + ("(TM)", "™"), + (":crab:", "🦀"), + ("\n", " "), +]; + +/// `(extracted, remaining)` +type Parsed<'a> = (MdTree<'a>, &'a [u8]); +/// Output of a parse function +type ParseResult<'a> = Option<Parsed<'a>>; + +/// Parsing context +#[derive(Clone, Copy, Debug, PartialEq)] +struct Context { + /// If true, we are at a the topmost level (not recursing a nested tt) + top_block: bool, + /// Previous character + prev: Prev, +} + +/// Character class preceding this one +#[derive(Clone, Copy, Debug, PartialEq)] +enum Prev { + Newline, + /// Whitespace that is not a newline + Whitespace, + Escape, + Any, +} + +impl Default for Context { + /// Most common setting for non top-level parsing: not top block, not at + /// line start (yes leading whitespace, not escaped) + fn default() -> Self { + Self { top_block: false, prev: Prev::Whitespace } + } +} + +/// Flags to simple parser function +#[derive(Clone, Copy, Debug, PartialEq)] +enum ParseOpt { + /// Ignore escapes before closing pattern, trim content + TrimNoEsc, + None, +} + +/// Parse a buffer +pub fn entrypoint(txt: &str) -> MdStream<'_> { + let ctx = Context { top_block: true, prev: Prev::Newline }; + normalize(parse_recursive(txt.trim().as_bytes(), ctx), &mut Vec::new()) +} + +/// Parse a buffer with specified context +fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> { + use ParseOpt as Po; + use Prev::{Escape, Newline, Whitespace}; + + let mut stream: Vec<MdTree<'a>> = Vec::new(); + let Context { top_block: top_blk, mut prev } = ctx; + + // wip_buf is our entire unprocessed (unpushed) buffer, loop_buf is our to + // check buffer that shrinks with each loop + let mut wip_buf = buf; + let mut loop_buf = wip_buf; + + while !loop_buf.is_empty() { + let next_prev = match loop_buf[0] { + b'\n' => Newline, + b'\\' => Escape, + x if x.is_ascii_whitespace() => Whitespace, + _ => Prev::Any, + }; + + let res: ParseResult<'_> = match (top_blk, prev) { + (_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => { + parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment) + } + (true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)), + (_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf), + (true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf), + (true, Newline) if loop_buf.starts_with(BRK) => { + Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1)) + } + (_, Newline | Whitespace) if loop_buf.starts_with(EMP) => { + parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis) + } + (_, Newline | Whitespace) if loop_buf.starts_with(STG) => { + parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong) + } + (_, Newline | Whitespace) if loop_buf.starts_with(STK) => { + parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough) + } + (_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => { + let tt_fn = |link| MdTree::Link { disp: link, link }; + let ret = parse_simple_pat(loop_buf, ANC_S, ANC_E, Po::None, tt_fn); + match ret { + Some((MdTree::Link { disp, .. }, _)) + if disp.chars().all(|ch| LNK_CHARS.contains(ch)) => + { + ret + } + _ => None, + } + } + (_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => { + Some(parse_unordered_li(loop_buf)) + } + (_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)), + (_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => { + parse_any_link(loop_buf, top_blk && prev == Prev::Newline) + } + (_, Escape | _) => None, + }; + + if let Some((tree, rest)) = res { + // We found something: push our WIP and then push the found tree + let prev_buf = &wip_buf[..(wip_buf.len() - loop_buf.len())]; + if !prev_buf.is_empty() { + let prev_str = str::from_utf8(prev_buf).unwrap(); + stream.push(MdTree::PlainText(prev_str)); + } + stream.push(tree); + + wip_buf = rest; + loop_buf = rest; + } else { + // Just move on to the next character + loop_buf = &loop_buf[1..]; + // If we are at the end and haven't found anything, just push plain text + if loop_buf.is_empty() && !wip_buf.is_empty() { + let final_str = str::from_utf8(wip_buf).unwrap(); + stream.push(MdTree::PlainText(final_str)); + } + }; + + prev = next_prev; + } + + MdStream(stream) +} + +/// The simplest kind of patterns: data within start and end patterns +fn parse_simple_pat<'a, F>( + buf: &'a [u8], + start_pat: &[u8], + end_pat: &[u8], + opts: ParseOpt, + create_tt: F, +) -> ParseResult<'a> +where + F: FnOnce(&'a str) -> MdTree<'a>, +{ + let ignore_esc = matches!(opts, ParseOpt::TrimNoEsc); + let trim = matches!(opts, ParseOpt::TrimNoEsc); + let (txt, rest) = parse_with_end_pat(&buf[start_pat.len()..], end_pat, ignore_esc)?; + let mut txt = str::from_utf8(txt).unwrap(); + if trim { + txt = txt.trim(); + } + Some((create_tt(txt), rest)) +} + +/// Parse backtick-wrapped inline code. Accounts for >1 backtick sets +fn parse_codeinline(buf: &[u8]) -> ParseResult<'_> { + let seps = buf.iter().take_while(|ch| **ch == b'`').count(); + let (txt, rest) = parse_with_end_pat(&buf[seps..], &buf[..seps], true)?; + Some((MdTree::CodeInline(str::from_utf8(txt).unwrap()), rest)) +} + +/// Parse a codeblock. Accounts for >3 backticks and language specification +fn parse_codeblock(buf: &[u8]) -> Parsed<'_> { + // account for ````code```` style + let seps = buf.iter().take_while(|ch| **ch == b'`').count(); + let end_sep = &buf[..seps]; + let mut working = &buf[seps..]; + + // Handle "````rust" style language specifications + let next_ws_idx = working.iter().take_while(|ch| !ch.is_ascii_whitespace()).count(); + + let lang = if next_ws_idx > 0 { + // Munch the lang + let tmp = str::from_utf8(&working[..next_ws_idx]).unwrap(); + working = &working[next_ws_idx..]; + Some(tmp) + } else { + None + }; + + let mut end_pat = vec![b'\n']; + end_pat.extend(end_sep); + + // Find first end pattern with nothing else on its line + let mut found = None; + for idx in (0..working.len()).filter(|idx| working[*idx..].starts_with(&end_pat)) { + let (eol_txt, rest) = parse_to_newline(&working[(idx + end_pat.len())..]); + if !eol_txt.iter().any(u8::is_ascii_whitespace) { + found = Some((&working[..idx], rest)); + break; + } + } + + let (txt, rest) = found.unwrap_or((working, &[])); + let txt = str::from_utf8(txt).unwrap().trim_matches('\n'); + + (MdTree::CodeBlock { txt, lang }, rest) +} + +fn parse_heading(buf: &[u8]) -> ParseResult<'_> { + let level = buf.iter().take_while(|ch| **ch == b'#').count(); + let buf = &buf[level..]; + + if level > 6 || (buf.len() > 1 && !buf[0].is_ascii_whitespace()) { + // Enforce max 6 levels and whitespace following the `##` pattern + return None; + } + + let (txt, rest) = parse_to_newline(&buf[1..]); + let ctx = Context { top_block: false, prev: Prev::Whitespace }; + let stream = parse_recursive(txt, ctx); + + Some((MdTree::Heading(level.try_into().unwrap(), stream), rest)) +} + +/// Bulleted list +fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> { + debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- ")); + let (txt, rest) = get_indented_section(&buf[2..]); + let ctx = Context { top_block: false, prev: Prev::Whitespace }; + let stream = parse_recursive(trim_ascii_start(txt), ctx); + (MdTree::UnorderedListItem(stream), rest) +} + +/// Numbered list +fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> { + let (num, pos) = ord_list_start(buf).unwrap(); // success tested in caller + let (txt, rest) = get_indented_section(&buf[pos..]); + let ctx = Context { top_block: false, prev: Prev::Whitespace }; + let stream = parse_recursive(trim_ascii_start(txt), ctx); + (MdTree::OrderedListItem(num, stream), rest) +} + +/// Find first line that isn't empty or doesn't start with whitespace, that will +/// be our contents +fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) { + let mut end = buf.len(); + for (idx, window) in buf.windows(2).enumerate() { + let &[ch, next_ch] = window else {unreachable!("always 2 elements")}; + if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' { + // End of stream + end = buf.len().saturating_sub(1); + break; + } else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') { + end = idx; + break; + } + } + + (&buf[..end], &buf[end..]) +} + +/// Verify a valid ordered list start (e.g. `1.`) and parse it. Returns the +/// parsed number and offset of character after the dot. +fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> { + let pos = buf.iter().take(10).position(|ch| *ch == b'.')?; + let n = str::from_utf8(&buf[..pos]).ok()?; + if !buf.get(pos + 1)?.is_ascii_whitespace() { + return None; + } + n.parse::<u16>().ok().map(|v| (v, pos + 2)) +} + +/// Parse links. `can_be_def` indicates that a link definition is possible (top +/// level, located at the start of a line) +fn parse_any_link(buf: &[u8], can_be_def: bool) -> ParseResult<'_> { + let (bracketed, rest) = parse_with_end_pat(&buf[1..], LNK_E, true)?; + if rest.is_empty() { + return None; + } + + let disp = str::from_utf8(bracketed).unwrap(); + match (can_be_def, rest[0]) { + (true, b':') => { + let (link, tmp) = parse_to_newline(&rest[1..]); + let link = str::from_utf8(link).unwrap().trim(); + Some((MdTree::LinkDef { id: disp, link }, tmp)) + } + (_, b'(') => parse_simple_pat(rest, b"(", b")", ParseOpt::TrimNoEsc, |link| MdTree::Link { + disp, + link, + }), + (_, b'[') => parse_simple_pat(rest, b"[", b"]", ParseOpt::TrimNoEsc, |id| { + MdTree::RefLink { disp, id: Some(id) } + }), + _ => Some((MdTree::RefLink { disp, id: None }, rest)), + } +} + +/// Find and consume an end pattern, return `(match, residual)` +fn parse_with_end_pat<'a>( + buf: &'a [u8], + end_sep: &[u8], + ignore_esc: bool, +) -> Option<(&'a [u8], &'a [u8])> { + // Find positions that start with the end seperator + for idx in (0..buf.len()).filter(|idx| buf[*idx..].starts_with(end_sep)) { + if !ignore_esc && idx > 0 && buf[idx - 1] == b'\\' { + continue; + } + return Some((&buf[..idx], &buf[idx + end_sep.len()..])); + } + None +} + +/// Resturn `(match, residual)` to end of line. The EOL is returned with the +/// residual. +fn parse_to_newline(buf: &[u8]) -> (&[u8], &[u8]) { + buf.iter().position(|ch| *ch == b'\n').map_or((buf, &[]), |pos| buf.split_at(pos)) +} + +/// Take a parsed stream and fix the little things +fn normalize<'a>(MdStream(stream): MdStream<'a>, linkdefs: &mut Vec<MdTree<'a>>) -> MdStream<'a> { + let mut new_stream = Vec::with_capacity(stream.len()); + let new_defs = stream.iter().filter(|tt| matches!(tt, MdTree::LinkDef { .. })); + linkdefs.extend(new_defs.cloned()); + + // Run plaintest expansions on types that need it, call this function on nested types + for item in stream { + match item { + MdTree::PlainText(txt) => expand_plaintext(txt, &mut new_stream, MdTree::PlainText), + MdTree::Strong(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Strong), + MdTree::Emphasis(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Emphasis), + MdTree::Strikethrough(txt) => { + expand_plaintext(txt, &mut new_stream, MdTree::Strikethrough); + } + MdTree::RefLink { disp, id } => new_stream.push(match_reflink(linkdefs, disp, id)), + MdTree::OrderedListItem(n, st) => { + new_stream.push(MdTree::OrderedListItem(n, normalize(st, linkdefs))); + } + MdTree::UnorderedListItem(st) => { + new_stream.push(MdTree::UnorderedListItem(normalize(st, linkdefs))); + } + MdTree::Heading(n, st) => new_stream.push(MdTree::Heading(n, normalize(st, linkdefs))), + _ => new_stream.push(item), + } + } + + // Remove non printing types, duplicate paragraph breaks, and breaks at start/end + new_stream.retain(|x| !matches!(x, MdTree::Comment(_) | MdTree::LinkDef { .. })); + new_stream.dedup_by(|r, l| matches!((r, l), (MdTree::ParagraphBreak, MdTree::ParagraphBreak))); + + if new_stream.first().is_some_and(is_break_ty) { + new_stream.remove(0); + } + if new_stream.last().is_some_and(is_break_ty) { + new_stream.pop(); + } + + // Remove paragraph breaks that shouldn't be there. w[1] is what will be + // removed in these cases. Note that these are the items to keep, not delete + // (for `retain`) + let to_keep: Vec<bool> = new_stream + .windows(3) + .map(|w| { + !((matches!(&w[1], MdTree::ParagraphBreak) + && matches!(should_break(&w[0], &w[2]), BreakRule::Always(1) | BreakRule::Never)) + || (matches!(&w[1], MdTree::PlainText(txt) if txt.trim().is_empty()) + && matches!( + should_break(&w[0], &w[2]), + BreakRule::Always(_) | BreakRule::Never + ))) + }) + .collect(); + let mut iter = iter::once(true).chain(to_keep).chain(iter::once(true)); + new_stream.retain(|_| iter.next().unwrap()); + + // Insert line or paragraph breaks where there should be some + let mut insertions = 0; + let to_insert: Vec<(usize, MdTree<'_>)> = new_stream + .windows(2) + .enumerate() + .filter_map(|(idx, w)| match should_break(&w[0], &w[1]) { + BreakRule::Always(1) => Some((idx, MdTree::LineBreak)), + BreakRule::Always(2) => Some((idx, MdTree::ParagraphBreak)), + _ => None, + }) + .map(|(idx, tt)| { + insertions += 1; + (idx + insertions, tt) + }) + .collect(); + to_insert.into_iter().for_each(|(idx, tt)| new_stream.insert(idx, tt)); + + MdStream(new_stream) +} + +/// Whether two types should or shouldn't have a paragraph break between them +#[derive(Clone, Copy, Debug, PartialEq)] +enum BreakRule { + Always(u8), + Never, + Optional, +} + +/// Blocks that automatically handle their own text wrapping +fn should_break(left: &MdTree<'_>, right: &MdTree<'_>) -> BreakRule { + use MdTree::*; + + match (left, right) { + // Separate these types with a single line + (HorizontalRule, _) + | (_, HorizontalRule) + | (OrderedListItem(_, _), OrderedListItem(_, _)) + | (UnorderedListItem(_), UnorderedListItem(_)) => BreakRule::Always(1), + // Condensed types shouldn't have an extra break on either side + (Comment(_) | ParagraphBreak | Heading(_, _), _) | (_, Comment(_) | ParagraphBreak) => { + BreakRule::Never + } + // Block types should always be separated by full breaks + (CodeBlock { .. } | OrderedListItem(_, _) | UnorderedListItem(_), _) + | (_, CodeBlock { .. } | Heading(_, _) | OrderedListItem(_, _) | UnorderedListItem(_)) => { + BreakRule::Always(2) + } + // Text types may or may not be separated by a break + ( + CodeInline(_) + | Strong(_) + | Emphasis(_) + | Strikethrough(_) + | PlainText(_) + | Link { .. } + | RefLink { .. } + | LinkDef { .. }, + CodeInline(_) + | Strong(_) + | Emphasis(_) + | Strikethrough(_) + | PlainText(_) + | Link { .. } + | RefLink { .. } + | LinkDef { .. }, + ) => BreakRule::Optional, + (LineBreak, _) | (_, LineBreak) => { + unreachable!("should have been removed during deduplication") + } + } +} + +/// Types that indicate some form of break +fn is_break_ty(val: &MdTree<'_>) -> bool { + matches!(val, MdTree::ParagraphBreak | MdTree::LineBreak) + // >1 break between paragraphs acts as a break + || matches!(val, MdTree::PlainText(txt) if txt.trim().is_empty()) +} + +/// Perform tranformations to text. This splits paragraphs, replaces patterns, +/// and corrects newlines. +/// +/// To avoid allocating strings (and using a different heavier tt type), our +/// replace method means split into three and append each. For this reason, any +/// viewer should treat consecutive `PlainText` types as belonging to the same +/// paragraph. +fn expand_plaintext<'a>( + txt: &'a str, + stream: &mut Vec<MdTree<'a>>, + mut f: fn(&'a str) -> MdTree<'a>, +) { + if txt.is_empty() { + return; + } else if txt == "\n" { + if let Some(tt) = stream.last() { + let tmp = MdTree::PlainText(" "); + if should_break(tt, &tmp) == BreakRule::Optional { + stream.push(tmp); + } + } + return; + } + let mut queue1 = Vec::new(); + let mut queue2 = Vec::new(); + let stream_start_len = stream.len(); + for paragraph in txt.split("\n\n") { + if paragraph.is_empty() { + stream.push(MdTree::ParagraphBreak); + continue; + } + let paragraph = trim_extra_ws(paragraph); + + queue1.clear(); + queue1.push(paragraph); + + for (from, to) in REPLACEMENTS { + queue2.clear(); + for item in &queue1 { + for s in item.split(from) { + queue2.extend(&[s, to]); + } + if queue2.len() > 1 { + let _ = queue2.pop(); // remove last unnecessary intersperse + } + } + mem::swap(&mut queue1, &mut queue2); + } + + // Make sure we don't double whitespace + queue1.retain(|s| !s.is_empty()); + for idx in 0..queue1.len() { + queue1[idx] = trim_extra_ws(queue1[idx]); + if idx < queue1.len() - 1 + && queue1[idx].ends_with(char::is_whitespace) + && queue1[idx + 1].starts_with(char::is_whitespace) + { + queue1[idx] = queue1[idx].trim_end(); + } + } + stream.extend(queue1.iter().copied().filter(|txt| !txt.is_empty()).map(&mut f)); + stream.push(MdTree::ParagraphBreak); + } + + if stream.len() - stream_start_len > 1 { + let _ = stream.pop(); // remove last unnecessary intersperse + } +} + +/// Turn reflinks (links with reference IDs) into normal standalone links using +/// listed link definitions +fn match_reflink<'a>(linkdefs: &[MdTree<'a>], disp: &'a str, match_id: Option<&str>) -> MdTree<'a> { + let to_match = match_id.unwrap_or(disp); // Match with the display name if there isn't an id + for def in linkdefs { + if let MdTree::LinkDef { id, link } = def { + if *id == to_match { + return MdTree::Link { disp, link }; + } + } + } + MdTree::Link { disp, link: "" } // link not found +} + +/// If there is more than one whitespace char at start or end, trim the extras +fn trim_extra_ws(mut txt: &str) -> &str { + let start_ws = + txt.bytes().position(|ch| !ch.is_ascii_whitespace()).unwrap_or(txt.len()).saturating_sub(1); + txt = &txt[start_ws..]; + let end_ws = txt + .bytes() + .rev() + .position(|ch| !ch.is_ascii_whitespace()) + .unwrap_or(txt.len()) + .saturating_sub(1); + &txt[..txt.len() - end_ws] +} + +/// If there is more than one whitespace char at start, trim the extras +fn trim_ascii_start(buf: &[u8]) -> &[u8] { + let count = buf.iter().take_while(|ch| ch.is_ascii_whitespace()).count(); + &buf[count..] +} + +#[cfg(test)] +#[path = "tests/parse.rs"] +mod tests; diff --git a/compiler/rustc_errors/src/markdown/term.rs b/compiler/rustc_errors/src/markdown/term.rs new file mode 100644 index 000000000..e45ba6d2c --- /dev/null +++ b/compiler/rustc_errors/src/markdown/term.rs @@ -0,0 +1,189 @@ +use std::cell::Cell; +use std::io::{self, Write}; + +use termcolor::{Buffer, Color, ColorSpec, WriteColor}; + +use crate::markdown::{MdStream, MdTree}; + +const DEFAULT_COLUMN_WIDTH: usize = 140; + +thread_local! { + /// Track the position of viewable characters in our buffer + static CURSOR: Cell<usize> = Cell::new(0); + /// Width of the terminal + static WIDTH: Cell<usize> = Cell::new(DEFAULT_COLUMN_WIDTH); +} + +/// Print to terminal output to a buffer +pub fn entrypoint(stream: &MdStream<'_>, buf: &mut Buffer) -> io::Result<()> { + #[cfg(not(test))] + if let Some((w, _)) = termize::dimensions() { + WIDTH.with(|c| c.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH))); + } + write_stream(stream, buf, None, 0)?; + buf.write_all(b"\n") +} + +/// Write the buffer, reset to the default style after each +fn write_stream( + MdStream(stream): &MdStream<'_>, + buf: &mut Buffer, + default: Option<&ColorSpec>, + indent: usize, +) -> io::Result<()> { + match default { + Some(c) => buf.set_color(c)?, + None => buf.reset()?, + } + + for tt in stream { + write_tt(tt, buf, indent)?; + if let Some(c) = default { + buf.set_color(c)?; + } + } + + buf.reset()?; + Ok(()) +} + +pub fn write_tt(tt: &MdTree<'_>, buf: &mut Buffer, indent: usize) -> io::Result<()> { + match tt { + MdTree::CodeBlock { txt, lang: _ } => { + buf.set_color(ColorSpec::new().set_dimmed(true))?; + buf.write_all(txt.as_bytes())?; + } + MdTree::CodeInline(txt) => { + buf.set_color(ColorSpec::new().set_dimmed(true))?; + write_wrapping(buf, txt, indent, None)?; + } + MdTree::Strong(txt) => { + buf.set_color(ColorSpec::new().set_bold(true))?; + write_wrapping(buf, txt, indent, None)?; + } + MdTree::Emphasis(txt) => { + buf.set_color(ColorSpec::new().set_italic(true))?; + write_wrapping(buf, txt, indent, None)?; + } + MdTree::Strikethrough(txt) => { + buf.set_color(ColorSpec::new().set_strikethrough(true))?; + write_wrapping(buf, txt, indent, None)?; + } + MdTree::PlainText(txt) => { + write_wrapping(buf, txt, indent, None)?; + } + MdTree::Link { disp, link } => { + write_wrapping(buf, disp, indent, Some(link))?; + } + MdTree::ParagraphBreak => { + buf.write_all(b"\n\n")?; + reset_cursor(); + } + MdTree::LineBreak => { + buf.write_all(b"\n")?; + reset_cursor(); + } + MdTree::HorizontalRule => { + (0..WIDTH.with(Cell::get)).for_each(|_| buf.write_all(b"-").unwrap()); + reset_cursor(); + } + MdTree::Heading(n, stream) => { + let mut cs = ColorSpec::new(); + cs.set_fg(Some(Color::Cyan)); + match n { + 1 => cs.set_intense(true).set_bold(true).set_underline(true), + 2 => cs.set_intense(true).set_underline(true), + 3 => cs.set_intense(true).set_italic(true), + 4.. => cs.set_underline(true).set_italic(true), + 0 => unreachable!(), + }; + write_stream(stream, buf, Some(&cs), 0)?; + buf.write_all(b"\n")?; + } + MdTree::OrderedListItem(n, stream) => { + let base = format!("{n}. "); + write_wrapping(buf, &format!("{base:<4}"), indent, None)?; + write_stream(stream, buf, None, indent + 4)?; + } + MdTree::UnorderedListItem(stream) => { + let base = "* "; + write_wrapping(buf, &format!("{base:<4}"), indent, None)?; + write_stream(stream, buf, None, indent + 4)?; + } + // Patterns popped in previous step + MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(), + } + + buf.reset()?; + + Ok(()) +} + +/// End of that block, just wrap the line +fn reset_cursor() { + CURSOR.with(|cur| cur.set(0)); +} + +/// Change to be generic on Write for testing. If we have a link URL, we don't +/// count the extra tokens to make it clickable. +fn write_wrapping<B: io::Write>( + buf: &mut B, + text: &str, + indent: usize, + link_url: Option<&str>, +) -> io::Result<()> { + let ind_ws = &b" "[..indent]; + let mut to_write = text; + if let Some(url) = link_url { + // This is a nonprinting prefix so we don't increment our cursor + write!(buf, "\x1b]8;;{url}\x1b\\")?; + } + CURSOR.with(|cur| { + loop { + if cur.get() == 0 { + buf.write_all(ind_ws)?; + cur.set(indent); + } + let ch_count = WIDTH.with(Cell::get) - cur.get(); + let mut iter = to_write.char_indices(); + let Some((end_idx, _ch)) = iter.nth(ch_count) else { + // Write entire line + buf.write_all(to_write.as_bytes())?; + cur.set(cur.get()+to_write.chars().count()); + break; + }; + + if let Some((break_idx, ch)) = to_write[..end_idx] + .char_indices() + .rev() + .find(|(_idx, ch)| ch.is_whitespace() || ['_', '-'].contains(ch)) + { + // Found whitespace to break at + if ch.is_whitespace() { + writeln!(buf, "{}", &to_write[..break_idx])?; + to_write = to_write[break_idx..].trim_start(); + } else { + // Break at a `-` or `_` separator + writeln!(buf, "{}", &to_write.get(..break_idx + 1).unwrap_or(to_write))?; + to_write = to_write.get(break_idx + 1..).unwrap_or_default().trim_start(); + } + } else { + // No whitespace, we need to just split + let ws_idx = + iter.find(|(_, ch)| ch.is_whitespace()).map_or(to_write.len(), |(idx, _)| idx); + writeln!(buf, "{}", &to_write[..ws_idx])?; + to_write = to_write.get(ws_idx + 1..).map_or("", str::trim_start); + } + cur.set(0); + } + if link_url.is_some() { + buf.write_all(b"\x1b]8;;\x1b\\")?; + } + + Ok(()) + }) +} + +#[cfg(test)] +#[path = "tests/term.rs"] +mod tests; diff --git a/compiler/rustc_errors/src/markdown/tests/input.md b/compiler/rustc_errors/src/markdown/tests/input.md new file mode 100644 index 000000000..7d207fc42 --- /dev/null +++ b/compiler/rustc_errors/src/markdown/tests/input.md @@ -0,0 +1,50 @@ +# H1 Heading [with a link][remote-link] + +H1 content: **some words in bold** and `so does inline code` + +## H2 Heading + +H2 content: _some words in italic_ + +### H3 Heading + +H3 content: ~~strikethrough~~ text + +#### H4 Heading + +H4 content: A [simple link](https://docs.rs) and a [remote-link]. + +--- + +A section break was above. We can also do paragraph breaks: + +(new paragraph) and unordered lists: + +- Item 1 in `code` +- Item 2 in _italics_ + +Or ordered: + +1. Item 1 in **bold** +2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet, + consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus elit quam, + pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan + in cursus sit amet, dictum a nunc. Suspendisse aliquet, lorem eu eleifend + accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem. + +--- + +## Code + +Both `inline code` and code blocks are supported: + +```rust +/// A rust enum +#[derive(Debug, PartialEq, Clone)] +enum Foo { + /// Start of line + Bar +} +``` + +[remote-link]: http://docs.rs diff --git a/compiler/rustc_errors/src/markdown/tests/output.stdout b/compiler/rustc_errors/src/markdown/tests/output.stdout new file mode 100644 index 000000000..23c60d5c3 --- /dev/null +++ b/compiler/rustc_errors/src/markdown/tests/output.stdout @@ -0,0 +1,35 @@ +[0m[0m[1m[4m[38;5;14mH1 Heading [0m[0m[1m[4m[38;5;14m]8;;http://docs.rs\with a link]8;;\[0m[0m[1m[4m[38;5;14m[0m +[0mH1 content: [0m[0m[1msome words in bold[0m and [0m[0m[2mso does inline code[0m + +[0m[0m[4m[38;5;14mH2 Heading[0m[0m[4m[38;5;14m[0m +[0mH2 content: [0m[0m[3msome words in italic[0m + +[0m[0m[3m[38;5;14mH3 Heading[0m[0m[3m[38;5;14m[0m +[0mH3 content: [0m[0m[9mstrikethrough[0m text[0m + +[0m[0m[3m[4m[36mH4 Heading[0m[0m[3m[4m[36m[0m +[0mH4 content: A [0m]8;;https://docs.rs\simple link]8;;\[0m and a [0m]8;;http://docs.rs\remote-link]8;;\[0m.[0m +[0m--------------------------------------------------------------------------------------------------------------------------------------------[0m +[0mA section break was above. We can also do paragraph breaks:[0m + +[0m(new paragraph) and unordered lists:[0m + +[0m* [0mItem 1 in [0m[0m[2mcode[0m[0m[0m +[0m* [0mItem 2 in [0m[0m[3mitalics[0m[0m[0m + +[0mOr ordered:[0m + +[0m1. [0mItem 1 in [0m[0m[1mbold[0m[0m[0m +[0m2. [0mItem 2 with some long lines that should wrap: Lorem ipsum dolor sit amet,[0m consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus + elit quam,[0m pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan[0m in cursus sit amet, dictum a nunc. Suspendisse + aliquet, lorem eu eleifend[0m accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.[0m[0m[0m +[0m--------------------------------------------------------------------------------------------------------------------------------------------[0m +[0m[0m[4m[38;5;14mCode[0m[0m[4m[38;5;14m[0m +[0mBoth [0m[0m[2minline code[0m and code blocks are supported:[0m + +[0m[0m[2m/// A rust enum +#[derive(Debug, PartialEq, Clone)] +enum Foo { + /// Start of line + Bar +}[0m[0m diff --git a/compiler/rustc_errors/src/markdown/tests/parse.rs b/compiler/rustc_errors/src/markdown/tests/parse.rs new file mode 100644 index 000000000..e39e8c89b --- /dev/null +++ b/compiler/rustc_errors/src/markdown/tests/parse.rs @@ -0,0 +1,312 @@ +use super::*; +use ParseOpt as PO; + +#[test] +fn test_parse_simple() { + let buf = "**abcd** rest"; + let (t, r) = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong).unwrap(); + assert_eq!(t, MdTree::Strong("abcd")); + assert_eq!(r, b" rest"); + + // Escaping should fail + let buf = r"**abcd\** rest"; + let res = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong); + assert!(res.is_none()); +} + +#[test] +fn test_parse_comment() { + let opt = PO::TrimNoEsc; + let buf = "<!-- foobar! -->rest"; + let (t, r) = parse_simple_pat(buf.as_bytes(), CMT_S, CMT_E, opt, MdTree::Comment).unwrap(); + assert_eq!(t, MdTree::Comment("foobar!")); + assert_eq!(r, b"rest"); + + let buf = r"<!-- foobar! \-->rest"; + let (t, r) = parse_simple_pat(buf.as_bytes(), CMT_S, CMT_E, opt, MdTree::Comment).unwrap(); + assert_eq!(t, MdTree::Comment(r"foobar! \")); + assert_eq!(r, b"rest"); +} + +#[test] +fn test_parse_heading() { + let buf1 = "# Top level\nrest"; + let (t, r) = parse_heading(buf1.as_bytes()).unwrap(); + assert_eq!(t, MdTree::Heading(1, vec![MdTree::PlainText("Top level")].into())); + assert_eq!(r, b"\nrest"); + + let buf1 = "# Empty"; + let (t, r) = parse_heading(buf1.as_bytes()).unwrap(); + assert_eq!(t, MdTree::Heading(1, vec![MdTree::PlainText("Empty")].into())); + assert_eq!(r, b""); + + // Combo + let buf2 = "### Top `level` _woo_\nrest"; + let (t, r) = parse_heading(buf2.as_bytes()).unwrap(); + assert_eq!( + t, + MdTree::Heading( + 3, + vec![ + MdTree::PlainText("Top "), + MdTree::CodeInline("level"), + MdTree::PlainText(" "), + MdTree::Emphasis("woo"), + ] + .into() + ) + ); + assert_eq!(r, b"\nrest"); +} + +#[test] +fn test_parse_code_inline() { + let buf1 = "`abcd` rest"; + let (t, r) = parse_codeinline(buf1.as_bytes()).unwrap(); + assert_eq!(t, MdTree::CodeInline("abcd")); + assert_eq!(r, b" rest"); + + // extra backticks, newline + let buf2 = "```ab\ncd``` rest"; + let (t, r) = parse_codeinline(buf2.as_bytes()).unwrap(); + assert_eq!(t, MdTree::CodeInline("ab\ncd")); + assert_eq!(r, b" rest"); + + // test no escaping + let buf3 = r"`abcd\` rest"; + let (t, r) = parse_codeinline(buf3.as_bytes()).unwrap(); + assert_eq!(t, MdTree::CodeInline(r"abcd\")); + assert_eq!(r, b" rest"); +} + +#[test] +fn test_parse_code_block() { + let buf1 = "```rust\ncode\ncode\n```\nleftovers"; + let (t, r) = parse_codeblock(buf1.as_bytes()); + assert_eq!(t, MdTree::CodeBlock { txt: "code\ncode", lang: Some("rust") }); + assert_eq!(r, b"\nleftovers"); + + let buf2 = "`````\ncode\ncode````\n`````\nleftovers"; + let (t, r) = parse_codeblock(buf2.as_bytes()); + assert_eq!(t, MdTree::CodeBlock { txt: "code\ncode````", lang: None }); + assert_eq!(r, b"\nleftovers"); +} + +#[test] +fn test_parse_link() { + let simple = "[see here](docs.rs) other"; + let (t, r) = parse_any_link(simple.as_bytes(), false).unwrap(); + assert_eq!(t, MdTree::Link { disp: "see here", link: "docs.rs" }); + assert_eq!(r, b" other"); + + let simple_toplevel = "[see here](docs.rs) other"; + let (t, r) = parse_any_link(simple_toplevel.as_bytes(), true).unwrap(); + assert_eq!(t, MdTree::Link { disp: "see here", link: "docs.rs" }); + assert_eq!(r, b" other"); + + let reference = "[see here] other"; + let (t, r) = parse_any_link(reference.as_bytes(), true).unwrap(); + assert_eq!(t, MdTree::RefLink { disp: "see here", id: None }); + assert_eq!(r, b" other"); + + let reference_full = "[see here][docs-rs] other"; + let (t, r) = parse_any_link(reference_full.as_bytes(), false).unwrap(); + assert_eq!(t, MdTree::RefLink { disp: "see here", id: Some("docs-rs") }); + assert_eq!(r, b" other"); + + let reference_def = "[see here]: docs.rs\nother"; + let (t, r) = parse_any_link(reference_def.as_bytes(), true).unwrap(); + assert_eq!(t, MdTree::LinkDef { id: "see here", link: "docs.rs" }); + assert_eq!(r, b"\nother"); +} + +const IND1: &str = r"test standard + ind + ind2 +not ind"; +const IND2: &str = r"test end of stream + 1 + 2 +"; +const IND3: &str = r"test empty lines + 1 + 2 + +not ind"; + +#[test] +fn test_indented_section() { + let (t, r) = get_indented_section(IND1.as_bytes()); + assert_eq!(str::from_utf8(t).unwrap(), "test standard\n ind\n ind2"); + assert_eq!(str::from_utf8(r).unwrap(), "\nnot ind"); + + let (txt, rest) = get_indented_section(IND2.as_bytes()); + assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n 1\n 2"); + assert_eq!(str::from_utf8(rest).unwrap(), "\n"); + + let (txt, rest) = get_indented_section(IND3.as_bytes()); + assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n 1\n 2"); + assert_eq!(str::from_utf8(rest).unwrap(), "\n\nnot ind"); +} + +const HBT: &str = r"# Heading + +content"; + +#[test] +fn test_heading_breaks() { + let expected = vec![ + MdTree::Heading(1, vec![MdTree::PlainText("Heading")].into()), + MdTree::PlainText("content"), + ] + .into(); + let res = entrypoint(HBT); + assert_eq!(res, expected); +} + +const NL1: &str = r"start + +end"; +const NL2: &str = r"start + + +end"; +const NL3: &str = r"start + + + +end"; + +#[test] +fn test_newline_breaks() { + let expected = + vec![MdTree::PlainText("start"), MdTree::ParagraphBreak, MdTree::PlainText("end")].into(); + for (idx, check) in [NL1, NL2, NL3].iter().enumerate() { + let res = entrypoint(check); + assert_eq!(res, expected, "failed {idx}"); + } +} + +const WRAP: &str = "plain _italics +italics_"; + +#[test] +fn test_wrap_pattern() { + let expected = vec![ + MdTree::PlainText("plain "), + MdTree::Emphasis("italics"), + MdTree::Emphasis(" "), + MdTree::Emphasis("italics"), + ] + .into(); + let res = entrypoint(WRAP); + assert_eq!(res, expected); +} + +const WRAP_NOTXT: &str = r"_italics_ +**bold**"; + +#[test] +fn test_wrap_notxt() { + let expected = + vec![MdTree::Emphasis("italics"), MdTree::PlainText(" "), MdTree::Strong("bold")].into(); + let res = entrypoint(WRAP_NOTXT); + assert_eq!(res, expected); +} + +const MIXED_LIST: &str = r"start +- _italics item_ +<!-- comment --> +- **bold item** + second line [link1](foobar1) + third line [link2][link-foo] +- :crab: + extra indent +end +[link-foo]: foobar2 +"; + +#[test] +fn test_list() { + let expected = vec![ + MdTree::PlainText("start"), + MdTree::ParagraphBreak, + MdTree::UnorderedListItem(vec![MdTree::Emphasis("italics item")].into()), + MdTree::LineBreak, + MdTree::UnorderedListItem( + vec![ + MdTree::Strong("bold item"), + MdTree::PlainText(" second line "), + MdTree::Link { disp: "link1", link: "foobar1" }, + MdTree::PlainText(" third line "), + MdTree::Link { disp: "link2", link: "foobar2" }, + ] + .into(), + ), + MdTree::LineBreak, + MdTree::UnorderedListItem( + vec![MdTree::PlainText("🦀"), MdTree::PlainText(" extra indent")].into(), + ), + MdTree::ParagraphBreak, + MdTree::PlainText("end"), + ] + .into(); + let res = entrypoint(MIXED_LIST); + assert_eq!(res, expected); +} + +const SMOOSHED: &str = r#" +start +### heading +1. ordered item +```rust +println!("Hello, world!"); +``` +`inline` +``end`` +"#; + +#[test] +fn test_without_breaks() { + let expected = vec![ + MdTree::PlainText("start"), + MdTree::ParagraphBreak, + MdTree::Heading(3, vec![MdTree::PlainText("heading")].into()), + MdTree::OrderedListItem(1, vec![MdTree::PlainText("ordered item")].into()), + MdTree::ParagraphBreak, + MdTree::CodeBlock { txt: r#"println!("Hello, world!");"#, lang: Some("rust") }, + MdTree::ParagraphBreak, + MdTree::CodeInline("inline"), + MdTree::PlainText(" "), + MdTree::CodeInline("end"), + ] + .into(); + let res = entrypoint(SMOOSHED); + assert_eq!(res, expected); +} + +const CODE_STARTLINE: &str = r#" +start +`code` +middle +`more code` +end +"#; + +#[test] +fn test_code_at_start() { + let expected = vec![ + MdTree::PlainText("start"), + MdTree::PlainText(" "), + MdTree::CodeInline("code"), + MdTree::PlainText(" "), + MdTree::PlainText("middle"), + MdTree::PlainText(" "), + MdTree::CodeInline("more code"), + MdTree::PlainText(" "), + MdTree::PlainText("end"), + ] + .into(); + let res = entrypoint(CODE_STARTLINE); + assert_eq!(res, expected); +} diff --git a/compiler/rustc_errors/src/markdown/tests/term.rs b/compiler/rustc_errors/src/markdown/tests/term.rs new file mode 100644 index 000000000..3b31c6d62 --- /dev/null +++ b/compiler/rustc_errors/src/markdown/tests/term.rs @@ -0,0 +1,90 @@ +use std::io::BufWriter; +use std::path::PathBuf; +use termcolor::{BufferWriter, ColorChoice}; + +use super::*; +use crate::markdown::MdStream; + +const INPUT: &str = include_str!("input.md"); +const OUTPUT_PATH: &[&str] = &[env!("CARGO_MANIFEST_DIR"), "src","markdown","tests","output.stdout"]; + +const TEST_WIDTH: usize = 80; + +// We try to make some words long to create corner cases +const TXT: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit. +Fusce-id-urna-sollicitudin, pharetra nisl nec, lobortis tellus. In at +metus hendrerit, tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, +porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor quis +dolor non venenatis. Aliquam ut. "; + +const WRAPPED: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna- +sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit, +tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-sed, +ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam ut. Lorem + ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna- + sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit, + tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc- + sed, ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam + ut. Sample link lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, +consecteturadipiscingelit. Fusce-id-urna-sollicitudin, pharetra nisl nec, +lobortis tellus. In at metus hendrerit, tincidunteratvel, ultrices turpis. +Curabitur_risus_sapien, porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor +quis dolor non venenatis. Aliquam ut. "; + +#[test] +fn test_wrapping_write() { + WIDTH.with(|w| w.set(TEST_WIDTH)); + let mut buf = BufWriter::new(Vec::new()); + let txt = TXT.replace("-\n","-").replace("_\n","_").replace('\n', " ").replace(" ", ""); + write_wrapping(&mut buf, &txt, 0, None).unwrap(); + write_wrapping(&mut buf, &txt, 4, None).unwrap(); + write_wrapping( + &mut buf, + "Sample link lorem ipsum dolor sit amet. ", + 4, + Some("link-address-placeholder"), + ) + .unwrap(); + write_wrapping(&mut buf, &txt, 0, None).unwrap(); + let out = String::from_utf8(buf.into_inner().unwrap()).unwrap(); + let out = out + .replace("\x1b\\", "") + .replace('\x1b', "") + .replace("]8;;", "") + .replace("link-address-placeholder", ""); + + for line in out.lines() { + assert!(line.len() <= TEST_WIDTH, "line length\n'{line}'") + } + + assert_eq!(out, WRAPPED); +} + +#[test] +fn test_output() { + // Capture `--bless` when run via ./x + let bless = std::env::var("RUSTC_BLESS").unwrap_or_default() == "1"; + let ast = MdStream::parse_str(INPUT); + let bufwtr = BufferWriter::stderr(ColorChoice::Always); + let mut buffer = bufwtr.buffer(); + ast.write_termcolor_buf(&mut buffer).unwrap(); + + let mut blessed = PathBuf::new(); + blessed.extend(OUTPUT_PATH); + + if bless { + std::fs::write(&blessed, buffer.into_inner()).unwrap(); + eprintln!("blessed output at {}", blessed.display()); + } else { + let output = buffer.into_inner(); + if std::fs::read(blessed).unwrap() != output { + // hack: I don't know any way to write bytes to the captured stdout + // that cargo test uses + let mut out = std::io::stdout(); + out.write_all(b"\n\nMarkdown output did not match. Expected:\n").unwrap(); + out.write_all(&output).unwrap(); + out.write_all(b"\n\n").unwrap(); + panic!("markdown output mismatch"); + } + } +} |