summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_errors/src/markdown
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:57:31 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:57:31 +0000
commitdc0db358abe19481e475e10c32149b53370f1a1c (patch)
treeab8ce99c4b255ce46f99ef402c27916055b899ee /compiler/rustc_errors/src/markdown
parentReleasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
downloadrustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_errors/src/markdown')
-rw-r--r--compiler/rustc_errors/src/markdown/mod.rs76
-rw-r--r--compiler/rustc_errors/src/markdown/parse.rs588
-rw-r--r--compiler/rustc_errors/src/markdown/term.rs189
-rw-r--r--compiler/rustc_errors/src/markdown/tests/input.md50
-rw-r--r--compiler/rustc_errors/src/markdown/tests/output.stdout35
-rw-r--r--compiler/rustc_errors/src/markdown/tests/parse.rs312
-rw-r--r--compiler/rustc_errors/src/markdown/tests/term.rs90
7 files changed, 1340 insertions, 0 deletions
diff --git a/compiler/rustc_errors/src/markdown/mod.rs b/compiler/rustc_errors/src/markdown/mod.rs
new file mode 100644
index 000000000..53b766dfc
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/mod.rs
@@ -0,0 +1,76 @@
+//! A simple markdown parser that can write formatted text to the terminal
+//!
+//! Entrypoint is `MdStream::parse_str(...)`
+use std::io;
+
+use termcolor::{Buffer, BufferWriter, ColorChoice};
+mod parse;
+mod term;
+
+/// An AST representation of a Markdown document: an ordered list of `MdTree` nodes borrowing from the source text
+#[derive(Clone, Debug, Default, PartialEq)]
+pub struct MdStream<'a>(Vec<MdTree<'a>>); // newtype over the node list; can be built from a `Vec<MdTree>` via `From`
+
+impl<'a> MdStream<'a> {
+ /// Parse a markdown string into a stream of token trees (delegates to `parse::entrypoint`)
+ #[must_use]
+ pub fn parse_str(s: &str) -> MdStream<'_> {
+ parse::entrypoint(s)
+ }
+
+ /// Write formatted output to a termcolor buffer (delegates to `term::entrypoint`); I/O errors are returned
+ pub fn write_termcolor_buf(&self, buf: &mut Buffer) -> io::Result<()> {
+ term::entrypoint(self, buf)
+ }
+}
+
+/// Create a termcolor `BufferWriter` for stdout with the `Always` color choice
+pub fn create_stdout_bufwtr() -> BufferWriter {
+ BufferWriter::stdout(ColorChoice::Always)
+}
+
+/// A single tokentree within a Markdown document; all text is borrowed from the parsed input
+#[derive(Clone, Debug, PartialEq)]
+pub enum MdTree<'a> {
+ /// Leaf type: a `<!-- ... -->` comment (dropped again during normalization)
+ Comment(&'a str),
+ CodeBlock { // fenced code block
+ txt: &'a str, // block contents, without the fences
+ lang: Option<&'a str>, // text directly following the opening fence, if any
+ },
+ CodeInline(&'a str), // backtick-wrapped inline code
+ Strong(&'a str), // `**text**`
+ Emphasis(&'a str), // `_text_`
+ Strikethrough(&'a str), // `~~text~~`
+ PlainText(&'a str), // anything that matched no other pattern
+ /// `[Foo](www.foo.com)` or simple anchor `<www.foo.com>`
+ Link {
+ disp: &'a str, // text to display
+ link: &'a str, // link target
+ },
+ /// `[Foo link][ref]`
+ RefLink {
+ disp: &'a str, // text to display
+ id: Option<&'a str>, // reference id; `None` for the bare `[ref]` form
+ },
+ /// `[ref]: www.foo.com`
+ LinkDef {
+ id: &'a str, // reference id that `RefLink`s are matched against
+ link: &'a str, // link target
+ },
+ /// Break between two paragraphs (double `\n`), not directly parsed but
+ /// added later
+ ParagraphBreak,
+ /// Break between two lines (single `\n`)
+ LineBreak,
+ HorizontalRule, // `---` on its own line
+ Heading(u8, MdStream<'a>), // (level, contents)
+ OrderedListItem(u16, MdStream<'a>), // (item number, contents)
+ UnorderedListItem(MdStream<'a>), // contents of a `* ` or `- ` item
+}
+
+impl<'a> From<Vec<MdTree<'a>>> for MdStream<'a> {
+    /// Wrap an already-built list of token trees; the vector is moved, not copied.
+    fn from(trees: Vec<MdTree<'a>>) -> Self {
+        MdStream(trees)
+    }
+}
diff --git a/compiler/rustc_errors/src/markdown/parse.rs b/compiler/rustc_errors/src/markdown/parse.rs
new file mode 100644
index 000000000..362a451fd
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/parse.rs
@@ -0,0 +1,588 @@
+use crate::markdown::{MdStream, MdTree};
+use std::{iter, mem, str};
+
+/// Short aliases that we can use in match patterns. `_S`/`_E` suffixes mark start/end patterns; where no
+/// end pattern is listed, the end is decided by the parse function (e.g. same delimiter or end of line)
+const ANC_E: &[u8] = b">"; // end of a `<...>` anchor
+const ANC_S: &[u8] = b"<"; // start of a `<...>` anchor
+const BRK: &[u8] = b"---"; // horizontal rule
+const CBK: &[u8] = b"```"; // code block fence
+const CIL: &[u8] = b"`"; // inline code delimiter
+const CMT_E: &[u8] = b"-->"; // end of a comment
+const CMT_S: &[u8] = b"<!--"; // start of a comment
+const EMP: &[u8] = b"_"; // emphasis delimiter
+const HDG: &[u8] = b"#"; // heading marker
+const LNK_CHARS: &str = "$-_.+!*'()/&?=:%"; // characters accepted inside a `<...>` anchor
+const LNK_E: &[u8] = b"]"; // end of a link's bracketed text
+const LNK_S: &[u8] = b"["; // start of a link's bracketed text
+const STG: &[u8] = b"**"; // strong delimiter
+const STK: &[u8] = b"~~"; // strikethrough delimiter
+const UL1: &[u8] = b"* "; // unordered list bullet
+const UL2: &[u8] = b"- "; // alternative unordered list bullet
+
+/// Pattern replacements applied to text by `expand_plaintext`, as `(from, to)` pairs processed in this order
+const REPLACEMENTS: &[(&str, &str)] = &[
+ ("(c)", "©"),
+ ("(C)", "©"),
+ ("(r)", "®"),
+ ("(R)", "®"),
+ ("(tm)", "™"),
+ ("(TM)", "™"),
+ (":crab:", "🦀"),
+ ("\n", " "), // a single newline inside a paragraph becomes a space
+];
+
+/// `(extracted, remaining)`: the tree that was parsed and the bytes that follow it
+type Parsed<'a> = (MdTree<'a>, &'a [u8]);
+/// Output of a parse function; `None` means the pattern did not match
+type ParseResult<'a> = Option<Parsed<'a>>;
+
+/// Parsing context handed to `parse_recursive`
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct Context {
+ /// If true, we are at the topmost level (not recursing a nested tt)
+ top_block: bool,
+ /// Class of the character that precedes the start of the buffer
+ prev: Prev,
+}
+
+/// Character class preceding this one; decides which patterns may start at the current position
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Prev {
+ Newline, // `\n`: we are at the start of a line
+ /// Whitespace that is not a newline
+ Whitespace,
+ Escape, // a backslash
+ Any, // every other byte
+}
+
+impl Default for Context {
+    /// The usual context for nested (non top-level) parsing: not the top
+    /// block, and positioned after plain whitespace rather than at a line
+    /// start or after an escape.
+    fn default() -> Self {
+        Context { prev: Prev::Whitespace, top_block: false }
+    }
+}
+
+/// Flags for the simple parser function (`parse_simple_pat`)
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum ParseOpt {
+ /// Ignore escapes before closing pattern, trim content
+ TrimNoEsc,
+ None, // honor backslash escapes before the closing pattern, keep content untrimmed
+}
+
+/// Parse a whole document: trim it, tokenize it as a top-level block that
+/// begins at a line start, then normalize the resulting stream.
+pub fn entrypoint(txt: &str) -> MdStream<'_> {
+    let top_level = Context { top_block: true, prev: Prev::Newline };
+    let raw_stream = parse_recursive(txt.trim().as_bytes(), top_level);
+    let mut linkdefs = Vec::new();
+    normalize(raw_stream, &mut linkdefs)
+}
+
+/// Parse a buffer with specified context into a flat stream; unmatched bytes are emitted as `PlainText`
+fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
+ use ParseOpt as Po;
+ use Prev::{Escape, Newline, Whitespace};
+
+ let mut stream: Vec<MdTree<'a>> = Vec::new();
+ let Context { top_block: top_blk, mut prev } = ctx;
+
+ // wip_buf is our entire unprocessed (unpushed) buffer, loop_buf is our to
+ // check buffer that shrinks with each loop
+ let mut wip_buf = buf;
+ let mut loop_buf = wip_buf;
+
+ while !loop_buf.is_empty() {
+ let next_prev = match loop_buf[0] { // classify the current byte for the next iteration
+ b'\n' => Newline,
+ b'\\' => Escape,
+ x if x.is_ascii_whitespace() => Whitespace,
+ _ => Prev::Any,
+ };
+
+ let res: ParseResult<'_> = match (top_blk, prev) { // arms are tried in order; the first matching guard wins
+ (_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
+ parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
+ }
+ (true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)), // must precede the inline-code arm
+ (_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
+ (true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
+ (true, Newline) if loop_buf.starts_with(BRK) => {
+ Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1)) // the rest of the line is discarded
+ }
+ (_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
+ parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
+ }
+ (_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
+ parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
+ }
+ (_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
+ parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
+ }
+ (_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
+ let tt_fn = |link| MdTree::Link { disp: link, link };
+ let ret = parse_simple_pat(loop_buf, ANC_S, ANC_E, Po::None, tt_fn);
+ match ret {
+ Some((MdTree::Link { disp, .. }, _))
+ if disp.chars().all(|ch| LNK_CHARS.contains(ch)) => // NOTE(review): LNK_CHARS holds only punctuation, so alphanumeric URLs fail this check - confirm intent
+ {
+ ret
+ }
+ _ => None,
+ }
+ }
+ (_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
+ Some(parse_unordered_li(loop_buf))
+ }
+ (_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
+ (_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
+ parse_any_link(loop_buf, top_blk && prev == Prev::Newline) // link definitions only at a top-level line start
+ }
+ (_, Escape | _) => None, // anything else (including after a backslash) stays plain text
+ };
+
+ if let Some((tree, rest)) = res {
+ // We found something: push our WIP and then push the found tree
+ let prev_buf = &wip_buf[..(wip_buf.len() - loop_buf.len())]; // bytes scanned since the last pushed tree
+ if !prev_buf.is_empty() {
+ let prev_str = str::from_utf8(prev_buf).unwrap();
+ stream.push(MdTree::PlainText(prev_str));
+ }
+ stream.push(tree);
+
+ wip_buf = rest;
+ loop_buf = rest;
+ } else {
+ // Just move on to the next byte
+ loop_buf = &loop_buf[1..];
+ // If we are at the end and haven't found anything, just push plain text
+ if loop_buf.is_empty() && !wip_buf.is_empty() {
+ let final_str = str::from_utf8(wip_buf).unwrap();
+ stream.push(MdTree::PlainText(final_str));
+ }
+ };
+
+ prev = next_prev; // computed before parsing, i.e. from the first byte examined this iteration
+ }
+
+ MdStream(stream)
+}
+
+/// The simplest kind of pattern: text enclosed between a start and an end
+/// pattern.
+///
+/// The first `start_pat.len()` bytes of `buf` are skipped without being
+/// checked. With `ParseOpt::TrimNoEsc` the end pattern also matches right
+/// after a backslash and the extracted text is trimmed. Returns `None` when no
+/// end pattern is found; otherwise `create_tt` builds the tree from the text.
+fn parse_simple_pat<'a, F>(
+    buf: &'a [u8],
+    start_pat: &[u8],
+    end_pat: &[u8],
+    opts: ParseOpt,
+    create_tt: F,
+) -> ParseResult<'a>
+where
+    F: FnOnce(&'a str) -> MdTree<'a>,
+{
+    let trim_no_esc = opts == ParseOpt::TrimNoEsc;
+    let body = &buf[start_pat.len()..];
+    let (inner, rest) = parse_with_end_pat(body, end_pat, trim_no_esc)?;
+    let inner = str::from_utf8(inner).unwrap();
+    let inner = if trim_no_esc { inner.trim() } else { inner };
+    Some((create_tt(inner), rest))
+}
+
+/// Parse inline code wrapped in backticks. The closing delimiter must be the
+/// same run of backticks that opened the span; escapes are not honored.
+fn parse_codeinline(buf: &[u8]) -> ParseResult<'_> {
+    let fence_len = buf.iter().take_while(|&&byte| byte == b'`').count();
+    let (fence, body) = buf.split_at(fence_len);
+    let (code, rest) = parse_with_end_pat(body, fence, true)?;
+    let code = str::from_utf8(code).unwrap();
+    Some((MdTree::CodeInline(code), rest))
+}
+
+/// Parse a codeblock. Accounts for >3 backticks and language specification; never fails (an unclosed block runs to the end)
+fn parse_codeblock(buf: &[u8]) -> Parsed<'_> {
+ // account for ````code```` style
+ let seps = buf.iter().take_while(|ch| **ch == b'`').count();
+ let end_sep = &buf[..seps]; // the closing fence must repeat the opening run of backticks
+ let mut working = &buf[seps..];
+
+ // Handle "````rust" style language specifications
+ let next_ws_idx = working.iter().take_while(|ch| !ch.is_ascii_whitespace()).count();
+
+ let lang = if next_ws_idx > 0 {
+ // Munch the lang
+ let tmp = str::from_utf8(&working[..next_ws_idx]).unwrap();
+ working = &working[next_ws_idx..];
+ Some(tmp)
+ } else {
+ None
+ };
+
+ let mut end_pat = vec![b'\n']; // the closing fence has to start a line
+ end_pat.extend(end_sep);
+
+ // Find first end pattern with nothing else on its line
+ let mut found = None;
+ for idx in (0..working.len()).filter(|idx| working[*idx..].starts_with(&end_pat)) {
+ let (eol_txt, rest) = parse_to_newline(&working[(idx + end_pat.len())..]);
+ if !eol_txt.iter().any(u8::is_ascii_whitespace) { // NOTE(review): accepts any whitespace-free trailer and rejects trailing spaces - confirm vs. the comment above
+ found = Some((&working[..idx], rest));
+ break;
+ }
+ }
+
+ let (txt, rest) = found.unwrap_or((working, &[])); // no closing fence: everything left is code
+ let txt = str::from_utf8(txt).unwrap().trim_matches('\n');
+
+ (MdTree::CodeBlock { txt, lang }, rest)
+}
+
+/// Parse a `#`-style heading located at the start of `buf`.
+///
+/// The heading level is the number of leading `#` bytes. Returns `None` when
+/// there are more than six of them, or when the run of `#` is directly
+/// followed by a non-whitespace byte (that is ordinary text such as `#tag`).
+/// On success the heading text is the remainder of the current line, parsed
+/// recursively as inline content; the residual starts at the line's `\n`.
+///
+/// A bare `#` at the end of the input or directly before a newline yields a
+/// heading with empty contents rather than panicking or consuming the
+/// following line.
+fn parse_heading(buf: &[u8]) -> ParseResult<'_> {
+    let level = buf.iter().take_while(|ch| **ch == b'#').count();
+    if level > 6 {
+        // Enforce max 6 levels
+        return None;
+    }
+    let buf = &buf[level..];
+
+    let title = match buf.first() {
+        // Nothing (more) on this line: empty heading, keep any newline in the residual
+        None | Some(b'\n') => buf,
+        // Skip the single separator between the `##` pattern and the title
+        Some(ch) if ch.is_ascii_whitespace() => &buf[1..],
+        // Enforce whitespace following the `##` pattern
+        Some(_) => return None,
+    };
+
+    let (txt, rest) = parse_to_newline(title);
+    let ctx = Context { top_block: false, prev: Prev::Whitespace };
+    let stream = parse_recursive(txt, ctx);
+
+    // `level` is at most 6 here, so the conversion cannot fail
+    Some((MdTree::Heading(level.try_into().unwrap(), stream), rest))
+}
+
+/// Parse one bulleted list item. `buf` must start with `* ` or `- `; the item
+/// spans the indented section after the bullet and is parsed as nested content.
+fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
+    debug_assert!(buf.starts_with(UL1) || buf.starts_with(UL2));
+    let (item, rest) = get_indented_section(&buf[2..]);
+    // `Context::default()` is the nested, after-whitespace context
+    let contents = parse_recursive(trim_ascii_start(item), Context::default());
+    (MdTree::UnorderedListItem(contents), rest)
+}
+
+/// Parse one numbered list item. The caller must already have checked that
+/// `ord_list_start(buf)` succeeds; the item spans the indented section after
+/// the `N. ` marker and is parsed as nested content.
+fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
+    let (number, body_start) = ord_list_start(buf).unwrap(); // success tested in caller
+    let (item, rest) = get_indented_section(&buf[body_start..]);
+    // `Context::default()` is the nested, after-whitespace context
+    let contents = parse_recursive(trim_ascii_start(item), Context::default());
+    (MdTree::OrderedListItem(number, contents), rest)
+}
+
+/// Split `buf` at the first newline whose following line is blank or does not start with whitespace;
+/// returns `(section, rest)` where `rest` begins at that newline (continuation lines must be indented)
+fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
+ let mut end = buf.len(); // default: the whole buffer is the section
+ for (idx, window) in buf.windows(2).enumerate() {
+ let &[ch, next_ch] = window else {unreachable!("always 2 elements")};
+ if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' { // last window and the buffer ends in `\n`
+ // End of stream
+ end = buf.len().saturating_sub(1); // leave the final newline in `rest`
+ break;
+ } else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') { // next line is unindented or empty
+ end = idx;
+ break;
+ }
+ }
+
+ (&buf[..end], &buf[end..])
+}
+
+/// Check whether `buf` begins with an ordered list marker such as `1. `.
+///
+/// The dot has to appear within the first ten bytes, be preceded by text that
+/// parses as a `u16`, and be followed by an ASCII whitespace byte. Returns the
+/// parsed number together with the offset just past that whitespace byte.
+fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {
+    let dot = buf.iter().take(10).position(|&byte| byte == b'.')?;
+    let digits = str::from_utf8(&buf[..dot]).ok()?;
+    match buf.get(dot + 1) {
+        Some(sep) if sep.is_ascii_whitespace() => {}
+        _ => return None,
+    }
+    let number: u16 = digits.parse().ok()?;
+    Some((number, dot + 2))
+}
+
+/// Parse any bracket-initiated link form. `buf` must start with `[`.
+///
+/// Depending on the byte after the closing `]` this yields a link definition
+/// (`[id]: target`, only when `can_be_def` is set, i.e. top level at the start
+/// of a line), an inline link (`[text](target)`), a reference link with an id
+/// (`[text][id]`), or a bare reference link (`[text]`). Returns `None` when
+/// the bracket is never closed or nothing follows the closing bracket.
+fn parse_any_link(buf: &[u8], can_be_def: bool) -> ParseResult<'_> {
+    let (bracketed, rest) = parse_with_end_pat(&buf[1..], LNK_E, true)?;
+    let next = *rest.first()?;
+    let disp = str::from_utf8(bracketed).unwrap();
+
+    if can_be_def && next == b':' {
+        let (target, remaining) = parse_to_newline(&rest[1..]);
+        let link = str::from_utf8(target).unwrap().trim();
+        return Some((MdTree::LinkDef { id: disp, link }, remaining));
+    }
+
+    match next {
+        b'(' => {
+            let make_link = |link| MdTree::Link { disp, link };
+            parse_simple_pat(rest, b"(", b")", ParseOpt::TrimNoEsc, make_link)
+        }
+        b'[' => {
+            let make_ref = |id| MdTree::RefLink { disp, id: Some(id) };
+            parse_simple_pat(rest, b"[", b"]", ParseOpt::TrimNoEsc, make_ref)
+        }
+        _ => Some((MdTree::RefLink { disp, id: None }, rest)),
+    }
+}
+
+/// Locate the first usable occurrence of `end_sep` in `buf` and split around
+/// it, returning `(match, residual)` with the separator itself dropped.
+///
+/// Unless `ignore_esc` is set, an occurrence directly preceded by a backslash
+/// is skipped. Returns `None` when no usable occurrence exists.
+fn parse_with_end_pat<'a>(
+    buf: &'a [u8],
+    end_sep: &[u8],
+    ignore_esc: bool,
+) -> Option<(&'a [u8], &'a [u8])> {
+    let sep_at = (0..buf.len()).find(|&idx| {
+        let escaped = idx > 0 && buf[idx - 1] == b'\\';
+        buf[idx..].starts_with(end_sep) && (ignore_esc || !escaped)
+    })?;
+    let residual = &buf[sep_at + end_sep.len()..];
+    Some((&buf[..sep_at], residual))
+}
+
+/// Return `(match, residual)` split at the first `\n`. The newline stays at
+/// the front of the residual; without a newline the residual is empty.
+fn parse_to_newline(buf: &[u8]) -> (&[u8], &[u8]) {
+    match buf.iter().position(|&byte| byte == b'\n') {
+        Some(eol) => buf.split_at(eol),
+        None => (buf, &[]),
+    }
+}
+
+/// Take a parsed stream and fix the little things: expand text, resolve reflinks, drop non-printing items, fix up breaks
+fn normalize<'a>(MdStream(stream): MdStream<'a>, linkdefs: &mut Vec<MdTree<'a>>) -> MdStream<'a> {
+ let mut new_stream = Vec::with_capacity(stream.len());
+ let new_defs = stream.iter().filter(|tt| matches!(tt, MdTree::LinkDef { .. }));
+ linkdefs.extend(new_defs.cloned()); // collected first so reflinks may precede their definitions
+
+ // Run plaintext expansions on types that need it, call this function on nested types
+ for item in stream {
+ match item {
+ MdTree::PlainText(txt) => expand_plaintext(txt, &mut new_stream, MdTree::PlainText),
+ MdTree::Strong(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Strong),
+ MdTree::Emphasis(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Emphasis),
+ MdTree::Strikethrough(txt) => {
+ expand_plaintext(txt, &mut new_stream, MdTree::Strikethrough);
+ }
+ MdTree::RefLink { disp, id } => new_stream.push(match_reflink(linkdefs, disp, id)),
+ MdTree::OrderedListItem(n, st) => {
+ new_stream.push(MdTree::OrderedListItem(n, normalize(st, linkdefs)));
+ }
+ MdTree::UnorderedListItem(st) => {
+ new_stream.push(MdTree::UnorderedListItem(normalize(st, linkdefs)));
+ }
+ MdTree::Heading(n, st) => new_stream.push(MdTree::Heading(n, normalize(st, linkdefs))),
+ _ => new_stream.push(item),
+ }
+ }
+
+ // Remove non printing types, duplicate paragraph breaks, and breaks at start/end
+ new_stream.retain(|x| !matches!(x, MdTree::Comment(_) | MdTree::LinkDef { .. }));
+ new_stream.dedup_by(|r, l| matches!((r, l), (MdTree::ParagraphBreak, MdTree::ParagraphBreak)));
+
+ if new_stream.first().is_some_and(is_break_ty) {
+ new_stream.remove(0);
+ }
+ if new_stream.last().is_some_and(is_break_ty) {
+ new_stream.pop();
+ }
+
+ // Remove paragraph breaks that shouldn't be there. w[1] is what will be
+ // removed in these cases. Note that these are the items to keep, not delete
+ // (for `retain`)
+ let to_keep: Vec<bool> = new_stream
+ .windows(3)
+ .map(|w| {
+ !((matches!(&w[1], MdTree::ParagraphBreak)
+ && matches!(should_break(&w[0], &w[2]), BreakRule::Always(1) | BreakRule::Never))
+ || (matches!(&w[1], MdTree::PlainText(txt) if txt.trim().is_empty())
+ && matches!(
+ should_break(&w[0], &w[2]),
+ BreakRule::Always(_) | BreakRule::Never
+ )))
+ })
+ .collect();
+ let mut iter = iter::once(true).chain(to_keep).chain(iter::once(true)); // first and last items have no window centered on them: keep
+ new_stream.retain(|_| iter.next().unwrap());
+
+ // Insert line or paragraph breaks where there should be some
+ let mut insertions = 0;
+ let to_insert: Vec<(usize, MdTree<'_>)> = new_stream
+ .windows(2)
+ .enumerate()
+ .filter_map(|(idx, w)| match should_break(&w[0], &w[1]) {
+ BreakRule::Always(1) => Some((idx, MdTree::LineBreak)),
+ BreakRule::Always(2) => Some((idx, MdTree::ParagraphBreak)),
+ _ => None,
+ })
+ .map(|(idx, tt)| {
+ insertions += 1; // earlier insertions shift later targets; the +1 places the break after w[0]
+ (idx + insertions, tt)
+ })
+ .collect();
+ to_insert.into_iter().for_each(|(idx, tt)| new_stream.insert(idx, tt));
+
+ MdStream(new_stream)
+}
+
+/// Whether two types should or shouldn't have a break between them
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum BreakRule {
+ Always(u8), // `normalize` inserts a `LineBreak` for 1 and a `ParagraphBreak` for 2
+ Never, // an existing paragraph break between the two is removed
+ Optional, // nothing is inserted or removed
+}
+
+/// Decide which break rule applies between two adjacent trees; arms are tried in order, so earlier ones win
+fn should_break(left: &MdTree<'_>, right: &MdTree<'_>) -> BreakRule {
+ use MdTree::*;
+
+ match (left, right) {
+ // Separate these types with a single line
+ (HorizontalRule, _)
+ | (_, HorizontalRule)
+ | (OrderedListItem(_, _), OrderedListItem(_, _))
+ | (UnorderedListItem(_), UnorderedListItem(_)) => BreakRule::Always(1),
+ // Condensed types shouldn't have an extra break on either side
+ (Comment(_) | ParagraphBreak | Heading(_, _), _) | (_, Comment(_) | ParagraphBreak) => {
+ BreakRule::Never
+ }
+ // Block types should always be separated by full breaks
+ (CodeBlock { .. } | OrderedListItem(_, _) | UnorderedListItem(_), _)
+ | (_, CodeBlock { .. } | Heading(_, _) | OrderedListItem(_, _) | UnorderedListItem(_)) => {
+ BreakRule::Always(2)
+ }
+ // Text types may or may not be separated by a break
+ (
+ CodeInline(_)
+ | Strong(_)
+ | Emphasis(_)
+ | Strikethrough(_)
+ | PlainText(_)
+ | Link { .. }
+ | RefLink { .. }
+ | LinkDef { .. },
+ CodeInline(_)
+ | Strong(_)
+ | Emphasis(_)
+ | Strikethrough(_)
+ | PlainText(_)
+ | Link { .. }
+ | RefLink { .. }
+ | LinkDef { .. },
+ ) => BreakRule::Optional,
+ (LineBreak, _) | (_, LineBreak) => { // callers must not pass `LineBreak`: this arm panics
+ unreachable!("should have been removed during deduplication")
+ }
+ }
+}
+
+/// Whether a tree acts as some form of break: an explicit paragraph or line
+/// break, or plain text made up solely of whitespace.
+fn is_break_ty(val: &MdTree<'_>) -> bool {
+    match val {
+        MdTree::ParagraphBreak | MdTree::LineBreak => true,
+        // >1 break between paragraphs acts as a break
+        MdTree::PlainText(txt) => txt.trim().is_empty(),
+        _ => false,
+    }
+}
+
+/// Perform transformations to text. This splits paragraphs, replaces patterns,
+/// and corrects newlines. Results are appended to `stream`, each text piece wrapped by `f`.
+///
+/// To avoid allocating strings (and using a different heavier tt type), our
+/// replace method means split into three and append each. For this reason, any
+/// viewer should treat consecutive `PlainText` types as belonging to the same
+/// paragraph.
+fn expand_plaintext<'a>(
+ txt: &'a str,
+ stream: &mut Vec<MdTree<'a>>,
+ mut f: fn(&'a str) -> MdTree<'a>,
+) {
+ if txt.is_empty() {
+ return;
+ } else if txt == "\n" { // a lone newline becomes a single space, and only next to text-like items
+ if let Some(tt) = stream.last() {
+ let tmp = MdTree::PlainText(" ");
+ if should_break(tt, &tmp) == BreakRule::Optional {
+ stream.push(tmp);
+ }
+ }
+ return;
+ }
+ let mut queue1 = Vec::new(); // pieces of the current paragraph
+ let mut queue2 = Vec::new(); // scratch output for one replacement pass
+ let stream_start_len = stream.len();
+ for paragraph in txt.split("\n\n") {
+ if paragraph.is_empty() {
+ stream.push(MdTree::ParagraphBreak);
+ continue;
+ }
+ let paragraph = trim_extra_ws(paragraph);
+
+ queue1.clear();
+ queue1.push(paragraph);
+
+ for (from, to) in REPLACEMENTS {
+ queue2.clear();
+ for item in &queue1 {
+ for s in item.split(from) {
+ queue2.extend(&[s, to]); // every split piece followed by the replacement text
+ }
+ if queue2.len() > 1 {
+ let _ = queue2.pop(); // remove last unnecessary intersperse
+ }
+ }
+ mem::swap(&mut queue1, &mut queue2); // this pass's output feeds the next replacement
+ }
+
+ // Make sure we don't double whitespace
+ queue1.retain(|s| !s.is_empty());
+ for idx in 0..queue1.len() {
+ queue1[idx] = trim_extra_ws(queue1[idx]);
+ if idx < queue1.len() - 1
+ && queue1[idx].ends_with(char::is_whitespace)
+ && queue1[idx + 1].starts_with(char::is_whitespace)
+ {
+ queue1[idx] = queue1[idx].trim_end();
+ }
+ }
+ stream.extend(queue1.iter().copied().filter(|txt| !txt.is_empty()).map(&mut f));
+ stream.push(MdTree::ParagraphBreak); // separator after each paragraph
+ }
+
+ if stream.len() - stream_start_len > 1 {
+ let _ = stream.pop(); // remove last unnecessary intersperse
+ }
+}
+
+/// Resolve a reference link into a standalone `Link` by looking its id up in
+/// the collected link definitions. When no explicit id is given the display
+/// text is used as the id. An unresolved reference produces a link with an
+/// empty target.
+fn match_reflink<'a>(linkdefs: &[MdTree<'a>], disp: &'a str, match_id: Option<&str>) -> MdTree<'a> {
+    let wanted = match_id.unwrap_or(disp);
+    let link = linkdefs
+        .iter()
+        .find_map(|def| match def {
+            MdTree::LinkDef { id, link } if *id == wanted => Some(*link),
+            _ => None,
+        })
+        .unwrap_or(""); // link not found
+    MdTree::Link { disp, link }
+}
+
+/// If there is more than one ASCII whitespace char at start or end, trim the extras (one is kept on each side)
+fn trim_extra_ws(mut txt: &str) -> &str {
+ let start_ws =
+ txt.bytes().position(|ch| !ch.is_ascii_whitespace()).unwrap_or(txt.len()).saturating_sub(1); // leading whitespace count minus the one we keep
+ txt = &txt[start_ws..];
+ let end_ws = txt
+ .bytes()
+ .rev()
+ .position(|ch| !ch.is_ascii_whitespace())
+ .unwrap_or(txt.len())
+ .saturating_sub(1); // trailing whitespace count minus the one we keep
+ &txt[..txt.len() - end_ws]
+}
+
+/// Strip every leading ASCII whitespace byte from `buf`.
+fn trim_ascii_start(buf: &[u8]) -> &[u8] {
+    let first_kept = buf.iter().position(|byte| !byte.is_ascii_whitespace()).unwrap_or(buf.len());
+    &buf[first_kept..]
+}
+
+#[cfg(test)]
+#[path = "tests/parse.rs"]
+mod tests;
diff --git a/compiler/rustc_errors/src/markdown/term.rs b/compiler/rustc_errors/src/markdown/term.rs
new file mode 100644
index 000000000..e45ba6d2c
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/term.rs
@@ -0,0 +1,189 @@
+use std::cell::Cell;
+use std::io::{self, Write};
+
+use termcolor::{Buffer, Color, ColorSpec, WriteColor};
+
+use crate::markdown::{MdStream, MdTree};
+
+const DEFAULT_COLUMN_WIDTH: usize = 140; // initial wrap width; also the upper bound applied in `entrypoint`
+
+thread_local! {
+ /// Track the position of viewable characters in our buffer (column on the current output line)
+ static CURSOR: Cell<usize> = Cell::new(0);
+ /// Wrap width in columns; starts at `DEFAULT_COLUMN_WIDTH`
+ static WIDTH: Cell<usize> = Cell::new(DEFAULT_COLUMN_WIDTH);
+}
+
+/// Write the stream, formatted for a terminal, into `buf`, followed by a final newline
+pub fn entrypoint(stream: &MdStream<'_>, buf: &mut Buffer) -> io::Result<()> {
+ #[cfg(not(test))] // tests keep the default width
+ if let Some((w, _)) = termize::dimensions() {
+ WIDTH.with(|c| c.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH))); // never wrap wider than the default
+ }
+ write_stream(stream, buf, None, 0)?;
+ buf.write_all(b"\n")
+}
+
+/// Write every tree of the stream to `buf` at the given indentation.
+///
+/// `default` is the style applied before the first tree and re-applied after
+/// each one; with `None` the buffer is reset up front instead. The style is
+/// always reset once the whole stream has been written.
+fn write_stream(
+    MdStream(stream): &MdStream<'_>,
+    buf: &mut Buffer,
+    default: Option<&ColorSpec>,
+    indent: usize,
+) -> io::Result<()> {
+    if let Some(spec) = default {
+        buf.set_color(spec)?;
+    } else {
+        buf.reset()?;
+    }
+
+    for tree in stream {
+        write_tt(tree, buf, indent)?;
+        // `write_tt` resets the style when it finishes, so restore the default
+        if let Some(spec) = default {
+            buf.set_color(spec)?;
+        }
+    }
+
+    buf.reset()
+}
+
+/// Write a single token tree to `buf`, then reset the buffer's style.
+///
+/// `indent` is the number of columns wrapped text is indented by. Inline
+/// items go through `write_wrapping`; code blocks are written verbatim;
+/// break items emit newlines and reset the cursor; headings and list items
+/// recurse through `write_stream`.
+///
+/// # Errors
+///
+/// Returns any I/O error reported by the buffer, including those raised while
+/// drawing a horizontal rule.
+///
+/// # Panics
+///
+/// Panics on `Comment`, `LinkDef` and `RefLink`, which normalization removes
+/// before rendering, and on a heading of level 0.
+pub fn write_tt(tt: &MdTree<'_>, buf: &mut Buffer, indent: usize) -> io::Result<()> {
+    match tt {
+        MdTree::CodeBlock { txt, lang: _ } => {
+            buf.set_color(ColorSpec::new().set_dimmed(true))?;
+            buf.write_all(txt.as_bytes())?;
+        }
+        MdTree::CodeInline(txt) => {
+            buf.set_color(ColorSpec::new().set_dimmed(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Strong(txt) => {
+            buf.set_color(ColorSpec::new().set_bold(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Emphasis(txt) => {
+            buf.set_color(ColorSpec::new().set_italic(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Strikethrough(txt) => {
+            buf.set_color(ColorSpec::new().set_strikethrough(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::PlainText(txt) => {
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Link { disp, link } => {
+            write_wrapping(buf, disp, indent, Some(link))?;
+        }
+        MdTree::ParagraphBreak => {
+            buf.write_all(b"\n\n")?;
+            reset_cursor();
+        }
+        MdTree::LineBreak => {
+            buf.write_all(b"\n")?;
+            reset_cursor();
+        }
+        MdTree::HorizontalRule => {
+            // One dash per column; a failed write is returned, not unwrapped
+            let rule = "-".repeat(WIDTH.with(Cell::get));
+            buf.write_all(rule.as_bytes())?;
+            reset_cursor();
+        }
+        MdTree::Heading(n, stream) => {
+            let mut cs = ColorSpec::new();
+            cs.set_fg(Some(Color::Cyan));
+            match n {
+                1 => cs.set_intense(true).set_bold(true).set_underline(true),
+                2 => cs.set_intense(true).set_underline(true),
+                3 => cs.set_intense(true).set_italic(true),
+                4.. => cs.set_underline(true).set_italic(true),
+                0 => unreachable!(),
+            };
+            // Headings are always written without indentation
+            write_stream(stream, buf, Some(&cs), 0)?;
+            buf.write_all(b"\n")?;
+        }
+        MdTree::OrderedListItem(n, stream) => {
+            let base = format!("{n}. ");
+            // Pad the marker to four columns so the contents line up with `indent + 4`
+            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
+            write_stream(stream, buf, None, indent + 4)?;
+        }
+        MdTree::UnorderedListItem(stream) => {
+            let base = "* ";
+            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
+            write_stream(stream, buf, None, indent + 4)?;
+        }
+        // Patterns popped in previous step
+        MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),
+    }
+
+    buf.reset()?;
+
+    Ok(())
+}
+
+/// Record that the current output line has ended by moving the tracked cursor
+/// back to column 0.
+fn reset_cursor() {
+    CURSOR.with(|cursor| cursor.set(0));
+}
+
+/// Write `text` to `buf`, wrapping it at the current terminal width.
+///
+/// Generic over `io::Write` so that it can be tested without a termcolor
+/// buffer. Every line that starts at column 0 is prefixed with `indent`
+/// spaces. Lines are broken at the last whitespace, `_` or `-` that fits; if
+/// there is none, the text is written up to the next whitespace. The cursor
+/// position is tracked in `CURSOR` across calls.
+///
+/// If `link_url` is given, the text is wrapped in OSC 8 hyperlink escape
+/// sequences; those are non-printing and are not counted towards the cursor.
+///
+/// # Errors
+///
+/// Returns any I/O error reported by `buf`.
+fn write_wrapping<B: io::Write>(
+    buf: &mut B,
+    text: &str,
+    indent: usize,
+    link_url: Option<&str>,
+) -> io::Result<()> {
+    // Build the indentation on demand: slicing a fixed whitespace literal
+    // panics as soon as `indent` exceeds the literal's length
+    let ind_ws = vec![b' '; indent];
+    let mut to_write = text;
+    if let Some(url) = link_url {
+        // This is a nonprinting prefix so we don't increment our cursor
+        write!(buf, "\x1b]8;;{url}\x1b\\")?;
+    }
+    CURSOR.with(|cur| {
+        loop {
+            if cur.get() == 0 {
+                buf.write_all(&ind_ws)?;
+                cur.set(indent);
+            }
+            // Columns left on this line; saturate so an indent wider than the
+            // terminal cannot underflow
+            let ch_count = WIDTH.with(Cell::get).saturating_sub(cur.get());
+            let mut iter = to_write.char_indices();
+            let Some((end_idx, _ch)) = iter.nth(ch_count) else {
+                // Write entire line
+                buf.write_all(to_write.as_bytes())?;
+                cur.set(cur.get() + to_write.chars().count());
+                break;
+            };
+
+            if let Some((break_idx, ch)) = to_write[..end_idx]
+                .char_indices()
+                .rev()
+                .find(|(_idx, ch)| ch.is_whitespace() || ['_', '-'].contains(ch))
+            {
+                // Found whitespace to break at
+                if ch.is_whitespace() {
+                    writeln!(buf, "{}", &to_write[..break_idx])?;
+                    to_write = to_write[break_idx..].trim_start();
+                } else {
+                    // Break at a `-` or `_` separator
+                    writeln!(buf, "{}", &to_write.get(..break_idx + 1).unwrap_or(to_write))?;
+                    to_write = to_write.get(break_idx + 1..).unwrap_or_default().trim_start();
+                }
+            } else {
+                // No whitespace, we need to just split
+                let ws_idx =
+                    iter.find(|(_, ch)| ch.is_whitespace()).map_or(to_write.len(), |(idx, _)| idx);
+                writeln!(buf, "{}", &to_write[..ws_idx])?;
+                to_write = to_write.get(ws_idx + 1..).map_or("", str::trim_start);
+            }
+            // A newline was just written, so the next pass starts a fresh line
+            cur.set(0);
+        }
+        if link_url.is_some() {
+            // Close the hyperlink opened above
+            buf.write_all(b"\x1b]8;;\x1b\\")?;
+        }
+
+        Ok(())
+    })
+}
+
+#[cfg(test)]
+#[path = "tests/term.rs"]
+mod tests;
diff --git a/compiler/rustc_errors/src/markdown/tests/input.md b/compiler/rustc_errors/src/markdown/tests/input.md
new file mode 100644
index 000000000..7d207fc42
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/tests/input.md
@@ -0,0 +1,50 @@
+# H1 Heading [with a link][remote-link]
+
+H1 content: **some words in bold** and `so does inline code`
+
+## H2 Heading
+
+H2 content: _some words in italic_
+
+### H3 Heading
+
+H3 content: ~~strikethrough~~ text
+
+#### H4 Heading
+
+H4 content: A [simple link](https://docs.rs) and a [remote-link].
+
+---
+
+A section break was above. We can also do paragraph breaks:
+
+(new paragraph) and unordered lists:
+
+- Item 1 in `code`
+- Item 2 in _italics_
+
+Or ordered:
+
+1. Item 1 in **bold**
+2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet,
+ consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus elit quam,
+ pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan
+ in cursus sit amet, dictum a nunc. Suspendisse aliquet, lorem eu eleifend
+ accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
+
+---
+
+## Code
+
+Both `inline code` and code blocks are supported:
+
+```rust
+/// A rust enum
+#[derive(Debug, PartialEq, Clone)]
+enum Foo {
+ /// Start of line
+ Bar
+}
+```
+
+[remote-link]: http://docs.rs
diff --git a/compiler/rustc_errors/src/markdown/tests/output.stdout b/compiler/rustc_errors/src/markdown/tests/output.stdout
new file mode 100644
index 000000000..23c60d5c3
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/tests/output.stdout
@@ -0,0 +1,35 @@
+H1 Heading ]8;;http://docs.rs\with a link]8;;\
+H1 content: some words in bold and so does inline code
+
+H2 Heading
+H2 content: some words in italic
+
+H3 Heading
+H3 content: strikethrough text
+
+H4 Heading
+H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
+--------------------------------------------------------------------------------------------------------------------------------------------
+A section break was above. We can also do paragraph breaks:
+
+(new paragraph) and unordered lists:
+
+* Item 1 in code
+* Item 2 in italics
+
+Or ordered:
+
+1. Item 1 in bold
+2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus
+ elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
+ aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
+--------------------------------------------------------------------------------------------------------------------------------------------
+Code
+Both inline code and code blocks are supported:
+
+/// A rust enum
+#[derive(Debug, PartialEq, Clone)]
+enum Foo {
+ /// Start of line
+ Bar
+}
diff --git a/compiler/rustc_errors/src/markdown/tests/parse.rs b/compiler/rustc_errors/src/markdown/tests/parse.rs
new file mode 100644
index 000000000..e39e8c89b
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/tests/parse.rs
@@ -0,0 +1,312 @@
+use super::*;
+use ParseOpt as PO;
+
+/// `parse_simple_pat` with `STG` as both the opening and the closing sequence:
+/// a plain `**…**` span is accepted, an escaped closing sequence is rejected.
+#[test]
+fn test_parse_simple() {
+    let plain = "**abcd** rest".as_bytes();
+    let (tree, tail) = parse_simple_pat(plain, STG, STG, PO::None, MdTree::Strong).unwrap();
+    assert_eq!(tree, MdTree::Strong("abcd"));
+    assert_eq!(tail, b" rest");
+
+    // Escaping should fail
+    let escaped = r"**abcd\** rest".as_bytes();
+    assert!(parse_simple_pat(escaped, STG, STG, PO::None, MdTree::Strong).is_none());
+}
+
+/// With `PO::TrimNoEsc` the comment body comes back trimmed, and a backslash
+/// in front of the closing sequence is kept as part of the body.
+#[test]
+fn test_parse_comment() {
+    // (input, expected comment body); the remainder is `rest` in both cases
+    let cases = [
+        ("<!-- foobar! -->rest", "foobar!"),
+        (r"<!-- foobar! \-->rest", r"foobar! \"),
+    ];
+
+    for (input, body) in cases {
+        let (tree, tail) =
+            parse_simple_pat(input.as_bytes(), CMT_S, CMT_E, PO::TrimNoEsc, MdTree::Comment)
+                .unwrap();
+        assert_eq!(tree, MdTree::Comment(body));
+        assert_eq!(tail, b"rest");
+    }
+}
+
+/// `parse_heading` yields the heading level plus its inline content and
+/// leaves the terminating newline (if any) in the remainder.
+#[test]
+fn test_parse_heading() {
+    // Level-1 heading followed by further text
+    let (tree, tail) = parse_heading("# Top level\nrest".as_bytes()).unwrap();
+    assert_eq!(tree, MdTree::Heading(1, vec![MdTree::PlainText("Top level")].into()));
+    assert_eq!(tail, b"\nrest");
+
+    // Heading that runs up to the end of the input
+    let (tree, tail) = parse_heading("# Empty".as_bytes()).unwrap();
+    assert_eq!(tree, MdTree::Heading(1, vec![MdTree::PlainText("Empty")].into()));
+    assert_eq!(tail, b"");
+
+    // Combo
+    let children = vec![
+        MdTree::PlainText("Top "),
+        MdTree::CodeInline("level"),
+        MdTree::PlainText(" "),
+        MdTree::Emphasis("woo"),
+    ];
+    let (tree, tail) = parse_heading("### Top `level` _woo_\nrest".as_bytes()).unwrap();
+    assert_eq!(tree, MdTree::Heading(3, children.into()));
+    assert_eq!(tail, b"\nrest");
+}
+
+/// `parse_codeinline` returns the text between matching backtick runs; every
+/// case below leaves ` rest` unparsed.
+#[test]
+fn test_parse_code_inline() {
+    // (input, expected inline code)
+    let cases = [
+        // single backticks
+        ("`abcd` rest", "abcd"),
+        // extra backticks, newline
+        ("```ab\ncd``` rest", "ab\ncd"),
+        // test no escaping
+        (r"`abcd\` rest", r"abcd\"),
+    ];
+
+    for (input, code) in cases {
+        let (tree, tail) = parse_codeinline(input.as_bytes()).unwrap();
+        assert_eq!(tree, MdTree::CodeInline(code));
+        assert_eq!(tail, b" rest");
+    }
+}
+
+/// `parse_codeblock` extracts the body and the optional language tag of a
+/// fenced block; the text after the closing fence is returned unparsed.
+#[test]
+fn test_parse_code_block() {
+    // Three-backtick fence carrying a language tag
+    let with_lang = "```rust\ncode\ncode\n```\nleftovers".as_bytes();
+    let (tree, tail) = parse_codeblock(with_lang);
+    let want = MdTree::CodeBlock { txt: "code\ncode", lang: Some("rust") };
+    assert_eq!(tree, want);
+    assert_eq!(tail, b"\nleftovers");
+
+    // Five-backtick fence: the four-backtick run inside stays part of the body
+    let long_fence = "`````\ncode\ncode````\n`````\nleftovers".as_bytes();
+    let (tree, tail) = parse_codeblock(long_fence);
+    let want = MdTree::CodeBlock { txt: "code\ncode````", lang: None };
+    assert_eq!(tree, want);
+    assert_eq!(tail, b"\nleftovers");
+}
+
+/// `parse_any_link` recognises inline links, reference links (with and
+/// without an explicit id) and link definitions.
+#[test]
+fn test_parse_link() {
+    // (input, second argument to `parse_any_link`, expected node, expected remainder)
+    let cases = [
+        // simple
+        (
+            "[see here](docs.rs) other",
+            false,
+            MdTree::Link { disp: "see here", link: "docs.rs" },
+            " other",
+        ),
+        // simple, top level
+        (
+            "[see here](docs.rs) other",
+            true,
+            MdTree::Link { disp: "see here", link: "docs.rs" },
+            " other",
+        ),
+        // reference without an id
+        (
+            "[see here] other",
+            true,
+            MdTree::RefLink { disp: "see here", id: None },
+            " other",
+        ),
+        // reference with an id
+        (
+            "[see here][docs-rs] other",
+            false,
+            MdTree::RefLink { disp: "see here", id: Some("docs-rs") },
+            " other",
+        ),
+        // reference definition
+        (
+            "[see here]: docs.rs\nother",
+            true,
+            MdTree::LinkDef { id: "see here", link: "docs.rs" },
+            "\nother",
+        ),
+    ];
+
+    for (input, flag, want, remainder) in cases {
+        let (tree, tail) = parse_any_link(input.as_bytes(), flag).unwrap();
+        assert_eq!(tree, want);
+        assert_eq!(tail, remainder.as_bytes());
+    }
+}
+
+// Fixtures for `test_indented_section`.
+//
+// Indented continuation lines followed by an unindented line.
+const IND1: &str = r"test standard
+ ind
+ ind2
+not ind";
+// Indented continuation lines that run up to the trailing newline of the input.
+const IND2: &str = r"test end of stream
+ 1
+ 2
+";
+// Indented continuation lines followed by an empty line and an unindented line.
+const IND3: &str = r"test empty lines
+ 1
+ 2
+
+not ind";
+
+/// `get_indented_section` splits the input into the leading section (first
+/// line plus its indented continuation lines) and the untouched remainder.
+#[test]
+fn test_indented_section() {
+    // (input, expected section, expected remainder)
+    let cases = [
+        (IND1, "test standard\n ind\n ind2", "\nnot ind"),
+        (IND2, "test end of stream\n 1\n 2", "\n"),
+        (IND3, "test empty lines\n 1\n 2", "\n\nnot ind"),
+    ];
+
+    for (input, section, remainder) in cases {
+        let (head, tail) = get_indented_section(input.as_bytes());
+        assert_eq!(str::from_utf8(head).unwrap(), section);
+        assert_eq!(str::from_utf8(tail).unwrap(), remainder);
+    }
+}
+
+// Fixture for `test_heading_breaks`: a heading and a line of text separated
+// by one empty line.
+const HBT: &str = r"# Heading
+
+content";
+
+/// The empty line after a heading does not produce a break node: the parsed
+/// stream holds only the heading followed by the plain text.
+#[test]
+fn test_heading_breaks() {
+    let heading = MdTree::Heading(1, vec![MdTree::PlainText("Heading")].into());
+    let body = MdTree::PlainText("content");
+    let expected = vec![heading, body].into();
+    assert_eq!(entrypoint(HBT), expected);
+}
+
+// Fixtures for `test_newline_breaks`: `start` and `end` separated by one,
+// two and three empty lines respectively.
+const NL1: &str = r"start
+
+end";
+const NL2: &str = r"start
+
+
+end";
+const NL3: &str = r"start
+
+
+
+end";
+
+/// Any number of empty lines between two paragraphs collapses into a single
+/// `ParagraphBreak`.
+#[test]
+fn test_newline_breaks() {
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::ParagraphBreak,
+        MdTree::PlainText("end"),
+    ]
+    .into();
+
+    for (idx, input) in [NL1, NL2, NL3].into_iter().enumerate() {
+        assert_eq!(entrypoint(input), expected, "failed {idx}");
+    }
+}
+
+// Fixture for `test_wrap_pattern`: an emphasis span that continues across a
+// line break.
+const WRAP: &str = "plain _italics
+italics_";
+
+/// An emphasis span wrapped over two lines is returned as three emphasis
+/// nodes: the two words and a single space in place of the line break.
+#[test]
+fn test_wrap_pattern() {
+    let nodes = vec![
+        MdTree::PlainText("plain "),
+        MdTree::Emphasis("italics"),
+        MdTree::Emphasis(" "),
+        MdTree::Emphasis("italics"),
+    ];
+    let expected = nodes.into();
+    assert_eq!(entrypoint(WRAP), expected);
+}
+
+// Fixture for `test_wrap_notxt`: two styled spans on consecutive lines with
+// no plain text between them.
+const WRAP_NOTXT: &str = r"_italics_
+**bold**";
+
+/// A line break between two styled spans becomes a single plain-text space.
+#[test]
+fn test_wrap_notxt() {
+    let nodes = vec![
+        MdTree::Emphasis("italics"),
+        MdTree::PlainText(" "),
+        MdTree::Strong("bold"),
+    ];
+    let expected = nodes.into();
+    assert_eq!(entrypoint(WRAP_NOTXT), expected);
+}
+
+// Fixture for `test_list`: an unordered list that starts right after a text
+// line, with a comment line between two items, an item spanning several
+// lines (inline link and reference link), an emoji shortcode item, and the
+// link definition for `link-foo` at the very end.
+const MIXED_LIST: &str = r"start
+- _italics item_
+<!-- comment -->
+- **bold item**
+ second line [link1](foobar1)
+ third line [link2][link-foo]
+- :crab:
+ extra indent
+end
+[link-foo]: foobar2
+";
+
+/// Full parse of `MIXED_LIST`: the comment line and the trailing link
+/// definition leave no node of their own, the reference link is resolved to
+/// its target, and `:crab:` is replaced by the emoji.
+#[test]
+fn test_list() {
+    let italics_item = MdTree::UnorderedListItem(vec![MdTree::Emphasis("italics item")].into());
+    let bold_item = MdTree::UnorderedListItem(
+        vec![
+            MdTree::Strong("bold item"),
+            MdTree::PlainText(" second line "),
+            MdTree::Link { disp: "link1", link: "foobar1" },
+            MdTree::PlainText(" third line "),
+            MdTree::Link { disp: "link2", link: "foobar2" },
+        ]
+        .into(),
+    );
+    let crab_item = MdTree::UnorderedListItem(
+        vec![MdTree::PlainText("🦀"), MdTree::PlainText(" extra indent")].into(),
+    );
+
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::ParagraphBreak,
+        italics_item,
+        MdTree::LineBreak,
+        bold_item,
+        MdTree::LineBreak,
+        crab_item,
+        MdTree::ParagraphBreak,
+        MdTree::PlainText("end"),
+    ]
+    .into();
+    assert_eq!(entrypoint(MIXED_LIST), expected);
+}
+
+// Fixture for `test_without_breaks`: text, a heading, an ordered list item,
+// a fenced code block and inline code on consecutive lines, with no empty
+// lines between them.
+const SMOOSHED: &str = r#"
+start
+### heading
+1. ordered item
+```rust
+println!("Hello, world!");
+```
+`inline`
+``end``
+"#;
+
+/// Block elements are recognised even when no empty lines separate them; the
+/// expected stream below lists the break nodes placed between them.
+#[test]
+fn test_without_breaks() {
+    let heading = MdTree::Heading(3, vec![MdTree::PlainText("heading")].into());
+    let list_item = MdTree::OrderedListItem(1, vec![MdTree::PlainText("ordered item")].into());
+    let code_block =
+        MdTree::CodeBlock { txt: r#"println!("Hello, world!");"#, lang: Some("rust") };
+
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::ParagraphBreak,
+        heading,
+        list_item,
+        MdTree::ParagraphBreak,
+        code_block,
+        MdTree::ParagraphBreak,
+        MdTree::CodeInline("inline"),
+        MdTree::PlainText(" "),
+        MdTree::CodeInline("end"),
+    ]
+    .into();
+    assert_eq!(entrypoint(SMOOSHED), expected);
+}
+
+// Fixture for `test_code_at_start`: inline code spans that begin a line,
+// alternating with plain-text lines.
+const CODE_STARTLINE: &str = r#"
+start
+`code`
+middle
+`more code`
+end
+"#;
+
+/// Inline code at the start of a line is parsed as inline code, and each line
+/// break around it turns into a single plain-text space.
+#[test]
+fn test_code_at_start() {
+    // Separator node produced for every line break in the fixture
+    let space = || MdTree::PlainText(" ");
+
+    let expected = vec![
+        MdTree::PlainText("start"),
+        space(),
+        MdTree::CodeInline("code"),
+        space(),
+        MdTree::PlainText("middle"),
+        space(),
+        MdTree::CodeInline("more code"),
+        space(),
+        MdTree::PlainText("end"),
+    ]
+    .into();
+    assert_eq!(entrypoint(CODE_STARTLINE), expected);
+}
diff --git a/compiler/rustc_errors/src/markdown/tests/term.rs b/compiler/rustc_errors/src/markdown/tests/term.rs
new file mode 100644
index 000000000..3b31c6d62
--- /dev/null
+++ b/compiler/rustc_errors/src/markdown/tests/term.rs
@@ -0,0 +1,90 @@
+use std::io::BufWriter;
+use std::path::PathBuf;
+use termcolor::{BufferWriter, ColorChoice};
+
+use super::*;
+use crate::markdown::MdStream;
+
+// Markdown fixture rendered by `test_output`, embedded at compile time.
+const INPUT: &str = include_str!("input.md");
+// Path components of the blessed output file, rooted at the crate's manifest
+// directory; `test_output` joins them into a `PathBuf`.
+const OUTPUT_PATH: &[&str] = &[env!("CARGO_MANIFEST_DIR"), "src","markdown","tests","output.stdout"];
+
+// Value stored into `WIDTH` by `test_wrapping_write`, and the maximum length
+// it allows for any output line.
+const TEST_WIDTH: usize = 80;
+
+// We try to make some words long to create corner cases
+const TXT: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit.
+Fusce-id-urna-sollicitudin, pharetra nisl nec, lobortis tellus. In at
+metus hendrerit, tincidunteratvel, ultrices turpis. Curabitur_risus_sapien,
+porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor quis
+dolor non venenatis. Aliquam ut. ";
+
+// Exact text `test_wrapping_write` expects after its four `write_wrapping`
+// calls, once escape bytes and the link placeholder have been stripped.
+const WRAPPED: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
+sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
+tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-sed,
+ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam ut. Lorem
+ ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
+ sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
+ tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-
+ sed, ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam
+ ut. Sample link lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,
+consecteturadipiscingelit. Fusce-id-urna-sollicitudin, pharetra nisl nec,
+lobortis tellus. In at metus hendrerit, tincidunteratvel, ultrices turpis.
+Curabitur_risus_sapien, porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor
+quis dolor non venenatis. Aliquam ut. ";
+
+/// Writes the flattened `TXT` fixture and a link through `write_wrapping`,
+/// then checks that no visible line exceeds `TEST_WIDTH` and that the visible
+/// text equals `WRAPPED`.
+#[test]
+fn test_wrapping_write() {
+    WIDTH.with(|w| w.set(TEST_WIDTH));
+
+    // Flatten the multi-line fixture into the string that gets written
+    let txt = TXT.replace("-\n","-").replace("_\n","_").replace('\n', " ").replace(" ", "");
+
+    // (text, third argument, fourth argument) for each `write_wrapping` call, in order.
+    // NOTE(review): the arguments look like an indent width and an optional link target;
+    // confirm against `write_wrapping`.
+    let writes = [
+        (txt.as_str(), 0, None),
+        (txt.as_str(), 4, None),
+        ("Sample link lorem ipsum dolor sit amet. ", 4, Some("link-address-placeholder")),
+        (txt.as_str(), 0, None),
+    ];
+
+    let mut buf = BufWriter::new(Vec::new());
+    for (text, indent, link) in writes {
+        write_wrapping(&mut buf, text, indent, link).unwrap();
+    }
+
+    // Drop the ESC bytes, the `]8;;` markers and the placeholder address so that
+    // only the visible text is left
+    let raw = String::from_utf8(buf.into_inner().unwrap()).unwrap();
+    let out = raw
+        .replace("\x1b\\", "")
+        .replace('\x1b', "")
+        .replace("]8;;", "")
+        .replace("link-address-placeholder", "");
+
+    out.lines().for_each(|line| assert!(line.len() <= TEST_WIDTH, "line length\n'{line}'"));
+
+    assert_eq!(out, WRAPPED);
+}
+
+/// Renders `INPUT` into a termcolor buffer and compares the bytes with the
+/// blessed file at `OUTPUT_PATH`.
+///
+/// When the `RUSTC_BLESS` environment variable is `1`, the blessed file is
+/// overwritten with the rendered bytes instead of being compared.
+///
+/// # Panics
+///
+/// Panics if rendering fails, if the blessed file cannot be read or written,
+/// or if the rendered bytes differ from the blessed file.
+#[test]
+fn test_output() {
+    // Capture `--bless` when run via ./x
+    let bless = std::env::var("RUSTC_BLESS").unwrap_or_default() == "1";
+    let ast = MdStream::parse_str(INPUT);
+    let bufwtr = BufferWriter::stderr(ColorChoice::Always);
+    let mut buffer = bufwtr.buffer();
+    ast.write_termcolor_buf(&mut buffer).unwrap();
+
+    let mut blessed = PathBuf::new();
+    blessed.extend(OUTPUT_PATH);
+
+    if bless {
+        std::fs::write(&blessed, buffer.into_inner()).unwrap();
+        eprintln!("blessed output at {}", blessed.display());
+    } else {
+        let actual = buffer.into_inner();
+        let expected = std::fs::read(&blessed).unwrap();
+        if expected != actual {
+            // hack: I don't know any way to write bytes to the captured stdout
+            // that cargo test uses
+            let mut out = std::io::stdout();
+            // Dump both sides under accurate labels: the blessed file is what was
+            // expected, the freshly rendered buffer is what was actually produced.
+            out.write_all(b"\n\nMarkdown output did not match. Expected:\n").unwrap();
+            out.write_all(&expected).unwrap();
+            out.write_all(b"\n\nActual:\n").unwrap();
+            out.write_all(&actual).unwrap();
+            out.write_all(b"\n\n").unwrap();
+            panic!("markdown output mismatch");
+        }
+    }
+}