diff options
Diffstat (limited to 'crates/mdman/src/format/man.rs')
-rw-r--r-- | crates/mdman/src/format/man.rs | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/crates/mdman/src/format/man.rs b/crates/mdman/src/format/man.rs new file mode 100644 index 0000000..9767fdd --- /dev/null +++ b/crates/mdman/src/format/man.rs @@ -0,0 +1,436 @@ +//! Man-page formatter. + +use crate::util::{header_text, parse_name_and_section}; +use crate::EventIter; +use anyhow::{bail, Error}; +use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag}; +use std::fmt::Write; +use url::Url; + +pub struct ManFormatter { + url: Option<Url>, +} + +impl ManFormatter { + pub fn new(url: Option<Url>) -> ManFormatter { + ManFormatter { url } + } +} + +impl super::Formatter for ManFormatter { + fn render(&self, input: &str) -> Result<String, Error> { + ManRenderer::render(input, self.url.clone()) + } + + fn render_options_start(&self) -> &'static str { + // Tell pulldown_cmark to ignore this. + // This will be stripped out later. + "<![CDATA[" + } + + fn render_options_end(&self) -> &'static str { + "]]>" + } + + fn render_option( + &self, + params: &[&str], + block: &str, + _man_name: &str, + ) -> Result<String, Error> { + let rendered_options = params + .iter() + .map(|param| { + let r = self.render(param)?; + Ok(r.trim().trim_start_matches(".sp").to_string()) + }) + .collect::<Result<Vec<_>, Error>>()?; + let rendered_block = self.render(block)?; + let rendered_block = rendered_block.trim().trim_start_matches(".sp").trim(); + // .RS = move left margin to right 4. + // .RE = move margin back one level. + Ok(format!( + "\n.sp\n{}\n.RS 4\n{}\n.RE\n", + rendered_options.join(", "), + rendered_block + )) + } + + fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> { + Ok(format!("`{}`({})", name, section)) + } +} + +#[derive(Copy, Clone)] +enum Font { + Bold, + Italic, +} + +impl Font { + fn str_from_stack(font_stack: &[Font]) -> &'static str { + let has_bold = font_stack.iter().any(|font| matches!(font, Font::Bold)); + let has_italic = font_stack.iter().any(|font| matches!(font, Font::Italic)); + match (has_bold, has_italic) { + (false, false) => "\\fR", // roman (normal) + (false, true) => "\\fI", // italic + (true, false) => "\\fB", // bold + (true, true) => "\\f(BI", // bold italic + } + } +} + +struct ManRenderer<'e> { + output: String, + parser: EventIter<'e>, + font_stack: Vec<Font>, +} + +impl<'e> ManRenderer<'e> { + fn render(input: &str, url: Option<Url>) -> Result<String, Error> { + let parser = crate::md_parser(input, url); + let output = String::with_capacity(input.len() * 3 / 2); + let mut mr = ManRenderer { + parser, + output, + font_stack: Vec::new(), + }; + mr.push_man()?; + Ok(mr.output) + } + + fn push_man(&mut self) -> Result<(), Error> { + // If this is true, this is inside a cdata block used for hiding + // content from pulldown_cmark. + let mut in_cdata = false; + // The current list stack. None if unordered, Some if ordered with the + // given number as the current index. + let mut list: Vec<Option<u64>> = Vec::new(); + // Used in some cases where spacing isn't desired. + let mut suppress_paragraph = false; + let mut table_cell_index = 0; + + while let Some((event, range)) = self.parser.next() { + let this_suppress_paragraph = suppress_paragraph; + suppress_paragraph = false; + match event { + Event::Start(tag) => { + match tag { + Tag::Paragraph => { + if !this_suppress_paragraph { + self.flush(); + self.output.push_str(".sp\n"); + } + } + Tag::Heading(level, ..) => { + if level == HeadingLevel::H1 { + self.push_top_header()?; + } else if level == HeadingLevel::H2 { + // Section header + let text = header_text(&mut self.parser)?; + self.flush(); + write!(self.output, ".SH \"{}\"\n", text)?; + suppress_paragraph = true; + } else { + // Subsection header + let text = header_text(&mut self.parser)?; + self.flush(); + write!(self.output, ".SS \"{}\"\n", text)?; + suppress_paragraph = true; + } + } + Tag::BlockQuote => { + self.flush(); + // .RS = move left margin over 3 + // .ll = shrink line length + self.output.push_str(".RS 3\n.ll -5\n.sp\n"); + suppress_paragraph = true; + } + Tag::CodeBlock(_kind) => { + // space down, indent 4, no-fill mode + self.flush(); + self.output.push_str(".sp\n.RS 4\n.nf\n"); + } + Tag::List(start) => list.push(start), + Tag::Item => { + // Note: This uses explicit movement instead of .IP + // because the spacing on .IP looks weird to me. + // space down, indent 4 + self.flush(); + self.output.push_str(".sp\n.RS 4\n"); + match list.last_mut().expect("item must have list start") { + // Ordered list. + Some(n) => { + // move left 4, output the list index number, move right 1. + write!(self.output, "\\h'-04' {}.\\h'+01'", n)?; + *n += 1; + } + // Unordered list. + None => self.output.push_str("\\h'-04'\\(bu\\h'+02'"), + } + suppress_paragraph = true; + } + Tag::FootnoteDefinition(_label) => unimplemented!(), + Tag::Table(alignment) => { + // Table start + // allbox = draw a box around all the cells + // tab(:) = Use `:` to separate cell data (instead of tab) + // ; = end of options + self.output.push_str( + "\n.TS\n\ + allbox tab(:);\n", + ); + let alignments: Vec<_> = alignment + .iter() + .map(|a| match a { + Alignment::Left | Alignment::None => "lt", + Alignment::Center => "ct", + Alignment::Right => "rt", + }) + .collect(); + self.output.push_str(&alignments.join(" ")); + self.output.push_str(".\n"); + table_cell_index = 0; + } + Tag::TableHead => { + table_cell_index = 0; + } + Tag::TableRow => { + table_cell_index = 0; + self.output.push('\n'); + } + Tag::TableCell => { + if table_cell_index != 0 { + // Separator between columns. + self.output.push(':'); + } + // Start a text block. + self.output.push_str("T{\n"); + table_cell_index += 1 + } + Tag::Emphasis => self.push_font(Font::Italic), + Tag::Strong => self.push_font(Font::Bold), + // Strikethrough isn't usually supported for TTY. + Tag::Strikethrough => self.output.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + // In a man page, page-relative anchors don't + // have much meaning. + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => { + // The text is a copy of the URL, which is not needed. + match self.parser.next() { + Some((Event::Text(_), _range)) => {} + _ => bail!("expected text after autolink"), + } + } + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => { + self.push_font(Font::Italic); + } + // This is currently unused. This is only + // emitted with a broken link callback, but I + // felt it is too annoying to escape `[` in + // option descriptions. + LinkType::ReferenceUnknown + | LinkType::CollapsedUnknown + | LinkType::ShortcutUnknown => { + bail!( + "link with missing reference `{}` located at offset {}", + dest_url, + range.start + ); + } + } + } + Tag::Image(_link_type, _dest_url, _title) => { + bail!("images are not currently supported") + } + } + } + Event::End(tag) => { + match &tag { + Tag::Paragraph => self.flush(), + Tag::Heading(..) => {} + Tag::BlockQuote => { + self.flush(); + // restore left margin, restore line length + self.output.push_str(".br\n.RE\n.ll\n"); + } + Tag::CodeBlock(_kind) => { + self.flush(); + // Restore fill mode, move margin back one level. + self.output.push_str(".fi\n.RE\n"); + } + Tag::List(_) => { + list.pop(); + } + Tag::Item => { + self.flush(); + // Move margin back one level. + self.output.push_str(".RE\n"); + } + Tag::FootnoteDefinition(_label) => {} + Tag::Table(_) => { + // Table end + // I don't know why, but the .sp is needed to provide + // space with the following content. + self.output.push_str("\n.TE\n.sp\n"); + } + Tag::TableHead => {} + Tag::TableRow => {} + Tag::TableCell => { + // End text block. + self.output.push_str("\nT}"); + } + Tag::Emphasis | Tag::Strong => self.pop_font(), + Tag::Strikethrough => self.output.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => {} + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => { + self.pop_font(); + self.output.push(' '); + } + _ => { + panic!("unexpected tag {:?}", tag); + } + } + write!(self.output, "<{}>", escape(&dest_url)?)?; + } + Tag::Image(_link_type, _dest_url, _title) => {} + } + } + Event::Text(t) => { + self.output.push_str(&escape(&t)?); + } + Event::Code(t) => { + self.push_font(Font::Bold); + self.output.push_str(&escape(&t)?); + self.pop_font(); + } + Event::Html(t) => { + if t.starts_with("<![CDATA[") { + // CDATA is a special marker used for handling options. + in_cdata = true; + } else if in_cdata { + if t.trim().ends_with("]]>") { + in_cdata = false; + } else if !t.trim().is_empty() { + self.output.push_str(&t); + } + } else { + self.output.push_str(&escape(&t)?); + } + } + Event::FootnoteReference(_t) => {} + Event::SoftBreak => self.output.push('\n'), + Event::HardBreak => { + self.flush(); + self.output.push_str(".br\n"); + } + Event::Rule => { + self.flush(); + // \l' **length** ' Draw horizontal line (default underscore). + // \n(.lu Gets value from register "lu" (current line length) + self.output.push_str("\\l'\\n(.lu'\n"); + } + Event::TaskListMarker(_b) => unimplemented!(), + } + } + Ok(()) + } + + fn flush(&mut self) { + if !self.output.ends_with('\n') { + self.output.push('\n'); + } + } + + /// Switch to the given font. + /// + /// Because the troff sequence `\fP` for switching to the "previous" font + /// doesn't support nesting, this needs to emulate it here. This is needed + /// for situations like **hi _there_**. + fn push_font(&mut self, font: Font) { + self.font_stack.push(font); + self.output.push_str(Font::str_from_stack(&self.font_stack)); + } + + fn pop_font(&mut self) { + self.font_stack.pop(); + self.output.push_str(Font::str_from_stack(&self.font_stack)); + } + + /// Parse and render the first top-level header of the document. + fn push_top_header(&mut self) -> Result<(), Error> { + // This enables the tbl preprocessor for tables. + // This seems to be enabled by default on every modern system I could + // find, but it doesn't seem to hurt to enable this. + self.output.push_str("'\\\" t\n"); + // Extract the name of the man page. + let text = header_text(&mut self.parser)?; + let (name, section) = parse_name_and_section(&text)?; + // .TH = Table header + // .nh = disable hyphenation + // .ad l = Left-adjust mode (disable justified). + // .ss sets sentence_space_size to 0 (prevents double spaces after . + // if . is last on the line) + write!( + self.output, + ".TH \"{}\" \"{}\"\n\ + .nh\n\ + .ad l\n\ + .ss \\n[.ss] 0\n", + escape(&name.to_uppercase())?, + section + )?; + Ok(()) + } +} + +fn escape(s: &str) -> Result<String, Error> { + // Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html. + // Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`. + + let mut replaced = s + .replace('\\', "\\(rs") + .replace('-', "\\-") + .replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable) + .replace('–', "\\[en]") // \u{2013} en-dash + .replace('—', "\\[em]") // \u{2014} em-dash + .replace('‘', "\\[oq]") // \u{2018} left single quote + .replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe + .replace('“', "\\[lq]") // \u{201C} left double quote + .replace('”', "\\[rq]") // \u{201D} right double quote + .replace('…', "\\[u2026]") // \u{2026} ellipsis + .replace('│', "|") // \u{2502} box drawing light vertical (could use \[br]) + .replace('├', "|") // \u{251C} box drawings light vertical and right + .replace('└', "`") // \u{2514} box drawings light up and right + .replace('─', "\\-") // \u{2500} box drawing light horizontal + ; + if replaced.starts_with('.') { + replaced = format!("\\&.{}", &replaced[1..]); + } + + if let Some(ch) = replaced.chars().find(|ch| { + !matches!(ch, '\n' | ' ' | '!'..='/' | '0'..='9' + | ':'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~') + }) { + bail!( + "character {:?} is not allowed (update the translation table if needed)", + ch + ); + } + Ok(replaced) +} |