summaryrefslogtreecommitdiffstats
path: root/crates/mdman/src
diff options
context:
space:
mode:
Diffstat (limited to 'crates/mdman/src')
-rw-r--r--crates/mdman/src/format.rs20
-rw-r--r--crates/mdman/src/format/man.rs436
-rw-r--r--crates/mdman/src/format/md.rs112
-rw-r--r--crates/mdman/src/format/text.rs605
-rw-r--r--crates/mdman/src/hbs.rs215
-rw-r--r--crates/mdman/src/lib.rs122
-rw-r--r--crates/mdman/src/main.rs133
-rw-r--r--crates/mdman/src/util.rs44
8 files changed, 1687 insertions, 0 deletions
diff --git a/crates/mdman/src/format.rs b/crates/mdman/src/format.rs
new file mode 100644
index 0000000..7bc9781
--- /dev/null
+++ b/crates/mdman/src/format.rs
@@ -0,0 +1,20 @@
+use anyhow::Error;
+
+pub mod man;
+pub mod md;
+pub mod text;
+
+/// Interface implemented by each output format.
+///
+/// `render` converts a whole markdown document; the other methods produce the
+/// fragments substituted for the `{{#options}}`, `{{#option}}` and `{{man}}`
+/// template constructs.
+pub trait Formatter {
+    /// Converts the markdown in `input` into this formatter's output.
+    fn render(&self, input: &str) -> Result<String, Error>;
+
+    /// Text emitted where a block of options opens (`{{#options}}`).
+    fn render_options_start(&self) -> &'static str;
+
+    /// Text emitted where a block of options closes (`{{/options}}`).
+    fn render_options_end(&self) -> &'static str;
+
+    /// Produces the output for a single option (`{{#option}}`).
+    fn render_option(
+        &self,
+        params: &[&str],
+        block: &str,
+        man_name: &str,
+    ) -> Result<String, Error>;
+
+    /// Rewrites a man page reference as markdown suitable for this format.
+    ///
+    /// Invoked for `{{man name section}}`.
+    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error>;
+}
diff --git a/crates/mdman/src/format/man.rs b/crates/mdman/src/format/man.rs
new file mode 100644
index 0000000..9767fdd
--- /dev/null
+++ b/crates/mdman/src/format/man.rs
@@ -0,0 +1,436 @@
+//! Man-page formatter.
+
+use crate::util::{header_text, parse_name_and_section};
+use crate::EventIter;
+use anyhow::{bail, Error};
+use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag};
+use std::fmt::Write;
+use url::Url;
+
+/// Formatter that renders markdown as man-page (roff) source.
+pub struct ManFormatter {
+ /// Handed to `crate::md_parser` (via `ManRenderer::render`) on every render.
+ url: Option<Url>,
+}
+
+impl ManFormatter {
+    /// Creates a man-page formatter; `url` is forwarded to the markdown parser
+    /// each time something is rendered.
+    pub fn new(url: Option<Url>) -> Self {
+        Self { url }
+    }
+}
+
+impl super::Formatter for ManFormatter {
+    /// Renders `input` through a fresh `ManRenderer`.
+    fn render(&self, input: &str) -> Result<String, Error> {
+        let url = self.url.clone();
+        ManRenderer::render(input, url)
+    }
+
+    fn render_options_start(&self) -> &'static str {
+        // A CDATA opener makes pulldown_cmark leave the enclosed option
+        // markup alone; the renderer strips the marker out again later.
+        "<![CDATA["
+    }
+
+    fn render_options_end(&self) -> &'static str {
+        "]]>"
+    }
+
+    /// Renders each entry of `params` and the description `block`, then
+    /// lays them out as a comma-separated term line followed by an indented
+    /// description.
+    fn render_option(
+        &self,
+        params: &[&str],
+        block: &str,
+        _man_name: &str,
+    ) -> Result<String, Error> {
+        // Each fragment is rendered as a paragraph, so drop the surrounding
+        // whitespace and the leading `.sp` request it starts with.
+        let mut names = Vec::with_capacity(params.len());
+        for param in params {
+            let roff = self.render(param)?;
+            names.push(roff.trim().trim_start_matches(".sp").to_string());
+        }
+        let description = self.render(block)?;
+        let description = description.trim().trim_start_matches(".sp").trim();
+        let names = names.join(", ");
+        // .RS 4 shifts the left margin right by 4; .RE moves it back one level.
+        Ok(format!(
+            "\n.sp\n{}\n.RS 4\n{}\n.RE\n",
+            names, description
+        ))
+    }
+
+    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
+        let reference = format!("`{}`({})", name, section);
+        Ok(reference)
+    }
+}
+
+/// A font style that can be pushed onto the renderer's font stack.
+#[derive(Copy, Clone)]
+enum Font {
+ Bold,
+ Italic,
+}
+
+impl Font {
+    /// Returns the roff font escape matching the combination of styles
+    /// currently present anywhere on `font_stack`.
+    fn str_from_stack(font_stack: &[Font]) -> &'static str {
+        let mut bold = false;
+        let mut italic = false;
+        for font in font_stack {
+            match font {
+                Font::Bold => bold = true,
+                Font::Italic => italic = true,
+            }
+        }
+        if bold && italic {
+            "\\f(BI" // bold italic
+        } else if bold {
+            "\\fB" // bold
+        } else if italic {
+            "\\fI" // italic
+        } else {
+            "\\fR" // roman (normal)
+        }
+    }
+}
+
+/// State for one markdown-to-roff rendering pass.
+struct ManRenderer<'e> {
+ /// The roff text generated so far.
+ output: String,
+ /// Source of markdown events.
+ parser: EventIter<'e>,
+ /// Fonts currently in effect, innermost last (see `push_font`/`pop_font`).
+ font_stack: Vec<Font>,
+}
+
+impl<'e> ManRenderer<'e> {
+ /// Parses `input` as markdown and returns the roff generated from it.
+ fn render(input: &str, url: Option<Url>) -> Result<String, Error> {
+ let parser = crate::md_parser(input, url);
+ let output = String::with_capacity(input.len() * 3 / 2);
+ let mut mr = ManRenderer {
+ parser,
+ output,
+ font_stack: Vec::new(),
+ };
+ mr.push_man()?;
+ Ok(mr.output)
+ }
+
+ /// Consumes every remaining parser event, appending the corresponding
+ /// roff to `self.output`.
+ fn push_man(&mut self) -> Result<(), Error> {
+ // If this is true, this is inside a cdata block used for hiding
+ // content from pulldown_cmark.
+ let mut in_cdata = false;
+ // The current list stack. None if unordered, Some if ordered with the
+ // given number as the current index.
+ let mut list: Vec<Option<u64>> = Vec::new();
+ // Used in some cases where spacing isn't desired.
+ let mut suppress_paragraph = false;
+ let mut table_cell_index = 0;
+
+ while let Some((event, range)) = self.parser.next() {
+ // Suppression requested by the previous event applies to this
+ // event only; it is cleared before the event is handled.
+ let this_suppress_paragraph = suppress_paragraph;
+ suppress_paragraph = false;
+ match event {
+ Event::Start(tag) => {
+ match tag {
+ Tag::Paragraph => {
+ if !this_suppress_paragraph {
+ self.flush();
+ self.output.push_str(".sp\n");
+ }
+ }
+ Tag::Heading(level, ..) => {
+ if level == HeadingLevel::H1 {
+ self.push_top_header()?;
+ } else if level == HeadingLevel::H2 {
+ // Section header
+ let text = header_text(&mut self.parser)?;
+ self.flush();
+ write!(self.output, ".SH \"{}\"\n", text)?;
+ suppress_paragraph = true;
+ } else {
+ // Subsection header
+ let text = header_text(&mut self.parser)?;
+ self.flush();
+ write!(self.output, ".SS \"{}\"\n", text)?;
+ suppress_paragraph = true;
+ }
+ }
+ Tag::BlockQuote => {
+ self.flush();
+ // .RS = move left margin over 3
+ // .ll = shrink line length
+ self.output.push_str(".RS 3\n.ll -5\n.sp\n");
+ suppress_paragraph = true;
+ }
+ Tag::CodeBlock(_kind) => {
+ // space down, indent 4, no-fill mode
+ self.flush();
+ self.output.push_str(".sp\n.RS 4\n.nf\n");
+ }
+ Tag::List(start) => list.push(start),
+ Tag::Item => {
+ // Note: This uses explicit movement instead of .IP
+ // because the spacing on .IP looks weird to me.
+ // space down, indent 4
+ self.flush();
+ self.output.push_str(".sp\n.RS 4\n");
+ match list.last_mut().expect("item must have list start") {
+ // Ordered list.
+ Some(n) => {
+ // move left 4, output the list index number, move right 1.
+ write!(self.output, "\\h'-04' {}.\\h'+01'", n)?;
+ *n += 1;
+ }
+ // Unordered list.
+ None => self.output.push_str("\\h'-04'\\(bu\\h'+02'"),
+ }
+ suppress_paragraph = true;
+ }
+ Tag::FootnoteDefinition(_label) => unimplemented!(),
+ Tag::Table(alignment) => {
+ // Table start
+ // allbox = draw a box around all the cells
+ // tab(:) = Use `:` to separate cell data (instead of tab)
+ // ; = end of options
+ self.output.push_str(
+ "\n.TS\n\
+ allbox tab(:);\n",
+ );
+ let alignments: Vec<_> = alignment
+ .iter()
+ .map(|a| match a {
+ Alignment::Left | Alignment::None => "lt",
+ Alignment::Center => "ct",
+ Alignment::Right => "rt",
+ })
+ .collect();
+ self.output.push_str(&alignments.join(" "));
+ self.output.push_str(".\n");
+ table_cell_index = 0;
+ }
+ Tag::TableHead => {
+ table_cell_index = 0;
+ }
+ Tag::TableRow => {
+ table_cell_index = 0;
+ self.output.push('\n');
+ }
+ Tag::TableCell => {
+ if table_cell_index != 0 {
+ // Separator between columns.
+ self.output.push(':');
+ }
+ // Start a text block.
+ self.output.push_str("T{\n");
+ table_cell_index += 1
+ }
+ Tag::Emphasis => self.push_font(Font::Italic),
+ Tag::Strong => self.push_font(Font::Bold),
+ // Strikethrough isn't usually supported for TTY.
+ Tag::Strikethrough => self.output.push_str("~~"),
+ Tag::Link(link_type, dest_url, _title) => {
+ if dest_url.starts_with('#') {
+ // In a man page, page-relative anchors don't
+ // have much meaning.
+ continue;
+ }
+ match link_type {
+ LinkType::Autolink | LinkType::Email => {
+ // The text is a copy of the URL, which is not needed.
+ match self.parser.next() {
+ Some((Event::Text(_), _range)) => {}
+ _ => bail!("expected text after autolink"),
+ }
+ }
+ LinkType::Inline
+ | LinkType::Reference
+ | LinkType::Collapsed
+ | LinkType::Shortcut => {
+ self.push_font(Font::Italic);
+ }
+ // This is currently unused. This is only
+ // emitted with a broken link callback, but I
+ // felt it is too annoying to escape `[` in
+ // option descriptions.
+ LinkType::ReferenceUnknown
+ | LinkType::CollapsedUnknown
+ | LinkType::ShortcutUnknown => {
+ bail!(
+ "link with missing reference `{}` located at offset {}",
+ dest_url,
+ range.start
+ );
+ }
+ }
+ }
+ Tag::Image(_link_type, _dest_url, _title) => {
+ bail!("images are not currently supported")
+ }
+ }
+ }
+ Event::End(tag) => {
+ match &tag {
+ Tag::Paragraph => self.flush(),
+ Tag::Heading(..) => {}
+ Tag::BlockQuote => {
+ self.flush();
+ // restore left margin, restore line length
+ self.output.push_str(".br\n.RE\n.ll\n");
+ }
+ Tag::CodeBlock(_kind) => {
+ self.flush();
+ // Restore fill mode, move margin back one level.
+ self.output.push_str(".fi\n.RE\n");
+ }
+ Tag::List(_) => {
+ list.pop();
+ }
+ Tag::Item => {
+ self.flush();
+ // Move margin back one level.
+ self.output.push_str(".RE\n");
+ }
+ Tag::FootnoteDefinition(_label) => {}
+ Tag::Table(_) => {
+ // Table end
+ // I don't know why, but the .sp is needed to provide
+ // space with the following content.
+ self.output.push_str("\n.TE\n.sp\n");
+ }
+ Tag::TableHead => {}
+ Tag::TableRow => {}
+ Tag::TableCell => {
+ // End text block.
+ self.output.push_str("\nT}");
+ }
+ Tag::Emphasis | Tag::Strong => self.pop_font(),
+ Tag::Strikethrough => self.output.push_str("~~"),
+ Tag::Link(link_type, dest_url, _title) => {
+ if dest_url.starts_with('#') {
+ continue;
+ }
+ match link_type {
+ LinkType::Autolink | LinkType::Email => {}
+ LinkType::Inline
+ | LinkType::Reference
+ | LinkType::Collapsed
+ | LinkType::Shortcut => {
+ self.pop_font();
+ self.output.push(' ');
+ }
+ _ => {
+ panic!("unexpected tag {:?}", tag);
+ }
+ }
+ // The destination is written after the link text, in angle brackets.
+ write!(self.output, "<{}>", escape(&dest_url)?)?;
+ }
+ Tag::Image(_link_type, _dest_url, _title) => {}
+ }
+ }
+ Event::Text(t) => {
+ self.output.push_str(&escape(&t)?);
+ }
+ Event::Code(t) => {
+ self.push_font(Font::Bold);
+ self.output.push_str(&escape(&t)?);
+ self.pop_font();
+ }
+ Event::Html(t) => {
+ if t.starts_with("<![CDATA[") {
+ // CDATA is a special marker used for handling options.
+ in_cdata = true;
+ } else if in_cdata {
+ if t.trim().ends_with("]]>") {
+ in_cdata = false;
+ } else if !t.trim().is_empty() {
+ // Content inside the CDATA block is copied through without escaping.
+ self.output.push_str(&t);
+ }
+ } else {
+ self.output.push_str(&escape(&t)?);
+ }
+ }
+ Event::FootnoteReference(_t) => {}
+ Event::SoftBreak => self.output.push('\n'),
+ Event::HardBreak => {
+ self.flush();
+ self.output.push_str(".br\n");
+ }
+ Event::Rule => {
+ self.flush();
+ // \l' **length** ' Draw horizontal line (default underscore).
+ // \n(.lu Gets value from register "lu" (current line length)
+ self.output.push_str("\\l'\\n(.lu'\n");
+ }
+ Event::TaskListMarker(_b) => unimplemented!(),
+ }
+ }
+ Ok(())
+ }
+
+ /// Appends a newline unless the output already ends with one, so that the
+ /// next thing written starts on its own line.
+ fn flush(&mut self) {
+ if !self.output.ends_with('\n') {
+ self.output.push('\n');
+ }
+ }
+
+ /// Switch to the given font.
+ ///
+ /// Because the troff sequence `\fP` for switching to the "previous" font
+ /// doesn't support nesting, this needs to emulate it here. This is needed
+ /// for situations like **hi _there_**.
+ fn push_font(&mut self, font: Font) {
+ self.font_stack.push(font);
+ self.output.push_str(Font::str_from_stack(&self.font_stack));
+ }
+
+ /// Drops the most recently pushed font and emits the escape for whatever
+ /// fonts remain on the stack.
+ fn pop_font(&mut self) {
+ self.font_stack.pop();
+ self.output.push_str(Font::str_from_stack(&self.font_stack));
+ }
+
+ /// Parse and render the first top-level header of the document.
+ fn push_top_header(&mut self) -> Result<(), Error> {
+ // This enables the tbl preprocessor for tables.
+ // This seems to be enabled by default on every modern system I could
+ // find, but it doesn't seem to hurt to enable this.
+ self.output.push_str("'\\\" t\n");
+ // Extract the name of the man page.
+ let text = header_text(&mut self.parser)?;
+ let (name, section) = parse_name_and_section(&text)?;
+ // .TH = Table header
+ // .nh = disable hyphenation
+ // .ad l = Left-adjust mode (disable justified).
+ // .ss sets sentence_space_size to 0 (prevents double spaces after .
+ // if . is last on the line)
+ write!(
+ self.output,
+ ".TH \"{}\" \"{}\"\n\
+ .nh\n\
+ .ad l\n\
+ .ss \\n[.ss] 0\n",
+ escape(&name.to_uppercase())?,
+ section
+ )?;
+ Ok(())
+ }
+}
+
+/// Escapes `s` so it can be written into roff output as literal text.
+///
+/// Backslashes and hyphens are escaped, a fixed table of non-ASCII characters
+/// is translated to roff character escapes (or ASCII stand-ins), and every
+/// line of the result that begins with a roff control character (`.` or `'`)
+/// is prefixed with the zero-width `\&` so it cannot be read as a request.
+///
+/// # Errors
+///
+/// Fails if, after translation, the text still contains a character other
+/// than a newline or printable ASCII (space through `~`).
+fn escape(s: &str) -> Result<String, Error> {
+    // Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html.
+    // Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`.
+
+    // The backslash replacement must run first so that the backslashes
+    // introduced by the later replacements are not escaped again.
+    let replaced = s
+        .replace('\\', "\\(rs")
+        .replace('-', "\\-")
+        .replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable)
+        .replace('–', "\\[en]") // \u{2013} en-dash
+        .replace('—', "\\[em]") // \u{2014} em-dash
+        .replace('‘', "\\[oq]") // \u{2018} left single quote
+        .replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe
+        .replace('“', "\\[lq]") // \u{201C} left double quote
+        .replace('”', "\\[rq]") // \u{201D} right double quote
+        .replace('…', "\\[u2026]") // \u{2026} ellipsis
+        .replace('│', "|") // \u{2502} box drawing light vertical (could use \[br])
+        .replace('├', "|") // \u{251C} box drawings light vertical and right
+        .replace('└', "`") // \u{2514} box drawings light up and right
+        .replace('─', "\\-") // \u{2500} box drawing light horizontal
+        ;
+
+    // Guard every line, not just the first: multi-line text (for example
+    // inside a code block) may have a control character after any newline.
+    let mut guarded = String::with_capacity(replaced.len() + 2);
+    for (index, line) in replaced.split('\n').enumerate() {
+        if index > 0 {
+            guarded.push('\n');
+        }
+        if line.starts_with('.') || line.starts_with('\'') {
+            guarded.push_str("\\&");
+        }
+        guarded.push_str(line);
+    }
+
+    if let Some(ch) = guarded
+        .chars()
+        .find(|ch| !matches!(ch, '\n' | ' '..='~'))
+    {
+        bail!(
+            "character {:?} is not allowed (update the translation table if needed)",
+            ch
+        );
+    }
+    Ok(guarded)
+}
diff --git a/crates/mdman/src/format/md.rs b/crates/mdman/src/format/md.rs
new file mode 100644
index 0000000..0e1c498
--- /dev/null
+++ b/crates/mdman/src/format/md.rs
@@ -0,0 +1,112 @@
+//! Markdown formatter.
+
+use crate::util::unwrap;
+use crate::ManMap;
+use anyhow::{bail, format_err, Error};
+use std::fmt::Write;
+
+/// Formatter whose output is markdown (with HTML for option lists).
+pub struct MdFormatter {
+ /// Looked up by `linkify_man_to_md` with a `(name, section)` key to find a link target.
+ man_map: ManMap,
+}
+
+impl MdFormatter {
+    /// Creates a markdown formatter that resolves man page links via `man_map`.
+    pub fn new(man_map: ManMap) -> Self {
+        Self { man_map }
+    }
+}
+
+impl MdFormatter {
+    /// Renders the markdown in `input` to an HTML string.
+    fn render_html(&self, input: &str) -> Result<String, Error> {
+        // The parser yields (event, range) pairs; only the events are needed.
+        let events = crate::md_parser(input, None).map(|(event, _range)| event);
+        let mut html = String::with_capacity(input.len() * 3 / 2);
+        pulldown_cmark::html::push_html(&mut html, events);
+        Ok(html)
+    }
+}
+
+impl super::Formatter for MdFormatter {
+    /// Returns `input` unchanged apart from converting CRLF line endings to LF.
+    fn render(&self, input: &str) -> Result<String, Error> {
+        let normalized = input.replace("\r\n", "\n");
+        Ok(normalized)
+    }
+
+    fn render_options_start(&self) -> &'static str {
+        "<dl>"
+    }
+
+    fn render_options_end(&self) -> &'static str {
+        "</dl>"
+    }
+
+    /// Emits one `<dt>` per entry of `params` followed by a single `<dd>`
+    /// holding the rendered `block`.
+    fn render_option(&self, params: &[&str], block: &str, man_name: &str) -> Result<String, Error> {
+        let mut html = String::new();
+
+        for param in params {
+            let rendered = self.render_html(param)?;
+            let term = unwrap(&rendered, "<p>", "</p>");
+            // The anchor id is built from the first whitespace-delimited
+            // token of the term with its markup removed.
+            let first = match term.split_whitespace().next() {
+                Some(token) => token,
+                None => {
+                    return Err(format_err!(
+                        "did not expect option `{}` to be empty",
+                        param
+                    ))
+                }
+            };
+            let anchor = trim_tags(first);
+            if anchor.is_empty() {
+                bail!("unexpected empty option with no tags `{}`", param);
+            }
+            let id = format!("option-{}-{}", man_name, anchor);
+            write!(
+                html,
+                "<dt class=\"option-term\" id=\"{ID}\">\
+                 <a class=\"option-anchor\" href=\"#{ID}\"></a>{OPTION}</dt>\n",
+                ID = id,
+                OPTION = term
+            )?;
+        }
+
+        let description = self.render_html(block)?;
+        write!(
+            html,
+            "<dd class=\"option-desc\">{}</dd>\n",
+            unwrap(&description, "<p>", "</p>")
+        )?;
+        Ok(html)
+    }
+
+    /// Produces a markdown link for `name(section)`, using the target from
+    /// the man map when there is one and `<name>.html` otherwise.
+    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
+        let key = (name.to_string(), section);
+        if let Some(link) = self.man_map.get(&key) {
+            return Ok(format!("[{}({})]({})", name, section, link));
+        }
+        Ok(format!("[{}({})]({}.html)", name, section, name))
+    }
+}
+
+/// Returns `s` with HTML tags (`<...>`) and character references (`&...;`)
+/// removed.
+///
+/// This is a simple scanner rather than an HTML parser.
+///
+/// # Panics
+///
+/// Panics if a `<` appears inside a tag or a `&` appears inside a character
+/// reference.
+fn trim_tags(s: &str) -> String {
+    let mut kept = String::new();
+    let mut inside_tag = false;
+    let mut inside_char_ref = false;
+    for ch in s.chars() {
+        match ch {
+            '<' => {
+                if inside_tag {
+                    panic!("unexpected nested tag");
+                }
+                inside_tag = true;
+            }
+            '&' => {
+                if inside_char_ref {
+                    panic!("unexpected nested char ref");
+                }
+                inside_char_ref = true;
+            }
+            '>' if inside_tag => inside_tag = false,
+            ';' if inside_char_ref => inside_char_ref = false,
+            _ => {
+                if !inside_tag && !inside_char_ref {
+                    kept.push(ch);
+                }
+            }
+        }
+    }
+    kept
+}
diff --git a/crates/mdman/src/format/text.rs b/crates/mdman/src/format/text.rs
new file mode 100644
index 0000000..ae07985
--- /dev/null
+++ b/crates/mdman/src/format/text.rs
@@ -0,0 +1,605 @@
+//! Text formatter.
+
+use crate::util::{header_text, unwrap};
+use crate::EventIter;
+use anyhow::{bail, Error};
+use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag};
+use std::fmt::Write;
+use std::mem;
+use url::Url;
+
+/// Formatter that renders markdown as plain text.
+pub struct TextFormatter {
+ /// Handed to `TextRenderer::render` on every render.
+ url: Option<Url>,
+}
+
+impl TextFormatter {
+    /// Creates a text formatter; `url` is forwarded to the renderer each time
+    /// something is rendered.
+    pub fn new(url: Option<Url>) -> Self {
+        Self { url }
+    }
+}
+
+impl super::Formatter for TextFormatter {
+    /// Renders `input` as text starting with no indentation.
+    fn render(&self, input: &str) -> Result<String, Error> {
+        let url = self.url.clone();
+        TextRenderer::render(input, url, 0)
+    }
+
+    fn render_options_start(&self) -> &'static str {
+        // A CDATA opener makes pulldown_cmark leave the enclosed option
+        // markup alone; the renderer strips the marker out again later.
+        "<![CDATA["
+    }
+
+    fn render_options_end(&self) -> &'static str {
+        "]]>"
+    }
+
+    /// Renders each entry of `params`, joins them with commas, and wraps the
+    /// result and the unrendered `block` in placeholder HTML tags.
+    fn render_option(
+        &self,
+        params: &[&str],
+        block: &str,
+        _man_name: &str,
+    ) -> Result<String, Error> {
+        let mut rendered = Vec::with_capacity(params.len());
+        for param in params {
+            rendered.push(TextRenderer::render(param, self.url.clone(), 0)?);
+        }
+        let names = rendered
+            .iter()
+            .map(|name| name.trim())
+            .collect::<Vec<_>>()
+            .join(", ");
+        // The HTML tags are markers only; the renderer interprets and removes
+        // them when the surrounding document is rendered.
+        Ok(format!(
+            "<dt>{}</dt>\n<dd>{}</dd>\n<br>\n",
+            names, block
+        ))
+    }
+
+    fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> {
+        let reference = format!("`{}`({})", name, section);
+        Ok(reference)
+    }
+}
+
+/// State for one markdown-to-text rendering pass.
+struct TextRenderer<'e> {
+ /// Completed lines; this is what `render` returns.
+ output: String,
+ /// Number of spaces written at the start of each new line by `flush_word`.
+ indent: usize,
+ /// The current line being written. Once a line break is encountered (such
+ /// as starting a new paragraph), this will be written to `output` via
+ /// `flush`.
+ line: String,
+ /// The current word being written. Once a break is encountered (such as a
+ /// space) this will be written to `line` via `flush_word`.
+ word: String,
+ /// Source of markdown events.
+ parser: EventIter<'e>,
+ /// The base URL used for relative URLs.
+ url: Option<Url>,
+ /// Scratch state for the table being rendered; replaced with a fresh
+ /// `Table` after each table is emitted.
+ table: Table,
+}
+
+impl<'e> TextRenderer<'e> {
+ /// Parses `input` as markdown and returns it rendered as text, with
+ /// `indent` as the starting indentation.
+ fn render(input: &str, url: Option<Url>, indent: usize) -> Result<String, Error> {
+ let parser = crate::md_parser(input, url.clone());
+ let output = String::with_capacity(input.len() * 3 / 2);
+ let mut mr = TextRenderer {
+ output,
+ indent,
+ line: String::new(),
+ word: String::new(),
+ parser,
+ url,
+ table: Table::new(),
+ };
+ mr.push_md()?;
+ Ok(mr.output)
+ }
+
+ /// Consumes every remaining parser event, appending the corresponding
+ /// text to `self.output`.
+ fn push_md(&mut self) -> Result<(), Error> {
+ // If this is true, this is inside a cdata block used for hiding
+ // content from pulldown_cmark.
+ let mut in_cdata = false;
+ // The current list stack. None if unordered, Some if ordered with the
+ // given number as the current index.
+ let mut list: Vec<Option<u64>> = Vec::new();
+ // Used in some cases where spacing isn't desired.
+ let mut suppress_paragraph = false;
+ // Whether or not word-wrapping is enabled.
+ let mut wrap_text = true;
+
+ while let Some((event, range)) = self.parser.next() {
+ let this_suppress_paragraph = suppress_paragraph;
+ // Always reset suppression, even if the next event isn't a
+ // paragraph. This is in essence, a 1-token lookahead where the
+ // suppression is only enabled if the next event is a paragraph.
+ suppress_paragraph = false;
+ match event {
+ Event::Start(tag) => {
+ match tag {
+ Tag::Paragraph => {
+ if !this_suppress_paragraph {
+ self.flush();
+ }
+ }
+ Tag::Heading(level, ..) => {
+ self.flush();
+ if level == HeadingLevel::H1 {
+ let text = header_text(&mut self.parser)?;
+ self.push_to_line(&text.to_uppercase());
+ self.hard_break();
+ self.hard_break();
+ } else if level == HeadingLevel::H2 {
+ let text = header_text(&mut self.parser)?;
+ self.push_to_line(&text.to_uppercase());
+ self.flush();
+ self.indent = 7;
+ } else {
+ // Deeper headings are indented according to their level,
+ // and so is the text that follows them.
+ let text = header_text(&mut self.parser)?;
+ self.push_indent((level as usize - 2) * 3);
+ self.push_to_line(&text);
+ self.flush();
+ self.indent = (level as usize - 1) * 3 + 1;
+ }
+ }
+ Tag::BlockQuote => {
+ self.indent += 3;
+ }
+ Tag::CodeBlock(_kind) => {
+ self.flush();
+ wrap_text = false;
+ self.indent += 4;
+ }
+ Tag::List(start) => list.push(start),
+ Tag::Item => {
+ self.flush();
+ match list.last_mut().expect("item must have list start") {
+ // Ordered list.
+ Some(n) => {
+ self.push_indent(self.indent);
+ write!(self.line, "{}.", n)?;
+ *n += 1;
+ }
+ // Unordered list.
+ None => {
+ self.push_indent(self.indent);
+ self.push_to_line("o ")
+ }
+ }
+ self.indent += 3;
+ suppress_paragraph = true;
+ }
+ Tag::FootnoteDefinition(_label) => unimplemented!(),
+ Tag::Table(alignment) => {
+ // `Table::process` consumes all events up to the end of the
+ // table and returns it fully rendered.
+ assert!(self.table.alignment.is_empty());
+ self.flush();
+ self.table.alignment.extend(alignment);
+ let table = self.table.process(&mut self.parser, self.indent)?;
+ self.output.push_str(&table);
+ self.hard_break();
+ self.table = Table::new();
+ }
+ Tag::TableHead | Tag::TableRow | Tag::TableCell => {
+ bail!("unexpected table element")
+ }
+ Tag::Emphasis => {}
+ Tag::Strong => {}
+ // Strikethrough isn't usually supported for TTY.
+ Tag::Strikethrough => self.word.push_str("~~"),
+ Tag::Link(link_type, dest_url, _title) => {
+ if dest_url.starts_with('#') {
+ // In a man page, page-relative anchors don't
+ // have much meaning.
+ continue;
+ }
+ match link_type {
+ LinkType::Autolink | LinkType::Email => {
+ // The text is a copy of the URL, which is not needed.
+ match self.parser.next() {
+ Some((Event::Text(_), _range)) => {}
+ _ => bail!("expected text after autolink"),
+ }
+ }
+ LinkType::Inline
+ | LinkType::Reference
+ | LinkType::Collapsed
+ | LinkType::Shortcut => {}
+ // This is currently unused. This is only
+ // emitted with a broken link callback, but I
+ // felt it is too annoying to escape `[` in
+ // option descriptions.
+ LinkType::ReferenceUnknown
+ | LinkType::CollapsedUnknown
+ | LinkType::ShortcutUnknown => {
+ bail!(
+ "link with missing reference `{}` located at offset {}",
+ dest_url,
+ range.start
+ );
+ }
+ }
+ }
+ Tag::Image(_link_type, _dest_url, _title) => {
+ bail!("images are not currently supported")
+ }
+ }
+ }
+ Event::End(tag) => match &tag {
+ Tag::Paragraph => {
+ self.flush();
+ self.hard_break();
+ }
+ Tag::Heading(..) => {}
+ Tag::BlockQuote => {
+ self.indent -= 3;
+ }
+ Tag::CodeBlock(_kind) => {
+ self.hard_break();
+ wrap_text = true;
+ self.indent -= 4;
+ }
+ Tag::List(_) => {
+ list.pop();
+ }
+ Tag::Item => {
+ self.flush();
+ self.indent -= 3;
+ self.hard_break();
+ }
+ Tag::FootnoteDefinition(_label) => {}
+ Tag::Table(_) => {}
+ Tag::TableHead => {}
+ Tag::TableRow => {}
+ Tag::TableCell => {}
+ Tag::Emphasis => {}
+ Tag::Strong => {}
+ Tag::Strikethrough => self.word.push_str("~~"),
+ Tag::Link(link_type, dest_url, _title) => {
+ if dest_url.starts_with('#') {
+ continue;
+ }
+ match link_type {
+ LinkType::Autolink | LinkType::Email => {}
+ LinkType::Inline
+ | LinkType::Reference
+ | LinkType::Collapsed
+ | LinkType::Shortcut => self.flush_word(),
+ _ => {
+ panic!("unexpected tag {:?}", tag);
+ }
+ }
+ // The destination is written after the link text, in angle brackets.
+ self.flush_word();
+ write!(self.word, "<{}>", dest_url)?;
+ }
+ Tag::Image(_link_type, _dest_url, _title) => {}
+ },
+ Event::Text(t) | Event::Code(t) => {
+ if wrap_text {
+ let chunks = split_chunks(&t);
+ for chunk in chunks {
+ if chunk == " " {
+ self.flush_word();
+ } else {
+ self.word.push_str(chunk);
+ }
+ }
+ } else {
+ // Wrapping is off (code block): emit each line as-is, indented.
+ for line in t.lines() {
+ self.push_indent(self.indent);
+ self.push_to_line(line);
+ self.flush();
+ }
+ }
+ }
+ Event::Html(t) => {
+ if t.starts_with("<![CDATA[") {
+ // CDATA is a special marker used for handling options.
+ in_cdata = true;
+ self.flush();
+ } else if in_cdata {
+ if t.trim().ends_with("]]>") {
+ in_cdata = false;
+ } else {
+ let trimmed = t.trim();
+ if trimmed.is_empty() {
+ continue;
+ }
+ if trimmed == "<br>" {
+ self.hard_break();
+ } else if trimmed.starts_with("<dt>") {
+ let opts = unwrap(trimmed, "<dt>", "</dt>");
+ self.push_indent(self.indent);
+ self.push_to_line(opts);
+ self.flush();
+ } else if trimmed.starts_with("<dd>") {
+ // Collect the HTML events up to `</dd>` and render them as a
+ // nested document indented 4 further than the current indent.
+ let mut def = String::new();
+ while let Some((Event::Html(t), _range)) = self.parser.next() {
+ if t.starts_with("</dd>") {
+ break;
+ }
+ def.push_str(&t);
+ }
+ let rendered =
+ TextRenderer::render(&def, self.url.clone(), self.indent + 4)?;
+ self.push_to_line(rendered.trim_end());
+ self.flush();
+ } else {
+ self.push_to_line(&t);
+ self.flush();
+ }
+ }
+ } else {
+ self.push_to_line(&t);
+ self.flush();
+ }
+ }
+ Event::FootnoteReference(_t) => {}
+ Event::SoftBreak => self.flush_word(),
+ Event::HardBreak => self.flush(),
+ Event::Rule => {
+ self.flush();
+ self.push_indent(self.indent);
+ self.push_to_line(&"_".repeat(79 - self.indent * 2));
+ self.flush();
+ }
+ Event::TaskListMarker(_b) => unimplemented!(),
+ }
+ }
+ Ok(())
+ }
+
+ /// Moves the pending word onto the line and, if the line is non-empty,
+ /// writes it to `output` followed by a newline.
+ fn flush(&mut self) {
+ self.flush_word();
+ if !self.line.is_empty() {
+ self.output.push_str(&self.line);
+ self.output.push('\n');
+ self.line.clear();
+ }
+ }
+
+ /// Flushes the current line and makes sure the output ends with a blank
+ /// line (two consecutive newlines).
+ fn hard_break(&mut self) {
+ self.flush();
+ if !self.output.ends_with("\n\n") {
+ self.output.push('\n');
+ }
+ }
+
+ /// Appends the pending word to the current line, first wrapping to a new
+ /// line when line plus word would reach 79 bytes.
+ fn flush_word(&mut self) {
+ if self.word.is_empty() {
+ return;
+ }
+ // NOTE(review): this also fires when `line` is empty and `word` alone is
+ // 79 bytes or longer, which writes a bare newline to `output` — confirm
+ // whether that blank line is intended.
+ if self.line.len() + self.word.len() >= 79 {
+ self.output.push_str(&self.line);
+ self.output.push('\n');
+ self.line.clear();
+ }
+ if self.line.is_empty() {
+ self.push_indent(self.indent);
+ self.line.push_str(&self.word);
+ } else {
+ self.line.push(' ');
+ self.line.push_str(&self.word);
+ }
+ self.word.clear();
+ }
+
+ /// Appends `indent` spaces to the current line.
+ fn push_indent(&mut self, indent: usize) {
+ for _ in 0..indent {
+ self.line.push(' ');
+ }
+ }
+
+ /// Flushes the pending word, then appends `text` to the current line
+ /// without any wrapping or separator.
+ fn push_to_line(&mut self, text: &str) {
+ self.flush_word();
+ self.line.push_str(text);
+ }
+}
+
+/// Splits the text on ASCII spaces.
+///
+/// Each run of consecutive spaces is collapsed to a single `" "` element,
+/// which appears in the result between the surrounding pieces of text. Other
+/// whitespace characters are not treated as separators.
+fn split_chunks(text: &str) -> Vec<&str> {
+    let mut chunks = Vec::new();
+    let mut rest = text;
+    while !rest.is_empty() {
+        let space = match rest.find(' ') {
+            Some(pos) => pos,
+            None => {
+                // No more separators: the remainder is the final chunk.
+                chunks.push(rest);
+                break;
+            }
+        };
+        if space > 0 {
+            chunks.push(&rest[..space]);
+        }
+        chunks.push(" ");
+        // Skip the whole run of spaces.
+        rest = rest[space..].trim_start_matches(' ');
+    }
+    chunks
+}
+
+/// Accumulates the contents of one markdown table before it is rendered as text.
+struct Table {
+ /// Column alignments, one per column.
+ alignment: Vec<Alignment>,
+ /// Completed rows (the header row included), each a list of cell texts.
+ rows: Vec<Vec<String>>,
+ /// Cells of the row currently being read.
+ row: Vec<String>,
+ /// Text of the cell currently being read.
+ cell: String,
+}
+
+impl Table {
+    /// Creates an empty table with no columns or rows.
+    fn new() -> Table {
+        Table {
+            alignment: Vec::new(),
+            rows: Vec::new(),
+            row: Vec::new(),
+            cell: String::new(),
+        }
+    }
+
+    /// Reads table events from `parser` until the end of the table and
+    /// returns the table rendered as text, indented by `indent` spaces.
+    ///
+    /// Fails on HTML, on tags or events that are not supported inside a
+    /// table, or if the events run out before the table ends.
+    fn process(&mut self, parser: &mut EventIter<'_>, indent: usize) -> Result<String, Error> {
+        while let Some((event, _range)) = parser.next() {
+            match event {
+                Event::Start(Tag::Strikethrough) | Event::End(Tag::Strikethrough) => {
+                    self.cell.push_str("~~")
+                }
+                // Structural and inline-style tags need no action on open.
+                // Links not yet supported, they usually won't fit.
+                Event::Start(
+                    Tag::TableHead
+                    | Tag::TableRow
+                    | Tag::TableCell
+                    | Tag::Emphasis
+                    | Tag::Strong
+                    | Tag::Link(..),
+                ) => {}
+                Event::Start(tag) => bail!("unexpected tag in table: {:?}", tag),
+                Event::End(Tag::Table(_)) => return self.render(indent),
+                Event::End(Tag::TableCell) => {
+                    let finished_cell = mem::take(&mut self.cell);
+                    self.row.push(finished_cell);
+                }
+                Event::End(Tag::TableHead | Tag::TableRow) => {
+                    let finished_row = mem::take(&mut self.row);
+                    self.rows.push(finished_row);
+                }
+                Event::End(_) => {}
+                Event::Text(t) | Event::Code(t) => self.cell.push_str(&t),
+                Event::Html(t) => bail!("html unsupported in tables: {:?}", t),
+                other => bail!("unexpected event in table: {:?}", other),
+            }
+        }
+        bail!("table end not reached");
+    }
+
+    /// Lays out the collected rows as a boxed text table.
+    fn render(&self, indent: usize) -> Result<String, Error> {
+        // This is an extremely primitive layout routine.
+        // Every column starts at 2 (one space of margin on each side) and
+        // grows to the widest cell plus 3 (margins and a `|`).
+        let mut col_widths = vec![2; self.alignment.len()];
+        for row in &self.rows {
+            for (width, cell) in col_widths.iter_mut().zip(row) {
+                *width = (*width).max(cell.len() + 3);
+            }
+        }
+
+        // If the table is too wide, shrink every column in proportion to its
+        // share of the total width.
+        let max_width = 78 - indent;
+        // One `|` per column plus the closing `|`.
+        let total_width = col_widths.iter().sum::<usize>() + col_widths.len() + 1;
+        if total_width > max_width {
+            let to_shrink = total_width - max_width;
+            for width in &mut col_widths {
+                let percent = *width as f64 / total_width as f64;
+                *width -= (to_shrink as f64 * percent).ceil() as usize;
+            }
+        }
+
+        // The horizontal rule drawn above, between and below rows.
+        let margin = " ".repeat(indent);
+        let dashes: Vec<String> = col_widths.iter().map(|width| "-".repeat(*width)).collect();
+        let row_line = format!("{}+{}+\n", margin, dashes.join("+"));
+
+        let mut result = String::new();
+        result.push_str(&row_line);
+        for row in &self.rows {
+            // Each cell becomes a column of wrapped, padded lines.
+            let filled = fill_row(row, &col_widths, &self.alignment);
+            // Emit the cells side by side, one physical line at a time.
+            let height = filled.iter().map(|c| c.len()).max().unwrap();
+            for line_index in 0..height {
+                result.push_str(&margin);
+                result.push('|');
+                for cell_lines in &filled {
+                    result.push_str(&cell_lines[line_index]);
+                    result.push('|');
+                }
+                result.push('\n');
+            }
+            result.push_str(&row_line);
+        }
+        Ok(result)
+    }
+}
+
+/// Formats one table row: wraps and pads every cell to its column width.
+///
+/// Returns one entry per cell, each entry being that cell's lines; shorter
+/// cells are padded with blank lines so all entries have the same length.
+fn fill_row(row: &[String], col_widths: &[usize], alignment: &[Alignment]) -> Vec<Vec<String>> {
+    let mut cell_lines: Vec<Vec<String>> = Vec::with_capacity(row.len());
+    for ((cell, width), align) in row.iter().zip(col_widths).zip(alignment) {
+        // The column width includes the two margin spaces.
+        cell_lines.push(fill_cell(cell, *width - 2, *align));
+    }
+    // Bring every cell up to the height of the tallest one.
+    let max_lines = cell_lines.iter().map(|cell| cell.len()).max().unwrap();
+    for (cell, width) in cell_lines.iter_mut().zip(col_widths) {
+        let missing = max_lines - cell.len();
+        cell.extend(std::iter::repeat(" ".repeat(*width)).take(missing));
+    }
+    cell_lines
+}
+
+/// Formats a cell. Word-wraps based on width, and adjusts based on alignment.
+///
+/// `width` is the usable text width in bytes; every returned line is padded
+/// to that width with one extra space on each side. Text shorter than `width`
+/// is emitted as a single line. Otherwise the text is split on whitespace and
+/// re-flowed; a word that is itself `width` bytes or longer is never split
+/// and is placed on a line of its own.
+///
+/// Returns a vec of lines for the cell.
+fn fill_cell(text: &str, width: usize, alignment: Alignment) -> Vec<String> {
+    let fill_width = |text: &str| match alignment {
+        Alignment::None | Alignment::Left => format!(" {:<width$} ", text, width = width),
+        Alignment::Center => format!(" {:^width$} ", text, width = width),
+        Alignment::Right => format!(" {:>width$} ", text, width = width),
+    };
+    if text.len() < width {
+        // No wrapping necessary, just format.
+        return vec![fill_width(text)];
+    }
+
+    // Word-wrap the cell.
+    let mut result = Vec::new();
+    let mut line = String::new();
+    for word in text.split_whitespace() {
+        // Break only when there is something to flush; otherwise a first
+        // word that is too long on its own would yield an empty padded line.
+        if !line.is_empty() && line.len() + word.len() >= width {
+            result.push(fill_width(&line));
+            line.clear();
+        }
+        if !line.is_empty() {
+            line.push(' ');
+        }
+        line.push_str(word);
+    }
+    if !line.is_empty() {
+        result.push(fill_width(&line));
+    }
+    result
+}
diff --git a/crates/mdman/src/hbs.rs b/crates/mdman/src/hbs.rs
new file mode 100644
index 0000000..81ad7ee
--- /dev/null
+++ b/crates/mdman/src/hbs.rs
@@ -0,0 +1,215 @@
+//! Handlebars template processing.
+
+use crate::format::Formatter;
+use anyhow::Error;
+use handlebars::{
+ handlebars_helper, Context, Decorator, Handlebars, Helper, HelperDef, HelperResult, Output,
+ RenderContext, RenderError, Renderable,
+};
+use std::collections::HashMap;
+use std::path::Path;
+
+type FormatterRef<'a> = &'a (dyn Formatter + Send + Sync);
+
+/// Processes the handlebars template at the given file.
+///
+/// Registers the mdman helpers (`lower`, `options`, `option`, `man`) and the
+/// `set` decorator, loads every `.md` template from the `includes` directory
+/// next to `file`, and renders `file` with `man_name` set to its file stem.
+///
+/// # Errors
+///
+/// Returns an error if the template or the includes cannot be registered, if
+/// `file` has no parent directory or no UTF-8 file stem, or if rendering fails.
+pub fn expand(file: &Path, formatter: FormatterRef) -> Result<String, Error> {
+    let mut handlebars = Handlebars::new();
+    handlebars.set_strict_mode(true);
+    handlebars.register_helper("lower", Box::new(lower));
+    handlebars.register_helper("options", Box::new(OptionsHelper { formatter }));
+    handlebars.register_helper("option", Box::new(OptionHelper { formatter }));
+    handlebars.register_helper("man", Box::new(ManLinkHelper { formatter }));
+    handlebars.register_decorator("set", Box::new(set_decorator));
+    handlebars.register_template_file("template", file)?;
+    // The path comes from the command line, so report problems instead of panicking.
+    let includes = file
+        .parent()
+        .ok_or_else(|| anyhow::format_err!("`{}` has no parent directory", file.display()))?
+        .join("includes");
+    handlebars.register_templates_directory(".md", includes)?;
+    let man_name = file
+        .file_stem()
+        .and_then(|stem| stem.to_str())
+        .ok_or_else(|| {
+            anyhow::format_err!("expected `{}` to have a UTF-8 filename", file.display())
+        })?
+        .to_string();
+    let data = HashMap::from([("man_name", man_name)]);
+    let expanded = handlebars.render("template", &data)?;
+    Ok(expanded)
+}
+
+/// Helper for the `{{#options}}` block.
+///
+/// Writes the formatter's options-start text, the rendered block, and the
+/// formatter's options-end text. Nested `{{#options}}` blocks are rejected.
+struct OptionsHelper<'a> {
+    formatter: FormatterRef<'a>,
+}
+
+impl HelperDef for OptionsHelper<'_> {
+    fn call<'reg: 'rc, 'rc>(
+        &self,
+        h: &Helper<'reg, 'rc>,
+        r: &'reg Handlebars<'reg>,
+        ctx: &'rc Context,
+        rc: &mut RenderContext<'reg, 'rc>,
+        out: &mut dyn Output,
+    ) -> HelperResult {
+        if in_options(rc) {
+            return Err(RenderError::new("options blocks cannot be nested"));
+        }
+        // Flag the context so that a nested {{#options}} is detected and
+        // {{#option}} knows it is inside an options block.
+        set_in_context(rc, "__MDMAN_IN_OPTIONS", serde_json::Value::Bool(true));
+        out.write(self.formatter.render_options_start())?;
+        let template = h
+            .template()
+            .ok_or_else(|| RenderError::new("options block must not be empty"))?;
+        let rendered = template.renders(r, ctx, rc)?;
+        out.write(&rendered)?;
+
+        out.write(self.formatter.render_options_end())?;
+        remove_from_context(rc, "__MDMAN_IN_OPTIONS");
+        Ok(())
+    }
+}
+
+/// Reports whether rendering is currently inside a `{{#options}}` block.
+///
+/// That is the case when the render context holds the `__MDMAN_IN_OPTIONS` key.
+fn in_options(rc: &RenderContext<'_, '_>) -> bool {
+    match rc.context() {
+        Some(ctx) => ctx.data().get("__MDMAN_IN_OPTIONS").is_some(),
+        None => false,
+    }
+}
+
+/// Helper for the `{{#option}}` block.
+///
+/// Must appear inside `{{#options}}` and take at least one string parameter.
+/// The rendered block is handed to the formatter's `render_option` together
+/// with the parameters and the page's `man_name`.
+struct OptionHelper<'a> {
+    formatter: FormatterRef<'a>,
+}
+
+impl HelperDef for OptionHelper<'_> {
+    fn call<'reg: 'rc, 'rc>(
+        &self,
+        h: &Helper<'reg, 'rc>,
+        r: &'reg Handlebars<'reg>,
+        ctx: &'rc Context,
+        rc: &mut RenderContext<'reg, 'rc>,
+        out: &mut dyn Output,
+    ) -> HelperResult {
+        if !in_options(rc) {
+            return Err(RenderError::new("option must be in options block"));
+        }
+        let raw_params = h.params();
+        if raw_params.is_empty() {
+            return Err(RenderError::new(
+                "option block must have at least one param",
+            ));
+        }
+        // Every param must be a string.
+        let mut params: Vec<&str> = Vec::with_capacity(raw_params.len());
+        for param in raw_params {
+            match param.value().as_str() {
+                Some(text) => params.push(text),
+                None => return Err(RenderError::new("option params must be strings")),
+            }
+        }
+        let template = h
+            .template()
+            .ok_or_else(|| RenderError::new("option block must not be empty"))?;
+        // Render the body of the block.
+        let block = template.renders(r, ctx, rc)?;
+
+        // The page name is placed in the context data under `man_name`.
+        let man_name = ctx
+            .data()
+            .get("man_name")
+            .expect("expected man_name in context")
+            .as_str()
+            .expect("expect man_name str");
+
+        // Let the formatter translate the option into its own format.
+        let rendered = self
+            .formatter
+            .render_option(&params, &block, man_name)
+            .map_err(|e| RenderError::new(format!("option render failed: {}", e)))?;
+        out.write(&rendered)?;
+        Ok(())
+    }
+}
+
+/// Helper for the `{{man name section}}` expression.
+///
+/// Takes exactly two arguments, a string name and an integer section that
+/// fits in a `u8`, and writes the link produced by the formatter's
+/// `linkify_man_to_md`.
+struct ManLinkHelper<'a> {
+    formatter: FormatterRef<'a>,
+}
+
+impl HelperDef for ManLinkHelper<'_> {
+    fn call<'reg: 'rc, 'rc>(
+        &self,
+        h: &Helper<'reg, 'rc>,
+        _r: &'reg Handlebars<'reg>,
+        _ctx: &'rc Context,
+        _rc: &mut RenderContext<'reg, 'rc>,
+        out: &mut dyn Output,
+    ) -> HelperResult {
+        let (name_param, section_param) = match &h.params()[..] {
+            [name, section] => (name, section),
+            _ => return Err(RenderError::new("{{man}} must have two arguments")),
+        };
+        let name = name_param
+            .value()
+            .as_str()
+            .ok_or_else(|| RenderError::new("man link name must be a string"))?;
+        let section = section_param
+            .value()
+            .as_u64()
+            .ok_or_else(|| RenderError::new("man link section must be an integer"))?;
+        let section =
+            u8::try_from(section).map_err(|_e| RenderError::new("section number too large"))?;
+        let link = self
+            .formatter
+            .linkify_man_to_md(name, section)
+            .map_err(|e| RenderError::new(format!("failed to linkify man: {}", e)))?;
+        out.write(&link)?;
+        Ok(())
+    }
+}
+
+/// `{{*set var=value}}` decorator.
+///
+/// Copies every `key=value` pair of the decorator's hash into the render
+/// context, making the variables available to the rest of the template.
+fn set_decorator(
+    d: &Decorator,
+    _: &Handlebars,
+    _ctx: &Context,
+    rc: &mut RenderContext,
+) -> Result<(), RenderError> {
+    d.hash()
+        .iter()
+        .for_each(|(name, value)| set_in_context(rc, name, value.value().clone()));
+    Ok(())
+}
+
+/// Sets a variable to a value within the context.
+///
+/// Works on a copy of the current context (or a fresh empty object when there
+/// is none) and installs the copy as the new context.
+///
+/// # Panics
+///
+/// Panics if the context data is not a JSON object.
+fn set_in_context(rc: &mut RenderContext, key: &str, value: serde_json::Value) {
+    let mut ctx = rc.context().map_or_else(
+        || Context::wraps(serde_json::Value::Object(serde_json::Map::new())).unwrap(),
+        |existing| (*existing).clone(),
+    );
+    match ctx.data_mut() {
+        serde_json::Value::Object(map) => {
+            map.insert(key.to_string(), value);
+        }
+        _ => panic!("expected object in context"),
+    }
+    rc.set_context(ctx);
+}
+
+/// Removes a variable from the context.
+///
+/// Works on a copy of the current context and installs the copy as the new
+/// context.
+///
+/// # Panics
+///
+/// Panics if there is no current context or its data is not a JSON object.
+fn remove_from_context(rc: &mut RenderContext, key: &str) {
+    let mut ctx = rc
+        .context()
+        .map(|current| (*current).clone())
+        .expect("cannot remove from null context");
+    match ctx.data_mut() {
+        serde_json::Value::Object(map) => {
+            map.remove(key);
+        }
+        _ => panic!("expected object in context"),
+    }
+    rc.set_context(ctx);
+}
+
+// `lower` helper (registered under the name "lower" in `expand`): takes one
+// string argument and yields it converted to lowercase.
+handlebars_helper!(lower: |s: str| s.to_lowercase());
diff --git a/crates/mdman/src/lib.rs b/crates/mdman/src/lib.rs
new file mode 100644
index 0000000..01c3c8d
--- /dev/null
+++ b/crates/mdman/src/lib.rs
@@ -0,0 +1,122 @@
+//! mdman markdown to man converter.
+
+use anyhow::{bail, Context, Error};
+use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag};
+use std::collections::HashMap;
+use std::fs;
+use std::io::{self, BufRead};
+use std::ops::Range;
+use std::path::Path;
+use url::Url;
+
+mod format;
+mod hbs;
+mod util;
+
+use format::Formatter;
+
+/// Mapping of `(name, section)` of a man page to a URL.
+pub type ManMap = HashMap<(String, u8), String>;
+
+/// A man section.
+pub type Section = u8;
+
+/// The output formats supported by mdman.
+#[derive(Copy, Clone)]
+pub enum Format {
+    /// Man page output.
+    Man,
+    /// Markdown output.
+    Md,
+    /// Plain-text output.
+    Text,
+}
+
+impl Format {
+    /// Returns the filename extension used for files in this format.
+    ///
+    /// Man pages use the section number as their extension; the other formats
+    /// ignore `section`.
+    pub fn extension(&self, section: Section) -> String {
+        let fixed = match self {
+            Format::Man => return section.to_string(),
+            Format::Md => "md",
+            Format::Text => "txt",
+        };
+        fixed.to_string()
+    }
+}
+
+/// Converts the handlebars markdown file at the given path into the given
+/// format, returning the translated result.
+///
+/// The file is first expanded as a handlebars template and the expanded
+/// markdown is then rendered by the formatter for `format`. `url` is passed to
+/// the man and text formatters, `man_map` to the markdown formatter.
+pub fn convert(
+    file: &Path,
+    format: Format,
+    url: Option<Url>,
+    man_map: ManMap,
+) -> Result<String, Error> {
+    let formatter: Box<dyn Formatter + Send + Sync> = match format {
+        Format::Man => Box::new(format::man::ManFormatter::new(url)),
+        Format::Md => Box::new(format::md::MdFormatter::new(man_map)),
+        Format::Text => Box::new(format::text::TextFormatter::new(url)),
+    };
+    // pulldown-cmark can behave a little differently with Windows newlines,
+    // so normalize them before rendering.
+    let markdown = hbs::expand(file, &*formatter)?.replace("\r\n", "\n");
+    formatter.render(&markdown)
+}
+
+/// Pulldown-cmark iterator yielding an `(event, range)` tuple.
+type EventIter<'a> = Box<dyn Iterator<Item = (Event<'a>, Range<usize>)> + 'a>;
+
+/// Creates a new markdown parser with the given input.
+///
+/// Tables, footnotes, strikethrough and smart punctuation are enabled. The
+/// destination of every link except email links is passed through `join_url`
+/// with `url` as the base.
+pub(crate) fn md_parser(input: &str, url: Option<Url>) -> EventIter {
+    let options = Options::ENABLE_TABLES
+        | Options::ENABLE_FOOTNOTES
+        | Options::ENABLE_STRIKETHROUGH
+        | Options::ENABLE_SMART_PUNCTUATION;
+    let events = Parser::new_ext(input, options).into_offset_iter();
+    // Translate all links to include the base url.
+    Box::new(events.map(move |(event, range)| {
+        let event = match event {
+            Event::Start(Tag::Link(lt, dest_url, title)) if !matches!(lt, LinkType::Email) => {
+                Event::Start(Tag::Link(lt, join_url(url.as_ref(), dest_url), title))
+            }
+            Event::End(Tag::Link(lt, dest_url, title)) if !matches!(lt, LinkType::Email) => {
+                Event::End(Tag::Link(lt, join_url(url.as_ref(), dest_url), title))
+            }
+            other => other,
+        };
+        (event, range)
+    }))
+}
+
+/// Resolves a link destination against the optional base URL.
+///
+/// `dest` is returned unchanged when there is no base, when it contains a `:`
+/// (treated as an absolute URL), or when it starts with `#` (a page-relative
+/// anchor).
+///
+/// # Panics
+///
+/// Panics if `dest` cannot be joined to the base URL.
+fn join_url<'a>(base: Option<&Url>, dest: CowStr<'a>) -> CowStr<'a> {
+    let base_url = match base {
+        Some(base_url) => base_url,
+        None => return dest,
+    };
+    // Absolute URL or page-relative anchor doesn't need to be translated.
+    if dest.contains(':') || dest.starts_with('#') {
+        return dest;
+    }
+    match base_url.join(&dest) {
+        Ok(joined) => String::from(joined).into(),
+        Err(e) => panic!("failed to join URL `{}` to `{}`: {}", dest, base_url, e),
+    }
+}
+
+/// Reads the man section from the first line of the given markdown file.
+///
+/// The first line must be a header of the form `# command-name(1)`.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened or read, if the first line
+/// does not start with `# `, or if the header is not of the form `name(section)`.
+pub fn extract_section(file: &Path) -> Result<Section, Error> {
+    let f = fs::File::open(file).with_context(|| format!("could not open `{}`", file.display()))?;
+    let mut f = io::BufReader::new(f);
+    let mut line = String::new();
+    f.read_line(&mut line)?;
+    // Drop the line terminator so that it does not leak into the error message.
+    let line = line.trim_end_matches(&['\r', '\n'][..]);
+    let header = match line.strip_prefix("# ") {
+        Some(header) => header.trim(),
+        None => bail!("expected input file to start with # header"),
+    };
+    let (_name, section) = util::parse_name_and_section(header).with_context(|| {
+        format!(
+            "expected input file to have header with the format `# command-name(1)`, found: `{}`",
+            line
+        )
+    })?;
+    Ok(section)
+}
diff --git a/crates/mdman/src/main.rs b/crates/mdman/src/main.rs
new file mode 100644
index 0000000..2bdf96d
--- /dev/null
+++ b/crates/mdman/src/main.rs
@@ -0,0 +1,133 @@
+use anyhow::{bail, format_err, Context, Error};
+use mdman::{Format, ManMap};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use url::Url;
+
+/// Command-line options.
+struct Options {
+ /// Output format, selected with `-t` (`man`, `md` or `txt`).
+ format: Format,
+ /// Directory the converted files are written to, given with `-o`.
+ output_dir: PathBuf,
+ /// Source files to convert (every argument that is not a recognized flag).
+ sources: Vec<PathBuf>,
+ /// Optional base URL given with `--url`; its path must end with `/`.
+ url: Option<Url>,
+ /// `(name, section)` to link mappings collected from `--man name:1=link`.
+ man_map: ManMap,
+}
+
+fn main() {
+ if let Err(e) = run() {
+ eprintln!("error: {}", e);
+ for cause in e.chain().skip(1) {
+ eprintln!("\nCaused by:");
+ for line in cause.to_string().lines() {
+ if line.is_empty() {
+ eprintln!();
+ } else {
+ eprintln!(" {}", line);
+ }
+ }
+ }
+ std::process::exit(1);
+ }
+}
+
+/// Converts every source file named on the command line.
+///
+/// Creates the output directory when it does not exist, then writes each
+/// converted source into it under the source's file name with the extension
+/// of the selected format. Refuses to overwrite a source file with its own
+/// output.
+fn run() -> Result<(), Error> {
+    let opts = process_args()?;
+    let out_dir = &opts.output_dir;
+    if !out_dir.exists() {
+        std::fs::create_dir_all(out_dir).with_context(|| {
+            format!("failed to create output directory {}", out_dir.display())
+        })?;
+    }
+    for source in &opts.sources {
+        let section = mdman::extract_section(source)?;
+        let extension = opts.format.extension(section);
+        let filename = Path::new(source.file_name().unwrap()).with_extension(extension);
+        let out_path = out_dir.join(filename);
+        let clobbers_source = same_file::is_same_file(source, &out_path).unwrap_or(false);
+        if clobbers_source {
+            bail!("cannot output to the same file as the source");
+        }
+        println!("Converting {} -> {}", source.display(), out_path.display());
+        let converted =
+            mdman::convert(source, opts.format, opts.url.clone(), opts.man_map.clone())
+                .with_context(|| format!("failed to translate {}", source.display()))?;
+
+        std::fs::write(out_path, converted)?;
+    }
+    Ok(())
+}
+
+/// Parses the command-line arguments into [`Options`].
+///
+/// Recognized flags:
+///
+/// * `-t <man|md|txt>`: output format (required).
+/// * `-o <dir>`: output directory (required).
+/// * `--url <url>`: base URL; its path must end with `/`.
+/// * `--man <name:section=link>`: adds a man page mapping; may be repeated.
+///
+/// Every other argument is taken as a source file; at least one is required.
+///
+/// # Errors
+///
+/// Returns an error if a flag is missing its value, a value is malformed, or
+/// a required option is absent.
+fn process_args() -> Result<Options, Error> {
+    let mut format = None;
+    let mut output = None;
+    let mut url = None;
+    let mut man_map: ManMap = HashMap::new();
+    let mut sources = Vec::new();
+    let mut args = std::env::args().skip(1);
+    while let Some(arg) = args.next() {
+        match arg.as_str() {
+            "-t" => {
+                format = match args.next().as_deref() {
+                    Some("man") => Some(Format::Man),
+                    Some("md") => Some(Format::Md),
+                    Some("txt") => Some(Format::Text),
+                    Some(s) => bail!("unknown output format: {}", s),
+                    None => bail!("-t requires a value (man, md, txt)"),
+                };
+            }
+            "-o" => {
+                output = match args.next() {
+                    Some(s) => Some(PathBuf::from(s)),
+                    None => bail!("-o requires a value"),
+                };
+            }
+            "--url" => {
+                url = match args.next() {
+                    Some(s) => {
+                        let url = Url::parse(&s)
+                            .with_context(|| format!("could not convert `{}` to a url", s))?;
+                        if !url.path().ends_with('/') {
+                            bail!("url `{}` should end with a /", url);
+                        }
+                        Some(url)
+                    }
+                    None => bail!("--url requires a value"),
+                };
+            }
+            "--man" => {
+                let man = args
+                    .next()
+                    .ok_or_else(|| format_err!("--man requires a value"))?;
+                // Expected shape: `name:section=link`. Split at the first `=`,
+                // then split the key at its first `:`.
+                let pieces = man.split_once('=').and_then(|(key, link)| {
+                    key.split_once(':')
+                        .map(|(name, section)| (name, section, link))
+                });
+                let (name, section_text, link) = match pieces {
+                    Some(pieces) => pieces,
+                    None => bail!("--man expected value with form name:1=link"),
+                };
+                // Report the offending section text, not the link.
+                let section: u8 = section_text.parse().with_context(|| {
+                    format!(
+                        "expected unsigned integer for section, got `{}`",
+                        section_text
+                    )
+                })?;
+                man_map.insert((name.to_string(), section), link.to_string());
+            }
+            s => {
+                sources.push(PathBuf::from(s));
+            }
+        }
+    }
+    let format = match format {
+        Some(format) => format,
+        None => bail!("-t must be specified (man, md, txt)"),
+    };
+    let output_dir = match output {
+        Some(output_dir) => output_dir,
+        None => bail!("-o must be specified (output directory)"),
+    };
+    if sources.is_empty() {
+        bail!("at least one source must be specified");
+    }
+    Ok(Options {
+        format,
+        output_dir,
+        sources,
+        url,
+        man_map,
+    })
+}
diff --git a/crates/mdman/src/util.rs b/crates/mdman/src/util.rs
new file mode 100644
index 0000000..a4c71ad
--- /dev/null
+++ b/crates/mdman/src/util.rs
@@ -0,0 +1,44 @@
+//! General utilities.
+use crate::EventIter;
+use anyhow::{bail, format_err, Context, Error};
+use pulldown_cmark::{CowStr, Event, Tag};
+
+/// Splits a man reference such as `foo(1)` into the name `"foo"` and the
+/// section `1`.
+///
+/// # Errors
+///
+/// Returns an error if the name or the section is missing, if there is text
+/// after the closing parenthesis, or if the section is not a number that fits
+/// in a `u8`.
+pub fn parse_name_and_section(text: &str) -> Result<(&str, u8), Error> {
+    let mut parts = text.split_terminator(&['(', ')'][..]);
+    let name = match parts.next() {
+        Some(name) => name,
+        None => bail!("man reference must have a name"),
+    };
+    let section_text = match parts.next() {
+        Some(section) => section,
+        None => bail!("man reference must have a section such as mycommand(1)"),
+    };
+    if let Some(extra) = parts.next() {
+        bail!(
+            "man reference must have the form mycommand(1), got extra part `{}`",
+            extra
+        );
+    }
+    let section = section_text
+        .parse::<u8>()
+        .with_context(|| format!("section must be a number, got {}", section_text))?;
+    Ok((name, section))
+}
+
+/// Extracts the text from a header after Tag::Heading has been received.
+///
+/// Consumes two events from `parser`: a plain text event, whose text is
+/// returned, followed by the end of the heading.
+///
+/// # Errors
+///
+/// Returns an error if either event is not the expected one.
+pub fn header_text<'e>(parser: &mut EventIter<'e>) -> Result<CowStr<'e>, Error> {
+    let heading = match parser.next() {
+        Some((Event::Text(text), _range)) => text,
+        other => bail!("expected plain text in man header, got {:?}", other),
+    };
+    match parser.next() {
+        Some((Event::End(Tag::Heading(..)), _range)) => Ok(heading),
+        other => bail!("expected plain text in man header, got {:?}", other),
+    }
+}
+
+/// Strips surrounding whitespace, then every leading repetition of `front`
+/// and every trailing repetition of `back`, from `text`.
+pub fn unwrap<'t>(text: &'t str, front: &str, back: &str) -> &'t str {
+    let trimmed = text.trim();
+    let without_front = trimmed.trim_start_matches(front);
+    without_front.trim_end_matches(back)
+}