diff options
Diffstat (limited to 'crates/mdman/src')
-rw-r--r-- | crates/mdman/src/format.rs | 20 | ||||
-rw-r--r-- | crates/mdman/src/format/man.rs | 436 | ||||
-rw-r--r-- | crates/mdman/src/format/md.rs | 112 | ||||
-rw-r--r-- | crates/mdman/src/format/text.rs | 605 | ||||
-rw-r--r-- | crates/mdman/src/hbs.rs | 215 | ||||
-rw-r--r-- | crates/mdman/src/lib.rs | 122 | ||||
-rw-r--r-- | crates/mdman/src/main.rs | 133 | ||||
-rw-r--r-- | crates/mdman/src/util.rs | 44 |
8 files changed, 1687 insertions, 0 deletions
diff --git a/crates/mdman/src/format.rs b/crates/mdman/src/format.rs new file mode 100644 index 0000000..7bc9781 --- /dev/null +++ b/crates/mdman/src/format.rs @@ -0,0 +1,20 @@ +use anyhow::Error; + +pub mod man; +pub mod md; +pub mod text; + +pub trait Formatter { + /// Renders the given markdown to the formatter's output. + fn render(&self, input: &str) -> Result<String, Error>; + /// Renders the start of a block of options (triggered by `{{#options}}`). + fn render_options_start(&self) -> &'static str; + /// Renders the end of a block of options (triggered by `{{/options}}`). + fn render_options_end(&self) -> &'static str; + /// Renders an option (triggered by `{{#option}}`). + fn render_option(&self, params: &[&str], block: &str, man_name: &str) -> Result<String, Error>; + /// Converts a man page reference into markdown that is appropriate for this format. + /// + /// Triggered by `{{man name section}}`. + fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error>; +} diff --git a/crates/mdman/src/format/man.rs b/crates/mdman/src/format/man.rs new file mode 100644 index 0000000..9767fdd --- /dev/null +++ b/crates/mdman/src/format/man.rs @@ -0,0 +1,436 @@ +//! Man-page formatter. + +use crate::util::{header_text, parse_name_and_section}; +use crate::EventIter; +use anyhow::{bail, Error}; +use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag}; +use std::fmt::Write; +use url::Url; + +pub struct ManFormatter { + url: Option<Url>, +} + +impl ManFormatter { + pub fn new(url: Option<Url>) -> ManFormatter { + ManFormatter { url } + } +} + +impl super::Formatter for ManFormatter { + fn render(&self, input: &str) -> Result<String, Error> { + ManRenderer::render(input, self.url.clone()) + } + + fn render_options_start(&self) -> &'static str { + // Tell pulldown_cmark to ignore this. + // This will be stripped out later. + "<![CDATA[" + } + + fn render_options_end(&self) -> &'static str { + "]]>" + } + + fn render_option( + &self, + params: &[&str], + block: &str, + _man_name: &str, + ) -> Result<String, Error> { + let rendered_options = params + .iter() + .map(|param| { + let r = self.render(param)?; + Ok(r.trim().trim_start_matches(".sp").to_string()) + }) + .collect::<Result<Vec<_>, Error>>()?; + let rendered_block = self.render(block)?; + let rendered_block = rendered_block.trim().trim_start_matches(".sp").trim(); + // .RS = move left margin to right 4. + // .RE = move margin back one level. + Ok(format!( + "\n.sp\n{}\n.RS 4\n{}\n.RE\n", + rendered_options.join(", "), + rendered_block + )) + } + + fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> { + Ok(format!("`{}`({})", name, section)) + } +} + +#[derive(Copy, Clone)] +enum Font { + Bold, + Italic, +} + +impl Font { + fn str_from_stack(font_stack: &[Font]) -> &'static str { + let has_bold = font_stack.iter().any(|font| matches!(font, Font::Bold)); + let has_italic = font_stack.iter().any(|font| matches!(font, Font::Italic)); + match (has_bold, has_italic) { + (false, false) => "\\fR", // roman (normal) + (false, true) => "\\fI", // italic + (true, false) => "\\fB", // bold + (true, true) => "\\f(BI", // bold italic + } + } +} + +struct ManRenderer<'e> { + output: String, + parser: EventIter<'e>, + font_stack: Vec<Font>, +} + +impl<'e> ManRenderer<'e> { + fn render(input: &str, url: Option<Url>) -> Result<String, Error> { + let parser = crate::md_parser(input, url); + let output = String::with_capacity(input.len() * 3 / 2); + let mut mr = ManRenderer { + parser, + output, + font_stack: Vec::new(), + }; + mr.push_man()?; + Ok(mr.output) + } + + fn push_man(&mut self) -> Result<(), Error> { + // If this is true, this is inside a cdata block used for hiding + // content from pulldown_cmark. + let mut in_cdata = false; + // The current list stack. None if unordered, Some if ordered with the + // given number as the current index. + let mut list: Vec<Option<u64>> = Vec::new(); + // Used in some cases where spacing isn't desired. + let mut suppress_paragraph = false; + let mut table_cell_index = 0; + + while let Some((event, range)) = self.parser.next() { + let this_suppress_paragraph = suppress_paragraph; + suppress_paragraph = false; + match event { + Event::Start(tag) => { + match tag { + Tag::Paragraph => { + if !this_suppress_paragraph { + self.flush(); + self.output.push_str(".sp\n"); + } + } + Tag::Heading(level, ..) => { + if level == HeadingLevel::H1 { + self.push_top_header()?; + } else if level == HeadingLevel::H2 { + // Section header + let text = header_text(&mut self.parser)?; + self.flush(); + write!(self.output, ".SH \"{}\"\n", text)?; + suppress_paragraph = true; + } else { + // Subsection header + let text = header_text(&mut self.parser)?; + self.flush(); + write!(self.output, ".SS \"{}\"\n", text)?; + suppress_paragraph = true; + } + } + Tag::BlockQuote => { + self.flush(); + // .RS = move left margin over 3 + // .ll = shrink line length + self.output.push_str(".RS 3\n.ll -5\n.sp\n"); + suppress_paragraph = true; + } + Tag::CodeBlock(_kind) => { + // space down, indent 4, no-fill mode + self.flush(); + self.output.push_str(".sp\n.RS 4\n.nf\n"); + } + Tag::List(start) => list.push(start), + Tag::Item => { + // Note: This uses explicit movement instead of .IP + // because the spacing on .IP looks weird to me. + // space down, indent 4 + self.flush(); + self.output.push_str(".sp\n.RS 4\n"); + match list.last_mut().expect("item must have list start") { + // Ordered list. + Some(n) => { + // move left 4, output the list index number, move right 1. + write!(self.output, "\\h'-04' {}.\\h'+01'", n)?; + *n += 1; + } + // Unordered list. + None => self.output.push_str("\\h'-04'\\(bu\\h'+02'"), + } + suppress_paragraph = true; + } + Tag::FootnoteDefinition(_label) => unimplemented!(), + Tag::Table(alignment) => { + // Table start + // allbox = draw a box around all the cells + // tab(:) = Use `:` to separate cell data (instead of tab) + // ; = end of options + self.output.push_str( + "\n.TS\n\ + allbox tab(:);\n", + ); + let alignments: Vec<_> = alignment + .iter() + .map(|a| match a { + Alignment::Left | Alignment::None => "lt", + Alignment::Center => "ct", + Alignment::Right => "rt", + }) + .collect(); + self.output.push_str(&alignments.join(" ")); + self.output.push_str(".\n"); + table_cell_index = 0; + } + Tag::TableHead => { + table_cell_index = 0; + } + Tag::TableRow => { + table_cell_index = 0; + self.output.push('\n'); + } + Tag::TableCell => { + if table_cell_index != 0 { + // Separator between columns. + self.output.push(':'); + } + // Start a text block. + self.output.push_str("T{\n"); + table_cell_index += 1 + } + Tag::Emphasis => self.push_font(Font::Italic), + Tag::Strong => self.push_font(Font::Bold), + // Strikethrough isn't usually supported for TTY. + Tag::Strikethrough => self.output.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + // In a man page, page-relative anchors don't + // have much meaning. + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => { + // The text is a copy of the URL, which is not needed. + match self.parser.next() { + Some((Event::Text(_), _range)) => {} + _ => bail!("expected text after autolink"), + } + } + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => { + self.push_font(Font::Italic); + } + // This is currently unused. This is only + // emitted with a broken link callback, but I + // felt it is too annoying to escape `[` in + // option descriptions. + LinkType::ReferenceUnknown + | LinkType::CollapsedUnknown + | LinkType::ShortcutUnknown => { + bail!( + "link with missing reference `{}` located at offset {}", + dest_url, + range.start + ); + } + } + } + Tag::Image(_link_type, _dest_url, _title) => { + bail!("images are not currently supported") + } + } + } + Event::End(tag) => { + match &tag { + Tag::Paragraph => self.flush(), + Tag::Heading(..) => {} + Tag::BlockQuote => { + self.flush(); + // restore left margin, restore line length + self.output.push_str(".br\n.RE\n.ll\n"); + } + Tag::CodeBlock(_kind) => { + self.flush(); + // Restore fill mode, move margin back one level. + self.output.push_str(".fi\n.RE\n"); + } + Tag::List(_) => { + list.pop(); + } + Tag::Item => { + self.flush(); + // Move margin back one level. + self.output.push_str(".RE\n"); + } + Tag::FootnoteDefinition(_label) => {} + Tag::Table(_) => { + // Table end + // I don't know why, but the .sp is needed to provide + // space with the following content. + self.output.push_str("\n.TE\n.sp\n"); + } + Tag::TableHead => {} + Tag::TableRow => {} + Tag::TableCell => { + // End text block. + self.output.push_str("\nT}"); + } + Tag::Emphasis | Tag::Strong => self.pop_font(), + Tag::Strikethrough => self.output.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => {} + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => { + self.pop_font(); + self.output.push(' '); + } + _ => { + panic!("unexpected tag {:?}", tag); + } + } + write!(self.output, "<{}>", escape(&dest_url)?)?; + } + Tag::Image(_link_type, _dest_url, _title) => {} + } + } + Event::Text(t) => { + self.output.push_str(&escape(&t)?); + } + Event::Code(t) => { + self.push_font(Font::Bold); + self.output.push_str(&escape(&t)?); + self.pop_font(); + } + Event::Html(t) => { + if t.starts_with("<![CDATA[") { + // CDATA is a special marker used for handling options. + in_cdata = true; + } else if in_cdata { + if t.trim().ends_with("]]>") { + in_cdata = false; + } else if !t.trim().is_empty() { + self.output.push_str(&t); + } + } else { + self.output.push_str(&escape(&t)?); + } + } + Event::FootnoteReference(_t) => {} + Event::SoftBreak => self.output.push('\n'), + Event::HardBreak => { + self.flush(); + self.output.push_str(".br\n"); + } + Event::Rule => { + self.flush(); + // \l' **length** ' Draw horizontal line (default underscore). + // \n(.lu Gets value from register "lu" (current line length) + self.output.push_str("\\l'\\n(.lu'\n"); + } + Event::TaskListMarker(_b) => unimplemented!(), + } + } + Ok(()) + } + + fn flush(&mut self) { + if !self.output.ends_with('\n') { + self.output.push('\n'); + } + } + + /// Switch to the given font. + /// + /// Because the troff sequence `\fP` for switching to the "previous" font + /// doesn't support nesting, this needs to emulate it here. This is needed + /// for situations like **hi _there_**. + fn push_font(&mut self, font: Font) { + self.font_stack.push(font); + self.output.push_str(Font::str_from_stack(&self.font_stack)); + } + + fn pop_font(&mut self) { + self.font_stack.pop(); + self.output.push_str(Font::str_from_stack(&self.font_stack)); + } + + /// Parse and render the first top-level header of the document. + fn push_top_header(&mut self) -> Result<(), Error> { + // This enables the tbl preprocessor for tables. + // This seems to be enabled by default on every modern system I could + // find, but it doesn't seem to hurt to enable this. + self.output.push_str("'\\\" t\n"); + // Extract the name of the man page. + let text = header_text(&mut self.parser)?; + let (name, section) = parse_name_and_section(&text)?; + // .TH = Table header + // .nh = disable hyphenation + // .ad l = Left-adjust mode (disable justified). + // .ss sets sentence_space_size to 0 (prevents double spaces after . + // if . is last on the line) + write!( + self.output, + ".TH \"{}\" \"{}\"\n\ + .nh\n\ + .ad l\n\ + .ss \\n[.ss] 0\n", + escape(&name.to_uppercase())?, + section + )?; + Ok(()) + } +} + +fn escape(s: &str) -> Result<String, Error> { + // Note: Possible source on output escape sequences: https://man7.org/linux/man-pages/man7/groff_char.7.html. + // Otherwise, use generic escaping in the form `\[u1EE7]` or `\[u1F994]`. + + let mut replaced = s + .replace('\\', "\\(rs") + .replace('-', "\\-") + .replace('\u{00A0}', "\\ ") // non-breaking space (non-stretchable) + .replace('–', "\\[en]") // \u{2013} en-dash + .replace('—', "\\[em]") // \u{2014} em-dash + .replace('‘', "\\[oq]") // \u{2018} left single quote + .replace('’', "\\[cq]") // \u{2019} right single quote or apostrophe + .replace('“', "\\[lq]") // \u{201C} left double quote + .replace('”', "\\[rq]") // \u{201D} right double quote + .replace('…', "\\[u2026]") // \u{2026} ellipsis + .replace('│', "|") // \u{2502} box drawing light vertical (could use \[br]) + .replace('├', "|") // \u{251C} box drawings light vertical and right + .replace('└', "`") // \u{2514} box drawings light up and right + .replace('─', "\\-") // \u{2500} box drawing light horizontal + ; + if replaced.starts_with('.') { + replaced = format!("\\&.{}", &replaced[1..]); + } + + if let Some(ch) = replaced.chars().find(|ch| { + !matches!(ch, '\n' | ' ' | '!'..='/' | '0'..='9' + | ':'..='@' | 'A'..='Z' | '['..='`' | 'a'..='z' | '{'..='~') + }) { + bail!( + "character {:?} is not allowed (update the translation table if needed)", + ch + ); + } + Ok(replaced) +} diff --git a/crates/mdman/src/format/md.rs b/crates/mdman/src/format/md.rs new file mode 100644 index 0000000..0e1c498 --- /dev/null +++ b/crates/mdman/src/format/md.rs @@ -0,0 +1,112 @@ +//! Markdown formatter. + +use crate::util::unwrap; +use crate::ManMap; +use anyhow::{bail, format_err, Error}; +use std::fmt::Write; + +pub struct MdFormatter { + man_map: ManMap, +} + +impl MdFormatter { + pub fn new(man_map: ManMap) -> MdFormatter { + MdFormatter { man_map } + } +} + +impl MdFormatter { + fn render_html(&self, input: &str) -> Result<String, Error> { + let parser = crate::md_parser(input, None); + let mut html_output: String = String::with_capacity(input.len() * 3 / 2); + pulldown_cmark::html::push_html(&mut html_output, parser.map(|(e, _r)| e)); + Ok(html_output) + } +} + +impl super::Formatter for MdFormatter { + fn render(&self, input: &str) -> Result<String, Error> { + Ok(input.replace("\r\n", "\n")) + } + + fn render_options_start(&self) -> &'static str { + "<dl>" + } + + fn render_options_end(&self) -> &'static str { + "</dl>" + } + + fn render_option(&self, params: &[&str], block: &str, man_name: &str) -> Result<String, Error> { + let mut result = String::new(); + fn unwrap_p(t: &str) -> &str { + unwrap(t, "<p>", "</p>") + } + + for param in params { + let rendered = self.render_html(param)?; + let no_p = unwrap_p(&rendered); + // split out first term to use as the id. + let first = no_p + .split_whitespace() + .next() + .ok_or_else(|| format_err!("did not expect option `{}` to be empty", param))?; + let no_tags = trim_tags(first); + if no_tags.is_empty() { + bail!("unexpected empty option with no tags `{}`", param); + } + let id = format!("option-{}-{}", man_name, no_tags); + write!( + result, + "<dt class=\"option-term\" id=\"{ID}\">\ + <a class=\"option-anchor\" href=\"#{ID}\"></a>{OPTION}</dt>\n", + ID = id, + OPTION = no_p + )?; + } + let rendered_block = self.render_html(block)?; + write!( + result, + "<dd class=\"option-desc\">{}</dd>\n", + unwrap_p(&rendered_block) + )?; + Ok(result) + } + + fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> { + let s = match self.man_map.get(&(name.to_string(), section)) { + Some(link) => format!("[{}({})]({})", name, section, link), + None => format!("[{}({})]({}.html)", name, section, name), + }; + Ok(s) + } +} + +fn trim_tags(s: &str) -> String { + // This is a hack. It removes all HTML tags. + let mut in_tag = false; + let mut in_char_ref = false; + s.chars() + .filter(|&ch| match ch { + '<' if in_tag => panic!("unexpected nested tag"), + '&' if in_char_ref => panic!("unexpected nested char ref"), + '<' => { + in_tag = true; + false + } + '&' => { + in_char_ref = true; + false + } + '>' if in_tag => { + in_tag = false; + false + } + ';' if in_char_ref => { + in_char_ref = false; + false + } + _ => !in_tag && !in_char_ref, + }) + .collect() +} diff --git a/crates/mdman/src/format/text.rs b/crates/mdman/src/format/text.rs new file mode 100644 index 0000000..ae07985 --- /dev/null +++ b/crates/mdman/src/format/text.rs @@ -0,0 +1,605 @@ +//! Text formatter. + +use crate::util::{header_text, unwrap}; +use crate::EventIter; +use anyhow::{bail, Error}; +use pulldown_cmark::{Alignment, Event, HeadingLevel, LinkType, Tag}; +use std::fmt::Write; +use std::mem; +use url::Url; + +pub struct TextFormatter { + url: Option<Url>, +} + +impl TextFormatter { + pub fn new(url: Option<Url>) -> TextFormatter { + TextFormatter { url } + } +} + +impl super::Formatter for TextFormatter { + fn render(&self, input: &str) -> Result<String, Error> { + TextRenderer::render(input, self.url.clone(), 0) + } + + fn render_options_start(&self) -> &'static str { + // Tell pulldown_cmark to ignore this. + // This will be stripped out later. + "<![CDATA[" + } + + fn render_options_end(&self) -> &'static str { + "]]>" + } + + fn render_option( + &self, + params: &[&str], + block: &str, + _man_name: &str, + ) -> Result<String, Error> { + let rendered_options = params + .iter() + .map(|param| TextRenderer::render(param, self.url.clone(), 0)) + .collect::<Result<Vec<_>, Error>>()?; + let trimmed: Vec<_> = rendered_options.iter().map(|o| o.trim()).collect(); + // Wrap in HTML tags, they will be stripped out during rendering. + Ok(format!( + "<dt>{}</dt>\n<dd>{}</dd>\n<br>\n", + trimmed.join(", "), + block + )) + } + + fn linkify_man_to_md(&self, name: &str, section: u8) -> Result<String, Error> { + Ok(format!("`{}`({})", name, section)) + } +} + +struct TextRenderer<'e> { + output: String, + indent: usize, + /// The current line being written. Once a line break is encountered (such + /// as starting a new paragraph), this will be written to `output` via + /// `flush`. + line: String, + /// The current word being written. Once a break is encountered (such as a + /// space) this will be written to `line` via `flush_word`. + word: String, + parser: EventIter<'e>, + /// The base URL used for relative URLs. + url: Option<Url>, + table: Table, +} + +impl<'e> TextRenderer<'e> { + fn render(input: &str, url: Option<Url>, indent: usize) -> Result<String, Error> { + let parser = crate::md_parser(input, url.clone()); + let output = String::with_capacity(input.len() * 3 / 2); + let mut mr = TextRenderer { + output, + indent, + line: String::new(), + word: String::new(), + parser, + url, + table: Table::new(), + }; + mr.push_md()?; + Ok(mr.output) + } + + fn push_md(&mut self) -> Result<(), Error> { + // If this is true, this is inside a cdata block used for hiding + // content from pulldown_cmark. + let mut in_cdata = false; + // The current list stack. None if unordered, Some if ordered with the + // given number as the current index. + let mut list: Vec<Option<u64>> = Vec::new(); + // Used in some cases where spacing isn't desired. + let mut suppress_paragraph = false; + // Whether or not word-wrapping is enabled. + let mut wrap_text = true; + + while let Some((event, range)) = self.parser.next() { + let this_suppress_paragraph = suppress_paragraph; + // Always reset suppression, even if the next event isn't a + // paragraph. This is in essence, a 1-token lookahead where the + // suppression is only enabled if the next event is a paragraph. + suppress_paragraph = false; + match event { + Event::Start(tag) => { + match tag { + Tag::Paragraph => { + if !this_suppress_paragraph { + self.flush(); + } + } + Tag::Heading(level, ..) => { + self.flush(); + if level == HeadingLevel::H1 { + let text = header_text(&mut self.parser)?; + self.push_to_line(&text.to_uppercase()); + self.hard_break(); + self.hard_break(); + } else if level == HeadingLevel::H2 { + let text = header_text(&mut self.parser)?; + self.push_to_line(&text.to_uppercase()); + self.flush(); + self.indent = 7; + } else { + let text = header_text(&mut self.parser)?; + self.push_indent((level as usize - 2) * 3); + self.push_to_line(&text); + self.flush(); + self.indent = (level as usize - 1) * 3 + 1; + } + } + Tag::BlockQuote => { + self.indent += 3; + } + Tag::CodeBlock(_kind) => { + self.flush(); + wrap_text = false; + self.indent += 4; + } + Tag::List(start) => list.push(start), + Tag::Item => { + self.flush(); + match list.last_mut().expect("item must have list start") { + // Ordered list. + Some(n) => { + self.push_indent(self.indent); + write!(self.line, "{}.", n)?; + *n += 1; + } + // Unordered list. + None => { + self.push_indent(self.indent); + self.push_to_line("o ") + } + } + self.indent += 3; + suppress_paragraph = true; + } + Tag::FootnoteDefinition(_label) => unimplemented!(), + Tag::Table(alignment) => { + assert!(self.table.alignment.is_empty()); + self.flush(); + self.table.alignment.extend(alignment); + let table = self.table.process(&mut self.parser, self.indent)?; + self.output.push_str(&table); + self.hard_break(); + self.table = Table::new(); + } + Tag::TableHead | Tag::TableRow | Tag::TableCell => { + bail!("unexpected table element") + } + Tag::Emphasis => {} + Tag::Strong => {} + // Strikethrough isn't usually supported for TTY. + Tag::Strikethrough => self.word.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + // In a man page, page-relative anchors don't + // have much meaning. + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => { + // The text is a copy of the URL, which is not needed. + match self.parser.next() { + Some((Event::Text(_), _range)) => {} + _ => bail!("expected text after autolink"), + } + } + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => {} + // This is currently unused. This is only + // emitted with a broken link callback, but I + // felt it is too annoying to escape `[` in + // option descriptions. + LinkType::ReferenceUnknown + | LinkType::CollapsedUnknown + | LinkType::ShortcutUnknown => { + bail!( + "link with missing reference `{}` located at offset {}", + dest_url, + range.start + ); + } + } + } + Tag::Image(_link_type, _dest_url, _title) => { + bail!("images are not currently supported") + } + } + } + Event::End(tag) => match &tag { + Tag::Paragraph => { + self.flush(); + self.hard_break(); + } + Tag::Heading(..) => {} + Tag::BlockQuote => { + self.indent -= 3; + } + Tag::CodeBlock(_kind) => { + self.hard_break(); + wrap_text = true; + self.indent -= 4; + } + Tag::List(_) => { + list.pop(); + } + Tag::Item => { + self.flush(); + self.indent -= 3; + self.hard_break(); + } + Tag::FootnoteDefinition(_label) => {} + Tag::Table(_) => {} + Tag::TableHead => {} + Tag::TableRow => {} + Tag::TableCell => {} + Tag::Emphasis => {} + Tag::Strong => {} + Tag::Strikethrough => self.word.push_str("~~"), + Tag::Link(link_type, dest_url, _title) => { + if dest_url.starts_with('#') { + continue; + } + match link_type { + LinkType::Autolink | LinkType::Email => {} + LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut => self.flush_word(), + _ => { + panic!("unexpected tag {:?}", tag); + } + } + self.flush_word(); + write!(self.word, "<{}>", dest_url)?; + } + Tag::Image(_link_type, _dest_url, _title) => {} + }, + Event::Text(t) | Event::Code(t) => { + if wrap_text { + let chunks = split_chunks(&t); + for chunk in chunks { + if chunk == " " { + self.flush_word(); + } else { + self.word.push_str(chunk); + } + } + } else { + for line in t.lines() { + self.push_indent(self.indent); + self.push_to_line(line); + self.flush(); + } + } + } + Event::Html(t) => { + if t.starts_with("<![CDATA[") { + // CDATA is a special marker used for handling options. + in_cdata = true; + self.flush(); + } else if in_cdata { + if t.trim().ends_with("]]>") { + in_cdata = false; + } else { + let trimmed = t.trim(); + if trimmed.is_empty() { + continue; + } + if trimmed == "<br>" { + self.hard_break(); + } else if trimmed.starts_with("<dt>") { + let opts = unwrap(trimmed, "<dt>", "</dt>"); + self.push_indent(self.indent); + self.push_to_line(opts); + self.flush(); + } else if trimmed.starts_with("<dd>") { + let mut def = String::new(); + while let Some((Event::Html(t), _range)) = self.parser.next() { + if t.starts_with("</dd>") { + break; + } + def.push_str(&t); + } + let rendered = + TextRenderer::render(&def, self.url.clone(), self.indent + 4)?; + self.push_to_line(rendered.trim_end()); + self.flush(); + } else { + self.push_to_line(&t); + self.flush(); + } + } + } else { + self.push_to_line(&t); + self.flush(); + } + } + Event::FootnoteReference(_t) => {} + Event::SoftBreak => self.flush_word(), + Event::HardBreak => self.flush(), + Event::Rule => { + self.flush(); + self.push_indent(self.indent); + self.push_to_line(&"_".repeat(79 - self.indent * 2)); + self.flush(); + } + Event::TaskListMarker(_b) => unimplemented!(), + } + } + Ok(()) + } + + fn flush(&mut self) { + self.flush_word(); + if !self.line.is_empty() { + self.output.push_str(&self.line); + self.output.push('\n'); + self.line.clear(); + } + } + + fn hard_break(&mut self) { + self.flush(); + if !self.output.ends_with("\n\n") { + self.output.push('\n'); + } + } + + fn flush_word(&mut self) { + if self.word.is_empty() { + return; + } + if self.line.len() + self.word.len() >= 79 { + self.output.push_str(&self.line); + self.output.push('\n'); + self.line.clear(); + } + if self.line.is_empty() { + self.push_indent(self.indent); + self.line.push_str(&self.word); + } else { + self.line.push(' '); + self.line.push_str(&self.word); + } + self.word.clear(); + } + + fn push_indent(&mut self, indent: usize) { + for _ in 0..indent { + self.line.push(' '); + } + } + + fn push_to_line(&mut self, text: &str) { + self.flush_word(); + self.line.push_str(text); + } +} + +/// Splits the text on whitespace. +/// +/// Consecutive whitespace is collapsed to a single ' ', and is included as a +/// separate element in the result. +fn split_chunks(text: &str) -> Vec<&str> { + let mut result = Vec::new(); + let mut start = 0; + while start < text.len() { + match text[start..].find(' ') { + Some(i) => { + if i != 0 { + result.push(&text[start..start + i]); + } + result.push(" "); + // Skip past whitespace. + match text[start + i..].find(|c| c != ' ') { + Some(n) => { + start = start + i + n; + } + None => { + break; + } + } + } + None => { + result.push(&text[start..]); + break; + } + } + } + result +} + +struct Table { + alignment: Vec<Alignment>, + rows: Vec<Vec<String>>, + row: Vec<String>, + cell: String, +} + +impl Table { + fn new() -> Table { + Table { + alignment: Vec::new(), + rows: Vec::new(), + row: Vec::new(), + cell: String::new(), + } + } + + /// Processes table events and generates a text table. + fn process(&mut self, parser: &mut EventIter<'_>, indent: usize) -> Result<String, Error> { + while let Some((event, _range)) = parser.next() { + match event { + Event::Start(tag) => match tag { + Tag::TableHead + | Tag::TableRow + | Tag::TableCell + | Tag::Emphasis + | Tag::Strong => {} + Tag::Strikethrough => self.cell.push_str("~~"), + // Links not yet supported, they usually won't fit. + Tag::Link(_, _, _) => {} + _ => bail!("unexpected tag in table: {:?}", tag), + }, + Event::End(tag) => match tag { + Tag::Table(_) => return self.render(indent), + Tag::TableCell => { + let cell = mem::replace(&mut self.cell, String::new()); + self.row.push(cell); + } + Tag::TableHead | Tag::TableRow => { + let row = mem::replace(&mut self.row, Vec::new()); + self.rows.push(row); + } + Tag::Strikethrough => self.cell.push_str("~~"), + _ => {} + }, + Event::Text(t) | Event::Code(t) => { + self.cell.push_str(&t); + } + Event::Html(t) => bail!("html unsupported in tables: {:?}", t), + _ => bail!("unexpected event in table: {:?}", event), + } + } + bail!("table end not reached"); + } + + fn render(&self, indent: usize) -> Result<String, Error> { + // This is an extremely primitive layout routine. + // First compute the potential maximum width of each cell. + // 2 for 1 space margin on left and right. + let width_acc = vec![2; self.alignment.len()]; + let mut col_widths = self + .rows + .iter() + .map(|row| row.iter().map(|cell| cell.len())) + .fold(width_acc, |mut acc, row| { + acc.iter_mut() + .zip(row) + // +3 for left/right margin and | symbol + .for_each(|(a, b)| *a = (*a).max(b + 3)); + acc + }); + // Shrink each column until it fits the total width, proportional to + // the columns total percent width. + let max_width = 78 - indent; + // Include total len for | characters, and +1 for final |. + let total_width = col_widths.iter().sum::<usize>() + col_widths.len() + 1; + if total_width > max_width { + let to_shrink = total_width - max_width; + // Compute percentage widths, and shrink each column based on its + // total percentage. + for width in &mut col_widths { + let percent = *width as f64 / total_width as f64; + *width -= (to_shrink as f64 * percent).ceil() as usize; + } + } + // Start rendering. + let mut result = String::new(); + + // Draw the horizontal line separating each row. + let mut row_line = String::new(); + row_line.push_str(&" ".repeat(indent)); + row_line.push('+'); + let lines = col_widths + .iter() + .map(|width| "-".repeat(*width)) + .collect::<Vec<_>>(); + row_line.push_str(&lines.join("+")); + row_line.push('+'); + row_line.push('\n'); + + // Draw top of the table. + result.push_str(&row_line); + // Draw each row. + for row in &self.rows { + // Word-wrap and fill each column as needed. + let filled = fill_row(row, &col_widths, &self.alignment); + // Need to transpose the cells across rows for cells that span + // multiple rows. + let height = filled.iter().map(|c| c.len()).max().unwrap(); + for row_i in 0..height { + result.push_str(&" ".repeat(indent)); + result.push('|'); + for filled_row in &filled { + let cell = &filled_row[row_i]; + result.push_str(cell); + result.push('|'); + } + result.push('\n'); + } + result.push_str(&row_line); + } + Ok(result) + } +} + +/// Formats a row, filling cells with spaces and word-wrapping text. +/// +/// Returns a vec of cells, where each cell is split into multiple lines. +fn fill_row(row: &[String], col_widths: &[usize], alignment: &[Alignment]) -> Vec<Vec<String>> { + let mut cell_lines = row + .iter() + .zip(col_widths) + .zip(alignment) + .map(|((cell, width), alignment)| fill_cell(cell, *width - 2, *alignment)) + .collect::<Vec<_>>(); + // Fill each cell to match the maximum vertical height of the tallest cell. + let max_lines = cell_lines.iter().map(|cell| cell.len()).max().unwrap(); + for (cell, width) in cell_lines.iter_mut().zip(col_widths) { + if cell.len() < max_lines { + cell.extend(std::iter::repeat(" ".repeat(*width)).take(max_lines - cell.len())); + } + } + cell_lines +} + +/// Formats a cell. Word-wraps based on width, and adjusts based on alignment. +/// +/// Returns a vec of lines for the cell. +fn fill_cell(text: &str, width: usize, alignment: Alignment) -> Vec<String> { + let fill_width = |text: &str| match alignment { + Alignment::None | Alignment::Left => format!(" {:<width$} ", text, width = width), + Alignment::Center => format!(" {:^width$} ", text, width = width), + Alignment::Right => format!(" {:>width$} ", text, width = width), + }; + if text.len() < width { + // No wrapping necessary, just format. + vec![fill_width(text)] + } else { + // Word-wrap the cell. + let mut result = Vec::new(); + let mut line = String::new(); + for word in text.split_whitespace() { + if line.len() + word.len() >= width { + // todo: word.len() > width + result.push(fill_width(&line)); + line.clear(); + } + if line.is_empty() { + line.push_str(word); + } else { + line.push(' '); + line.push_str(&word); + } + } + if !line.is_empty() { + result.push(fill_width(&line)); + } + + result + } +} diff --git a/crates/mdman/src/hbs.rs b/crates/mdman/src/hbs.rs new file mode 100644 index 0000000..81ad7ee --- /dev/null +++ b/crates/mdman/src/hbs.rs @@ -0,0 +1,215 @@ +//! Handlebars template processing. + +use crate::format::Formatter; +use anyhow::Error; +use handlebars::{ + handlebars_helper, Context, Decorator, Handlebars, Helper, HelperDef, HelperResult, Output, + RenderContext, RenderError, Renderable, +}; +use std::collections::HashMap; +use std::path::Path; + +type FormatterRef<'a> = &'a (dyn Formatter + Send + Sync); + +/// Processes the handlebars template at the given file. +pub fn expand(file: &Path, formatter: FormatterRef) -> Result<String, Error> { + let mut handlebars = Handlebars::new(); + handlebars.set_strict_mode(true); + handlebars.register_helper("lower", Box::new(lower)); + handlebars.register_helper("options", Box::new(OptionsHelper { formatter })); + handlebars.register_helper("option", Box::new(OptionHelper { formatter })); + handlebars.register_helper("man", Box::new(ManLinkHelper { formatter })); + handlebars.register_decorator("set", Box::new(set_decorator)); + handlebars.register_template_file("template", file)?; + let includes = file.parent().unwrap().join("includes"); + handlebars.register_templates_directory(".md", includes)?; + let man_name = file + .file_stem() + .expect("expected filename") + .to_str() + .expect("utf8 filename") + .to_string(); + let data = HashMap::from([("man_name", man_name)]); + let expanded = handlebars.render("template", &data)?; + Ok(expanded) +} + +/// Helper for `{{#options}}` block. +struct OptionsHelper<'a> { + formatter: FormatterRef<'a>, +} + +impl HelperDef for OptionsHelper<'_> { + fn call<'reg: 'rc, 'rc>( + &self, + h: &Helper<'reg, 'rc>, + r: &'reg Handlebars<'reg>, + ctx: &'rc Context, + rc: &mut RenderContext<'reg, 'rc>, + out: &mut dyn Output, + ) -> HelperResult { + if in_options(rc) { + return Err(RenderError::new("options blocks cannot be nested")); + } + // Prevent nested {{#options}}. + set_in_context(rc, "__MDMAN_IN_OPTIONS", serde_json::Value::Bool(true)); + let s = self.formatter.render_options_start(); + out.write(&s)?; + let t = match h.template() { + Some(t) => t, + None => return Err(RenderError::new("options block must not be empty")), + }; + let block = t.renders(r, ctx, rc)?; + out.write(&block)?; + + let s = self.formatter.render_options_end(); + out.write(&s)?; + remove_from_context(rc, "__MDMAN_IN_OPTIONS"); + Ok(()) + } +} + +/// Whether or not the context is currently inside a `{{#options}}` block. +fn in_options(rc: &RenderContext<'_, '_>) -> bool { + rc.context() + .map_or(false, |ctx| ctx.data().get("__MDMAN_IN_OPTIONS").is_some()) +} + +/// Helper for `{{#option}}` block. +struct OptionHelper<'a> { + formatter: FormatterRef<'a>, +} + +impl HelperDef for OptionHelper<'_> { + fn call<'reg: 'rc, 'rc>( + &self, + h: &Helper<'reg, 'rc>, + r: &'reg Handlebars<'reg>, + ctx: &'rc Context, + rc: &mut RenderContext<'reg, 'rc>, + out: &mut dyn Output, + ) -> HelperResult { + if !in_options(rc) { + return Err(RenderError::new("option must be in options block")); + } + let params = h.params(); + if params.is_empty() { + return Err(RenderError::new( + "option block must have at least one param", + )); + } + // Convert params to strings. + let params = params + .iter() + .map(|param| { + param + .value() + .as_str() + .ok_or_else(|| RenderError::new("option params must be strings")) + }) + .collect::<Result<Vec<&str>, RenderError>>()?; + let t = match h.template() { + Some(t) => t, + None => return Err(RenderError::new("option block must not be empty")), + }; + // Render the block. + let block = t.renders(r, ctx, rc)?; + + // Get the name of this page. + let man_name = ctx + .data() + .get("man_name") + .expect("expected man_name in context") + .as_str() + .expect("expect man_name str"); + + // Ask the formatter to convert this option to its format. + let option = self + .formatter + .render_option(¶ms, &block, man_name) + .map_err(|e| RenderError::new(format!("option render failed: {}", e)))?; + out.write(&option)?; + Ok(()) + } +} + +/// Helper for `{{man name section}}` expression. +struct ManLinkHelper<'a> { + formatter: FormatterRef<'a>, +} + +impl HelperDef for ManLinkHelper<'_> { + fn call<'reg: 'rc, 'rc>( + &self, + h: &Helper<'reg, 'rc>, + _r: &'reg Handlebars<'reg>, + _ctx: &'rc Context, + _rc: &mut RenderContext<'reg, 'rc>, + out: &mut dyn Output, + ) -> HelperResult { + let params = h.params(); + if params.len() != 2 { + return Err(RenderError::new("{{man}} must have two arguments")); + } + let name = params[0] + .value() + .as_str() + .ok_or_else(|| RenderError::new("man link name must be a string"))?; + let section = params[1] + .value() + .as_u64() + .ok_or_else(|| RenderError::new("man link section must be an integer"))?; + let section = + u8::try_from(section).map_err(|_e| RenderError::new("section number too large"))?; + let link = self + .formatter + .linkify_man_to_md(name, section) + .map_err(|e| RenderError::new(format!("failed to linkify man: {}", e)))?; + out.write(&link)?; + Ok(()) + } +} + +/// `{{*set var=value}}` decorator. +/// +/// This sets a variable to a value within the template context. +fn set_decorator( + d: &Decorator, + _: &Handlebars, + _ctx: &Context, + rc: &mut RenderContext, +) -> Result<(), RenderError> { + let data_to_set = d.hash(); + for (k, v) in data_to_set { + set_in_context(rc, k, v.value().clone()); + } + Ok(()) +} + +/// Sets a variable to a value within the context. +fn set_in_context(rc: &mut RenderContext, key: &str, value: serde_json::Value) { + let mut ctx = match rc.context() { + Some(c) => (*c).clone(), + None => Context::wraps(serde_json::Value::Object(serde_json::Map::new())).unwrap(), + }; + if let serde_json::Value::Object(m) = ctx.data_mut() { + m.insert(key.to_string(), value); + rc.set_context(ctx); + } else { + panic!("expected object in context"); + } +} + +/// Removes a variable from the context. +fn remove_from_context(rc: &mut RenderContext, key: &str) { + let ctx = rc.context().expect("cannot remove from null context"); + let mut ctx = (*ctx).clone(); + if let serde_json::Value::Object(m) = ctx.data_mut() { + m.remove(key); + rc.set_context(ctx); + } else { + panic!("expected object in context"); + } +} + +handlebars_helper!(lower: |s: str| s.to_lowercase()); diff --git a/crates/mdman/src/lib.rs b/crates/mdman/src/lib.rs new file mode 100644 index 0000000..01c3c8d --- /dev/null +++ b/crates/mdman/src/lib.rs @@ -0,0 +1,122 @@ +//! mdman markdown to man converter. + +use anyhow::{bail, Context, Error}; +use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag}; +use std::collections::HashMap; +use std::fs; +use std::io::{self, BufRead}; +use std::ops::Range; +use std::path::Path; +use url::Url; + +mod format; +mod hbs; +mod util; + +use format::Formatter; + +/// Mapping of `(name, section)` of a man page to a URL. +pub type ManMap = HashMap<(String, u8), String>; + +/// A man section. +pub type Section = u8; + +/// The output formats supported by mdman. +#[derive(Copy, Clone)] +pub enum Format { + Man, + Md, + Text, +} + +impl Format { + /// The filename extension for the format. + pub fn extension(&self, section: Section) -> String { + match self { + Format::Man => section.to_string(), + Format::Md => "md".to_string(), + Format::Text => "txt".to_string(), + } + } +} + +/// Converts the handlebars markdown file at the given path into the given +/// format, returning the translated result. +pub fn convert( + file: &Path, + format: Format, + url: Option<Url>, + man_map: ManMap, +) -> Result<String, Error> { + let formatter: Box<dyn Formatter + Send + Sync> = match format { + Format::Man => Box::new(format::man::ManFormatter::new(url)), + Format::Md => Box::new(format::md::MdFormatter::new(man_map)), + Format::Text => Box::new(format::text::TextFormatter::new(url)), + }; + let expanded = hbs::expand(file, &*formatter)?; + // pulldown-cmark can behave a little differently with Windows newlines, + // just normalize it. + let expanded = expanded.replace("\r\n", "\n"); + formatter.render(&expanded) +} + +/// Pulldown-cmark iterator yielding an `(event, range)` tuple. +type EventIter<'a> = Box<dyn Iterator<Item = (Event<'a>, Range<usize>)> + 'a>; + +/// Creates a new markdown parser with the given input. +pub(crate) fn md_parser(input: &str, url: Option<Url>) -> EventIter { + let mut options = Options::empty(); + options.insert(Options::ENABLE_TABLES); + options.insert(Options::ENABLE_FOOTNOTES); + options.insert(Options::ENABLE_STRIKETHROUGH); + options.insert(Options::ENABLE_SMART_PUNCTUATION); + let parser = Parser::new_ext(input, options); + let parser = parser.into_offset_iter(); + // Translate all links to include the base url. + let parser = parser.map(move |(event, range)| match event { + Event::Start(Tag::Link(lt, dest_url, title)) if !matches!(lt, LinkType::Email) => ( + Event::Start(Tag::Link(lt, join_url(url.as_ref(), dest_url), title)), + range, + ), + Event::End(Tag::Link(lt, dest_url, title)) if !matches!(lt, LinkType::Email) => ( + Event::End(Tag::Link(lt, join_url(url.as_ref(), dest_url), title)), + range, + ), + _ => (event, range), + }); + Box::new(parser) +} + +fn join_url<'a>(base: Option<&Url>, dest: CowStr<'a>) -> CowStr<'a> { + match base { + Some(base_url) => { + // Absolute URL or page-relative anchor doesn't need to be translated. + if dest.contains(':') || dest.starts_with('#') { + dest + } else { + let joined = base_url.join(&dest).unwrap_or_else(|e| { + panic!("failed to join URL `{}` to `{}`: {}", dest, base_url, e) + }); + String::from(joined).into() + } + } + None => dest, + } +} + +pub fn extract_section(file: &Path) -> Result<Section, Error> { + let f = fs::File::open(file).with_context(|| format!("could not open `{}`", file.display()))?; + let mut f = io::BufReader::new(f); + let mut line = String::new(); + f.read_line(&mut line)?; + if !line.starts_with("# ") { + bail!("expected input file to start with # header"); + } + let (_name, section) = util::parse_name_and_section(&line[2..].trim()).with_context(|| { + format!( + "expected input file to have header with the format `# command-name(1)`, found: `{}`", + line + ) + })?; + Ok(section) +} diff --git a/crates/mdman/src/main.rs b/crates/mdman/src/main.rs new file mode 100644 index 0000000..2bdf96d --- /dev/null +++ b/crates/mdman/src/main.rs @@ -0,0 +1,133 @@ +use anyhow::{bail, format_err, Context, Error}; +use mdman::{Format, ManMap}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use url::Url; + +/// Command-line options. +struct Options { + format: Format, + output_dir: PathBuf, + sources: Vec<PathBuf>, + url: Option<Url>, + man_map: ManMap, +} + +fn main() { + if let Err(e) = run() { + eprintln!("error: {}", e); + for cause in e.chain().skip(1) { + eprintln!("\nCaused by:"); + for line in cause.to_string().lines() { + if line.is_empty() { + eprintln!(); + } else { + eprintln!(" {}", line); + } + } + } + std::process::exit(1); + } +} + +fn run() -> Result<(), Error> { + let opts = process_args()?; + if !opts.output_dir.exists() { + std::fs::create_dir_all(&opts.output_dir).with_context(|| { + format!( + "failed to create output directory {}", + opts.output_dir.display() + ) + })?; + } + for source in &opts.sources { + let section = mdman::extract_section(source)?; + let filename = + Path::new(source.file_name().unwrap()).with_extension(opts.format.extension(section)); + let out_path = opts.output_dir.join(filename); + if same_file::is_same_file(source, &out_path).unwrap_or(false) { + bail!("cannot output to the same file as the source"); + } + println!("Converting {} -> {}", source.display(), out_path.display()); + let result = mdman::convert(&source, opts.format, opts.url.clone(), opts.man_map.clone()) + .with_context(|| format!("failed to translate {}", source.display()))?; + + std::fs::write(out_path, result)?; + } + Ok(()) +} + +fn process_args() -> Result<Options, Error> { + let mut format = None; + let mut output = None; + let mut url = None; + let mut man_map: ManMap = HashMap::new(); + let mut sources = Vec::new(); + let mut args = std::env::args().skip(1); + while let Some(arg) = args.next() { + match arg.as_str() { + "-t" => { + format = match args.next().as_deref() { + Some("man") => Some(Format::Man), + Some("md") => Some(Format::Md), + Some("txt") => Some(Format::Text), + Some(s) => bail!("unknown output format: {}", s), + None => bail!("-t requires a value (man, md, txt)"), + }; + } + "-o" => { + output = match args.next() { + Some(s) => Some(PathBuf::from(s)), + None => bail!("-o requires a value"), + }; + } + "--url" => { + url = match args.next() { + Some(s) => { + let url = Url::parse(&s) + .with_context(|| format!("could not convert `{}` to a url", s))?; + if !url.path().ends_with('/') { + bail!("url `{}` should end with a /", url); + } + Some(url) + } + None => bail!("--url requires a value"), + } + } + "--man" => { + let man = args + .next() + .ok_or_else(|| format_err!("--man requires a value"))?; + let parts: Vec<_> = man.splitn(2, '=').collect(); + let key_parts: Vec<_> = parts[0].splitn(2, ':').collect(); + if parts.len() != 2 || key_parts.len() != 2 { + bail!("--man expected value with form name:1=link"); + } + let section: u8 = key_parts[1].parse().with_context(|| { + format!("expected unsigned integer for section, got `{}`", parts[1]) + })?; + man_map.insert((key_parts[0].to_string(), section), parts[1].to_string()); + } + s => { + sources.push(PathBuf::from(s)); + } + } + } + if format.is_none() { + bail!("-t must be specified (man, md, txt)"); + } + if output.is_none() { + bail!("-o must be specified (output directory)"); + } + if sources.is_empty() { + bail!("at least one source must be specified"); + } + let opts = Options { + format: format.unwrap(), + output_dir: output.unwrap(), + sources, + url, + man_map, + }; + Ok(opts) +} diff --git a/crates/mdman/src/util.rs b/crates/mdman/src/util.rs new file mode 100644 index 0000000..a4c71ad --- /dev/null +++ b/crates/mdman/src/util.rs @@ -0,0 +1,44 @@ +///! General utilities. +use crate::EventIter; +use anyhow::{bail, format_err, Context, Error}; +use pulldown_cmark::{CowStr, Event, Tag}; + +/// Splits the text `foo(1)` into "foo" and `1`. +pub fn parse_name_and_section(text: &str) -> Result<(&str, u8), Error> { + let mut i = text.split_terminator(&['(', ')'][..]); + let name = i + .next() + .ok_or_else(|| format_err!("man reference must have a name"))?; + let section = i + .next() + .ok_or_else(|| format_err!("man reference must have a section such as mycommand(1)"))?; + if let Some(s) = i.next() { + bail!( + "man reference must have the form mycommand(1), got extra part `{}`", + s + ); + } + let section: u8 = section + .parse() + .with_context(|| format!("section must be a number, got {}", section))?; + Ok((name, section)) +} + +/// Extracts the text from a header after Tag::Heading has been received. +pub fn header_text<'e>(parser: &mut EventIter<'e>) -> Result<CowStr<'e>, Error> { + let text = match parser.next() { + Some((Event::Text(t), _range)) => t, + e => bail!("expected plain text in man header, got {:?}", e), + }; + match parser.next() { + Some((Event::End(Tag::Heading(..)), _range)) => { + return Ok(text); + } + e => bail!("expected plain text in man header, got {:?}", e), + } +} + +/// Removes tags from the front and back of a string. +pub fn unwrap<'t>(text: &'t str, front: &str, back: &str) -> &'t str { + text.trim().trim_start_matches(front).trim_end_matches(back) +} |