diff options
Diffstat (limited to 'md2man/roff.go')
-rw-r--r-- | md2man/roff.go | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/md2man/roff.go b/md2man/roff.go new file mode 100644 index 0000000..8a290f1 --- /dev/null +++ b/md2man/roff.go @@ -0,0 +1,382 @@ +package md2man + +import ( + "bufio" + "bytes" + "fmt" + "io" + "os" + "strings" + + "github.com/russross/blackfriday/v2" +) + +// roffRenderer implements the blackfriday.Renderer interface for creating +// roff format (manpages) from markdown text +type roffRenderer struct { + extensions blackfriday.Extensions + listCounters []int + firstHeader bool + firstDD bool + listDepth int +} + +const ( + titleHeader = ".TH " + topLevelHeader = "\n\n.SH " + secondLevelHdr = "\n.SH " + otherHeader = "\n.SS " + crTag = "\n" + emphTag = "\\fI" + emphCloseTag = "\\fP" + strongTag = "\\fB" + strongCloseTag = "\\fP" + breakTag = "\n.br\n" + paraTag = "\n.PP\n" + hruleTag = "\n.ti 0\n\\l'\\n(.lu'\n" + linkTag = "\n\\[la]" + linkCloseTag = "\\[ra]" + codespanTag = "\\fB" + codespanCloseTag = "\\fR" + codeTag = "\n.EX\n" + codeCloseTag = ".EE\n" // Do not prepend a newline character since code blocks, by definition, include a newline already (or at least as how blackfriday gives us on). + quoteTag = "\n.PP\n.RS\n" + quoteCloseTag = "\n.RE\n" + listTag = "\n.RS\n" + listCloseTag = "\n.RE\n" + dtTag = "\n.TP\n" + dd2Tag = "\n" + tableStart = "\n.TS\nallbox;\n" + tableEnd = ".TE\n" + tableCellStart = "T{\n" + tableCellEnd = "\nT}\n" + tablePreprocessor = `'\" t` +) + +// NewRoffRenderer creates a new blackfriday Renderer for generating roff documents +// from markdown +func NewRoffRenderer() *roffRenderer { // nolint: golint + var extensions blackfriday.Extensions + + extensions |= blackfriday.NoIntraEmphasis + extensions |= blackfriday.Tables + extensions |= blackfriday.FencedCode + extensions |= blackfriday.SpaceHeadings + extensions |= blackfriday.Footnotes + extensions |= blackfriday.Titleblock + extensions |= blackfriday.DefinitionLists + return &roffRenderer{ + extensions: extensions, + } +} + +// GetExtensions returns the list of extensions used by this renderer implementation +func (r *roffRenderer) GetExtensions() blackfriday.Extensions { + return r.extensions +} + +// RenderHeader handles outputting the header at document start +func (r *roffRenderer) RenderHeader(w io.Writer, ast *blackfriday.Node) { + // We need to walk the tree to check if there are any tables. + // If there are, we need to enable the roff table preprocessor. + ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { + if node.Type == blackfriday.Table { + out(w, tablePreprocessor+"\n") + return blackfriday.Terminate + } + return blackfriday.GoToNext + }) + + // disable hyphenation + out(w, ".nh\n") +} + +// RenderFooter handles outputting the footer at the document end; the roff +// renderer has no footer information +func (r *roffRenderer) RenderFooter(w io.Writer, ast *blackfriday.Node) { +} + +// RenderNode is called for each node in a markdown document; based on the node +// type the equivalent roff output is sent to the writer +func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { + walkAction := blackfriday.GoToNext + + switch node.Type { + case blackfriday.Text: + escapeSpecialChars(w, node.Literal) + case blackfriday.Softbreak: + out(w, crTag) + case blackfriday.Hardbreak: + out(w, breakTag) + case blackfriday.Emph: + if entering { + out(w, emphTag) + } else { + out(w, emphCloseTag) + } + case blackfriday.Strong: + if entering { + out(w, strongTag) + } else { + out(w, strongCloseTag) + } + case blackfriday.Link: + // Don't render the link text for automatic links, because this + // will only duplicate the URL in the roff output. + // See https://daringfireball.net/projects/markdown/syntax#autolink + if !bytes.Equal(node.LinkData.Destination, node.FirstChild.Literal) { + out(w, string(node.FirstChild.Literal)) + } + // Hyphens in a link must be escaped to avoid word-wrap in the rendered man page. + escapedLink := strings.ReplaceAll(string(node.LinkData.Destination), "-", "\\-") + out(w, linkTag+escapedLink+linkCloseTag) + walkAction = blackfriday.SkipChildren + case blackfriday.Image: + // ignore images + walkAction = blackfriday.SkipChildren + case blackfriday.Code: + out(w, codespanTag) + escapeSpecialChars(w, node.Literal) + out(w, codespanCloseTag) + case blackfriday.Document: + break + case blackfriday.Paragraph: + // roff .PP markers break lists + if r.listDepth > 0 { + return blackfriday.GoToNext + } + if entering { + out(w, paraTag) + } else { + out(w, crTag) + } + case blackfriday.BlockQuote: + if entering { + out(w, quoteTag) + } else { + out(w, quoteCloseTag) + } + case blackfriday.Heading: + r.handleHeading(w, node, entering) + case blackfriday.HorizontalRule: + out(w, hruleTag) + case blackfriday.List: + r.handleList(w, node, entering) + case blackfriday.Item: + r.handleItem(w, node, entering) + case blackfriday.CodeBlock: + out(w, codeTag) + escapeSpecialChars(w, node.Literal) + out(w, codeCloseTag) + case blackfriday.Table: + r.handleTable(w, node, entering) + case blackfriday.TableHead: + case blackfriday.TableBody: + case blackfriday.TableRow: + // no action as cell entries do all the nroff formatting + return blackfriday.GoToNext + case blackfriday.TableCell: + r.handleTableCell(w, node, entering) + case blackfriday.HTMLSpan: + // ignore other HTML tags + case blackfriday.HTMLBlock: + if bytes.HasPrefix(node.Literal, []byte("<!--")) { + break // ignore comments, no warning + } + fmt.Fprintln(os.Stderr, "WARNING: go-md2man does not handle node type "+node.Type.String()) + default: + fmt.Fprintln(os.Stderr, "WARNING: go-md2man does not handle node type "+node.Type.String()) + } + return walkAction +} + +func (r *roffRenderer) handleHeading(w io.Writer, node *blackfriday.Node, entering bool) { + if entering { + switch node.Level { + case 1: + if !r.firstHeader { + out(w, titleHeader) + r.firstHeader = true + break + } + out(w, topLevelHeader) + case 2: + out(w, secondLevelHdr) + default: + out(w, otherHeader) + } + } +} + +func (r *roffRenderer) handleList(w io.Writer, node *blackfriday.Node, entering bool) { + openTag := listTag + closeTag := listCloseTag + if node.ListFlags&blackfriday.ListTypeDefinition != 0 { + // tags for definition lists handled within Item node + openTag = "" + closeTag = "" + } + if entering { + r.listDepth++ + if node.ListFlags&blackfriday.ListTypeOrdered != 0 { + r.listCounters = append(r.listCounters, 1) + } + out(w, openTag) + } else { + if node.ListFlags&blackfriday.ListTypeOrdered != 0 { + r.listCounters = r.listCounters[:len(r.listCounters)-1] + } + out(w, closeTag) + r.listDepth-- + } +} + +func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering bool) { + if entering { + if node.ListFlags&blackfriday.ListTypeOrdered != 0 { + out(w, fmt.Sprintf(".IP \"%3d.\" 5\n", r.listCounters[len(r.listCounters)-1])) + r.listCounters[len(r.listCounters)-1]++ + } else if node.ListFlags&blackfriday.ListTypeTerm != 0 { + // DT (definition term): line just before DD (see below). + out(w, dtTag) + r.firstDD = true + } else if node.ListFlags&blackfriday.ListTypeDefinition != 0 { + // DD (definition description): line that starts with ": ". + // + // We have to distinguish between the first DD and the + // subsequent ones, as there should be no vertical + // whitespace between the DT and the first DD. + if r.firstDD { + r.firstDD = false + } else { + out(w, dd2Tag) + } + } else { + out(w, ".IP \\(bu 2\n") + } + } else { + out(w, "\n") + } +} + +func (r *roffRenderer) handleTable(w io.Writer, node *blackfriday.Node, entering bool) { + if entering { + out(w, tableStart) + // call walker to count cells (and rows?) so format section can be produced + columns := countColumns(node) + out(w, strings.Repeat("l ", columns)+"\n") + out(w, strings.Repeat("l ", columns)+".\n") + } else { + out(w, tableEnd) + } +} + +func (r *roffRenderer) handleTableCell(w io.Writer, node *blackfriday.Node, entering bool) { + if entering { + var start string + if node.Prev != nil && node.Prev.Type == blackfriday.TableCell { + start = "\t" + } + if node.IsHeader { + start += strongTag + } else if nodeLiteralSize(node) > 30 { + start += tableCellStart + } + out(w, start) + } else { + var end string + if node.IsHeader { + end = strongCloseTag + } else if nodeLiteralSize(node) > 30 { + end = tableCellEnd + } + if node.Next == nil && end != tableCellEnd { + // Last cell: need to carriage return if we are at the end of the + // header row and content isn't wrapped in a "tablecell" + end += crTag + } + out(w, end) + } +} + +func nodeLiteralSize(node *blackfriday.Node) int { + total := 0 + for n := node.FirstChild; n != nil; n = n.FirstChild { + total += len(n.Literal) + } + return total +} + +// because roff format requires knowing the column count before outputting any table +// data we need to walk a table tree and count the columns +func countColumns(node *blackfriday.Node) int { + var columns int + + node.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { + switch node.Type { + case blackfriday.TableRow: + if !entering { + return blackfriday.Terminate + } + case blackfriday.TableCell: + if entering { + columns++ + } + default: + } + return blackfriday.GoToNext + }) + return columns +} + +func out(w io.Writer, output string) { + io.WriteString(w, output) // nolint: errcheck +} + +func escapeSpecialChars(w io.Writer, text []byte) { + scanner := bufio.NewScanner(bytes.NewReader(text)) + + // count the number of lines in the text + // we need to know this to avoid adding a newline after the last line + n := bytes.Count(text, []byte{'\n'}) + idx := 0 + + for scanner.Scan() { + dt := scanner.Bytes() + if idx < n { + idx++ + dt = append(dt, '\n') + } + escapeSpecialCharsLine(w, dt) + } + + if err := scanner.Err(); err != nil { + panic(err) + } +} + +func escapeSpecialCharsLine(w io.Writer, text []byte) { + for i := 0; i < len(text); i++ { + // escape initial apostrophe or period + if len(text) >= 1 && (text[0] == '\'' || text[0] == '.') { + out(w, "\\&") + } + + // directly copy normal characters + org := i + + for i < len(text) && text[i] != '\\' { + i++ + } + if i > org { + w.Write(text[org:i]) // nolint: errcheck + } + + // escape a character + if i >= len(text) { + break + } + + w.Write([]byte{'\\', text[i]}) // nolint: errcheck + } +} |