diff options
Diffstat (limited to 'src/go/doc/comment/text.go')
-rw-r--r-- | src/go/doc/comment/text.go | 337 |
1 files changed, 337 insertions, 0 deletions
diff --git a/src/go/doc/comment/text.go b/src/go/doc/comment/text.go new file mode 100644 index 0000000..6f9c2e2 --- /dev/null +++ b/src/go/doc/comment/text.go @@ -0,0 +1,337 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "fmt" + "sort" + "strings" + "unicode/utf8" +) + +// A textPrinter holds the state needed for printing a Doc as plain text. +type textPrinter struct { + *Printer + long strings.Builder + prefix string + codePrefix string + width int +} + +// Text returns a textual formatting of the Doc. +// See the [Printer] documentation for ways to customize the text output. +func (p *Printer) Text(d *Doc) []byte { + tp := &textPrinter{ + Printer: p, + prefix: p.TextPrefix, + codePrefix: p.TextCodePrefix, + width: p.TextWidth, + } + if tp.codePrefix == "" { + tp.codePrefix = p.TextPrefix + "\t" + } + if tp.width == 0 { + tp.width = 80 - utf8.RuneCountInString(tp.prefix) + } + + var out bytes.Buffer + for i, x := range d.Content { + if i > 0 && blankBefore(x) { + out.WriteString(tp.prefix) + writeNL(&out) + } + tp.block(&out, x) + } + anyUsed := false + for _, def := range d.Links { + if def.Used { + anyUsed = true + break + } + } + if anyUsed { + writeNL(&out) + for _, def := range d.Links { + if def.Used { + fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL) + } + } + } + return out.Bytes() +} + +// writeNL calls out.WriteByte('\n') +// but first trims trailing spaces on the previous line. +func writeNL(out *bytes.Buffer) { + // Trim trailing spaces. + data := out.Bytes() + n := 0 + for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') { + n++ + } + if n > 0 { + out.Truncate(len(data) - n) + } + out.WriteByte('\n') +} + +// block prints the block x to out. +func (p *textPrinter) block(out *bytes.Buffer, x Block) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T\n", x) + + case *Paragraph: + out.WriteString(p.prefix) + p.text(out, "", x.Text) + + case *Heading: + out.WriteString(p.prefix) + out.WriteString("# ") + p.text(out, "", x.Text) + + case *Code: + text := x.Text + for text != "" { + var line string + line, text, _ = strings.Cut(text, "\n") + if line != "" { + out.WriteString(p.codePrefix) + out.WriteString(line) + } + writeNL(out) + } + + case *List: + loose := x.BlankBetween() + for i, item := range x.Items { + if i > 0 && loose { + out.WriteString(p.prefix) + writeNL(out) + } + out.WriteString(p.prefix) + out.WriteString(" ") + if item.Number == "" { + out.WriteString(" - ") + } else { + out.WriteString(item.Number) + out.WriteString(". ") + } + for i, blk := range item.Content { + const fourSpace = " " + if i > 0 { + writeNL(out) + out.WriteString(p.prefix) + out.WriteString(fourSpace) + } + p.text(out, fourSpace, blk.(*Paragraph).Text) + } + } + } +} + +// text prints the text sequence x to out. +func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) { + p.oneLongLine(&p.long, x) + words := strings.Fields(p.long.String()) + p.long.Reset() + + var seq []int + if p.width < 0 || len(words) == 0 { + seq = []int{0, len(words)} // one long line + } else { + seq = wrap(words, p.width-utf8.RuneCountInString(indent)) + } + for i := 0; i+1 < len(seq); i++ { + if i > 0 { + out.WriteString(p.prefix) + out.WriteString(indent) + } + for j, w := range words[seq[i]:seq[i+1]] { + if j > 0 { + out.WriteString(" ") + } + out.WriteString(w) + } + writeNL(out) + } +} + +// oneLongLine prints the text sequence x to out as one long line, +// without worrying about line wrapping. +// Explicit links have the [ ] dropped to improve readability. +func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) { + for _, t := range x { + switch t := t.(type) { + case Plain: + out.WriteString(string(t)) + case Italic: + out.WriteString(string(t)) + case *Link: + p.oneLongLine(out, t.Text) + case *DocLink: + p.oneLongLine(out, t.Text) + } + } +} + +// wrap wraps words into lines of at most max runes, +// minimizing the sum of the squares of the leftover lengths +// at the end of each line (except the last, of course), +// with a preference for ending lines at punctuation (.,:;). +// +// The returned slice gives the indexes of the first words +// on each line in the wrapped text with a final entry of len(words). +// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]], +// ..., words[seq[len(seq)-2]:seq[len(seq)-1]]. +// +// The implementation runs in O(n log n) time, where n = len(words), +// using the algorithm described in D. S. Hirschberg and L. L. Larmore, +// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143. +// +// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60 +func wrap(words []string, max int) (seq []int) { + // The algorithm requires that our scoring function be concave, + // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁, + // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀). + // + // Our weights are two-element pairs [hi, lo] + // ordered by elementwise comparison. + // The hi entry counts the weight for lines that are longer than max, + // and the lo entry counts the weight for lines that are not. + // This forces the algorithm to first minimize the number of lines + // that are longer than max, which correspond to lines with + // single very long words. Having done that, it can move on to + // minimizing the lo score, which is more interesting. + // + // The lo score is the sum for each line of the square of the + // number of spaces remaining at the end of the line and a + // penalty of 64 given out for not ending the line in a + // punctuation character (.,:;). + // The penalty is somewhat arbitrarily chosen by trying + // different amounts and judging how nice the wrapped text looks. + // Roughly speaking, using 64 means that we are willing to + // end a line with eight blank spaces in order to end at a + // punctuation character, even if the next word would fit in + // those spaces. + // + // We care about ending in punctuation characters because + // it makes the text easier to skim if not too many sentences + // or phrases begin with a single word on the previous line. + + // A score is the score (also called weight) for a given line. + // add and cmp add and compare scores. + type score struct { + hi int64 + lo int64 + } + add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} } + cmp := func(s, t score) int { + switch { + case s.hi < t.hi: + return -1 + case s.hi > t.hi: + return +1 + case s.lo < t.lo: + return -1 + case s.lo > t.lo: + return +1 + } + return 0 + } + + // total[j] is the total number of runes + // (including separating spaces) in words[:j]. + total := make([]int, len(words)+1) + total[0] = 0 + for i, s := range words { + total[1+i] = total[i] + utf8.RuneCountInString(s) + 1 + } + + // weight returns weight(i, j). + weight := func(i, j int) score { + // On the last line, there is zero weight for being too short. + n := total[j] - 1 - total[i] + if j == len(words) && n <= max { + return score{0, 0} + } + + // Otherwise the weight is the penalty plus the square of the number of + // characters remaining on the line or by which the line goes over. + // In the latter case, that value goes in the hi part of the score. + // (See note above.) + p := wrapPenalty(words[j-1]) + v := int64(max-n) * int64(max-n) + if n > max { + return score{v, p} + } + return score{0, v + p} + } + + // The rest of this function is “The Basic Algorithm” from + // Hirschberg and Larmore's conference paper, + // using the same names as in the paper. + f := []score{{0, 0}} + g := func(i, j int) score { return add(f[i], weight(i, j)) } + + bridge := func(a, b, c int) bool { + k := c + sort.Search(len(words)+1-c, func(k int) bool { + k += c + return cmp(g(a, k), g(b, k)) > 0 + }) + if k > len(words) { + return true + } + return cmp(g(c, k), g(b, k)) <= 0 + } + + // d is a one-ended deque implemented as a slice. + d := make([]int, 1, len(words)) + d[0] = 0 + bestleft := make([]int, 1, len(words)) + bestleft[0] = -1 + for m := 1; m < len(words); m++ { + f = append(f, g(d[0], m)) + bestleft = append(bestleft, d[0]) + for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { + d = d[1:] // “Retire” + } + for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) { + d = d[:len(d)-1] // “Fire” + } + if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 { + d = append(d, m) // “Hire” + // The next few lines are not in the paper but are necessary + // to handle two-word inputs correctly. It appears to be + // just a bug in the paper's pseudocode. + if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { + d = d[1:] + } + } + } + bestleft = append(bestleft, d[0]) + + // Recover least weight sequence from bestleft. + n := 1 + for m := len(words); m > 0; m = bestleft[m] { + n++ + } + seq = make([]int, n) + for m := len(words); m > 0; m = bestleft[m] { + n-- + seq[n] = m + } + return seq +} + +// wrapPenalty is the penalty for inserting a line break after word s. +func wrapPenalty(s string) int64 { + switch s[len(s)-1] { + case '.', ',', ':', ';': + return 0 + } + return 64 +} |