src/go/doc/comment/text.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package comment

import (
	"bytes"
	"fmt"
	"sort"
	"strings"
	"unicode/utf8"
)

// A textPrinter holds the state needed for printing a Doc as plain text.
type textPrinter struct {
	*Printer
	long       strings.Builder
	prefix     string
	codePrefix string
	width      int
}

// Text returns a textual formatting of the Doc.
// See the [Printer] documentation for ways to customize the text output.
func (p *Printer) Text(d *Doc) []byte {
	tp := &textPrinter{
		Printer:    p,
		prefix:     p.TextPrefix,
		codePrefix: p.TextCodePrefix,
		width:      p.TextWidth,
	}
	if tp.codePrefix == "" {
		tp.codePrefix = p.TextPrefix + "\t"
	}
	if tp.width == 0 {
		tp.width = 80 - utf8.RuneCountInString(tp.prefix)
	}

	var out bytes.Buffer
	for i, x := range d.Content {
		if i > 0 && blankBefore(x) {
			out.WriteString(tp.prefix)
			writeNL(&out)
		}
		tp.block(&out, x)
	}
	anyUsed := false
	for _, def := range d.Links {
		if def.Used {
			anyUsed = true
			break
		}
	}
	if anyUsed {
		writeNL(&out)
		for _, def := range d.Links {
			if def.Used {
				fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
			}
		}
	}
	return out.Bytes()
}

// writeNL calls out.WriteByte('\n')
// but first trims trailing spaces on the previous line.
func writeNL(out *bytes.Buffer) {
	// Trim trailing spaces.
	data := out.Bytes()
	n := 0
	for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') {
		n++
	}
	if n > 0 {
		out.Truncate(len(data) - n)
	}
	out.WriteByte('\n')
}

// block prints the block x to out.
func (p *textPrinter) block(out *bytes.Buffer, x Block) {
	switch x := x.(type) {
	default:
		fmt.Fprintf(out, "?%T\n", x)

	case *Paragraph:
		out.WriteString(p.prefix)
		p.text(out, "", x.Text)

	case *Heading:
		out.WriteString(p.prefix)
		out.WriteString("# ")
		p.text(out, "", x.Text)

	case *Code:
		text := x.Text
		for text != "" {
			var line string
			line, text, _ = strings.Cut(text, "\n")
			if line != "" {
				out.WriteString(p.codePrefix)
				out.WriteString(line)
			}
			writeNL(out)
		}

	case *List:
		loose := x.BlankBetween()
		for i, item := range x.Items {
			if i > 0 && loose {
				out.WriteString(p.prefix)
				writeNL(out)
			}
			out.WriteString(p.prefix)
			out.WriteString(" ")
			if item.Number == "" {
				out.WriteString(" - ")
			} else {
				out.WriteString(item.Number)
				out.WriteString(". ")
			}
			for i, blk := range item.Content {
				const fourSpace = "    "
				if i > 0 {
					writeNL(out)
					out.WriteString(p.prefix)
					out.WriteString(fourSpace)
				}
				p.text(out, fourSpace, blk.(*Paragraph).Text)
			}
		}
	}
}

// text prints the text sequence x to out.
func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) {
	p.oneLongLine(&p.long, x)
	words := strings.Fields(p.long.String())
	p.long.Reset()

	var seq []int
	if p.width < 0 || len(words) == 0 {
		seq = []int{0, len(words)} // one long line
	} else {
		seq = wrap(words, p.width-utf8.RuneCountInString(indent))
	}
	for i := 0; i+1 < len(seq); i++ {
		if i > 0 {
			out.WriteString(p.prefix)
			out.WriteString(indent)
		}
		for j, w := range words[seq[i]:seq[i+1]] {
			if j > 0 {
				out.WriteString(" ")
			}
			out.WriteString(w)
		}
		writeNL(out)
	}
}

// oneLongLine prints the text sequence x to out as one long line,
// without worrying about line wrapping.
// Explicit links have the [ ] dropped to improve readability.
func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) {
	for _, t := range x {
		switch t := t.(type) {
		case Plain:
			out.WriteString(string(t))
		case Italic:
			out.WriteString(string(t))
		case *Link:
			p.oneLongLine(out, t.Text)
		case *DocLink:
			p.oneLongLine(out, t.Text)
		}
	}
}

// wrap wraps words into lines of at most max runes,
// minimizing the sum of the squares of the leftover lengths
// at the end of each line (except the last, of course),
// with a preference for ending lines at punctuation (.,:;).
//
// The returned slice gives the indexes of the first words
// on each line in the wrapped text with a final entry of len(words).
// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
// ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
//
// The implementation runs in O(n log n) time, where n = len(words),
// using the algorithm described in D. S. Hirschberg and L. L. Larmore,
// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
//
// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
func wrap(words []string, max int) (seq []int) {
	// The algorithm requires that our scoring function be concave,
	// meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
	// weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
	//
	// Our weights are two-element pairs [hi, lo]
	// ordered by elementwise comparison.
	// The hi entry counts the weight for lines that are longer than max,
	// and the lo entry counts the weight for lines that are not.
	// This forces the algorithm to first minimize the number of lines
	// that are longer than max, which correspond to lines with
	// single very long words. Having done that, it can move on to
	// minimizing the lo score, which is more interesting.
	//
	// The lo score is the sum for each line of the square of the
	// number of spaces remaining at the end of the line and a
	// penalty of 64 given out for not ending the line in a
	// punctuation character (.,:;).
	// The penalty is somewhat arbitrarily chosen by trying
	// different amounts and judging how nice the wrapped text looks.
	// Roughly speaking, using 64 means that we are willing to
	// end a line with eight blank spaces in order to end at a
	// punctuation character, even if the next word would fit in
	// those spaces.
	//
	// We care about ending in punctuation characters because
	// it makes the text easier to skim if not too many sentences
	// or phrases begin with a single word on the previous line.

	// A score is the score (also called weight) for a given line.
	// add and cmp add and compare scores.
	type score struct {
		hi int64
		lo int64
	}
	add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
	cmp := func(s, t score) int {
		switch {
		case s.hi < t.hi:
			return -1
		case s.hi > t.hi:
			return +1
		case s.lo < t.lo:
			return -1
		case s.lo > t.lo:
			return +1
		}
		return 0
	}

	// total[j] is the total number of runes
	// (including separating spaces) in words[:j].
	total := make([]int, len(words)+1)
	total[0] = 0
	for i, s := range words {
		total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
	}

	// weight returns weight(i, j).
	weight := func(i, j int) score {
		// On the last line, there is zero weight for being too short.
		n := total[j] - 1 - total[i]
		if j == len(words) && n <= max {
			return score{0, 0}
		}

		// Otherwise the weight is the penalty plus the square of the number of
		// characters remaining on the line or by which the line goes over.
		// In the latter case, that value goes in the hi part of the score.
		// (See note above.)
		p := wrapPenalty(words[j-1])
		v := int64(max-n) * int64(max-n)
		if n > max {
			return score{v, p}
		}
		return score{0, v + p}
	}

	// The rest of this function is “The Basic Algorithm” from
	// Hirschberg and Larmore's conference paper,
	// using the same names as in the paper.
	f := []score{{0, 0}}
	g := func(i, j int) score { return add(f[i], weight(i, j)) }

	bridge := func(a, b, c int) bool {
		k := c + sort.Search(len(words)+1-c, func(k int) bool {
			k += c
			return cmp(g(a, k), g(b, k)) > 0
		})
		if k > len(words) {
			return true
		}
		return cmp(g(c, k), g(b, k)) <= 0
	}

	// d is a one-ended deque implemented as a slice.
	d := make([]int, 1, len(words))
	d[0] = 0
	bestleft := make([]int, 1, len(words))
	bestleft[0] = -1
	for m := 1; m < len(words); m++ {
		f = append(f, g(d[0], m))
		bestleft = append(bestleft, d[0])
		for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
			d = d[1:] // “Retire”
		}
		for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
			d = d[:len(d)-1] // “Fire”
		}
		if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
			d = append(d, m) // “Hire”
			// The next few lines are not in the paper but are necessary
			// to handle two-word inputs correctly. It appears to be
			// just a bug in the paper's pseudocode.
			if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
				d = d[1:]
			}
		}
	}
	bestleft = append(bestleft, d[0])

	// Recover least weight sequence from bestleft.
	n := 1
	for m := len(words); m > 0; m = bestleft[m] {
		n++
	}
	seq = make([]int, n)
	for m := len(words); m > 0; m = bestleft[m] {
		n--
		seq[n] = m
	}
	return seq
}

// wrapPenalty is the penalty for inserting a line break after word s.
func wrapPenalty(s string) int64 {
	switch s[len(s)-1] {
	case '.', ',', ':', ';':
		return 0
	}
	return 64
}