summaryrefslogtreecommitdiffstats
path: root/src/go/doc/comment
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/go/doc/comment.go71
-rw-r--r--src/go/doc/comment/doc.go36
-rw-r--r--src/go/doc/comment/html.go169
-rw-r--r--src/go/doc/comment/markdown.go188
-rwxr-xr-xsrc/go/doc/comment/mkstd.sh24
-rw-r--r--src/go/doc/comment/old_test.go80
-rw-r--r--src/go/doc/comment/parse.go1262
-rw-r--r--src/go/doc/comment/parse_test.go12
-rw-r--r--src/go/doc/comment/print.go290
-rw-r--r--src/go/doc/comment/std.go44
-rw-r--r--src/go/doc/comment/std_test.go34
-rw-r--r--src/go/doc/comment/testdata/README.md42
-rw-r--r--src/go/doc/comment/testdata/blank.txt12
-rw-r--r--src/go/doc/comment/testdata/code.txt94
-rw-r--r--src/go/doc/comment/testdata/code2.txt31
-rw-r--r--src/go/doc/comment/testdata/code3.txt33
-rw-r--r--src/go/doc/comment/testdata/code4.txt38
-rw-r--r--src/go/doc/comment/testdata/code5.txt21
-rw-r--r--src/go/doc/comment/testdata/code6.txt24
-rw-r--r--src/go/doc/comment/testdata/crash1.txt16
-rw-r--r--src/go/doc/comment/testdata/doclink.txt21
-rw-r--r--src/go/doc/comment/testdata/doclink2.txt8
-rw-r--r--src/go/doc/comment/testdata/doclink3.txt8
-rw-r--r--src/go/doc/comment/testdata/doclink4.txt7
-rw-r--r--src/go/doc/comment/testdata/doclink5.txt5
-rw-r--r--src/go/doc/comment/testdata/doclink6.txt5
-rw-r--r--src/go/doc/comment/testdata/doclink7.txt4
-rw-r--r--src/go/doc/comment/testdata/escape.txt55
-rw-r--r--src/go/doc/comment/testdata/head.txt92
-rw-r--r--src/go/doc/comment/testdata/head2.txt36
-rw-r--r--src/go/doc/comment/testdata/head3.txt7
-rw-r--r--src/go/doc/comment/testdata/hello.txt35
-rw-r--r--src/go/doc/comment/testdata/link.txt17
-rw-r--r--src/go/doc/comment/testdata/link2.txt31
-rw-r--r--src/go/doc/comment/testdata/link3.txt14
-rw-r--r--src/go/doc/comment/testdata/link4.txt77
-rw-r--r--src/go/doc/comment/testdata/link5.txt36
-rw-r--r--src/go/doc/comment/testdata/link6.txt50
-rw-r--r--src/go/doc/comment/testdata/link7.txt25
-rw-r--r--src/go/doc/comment/testdata/linklist.txt18
-rw-r--r--src/go/doc/comment/testdata/linklist2.txt39
-rw-r--r--src/go/doc/comment/testdata/linklist3.txt31
-rw-r--r--src/go/doc/comment/testdata/linklist4.txt36
-rw-r--r--src/go/doc/comment/testdata/list.txt48
-rw-r--r--src/go/doc/comment/testdata/list10.txt13
-rw-r--r--src/go/doc/comment/testdata/list2.txt57
-rw-r--r--src/go/doc/comment/testdata/list3.txt32
-rw-r--r--src/go/doc/comment/testdata/list4.txt38
-rw-r--r--src/go/doc/comment/testdata/list5.txt40
-rw-r--r--src/go/doc/comment/testdata/list6.txt129
-rw-r--r--src/go/doc/comment/testdata/list7.txt98
-rw-r--r--src/go/doc/comment/testdata/list8.txt56
-rw-r--r--src/go/doc/comment/testdata/list9.txt30
-rw-r--r--src/go/doc/comment/testdata/para.txt17
-rw-r--r--src/go/doc/comment/testdata/quote.txt15
-rw-r--r--src/go/doc/comment/testdata/text.txt62
-rw-r--r--src/go/doc/comment/testdata/text2.txt14
-rw-r--r--src/go/doc/comment/testdata/text3.txt28
-rw-r--r--src/go/doc/comment/testdata/text4.txt29
-rw-r--r--src/go/doc/comment/testdata/text5.txt38
-rw-r--r--src/go/doc/comment/testdata/text6.txt18
-rw-r--r--src/go/doc/comment/testdata/text7.txt21
-rw-r--r--src/go/doc/comment/testdata/text8.txt94
-rw-r--r--src/go/doc/comment/testdata/text9.txt12
-rw-r--r--src/go/doc/comment/testdata/words.txt10
-rw-r--r--src/go/doc/comment/testdata_test.go202
-rw-r--r--src/go/doc/comment/text.go337
-rw-r--r--src/go/doc/comment/wrap_test.go141
-rw-r--r--src/go/doc/comment_test.go67
69 files changed, 4824 insertions, 0 deletions
diff --git a/src/go/doc/comment.go b/src/go/doc/comment.go
new file mode 100644
index 0000000..4f73664
--- /dev/null
+++ b/src/go/doc/comment.go
@@ -0,0 +1,71 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package doc
+
+import (
+ "go/doc/comment"
+ "io"
+)
+
+// ToHTML converts comment text to formatted HTML.
+//
+// Deprecated: ToHTML cannot identify documentation links
+// in the doc comment, because they depend on knowing what
+// package the text came from, which is not included in this API.
+//
+// Given the *[doc.Package] p where text was found,
+// ToHTML(w, text, nil) can be replaced by:
+//
+// w.Write(p.HTML(text))
+//
+// which is in turn shorthand for:
+//
+// w.Write(p.Printer().HTML(p.Parser().Parse(text)))
+//
+// If words may be non-nil, the longer replacement is:
+//
+// parser := p.Parser()
+// parser.Words = words
+// w.Write(p.Printer().HTML(parser.Parse(text)))
+func ToHTML(w io.Writer, text string, words map[string]string) {
+ p := new(Package).Parser() // parser of an empty Package: no package context is available here
+ p.Words = words
+ d := p.Parse(text)
+ pr := new(comment.Printer) // zero-value Printer
+ w.Write(pr.HTML(d)) // Write's result is discarded; ToHTML has no error return
+}
+
+// ToText converts comment text to formatted text.
+//
+// Deprecated: ToText cannot identify documentation links
+// in the doc comment, because they depend on knowing what
+// package the text came from, which is not included in this API.
+//
+// Given the *[doc.Package] p where text was found,
+// ToText(w, text, "", "\t", 80) can be replaced by:
+//
+// w.Write(p.Text(text))
+//
+// In the general case, ToText(w, text, prefix, codePrefix, width)
+// can be replaced by:
+//
+// d := p.Parser().Parse(text)
+// pr := p.Printer()
+// pr.TextPrefix = prefix
+// pr.TextCodePrefix = codePrefix
+// pr.TextWidth = width
+// w.Write(pr.Text(d))
+//
+// See the documentation for [Package.Text] and [comment.Printer.Text]
+// for more details.
+func ToText(w io.Writer, text string, prefix, codePrefix string, width int) {
+ d := new(Package).Parser().Parse(text) // parser of an empty Package: no package context is available here
+ pr := &comment.Printer{
+ TextPrefix: prefix,
+ TextCodePrefix: codePrefix,
+ TextWidth: width,
+ }
+ w.Write(pr.Text(d)) // Write's result is discarded; ToText has no error return
+}
diff --git a/src/go/doc/comment/doc.go b/src/go/doc/comment/doc.go
new file mode 100644
index 0000000..45a476a
--- /dev/null
+++ b/src/go/doc/comment/doc.go
@@ -0,0 +1,36 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package comment implements parsing and reformatting of Go doc comments
+(documentation comments), which are comments that immediately precede
+a top-level declaration of a package, const, func, type, or var.
+
+Go doc comment syntax is a simplified subset of Markdown that supports
+links, headings, paragraphs, lists (without nesting), and preformatted text blocks.
+The details of the syntax are documented at https://go.dev/doc/comment.
+
+To parse the text associated with a doc comment (after removing comment markers),
+use a [Parser]:
+
+ var p comment.Parser
+ doc := p.Parse(text)
+
+The result is a [*Doc].
+To reformat it as a doc comment, HTML, Markdown, or plain text,
+use a [Printer]:
+
+ var pr comment.Printer
+ os.Stdout.Write(pr.Text(doc))
+
+The [Parser] and [Printer] types are structs whose fields can be
+modified to customize the operations.
+For details, see the documentation for those types.
+
+Use cases that need additional control over reformatting can
+implement their own logic by inspecting the parsed syntax itself.
+See the documentation for [Doc], [Block], and [Text] for an overview
+and links to additional types.
+*/
+package comment
diff --git a/src/go/doc/comment/html.go b/src/go/doc/comment/html.go
new file mode 100644
index 0000000..bc076f6
--- /dev/null
+++ b/src/go/doc/comment/html.go
@@ -0,0 +1,169 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+)
+
+// An htmlPrinter holds the state needed for printing a Doc as HTML.
+type htmlPrinter struct {
+ *Printer
+ tight bool // set while printing the items of a list for which BlankBetween is false; paragraphs then omit "<p>"
+}
+
+// HTML returns an HTML formatting of the Doc.
+// See the [Printer] documentation for ways to customize the HTML output.
+func (p *Printer) HTML(d *Doc) []byte {
+ hp := &htmlPrinter{Printer: p} // fresh state for this call; tight starts out false
+ var out bytes.Buffer
+ for _, x := range d.Content { // top-level blocks are printed in order
+ hp.block(&out, x)
+ }
+ return out.Bytes()
+}
+
+// block prints the block x to out.
+func (p *htmlPrinter) block(out *bytes.Buffer, x Block) {
+ switch x := x.(type) {
+ default:
+ fmt.Fprintf(out, "?%T", x) // unrecognized block type: print "?" followed by its Go type
+
+ case *Paragraph:
+ if !p.tight { // inside a tight list item the "<p>" tag is omitted
+ out.WriteString("<p>")
+ }
+ p.text(out, x.Text)
+ out.WriteString("\n")
+
+ case *Heading:
+ out.WriteString("<h")
+ h := strconv.Itoa(p.headingLevel()) // level digits, reused for the closing tag
+ out.WriteString(h)
+ if id := p.headingID(x); id != "" { // an empty ID means no id attribute is written
+ out.WriteString(` id="`)
+ p.escape(out, id)
+ out.WriteString(`"`)
+ }
+ out.WriteString(">")
+ p.text(out, x.Text)
+ out.WriteString("</h")
+ out.WriteString(h)
+ out.WriteString(">\n")
+
+ case *Code:
+ out.WriteString("<pre>")
+ p.escape(out, x.Text)
+ out.WriteString("</pre>\n")
+
+ case *List:
+ kind := "ol>\n"
+ if x.Items[0].Number == "" { // the first item decides: an empty Number selects a bullet list
+ kind = "ul>\n"
+ }
+ out.WriteString("<")
+ out.WriteString(kind)
+ next := "1" // number the upcoming item has if numbering simply continues
+ for _, item := range x.Items {
+ out.WriteString("<li")
+ if n := item.Number; n != "" {
+ if n != next { // write value= only when the item breaks the running sequence
+ out.WriteString(` value="`)
+ out.WriteString(n)
+ out.WriteString(`"`)
+ next = n
+ }
+ next = inc(next)
+ }
+ out.WriteString(">")
+ p.tight = !x.BlankBetween()
+ for _, blk := range item.Content {
+ p.block(out, blk)
+ }
+ p.tight = false // back to the default once the item's content is printed
+ }
+ out.WriteString("</")
+ out.WriteString(kind)
+ }
+}
+
+// inc increments the decimal string s.
+// For example, inc("1199") == "1200".
+func inc(s string) string {
+ b := []byte(s)
+ for i := len(b) - 1; i >= 0; i-- { // walk from the last digit toward the first
+ if b[i] < '9' { // no carry out of this position: bump it and stop
+ b[i]++
+ return string(b)
+ }
+ b[i] = '0' // position wraps to '0' and the carry moves left
+ }
+ return "1" + string(b) // carry ran off the front (or s was empty): prepend "1"
+}
+
+// text prints the text sequence x to out.
+func (p *htmlPrinter) text(out *bytes.Buffer, x []Text) {
+ for _, t := range x {
+ switch t := t.(type) { // no default case: any other Text type prints nothing
+ case Plain:
+ p.escape(out, string(t))
+ case Italic:
+ out.WriteString("<i>")
+ p.escape(out, string(t))
+ out.WriteString("</i>")
+ case *Link:
+ out.WriteString(`<a href="`)
+ p.escape(out, t.URL)
+ out.WriteString(`">`)
+ p.text(out, t.Text)
+ out.WriteString("</a>")
+ case *DocLink:
+ url := p.docLinkURL(t) // an empty URL means the link text is printed without an <a> wrapper
+ if url != "" {
+ out.WriteString(`<a href="`)
+ p.escape(out, url)
+ out.WriteString(`">`)
+ }
+ p.text(out, t.Text)
+ if url != "" {
+ out.WriteString("</a>")
+ }
+ }
+ }
+}
+
+// escape prints s to out as plain text,
+// escaping < & " ' and > to avoid being misinterpreted
+// in larger HTML constructs.
+func (p *htmlPrinter) escape(out *bytes.Buffer, s string) {
+ start := 0 // start of the pending run of bytes that need no escaping
+ for i := 0; i < len(s); i++ {
+ switch s[i] {
+ case '<':
+ out.WriteString(s[start:i])
+ out.WriteString("&lt;")
+ start = i + 1
+ case '&':
+ out.WriteString(s[start:i])
+ out.WriteString("&amp;")
+ start = i + 1
+ case '"':
+ out.WriteString(s[start:i])
+ out.WriteString("&quot;")
+ start = i + 1
+ case '\'':
+ out.WriteString(s[start:i])
+ out.WriteString("&apos;")
+ start = i + 1
+ case '>':
+ out.WriteString(s[start:i])
+ out.WriteString("&gt;")
+ start = i + 1
+ }
+ }
+ out.WriteString(s[start:]) // flush the final run
+}
diff --git a/src/go/doc/comment/markdown.go b/src/go/doc/comment/markdown.go
new file mode 100644
index 0000000..d8550f2
--- /dev/null
+++ b/src/go/doc/comment/markdown.go
@@ -0,0 +1,188 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+)
+
+// An mdPrinter holds the state needed for printing a Doc as Markdown.
+type mdPrinter struct {
+ *Printer
+ headingPrefix string // "#" repeated headingLevel() times plus a space; written before each heading
+ raw bytes.Buffer // scratch buffer that text resets and refills on every call
+}
+
+// Markdown returns a Markdown formatting of the Doc.
+// See the [Printer] documentation for ways to customize the Markdown output.
+func (p *Printer) Markdown(d *Doc) []byte {
+ mp := &mdPrinter{
+ Printer: p,
+ headingPrefix: strings.Repeat("#", p.headingLevel()) + " ", // computed once per call
+ }
+
+ var out bytes.Buffer
+ for i, x := range d.Content {
+ if i > 0 { // an extra newline separates consecutive top-level blocks
+ out.WriteByte('\n')
+ }
+ mp.block(&out, x)
+ }
+ return out.Bytes()
+}
+
+// block prints the block x to out.
+func (p *mdPrinter) block(out *bytes.Buffer, x Block) {
+ switch x := x.(type) {
+ default:
+ fmt.Fprintf(out, "?%T", x) // unrecognized block type: print "?" followed by its Go type
+
+ case *Paragraph:
+ p.text(out, x.Text)
+ out.WriteString("\n")
+
+ case *Heading:
+ out.WriteString(p.headingPrefix)
+ p.text(out, x.Text)
+ if id := p.headingID(x); id != "" { // a non-empty ID is appended in " {#id}" form
+ out.WriteString(" {#")
+ out.WriteString(id)
+ out.WriteString("}")
+ }
+ out.WriteString("\n")
+
+ case *Code:
+ md := x.Text
+ for md != "" { // emit the code one line at a time
+ var line string
+ line, md, _ = strings.Cut(md, "\n")
+ if line != "" { // blank lines inside the code get no tab prefix
+ out.WriteString("\t")
+ out.WriteString(line)
+ }
+ out.WriteString("\n")
+ }
+
+ case *List:
+ loose := x.BlankBetween()
+ for i, item := range x.Items {
+ if i > 0 && loose { // loose lists get an extra newline between items
+ out.WriteString("\n")
+ }
+ if n := item.Number; n != "" {
+ out.WriteString(" ")
+ out.WriteString(n)
+ out.WriteString(". ")
+ } else {
+ out.WriteString(" - ") // SP SP - SP
+ }
+ for i, blk := range item.Content {
+ const fourSpace = " "
+ if i > 0 { // later paragraphs of the same item start on a new, indented line
+ out.WriteString("\n" + fourSpace)
+ }
+ p.text(out, blk.(*Paragraph).Text) // unchecked assertion: panics if the item holds anything but *Paragraph
+ out.WriteString("\n")
+ }
+ }
+ }
+}
+
+// text prints the text sequence x to out.
+func (p *mdPrinter) text(out *bytes.Buffer, x []Text) {
+ p.raw.Reset()
+ p.rawText(&p.raw, x) // render into the scratch buffer first so the start of the line can be inspected
+ line := bytes.TrimSpace(p.raw.Bytes())
+ if len(line) == 0 { // only white space: write nothing
+ return
+ }
+ switch line[0] {
+ case '+', '-', '*', '#':
+ // Escape what would be the start of an unordered list or heading.
+ out.WriteByte('\\')
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ i := 1
+ for i < len(line) && '0' <= line[i] && line[i] <= '9' { // i ends just past the leading run of digits
+ i++
+ }
+ if i < len(line) && (line[i] == '.' || line[i] == ')') {
+ // Escape what would be the start of an ordered list.
+ out.Write(line[:i]) // the digits go out unchanged; the backslash lands before the '.' or ')'
+ out.WriteByte('\\')
+ line = line[i:]
+ }
+ }
+ out.Write(line)
+}
+
+// rawText prints the text sequence x to out,
+// without worrying about escaping characters
+// that have special meaning at the start of a Markdown line.
+func (p *mdPrinter) rawText(out *bytes.Buffer, x []Text) {
+ for _, t := range x {
+ switch t := t.(type) { // no default case: any other Text type prints nothing
+ case Plain:
+ p.escape(out, string(t))
+ case Italic:
+ out.WriteString("*")
+ p.escape(out, string(t))
+ out.WriteString("*")
+ case *Link:
+ out.WriteString("[")
+ p.rawText(out, t.Text)
+ out.WriteString("](")
+ out.WriteString(t.URL) // NOTE(review): unlike the DocLink case, "(" and ")" in t.URL are not percent-encoded; confirm this is intended
+ out.WriteString(")")
+ case *DocLink:
+ url := p.docLinkURL(t) // an empty URL means the link text is printed without link markup
+ if url != "" {
+ out.WriteString("[")
+ }
+ p.rawText(out, t.Text)
+ if url != "" {
+ out.WriteString("](")
+ url = strings.ReplaceAll(url, "(", "%28") // presumably so a parenthesis in the URL cannot end the "(...)" link target early
+ url = strings.ReplaceAll(url, ")", "%29")
+ out.WriteString(url)
+ out.WriteString(")")
+ }
+ }
+ }
+}
+
+// escape prints s to out as plain text,
+// escaping special characters to avoid being misinterpreted
+// as Markdown markup sequences.
+func (p *mdPrinter) escape(out *bytes.Buffer, s string) {
+ start := 0 // start of the pending run of bytes that need no escaping
+ for i := 0; i < len(s); i++ {
+ switch s[i] {
+ case '\n':
+ // Turn all \n into spaces, for a few reasons:
+ // - Avoid introducing paragraph breaks accidentally.
+ // - Avoid the need to reindent after the newline.
+ // - Avoid problems with Markdown renderers treating
+ // every mid-paragraph newline as a <br>.
+ out.WriteString(s[start:i])
+ out.WriteByte(' ')
+ start = i + 1
+ continue
+ case '`', '_', '*', '[', '<', '\\':
+ // Not all of these need to be escaped all the time,
+ // but it is valid and easy to do so.
+ // We assume the Markdown is being passed to a
+ // Markdown renderer, not edited by a person,
+ // so it's fine to have escapes that are not strictly
+ // necessary in some cases.
+ out.WriteString(s[start:i])
+ out.WriteByte('\\')
+ out.WriteByte(s[i])
+ start = i + 1
+ }
+ }
+ out.WriteString(s[start:]) // flush the final run
+}
diff --git a/src/go/doc/comment/mkstd.sh b/src/go/doc/comment/mkstd.sh
new file mode 100755
index 0000000..c9dee8c
--- /dev/null
+++ b/src/go/doc/comment/mkstd.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copyright 2022 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This could be a good use for embed but go/doc/comment
+# is built into the bootstrap go command, so it can't use embed.
+# Also not using embed lets us emit a string array directly
+# and avoid init-time work.
+
+(
+echo "// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by 'go generate' DO NOT EDIT.
+//go:generate ./mkstd.sh
+
+package comment
+
+var stdPkgs = []string{"
+go list std | grep -v / | sort | sed 's/.*/"&",/' # std packages whose import path has no slash, sorted, one quoted entry per line
+echo "}"
+) | gofmt >std.go.tmp && mv std.go.tmp std.go # std.go is replaced only when gofmt exits successfully
diff --git a/src/go/doc/comment/old_test.go b/src/go/doc/comment/old_test.go
new file mode 100644
index 0000000..944f94d
--- /dev/null
+++ b/src/go/doc/comment/old_test.go
@@ -0,0 +1,80 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests are carried forward from the old go/doc implementation.
+
+package comment
+
+import "testing"
+
+var oldHeadingTests = []struct {
+ line string // candidate heading line
+ ok bool // result expected from isOldHeading for line
+}{
+ {"Section", true},
+ {"A typical usage", true},
+ {"ΔΛΞ is Greek", true},
+ {"Foo 42", true},
+ {"", false},
+ {"section", false},
+ {"A typical usage:", false},
+ {"This code:", false},
+ {"δ is Greek", false},
+ {"Foo §", false},
+ {"Fermat's Last Sentence", true},
+ {"Fermat's", true},
+ {"'sX", false},
+ {"Ted 'Too' Bar", false},
+ {"Use n+m", false},
+ {"Scanning:", false},
+ {"N:M", false},
+}
+
+func TestIsOldHeading(t *testing.T) {
+ for _, tt := range oldHeadingTests {
+ if isOldHeading(tt.line, []string{"Text.", "", tt.line, "", "Text."}, 2) != tt.ok { // tt.line sits at index 2, with a blank line and "Text." on each side
+ t.Errorf("isOldHeading(%q) = %v, want %v", tt.line, !tt.ok, tt.ok)
+ }
+ }
+}
+
+var autoURLTests = []struct {
+ in, out string // in: text passed to autoURL; out: URL expected back, "" when autoURL should report ok == false
+}{
+ {"", ""},
+ {"http://[::1]:8080/foo.txt", "http://[::1]:8080/foo.txt"},
+ {"https://www.google.com) after", "https://www.google.com"},
+ {"https://www.google.com:30/x/y/z:b::c. After", "https://www.google.com:30/x/y/z:b::c"},
+ {"http://www.google.com/path/:;!-/?query=%34b#093124", "http://www.google.com/path/:;!-/?query=%34b#093124"},
+ {"http://www.google.com/path/:;!-/?query=%34bar#093124", "http://www.google.com/path/:;!-/?query=%34bar#093124"},
+ {"http://www.google.com/index.html! After", "http://www.google.com/index.html"},
+ {"http://www.google.com/", "http://www.google.com/"},
+ {"https://www.google.com/", "https://www.google.com/"},
+ {"http://www.google.com/path.", "http://www.google.com/path"},
+ {"http://en.wikipedia.org/wiki/Camellia_(cipher)", "http://en.wikipedia.org/wiki/Camellia_(cipher)"},
+ {"http://www.google.com/)", "http://www.google.com/"},
+ {"http://gmail.com)", "http://gmail.com"},
+ {"http://gmail.com))", "http://gmail.com"},
+ {"http://gmail.com ((http://gmail.com)) ()", "http://gmail.com"},
+ {"http://example.com/ quux!", "http://example.com/"},
+ {"http://example.com/%2f/ /world.", "http://example.com/%2f/"},
+ {"http: ipsum //host/path", ""},
+ {"javascript://is/not/linked", ""},
+ {"http://foo", "http://foo"},
+ {"https://www.example.com/person/][Person Name]]", "https://www.example.com/person/"},
+ {"http://golang.org/)", "http://golang.org/"},
+ {"http://golang.org/hello())", "http://golang.org/hello()"},
+ {"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", "http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD"},
+ {"https://foo.bar/bal/x(])", "https://foo.bar/bal/x"}, // inner ] causes (]) to be cut off from URL
+ {"http://bar(])", "http://bar"}, // same
+}
+
+func TestAutoURL(t *testing.T) {
+ for _, tt := range autoURLTests {
+ url, ok := autoURL(tt.in)
+ if url != tt.out || ok != (tt.out != "") { // ok must be true exactly when a non-empty URL is expected
+ t.Errorf("autoURL(%q) = %q, %v, want %q, %v", tt.in, url, ok, tt.out, tt.out != "")
+ }
+ }
+}
diff --git a/src/go/doc/comment/parse.go b/src/go/doc/comment/parse.go
new file mode 100644
index 0000000..62a0f8f
--- /dev/null
+++ b/src/go/doc/comment/parse.go
@@ -0,0 +1,1262 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "sort"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// A Doc is a parsed Go doc comment.
+type Doc struct {
+ // Content is the sequence of content blocks in the comment.
+ Content []Block
+
+ // Links is the link definitions in the comment.
+ Links []*LinkDef
+}
+
+// A LinkDef is a single link definition.
+type LinkDef struct {
+ Text string // the link text
+ URL string // the link URL
+ Used bool // whether the comment uses the definition
+}
+
+// A Block is block-level content in a doc comment,
+// one of [*Code], [*Heading], [*List], or [*Paragraph].
+type Block interface {
+ block() // unexported marker method: only types declared in this package can implement Block
+}
+
+// A Heading is a doc comment heading.
+type Heading struct {
+ Text []Text // the heading text
+}
+
+func (*Heading) block() {} // marks *Heading as a Block
+
+// A List is a numbered or bullet list.
+// Lists are always non-empty: len(Items) > 0.
+// In a numbered list, every Items[i].Number is a non-empty string.
+// In a bullet list, every Items[i].Number is an empty string.
+type List struct {
+ // Items is the list items.
+ Items []*ListItem
+
+ // ForceBlankBefore indicates that the list must be
+ // preceded by a blank line when reformatting the comment,
+ // overriding the usual conditions. See the BlankBefore method.
+ //
+ // The comment parser sets ForceBlankBefore for any list
+ // that is preceded by a blank line, to make sure
+ // the blank line is preserved when printing.
+ ForceBlankBefore bool
+
+ // ForceBlankBetween indicates that list items must be
+ // separated by blank lines when reformatting the comment,
+ // overriding the usual conditions. See the BlankBetween method.
+ //
+ // The comment parser sets ForceBlankBetween for any list
+ // that has a blank line between any two of its items, to make sure
+ // the blank lines are preserved when printing.
+ ForceBlankBetween bool
+}
+
+func (*List) block() {} // marks *List as a Block
+
+// BlankBefore reports whether a reformatting of the comment
+// should include a blank line before the list.
+// The default rule is the same as for [List.BlankBetween]:
+// if the list item content contains any blank lines
+// (meaning at least one item has multiple paragraphs)
+// then the list itself must be preceded by a blank line.
+// A preceding blank line can be forced by setting [List].ForceBlankBefore.
+func (l *List) BlankBefore() bool {
+ return l.ForceBlankBefore || l.BlankBetween()
+}
+
+// BlankBetween reports whether a reformatting of the comment
+// should include a blank line between each pair of list items.
+// The default rule is that if the list item content contains any blank lines
+// (meaning at least one item has multiple paragraphs)
+// then list items must themselves be separated by blank lines.
+// Blank line separators can be forced by setting [List].ForceBlankBetween.
+func (l *List) BlankBetween() bool {
+ if l.ForceBlankBetween {
+ return true
+ }
+ for _, item := range l.Items {
+ if len(item.Content) != 1 {
+ // Unreachable for parsed comments today,
+ // since the only way to get multiple item.Content
+ // is multiple paragraphs, which must have been
+ // separated by a blank line.
+ return true
+ }
+ }
+ return false // nothing forced, and every item has exactly one content block
+}
+
+// A ListItem is a single item in a numbered or bullet list.
+type ListItem struct {
+ // Number is a decimal string in a numbered list
+ // or an empty string in a bullet list.
+ Number string // "1", "2", ...; "" for bullet list
+
+ // Content is the list content.
+ // Currently, restrictions in the parser and printer
+ // require every element of Content to be a *Paragraph.
+ Content []Block // Content of this item.
+}
+
+// A Paragraph is a paragraph of text.
+type Paragraph struct {
+ Text []Text // the paragraph text
+}
+
+func (*Paragraph) block() {} // marks *Paragraph as a Block
+
+// A Code is a preformatted code block.
+type Code struct {
+ // Text is the preformatted text, ending with a newline character.
+ // It may be multiple lines, each of which ends with a newline character.
+ // It is never empty, nor does it start or end with a blank line.
+ Text string
+}
+
+func (*Code) block() {} // marks *Code as a Block
+
+// A Text is text-level content in a doc comment,
+// one of [Plain], [Italic], [*Link], or [*DocLink].
+type Text interface {
+ text() // unexported marker method: only types declared in this package can implement Text
+}
+
+// A Plain is a string rendered as plain text (not italicized).
+type Plain string
+
+func (Plain) text() {} // marks Plain as a Text
+
+// An Italic is a string rendered as italicized text.
+type Italic string
+
+func (Italic) text() {} // marks Italic as a Text
+
+// A Link is a link to a specific URL.
+type Link struct {
+ Auto bool // is this an automatic (implicit) link of a literal URL?
+ Text []Text // text of link
+ URL string // target URL of link
+}
+
+func (*Link) text() {} // marks *Link as a Text
+
+// A DocLink is a link to documentation for a Go package or symbol.
+type DocLink struct {
+ Text []Text // text of link
+
+ // ImportPath, Recv, and Name identify the Go package or symbol
+ // that is the link target. The potential combinations of
+ // non-empty fields are:
+ // - ImportPath: a link to another package
+ // - ImportPath, Name: a link to a const, func, type, or var in another package
+ // - ImportPath, Recv, Name: a link to a method in another package
+ // - Name: a link to a const, func, type, or var in this package
+ // - Recv, Name: a link to a method in this package
+ ImportPath string // import path
+ Recv string // receiver type, without any pointer star, for methods
+ Name string // const, func, type, var, or method name
+}
+
+func (*DocLink) text() {} // marks *DocLink as a Text
+
+// A Parser is a doc comment parser.
+// The fields in the struct can be filled in before calling Parse
+// in order to customize the details of the parsing process.
+type Parser struct {
+ // Words is a map of Go identifier words that
+ // should be italicized and potentially linked.
+ // If Words[w] is the empty string, then the word w
+ // is only italicized. Otherwise it is linked, using
+ // Words[w] as the link target.
+ // Words corresponds to the [go/doc.ToHTML] words parameter.
+ Words map[string]string
+
+ // LookupPackage resolves a package name to an import path.
+ //
+ // If LookupPackage(name) returns ok == true, then [name]
+ // (or [name.Sym] or [name.Sym.Method])
+ // is considered a documentation link to importPath's package docs.
+ // It is valid to return "", true, in which case name is considered
+ // to refer to the current package.
+ //
+ // If LookupPackage(name) returns ok == false,
+ // then [name] (or [name.Sym] or [name.Sym.Method])
+ // will not be considered a documentation link,
+ // except in the case where name is the full (but single-element) import path
+ // of a package in the standard library, such as in [math] or [io.Reader].
+ // LookupPackage is still called for such names,
+ // in order to permit references to imports of other packages
+ // with the same package names.
+ //
+ // Setting LookupPackage to nil is equivalent to setting it to
+ // a function that always returns "", false.
+ LookupPackage func(name string) (importPath string, ok bool)
+
+ // LookupSym reports whether a symbol name or method name
+ // exists in the current package.
+ //
+ // If LookupSym("", "Name") returns true, then [Name]
+ // is considered a documentation link for a const, func, type, or var.
+ //
+ // Similarly, if LookupSym("Recv", "Name") returns true,
+ // then [Recv.Name] is considered a documentation link for
+ // type Recv's method Name.
+ //
+ // Setting LookupSym to nil is equivalent to setting it to a function
+ // that always returns false.
+ LookupSym func(recv, name string) (ok bool)
+}
+
+// parseDoc is parsing state for a single doc comment.
+type parseDoc struct {
+ *Parser
+ *Doc
+ links map[string]*LinkDef // link definitions collected while parsing; presumably keyed by link text (TODO confirm)
+ lines []string // the comment's lines, as returned by unindent in Parse
+ lookupSym func(recv, name string) bool // Parser.LookupSym, or a func that always returns false when that is nil
+}
+
+// lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Recv.Name].
+// If pkg has a slash, it is assumed to be a full import path and is returned with ok = true if valid.
+//
+// Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand").
+// d.LookupPackage provides a way for the caller to allow resolving such names with reference
+// to the imports in the surrounding package.
+//
+// There is one collision between these two cases: single-element standard library names
+// like "math" are full import paths but don't contain slashes. We let d.LookupPackage have
+// the first chance to resolve it, in case there's a different package imported as math,
+// and otherwise we refer to a built-in list of single-element standard library package names.
+func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
+ if strings.Contains(pkg, "/") { // assume a full import path
+ if validImportPath(pkg) {
+ return pkg, true
+ }
+ return "", false // contains a slash but is not a valid import path
+ }
+ if d.LookupPackage != nil {
+ // Give LookupPackage a chance.
+ if path, ok := d.LookupPackage(pkg); ok {
+ return path, true
+ }
+ }
+ return DefaultLookupPackage(pkg) // fall back to the built-in standard library list
+}
+
+func isStdPkg(path string) bool { // reports whether path is an element of stdPkgs; the binary search relies on stdPkgs being sorted
+ // TODO(rsc): Use sort.Find once we don't have to worry about
+ // copying this code into older Go environments.
+ i := sort.Search(len(stdPkgs), func(i int) bool { return stdPkgs[i] >= path }) // first index whose entry is >= path
+ return i < len(stdPkgs) && stdPkgs[i] == path
+}
+
+// DefaultLookupPackage is the default package lookup
+// function, used when [Parser].LookupPackage is nil or reports ok == false.
+// It recognizes names of the packages from the standard
+// library with single-element import paths, such as math,
+// which would otherwise be impossible to name.
+//
+// Note that the go/doc package provides a more sophisticated
+// lookup based on the imports used in the current package.
+func DefaultLookupPackage(name string) (importPath string, ok bool) {
+ if isStdPkg(name) {
+ return name, true // for these packages the name is itself the import path
+ }
+ return "", false
+}
+
+// Parse parses the doc comment text and returns the *Doc form.
+// Comment markers (/* // and */) in the text must have already been removed.
+func (p *Parser) Parse(text string) *Doc {
+ lines := unindent(strings.Split(text, "\n"))
+ d := &parseDoc{
+ Parser: p,
+ Doc: new(Doc),
+ links: make(map[string]*LinkDef),
+ lines: lines,
+ lookupSym: func(recv, name string) bool { return false }, // default, kept when p.LookupSym is nil
+ }
+ if p.LookupSym != nil {
+ d.lookupSym = p.LookupSym
+ }
+
+ // First pass: break into block structure and collect known links.
+ // The text is all recorded as Plain for now.
+ var prev span
+ for _, s := range parseSpans(lines) {
+ var b Block
+ switch s.kind {
+ default:
+ panic("go/doc/comment: internal error: unknown span kind")
+ case spanList:
+ b = d.list(lines[s.start:s.end], prev.end < s.start) // second argument: true when there is a gap between the previous span and this list
+ case spanCode:
+ b = d.code(lines[s.start:s.end])
+ case spanOldHeading:
+ b = d.oldHeading(lines[s.start])
+ case spanHeading:
+ b = d.heading(lines[s.start])
+ case spanPara:
+ b = d.paragraph(lines[s.start:s.end])
+ }
+ if b != nil { // spans for which no block was produced are not recorded
+ d.Content = append(d.Content, b)
+ }
+ prev = s
+ }
+
+ // Second pass: interpret all the Plain text now that we know the links.
+ for _, b := range d.Content {
+ switch b := b.(type) { // only paragraphs and list items are re-parsed; other blocks stay as built in the first pass
+ case *Paragraph:
+ b.Text = d.parseLinkedText(string(b.Text[0].(Plain))) // the first pass stored the whole text as one Plain element
+ case *List:
+ for _, i := range b.Items {
+ for _, c := range i.Content {
+ p := c.(*Paragraph) // shadows the receiver; list item content is asserted to be *Paragraph
+ p.Text = d.parseLinkedText(string(p.Text[0].(Plain)))
+ }
+ }
+ }
+ }
+
+ return d.Doc
+}
+
+// A span represents a single span of comment lines (lines[start:end])
+// of an identified kind (code, heading, paragraph, and so on).
+type span struct {
+ start int // index of the span's first line
+ end int // index just past the span's last line
+ kind spanKind
+}
+
+// A spanKind describes the kind of span.
+type spanKind int
+
+const (
+ _ spanKind = iota // the zero value is left unnamed, so it matches none of the kinds below
+ spanCode
+ spanHeading
+ spanList
+ spanOldHeading
+ spanPara
+)
+
+// parseSpans partitions lines into consecutive, non-overlapping spans,
+// skipping the blank lines between them, and labels each span as
+// code, list, heading, old-style heading, or paragraph.
+// Blank lines are recognized only as empty strings.
+// It panics if the scan stops making progress.
+func parseSpans(lines []string) []span {
+ var spans []span
+
+ // The loop may process a line twice: once as unindented
+ // and again forced indented. So the maximum expected
+ // number of iterations is 2*len(lines). The repeating logic
+ // can be subtle, though, and to protect against introduction
+ // of infinite loops in future changes, we watch to see that
+ // we are not looping too much. A panic is better than a
+ // quiet infinite loop.
+ watchdog := 2 * len(lines)
+
+ i := 0
+ // Lines with index < forceIndent are treated as indented
+ // even when they do not start with a space or tab.
+ forceIndent := 0
+Spans:
+ for {
+ // Skip blank lines.
+ for i < len(lines) && lines[i] == "" {
+ i++
+ }
+ if i >= len(lines) {
+ break
+ }
+ if watchdog--; watchdog < 0 {
+ panic("go/doc/comment: internal error: not making progress")
+ }
+
+ var kind spanKind
+ start := i
+ end := i
+ if i < forceIndent || indented(lines[i]) {
+ // Indented (or force indented).
+ // Ends before next unindented. (Blank lines are OK.)
+ // If this is an unindented list that we are heuristically treating as indented,
+ // then accept unindented list item lines up to the first blank lines.
+ // The heuristic is disabled at blank lines to contain its effect
+ // to non-gofmt'ed sections of the comment.
+ unindentedListOK := isList(lines[i]) && i < forceIndent
+ i++
+ for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
+ if lines[i] == "" {
+ unindentedListOK = false
+ }
+ i++
+ }
+
+ // Drop trailing blank lines.
+ end = i
+ for end > start && lines[end-1] == "" {
+ end--
+ }
+
+ // If indented lines are followed (without a blank line)
+ // by an unindented line ending in a brace,
+ // take that one line too. This fixes the common mistake
+ // of pasting in something like
+ //
+ // func main() {
+ // fmt.Println("hello, world")
+ // }
+ //
+ // and forgetting to indent it.
+ // The heuristic will never trigger on a gofmt'ed comment,
+ // because any gofmt'ed code block or list would be
+ // followed by a blank line or end of comment.
+ if end < len(lines) && strings.HasPrefix(lines[end], "}") {
+ end++
+ }
+
+ // The first line alone decides between list and code.
+ if isList(lines[start]) {
+ kind = spanList
+ } else {
+ kind = spanCode
+ }
+ } else {
+ // Unindented. Ends at next blank or indented line.
+ i++
+ for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
+ i++
+ }
+ end = i
+
+ // If unindented lines are followed (without a blank line)
+ // by an indented line that would start a code block,
+ // check whether the final unindented lines
+ // should be left for the indented section.
+ // This can happen for the common mistakes of
+ // unindented code or unindented lists.
+ // The heuristic will never trigger on a gofmt'ed comment,
+ // because any gofmt'ed code block would have a blank line
+ // preceding it after the unindented lines.
+ if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
+ switch {
+ case isList(lines[i-1]):
+ // If the final unindented line looks like a list item,
+ // this may be the first indented line wrap of
+ // a mistakenly unindented list.
+ // Leave all the unindented list items.
+ forceIndent = end
+ end--
+ for end > start && isList(lines[end-1]) {
+ end--
+ }
+
+ case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
+ // If the final unindented line ended in { or \
+ // it is probably the start of a misindented code block.
+ // Give the user a single line fix.
+ // Often that's enough; if not, the user can fix the others themselves.
+ forceIndent = end
+ end--
+ }
+
+ // Every unindented line was handed to the forced-indent section,
+ // so nothing is left for this span: rescan from start, which now
+ // takes the indented branch because start < forceIndent.
+ if start == end && forceIndent > start {
+ i = start
+ continue Spans
+ }
+ }
+
+ // Span is either paragraph or heading.
+ if end-start == 1 && isHeading(lines[start]) {
+ kind = spanHeading
+ } else if end-start == 1 && isOldHeading(lines[start], lines, start) {
+ kind = spanOldHeading
+ } else {
+ kind = spanPara
+ }
+ }
+
+ spans = append(spans, span{start, end, kind})
+ // Resume at end, not at the scan position: lines between end and
+ // the scan position were deliberately left for the next span.
+ i = end
+ }
+
+ return spans
+}
+
+// indented reports whether line begins with a space or a tab.
+// The empty string is never indented.
+func indented(line string) bool {
+	if len(line) == 0 {
+		return false
+	}
+	switch line[0] {
+	case ' ', '\t':
+		return true
+	}
+	return false
+}
+
+// unindent returns a copy of lines with the space/tab prefix shared by
+// all non-blank lines removed. Lines that consist only of white space
+// become empty strings, and blank lines at either end are dropped.
+// It returns nil when no lines remain after the initial trimming.
+func unindent(lines []string) []string {
+	// Drop blank lines at both ends before measuring indentation.
+	first, last := 0, len(lines)
+	for first < last && isBlank(lines[first]) {
+		first++
+	}
+	for last > first && isBlank(lines[last-1]) {
+		last--
+	}
+	lines = lines[first:last]
+	if len(lines) == 0 {
+		return nil
+	}
+
+	// The shared indentation is the common prefix of every
+	// non-blank line's leading white space.
+	shared := leadingSpace(lines[0])
+	for _, l := range lines[1:] {
+		if isBlank(l) {
+			continue
+		}
+		shared = commonPrefix(shared, leadingSpace(l))
+	}
+
+	trimmed := make([]string, 0, len(lines))
+	for _, l := range lines {
+		l = strings.TrimPrefix(l, shared)
+		if strings.TrimSpace(l) == "" {
+			l = ""
+		}
+		trimmed = append(trimmed, l)
+	}
+
+	// White-space-only lines at the ends have just become empty; drop them too.
+	lo, hi := 0, len(trimmed)
+	for lo < hi && trimmed[lo] == "" {
+		lo++
+	}
+	for hi > lo && trimmed[hi-1] == "" {
+		hi--
+	}
+	return trimmed[lo:hi]
+}
+
+// isBlank reports whether s is a blank line:
+// either empty or a lone newline.
+func isBlank(s string) bool {
+	return s == "" || s == "\n"
+}
+
+// commonPrefix returns the longest byte prefix shared by a and b.
+// The result is always a slice of a.
+func commonPrefix(a, b string) string {
+	limit := len(a)
+	if len(b) < limit {
+		limit = len(b)
+	}
+	for n := 0; n < limit; n++ {
+		if a[n] != b[n] {
+			return a[:n]
+		}
+	}
+	return a[:limit]
+}
+
+// leadingSpace returns the run of spaces and tabs at the start of s.
+// If s contains nothing else, all of s is returned.
+func leadingSpace(s string) string {
+	for i := 0; i < len(s); i++ {
+		if s[i] != ' ' && s[i] != '\t' {
+			return s[:i]
+		}
+	}
+	return s
+}
+
+// isOldHeading reports whether line is an old-style section heading.
+// line is all[off].
+func isOldHeading(line string, all []string, off int) bool {
+ if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" {
+ return false
+ }
+
+ line = strings.TrimSpace(line)
+
+ // a heading must start with an uppercase letter
+ r, _ := utf8.DecodeRuneInString(line)
+ if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
+ return false
+ }
+
+ // it must end in a letter or digit:
+ r, _ = utf8.DecodeLastRuneInString(line)
+ if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+ return false
+ }
+
+ // exclude lines with illegal characters. we allow "(),"
+ if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
+ return false
+ }
+
+ // allow "'" for possessive "'s" only
+ for b := line; ; {
+ var ok bool
+ if _, b, ok = strings.Cut(b, "'"); !ok {
+ break
+ }
+ if b != "s" && !strings.HasPrefix(b, "s ") {
+ return false // ' not followed by s and then end-of-word
+ }
+ }
+
+ // allow "." when followed by non-space
+ for b := line; ; {
+ var ok bool
+ if _, b, ok = strings.Cut(b, "."); !ok {
+ break
+ }
+ if b == "" || strings.HasPrefix(b, " ") {
+ return false // not followed by non-space
+ }
+ }
+
+ return true
+}
+
+// oldHeading builds the *Heading for an old-style section heading line.
+// The heading text is the line with surrounding white space removed,
+// stored as a single Plain element.
+func (d *parseDoc) oldHeading(line string) Block {
+	title := strings.TrimSpace(line)
+	return &Heading{Text: []Text{Plain(title)}}
+}
+
+// isHeading reports whether line is a new-style section heading:
+// a '#' followed by a space or tab and then some non-space text.
+func isHeading(line string) bool {
+	if len(line) < 2 || line[0] != '#' {
+		return false
+	}
+	if line[1] != ' ' && line[1] != '\t' {
+		return false
+	}
+	// A marker followed only by white space has no heading text.
+	return strings.TrimSpace(line) != "#"
+}
+
+// heading builds the *Heading for a new-style section heading line.
+// The first byte of line (the '#' marker) is dropped and the remainder,
+// with surrounding white space removed, becomes a single Plain element.
+func (d *parseDoc) heading(line string) Block {
+	title := strings.TrimSpace(line[1:])
+	return &Heading{Text: []Text{Plain(title)}}
+}
+
+// code builds a code block from lines.
+// The lines are unindented and joined with newlines;
+// non-empty text always ends in a newline.
+func (d *parseDoc) code(lines []string) *Code {
+	// The extra empty element makes Join emit the final "\n".
+	text := strings.Join(append(unindent(lines), ""), "\n")
+	return &Code{Text: text}
+}
+
+// paragraph builds a paragraph block from lines.
+// If every line is a link definition (see parseLink), the definitions are
+// recorded in d instead and paragraph returns nil. For a given link text,
+// only the first definition is entered in d.links; all are appended to d.Links.
+// Otherwise the lines are joined with newlines into a single Plain element.
+func (d *parseDoc) paragraph(lines []string) Block {
+	var defs []*LinkDef
+	for _, line := range lines {
+		def, ok := parseLink(line)
+		if !ok {
+			// At least one ordinary line: the whole span is plain text.
+			return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}
+		}
+		defs = append(defs, def)
+	}
+
+	d.Links = append(d.Links, defs...)
+	for _, def := range defs {
+		if d.links[def.Text] == nil {
+			d.links[def.Text] = def
+		}
+	}
+	return nil
+}
+
+// parseLink parses a single link definition line:
+//
+//	[text]: url
+//
+// The "]:" must be followed by a space or tab, and the URL must begin
+// with a recognized scheme followed by "://".
+// It returns the link definition and whether the line was well formed.
+func parseLink(line string) (*LinkDef, bool) {
+	if !strings.HasPrefix(line, "[") {
+		return nil, false
+	}
+	sep := strings.Index(line, "]:")
+	if sep < 0 || sep+3 >= len(line) {
+		return nil, false
+	}
+	if ws := line[sep+2]; ws != ' ' && ws != '\t' {
+		return nil, false
+	}
+
+	target := strings.TrimSpace(line[sep+3:])
+	scheme, _, found := strings.Cut(target, "://")
+	if !found || !isScheme(scheme) {
+		return nil, false
+	}
+
+	// The form is right and the scheme is valid. Nothing after the
+	// "://" is checked here, unlike inline URLs extracted from text.
+	return &LinkDef{Text: line[1:sep], URL: target}, true
+}
+
+// list builds a List from the indented lines, using forceBlankBefore
+// as the value of the List's ForceBlankBefore field.
+// The first line decides whether the list is numbered or bulleted;
+// only markers of that same kind start new items. A blank line ends
+// the current paragraph and sets ForceBlankBetween.
+func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List {
+	firstNum, _, _ := listMarker(lines[0])
+	numbered := firstNum != ""
+
+	result := &List{ForceBlankBefore: forceBlankBefore}
+	var (
+		cur     *ListItem
+		pending []string // lines of the paragraph being collected
+	)
+	// emit turns the pending lines into a paragraph of the current item.
+	emit := func() {
+		if cur != nil {
+			if para := d.paragraph(pending); para != nil {
+				cur.Content = append(cur.Content, para)
+			}
+		}
+		pending = nil
+	}
+
+	for _, raw := range lines {
+		if n, rest, ok := listMarker(raw); ok && (n != "") == numbered {
+			// A marker of the list's own kind starts a new item.
+			emit()
+			cur = &ListItem{Number: n}
+			result.Items = append(result.Items, cur)
+			raw = rest
+		}
+		trimmed := strings.TrimSpace(raw)
+		if trimmed == "" {
+			result.ForceBlankBetween = true
+			emit()
+			continue
+		}
+		pending = append(pending, trimmed)
+	}
+	emit()
+	return result
+}
+
+// listMarker parses the line as beginning with a list marker,
+// ignoring surrounding white space.
+// A marker is a bullet (•, *, +, or -) or a decimal number followed by
+// '.' or ')'; it must be followed by a space or tab and then more text.
+// On success it returns the number ("" for a bullet), the text after
+// the marker (still carrying its leading white space), and ok == true.
+// Otherwise it returns "", "", false.
+func listMarker(line string) (num, rest string, ok bool) {
+	line = strings.TrimSpace(line)
+	if line == "" {
+		return "", "", false
+	}
+
+	switch r, size := utf8.DecodeRuneInString(line); {
+	case r == '•' || r == '*' || r == '+' || r == '-':
+		rest = line[size:]
+	case '0' <= line[0] && line[0] <= '9':
+		digits := 1
+		for digits < len(line) && '0' <= line[digits] && line[digits] <= '9' {
+			digits++
+		}
+		if digits >= len(line) || (line[digits] != '.' && line[digits] != ')') {
+			return "", "", false
+		}
+		num, rest = line[:digits], line[digits+1:]
+	default:
+		return "", "", false
+	}
+
+	// The marker must be set off from the item text by a space or tab.
+	if rest == "" || (rest[0] != ' ' && rest[0] != '\t') || strings.TrimSpace(rest) == "" {
+		return "", "", false
+	}
+	return num, rest, true
+}
+
+// isList reports whether line could be the first line of a list item,
+// that is, whether listMarker finds a list marker after any indentation.
+// It does not check that the line is indented; callers do that as needed.
+func isList(line string) bool {
+	_, _, found := listMarker(line)
+	return found
+}
+
+// parseLinkedText parses text that is allowed to contain explicit links,
+// such as [math.Sin] or [Go home page], into a slice of Text items.
+//
+// A “pkg” is only assumed to be a full import path if it starts with
+// a domain name (a path element with a dot) or is one of the packages
+// from the standard library (“[os]”, “[encoding/json]”, and so on).
+// To avoid problems with maps, generics, and array types, doc links
+// must be both preceded and followed by punctuation, spaces, tabs,
+// or the start or end of a line. An example problem would be treating
+// map[ast.Expr]TypeAndValue as containing a link.
+//
+// Bracketed text is first looked up among the link definitions in d.links,
+// with newlines and tabs treated as spaces; if there is no such definition
+// it is tried as a doc link. Only the innermost bracket pair is considered:
+// a '[' restarts the candidate link text.
+func (d *parseDoc) parseLinkedText(text string) []Text {
+	var out []Text
+	wrote := 0 // text[:wrote] has already been emitted to out
+	flush := func(i int) {
+		if wrote < i {
+			out = d.parseText(out, text[wrote:i], true)
+			wrote = i
+		}
+	}
+
+	start := -1    // index of the most recent unmatched '[', or -1
+	var buf []byte // text since start, with \n and \t replaced by spaces
+	for i := 0; i < len(text); i++ {
+		c := text[i]
+		if c == '\n' || c == '\t' {
+			c = ' '
+		}
+		switch c {
+		case '[':
+			start = i
+			// Discard anything collected after an earlier unmatched '[',
+			// so that the lookup key below always equals text[start+1:i]
+			// (modulo white space), matching the link text that is emitted.
+			buf = buf[:0]
+		case ']':
+			if start >= 0 {
+				if def, ok := d.links[string(buf)]; ok {
+					def.Used = true
+					flush(start)
+					out = append(out, &Link{
+						Text: d.parseText(nil, text[start+1:i], false),
+						URL:  def.URL,
+					})
+					wrote = i + 1
+				} else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok {
+					flush(start)
+					link.Text = d.parseText(nil, text[start+1:i], false)
+					out = append(out, link)
+					wrote = i + 1
+				}
+			}
+			start = -1
+			buf = buf[:0]
+		}
+		// Collect bracket contents, excluding the '[' itself.
+		if start >= 0 && i != start {
+			buf = append(buf, c)
+		}
+	}
+
+	flush(len(text))
+	return out
+}
+
+// docLink parses text, which was found inside [ ] brackets,
+// as a doc link if possible, returning the DocLink and ok == true
+// or else nil, false.
+// The before and after strings are the text before the [ and after the ]
+// on the same line. Doc links must be preceded and followed by
+// punctuation, spaces, tabs, or the start or end of a line.
+func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) {
+ if before != "" {
+ r, _ := utf8.DecodeLastRuneInString(before)
+ if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
+ return nil, false
+ }
+ }
+ if after != "" {
+ r, _ := utf8.DecodeRuneInString(after)
+ if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
+ return nil, false
+ }
+ }
+ text = strings.TrimPrefix(text, "*")
+ pkg, name, ok := splitDocName(text)
+ var recv string
+ if ok {
+ pkg, recv, _ = splitDocName(pkg)
+ }
+ if pkg != "" {
+ if pkg, ok = d.lookupPkg(pkg); !ok {
+ return nil, false
+ }
+ } else {
+ if ok = d.lookupSym(recv, name); !ok {
+ return nil, false
+ }
+ }
+ link = &DocLink{
+ ImportPath: pkg,
+ Recv: recv,
+ Name: name,
+ }
+ return link, true
+}
+
+// splitDocName splits text at its last dot.
+// If the part after the last dot (or all of text, when there is no dot)
+// is a capitalized Go identifier, splitDocName returns the part before
+// the dot ("" when there is no dot), that identifier, and true.
+// Otherwise it returns text, "", false.
+func splitDocName(text string) (before, name string, foundDot bool) {
+	dot := strings.LastIndex(text, ".")
+	candidate := text[dot+1:]
+	if !isName(candidate) {
+		return text, "", false
+	}
+	if dot < 0 {
+		return "", candidate, true
+	}
+	return text[:dot], candidate, true
+}
+
+// parseText parses s as text and returns the result of appending
+// those parsed Text elements to out.
+// parseText does not handle explicit links like [math.Sin] or [Go home page]:
+// those are handled by parseLinkedText.
+// If autoLink is true, then parseText recognizes URLs and words from d.Words
+// and converts those to links as appropriate.
+// In all cases `` and '' are rewritten to “ and ”, except that runs of
+// three or more backquotes are left untouched.
+func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text {
+	var w strings.Builder
+	wrote := 0 // s[:wrote] has been copied to w or emitted to out
+	writeUntil := func(i int) {
+		w.WriteString(s[wrote:i])
+		wrote = i
+	}
+	flush := func(i int) {
+		writeUntil(i)
+		if w.Len() > 0 {
+			out = append(out, Plain(w.String()))
+			w.Reset()
+		}
+	}
+	for i := 0; i < len(s); {
+		t := s[i:]
+		if autoLink {
+			if url, ok := autoURL(t); ok {
+				flush(i)
+				// Note: The old comment parser would look up the URL in words
+				// and replace the target with words[URL] if it was non-empty.
+				// That would allow creating links that display as one URL but
+				// when clicked go to a different URL. Not sure what the point
+				// of that is, so we're not doing that lookup here.
+				out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url})
+				i += len(url)
+				wrote = i
+				continue
+			}
+			if id, ok := ident(t); ok {
+				url, italics := d.Words[id]
+				if !italics {
+					// Not a special word: skip it whole; it is copied later as plain text.
+					i += len(id)
+					continue
+				}
+				flush(i)
+				if url == "" {
+					out = append(out, Italic(id))
+				} else {
+					out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url})
+				}
+				i += len(id)
+				wrote = i
+				continue
+			}
+		}
+		switch {
+		case strings.HasPrefix(t, "``"):
+			if len(t) >= 3 && t[2] == '`' {
+				// Do not convert `` inside ```, in case people are mistakenly writing Markdown.
+				// Skip the entire run of backquotes. i is an offset into s,
+				// not into t, so the scan must index s.
+				i += 3
+				for i < len(s) && s[i] == '`' {
+					i++
+				}
+				break
+			}
+			writeUntil(i)
+			w.WriteRune('“')
+			i += 2
+			wrote = i
+		case strings.HasPrefix(t, "''"):
+			writeUntil(i)
+			w.WriteRune('”')
+			i += 2
+			wrote = i
+		default:
+			i++
+		}
+	}
+	flush(len(s))
+	return out
+}
+
+// autoURL checks whether s begins with a URL that should be hyperlinked.
+// If so, it returns the URL, which is a prefix of s, and ok == true.
+// Otherwise it returns "", false.
+// The caller should skip over the first len(url) bytes of s
+// before further processing.
+func autoURL(s string) (url string, ok bool) {
+ // Find the ://. Fast path to pick off non-URL,
+ // since we call this at every position in the string.
+ // The shortest possible URL is ftp://x, 7 bytes.
+ // Only offsets 3 through 6 are probed for the colon,
+ // so only schemes of 3 to 6 bytes can be recognized.
+ var i int
+ switch {
+ case len(s) < 7:
+ return "", false
+ case s[3] == ':':
+ i = 3
+ case s[4] == ':':
+ i = 4
+ case s[5] == ':':
+ i = 5
+ case s[6] == ':':
+ i = 6
+ default:
+ return "", false
+ }
+ if i+3 > len(s) || s[i:i+3] != "://" {
+ return "", false
+ }
+
+ // Check valid scheme.
+ if !isScheme(s[:i]) {
+ return "", false
+ }
+
+ // Scan host part. Must have at least one byte,
+ // and must start and end in non-punctuation.
+ i += 3
+ if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) {
+ return "", false
+ }
+ i++
+ // end tracks the position just past the last non-punctuation host byte,
+ // so trailing punctuation is left out of the host.
+ end := i
+ for i < len(s) && isHost(s[i]) {
+ if !isPunct(s[i]) {
+ end = i + 1
+ }
+ i++
+ }
+ i = end
+
+ // At this point we are definitely returning a URL (scheme://host).
+ // We just have to find the longest path we can add to it.
+ // Heuristics abound.
+ // We allow parens, braces, and brackets,
+ // but only if they match (#5043, #22285).
+ // We allow .,:;?! in the path but not at the end,
+ // to avoid end-of-sentence punctuation (#18139, #16565).
+ // stk holds the closing brackets still expected, innermost last.
+ stk := []byte{}
+ end = i
+Path:
+ for ; i < len(s); i++ {
+ // Punctuation is accepted tentatively: end only moves past it
+ // when a later path byte is accepted with no brackets open.
+ if isPunct(s[i]) {
+ continue
+ }
+ if !isPath(s[i]) {
+ break
+ }
+ switch s[i] {
+ case '(':
+ stk = append(stk, ')')
+ case '{':
+ stk = append(stk, '}')
+ case '[':
+ stk = append(stk, ']')
+ case ')', '}', ']':
+ if len(stk) == 0 || stk[len(stk)-1] != s[i] {
+ break Path
+ }
+ stk = stk[:len(stk)-1]
+ }
+ // The URL may only end where every opened bracket has been closed.
+ if len(stk) == 0 {
+ end = i + 1
+ }
+ }
+
+ return s[:end], true
+}
+
+// isScheme reports whether s is a recognized URL scheme.
+// Every scheme listed here is 3 to 6 bytes long. If a scheme of any
+// other length is added, the fast path at the top of autoURL, which
+// only looks for the colon at offsets 3 through 6, needs updating too.
+func isScheme(s string) bool {
+	switch s {
+	case "file", "ftp", "gopher", "http", "https", "mailto", "nntp":
+		return true
+	default:
+		return false
+	}
+}
+
+// isHost reports whether c is a byte that can appear in a URL host,
+// like www.example.com or user@[::1]:8080
+func isHost(c byte) bool {
+	// allowed is a 128-bit bitmap with a 1 bit for each permitted byte.
+	// It is split into two 64-bit halves so that a byte can be tested
+	// with one shift and one and. Bytes >= 128 are never permitted.
+	const allowed = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'_' |
+		1<<'@' |
+		1<<'-' |
+		1<<'.' |
+		1<<'[' |
+		1<<']' |
+		1<<':'
+	const (
+		lo uint64 = allowed & (1<<64 - 1) // bytes 0 through 63
+		hi uint64 = allowed >> 64         // bytes 64 through 127
+	)
+
+	switch {
+	case c < 64:
+		return lo&(1<<c) != 0
+	case c < 128:
+		return hi&(1<<(c-64)) != 0
+	}
+	return false
+}
+
+// isPunct reports whether c is a punctuation byte that can appear
+// inside a path but not at the end.
+func isPunct(c byte) bool {
+	// allowed is a 128-bit bitmap with a 1 bit for each permitted byte.
+	// It is split into two 64-bit halves so that a byte can be tested
+	// with one shift and one and. Bytes >= 128 are never permitted.
+	const allowed = 0 |
+		1<<'.' |
+		1<<',' |
+		1<<':' |
+		1<<';' |
+		1<<'?' |
+		1<<'!'
+	const (
+		lo uint64 = allowed & (1<<64 - 1) // bytes 0 through 63
+		hi uint64 = allowed >> 64         // bytes 64 through 127
+	)
+
+	switch {
+	case c < 64:
+		return lo&(1<<c) != 0
+	case c < 128:
+		return hi&(1<<(c-64)) != 0
+	}
+	return false
+}
+
+// isPath reports whether c is a (non-punctuation) path byte.
+func isPath(c byte) bool {
+	// allowed is a 128-bit bitmap with a 1 bit for each permitted byte.
+	// It is split into two 64-bit halves so that a byte can be tested
+	// with one shift and one and. Bytes >= 128 are never permitted.
+	const allowed = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'$' |
+		1<<'\'' |
+		1<<'(' |
+		1<<')' |
+		1<<'*' |
+		1<<'+' |
+		1<<'&' |
+		1<<'#' |
+		1<<'=' |
+		1<<'@' |
+		1<<'~' |
+		1<<'_' |
+		1<<'/' |
+		1<<'-' |
+		1<<'[' |
+		1<<']' |
+		1<<'{' |
+		1<<'}' |
+		1<<'%'
+	const (
+		lo uint64 = allowed & (1<<64 - 1) // bytes 0 through 63
+		hi uint64 = allowed >> 64         // bytes 64 through 127
+	)
+
+	switch {
+	case c < 64:
+		return lo&(1<<c) != 0
+	case c < 128:
+		return hi&(1<<(c-64)) != 0
+	}
+	return false
+}
+
+// isName reports whether s is a capitalized Go identifier (like Name):
+// all of s must be a single identifier and its first rune must be uppercase.
+func isName(s string) bool {
+	if id, ok := ident(s); !ok || id != s {
+		return false
+	}
+	first, _ := utf8.DecodeRuneInString(s)
+	return unicode.IsUpper(first)
+}
+
+// ident checks whether s begins with a Go identifier.
+// If so, it returns the identifier, which is a prefix of s, and ok == true.
+// Otherwise it returns "", false.
+// The caller should skip over the first len(id) bytes of s
+// before further processing.
+func ident(s string) (id string, ok bool) {
+	// Scan [\pL_][\pL_0-9]*
+	end := 0
+scan:
+	for end < len(s) {
+		c := s[end]
+		switch {
+		case c >= utf8.RuneSelf:
+			// Multi-byte rune: accept any Unicode letter.
+			r, size := utf8.DecodeRuneInString(s[end:])
+			if !unicode.IsLetter(r) {
+				break scan
+			}
+			end += size
+		case isIdentASCII(c) && (end > 0 || c < '0' || c > '9'):
+			// ASCII identifier byte; a digit may not come first.
+			end++
+		default:
+			break scan
+		}
+	}
+	return s[:end], end > 0
+}
+
+// isIdentASCII reports whether c is an ASCII identifier byte:
+// a letter, a digit, or an underscore.
+func isIdentASCII(c byte) bool {
+	// allowed is a 128-bit bitmap with a 1 bit for each permitted byte.
+	// It is split into two 64-bit halves so that a byte can be tested
+	// with one shift and one and. Bytes >= 128 are never permitted.
+	const allowed = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'_'
+	const (
+		lo uint64 = allowed & (1<<64 - 1) // bytes 0 through 63
+		hi uint64 = allowed >> 64         // bytes 64 through 127
+	)
+
+	switch {
+	case c < 64:
+		return lo&(1<<c) != 0
+	case c < 128:
+		return hi&(1<<(c-64)) != 0
+	}
+	return false
+}
+
+// validImportPath reports whether path is a valid import path.
+// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
+// The path must be non-empty valid UTF-8, must not start with '-',
+// must not contain "//" or end in '/', and each slash-separated
+// element must satisfy validImportPathElem.
+func validImportPath(path string) bool {
+	switch {
+	case !utf8.ValidString(path),
+		path == "",
+		path[0] == '-',
+		strings.Contains(path, "//"),
+		path[len(path)-1] == '/':
+		return false
+	}
+
+	// Check the elements one at a time, left to right.
+	rest := path
+	for {
+		elem, tail, more := strings.Cut(rest, "/")
+		if !validImportPathElem(elem) {
+			return false
+		}
+		if !more {
+			return true
+		}
+		rest = tail
+	}
+}
+
+// validImportPathElem reports whether elem is a valid import path element:
+// non-empty, neither starting nor ending with a dot, and made up only of
+// bytes accepted by importPathOK.
+func validImportPathElem(elem string) bool {
+	if elem == "" || strings.HasPrefix(elem, ".") || strings.HasSuffix(elem, ".") {
+		return false
+	}
+	for _, c := range []byte(elem) {
+		if !importPathOK(c) {
+			return false
+		}
+	}
+	return true
+}
+
+// importPathOK reports whether c is a byte allowed in an import path
+// element: an ASCII letter or digit, or one of - . ~ _ +
+func importPathOK(c byte) bool {
+	// allowed is a 128-bit bitmap with a 1 bit for each permitted byte.
+	// It is split into two 64-bit halves so that a byte can be tested
+	// with one shift and one and. Bytes >= 128 are never permitted.
+	const allowed = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'-' |
+		1<<'.' |
+		1<<'~' |
+		1<<'_' |
+		1<<'+'
+	const (
+		lo uint64 = allowed & (1<<64 - 1) // bytes 0 through 63
+		hi uint64 = allowed >> 64         // bytes 64 through 127
+	)
+
+	switch {
+	case c < 64:
+		return lo&(1<<c) != 0
+	case c < 128:
+		return hi&(1<<(c-64)) != 0
+	}
+	return false
+}
diff --git a/src/go/doc/comment/parse_test.go b/src/go/doc/comment/parse_test.go
new file mode 100644
index 0000000..bce733e
--- /dev/null
+++ b/src/go/doc/comment/parse_test.go
@@ -0,0 +1,12 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import "testing"
+
+// Test52353 calls ident on a string made up entirely of multi-byte runes.
+// It makes no assertion about the result, so it fails only if ident panics.
+// See https://golang.org/issue/52353
+func Test52353(t *testing.T) {
+ ident("𫕐ﯯ")
+}
diff --git a/src/go/doc/comment/print.go b/src/go/doc/comment/print.go
new file mode 100644
index 0000000..4e9da3d
--- /dev/null
+++ b/src/go/doc/comment/print.go
@@ -0,0 +1,290 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+)
+
+// A Printer is a doc comment printer.
+// The fields in the struct can be filled in before calling
+// any of the printing methods
+// in order to customize the details of the printing process.
+type Printer struct {
+ // HeadingLevel is the nesting level used for
+ // HTML and Markdown headings.
+ // If HeadingLevel is zero or negative, it defaults to level 3,
+ // meaning to use <h3> and ###.
+ HeadingLevel int
+
+ // HeadingID is a function that computes the heading ID
+ // (anchor tag) to use for the heading h when generating
+ // HTML and Markdown. If HeadingID returns an empty string,
+ // then the heading ID is omitted.
+ // If HeadingID is nil, h.DefaultID is used.
+ HeadingID func(h *Heading) string
+
+ // DocLinkURL is a function that computes the URL for the given DocLink.
+ // If DocLinkURL is nil, then link.DefaultURL(p.DocLinkBaseURL) is used.
+ DocLinkURL func(link *DocLink) string
+
+ // DocLinkBaseURL is used when DocLinkURL is nil,
+ // passed to [DocLink.DefaultURL] to construct a DocLink's URL.
+ // See that method's documentation for details.
+ DocLinkBaseURL string
+
+ // TextPrefix is a prefix to print at the start of every line
+ // when generating text output using the Text method.
+ TextPrefix string
+
+ // TextCodePrefix is the prefix to print at the start of each
+ // preformatted (code block) line when generating text output,
+ // instead of (not in addition to) TextPrefix.
+ // If TextCodePrefix is the empty string, it defaults to TextPrefix+"\t".
+ TextCodePrefix string
+
+ // TextWidth is the maximum width text line to generate,
+ // measured in Unicode code points,
+ // excluding TextPrefix and the newline character.
+ // If TextWidth is zero, it defaults to 80 minus the number of code points in TextPrefix.
+ // If TextWidth is negative, there is no limit.
+ TextWidth int
+}
+
+// headingLevel returns the heading nesting level to use:
+// p.HeadingLevel when it is positive, and 3 otherwise.
+func (p *Printer) headingLevel() int {
+	if level := p.HeadingLevel; level > 0 {
+		return level
+	}
+	return 3
+}
+
+// headingID returns the anchor ID for the heading h, computed by
+// p.HeadingID when that is set and by h.DefaultID otherwise.
+func (p *Printer) headingID(h *Heading) string {
+	if custom := p.HeadingID; custom != nil {
+		return custom(h)
+	}
+	return h.DefaultID()
+}
+
+// docLinkURL returns the URL for link, computed by p.DocLinkURL when
+// that is set and by link.DefaultURL(p.DocLinkBaseURL) otherwise.
+func (p *Printer) docLinkURL(link *DocLink) string {
+	if p.DocLinkURL == nil {
+		return link.DefaultURL(p.DocLinkBaseURL)
+	}
+	return p.DocLinkURL(link)
+}
+
+// DefaultURL constructs and returns the documentation URL for l,
+// using baseURL as a prefix for links to other packages.
+//
+// The possible forms returned by DefaultURL are:
+//   - baseURL/ImportPath, for a link to another package
+//   - baseURL/ImportPath#Name, for a link to a const, func, type, or var in another package
+//   - baseURL/ImportPath#Recv.Name, for a link to a method in another package
+//   - #Name, for a link to a const, func, type, or var in this package
+//   - #Recv.Name, for a link to a method in this package
+//
+// If baseURL ends in a trailing slash, then DefaultURL inserts
+// a slash between ImportPath and # in the anchored forms.
+// For example, here are some baseURL values and URLs they can generate:
+//
+//	"/pkg/" → "/pkg/math/#Sqrt"
+//	"/pkg"  → "/pkg/math#Sqrt"
+//	"/"     → "/math/#Sqrt"
+//	""      → "/math#Sqrt"
+func (l *DocLink) DefaultURL(baseURL string) string {
+	// anchor is the fragment naming the symbol: Name or Recv.Name.
+	anchor := l.Name
+	if l.Recv != "" {
+		anchor = l.Recv + "." + l.Name
+	}
+
+	// Links within the current package are bare fragments.
+	if l.ImportPath == "" {
+		return "#" + anchor
+	}
+
+	// A trailing slash on baseURL is mirrored after the import path;
+	// without one, a slash is inserted between baseURL and the path.
+	pkgURL := baseURL + "/" + l.ImportPath
+	if strings.HasSuffix(baseURL, "/") {
+		pkgURL = baseURL + l.ImportPath + "/"
+	}
+	if l.Name == "" {
+		return pkgURL
+	}
+	return pkgURL + "#" + anchor
+}
+
+// DefaultID returns the default anchor ID for the heading h.
+//
+// The default anchor ID is constructed by converting every
+// rune that is not alphanumeric ASCII to an underscore
+// and then adding the prefix “hdr-”.
+// For example, if the heading text is “Go Doc Comments”,
+// the default ID is “hdr-Go_Doc_Comments”.
+// If the heading text is empty after trimming white space, the ID is empty.
+func (h *Heading) DefaultID() string {
+	// Note: The “hdr-” prefix is important to avoid DOM clobbering attacks.
+	// See https://pkg.go.dev/github.com/google/safehtml#Identifier.
+	var flat strings.Builder
+	var tp textPrinter
+	tp.oneLongLine(&flat, h.Text)
+	title := strings.TrimSpace(flat.String())
+	if title == "" {
+		return ""
+	}
+
+	// Keep ASCII identifier bytes; every other rune becomes one underscore.
+	sanitize := func(r rune) rune {
+		if r < 0x80 && isIdentASCII(byte(r)) {
+			return r
+		}
+		return '_'
+	}
+	return "hdr-" + strings.Map(sanitize, title)
+}
+
+// A commentPrinter holds the state used by Printer.Comment
+// to print a Doc in Go comment (gofmt) form.
+type commentPrinter struct {
+ // Printer is the embedded configuration supplied by the caller of Comment.
+ *Printer
+ // NOTE(review): headingPrefix and needDoc are never set or read in the
+ // visible part of this file; confirm whether they are still needed.
+ headingPrefix string
+ needDoc map[string]bool
+}
+
+// Comment returns the standard Go formatting of the Doc,
+// without any comment markers.
+// The content blocks are followed by the link definitions:
+// first those that were used, then those that were not.
+func (p *Printer) Comment(d *Doc) []byte {
+	cp := &commentPrinter{Printer: p}
+	var out bytes.Buffer
+	for i, blk := range d.Content {
+		if i > 0 && blankBefore(blk) {
+			out.WriteString("\n")
+		}
+		cp.block(&out, blk)
+	}
+
+	// Print one block containing all the link definitions that were used,
+	// and then a second block containing all the unused ones.
+	// This makes it easy to clean up the unused ones: gofmt and
+	// delete the final block. And it's a nice visual signal without
+	// affecting the way the comment formats for users.
+	for _, wantUsed := range []bool{true, false} {
+		started := false
+		for _, def := range d.Links {
+			if def.Used != wantUsed {
+				continue
+			}
+			if !started {
+				// A blank line separates each group from what precedes it.
+				out.WriteString("\n")
+				started = true
+			}
+			out.WriteString("[" + def.Text + "]: " + def.URL + "\n")
+		}
+	}
+
+	return out.Bytes()
+}
+
+// blankBefore reports whether the block x requires a blank line before it.
+// Every block does, except a List whose BlankBefore method returns false.
+func blankBefore(x Block) bool {
+	list, ok := x.(*List)
+	return !ok || list.BlankBefore()
+}
+
+// block prints the block x to out in Go comment (gofmt) form:
+// paragraphs as plain text, headings with a "# " prefix, code blocks
+// indented by one tab, and list items behind a four-column marker
+// ("  - " or " N. ") with continuation lines indented four spaces.
+// A block of an unexpected type is printed as "?" followed by its type.
+func (p *commentPrinter) block(out *bytes.Buffer, x Block) {
+	switch x := x.(type) {
+	default:
+		fmt.Fprintf(out, "?%T", x)
+
+	case *Paragraph:
+		p.text(out, "", x.Text)
+		out.WriteString("\n")
+
+	case *Heading:
+		out.WriteString("# ")
+		p.text(out, "", x.Text)
+		out.WriteString("\n")
+
+	case *Code:
+		md := x.Text
+		for md != "" {
+			var line string
+			line, md, _ = strings.Cut(md, "\n")
+			// Blank lines stay empty rather than carrying a lone tab.
+			if line != "" {
+				out.WriteString("\t")
+				out.WriteString(line)
+			}
+			out.WriteString("\n")
+		}
+
+	case *List:
+		loose := x.BlankBetween()
+		for i, item := range x.Items {
+			if i > 0 && loose {
+				out.WriteString("\n")
+			}
+			// The marker occupies four columns: "  - " for a bullet,
+			// or " N. " for a single-digit number.
+			out.WriteString(" ")
+			if item.Number == "" {
+				out.WriteString(" - ")
+			} else {
+				out.WriteString(item.Number)
+				out.WriteString(". ")
+			}
+			for j, blk := range item.Content {
+				// Continuation lines and later paragraphs line up
+				// under the item text, four columns in.
+				const fourSpace = "    "
+				if j > 0 {
+					out.WriteString("\n" + fourSpace)
+				}
+				p.text(out, fourSpace, blk.(*Paragraph).Text)
+				out.WriteString("\n")
+			}
+		}
+	}
+}
+
+// text prints the text sequence x to out, writing indent after every
+// newline. Doc links and explicit links are wrapped in square brackets;
+// automatically detected links are printed as their bare text.
+// Elements of any other type are ignored.
+func (p *commentPrinter) text(out *bytes.Buffer, indent string, x []Text) {
+	for _, elem := range x {
+		switch elem := elem.(type) {
+		case Plain:
+			p.indent(out, indent, string(elem))
+		case Italic:
+			p.indent(out, indent, string(elem))
+		case *DocLink:
+			out.WriteString("[")
+			p.text(out, indent, elem.Text)
+			out.WriteString("]")
+		case *Link:
+			if elem.Auto {
+				p.text(out, indent, elem.Text)
+				continue
+			}
+			out.WriteString("[")
+			p.text(out, indent, elem.Text)
+			out.WriteString("]")
+		}
+	}
+}
+
+// indent prints s to out, writing the indent string
+// immediately after each newline in s.
+func (p *commentPrinter) indent(out *bytes.Buffer, indent, s string) {
+	for len(s) > 0 {
+		nl := strings.IndexByte(s, '\n')
+		if nl < 0 {
+			// Final line without a newline: no indent follows it.
+			out.WriteString(s)
+			return
+		}
+		out.WriteString(s[:nl+1])
+		out.WriteString(indent)
+		s = s[nl+1:]
+	}
+}
diff --git a/src/go/doc/comment/std.go b/src/go/doc/comment/std.go
new file mode 100644
index 0000000..71f15f4
--- /dev/null
+++ b/src/go/doc/comment/std.go
@@ -0,0 +1,44 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by 'go generate' DO NOT EDIT.
+//go:generate ./mkstd.sh
+
+package comment
+
+// stdPkgs lists, in sorted order, the standard library packages
+// whose import paths contain no slash.
+// NOTE(review): this file is marked as generated by mkstd.sh; a comment
+// added by hand here will presumably be lost on regeneration unless the
+// script is updated to emit it.
+var stdPkgs = []string{
+ "bufio",
+ "bytes",
+ "context",
+ "crypto",
+ "embed",
+ "encoding",
+ "errors",
+ "expvar",
+ "flag",
+ "fmt",
+ "hash",
+ "html",
+ "image",
+ "io",
+ "log",
+ "math",
+ "mime",
+ "net",
+ "os",
+ "path",
+ "plugin",
+ "reflect",
+ "regexp",
+ "runtime",
+ "sort",
+ "strconv",
+ "strings",
+ "sync",
+ "syscall",
+ "testing",
+ "time",
+ "unicode",
+ "unsafe",
+}
diff --git a/src/go/doc/comment/std_test.go b/src/go/doc/comment/std_test.go
new file mode 100644
index 0000000..89206e6
--- /dev/null
+++ b/src/go/doc/comment/std_test.go
@@ -0,0 +1,34 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "internal/diff"
+ "internal/testenv"
+ "sort"
+ "strings"
+ "testing"
+)
+
+// TestStd checks that stdPkgs matches the sorted set of packages
+// printed by 'go list std' whose import paths contain no slash.
+func TestStd(t *testing.T) {
+	cmd := testenv.Command(t, testenv.GoToolPath(t), "list", "std")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("%v\n%s", err, out)
+	}
+
+	// Keep only the top-level packages (no slash in the import path).
+	var topLevel []string
+	for _, name := range strings.Fields(string(out)) {
+		if strings.Contains(name, "/") {
+			continue
+		}
+		topLevel = append(topLevel, name)
+	}
+	sort.Strings(topLevel)
+
+	have := strings.Join(stdPkgs, "\n") + "\n"
+	want := strings.Join(topLevel, "\n") + "\n"
+	if have == want {
+		return
+	}
+	t.Errorf("stdPkgs is out of date: regenerate with 'go generate'\n%s", diff.Diff("stdPkgs", []byte(have), "want", []byte(want)))
+}
diff --git a/src/go/doc/comment/testdata/README.md b/src/go/doc/comment/testdata/README.md
new file mode 100644
index 0000000..d6f2c54
--- /dev/null
+++ b/src/go/doc/comment/testdata/README.md
@@ -0,0 +1,42 @@
+This directory contains test files (*.txt) for the comment parser.
+
+The files are in [txtar format](https://pkg.go.dev/golang.org/x/tools/txtar).
+Consider this example:
+
+ -- input --
+ Hello.
+ -- gofmt --
+ Hello.
+ -- html --
+ <p>Hello.
+ -- markdown --
+ Hello.
+ -- text --
+ Hello.
+
+Each `-- name --` line introduces a new file with the given name.
+The file named “input” must be first and contains the input to
+[comment.Parser](https://pkg.go.dev/go/doc/comment/#Parser).
+
+The remaining files contain the expected output for the named format generated by
+[comment.Printer](https://pkg.go.dev/go/doc/comment/#Printer):
+“gofmt” for Printer.Comment (Go comment format, as used by gofmt),
+“html” for Printer.HTML, “markdown” for Printer.Markdown, and “text” for Printer.Text.
+The format can also be “dump” for a textual dump of the raw data structures.
+
+The text before the `-- input --` line, if present, is JSON to be unmarshalled
+to initialize a comment.Printer. For example, this test case sets the Printer's
+TextWidth field to 20:
+
+ {"TextWidth": 20}
+ -- input --
+ Package gob manages streams of gobs - binary values exchanged between an
+ Encoder (transmitter) and a Decoder (receiver).
+ -- text --
+ Package gob
+ manages streams
+ of gobs - binary
+ values exchanged
+ between an Encoder
+ (transmitter) and a
+ Decoder (receiver).
diff --git a/src/go/doc/comment/testdata/blank.txt b/src/go/doc/comment/testdata/blank.txt
new file mode 100644
index 0000000..9049fde
--- /dev/null
+++ b/src/go/doc/comment/testdata/blank.txt
@@ -0,0 +1,12 @@
+-- input --
+ $
+ Blank line at start and end.
+ $
+-- gofmt --
+Blank line at start and end.
+-- text --
+Blank line at start and end.
+-- markdown --
+Blank line at start and end.
+-- html --
+<p>Blank line at start and end.
diff --git a/src/go/doc/comment/testdata/code.txt b/src/go/doc/comment/testdata/code.txt
new file mode 100644
index 0000000..06b1519
--- /dev/null
+++ b/src/go/doc/comment/testdata/code.txt
@@ -0,0 +1,94 @@
+-- input --
+Text.
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+More text.
+ One space
+ is
+ enough
+ to
+ start
+ a
+ block.
+More text.
+
+ Blocks
+ can
+
+ have
+ blank
+ lines.
+-- gofmt --
+Text.
+
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+
+More text.
+
+ One space
+ is
+ enough
+ to
+ start
+ a
+ block.
+
+More text.
+
+ Blocks
+ can
+
+ have
+ blank
+ lines.
+-- markdown --
+Text.
+
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+
+More text.
+
+ One space
+ is
+ enough
+ to
+ start
+ a
+ block.
+
+More text.
+
+ Blocks
+ can
+
+ have
+ blank
+ lines.
+-- html --
+<p>Text.
+<pre>A tab-indented
+(no, not eight-space indented)
+code block and haiku.
+</pre>
+<p>More text.
+<pre>One space
+ is
+ enough
+ to
+ start
+ a
+ block.
+</pre>
+<p>More text.
+<pre> Blocks
+ can
+
+have
+ blank
+ lines.
+</pre>
diff --git a/src/go/doc/comment/testdata/code2.txt b/src/go/doc/comment/testdata/code2.txt
new file mode 100644
index 0000000..0810bed
--- /dev/null
+++ b/src/go/doc/comment/testdata/code2.txt
@@ -0,0 +1,31 @@
+-- input --
+Text.
+
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+
+More text.
+-- gofmt --
+Text.
+
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+
+More text.
+-- markdown --
+Text.
+
+ A tab-indented
+ (no, not eight-space indented)
+ code block and haiku.
+
+More text.
+-- html --
+<p>Text.
+<pre>A tab-indented
+(no, not eight-space indented)
+code block and haiku.
+</pre>
+<p>More text.
diff --git a/src/go/doc/comment/testdata/code3.txt b/src/go/doc/comment/testdata/code3.txt
new file mode 100644
index 0000000..4a96a0e
--- /dev/null
+++ b/src/go/doc/comment/testdata/code3.txt
@@ -0,0 +1,33 @@
+-- input --
+Text.
+
+ $
+ A tab-indented
+ (surrounded by more blank lines)
+ code block and haiku.
+ $
+
+More text.
+-- gofmt --
+Text.
+
+ A tab-indented
+ (surrounded by more blank lines)
+ code block and haiku.
+
+More text.
+-- markdown --
+Text.
+
+ A tab-indented
+ (surrounded by more blank lines)
+ code block and haiku.
+
+More text.
+-- html --
+<p>Text.
+<pre>A tab-indented
+(surrounded by more blank lines)
+code block and haiku.
+</pre>
+<p>More text.
diff --git a/src/go/doc/comment/testdata/code4.txt b/src/go/doc/comment/testdata/code4.txt
new file mode 100644
index 0000000..f128c9a
--- /dev/null
+++ b/src/go/doc/comment/testdata/code4.txt
@@ -0,0 +1,38 @@
+-- input --
+To test, run this command:
+ go test -more
+
+Or, to test specific things, run this command:
+
+go test -more \
+ -pkg first/package \
+ -pkg second/package \
+ -pkg third/package
+
+Happy testing!
+-- gofmt --
+To test, run this command:
+
+ go test -more
+
+Or, to test specific things, run this command:
+
+ go test -more \
+ -pkg first/package \
+ -pkg second/package \
+ -pkg third/package
+
+Happy testing!
+-- markdown --
+To test, run this command:
+
+ go test -more
+
+Or, to test specific things, run this command:
+
+ go test -more \
+ -pkg first/package \
+ -pkg second/package \
+ -pkg third/package
+
+Happy testing!
diff --git a/src/go/doc/comment/testdata/code5.txt b/src/go/doc/comment/testdata/code5.txt
new file mode 100644
index 0000000..0e340dd
--- /dev/null
+++ b/src/go/doc/comment/testdata/code5.txt
@@ -0,0 +1,21 @@
+-- input --
+L1
+L2
+L3
+L4
+L5
+- L6 {
+ L7
+}
+L8
+-- gofmt --
+L1
+L2
+L3
+L4
+L5
+ - L6 {
+ L7
+ }
+
+L8
diff --git a/src/go/doc/comment/testdata/code6.txt b/src/go/doc/comment/testdata/code6.txt
new file mode 100644
index 0000000..d2915d1
--- /dev/null
+++ b/src/go/doc/comment/testdata/code6.txt
@@ -0,0 +1,24 @@
+-- input --
+Run this program:
+
+func main() {
+ fmt.Println("hello, world")
+}
+
+Or this:
+
+go func() {
+ fmt.Println("hello, world")
+}()
+-- gofmt --
+Run this program:
+
+ func main() {
+ fmt.Println("hello, world")
+ }
+
+Or this:
+
+ go func() {
+ fmt.Println("hello, world")
+ }()
diff --git a/src/go/doc/comment/testdata/crash1.txt b/src/go/doc/comment/testdata/crash1.txt
new file mode 100644
index 0000000..6bb2f6f
--- /dev/null
+++ b/src/go/doc/comment/testdata/crash1.txt
@@ -0,0 +1,16 @@
+-- input --
+[]
+
+[]: http://
+-- gofmt --
+[]
+
+[]: http://
+-- html --
+<p><a href="http://"></a>
+-- markdown --
+[](http://)
+-- text --
+
+
+[]: http://
diff --git a/src/go/doc/comment/testdata/doclink.txt b/src/go/doc/comment/testdata/doclink.txt
new file mode 100644
index 0000000..a932347
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink.txt
@@ -0,0 +1,21 @@
+-- input --
+In this package, see [Doc] and [Parser.Parse].
+There is no [Undef] or [Undef.Method].
+See also the [comment] package,
+especially [comment.Doc] and [comment.Parser.Parse].
+-- gofmt --
+In this package, see [Doc] and [Parser.Parse].
+There is no [Undef] or [Undef.Method].
+See also the [comment] package,
+especially [comment.Doc] and [comment.Parser.Parse].
+-- text --
+In this package, see Doc and Parser.Parse. There is no [Undef] or
+[Undef.Method]. See also the comment package, especially comment.Doc and
+comment.Parser.Parse.
+-- markdown --
+In this package, see [Doc](#Doc) and [Parser.Parse](#Parser.Parse). There is no \[Undef] or \[Undef.Method]. See also the [comment](/go/doc/comment) package, especially [comment.Doc](/go/doc/comment#Doc) and [comment.Parser.Parse](/go/doc/comment#Parser.Parse).
+-- html --
+<p>In this package, see <a href="#Doc">Doc</a> and <a href="#Parser.Parse">Parser.Parse</a>.
+There is no [Undef] or [Undef.Method].
+See also the <a href="/go/doc/comment">comment</a> package,
+especially <a href="/go/doc/comment#Doc">comment.Doc</a> and <a href="/go/doc/comment#Parser.Parse">comment.Parser.Parse</a>.
diff --git a/src/go/doc/comment/testdata/doclink2.txt b/src/go/doc/comment/testdata/doclink2.txt
new file mode 100644
index 0000000..ecd8e4e
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink2.txt
@@ -0,0 +1,8 @@
+-- input --
+We use [io.Reader] a lot, and also a few map[io.Reader]string.
+
+Never [io.Reader]int or Slice[io.Reader] though.
+-- markdown --
+We use [io.Reader](/io#Reader) a lot, and also a few map\[io.Reader]string.
+
+Never \[io.Reader]int or Slice\[io.Reader] though.
diff --git a/src/go/doc/comment/testdata/doclink3.txt b/src/go/doc/comment/testdata/doclink3.txt
new file mode 100644
index 0000000..0ccfb3d
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink3.txt
@@ -0,0 +1,8 @@
+-- input --
+[encoding/json.Marshal] is a doc link.
+
+[rot13.Marshal] is not.
+-- markdown --
+[encoding/json.Marshal](/encoding/json#Marshal) is a doc link.
+
+\[rot13.Marshal] is not.
diff --git a/src/go/doc/comment/testdata/doclink4.txt b/src/go/doc/comment/testdata/doclink4.txt
new file mode 100644
index 0000000..c709527
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink4.txt
@@ -0,0 +1,7 @@
+-- input --
+[io] at start of comment.
+[io] at start of line.
+At end of line: [io]
+At end of comment: [io]
+-- markdown --
+[io](/io) at start of comment. [io](/io) at start of line. At end of line: [io](/io) At end of comment: [io](/io)
diff --git a/src/go/doc/comment/testdata/doclink5.txt b/src/go/doc/comment/testdata/doclink5.txt
new file mode 100644
index 0000000..ac7b3ae
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink5.txt
@@ -0,0 +1,5 @@
+{"DocLinkBaseURL": "https://pkg.go.dev"}
+-- input --
+[encoding/json.Marshal] is a doc link.
+-- markdown --
+[encoding/json.Marshal](https://pkg.go.dev/encoding/json#Marshal) is a doc link.
diff --git a/src/go/doc/comment/testdata/doclink6.txt b/src/go/doc/comment/testdata/doclink6.txt
new file mode 100644
index 0000000..1acd03b
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink6.txt
@@ -0,0 +1,5 @@
+{"DocLinkBaseURL": "https://go.dev/pkg/"}
+-- input --
+[encoding/json.Marshal] is a doc link, and so is [rsc.io/quote.NonExist].
+-- markdown --
+[encoding/json.Marshal](https://go.dev/pkg/encoding/json/#Marshal) is a doc link, and so is [rsc.io/quote.NonExist](https://go.dev/pkg/rsc.io/quote/#NonExist).
diff --git a/src/go/doc/comment/testdata/doclink7.txt b/src/go/doc/comment/testdata/doclink7.txt
new file mode 100644
index 0000000..d34979a
--- /dev/null
+++ b/src/go/doc/comment/testdata/doclink7.txt
@@ -0,0 +1,4 @@
+-- input --
+You see more [*bytes.Buffer] than [bytes.Buffer].
+-- markdown --
+You see more [\*bytes.Buffer](/bytes#Buffer) than [bytes.Buffer](/bytes#Buffer).
diff --git a/src/go/doc/comment/testdata/escape.txt b/src/go/doc/comment/testdata/escape.txt
new file mode 100644
index 0000000..f54663f
--- /dev/null
+++ b/src/go/doc/comment/testdata/escape.txt
@@ -0,0 +1,55 @@
+-- input --
+What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>?
+
++ Line
+
+- Line
+
+* Line
+
+999. Line
+
+## Line
+-- gofmt --
+What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>?
+
++ Line
+
+- Line
+
+* Line
+
+999. Line
+
+## Line
+-- text --
+What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>?
+
++ Line
+
+- Line
+
+* Line
+
+999. Line
+
+## Line
+-- markdown --
+What the ~!@#$%^&\*()\_+-=\`{}|\[]\\:";',./\<>?
+
+\+ Line
+
+\- Line
+
+\* Line
+
+999\. Line
+
+\## Line
+-- html --
+<p>What the ~!@#$%^&amp;*()_+-=`{}|[]\:&quot;;&apos;,./&lt;&gt;?
+<p>+ Line
+<p>- Line
+<p>* Line
+<p>999. Line
+<p>## Line
diff --git a/src/go/doc/comment/testdata/head.txt b/src/go/doc/comment/testdata/head.txt
new file mode 100644
index 0000000..b99a8c5
--- /dev/null
+++ b/src/go/doc/comment/testdata/head.txt
@@ -0,0 +1,92 @@
+-- input --
+Some text.
+
+An Old Heading
+
+Not An Old Heading.
+
+And some text.
+
+# A New Heading.
+
+And some more text.
+
+# Not a heading,
+because text follows it.
+
+Because text precedes it,
+# not a heading.
+
+## Not a heading either.
+
+-- gofmt --
+Some text.
+
+# An Old Heading
+
+Not An Old Heading.
+
+And some text.
+
+# A New Heading.
+
+And some more text.
+
+# Not a heading,
+because text follows it.
+
+Because text precedes it,
+# not a heading.
+
+## Not a heading either.
+
+-- text --
+Some text.
+
+# An Old Heading
+
+Not An Old Heading.
+
+And some text.
+
+# A New Heading.
+
+And some more text.
+
+# Not a heading, because text follows it.
+
+Because text precedes it, # not a heading.
+
+## Not a heading either.
+
+-- markdown --
+Some text.
+
+### An Old Heading {#hdr-An_Old_Heading}
+
+Not An Old Heading.
+
+And some text.
+
+### A New Heading. {#hdr-A_New_Heading_}
+
+And some more text.
+
+\# Not a heading, because text follows it.
+
+Because text precedes it, # not a heading.
+
+\## Not a heading either.
+
+-- html --
+<p>Some text.
+<h3 id="hdr-An_Old_Heading">An Old Heading</h3>
+<p>Not An Old Heading.
+<p>And some text.
+<h3 id="hdr-A_New_Heading_">A New Heading.</h3>
+<p>And some more text.
+<p># Not a heading,
+because text follows it.
+<p>Because text precedes it,
+# not a heading.
+<p>## Not a heading either.
diff --git a/src/go/doc/comment/testdata/head2.txt b/src/go/doc/comment/testdata/head2.txt
new file mode 100644
index 0000000..d357632
--- /dev/null
+++ b/src/go/doc/comment/testdata/head2.txt
@@ -0,0 +1,36 @@
+-- input --
+✦
+
+Almost a+heading
+
+✦
+
+Don't be a heading
+
+✦
+
+A.b is a heading
+
+✦
+
+A. b is not a heading
+
+✦
+-- gofmt --
+✦
+
+Almost a+heading
+
+✦
+
+Don't be a heading
+
+✦
+
+# A.b is a heading
+
+✦
+
+A. b is not a heading
+
+✦
diff --git a/src/go/doc/comment/testdata/head3.txt b/src/go/doc/comment/testdata/head3.txt
new file mode 100644
index 0000000..dbb7cb3
--- /dev/null
+++ b/src/go/doc/comment/testdata/head3.txt
@@ -0,0 +1,7 @@
+{"HeadingLevel": 5}
+-- input --
+# Heading
+-- markdown --
+##### Heading {#hdr-Heading}
+-- html --
+<h5 id="hdr-Heading">Heading</h5>
diff --git a/src/go/doc/comment/testdata/hello.txt b/src/go/doc/comment/testdata/hello.txt
new file mode 100644
index 0000000..fb07f1e
--- /dev/null
+++ b/src/go/doc/comment/testdata/hello.txt
@@ -0,0 +1,35 @@
+-- input --
+ Hello,
+ world
+
+ This is
+ a test.
+-- dump --
+Doc
+ Paragraph
+ Plain
+ "Hello,\n"
+ "world"
+ Paragraph
+ Plain
+ "This is\n"
+ "a test."
+-- gofmt --
+Hello,
+world
+
+This is
+a test.
+-- html --
+<p>Hello,
+world
+<p>This is
+a test.
+-- markdown --
+Hello, world
+
+This is a test.
+-- text --
+Hello, world
+
+This is a test.
diff --git a/src/go/doc/comment/testdata/link.txt b/src/go/doc/comment/testdata/link.txt
new file mode 100644
index 0000000..551e306
--- /dev/null
+++ b/src/go/doc/comment/testdata/link.txt
@@ -0,0 +1,17 @@
+-- input --
+The Go home page is https://go.dev/.
+It used to be https://golang.org.
+
+-- gofmt --
+The Go home page is https://go.dev/.
+It used to be https://golang.org.
+
+-- text --
+The Go home page is https://go.dev/. It used to be https://golang.org.
+
+-- markdown --
+The Go home page is [https://go.dev/](https://go.dev/). It used to be [https://golang.org](https://golang.org).
+
+-- html --
+<p>The Go home page is <a href="https://go.dev/">https://go.dev/</a>.
+It used to be <a href="https://golang.org">https://golang.org</a>.
diff --git a/src/go/doc/comment/testdata/link2.txt b/src/go/doc/comment/testdata/link2.txt
new file mode 100644
index 0000000..8637a32
--- /dev/null
+++ b/src/go/doc/comment/testdata/link2.txt
@@ -0,0 +1,31 @@
+-- input --
+The Go home page is https://go.dev/.
+It used to be https://golang.org.
+https:// is not a link.
+Nor is https://
+https://☺ is not a link.
+https://:80 is not a link.
+
+-- gofmt --
+The Go home page is https://go.dev/.
+It used to be https://golang.org.
+https:// is not a link.
+Nor is https://
+https://☺ is not a link.
+https://:80 is not a link.
+
+-- text --
+The Go home page is https://go.dev/. It used to be https://golang.org. https://
+is not a link. Nor is https:// https://☺ is not a link. https://:80 is not a
+link.
+
+-- markdown --
+The Go home page is [https://go.dev/](https://go.dev/). It used to be [https://golang.org](https://golang.org). https:// is not a link. Nor is https:// https://☺ is not a link. https://:80 is not a link.
+
+-- html --
+<p>The Go home page is <a href="https://go.dev/">https://go.dev/</a>.
+It used to be <a href="https://golang.org">https://golang.org</a>.
+https:// is not a link.
+Nor is https://
+https://☺ is not a link.
+https://:80 is not a link.
diff --git a/src/go/doc/comment/testdata/link3.txt b/src/go/doc/comment/testdata/link3.txt
new file mode 100644
index 0000000..5a115b5
--- /dev/null
+++ b/src/go/doc/comment/testdata/link3.txt
@@ -0,0 +1,14 @@
+-- input --
+Doc text.
+
+[Go home page]: https://go.dev
+-- gofmt --
+Doc text.
+
+[Go home page]: https://go.dev
+-- text --
+Doc text.
+-- markdown --
+Doc text.
+-- html --
+<p>Doc text.
diff --git a/src/go/doc/comment/testdata/link4.txt b/src/go/doc/comment/testdata/link4.txt
new file mode 100644
index 0000000..75f194c
--- /dev/null
+++ b/src/go/doc/comment/testdata/link4.txt
@@ -0,0 +1,77 @@
+-- input --
+These are not links.
+
+[x
+
+[x]:
+
+[x]:https://go.dev
+
+[x]https://go.dev
+
+[x]: surprise://go.dev
+
+[x]: surprise!
+
+But this is, with a tab (although it's unused).
+
+[z]: https://go.dev
+-- gofmt --
+These are not links.
+
+[x
+
+[x]:
+
+[x]:https://go.dev
+
+[x]https://go.dev
+
+[x]: surprise://go.dev
+
+[x]: surprise!
+
+But this is, with a tab (although it's unused).
+
+[z]: https://go.dev
+-- text --
+These are not links.
+
+[x
+
+[x]:
+
+[x]:https://go.dev
+
+[x]https://go.dev
+
+[x]: surprise://go.dev
+
+[x]: surprise!
+
+But this is, with a tab (although it's unused).
+-- markdown --
+These are not links.
+
+\[x
+
+\[x]:
+
+\[x]:[https://go.dev](https://go.dev)
+
+\[x][https://go.dev](https://go.dev)
+
+\[x]: surprise://go.dev
+
+\[x]: surprise!
+
+But this is, with a tab (although it's unused).
+-- html --
+<p>These are not links.
+<p>[x
+<p>[x]:
+<p>[x]:<a href="https://go.dev">https://go.dev</a>
+<p>[x]<a href="https://go.dev">https://go.dev</a>
+<p>[x]: surprise://go.dev
+<p>[x]: surprise!
+<p>But this is, with a tab (although it&apos;s unused).
diff --git a/src/go/doc/comment/testdata/link5.txt b/src/go/doc/comment/testdata/link5.txt
new file mode 100644
index 0000000..b4fb588
--- /dev/null
+++ b/src/go/doc/comment/testdata/link5.txt
@@ -0,0 +1,36 @@
+-- input --
+See the [Go home page] and the [pkg
+site].
+
+[Go home page]: https://go.dev/
+[pkg site]: https://pkg.go.dev
+[Go home page]: https://duplicate.ignored
+
+They're really great!
+
+-- gofmt --
+See the [Go home page] and the [pkg
+site].
+
+They're really great!
+
+[Go home page]: https://go.dev/
+[pkg site]: https://pkg.go.dev
+
+[Go home page]: https://duplicate.ignored
+
+-- text --
+See the Go home page and the pkg site.
+
+They're really great!
+
+[Go home page]: https://go.dev/
+[pkg site]: https://pkg.go.dev
+-- markdown --
+See the [Go home page](https://go.dev/) and the [pkg site](https://pkg.go.dev).
+
+They're really great!
+-- html --
+<p>See the <a href="https://go.dev/">Go home page</a> and the <a href="https://pkg.go.dev">pkg
+site</a>.
+<p>They&apos;re really great!
diff --git a/src/go/doc/comment/testdata/link6.txt b/src/go/doc/comment/testdata/link6.txt
new file mode 100644
index 0000000..ff629b4
--- /dev/null
+++ b/src/go/doc/comment/testdata/link6.txt
@@ -0,0 +1,50 @@
+-- input --
+URLs with punctuation are hard.
+We don't want to consume the end-of-sentence punctuation.
+
+For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries).
+And https://example.com/[foo]/bar{.
+And https://example.com/(foo)/bar!
+And https://example.com/{foo}/bar{.
+And https://example.com/)baz{foo}.
+
+[And https://example.com/].
+
+-- gofmt --
+URLs with punctuation are hard.
+We don't want to consume the end-of-sentence punctuation.
+
+For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries).
+And https://example.com/[foo]/bar{.
+And https://example.com/(foo)/bar!
+And https://example.com/{foo}/bar{.
+And https://example.com/)baz{foo}.
+
+[And https://example.com/].
+
+-- text --
+URLs with punctuation are hard. We don't want to consume the end-of-sentence
+punctuation.
+
+For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries).
+And https://example.com/[foo]/bar{. And https://example.com/(foo)/bar! And
+https://example.com/{foo}/bar{. And https://example.com/)baz{foo}.
+
+[And https://example.com/].
+
+-- markdown --
+URLs with punctuation are hard. We don't want to consume the end-of-sentence punctuation.
+
+For example, [https://en.wikipedia.org/wiki/John\_Adams\_(miniseries)](https://en.wikipedia.org/wiki/John_Adams_(miniseries)). And [https://example.com/\[foo]/bar](https://example.com/[foo]/bar){. And [https://example.com/(foo)/bar](https://example.com/(foo)/bar)! And [https://example.com/{foo}/bar](https://example.com/{foo}/bar){. And [https://example.com/](https://example.com/))baz{foo}.
+
+\[And [https://example.com/](https://example.com/)].
+
+-- html --
+<p>URLs with punctuation are hard.
+We don&apos;t want to consume the end-of-sentence punctuation.
+<p>For example, <a href="https://en.wikipedia.org/wiki/John_Adams_(miniseries)">https://en.wikipedia.org/wiki/John_Adams_(miniseries)</a>.
+And <a href="https://example.com/[foo]/bar">https://example.com/[foo]/bar</a>{.
+And <a href="https://example.com/(foo)/bar">https://example.com/(foo)/bar</a>!
+And <a href="https://example.com/{foo}/bar">https://example.com/{foo}/bar</a>{.
+And <a href="https://example.com/">https://example.com/</a>)baz{foo}.
+<p>[And <a href="https://example.com/">https://example.com/</a>].
diff --git a/src/go/doc/comment/testdata/link7.txt b/src/go/doc/comment/testdata/link7.txt
new file mode 100644
index 0000000..89a8b31
--- /dev/null
+++ b/src/go/doc/comment/testdata/link7.txt
@@ -0,0 +1,25 @@
+-- input --
+[math] is a package but this is not a doc link.
+
+[io] is a doc link.
+
+[math]: https://example.com
+-- gofmt --
+[math] is a package but this is not a doc link.
+
+[io] is a doc link.
+
+[math]: https://example.com
+-- text --
+math is a package but this is not a doc link.
+
+io is a doc link.
+
+[math]: https://example.com
+-- markdown --
+[math](https://example.com) is a package but this is not a doc link.
+
+[io](/io) is a doc link.
+-- html --
+<p><a href="https://example.com">math</a> is a package but this is not a doc link.
+<p><a href="/io">io</a> is a doc link.
diff --git a/src/go/doc/comment/testdata/linklist.txt b/src/go/doc/comment/testdata/linklist.txt
new file mode 100644
index 0000000..baf4062
--- /dev/null
+++ b/src/go/doc/comment/testdata/linklist.txt
@@ -0,0 +1,18 @@
+{"DocLinkBaseURL": "https://pkg.go.dev"}
+-- input --
+Did you know?
+
+ - [encoding/json.Marshal] is a doc link. So is [encoding/json.Unmarshal].
+-- text --
+Did you know?
+
+ - encoding/json.Marshal is a doc link. So is encoding/json.Unmarshal.
+-- markdown --
+Did you know?
+
+ - [encoding/json.Marshal](https://pkg.go.dev/encoding/json#Marshal) is a doc link. So is [encoding/json.Unmarshal](https://pkg.go.dev/encoding/json#Unmarshal).
+-- html --
+<p>Did you know?
+<ul>
+<li><a href="https://pkg.go.dev/encoding/json#Marshal">encoding/json.Marshal</a> is a doc link. So is <a href="https://pkg.go.dev/encoding/json#Unmarshal">encoding/json.Unmarshal</a>.
+</ul>
diff --git a/src/go/doc/comment/testdata/linklist2.txt b/src/go/doc/comment/testdata/linklist2.txt
new file mode 100644
index 0000000..81b3061
--- /dev/null
+++ b/src/go/doc/comment/testdata/linklist2.txt
@@ -0,0 +1,39 @@
+{"DocLinkBaseURL": "https://pkg.go.dev"}
+-- input --
+Did you know?
+
+ - [testing.T] is one doc link.
+ - So is [testing.M].
+ - So is [testing.B].
+ This is the same list paragraph.
+
+ There is [testing.PB] in this list item, too!
+-- text --
+Did you know?
+
+ - testing.T is one doc link.
+
+ - So is testing.M.
+
+ - So is testing.B. This is the same list paragraph.
+
+ There is testing.PB in this list item, too!
+-- markdown --
+Did you know?
+
+ - [testing.T](https://pkg.go.dev/testing#T) is one doc link.
+
+ - So is [testing.M](https://pkg.go.dev/testing#M).
+
+ - So is [testing.B](https://pkg.go.dev/testing#B). This is the same list paragraph.
+
+ There is [testing.PB](https://pkg.go.dev/testing#PB) in this list item, too!
+-- html --
+<p>Did you know?
+<ul>
+<li><p><a href="https://pkg.go.dev/testing#T">testing.T</a> is one doc link.
+<li><p>So is <a href="https://pkg.go.dev/testing#M">testing.M</a>.
+<li><p>So is <a href="https://pkg.go.dev/testing#B">testing.B</a>.
+This is the same list paragraph.
+<p>There is <a href="https://pkg.go.dev/testing#PB">testing.PB</a> in this list item, too!
+</ul>
diff --git a/src/go/doc/comment/testdata/linklist3.txt b/src/go/doc/comment/testdata/linklist3.txt
new file mode 100644
index 0000000..701a54e
--- /dev/null
+++ b/src/go/doc/comment/testdata/linklist3.txt
@@ -0,0 +1,31 @@
+{"DocLinkBaseURL": "https://pkg.go.dev"}
+-- input --
+Cool things:
+
+ - Foo
+ - [Go]
+ - Bar
+
+[Go]: https://go.dev/
+-- text --
+Cool things:
+
+ - Foo
+ - Go
+ - Bar
+
+[Go]: https://go.dev/
+-- markdown --
+Cool things:
+
+ - Foo
+ - [Go](https://go.dev/)
+ - Bar
+
+-- html --
+<p>Cool things:
+<ul>
+<li>Foo
+<li><a href="https://go.dev/">Go</a>
+<li>Bar
+</ul>
diff --git a/src/go/doc/comment/testdata/linklist4.txt b/src/go/doc/comment/testdata/linklist4.txt
new file mode 100644
index 0000000..db39ec4
--- /dev/null
+++ b/src/go/doc/comment/testdata/linklist4.txt
@@ -0,0 +1,36 @@
+{"DocLinkBaseURL": "https://pkg.go.dev"}
+-- input --
+Cool things:
+
+ - Foo
+ - [Go] is great
+
+ [Go]: https://go.dev/
+ - Bar
+
+-- text --
+Cool things:
+
+ - Foo
+
+ - Go is great
+
+ - Bar
+
+[Go]: https://go.dev/
+-- markdown --
+Cool things:
+
+ - Foo
+
+ - [Go](https://go.dev/) is great
+
+ - Bar
+
+-- html --
+<p>Cool things:
+<ul>
+<li><p>Foo
+<li><p><a href="https://go.dev/">Go</a> is great
+<li><p>Bar
+</ul>
diff --git a/src/go/doc/comment/testdata/list.txt b/src/go/doc/comment/testdata/list.txt
new file mode 100644
index 0000000..455782f
--- /dev/null
+++ b/src/go/doc/comment/testdata/list.txt
@@ -0,0 +1,48 @@
+-- input --
+Text.
+- Not a list.
+ - Here is the list.
+ • Using multiple bullets.
+ * Indentation does not matter.
+ + Lots of bullets.
+More text.
+
+-- gofmt --
+Text.
+- Not a list.
+ - Here is the list.
+ - Using multiple bullets.
+ - Indentation does not matter.
+ - Lots of bullets.
+
+More text.
+
+-- text --
+Text. - Not a list.
+ - Here is the list.
+ - Using multiple bullets.
+ - Indentation does not matter.
+ - Lots of bullets.
+
+More text.
+
+-- markdown --
+Text. - Not a list.
+
+ - Here is the list.
+ - Using multiple bullets.
+ - Indentation does not matter.
+ - Lots of bullets.
+
+More text.
+
+-- html --
+<p>Text.
+- Not a list.
+<ul>
+<li>Here is the list.
+<li>Using multiple bullets.
+<li>Indentation does not matter.
+<li>Lots of bullets.
+</ul>
+<p>More text.
diff --git a/src/go/doc/comment/testdata/list10.txt b/src/go/doc/comment/testdata/list10.txt
new file mode 100644
index 0000000..9c49083
--- /dev/null
+++ b/src/go/doc/comment/testdata/list10.txt
@@ -0,0 +1,13 @@
+-- input --
+
+ 1. This list
+ 2. Starts the comment
+ 3. And also has a blank line before it.
+
+All of which is a little weird.
+-- gofmt --
+ 1. This list
+ 2. Starts the comment
+ 3. And also has a blank line before it.
+
+All of which is a little weird.
diff --git a/src/go/doc/comment/testdata/list2.txt b/src/go/doc/comment/testdata/list2.txt
new file mode 100644
index 0000000..c390b3d
--- /dev/null
+++ b/src/go/doc/comment/testdata/list2.txt
@@ -0,0 +1,57 @@
+-- input --
+Text.
+ 1. Uno
+ 2) Dos
+ 3. Tres
+ 5. Cinco
+ 7. Siete
+ 11. Once
+ 12. Doce
+ 13. Trece.
+
+-- gofmt --
+Text.
+ 1. Uno
+ 2. Dos
+ 3. Tres
+ 5. Cinco
+ 7. Siete
+ 11. Once
+ 12. Doce
+ 13. Trece.
+
+-- text --
+Text.
+ 1. Uno
+ 2. Dos
+ 3. Tres
+ 5. Cinco
+ 7. Siete
+ 11. Once
+ 12. Doce
+ 13. Trece.
+
+-- markdown --
+Text.
+
+ 1. Uno
+ 2. Dos
+ 3. Tres
+ 5. Cinco
+ 7. Siete
+ 11. Once
+ 12. Doce
+ 13. Trece.
+
+-- html --
+<p>Text.
+<ol>
+<li>Uno
+<li>Dos
+<li>Tres
+<li value="5">Cinco
+<li value="7">Siete
+<li value="11">Once
+<li>Doce
+<li>Trece.
+</ol>
diff --git a/src/go/doc/comment/testdata/list3.txt b/src/go/doc/comment/testdata/list3.txt
new file mode 100644
index 0000000..d7d345d
--- /dev/null
+++ b/src/go/doc/comment/testdata/list3.txt
@@ -0,0 +1,32 @@
+-- input --
+Text.
+
+ 1. Uno
+ 1. Dos
+ 1. Tres
+ 1. Quatro
+
+-- gofmt --
+Text.
+
+ 1. Uno
+ 1. Dos
+ 1. Tres
+ 1. Quatro
+
+-- markdown --
+Text.
+
+ 1. Uno
+ 1. Dos
+ 1. Tres
+ 1. Quatro
+
+-- html --
+<p>Text.
+<ol>
+<li>Uno
+<li value="1">Dos
+<li value="1">Tres
+<li value="1">Quatro
+</ol>
diff --git a/src/go/doc/comment/testdata/list4.txt b/src/go/doc/comment/testdata/list4.txt
new file mode 100644
index 0000000..9c28d65
--- /dev/null
+++ b/src/go/doc/comment/testdata/list4.txt
@@ -0,0 +1,38 @@
+-- input --
+Text.
+ 1. List
+2. Not indented, not a list.
+ 3. Another list.
+
+-- gofmt --
+Text.
+ 1. List
+
+2. Not indented, not a list.
+ 3. Another list.
+
+-- text --
+Text.
+ 1. List
+
+2. Not indented, not a list.
+ 3. Another list.
+
+-- markdown --
+Text.
+
+ 1. List
+
+2\. Not indented, not a list.
+
+ 3. Another list.
+
+-- html --
+<p>Text.
+<ol>
+<li>List
+</ol>
+<p>2. Not indented, not a list.
+<ol>
+<li value="3">Another list.
+</ol>
diff --git a/src/go/doc/comment/testdata/list5.txt b/src/go/doc/comment/testdata/list5.txt
new file mode 100644
index 0000000..a5128e5
--- /dev/null
+++ b/src/go/doc/comment/testdata/list5.txt
@@ -0,0 +1,40 @@
+-- input --
+Text.
+
+ 1. One
+ 999999999999999999999. Big
+ 1000000000000000000000. Bigger
+ 1000000000000000000001. Biggest
+
+-- gofmt --
+Text.
+
+ 1. One
+ 999999999999999999999. Big
+ 1000000000000000000000. Bigger
+ 1000000000000000000001. Biggest
+
+-- text --
+Text.
+
+ 1. One
+ 999999999999999999999. Big
+ 1000000000000000000000. Bigger
+ 1000000000000000000001. Biggest
+
+-- markdown --
+Text.
+
+ 1. One
+ 999999999999999999999. Big
+ 1000000000000000000000. Bigger
+ 1000000000000000000001. Biggest
+
+-- html --
+<p>Text.
+<ol>
+<li>One
+<li value="999999999999999999999">Big
+<li>Bigger
+<li>Biggest
+</ol>
diff --git a/src/go/doc/comment/testdata/list6.txt b/src/go/doc/comment/testdata/list6.txt
new file mode 100644
index 0000000..ffc0122
--- /dev/null
+++ b/src/go/doc/comment/testdata/list6.txt
@@ -0,0 +1,129 @@
+-- input --
+Text.
+ - List immediately after.
+ - Another.
+
+More text.
+
+ - List after blank line.
+ - Another.
+
+Even more text.
+ - List immediately after.
+
+ - Blank line between items.
+
+Yet more text.
+
+ - Another list after blank line.
+
+ - Blank line between items.
+
+Still more text.
+ - One list item.
+
+ Multiple paragraphs.
+-- dump --
+Doc
+ Paragraph
+ Plain "Text."
+ List ForceBlankBefore=false ForceBlankBetween=false
+ Item Number=""
+ Paragraph
+ Plain "List immediately after."
+ Item Number=""
+ Paragraph
+ Plain "Another."
+ Paragraph
+ Plain "More text."
+ List ForceBlankBefore=true ForceBlankBetween=false
+ Item Number=""
+ Paragraph
+ Plain "List after blank line."
+ Item Number=""
+ Paragraph
+ Plain "Another."
+ Paragraph
+ Plain "Even more text."
+ List ForceBlankBefore=false ForceBlankBetween=true
+ Item Number=""
+ Paragraph
+ Plain "List immediately after."
+ Item Number=""
+ Paragraph
+ Plain "Blank line between items."
+ Paragraph
+ Plain "Yet more text."
+ List ForceBlankBefore=true ForceBlankBetween=true
+ Item Number=""
+ Paragraph
+ Plain "Another list after blank line."
+ Item Number=""
+ Paragraph
+ Plain "Blank line between items."
+ Paragraph
+ Plain "Still more text."
+ List ForceBlankBefore=false ForceBlankBetween=true
+ Item Number=""
+ Paragraph
+ Plain "One list item."
+ Paragraph
+ Plain "Multiple paragraphs."
+
+-- gofmt --
+Text.
+ - List immediately after.
+ - Another.
+
+More text.
+
+ - List after blank line.
+ - Another.
+
+Even more text.
+
+ - List immediately after.
+
+ - Blank line between items.
+
+Yet more text.
+
+ - Another list after blank line.
+
+ - Blank line between items.
+
+Still more text.
+
+ - One list item.
+
+ Multiple paragraphs.
+
+-- markdown --
+Text.
+
+ - List immediately after.
+ - Another.
+
+More text.
+
+ - List after blank line.
+ - Another.
+
+Even more text.
+
+ - List immediately after.
+
+ - Blank line between items.
+
+Yet more text.
+
+ - Another list after blank line.
+
+ - Blank line between items.
+
+Still more text.
+
+ - One list item.
+
+ Multiple paragraphs.
+
diff --git a/src/go/doc/comment/testdata/list7.txt b/src/go/doc/comment/testdata/list7.txt
new file mode 100644
index 0000000..4466050
--- /dev/null
+++ b/src/go/doc/comment/testdata/list7.txt
@@ -0,0 +1,98 @@
+-- input --
+Almost list markers (but not quite):
+
+ -
+
+❦
+
+ - $
+
+❦
+
+ - $
+
+❦
+
+ $
+ $
+
+❦
+
+ 1! List.
+
+❦
+-- gofmt --
+Almost list markers (but not quite):
+
+ -
+
+❦
+
+ - $
+
+❦
+
+ - $
+
+❦
+
+❦
+
+ 1! List.
+
+❦
+-- text --
+Almost list markers (but not quite):
+
+ -
+
+❦
+
+ -
+
+❦
+
+ -
+
+❦
+
+❦
+
+ 1! List.
+
+❦
+-- markdown --
+Almost list markers (but not quite):
+
+ -
+
+❦
+
+ - $
+
+❦
+
+ - $
+
+❦
+
+❦
+
+ 1! List.
+
+❦
+-- html --
+<p>Almost list markers (but not quite):
+<pre>-
+</pre>
+<p>❦
+<pre>- $
+</pre>
+<p>❦
+<pre>- $
+</pre>
+<p>❦
+<p>❦
+<pre>1! List.
+</pre>
+<p>❦
diff --git a/src/go/doc/comment/testdata/list8.txt b/src/go/doc/comment/testdata/list8.txt
new file mode 100644
index 0000000..fc46b0d
--- /dev/null
+++ b/src/go/doc/comment/testdata/list8.txt
@@ -0,0 +1,56 @@
+-- input --
+Loose lists.
+ - A
+
+ B
+ - C
+ D
+ - E
+ - F
+-- gofmt --
+Loose lists.
+
+ - A
+
+ B
+
+ - C
+ D
+
+ - E
+
+ - F
+-- text --
+Loose lists.
+
+ - A
+
+ B
+
+ - C D
+
+ - E
+
+ - F
+-- markdown --
+Loose lists.
+
+ - A
+
+ B
+
+ - C D
+
+ - E
+
+ - F
+-- html --
+<p>Loose lists.
+<ul>
+<li><p>A
+<p>B
+<li><p>C
+D
+<li><p>E
+<li><p>F
+</ul>
diff --git a/src/go/doc/comment/testdata/list9.txt b/src/go/doc/comment/testdata/list9.txt
new file mode 100644
index 0000000..48e4673
--- /dev/null
+++ b/src/go/doc/comment/testdata/list9.txt
@@ -0,0 +1,30 @@
+-- input --
+Text.
+
+1. Not a list
+2. because it is
+3. unindented.
+
+4. This one
+ is a list
+ because of the indented text.
+5. More wrapped
+ items.
+6. And unwrapped.
+
+7. The blank line stops the heuristic.
+-- gofmt --
+Text.
+
+1. Not a list
+2. because it is
+3. unindented.
+
+ 4. This one
+ is a list
+ because of the indented text.
+ 5. More wrapped
+ items.
+ 6. And unwrapped.
+
+7. The blank line stops the heuristic.
diff --git a/src/go/doc/comment/testdata/para.txt b/src/go/doc/comment/testdata/para.txt
new file mode 100644
index 0000000..2355fa8
--- /dev/null
+++ b/src/go/doc/comment/testdata/para.txt
@@ -0,0 +1,17 @@
+-- input --
+Hello, world.
+This is a paragraph.
+
+-- gofmt --
+Hello, world.
+This is a paragraph.
+
+-- text --
+Hello, world. This is a paragraph.
+
+-- markdown --
+Hello, world. This is a paragraph.
+
+-- html --
+<p>Hello, world.
+This is a paragraph.
diff --git a/src/go/doc/comment/testdata/quote.txt b/src/go/doc/comment/testdata/quote.txt
new file mode 100644
index 0000000..b64adae
--- /dev/null
+++ b/src/go/doc/comment/testdata/quote.txt
@@ -0,0 +1,15 @@
+-- input --
+Doubled single quotes like `` and '' turn into Unicode double quotes,
+but single quotes ` and ' do not.
+Misplaced markdown fences ``` do not either.
+-- gofmt --
+Doubled single quotes like “ and ” turn into Unicode double quotes,
+but single quotes ` and ' do not.
+Misplaced markdown fences ``` do not either.
+-- text --
+Doubled single quotes like “ and ” turn into Unicode double quotes, but single
+quotes ` and ' do not. Misplaced markdown fences ``` do not either.
+-- html --
+<p>Doubled single quotes like “ and ” turn into Unicode double quotes,
+but single quotes ` and &apos; do not.
+Misplaced markdown fences ``` do not either.
diff --git a/src/go/doc/comment/testdata/text.txt b/src/go/doc/comment/testdata/text.txt
new file mode 100644
index 0000000..c4de6e2
--- /dev/null
+++ b/src/go/doc/comment/testdata/text.txt
@@ -0,0 +1,62 @@
+{"TextPrefix":"|", "TextCodePrefix": "@"}
+-- input --
+Hello, world
+ Code block here.
+More text.
+Tight list
+ - one
+ - two
+ - three
+Loose list
+ - one
+
+ - two
+
+ - three
+
+# Heading
+
+More text.
+-- gofmt --
+Hello, world
+
+ Code block here.
+
+More text.
+Tight list
+ - one
+ - two
+ - three
+
+Loose list
+
+ - one
+
+ - two
+
+ - three
+
+# Heading
+
+More text.
+-- text --
+|Hello, world
+|
+@Code block here.
+|
+|More text. Tight list
+| - one
+| - two
+| - three
+|
+|Loose list
+|
+| - one
+|
+| - two
+|
+| - three
+|
+|# Heading
+|
+|More text.
diff --git a/src/go/doc/comment/testdata/text2.txt b/src/go/doc/comment/testdata/text2.txt
new file mode 100644
index 0000000..a099d0b
--- /dev/null
+++ b/src/go/doc/comment/testdata/text2.txt
@@ -0,0 +1,14 @@
+{"TextWidth": -1}
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+Package gob manages streams of gobs - binary values exchanged between an Encoder (transmitter) and a Decoder (receiver). A typical use is transporting arguments and results of remote procedure calls (RPCs) such as those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream and is most efficient when a single Encoder is used to transmit a stream of values, amortizing the cost of compilation.
diff --git a/src/go/doc/comment/testdata/text3.txt b/src/go/doc/comment/testdata/text3.txt
new file mode 100644
index 0000000..75d2c37
--- /dev/null
+++ b/src/go/doc/comment/testdata/text3.txt
@@ -0,0 +1,28 @@
+{"TextWidth": 30}
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+Package gob manages streams
+of gobs - binary values
+exchanged between an Encoder
+(transmitter) and a Decoder
+(receiver). A typical use is
+transporting arguments and
+results of remote procedure
+calls (RPCs) such as those
+provided by package "net/rpc".
+
+The implementation compiles
+a custom codec for each data
+type in the stream and is
+most efficient when a single
+Encoder is used to transmit a
+stream of values, amortizing
+the cost of compilation.
diff --git a/src/go/doc/comment/testdata/text4.txt b/src/go/doc/comment/testdata/text4.txt
new file mode 100644
index 0000000..e429985
--- /dev/null
+++ b/src/go/doc/comment/testdata/text4.txt
@@ -0,0 +1,29 @@
+{"TextWidth": 29}
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+Package gob manages streams
+of gobs - binary values
+exchanged between an Encoder
+(transmitter) and a Decoder
+(receiver). A typical use
+is transporting arguments
+and results of remote
+procedure calls (RPCs) such
+as those provided by package
+"net/rpc".
+
+The implementation compiles
+a custom codec for each data
+type in the stream and is
+most efficient when a single
+Encoder is used to transmit a
+stream of values, amortizing
+the cost of compilation.
diff --git a/src/go/doc/comment/testdata/text5.txt b/src/go/doc/comment/testdata/text5.txt
new file mode 100644
index 0000000..2408fc5
--- /dev/null
+++ b/src/go/doc/comment/testdata/text5.txt
@@ -0,0 +1,38 @@
+{"TextWidth": 20}
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+Package gob
+manages streams
+of gobs - binary
+values exchanged
+between an Encoder
+(transmitter) and a
+Decoder (receiver).
+A typical use
+is transporting
+arguments and
+results of remote
+procedure calls
+(RPCs) such as those
+provided by package
+"net/rpc".
+
+The implementation
+compiles a custom
+codec for each
+data type in the
+stream and is most
+efficient when a
+single Encoder is
+used to transmit a
+stream of values,
+amortizing the cost
+of compilation.
diff --git a/src/go/doc/comment/testdata/text6.txt b/src/go/doc/comment/testdata/text6.txt
new file mode 100644
index 0000000..d6deff5
--- /dev/null
+++ b/src/go/doc/comment/testdata/text6.txt
@@ -0,0 +1,18 @@
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+Package gob manages streams of gobs - binary values exchanged between an Encoder
+(transmitter) and a Decoder (receiver). A typical use is transporting arguments
+and results of remote procedure calls (RPCs) such as those provided by package
+"net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream and
+is most efficient when a single Encoder is used to transmit a stream of values,
+amortizing the cost of compilation.
diff --git a/src/go/doc/comment/testdata/text7.txt b/src/go/doc/comment/testdata/text7.txt
new file mode 100644
index 0000000..c9fb6d3
--- /dev/null
+++ b/src/go/doc/comment/testdata/text7.txt
@@ -0,0 +1,21 @@
+{"TextPrefix": " "}
+-- input --
+Package gob manages streams of gobs - binary values exchanged between an
+Encoder (transmitter) and a Decoder (receiver). A typical use is
+transporting arguments and results of remote procedure calls (RPCs) such as
+those provided by package "net/rpc".
+
+The implementation compiles a custom codec for each data type in the stream
+and is most efficient when a single Encoder is used to transmit a stream of
+values, amortizing the cost of compilation.
+-- text --
+ Package gob manages streams of gobs - binary values
+ exchanged between an Encoder (transmitter) and a Decoder
+ (receiver). A typical use is transporting arguments and
+ results of remote procedure calls (RPCs) such as those
+ provided by package "net/rpc".
+
+ The implementation compiles a custom codec for each data
+ type in the stream and is most efficient when a single
+ Encoder is used to transmit a stream of values, amortizing
+ the cost of compilation.
diff --git a/src/go/doc/comment/testdata/text8.txt b/src/go/doc/comment/testdata/text8.txt
new file mode 100644
index 0000000..560ac95
--- /dev/null
+++ b/src/go/doc/comment/testdata/text8.txt
@@ -0,0 +1,94 @@
+{"TextWidth": 40}
+-- input --
+If the arguments have version suffixes (like @latest or @v1.0.0), "go install"
+builds packages in module-aware mode, ignoring the go.mod file in the current
+directory or any parent directory, if there is one. This is useful for
+installing executables without affecting the dependencies of the main module.
+To eliminate ambiguity about which module versions are used in the build, the
+arguments must satisfy the following constraints:
+
+ - Arguments must be package paths or package patterns (with "..." wildcards).
+ They must not be standard packages (like fmt), meta-patterns (std, cmd,
+ all), or relative or absolute file paths.
+
+ - All arguments must have the same version suffix. Different queries are not
+ allowed, even if they refer to the same version.
+
+ - All arguments must refer to packages in the same module at the same version.
+
+ - Package path arguments must refer to main packages. Pattern arguments
+ will only match main packages.
+
+ - No module is considered the "main" module. If the module containing
+ packages named on the command line has a go.mod file, it must not contain
+ directives (replace and exclude) that would cause it to be interpreted
+ differently than if it were the main module. The module must not require
+ a higher version of itself.
+
+ - Vendor directories are not used in any module. (Vendor directories are not
+ included in the module zip files downloaded by 'go install'.)
+
+If the arguments don't have version suffixes, "go install" may run in
+module-aware mode or GOPATH mode, depending on the GO111MODULE environment
+variable and the presence of a go.mod file. See 'go help modules' for details.
+If module-aware mode is enabled, "go install" runs in the context of the main
+module.
+-- text --
+If the arguments have version suffixes
+(like @latest or @v1.0.0), "go install"
+builds packages in module-aware mode,
+ignoring the go.mod file in the current
+directory or any parent directory,
+if there is one. This is useful for
+installing executables without affecting
+the dependencies of the main module.
+To eliminate ambiguity about which
+module versions are used in the build,
+the arguments must satisfy the following
+constraints:
+
+ - Arguments must be package paths
+ or package patterns (with "..."
+ wildcards). They must not be
+ standard packages (like fmt),
+ meta-patterns (std, cmd, all),
+ or relative or absolute file paths.
+
+ - All arguments must have the same
+ version suffix. Different queries
+ are not allowed, even if they refer
+ to the same version.
+
+ - All arguments must refer to packages
+ in the same module at the same
+ version.
+
+ - Package path arguments must refer
+ to main packages. Pattern arguments
+ will only match main packages.
+
+ - No module is considered the "main"
+ module. If the module containing
+ packages named on the command line
+ has a go.mod file, it must not
+ contain directives (replace and
+ exclude) that would cause it to be
+ interpreted differently than if it
+ were the main module. The module
+ must not require a higher version of
+ itself.
+
+ - Vendor directories are not used in
+ any module. (Vendor directories are
+ not included in the module zip files
+ downloaded by 'go install'.)
+
+If the arguments don't have version
+suffixes, "go install" may run in
+module-aware mode or GOPATH mode,
+depending on the GO111MODULE environment
+variable and the presence of a go.mod
+file. See 'go help modules' for details.
+If module-aware mode is enabled,
+"go install" runs in the context of the
+main module.
diff --git a/src/go/doc/comment/testdata/text9.txt b/src/go/doc/comment/testdata/text9.txt
new file mode 100644
index 0000000..07a64aa
--- /dev/null
+++ b/src/go/doc/comment/testdata/text9.txt
@@ -0,0 +1,12 @@
+{"TextPrefix":"|", "TextCodePrefix": "@"}
+-- input --
+Hello, world
+ Code block here.
+-- gofmt --
+Hello, world
+
+ Code block here.
+-- text --
+|Hello, world
+|
+@Code block here.
diff --git a/src/go/doc/comment/testdata/words.txt b/src/go/doc/comment/testdata/words.txt
new file mode 100644
index 0000000..63c7e1a
--- /dev/null
+++ b/src/go/doc/comment/testdata/words.txt
@@ -0,0 +1,10 @@
+-- input --
+This is an italicword and a linkedword and Unicöde.
+-- gofmt --
+This is an italicword and a linkedword and Unicöde.
+-- text --
+This is an italicword and a linkedword and Unicöde.
+-- markdown --
+This is an *italicword* and a [*linkedword*](https://example.com/linkedword) and Unicöde.
+-- html --
+<p>This is an <i>italicword</i> and a <a href="https://example.com/linkedword"><i>linkedword</i></a> and Unicöde.
diff --git a/src/go/doc/comment/testdata_test.go b/src/go/doc/comment/testdata_test.go
new file mode 100644
index 0000000..0676d86
--- /dev/null
+++ b/src/go/doc/comment/testdata_test.go
@@ -0,0 +1,202 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "internal/diff"
+ "internal/txtar"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// TestTestdata runs each txtar archive matching testdata/*.txt as a subtest.
+// A non-empty archive comment is decoded as JSON into the Printer used for
+// that archive. The first file in the archive must be named "input"; it is
+// parsed once, and every following file (dump, gofmt, html, markdown, or text)
+// holds the expected result of rendering the parsed Doc in that form.
+func TestTestdata(t *testing.T) {
+ // The Glob error is discarded; an empty match list fails the test below.
+ files, _ := filepath.Glob("testdata/*.txt")
+ if len(files) == 0 {
+ t.Fatalf("no testdata")
+ }
+ // One Parser, configured here, is shared by all subtests.
+ var p Parser
+ p.Words = map[string]string{
+ "italicword": "",
+ "linkedword": "https://example.com/linkedword",
+ }
+ // Resolve the package name "comment" to this package's import path
+ // and defer to DefaultLookupPackage for every other name.
+ p.LookupPackage = func(name string) (importPath string, ok bool) {
+ if name == "comment" {
+ return "go/doc/comment", true
+ }
+ return DefaultLookupPackage(name)
+ }
+ // Only Parser.Parse, Doc, and NoURL are reported as existing symbols.
+ p.LookupSym = func(recv, name string) (ok bool) {
+ if recv == "Parser" && name == "Parse" ||
+ recv == "" && name == "Doc" ||
+ recv == "" && name == "NoURL" {
+ return true
+ }
+ return false
+ }
+
+ stripDollars := func(b []byte) []byte {
+ // Remove trailing $ on lines.
+ // They make it easier to see lines with trailing spaces,
+ // as well as turning them into lines without trailing spaces,
+ // in case editors remove trailing spaces.
+ return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n"))
+ }
+ for _, file := range files {
+ t.Run(filepath.Base(file), func(t *testing.T) {
+ // Each archive gets a fresh Printer, optionally configured
+ // by the JSON in the archive comment.
+ var pr Printer
+ a, err := txtar.ParseFile(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(a.Comment) > 0 {
+ err := json.Unmarshal(a.Comment, &pr)
+ if err != nil {
+ t.Fatalf("unmarshalling top json: %v", err)
+ }
+ }
+ if len(a.Files) < 1 || a.Files[0].Name != "input" {
+ t.Fatalf("first file is not %q", "input")
+ }
+ d := p.Parse(string(stripDollars(a.Files[0].Data)))
+ for _, f := range a.Files[1:] {
+ want := stripDollars(f.Data)
+ // Collapse a run of trailing newlines in the expected output to a single one.
+ for len(want) >= 2 && want[len(want)-1] == '\n' && want[len(want)-2] == '\n' {
+ want = want[:len(want)-1]
+ }
+ // The file name selects which rendering of d is compared with want.
+ var out []byte
+ switch f.Name {
+ default:
+ t.Fatalf("unknown output file %q", f.Name)
+ case "dump":
+ out = dump(d)
+ case "gofmt":
+ out = pr.Comment(d)
+ case "html":
+ out = pr.HTML(d)
+ case "markdown":
+ out = pr.Markdown(d)
+ case "text":
+ out = pr.Text(d)
+ }
+ if string(out) != string(want) {
+ t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out))
+ }
+ }
+ })
+ }
+}
+
+// dump returns the textual dump of d written by dumpTo.
+// TestTestdata compares it against the "dump" file of a test archive.
+func dump(d *Doc) []byte {
+ var out bytes.Buffer
+ dumpTo(&out, 0, d)
+ return out.Bytes()
+}
+
+// dumpTo writes a description of x to out.
+// x is a *Doc, one of the node or slice types handled in the switch below,
+// or a string; a value of any other type is written as "?" followed by its Go type.
+// The label for x is written at the current position, and its children are
+// written on lines of their own (started by dumpNL) one tab deeper than indent.
+func dumpTo(out *bytes.Buffer, indent int, x any) {
+ switch x := x.(type) {
+ default:
+ fmt.Fprintf(out, "?%T", x)
+
+ case *Doc:
+ fmt.Fprintf(out, "Doc")
+ dumpTo(out, indent+1, x.Content)
+ // Link definitions, if any, are listed under a "Links" entry after the content.
+ if len(x.Links) > 0 {
+ dumpNL(out, indent+1)
+ fmt.Fprintf(out, "Links")
+ dumpTo(out, indent+2, x.Links)
+ }
+ // The Doc case is the only one that ends its output with a newline.
+ fmt.Fprintf(out, "\n")
+
+ case []*LinkDef:
+ // Each element of a slice goes on a new line at the given indent.
+ for _, def := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, def)
+ }
+
+ case *LinkDef:
+ fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", x.Used, x.Text, x.URL)
+
+ case []Block:
+ for _, blk := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, blk)
+ }
+
+ case *Heading:
+ fmt.Fprintf(out, "Heading")
+ dumpTo(out, indent+1, x.Text)
+
+ case *List:
+ fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", x.ForceBlankBefore, x.ForceBlankBetween)
+ dumpTo(out, indent+1, x.Items)
+
+ case []*ListItem:
+ for _, item := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, item)
+ }
+
+ case *ListItem:
+ fmt.Fprintf(out, "Item Number=%q", x.Number)
+ dumpTo(out, indent+1, x.Content)
+
+ case *Paragraph:
+ fmt.Fprintf(out, "Paragraph")
+ dumpTo(out, indent+1, x.Text)
+
+ case *Code:
+ fmt.Fprintf(out, "Code")
+ dumpTo(out, indent+1, x.Text)
+
+ case []Text:
+ for _, t := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, t)
+ }
+
+ case Plain:
+ // Text without a newline is quoted on the same line as its label;
+ // text containing a newline is dumped line by line, one level deeper.
+ if !strings.Contains(string(x), "\n") {
+ fmt.Fprintf(out, "Plain %q", string(x))
+ } else {
+ fmt.Fprintf(out, "Plain")
+ dumpTo(out, indent+1, string(x))
+ }
+
+ case Italic:
+ // Same single-line versus multi-line treatment as Plain.
+ if !strings.Contains(string(x), "\n") {
+ fmt.Fprintf(out, "Italic %q", string(x))
+ } else {
+ fmt.Fprintf(out, "Italic")
+ dumpTo(out, indent+1, string(x))
+ }
+
+ case string:
+ // SplitAfter keeps each line's trailing "\n", so it appears in the
+ // quoted output; empty pieces are skipped.
+ for _, line := range strings.SplitAfter(x, "\n") {
+ if line != "" {
+ dumpNL(out, indent)
+ fmt.Fprintf(out, "%q", line)
+ }
+ }
+
+ case *Link:
+ fmt.Fprintf(out, "Link %q", x.URL)
+ dumpTo(out, indent+1, x.Text)
+
+ case *DocLink:
+ fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", x.ImportPath, x.Recv, x.Name)
+ dumpTo(out, indent+1, x.Text)
+ }
+}
+
+// dumpNL writes a newline followed by n tabs to out.
+func dumpNL(out *bytes.Buffer, n int) {
+ out.WriteByte('\n')
+ for i := 0; i < n; i++ {
+ out.WriteByte('\t')
+ }
+}
diff --git a/src/go/doc/comment/text.go b/src/go/doc/comment/text.go
new file mode 100644
index 0000000..6f9c2e2
--- /dev/null
+++ b/src/go/doc/comment/text.go
@@ -0,0 +1,337 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "strings"
+ "unicode/utf8"
+)
+
+// A textPrinter holds the state needed for printing a Doc as plain text.
+type textPrinter struct {
+ *Printer
+ // long is scratch space: text assembles a text sequence into it
+ // as one long line and resets it after splitting that line into words.
+ long strings.Builder
+ // prefix is the line prefix for paragraphs, headings, and list items;
+ // Text copies it from Printer.TextPrefix.
+ prefix string
+ // codePrefix is written before each non-empty line of a code block;
+ // Text takes it from Printer.TextCodePrefix, or uses TextPrefix plus a tab
+ // when that is empty.
+ codePrefix string
+ // width is the line width in runes handed to wrap (less any indent).
+ // Text replaces a zero Printer.TextWidth with 80 minus the rune count
+ // of prefix; a negative width turns wrapping off.
+ width int
+}
+
+// Text returns a textual formatting of the Doc.
+// See the [Printer] documentation for ways to customize the text output.
+//
+// An empty TextCodePrefix is replaced by TextPrefix followed by a tab,
+// and a zero TextWidth by 80 minus the rune count of TextPrefix.
+// Link definitions marked Used are listed after the content,
+// preceded by a blank line.
+func (p *Printer) Text(d *Doc) []byte {
+ tp := &textPrinter{
+ Printer: p,
+ prefix: p.TextPrefix,
+ codePrefix: p.TextCodePrefix,
+ width: p.TextWidth,
+ }
+ if tp.codePrefix == "" {
+ tp.codePrefix = p.TextPrefix + "\t"
+ }
+ if tp.width == 0 {
+ tp.width = 80 - utf8.RuneCountInString(tp.prefix)
+ }
+
+ var out bytes.Buffer
+ for i, x := range d.Content {
+ // Separate blocks with a prefix-only line where blankBefore asks for one.
+ // writeNL strips any trailing spaces or tabs the prefix leaves on that line.
+ if i > 0 && blankBefore(x) {
+ out.WriteString(tp.prefix)
+ writeNL(&out)
+ }
+ tp.block(&out, x)
+ }
+ // Emit the link definition section only if at least one definition was used.
+ anyUsed := false
+ for _, def := range d.Links {
+ if def.Used {
+ anyUsed = true
+ break
+ }
+ }
+ if anyUsed {
+ // The separating blank line and the definitions are written without the prefix.
+ writeNL(&out)
+ for _, def := range d.Links {
+ if def.Used {
+ fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL)
+ }
+ }
+ }
+ return out.Bytes()
+}
+
+// writeNL appends a newline to out, first removing any spaces and tabs
+// at the end of the buffer so the finished line has no trailing blanks.
+func writeNL(out *bytes.Buffer) {
+ // Count the trailing spaces and tabs, then truncate them.
+ data := out.Bytes()
+ n := 0
+ for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') {
+ n++
+ }
+ if n > 0 {
+ out.Truncate(len(data) - n)
+ }
+ out.WriteByte('\n')
+}
+
+// block prints the block x to out.
+// A Paragraph or Heading is written as wrapped text after p.prefix
+// (a Heading also gets a leading "# "). Each non-empty line of a Code block
+// is written after p.codePrefix. List items are written one after another,
+// separated by a prefix-only line when the list reports BlankBetween.
+// A block of any other type is printed as "?" followed by its Go type.
+func (p *textPrinter) block(out *bytes.Buffer, x Block) {
+ switch x := x.(type) {
+ default:
+ fmt.Fprintf(out, "?%T\n", x)
+
+ case *Paragraph:
+ out.WriteString(p.prefix)
+ p.text(out, "", x.Text)
+
+ case *Heading:
+ out.WriteString(p.prefix)
+ out.WriteString("# ")
+ p.text(out, "", x.Text)
+
+ case *Code:
+ // Peel off one line at a time until nothing remains.
+ text := x.Text
+ for text != "" {
+ var line string
+ line, text, _ = strings.Cut(text, "\n")
+ // Empty lines get no prefix, only the newline.
+ if line != "" {
+ out.WriteString(p.codePrefix)
+ out.WriteString(line)
+ }
+ writeNL(out)
+ }
+
+ case *List:
+ loose := x.BlankBetween()
+ for i, item := range x.Items {
+ if i > 0 && loose {
+ out.WriteString(p.prefix)
+ writeNL(out)
+ }
+ out.WriteString(p.prefix)
+ out.WriteString(" ")
+ // An item with an empty Number gets a dash marker;
+ // otherwise the marker is the Number followed by a period.
+ if item.Number == "" {
+ out.WriteString(" - ")
+ } else {
+ out.WriteString(item.Number)
+ out.WriteString(". ")
+ }
+ for i, blk := range item.Content {
+ const fourSpace = " "
+ // Paragraphs after the first are preceded by an empty line
+ // and start with the prefix and the continuation indent.
+ if i > 0 {
+ writeNL(out)
+ out.WriteString(p.prefix)
+ out.WriteString(fourSpace)
+ }
+ // Item content is assumed to hold only paragraphs:
+ // the type assertion panics on any other Block.
+ p.text(out, fourSpace, blk.(*Paragraph).Text)
+ }
+ }
+ }
+}
+
+// text prints the text sequence x to out.
+// The sequence is flattened to a single line, split into words on white space,
+// and wrapped to p.width minus the rune count of indent;
+// a negative p.width, or a sequence with no words, yields exactly one line.
+// The first line is written as is (block has already written its prefix
+// and any list marker or indent); later lines start with p.prefix and indent.
+func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) {
+ p.oneLongLine(&p.long, x)
+ words := strings.Fields(p.long.String())
+ p.long.Reset()
+
+ // seq holds the index of the first word of each line, plus a final len(words).
+ var seq []int
+ if p.width < 0 || len(words) == 0 {
+ seq = []int{0, len(words)} // one long line
+ } else {
+ seq = wrap(words, p.width-utf8.RuneCountInString(indent))
+ }
+ for i := 0; i+1 < len(seq); i++ {
+ if i > 0 {
+ out.WriteString(p.prefix)
+ out.WriteString(indent)
+ }
+ for j, w := range words[seq[i]:seq[i+1]] {
+ if j > 0 {
+ out.WriteString(" ")
+ }
+ out.WriteString(w)
+ }
+ writeNL(out)
+ }
+}
+
+// oneLongLine prints the text sequence x to out as one long line,
+// without worrying about line wrapping.
+// Explicit links have the [ ] dropped to improve readability.
+// Plain and Italic text is copied verbatim; a Link or DocLink contributes
+// only its Text, recursively. Any other Text type is skipped.
+func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) {
+ for _, t := range x {
+ switch t := t.(type) {
+ case Plain:
+ out.WriteString(string(t))
+ case Italic:
+ out.WriteString(string(t))
+ case *Link:
+ p.oneLongLine(out, t.Text)
+ case *DocLink:
+ p.oneLongLine(out, t.Text)
+ }
+ }
+}
+
+// wrap wraps words into lines of at most max runes,
+// minimizing the sum of the squares of the leftover lengths
+// at the end of each line (except the last, of course),
+// with a preference for ending lines at punctuation (.,:;).
+//
+// The returned slice gives the indexes of the first words
+// on each line in the wrapped text with a final entry of len(words).
+// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]],
+// ..., words[seq[len(seq)-2]:seq[len(seq)-1]].
+//
+// The words slice must be non-empty, because the deque below is made with
+// length 1 and capacity len(words); text handles the no-words case itself.
+// Each word must be non-empty too, because wrapPenalty reads its last byte.
+//
+// The implementation runs in O(n log n) time, where n = len(words),
+// using the algorithm described in D. S. Hirschberg and L. L. Larmore,
+// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143.
+//
+// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60
+func wrap(words []string, max int) (seq []int) {
+ // The algorithm requires that our scoring function be concave,
+ // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁,
+ // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀).
+ //
+ // Our weights are two-element pairs [hi, lo]
+ // ordered lexicographically: hi is compared first and lo breaks ties.
+ // The hi entry counts the weight for lines that are longer than max,
+ // and the lo entry counts the weight for lines that are not.
+ // This forces the algorithm to first minimize the number of lines
+ // that are longer than max, which correspond to lines with
+ // single very long words. Having done that, it can move on to
+ // minimizing the lo score, which is more interesting.
+ //
+ // The lo score is the sum for each line of the square of the
+ // number of spaces remaining at the end of the line and a
+ // penalty of 64 given out for not ending the line in a
+ // punctuation character (.,:;).
+ // The penalty is somewhat arbitrarily chosen by trying
+ // different amounts and judging how nice the wrapped text looks.
+ // Roughly speaking, using 64 means that we are willing to
+ // end a line with eight blank spaces in order to end at a
+ // punctuation character, even if the next word would fit in
+ // those spaces.
+ //
+ // We care about ending in punctuation characters because
+ // it makes the text easier to skim if not too many sentences
+ // or phrases begin with a single word on the previous line.
+
+ // A score is the score (also called weight) for a given line.
+ // add and cmp add and compare scores.
+ type score struct {
+ hi int64
+ lo int64
+ }
+ add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} }
+ cmp := func(s, t score) int {
+ switch {
+ case s.hi < t.hi:
+ return -1
+ case s.hi > t.hi:
+ return +1
+ case s.lo < t.lo:
+ return -1
+ case s.lo > t.lo:
+ return +1
+ }
+ return 0
+ }
+
+ // total[j] is the total number of runes
+ // (including separating spaces) in words[:j].
+ total := make([]int, len(words)+1)
+ total[0] = 0
+ for i, s := range words {
+ total[1+i] = total[i] + utf8.RuneCountInString(s) + 1
+ }
+
+ // weight returns weight(i, j).
+ weight := func(i, j int) score {
+ // On the last line, there is zero weight for being too short.
+ // n is the rune length of words[i:j] joined by single spaces.
+ n := total[j] - 1 - total[i]
+ if j == len(words) && n <= max {
+ return score{0, 0}
+ }
+
+ // Otherwise the weight is the penalty plus the square of the number of
+ // characters remaining on the line or by which the line goes over.
+ // In the latter case, that value goes in the hi part of the score.
+ // (See note above.)
+ p := wrapPenalty(words[j-1])
+ v := int64(max-n) * int64(max-n)
+ if n > max {
+ return score{v, p}
+ }
+ return score{0, v + p}
+ }
+
+ // The rest of this function is “The Basic Algorithm” from
+ // Hirschberg and Larmore's conference paper,
+ // using the same names as in the paper.
+ f := []score{{0, 0}}
+ g := func(i, j int) score { return add(f[i], weight(i, j)) }
+
+ bridge := func(a, b, c int) bool {
+ k := c + sort.Search(len(words)+1-c, func(k int) bool {
+ k += c
+ return cmp(g(a, k), g(b, k)) > 0
+ })
+ if k > len(words) {
+ return true
+ }
+ return cmp(g(c, k), g(b, k)) <= 0
+ }
+
+ // d is a one-ended deque implemented as a slice.
+ d := make([]int, 1, len(words))
+ d[0] = 0
+ bestleft := make([]int, 1, len(words))
+ bestleft[0] = -1
+ for m := 1; m < len(words); m++ {
+ f = append(f, g(d[0], m))
+ bestleft = append(bestleft, d[0])
+ for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
+ d = d[1:] // “Retire”
+ }
+ for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) {
+ d = d[:len(d)-1] // “Fire”
+ }
+ if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 {
+ d = append(d, m) // “Hire”
+ // The next few lines are not in the paper but are necessary
+ // to handle two-word inputs correctly. It appears to be
+ // just a bug in the paper's pseudocode.
+ if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 {
+ d = d[1:]
+ }
+ }
+ }
+ bestleft = append(bestleft, d[0])
+
+ // Recover least weight sequence from bestleft.
+ // The first pass counts the entries (plus one for the leading 0);
+ // the second fills seq from the back, leaving seq[0] == 0.
+ n := 1
+ for m := len(words); m > 0; m = bestleft[m] {
+ n++
+ }
+ seq = make([]int, n)
+ for m := len(words); m > 0; m = bestleft[m] {
+ n--
+ seq[n] = m
+ }
+ return seq
+}
+
+// wrapPenalty is the penalty for inserting a line break after word s:
+// 0 if the last byte of s is one of . , : ; and 64 otherwise.
+// s must be non-empty.
+func wrapPenalty(s string) int64 {
+ switch s[len(s)-1] {
+ case '.', ',', ':', ';':
+ return 0
+ }
+ return 64
+}
diff --git a/src/go/doc/comment/wrap_test.go b/src/go/doc/comment/wrap_test.go
new file mode 100644
index 0000000..f9802c9
--- /dev/null
+++ b/src/go/doc/comment/wrap_test.go
@@ -0,0 +1,141 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "flag"
+ "fmt"
+ "math/rand"
+ "testing"
+ "time"
+ "unicode/utf8"
+)
+
+var wrapSeed = flag.Int64("wrapseed", 0, "use `seed` for wrap test (default auto-seeds)")
+
+func TestWrap(t *testing.T) {
+ if *wrapSeed == 0 {
+ *wrapSeed = time.Now().UnixNano()
+ }
+ t.Logf("-wrapseed=%#x\n", *wrapSeed)
+ r := rand.New(rand.NewSource(*wrapSeed))
+
+ // Generate words of random length.
+ s := "1234567890αβcdefghijklmnopqrstuvwxyz"
+ sN := utf8.RuneCountInString(s)
+ var words []string
+ for i := 0; i < 100; i++ {
+ n := 1 + r.Intn(sN-1)
+ if n >= 12 {
+ n++ // extra byte for β
+ }
+ if n >= 11 {
+ n++ // extra byte for α
+ }
+ words = append(words, s[:n])
+ }
+
+ for n := 1; n <= len(words) && !t.Failed(); n++ {
+ t.Run(fmt.Sprint("n=", n), func(t *testing.T) {
+ words := words[:n]
+ t.Logf("words: %v", words)
+ for max := 1; max < 100 && !t.Failed(); max++ {
+ t.Run(fmt.Sprint("max=", max), func(t *testing.T) {
+ seq := wrap(words, max)
+
+ // Compute score for seq.
+ start := 0
+ score := int64(0)
+ if len(seq) == 0 {
+ t.Fatalf("wrap seq is empty")
+ }
+ if seq[0] != 0 {
+ t.Fatalf("wrap seq does not start with 0")
+ }
+ for _, n := range seq[1:] {
+ if n <= start {
+ t.Fatalf("wrap seq is non-increasing: %v", seq)
+ }
+ if n > len(words) {
+ t.Fatalf("wrap seq contains %d > %d: %v", n, len(words), seq)
+ }
+ size := -1
+ for _, s := range words[start:n] {
+ size += 1 + utf8.RuneCountInString(s)
+ }
+ if n-start == 1 && size >= max {
+ // no score
+ } else if size > max {
+ t.Fatalf("wrap used overlong line %d:%d: %v", start, n, words[start:n])
+ } else if n != len(words) {
+ score += int64(max-size)*int64(max-size) + wrapPenalty(words[n-1])
+ }
+ start = n
+ }
+ if start != len(words) {
+ t.Fatalf("wrap seq does not use all words (%d < %d): %v", start, len(words), seq)
+ }
+
+ // Check that score matches slow reference implementation.
+ slowSeq, slowScore := wrapSlow(words, max)
+ if score != slowScore {
+ t.Fatalf("wrap score = %d != wrapSlow score %d\nwrap: %v\nslow: %v", score, slowScore, seq, slowSeq)
+ }
+ })
+ }
+ })
+ }
+}
+
+// wrapSlow is an O(n²) reference implementation for wrap.
+// It returns a minimal-score sequence along with the score.
+// It is OK if wrap returns a different sequence as long as that
+// sequence has the same score.
+func wrapSlow(words []string, max int) (seq []int, score int64) {
+ // Quadratic dynamic programming algorithm for line wrapping problem.
+ // best[i] tracks the best score possible for words[:i],
+ // assuming that for i < len(words) the line breaks after those words.
+ // bestleft[i] tracks the previous line break for best[i].
+ best := make([]int64, len(words)+1)
+ bestleft := make([]int, len(words)+1)
+ best[0] = 0
+ for i, w := range words {
+ if utf8.RuneCountInString(w) >= max {
+ // Overlong word must appear on line by itself. No effect on score.
+ best[i+1] = best[i]
+ continue
+ }
+ best[i+1] = 1e18
+ p := wrapPenalty(w)
+ n := -1
+ for j := i; j >= 0; j-- {
+ n += 1 + utf8.RuneCountInString(words[j])
+ if n > max {
+ break
+ }
+ line := int64(n-max)*int64(n-max) + p
+ if i == len(words)-1 {
+ line = 0 // no score for final line being too short
+ }
+ s := best[j] + line
+ if best[i+1] > s {
+ best[i+1] = s
+ bestleft[i+1] = j
+ }
+ }
+ }
+
+ // Recover least weight sequence from bestleft.
+ n := 1
+ for m := len(words); m > 0; m = bestleft[m] {
+ n++
+ }
+ seq = make([]int, n)
+ for m := len(words); m > 0; m = bestleft[m] {
+ n--
+ seq[n] = m
+ }
+ return seq, best[len(words)]
+}
diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go
new file mode 100644
index 0000000..004ae9d
--- /dev/null
+++ b/src/go/doc/comment_test.go
@@ -0,0 +1,67 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package doc
+
+import (
+ "bytes"
+ "go/parser"
+ "go/token"
+ "internal/diff"
+ "testing"
+)
+
+func TestComment(t *testing.T) {
+ fset := token.NewFileSet()
+ pkgs, err := parser.ParseDir(fset, "testdata/pkgdoc", nil, parser.ParseComments)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if pkgs["pkgdoc"] == nil {
+ t.Fatal("missing package pkgdoc")
+ }
+ pkg := New(pkgs["pkgdoc"], "testdata/pkgdoc", 0)
+
+ var (
+ input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n"
+ wantHTML = `<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods.` + "\n"
+ wantOldHTML = "<p>[T] and [U] are <i>types</i>, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n"
+ wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods.\n"
+ wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.M1 and G.M2 are generic methods.\n"
+ wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n"
+ wantSynopsis = "T and U are types, and T.M is a method, but [V] is a broken link."
+ wantOldSynopsis = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link."
+ )
+
+ if b := pkg.HTML(input); string(b) != wantHTML {
+ t.Errorf("%s", diff.Diff("pkg.HTML", b, "want", []byte(wantHTML)))
+ }
+ if b := pkg.Markdown(input); string(b) != wantMarkdown {
+ t.Errorf("%s", diff.Diff("pkg.Markdown", b, "want", []byte(wantMarkdown)))
+ }
+ if b := pkg.Text(input); string(b) != wantText {
+ t.Errorf("%s", diff.Diff("pkg.Text", b, "want", []byte(wantText)))
+ }
+ if b := pkg.Synopsis(input); b != wantSynopsis {
+ t.Errorf("%s", diff.Diff("pkg.Synopsis", []byte(b), "want", []byte(wantText)))
+ }
+
+ var buf bytes.Buffer
+
+ buf.Reset()
+ ToHTML(&buf, input, map[string]string{"types": ""})
+ if b := buf.Bytes(); string(b) != wantOldHTML {
+ t.Errorf("%s", diff.Diff("ToHTML", b, "want", []byte(wantOldHTML)))
+ }
+
+ buf.Reset()
+ ToText(&buf, input, "", "\t", 80)
+ if b := buf.Bytes(); string(b) != wantOldText {
+ t.Errorf("%s", diff.Diff("ToText", b, "want", []byte(wantOldText)))
+ }
+
+ if b := Synopsis(input); b != wantOldSynopsis {
+ t.Errorf("%s", diff.Diff("Synopsis", []byte(b), "want", []byte(wantOldText)))
+ }
+}