1 files changed, 1262 insertions, 0 deletions
diff --git a/src/go/doc/comment/parse.go b/src/go/doc/comment/parse.go
new file mode 100644
index 0000000..62a0f8f
--- /dev/null
+++ b/src/go/doc/comment/parse.go
@@ -0,0 +1,1262 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+	"sort"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// A Doc is a parsed Go doc comment.
+// It is the result type of [Parser.Parse].
+type Doc struct {
+	// Content is the sequence of content blocks in the comment,
+	// in the order the parser found them.
+	Content []Block
+
+	// Links is the link definitions in the comment.
+	// The parser records every definition it finds, in order,
+	// whether or not any text refers to it (see [LinkDef].Used).
+	Links []*LinkDef
+}
+
+// A LinkDef is a single link definition,
+// written in a comment as a line of the form "[Text]: URL".
+// The parser sets Used when bracketed text in the comment
+// matches the definition's Text.
+type LinkDef struct {
+	Text string // the link text
+	URL  string // the link URL
+	Used bool   // whether the comment uses the definition
+}
+
+// A Block is block-level content in a doc comment,
+// one of [*Code], [*Heading], [*List], or [*Paragraph].
+type Block interface {
+	// block is unexported, so only types declared in this package
+	// can implement Block.
+	block()
+}
+
+// A Heading is a doc comment heading.
+type Heading struct {
+	Text []Text // the heading text
+}
+
+// block marks *Heading as an implementation of [Block].
+func (*Heading) block() {}
+
+// A List is a numbered or bullet list.
+// Lists are always non-empty: len(Items) > 0.
+// In a numbered list, every Items[i].Number is a non-empty string.
+// In a bullet list, every Items[i].Number is an empty string.
+type List struct {
+	// Items is the list items, in the order they appear in the comment.
+	Items []*ListItem
+
+	// ForceBlankBefore indicates that the list must be
+	// preceded by a blank line when reformatting the comment,
+	// overriding the usual conditions. See the BlankBefore method.
+	//
+	// The comment parser sets ForceBlankBefore for any list
+	// that is preceded by a blank line, to make sure
+	// the blank line is preserved when printing.
+	ForceBlankBefore bool
+
+	// ForceBlankBetween indicates that list items must be
+	// separated by blank lines when reformatting the comment,
+	// overriding the usual conditions. See the BlankBetween method.
+	//
+	// The comment parser sets ForceBlankBetween for any list
+	// whose lines include a blank line, whether that line separates
+	// two items or two paragraphs of one item, to make sure
+	// the blank lines are preserved when printing.
+	ForceBlankBetween bool
+}
+
+// block marks *List as an implementation of [Block].
+func (*List) block() {}
+
+// BlankBefore reports whether a reformatting of the comment
+// should include a blank line before the list.
+//
+// A blank line is required when [List].ForceBlankBefore is set.
+// Otherwise the answer is the same as for [List.BlankBetween]:
+// a list whose items need blank lines between them
+// (because some item has multiple paragraphs, or because
+// blank lines between items have been forced)
+// must itself be preceded by a blank line.
+func (l *List) BlankBefore() bool {
+	if l.ForceBlankBefore {
+		return true
+	}
+	return l.BlankBetween()
+}
+
+// BlankBetween reports whether a reformatting of the comment
+// should include a blank line between each pair of list items.
+//
+// Blank line separators are required when [List].ForceBlankBetween is set.
+// Otherwise they are required as soon as any item's Content does not
+// consist of exactly one block; for parsed comments that means an item
+// with multiple paragraphs (or an item left with no content at all).
+func (l *List) BlankBetween() bool {
+	if !l.ForceBlankBetween {
+		for i := range l.Items {
+			// Multiple paragraphs in a parsed item can only come from
+			// blank lines, which also set ForceBlankBetween, so for
+			// parsed lists this mostly matters for hand-built values.
+			if n := len(l.Items[i].Content); n != 1 {
+				return true
+			}
+		}
+		return false
+	}
+	return true
+}
+
+// A ListItem is a single item in a numbered or bullet list.
+type ListItem struct {
+	// Number is a decimal string in a numbered list
+	// or an empty string in a bullet list.
+	// The parser stores the digits exactly as written,
+	// without the trailing "." or ")".
+	Number string // "1", "2", ...; "" for bullet list
+
+	// Content is the list content.
+	// Currently, restrictions in the parser and printer
+	// require every element of Content to be a *Paragraph.
+	Content []Block // Content of this item.
+}
+
+// A Paragraph is a paragraph of text.
+type Paragraph struct {
+	// Text is the paragraph content as a sequence of text elements.
+	Text []Text
+}
+
+// block marks *Paragraph as an implementation of [Block].
+func (*Paragraph) block() {}
+
+// A Code is a preformatted code block.
+type Code struct {
+	// Text is the preformatted text, ending with a newline character.
+	// It may be multiple lines, each of which ends with a newline character.
+	// It is never empty, nor does it start or end with a blank line.
+	// The parser removes the indentation common to all of the block's lines.
+	Text string
+}
+
+// block marks *Code as an implementation of [Block].
+func (*Code) block() {}
+
+// A Text is text-level content in a doc comment,
+// one of [Plain], [Italic], [*Link], or [*DocLink].
+type Text interface {
+	// text is unexported, so only types declared in this package
+	// can implement Text.
+	text()
+}
+
+// A Plain is a string rendered as plain text (not italicized).
+type Plain string
+
+// text marks Plain as an implementation of [Text].
+func (Plain) text() {}
+
+// An Italic is a string rendered as italicized text.
+type Italic string
+
+// text marks Italic as an implementation of [Text].
+func (Italic) text() {}
+
+// A Link is a link to a specific URL.
+//
+// The parser creates links with Auto set for literal URLs found in the
+// text and for words listed in [Parser].Words with a non-empty target;
+// links created from bracketed text that matches a link definition
+// have Auto unset.
+type Link struct {
+	Auto bool   // is this an automatic (implicit) link of a literal URL?
+	Text []Text // text of link
+	URL  string // target URL of link
+}
+
+// text marks *Link as an implementation of [Text].
+func (*Link) text() {}
+
+// A DocLink is a link to documentation for a Go package or symbol.
+type DocLink struct {
+	Text []Text // text of link
+
+	// ImportPath, Recv, and Name identify the Go package or symbol
+	// that is the link target. The potential combinations of
+	// non-empty fields are:
+	//  - ImportPath: a link to another package
+	//  - ImportPath, Name: a link to a const, func, type, or var in another package
+	//  - ImportPath, Recv, Name: a link to a method in another package
+	//  - Name: a link to a const, func, type, or var in this package
+	//  - Recv, Name: a link to a method in this package
+	ImportPath string // import path
+	Recv       string // receiver type, without any pointer star, for methods
+	Name       string // const, func, type, var, or method name
+}
+
+// text marks *DocLink as an implementation of [Text].
+func (*DocLink) text() {}
+
+// A Parser is a doc comment parser.
+// The fields in the struct can be filled in before calling Parse
+// in order to customize the details of the parsing process.
+// Every field may be left unset: Parse treats a nil Words map as empty
+// and substitutes defaults for nil lookup functions.
+type Parser struct {
+	// Words is a map of Go identifier words that
+	// should be italicized and potentially linked.
+	// If Words[w] is the empty string, then the word w
+	// is only italicized. Otherwise it is linked, using
+	// Words[w] as the link target.
+	// Words corresponds to the [go/doc.ToHTML] words parameter.
+	Words map[string]string
+
+	// LookupPackage resolves a package name to an import path.
+	//
+	// If LookupPackage(name) returns ok == true, then [name]
+	// (or [name.Sym] or [name.Sym.Method])
+	// is considered a documentation link to importPath's package docs.
+	// It is valid to return "", true, in which case name is considered
+	// to refer to the current package.
+	//
+	// If LookupPackage(name) returns ok == false,
+	// then [name] (or [name.Sym] or [name.Sym.Method])
+	// will not be considered a documentation link,
+	// except in the case where name is the full (but single-element) import path
+	// of a package in the standard library, such as in [math] or [io.Reader].
+	// LookupPackage is still called for such names,
+	// in order to permit references to imports of other packages
+	// with the same package names.
+	//
+	// LookupPackage is not consulted for names containing a slash;
+	// those are treated as full import paths.
+	//
+	// Setting LookupPackage to nil is equivalent to setting it to
+	// a function that always returns "", false.
+	LookupPackage func(name string) (importPath string, ok bool)
+
+	// LookupSym reports whether a symbol name or method name
+	// exists in the current package.
+	//
+	// If LookupSym("", "Name") returns true, then [Name]
+	// is considered a documentation link for a const, func, type, or var.
+	//
+	// Similarly, if LookupSym("Recv", "Name") returns true,
+	// then [Recv.Name] is considered a documentation link for
+	// type Recv's method Name.
+	//
+	// Setting LookupSym to nil is equivalent to setting it to a function
+	// that always returns false.
+	LookupSym func(recv, name string) (ok bool)
+}
+
+// parseDoc is parsing state for a single doc comment.
+// It embeds the Parser being used and the Doc being built,
+// so the fields of both are directly accessible.
+type parseDoc struct {
+	*Parser
+	*Doc
+	// links maps link text to its definition.
+	// When a text is defined more than once, the first definition wins.
+	links map[string]*LinkDef
+	// lines is the comment text split into lines, after unindent.
+	lines []string
+	// lookupSym is Parser.LookupSym, or a function that always
+	// returns false when Parser.LookupSym is nil; it is never nil.
+	lookupSym func(recv, name string) bool
+}
+
+// lookupPkg resolves the pkg part of a doc link such as
+// [pkg], [pkg.Name], or [pkg.Recv.Name] to an import path.
+//
+// A pkg containing a slash is taken to be a full import path:
+// it is returned unchanged with ok = true if it is a valid import path,
+// and rejected otherwise.
+//
+// A pkg without a slash is probably a simple package name like "rand"
+// (not "crypto/rand" or "math/rand"). d.LookupPackage, when set, gives
+// the caller a way to resolve such names against the imports of the
+// surrounding package.
+//
+// The two cases collide for single-element standard library paths such
+// as "math", which are full import paths without a slash. d.LookupPackage
+// gets the first chance at those too, in case a different package is
+// imported under that name; only if it declines does the built-in list
+// of single-element standard library packages apply.
+func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
+	switch {
+	case strings.Contains(pkg, "/"):
+		// Full import path: accept or reject on syntax alone.
+		if !validImportPath(pkg) {
+			return "", false
+		}
+		return pkg, true
+
+	case d.LookupPackage != nil:
+		// Simple name: the caller's resolver goes first.
+		if resolved, found := d.LookupPackage(pkg); found {
+			return resolved, true
+		}
+	}
+	return DefaultLookupPackage(pkg)
+}
+
+// isStdPkg reports whether path appears in stdPkgs.
+// It uses binary search, so it relies on stdPkgs (defined elsewhere
+// in the package) being sorted in increasing order.
+func isStdPkg(path string) bool {
+	// TODO(rsc): Use sort.Find once we don't have to worry about
+	// copying this code into older Go environments.
+	atOrAfter := func(i int) bool { return stdPkgs[i] >= path }
+	pos := sort.Search(len(stdPkgs), atOrAfter)
+	if pos >= len(stdPkgs) {
+		return false
+	}
+	return stdPkgs[pos] == path
+}
+
+// DefaultLookupPackage is the default package lookup
+// function, used when [Parser].LookupPackage is nil
+// and as the fallback when it reports ok == false.
+// It recognizes names of the packages from the standard
+// library with single-element import paths, such as math,
+// which would otherwise be impossible to name.
+// For such a name it returns the name itself and true;
+// for any other name it returns "", false.
+//
+// Note that the go/doc package provides a more sophisticated
+// lookup based on the imports used in the current package.
+func DefaultLookupPackage(name string) (importPath string, ok bool) {
+	if !isStdPkg(name) {
+		return "", false
+	}
+	return name, true
+}
+
+// Parse parses the doc comment text and returns the *Doc form.
+// Comment markers (/* // and */) in the text must have already been removed.
+//
+// Parsing happens in two passes. The first splits the text into blocks
+// and collects link definitions, keeping all text as a single Plain
+// element per paragraph. The second reinterprets the text of paragraphs
+// and list items, which can only be done once every link definition
+// is known.
+func (p *Parser) Parse(text string) *Doc {
+	lines := unindent(strings.Split(text, "\n"))
+
+	symLookup := p.LookupSym
+	if symLookup == nil {
+		symLookup = func(recv, name string) bool { return false }
+	}
+	d := &parseDoc{
+		Parser:    p,
+		Doc:       new(Doc),
+		links:     make(map[string]*LinkDef),
+		lines:     lines,
+		lookupSym: symLookup,
+	}
+
+	// First pass: break into block structure and collect known links.
+	// The text is all recorded as Plain for now.
+	prevEnd := 0 // end of the previous span, for detecting a blank line gap
+	for _, sp := range parseSpans(lines) {
+		var blk Block
+		switch sp.kind {
+		case spanCode:
+			blk = d.code(lines[sp.start:sp.end])
+		case spanHeading:
+			blk = d.heading(lines[sp.start])
+		case spanList:
+			blk = d.list(lines[sp.start:sp.end], prevEnd < sp.start)
+		case spanOldHeading:
+			blk = d.oldHeading(lines[sp.start])
+		case spanPara:
+			// A paragraph made only of link definitions yields nil.
+			blk = d.paragraph(lines[sp.start:sp.end])
+		default:
+			panic("go/doc/comment: internal error: unknown span kind")
+		}
+		if blk != nil {
+			d.Content = append(d.Content, blk)
+		}
+		prevEnd = sp.end
+	}
+
+	// Second pass: interpret all the Plain text now that we know the links.
+	relink := func(para *Paragraph) {
+		para.Text = d.parseLinkedText(string(para.Text[0].(Plain)))
+	}
+	for _, blk := range d.Content {
+		switch blk := blk.(type) {
+		case *Paragraph:
+			relink(blk)
+		case *List:
+			for _, item := range blk.Items {
+				for _, c := range item.Content {
+					relink(c.(*Paragraph))
+				}
+			}
+		}
+	}
+
+	return d.Doc
+}
+
+// A span represents a single span of comment lines (lines[start:end])
+// of an identified kind (code, heading, paragraph, and so on).
+type span struct {
+	start int      // index of the first line in the span
+	end   int      // index just past the last line in the span
+	kind  spanKind // what the lines are to be parsed as
+}
+
+// A spanKind describes the kind of span.
+type spanKind int
+
+const (
+	// The zero spanKind is not assigned to any kind;
+	// Parse panics if it meets a span with an unknown kind.
+	_ spanKind = iota
+	// spanCode is an indented run of lines that does not start with a list marker.
+	spanCode
+	// spanHeading is a single line accepted by isHeading ("# Title").
+	spanHeading
+	// spanList is an indented run of lines whose first line starts with a list marker.
+	spanList
+	// spanOldHeading is a single line accepted by isOldHeading.
+	spanOldHeading
+	// spanPara is any other run of unindented lines.
+	spanPara
+)
+
+// parseSpans partitions lines into spans and classifies each one.
+// Blank lines between spans are skipped and belong to no span;
+// spans are returned in order of appearance.
+//
+// A run that starts with an indented line becomes a spanList if its
+// first line has a list marker and a spanCode otherwise; such a run may
+// contain blank lines. A run of unindented lines becomes a spanPara,
+// unless it is a single line accepted by isHeading or isOldHeading.
+// A few heuristics move unindented lines into an adjacent indented run
+// to recover from common indentation mistakes.
+func parseSpans(lines []string) []span {
+	var spans []span
+
+	// The loop may process a line twice: once as unindented
+	// and again forced indented. So the maximum expected
+	// number of iterations is 2*len(lines). The repeating logic
+	// can be subtle, though, and to protect against introduction
+	// of infinite loops in future changes, we watch to see that
+	// we are not looping too much. A panic is better than a
+	// quiet infinite loop.
+	watchdog := 2 * len(lines)
+
+	i := 0
+	// Lines with index below forceIndent are treated as indented
+	// even when they are not. It is raised by the heuristics in the
+	// unindented branch below.
+	forceIndent := 0
+Spans:
+	for {
+		// Skip blank lines.
+		for i < len(lines) && lines[i] == "" {
+			i++
+		}
+		if i >= len(lines) {
+			break
+		}
+		if watchdog--; watchdog < 0 {
+			panic("go/doc/comment: internal error: not making progress")
+		}
+
+		var kind spanKind
+		start := i
+		end := i
+		if i < forceIndent || indented(lines[i]) {
+			// Indented (or force indented).
+			// Ends before next unindented. (Blank lines are OK.)
+			// If this is an unindented list that we are heuristically treating as indented,
+			// then accept unindented list item lines up to the first blank lines.
+			// The heuristic is disabled at blank lines to contain its effect
+			// to non-gofmt'ed sections of the comment.
+			unindentedListOK := isList(lines[i]) && i < forceIndent
+			i++
+			for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
+				if lines[i] == "" {
+					unindentedListOK = false
+				}
+				i++
+			}
+
+			// Drop trailing blank lines.
+			end = i
+			for end > start && lines[end-1] == "" {
+				end--
+			}
+
+			// If indented lines are followed (without a blank line)
+			// by an unindented line starting with a closing brace,
+			// take that one line too. This fixes the common mistake
+			// of pasting in something like
+			//
+			// func main() {
+			//	fmt.Println("hello, world")
+			// }
+			//
+			// and forgetting to indent it.
+			// The heuristic will never trigger on a gofmt'ed comment,
+			// because any gofmt'ed code block or list would be
+			// followed by a blank line or end of comment.
+			if end < len(lines) && strings.HasPrefix(lines[end], "}") {
+				end++
+			}
+
+			// The first line alone decides between list and code.
+			if isList(lines[start]) {
+				kind = spanList
+			} else {
+				kind = spanCode
+			}
+		} else {
+			// Unindented. Ends at next blank or indented line.
+			i++
+			for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
+				i++
+			}
+			end = i
+
+			// If unindented lines are followed (without a blank line)
+			// by an indented line that would start a code block,
+			// check whether the final unindented lines
+			// should be left for the indented section.
+			// This can happen for the common mistakes of
+			// unindented code or unindented lists.
+			// The heuristic will never trigger on a gofmt'ed comment,
+			// because any gofmt'ed code block would have a blank line
+			// preceding it after the unindented lines.
+			if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
+				switch {
+				case isList(lines[i-1]):
+					// If the final unindented line looks like a list item,
+					// this may be the first indented line wrap of
+					// a mistakenly unindented list.
+					// Leave all the unindented list items.
+					forceIndent = end
+					end--
+					for end > start && isList(lines[end-1]) {
+						end--
+					}
+
+				case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
+					// If the final unindented line ended in { or \
+					// it is probably the start of a misindented code block.
+					// Give the user a single line fix.
+					// Often that's enough; if not, the user can fix the others themselves.
+					forceIndent = end
+					end--
+				}
+
+				// Every line of this run was handed to the indented section:
+				// nothing is left for a paragraph, so start over at the same
+				// line, which forceIndent now makes count as indented.
+				if start == end && forceIndent > start {
+					i = start
+					continue Spans
+				}
+			}
+
+			// Span is either paragraph or heading.
+			if end-start == 1 && isHeading(lines[start]) {
+				kind = spanHeading
+			} else if end-start == 1 && isOldHeading(lines[start], lines, start) {
+				kind = spanOldHeading
+			} else {
+				kind = spanPara
+			}
+		}
+
+		spans = append(spans, span{start, end, kind})
+		// Resume at end rather than at the scan position: the two differ
+		// when lines were left behind for the next (indented) span.
+		i = end
+	}
+
+	return spans
+}
+
+// indented reports whether line is indented,
+// meaning its first byte is a space or a tab.
+// The empty string is not indented.
+func indented(line string) bool {
+	if len(line) == 0 {
+		return false
+	}
+	switch line[0] {
+	case ' ', '\t':
+		return true
+	}
+	return false
+}
+
+// unindent removes any common space/tab prefix
+// from each line in lines, returning a copy of lines in which
+// those prefixes have been trimmed from each line.
+// Lines left with nothing but white space become empty strings,
+// and blank lines at the start and end of the result are dropped.
+// It returns nil if lines holds only blank lines.
+func unindent(lines []string) []string {
+	// Narrow [lo, hi) past blank lines at both ends.
+	lo, hi := 0, len(lines)
+	for lo < hi && isBlank(lines[lo]) {
+		lo++
+	}
+	for hi > lo && isBlank(lines[hi-1]) {
+		hi--
+	}
+	if lo == hi {
+		return nil
+	}
+	lines = lines[lo:hi]
+
+	// The common indentation is the longest space/tab prefix
+	// shared by every non-blank line.
+	prefix := leadingSpace(lines[0])
+	for _, line := range lines[1:] {
+		if isBlank(line) {
+			continue
+		}
+		prefix = commonPrefix(prefix, leadingSpace(line))
+	}
+
+	out := make([]string, 0, len(lines))
+	for _, line := range lines {
+		line = strings.TrimPrefix(line, prefix)
+		if strings.TrimSpace(line) == "" {
+			line = ""
+		}
+		out = append(out, line)
+	}
+
+	// Lines that were white space only have just become empty,
+	// so the ends may need trimming again.
+	for len(out) > 0 && out[0] == "" {
+		out = out[1:]
+	}
+	for len(out) > 0 && out[len(out)-1] == "" {
+		out = out[:len(out)-1]
+	}
+	return out
+}
+
+// isBlank reports whether s is a blank line:
+// either the empty string or a lone newline character.
+// A line holding only spaces or tabs is not blank by this definition.
+func isBlank(s string) bool {
+	return s == "" || s == "\n"
+}
+
+// commonPrefix returns the longest common prefix of a and b,
+// compared byte by byte. The result is a prefix slice of a.
+func commonPrefix(a, b string) string {
+	limit := len(a)
+	if len(b) < limit {
+		limit = len(b)
+	}
+	for n := 0; n < limit; n++ {
+		if a[n] != b[n] {
+			return a[:n]
+		}
+	}
+	return a[:limit]
+}
+
+// leadingSpace returns the longest prefix of s consisting of spaces and tabs.
+// The prefix is empty when s does not start with a space or tab.
+func leadingSpace(s string) string {
+	rest := strings.TrimLeft(s, " \t")
+	return s[:len(s)-len(rest)]
+}
+
+// isOldHeading reports whether line is an old-style section heading.
+// line is all[off].
+func isOldHeading(line string, all []string, off int) bool {
+	if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" {
+		return false
+	}
+
+	line = strings.TrimSpace(line)
+
+	// a heading must start with an uppercase letter
+	r, _ := utf8.DecodeRuneInString(line)
+	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
+		return false
+	}
+
+	// it must end in a letter or digit:
+	r, _ = utf8.DecodeLastRuneInString(line)
+	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+		return false
+	}
+
+	// exclude lines with illegal characters. we allow "(),"
+	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
+		return false
+	}
+
+	// allow "'" for possessive "'s" only
+	for b := line; ; {
+		var ok bool
+		if _, b, ok = strings.Cut(b, "'"); !ok {
+			break
+		}
+		if b != "s" && !strings.HasPrefix(b, "s ") {
+			return false // ' not followed by s and then end-of-word
+		}
+	}
+
+	// allow "." when followed by non-space
+	for b := line; ; {
+		var ok bool
+		if _, b, ok = strings.Cut(b, "."); !ok {
+			break
+		}
+		if b == "" || strings.HasPrefix(b, " ") {
+			return false // not followed by non-space
+		}
+	}
+
+	return true
+}
+
+// oldHeading returns the *Heading for the given old-style section heading line.
+// The heading text is the line with surrounding white space removed,
+// recorded as a single Plain element.
+func (d *parseDoc) oldHeading(line string) Block {
+	title := Plain(strings.TrimSpace(line))
+	return &Heading{Text: []Text{title}}
+}
+
+// isHeading reports whether line is a new-style section heading:
+// a '#' in the first column, then a space or tab,
+// then at least one character that is not white space.
+func isHeading(line string) bool {
+	if len(line) < 2 || line[0] != '#' {
+		return false
+	}
+	if sep := line[1]; sep != ' ' && sep != '\t' {
+		return false
+	}
+	// Reject a marker with no title after it.
+	return strings.TrimSpace(line) != "#"
+}
+
+// heading returns the *Heading for the given new-style section heading line.
+// The heading text is the line without its leading '#' and without
+// surrounding white space, recorded as a single Plain element.
+func (d *parseDoc) heading(line string) Block {
+	title := Plain(strings.TrimSpace(line[1:]))
+	return &Heading{Text: []Text{title}}
+}
+
+// code returns a code block built from the lines.
+func (d *parseDoc) code(lines []string) *Code {
+	body := unindent(lines)
+	body = append(body, "") // to get final \n from Join
+	return &Code{Text: strings.Join(body, "\n")}
+}
+
+// paragraph returns a paragraph block built from the lines.
+// If the lines are link definitions, paragraph adds them to d and returns nil.
+func (d *parseDoc) paragraph(lines []string) Block {
+	// Is this a block of known links? Handle.
+	var defs []*LinkDef
+	for _, line := range lines {
+		def, ok := parseLink(line)
+		if !ok {
+			goto NoDefs
+		}
+		defs = append(defs, def)
+	}
+	for _, def := range defs {
+		d.Links = append(d.Links, def)
+		if d.links[def.Text] == nil {
+			d.links[def.Text] = def
+		}
+	}
+	return nil
+NoDefs:
+
+	return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}
+}
+
+// parseLink parses a single link definition line:
+//
+//	[text]: url
+//
+// It returns the link definition and whether the line was well formed.
+// A well-formed line starts with '[', has a space or tab after the first
+// "]:", and has a url beginning with a recognized scheme followed by "://".
+// The url is stored with surrounding white space removed.
+func parseLink(line string) (*LinkDef, bool) {
+	if !strings.HasPrefix(line, "[") {
+		return nil, false
+	}
+	closing := strings.Index(line, "]:")
+	if closing < 0 || closing+3 >= len(line) {
+		return nil, false
+	}
+	if sep := line[closing+2]; sep != ' ' && sep != '\t' {
+		return nil, false
+	}
+
+	target := strings.TrimSpace(line[closing+3:])
+	scheme, _, found := strings.Cut(target, "://")
+	if !found || !isScheme(scheme) {
+		return nil, false
+	}
+
+	// Line has right form and has valid scheme://.
+	// That's good enough for us - we are not as picky
+	// about the characters beyond the :// as we are
+	// when extracting inline URLs from text.
+	return &LinkDef{Text: line[1:closing], URL: target}, true
+}
+
+// list returns a list built from the indented lines,
+// using forceBlankBefore as the value of the List's ForceBlankBefore field.
+//
+// The first line decides whether the list is numbered or bulleted;
+// only lines with a marker of that same kind start a new item.
+// Any other non-blank line continues the current paragraph, and a blank
+// line ends the current paragraph and sets ForceBlankBetween.
+func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List {
+	firstNum, _, _ := listMarker(lines[0])
+	numbered := firstNum != ""
+
+	out := &List{ForceBlankBefore: forceBlankBefore}
+	var (
+		cur     *ListItem // item currently being filled, nil before the first marker
+		pending []string  // lines of the paragraph currently being collected
+	)
+	// endPara turns the pending lines into a paragraph of the current item.
+	endPara := func() {
+		if cur != nil {
+			if para := d.paragraph(pending); para != nil {
+				cur.Content = append(cur.Content, para)
+			}
+		}
+		pending = nil
+	}
+
+	for _, line := range lines {
+		if n, after, ok := listMarker(line); ok && (n != "") == numbered {
+			// start new list item
+			endPara()
+
+			cur = &ListItem{Number: n}
+			out.Items = append(out.Items, cur)
+			line = after
+		}
+		if line = strings.TrimSpace(line); line == "" {
+			out.ForceBlankBetween = true
+			endPara()
+			continue
+		}
+		pending = append(pending, line)
+	}
+	endPara()
+	return out
+}
+
+// listMarker parses the line as beginning with a list marker.
+// If it can do that, it returns the numeric marker ("" for a bullet list),
+// the rest of the line, and ok == true.
+// Otherwise, it returns "", "", false.
+//
+// White space around the line is ignored. A bullet marker is one of
+// •, *, +, or -; a numeric marker is a run of ASCII digits followed by
+// '.' or ')'. The marker must be followed by a space or tab and then by
+// some text. The returned rest keeps the white space that follows the marker.
+func listMarker(line string) (num, rest string, ok bool) {
+	line = strings.TrimSpace(line)
+	if line == "" {
+		return "", "", false
+	}
+
+	// Can we find a marker?
+	switch r, size := utf8.DecodeRuneInString(line); {
+	case r == '•' || r == '*' || r == '+' || r == '-':
+		rest = line[size:]
+
+	case '0' <= line[0] && line[0] <= '9':
+		digits := 1
+		for digits < len(line) && '0' <= line[digits] && line[digits] <= '9' {
+			digits++
+		}
+		if digits >= len(line) || (line[digits] != '.' && line[digits] != ')') {
+			return "", "", false
+		}
+		num, rest = line[:digits], line[digits+1:]
+
+	default:
+		return "", "", false
+	}
+
+	// The marker must be separated from the item text by a space or tab,
+	// and there must be some item text.
+	if rest == "" || (rest[0] != ' ' && rest[0] != '\t') {
+		return "", "", false
+	}
+	if strings.TrimSpace(rest) == "" {
+		return "", "", false
+	}
+
+	return num, rest, true
+}
+
+// isList reports whether the line is the first line of a list,
+// meaning starts with a list marker after any indentation.
+// (The caller is responsible for checking the line is indented, as appropriate.)
+func isList(line string) bool {
+	if _, _, ok := listMarker(line); !ok {
+		return false
+	}
+	return true
+}
+
+// parseLinkedText parses text that is allowed to contain explicit links,
+// such as [math.Sin] or [Go home page], into a slice of Text items.
+//
+// Bracketed text is looked up first among the comment's link definitions,
+// with newlines and tabs inside the brackets treated as spaces, and is
+// otherwise tried as a doc link. To avoid problems with maps, generics,
+// and array types, doc links must be both preceded and followed by
+// punctuation, spaces, tabs, or the start or end of a line (see docLink).
+// An example problem would be treating map[ast.Expr]TypeAndValue as
+// containing a link.
+//
+// Brackets do not nest: a '[' seen before the closing ']' restarts the
+// candidate link at that position. Everything that is not part of a link
+// is passed through parseText with automatic linking enabled.
+func (d *parseDoc) parseLinkedText(text string) []Text {
+	var out []Text
+	wrote := 0 // text[:wrote] has already been emitted
+	flush := func(i int) {
+		if wrote < i {
+			out = d.parseText(out, text[wrote:i], true)
+			wrote = i
+		}
+	}
+
+	start := -1     // index of the pending '[', or -1 if there is none
+	var buf []byte  // text since the pending '[', with \n and \t mapped to spaces
+	for i := 0; i < len(text); i++ {
+		c := text[i]
+		if c == '\n' || c == '\t' {
+			c = ' '
+		}
+		switch c {
+		case '[':
+			start = i
+			// Restart the lookup key as well, so that it always matches
+			// text[start+1:i]. Without this, a second '[' would leave the
+			// text that followed the first one in the key.
+			buf = buf[:0]
+		case ']':
+			if start >= 0 {
+				if def, ok := d.links[string(buf)]; ok {
+					def.Used = true
+					flush(start)
+					out = append(out, &Link{
+						Text: d.parseText(nil, text[start+1:i], false),
+						URL:  def.URL,
+					})
+					wrote = i + 1
+				} else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok {
+					flush(start)
+					link.Text = d.parseText(nil, text[start+1:i], false)
+					out = append(out, link)
+					wrote = i + 1
+				}
+			}
+			start = -1
+			buf = buf[:0]
+		}
+		// Collect the bytes after the '[' itself.
+		if start >= 0 && i != start {
+			buf = append(buf, c)
+		}
+	}
+
+	flush(len(text))
+	return out
+}
+
+// docLink parses text, which was found inside [ ] brackets,
+// as a doc link if possible, returning the DocLink and ok == true
+// or else nil, false.
+// The before and after strings are the text before the [ and after the ]
+// on the same line. Doc links must be preceded and followed by
+// punctuation, spaces, tabs, or the start or end of a line.
+func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) {
+	if before != "" {
+		r, _ := utf8.DecodeLastRuneInString(before)
+		if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
+			return nil, false
+		}
+	}
+	if after != "" {
+		r, _ := utf8.DecodeRuneInString(after)
+		if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' {
+			return nil, false
+		}
+	}
+	text = strings.TrimPrefix(text, "*")
+	pkg, name, ok := splitDocName(text)
+	var recv string
+	if ok {
+		pkg, recv, _ = splitDocName(pkg)
+	}
+	if pkg != "" {
+		if pkg, ok = d.lookupPkg(pkg); !ok {
+			return nil, false
+		}
+	} else {
+		if ok = d.lookupSym(recv, name); !ok {
+			return nil, false
+		}
+	}
+	link = &DocLink{
+		ImportPath: pkg,
+		Recv:       recv,
+		Name:       name,
+	}
+	return link, true
+}
+
+// splitDocName splits text at its last dot into before and name.
+// If the part after the last dot is a capitalized Go identifier,
+// splitDocName returns before, name, true. When text has no dot and
+// is itself a capitalized Go identifier, it returns "", text, true.
+// Otherwise it returns text, "", false.
+func splitDocName(text string) (before, name string, foundDot bool) {
+	dot := strings.LastIndex(text, ".")
+	candidate := text[dot+1:] // all of text when there is no dot
+	if !isName(candidate) {
+		return text, "", false
+	}
+	if dot < 0 {
+		return "", candidate, true
+	}
+	return text[:dot], candidate, true
+}
+
+// parseText parses s as text and returns the result of appending
+// those parsed Text elements to out.
+// parseText does not handle explicit links like [math.Sin] or [Go home page]:
+// those are handled by parseLinkedText.
+// If autoLink is true, then parseText recognizes URLs and words from d.Words
+// and converts those to links as appropriate.
+//
+// In all cases parseText rewrites `` as “ and '' as ”, except that a run
+// of three or more backquotes is copied through unchanged.
+func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text {
+	var w strings.Builder // plain text accumulated since the last element was emitted
+	wrote := 0            // s[:wrote] has been written to w or emitted
+	writeUntil := func(i int) {
+		w.WriteString(s[wrote:i])
+		wrote = i
+	}
+	flush := func(i int) {
+		writeUntil(i)
+		if w.Len() > 0 {
+			out = append(out, Plain(w.String()))
+			w.Reset()
+		}
+	}
+	for i := 0; i < len(s); {
+		t := s[i:]
+		if autoLink {
+			if url, ok := autoURL(t); ok {
+				flush(i)
+				// Note: The old comment parser would look up the URL in words
+				// and replace the target with words[URL] if it was non-empty.
+				// That would allow creating links that display as one URL but
+				// when clicked go to a different URL. Not sure what the point
+				// of that is, so we're not doing that lookup here.
+				out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url})
+				i += len(url)
+				wrote = i
+				continue
+			}
+			if id, ok := ident(t); ok {
+				url, italics := d.Words[id]
+				if !italics {
+					// Skip the whole identifier so that no suffix of it
+					// is later matched as a word or URL of its own.
+					i += len(id)
+					continue
+				}
+				flush(i)
+				if url == "" {
+					out = append(out, Italic(id))
+				} else {
+					out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url})
+				}
+				i += len(id)
+				wrote = i
+				continue
+			}
+		}
+		switch {
+		case strings.HasPrefix(t, "```"):
+			// Do not convert `` inside ```, in case people are mistakenly writing Markdown.
+			// Skip the entire run of backquotes. i is an index into s, not into t,
+			// so the scan must use s; indexing t here would look at the wrong
+			// bytes whenever the run does not start at the beginning of s.
+			i += 3
+			for i < len(s) && s[i] == '`' {
+				i++
+			}
+		case strings.HasPrefix(t, "``"):
+			writeUntil(i)
+			w.WriteRune('“')
+			i += 2
+			wrote = i
+		case strings.HasPrefix(t, "''"):
+			writeUntil(i)
+			w.WriteRune('”')
+			i += 2
+			wrote = i
+		default:
+			i++
+		}
+	}
+	flush(len(s))
+	return out
+}
+
+// autoURL checks whether s begins with a URL that should be hyperlinked.
+// If so, it returns the URL, which is a prefix of s, and ok == true.
+// Otherwise it returns "", false.
+// The caller should skip over the first len(url) bytes of s
+// before further processing.
+//
+// A URL is a recognized scheme, "://", a host, and the longest
+// acceptable path that follows the host.
+func autoURL(s string) (url string, ok bool) {
+	// Find the ://. Fast path to pick off non-URL,
+	// since we call this at every position in the string.
+	// The shortest possible URL is ftp://x, 7 bytes,
+	// and the colon of a recognized scheme is at index 3 through 6.
+	if len(s) < 7 {
+		return "", false
+	}
+	colon := 3
+	for colon <= 6 && s[colon] != ':' {
+		colon++
+	}
+	if colon > 6 || !strings.HasPrefix(s[colon:], "://") {
+		return "", false
+	}
+
+	// Check valid scheme.
+	if !isScheme(s[:colon]) {
+		return "", false
+	}
+
+	// Scan host part. Must have at least one byte,
+	// and must start and end in non-punctuation.
+	pos := colon + 3
+	if pos >= len(s) || !isHost(s[pos]) || isPunct(s[pos]) {
+		return "", false
+	}
+	pos++
+	hostEnd := pos
+	for ; pos < len(s) && isHost(s[pos]); pos++ {
+		if !isPunct(s[pos]) {
+			hostEnd = pos + 1
+		}
+	}
+	pos = hostEnd
+
+	// At this point we are definitely returning a URL (scheme://host).
+	// We just have to find the longest path we can add to it.
+	// Heuristics abound.
+	// We allow parens, braces, and brackets,
+	// but only if they match (#5043, #22285).
+	// We allow .,:;?! in the path but not at the end,
+	// to avoid end-of-sentence punctuation (#18139, #16565).
+	var closers []byte // closing delimiters still owed, innermost last
+	end := pos
+Path:
+	for ; pos < len(s); pos++ {
+		c := s[pos]
+		if isPunct(c) {
+			// Tentatively part of the path; end only moves past it
+			// if a non-punctuation path byte follows.
+			continue
+		}
+		if !isPath(c) {
+			break
+		}
+		switch c {
+		case '(':
+			closers = append(closers, ')')
+		case '{':
+			closers = append(closers, '}')
+		case '[':
+			closers = append(closers, ']')
+		case ')', '}', ']':
+			if len(closers) == 0 || closers[len(closers)-1] != c {
+				break Path
+			}
+			closers = closers[:len(closers)-1]
+		}
+		// Only extend the URL while every opened delimiter is closed.
+		if len(closers) == 0 {
+			end = pos + 1
+		}
+	}
+
+	return s[:end], true
+}
+
+// isScheme reports whether s is a recognized URL scheme.
+// The comparison is case-sensitive.
+// Note that the recognized schemes are 3 to 6 bytes long; if schemes
+// of other lengths are added here, the fast path at the top of autoURL
+// will need updating.
+func isScheme(s string) bool {
+	known := [...]string{
+		"file",
+		"ftp",
+		"gopher",
+		"http",
+		"https",
+		"mailto",
+		"nntp",
+	}
+	for _, scheme := range known {
+		if s == scheme {
+			return true
+		}
+	}
+	return false
+}
+
+// isHost reports whether c is a byte that can appear in a URL host,
+// like www.example.com or user@[::1]:8080
+// The allowed bytes are ASCII letters and digits and _ @ - . [ ] :
+func isHost(c byte) bool {
+	// mask is a 128-bit bitmap with 1s for allowed bytes.
+	const mask = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'_' |
+		1<<'@' |
+		1<<'-' |
+		1<<'.' |
+		1<<'[' |
+		1<<']' |
+		1<<':'
+
+	// The bitmap is tested one 64-bit word at a time.
+	// Bytes 128 and above have no bit in it and are never allowed.
+	const (
+		low  uint64 = mask & (1<<64 - 1) // bytes 0 through 63
+		high uint64 = mask >> 64         // bytes 64 through 127
+	)
+	switch {
+	case c < 64:
+		return low>>c&1 != 0
+	case c < 128:
+		return high>>(c-64)&1 != 0
+	}
+	return false
+}
+
+// isPunct reports whether c is a punctuation byte that can appear
+// inside a path but not at the end.
+// The punctuation bytes are . , : ; ? !
+func isPunct(c byte) bool {
+	// mask is a 128-bit bitmap with 1s for allowed bytes.
+	const mask = 0 |
+		1<<'.' |
+		1<<',' |
+		1<<':' |
+		1<<';' |
+		1<<'?' |
+		1<<'!'
+
+	// The bitmap is tested one 64-bit word at a time.
+	// Bytes 128 and above have no bit in it and are never allowed.
+	const (
+		low  uint64 = mask & (1<<64 - 1) // bytes 0 through 63
+		high uint64 = mask >> 64         // bytes 64 through 127
+	)
+	switch {
+	case c < 64:
+		return low>>c&1 != 0
+	case c < 128:
+		return high>>(c-64)&1 != 0
+	}
+	return false
+}
+
+// isPath reports whether c is a (non-punctuation) path byte.
+// The path bytes are ASCII letters and digits and
+// $ ' ( ) * + & # = @ ~ _ / - [ ] { } %
+func isPath(c byte) bool {
+	// mask is a 128-bit bitmap with 1s for allowed bytes.
+	const mask = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'$' |
+		1<<'\'' |
+		1<<'(' |
+		1<<')' |
+		1<<'*' |
+		1<<'+' |
+		1<<'&' |
+		1<<'#' |
+		1<<'=' |
+		1<<'@' |
+		1<<'~' |
+		1<<'_' |
+		1<<'/' |
+		1<<'-' |
+		1<<'[' |
+		1<<']' |
+		1<<'{' |
+		1<<'}' |
+		1<<'%'
+
+	// The bitmap is tested one 64-bit word at a time.
+	// Bytes 128 and above have no bit in it and are never allowed.
+	const (
+		low  uint64 = mask & (1<<64 - 1) // bytes 0 through 63
+		high uint64 = mask >> 64         // bytes 64 through 127
+	)
+	switch {
+	case c < 64:
+		return low>>c&1 != 0
+	case c < 128:
+		return high>>(c-64)&1 != 0
+	}
+	return false
+}
+
+// isName reports whether s is a capitalized Go identifier (like Name):
+// the whole of s must be one identifier, and its first rune
+// must be an uppercase letter.
+func isName(s string) bool {
+	if id, ok := ident(s); !ok || id != s {
+		return false
+	}
+	first, _ := utf8.DecodeRuneInString(s)
+	return unicode.IsUpper(first)
+}
+
+// ident checks whether s begins with a Go identifier.
+// If so, it returns the identifier, which is a prefix of s, and ok == true.
+// Otherwise it returns "", false.
+// The caller should skip over the first len(id) bytes of s
+// before further processing.
+//
+// An identifier is a letter or underscore followed by any number of
+// letters, underscores, and ASCII digits. Letters may be non-ASCII.
+func ident(s string) (id string, ok bool) {
+	// Scan [\pL_][\pL_0-9]*
+	n := 0
+Scan:
+	for n < len(s) {
+		c := s[n]
+		switch {
+		case c >= utf8.RuneSelf:
+			// Start of a multi-byte sequence: only letters extend the identifier.
+			r, size := utf8.DecodeRuneInString(s[n:])
+			if !unicode.IsLetter(r) {
+				break Scan
+			}
+			n += size
+		case c == '_', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
+			n++
+		case '0' <= c && c <= '9' && n > 0:
+			// Digits are fine anywhere but in first position.
+			n++
+		default:
+			break Scan
+		}
+	}
+	return s[:n], n > 0
+}
+
+// isIdentASCII reports whether c is an ASCII identifier byte:
+// a letter, a digit, or an underscore.
+func isIdentASCII(c byte) bool {
+	// mask is a 128-bit bitmap with 1s for allowed bytes.
+	const mask = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'_'
+
+	// The bitmap is tested one 64-bit word at a time.
+	// Bytes 128 and above have no bit in it and are never allowed.
+	const (
+		low  uint64 = mask & (1<<64 - 1) // bytes 0 through 63
+		high uint64 = mask >> 64         // bytes 64 through 127
+	)
+	switch {
+	case c < 64:
+		return low>>c&1 != 0
+	case c < 128:
+		return high>>(c-64)&1 != 0
+	}
+	return false
+}
+
+// validImportPath reports whether path is a valid import path.
+// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
+//
+// A valid path is non-empty valid UTF-8, does not start with '-',
+// does not end with '/', contains no empty element ("//"),
+// and consists of slash-separated elements that each satisfy
+// validImportPathElem.
+func validImportPath(path string) bool {
+	switch {
+	case !utf8.ValidString(path), path == "":
+		return false
+	case path[0] == '-', strings.Contains(path, "//"), strings.HasSuffix(path, "/"):
+		return false
+	}
+	for _, elem := range strings.Split(path, "/") {
+		if !validImportPathElem(elem) {
+			return false
+		}
+	}
+	return true
+}
+
+// validImportPathElem reports whether elem is a valid import path element:
+// non-empty, neither starting nor ending with a dot,
+// and made only of bytes accepted by importPathOK.
+func validImportPathElem(elem string) bool {
+	if elem == "" || strings.HasPrefix(elem, ".") || strings.HasSuffix(elem, ".") {
+		return false
+	}
+	for _, c := range []byte(elem) {
+		if !importPathOK(c) {
+			return false
+		}
+	}
+	return true
+}
+
+// importPathOK reports whether c is a byte allowed in an import path
+// element: an ASCII letter or digit or one of - . ~ _ +
+func importPathOK(c byte) bool {
+	// mask is a 128-bit bitmap with 1s for allowed bytes.
+	const mask = 0 |
+		(1<<26-1)<<'A' |
+		(1<<26-1)<<'a' |
+		(1<<10-1)<<'0' |
+		1<<'-' |
+		1<<'.' |
+		1<<'~' |
+		1<<'_' |
+		1<<'+'
+
+	// The bitmap is tested one 64-bit word at a time.
+	// Bytes 128 and above have no bit in it and are never allowed.
+	const (
+		low  uint64 = mask & (1<<64 - 1) // bytes 0 through 63
+		high uint64 = mask >> 64         // bytes 64 through 127
+	)
+	switch {
+	case c < 64:
+		return low>>c&1 != 0
+	case c < 128:
+		return high>>(c-64)&1 != 0
+	}
+	return false
+}