diff options
Diffstat (limited to 'src/go/doc/comment')
67 files changed, 4686 insertions, 0 deletions
diff --git a/src/go/doc/comment/doc.go b/src/go/doc/comment/doc.go new file mode 100644 index 0000000..45a476a --- /dev/null +++ b/src/go/doc/comment/doc.go @@ -0,0 +1,36 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package comment implements parsing and reformatting of Go doc comments, +(documentation comments), which are comments that immediately precede +a top-level declaration of a package, const, func, type, or var. + +Go doc comment syntax is a simplified subset of Markdown that supports +links, headings, paragraphs, lists (without nesting), and preformatted text blocks. +The details of the syntax are documented at https://go.dev/doc/comment. + +To parse the text associated with a doc comment (after removing comment markers), +use a [Parser]: + + var p comment.Parser + doc := p.Parse(text) + +The result is a [*Doc]. +To reformat it as a doc comment, HTML, Markdown, or plain text, +use a [Printer]: + + var pr comment.Printer + os.Stdout.Write(pr.Text(doc)) + +The [Parser] and [Printer] types are structs whose fields can be +modified to customize the operations. +For details, see the documentation for those types. + +Use cases that need additional control over reformatting can +implement their own logic by inspecting the parsed syntax itself. +See the documentation for [Doc], [Block], [Text] for an overview +and links to additional types. +*/ +package comment diff --git a/src/go/doc/comment/html.go b/src/go/doc/comment/html.go new file mode 100644 index 0000000..bc076f6 --- /dev/null +++ b/src/go/doc/comment/html.go @@ -0,0 +1,169 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "fmt" + "strconv" +) + +// An htmlPrinter holds the state needed for printing a Doc as HTML. +type htmlPrinter struct { + *Printer + tight bool +} + +// HTML returns an HTML formatting of the Doc. +// See the [Printer] documentation for ways to customize the HTML output. +func (p *Printer) HTML(d *Doc) []byte { + hp := &htmlPrinter{Printer: p} + var out bytes.Buffer + for _, x := range d.Content { + hp.block(&out, x) + } + return out.Bytes() +} + +// block prints the block x to out. +func (p *htmlPrinter) block(out *bytes.Buffer, x Block) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T", x) + + case *Paragraph: + if !p.tight { + out.WriteString("<p>") + } + p.text(out, x.Text) + out.WriteString("\n") + + case *Heading: + out.WriteString("<h") + h := strconv.Itoa(p.headingLevel()) + out.WriteString(h) + if id := p.headingID(x); id != "" { + out.WriteString(` id="`) + p.escape(out, id) + out.WriteString(`"`) + } + out.WriteString(">") + p.text(out, x.Text) + out.WriteString("</h") + out.WriteString(h) + out.WriteString(">\n") + + case *Code: + out.WriteString("<pre>") + p.escape(out, x.Text) + out.WriteString("</pre>\n") + + case *List: + kind := "ol>\n" + if x.Items[0].Number == "" { + kind = "ul>\n" + } + out.WriteString("<") + out.WriteString(kind) + next := "1" + for _, item := range x.Items { + out.WriteString("<li") + if n := item.Number; n != "" { + if n != next { + out.WriteString(` value="`) + out.WriteString(n) + out.WriteString(`"`) + next = n + } + next = inc(next) + } + out.WriteString(">") + p.tight = !x.BlankBetween() + for _, blk := range item.Content { + p.block(out, blk) + } + p.tight = false + } + out.WriteString("</") + out.WriteString(kind) + } +} + +// inc increments the decimal string s. +// For example, inc("1199") == "1200". +func inc(s string) string { + b := []byte(s) + for i := len(b) - 1; i >= 0; i-- { + if b[i] < '9' { + b[i]++ + return string(b) + } + b[i] = '0' + } + return "1" + string(b) +} + +// text prints the text sequence x to out. +func (p *htmlPrinter) text(out *bytes.Buffer, x []Text) { + for _, t := range x { + switch t := t.(type) { + case Plain: + p.escape(out, string(t)) + case Italic: + out.WriteString("<i>") + p.escape(out, string(t)) + out.WriteString("</i>") + case *Link: + out.WriteString(`<a href="`) + p.escape(out, t.URL) + out.WriteString(`">`) + p.text(out, t.Text) + out.WriteString("</a>") + case *DocLink: + url := p.docLinkURL(t) + if url != "" { + out.WriteString(`<a href="`) + p.escape(out, url) + out.WriteString(`">`) + } + p.text(out, t.Text) + if url != "" { + out.WriteString("</a>") + } + } + } +} + +// escape prints s to out as plain text, +// escaping < & " ' and > to avoid being misinterpreted +// in larger HTML constructs. +func (p *htmlPrinter) escape(out *bytes.Buffer, s string) { + start := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '<': + out.WriteString(s[start:i]) + out.WriteString("<") + start = i + 1 + case '&': + out.WriteString(s[start:i]) + out.WriteString("&") + start = i + 1 + case '"': + out.WriteString(s[start:i]) + out.WriteString(""") + start = i + 1 + case '\'': + out.WriteString(s[start:i]) + out.WriteString("'") + start = i + 1 + case '>': + out.WriteString(s[start:i]) + out.WriteString(">") + start = i + 1 + } + } + out.WriteString(s[start:]) +} diff --git a/src/go/doc/comment/markdown.go b/src/go/doc/comment/markdown.go new file mode 100644 index 0000000..d8550f2 --- /dev/null +++ b/src/go/doc/comment/markdown.go @@ -0,0 +1,188 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "fmt" + "strings" +) + +// An mdPrinter holds the state needed for printing a Doc as Markdown. +type mdPrinter struct { + *Printer + headingPrefix string + raw bytes.Buffer +} + +// Markdown returns a Markdown formatting of the Doc. +// See the [Printer] documentation for ways to customize the Markdown output. +func (p *Printer) Markdown(d *Doc) []byte { + mp := &mdPrinter{ + Printer: p, + headingPrefix: strings.Repeat("#", p.headingLevel()) + " ", + } + + var out bytes.Buffer + for i, x := range d.Content { + if i > 0 { + out.WriteByte('\n') + } + mp.block(&out, x) + } + return out.Bytes() +} + +// block prints the block x to out. +func (p *mdPrinter) block(out *bytes.Buffer, x Block) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T", x) + + case *Paragraph: + p.text(out, x.Text) + out.WriteString("\n") + + case *Heading: + out.WriteString(p.headingPrefix) + p.text(out, x.Text) + if id := p.headingID(x); id != "" { + out.WriteString(" {#") + out.WriteString(id) + out.WriteString("}") + } + out.WriteString("\n") + + case *Code: + md := x.Text + for md != "" { + var line string + line, md, _ = strings.Cut(md, "\n") + if line != "" { + out.WriteString("\t") + out.WriteString(line) + } + out.WriteString("\n") + } + + case *List: + loose := x.BlankBetween() + for i, item := range x.Items { + if i > 0 && loose { + out.WriteString("\n") + } + if n := item.Number; n != "" { + out.WriteString(" ") + out.WriteString(n) + out.WriteString(". ") + } else { + out.WriteString(" - ") // SP SP - SP + } + for i, blk := range item.Content { + const fourSpace = " " + if i > 0 { + out.WriteString("\n" + fourSpace) + } + p.text(out, blk.(*Paragraph).Text) + out.WriteString("\n") + } + } + } +} + +// text prints the text sequence x to out. +func (p *mdPrinter) text(out *bytes.Buffer, x []Text) { + p.raw.Reset() + p.rawText(&p.raw, x) + line := bytes.TrimSpace(p.raw.Bytes()) + if len(line) == 0 { + return + } + switch line[0] { + case '+', '-', '*', '#': + // Escape what would be the start of an unordered list or heading. + out.WriteByte('\\') + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + i := 1 + for i < len(line) && '0' <= line[i] && line[i] <= '9' { + i++ + } + if i < len(line) && (line[i] == '.' || line[i] == ')') { + // Escape what would be the start of an ordered list. + out.Write(line[:i]) + out.WriteByte('\\') + line = line[i:] + } + } + out.Write(line) +} + +// rawText prints the text sequence x to out, +// without worrying about escaping characters +// that have special meaning at the start of a Markdown line. +func (p *mdPrinter) rawText(out *bytes.Buffer, x []Text) { + for _, t := range x { + switch t := t.(type) { + case Plain: + p.escape(out, string(t)) + case Italic: + out.WriteString("*") + p.escape(out, string(t)) + out.WriteString("*") + case *Link: + out.WriteString("[") + p.rawText(out, t.Text) + out.WriteString("](") + out.WriteString(t.URL) + out.WriteString(")") + case *DocLink: + url := p.docLinkURL(t) + if url != "" { + out.WriteString("[") + } + p.rawText(out, t.Text) + if url != "" { + out.WriteString("](") + url = strings.ReplaceAll(url, "(", "%28") + url = strings.ReplaceAll(url, ")", "%29") + out.WriteString(url) + out.WriteString(")") + } + } + } +} + +// escape prints s to out as plain text, +// escaping special characters to avoid being misinterpreted +// as Markdown markup sequences. +func (p *mdPrinter) escape(out *bytes.Buffer, s string) { + start := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '\n': + // Turn all \n into spaces, for a few reasons: + // - Avoid introducing paragraph breaks accidentally. + // - Avoid the need to reindent after the newline. + // - Avoid problems with Markdown renderers treating + // every mid-paragraph newline as a <br>. + out.WriteString(s[start:i]) + out.WriteByte(' ') + start = i + 1 + continue + case '`', '_', '*', '[', '<', '\\': + // Not all of these need to be escaped all the time, + // but is valid and easy to do so. + // We assume the Markdown is being passed to a + // Markdown renderer, not edited by a person, + // so it's fine to have escapes that are not strictly + // necessary in some cases. + out.WriteString(s[start:i]) + out.WriteByte('\\') + out.WriteByte(s[i]) + start = i + 1 + } + } + out.WriteString(s[start:]) +} diff --git a/src/go/doc/comment/mkstd.sh b/src/go/doc/comment/mkstd.sh new file mode 100755 index 0000000..c9dee8c --- /dev/null +++ b/src/go/doc/comment/mkstd.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright 2022 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# This could be a good use for embed but go/doc/comment +# is built into the bootstrap go command, so it can't use embed. +# Also not using embed lets us emit a string array directly +# and avoid init-time work. + +( +echo "// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by 'go generate' DO NOT EDIT. +//go:generate ./mkstd.sh + +package comment + +var stdPkgs = []string{" +go list std | grep -v / | sort | sed 's/.*/"&",/' +echo "}" +) | gofmt >std.go.tmp && mv std.go.tmp std.go diff --git a/src/go/doc/comment/old_test.go b/src/go/doc/comment/old_test.go new file mode 100644 index 0000000..944f94d --- /dev/null +++ b/src/go/doc/comment/old_test.go @@ -0,0 +1,80 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// These tests are carried forward from the old go/doc implementation. + +package comment + +import "testing" + +var oldHeadingTests = []struct { + line string + ok bool +}{ + {"Section", true}, + {"A typical usage", true}, + {"ΔΛΞ is Greek", true}, + {"Foo 42", true}, + {"", false}, + {"section", false}, + {"A typical usage:", false}, + {"This code:", false}, + {"δ is Greek", false}, + {"Foo §", false}, + {"Fermat's Last Sentence", true}, + {"Fermat's", true}, + {"'sX", false}, + {"Ted 'Too' Bar", false}, + {"Use n+m", false}, + {"Scanning:", false}, + {"N:M", false}, +} + +func TestIsOldHeading(t *testing.T) { + for _, tt := range oldHeadingTests { + if isOldHeading(tt.line, []string{"Text.", "", tt.line, "", "Text."}, 2) != tt.ok { + t.Errorf("isOldHeading(%q) = %v, want %v", tt.line, !tt.ok, tt.ok) + } + } +} + +var autoURLTests = []struct { + in, out string +}{ + {"", ""}, + {"http://[::1]:8080/foo.txt", "http://[::1]:8080/foo.txt"}, + {"https://www.google.com) after", "https://www.google.com"}, + {"https://www.google.com:30/x/y/z:b::c. After", "https://www.google.com:30/x/y/z:b::c"}, + {"http://www.google.com/path/:;!-/?query=%34b#093124", "http://www.google.com/path/:;!-/?query=%34b#093124"}, + {"http://www.google.com/path/:;!-/?query=%34bar#093124", "http://www.google.com/path/:;!-/?query=%34bar#093124"}, + {"http://www.google.com/index.html! After", "http://www.google.com/index.html"}, + {"http://www.google.com/", "http://www.google.com/"}, + {"https://www.google.com/", "https://www.google.com/"}, + {"http://www.google.com/path.", "http://www.google.com/path"}, + {"http://en.wikipedia.org/wiki/Camellia_(cipher)", "http://en.wikipedia.org/wiki/Camellia_(cipher)"}, + {"http://www.google.com/)", "http://www.google.com/"}, + {"http://gmail.com)", "http://gmail.com"}, + {"http://gmail.com))", "http://gmail.com"}, + {"http://gmail.com ((http://gmail.com)) ()", "http://gmail.com"}, + {"http://example.com/ quux!", "http://example.com/"}, + {"http://example.com/%2f/ /world.", "http://example.com/%2f/"}, + {"http: ipsum //host/path", ""}, + {"javascript://is/not/linked", ""}, + {"http://foo", "http://foo"}, + {"https://www.example.com/person/][Person Name]]", "https://www.example.com/person/"}, + {"http://golang.org/)", "http://golang.org/"}, + {"http://golang.org/hello())", "http://golang.org/hello()"}, + {"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", "http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD"}, + {"https://foo.bar/bal/x(])", "https://foo.bar/bal/x"}, // inner ] causes (]) to be cut off from URL + {"http://bar(])", "http://bar"}, // same +} + +func TestAutoURL(t *testing.T) { + for _, tt := range autoURLTests { + url, ok := autoURL(tt.in) + if url != tt.out || ok != (tt.out != "") { + t.Errorf("autoURL(%q) = %q, %v, want %q, %v", tt.in, url, ok, tt.out, tt.out != "") + } + } +} diff --git a/src/go/doc/comment/parse.go b/src/go/doc/comment/parse.go new file mode 100644 index 0000000..62a0f8f --- /dev/null +++ b/src/go/doc/comment/parse.go @@ -0,0 +1,1262 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "sort" + "strings" + "unicode" + "unicode/utf8" +) + +// A Doc is a parsed Go doc comment. +type Doc struct { + // Content is the sequence of content blocks in the comment. + Content []Block + + // Links is the link definitions in the comment. + Links []*LinkDef +} + +// A LinkDef is a single link definition. +type LinkDef struct { + Text string // the link text + URL string // the link URL + Used bool // whether the comment uses the definition +} + +// A Block is block-level content in a doc comment, +// one of [*Code], [*Heading], [*List], or [*Paragraph]. +type Block interface { + block() +} + +// A Heading is a doc comment heading. +type Heading struct { + Text []Text // the heading text +} + +func (*Heading) block() {} + +// A List is a numbered or bullet list. +// Lists are always non-empty: len(Items) > 0. +// In a numbered list, every Items[i].Number is a non-empty string. +// In a bullet list, every Items[i].Number is an empty string. +type List struct { + // Items is the list items. + Items []*ListItem + + // ForceBlankBefore indicates that the list must be + // preceded by a blank line when reformatting the comment, + // overriding the usual conditions. See the BlankBefore method. + // + // The comment parser sets ForceBlankBefore for any list + // that is preceded by a blank line, to make sure + // the blank line is preserved when printing. + ForceBlankBefore bool + + // ForceBlankBetween indicates that list items must be + // separated by blank lines when reformatting the comment, + // overriding the usual conditions. See the BlankBetween method. + // + // The comment parser sets ForceBlankBetween for any list + // that has a blank line between any two of its items, to make sure + // the blank lines are preserved when printing. + ForceBlankBetween bool +} + +func (*List) block() {} + +// BlankBefore reports whether a reformatting of the comment +// should include a blank line before the list. +// The default rule is the same as for [BlankBetween]: +// if the list item content contains any blank lines +// (meaning at least one item has multiple paragraphs) +// then the list itself must be preceded by a blank line. +// A preceding blank line can be forced by setting [List].ForceBlankBefore. +func (l *List) BlankBefore() bool { + return l.ForceBlankBefore || l.BlankBetween() +} + +// BlankBetween reports whether a reformatting of the comment +// should include a blank line between each pair of list items. +// The default rule is that if the list item content contains any blank lines +// (meaning at least one item has multiple paragraphs) +// then list items must themselves be separated by blank lines. +// Blank line separators can be forced by setting [List].ForceBlankBetween. +func (l *List) BlankBetween() bool { + if l.ForceBlankBetween { + return true + } + for _, item := range l.Items { + if len(item.Content) != 1 { + // Unreachable for parsed comments today, + // since the only way to get multiple item.Content + // is multiple paragraphs, which must have been + // separated by a blank line. + return true + } + } + return false +} + +// A ListItem is a single item in a numbered or bullet list. +type ListItem struct { + // Number is a decimal string in a numbered list + // or an empty string in a bullet list. + Number string // "1", "2", ...; "" for bullet list + + // Content is the list content. + // Currently, restrictions in the parser and printer + // require every element of Content to be a *Paragraph. + Content []Block // Content of this item. +} + +// A Paragraph is a paragraph of text. +type Paragraph struct { + Text []Text +} + +func (*Paragraph) block() {} + +// A Code is a preformatted code block. +type Code struct { + // Text is the preformatted text, ending with a newline character. + // It may be multiple lines, each of which ends with a newline character. + // It is never empty, nor does it start or end with a blank line. + Text string +} + +func (*Code) block() {} + +// A Text is text-level content in a doc comment, +// one of [Plain], [Italic], [*Link], or [*DocLink]. +type Text interface { + text() +} + +// A Plain is a string rendered as plain text (not italicized). +type Plain string + +func (Plain) text() {} + +// An Italic is a string rendered as italicized text. +type Italic string + +func (Italic) text() {} + +// A Link is a link to a specific URL. +type Link struct { + Auto bool // is this an automatic (implicit) link of a literal URL? + Text []Text // text of link + URL string // target URL of link +} + +func (*Link) text() {} + +// A DocLink is a link to documentation for a Go package or symbol. +type DocLink struct { + Text []Text // text of link + + // ImportPath, Recv, and Name identify the Go package or symbol + // that is the link target. The potential combinations of + // non-empty fields are: + // - ImportPath: a link to another package + // - ImportPath, Name: a link to a const, func, type, or var in another package + // - ImportPath, Recv, Name: a link to a method in another package + // - Name: a link to a const, func, type, or var in this package + // - Recv, Name: a link to a method in this package + ImportPath string // import path + Recv string // receiver type, without any pointer star, for methods + Name string // const, func, type, var, or method name +} + +func (*DocLink) text() {} + +// A Parser is a doc comment parser. +// The fields in the struct can be filled in before calling Parse +// in order to customize the details of the parsing process. +type Parser struct { + // Words is a map of Go identifier words that + // should be italicized and potentially linked. + // If Words[w] is the empty string, then the word w + // is only italicized. Otherwise it is linked, using + // Words[w] as the link target. + // Words corresponds to the [go/doc.ToHTML] words parameter. + Words map[string]string + + // LookupPackage resolves a package name to an import path. + // + // If LookupPackage(name) returns ok == true, then [name] + // (or [name.Sym] or [name.Sym.Method]) + // is considered a documentation link to importPath's package docs. + // It is valid to return "", true, in which case name is considered + // to refer to the current package. + // + // If LookupPackage(name) returns ok == false, + // then [name] (or [name.Sym] or [name.Sym.Method]) + // will not be considered a documentation link, + // except in the case where name is the full (but single-element) import path + // of a package in the standard library, such as in [math] or [io.Reader]. + // LookupPackage is still called for such names, + // in order to permit references to imports of other packages + // with the same package names. + // + // Setting LookupPackage to nil is equivalent to setting it to + // a function that always returns "", false. + LookupPackage func(name string) (importPath string, ok bool) + + // LookupSym reports whether a symbol name or method name + // exists in the current package. + // + // If LookupSym("", "Name") returns true, then [Name] + // is considered a documentation link for a const, func, type, or var. + // + // Similarly, if LookupSym("Recv", "Name") returns true, + // then [Recv.Name] is considered a documentation link for + // type Recv's method Name. + // + // Setting LookupSym to nil is equivalent to setting it to a function + // that always returns false. + LookupSym func(recv, name string) (ok bool) +} + +// parseDoc is parsing state for a single doc comment. +type parseDoc struct { + *Parser + *Doc + links map[string]*LinkDef + lines []string + lookupSym func(recv, name string) bool +} + +// lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Name.Recv]. +// If pkg has a slash, it is assumed to be the full import path and is returned with ok = true. +// +// Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand"). +// d.LookupPackage provides a way for the caller to allow resolving such names with reference +// to the imports in the surrounding package. +// +// There is one collision between these two cases: single-element standard library names +// like "math" are full import paths but don't contain slashes. We let d.LookupPackage have +// the first chance to resolve it, in case there's a different package imported as math, +// and otherwise we refer to a built-in list of single-element standard library package names. +func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) { + if strings.Contains(pkg, "/") { // assume a full import path + if validImportPath(pkg) { + return pkg, true + } + return "", false + } + if d.LookupPackage != nil { + // Give LookupPackage a chance. + if path, ok := d.LookupPackage(pkg); ok { + return path, true + } + } + return DefaultLookupPackage(pkg) +} + +func isStdPkg(path string) bool { + // TODO(rsc): Use sort.Find once we don't have to worry about + // copying this code into older Go environments. + i := sort.Search(len(stdPkgs), func(i int) bool { return stdPkgs[i] >= path }) + return i < len(stdPkgs) && stdPkgs[i] == path +} + +// DefaultLookupPackage is the default package lookup +// function, used when [Parser].LookupPackage is nil. +// It recognizes names of the packages from the standard +// library with single-element import paths, such as math, +// which would otherwise be impossible to name. +// +// Note that the go/doc package provides a more sophisticated +// lookup based on the imports used in the current package. +func DefaultLookupPackage(name string) (importPath string, ok bool) { + if isStdPkg(name) { + return name, true + } + return "", false +} + +// Parse parses the doc comment text and returns the *Doc form. +// Comment markers (/* // and */) in the text must have already been removed. +func (p *Parser) Parse(text string) *Doc { + lines := unindent(strings.Split(text, "\n")) + d := &parseDoc{ + Parser: p, + Doc: new(Doc), + links: make(map[string]*LinkDef), + lines: lines, + lookupSym: func(recv, name string) bool { return false }, + } + if p.LookupSym != nil { + d.lookupSym = p.LookupSym + } + + // First pass: break into block structure and collect known links. + // The text is all recorded as Plain for now. + var prev span + for _, s := range parseSpans(lines) { + var b Block + switch s.kind { + default: + panic("go/doc/comment: internal error: unknown span kind") + case spanList: + b = d.list(lines[s.start:s.end], prev.end < s.start) + case spanCode: + b = d.code(lines[s.start:s.end]) + case spanOldHeading: + b = d.oldHeading(lines[s.start]) + case spanHeading: + b = d.heading(lines[s.start]) + case spanPara: + b = d.paragraph(lines[s.start:s.end]) + } + if b != nil { + d.Content = append(d.Content, b) + } + prev = s + } + + // Second pass: interpret all the Plain text now that we know the links. + for _, b := range d.Content { + switch b := b.(type) { + case *Paragraph: + b.Text = d.parseLinkedText(string(b.Text[0].(Plain))) + case *List: + for _, i := range b.Items { + for _, c := range i.Content { + p := c.(*Paragraph) + p.Text = d.parseLinkedText(string(p.Text[0].(Plain))) + } + } + } + } + + return d.Doc +} + +// A span represents a single span of comment lines (lines[start:end]) +// of an identified kind (code, heading, paragraph, and so on). +type span struct { + start int + end int + kind spanKind +} + +// A spanKind describes the kind of span. +type spanKind int + +const ( + _ spanKind = iota + spanCode + spanHeading + spanList + spanOldHeading + spanPara +) + +func parseSpans(lines []string) []span { + var spans []span + + // The loop may process a line twice: once as unindented + // and again forced indented. So the maximum expected + // number of iterations is 2*len(lines). The repeating logic + // can be subtle, though, and to protect against introduction + // of infinite loops in future changes, we watch to see that + // we are not looping too much. A panic is better than a + // quiet infinite loop. + watchdog := 2 * len(lines) + + i := 0 + forceIndent := 0 +Spans: + for { + // Skip blank lines. + for i < len(lines) && lines[i] == "" { + i++ + } + if i >= len(lines) { + break + } + if watchdog--; watchdog < 0 { + panic("go/doc/comment: internal error: not making progress") + } + + var kind spanKind + start := i + end := i + if i < forceIndent || indented(lines[i]) { + // Indented (or force indented). + // Ends before next unindented. (Blank lines are OK.) + // If this is an unindented list that we are heuristically treating as indented, + // then accept unindented list item lines up to the first blank lines. + // The heuristic is disabled at blank lines to contain its effect + // to non-gofmt'ed sections of the comment. + unindentedListOK := isList(lines[i]) && i < forceIndent + i++ + for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) { + if lines[i] == "" { + unindentedListOK = false + } + i++ + } + + // Drop trailing blank lines. + end = i + for end > start && lines[end-1] == "" { + end-- + } + + // If indented lines are followed (without a blank line) + // by an unindented line ending in a brace, + // take that one line too. This fixes the common mistake + // of pasting in something like + // + // func main() { + // fmt.Println("hello, world") + // } + // + // and forgetting to indent it. + // The heuristic will never trigger on a gofmt'ed comment, + // because any gofmt'ed code block or list would be + // followed by a blank line or end of comment. + if end < len(lines) && strings.HasPrefix(lines[end], "}") { + end++ + } + + if isList(lines[start]) { + kind = spanList + } else { + kind = spanCode + } + } else { + // Unindented. Ends at next blank or indented line. + i++ + for i < len(lines) && lines[i] != "" && !indented(lines[i]) { + i++ + } + end = i + + // If unindented lines are followed (without a blank line) + // by an indented line that would start a code block, + // check whether the final unindented lines + // should be left for the indented section. + // This can happen for the common mistakes of + // unindented code or unindented lists. + // The heuristic will never trigger on a gofmt'ed comment, + // because any gofmt'ed code block would have a blank line + // preceding it after the unindented lines. + if i < len(lines) && lines[i] != "" && !isList(lines[i]) { + switch { + case isList(lines[i-1]): + // If the final unindented line looks like a list item, + // this may be the first indented line wrap of + // a mistakenly unindented list. + // Leave all the unindented list items. + forceIndent = end + end-- + for end > start && isList(lines[end-1]) { + end-- + } + + case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`): + // If the final unindented line ended in { or \ + // it is probably the start of a misindented code block. + // Give the user a single line fix. + // Often that's enough; if not, the user can fix the others themselves. + forceIndent = end + end-- + } + + if start == end && forceIndent > start { + i = start + continue Spans + } + } + + // Span is either paragraph or heading. + if end-start == 1 && isHeading(lines[start]) { + kind = spanHeading + } else if end-start == 1 && isOldHeading(lines[start], lines, start) { + kind = spanOldHeading + } else { + kind = spanPara + } + } + + spans = append(spans, span{start, end, kind}) + i = end + } + + return spans +} + +// indented reports whether line is indented +// (starts with a leading space or tab). +func indented(line string) bool { + return line != "" && (line[0] == ' ' || line[0] == '\t') +} + +// unindent removes any common space/tab prefix +// from each line in lines, returning a copy of lines in which +// those prefixes have been trimmed from each line. +// It also replaces any lines containing only spaces with blank lines (empty strings). +func unindent(lines []string) []string { + // Trim leading and trailing blank lines. + for len(lines) > 0 && isBlank(lines[0]) { + lines = lines[1:] + } + for len(lines) > 0 && isBlank(lines[len(lines)-1]) { + lines = lines[:len(lines)-1] + } + if len(lines) == 0 { + return nil + } + + // Compute and remove common indentation. + prefix := leadingSpace(lines[0]) + for _, line := range lines[1:] { + if !isBlank(line) { + prefix = commonPrefix(prefix, leadingSpace(line)) + } + } + + out := make([]string, len(lines)) + for i, line := range lines { + line = strings.TrimPrefix(line, prefix) + if strings.TrimSpace(line) == "" { + line = "" + } + out[i] = line + } + for len(out) > 0 && out[0] == "" { + out = out[1:] + } + for len(out) > 0 && out[len(out)-1] == "" { + out = out[:len(out)-1] + } + return out +} + +// isBlank reports whether s is a blank line. +func isBlank(s string) bool { + return len(s) == 0 || (len(s) == 1 && s[0] == '\n') +} + +// commonPrefix returns the longest common prefix of a and b. +func commonPrefix(a, b string) string { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] { + i++ + } + return a[0:i] +} + +// leadingSpace returns the longest prefix of s consisting of spaces and tabs. +func leadingSpace(s string) string { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return s[:i] +} + +// isOldHeading reports whether line is an old-style section heading. +// line is all[off]. +func isOldHeading(line string, all []string, off int) bool { + if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" { + return false + } + + line = strings.TrimSpace(line) + + // a heading must start with an uppercase letter + r, _ := utf8.DecodeRuneInString(line) + if !unicode.IsLetter(r) || !unicode.IsUpper(r) { + return false + } + + // it must end in a letter or digit: + r, _ = utf8.DecodeLastRuneInString(line) + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return false + } + + // exclude lines with illegal characters. we allow "()," + if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { + return false + } + + // allow "'" for possessive "'s" only + for b := line; ; { + var ok bool + if _, b, ok = strings.Cut(b, "'"); !ok { + break + } + if b != "s" && !strings.HasPrefix(b, "s ") { + return false // ' not followed by s and then end-of-word + } + } + + // allow "." when followed by non-space + for b := line; ; { + var ok bool + if _, b, ok = strings.Cut(b, "."); !ok { + break + } + if b == "" || strings.HasPrefix(b, " ") { + return false // not followed by non-space + } + } + + return true +} + +// oldHeading returns the *Heading for the given old-style section heading line. +func (d *parseDoc) oldHeading(line string) Block { + return &Heading{Text: []Text{Plain(strings.TrimSpace(line))}} +} + +// isHeading reports whether line is a new-style section heading. +func isHeading(line string) bool { + return len(line) >= 2 && + line[0] == '#' && + (line[1] == ' ' || line[1] == '\t') && + strings.TrimSpace(line) != "#" +} + +// heading returns the *Heading for the given new-style section heading line. +func (d *parseDoc) heading(line string) Block { + return &Heading{Text: []Text{Plain(strings.TrimSpace(line[1:]))}} +} + +// code returns a code block built from the lines. +func (d *parseDoc) code(lines []string) *Code { + body := unindent(lines) + body = append(body, "") // to get final \n from Join + return &Code{Text: strings.Join(body, "\n")} +} + +// paragraph returns a paragraph block built from the lines. +// If the lines are link definitions, paragraph adds them to d and returns nil. +func (d *parseDoc) paragraph(lines []string) Block { + // Is this a block of known links? Handle. + var defs []*LinkDef + for _, line := range lines { + def, ok := parseLink(line) + if !ok { + goto NoDefs + } + defs = append(defs, def) + } + for _, def := range defs { + d.Links = append(d.Links, def) + if d.links[def.Text] == nil { + d.links[def.Text] = def + } + } + return nil +NoDefs: + + return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}} +} + +// parseLink parses a single link definition line: +// +// [text]: url +// +// It returns the link definition and whether the line was well formed. +func parseLink(line string) (*LinkDef, bool) { + if line == "" || line[0] != '[' { + return nil, false + } + i := strings.Index(line, "]:") + if i < 0 || i+3 >= len(line) || (line[i+2] != ' ' && line[i+2] != '\t') { + return nil, false + } + + text := line[1:i] + url := strings.TrimSpace(line[i+3:]) + j := strings.Index(url, "://") + if j < 0 || !isScheme(url[:j]) { + return nil, false + } + + // Line has right form and has valid scheme://. + // That's good enough for us - we are not as picky + // about the characters beyond the :// as we are + // when extracting inline URLs from text. + return &LinkDef{Text: text, URL: url}, true +} + +// list returns a list built from the indented lines, +// using forceBlankBefore as the value of the List's ForceBlankBefore field. +func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List { + num, _, _ := listMarker(lines[0]) + var ( + list *List = &List{ForceBlankBefore: forceBlankBefore} + item *ListItem + text []string + ) + flush := func() { + if item != nil { + if para := d.paragraph(text); para != nil { + item.Content = append(item.Content, para) + } + } + text = nil + } + + for _, line := range lines { + if n, after, ok := listMarker(line); ok && (n != "") == (num != "") { + // start new list item + flush() + + item = &ListItem{Number: n} + list.Items = append(list.Items, item) + line = after + } + line = strings.TrimSpace(line) + if line == "" { + list.ForceBlankBetween = true + flush() + continue + } + text = append(text, strings.TrimSpace(line)) + } + flush() + return list +} + +// listMarker parses the line as beginning with a list marker. +// If it can do that, it returns the numeric marker ("" for a bullet list), +// the rest of the line, and ok == true. +// Otherwise, it returns "", "", false. +func listMarker(line string) (num, rest string, ok bool) { + line = strings.TrimSpace(line) + if line == "" { + return "", "", false + } + + // Can we find a marker? + if r, n := utf8.DecodeRuneInString(line); r == '•' || r == '*' || r == '+' || r == '-' { + num, rest = "", line[n:] + } else if '0' <= line[0] && line[0] <= '9' { + n := 1 + for n < len(line) && '0' <= line[n] && line[n] <= '9' { + n++ + } + if n >= len(line) || (line[n] != '.' && line[n] != ')') { + return "", "", false + } + num, rest = line[:n], line[n+1:] + } else { + return "", "", false + } + + if !indented(rest) || strings.TrimSpace(rest) == "" { + return "", "", false + } + + return num, rest, true +} + +// isList reports whether the line is the first line of a list, +// meaning starts with a list marker after any indentation. +// (The caller is responsible for checking the line is indented, as appropriate.) +func isList(line string) bool { + _, _, ok := listMarker(line) + return ok +} + +// parseLinkedText parses text that is allowed to contain explicit links, +// such as [math.Sin] or [Go home page], into a slice of Text items. +// +// A “pkg” is only assumed to be a full import path if it starts with +// a domain name (a path element with a dot) or is one of the packages +// from the standard library (“[os]”, “[encoding/json]”, and so on). +// To avoid problems with maps, generics, and array types, doc links +// must be both preceded and followed by punctuation, spaces, tabs, +// or the start or end of a line. An example problem would be treating +// map[ast.Expr]TypeAndValue as containing a link. +func (d *parseDoc) parseLinkedText(text string) []Text { + var out []Text + wrote := 0 + flush := func(i int) { + if wrote < i { + out = d.parseText(out, text[wrote:i], true) + wrote = i + } + } + + start := -1 + var buf []byte + for i := 0; i < len(text); i++ { + c := text[i] + if c == '\n' || c == '\t' { + c = ' ' + } + switch c { + case '[': + start = i + case ']': + if start >= 0 { + if def, ok := d.links[string(buf)]; ok { + def.Used = true + flush(start) + out = append(out, &Link{ + Text: d.parseText(nil, text[start+1:i], false), + URL: def.URL, + }) + wrote = i + 1 + } else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok { + flush(start) + link.Text = d.parseText(nil, text[start+1:i], false) + out = append(out, link) + wrote = i + 1 + } + } + start = -1 + buf = buf[:0] + } + if start >= 0 && i != start { + buf = append(buf, c) + } + } + + flush(len(text)) + return out +} + +// docLink parses text, which was found inside [ ] brackets, +// as a doc link if possible, returning the DocLink and ok == true +// or else nil, false. +// The before and after strings are the text before the [ and after the ] +// on the same line. Doc links must be preceded and followed by +// punctuation, spaces, tabs, or the start or end of a line. +func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) { + if before != "" { + r, _ := utf8.DecodeLastRuneInString(before) + if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { + return nil, false + } + } + if after != "" { + r, _ := utf8.DecodeRuneInString(after) + if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { + return nil, false + } + } + text = strings.TrimPrefix(text, "*") + pkg, name, ok := splitDocName(text) + var recv string + if ok { + pkg, recv, _ = splitDocName(pkg) + } + if pkg != "" { + if pkg, ok = d.lookupPkg(pkg); !ok { + return nil, false + } + } else { + if ok = d.lookupSym(recv, name); !ok { + return nil, false + } + } + link = &DocLink{ + ImportPath: pkg, + Recv: recv, + Name: name, + } + return link, true +} + +// If text is of the form before.Name, where Name is a capitalized Go identifier, +// then splitDocName returns before, name, true. +// Otherwise it returns text, "", false. +func splitDocName(text string) (before, name string, foundDot bool) { + i := strings.LastIndex(text, ".") + name = text[i+1:] + if !isName(name) { + return text, "", false + } + if i >= 0 { + before = text[:i] + } + return before, name, true +} + +// parseText parses s as text and returns the result of appending +// those parsed Text elements to out. +// parseText does not handle explicit links like [math.Sin] or [Go home page]: +// those are handled by parseLinkedText. +// If autoLink is true, then parseText recognizes URLs and words from d.Words +// and converts those to links as appropriate. +func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text { + var w strings.Builder + wrote := 0 + writeUntil := func(i int) { + w.WriteString(s[wrote:i]) + wrote = i + } + flush := func(i int) { + writeUntil(i) + if w.Len() > 0 { + out = append(out, Plain(w.String())) + w.Reset() + } + } + for i := 0; i < len(s); { + t := s[i:] + if autoLink { + if url, ok := autoURL(t); ok { + flush(i) + // Note: The old comment parser would look up the URL in words + // and replace the target with words[URL] if it was non-empty. + // That would allow creating links that display as one URL but + // when clicked go to a different URL. Not sure what the point + // of that is, so we're not doing that lookup here. + out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url}) + i += len(url) + wrote = i + continue + } + if id, ok := ident(t); ok { + url, italics := d.Words[id] + if !italics { + i += len(id) + continue + } + flush(i) + if url == "" { + out = append(out, Italic(id)) + } else { + out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url}) + } + i += len(id) + wrote = i + continue + } + } + switch { + case strings.HasPrefix(t, "``"): + if len(t) >= 3 && t[2] == '`' { + // Do not convert `` inside ```, in case people are mistakenly writing Markdown. + i += 3 + for i < len(t) && t[i] == '`' { + i++ + } + break + } + writeUntil(i) + w.WriteRune('“') + i += 2 + wrote = i + case strings.HasPrefix(t, "''"): + writeUntil(i) + w.WriteRune('”') + i += 2 + wrote = i + default: + i++ + } + } + flush(len(s)) + return out +} + +// autoURL checks whether s begins with a URL that should be hyperlinked. +// If so, it returns the URL, which is a prefix of s, and ok == true. +// Otherwise it returns "", false. +// The caller should skip over the first len(url) bytes of s +// before further processing. +func autoURL(s string) (url string, ok bool) { + // Find the ://. Fast path to pick off non-URL, + // since we call this at every position in the string. + // The shortest possible URL is ftp://x, 7 bytes. + var i int + switch { + case len(s) < 7: + return "", false + case s[3] == ':': + i = 3 + case s[4] == ':': + i = 4 + case s[5] == ':': + i = 5 + case s[6] == ':': + i = 6 + default: + return "", false + } + if i+3 > len(s) || s[i:i+3] != "://" { + return "", false + } + + // Check valid scheme. + if !isScheme(s[:i]) { + return "", false + } + + // Scan host part. Must have at least one byte, + // and must start and end in non-punctuation. + i += 3 + if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) { + return "", false + } + i++ + end := i + for i < len(s) && isHost(s[i]) { + if !isPunct(s[i]) { + end = i + 1 + } + i++ + } + i = end + + // At this point we are definitely returning a URL (scheme://host). + // We just have to find the longest path we can add to it. + // Heuristics abound. + // We allow parens, braces, and brackets, + // but only if they match (#5043, #22285). + // We allow .,:;?! in the path but not at the end, + // to avoid end-of-sentence punctuation (#18139, #16565). + stk := []byte{} + end = i +Path: + for ; i < len(s); i++ { + if isPunct(s[i]) { + continue + } + if !isPath(s[i]) { + break + } + switch s[i] { + case '(': + stk = append(stk, ')') + case '{': + stk = append(stk, '}') + case '[': + stk = append(stk, ']') + case ')', '}', ']': + if len(stk) == 0 || stk[len(stk)-1] != s[i] { + break Path + } + stk = stk[:len(stk)-1] + } + if len(stk) == 0 { + end = i + 1 + } + } + + return s[:end], true +} + +// isScheme reports whether s is a recognized URL scheme. +// Note that if strings of new length (beyond 3-7) +// are added here, the fast path at the top of autoURL will need updating. +func isScheme(s string) bool { + switch s { + case "file", + "ftp", + "gopher", + "http", + "https", + "mailto", + "nntp": + return true + } + return false +} + +// isHost reports whether c is a byte that can appear in a URL host, +// like www.example.com or user@[::1]:8080 +func isHost(c byte) bool { + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c > 128, then 1<<c and 1<<(c-64) will both be zero, + // and this function will return false. + const mask = 0 | + (1<<26-1)<<'A' | + (1<<26-1)<<'a' | + (1<<10-1)<<'0' | + 1<<'_' | + 1<<'@' | + 1<<'-' | + 1<<'.' | + 1<<'[' | + 1<<']' | + 1<<':' + + return ((uint64(1)<<c)&(mask&(1<<64-1)) | + (uint64(1)<<(c-64))&(mask>>64)) != 0 +} + +// isPunct reports whether c is a punctuation byte that can appear +// inside a path but not at the end. +func isPunct(c byte) bool { + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c > 128, then 1<<c and 1<<(c-64) will both be zero, + // and this function will return false. + const mask = 0 | + 1<<'.' | + 1<<',' | + 1<<':' | + 1<<';' | + 1<<'?' | + 1<<'!' + + return ((uint64(1)<<c)&(mask&(1<<64-1)) | + (uint64(1)<<(c-64))&(mask>>64)) != 0 +} + +// isPath reports whether c is a (non-punctuation) path byte. +func isPath(c byte) bool { + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c > 128, then 1<<c and 1<<(c-64) will both be zero, + // and this function will return false. + const mask = 0 | + (1<<26-1)<<'A' | + (1<<26-1)<<'a' | + (1<<10-1)<<'0' | + 1<<'$' | + 1<<'\'' | + 1<<'(' | + 1<<')' | + 1<<'*' | + 1<<'+' | + 1<<'&' | + 1<<'#' | + 1<<'=' | + 1<<'@' | + 1<<'~' | + 1<<'_' | + 1<<'/' | + 1<<'-' | + 1<<'[' | + 1<<']' | + 1<<'{' | + 1<<'}' | + 1<<'%' + + return ((uint64(1)<<c)&(mask&(1<<64-1)) | + (uint64(1)<<(c-64))&(mask>>64)) != 0 +} + +// isName reports whether s is a capitalized Go identifier (like Name). +func isName(s string) bool { + t, ok := ident(s) + if !ok || t != s { + return false + } + r, _ := utf8.DecodeRuneInString(s) + return unicode.IsUpper(r) +} + +// ident checks whether s begins with a Go identifier. +// If so, it returns the identifier, which is a prefix of s, and ok == true. +// Otherwise it returns "", false. +// The caller should skip over the first len(id) bytes of s +// before further processing. +func ident(s string) (id string, ok bool) { + // Scan [\pL_][\pL_0-9]* + n := 0 + for n < len(s) { + if c := s[n]; c < utf8.RuneSelf { + if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') { + n++ + continue + } + break + } + r, nr := utf8.DecodeRuneInString(s[n:]) + if unicode.IsLetter(r) { + n += nr + continue + } + break + } + return s[:n], n > 0 +} + +// isIdentASCII reports whether c is an ASCII identifier byte. +func isIdentASCII(c byte) bool { + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c > 128, then 1<<c and 1<<(c-64) will both be zero, + // and this function will return false. + const mask = 0 | + (1<<26-1)<<'A' | + (1<<26-1)<<'a' | + (1<<10-1)<<'0' | + 1<<'_' + + return ((uint64(1)<<c)&(mask&(1<<64-1)) | + (uint64(1)<<(c-64))&(mask>>64)) != 0 +} + +// validImportPath reports whether path is a valid import path. +// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath. +func validImportPath(path string) bool { + if !utf8.ValidString(path) { + return false + } + if path == "" { + return false + } + if path[0] == '-' { + return false + } + if strings.Contains(path, "//") { + return false + } + if path[len(path)-1] == '/' { + return false + } + elemStart := 0 + for i, r := range path { + if r == '/' { + if !validImportPathElem(path[elemStart:i]) { + return false + } + elemStart = i + 1 + } + } + return validImportPathElem(path[elemStart:]) +} + +func validImportPathElem(elem string) bool { + if elem == "" || elem[0] == '.' || elem[len(elem)-1] == '.' { + return false + } + for i := 0; i < len(elem); i++ { + if !importPathOK(elem[i]) { + return false + } + } + return true +} + +func importPathOK(c byte) bool { + // mask is a 128-bit bitmap with 1s for allowed bytes, + // so that the byte c can be tested with a shift and an and. + // If c > 128, then 1<<c and 1<<(c-64) will both be zero, + // and this function will return false. + const mask = 0 | + (1<<26-1)<<'A' | + (1<<26-1)<<'a' | + (1<<10-1)<<'0' | + 1<<'-' | + 1<<'.' | + 1<<'~' | + 1<<'_' | + 1<<'+' + + return ((uint64(1)<<c)&(mask&(1<<64-1)) | + (uint64(1)<<(c-64))&(mask>>64)) != 0 +} diff --git a/src/go/doc/comment/parse_test.go b/src/go/doc/comment/parse_test.go new file mode 100644 index 0000000..bce733e --- /dev/null +++ b/src/go/doc/comment/parse_test.go @@ -0,0 +1,12 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import "testing" + +// See https://golang.org/issue/52353 +func Test52353(t *testing.T) { + ident("𫕐ﯯ") +} diff --git a/src/go/doc/comment/print.go b/src/go/doc/comment/print.go new file mode 100644 index 0000000..4e9da3d --- /dev/null +++ b/src/go/doc/comment/print.go @@ -0,0 +1,290 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "fmt" + "strings" +) + +// A Printer is a doc comment printer. +// The fields in the struct can be filled in before calling +// any of the printing methods +// in order to customize the details of the printing process. +type Printer struct { + // HeadingLevel is the nesting level used for + // HTML and Markdown headings. + // If HeadingLevel is zero, it defaults to level 3, + // meaning to use <h3> and ###. + HeadingLevel int + + // HeadingID is a function that computes the heading ID + // (anchor tag) to use for the heading h when generating + // HTML and Markdown. If HeadingID returns an empty string, + // then the heading ID is omitted. + // If HeadingID is nil, h.DefaultID is used. + HeadingID func(h *Heading) string + + // DocLinkURL is a function that computes the URL for the given DocLink. + // If DocLinkURL is nil, then link.DefaultURL(p.DocLinkBaseURL) is used. + DocLinkURL func(link *DocLink) string + + // DocLinkBaseURL is used when DocLinkURL is nil, + // passed to [DocLink.DefaultURL] to construct a DocLink's URL. + // See that method's documentation for details. + DocLinkBaseURL string + + // TextPrefix is a prefix to print at the start of every line + // when generating text output using the Text method. + TextPrefix string + + // TextCodePrefix is the prefix to print at the start of each + // preformatted (code block) line when generating text output, + // instead of (not in addition to) TextPrefix. + // If TextCodePrefix is the empty string, it defaults to TextPrefix+"\t". + TextCodePrefix string + + // TextWidth is the maximum width text line to generate, + // measured in Unicode code points, + // excluding TextPrefix and the newline character. + // If TextWidth is zero, it defaults to 80 minus the number of code points in TextPrefix. + // If TextWidth is negative, there is no limit. + TextWidth int +} + +func (p *Printer) headingLevel() int { + if p.HeadingLevel <= 0 { + return 3 + } + return p.HeadingLevel +} + +func (p *Printer) headingID(h *Heading) string { + if p.HeadingID == nil { + return h.DefaultID() + } + return p.HeadingID(h) +} + +func (p *Printer) docLinkURL(link *DocLink) string { + if p.DocLinkURL != nil { + return p.DocLinkURL(link) + } + return link.DefaultURL(p.DocLinkBaseURL) +} + +// DefaultURL constructs and returns the documentation URL for l, +// using baseURL as a prefix for links to other packages. +// +// The possible forms returned by DefaultURL are: +// - baseURL/ImportPath, for a link to another package +// - baseURL/ImportPath#Name, for a link to a const, func, type, or var in another package +// - baseURL/ImportPath#Recv.Name, for a link to a method in another package +// - #Name, for a link to a const, func, type, or var in this package +// - #Recv.Name, for a link to a method in this package +// +// If baseURL ends in a trailing slash, then DefaultURL inserts +// a slash between ImportPath and # in the anchored forms. +// For example, here are some baseURL values and URLs they can generate: +// +// "/pkg/" → "/pkg/math/#Sqrt" +// "/pkg" → "/pkg/math#Sqrt" +// "/" → "/math/#Sqrt" +// "" → "/math#Sqrt" +func (l *DocLink) DefaultURL(baseURL string) string { + if l.ImportPath != "" { + slash := "" + if strings.HasSuffix(baseURL, "/") { + slash = "/" + } else { + baseURL += "/" + } + switch { + case l.Name == "": + return baseURL + l.ImportPath + slash + case l.Recv != "": + return baseURL + l.ImportPath + slash + "#" + l.Recv + "." + l.Name + default: + return baseURL + l.ImportPath + slash + "#" + l.Name + } + } + if l.Recv != "" { + return "#" + l.Recv + "." + l.Name + } + return "#" + l.Name +} + +// DefaultID returns the default anchor ID for the heading h. +// +// The default anchor ID is constructed by converting every +// rune that is not alphanumeric ASCII to an underscore +// and then adding the prefix “hdr-”. +// For example, if the heading text is “Go Doc Comments”, +// the default ID is “hdr-Go_Doc_Comments”. +func (h *Heading) DefaultID() string { + // Note: The “hdr-” prefix is important to avoid DOM clobbering attacks. + // See https://pkg.go.dev/github.com/google/safehtml#Identifier. + var out strings.Builder + var p textPrinter + p.oneLongLine(&out, h.Text) + s := strings.TrimSpace(out.String()) + if s == "" { + return "" + } + out.Reset() + out.WriteString("hdr-") + for _, r := range s { + if r < 0x80 && isIdentASCII(byte(r)) { + out.WriteByte(byte(r)) + } else { + out.WriteByte('_') + } + } + return out.String() +} + +type commentPrinter struct { + *Printer + headingPrefix string + needDoc map[string]bool +} + +// Comment returns the standard Go formatting of the Doc, +// without any comment markers. +func (p *Printer) Comment(d *Doc) []byte { + cp := &commentPrinter{Printer: p} + var out bytes.Buffer + for i, x := range d.Content { + if i > 0 && blankBefore(x) { + out.WriteString("\n") + } + cp.block(&out, x) + } + + // Print one block containing all the link definitions that were used, + // and then a second block containing all the unused ones. + // This makes it easy to clean up the unused ones: gofmt and + // delete the final block. And it's a nice visual signal without + // affecting the way the comment formats for users. + for i := 0; i < 2; i++ { + used := i == 0 + first := true + for _, def := range d.Links { + if def.Used == used { + if first { + out.WriteString("\n") + first = false + } + out.WriteString("[") + out.WriteString(def.Text) + out.WriteString("]: ") + out.WriteString(def.URL) + out.WriteString("\n") + } + } + } + + return out.Bytes() +} + +// blankBefore reports whether the block x requires a blank line before it. +// All blocks do, except for Lists that return false from x.BlankBefore(). +func blankBefore(x Block) bool { + if x, ok := x.(*List); ok { + return x.BlankBefore() + } + return true +} + +// block prints the block x to out. +func (p *commentPrinter) block(out *bytes.Buffer, x Block) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T", x) + + case *Paragraph: + p.text(out, "", x.Text) + out.WriteString("\n") + + case *Heading: + out.WriteString("# ") + p.text(out, "", x.Text) + out.WriteString("\n") + + case *Code: + md := x.Text + for md != "" { + var line string + line, md, _ = strings.Cut(md, "\n") + if line != "" { + out.WriteString("\t") + out.WriteString(line) + } + out.WriteString("\n") + } + + case *List: + loose := x.BlankBetween() + for i, item := range x.Items { + if i > 0 && loose { + out.WriteString("\n") + } + out.WriteString(" ") + if item.Number == "" { + out.WriteString(" - ") + } else { + out.WriteString(item.Number) + out.WriteString(". ") + } + for i, blk := range item.Content { + const fourSpace = " " + if i > 0 { + out.WriteString("\n" + fourSpace) + } + p.text(out, fourSpace, blk.(*Paragraph).Text) + out.WriteString("\n") + } + } + } +} + +// text prints the text sequence x to out. +func (p *commentPrinter) text(out *bytes.Buffer, indent string, x []Text) { + for _, t := range x { + switch t := t.(type) { + case Plain: + p.indent(out, indent, string(t)) + case Italic: + p.indent(out, indent, string(t)) + case *Link: + if t.Auto { + p.text(out, indent, t.Text) + } else { + out.WriteString("[") + p.text(out, indent, t.Text) + out.WriteString("]") + } + case *DocLink: + out.WriteString("[") + p.text(out, indent, t.Text) + out.WriteString("]") + } + } +} + +// indent prints s to out, indenting with the indent string +// after each newline in s. +func (p *commentPrinter) indent(out *bytes.Buffer, indent, s string) { + for s != "" { + line, rest, ok := strings.Cut(s, "\n") + out.WriteString(line) + if ok { + out.WriteString("\n") + out.WriteString(indent) + } + s = rest + } +} diff --git a/src/go/doc/comment/std.go b/src/go/doc/comment/std.go new file mode 100644 index 0000000..71f15f4 --- /dev/null +++ b/src/go/doc/comment/std.go @@ -0,0 +1,44 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by 'go generate' DO NOT EDIT. +//go:generate ./mkstd.sh + +package comment + +var stdPkgs = []string{ + "bufio", + "bytes", + "context", + "crypto", + "embed", + "encoding", + "errors", + "expvar", + "flag", + "fmt", + "hash", + "html", + "image", + "io", + "log", + "math", + "mime", + "net", + "os", + "path", + "plugin", + "reflect", + "regexp", + "runtime", + "sort", + "strconv", + "strings", + "sync", + "syscall", + "testing", + "time", + "unicode", + "unsafe", +} diff --git a/src/go/doc/comment/std_test.go b/src/go/doc/comment/std_test.go new file mode 100644 index 0000000..89206e6 --- /dev/null +++ b/src/go/doc/comment/std_test.go @@ -0,0 +1,34 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "internal/diff" + "internal/testenv" + "sort" + "strings" + "testing" +) + +func TestStd(t *testing.T) { + out, err := testenv.Command(t, testenv.GoToolPath(t), "list", "std").CombinedOutput() + if err != nil { + t.Fatalf("%v\n%s", err, out) + } + + var list []string + for _, pkg := range strings.Fields(string(out)) { + if !strings.Contains(pkg, "/") { + list = append(list, pkg) + } + } + sort.Strings(list) + + have := strings.Join(stdPkgs, "\n") + "\n" + want := strings.Join(list, "\n") + "\n" + if have != want { + t.Errorf("stdPkgs is out of date: regenerate with 'go generate'\n%s", diff.Diff("stdPkgs", []byte(have), "want", []byte(want))) + } +} diff --git a/src/go/doc/comment/testdata/README.md b/src/go/doc/comment/testdata/README.md new file mode 100644 index 0000000..d6f2c54 --- /dev/null +++ b/src/go/doc/comment/testdata/README.md @@ -0,0 +1,42 @@ +This directory contains test files (*.txt) for the comment parser. + +The files are in [txtar format](https://pkg.go.dev/golang.org/x/tools/txtar). +Consider this example: + + -- input -- + Hello. + -- gofmt -- + Hello. + -- html -- + <p>Hello. + -- markdown -- + Hello. + -- text -- + Hello. + +Each `-- name --` line introduces a new file with the given name. +The file named “input” must be first and contains the input to +[comment.Parser](https://pkg.go.dev/go/doc/comment/#Parser). + +The remaining files contain the expected output for the named format generated by +[comment.Printer](https://pkg.go.dev/go/doc/comment/#Printer): +“gofmt” for Printer.Comment (Go comment format, as used by gofmt), +“html” for Printer.HTML, “markdown” for Printer.Markdown, and “text” for Printer.Text. +The format can also be “dump” for a textual dump of the raw data structures. + +The text before the `-- input --` line, if present, is JSON to be unmarshalled +to initialize a comment.Printer. For example, this test case sets the Printer's +TextWidth field to 20: + + {"TextWidth": 20} + -- input -- + Package gob manages streams of gobs - binary values exchanged between an + Encoder (transmitter) and a Decoder (receiver). + -- text -- + Package gob + manages streams + of gobs - binary + values exchanged + between an Encoder + (transmitter) and a + Decoder (receiver). diff --git a/src/go/doc/comment/testdata/blank.txt b/src/go/doc/comment/testdata/blank.txt new file mode 100644 index 0000000..9049fde --- /dev/null +++ b/src/go/doc/comment/testdata/blank.txt @@ -0,0 +1,12 @@ +-- input -- + $ + Blank line at start and end. + $ +-- gofmt -- +Blank line at start and end. +-- text -- +Blank line at start and end. +-- markdown -- +Blank line at start and end. +-- html -- +<p>Blank line at start and end. diff --git a/src/go/doc/comment/testdata/code.txt b/src/go/doc/comment/testdata/code.txt new file mode 100644 index 0000000..06b1519 --- /dev/null +++ b/src/go/doc/comment/testdata/code.txt @@ -0,0 +1,94 @@ +-- input -- +Text. + A tab-indented + (no, not eight-space indented) + code block and haiku. +More text. + One space + is + enough + to + start + a + block. +More text. + + Blocks + can + + have + blank + lines. +-- gofmt -- +Text. + + A tab-indented + (no, not eight-space indented) + code block and haiku. + +More text. + + One space + is + enough + to + start + a + block. + +More text. + + Blocks + can + + have + blank + lines. +-- markdown -- +Text. + + A tab-indented + (no, not eight-space indented) + code block and haiku. + +More text. + + One space + is + enough + to + start + a + block. + +More text. + + Blocks + can + + have + blank + lines. +-- html -- +<p>Text. +<pre>A tab-indented +(no, not eight-space indented) +code block and haiku. +</pre> +<p>More text. +<pre>One space + is + enough + to + start + a + block. +</pre> +<p>More text. +<pre> Blocks + can + +have + blank + lines. +</pre> diff --git a/src/go/doc/comment/testdata/code2.txt b/src/go/doc/comment/testdata/code2.txt new file mode 100644 index 0000000..0810bed --- /dev/null +++ b/src/go/doc/comment/testdata/code2.txt @@ -0,0 +1,31 @@ +-- input -- +Text. + + A tab-indented + (no, not eight-space indented) + code block and haiku. + +More text. +-- gofmt -- +Text. + + A tab-indented + (no, not eight-space indented) + code block and haiku. + +More text. +-- markdown -- +Text. + + A tab-indented + (no, not eight-space indented) + code block and haiku. + +More text. +-- html -- +<p>Text. +<pre>A tab-indented +(no, not eight-space indented) +code block and haiku. +</pre> +<p>More text. diff --git a/src/go/doc/comment/testdata/code3.txt b/src/go/doc/comment/testdata/code3.txt new file mode 100644 index 0000000..4a96a0e --- /dev/null +++ b/src/go/doc/comment/testdata/code3.txt @@ -0,0 +1,33 @@ +-- input -- +Text. + + $ + A tab-indented + (surrounded by more blank lines) + code block and haiku. + $ + +More text. +-- gofmt -- +Text. + + A tab-indented + (surrounded by more blank lines) + code block and haiku. + +More text. +-- markdown -- +Text. + + A tab-indented + (surrounded by more blank lines) + code block and haiku. + +More text. +-- html -- +<p>Text. +<pre>A tab-indented +(surrounded by more blank lines) +code block and haiku. +</pre> +<p>More text. diff --git a/src/go/doc/comment/testdata/code4.txt b/src/go/doc/comment/testdata/code4.txt new file mode 100644 index 0000000..f128c9a --- /dev/null +++ b/src/go/doc/comment/testdata/code4.txt @@ -0,0 +1,38 @@ +-- input -- +To test, run this command: + go test -more + +Or, to test specific things, run this command: + +go test -more \ + -pkg first/package \ + -pkg second/package \ + -pkg third/package + +Happy testing! +-- gofmt -- +To test, run this command: + + go test -more + +Or, to test specific things, run this command: + + go test -more \ + -pkg first/package \ + -pkg second/package \ + -pkg third/package + +Happy testing! +-- markdown -- +To test, run this command: + + go test -more + +Or, to test specific things, run this command: + + go test -more \ + -pkg first/package \ + -pkg second/package \ + -pkg third/package + +Happy testing! diff --git a/src/go/doc/comment/testdata/code5.txt b/src/go/doc/comment/testdata/code5.txt new file mode 100644 index 0000000..0e340dd --- /dev/null +++ b/src/go/doc/comment/testdata/code5.txt @@ -0,0 +1,21 @@ +-- input -- +L1 +L2 +L3 +L4 +L5 +- L6 { + L7 +} +L8 +-- gofmt -- +L1 +L2 +L3 +L4 +L5 + - L6 { + L7 + } + +L8 diff --git a/src/go/doc/comment/testdata/code6.txt b/src/go/doc/comment/testdata/code6.txt new file mode 100644 index 0000000..d2915d1 --- /dev/null +++ b/src/go/doc/comment/testdata/code6.txt @@ -0,0 +1,24 @@ +-- input -- +Run this program: + +func main() { + fmt.Println("hello, world") +} + +Or this: + +go func() { + fmt.Println("hello, world") +}() +-- gofmt -- +Run this program: + + func main() { + fmt.Println("hello, world") + } + +Or this: + + go func() { + fmt.Println("hello, world") + }() diff --git a/src/go/doc/comment/testdata/crash1.txt b/src/go/doc/comment/testdata/crash1.txt new file mode 100644 index 0000000..6bb2f6f --- /dev/null +++ b/src/go/doc/comment/testdata/crash1.txt @@ -0,0 +1,16 @@ +-- input -- +[] + +[]: http:// +-- gofmt -- +[] + +[]: http:// +-- html -- +<p><a href="http://"></a> +-- markdown -- +[](http://) +-- text -- + + +[]: http:// diff --git a/src/go/doc/comment/testdata/doclink.txt b/src/go/doc/comment/testdata/doclink.txt new file mode 100644 index 0000000..a932347 --- /dev/null +++ b/src/go/doc/comment/testdata/doclink.txt @@ -0,0 +1,21 @@ +-- input -- +In this package, see [Doc] and [Parser.Parse]. +There is no [Undef] or [Undef.Method]. +See also the [comment] package, +especially [comment.Doc] and [comment.Parser.Parse]. +-- gofmt -- +In this package, see [Doc] and [Parser.Parse]. +There is no [Undef] or [Undef.Method]. +See also the [comment] package, +especially [comment.Doc] and [comment.Parser.Parse]. +-- text -- +In this package, see Doc and Parser.Parse. There is no [Undef] or +[Undef.Method]. See also the comment package, especially comment.Doc and +comment.Parser.Parse. +-- markdown -- +In this package, see [Doc](#Doc) and [Parser.Parse](#Parser.Parse). There is no \[Undef] or \[Undef.Method]. See also the [comment](/go/doc/comment) package, especially [comment.Doc](/go/doc/comment#Doc) and [comment.Parser.Parse](/go/doc/comment#Parser.Parse). +-- html -- +<p>In this package, see <a href="#Doc">Doc</a> and <a href="#Parser.Parse">Parser.Parse</a>. +There is no [Undef] or [Undef.Method]. +See also the <a href="/go/doc/comment">comment</a> package, +especially <a href="/go/doc/comment#Doc">comment.Doc</a> and <a href="/go/doc/comment#Parser.Parse">comment.Parser.Parse</a>. diff --git a/src/go/doc/comment/testdata/doclink2.txt b/src/go/doc/comment/testdata/doclink2.txt new file mode 100644 index 0000000..ecd8e4e --- /dev/null +++ b/src/go/doc/comment/testdata/doclink2.txt @@ -0,0 +1,8 @@ +-- input -- +We use [io.Reader] a lot, and also a few map[io.Reader]string. + +Never [io.Reader]int or Slice[io.Reader] though. +-- markdown -- +We use [io.Reader](/io#Reader) a lot, and also a few map\[io.Reader]string. + +Never \[io.Reader]int or Slice\[io.Reader] though. diff --git a/src/go/doc/comment/testdata/doclink3.txt b/src/go/doc/comment/testdata/doclink3.txt new file mode 100644 index 0000000..0ccfb3d --- /dev/null +++ b/src/go/doc/comment/testdata/doclink3.txt @@ -0,0 +1,8 @@ +-- input -- +[encoding/json.Marshal] is a doc link. + +[rot13.Marshal] is not. +-- markdown -- +[encoding/json.Marshal](/encoding/json#Marshal) is a doc link. + +\[rot13.Marshal] is not. diff --git a/src/go/doc/comment/testdata/doclink4.txt b/src/go/doc/comment/testdata/doclink4.txt new file mode 100644 index 0000000..c709527 --- /dev/null +++ b/src/go/doc/comment/testdata/doclink4.txt @@ -0,0 +1,7 @@ +-- input -- +[io] at start of comment. +[io] at start of line. +At end of line: [io] +At end of comment: [io] +-- markdown -- +[io](/io) at start of comment. [io](/io) at start of line. At end of line: [io](/io) At end of comment: [io](/io) diff --git a/src/go/doc/comment/testdata/doclink5.txt b/src/go/doc/comment/testdata/doclink5.txt new file mode 100644 index 0000000..ac7b3ae --- /dev/null +++ b/src/go/doc/comment/testdata/doclink5.txt @@ -0,0 +1,5 @@ +{"DocLinkBaseURL": "https://pkg.go.dev"} +-- input -- +[encoding/json.Marshal] is a doc link. +-- markdown -- +[encoding/json.Marshal](https://pkg.go.dev/encoding/json#Marshal) is a doc link. diff --git a/src/go/doc/comment/testdata/doclink6.txt b/src/go/doc/comment/testdata/doclink6.txt new file mode 100644 index 0000000..1acd03b --- /dev/null +++ b/src/go/doc/comment/testdata/doclink6.txt @@ -0,0 +1,5 @@ +{"DocLinkBaseURL": "https://go.dev/pkg/"} +-- input -- +[encoding/json.Marshal] is a doc link, and so is [rsc.io/quote.NonExist]. +-- markdown -- +[encoding/json.Marshal](https://go.dev/pkg/encoding/json/#Marshal) is a doc link, and so is [rsc.io/quote.NonExist](https://go.dev/pkg/rsc.io/quote/#NonExist). diff --git a/src/go/doc/comment/testdata/doclink7.txt b/src/go/doc/comment/testdata/doclink7.txt new file mode 100644 index 0000000..d34979a --- /dev/null +++ b/src/go/doc/comment/testdata/doclink7.txt @@ -0,0 +1,4 @@ +-- input -- +You see more [*bytes.Buffer] than [bytes.Buffer]. +-- markdown -- +You see more [\*bytes.Buffer](/bytes#Buffer) than [bytes.Buffer](/bytes#Buffer). diff --git a/src/go/doc/comment/testdata/escape.txt b/src/go/doc/comment/testdata/escape.txt new file mode 100644 index 0000000..f54663f --- /dev/null +++ b/src/go/doc/comment/testdata/escape.txt @@ -0,0 +1,55 @@ +-- input -- +What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>? + ++ Line + +- Line + +* Line + +999. Line + +## Line +-- gofmt -- +What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>? + ++ Line + +- Line + +* Line + +999. Line + +## Line +-- text -- +What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>? + ++ Line + +- Line + +* Line + +999. Line + +## Line +-- markdown -- +What the ~!@#$%^&\*()\_+-=\`{}|\[]\\:";',./\<>? + +\+ Line + +\- Line + +\* Line + +999\. Line + +\## Line +-- html -- +<p>What the ~!@#$%^&*()_+-=`{}|[]\:";',./<>? +<p>+ Line +<p>- Line +<p>* Line +<p>999. Line +<p>## Line diff --git a/src/go/doc/comment/testdata/head.txt b/src/go/doc/comment/testdata/head.txt new file mode 100644 index 0000000..b99a8c5 --- /dev/null +++ b/src/go/doc/comment/testdata/head.txt @@ -0,0 +1,92 @@ +-- input -- +Some text. + +An Old Heading + +Not An Old Heading. + +And some text. + +# A New Heading. + +And some more text. + +# Not a heading, +because text follows it. + +Because text precedes it, +# not a heading. + +## Not a heading either. + +-- gofmt -- +Some text. + +# An Old Heading + +Not An Old Heading. + +And some text. + +# A New Heading. + +And some more text. + +# Not a heading, +because text follows it. + +Because text precedes it, +# not a heading. + +## Not a heading either. + +-- text -- +Some text. + +# An Old Heading + +Not An Old Heading. + +And some text. + +# A New Heading. + +And some more text. + +# Not a heading, because text follows it. + +Because text precedes it, # not a heading. + +## Not a heading either. + +-- markdown -- +Some text. + +### An Old Heading {#hdr-An_Old_Heading} + +Not An Old Heading. + +And some text. + +### A New Heading. {#hdr-A_New_Heading_} + +And some more text. + +\# Not a heading, because text follows it. + +Because text precedes it, # not a heading. + +\## Not a heading either. + +-- html -- +<p>Some text. +<h3 id="hdr-An_Old_Heading">An Old Heading</h3> +<p>Not An Old Heading. +<p>And some text. +<h3 id="hdr-A_New_Heading_">A New Heading.</h3> +<p>And some more text. +<p># Not a heading, +because text follows it. +<p>Because text precedes it, +# not a heading. +<p>## Not a heading either. diff --git a/src/go/doc/comment/testdata/head2.txt b/src/go/doc/comment/testdata/head2.txt new file mode 100644 index 0000000..d357632 --- /dev/null +++ b/src/go/doc/comment/testdata/head2.txt @@ -0,0 +1,36 @@ +-- input -- +✦ + +Almost a+heading + +✦ + +Don't be a heading + +✦ + +A.b is a heading + +✦ + +A. b is not a heading + +✦ +-- gofmt -- +✦ + +Almost a+heading + +✦ + +Don't be a heading + +✦ + +# A.b is a heading + +✦ + +A. b is not a heading + +✦ diff --git a/src/go/doc/comment/testdata/head3.txt b/src/go/doc/comment/testdata/head3.txt new file mode 100644 index 0000000..dbb7cb3 --- /dev/null +++ b/src/go/doc/comment/testdata/head3.txt @@ -0,0 +1,7 @@ +{"HeadingLevel": 5} +-- input -- +# Heading +-- markdown -- +##### Heading {#hdr-Heading} +-- html -- +<h5 id="hdr-Heading">Heading</h5> diff --git a/src/go/doc/comment/testdata/hello.txt b/src/go/doc/comment/testdata/hello.txt new file mode 100644 index 0000000..fb07f1e --- /dev/null +++ b/src/go/doc/comment/testdata/hello.txt @@ -0,0 +1,35 @@ +-- input -- + Hello, + world + + This is + a test. +-- dump -- +Doc + Paragraph + Plain + "Hello,\n" + "world" + Paragraph + Plain + "This is\n" + "a test." +-- gofmt -- +Hello, +world + +This is +a test. +-- html -- +<p>Hello, +world +<p>This is +a test. +-- markdown -- +Hello, world + +This is a test. +-- text -- +Hello, world + +This is a test. diff --git a/src/go/doc/comment/testdata/link.txt b/src/go/doc/comment/testdata/link.txt new file mode 100644 index 0000000..551e306 --- /dev/null +++ b/src/go/doc/comment/testdata/link.txt @@ -0,0 +1,17 @@ +-- input -- +The Go home page is https://go.dev/. +It used to be https://golang.org. + +-- gofmt -- +The Go home page is https://go.dev/. +It used to be https://golang.org. + +-- text -- +The Go home page is https://go.dev/. It used to be https://golang.org. + +-- markdown -- +The Go home page is [https://go.dev/](https://go.dev/). It used to be [https://golang.org](https://golang.org). + +-- html -- +<p>The Go home page is <a href="https://go.dev/">https://go.dev/</a>. +It used to be <a href="https://golang.org">https://golang.org</a>. diff --git a/src/go/doc/comment/testdata/link2.txt b/src/go/doc/comment/testdata/link2.txt new file mode 100644 index 0000000..8637a32 --- /dev/null +++ b/src/go/doc/comment/testdata/link2.txt @@ -0,0 +1,31 @@ +-- input -- +The Go home page is https://go.dev/. +It used to be https://golang.org. +https:// is not a link. +Nor is https:// +https://☺ is not a link. +https://:80 is not a link. + +-- gofmt -- +The Go home page is https://go.dev/. +It used to be https://golang.org. +https:// is not a link. +Nor is https:// +https://☺ is not a link. +https://:80 is not a link. + +-- text -- +The Go home page is https://go.dev/. It used to be https://golang.org. https:// +is not a link. Nor is https:// https://☺ is not a link. https://:80 is not a +link. + +-- markdown -- +The Go home page is [https://go.dev/](https://go.dev/). It used to be [https://golang.org](https://golang.org). https:// is not a link. Nor is https:// https://☺ is not a link. https://:80 is not a link. + +-- html -- +<p>The Go home page is <a href="https://go.dev/">https://go.dev/</a>. +It used to be <a href="https://golang.org">https://golang.org</a>. +https:// is not a link. +Nor is https:// +https://☺ is not a link. +https://:80 is not a link. diff --git a/src/go/doc/comment/testdata/link3.txt b/src/go/doc/comment/testdata/link3.txt new file mode 100644 index 0000000..5a115b5 --- /dev/null +++ b/src/go/doc/comment/testdata/link3.txt @@ -0,0 +1,14 @@ +-- input -- +Doc text. + +[Go home page]: https://go.dev +-- gofmt -- +Doc text. + +[Go home page]: https://go.dev +-- text -- +Doc text. +-- markdown -- +Doc text. +-- html -- +<p>Doc text. diff --git a/src/go/doc/comment/testdata/link4.txt b/src/go/doc/comment/testdata/link4.txt new file mode 100644 index 0000000..75f194c --- /dev/null +++ b/src/go/doc/comment/testdata/link4.txt @@ -0,0 +1,77 @@ +-- input -- +These are not links. + +[x + +[x]: + +[x]:https://go.dev + +[x]https://go.dev + +[x]: surprise://go.dev + +[x]: surprise! + +But this is, with a tab (although it's unused). + +[z]: https://go.dev +-- gofmt -- +These are not links. + +[x + +[x]: + +[x]:https://go.dev + +[x]https://go.dev + +[x]: surprise://go.dev + +[x]: surprise! + +But this is, with a tab (although it's unused). + +[z]: https://go.dev +-- text -- +These are not links. + +[x + +[x]: + +[x]:https://go.dev + +[x]https://go.dev + +[x]: surprise://go.dev + +[x]: surprise! + +But this is, with a tab (although it's unused). +-- markdown -- +These are not links. + +\[x + +\[x]: + +\[x]:[https://go.dev](https://go.dev) + +\[x][https://go.dev](https://go.dev) + +\[x]: surprise://go.dev + +\[x]: surprise! + +But this is, with a tab (although it's unused). +-- html -- +<p>These are not links. +<p>[x +<p>[x]: +<p>[x]:<a href="https://go.dev">https://go.dev</a> +<p>[x]<a href="https://go.dev">https://go.dev</a> +<p>[x]: surprise://go.dev +<p>[x]: surprise! +<p>But this is, with a tab (although it's unused). diff --git a/src/go/doc/comment/testdata/link5.txt b/src/go/doc/comment/testdata/link5.txt new file mode 100644 index 0000000..b4fb588 --- /dev/null +++ b/src/go/doc/comment/testdata/link5.txt @@ -0,0 +1,36 @@ +-- input -- +See the [Go home page] and the [pkg +site]. + +[Go home page]: https://go.dev/ +[pkg site]: https://pkg.go.dev +[Go home page]: https://duplicate.ignored + +They're really great! + +-- gofmt -- +See the [Go home page] and the [pkg +site]. + +They're really great! + +[Go home page]: https://go.dev/ +[pkg site]: https://pkg.go.dev + +[Go home page]: https://duplicate.ignored + +-- text -- +See the Go home page and the pkg site. + +They're really great! + +[Go home page]: https://go.dev/ +[pkg site]: https://pkg.go.dev +-- markdown -- +See the [Go home page](https://go.dev/) and the [pkg site](https://pkg.go.dev). + +They're really great! +-- html -- +<p>See the <a href="https://go.dev/">Go home page</a> and the <a href="https://pkg.go.dev">pkg +site</a>. +<p>They're really great! diff --git a/src/go/doc/comment/testdata/link6.txt b/src/go/doc/comment/testdata/link6.txt new file mode 100644 index 0000000..ff629b4 --- /dev/null +++ b/src/go/doc/comment/testdata/link6.txt @@ -0,0 +1,50 @@ +-- input -- +URLs with punctuation are hard. +We don't want to consume the end-of-sentence punctuation. + +For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries). +And https://example.com/[foo]/bar{. +And https://example.com/(foo)/bar! +And https://example.com/{foo}/bar{. +And https://example.com/)baz{foo}. + +[And https://example.com/]. + +-- gofmt -- +URLs with punctuation are hard. +We don't want to consume the end-of-sentence punctuation. + +For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries). +And https://example.com/[foo]/bar{. +And https://example.com/(foo)/bar! +And https://example.com/{foo}/bar{. +And https://example.com/)baz{foo}. + +[And https://example.com/]. + +-- text -- +URLs with punctuation are hard. We don't want to consume the end-of-sentence +punctuation. + +For example, https://en.wikipedia.org/wiki/John_Adams_(miniseries). +And https://example.com/[foo]/bar{. And https://example.com/(foo)/bar! And +https://example.com/{foo}/bar{. And https://example.com/)baz{foo}. + +[And https://example.com/]. + +-- markdown -- +URLs with punctuation are hard. We don't want to consume the end-of-sentence punctuation. + +For example, [https://en.wikipedia.org/wiki/John\_Adams\_(miniseries)](https://en.wikipedia.org/wiki/John_Adams_(miniseries)). And [https://example.com/\[foo]/bar](https://example.com/[foo]/bar){. And [https://example.com/(foo)/bar](https://example.com/(foo)/bar)! And [https://example.com/{foo}/bar](https://example.com/{foo}/bar){. And [https://example.com/](https://example.com/))baz{foo}. + +\[And [https://example.com/](https://example.com/)]. + +-- html -- +<p>URLs with punctuation are hard. +We don't want to consume the end-of-sentence punctuation. +<p>For example, <a href="https://en.wikipedia.org/wiki/John_Adams_(miniseries)">https://en.wikipedia.org/wiki/John_Adams_(miniseries)</a>. +And <a href="https://example.com/[foo]/bar">https://example.com/[foo]/bar</a>{. +And <a href="https://example.com/(foo)/bar">https://example.com/(foo)/bar</a>! +And <a href="https://example.com/{foo}/bar">https://example.com/{foo}/bar</a>{. +And <a href="https://example.com/">https://example.com/</a>)baz{foo}. +<p>[And <a href="https://example.com/">https://example.com/</a>]. diff --git a/src/go/doc/comment/testdata/link7.txt b/src/go/doc/comment/testdata/link7.txt new file mode 100644 index 0000000..89a8b31 --- /dev/null +++ b/src/go/doc/comment/testdata/link7.txt @@ -0,0 +1,25 @@ +-- input -- +[math] is a package but this is not a doc link. + +[io] is a doc link. + +[math]: https://example.com +-- gofmt -- +[math] is a package but this is not a doc link. + +[io] is a doc link. + +[math]: https://example.com +-- text -- +math is a package but this is not a doc link. + +io is a doc link. + +[math]: https://example.com +-- markdown -- +[math](https://example.com) is a package but this is not a doc link. + +[io](/io) is a doc link. +-- html -- +<p><a href="https://example.com">math</a> is a package but this is not a doc link. +<p><a href="/io">io</a> is a doc link. diff --git a/src/go/doc/comment/testdata/linklist.txt b/src/go/doc/comment/testdata/linklist.txt new file mode 100644 index 0000000..baf4062 --- /dev/null +++ b/src/go/doc/comment/testdata/linklist.txt @@ -0,0 +1,18 @@ +{"DocLinkBaseURL": "https://pkg.go.dev"} +-- input -- +Did you know? + + - [encoding/json.Marshal] is a doc link. So is [encoding/json.Unmarshal]. +-- text -- +Did you know? + + - encoding/json.Marshal is a doc link. So is encoding/json.Unmarshal. +-- markdown -- +Did you know? + + - [encoding/json.Marshal](https://pkg.go.dev/encoding/json#Marshal) is a doc link. So is [encoding/json.Unmarshal](https://pkg.go.dev/encoding/json#Unmarshal). +-- html -- +<p>Did you know? +<ul> +<li><a href="https://pkg.go.dev/encoding/json#Marshal">encoding/json.Marshal</a> is a doc link. So is <a href="https://pkg.go.dev/encoding/json#Unmarshal">encoding/json.Unmarshal</a>. +</ul> diff --git a/src/go/doc/comment/testdata/linklist2.txt b/src/go/doc/comment/testdata/linklist2.txt new file mode 100644 index 0000000..81b3061 --- /dev/null +++ b/src/go/doc/comment/testdata/linklist2.txt @@ -0,0 +1,39 @@ +{"DocLinkBaseURL": "https://pkg.go.dev"} +-- input -- +Did you know? + + - [testing.T] is one doc link. + - So is [testing.M]. + - So is [testing.B]. + This is the same list paragraph. + + There is [testing.PB] in this list item, too! +-- text -- +Did you know? + + - testing.T is one doc link. + + - So is testing.M. + + - So is testing.B. This is the same list paragraph. + + There is testing.PB in this list item, too! +-- markdown -- +Did you know? + + - [testing.T](https://pkg.go.dev/testing#T) is one doc link. + + - So is [testing.M](https://pkg.go.dev/testing#M). + + - So is [testing.B](https://pkg.go.dev/testing#B). This is the same list paragraph. + + There is [testing.PB](https://pkg.go.dev/testing#PB) in this list item, too! +-- html -- +<p>Did you know? +<ul> +<li><p><a href="https://pkg.go.dev/testing#T">testing.T</a> is one doc link. +<li><p>So is <a href="https://pkg.go.dev/testing#M">testing.M</a>. +<li><p>So is <a href="https://pkg.go.dev/testing#B">testing.B</a>. +This is the same list paragraph. +<p>There is <a href="https://pkg.go.dev/testing#PB">testing.PB</a> in this list item, too! +</ul> diff --git a/src/go/doc/comment/testdata/linklist3.txt b/src/go/doc/comment/testdata/linklist3.txt new file mode 100644 index 0000000..701a54e --- /dev/null +++ b/src/go/doc/comment/testdata/linklist3.txt @@ -0,0 +1,31 @@ +{"DocLinkBaseURL": "https://pkg.go.dev"} +-- input -- +Cool things: + + - Foo + - [Go] + - Bar + +[Go]: https://go.dev/ +-- text -- +Cool things: + + - Foo + - Go + - Bar + +[Go]: https://go.dev/ +-- markdown -- +Cool things: + + - Foo + - [Go](https://go.dev/) + - Bar + +-- html -- +<p>Cool things: +<ul> +<li>Foo +<li><a href="https://go.dev/">Go</a> +<li>Bar +</ul> diff --git a/src/go/doc/comment/testdata/linklist4.txt b/src/go/doc/comment/testdata/linklist4.txt new file mode 100644 index 0000000..db39ec4 --- /dev/null +++ b/src/go/doc/comment/testdata/linklist4.txt @@ -0,0 +1,36 @@ +{"DocLinkBaseURL": "https://pkg.go.dev"} +-- input -- +Cool things: + + - Foo + - [Go] is great + + [Go]: https://go.dev/ + - Bar + +-- text -- +Cool things: + + - Foo + + - Go is great + + - Bar + +[Go]: https://go.dev/ +-- markdown -- +Cool things: + + - Foo + + - [Go](https://go.dev/) is great + + - Bar + +-- html -- +<p>Cool things: +<ul> +<li><p>Foo +<li><p><a href="https://go.dev/">Go</a> is great +<li><p>Bar +</ul> diff --git a/src/go/doc/comment/testdata/list.txt b/src/go/doc/comment/testdata/list.txt new file mode 100644 index 0000000..455782f --- /dev/null +++ b/src/go/doc/comment/testdata/list.txt @@ -0,0 +1,48 @@ +-- input -- +Text. +- Not a list. + - Here is the list. + • Using multiple bullets. + * Indentation does not matter. + + Lots of bullets. +More text. + +-- gofmt -- +Text. +- Not a list. + - Here is the list. + - Using multiple bullets. + - Indentation does not matter. + - Lots of bullets. + +More text. + +-- text -- +Text. - Not a list. + - Here is the list. + - Using multiple bullets. + - Indentation does not matter. + - Lots of bullets. + +More text. + +-- markdown -- +Text. - Not a list. + + - Here is the list. + - Using multiple bullets. + - Indentation does not matter. + - Lots of bullets. + +More text. + +-- html -- +<p>Text. +- Not a list. +<ul> +<li>Here is the list. +<li>Using multiple bullets. +<li>Indentation does not matter. +<li>Lots of bullets. +</ul> +<p>More text. diff --git a/src/go/doc/comment/testdata/list10.txt b/src/go/doc/comment/testdata/list10.txt new file mode 100644 index 0000000..9c49083 --- /dev/null +++ b/src/go/doc/comment/testdata/list10.txt @@ -0,0 +1,13 @@ +-- input -- + + 1. This list + 2. Starts the comment + 3. And also has a blank line before it. + +All of which is a little weird. +-- gofmt -- + 1. This list + 2. Starts the comment + 3. And also has a blank line before it. + +All of which is a little weird. diff --git a/src/go/doc/comment/testdata/list2.txt b/src/go/doc/comment/testdata/list2.txt new file mode 100644 index 0000000..c390b3d --- /dev/null +++ b/src/go/doc/comment/testdata/list2.txt @@ -0,0 +1,57 @@ +-- input -- +Text. + 1. Uno + 2) Dos + 3. Tres + 5. Cinco + 7. Siete + 11. Once + 12. Doce + 13. Trece. + +-- gofmt -- +Text. + 1. Uno + 2. Dos + 3. Tres + 5. Cinco + 7. Siete + 11. Once + 12. Doce + 13. Trece. + +-- text -- +Text. + 1. Uno + 2. Dos + 3. Tres + 5. Cinco + 7. Siete + 11. Once + 12. Doce + 13. Trece. + +-- markdown -- +Text. + + 1. Uno + 2. Dos + 3. Tres + 5. Cinco + 7. Siete + 11. Once + 12. Doce + 13. Trece. + +-- html -- +<p>Text. +<ol> +<li>Uno +<li>Dos +<li>Tres +<li value="5">Cinco +<li value="7">Siete +<li value="11">Once +<li>Doce +<li>Trece. +</ol> diff --git a/src/go/doc/comment/testdata/list3.txt b/src/go/doc/comment/testdata/list3.txt new file mode 100644 index 0000000..d7d345d --- /dev/null +++ b/src/go/doc/comment/testdata/list3.txt @@ -0,0 +1,32 @@ +-- input -- +Text. + + 1. Uno + 1. Dos + 1. Tres + 1. Quatro + +-- gofmt -- +Text. + + 1. Uno + 1. Dos + 1. Tres + 1. Quatro + +-- markdown -- +Text. + + 1. Uno + 1. Dos + 1. Tres + 1. Quatro + +-- html -- +<p>Text. +<ol> +<li>Uno +<li value="1">Dos +<li value="1">Tres +<li value="1">Quatro +</ol> diff --git a/src/go/doc/comment/testdata/list4.txt b/src/go/doc/comment/testdata/list4.txt new file mode 100644 index 0000000..9c28d65 --- /dev/null +++ b/src/go/doc/comment/testdata/list4.txt @@ -0,0 +1,38 @@ +-- input -- +Text. + 1. List +2. Not indented, not a list. + 3. Another list. + +-- gofmt -- +Text. + 1. List + +2. Not indented, not a list. + 3. Another list. + +-- text -- +Text. + 1. List + +2. Not indented, not a list. + 3. Another list. + +-- markdown -- +Text. + + 1. List + +2\. Not indented, not a list. + + 3. Another list. + +-- html -- +<p>Text. +<ol> +<li>List +</ol> +<p>2. Not indented, not a list. +<ol> +<li value="3">Another list. +</ol> diff --git a/src/go/doc/comment/testdata/list5.txt b/src/go/doc/comment/testdata/list5.txt new file mode 100644 index 0000000..a5128e5 --- /dev/null +++ b/src/go/doc/comment/testdata/list5.txt @@ -0,0 +1,40 @@ +-- input -- +Text. + + 1. One + 999999999999999999999. Big + 1000000000000000000000. Bigger + 1000000000000000000001. Biggest + +-- gofmt -- +Text. + + 1. One + 999999999999999999999. Big + 1000000000000000000000. Bigger + 1000000000000000000001. Biggest + +-- text -- +Text. + + 1. One + 999999999999999999999. Big + 1000000000000000000000. Bigger + 1000000000000000000001. Biggest + +-- markdown -- +Text. + + 1. One + 999999999999999999999. Big + 1000000000000000000000. Bigger + 1000000000000000000001. Biggest + +-- html -- +<p>Text. +<ol> +<li>One +<li value="999999999999999999999">Big +<li>Bigger +<li>Biggest +</ol> diff --git a/src/go/doc/comment/testdata/list6.txt b/src/go/doc/comment/testdata/list6.txt new file mode 100644 index 0000000..ffc0122 --- /dev/null +++ b/src/go/doc/comment/testdata/list6.txt @@ -0,0 +1,129 @@ +-- input -- +Text. + - List immediately after. + - Another. + +More text. + + - List after blank line. + - Another. + +Even more text. + - List immediately after. + + - Blank line between items. + +Yet more text. + + - Another list after blank line. + + - Blank line between items. + +Still more text. + - One list item. + + Multiple paragraphs. +-- dump -- +Doc + Paragraph + Plain "Text." + List ForceBlankBefore=false ForceBlankBetween=false + Item Number="" + Paragraph + Plain "List immediately after." + Item Number="" + Paragraph + Plain "Another." + Paragraph + Plain "More text." + List ForceBlankBefore=true ForceBlankBetween=false + Item Number="" + Paragraph + Plain "List after blank line." + Item Number="" + Paragraph + Plain "Another." + Paragraph + Plain "Even more text." + List ForceBlankBefore=false ForceBlankBetween=true + Item Number="" + Paragraph + Plain "List immediately after." + Item Number="" + Paragraph + Plain "Blank line between items." + Paragraph + Plain "Yet more text." + List ForceBlankBefore=true ForceBlankBetween=true + Item Number="" + Paragraph + Plain "Another list after blank line." + Item Number="" + Paragraph + Plain "Blank line between items." + Paragraph + Plain "Still more text." + List ForceBlankBefore=false ForceBlankBetween=true + Item Number="" + Paragraph + Plain "One list item." + Paragraph + Plain "Multiple paragraphs." + +-- gofmt -- +Text. + - List immediately after. + - Another. + +More text. + + - List after blank line. + - Another. + +Even more text. + + - List immediately after. + + - Blank line between items. + +Yet more text. + + - Another list after blank line. + + - Blank line between items. + +Still more text. + + - One list item. + + Multiple paragraphs. + +-- markdown -- +Text. + + - List immediately after. + - Another. + +More text. + + - List after blank line. + - Another. + +Even more text. + + - List immediately after. + + - Blank line between items. + +Yet more text. + + - Another list after blank line. + + - Blank line between items. + +Still more text. + + - One list item. + + Multiple paragraphs. + diff --git a/src/go/doc/comment/testdata/list7.txt b/src/go/doc/comment/testdata/list7.txt new file mode 100644 index 0000000..4466050 --- /dev/null +++ b/src/go/doc/comment/testdata/list7.txt @@ -0,0 +1,98 @@ +-- input -- +Almost list markers (but not quite): + + - + +❦ + + - $ + +❦ + + - $ + +❦ + + $ + $ + +❦ + + 1! List. + +❦ +-- gofmt -- +Almost list markers (but not quite): + + - + +❦ + + - $ + +❦ + + - $ + +❦ + +❦ + + 1! List. + +❦ +-- text -- +Almost list markers (but not quite): + + - + +❦ + + - + +❦ + + - + +❦ + +❦ + + 1! List. + +❦ +-- markdown -- +Almost list markers (but not quite): + + - + +❦ + + - $ + +❦ + + - $ + +❦ + +❦ + + 1! List. + +❦ +-- html -- +<p>Almost list markers (but not quite): +<pre>- +</pre> +<p>❦ +<pre>- $ +</pre> +<p>❦ +<pre>- $ +</pre> +<p>❦ +<p>❦ +<pre>1! List. +</pre> +<p>❦ diff --git a/src/go/doc/comment/testdata/list8.txt b/src/go/doc/comment/testdata/list8.txt new file mode 100644 index 0000000..fc46b0d --- /dev/null +++ b/src/go/doc/comment/testdata/list8.txt @@ -0,0 +1,56 @@ +-- input -- +Loose lists. + - A + + B + - C + D + - E + - F +-- gofmt -- +Loose lists. + + - A + + B + + - C + D + + - E + + - F +-- text -- +Loose lists. + + - A + + B + + - C D + + - E + + - F +-- markdown -- +Loose lists. + + - A + + B + + - C D + + - E + + - F +-- html -- +<p>Loose lists. +<ul> +<li><p>A +<p>B +<li><p>C +D +<li><p>E +<li><p>F +</ul> diff --git a/src/go/doc/comment/testdata/list9.txt b/src/go/doc/comment/testdata/list9.txt new file mode 100644 index 0000000..48e4673 --- /dev/null +++ b/src/go/doc/comment/testdata/list9.txt @@ -0,0 +1,30 @@ +-- input -- +Text. + +1. Not a list +2. because it is +3. unindented. + +4. This one + is a list + because of the indented text. +5. More wrapped + items. +6. And unwrapped. + +7. The blank line stops the heuristic. +-- gofmt -- +Text. + +1. Not a list +2. because it is +3. unindented. + + 4. This one + is a list + because of the indented text. + 5. More wrapped + items. + 6. And unwrapped. + +7. The blank line stops the heuristic. diff --git a/src/go/doc/comment/testdata/para.txt b/src/go/doc/comment/testdata/para.txt new file mode 100644 index 0000000..2355fa8 --- /dev/null +++ b/src/go/doc/comment/testdata/para.txt @@ -0,0 +1,17 @@ +-- input -- +Hello, world. +This is a paragraph. + +-- gofmt -- +Hello, world. +This is a paragraph. + +-- text -- +Hello, world. This is a paragraph. + +-- markdown -- +Hello, world. This is a paragraph. + +-- html -- +<p>Hello, world. +This is a paragraph. diff --git a/src/go/doc/comment/testdata/quote.txt b/src/go/doc/comment/testdata/quote.txt new file mode 100644 index 0000000..b64adae --- /dev/null +++ b/src/go/doc/comment/testdata/quote.txt @@ -0,0 +1,15 @@ +-- input -- +Doubled single quotes like `` and '' turn into Unicode double quotes, +but single quotes ` and ' do not. +Misplaced markdown fences ``` do not either. +-- gofmt -- +Doubled single quotes like “ and ” turn into Unicode double quotes, +but single quotes ` and ' do not. +Misplaced markdown fences ``` do not either. +-- text -- +Doubled single quotes like “ and ” turn into Unicode double quotes, but single +quotes ` and ' do not. Misplaced markdown fences ``` do not either. +-- html -- +<p>Doubled single quotes like “ and ” turn into Unicode double quotes, +but single quotes ` and ' do not. +Misplaced markdown fences ``` do not either. diff --git a/src/go/doc/comment/testdata/text.txt b/src/go/doc/comment/testdata/text.txt new file mode 100644 index 0000000..c4de6e2 --- /dev/null +++ b/src/go/doc/comment/testdata/text.txt @@ -0,0 +1,62 @@ +{"TextPrefix":"|", "TextCodePrefix": "@"} +-- input -- +Hello, world + Code block here. +More text. +Tight list + - one + - two + - three +Loose list + - one + + - two + + - three + +# Heading + +More text. +-- gofmt -- +Hello, world + + Code block here. + +More text. +Tight list + - one + - two + - three + +Loose list + + - one + + - two + + - three + +# Heading + +More text. +-- text -- +|Hello, world +| +@Code block here. +| +|More text. Tight list +| - one +| - two +| - three +| +|Loose list +| +| - one +| +| - two +| +| - three +| +|# Heading +| +|More text. diff --git a/src/go/doc/comment/testdata/text2.txt b/src/go/doc/comment/testdata/text2.txt new file mode 100644 index 0000000..a099d0b --- /dev/null +++ b/src/go/doc/comment/testdata/text2.txt @@ -0,0 +1,14 @@ +{"TextWidth": -1} +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- +Package gob manages streams of gobs - binary values exchanged between an Encoder (transmitter) and a Decoder (receiver). A typical use is transporting arguments and results of remote procedure calls (RPCs) such as those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream and is most efficient when a single Encoder is used to transmit a stream of values, amortizing the cost of compilation. diff --git a/src/go/doc/comment/testdata/text3.txt b/src/go/doc/comment/testdata/text3.txt new file mode 100644 index 0000000..75d2c37 --- /dev/null +++ b/src/go/doc/comment/testdata/text3.txt @@ -0,0 +1,28 @@ +{"TextWidth": 30} +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- +Package gob manages streams +of gobs - binary values +exchanged between an Encoder +(transmitter) and a Decoder +(receiver). A typical use is +transporting arguments and +results of remote procedure +calls (RPCs) such as those +provided by package "net/rpc". + +The implementation compiles +a custom codec for each data +type in the stream and is +most efficient when a single +Encoder is used to transmit a +stream of values, amortizing +the cost of compilation. diff --git a/src/go/doc/comment/testdata/text4.txt b/src/go/doc/comment/testdata/text4.txt new file mode 100644 index 0000000..e429985 --- /dev/null +++ b/src/go/doc/comment/testdata/text4.txt @@ -0,0 +1,29 @@ +{"TextWidth": 29} +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- +Package gob manages streams +of gobs - binary values +exchanged between an Encoder +(transmitter) and a Decoder +(receiver). A typical use +is transporting arguments +and results of remote +procedure calls (RPCs) such +as those provided by package +"net/rpc". + +The implementation compiles +a custom codec for each data +type in the stream and is +most efficient when a single +Encoder is used to transmit a +stream of values, amortizing +the cost of compilation. diff --git a/src/go/doc/comment/testdata/text5.txt b/src/go/doc/comment/testdata/text5.txt new file mode 100644 index 0000000..2408fc5 --- /dev/null +++ b/src/go/doc/comment/testdata/text5.txt @@ -0,0 +1,38 @@ +{"TextWidth": 20} +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- +Package gob +manages streams +of gobs - binary +values exchanged +between an Encoder +(transmitter) and a +Decoder (receiver). +A typical use +is transporting +arguments and +results of remote +procedure calls +(RPCs) such as those +provided by package +"net/rpc". + +The implementation +compiles a custom +codec for each +data type in the +stream and is most +efficient when a +single Encoder is +used to transmit a +stream of values, +amortizing the cost +of compilation. diff --git a/src/go/doc/comment/testdata/text6.txt b/src/go/doc/comment/testdata/text6.txt new file mode 100644 index 0000000..d6deff5 --- /dev/null +++ b/src/go/doc/comment/testdata/text6.txt @@ -0,0 +1,18 @@ +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- +Package gob manages streams of gobs - binary values exchanged between an Encoder +(transmitter) and a Decoder (receiver). A typical use is transporting arguments +and results of remote procedure calls (RPCs) such as those provided by package +"net/rpc". + +The implementation compiles a custom codec for each data type in the stream and +is most efficient when a single Encoder is used to transmit a stream of values, +amortizing the cost of compilation. diff --git a/src/go/doc/comment/testdata/text7.txt b/src/go/doc/comment/testdata/text7.txt new file mode 100644 index 0000000..c9fb6d3 --- /dev/null +++ b/src/go/doc/comment/testdata/text7.txt @@ -0,0 +1,21 @@ +{"TextPrefix": " "} +-- input -- +Package gob manages streams of gobs - binary values exchanged between an +Encoder (transmitter) and a Decoder (receiver). A typical use is +transporting arguments and results of remote procedure calls (RPCs) such as +those provided by package "net/rpc". + +The implementation compiles a custom codec for each data type in the stream +and is most efficient when a single Encoder is used to transmit a stream of +values, amortizing the cost of compilation. +-- text -- + Package gob manages streams of gobs - binary values + exchanged between an Encoder (transmitter) and a Decoder + (receiver). A typical use is transporting arguments and + results of remote procedure calls (RPCs) such as those + provided by package "net/rpc". + + The implementation compiles a custom codec for each data + type in the stream and is most efficient when a single + Encoder is used to transmit a stream of values, amortizing + the cost of compilation. diff --git a/src/go/doc/comment/testdata/text8.txt b/src/go/doc/comment/testdata/text8.txt new file mode 100644 index 0000000..560ac95 --- /dev/null +++ b/src/go/doc/comment/testdata/text8.txt @@ -0,0 +1,94 @@ +{"TextWidth": 40} +-- input -- +If the arguments have version suffixes (like @latest or @v1.0.0), "go install" +builds packages in module-aware mode, ignoring the go.mod file in the current +directory or any parent directory, if there is one. This is useful for +installing executables without affecting the dependencies of the main module. +To eliminate ambiguity about which module versions are used in the build, the +arguments must satisfy the following constraints: + + - Arguments must be package paths or package patterns (with "..." wildcards). + They must not be standard packages (like fmt), meta-patterns (std, cmd, + all), or relative or absolute file paths. + + - All arguments must have the same version suffix. Different queries are not + allowed, even if they refer to the same version. + + - All arguments must refer to packages in the same module at the same version. + + - Package path arguments must refer to main packages. Pattern arguments + will only match main packages. + + - No module is considered the "main" module. If the module containing + packages named on the command line has a go.mod file, it must not contain + directives (replace and exclude) that would cause it to be interpreted + differently than if it were the main module. The module must not require + a higher version of itself. + + - Vendor directories are not used in any module. (Vendor directories are not + included in the module zip files downloaded by 'go install'.) + +If the arguments don't have version suffixes, "go install" may run in +module-aware mode or GOPATH mode, depending on the GO111MODULE environment +variable and the presence of a go.mod file. See 'go help modules' for details. +If module-aware mode is enabled, "go install" runs in the context of the main +module. +-- text -- +If the arguments have version suffixes +(like @latest or @v1.0.0), "go install" +builds packages in module-aware mode, +ignoring the go.mod file in the current +directory or any parent directory, +if there is one. This is useful for +installing executables without affecting +the dependencies of the main module. +To eliminate ambiguity about which +module versions are used in the build, +the arguments must satisfy the following +constraints: + + - Arguments must be package paths + or package patterns (with "..." + wildcards). They must not be + standard packages (like fmt), + meta-patterns (std, cmd, all), + or relative or absolute file paths. + + - All arguments must have the same + version suffix. Different queries + are not allowed, even if they refer + to the same version. + + - All arguments must refer to packages + in the same module at the same + version. + + - Package path arguments must refer + to main packages. Pattern arguments + will only match main packages. + + - No module is considered the "main" + module. If the module containing + packages named on the command line + has a go.mod file, it must not + contain directives (replace and + exclude) that would cause it to be + interpreted differently than if it + were the main module. The module + must not require a higher version of + itself. + + - Vendor directories are not used in + any module. (Vendor directories are + not included in the module zip files + downloaded by 'go install'.) + +If the arguments don't have version +suffixes, "go install" may run in +module-aware mode or GOPATH mode, +depending on the GO111MODULE environment +variable and the presence of a go.mod +file. See 'go help modules' for details. +If module-aware mode is enabled, +"go install" runs in the context of the +main module. diff --git a/src/go/doc/comment/testdata/text9.txt b/src/go/doc/comment/testdata/text9.txt new file mode 100644 index 0000000..07a64aa --- /dev/null +++ b/src/go/doc/comment/testdata/text9.txt @@ -0,0 +1,12 @@ +{"TextPrefix":"|", "TextCodePrefix": "@"} +-- input -- +Hello, world + Code block here. +-- gofmt -- +Hello, world + + Code block here. +-- text -- +|Hello, world +| +@Code block here. diff --git a/src/go/doc/comment/testdata/words.txt b/src/go/doc/comment/testdata/words.txt new file mode 100644 index 0000000..63c7e1a --- /dev/null +++ b/src/go/doc/comment/testdata/words.txt @@ -0,0 +1,10 @@ +-- input -- +This is an italicword and a linkedword and Unicöde. +-- gofmt -- +This is an italicword and a linkedword and Unicöde. +-- text -- +This is an italicword and a linkedword and Unicöde. +-- markdown -- +This is an *italicword* and a [*linkedword*](https://example.com/linkedword) and Unicöde. +-- html -- +<p>This is an <i>italicword</i> and a <a href="https://example.com/linkedword"><i>linkedword</i></a> and Unicöde. diff --git a/src/go/doc/comment/testdata_test.go b/src/go/doc/comment/testdata_test.go new file mode 100644 index 0000000..0676d86 --- /dev/null +++ b/src/go/doc/comment/testdata_test.go @@ -0,0 +1,202 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "encoding/json" + "fmt" + "internal/diff" + "internal/txtar" + "path/filepath" + "strings" + "testing" +) + +func TestTestdata(t *testing.T) { + files, _ := filepath.Glob("testdata/*.txt") + if len(files) == 0 { + t.Fatalf("no testdata") + } + var p Parser + p.Words = map[string]string{ + "italicword": "", + "linkedword": "https://example.com/linkedword", + } + p.LookupPackage = func(name string) (importPath string, ok bool) { + if name == "comment" { + return "go/doc/comment", true + } + return DefaultLookupPackage(name) + } + p.LookupSym = func(recv, name string) (ok bool) { + if recv == "Parser" && name == "Parse" || + recv == "" && name == "Doc" || + recv == "" && name == "NoURL" { + return true + } + return false + } + + stripDollars := func(b []byte) []byte { + // Remove trailing $ on lines. + // They make it easier to see lines with trailing spaces, + // as well as turning them into lines without trailing spaces, + // in case editors remove trailing spaces. + return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n")) + } + for _, file := range files { + t.Run(filepath.Base(file), func(t *testing.T) { + var pr Printer + a, err := txtar.ParseFile(file) + if err != nil { + t.Fatal(err) + } + if len(a.Comment) > 0 { + err := json.Unmarshal(a.Comment, &pr) + if err != nil { + t.Fatalf("unmarshalling top json: %v", err) + } + } + if len(a.Files) < 1 || a.Files[0].Name != "input" { + t.Fatalf("first file is not %q", "input") + } + d := p.Parse(string(stripDollars(a.Files[0].Data))) + for _, f := range a.Files[1:] { + want := stripDollars(f.Data) + for len(want) >= 2 && want[len(want)-1] == '\n' && want[len(want)-2] == '\n' { + want = want[:len(want)-1] + } + var out []byte + switch f.Name { + default: + t.Fatalf("unknown output file %q", f.Name) + case "dump": + out = dump(d) + case "gofmt": + out = pr.Comment(d) + case "html": + out = pr.HTML(d) + case "markdown": + out = pr.Markdown(d) + case "text": + out = pr.Text(d) + } + if string(out) != string(want) { + t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out)) + } + } + }) + } +} + +func dump(d *Doc) []byte { + var out bytes.Buffer + dumpTo(&out, 0, d) + return out.Bytes() +} + +func dumpTo(out *bytes.Buffer, indent int, x any) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T", x) + + case *Doc: + fmt.Fprintf(out, "Doc") + dumpTo(out, indent+1, x.Content) + if len(x.Links) > 0 { + dumpNL(out, indent+1) + fmt.Fprintf(out, "Links") + dumpTo(out, indent+2, x.Links) + } + fmt.Fprintf(out, "\n") + + case []*LinkDef: + for _, def := range x { + dumpNL(out, indent) + dumpTo(out, indent, def) + } + + case *LinkDef: + fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", x.Used, x.Text, x.URL) + + case []Block: + for _, blk := range x { + dumpNL(out, indent) + dumpTo(out, indent, blk) + } + + case *Heading: + fmt.Fprintf(out, "Heading") + dumpTo(out, indent+1, x.Text) + + case *List: + fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", x.ForceBlankBefore, x.ForceBlankBetween) + dumpTo(out, indent+1, x.Items) + + case []*ListItem: + for _, item := range x { + dumpNL(out, indent) + dumpTo(out, indent, item) + } + + case *ListItem: + fmt.Fprintf(out, "Item Number=%q", x.Number) + dumpTo(out, indent+1, x.Content) + + case *Paragraph: + fmt.Fprintf(out, "Paragraph") + dumpTo(out, indent+1, x.Text) + + case *Code: + fmt.Fprintf(out, "Code") + dumpTo(out, indent+1, x.Text) + + case []Text: + for _, t := range x { + dumpNL(out, indent) + dumpTo(out, indent, t) + } + + case Plain: + if !strings.Contains(string(x), "\n") { + fmt.Fprintf(out, "Plain %q", string(x)) + } else { + fmt.Fprintf(out, "Plain") + dumpTo(out, indent+1, string(x)) + } + + case Italic: + if !strings.Contains(string(x), "\n") { + fmt.Fprintf(out, "Italic %q", string(x)) + } else { + fmt.Fprintf(out, "Italic") + dumpTo(out, indent+1, string(x)) + } + + case string: + for _, line := range strings.SplitAfter(x, "\n") { + if line != "" { + dumpNL(out, indent) + fmt.Fprintf(out, "%q", line) + } + } + + case *Link: + fmt.Fprintf(out, "Link %q", x.URL) + dumpTo(out, indent+1, x.Text) + + case *DocLink: + fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", x.ImportPath, x.Recv, x.Name) + dumpTo(out, indent+1, x.Text) + } +} + +func dumpNL(out *bytes.Buffer, n int) { + out.WriteByte('\n') + for i := 0; i < n; i++ { + out.WriteByte('\t') + } +} diff --git a/src/go/doc/comment/text.go b/src/go/doc/comment/text.go new file mode 100644 index 0000000..6f9c2e2 --- /dev/null +++ b/src/go/doc/comment/text.go @@ -0,0 +1,337 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "bytes" + "fmt" + "sort" + "strings" + "unicode/utf8" +) + +// A textPrinter holds the state needed for printing a Doc as plain text. +type textPrinter struct { + *Printer + long strings.Builder + prefix string + codePrefix string + width int +} + +// Text returns a textual formatting of the Doc. +// See the [Printer] documentation for ways to customize the text output. +func (p *Printer) Text(d *Doc) []byte { + tp := &textPrinter{ + Printer: p, + prefix: p.TextPrefix, + codePrefix: p.TextCodePrefix, + width: p.TextWidth, + } + if tp.codePrefix == "" { + tp.codePrefix = p.TextPrefix + "\t" + } + if tp.width == 0 { + tp.width = 80 - utf8.RuneCountInString(tp.prefix) + } + + var out bytes.Buffer + for i, x := range d.Content { + if i > 0 && blankBefore(x) { + out.WriteString(tp.prefix) + writeNL(&out) + } + tp.block(&out, x) + } + anyUsed := false + for _, def := range d.Links { + if def.Used { + anyUsed = true + break + } + } + if anyUsed { + writeNL(&out) + for _, def := range d.Links { + if def.Used { + fmt.Fprintf(&out, "[%s]: %s\n", def.Text, def.URL) + } + } + } + return out.Bytes() +} + +// writeNL calls out.WriteByte('\n') +// but first trims trailing spaces on the previous line. +func writeNL(out *bytes.Buffer) { + // Trim trailing spaces. + data := out.Bytes() + n := 0 + for n < len(data) && (data[len(data)-n-1] == ' ' || data[len(data)-n-1] == '\t') { + n++ + } + if n > 0 { + out.Truncate(len(data) - n) + } + out.WriteByte('\n') +} + +// block prints the block x to out. +func (p *textPrinter) block(out *bytes.Buffer, x Block) { + switch x := x.(type) { + default: + fmt.Fprintf(out, "?%T\n", x) + + case *Paragraph: + out.WriteString(p.prefix) + p.text(out, "", x.Text) + + case *Heading: + out.WriteString(p.prefix) + out.WriteString("# ") + p.text(out, "", x.Text) + + case *Code: + text := x.Text + for text != "" { + var line string + line, text, _ = strings.Cut(text, "\n") + if line != "" { + out.WriteString(p.codePrefix) + out.WriteString(line) + } + writeNL(out) + } + + case *List: + loose := x.BlankBetween() + for i, item := range x.Items { + if i > 0 && loose { + out.WriteString(p.prefix) + writeNL(out) + } + out.WriteString(p.prefix) + out.WriteString(" ") + if item.Number == "" { + out.WriteString(" - ") + } else { + out.WriteString(item.Number) + out.WriteString(". ") + } + for i, blk := range item.Content { + const fourSpace = " " + if i > 0 { + writeNL(out) + out.WriteString(p.prefix) + out.WriteString(fourSpace) + } + p.text(out, fourSpace, blk.(*Paragraph).Text) + } + } + } +} + +// text prints the text sequence x to out. +func (p *textPrinter) text(out *bytes.Buffer, indent string, x []Text) { + p.oneLongLine(&p.long, x) + words := strings.Fields(p.long.String()) + p.long.Reset() + + var seq []int + if p.width < 0 || len(words) == 0 { + seq = []int{0, len(words)} // one long line + } else { + seq = wrap(words, p.width-utf8.RuneCountInString(indent)) + } + for i := 0; i+1 < len(seq); i++ { + if i > 0 { + out.WriteString(p.prefix) + out.WriteString(indent) + } + for j, w := range words[seq[i]:seq[i+1]] { + if j > 0 { + out.WriteString(" ") + } + out.WriteString(w) + } + writeNL(out) + } +} + +// oneLongLine prints the text sequence x to out as one long line, +// without worrying about line wrapping. +// Explicit links have the [ ] dropped to improve readability. +func (p *textPrinter) oneLongLine(out *strings.Builder, x []Text) { + for _, t := range x { + switch t := t.(type) { + case Plain: + out.WriteString(string(t)) + case Italic: + out.WriteString(string(t)) + case *Link: + p.oneLongLine(out, t.Text) + case *DocLink: + p.oneLongLine(out, t.Text) + } + } +} + +// wrap wraps words into lines of at most max runes, +// minimizing the sum of the squares of the leftover lengths +// at the end of each line (except the last, of course), +// with a preference for ending lines at punctuation (.,:;). +// +// The returned slice gives the indexes of the first words +// on each line in the wrapped text with a final entry of len(words). +// Thus the lines are words[seq[0]:seq[1]], words[seq[1]:seq[2]], +// ..., words[seq[len(seq)-2]:seq[len(seq)-1]]. +// +// The implementation runs in O(n log n) time, where n = len(words), +// using the algorithm described in D. S. Hirschberg and L. L. Larmore, +// “[The least weight subsequence problem],” FOCS 1985, pp. 137-143. +// +// [The least weight subsequence problem]: https://doi.org/10.1109/SFCS.1985.60 +func wrap(words []string, max int) (seq []int) { + // The algorithm requires that our scoring function be concave, + // meaning that for all i₀ ≤ i₁ < j₀ ≤ j₁, + // weight(i₀, j₀) + weight(i₁, j₁) ≤ weight(i₀, j₁) + weight(i₁, j₀). + // + // Our weights are two-element pairs [hi, lo] + // ordered by elementwise comparison. + // The hi entry counts the weight for lines that are longer than max, + // and the lo entry counts the weight for lines that are not. + // This forces the algorithm to first minimize the number of lines + // that are longer than max, which correspond to lines with + // single very long words. Having done that, it can move on to + // minimizing the lo score, which is more interesting. + // + // The lo score is the sum for each line of the square of the + // number of spaces remaining at the end of the line and a + // penalty of 64 given out for not ending the line in a + // punctuation character (.,:;). + // The penalty is somewhat arbitrarily chosen by trying + // different amounts and judging how nice the wrapped text looks. + // Roughly speaking, using 64 means that we are willing to + // end a line with eight blank spaces in order to end at a + // punctuation character, even if the next word would fit in + // those spaces. + // + // We care about ending in punctuation characters because + // it makes the text easier to skim if not too many sentences + // or phrases begin with a single word on the previous line. + + // A score is the score (also called weight) for a given line. + // add and cmp add and compare scores. + type score struct { + hi int64 + lo int64 + } + add := func(s, t score) score { return score{s.hi + t.hi, s.lo + t.lo} } + cmp := func(s, t score) int { + switch { + case s.hi < t.hi: + return -1 + case s.hi > t.hi: + return +1 + case s.lo < t.lo: + return -1 + case s.lo > t.lo: + return +1 + } + return 0 + } + + // total[j] is the total number of runes + // (including separating spaces) in words[:j]. + total := make([]int, len(words)+1) + total[0] = 0 + for i, s := range words { + total[1+i] = total[i] + utf8.RuneCountInString(s) + 1 + } + + // weight returns weight(i, j). + weight := func(i, j int) score { + // On the last line, there is zero weight for being too short. + n := total[j] - 1 - total[i] + if j == len(words) && n <= max { + return score{0, 0} + } + + // Otherwise the weight is the penalty plus the square of the number of + // characters remaining on the line or by which the line goes over. + // In the latter case, that value goes in the hi part of the score. + // (See note above.) + p := wrapPenalty(words[j-1]) + v := int64(max-n) * int64(max-n) + if n > max { + return score{v, p} + } + return score{0, v + p} + } + + // The rest of this function is “The Basic Algorithm” from + // Hirschberg and Larmore's conference paper, + // using the same names as in the paper. + f := []score{{0, 0}} + g := func(i, j int) score { return add(f[i], weight(i, j)) } + + bridge := func(a, b, c int) bool { + k := c + sort.Search(len(words)+1-c, func(k int) bool { + k += c + return cmp(g(a, k), g(b, k)) > 0 + }) + if k > len(words) { + return true + } + return cmp(g(c, k), g(b, k)) <= 0 + } + + // d is a one-ended deque implemented as a slice. + d := make([]int, 1, len(words)) + d[0] = 0 + bestleft := make([]int, 1, len(words)) + bestleft[0] = -1 + for m := 1; m < len(words); m++ { + f = append(f, g(d[0], m)) + bestleft = append(bestleft, d[0]) + for len(d) > 1 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { + d = d[1:] // “Retire” + } + for len(d) > 1 && bridge(d[len(d)-2], d[len(d)-1], m) { + d = d[:len(d)-1] // “Fire” + } + if cmp(g(m, len(words)), g(d[len(d)-1], len(words))) < 0 { + d = append(d, m) // “Hire” + // The next few lines are not in the paper but are necessary + // to handle two-word inputs correctly. It appears to be + // just a bug in the paper's pseudocode. + if len(d) == 2 && cmp(g(d[1], m+1), g(d[0], m+1)) <= 0 { + d = d[1:] + } + } + } + bestleft = append(bestleft, d[0]) + + // Recover least weight sequence from bestleft. + n := 1 + for m := len(words); m > 0; m = bestleft[m] { + n++ + } + seq = make([]int, n) + for m := len(words); m > 0; m = bestleft[m] { + n-- + seq[n] = m + } + return seq +} + +// wrapPenalty is the penalty for inserting a line break after word s. +func wrapPenalty(s string) int64 { + switch s[len(s)-1] { + case '.', ',', ':', ';': + return 0 + } + return 64 +} diff --git a/src/go/doc/comment/wrap_test.go b/src/go/doc/comment/wrap_test.go new file mode 100644 index 0000000..f9802c9 --- /dev/null +++ b/src/go/doc/comment/wrap_test.go @@ -0,0 +1,141 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package comment + +import ( + "flag" + "fmt" + "math/rand" + "testing" + "time" + "unicode/utf8" +) + +var wrapSeed = flag.Int64("wrapseed", 0, "use `seed` for wrap test (default auto-seeds)") + +func TestWrap(t *testing.T) { + if *wrapSeed == 0 { + *wrapSeed = time.Now().UnixNano() + } + t.Logf("-wrapseed=%#x\n", *wrapSeed) + r := rand.New(rand.NewSource(*wrapSeed)) + + // Generate words of random length. + s := "1234567890αβcdefghijklmnopqrstuvwxyz" + sN := utf8.RuneCountInString(s) + var words []string + for i := 0; i < 100; i++ { + n := 1 + r.Intn(sN-1) + if n >= 12 { + n++ // extra byte for β + } + if n >= 11 { + n++ // extra byte for α + } + words = append(words, s[:n]) + } + + for n := 1; n <= len(words) && !t.Failed(); n++ { + t.Run(fmt.Sprint("n=", n), func(t *testing.T) { + words := words[:n] + t.Logf("words: %v", words) + for max := 1; max < 100 && !t.Failed(); max++ { + t.Run(fmt.Sprint("max=", max), func(t *testing.T) { + seq := wrap(words, max) + + // Compute score for seq. + start := 0 + score := int64(0) + if len(seq) == 0 { + t.Fatalf("wrap seq is empty") + } + if seq[0] != 0 { + t.Fatalf("wrap seq does not start with 0") + } + for _, n := range seq[1:] { + if n <= start { + t.Fatalf("wrap seq is non-increasing: %v", seq) + } + if n > len(words) { + t.Fatalf("wrap seq contains %d > %d: %v", n, len(words), seq) + } + size := -1 + for _, s := range words[start:n] { + size += 1 + utf8.RuneCountInString(s) + } + if n-start == 1 && size >= max { + // no score + } else if size > max { + t.Fatalf("wrap used overlong line %d:%d: %v", start, n, words[start:n]) + } else if n != len(words) { + score += int64(max-size)*int64(max-size) + wrapPenalty(words[n-1]) + } + start = n + } + if start != len(words) { + t.Fatalf("wrap seq does not use all words (%d < %d): %v", start, len(words), seq) + } + + // Check that score matches slow reference implementation. + slowSeq, slowScore := wrapSlow(words, max) + if score != slowScore { + t.Fatalf("wrap score = %d != wrapSlow score %d\nwrap: %v\nslow: %v", score, slowScore, seq, slowSeq) + } + }) + } + }) + } +} + +// wrapSlow is an O(n²) reference implementation for wrap. +// It returns a minimal-score sequence along with the score. +// It is OK if wrap returns a different sequence as long as that +// sequence has the same score. +func wrapSlow(words []string, max int) (seq []int, score int64) { + // Quadratic dynamic programming algorithm for line wrapping problem. + // best[i] tracks the best score possible for words[:i], + // assuming that for i < len(words) the line breaks after those words. + // bestleft[i] tracks the previous line break for best[i]. + best := make([]int64, len(words)+1) + bestleft := make([]int, len(words)+1) + best[0] = 0 + for i, w := range words { + if utf8.RuneCountInString(w) >= max { + // Overlong word must appear on line by itself. No effect on score. + best[i+1] = best[i] + continue + } + best[i+1] = 1e18 + p := wrapPenalty(w) + n := -1 + for j := i; j >= 0; j-- { + n += 1 + utf8.RuneCountInString(words[j]) + if n > max { + break + } + line := int64(n-max)*int64(n-max) + p + if i == len(words)-1 { + line = 0 // no score for final line being too short + } + s := best[j] + line + if best[i+1] > s { + best[i+1] = s + bestleft[i+1] = j + } + } + } + + // Recover least weight sequence from bestleft. + n := 1 + for m := len(words); m > 0; m = bestleft[m] { + n++ + } + seq = make([]int, n) + for m := len(words); m > 0; m = bestleft[m] { + n-- + seq[n] = m + } + return seq, best[len(words)] +} |