1 files changed, 612 insertions, 0 deletions
diff --git a/src/go/build/read.go b/src/go/build/read.go
new file mode 100644
index 0000000..5289197
--- /dev/null
+++ b/src/go/build/read.go
@@ -0,0 +1,612 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package build
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/scanner"
+	"go/token"
+	"io"
+	"strconv"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// importReader holds the scanning state shared by readComments and readGoInfo:
+// the buffered input, the bytes consumed so far, a one-byte lookahead,
+// and error/EOF/position bookkeeping.
+type importReader struct {
+	b    *bufio.Reader  // buffered input
+	buf  []byte         // bytes consumed by readByte; readByteNoBuf drains these first
+	peek byte           // byte saved by peekByte for the next call; 0 means none saved
+	err  error          // first error recorded (read error, errNUL, or errSyntax)
+	eof  bool           // set once the underlying reader has returned io.EOF
+	nerr int            // number of peekByte calls made after err was set
+	pos  token.Position // position advanced by readByteNoBuf
+}
+
+// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
+var bom = []byte{0xef, 0xbb, 0xbf}
+
+// newImportReader returns an importReader that reads from r.
+// Positions it reports use name as the file name and start at line 1, column 1.
+func newImportReader(name string, r io.Reader) *importReader {
+	br := bufio.NewReader(r)
+	// The Go spec (https://golang.org/ref/spec#Source_code_representation)
+	// allows a compiler to ignore a UTF-8-encoded byte order mark (U+FEFF)
+	// when it is the first Unicode code point in the source text,
+	// so drop a leading one here.
+	if head, err := br.Peek(len(bom)); err == nil && bytes.Equal(head, bom) {
+		br.Discard(len(bom))
+	}
+	start := token.Position{Filename: name, Line: 1, Column: 1}
+	return &importReader{b: br, pos: start}
+}
+
+// isIdent reports whether c can be part of an identifier for the purposes of
+// this reader: an ASCII letter, an ASCII digit, an underscore, or any byte
+// that belongs to a multi-byte UTF-8 sequence (c >= utf8.RuneSelf).
+func isIdent(c byte) bool {
+	switch {
+	case 'a' <= c && c <= 'z',
+		'A' <= c && c <= 'Z',
+		'0' <= c && c <= '9',
+		c == '_':
+		return true
+	}
+	return c >= utf8.RuneSelf
+}
+
+var (
+	// errSyntax is the error recorded by (*importReader).syntaxError.
+	errSyntax = errors.New("syntax error")
+	// errNUL is the error recorded when a NUL byte is read from the underlying reader.
+	errNUL    = errors.New("unexpected NUL in input")
+)
+
+// syntaxError records errSyntax in r.err.
+// An error that is already recorded is kept and not overwritten.
+func (r *importReader) syntaxError() {
+	if r.err != nil {
+		return
+	}
+	r.err = errSyntax
+}
+
+// readByte consumes one byte from the underlying reader, appends it to r.buf,
+// and returns it.
+// At end of input it sets r.eof and returns 0.
+// On a read error or a NUL byte it records the error in r.err (unless one is
+// already recorded) and returns 0; a NUL byte is still appended to r.buf.
+func (r *importReader) readByte() byte {
+	c, err := r.b.ReadByte()
+	if err == nil {
+		r.buf = append(r.buf, c)
+		if c != 0 {
+			return c
+		}
+		err = errNUL
+	}
+
+	switch {
+	case err == io.EOF:
+		r.eof = true
+	case r.err == nil:
+		r.err = err
+	}
+	return 0
+}
+
+// readByteNoBuf returns the next input byte without saving it in r.buf.
+// Bytes still held in r.buf are handed out first; only when r.buf is empty
+// does it read from r.b. Every byte returned successfully advances r.pos.
+// At end of input it sets r.eof and returns 0; on a read error or a NUL byte
+// read from r.b it records the error in r.err (unless one is already recorded)
+// and returns 0.
+func (r *importReader) readByteNoBuf() byte {
+	var (
+		c   byte
+		err error
+	)
+	if len(r.buf) == 0 {
+		c, err = r.b.ReadByte()
+		if err == nil && c == 0 {
+			err = errNUL
+		}
+	} else {
+		c, r.buf = r.buf[0], r.buf[1:]
+	}
+
+	if err != nil {
+		switch {
+		case err == io.EOF:
+			r.eof = true
+		case r.err == nil:
+			r.err = err
+		}
+		return 0
+	}
+
+	r.pos.Offset++
+	if c != '\n' {
+		r.pos.Column++
+		return c
+	}
+	r.pos.Line++
+	r.pos.Column = 1
+	return c
+}
+
+// peekByte returns the next byte from the input reader but does not advance beyond it.
+// If skipSpace is set, peekByte skips leading spaces and comments.
+//
+// The byte returned is saved in r.peek so that the next call starts from it.
+// Once an error has been recorded, peekByte returns 0 without reading, and it
+// panics after more than 10000 such calls (presumably a guard against a caller
+// that keeps looping without checking r.err).
+func (r *importReader) peekByte(skipSpace bool) byte {
+	if r.err != nil {
+		if r.nerr++; r.nerr > 10000 {
+			panic("go/build: import reader looping")
+		}
+		return 0
+	}
+
+	// Use r.peek as first input byte.
+	// Don't just return r.peek here: it might have been left by peekByte(false)
+	// and this might be peekByte(true).
+	c := r.peek
+	if c == 0 {
+		c = r.readByte()
+	}
+	for r.err == nil && !r.eof {
+		if skipSpace {
+			// For the purposes of this reader, semicolons are never necessary to
+			// understand the input and are treated as spaces.
+			switch c {
+			case ' ', '\f', '\t', '\r', '\n', ';':
+				c = r.readByte()
+				continue
+
+			case '/':
+				// Possible start of a comment; the next byte decides which kind.
+				c = r.readByte()
+				if c == '/' {
+					// Line comment: read up to the newline (or until error/EOF).
+					for c != '\n' && r.err == nil && !r.eof {
+						c = r.readByte()
+					}
+				} else if c == '*' {
+					// Block comment: (c, c1) holds the last two bytes read;
+					// stop once they are "*/". EOF before that is a syntax error.
+					var c1 byte
+					for (c != '*' || c1 != '/') && r.err == nil {
+						if r.eof {
+							r.syntaxError()
+						}
+						c, c1 = c1, r.readByte()
+					}
+				} else {
+					// A '/' that does not start a comment is a syntax error here.
+					r.syntaxError()
+				}
+				// Read the byte after the comment and rescan from it.
+				c = r.readByte()
+				continue
+			}
+		}
+		break
+	}
+	r.peek = c
+	return r.peek
+}
+
+// nextByte returns the byte that peekByte(skipSpace) yields and then consumes
+// it by clearing the saved peek byte.
+func (r *importReader) nextByte(skipSpace bool) byte {
+	next := r.peekByte(skipSpace)
+	r.peek = 0
+	return next
+}
+
+// goEmbed is the text that must directly follow "//" for a line comment
+// to be treated as a //go:embed comment.
+var goEmbed = []byte("go:embed")
+
+// findEmbed advances the input reader to the next //go:embed comment.
+// It reports whether it found a comment.
+// (Otherwise it found an error or EOF.)
+//
+// first must be true on the first call and false afterwards; it decides whether
+// the reader is assumed to be at the start of a line.
+// When findEmbed returns true, the reader has consumed "//go:embed" and the
+// single space or tab that follows it.
+// String, raw string and rune literals and /* */ comments are skipped so that
+// their contents are not mistaken for a comment.
+func (r *importReader) findEmbed(first bool) bool {
+	// The import block scan stopped after a non-space character,
+	// so the reader is not at the start of a line on the first call.
+	// After that, each //go:embed extraction leaves the reader
+	// at the end of a line.
+	startLine := !first
+	var c byte
+	for r.err == nil && !r.eof {
+		c = r.readByteNoBuf()
+	Reswitch:
+		switch c {
+		default:
+			startLine = false
+
+		case '\n':
+			startLine = true
+
+		case ' ', '\t':
+			// leave startLine alone
+
+		case '"':
+			// Interpreted string literal: skip to the closing quote,
+			// stepping over backslash escapes. EOF inside it is a syntax error.
+			startLine = false
+			for r.err == nil {
+				if r.eof {
+					r.syntaxError()
+				}
+				c = r.readByteNoBuf()
+				if c == '\\' {
+					r.readByteNoBuf()
+					if r.err != nil {
+						r.syntaxError()
+						return false
+					}
+					continue
+				}
+				if c == '"' {
+					// Classify the byte that follows the literal.
+					c = r.readByteNoBuf()
+					goto Reswitch
+				}
+			}
+			goto Reswitch
+
+		case '`':
+			// Raw string literal: skip to the closing back quote; no escapes.
+			startLine = false
+			for r.err == nil {
+				if r.eof {
+					r.syntaxError()
+				}
+				c = r.readByteNoBuf()
+				if c == '`' {
+					c = r.readByteNoBuf()
+					goto Reswitch
+				}
+			}
+
+		case '\'':
+			// Rune literal: skip to the closing quote, stepping over backslash escapes.
+			startLine = false
+			for r.err == nil {
+				if r.eof {
+					r.syntaxError()
+				}
+				c = r.readByteNoBuf()
+				if c == '\\' {
+					r.readByteNoBuf()
+					if r.err != nil {
+						r.syntaxError()
+						return false
+					}
+					continue
+				}
+				if c == '\'' {
+					c = r.readByteNoBuf()
+					goto Reswitch
+				}
+			}
+
+		case '/':
+			c = r.readByteNoBuf()
+			switch c {
+			default:
+				// Not a comment: classify the byte after the '/' normally.
+				startLine = false
+				goto Reswitch
+
+			case '*':
+				// Block comment: (c, c1) holds the last two bytes read;
+				// stop once they are "*/". EOF before that is a syntax error.
+				var c1 byte
+				for (c != '*' || c1 != '/') && r.err == nil {
+					if r.eof {
+						r.syntaxError()
+					}
+					c, c1 = c1, r.readByteNoBuf()
+				}
+				startLine = false
+
+			case '/':
+				// Line comment. Only one that begins a line
+				// (after optional spaces or tabs) can be a //go:embed comment.
+				if startLine {
+					// Try to read this as a //go:embed comment.
+					for i := range goEmbed {
+						c = r.readByteNoBuf()
+						if c != goEmbed[i] {
+							goto SkipSlashSlash
+						}
+					}
+					c = r.readByteNoBuf()
+					if c == ' ' || c == '\t' {
+						// Found one!
+						return true
+					}
+				}
+			SkipSlashSlash:
+				// Discard the rest of the comment line.
+				for c != '\n' && r.err == nil && !r.eof {
+					c = r.readByteNoBuf()
+				}
+				startLine = true
+			}
+		}
+	}
+	return false
+}
+
+// readKeyword skips leading spaces and comments and then consumes kw from the
+// input. It records a syntax error if the input does not match kw byte for
+// byte, or if kw is immediately followed by another identifier byte.
+func (r *importReader) readKeyword(kw string) {
+	r.peekByte(true)
+	for _, want := range []byte(kw) {
+		if got := r.nextByte(false); got != want {
+			r.syntaxError()
+			return
+		}
+	}
+	if after := r.peekByte(false); isIdent(after) {
+		r.syntaxError()
+	}
+}
+
+// readIdent skips leading spaces and comments and then consumes a run of
+// identifier bytes. It records a syntax error if the first byte found is not
+// an identifier byte.
+func (r *importReader) readIdent() {
+	if first := r.peekByte(true); !isIdent(first) {
+		r.syntaxError()
+		return
+	}
+	for {
+		if !isIdent(r.peekByte(false)) {
+			return
+		}
+		// Consume the byte just peeked.
+		r.peek = 0
+	}
+}
+
+// readString skips leading spaces and comments and then consumes a string
+// literal, either back-quoted or double-quoted.
+// It records a syntax error if no string literal is present, if the input ends
+// before the closing quote, or if a double-quoted literal contains a newline.
+func (r *importReader) readString() {
+	switch r.nextByte(true) {
+	default:
+		r.syntaxError()
+
+	case '`':
+		// Raw string: everything up to the next back quote.
+		for r.err == nil {
+			if r.nextByte(false) == '`' {
+				return
+			}
+			if r.eof {
+				r.syntaxError()
+			}
+		}
+
+	case '"':
+		for r.err == nil {
+			c := r.nextByte(false)
+			if c == '"' {
+				return
+			}
+			if c == '\n' || r.eof {
+				r.syntaxError()
+			}
+			if c == '\\' {
+				// Consume the escaped byte so an escaped quote does not end the string.
+				r.nextByte(false)
+			}
+		}
+	}
+}
+
+// readImport consumes one import spec from the input: an optional name
+// (a single '.' or an identifier) followed by a quoted string.
+func (r *importReader) readImport() {
+	switch c := r.peekByte(true); {
+	case c == '.':
+		// Consume the dot.
+		r.peek = 0
+	case isIdent(c):
+		r.readIdent()
+	}
+	r.readString()
+}
+
+// readComments reads only the leading run of spaces and comments from f and
+// returns the bytes it consumed, in the manner of io.ReadAll.
+// If reading stopped at a byte that is neither space nor comment, that byte
+// is left out of the result.
+func readComments(f io.Reader) ([]byte, error) {
+	rd := newImportReader("", f)
+	rd.peekByte(true)
+	if rd.err != nil || rd.eof {
+		return rd.buf, rd.err
+	}
+	// Stopped before EOF without error, so the last byte buffered is the
+	// non-space byte that ended the scan. Drop it.
+	return rd.buf[:len(rd.buf)-1], nil
+}
+
+// readGoInfo expects a Go file as input and reads the file up to and including the import section.
+// It records what it learned in *info.
+// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
+// info.imports and info.embeds.
+// It also appends any //go: comments found before the package clause to info.directives.
+//
+// It only returns an error if there are problems reading the file,
+// not for syntax errors in the file itself.
+func readGoInfo(f io.Reader, info *fileInfo) error {
+	r := newImportReader(info.name, f)
+
+	// Scan the package clause and then every import declaration,
+	// single or parenthesized. The bytes consumed accumulate in r.buf.
+	r.readKeyword("package")
+	r.readIdent()
+	for r.peekByte(true) == 'i' {
+		r.readKeyword("import")
+		if r.peekByte(true) == '(' {
+			r.nextByte(false)
+			for r.peekByte(true) != ')' && r.err == nil {
+				r.readImport()
+			}
+			r.nextByte(false)
+		} else {
+			r.readImport()
+		}
+	}
+
+	info.header = r.buf
+
+	// If we stopped successfully before EOF, we read a byte that told us we were done.
+	// Return all but that last byte, which would cause a syntax error if we let it through.
+	if r.err == nil && !r.eof {
+		info.header = r.buf[:len(r.buf)-1]
+	}
+
+	// If we stopped for a syntax error, consume the whole file so that
+	// we are sure we don't change the errors that go/parser returns.
+	if r.err == errSyntax {
+		r.err = nil
+		for r.err == nil && !r.eof {
+			r.readByte()
+		}
+		info.header = r.buf
+	}
+	// Anything still recorded here is not a syntax error (a read error or a NUL byte).
+	if r.err != nil {
+		return r.err
+	}
+
+	// Without a FileSet the caller only gets info.header.
+	if info.fset == nil {
+		return nil
+	}
+
+	// Parse file header & record imports.
+	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
+	if info.parseErr != nil {
+		// Parse errors are reported through info.parseErr, not the return value.
+		return nil
+	}
+
+	hasEmbed := false
+	for _, decl := range info.parsed.Decls {
+		d, ok := decl.(*ast.GenDecl)
+		if !ok {
+			continue
+		}
+		for _, dspec := range d.Specs {
+			spec, ok := dspec.(*ast.ImportSpec)
+			if !ok {
+				continue
+			}
+			quoted := spec.Path.Value
+			path, err := strconv.Unquote(quoted)
+			if err != nil {
+				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
+			}
+			if !isValidImport(path) {
+				// The parser used to return a parse error for invalid import paths, but
+				// no longer does, so check for and create the error here instead.
+				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
+				info.imports = nil
+				return nil
+			}
+			if path == "embed" {
+				hasEmbed = true
+			}
+
+			// Use the spec's own doc comment; for a declaration with a single
+			// spec, fall back to the doc comment on the declaration.
+			doc := spec.Doc
+			if doc == nil && len(d.Specs) == 1 {
+				doc = d.Doc
+			}
+			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
+		}
+	}
+
+	// Extract directives.
+	// Only comment groups that start before the package keyword are considered.
+	for _, group := range info.parsed.Comments {
+		if group.Pos() >= info.parsed.Package {
+			break
+		}
+		for _, c := range group.List {
+			if strings.HasPrefix(c.Text, "//go:") {
+				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
+			}
+		}
+	}
+
+	// If the file imports "embed",
+	// we have to look for //go:embed comments
+	// in the remainder of the file.
+	// The compiler will enforce the mapping of comments to
+	// declared variables. We just need to know the patterns.
+	// If there were //go:embed comments earlier in the file
+	// (near the package statement or imports), the compiler
+	// will reject them. They can be (and have already been) ignored.
+	if hasEmbed {
+		var line []byte
+		for first := true; r.findEmbed(first); first = false {
+			line = line[:0]
+			// Position of the first byte after "//go:embed" and its separator.
+			pos := r.pos
+			// Collect the rest of the comment line, without the newline.
+			for {
+				c := r.readByteNoBuf()
+				if c == '\n' || r.err != nil || r.eof {
+					break
+				}
+				line = append(line, c)
+			}
+			// Add args if line is well-formed.
+			// Ignore badly-formed lines - the compiler will report them when it finds them,
+			// and we can pretend they are not there to help go list succeed with what it knows.
+			embs, err := parseGoEmbed(string(line), pos)
+			if err == nil {
+				info.embeds = append(info.embeds, embs...)
+			}
+		}
+	}
+
+	return nil
+}
+
+// isValidImport reports whether s is acceptable as an import path under the
+// stricter checks permitted by the implementation restriction in
+// https://go.dev/ref/spec#Import_declarations: s must be non-empty and every
+// rune in it must be graphic, must not be white space, and must not be one of
+// the characters in illegalChars.
+// It was ported from the function of the same name that was removed from the
+// parser in CL 424855, when the parser stopped doing these checks.
+func isValidImport(s string) bool {
+	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
+	if s == "" {
+		return false
+	}
+	for _, r := range s {
+		switch {
+		case !unicode.IsGraphic(r),
+			unicode.IsSpace(r),
+			strings.ContainsRune(illegalChars, r):
+			return false
+		}
+	}
+	return true
+}
+
+// parseGoEmbed extracts the glob patterns from args, the text following
+// "//go:embed"; pos is the position at which args begins.
+// Patterns are separated by white space and may be written unquoted, as
+// double-quoted Go strings, or as back-quoted Go strings.
+// Each result is built from a pattern and the position where it starts.
+// An error is returned for an unterminated or invalid quoted string, and for
+// a pattern that is not followed by white space or the end of args.
+// This is based on a similar function in cmd/compile/internal/gc/noder.go;
+// this version calculates position information as well.
+func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
+	// advance drops the first n bytes of args and moves pos past them.
+	// Offset counts bytes; Column counts runes.
+	advance := func(n int) {
+		pos.Column += utf8.RuneCountInString(args[:n])
+		pos.Offset += n
+		args = args[n:]
+	}
+	// skipSpace drops leading white space from args.
+	skipSpace := func() {
+		rest := strings.TrimLeftFunc(args, unicode.IsSpace)
+		advance(len(args) - len(rest))
+	}
+
+	var list []fileEmbed
+	skipSpace()
+	for args != "" {
+		var path string
+		start := pos
+
+		switch args[0] {
+		case '`':
+			raw, _, found := strings.Cut(args[1:], "`")
+			if !found {
+				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+			}
+			path = raw
+			advance(1 + len(raw) + 1)
+
+		case '"':
+			// Locate the closing quote, stepping over backslash escapes.
+			end := -1
+			for i := 1; i < len(args); i++ {
+				if args[i] == '\\' {
+					i++
+					continue
+				}
+				if args[i] == '"' {
+					end = i
+					break
+				}
+			}
+			if end < 0 {
+				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+			}
+			quoted := args[:end+1]
+			unquoted, err := strconv.Unquote(quoted)
+			if err != nil {
+				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", quoted)
+			}
+			path = unquoted
+			advance(len(quoted))
+
+		default:
+			// Unquoted pattern: runs to the next white space or the end of args.
+			end := strings.IndexFunc(args, unicode.IsSpace)
+			if end < 0 {
+				end = len(args)
+			}
+			path = args[:end]
+			advance(end)
+		}
+
+		// A pattern must be followed by white space or the end of the line.
+		if args != "" {
+			if next, _ := utf8.DecodeRuneInString(args); !unicode.IsSpace(next) {
+				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+			}
+		}
+		list = append(list, fileEmbed{path, start})
+		skipSpace()
+	}
+	return list, nil
+}