diff options
Diffstat (limited to 'src/cmd/compile/internal/syntax')
47 files changed, 10479 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/syntax/branches.go b/src/cmd/compile/internal/syntax/branches.go new file mode 100644 index 0000000..3d7ffed --- /dev/null +++ b/src/cmd/compile/internal/syntax/branches.go @@ -0,0 +1,339 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import "fmt" + +// checkBranches checks correct use of labels and branch +// statements (break, continue, fallthrough, goto) in a function body. +// It catches: +// - misplaced breaks, continues, and fallthroughs +// - bad labeled breaks and continues +// - invalid, unused, duplicate, and missing labels +// - gotos jumping over variable declarations and into blocks +func checkBranches(body *BlockStmt, errh ErrorHandler) { + if body == nil { + return + } + + // scope of all labels in this body + ls := &labelScope{errh: errh} + fwdGotos := ls.blockBranches(nil, targets{}, nil, body.Pos(), body.List) + + // If there are any forward gotos left, no matching label was + // found for them. Either those labels were never defined, or + // they are inside blocks and not reachable from the gotos. + for _, fwd := range fwdGotos { + name := fwd.Label.Value + if l := ls.labels[name]; l != nil { + l.used = true // avoid "defined and not used" error + ls.err(fwd.Label.Pos(), "goto %s jumps into block starting at %s", name, l.parent.start) + } else { + ls.err(fwd.Label.Pos(), "label %s not defined", name) + } + } + + // spec: "It is illegal to define a label that is never used." 
+ for _, l := range ls.labels { + if !l.used { + l := l.lstmt.Label + ls.err(l.Pos(), "label %s defined and not used", l.Value) + } + } +} + +type labelScope struct { + errh ErrorHandler + labels map[string]*label // all label declarations inside the function; allocated lazily +} + +type label struct { + parent *block // block containing this label declaration + lstmt *LabeledStmt // statement declaring the label + used bool // whether the label is used or not +} + +type block struct { + parent *block // immediately enclosing block, or nil + start Pos // start of block + lstmt *LabeledStmt // labeled statement associated with this block, or nil +} + +func (ls *labelScope) err(pos Pos, format string, args ...interface{}) { + ls.errh(Error{pos, fmt.Sprintf(format, args...)}) +} + +// declare declares the label introduced by s in block b and returns +// the new label. If the label was already declared, declare reports +// and error and the existing label is returned instead. +func (ls *labelScope) declare(b *block, s *LabeledStmt) *label { + name := s.Label.Value + labels := ls.labels + if labels == nil { + labels = make(map[string]*label) + ls.labels = labels + } else if alt := labels[name]; alt != nil { + ls.err(s.Label.Pos(), "label %s already defined at %s", name, alt.lstmt.Label.Pos().String()) + return alt + } + l := &label{b, s, false} + labels[name] = l + return l +} + +// gotoTarget returns the labeled statement matching the given name and +// declared in block b or any of its enclosing blocks. The result is nil +// if the label is not defined, or doesn't match a valid labeled statement. 
+func (ls *labelScope) gotoTarget(b *block, name string) *LabeledStmt { + if l := ls.labels[name]; l != nil { + l.used = true // even if it's not a valid target + for ; b != nil; b = b.parent { + if l.parent == b { + return l.lstmt + } + } + } + return nil +} + +var invalid = new(LabeledStmt) // singleton to signal invalid enclosing target + +// enclosingTarget returns the innermost enclosing labeled statement matching +// the given name. The result is nil if the label is not defined, and invalid +// if the label is defined but doesn't label a valid labeled statement. +func (ls *labelScope) enclosingTarget(b *block, name string) *LabeledStmt { + if l := ls.labels[name]; l != nil { + l.used = true // even if it's not a valid target (see e.g., test/fixedbugs/bug136.go) + for ; b != nil; b = b.parent { + if l.lstmt == b.lstmt { + return l.lstmt + } + } + return invalid + } + return nil +} + +// targets describes the target statements within which break +// or continue statements are valid. +type targets struct { + breaks Stmt // *ForStmt, *SwitchStmt, *SelectStmt, or nil + continues *ForStmt // or nil + caseIndex int // case index of immediately enclosing switch statement, or < 0 +} + +// blockBranches processes a block's body starting at start and returns the +// list of unresolved (forward) gotos. parent is the immediately enclosing +// block (or nil), ctxt provides information about the enclosing statements, +// and lstmt is the labeled statement associated with this block, or nil. +func (ls *labelScope) blockBranches(parent *block, ctxt targets, lstmt *LabeledStmt, start Pos, body []Stmt) []*BranchStmt { + b := &block{parent: parent, start: start, lstmt: lstmt} + + var varPos Pos + var varName Expr + var fwdGotos, badGotos []*BranchStmt + + recordVarDecl := func(pos Pos, name Expr) { + varPos = pos + varName = name + // Any existing forward goto jumping over the variable + // declaration is invalid. 
The goto may still jump out + // of the block and be ok, but we don't know that yet. + // Remember all forward gotos as potential bad gotos. + badGotos = append(badGotos[:0], fwdGotos...) + } + + jumpsOverVarDecl := func(fwd *BranchStmt) bool { + if varPos.IsKnown() { + for _, bad := range badGotos { + if fwd == bad { + return true + } + } + } + return false + } + + innerBlock := func(ctxt targets, start Pos, body []Stmt) { + // Unresolved forward gotos from the inner block + // become forward gotos for the current block. + fwdGotos = append(fwdGotos, ls.blockBranches(b, ctxt, lstmt, start, body)...) + } + + // A fallthrough statement counts as last statement in a statement + // list even if there are trailing empty statements; remove them. + stmtList := trimTrailingEmptyStmts(body) + for stmtIndex, stmt := range stmtList { + lstmt = nil + L: + switch s := stmt.(type) { + case *DeclStmt: + for _, d := range s.DeclList { + if v, ok := d.(*VarDecl); ok { + recordVarDecl(v.Pos(), v.NameList[0]) + break // the first VarDecl will do + } + } + + case *LabeledStmt: + // declare non-blank label + if name := s.Label.Value; name != "_" { + l := ls.declare(b, s) + // resolve matching forward gotos + i := 0 + for _, fwd := range fwdGotos { + if fwd.Label.Value == name { + fwd.Target = s + l.used = true + if jumpsOverVarDecl(fwd) { + ls.err( + fwd.Label.Pos(), + "goto %s jumps over declaration of %s at %s", + name, String(varName), varPos, + ) + } + } else { + // no match - keep forward goto + fwdGotos[i] = fwd + i++ + } + } + fwdGotos = fwdGotos[:i] + lstmt = s + } + // process labeled statement + stmt = s.Stmt + goto L + + case *BranchStmt: + // unlabeled branch statement + if s.Label == nil { + switch s.Tok { + case _Break: + if t := ctxt.breaks; t != nil { + s.Target = t + } else { + ls.err(s.Pos(), "break is not in a loop, switch, or select") + } + case _Continue: + if t := ctxt.continues; t != nil { + s.Target = t + } else { + ls.err(s.Pos(), "continue is not in a loop") 
+ } + case _Fallthrough: + msg := "fallthrough statement out of place" + if t, _ := ctxt.breaks.(*SwitchStmt); t != nil { + if _, ok := t.Tag.(*TypeSwitchGuard); ok { + msg = "cannot fallthrough in type switch" + } else if ctxt.caseIndex < 0 || stmtIndex+1 < len(stmtList) { + // fallthrough nested in a block or not the last statement + // use msg as is + } else if ctxt.caseIndex+1 == len(t.Body) { + msg = "cannot fallthrough final case in switch" + } else { + break // fallthrough ok + } + } + ls.err(s.Pos(), msg) + case _Goto: + fallthrough // should always have a label + default: + panic("invalid BranchStmt") + } + break + } + + // labeled branch statement + name := s.Label.Value + switch s.Tok { + case _Break: + // spec: "If there is a label, it must be that of an enclosing + // "for", "switch", or "select" statement, and that is the one + // whose execution terminates." + if t := ls.enclosingTarget(b, name); t != nil { + switch t := t.Stmt.(type) { + case *SwitchStmt, *SelectStmt, *ForStmt: + s.Target = t + default: + ls.err(s.Label.Pos(), "invalid break label %s", name) + } + } else { + ls.err(s.Label.Pos(), "break label not defined: %s", name) + } + + case _Continue: + // spec: "If there is a label, it must be that of an enclosing + // "for" statement, and that is the one whose execution advances." 
+ if t := ls.enclosingTarget(b, name); t != nil { + if t, ok := t.Stmt.(*ForStmt); ok { + s.Target = t + } else { + ls.err(s.Label.Pos(), "invalid continue label %s", name) + } + } else { + ls.err(s.Label.Pos(), "continue label not defined: %s", name) + } + + case _Goto: + if t := ls.gotoTarget(b, name); t != nil { + s.Target = t + } else { + // label may be declared later - add goto to forward gotos + fwdGotos = append(fwdGotos, s) + } + + case _Fallthrough: + fallthrough // should never have a label + default: + panic("invalid BranchStmt") + } + + case *AssignStmt: + if s.Op == Def { + recordVarDecl(s.Pos(), s.Lhs) + } + + case *BlockStmt: + inner := targets{ctxt.breaks, ctxt.continues, -1} + innerBlock(inner, s.Pos(), s.List) + + case *IfStmt: + inner := targets{ctxt.breaks, ctxt.continues, -1} + innerBlock(inner, s.Then.Pos(), s.Then.List) + if s.Else != nil { + innerBlock(inner, s.Else.Pos(), []Stmt{s.Else}) + } + + case *ForStmt: + inner := targets{s, s, -1} + innerBlock(inner, s.Body.Pos(), s.Body.List) + + case *SwitchStmt: + inner := targets{s, ctxt.continues, -1} + for i, cc := range s.Body { + inner.caseIndex = i + innerBlock(inner, cc.Pos(), cc.Body) + } + + case *SelectStmt: + inner := targets{s, ctxt.continues, -1} + for _, cc := range s.Body { + innerBlock(inner, cc.Pos(), cc.Body) + } + } + } + + return fwdGotos +} + +func trimTrailingEmptyStmts(list []Stmt) []Stmt { + for i := len(list); i > 0; i-- { + if _, ok := list[i-1].(*EmptyStmt); !ok { + return list[:i] + } + } + return nil +} diff --git a/src/cmd/compile/internal/syntax/dumper.go b/src/cmd/compile/internal/syntax/dumper.go new file mode 100644 index 0000000..d524788 --- /dev/null +++ b/src/cmd/compile/internal/syntax/dumper.go @@ -0,0 +1,212 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements printing of syntax tree structures. 
+ +package syntax + +import ( + "fmt" + "io" + "reflect" + "unicode" + "unicode/utf8" +) + +// Fdump dumps the structure of the syntax tree rooted at n to w. +// It is intended for debugging purposes; no specific output format +// is guaranteed. +func Fdump(w io.Writer, n Node) (err error) { + p := dumper{ + output: w, + ptrmap: make(map[Node]int), + last: '\n', // force printing of line number on first line + } + + defer func() { + if e := recover(); e != nil { + err = e.(writeError).err // re-panics if it's not a writeError + } + }() + + if n == nil { + p.printf("nil\n") + return + } + p.dump(reflect.ValueOf(n), n) + p.printf("\n") + + return +} + +type dumper struct { + output io.Writer + ptrmap map[Node]int // node -> dump line number + indent int // current indentation level + last byte // last byte processed by Write + line int // current line number +} + +var indentBytes = []byte(". ") + +func (p *dumper) Write(data []byte) (n int, err error) { + var m int + for i, b := range data { + // invariant: data[0:n] has been written + if b == '\n' { + m, err = p.output.Write(data[n : i+1]) + n += m + if err != nil { + return + } + } else if p.last == '\n' { + p.line++ + _, err = fmt.Fprintf(p.output, "%6d ", p.line) + if err != nil { + return + } + for j := p.indent; j > 0; j-- { + _, err = p.output.Write(indentBytes) + if err != nil { + return + } + } + } + p.last = b + } + if len(data) > n { + m, err = p.output.Write(data[n:]) + n += m + } + return +} + +// writeError wraps locally caught write errors so we can distinguish +// them from genuine panics which we don't want to return as errors. +type writeError struct { + err error +} + +// printf is a convenience wrapper that takes care of print errors. +func (p *dumper) printf(format string, args ...interface{}) { + if _, err := fmt.Fprintf(p, format, args...); err != nil { + panic(writeError{err}) + } +} + +// dump prints the contents of x. 
+// If x is the reflect.Value of a struct s, where &s +// implements Node, then &s should be passed for n - +// this permits printing of the unexported span and +// comments fields of the embedded isNode field by +// calling the Span() and Comment() instead of using +// reflection. +func (p *dumper) dump(x reflect.Value, n Node) { + switch x.Kind() { + case reflect.Interface: + if x.IsNil() { + p.printf("nil") + return + } + p.dump(x.Elem(), nil) + + case reflect.Ptr: + if x.IsNil() { + p.printf("nil") + return + } + + // special cases for identifiers w/o attached comments (common case) + if x, ok := x.Interface().(*Name); ok { + p.printf("%s @ %v", x.Value, x.Pos()) + return + } + + p.printf("*") + // Fields may share type expressions, and declarations + // may share the same group - use ptrmap to keep track + // of nodes that have been printed already. + if ptr, ok := x.Interface().(Node); ok { + if line, exists := p.ptrmap[ptr]; exists { + p.printf("(Node @ %d)", line) + return + } + p.ptrmap[ptr] = p.line + n = ptr + } + p.dump(x.Elem(), n) + + case reflect.Slice: + if x.IsNil() { + p.printf("nil") + return + } + p.printf("%s (%d entries) {", x.Type(), x.Len()) + if x.Len() > 0 { + p.indent++ + p.printf("\n") + for i, n := 0, x.Len(); i < n; i++ { + p.printf("%d: ", i) + p.dump(x.Index(i), nil) + p.printf("\n") + } + p.indent-- + } + p.printf("}") + + case reflect.Struct: + typ := x.Type() + + // if span, ok := x.Interface().(lexical.Span); ok { + // p.printf("%s", &span) + // return + // } + + p.printf("%s {", typ) + p.indent++ + + first := true + if n != nil { + p.printf("\n") + first = false + // p.printf("Span: %s\n", n.Span()) + // if c := *n.Comments(); c != nil { + // p.printf("Comments: ") + // p.dump(reflect.ValueOf(c), nil) // a Comment is not a Node + // p.printf("\n") + // } + } + + for i, n := 0, typ.NumField(); i < n; i++ { + // Exclude non-exported fields because their + // values cannot be accessed via reflection. 
+ if name := typ.Field(i).Name; isExported(name) { + if first { + p.printf("\n") + first = false + } + p.printf("%s: ", name) + p.dump(x.Field(i), nil) + p.printf("\n") + } + } + + p.indent-- + p.printf("}") + + default: + switch x := x.Interface().(type) { + case string: + // print strings in quotes + p.printf("%q", x) + default: + p.printf("%v", x) + } + } +} + +func isExported(name string) bool { + ch, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(ch) +} diff --git a/src/cmd/compile/internal/syntax/dumper_test.go b/src/cmd/compile/internal/syntax/dumper_test.go new file mode 100644 index 0000000..1ba85cc --- /dev/null +++ b/src/cmd/compile/internal/syntax/dumper_test.go @@ -0,0 +1,21 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "testing" +) + +func TestDump(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + ast, _ := ParseFile(*src_, func(err error) { t.Error(err) }, nil, CheckBranches) + + if ast != nil { + Fdump(testOut(), ast) + } +} diff --git a/src/cmd/compile/internal/syntax/error_test.go b/src/cmd/compile/internal/syntax/error_test.go new file mode 100644 index 0000000..55ea634 --- /dev/null +++ b/src/cmd/compile/internal/syntax/error_test.go @@ -0,0 +1,190 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a regression test harness for syntax errors. +// The files in the testdata directory are parsed and the reported +// errors are compared against the errors declared in those files. +// +// Errors are declared in place in the form of "error comments", +// just before (or on the same line as) the offending token. 
+// +// Error comments must be of the form // ERROR rx or /* ERROR rx */ +// where rx is a regular expression that matches the reported error +// message. The rx text comprises the comment text after "ERROR ", +// with any white space around it stripped. +// +// If the line comment form is used, the reported error's line must +// match the line of the error comment. +// +// If the regular comment form is used, the reported error's position +// must match the position of the token immediately following the +// error comment. Thus, /* ERROR ... */ comments should appear +// immediately before the position where the error is reported. +// +// Currently, the test harness only supports one error comment per +// token. If multiple error comments appear before a token, only +// the last one is considered. + +package syntax + +import ( + "flag" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "testing" +) + +const testdata = "testdata" // directory containing test files + +var print = flag.Bool("print", false, "only print errors") + +// A position represents a source position in the current file. +type position struct { + line, col uint +} + +func (pos position) String() string { + return fmt.Sprintf("%d:%d", pos.line, pos.col) +} + +func sortedPositions(m map[position]string) []position { + list := make([]position, len(m)) + i := 0 + for pos := range m { + list[i] = pos + i++ + } + sort.Slice(list, func(i, j int) bool { + a, b := list[i], list[j] + return a.line < b.line || a.line == b.line && a.col < b.col + }) + return list +} + +// declaredErrors returns a map of source positions to error +// patterns, extracted from error comments in the given file. +// Error comments in the form of line comments use col = 0 +// in their position. 
+func declaredErrors(t *testing.T, filename string) map[position]string { + f, err := os.Open(filename) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + declared := make(map[position]string) + + var s scanner + var pattern string + s.init(f, func(line, col uint, msg string) { + // errors never start with '/' so they are automatically excluded here + switch { + case strings.HasPrefix(msg, "// ERROR "): + // we can't have another comment on the same line - just add it + declared[position{s.line, 0}] = strings.TrimSpace(msg[9:]) + case strings.HasPrefix(msg, "/* ERROR "): + // we may have more comments before the next token - collect them + pattern = strings.TrimSpace(msg[9 : len(msg)-2]) + } + }, comments) + + // consume file + for { + s.next() + if pattern != "" { + declared[position{s.line, s.col}] = pattern + pattern = "" + } + if s.tok == _EOF { + break + } + } + + return declared +} + +func testSyntaxErrors(t *testing.T, filename string) { + declared := declaredErrors(t, filename) + if *print { + fmt.Println("Declared errors:") + for _, pos := range sortedPositions(declared) { + fmt.Printf("%s:%s: %s\n", filename, pos, declared[pos]) + } + + fmt.Println() + fmt.Println("Reported errors:") + } + + f, err := os.Open(filename) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + ParseFile(filename, func(err error) { + e, ok := err.(Error) + if !ok { + return + } + + if *print { + fmt.Println(err) + return + } + + orig := position{e.Pos.Line(), e.Pos.Col()} + pos := orig + pattern, found := declared[pos] + if !found { + // try line comment (only line must match) + pos = position{e.Pos.Line(), 0} + pattern, found = declared[pos] + } + if found { + rx, err := regexp.Compile(pattern) + if err != nil { + t.Errorf("%s:%s: %v", filename, pos, err) + return + } + if match := rx.MatchString(e.Msg); !match { + t.Errorf("%s:%s: %q does not match %q", filename, pos, e.Msg, pattern) + return + } + // we have a match - eliminate this error + delete(declared, pos) 
+ } else { + t.Errorf("%s:%s: unexpected error: %s", filename, orig, e.Msg) + } + }, nil, CheckBranches) + + if *print { + fmt.Println() + return // we're done + } + + // report expected but not reported errors + for pos, pattern := range declared { + t.Errorf("%s:%s: missing error: %s", filename, pos, pattern) + } +} + +func TestSyntaxErrors(t *testing.T) { + testenv.MustHaveGoBuild(t) // we need access to source (testdata) + + list, err := os.ReadDir(testdata) + if err != nil { + t.Fatal(err) + } + for _, fi := range list { + name := fi.Name() + if !fi.IsDir() && !strings.HasPrefix(name, ".") { + testSyntaxErrors(t, filepath.Join(testdata, name)) + } + } +} diff --git a/src/cmd/compile/internal/syntax/nodes.go b/src/cmd/compile/internal/syntax/nodes.go new file mode 100644 index 0000000..e943a9a --- /dev/null +++ b/src/cmd/compile/internal/syntax/nodes.go @@ -0,0 +1,483 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// ---------------------------------------------------------------------------- +// Nodes + +type Node interface { + // Pos() returns the position associated with the node as follows: + // 1) The position of a node representing a terminal syntax production + // (Name, BasicLit, etc.) is the position of the respective production + // in the source. + // 2) The position of a node representing a non-terminal production + // (IndexExpr, IfStmt, etc.) is the position of a token uniquely + // associated with that production; usually the left-most one + // ('[' for IndexExpr, 'if' for IfStmt, etc.) 
+ Pos() Pos + aNode() +} + +type node struct { + // commented out for now since not yet used + // doc *Comment // nil means no comment(s) attached + pos Pos +} + +func (n *node) Pos() Pos { return n.pos } +func (*node) aNode() {} + +// ---------------------------------------------------------------------------- +// Files + +// package PkgName; DeclList[0], DeclList[1], ... +type File struct { + Pragma Pragma + PkgName *Name + DeclList []Decl + EOF Pos + node +} + +// ---------------------------------------------------------------------------- +// Declarations + +type ( + Decl interface { + Node + aDecl() + } + + // Path + // LocalPkgName Path + ImportDecl struct { + Group *Group // nil means not part of a group + Pragma Pragma + LocalPkgName *Name // including "."; nil means no rename present + Path *BasicLit // Path.Bad || Path.Kind == StringLit; nil means no path + decl + } + + // NameList + // NameList = Values + // NameList Type = Values + ConstDecl struct { + Group *Group // nil means not part of a group + Pragma Pragma + NameList []*Name + Type Expr // nil means no type + Values Expr // nil means no values + decl + } + + // Name Type + TypeDecl struct { + Group *Group // nil means not part of a group + Pragma Pragma + Name *Name + TParamList []*Field // nil means no type parameters + Alias bool + Type Expr + decl + } + + // NameList Type + // NameList Type = Values + // NameList = Values + VarDecl struct { + Group *Group // nil means not part of a group + Pragma Pragma + NameList []*Name + Type Expr // nil means no type + Values Expr // nil means no values + decl + } + + // func Name Type { Body } + // func Name Type + // func Receiver Name Type { Body } + // func Receiver Name Type + FuncDecl struct { + Pragma Pragma + Recv *Field // nil means regular function + Name *Name + TParamList []*Field // nil means no type parameters + Type *FuncType + Body *BlockStmt // nil means no body (forward declaration) + decl + } +) + +type decl struct{ node } + +func 
(*decl) aDecl() {} + +// All declarations belonging to the same group point to the same Group node. +type Group struct { + _ int // not empty so we are guaranteed different Group instances +} + +// ---------------------------------------------------------------------------- +// Expressions + +func NewName(pos Pos, value string) *Name { + n := new(Name) + n.pos = pos + n.Value = value + return n +} + +type ( + Expr interface { + Node + typeInfo + aExpr() + } + + // Placeholder for an expression that failed to parse + // correctly and where we can't provide a better node. + BadExpr struct { + expr + } + + // Value + Name struct { + Value string + expr + } + + // Value + BasicLit struct { + Value string + Kind LitKind + Bad bool // true means the literal Value has syntax errors + expr + } + + // Type { ElemList[0], ElemList[1], ... } + CompositeLit struct { + Type Expr // nil means no literal type + ElemList []Expr + NKeys int // number of elements with keys + Rbrace Pos + expr + } + + // Key: Value + KeyValueExpr struct { + Key, Value Expr + expr + } + + // func Type { Body } + FuncLit struct { + Type *FuncType + Body *BlockStmt + expr + } + + // (X) + ParenExpr struct { + X Expr + expr + } + + // X.Sel + SelectorExpr struct { + X Expr + Sel *Name + expr + } + + // X[Index] + // X[T1, T2, ...] (with Ti = Index.(*ListExpr).ElemList[i]) + IndexExpr struct { + X Expr + Index Expr + expr + } + + // X[Index[0] : Index[1] : Index[2]] + SliceExpr struct { + X Expr + Index [3]Expr + // Full indicates whether this is a simple or full slice expression. + // In a valid AST, this is equivalent to Index[2] != nil. + // TODO(mdempsky): This is only needed to report the "3-index + // slice of string" error when Index[2] is missing. 
+ Full bool + expr + } + + // X.(Type) + AssertExpr struct { + X Expr + Type Expr + expr + } + + // X.(type) + // Lhs := X.(type) + TypeSwitchGuard struct { + Lhs *Name // nil means no Lhs := + X Expr // X.(type) + expr + } + + Operation struct { + Op Operator + X, Y Expr // Y == nil means unary expression + expr + } + + // Fun(ArgList[0], ArgList[1], ...) + CallExpr struct { + Fun Expr + ArgList []Expr // nil means no arguments + HasDots bool // last argument is followed by ... + expr + } + + // ElemList[0], ElemList[1], ... + ListExpr struct { + ElemList []Expr + expr + } + + // [Len]Elem + ArrayType struct { + // TODO(gri) consider using Name{"..."} instead of nil (permits attaching of comments) + Len Expr // nil means Len is ... + Elem Expr + expr + } + + // []Elem + SliceType struct { + Elem Expr + expr + } + + // ...Elem + DotsType struct { + Elem Expr + expr + } + + // struct { FieldList[0] TagList[0]; FieldList[1] TagList[1]; ... } + StructType struct { + FieldList []*Field + TagList []*BasicLit // i >= len(TagList) || TagList[i] == nil means no tag for field i + expr + } + + // Name Type + // Type + Field struct { + Name *Name // nil means anonymous field/parameter (structs/parameters), or embedded element (interfaces) + Type Expr // field names declared in a list share the same Type (identical pointers) + node + } + + // interface { MethodList[0]; MethodList[1]; ... } + InterfaceType struct { + MethodList []*Field + expr + } + + FuncType struct { + ParamList []*Field + ResultList []*Field + expr + } + + // map[Key]Value + MapType struct { + Key, Value Expr + expr + } + + // chan Elem + // <-chan Elem + // chan<- Elem + ChanType struct { + Dir ChanDir // 0 means no direction + Elem Expr + expr + } +) + +type expr struct { + node + typeAndValue // After typechecking, contains the results of typechecking this expression. 
+} + +func (*expr) aExpr() {} + +type ChanDir uint + +const ( + _ ChanDir = iota + SendOnly + RecvOnly +) + +// ---------------------------------------------------------------------------- +// Statements + +type ( + Stmt interface { + Node + aStmt() + } + + SimpleStmt interface { + Stmt + aSimpleStmt() + } + + EmptyStmt struct { + simpleStmt + } + + LabeledStmt struct { + Label *Name + Stmt Stmt + stmt + } + + BlockStmt struct { + List []Stmt + Rbrace Pos + stmt + } + + ExprStmt struct { + X Expr + simpleStmt + } + + SendStmt struct { + Chan, Value Expr // Chan <- Value + simpleStmt + } + + DeclStmt struct { + DeclList []Decl + stmt + } + + AssignStmt struct { + Op Operator // 0 means no operation + Lhs, Rhs Expr // Rhs == nil means Lhs++ (Op == Add) or Lhs-- (Op == Sub) + simpleStmt + } + + BranchStmt struct { + Tok token // Break, Continue, Fallthrough, or Goto + Label *Name + // Target is the continuation of the control flow after executing + // the branch; it is computed by the parser if CheckBranches is set. + // Target is a *LabeledStmt for gotos, and a *SwitchStmt, *SelectStmt, + // or *ForStmt for breaks and continues, depending on the context of + // the branch. Target is not set for fallthroughs. + Target Stmt + stmt + } + + CallStmt struct { + Tok token // Go or Defer + Call Expr + stmt + } + + ReturnStmt struct { + Results Expr // nil means no explicit return values + stmt + } + + IfStmt struct { + Init SimpleStmt + Cond Expr + Then *BlockStmt + Else Stmt // either nil, *IfStmt, or *BlockStmt + stmt + } + + ForStmt struct { + Init SimpleStmt // incl. *RangeClause + Cond Expr + Post SimpleStmt + Body *BlockStmt + stmt + } + + SwitchStmt struct { + Init SimpleStmt + Tag Expr // incl. 
*TypeSwitchGuard + Body []*CaseClause + Rbrace Pos + stmt + } + + SelectStmt struct { + Body []*CommClause + Rbrace Pos + stmt + } +) + +type ( + RangeClause struct { + Lhs Expr // nil means no Lhs = or Lhs := + Def bool // means := + X Expr // range X + simpleStmt + } + + CaseClause struct { + Cases Expr // nil means default clause + Body []Stmt + Colon Pos + node + } + + CommClause struct { + Comm SimpleStmt // send or receive stmt; nil means default clause + Body []Stmt + Colon Pos + node + } +) + +type stmt struct{ node } + +func (stmt) aStmt() {} + +type simpleStmt struct { + stmt +} + +func (simpleStmt) aSimpleStmt() {} + +// ---------------------------------------------------------------------------- +// Comments + +// TODO(gri) Consider renaming to CommentPos, CommentPlacement, etc. +// Kind = Above doesn't make much sense. +type CommentKind uint + +const ( + Above CommentKind = iota + Below + Left + Right +) + +type Comment struct { + Kind CommentKind + Text string + Next *Comment +} diff --git a/src/cmd/compile/internal/syntax/nodes_test.go b/src/cmd/compile/internal/syntax/nodes_test.go new file mode 100644 index 0000000..a39f08c --- /dev/null +++ b/src/cmd/compile/internal/syntax/nodes_test.go @@ -0,0 +1,329 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "fmt" + "strings" + "testing" +) + +// A test is a source code snippet of a particular node type. +// In the snippet, a '@' indicates the position recorded by +// the parser when creating the respective node. +type test struct { + nodetyp string + snippet string +} + +var decls = []test{ + // The position of declarations is always the + // position of the first token of an individual + // declaration, independent of grouping. + {"ImportDecl", `import @"math"`}, + {"ImportDecl", `import @mymath "math"`}, + {"ImportDecl", `import @. 
"math"`}, + {"ImportDecl", `import (@"math")`}, + {"ImportDecl", `import (@mymath "math")`}, + {"ImportDecl", `import (@. "math")`}, + + {"ConstDecl", `const @x`}, + {"ConstDecl", `const @x = 0`}, + {"ConstDecl", `const @x, y, z = 0, 1, 2`}, + {"ConstDecl", `const (@x)`}, + {"ConstDecl", `const (@x = 0)`}, + {"ConstDecl", `const (@x, y, z = 0, 1, 2)`}, + + {"TypeDecl", `type @T int`}, + {"TypeDecl", `type @T = int`}, + {"TypeDecl", `type (@T int)`}, + {"TypeDecl", `type (@T = int)`}, + + {"VarDecl", `var @x int`}, + {"VarDecl", `var @x, y, z int`}, + {"VarDecl", `var @x int = 0`}, + {"VarDecl", `var @x, y, z int = 1, 2, 3`}, + {"VarDecl", `var @x = 0`}, + {"VarDecl", `var @x, y, z = 1, 2, 3`}, + {"VarDecl", `var (@x int)`}, + {"VarDecl", `var (@x, y, z int)`}, + {"VarDecl", `var (@x int = 0)`}, + {"VarDecl", `var (@x, y, z int = 1, 2, 3)`}, + {"VarDecl", `var (@x = 0)`}, + {"VarDecl", `var (@x, y, z = 1, 2, 3)`}, + + {"FuncDecl", `func @f() {}`}, + {"FuncDecl", `func @(T) f() {}`}, + {"FuncDecl", `func @(x T) f() {}`}, +} + +var exprs = []test{ + // The position of an expression is the position + // of the left-most token that identifies the + // kind of expression. 
+ {"Name", `@x`}, + + {"BasicLit", `@0`}, + {"BasicLit", `@0x123`}, + {"BasicLit", `@3.1415`}, + {"BasicLit", `@.2718`}, + {"BasicLit", `@1i`}, + {"BasicLit", `@'a'`}, + {"BasicLit", `@"abc"`}, + {"BasicLit", "@`abc`"}, + + {"CompositeLit", `@{}`}, + {"CompositeLit", `T@{}`}, + {"CompositeLit", `struct{x, y int}@{}`}, + + {"KeyValueExpr", `"foo"@: true`}, + {"KeyValueExpr", `"a"@: b`}, + + {"FuncLit", `@func (){}`}, + {"ParenExpr", `@(x)`}, + {"SelectorExpr", `a@.b`}, + {"IndexExpr", `a@[i]`}, + + {"SliceExpr", `a@[:]`}, + {"SliceExpr", `a@[i:]`}, + {"SliceExpr", `a@[:j]`}, + {"SliceExpr", `a@[i:j]`}, + {"SliceExpr", `a@[i:j:k]`}, + + {"AssertExpr", `x@.(T)`}, + + {"Operation", `@*b`}, + {"Operation", `@+b`}, + {"Operation", `@-b`}, + {"Operation", `@!b`}, + {"Operation", `@^b`}, + {"Operation", `@&b`}, + {"Operation", `@<-b`}, + + {"Operation", `a @|| b`}, + {"Operation", `a @&& b`}, + {"Operation", `a @== b`}, + {"Operation", `a @+ b`}, + {"Operation", `a @* b`}, + + {"CallExpr", `f@()`}, + {"CallExpr", `f@(x, y, z)`}, + {"CallExpr", `obj.f@(1, 2, 3)`}, + {"CallExpr", `func(x int) int { return x + 1 }@(y)`}, + + // ListExpr: tested via multi-value const/var declarations +} + +var types = []test{ + {"Operation", `@*T`}, + {"Operation", `@*struct{}`}, + + {"ArrayType", `@[10]T`}, + {"ArrayType", `@[...]T`}, + + {"SliceType", `@[]T`}, + {"DotsType", `@...T`}, + {"StructType", `@struct{}`}, + {"InterfaceType", `@interface{}`}, + {"FuncType", `func@()`}, + {"MapType", `@map[T]T`}, + + {"ChanType", `@chan T`}, + {"ChanType", `@chan<- T`}, + {"ChanType", `@<-chan T`}, +} + +var fields = []test{ + {"Field", `@T`}, + {"Field", `@(T)`}, + {"Field", `@x T`}, + {"Field", `@x *(T)`}, + {"Field", `@x, y, z T`}, + {"Field", `@x, y, z (*T)`}, +} + +var stmts = []test{ + {"EmptyStmt", `@`}, + + {"LabeledStmt", `L@:`}, + {"LabeledStmt", `L@: ;`}, + {"LabeledStmt", `L@: f()`}, + + {"BlockStmt", `@{}`}, + + // The position of an ExprStmt is the position of the expression. 
+ {"ExprStmt", `@<-ch`}, + {"ExprStmt", `f@()`}, + {"ExprStmt", `append@(s, 1, 2, 3)`}, + + {"SendStmt", `ch @<- x`}, + + {"DeclStmt", `@const x = 0`}, + {"DeclStmt", `@const (x = 0)`}, + {"DeclStmt", `@type T int`}, + {"DeclStmt", `@type T = int`}, + {"DeclStmt", `@type (T1 = int; T2 = float32)`}, + {"DeclStmt", `@var x = 0`}, + {"DeclStmt", `@var x, y, z int`}, + {"DeclStmt", `@var (a, b = 1, 2)`}, + + {"AssignStmt", `x @= y`}, + {"AssignStmt", `a, b, x @= 1, 2, 3`}, + {"AssignStmt", `x @+= y`}, + {"AssignStmt", `x @:= y`}, + {"AssignStmt", `x, ok @:= f()`}, + {"AssignStmt", `x@++`}, + {"AssignStmt", `a[i]@--`}, + + {"BranchStmt", `@break`}, + {"BranchStmt", `@break L`}, + {"BranchStmt", `@continue`}, + {"BranchStmt", `@continue L`}, + {"BranchStmt", `@fallthrough`}, + {"BranchStmt", `@goto L`}, + + {"CallStmt", `@defer f()`}, + {"CallStmt", `@go f()`}, + + {"ReturnStmt", `@return`}, + {"ReturnStmt", `@return x`}, + {"ReturnStmt", `@return a, b, a + b*f(1, 2, 3)`}, + + {"IfStmt", `@if cond {}`}, + {"IfStmt", `@if cond { f() } else {}`}, + {"IfStmt", `@if cond { f() } else { g(); h() }`}, + {"ForStmt", `@for {}`}, + {"ForStmt", `@for { f() }`}, + {"SwitchStmt", `@switch {}`}, + {"SwitchStmt", `@switch { default: }`}, + {"SwitchStmt", `@switch { default: x++ }`}, + {"SelectStmt", `@select {}`}, + {"SelectStmt", `@select { default: }`}, + {"SelectStmt", `@select { default: ch <- false }`}, +} + +var ranges = []test{ + {"RangeClause", `@range s`}, + {"RangeClause", `i = @range s`}, + {"RangeClause", `i := @range s`}, + {"RangeClause", `_, x = @range s`}, + {"RangeClause", `i, x = @range s`}, + {"RangeClause", `_, x := @range s.f`}, + {"RangeClause", `i, x := @range f(i)`}, +} + +var guards = []test{ + {"TypeSwitchGuard", `x@.(type)`}, + {"TypeSwitchGuard", `x := x@.(type)`}, +} + +var cases = []test{ + {"CaseClause", `@case x:`}, + {"CaseClause", `@case x, y, z:`}, + {"CaseClause", `@case x == 1, y == 2:`}, + {"CaseClause", `@default:`}, +} + +var comms = []test{ + 
{"CommClause", `@case <-ch:`}, + {"CommClause", `@case x <- ch:`}, + {"CommClause", `@case x = <-ch:`}, + {"CommClause", `@case x := <-ch:`}, + {"CommClause", `@case x, ok = <-ch: f(1, 2, 3)`}, + {"CommClause", `@case x, ok := <-ch: x++`}, + {"CommClause", `@default:`}, + {"CommClause", `@default: ch <- true`}, +} + +func TestPos(t *testing.T) { + // TODO(gri) Once we have a general tree walker, we can use that to find + // the first occurrence of the respective node and we don't need to hand- + // extract the node for each specific kind of construct. + + testPos(t, decls, "package p; ", "", + func(f *File) Node { return f.DeclList[0] }, + ) + + // embed expressions in a composite literal so we can test key:value and naked composite literals + testPos(t, exprs, "package p; var _ = T{ ", " }", + func(f *File) Node { return f.DeclList[0].(*VarDecl).Values.(*CompositeLit).ElemList[0] }, + ) + + // embed types in a function signature so we can test ... types + testPos(t, types, "package p; func f(", ")", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Type.ParamList[0].Type }, + ) + + testPos(t, fields, "package p; func f(", ")", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Type.ParamList[0] }, + ) + + testPos(t, stmts, "package p; func _() { ", "; }", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Body.List[0] }, + ) + + testPos(t, ranges, "package p; func _() { for ", " {} }", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Body.List[0].(*ForStmt).Init.(*RangeClause) }, + ) + + testPos(t, guards, "package p; func _() { switch ", " {} }", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Body.List[0].(*SwitchStmt).Tag.(*TypeSwitchGuard) }, + ) + + testPos(t, cases, "package p; func _() { switch { ", " } }", + func(f *File) Node { return f.DeclList[0].(*FuncDecl).Body.List[0].(*SwitchStmt).Body[0] }, + ) + + testPos(t, comms, "package p; func _() { select { ", " } }", + func(f *File) Node { return 
f.DeclList[0].(*FuncDecl).Body.List[0].(*SelectStmt).Body[0] }, + ) +} + +func testPos(t *testing.T, list []test, prefix, suffix string, extract func(*File) Node) { + for _, test := range list { + // complete source, compute @ position, and strip @ from source + src, index := stripAt(prefix + test.snippet + suffix) + if index < 0 { + t.Errorf("missing @: %s (%s)", src, test.nodetyp) + continue + } + + // build syntax tree + file, err := Parse(nil, strings.NewReader(src), nil, nil, 0) + if err != nil { + t.Errorf("parse error: %s: %v (%s)", src, err, test.nodetyp) + continue + } + + // extract desired node + node := extract(file) + if typ := typeOf(node); typ != test.nodetyp { + t.Errorf("type error: %s: type = %s, want %s", src, typ, test.nodetyp) + continue + } + + // verify node position with expected position as indicated by @ + if pos := int(node.Pos().Col()); pos != index+colbase { + t.Errorf("pos error: %s: pos = %d, want %d (%s)", src, pos, index+colbase, test.nodetyp) + continue + } + } +} + +func stripAt(s string) (string, int) { + if i := strings.Index(s, "@"); i >= 0 { + return s[:i] + s[i+1:], i + } + return s, -1 +} + +func typeOf(n Node) string { + const prefix = "*syntax." + k := fmt.Sprintf("%T", n) + if strings.HasPrefix(k, prefix) { + return k[len(prefix):] + } + return k +} diff --git a/src/cmd/compile/internal/syntax/operator_string.go b/src/cmd/compile/internal/syntax/operator_string.go new file mode 100644 index 0000000..f045d8c --- /dev/null +++ b/src/cmd/compile/internal/syntax/operator_string.go @@ -0,0 +1,46 @@ +// Code generated by "stringer -type Operator -linecomment tokens.go"; DO NOT EDIT. + +package syntax + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[Def-1] + _ = x[Not-2] + _ = x[Recv-3] + _ = x[Tilde-4] + _ = x[OrOr-5] + _ = x[AndAnd-6] + _ = x[Eql-7] + _ = x[Neq-8] + _ = x[Lss-9] + _ = x[Leq-10] + _ = x[Gtr-11] + _ = x[Geq-12] + _ = x[Add-13] + _ = x[Sub-14] + _ = x[Or-15] + _ = x[Xor-16] + _ = x[Mul-17] + _ = x[Div-18] + _ = x[Rem-19] + _ = x[And-20] + _ = x[AndNot-21] + _ = x[Shl-22] + _ = x[Shr-23] +} + +const _Operator_name = ":!<-~||&&==!=<<=>>=+-|^*/%&&^<<>>" + +var _Operator_index = [...]uint8{0, 1, 2, 4, 5, 7, 9, 11, 13, 14, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 31, 33} + +func (i Operator) String() string { + i -= 1 + if i >= Operator(len(_Operator_index)-1) { + return "Operator(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _Operator_name[_Operator_index[i]:_Operator_index[i+1]] +} diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go new file mode 100644 index 0000000..ee9761e --- /dev/null +++ b/src/cmd/compile/internal/syntax/parser.go @@ -0,0 +1,2803 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "fmt" + "io" + "strconv" + "strings" +) + +const debug = false +const trace = false + +type parser struct { + file *PosBase + errh ErrorHandler + mode Mode + pragh PragmaHandler + scanner + + base *PosBase // current position base + first error // first error encountered + errcnt int // number of errors encountered + pragma Pragma // pragmas + + fnest int // function nesting level (for error handling) + xnest int // expression nesting level (for complit ambiguity resolution) + indent []byte // tracing support +} + +func (p *parser) init(file *PosBase, r io.Reader, errh ErrorHandler, pragh PragmaHandler, mode Mode) { + p.file = file + p.errh = errh + p.mode = mode + p.pragh = pragh + p.scanner.init( + r, + // Error and directive handler for scanner. 
+ // Because the (line, col) positions passed to the + // handler is always at or after the current reading + // position, it is safe to use the most recent position + // base to compute the corresponding Pos value. + func(line, col uint, msg string) { + if msg[0] != '/' { + p.errorAt(p.posAt(line, col), msg) + return + } + + // otherwise it must be a comment containing a line or go: directive. + // //line directives must be at the start of the line (column colbase). + // /*line*/ directives can be anywhere in the line. + text := commentText(msg) + if (col == colbase || msg[1] == '*') && strings.HasPrefix(text, "line ") { + var pos Pos // position immediately following the comment + if msg[1] == '/' { + // line comment (newline is part of the comment) + pos = MakePos(p.file, line+1, colbase) + } else { + // regular comment + // (if the comment spans multiple lines it's not + // a valid line directive and will be discarded + // by updateBase) + pos = MakePos(p.file, line, col+uint(len(msg))) + } + p.updateBase(pos, line, col+2+5, text[5:]) // +2 to skip over // or /* + return + } + + // go: directive (but be conservative and test) + if pragh != nil && strings.HasPrefix(text, "go:") { + p.pragma = pragh(p.posAt(line, col+2), p.scanner.blank, text, p.pragma) // +2 to skip over // or /* + } + }, + directives, + ) + + p.base = file + p.first = nil + p.errcnt = 0 + p.pragma = nil + + p.fnest = 0 + p.xnest = 0 + p.indent = nil +} + +// takePragma returns the current parsed pragmas +// and clears them from the parser state. +func (p *parser) takePragma() Pragma { + prag := p.pragma + p.pragma = nil + return prag +} + +// clearPragma is called at the end of a statement or +// other Go form that does NOT accept a pragma. +// It sends the pragma back to the pragma handler +// to be reported as unused. 
func (p *parser) clearPragma() {
	if p.pragma != nil {
		// Hand the pragma back to the handler with an empty text so it
		// can be reported as unused; then drop it from the parser state.
		p.pragh(p.pos(), p.scanner.blank, "", p.pragma)
		p.pragma = nil
	}
}

// updateBase sets the current position base to a new line base at pos.
// The base's filename, line, and column values are extracted from text
// which is positioned at (tline, tcol) (only needed for error messages).
func (p *parser) updateBase(pos Pos, tline, tcol uint, text string) {
	i, n, ok := trailingDigits(text)
	if i == 0 {
		return // ignore (not a line directive)
	}
	// i > 0

	if !ok {
		// text has a suffix :xxx but xxx is not a number
		p.errorAt(p.posAt(tline, tcol+i), "invalid line number: "+text[i:])
		return
	}

	var line, col uint
	i2, n2, ok2 := trailingDigits(text[:i-1])
	if ok2 {
		//line filename:line:col
		i, i2 = i2, i
		line, col = n2, n
		if col == 0 || col > PosMax {
			p.errorAt(p.posAt(tline, tcol+i2), "invalid column number: "+text[i2:])
			return
		}
		text = text[:i2-1] // lop off ":col"
	} else {
		//line filename:line
		line = n
	}

	if line == 0 || line > PosMax {
		p.errorAt(p.posAt(tline, tcol+i), "invalid line number: "+text[i:])
		return
	}

	// If we have a column (//line filename:line:col form),
	// an empty filename means to use the previous filename.
	filename := text[:i-1] // lop off ":line"
	trimmed := false
	if filename == "" && ok2 {
		filename = p.base.Filename()
		trimmed = p.base.Trimmed()
	}

	p.base = NewLineBase(pos, filename, trimmed, line, col)
}

// commentText returns the text of comment s with the comment markers
// removed: for /*-style comments the surrounding /* and */ are dropped;
// for //-style comments the leading // and any trailing \r (Windows
// line endings) are dropped.
func commentText(s string) string {
	if s[:2] == "/*" {
		return s[2 : len(s)-2] // lop off /* and */
	}

	// line comment (does not include newline)
	// (on Windows, the line comment may end in \r\n)
	i := len(s)
	if s[i-1] == '\r' {
		i--
	}
	return s[2:i] // lop off //, and \r at end, if any
}

// trailingDigits splits text at its final ':' and returns the index just
// past that ':', the decimal value of the suffix, and whether the suffix
// parsed as an unsigned number. A first result of 0 means text contains
// no ':' at all.
func trailingDigits(text string) (uint, uint, bool) {
	// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
	i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
	if i < 0 {
		return 0, 0, false // no ":"
	}
	// i >= 0
	n, err := strconv.ParseUint(text[i+1:], 10, 0)
	return uint(i + 1), uint(n), err == nil
}

// got reports whether the current token is tok, and consumes it if so.
func (p *parser) got(tok token) bool {
	if p.tok == tok {
		p.next()
		return true
	}
	return false
}

// want consumes the expected token tok; if the current token differs,
// it reports a syntax error and resynchronizes.
func (p *parser) want(tok token) {
	if !p.got(tok) {
		p.syntaxError("expected " + tokstring(tok))
		p.advance()
	}
}

// gotAssign is like got(_Assign) but it also accepts ":="
// (and reports an error) for better parser error recovery.
func (p *parser) gotAssign() bool {
	switch p.tok {
	case _Define:
		p.syntaxError("expected =")
		fallthrough
	case _Assign:
		p.next()
		return true
	}
	return false
}

// ----------------------------------------------------------------------------
// Error handling

// posAt returns the Pos value for (line, col) and the current position base.
func (p *parser) posAt(line, col uint) Pos {
	return MakePos(p.base, line, col)
}

// errorAt reports an error at the given position. It records the first
// error encountered and the total error count; if no error handler is
// installed, it aborts parsing by panicking with the first error.
func (p *parser) errorAt(pos Pos, msg string) {
	err := Error{pos, msg}
	if p.first == nil {
		p.first = err
	}
	p.errcnt++
	if p.errh == nil {
		panic(p.first)
	}
	p.errh(err)
}

// syntaxErrorAt reports a syntax error at the given position.
func (p *parser) syntaxErrorAt(pos Pos, msg string) {
	if trace {
		p.print("syntax error: " + msg)
	}

	if p.tok == _EOF && p.first != nil {
		return // avoid meaningless follow-up errors
	}

	// add punctuation etc. as needed to msg
	switch {
	case msg == "":
		// nothing to do
	case strings.HasPrefix(msg, "in "), strings.HasPrefix(msg, "at "), strings.HasPrefix(msg, "after "):
		msg = " " + msg
	case strings.HasPrefix(msg, "expected "):
		msg = ", " + msg
	default:
		// plain error - we don't care about current token
		p.errorAt(pos, "syntax error: "+msg)
		return
	}

	// determine token string
	var tok string
	switch p.tok {
	case _Name, _Semi:
		tok = p.lit
	case _Literal:
		tok = "literal " + p.lit
	case _Operator:
		tok = p.op.String()
	case _AssignOp:
		tok = p.op.String() + "="
	case _IncOp:
		tok = p.op.String()
		tok += tok // "+" or "-" doubled gives "++" or "--"
	default:
		tok = tokstring(p.tok)
	}

	// TODO(gri) This may print "unexpected X, expected Y".
	// Consider "got X, expected Y" in this case.
	p.errorAt(pos, "syntax error: unexpected "+tok+msg)
}

// tokstring returns the English word for selected punctuation tokens
// for more readable error messages. Use tokstring (not tok.String())
// for user-facing (error) messages; use tok.String() for debugging
// output.
func tokstring(tok token) string {
	switch tok {
	case _Comma:
		return "comma"
	case _Semi:
		return "semicolon or newline"
	}
	return tok.String()
}

// Convenience methods using the current token position.
func (p *parser) pos() Pos               { return p.posAt(p.line, p.col) }
func (p *parser) error(msg string)       { p.errorAt(p.pos(), msg) }
func (p *parser) syntaxError(msg string) { p.syntaxErrorAt(p.pos(), msg) }

// The stopset contains keywords that start a statement.
// They are good synchronization points in case of syntax
// errors and (usually) shouldn't be skipped over.
const stopset uint64 = 1<<_Break |
	1<<_Const |
	1<<_Continue |
	1<<_Defer |
	1<<_Fallthrough |
	1<<_For |
	1<<_Go |
	1<<_Goto |
	1<<_If |
	1<<_Return |
	1<<_Select |
	1<<_Switch |
	1<<_Type |
	1<<_Var

// advance consumes tokens until it finds a token of the stopset or followlist.
// The stopset is only considered if we are inside a function (p.fnest > 0).
// The followlist is the list of valid tokens that can follow a production;
// if it is empty, exactly one (non-EOF) token is consumed to ensure progress.
func (p *parser) advance(followlist ...token) {
	if trace {
		p.print(fmt.Sprintf("advance %s", followlist))
	}

	// compute follow set
	// (not speed critical, advance is only called in error situations)
	var followset uint64 = 1 << _EOF // don't skip over EOF
	if len(followlist) > 0 {
		if p.fnest > 0 {
			followset |= stopset
		}
		for _, tok := range followlist {
			followset |= 1 << tok
		}
	}

	for !contains(followset, p.tok) {
		if trace {
			p.print("skip " + p.tok.String())
		}
		p.next()
		if len(followlist) == 0 {
			break // empty followlist: consume exactly one token
		}
	}

	if trace {
		p.print("next " + p.tok.String())
	}
}

// trace prints msg and opens a trace section by growing the indentation.
// The returned function closes the section; it is meant to be used as in:
//
//	defer p.trace(msg)()
//
// If a panic is in flight when the returned function runs, the panic is
// re-raised without printing the closing parenthesis.
func (p *parser) trace(msg string) func() {
	p.print(msg + " (")
	const tab = ". "
	p.indent = append(p.indent, tab...)
	return func() {
		p.indent = p.indent[:len(p.indent)-len(tab)]
		if x := recover(); x != nil {
			panic(x) // skip print_trace
		}
		p.print(")")
	}
}

// print writes msg to stdout, prefixed by the current source line number
// and the current trace indentation.
func (p *parser) print(msg string) {
	fmt.Printf("%5d: %s%s\n", p.line, p.indent, msg)
}

// ----------------------------------------------------------------------------
// Package files
//
// Parse methods are annotated with matching Go productions as appropriate.
// The annotations are intended as guidelines only since a single Go grammar
// rule may be covered by multiple parse methods and vice versa.
//
// Excluding methods returning slices, parse methods named xOrNil may return
// nil; all others are expected to return a valid non-nil node.

// SourceFile = PackageClause ";" { ImportDecl ";" } { TopLevelDecl ";" } .
+func (p *parser) fileOrNil() *File { + if trace { + defer p.trace("file")() + } + + f := new(File) + f.pos = p.pos() + + // PackageClause + if !p.got(_Package) { + p.syntaxError("package statement must be first") + return nil + } + f.Pragma = p.takePragma() + f.PkgName = p.name() + p.want(_Semi) + + // don't bother continuing if package clause has errors + if p.first != nil { + return nil + } + + // Accept import declarations anywhere for error tolerance, but complain. + // { ( ImportDecl | TopLevelDecl ) ";" } + prev := _Import + for p.tok != _EOF { + if p.tok == _Import && prev != _Import { + p.syntaxError("imports must appear before other declarations") + } + prev = p.tok + + switch p.tok { + case _Import: + p.next() + f.DeclList = p.appendGroup(f.DeclList, p.importDecl) + + case _Const: + p.next() + f.DeclList = p.appendGroup(f.DeclList, p.constDecl) + + case _Type: + p.next() + f.DeclList = p.appendGroup(f.DeclList, p.typeDecl) + + case _Var: + p.next() + f.DeclList = p.appendGroup(f.DeclList, p.varDecl) + + case _Func: + p.next() + if d := p.funcDeclOrNil(); d != nil { + f.DeclList = append(f.DeclList, d) + } + + default: + if p.tok == _Lbrace && len(f.DeclList) > 0 && isEmptyFuncDecl(f.DeclList[len(f.DeclList)-1]) { + // opening { of function declaration on next line + p.syntaxError("unexpected semicolon or newline before {") + } else { + p.syntaxError("non-declaration statement outside function body") + } + p.advance(_Import, _Const, _Type, _Var, _Func) + continue + } + + // Reset p.pragma BEFORE advancing to the next token (consuming ';') + // since comments before may set pragmas for the next function decl. 
+ p.clearPragma() + + if p.tok != _EOF && !p.got(_Semi) { + p.syntaxError("after top level declaration") + p.advance(_Import, _Const, _Type, _Var, _Func) + } + } + // p.tok == _EOF + + p.clearPragma() + f.EOF = p.pos() + + return f +} + +func isEmptyFuncDecl(dcl Decl) bool { + f, ok := dcl.(*FuncDecl) + return ok && f.Body == nil +} + +// ---------------------------------------------------------------------------- +// Declarations + +// list parses a possibly empty, sep-separated list of elements, optionally +// followed by sep, and closed by close (or EOF). sep must be one of _Comma +// or _Semi, and close must be one of _Rparen, _Rbrace, or _Rbrack. +// +// For each list element, f is called. Specifically, unless we're at close +// (or EOF), f is called at least once. After f returns true, no more list +// elements are accepted. list returns the position of the closing token. +// +// list = [ f { sep f } [sep] ] close . +func (p *parser) list(context string, sep, close token, f func() bool) Pos { + if debug && (sep != _Comma && sep != _Semi || close != _Rparen && close != _Rbrace && close != _Rbrack) { + panic("invalid sep or close argument for list") + } + + done := false + for p.tok != _EOF && p.tok != close && !done { + done = f() + // sep is optional before close + if !p.got(sep) && p.tok != close { + p.syntaxError(fmt.Sprintf("in %s; possibly missing %s or %s", context, tokstring(sep), tokstring(close))) + p.advance(_Rparen, _Rbrack, _Rbrace) + if p.tok != close { + // position could be better but we had an error so we don't care + return p.pos() + } + } + } + + pos := p.pos() + p.want(close) + return pos +} + +// appendGroup(f) = f | "(" { f ";" } ")" . // ";" is optional before ")" +func (p *parser) appendGroup(list []Decl, f func(*Group) Decl) []Decl { + if p.tok == _Lparen { + g := new(Group) + p.clearPragma() + p.next() // must consume "(" after calling clearPragma! 
+ p.list("grouped declaration", _Semi, _Rparen, func() bool { + if x := f(g); x != nil { + list = append(list, x) + } + return false + }) + } else { + if x := f(nil); x != nil { + list = append(list, x) + } + } + return list +} + +// ImportSpec = [ "." | PackageName ] ImportPath . +// ImportPath = string_lit . +func (p *parser) importDecl(group *Group) Decl { + if trace { + defer p.trace("importDecl")() + } + + d := new(ImportDecl) + d.pos = p.pos() + d.Group = group + d.Pragma = p.takePragma() + + switch p.tok { + case _Name: + d.LocalPkgName = p.name() + case _Dot: + d.LocalPkgName = NewName(p.pos(), ".") + p.next() + } + d.Path = p.oliteral() + if d.Path == nil { + p.syntaxError("missing import path") + p.advance(_Semi, _Rparen) + return d + } + if !d.Path.Bad && d.Path.Kind != StringLit { + p.syntaxErrorAt(d.Path.Pos(), "import path must be a string") + d.Path.Bad = true + } + // d.Path.Bad || d.Path.Kind == StringLit + + return d +} + +// ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] . +func (p *parser) constDecl(group *Group) Decl { + if trace { + defer p.trace("constDecl")() + } + + d := new(ConstDecl) + d.pos = p.pos() + d.Group = group + d.Pragma = p.takePragma() + + d.NameList = p.nameList(p.name()) + if p.tok != _EOF && p.tok != _Semi && p.tok != _Rparen { + d.Type = p.typeOrNil() + if p.gotAssign() { + d.Values = p.exprList() + } + } + + return d +} + +// TypeSpec = identifier [ TypeParams ] [ "=" ] Type . +func (p *parser) typeDecl(group *Group) Decl { + if trace { + defer p.trace("typeDecl")() + } + + d := new(TypeDecl) + d.pos = p.pos() + d.Group = group + d.Pragma = p.takePragma() + + d.Name = p.name() + if p.tok == _Lbrack { + // d.Name "[" ... + // array/slice type or type parameter list + pos := p.pos() + p.next() + switch p.tok { + case _Name: + // We may have an array type or a type parameter list. 
+ // In either case we expect an expression x (which may + // just be a name, or a more complex expression) which + // we can analyze further. + // + // A type parameter list may have a type bound starting + // with a "[" as in: P []E. In that case, simply parsing + // an expression would lead to an error: P[] is invalid. + // But since index or slice expressions are never constant + // and thus invalid array length expressions, if the name + // is followed by "[" it must be the start of an array or + // slice constraint. Only if we don't see a "[" do we + // need to parse a full expression. Notably, name <- x + // is not a concern because name <- x is a statement and + // not an expression. + var x Expr = p.name() + if p.tok != _Lbrack { + // To parse the expression starting with name, expand + // the call sequence we would get by passing in name + // to parser.expr, and pass in name to parser.pexpr. + p.xnest++ + x = p.binaryExpr(p.pexpr(x, false), 0) + p.xnest-- + } + // Analyze expression x. If we can split x into a type parameter + // name, possibly followed by a type parameter type, we consider + // this the start of a type parameter list, with some caveats: + // a single name followed by "]" tilts the decision towards an + // array declaration; a type parameter type that could also be + // an ordinary expression but which is followed by a comma tilts + // the decision towards a type parameter list. + if pname, ptype := extractName(x, p.tok == _Comma); pname != nil && (ptype != nil || p.tok != _Rbrack) { + // d.Name "[" pname ... + // d.Name "[" pname ptype ... + // d.Name "[" pname ptype "," ... + d.TParamList = p.paramList(pname, ptype, _Rbrack, true) // ptype may be nil + d.Alias = p.gotAssign() + d.Type = p.typeOrNil() + } else { + // d.Name "[" pname "]" ... + // d.Name "[" x ... + d.Type = p.arrayType(pos, x) + } + case _Rbrack: + // d.Name "[" "]" ... + p.next() + d.Type = p.sliceType(pos) + default: + // d.Name "[" ... 
+ d.Type = p.arrayType(pos, nil) + } + } else { + d.Alias = p.gotAssign() + d.Type = p.typeOrNil() + } + + if d.Type == nil { + d.Type = p.badExpr() + p.syntaxError("in type declaration") + p.advance(_Semi, _Rparen) + } + + return d +} + +// extractName splits the expression x into (name, expr) if syntactically +// x can be written as name expr. The split only happens if expr is a type +// element (per the isTypeElem predicate) or if force is set. +// If x is just a name, the result is (name, nil). If the split succeeds, +// the result is (name, expr). Otherwise the result is (nil, x). +// Examples: +// +// x force name expr +// ------------------------------------ +// P*[]int T/F P *[]int +// P*E T P *E +// P*E F nil P*E +// P([]int) T/F P []int +// P(E) T P E +// P(E) F nil P(E) +// P*E|F|~G T/F P *E|F|~G +// P*E|F|G T P *E|F|G +// P*E|F|G F nil P*E|F|G +func extractName(x Expr, force bool) (*Name, Expr) { + switch x := x.(type) { + case *Name: + return x, nil + case *Operation: + if x.Y == nil { + break // unary expr + } + switch x.Op { + case Mul: + if name, _ := x.X.(*Name); name != nil && (force || isTypeElem(x.Y)) { + // x = name *x.Y + op := *x + op.X, op.Y = op.Y, nil // change op into unary *op.Y + return name, &op + } + case Or: + if name, lhs := extractName(x.X, force || isTypeElem(x.Y)); name != nil && lhs != nil { + // x = name lhs|x.Y + op := *x + op.X = lhs + return name, &op + } + } + case *CallExpr: + if name, _ := x.Fun.(*Name); name != nil { + if len(x.ArgList) == 1 && !x.HasDots && (force || isTypeElem(x.ArgList[0])) { + // x = name "(" x.ArgList[0] ")" + return name, x.ArgList[0] + } + } + } + return nil, x +} + +// isTypeElem reports whether x is a (possibly parenthesized) type element expression. +// The result is false if x could be a type element OR an ordinary (value) expression. 
+func isTypeElem(x Expr) bool { + switch x := x.(type) { + case *ArrayType, *StructType, *FuncType, *InterfaceType, *SliceType, *MapType, *ChanType: + return true + case *Operation: + return isTypeElem(x.X) || (x.Y != nil && isTypeElem(x.Y)) || x.Op == Tilde + case *ParenExpr: + return isTypeElem(x.X) + } + return false +} + +// VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) . +func (p *parser) varDecl(group *Group) Decl { + if trace { + defer p.trace("varDecl")() + } + + d := new(VarDecl) + d.pos = p.pos() + d.Group = group + d.Pragma = p.takePragma() + + d.NameList = p.nameList(p.name()) + if p.gotAssign() { + d.Values = p.exprList() + } else { + d.Type = p.type_() + if p.gotAssign() { + d.Values = p.exprList() + } + } + + return d +} + +// FunctionDecl = "func" FunctionName [ TypeParams ] ( Function | Signature ) . +// FunctionName = identifier . +// Function = Signature FunctionBody . +// MethodDecl = "func" Receiver MethodName ( Function | Signature ) . +// Receiver = Parameters . 
func (p *parser) funcDeclOrNil() *FuncDecl {
	if trace {
		defer p.trace("funcDecl")()
	}

	f := new(FuncDecl)
	f.pos = p.pos()
	f.Pragma = p.takePragma()

	var context string
	if p.got(_Lparen) {
		context = "method"
		rcvr := p.paramList(nil, nil, _Rparen, false)
		switch len(rcvr) {
		case 0:
			p.error("method has no receiver")
		default:
			// error was reported; fall through and keep the first
			// receiver so the rest of the declaration still parses
			p.error("method has multiple receivers")
			fallthrough
		case 1:
			f.Recv = rcvr[0]
		}
	}

	if p.tok == _Name {
		f.Name = p.name()
		f.TParamList, f.Type = p.funcType(context)
	} else {
		msg := "expected name or ("
		if context != "" {
			msg = "expected name" // after a receiver, only a method name may follow
		}
		p.syntaxError(msg)
		p.advance(_Lbrace, _Semi)
	}

	if p.tok == _Lbrace {
		f.Body = p.funcBody()
	}

	return f
}

// funcBody parses a function body and, if branch checking is enabled and
// the body parsed without new syntax errors, checks correct use of labels
// and branch statements.
func (p *parser) funcBody() *BlockStmt {
	p.fnest++
	errcnt := p.errcnt
	body := p.blockStmt("")
	p.fnest--

	// Don't check branches if there were syntax errors in the function
	// as it may lead to spurious errors (e.g., see test/switch2.go) or
	// possibly crashes due to incomplete syntax trees.
	if p.mode&CheckBranches != 0 && errcnt == p.errcnt {
		checkBranches(body, p.errh)
	}

	return body
}

// ----------------------------------------------------------------------------
// Expressions

func (p *parser) expr() Expr {
	if trace {
		defer p.trace("expr")()
	}

	return p.binaryExpr(nil, 0)
}

// Expression = UnaryExpr | Expression binary_op Expression .
//
// binaryExpr parses (the rest of) a binary expression using precedence
// climbing: it keeps extending x with operators of precedence > prec,
// recursing for the right operand so tighter-binding operators group first.
func (p *parser) binaryExpr(x Expr, prec int) Expr {
	// don't trace binaryExpr - only leads to overly nested trace output

	if x == nil {
		x = p.unaryExpr()
	}
	for (p.tok == _Operator || p.tok == _Star) && p.prec > prec {
		t := new(Operation)
		t.pos = p.pos()
		t.Op = p.op
		tprec := p.prec
		p.next()
		t.X = x
		t.Y = p.binaryExpr(nil, tprec)
		x = t
	}
	return x
}

// UnaryExpr = PrimaryExpr | unary_op UnaryExpr .
// unaryExpr parses a unary expression, including the re-association of
// <- with an already-parsed channel type (see the _Arrow case below).
func (p *parser) unaryExpr() Expr {
	if trace {
		defer p.trace("unaryExpr")()
	}

	switch p.tok {
	case _Operator, _Star:
		switch p.op {
		case Mul, Add, Sub, Not, Xor, Tilde:
			x := new(Operation)
			x.pos = p.pos()
			x.Op = p.op
			p.next()
			x.X = p.unaryExpr()
			return x

		case And:
			x := new(Operation)
			x.pos = p.pos()
			x.Op = And
			p.next()
			// unaryExpr may have returned a parenthesized composite literal
			// (see comment in operand) - remove parentheses if any
			x.X = unparen(p.unaryExpr())
			return x
		}

	case _Arrow:
		// receive op (<-x) or receive-only channel (<-chan E)
		pos := p.pos()
		p.next()

		// If the next token is _Chan we still don't know if it is
		// a channel (<-chan int) or a receive op (<-chan int(ch)).
		// We only know once we have found the end of the unaryExpr.

		x := p.unaryExpr()

		// There are two cases:
		//
		//   <-chan...  => <-x is a channel type
		//   <-x        => <-x is a receive operation
		//
		// In the first case, <- must be re-associated with
		// the channel type parsed already:
		//
		//   <-(chan E)   =>  (<-chan E)
		//   <-(chan<-E)  =>  (<-chan (<-E))

		if _, ok := x.(*ChanType); ok {
			// x is a channel type => re-associate <-
			dir := SendOnly
			t := x
			for dir == SendOnly {
				c, ok := t.(*ChanType)
				if !ok {
					break
				}
				dir = c.Dir
				if dir == RecvOnly {
					// t is type <-chan E but <-<-chan E is not permitted
					// (report same error as for "type _ <-<-chan E")
					p.syntaxError("unexpected <-, expected chan")
					// already progressed, no need to advance
				}
				c.Dir = RecvOnly
				t = c.Elem
			}
			if dir == SendOnly {
				// channel dir is <- but channel element E is not a channel
				// (report same error as for "type _ <-chan<-E")
				p.syntaxError(fmt.Sprintf("unexpected %s, expected chan", String(t)))
				// already progressed, no need to advance
			}
			return x
		}

		// x is not a channel type => we have a receive op
		o := new(Operation)
		o.pos = pos
		o.Op = Recv
		o.X = x
		return o
	}

	// TODO(mdempsky): We need parens here so we can report an
	// error for "(x) := true". It should be possible to detect
	// and reject that more efficiently though.
	return p.pexpr(nil, true)
}

// callStmt parses call-like statements that can be preceded by 'defer' and 'go'.
func (p *parser) callStmt() *CallStmt {
	if trace {
		defer p.trace("callStmt")()
	}

	s := new(CallStmt)
	s.pos = p.pos()
	s.Tok = p.tok // _Defer or _Go
	p.next()

	x := p.pexpr(nil, p.tok == _Lparen) // keep_parens so we can report error below
	if t := unparen(x); t != x {
		p.errorAt(x.Pos(), fmt.Sprintf("expression in %s must not be parenthesized", s.Tok))
		// already progressed, no need to advance
		x = t
	}

	s.Call = x
	return s
}

// operand parses an operand. If keep_parens is set, a parenthesized
// expression is wrapped in a ParenExpr node so that callers can report
// errors for disallowed parentheses.
//
// Operand     = Literal | OperandName | MethodExpr | "(" Expression ")" .
// Literal     = BasicLit | CompositeLit | FunctionLit .
// BasicLit    = int_lit | float_lit | imaginary_lit | rune_lit | string_lit .
// OperandName = identifier | QualifiedIdent.
func (p *parser) operand(keep_parens bool) Expr {
	if trace {
		defer p.trace("operand " + p.tok.String())()
	}

	switch p.tok {
	case _Name:
		return p.name()

	case _Literal:
		return p.oliteral()

	case _Lparen:
		pos := p.pos()
		p.next()
		p.xnest++
		x := p.expr()
		p.xnest--
		p.want(_Rparen)

		// Optimization: Record presence of ()'s only where needed
		// for error reporting. Don't bother in other cases; it is
		// just a waste of memory and time.
		//
		// Parentheses are not permitted around T in a composite
		// literal T{}. If the next token is a {, assume x is a
		// composite literal type T (it may not be, { could be
		// the opening brace of a block, but we don't know yet).
		if p.tok == _Lbrace {
			keep_parens = true
		}

		// Parentheses are also not permitted around the expression
		// in a go/defer statement. In that case, operand is called
		// with keep_parens set.
		if keep_parens {
			px := new(ParenExpr)
			px.pos = pos
			px.X = x
			x = px
		}
		return x

	case _Func:
		pos := p.pos()
		p.next()
		_, ftyp := p.funcType("function type")
		if p.tok == _Lbrace {
			p.xnest++

			f := new(FuncLit)
			f.pos = pos
			f.Type = ftyp
			f.Body = p.funcBody()

			p.xnest--
			return f
		}
		return ftyp

	case _Lbrack, _Chan, _Map, _Struct, _Interface:
		return p.type_() // othertype

	default:
		x := p.badExpr()
		p.syntaxError("expected expression")
		p.advance(_Rparen, _Rbrack, _Rbrace)
		return x
	}

	// Syntactically, composite literals are operands. Because a complit
	// type may be a qualified identifier which is handled by pexpr
	// (together with selector expressions), complits are parsed there
	// as well (operand is only called from pexpr).
}

// pexpr parses a PrimaryExpr.
//
//	PrimaryExpr =
//		Operand |
//		Conversion |
//		PrimaryExpr Selector |
//		PrimaryExpr Index |
//		PrimaryExpr Slice |
//		PrimaryExpr TypeAssertion |
//		PrimaryExpr Arguments .
//
//	Selector       = "." identifier .
//	Index          = "[" Expression "]" .
//	Slice          = "[" ( [ Expression ] ":" [ Expression ] ) |
//	                     ( [ Expression ] ":" Expression ":" Expression )
//	                 "]" .
//	TypeAssertion  = "." "(" Type ")" .
//	Arguments      = "(" [ ( ExpressionList | Type [ "," ExpressionList ] ) [ "..." ] [ "," ] ] ")" .
func (p *parser) pexpr(x Expr, keep_parens bool) Expr {
	if trace {
		defer p.trace("pexpr")()
	}

	if x == nil {
		x = p.operand(keep_parens)
	}

loop:
	for {
		pos := p.pos()
		switch p.tok {
		case _Dot:
			p.next()
			switch p.tok {
			case _Name:
				// pexpr '.' sym
				t := new(SelectorExpr)
				t.pos = pos
				t.X = x
				t.Sel = p.name()
				x = t

			case _Lparen:
				p.next()
				if p.got(_Type) {
					t := new(TypeSwitchGuard)
					// t.Lhs is filled in by parser.simpleStmt
					t.pos = pos
					t.X = x
					x = t
				} else {
					t := new(AssertExpr)
					t.pos = pos
					t.X = x
					t.Type = p.type_()
					x = t
				}
				p.want(_Rparen)

			default:
				p.syntaxError("expected name or (")
				p.advance(_Semi, _Rparen)
			}

		case _Lbrack:
			p.next()

			var i Expr
			if p.tok != _Colon {
				var comma bool
				if p.tok == _Rbrack {
					// invalid empty instance, slice or index expression; accept
					// but complain
					p.syntaxError("expected operand")
					i = p.badExpr()
				} else {
					i, comma = p.typeList(false)
				}
				if comma || p.tok == _Rbrack {
					p.want(_Rbrack)
					// x[], x[i,] or x[i, j, ...]
					t := new(IndexExpr)
					t.pos = pos
					t.X = x
					t.Index = i
					x = t
					break
				}
			}

			// x[i:...
			// For better error message, don't simply use p.want(_Colon) here (issue #47704).
			if !p.got(_Colon) {
				p.syntaxError("expected comma, : or ]")
				p.advance(_Comma, _Colon, _Rbrack)
			}
			p.xnest++
			t := new(SliceExpr)
			t.pos = pos
			t.X = x
			t.Index[0] = i
			if p.tok != _Colon && p.tok != _Rbrack {
				// x[i:j...
				t.Index[1] = p.expr()
			}
			if p.tok == _Colon {
				t.Full = true
				// x[i:j:...]
				if t.Index[1] == nil {
					p.error("middle index required in 3-index slice")
					t.Index[1] = p.badExpr()
				}
				p.next()
				if p.tok != _Rbrack {
					// x[i:j:k...
					t.Index[2] = p.expr()
				} else {
					p.error("final index required in 3-index slice")
					t.Index[2] = p.badExpr()
				}
			}
			p.xnest--
			p.want(_Rbrack)
			x = t

		case _Lparen:
			t := new(CallExpr)
			t.pos = pos
			p.next()
			t.Fun = x
			t.ArgList, t.HasDots = p.argList()
			x = t

		case _Lbrace:
			// operand may have returned a parenthesized complit
			// type; accept it but complain if we have a complit
			t := unparen(x)
			// determine if '{' belongs to a composite literal or a block statement
			complit_ok := false
			switch t.(type) {
			case *Name, *SelectorExpr:
				if p.xnest >= 0 {
					// x is possibly a composite literal type
					complit_ok = true
				}
			case *IndexExpr:
				if p.xnest >= 0 && !isValue(t) {
					// x is possibly a composite literal type
					complit_ok = true
				}
			case *ArrayType, *SliceType, *StructType, *MapType:
				// x is a comptype
				complit_ok = true
			}
			if !complit_ok {
				break loop
			}
			if t != x {
				p.syntaxError("cannot parenthesize type in composite literal")
				// already progressed, no need to advance
			}
			n := p.complitexpr()
			n.Type = x
			x = n

		default:
			break loop
		}
	}

	return x
}

// isValue reports whether x syntactically must be a value (and not a type) expression.
func isValue(x Expr) bool {
	switch x := x.(type) {
	case *BasicLit, *CompositeLit, *FuncLit, *SliceExpr, *AssertExpr, *TypeSwitchGuard, *CallExpr:
		return true
	case *Operation:
		return x.Op != Mul || x.Y != nil // *T may be a type
	case *ParenExpr:
		return isValue(x.X)
	case *IndexExpr:
		return isValue(x.X) || isValue(x.Index)
	}
	return false
}

// bare_complitexpr parses a composite-literal element.
//
// Element = Expression | LiteralValue .
func (p *parser) bare_complitexpr() Expr {
	if trace {
		defer p.trace("bare_complitexpr")()
	}

	if p.tok == _Lbrace {
		// '{' start_complit braced_keyval_list '}'
		return p.complitexpr()
	}

	return p.expr()
}

// LiteralValue = "{" [ ElementList [ "," ] ] "}" .
// complitexpr parses a composite literal's braced element list.
// The literal's type is filled in by the caller (see pexpr).
func (p *parser) complitexpr() *CompositeLit {
	if trace {
		defer p.trace("complitexpr")()
	}

	x := new(CompositeLit)
	x.pos = p.pos()

	p.xnest++
	p.want(_Lbrace)
	x.Rbrace = p.list("composite literal", _Comma, _Rbrace, func() bool {
		// value
		e := p.bare_complitexpr()
		if p.tok == _Colon {
			// key ':' value
			l := new(KeyValueExpr)
			l.pos = p.pos()
			p.next()
			l.Key = e
			l.Value = p.bare_complitexpr()
			e = l
			x.NKeys++
		}
		x.ElemList = append(x.ElemList, e)
		return false
	})
	p.xnest--

	return x
}

// ----------------------------------------------------------------------------
// Types

// type_ parses a type; in contrast to typeOrNil it reports an error and
// returns a BadExpr if no type is present.
func (p *parser) type_() Expr {
	if trace {
		defer p.trace("type_")()
	}

	typ := p.typeOrNil()
	if typ == nil {
		typ = p.badExpr()
		p.syntaxError("expected type")
		p.advance(_Comma, _Colon, _Semi, _Rparen, _Rbrack, _Rbrace)
	}

	return typ
}

// newIndirect returns a node for *typ (a pointer type or dereference,
// represented uniformly as a unary Mul operation).
func newIndirect(pos Pos, typ Expr) Expr {
	o := new(Operation)
	o.pos = pos
	o.Op = Mul
	o.X = typ
	return o
}

// typeOrNil is like type_ but it returns nil if there was no type
// instead of reporting an error.
//
//	Type     = TypeName | TypeLit | "(" Type ")" .
//	TypeName = identifier | QualifiedIdent .
//	TypeLit  = ArrayType | StructType | PointerType | FunctionType | InterfaceType |
//		   SliceType | MapType | Channel_Type .
func (p *parser) typeOrNil() Expr {
	if trace {
		defer p.trace("typeOrNil")()
	}

	pos := p.pos()
	switch p.tok {
	case _Star:
		// ptrtype
		p.next()
		return newIndirect(pos, p.type_())

	case _Arrow:
		// recvchantype
		p.next()
		p.want(_Chan)
		t := new(ChanType)
		t.pos = pos
		t.Dir = RecvOnly
		t.Elem = p.chanElem()
		return t

	case _Func:
		// fntype
		p.next()
		_, t := p.funcType("function type")
		return t

	case _Lbrack:
		// '[' oexpr ']' ntype
		// '[' _DotDotDot ']' ntype
		p.next()
		if p.got(_Rbrack) {
			return p.sliceType(pos)
		}
		return p.arrayType(pos, nil)

	case _Chan:
		// _Chan non_recvchantype
		// _Chan _Comm ntype
		p.next()
		t := new(ChanType)
		t.pos = pos
		if p.got(_Arrow) {
			t.Dir = SendOnly
		}
		t.Elem = p.chanElem()
		return t

	case _Map:
		// _Map '[' ntype ']' ntype
		p.next()
		p.want(_Lbrack)
		t := new(MapType)
		t.pos = pos
		t.Key = p.type_()
		p.want(_Rbrack)
		t.Value = p.type_()
		return t

	case _Struct:
		return p.structType()

	case _Interface:
		return p.interfaceType()

	case _Name:
		return p.qualifiedName(nil)

	case _Lparen:
		p.next()
		t := p.type_()
		p.want(_Rparen)
		return t
	}

	// no type present
	return nil
}

// typeInstance parses the type argument list of a generic type
// instantiation typ[A1, A2, ...] and returns the IndexExpr node.
func (p *parser) typeInstance(typ Expr) Expr {
	if trace {
		defer p.trace("typeInstance")()
	}

	pos := p.pos()
	p.want(_Lbrack)
	x := new(IndexExpr)
	x.pos = pos
	x.X = typ
	if p.tok == _Rbrack {
		p.syntaxError("expected type argument list")
		x.Index = p.badExpr()
	} else {
		x.Index, _ = p.typeList(true)
	}
	p.want(_Rbrack)
	return x
}

// If context != "", type parameters are not permitted.
// funcType parses a function signature, including an optional type
// parameter list, and returns the type parameters (if any) and the
// function type. The _Func keyword has already been consumed.
func (p *parser) funcType(context string) ([]*Field, *FuncType) {
	if trace {
		defer p.trace("funcType")()
	}

	typ := new(FuncType)
	typ.pos = p.pos()

	var tparamList []*Field
	if p.got(_Lbrack) {
		if context != "" {
			// accept but complain
			p.syntaxErrorAt(typ.pos, context+" must have no type parameters")
		}
		if p.tok == _Rbrack {
			p.syntaxError("empty type parameter list")
			p.next()
		} else {
			tparamList = p.paramList(nil, nil, _Rbrack, true)
		}
	}

	p.want(_Lparen)
	typ.ParamList = p.paramList(nil, nil, _Rparen, false)
	typ.ResultList = p.funcResult()

	return tparamList, typ
}

// arrayType parses an array type.
// "[" has already been consumed, and pos is its position.
// If len != nil it is the already consumed array length.
func (p *parser) arrayType(pos Pos, len Expr) Expr {
	if trace {
		defer p.trace("arrayType")()
	}

	if len == nil && !p.got(_DotDotDot) {
		p.xnest++
		len = p.expr()
		p.xnest--
	}
	if p.tok == _Comma {
		// Trailing commas are accepted in type parameter
		// lists but not in array type declarations.
		// Accept for better error handling but complain.
		p.syntaxError("unexpected comma; expected ]")
		p.next()
	}
	p.want(_Rbrack)
	t := new(ArrayType)
	t.pos = pos
	t.Len = len
	t.Elem = p.type_()
	return t
}

// sliceType parses the element type of a slice type.
// "[" and "]" have already been consumed, and pos is the position of "[".
func (p *parser) sliceType(pos Pos) Expr {
	t := new(SliceType)
	t.pos = pos
	t.Elem = p.type_()
	return t
}

// chanElem parses a channel's element type; a missing type is reported
// but not skipped over (the element is simply assumed absent).
func (p *parser) chanElem() Expr {
	if trace {
		defer p.trace("chanElem")()
	}

	typ := p.typeOrNil()
	if typ == nil {
		typ = p.badExpr()
		p.syntaxError("missing channel element type")
		// assume element type is simply absent - don't advance
	}

	return typ
}

// StructType = "struct" "{" { FieldDecl ";" } "}" .
// structType parses a struct type.
func (p *parser) structType() *StructType {
	if trace {
		defer p.trace("structType")()
	}

	typ := new(StructType)
	typ.pos = p.pos()

	p.want(_Struct)
	p.want(_Lbrace)
	p.list("struct type", _Semi, _Rbrace, func() bool {
		p.fieldDecl(typ)
		return false
	})

	return typ
}

// interfaceType parses an interface type.
//
// InterfaceType = "interface" "{" { ( MethodDecl | EmbeddedElem ) ";" } "}" .
func (p *parser) interfaceType() *InterfaceType {
	if trace {
		defer p.trace("interfaceType")()
	}

	typ := new(InterfaceType)
	typ.pos = p.pos()

	p.want(_Interface)
	p.want(_Lbrace)
	p.list("interface type", _Semi, _Rbrace, func() bool {
		var f *Field
		if p.tok == _Name {
			f = p.methodDecl()
		}
		if f == nil || f.Name == nil {
			// not a method (or embedded type parsed by methodDecl)
			// => parse as an embedded element / type set term
			f = p.embeddedElem(f)
		}
		typ.MethodList = append(typ.MethodList, f)
		return false
	})

	return typ
}

// funcResult parses a function's result, which is either a parenthesized
// parameter list or a single unnamed type (or absent).
//
// Result = Parameters | Type .
func (p *parser) funcResult() []*Field {
	if trace {
		defer p.trace("funcResult")()
	}

	if p.got(_Lparen) {
		return p.paramList(nil, nil, _Rparen, false)
	}

	pos := p.pos()
	if typ := p.typeOrNil(); typ != nil {
		f := new(Field)
		f.pos = pos
		f.Type = typ
		return []*Field{f}
	}

	return nil
}

// addField appends a field with the given name (may be nil for embedded
// fields), type, and optional tag to styp. TagList is padded with nil
// entries so that tags and fields stay index-aligned.
func (p *parser) addField(styp *StructType, pos Pos, name *Name, typ Expr, tag *BasicLit) {
	if tag != nil {
		for i := len(styp.FieldList) - len(styp.TagList); i > 0; i-- {
			styp.TagList = append(styp.TagList, nil)
		}
		styp.TagList = append(styp.TagList, tag)
	}

	f := new(Field)
	f.pos = pos
	f.Name = name
	f.Type = typ
	styp.FieldList = append(styp.FieldList, f)

	if debug && tag != nil && len(styp.FieldList) != len(styp.TagList) {
		panic("inconsistent struct field list")
	}
}

// FieldDecl      = (IdentifierList Type | AnonymousField) [ Tag ] .
// AnonymousField = [ "*" ] TypeName .
// Tag            = string_lit .
// fieldDecl parses a single struct field declaration (named fields,
// embedded types, or pointer-to-embedded types) and adds the resulting
// field(s) to styp.
func (p *parser) fieldDecl(styp *StructType) {
	if trace {
		defer p.trace("fieldDecl")()
	}

	pos := p.pos()
	switch p.tok {
	case _Name:
		name := p.name()
		if p.tok == _Dot || p.tok == _Literal || p.tok == _Semi || p.tok == _Rbrace {
			// embedded type
			typ := p.qualifiedName(name)
			tag := p.oliteral()
			p.addField(styp, pos, nil, typ, tag)
			break
		}

		// name1, name2, ... Type [ tag ]
		names := p.nameList(name)
		var typ Expr

		// Careful dance: We don't know if we have an embedded instantiated
		// type T[P1, P2, ...] or a field T of array/slice type [P]E or []E.
		if len(names) == 1 && p.tok == _Lbrack {
			typ = p.arrayOrTArgs()
			if typ, ok := typ.(*IndexExpr); ok {
				// embedded type T[P1, P2, ...]
				typ.X = name // name == names[0]
				tag := p.oliteral()
				p.addField(styp, pos, nil, typ, tag)
				break
			}
		} else {
			// T P
			typ = p.type_()
		}

		tag := p.oliteral()

		for _, name := range names {
			p.addField(styp, name.Pos(), name, typ, tag)
		}

	case _Star:
		p.next()
		var typ Expr
		if p.tok == _Lparen {
			// *(T)
			p.syntaxError("cannot parenthesize embedded type")
			p.next()
			typ = p.qualifiedName(nil)
			p.got(_Rparen) // no need to complain if missing
		} else {
			// *T
			typ = p.qualifiedName(nil)
		}
		tag := p.oliteral()
		p.addField(styp, pos, nil, newIndirect(pos, typ), tag)

	case _Lparen:
		p.syntaxError("cannot parenthesize embedded type")
		p.next()
		var typ Expr
		if p.tok == _Star {
			// (*T)
			pos := p.pos()
			p.next()
			typ = newIndirect(pos, p.qualifiedName(nil))
		} else {
			// (T)
			typ = p.qualifiedName(nil)
		}
		p.got(_Rparen) // no need to complain if missing
		tag := p.oliteral()
		p.addField(styp, pos, nil, typ, tag)

	default:
		p.syntaxError("expected field name or embedded type")
		p.advance(_Semi, _Rbrace)
	}
}

// arrayOrTArgs parses either an array/slice type starting at "[" or a
// type argument list; for the latter it returns an IndexExpr whose X
// field is filled in by the caller.
func (p *parser) arrayOrTArgs() Expr {
	if trace {
		defer p.trace("arrayOrTArgs")()
	}

	pos := p.pos()
	p.want(_Lbrack)
	if p.got(_Rbrack) {
		return p.sliceType(pos)
	}

	// x [n]E or x[n,], x[n1, n2], ...
	n, comma := p.typeList(false)
	p.want(_Rbrack)
	if !comma {
		if elem := p.typeOrNil(); elem != nil {
			// x [n]E
			t := new(ArrayType)
			t.pos = pos
			t.Len = n
			t.Elem = elem
			return t
		}
	}

	// x[n,], x[n1, n2], ...
	t := new(IndexExpr)
	t.pos = pos
	// t.X will be filled in by caller
	t.Index = n
	return t
}

// oliteral returns a BasicLit node for the current literal token,
// or nil if the current token is not a literal (optional literal).
func (p *parser) oliteral() *BasicLit {
	if p.tok == _Literal {
		b := new(BasicLit)
		b.pos = p.pos()
		b.Value = p.lit
		b.Kind = p.kind
		b.Bad = p.bad
		p.next()
		return b
	}
	return nil
}

// methodDecl parses an interface method declaration or an embedded
// (possibly instantiated) type name.
//
// MethodSpec        = MethodName Signature | InterfaceTypeName .
// MethodName        = identifier .
// InterfaceTypeName = TypeName .
func (p *parser) methodDecl() *Field {
	if trace {
		defer p.trace("methodDecl")()
	}

	f := new(Field)
	f.pos = p.pos()
	name := p.name()

	const context = "interface method"

	switch p.tok {
	case _Lparen:
		// method
		f.Name = name
		_, f.Type = p.funcType(context)

	case _Lbrack:
		// Careful dance: We don't know if we have a generic method m[T C](x T)
		// or an embedded instantiated type T[P1, P2] (we accept generic methods
		// for generality and robustness of parsing but complain with an error).
		pos := p.pos()
		p.next()

		// Empty type parameter or argument lists are not permitted.
		// Treat as if [] were absent.
		if p.tok == _Rbrack {
			// name[]
			pos := p.pos()
			p.next()
			if p.tok == _Lparen {
				// name[](
				p.errorAt(pos, "empty type parameter list")
				f.Name = name
				_, f.Type = p.funcType(context)
			} else {
				p.errorAt(pos, "empty type argument list")
				f.Type = name
			}
			break
		}

		// A type argument list looks like a parameter list with only
		// types. Parse a parameter list and decide afterwards.
		list := p.paramList(nil, nil, _Rbrack, false)
		if len(list) == 0 {
			// The type parameter list is not [] but we got nothing
			// due to other errors (reported by paramList). Treat
			// as if [] were absent.
			if p.tok == _Lparen {
				f.Name = name
				_, f.Type = p.funcType(context)
			} else {
				f.Type = name
			}
			break
		}

		// len(list) > 0
		if list[0].Name != nil {
			// generic method
			f.Name = name
			_, f.Type = p.funcType(context)
			p.errorAt(pos, "interface method must have no type parameters")
			break
		}

		// embedded instantiated type
		t := new(IndexExpr)
		t.pos = pos
		t.X = name
		if len(list) == 1 {
			t.Index = list[0].Type
		} else {
			// len(list) > 1
			l := new(ListExpr)
			l.pos = list[0].Pos()
			l.ElemList = make([]Expr, len(list))
			for i := range list {
				l.ElemList[i] = list[i].Type
			}
			t.Index = l
		}
		f.Type = t

	default:
		// embedded type
		f.Type = p.qualifiedName(name)
	}

	return f
}

// embeddedElem parses a union of embedded terms. If f != nil, its Type
// is the already-parsed first term.
//
// EmbeddedElem = MethodSpec | EmbeddedTerm { "|" EmbeddedTerm } .
func (p *parser) embeddedElem(f *Field) *Field {
	if trace {
		defer p.trace("embeddedElem")()
	}

	if f == nil {
		f = new(Field)
		f.pos = p.pos()
		f.Type = p.embeddedTerm()
	}

	for p.tok == _Operator && p.op == Or {
		t := new(Operation)
		t.pos = p.pos()
		t.Op = Or
		p.next()
		t.X = f.Type
		t.Y = p.embeddedTerm()
		f.Type = t
	}

	return f
}

// embeddedTerm parses a single term of a type-set union.
//
// EmbeddedTerm = [ "~" ] Type .
func (p *parser) embeddedTerm() Expr {
	if trace {
		defer p.trace("embeddedTerm")()
	}

	if p.tok == _Operator && p.op == Tilde {
		t := new(Operation)
		t.pos = p.pos()
		t.Op = Tilde
		p.next()
		t.X = p.type_()
		return t
	}

	t := p.typeOrNil()
	if t == nil {
		t = p.badExpr()
		p.syntaxError("expected ~ term or type")
		p.advance(_Operator, _Semi, _Rparen, _Rbrack, _Rbrace)
	}

	return t
}

// ParameterDecl = [ IdentifierList ] [ "..." ] Type .
// paramDeclOrNil parses a single parameter declaration and returns it,
// or nil in case of a syntax error. If name != nil, it is the already-
// consumed first name of the declaration. follow is the token that
// terminates the enclosing list (used for type-set detection and
// error recovery).
func (p *parser) paramDeclOrNil(name *Name, follow token) *Field {
	if trace {
		defer p.trace("paramDeclOrNil")()
	}

	// type set notation is ok in type parameter lists
	typeSetsOk := follow == _Rbrack

	pos := p.pos()
	if name != nil {
		pos = name.pos
	} else if typeSetsOk && p.tok == _Operator && p.op == Tilde {
		// "~" ...
		return p.embeddedElem(nil)
	}

	f := new(Field)
	f.pos = pos

	if p.tok == _Name || name != nil {
		// name
		if name == nil {
			name = p.name()
		}

		if p.tok == _Lbrack {
			// name "[" ...
			f.Type = p.arrayOrTArgs()
			if typ, ok := f.Type.(*IndexExpr); ok {
				// name "[" ... "]"
				typ.X = name
			} else {
				// name "[" n "]" E
				f.Name = name
			}
			if typeSetsOk && p.tok == _Operator && p.op == Or {
				// name "[" ... "]" "|" ...
				// name "[" n "]" E "|" ...
				f = p.embeddedElem(f)
			}
			return f
		}

		if p.tok == _Dot {
			// name "." ...
			f.Type = p.qualifiedName(name)
			if typeSetsOk && p.tok == _Operator && p.op == Or {
				// name "." name "|" ...
				f = p.embeddedElem(f)
			}
			return f
		}

		if typeSetsOk && p.tok == _Operator && p.op == Or {
			// name "|" ...
			f.Type = name
			return p.embeddedElem(f)
		}

		f.Name = name
	}

	if p.tok == _DotDotDot {
		// [name] "..." ...
		t := new(DotsType)
		t.pos = p.pos()
		p.next()
		t.Elem = p.typeOrNil()
		if t.Elem == nil {
			t.Elem = p.badExpr()
			p.syntaxError("... is missing type")
		}
		f.Type = t
		return f
	}

	if typeSetsOk && p.tok == _Operator && p.op == Tilde {
		// [name] "~" ...
		f.Type = p.embeddedElem(nil).Type
		return f
	}

	f.Type = p.typeOrNil()
	if typeSetsOk && p.tok == _Operator && p.op == Or && f.Type != nil {
		// [name] type "|"
		f = p.embeddedElem(f)
	}
	if f.Name != nil || f.Type != nil {
		return f
	}

	p.syntaxError("expected " + tokstring(follow))
	p.advance(_Comma, follow)
	return nil
}

// Parameters    = "(" [ ParameterList [ "," ] ] ")" .
// ParameterList = ParameterDecl { "," ParameterDecl } .
+// "(" or "[" has already been consumed. +// If name != nil, it is the first name after "(" or "[". +// If typ != nil, name must be != nil, and (name, typ) is the first field in the list. +// In the result list, either all fields have a name, or no field has a name. +func (p *parser) paramList(name *Name, typ Expr, close token, requireNames bool) (list []*Field) { + if trace { + defer p.trace("paramList")() + } + + // p.list won't invoke its function argument if we're at the end of the + // parameter list. If we have a complete field, handle this case here. + if name != nil && typ != nil && p.tok == close { + p.next() + par := new(Field) + par.pos = name.pos + par.Name = name + par.Type = typ + return []*Field{par} + } + + var named int // number of parameters that have an explicit name and type + var typed int // number of parameters that have an explicit type + end := p.list("parameter list", _Comma, close, func() bool { + var par *Field + if typ != nil { + if debug && name == nil { + panic("initial type provided without name") + } + par = new(Field) + par.pos = name.pos + par.Name = name + par.Type = typ + } else { + par = p.paramDeclOrNil(name, close) + } + name = nil // 1st name was consumed if present + typ = nil // 1st type was consumed if present + if par != nil { + if debug && par.Name == nil && par.Type == nil { + panic("parameter without name or type") + } + if par.Name != nil && par.Type != nil { + named++ + } + if par.Type != nil { + typed++ + } + list = append(list, par) + } + return false + }) + + if len(list) == 0 { + return + } + + // distribute parameter types (len(list) > 0) + if named == 0 && !requireNames { + // all unnamed => found names are named types + for _, par := range list { + if typ := par.Name; typ != nil { + par.Type = typ + par.Name = nil + } + } + } else if named != len(list) { + // some named => all must have names and types + var pos Pos // left-most error position (or unknown) + var typ Expr // current type (from right to left) 
+ for i := len(list) - 1; i >= 0; i-- { + par := list[i] + if par.Type != nil { + typ = par.Type + if par.Name == nil { + pos = StartPos(typ) + par.Name = NewName(pos, "_") + } + } else if typ != nil { + par.Type = typ + } else { + // par.Type == nil && typ == nil => we only have a par.Name + pos = par.Name.Pos() + t := p.badExpr() + t.pos = pos // correct position + par.Type = t + } + } + if pos.IsKnown() { + var msg string + if requireNames { + if named == typed { + pos = end // position error at closing ] + msg = "missing type constraint" + } else { + msg = "type parameters must be named" + } + } else { + msg = "mixed named and unnamed parameters" + } + p.syntaxErrorAt(pos, msg) + } + } + + return +} + +func (p *parser) badExpr() *BadExpr { + b := new(BadExpr) + b.pos = p.pos() + return b +} + +// ---------------------------------------------------------------------------- +// Statements + +// SimpleStmt = EmptyStmt | ExpressionStmt | SendStmt | IncDecStmt | Assignment | ShortVarDecl . 
// simpleStmt parses a simple statement. If lhs != nil, it is the
// already-parsed left-hand side. keyword is the statement keyword
// context (_For, _Switch, _If, or 0) and controls whether range
// clauses and type switch guards are recognized.
func (p *parser) simpleStmt(lhs Expr, keyword token) SimpleStmt {
	if trace {
		defer p.trace("simpleStmt")()
	}

	if keyword == _For && p.tok == _Range {
		// _Range expr
		if debug && lhs != nil {
			panic("invalid call of simpleStmt")
		}
		return p.newRangeClause(nil, false)
	}

	if lhs == nil {
		lhs = p.exprList()
	}

	if _, ok := lhs.(*ListExpr); !ok && p.tok != _Assign && p.tok != _Define {
		// expr
		pos := p.pos()
		switch p.tok {
		case _AssignOp:
			// lhs op= rhs
			op := p.op
			p.next()
			return p.newAssignStmt(pos, op, lhs, p.expr())

		case _IncOp:
			// lhs++ or lhs--
			op := p.op
			p.next()
			return p.newAssignStmt(pos, op, lhs, nil)

		case _Arrow:
			// lhs <- rhs
			s := new(SendStmt)
			s.pos = pos
			p.next()
			s.Chan = lhs
			s.Value = p.expr()
			return s

		default:
			// expr
			s := new(ExprStmt)
			s.pos = lhs.Pos()
			s.X = lhs
			return s
		}
	}

	// expr_list
	switch p.tok {
	case _Assign, _Define:
		pos := p.pos()
		var op Operator
		if p.tok == _Define {
			op = Def
		}
		p.next()

		if keyword == _For && p.tok == _Range {
			// expr_list op= _Range expr
			return p.newRangeClause(lhs, op == Def)
		}

		// expr_list op= expr_list
		rhs := p.exprList()

		if x, ok := rhs.(*TypeSwitchGuard); ok && keyword == _Switch && op == Def {
			if lhs, ok := lhs.(*Name); ok {
				// switch … lhs := rhs.(type)
				x.Lhs = lhs
				s := new(ExprStmt)
				s.pos = x.Pos()
				s.X = x
				return s
			}
		}

		return p.newAssignStmt(pos, op, lhs, rhs)

	default:
		p.syntaxError("expected := or = or comma")
		p.advance(_Semi, _Rbrace)
		// make the best of what we have
		if x, ok := lhs.(*ListExpr); ok {
			lhs = x.ElemList[0]
		}
		s := new(ExprStmt)
		s.pos = lhs.Pos()
		s.X = lhs
		return s
	}
}

// newRangeClause returns a RangeClause node for "lhs := range x" (def set)
// or "lhs = range x" / "range x". The current token must be _Range.
func (p *parser) newRangeClause(lhs Expr, def bool) *RangeClause {
	r := new(RangeClause)
	r.pos = p.pos()
	p.next() // consume _Range
	r.Lhs = lhs
	r.Def = def
	r.X = p.expr()
	return r
}

// newAssignStmt returns an AssignStmt node for lhs op rhs.
func (p *parser) newAssignStmt(pos Pos, op Operator, lhs, rhs Expr) *AssignStmt {
	a := new(AssignStmt)
	a.pos = pos
	a.Op = op
	a.Lhs = lhs
	a.Rhs = rhs
	return a
}

// labeledStmtOrNil parses the statement following an already-parsed label,
// and returns the LabeledStmt, or nil in case of error (to avoid follow-on
// errors).
func (p *parser) labeledStmtOrNil(label *Name) Stmt {
	if trace {
		defer p.trace("labeledStmt")()
	}

	s := new(LabeledStmt)
	s.pos = p.pos()
	s.Label = label

	p.want(_Colon)

	if p.tok == _Rbrace {
		// We expect a statement (incl. an empty statement), which must be
		// terminated by a semicolon. Because semicolons may be omitted before
		// an _Rbrace, seeing an _Rbrace implies an empty statement.
		e := new(EmptyStmt)
		e.pos = p.pos()
		s.Stmt = e
		return s
	}

	s.Stmt = p.stmtOrNil()
	if s.Stmt != nil {
		return s
	}

	// report error at line of ':' token
	p.syntaxErrorAt(s.pos, "missing statement after label")
	// we are already at the end of the labeled statement - no need to advance
	return nil // avoids follow-on errors (see e.g., fixedbugs/bug274.go)
}

// blockStmt parses a brace-delimited statement block.
// context must be a non-empty string unless we know that p.tok == _Lbrace.
func (p *parser) blockStmt(context string) *BlockStmt {
	if trace {
		defer p.trace("blockStmt")()
	}

	s := new(BlockStmt)
	s.pos = p.pos()

	// people coming from C may forget that braces are mandatory in Go
	if !p.got(_Lbrace) {
		p.syntaxError("expected { after " + context)
		p.advance(_Name, _Rbrace)
		s.Rbrace = p.pos() // in case we found "}"
		if p.got(_Rbrace) {
			return s
		}
	}

	s.List = p.stmtList()
	s.Rbrace = p.pos()
	p.want(_Rbrace)

	return s
}

// declStmt parses a declaration statement (const, type, or var);
// f parses one declaration of the appropriate kind.
func (p *parser) declStmt(f func(*Group) Decl) *DeclStmt {
	if trace {
		defer p.trace("declStmt")()
	}

	s := new(DeclStmt)
	s.pos = p.pos()

	p.next() // _Const, _Type, or _Var
	s.DeclList = p.appendGroup(nil, f)

	return s
}

// forStmt parses a for statement, including range-based loops.
func (p *parser) forStmt() Stmt {
	if trace {
		defer p.trace("forStmt")()
	}

	s := new(ForStmt)
	s.pos = p.pos()

	s.Init, s.Cond, s.Post = p.header(_For)
	s.Body = p.blockStmt("for clause")

	return s
}

// header parses the header of an if, for, or switch statement
// (keyword selects which) and returns its init statement, condition,
// and (for for-loops) post statement. A range clause is returned in init.
func (p *parser) header(keyword token) (init SimpleStmt, cond Expr, post SimpleStmt) {
	p.want(keyword)

	if p.tok == _Lbrace {
		if keyword == _If {
			p.syntaxError("missing condition in if statement")
			cond = p.badExpr()
		}
		return
	}
	// p.tok != _Lbrace

	outer := p.xnest
	p.xnest = -1

	if p.tok != _Semi {
		// accept potential varDecl but complain
		if p.got(_Var) {
			p.syntaxError(fmt.Sprintf("var declaration not allowed in %s initializer", tokstring(keyword)))
		}
		init = p.simpleStmt(nil, keyword)
		// If we have a range clause, we are done (can only happen for keyword == _For).
		if _, ok := init.(*RangeClause); ok {
			p.xnest = outer
			return
		}
	}

	var condStmt SimpleStmt
	var semi struct {
		pos Pos
		lit string // valid if pos.IsKnown()
	}
	if p.tok != _Lbrace {
		if p.tok == _Semi {
			semi.pos = p.pos()
			semi.lit = p.lit
			p.next()
		} else {
			// asking for a '{' rather than a ';' here leads to a better error message
			p.want(_Lbrace)
			if p.tok != _Lbrace {
				p.advance(_Lbrace, _Rbrace) // for better synchronization (e.g., issue #22581)
			}
		}
		if keyword == _For {
			if p.tok != _Semi {
				if p.tok == _Lbrace {
					p.syntaxError("expected for loop condition")
					goto done
				}
				condStmt = p.simpleStmt(nil, 0 /* range not permitted */)
			}
			p.want(_Semi)
			if p.tok != _Lbrace {
				post = p.simpleStmt(nil, 0 /* range not permitted */)
				if a, _ := post.(*AssignStmt); a != nil && a.Op == Def {
					p.syntaxErrorAt(a.Pos(), "cannot declare in post statement of for loop")
				}
			}
		} else if p.tok != _Lbrace {
			condStmt = p.simpleStmt(nil, keyword)
		}
	} else {
		condStmt = init
		init = nil
	}

done:
	// unpack condStmt
	switch s := condStmt.(type) {
	case nil:
		if keyword == _If && semi.pos.IsKnown() {
			if semi.lit != "semicolon" {
				p.syntaxErrorAt(semi.pos, fmt.Sprintf("unexpected %s, expected { after if clause", semi.lit))
			} else {
				p.syntaxErrorAt(semi.pos, "missing condition in if statement")
			}
			b := new(BadExpr)
			b.pos = semi.pos
			cond = b
		}
	case *ExprStmt:
		cond = s.X
	default:
		// A common syntax error is to write '=' instead of '==',
		// which turns an expression into an assignment. Provide
		// a more explicit error message in that case to prevent
		// further confusion.
		var str string
		if as, ok := s.(*AssignStmt); ok && as.Op == 0 {
			// Emphasize Lhs and Rhs of assignment with parentheses to highlight '='.
			// Do it always - it's not worth going through the trouble of doing it
			// only for "complex" left and right sides.
			str = "assignment (" + String(as.Lhs) + ") = (" + String(as.Rhs) + ")"
		} else {
			str = String(s)
		}
		p.syntaxErrorAt(s.Pos(), fmt.Sprintf("cannot use %s as value", str))
	}

	p.xnest = outer
	return
}

// ifStmt parses an if statement, including else-if chains.
func (p *parser) ifStmt() *IfStmt {
	if trace {
		defer p.trace("ifStmt")()
	}

	s := new(IfStmt)
	s.pos = p.pos()

	s.Init, s.Cond, _ = p.header(_If)
	s.Then = p.blockStmt("if clause")

	if p.got(_Else) {
		switch p.tok {
		case _If:
			s.Else = p.ifStmt()
		case _Lbrace:
			s.Else = p.blockStmt("")
		default:
			p.syntaxError("else must be followed by if or statement block")
			p.advance(_Name, _Rbrace)
		}
	}

	return s
}

// switchStmt parses a switch statement (expression or type switch).
func (p *parser) switchStmt() *SwitchStmt {
	if trace {
		defer p.trace("switchStmt")()
	}

	s := new(SwitchStmt)
	s.pos = p.pos()

	s.Init, s.Tag, _ = p.header(_Switch)

	if !p.got(_Lbrace) {
		p.syntaxError("missing { after switch clause")
		p.advance(_Case, _Default, _Rbrace)
	}
	for p.tok != _EOF && p.tok != _Rbrace {
		s.Body = append(s.Body, p.caseClause())
	}
	s.Rbrace = p.pos()
	p.want(_Rbrace)

	return s
}

// selectStmt parses a select statement.
func (p *parser) selectStmt() *SelectStmt {
	if trace {
		defer p.trace("selectStmt")()
	}

	s := new(SelectStmt)
	s.pos = p.pos()

	p.want(_Select)
	if !p.got(_Lbrace) {
		p.syntaxError("missing { after select clause")
		p.advance(_Case, _Default, _Rbrace)
	}
	for p.tok != _EOF && p.tok != _Rbrace {
		s.Body = append(s.Body, p.commClause())
	}
	s.Rbrace = p.pos()
	p.want(_Rbrace)

	return s
}
+// caseClause parses one "case x, y:" or "default:" clause of a switch
+// statement, including its colon and statement list. On any other token
+// it reports an error and advances to a plausible resynchronization point.
+func (p *parser) caseClause() *CaseClause {
+	if trace {
+		defer p.trace("caseClause")()
+	}
+
+	c := new(CaseClause)
+	c.pos = p.pos()
+
+	switch p.tok {
+	case _Case:
+		p.next()
+		c.Cases = p.exprList()
+
+	case _Default:
+		p.next()
+
+	default:
+		p.syntaxError("expected case or default or }")
+		p.advance(_Colon, _Case, _Default, _Rbrace)
+	}
+
+	c.Colon = p.pos()
+	p.want(_Colon)
+	c.Body = p.stmtList()
+
+	return c
+}
+
+// commClause parses one "case <communication>:" or "default:" clause of a
+// select statement, including its colon and statement list.
+func (p *parser) commClause() *CommClause {
+	if trace {
+		defer p.trace("commClause")()
+	}
+
+	c := new(CommClause)
+	c.pos = p.pos()
+
+	switch p.tok {
+	case _Case:
+		p.next()
+		c.Comm = p.simpleStmt(nil, 0)
+
+		// The syntax restricts the possible simple statements here to:
+		//
+		//	lhs <- x (send statement)
+		//	<-x
+		//	lhs = <-x
+		//	lhs := <-x
+		//
+		// All these (and more) are recognized by simpleStmt and invalid
+		// syntax trees are flagged later, during type checking.
+
+	case _Default:
+		p.next()
+
+	default:
+		p.syntaxError("expected case or default or }")
+		p.advance(_Colon, _Case, _Default, _Rbrace)
+	}
+
+	c.Colon = p.pos()
+	p.want(_Colon)
+	c.Body = p.stmtList()
+
+	return c
+}
+
+// stmtOrNil parses a statement if one is present, or else returns nil.
+//
+//	Statement =
+//		Declaration | LabeledStmt | SimpleStmt |
+//		GoStmt | ReturnStmt | BreakStmt | ContinueStmt | GotoStmt |
+//		FallthroughStmt | Block | IfStmt | SwitchStmt | SelectStmt | ForStmt |
+//		DeferStmt .
+func (p *parser) stmtOrNil() Stmt {
+	if trace {
+		defer p.trace("stmt " + p.tok.String())()
+	}
+
+	// Most statements (assignments) start with an identifier;
+	// look for it first before doing anything more expensive.
+	if p.tok == _Name {
+		p.clearPragma()
+		lhs := p.exprList()
+		// A single name followed by ':' is a label declaration.
+		if label, ok := lhs.(*Name); ok && p.tok == _Colon {
+			return p.labeledStmtOrNil(label)
+		}
+		return p.simpleStmt(lhs, 0)
+	}
+
+	// Declarations are dispatched before clearing the pragma:
+	// presumably declStmt/the decl parsers consume any pending
+	// pragma themselves — note the clearPragma call only after
+	// this switch.
+	switch p.tok {
+	case _Var:
+		return p.declStmt(p.varDecl)
+
+	case _Const:
+		return p.declStmt(p.constDecl)
+
+	case _Type:
+		return p.declStmt(p.typeDecl)
+	}
+
+	// Any other statement cannot carry a pragma; drop a pending one.
+	p.clearPragma()
+
+	switch p.tok {
+	case _Lbrace:
+		return p.blockStmt("")
+
+	case _Operator, _Star:
+		switch p.op {
+		case Add, Sub, Mul, And, Xor, Not:
+			return p.simpleStmt(nil, 0) // unary operators
+		}
+
+	case _Literal, _Func, _Lparen, // operands
+		_Lbrack, _Struct, _Map, _Chan, _Interface, // composite types
+		_Arrow: // receive operator
+		return p.simpleStmt(nil, 0)
+
+	case _For:
+		return p.forStmt()
+
+	case _Switch:
+		return p.switchStmt()
+
+	case _Select:
+		return p.selectStmt()
+
+	case _If:
+		return p.ifStmt()
+
+	case _Fallthrough:
+		s := new(BranchStmt)
+		s.pos = p.pos()
+		p.next()
+		s.Tok = _Fallthrough
+		return s
+
+	case _Break, _Continue:
+		s := new(BranchStmt)
+		s.pos = p.pos()
+		s.Tok = p.tok
+		p.next()
+		// optional label
+		if p.tok == _Name {
+			s.Label = p.name()
+		}
+		return s
+
+	case _Go, _Defer:
+		return p.callStmt()
+
+	case _Goto:
+		s := new(BranchStmt)
+		s.pos = p.pos()
+		s.Tok = _Goto
+		p.next()
+		s.Label = p.name() // goto requires a label
+		return s
+
+	case _Return:
+		s := new(ReturnStmt)
+		s.pos = p.pos()
+		p.next()
+		if p.tok != _Semi && p.tok != _Rbrace {
+			s.Results = p.exprList()
+		}
+		return s
+
+	case _Semi:
+		// Empty statement; the ';' itself is not consumed here —
+		// the caller handles statement separators.
+		s := new(EmptyStmt)
+		s.pos = p.pos()
+		return s
+	}
+
+	return nil
+}
+
+// StatementList = { Statement ";" } .
+// stmtList parses statements until EOF, '}', 'case', or 'default',
+// consuming the terminating semicolons between them.
+func (p *parser) stmtList() (l []Stmt) {
+	if trace {
+		defer p.trace("stmtList")()
+	}
+
+	for p.tok != _EOF && p.tok != _Rbrace && p.tok != _Case && p.tok != _Default {
+		s := p.stmtOrNil()
+		p.clearPragma()
+		if s == nil {
+			break
+		}
+		l = append(l, s)
+		// ";" is optional before "}"
+		if !p.got(_Semi) && p.tok != _Rbrace {
+			p.syntaxError("at end of statement")
+			p.advance(_Semi, _Rbrace, _Case, _Default)
+			p.got(_Semi) // avoid spurious empty statement
+		}
+	}
+	return
+}
+
+// argList parses a possibly empty, comma-separated list of arguments,
+// optionally followed by a comma (if not empty), and closed by ")".
+// The last argument may be followed by "...".
+//
+// argList = [ arg { "," arg } [ "..." ] [ "," ] ] ")" .
+func (p *parser) argList() (list []Expr, hasDots bool) {
+	if trace {
+		defer p.trace("argList")()
+	}
+
+	p.xnest++ // arguments may contain composite literals; see xnest handling elsewhere
+	p.list("argument list", _Comma, _Rparen, func() bool {
+		list = append(list, p.expr())
+		hasDots = p.got(_DotDotDot)
+		return hasDots // "..." must be last; stop the list after it
+	})
+	p.xnest--
+
+	return
+}
+
+// ----------------------------------------------------------------------------
+// Common productions
+
+// name parses an identifier. If the current token is not an identifier,
+// it reports an error and returns a placeholder "_" name so parsing
+// can continue.
+func (p *parser) name() *Name {
+	// no tracing to avoid overly verbose output
+
+	if p.tok == _Name {
+		n := NewName(p.pos(), p.lit)
+		p.next()
+		return n
+	}
+
+	n := NewName(p.pos(), "_")
+	p.syntaxError("expected name")
+	p.advance()
+	return n
+}
+
+// IdentifierList = identifier { "," identifier } .
+// The first name must be provided.
+func (p *parser) nameList(first *Name) []*Name {
+	if trace {
+		defer p.trace("nameList")()
+	}
+
+	if debug && first == nil {
+		panic("first name not provided")
+	}
+
+	l := []*Name{first}
+	for p.got(_Comma) {
+		l = append(l, p.name())
+	}
+
+	return l
+}
+
+// The first name may be provided, or nil.
+// qualifiedName parses a possibly package-qualified name (identifier or
+// identifier '.' identifier), optionally followed by a type instantiation.
+// If name is non-nil it is used as the (already parsed) first identifier.
+func (p *parser) qualifiedName(name *Name) Expr {
+	if trace {
+		defer p.trace("qualifiedName")()
+	}
+
+	var x Expr
+	switch {
+	case name != nil:
+		x = name
+	case p.tok == _Name:
+		x = p.name()
+	default:
+		// error recovery: substitute "_" and resynchronize
+		x = NewName(p.pos(), "_")
+		p.syntaxError("expected name")
+		p.advance(_Dot, _Semi, _Rbrace)
+	}
+
+	if p.tok == _Dot {
+		s := new(SelectorExpr)
+		s.pos = p.pos()
+		p.next()
+		s.X = x
+		s.Sel = p.name()
+		x = s
+	}
+
+	// '[' after the (qualified) name starts a type argument list
+	if p.tok == _Lbrack {
+		x = p.typeInstance(x)
+	}
+
+	return x
+}
+
+// ExpressionList = Expression { "," Expression } .
+// A single expression is returned as-is; two or more are wrapped
+// in a *ListExpr positioned at the first expression.
+func (p *parser) exprList() Expr {
+	if trace {
+		defer p.trace("exprList")()
+	}
+
+	x := p.expr()
+	if p.got(_Comma) {
+		list := []Expr{x, p.expr()}
+		for p.got(_Comma) {
+			list = append(list, p.expr())
+		}
+		t := new(ListExpr)
+		t.pos = x.Pos()
+		t.ElemList = list
+		x = t
+	}
+	return x
+}
+
+// typeList parses a non-empty, comma-separated list of types,
+// optionally followed by a comma. If strict is set to false,
+// the first element may also be a (non-type) expression.
+// If there is more than one argument, the result is a *ListExpr.
+// The comma result indicates whether there was a (separating or
+// trailing) comma.
+//
+// typeList = arg { "," arg } [ "," ] .
+func (p *parser) typeList(strict bool) (x Expr, comma bool) {
+	if trace {
+		defer p.trace("typeList")()
+	}
+
+	p.xnest++
+	if strict {
+		x = p.type_()
+	} else {
+		x = p.expr()
+	}
+	if p.got(_Comma) {
+		comma = true
+		if t := p.typeOrNil(); t != nil {
+			list := []Expr{x, t}
+			for p.got(_Comma) {
+				// stop at the first non-type (e.g. a trailing comma
+				// before the closing bracket)
+				if t = p.typeOrNil(); t == nil {
+					break
+				}
+				list = append(list, t)
+			}
+			l := new(ListExpr)
+			l.pos = x.Pos() // == list[0].Pos()
+			l.ElemList = list
+			x = l
+		}
+	}
+	p.xnest--
+	return
+}
+
+// unparen removes all parentheses around an expression.
+func unparen(x Expr) Expr { + for { + p, ok := x.(*ParenExpr) + if !ok { + break + } + x = p.X + } + return x +} diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go new file mode 100644 index 0000000..74583ca --- /dev/null +++ b/src/cmd/compile/internal/syntax/parser_test.go @@ -0,0 +1,364 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "bytes" + "flag" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "regexp" + "runtime" + "strings" + "sync" + "testing" + "time" +) + +var ( + fast = flag.Bool("fast", false, "parse package files in parallel") + verify = flag.Bool("verify", false, "verify idempotent printing") + src_ = flag.String("src", "parser.go", "source file to parse") + skip = flag.String("skip", "", "files matching this regular expression are skipped by TestStdLib") +) + +func TestParse(t *testing.T) { + ParseFile(*src_, func(err error) { t.Error(err) }, nil, 0) +} + +func TestVerify(t *testing.T) { + ast, err := ParseFile(*src_, func(err error) { t.Error(err) }, nil, 0) + if err != nil { + return // error already reported + } + verifyPrint(t, *src_, ast) +} + +func TestStdLib(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + var skipRx *regexp.Regexp + if *skip != "" { + var err error + skipRx, err = regexp.Compile(*skip) + if err != nil { + t.Fatalf("invalid argument for -skip (%v)", err) + } + } + + var m1 runtime.MemStats + runtime.ReadMemStats(&m1) + start := time.Now() + + type parseResult struct { + filename string + lines uint + } + + goroot := testenv.GOROOT(t) + + results := make(chan parseResult) + go func() { + defer close(results) + for _, dir := range []string{ + filepath.Join(goroot, "src"), + filepath.Join(goroot, "misc"), + } { + walkDirs(t, dir, func(filename string) { + if skipRx != nil && 
skipRx.MatchString(filename) { + // Always report skipped files since regexp + // typos can lead to surprising results. + fmt.Printf("skipping %s\n", filename) + return + } + if debug { + fmt.Printf("parsing %s\n", filename) + } + ast, err := ParseFile(filename, nil, nil, 0) + if err != nil { + t.Error(err) + return + } + if *verify { + verifyPrint(t, filename, ast) + } + results <- parseResult{filename, ast.EOF.Line()} + }) + } + }() + + var count, lines uint + for res := range results { + count++ + lines += res.lines + if testing.Verbose() { + fmt.Printf("%5d %s (%d lines)\n", count, res.filename, res.lines) + } + } + + dt := time.Since(start) + var m2 runtime.MemStats + runtime.ReadMemStats(&m2) + dm := float64(m2.TotalAlloc-m1.TotalAlloc) / 1e6 + + fmt.Printf("parsed %d lines (%d files) in %v (%d lines/s)\n", lines, count, dt, int64(float64(lines)/dt.Seconds())) + fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds()) +} + +func walkDirs(t *testing.T, dir string, action func(string)) { + entries, err := os.ReadDir(dir) + if err != nil { + t.Error(err) + return + } + + var files, dirs []string + for _, entry := range entries { + if entry.Type().IsRegular() { + if strings.HasSuffix(entry.Name(), ".go") { + path := filepath.Join(dir, entry.Name()) + files = append(files, path) + } + } else if entry.IsDir() && entry.Name() != "testdata" { + path := filepath.Join(dir, entry.Name()) + if !strings.HasSuffix(path, string(filepath.Separator)+"test") { + dirs = append(dirs, path) + } + } + } + + if *fast { + var wg sync.WaitGroup + wg.Add(len(files)) + for _, filename := range files { + go func(filename string) { + defer wg.Done() + action(filename) + }(filename) + } + wg.Wait() + } else { + for _, filename := range files { + action(filename) + } + } + + for _, dir := range dirs { + walkDirs(t, dir, action) + } +} + +func verifyPrint(t *testing.T, filename string, ast1 *File) { + var buf1 bytes.Buffer + _, err := Fprint(&buf1, ast1, LineForm) + if err != nil { 
+ panic(err) + } + bytes1 := buf1.Bytes() + + ast2, err := Parse(NewFileBase(filename), &buf1, nil, nil, 0) + if err != nil { + panic(err) + } + + var buf2 bytes.Buffer + _, err = Fprint(&buf2, ast2, LineForm) + if err != nil { + panic(err) + } + bytes2 := buf2.Bytes() + + if bytes.Compare(bytes1, bytes2) != 0 { + fmt.Printf("--- %s ---\n", filename) + fmt.Printf("%s\n", bytes1) + fmt.Println() + + fmt.Printf("--- %s ---\n", filename) + fmt.Printf("%s\n", bytes2) + fmt.Println() + + t.Error("printed syntax trees do not match") + } +} + +func TestIssue17697(t *testing.T) { + _, err := Parse(nil, bytes.NewReader(nil), nil, nil, 0) // return with parser error, don't panic + if err == nil { + t.Errorf("no error reported") + } +} + +func TestParseFile(t *testing.T) { + _, err := ParseFile("", nil, nil, 0) + if err == nil { + t.Error("missing io error") + } + + var first error + _, err = ParseFile("", func(err error) { + if first == nil { + first = err + } + }, nil, 0) + if err == nil || first == nil { + t.Error("missing io error") + } + if err != first { + t.Errorf("got %v; want first error %v", err, first) + } +} + +// Make sure (PosMax + 1) doesn't overflow when converted to default +// type int (when passed as argument to fmt.Sprintf) on 32bit platforms +// (see test cases below). 
+var tooLarge int = PosMax + 1 + +func TestLineDirectives(t *testing.T) { + // valid line directives lead to a syntax error after them + const valid = "syntax error: package statement must be first" + const filename = "directives.go" + + for _, test := range []struct { + src, msg string + filename string + line, col uint // 1-based; 0 means unknown + }{ + // ignored //line directives + {"//\n", valid, filename, 2, 1}, // no directive + {"//line\n", valid, filename, 2, 1}, // missing colon + {"//line foo\n", valid, filename, 2, 1}, // missing colon + {" //line foo:\n", valid, filename, 2, 1}, // not a line start + {"// line foo:\n", valid, filename, 2, 1}, // space between // and line + + // invalid //line directives with one colon + {"//line :\n", "invalid line number: ", filename, 1, 9}, + {"//line :x\n", "invalid line number: x", filename, 1, 9}, + {"//line foo :\n", "invalid line number: ", filename, 1, 13}, + {"//line foo:x\n", "invalid line number: x", filename, 1, 12}, + {"//line foo:0\n", "invalid line number: 0", filename, 1, 12}, + {"//line foo:1 \n", "invalid line number: 1 ", filename, 1, 12}, + {"//line foo:-12\n", "invalid line number: -12", filename, 1, 12}, + {"//line C:foo:0\n", "invalid line number: 0", filename, 1, 14}, + {fmt.Sprintf("//line foo:%d\n", tooLarge), fmt.Sprintf("invalid line number: %d", tooLarge), filename, 1, 12}, + + // invalid //line directives with two colons + {"//line ::\n", "invalid line number: ", filename, 1, 10}, + {"//line ::x\n", "invalid line number: x", filename, 1, 10}, + {"//line foo::123abc\n", "invalid line number: 123abc", filename, 1, 13}, + {"//line foo::0\n", "invalid line number: 0", filename, 1, 13}, + {"//line foo:0:1\n", "invalid line number: 0", filename, 1, 12}, + + {"//line :123:0\n", "invalid column number: 0", filename, 1, 13}, + {"//line foo:123:0\n", "invalid column number: 0", filename, 1, 16}, + {fmt.Sprintf("//line foo:10:%d\n", tooLarge), fmt.Sprintf("invalid column number: %d", tooLarge), 
filename, 1, 15}, + + // effect of valid //line directives on lines + {"//line foo:123\n foo", valid, "foo", 123, 0}, + {"//line foo:123\n foo", valid, " foo", 123, 0}, + {"//line foo:123\n//line bar:345\nfoo", valid, "bar", 345, 0}, + {"//line C:foo:123\n", valid, "C:foo", 123, 0}, + {"//line /src/a/a.go:123\n foo", valid, "/src/a/a.go", 123, 0}, + {"//line :x:1\n", valid, ":x", 1, 0}, + {"//line foo ::1\n", valid, "foo :", 1, 0}, + {"//line foo:123abc:1\n", valid, "foo:123abc", 1, 0}, + {"//line foo :123:1\n", valid, "foo ", 123, 1}, + {"//line ::123\n", valid, ":", 123, 0}, + + // effect of valid //line directives on columns + {"//line :x:1:10\n", valid, ":x", 1, 10}, + {"//line foo ::1:2\n", valid, "foo :", 1, 2}, + {"//line foo:123abc:1:1000\n", valid, "foo:123abc", 1, 1000}, + {"//line foo :123:1000\n\n", valid, "foo ", 124, 1}, + {"//line ::123:1234\n", valid, ":", 123, 1234}, + + // //line directives with omitted filenames lead to empty filenames + {"//line :10\n", valid, "", 10, 0}, + {"//line :10:20\n", valid, filename, 10, 20}, + {"//line bar:1\n//line :10\n", valid, "", 10, 0}, + {"//line bar:1\n//line :10:20\n", valid, "bar", 10, 20}, + + // ignored /*line directives + {"/**/", valid, filename, 1, 5}, // no directive + {"/*line*/", valid, filename, 1, 9}, // missing colon + {"/*line foo*/", valid, filename, 1, 13}, // missing colon + {" //line foo:*/", valid, filename, 1, 16}, // not a line start + {"/* line foo:*/", valid, filename, 1, 16}, // space between // and line + + // invalid /*line directives with one colon + {"/*line :*/", "invalid line number: ", filename, 1, 9}, + {"/*line :x*/", "invalid line number: x", filename, 1, 9}, + {"/*line foo :*/", "invalid line number: ", filename, 1, 13}, + {"/*line foo:x*/", "invalid line number: x", filename, 1, 12}, + {"/*line foo:0*/", "invalid line number: 0", filename, 1, 12}, + {"/*line foo:1 */", "invalid line number: 1 ", filename, 1, 12}, + {"/*line C:foo:0*/", "invalid line number: 0", filename, 1, 
14}, + {fmt.Sprintf("/*line foo:%d*/", tooLarge), fmt.Sprintf("invalid line number: %d", tooLarge), filename, 1, 12}, + + // invalid /*line directives with two colons + {"/*line ::*/", "invalid line number: ", filename, 1, 10}, + {"/*line ::x*/", "invalid line number: x", filename, 1, 10}, + {"/*line foo::123abc*/", "invalid line number: 123abc", filename, 1, 13}, + {"/*line foo::0*/", "invalid line number: 0", filename, 1, 13}, + {"/*line foo:0:1*/", "invalid line number: 0", filename, 1, 12}, + + {"/*line :123:0*/", "invalid column number: 0", filename, 1, 13}, + {"/*line foo:123:0*/", "invalid column number: 0", filename, 1, 16}, + {fmt.Sprintf("/*line foo:10:%d*/", tooLarge), fmt.Sprintf("invalid column number: %d", tooLarge), filename, 1, 15}, + + // effect of valid /*line directives on lines + {"/*line foo:123*/ foo", valid, "foo", 123, 0}, + {"/*line foo:123*/\n//line bar:345\nfoo", valid, "bar", 345, 0}, + {"/*line C:foo:123*/", valid, "C:foo", 123, 0}, + {"/*line /src/a/a.go:123*/ foo", valid, "/src/a/a.go", 123, 0}, + {"/*line :x:1*/", valid, ":x", 1, 0}, + {"/*line foo ::1*/", valid, "foo :", 1, 0}, + {"/*line foo:123abc:1*/", valid, "foo:123abc", 1, 0}, + {"/*line foo :123:10*/", valid, "foo ", 123, 10}, + {"/*line ::123*/", valid, ":", 123, 0}, + + // effect of valid /*line directives on columns + {"/*line :x:1:10*/", valid, ":x", 1, 10}, + {"/*line foo ::1:2*/", valid, "foo :", 1, 2}, + {"/*line foo:123abc:1:1000*/", valid, "foo:123abc", 1, 1000}, + {"/*line foo :123:1000*/\n", valid, "foo ", 124, 1}, + {"/*line ::123:1234*/", valid, ":", 123, 1234}, + + // /*line directives with omitted filenames lead to the previously used filenames + {"/*line :10*/", valid, "", 10, 0}, + {"/*line :10:20*/", valid, filename, 10, 20}, + {"//line bar:1\n/*line :10*/", valid, "", 10, 0}, + {"//line bar:1\n/*line :10:20*/", valid, "bar", 10, 20}, + } { + base := NewFileBase(filename) + _, err := Parse(base, strings.NewReader(test.src), nil, nil, 0) + if err == nil { + 
t.Errorf("%s: no error reported", test.src) + continue + } + perr, ok := err.(Error) + if !ok { + t.Errorf("%s: got %v; want parser error", test.src, err) + continue + } + if msg := perr.Msg; msg != test.msg { + t.Errorf("%s: got msg = %q; want %q", test.src, msg, test.msg) + } + + pos := perr.Pos + if filename := pos.RelFilename(); filename != test.filename { + t.Errorf("%s: got filename = %q; want %q", test.src, filename, test.filename) + } + if line := pos.RelLine(); line != test.line { + t.Errorf("%s: got line = %d; want %d", test.src, line, test.line) + } + if col := pos.RelCol(); col != test.col { + t.Errorf("%s: got col = %d; want %d", test.src, col, test.col) + } + } +} diff --git a/src/cmd/compile/internal/syntax/pos.go b/src/cmd/compile/internal/syntax/pos.go new file mode 100644 index 0000000..b5e53d2 --- /dev/null +++ b/src/cmd/compile/internal/syntax/pos.go @@ -0,0 +1,209 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import "fmt" + +// PosMax is the largest line or column value that can be represented without loss. +// Incoming values (arguments) larger than PosMax will be set to PosMax. +const PosMax = 1 << 30 + +// A Pos represents an absolute (line, col) source position +// with a reference to position base for computing relative +// (to a file, or line directive) position information. +// Pos values are intentionally light-weight so that they +// can be created without too much concern about space use. +type Pos struct { + base *PosBase + line, col uint32 +} + +// MakePos returns a new Pos for the given PosBase, line and column. +func MakePos(base *PosBase, line, col uint) Pos { return Pos{base, sat32(line), sat32(col)} } + +// TODO(gri) IsKnown makes an assumption about linebase < 1. +// Maybe we should check for Base() != nil instead. 
+ +func (pos Pos) Pos() Pos { return pos } +func (pos Pos) IsKnown() bool { return pos.line > 0 } +func (pos Pos) Base() *PosBase { return pos.base } +func (pos Pos) Line() uint { return uint(pos.line) } +func (pos Pos) Col() uint { return uint(pos.col) } + +func (pos Pos) RelFilename() string { return pos.base.Filename() } + +func (pos Pos) RelLine() uint { + b := pos.base + if b.Line() == 0 { + // base line is unknown => relative line is unknown + return 0 + } + return b.Line() + (pos.Line() - b.Pos().Line()) +} + +func (pos Pos) RelCol() uint { + b := pos.base + if b.Col() == 0 { + // base column is unknown => relative column is unknown + // (the current specification for line directives requires + // this to apply until the next PosBase/line directive, + // not just until the new newline) + return 0 + } + if pos.Line() == b.Pos().Line() { + // pos on same line as pos base => column is relative to pos base + return b.Col() + (pos.Col() - b.Pos().Col()) + } + return pos.Col() +} + +// Cmp compares the positions p and q and returns a result r as follows: +// +// r < 0: p is before q +// r == 0: p and q are the same position (but may not be identical) +// r > 0: p is after q +// +// If p and q are in different files, p is before q if the filename +// of p sorts lexicographically before the filename of q. 
+func (p Pos) Cmp(q Pos) int { + pname := p.RelFilename() + qname := q.RelFilename() + switch { + case pname < qname: + return -1 + case pname > qname: + return +1 + } + + pline := p.Line() + qline := q.Line() + switch { + case pline < qline: + return -1 + case pline > qline: + return +1 + } + + pcol := p.Col() + qcol := q.Col() + switch { + case pcol < qcol: + return -1 + case pcol > qcol: + return +1 + } + + return 0 +} + +func (pos Pos) String() string { + rel := position_{pos.RelFilename(), pos.RelLine(), pos.RelCol()} + abs := position_{pos.Base().Pos().RelFilename(), pos.Line(), pos.Col()} + s := rel.String() + if rel != abs { + s += "[" + abs.String() + "]" + } + return s +} + +// TODO(gri) cleanup: find better name, avoid conflict with position in error_test.go +type position_ struct { + filename string + line, col uint +} + +func (p position_) String() string { + if p.line == 0 { + if p.filename == "" { + return "<unknown position>" + } + return p.filename + } + if p.col == 0 { + return fmt.Sprintf("%s:%d", p.filename, p.line) + } + return fmt.Sprintf("%s:%d:%d", p.filename, p.line, p.col) +} + +// A PosBase represents the base for relative position information: +// At position pos, the relative position is filename:line:col. +type PosBase struct { + pos Pos + filename string + line, col uint32 + trimmed bool // whether -trimpath has been applied +} + +// NewFileBase returns a new PosBase for the given filename. +// A file PosBase's position is relative to itself, with the +// position being filename:1:1. +func NewFileBase(filename string) *PosBase { + return NewTrimmedFileBase(filename, false) +} + +// NewTrimmedFileBase is like NewFileBase, but allows specifying Trimmed. 
+func NewTrimmedFileBase(filename string, trimmed bool) *PosBase { + base := &PosBase{MakePos(nil, linebase, colbase), filename, linebase, colbase, trimmed} + base.pos.base = base + return base +} + +// NewLineBase returns a new PosBase for a line directive "line filename:line:col" +// relative to pos, which is the position of the character immediately following +// the comment containing the line directive. For a directive in a line comment, +// that position is the beginning of the next line (i.e., the newline character +// belongs to the line comment). +func NewLineBase(pos Pos, filename string, trimmed bool, line, col uint) *PosBase { + return &PosBase{pos, filename, sat32(line), sat32(col), trimmed} +} + +func (base *PosBase) IsFileBase() bool { + if base == nil { + return false + } + return base.pos.base == base +} + +func (base *PosBase) Pos() (_ Pos) { + if base == nil { + return + } + return base.pos +} + +func (base *PosBase) Filename() string { + if base == nil { + return "" + } + return base.filename +} + +func (base *PosBase) Line() uint { + if base == nil { + return 0 + } + return uint(base.line) +} + +func (base *PosBase) Col() uint { + if base == nil { + return 0 + } + return uint(base.col) +} + +func (base *PosBase) Trimmed() bool { + if base == nil { + return false + } + return base.trimmed +} + +func sat32(x uint) uint32 { + if x > PosMax { + return PosMax + } + return uint32(x) +} diff --git a/src/cmd/compile/internal/syntax/positions.go b/src/cmd/compile/internal/syntax/positions.go new file mode 100644 index 0000000..9359655 --- /dev/null +++ b/src/cmd/compile/internal/syntax/positions.go @@ -0,0 +1,364 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements helper functions for scope position computations. + +package syntax + +// StartPos returns the start position of n. 
+func StartPos(n Node) Pos { + // Cases for nodes which don't need a correction are commented out. + for m := n; ; { + switch n := m.(type) { + case nil: + panic("nil node") + + // packages + case *File: + // file block starts at the beginning of the file + return MakePos(n.Pos().Base(), 1, 1) + + // declarations + // case *ImportDecl: + // case *ConstDecl: + // case *TypeDecl: + // case *VarDecl: + // case *FuncDecl: + + // expressions + // case *BadExpr: + // case *Name: + // case *BasicLit: + case *CompositeLit: + if n.Type != nil { + m = n.Type + continue + } + return n.Pos() + // case *KeyValueExpr: + // case *FuncLit: + // case *ParenExpr: + case *SelectorExpr: + m = n.X + case *IndexExpr: + m = n.X + // case *SliceExpr: + case *AssertExpr: + m = n.X + case *TypeSwitchGuard: + if n.Lhs != nil { + m = n.Lhs + continue + } + m = n.X + case *Operation: + if n.Y != nil { + m = n.X + continue + } + return n.Pos() + case *CallExpr: + m = n.Fun + case *ListExpr: + if len(n.ElemList) > 0 { + m = n.ElemList[0] + continue + } + return n.Pos() + // types + // case *ArrayType: + // case *SliceType: + // case *DotsType: + // case *StructType: + // case *Field: + // case *InterfaceType: + // case *FuncType: + // case *MapType: + // case *ChanType: + + // statements + // case *EmptyStmt: + // case *LabeledStmt: + // case *BlockStmt: + // case *ExprStmt: + case *SendStmt: + m = n.Chan + // case *DeclStmt: + case *AssignStmt: + m = n.Lhs + // case *BranchStmt: + // case *CallStmt: + // case *ReturnStmt: + // case *IfStmt: + // case *ForStmt: + // case *SwitchStmt: + // case *SelectStmt: + + // helper nodes + case *RangeClause: + if n.Lhs != nil { + m = n.Lhs + continue + } + m = n.X + // case *CaseClause: + // case *CommClause: + + default: + return n.Pos() + } + } +} + +// EndPos returns the approximate end position of n in the source. +// For some nodes (*Name, *BasicLit) it returns the position immediately +// following the node; for others (*BlockStmt, *SwitchStmt, etc.) 
it +// returns the position of the closing '}'; and for some (*ParenExpr) +// the returned position is the end position of the last enclosed +// expression. +// Thus, EndPos should not be used for exact demarcation of the +// end of a node in the source; it is mostly useful to determine +// scope ranges where there is some leeway. +func EndPos(n Node) Pos { + for m := n; ; { + switch n := m.(type) { + case nil: + panic("nil node") + + // packages + case *File: + return n.EOF + + // declarations + case *ImportDecl: + m = n.Path + case *ConstDecl: + if n.Values != nil { + m = n.Values + continue + } + if n.Type != nil { + m = n.Type + continue + } + if l := len(n.NameList); l > 0 { + m = n.NameList[l-1] + continue + } + return n.Pos() + case *TypeDecl: + m = n.Type + case *VarDecl: + if n.Values != nil { + m = n.Values + continue + } + if n.Type != nil { + m = n.Type + continue + } + if l := len(n.NameList); l > 0 { + m = n.NameList[l-1] + continue + } + return n.Pos() + case *FuncDecl: + if n.Body != nil { + m = n.Body + continue + } + m = n.Type + + // expressions + case *BadExpr: + return n.Pos() + case *Name: + p := n.Pos() + return MakePos(p.Base(), p.Line(), p.Col()+uint(len(n.Value))) + case *BasicLit: + p := n.Pos() + return MakePos(p.Base(), p.Line(), p.Col()+uint(len(n.Value))) + case *CompositeLit: + return n.Rbrace + case *KeyValueExpr: + m = n.Value + case *FuncLit: + m = n.Body + case *ParenExpr: + m = n.X + case *SelectorExpr: + m = n.Sel + case *IndexExpr: + m = n.Index + case *SliceExpr: + for i := len(n.Index) - 1; i >= 0; i-- { + if x := n.Index[i]; x != nil { + m = x + continue + } + } + m = n.X + case *AssertExpr: + m = n.Type + case *TypeSwitchGuard: + m = n.X + case *Operation: + if n.Y != nil { + m = n.Y + continue + } + m = n.X + case *CallExpr: + if l := lastExpr(n.ArgList); l != nil { + m = l + continue + } + m = n.Fun + case *ListExpr: + if l := lastExpr(n.ElemList); l != nil { + m = l + continue + } + return n.Pos() + + // types + case 
*ArrayType: + m = n.Elem + case *SliceType: + m = n.Elem + case *DotsType: + m = n.Elem + case *StructType: + if l := lastField(n.FieldList); l != nil { + m = l + continue + } + return n.Pos() + // TODO(gri) need to take TagList into account + case *Field: + if n.Type != nil { + m = n.Type + continue + } + m = n.Name + case *InterfaceType: + if l := lastField(n.MethodList); l != nil { + m = l + continue + } + return n.Pos() + case *FuncType: + if l := lastField(n.ResultList); l != nil { + m = l + continue + } + if l := lastField(n.ParamList); l != nil { + m = l + continue + } + return n.Pos() + case *MapType: + m = n.Value + case *ChanType: + m = n.Elem + + // statements + case *EmptyStmt: + return n.Pos() + case *LabeledStmt: + m = n.Stmt + case *BlockStmt: + return n.Rbrace + case *ExprStmt: + m = n.X + case *SendStmt: + m = n.Value + case *DeclStmt: + if l := lastDecl(n.DeclList); l != nil { + m = l + continue + } + return n.Pos() + case *AssignStmt: + m = n.Rhs + if m == nil { + p := EndPos(n.Lhs) + return MakePos(p.Base(), p.Line(), p.Col()+2) + } + case *BranchStmt: + if n.Label != nil { + m = n.Label + continue + } + return n.Pos() + case *CallStmt: + m = n.Call + case *ReturnStmt: + if n.Results != nil { + m = n.Results + continue + } + return n.Pos() + case *IfStmt: + if n.Else != nil { + m = n.Else + continue + } + m = n.Then + case *ForStmt: + m = n.Body + case *SwitchStmt: + return n.Rbrace + case *SelectStmt: + return n.Rbrace + + // helper nodes + case *RangeClause: + m = n.X + case *CaseClause: + if l := lastStmt(n.Body); l != nil { + m = l + continue + } + return n.Colon + case *CommClause: + if l := lastStmt(n.Body); l != nil { + m = l + continue + } + return n.Colon + + default: + return n.Pos() + } + } +} + +func lastDecl(list []Decl) Decl { + if l := len(list); l > 0 { + return list[l-1] + } + return nil +} + +func lastExpr(list []Expr) Expr { + if l := len(list); l > 0 { + return list[l-1] + } + return nil +} + +func lastStmt(list []Stmt) Stmt 
{ + if l := len(list); l > 0 { + return list[l-1] + } + return nil +} + +func lastField(list []*Field) *Field { + if l := len(list); l > 0 { + return list[l-1] + } + return nil +} diff --git a/src/cmd/compile/internal/syntax/printer.go b/src/cmd/compile/internal/syntax/printer.go new file mode 100644 index 0000000..62de68e --- /dev/null +++ b/src/cmd/compile/internal/syntax/printer.go @@ -0,0 +1,1020 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements printing of syntax trees in source format. + +package syntax + +import ( + "fmt" + "io" + "strings" +) + +// Form controls print formatting. +type Form uint + +const ( + _ Form = iota // default + LineForm // use spaces instead of linebreaks where possible + ShortForm // like LineForm but print "…" for non-empty function or composite literal bodies +) + +// Fprint prints node x to w in the specified form. +// It returns the number of bytes written, and whether there was an error. +func Fprint(w io.Writer, x Node, form Form) (n int, err error) { + p := printer{ + output: w, + form: form, + linebreaks: form == 0, + } + + defer func() { + n = p.written + if e := recover(); e != nil { + err = e.(writeError).err // re-panics if it's not a writeError + } + }() + + p.print(x) + p.flush(_EOF) + + return +} + +// String is a convenience function that prints n in ShortForm +// and returns the printed string. 
+func String(n Node) string { + var buf strings.Builder + _, err := Fprint(&buf, n, ShortForm) + if err != nil { + fmt.Fprintf(&buf, "<<< ERROR: %s", err) + } + return buf.String() +} + +type ctrlSymbol int + +const ( + none ctrlSymbol = iota + semi + blank + newline + indent + outdent + // comment + // eolComment +) + +type whitespace struct { + last token + kind ctrlSymbol + //text string // comment text (possibly ""); valid if kind == comment +} + +type printer struct { + output io.Writer + written int // number of bytes written + form Form + linebreaks bool // print linebreaks instead of semis + + indent int // current indentation level + nlcount int // number of consecutive newlines + + pending []whitespace // pending whitespace + lastTok token // last token (after any pending semi) processed by print +} + +// write is a thin wrapper around p.output.Write +// that takes care of accounting and error handling. +func (p *printer) write(data []byte) { + n, err := p.output.Write(data) + p.written += n + if err != nil { + panic(writeError{err}) + } +} + +var ( + tabBytes = []byte("\t\t\t\t\t\t\t\t") + newlineByte = []byte("\n") + blankByte = []byte(" ") +) + +func (p *printer) writeBytes(data []byte) { + if len(data) == 0 { + panic("expected non-empty []byte") + } + if p.nlcount > 0 && p.indent > 0 { + // write indentation + n := p.indent + for n > len(tabBytes) { + p.write(tabBytes) + n -= len(tabBytes) + } + p.write(tabBytes[:n]) + } + p.write(data) + p.nlcount = 0 +} + +func (p *printer) writeString(s string) { + p.writeBytes([]byte(s)) +} + +// If impliesSemi returns true for a non-blank line's final token tok, +// a semicolon is automatically inserted. Vice versa, a semicolon may +// be omitted in those cases. 
+func impliesSemi(tok token) bool { + switch tok { + case _Name, + _Break, _Continue, _Fallthrough, _Return, + /*_Inc, _Dec,*/ _Rparen, _Rbrack, _Rbrace: // TODO(gri) fix this + return true + } + return false +} + +// TODO(gri) provide table of []byte values for all tokens to avoid repeated string conversion + +func lineComment(text string) bool { + return strings.HasPrefix(text, "//") +} + +func (p *printer) addWhitespace(kind ctrlSymbol, text string) { + p.pending = append(p.pending, whitespace{p.lastTok, kind /*text*/}) + switch kind { + case semi: + p.lastTok = _Semi + case newline: + p.lastTok = 0 + // TODO(gri) do we need to handle /*-style comments containing newlines here? + } +} + +func (p *printer) flush(next token) { + // eliminate semis and redundant whitespace + sawNewline := next == _EOF + sawParen := next == _Rparen || next == _Rbrace + for i := len(p.pending) - 1; i >= 0; i-- { + switch p.pending[i].kind { + case semi: + k := semi + if sawParen { + sawParen = false + k = none // eliminate semi + } else if sawNewline && impliesSemi(p.pending[i].last) { + sawNewline = false + k = none // eliminate semi + } + p.pending[i].kind = k + case newline: + sawNewline = true + case blank, indent, outdent: + // nothing to do + // case comment: + // // A multi-line comment acts like a newline; and a "" + // // comment implies by definition at least one newline. 
+ // if text := p.pending[i].text; strings.HasPrefix(text, "/*") && strings.ContainsRune(text, '\n') { + // sawNewline = true + // } + // case eolComment: + // // TODO(gri) act depending on sawNewline + default: + panic("unreachable") + } + } + + // print pending + prev := none + for i := range p.pending { + switch p.pending[i].kind { + case none: + // nothing to do + case semi: + p.writeString(";") + p.nlcount = 0 + prev = semi + case blank: + if prev != blank { + // at most one blank + p.writeBytes(blankByte) + p.nlcount = 0 + prev = blank + } + case newline: + const maxEmptyLines = 1 + if p.nlcount <= maxEmptyLines { + p.write(newlineByte) + p.nlcount++ + prev = newline + } + case indent: + p.indent++ + case outdent: + p.indent-- + if p.indent < 0 { + panic("negative indentation") + } + // case comment: + // if text := p.pending[i].text; text != "" { + // p.writeString(text) + // p.nlcount = 0 + // prev = comment + // } + // // TODO(gri) should check that line comments are always followed by newline + default: + panic("unreachable") + } + } + + p.pending = p.pending[:0] // re-use underlying array +} + +func mayCombine(prev token, next byte) (b bool) { + return // for now + // switch prev { + // case lexical.Int: + // b = next == '.' // 1. + // case lexical.Add: + // b = next == '+' // ++ + // case lexical.Sub: + // b = next == '-' // -- + // case lexical.Quo: + // b = next == '*' // /* + // case lexical.Lss: + // b = next == '-' || next == '<' // <- or << + // case lexical.And: + // b = next == '&' || next == '^' // && or &^ + // } + // return +} + +func (p *printer) print(args ...interface{}) { + for i := 0; i < len(args); i++ { + switch x := args[i].(type) { + case nil: + // we should not reach here but don't crash + + case Node: + p.printNode(x) + + case token: + // _Name implies an immediately following string + // argument which is the actual value to print. 
+ var s string + if x == _Name { + i++ + if i >= len(args) { + panic("missing string argument after _Name") + } + s = args[i].(string) + } else { + s = x.String() + } + + // TODO(gri) This check seems at the wrong place since it doesn't + // take into account pending white space. + if mayCombine(p.lastTok, s[0]) { + panic("adjacent tokens combine without whitespace") + } + + if x == _Semi { + // delay printing of semi + p.addWhitespace(semi, "") + } else { + p.flush(x) + p.writeString(s) + p.nlcount = 0 + p.lastTok = x + } + + case Operator: + if x != 0 { + p.flush(_Operator) + p.writeString(x.String()) + } + + case ctrlSymbol: + switch x { + case none, semi /*, comment*/ : + panic("unreachable") + case newline: + // TODO(gri) need to handle mandatory newlines after a //-style comment + if !p.linebreaks { + x = blank + } + } + p.addWhitespace(x, "") + + // case *Comment: // comments are not Nodes + // p.addWhitespace(comment, x.Text) + + default: + panic(fmt.Sprintf("unexpected argument %v (%T)", x, x)) + } + } +} + +func (p *printer) printNode(n Node) { + // ncom := *n.Comments() + // if ncom != nil { + // // TODO(gri) in general we cannot make assumptions about whether + // // a comment is a /*- or a //-style comment since the syntax + // // tree may have been manipulated. Need to make sure the correct + // // whitespace is emitted. 
+ // for _, c := range ncom.Alone { + // p.print(c, newline) + // } + // for _, c := range ncom.Before { + // if c.Text == "" || lineComment(c.Text) { + // panic("unexpected empty line or //-style 'before' comment") + // } + // p.print(c, blank) + // } + // } + + p.printRawNode(n) + + // if ncom != nil && len(ncom.After) > 0 { + // for i, c := range ncom.After { + // if i+1 < len(ncom.After) { + // if c.Text == "" || lineComment(c.Text) { + // panic("unexpected empty line or //-style non-final 'after' comment") + // } + // } + // p.print(blank, c) + // } + // //p.print(newline) + // } +} + +func (p *printer) printRawNode(n Node) { + switch n := n.(type) { + case nil: + // we should not reach here but don't crash + + // expressions and types + case *BadExpr: + p.print(_Name, "<bad expr>") + + case *Name: + p.print(_Name, n.Value) // _Name requires actual value following immediately + + case *BasicLit: + p.print(_Name, n.Value) // _Name requires actual value following immediately + + case *FuncLit: + p.print(n.Type, blank) + if n.Body != nil { + if p.form == ShortForm { + p.print(_Lbrace) + if len(n.Body.List) > 0 { + p.print(_Name, "…") + } + p.print(_Rbrace) + } else { + p.print(n.Body) + } + } + + case *CompositeLit: + if n.Type != nil { + p.print(n.Type) + } + p.print(_Lbrace) + if p.form == ShortForm { + if len(n.ElemList) > 0 { + p.print(_Name, "…") + } + } else { + if n.NKeys > 0 && n.NKeys == len(n.ElemList) { + p.printExprLines(n.ElemList) + } else { + p.printExprList(n.ElemList) + } + } + p.print(_Rbrace) + + case *ParenExpr: + p.print(_Lparen, n.X, _Rparen) + + case *SelectorExpr: + p.print(n.X, _Dot, n.Sel) + + case *IndexExpr: + p.print(n.X, _Lbrack, n.Index, _Rbrack) + + case *SliceExpr: + p.print(n.X, _Lbrack) + if i := n.Index[0]; i != nil { + p.printNode(i) + } + p.print(_Colon) + if j := n.Index[1]; j != nil { + p.printNode(j) + } + if k := n.Index[2]; k != nil { + p.print(_Colon, k) + } + p.print(_Rbrack) + + case *AssertExpr: + p.print(n.X, _Dot, 
_Lparen, n.Type, _Rparen) + + case *TypeSwitchGuard: + if n.Lhs != nil { + p.print(n.Lhs, blank, _Define, blank) + } + p.print(n.X, _Dot, _Lparen, _Type, _Rparen) + + case *CallExpr: + p.print(n.Fun, _Lparen) + p.printExprList(n.ArgList) + if n.HasDots { + p.print(_DotDotDot) + } + p.print(_Rparen) + + case *Operation: + if n.Y == nil { + // unary expr + p.print(n.Op) + // if n.Op == lexical.Range { + // p.print(blank) + // } + p.print(n.X) + } else { + // binary expr + // TODO(gri) eventually take precedence into account + // to control possibly missing parentheses + p.print(n.X, blank, n.Op, blank, n.Y) + } + + case *KeyValueExpr: + p.print(n.Key, _Colon, blank, n.Value) + + case *ListExpr: + p.printExprList(n.ElemList) + + case *ArrayType: + var len interface{} = _DotDotDot + if n.Len != nil { + len = n.Len + } + p.print(_Lbrack, len, _Rbrack, n.Elem) + + case *SliceType: + p.print(_Lbrack, _Rbrack, n.Elem) + + case *DotsType: + p.print(_DotDotDot, n.Elem) + + case *StructType: + p.print(_Struct) + if len(n.FieldList) > 0 && p.linebreaks { + p.print(blank) + } + p.print(_Lbrace) + if len(n.FieldList) > 0 { + if p.linebreaks { + p.print(newline, indent) + p.printFieldList(n.FieldList, n.TagList, _Semi) + p.print(outdent, newline) + } else { + p.printFieldList(n.FieldList, n.TagList, _Semi) + } + } + p.print(_Rbrace) + + case *FuncType: + p.print(_Func) + p.printSignature(n) + + case *InterfaceType: + p.print(_Interface) + if p.linebreaks && len(n.MethodList) > 1 { + p.print(blank) + p.print(_Lbrace) + p.print(newline, indent) + p.printMethodList(n.MethodList) + p.print(outdent, newline) + } else { + p.print(_Lbrace) + p.printMethodList(n.MethodList) + } + p.print(_Rbrace) + + case *MapType: + p.print(_Map, _Lbrack, n.Key, _Rbrack, n.Value) + + case *ChanType: + if n.Dir == RecvOnly { + p.print(_Arrow) + } + p.print(_Chan) + if n.Dir == SendOnly { + p.print(_Arrow) + } + p.print(blank) + if e, _ := n.Elem.(*ChanType); n.Dir == 0 && e != nil && e.Dir == RecvOnly { 
+ // don't print chan (<-chan T) as chan <-chan T + p.print(_Lparen) + p.print(n.Elem) + p.print(_Rparen) + } else { + p.print(n.Elem) + } + + // statements + case *DeclStmt: + p.printDecl(n.DeclList) + + case *EmptyStmt: + // nothing to print + + case *LabeledStmt: + p.print(outdent, n.Label, _Colon, indent, newline, n.Stmt) + + case *ExprStmt: + p.print(n.X) + + case *SendStmt: + p.print(n.Chan, blank, _Arrow, blank, n.Value) + + case *AssignStmt: + p.print(n.Lhs) + if n.Rhs == nil { + // TODO(gri) This is going to break the mayCombine + // check once we enable that again. + p.print(n.Op, n.Op) // ++ or -- + } else { + p.print(blank, n.Op, _Assign, blank) + p.print(n.Rhs) + } + + case *CallStmt: + p.print(n.Tok, blank, n.Call) + + case *ReturnStmt: + p.print(_Return) + if n.Results != nil { + p.print(blank, n.Results) + } + + case *BranchStmt: + p.print(n.Tok) + if n.Label != nil { + p.print(blank, n.Label) + } + + case *BlockStmt: + p.print(_Lbrace) + if len(n.List) > 0 { + p.print(newline, indent) + p.printStmtList(n.List, true) + p.print(outdent, newline) + } + p.print(_Rbrace) + + case *IfStmt: + p.print(_If, blank) + if n.Init != nil { + p.print(n.Init, _Semi, blank) + } + p.print(n.Cond, blank, n.Then) + if n.Else != nil { + p.print(blank, _Else, blank, n.Else) + } + + case *SwitchStmt: + p.print(_Switch, blank) + if n.Init != nil { + p.print(n.Init, _Semi, blank) + } + if n.Tag != nil { + p.print(n.Tag, blank) + } + p.printSwitchBody(n.Body) + + case *SelectStmt: + p.print(_Select, blank) // for now + p.printSelectBody(n.Body) + + case *RangeClause: + if n.Lhs != nil { + tok := _Assign + if n.Def { + tok = _Define + } + p.print(n.Lhs, blank, tok, blank) + } + p.print(_Range, blank, n.X) + + case *ForStmt: + p.print(_For, blank) + if n.Init == nil && n.Post == nil { + if n.Cond != nil { + p.print(n.Cond, blank) + } + } else { + if n.Init != nil { + p.print(n.Init) + // TODO(gri) clean this up + if _, ok := n.Init.(*RangeClause); ok { + p.print(blank, 
n.Body) + break + } + } + p.print(_Semi, blank) + if n.Cond != nil { + p.print(n.Cond) + } + p.print(_Semi, blank) + if n.Post != nil { + p.print(n.Post, blank) + } + } + p.print(n.Body) + + case *ImportDecl: + if n.Group == nil { + p.print(_Import, blank) + } + if n.LocalPkgName != nil { + p.print(n.LocalPkgName, blank) + } + p.print(n.Path) + + case *ConstDecl: + if n.Group == nil { + p.print(_Const, blank) + } + p.printNameList(n.NameList) + if n.Type != nil { + p.print(blank, n.Type) + } + if n.Values != nil { + p.print(blank, _Assign, blank, n.Values) + } + + case *TypeDecl: + if n.Group == nil { + p.print(_Type, blank) + } + p.print(n.Name) + if n.TParamList != nil { + p.printParameterList(n.TParamList, _Type) + } + p.print(blank) + if n.Alias { + p.print(_Assign, blank) + } + p.print(n.Type) + + case *VarDecl: + if n.Group == nil { + p.print(_Var, blank) + } + p.printNameList(n.NameList) + if n.Type != nil { + p.print(blank, n.Type) + } + if n.Values != nil { + p.print(blank, _Assign, blank, n.Values) + } + + case *FuncDecl: + p.print(_Func, blank) + if r := n.Recv; r != nil { + p.print(_Lparen) + if r.Name != nil { + p.print(r.Name, blank) + } + p.printNode(r.Type) + p.print(_Rparen, blank) + } + p.print(n.Name) + if n.TParamList != nil { + p.printParameterList(n.TParamList, _Func) + } + p.printSignature(n.Type) + if n.Body != nil { + p.print(blank, n.Body) + } + + case *printGroup: + p.print(n.Tok, blank, _Lparen) + if len(n.Decls) > 0 { + p.print(newline, indent) + for _, d := range n.Decls { + p.printNode(d) + p.print(_Semi, newline) + } + p.print(outdent) + } + p.print(_Rparen) + + // files + case *File: + p.print(_Package, blank, n.PkgName) + if len(n.DeclList) > 0 { + p.print(_Semi, newline, newline) + p.printDeclList(n.DeclList) + } + + default: + panic(fmt.Sprintf("syntax.Iterate: unexpected node type %T", n)) + } +} + +func (p *printer) printFields(fields []*Field, tags []*BasicLit, i, j int) { + if i+1 == j && fields[i].Name == nil { + // 
anonymous field + p.printNode(fields[i].Type) + } else { + for k, f := range fields[i:j] { + if k > 0 { + p.print(_Comma, blank) + } + p.printNode(f.Name) + } + p.print(blank) + p.printNode(fields[i].Type) + } + if i < len(tags) && tags[i] != nil { + p.print(blank) + p.printNode(tags[i]) + } +} + +func (p *printer) printFieldList(fields []*Field, tags []*BasicLit, sep token) { + i0 := 0 + var typ Expr + for i, f := range fields { + if f.Name == nil || f.Type != typ { + if i0 < i { + p.printFields(fields, tags, i0, i) + p.print(sep, newline) + i0 = i + } + typ = f.Type + } + } + p.printFields(fields, tags, i0, len(fields)) +} + +func (p *printer) printMethodList(methods []*Field) { + for i, m := range methods { + if i > 0 { + p.print(_Semi, newline) + } + if m.Name != nil { + p.printNode(m.Name) + p.printSignature(m.Type.(*FuncType)) + } else { + p.printNode(m.Type) + } + } +} + +func (p *printer) printNameList(list []*Name) { + for i, x := range list { + if i > 0 { + p.print(_Comma, blank) + } + p.printNode(x) + } +} + +func (p *printer) printExprList(list []Expr) { + for i, x := range list { + if i > 0 { + p.print(_Comma, blank) + } + p.printNode(x) + } +} + +func (p *printer) printExprLines(list []Expr) { + if len(list) > 0 { + p.print(newline, indent) + for _, x := range list { + p.print(x, _Comma, newline) + } + p.print(outdent) + } +} + +func groupFor(d Decl) (token, *Group) { + switch d := d.(type) { + case *ImportDecl: + return _Import, d.Group + case *ConstDecl: + return _Const, d.Group + case *TypeDecl: + return _Type, d.Group + case *VarDecl: + return _Var, d.Group + case *FuncDecl: + return _Func, nil + default: + panic("unreachable") + } +} + +type printGroup struct { + node + Tok token + Decls []Decl +} + +func (p *printer) printDecl(list []Decl) { + tok, group := groupFor(list[0]) + + if group == nil { + if len(list) != 1 { + panic("unreachable") + } + p.printNode(list[0]) + return + } + + // if _, ok := list[0].(*EmptyDecl); ok { + // if len(list) != 
1 { + // panic("unreachable") + // } + // // TODO(gri) if there are comments inside the empty + // // group, we may need to keep the list non-nil + // list = nil + // } + + // printGroup is here for consistent comment handling + // (this is not yet used) + var pg printGroup + // *pg.Comments() = *group.Comments() + pg.Tok = tok + pg.Decls = list + p.printNode(&pg) +} + +func (p *printer) printDeclList(list []Decl) { + i0 := 0 + var tok token + var group *Group + for i, x := range list { + if s, g := groupFor(x); g == nil || g != group { + if i0 < i { + p.printDecl(list[i0:i]) + p.print(_Semi, newline) + // print empty line between different declaration groups, + // different kinds of declarations, or between functions + if g != group || s != tok || s == _Func { + p.print(newline) + } + i0 = i + } + tok, group = s, g + } + } + p.printDecl(list[i0:]) +} + +func (p *printer) printSignature(sig *FuncType) { + p.printParameterList(sig.ParamList, 0) + if list := sig.ResultList; list != nil { + p.print(blank) + if len(list) == 1 && list[0].Name == nil { + p.printNode(list[0].Type) + } else { + p.printParameterList(list, 0) + } + } +} + +// If tok != 0 print a type parameter list: tok == _Type means +// a type parameter list for a type, tok == _Func means a type +// parameter list for a func. 
+func (p *printer) printParameterList(list []*Field, tok token) { + open, close := _Lparen, _Rparen + if tok != 0 { + open, close = _Lbrack, _Rbrack + } + p.print(open) + for i, f := range list { + if i > 0 { + p.print(_Comma, blank) + } + if f.Name != nil { + p.printNode(f.Name) + if i+1 < len(list) { + f1 := list[i+1] + if f1.Name != nil && f1.Type == f.Type { + continue // no need to print type + } + } + p.print(blank) + } + p.printNode(unparen(f.Type)) // no need for (extra) parentheses around parameter types + } + // A type parameter list [P T] where the name P and the type expression T syntactically + // combine to another valid (value) expression requires a trailing comma, as in [P *T,] + // (or an enclosing interface as in [P interface(*T)]), so that the type parameter list + // is not parsed as an array length [P*T]. + if tok == _Type && len(list) == 1 && combinesWithName(list[0].Type) { + p.print(_Comma) + } + p.print(close) +} + +// combinesWithName reports whether a name followed by the expression x +// syntactically combines to another valid (value) expression. For instance +// using *T for x, "name *T" syntactically appears as the expression x*T. +// On the other hand, using P|Q or *P|~Q for x, "name P|Q" or name *P|~Q" +// cannot be combined into a valid (value) expression. +func combinesWithName(x Expr) bool { + switch x := x.(type) { + case *Operation: + if x.Y == nil { + // name *x.X combines to name*x.X if x.X is not a type element + return x.Op == Mul && !isTypeElem(x.X) + } + // binary expressions + return combinesWithName(x.X) && !isTypeElem(x.Y) + case *ParenExpr: + // name(x) combines but we are making sure at + // the call site that x is never parenthesized. 
+ panic("unexpected parenthesized expression") + } + return false +} + +func (p *printer) printStmtList(list []Stmt, braces bool) { + for i, x := range list { + p.print(x, _Semi) + if i+1 < len(list) { + p.print(newline) + } else if braces { + // Print an extra semicolon if the last statement is + // an empty statement and we are in a braced block + // because one semicolon is automatically removed. + if _, ok := x.(*EmptyStmt); ok { + p.print(x, _Semi) + } + } + } +} + +func (p *printer) printSwitchBody(list []*CaseClause) { + p.print(_Lbrace) + if len(list) > 0 { + p.print(newline) + for i, c := range list { + p.printCaseClause(c, i+1 == len(list)) + p.print(newline) + } + } + p.print(_Rbrace) +} + +func (p *printer) printSelectBody(list []*CommClause) { + p.print(_Lbrace) + if len(list) > 0 { + p.print(newline) + for i, c := range list { + p.printCommClause(c, i+1 == len(list)) + p.print(newline) + } + } + p.print(_Rbrace) +} + +func (p *printer) printCaseClause(c *CaseClause, braces bool) { + if c.Cases != nil { + p.print(_Case, blank, c.Cases) + } else { + p.print(_Default) + } + p.print(_Colon) + if len(c.Body) > 0 { + p.print(newline, indent) + p.printStmtList(c.Body, braces) + p.print(outdent) + } +} + +func (p *printer) printCommClause(c *CommClause, braces bool) { + if c.Comm != nil { + p.print(_Case, blank) + p.print(c.Comm) + } else { + p.print(_Default) + } + p.print(_Colon) + if len(c.Body) > 0 { + p.print(newline, indent) + p.printStmtList(c.Body, braces) + p.print(outdent) + } +} diff --git a/src/cmd/compile/internal/syntax/printer_test.go b/src/cmd/compile/internal/syntax/printer_test.go new file mode 100644 index 0000000..ceb512e --- /dev/null +++ b/src/cmd/compile/internal/syntax/printer_test.go @@ -0,0 +1,272 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syntax + +import ( + "fmt" + "io" + "os" + "strings" + "testing" +) + +func TestPrint(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + ast, _ := ParseFile(*src_, func(err error) { t.Error(err) }, nil, 0) + + if ast != nil { + Fprint(testOut(), ast, LineForm) + fmt.Println() + } +} + +type shortBuffer struct { + buf []byte +} + +func (w *shortBuffer) Write(data []byte) (n int, err error) { + w.buf = append(w.buf, data...) + n = len(data) + if len(w.buf) > 10 { + err = io.ErrShortBuffer + } + return +} + +func TestPrintError(t *testing.T) { + const src = "package p; var x int" + ast, err := Parse(nil, strings.NewReader(src), nil, nil, 0) + if err != nil { + t.Fatal(err) + } + + var buf shortBuffer + _, err = Fprint(&buf, ast, 0) + if err == nil || err != io.ErrShortBuffer { + t.Errorf("got err = %s, want %s", err, io.ErrShortBuffer) + } +} + +var stringTests = [][2]string{ + dup("package p"), + dup("package p; type _ int; type T1 = struct{}; type ( _ *struct{}; T2 = float32 )"), + + // generic type declarations (given type separated with blank from LHS) + dup("package p; type _[T any] struct{}"), + dup("package p; type _[A, B, C interface{m()}] struct{}"), + dup("package p; type _[T any, A, B, C interface{m()}, X, Y, Z interface{~int}] struct{}"), + + dup("package p; type _[P *struct{}] struct{}"), + dup("package p; type _[P *T,] struct{}"), + dup("package p; type _[P *T, _ any] struct{}"), + {"package p; type _[P (*T),] struct{}", "package p; type _[P *T,] struct{}"}, + {"package p; type _[P (*T), _ any] struct{}", "package p; type _[P *T, _ any] struct{}"}, + {"package p; type _[P (T),] struct{}", "package p; type _[P T] struct{}"}, + {"package p; type _[P (T), _ any] struct{}", "package p; type _[P T, _ any] struct{}"}, + + {"package p; type _[P (*struct{})] struct{}", "package p; type _[P *struct{}] struct{}"}, + {"package p; type _[P ([]int)] struct{}", "package p; type _[P []int] struct{}"}, + {"package p; type 
_[P ([]int) | int] struct{}", "package p; type _[P []int | int] struct{}"}, + + // a type literal in an |-expression indicates a type parameter list (blank after type parameter list and type) + dup("package p; type _[P *[]int] struct{}"), + dup("package p; type _[P T | T] struct{}"), + dup("package p; type _[P T | T | T | T] struct{}"), + dup("package p; type _[P *T | T, Q T] struct{}"), + dup("package p; type _[P *[]T | T] struct{}"), + dup("package p; type _[P *T | T | T | T | ~T] struct{}"), + dup("package p; type _[P *T | T | T | ~T | T] struct{}"), + dup("package p; type _[P *T | T | struct{} | T] struct{}"), + dup("package p; type _[P <-chan int] struct{}"), + dup("package p; type _[P *T | struct{} | T] struct{}"), + + // a trailing comma always indicates a (possibly invalid) type parameter list (blank after type parameter list and type) + dup("package p; type _[P *T,] struct{}"), + dup("package p; type _[P *T | T,] struct{}"), + dup("package p; type _[P *T | <-T | T,] struct{}"), + + // slice/array type declarations (no blank between array length and element type) + dup("package p; type _ []byte"), + dup("package p; type _ [n]byte"), + dup("package p; type _ [P(T)]byte"), + dup("package p; type _ [P((T))]byte"), + dup("package p; type _ [P * *T]byte"), + dup("package p; type _ [P * T]byte"), + dup("package p; type _ [P(*T)]byte"), + dup("package p; type _ [P(**T)]byte"), + dup("package p; type _ [P * T - T]byte"), + dup("package p; type _ [P * T - T]byte"), + dup("package p; type _ [P * T | T]byte"), + dup("package p; type _ [P * T | <-T | T]byte"), + + // generic function declarations + dup("package p; func _[T any]()"), + dup("package p; func _[A, B, C interface{m()}]()"), + dup("package p; func _[T any, A, B, C interface{m()}, X, Y, Z interface{~int}]()"), + + // generic functions with elided interfaces in type constraints + dup("package p; func _[P *T]() {}"), + dup("package p; func _[P *T | T | T | T | ~T]() {}"), + dup("package p; func _[P *T | T | 
struct{} | T]() {}"), + dup("package p; func _[P ~int, Q int | string]() {}"), + dup("package p; func _[P struct{f int}, Q *P]() {}"), + + // methods with generic receiver types + dup("package p; func (R[T]) _()"), + dup("package p; func (*R[A, B, C]) _()"), + dup("package p; func (_ *R[A, B, C]) _()"), + + // channels + dup("package p; type _ chan chan int"), + dup("package p; type _ chan (<-chan int)"), + dup("package p; type _ chan chan<- int"), + + dup("package p; type _ <-chan chan int"), + dup("package p; type _ <-chan <-chan int"), + dup("package p; type _ <-chan chan<- int"), + + dup("package p; type _ chan<- chan int"), + dup("package p; type _ chan<- <-chan int"), + dup("package p; type _ chan<- chan<- int"), + + // TODO(gri) expand +} + +func TestPrintString(t *testing.T) { + for _, test := range stringTests { + ast, err := Parse(nil, strings.NewReader(test[0]), nil, nil, 0) + if err != nil { + t.Error(err) + continue + } + if got := String(ast); got != test[1] { + t.Errorf("%q: got %q", test[1], got) + } + } +} + +func testOut() io.Writer { + if testing.Verbose() { + return os.Stdout + } + return io.Discard +} + +func dup(s string) [2]string { return [2]string{s, s} } + +var exprTests = [][2]string{ + // basic type literals + dup("x"), + dup("true"), + dup("42"), + dup("3.1415"), + dup("2.71828i"), + dup(`'a'`), + dup(`"foo"`), + dup("`bar`"), + + // func and composite literals + dup("func() {}"), + dup("[]int{}"), + {"func(x int) complex128 { return 0 }", "func(x int) complex128 {…}"}, + {"[]int{1, 2, 3}", "[]int{…}"}, + + // type expressions + dup("[1 << 10]byte"), + dup("[]int"), + dup("*int"), + dup("struct{x int}"), + dup("func()"), + dup("func(int, float32) string"), + dup("interface{m()}"), + dup("interface{m() string; n(x int)}"), + dup("interface{~int}"), + dup("interface{~int | ~float64 | ~string}"), + dup("interface{~int; m()}"), + dup("interface{~int | ~float64 | ~string; m() string; n(x int)}"), + dup("map[string]int"), + dup("chan E"), + 
dup("<-chan E"), + dup("chan<- E"), + + // new interfaces + dup("interface{int}"), + dup("interface{~int}"), + dup("interface{~int}"), + dup("interface{int | string}"), + dup("interface{~int | ~string; float64; m()}"), + dup("interface{~a | ~b | ~c; ~int | ~string; float64; m()}"), + dup("interface{~T[int, string] | string}"), + + // non-type expressions + dup("(x)"), + dup("x.f"), + dup("a[i]"), + + dup("s[:]"), + dup("s[i:]"), + dup("s[:j]"), + dup("s[i:j]"), + dup("s[:j:k]"), + dup("s[i:j:k]"), + + dup("x.(T)"), + + dup("x.([10]int)"), + dup("x.([...]int)"), + + dup("x.(struct{})"), + dup("x.(struct{x int; y, z float32; E})"), + + dup("x.(func())"), + dup("x.(func(x int))"), + dup("x.(func() int)"), + dup("x.(func(x, y int, z float32) (r int))"), + dup("x.(func(a, b, c int))"), + dup("x.(func(x ...T))"), + + dup("x.(interface{})"), + dup("x.(interface{m(); n(x int); E})"), + dup("x.(interface{m(); n(x int) T; E; F})"), + + dup("x.(map[K]V)"), + + dup("x.(chan E)"), + dup("x.(<-chan E)"), + dup("x.(chan<- chan int)"), + dup("x.(chan<- <-chan int)"), + dup("x.(<-chan chan int)"), + dup("x.(chan (<-chan int))"), + + dup("f()"), + dup("f(x)"), + dup("int(x)"), + dup("f(x, x + y)"), + dup("f(s...)"), + dup("f(a, s...)"), + + dup("*x"), + dup("&x"), + dup("x + y"), + dup("x + y << (2 * s)"), +} + +func TestShortString(t *testing.T) { + for _, test := range exprTests { + src := "package p; var _ = " + test[0] + ast, err := Parse(nil, strings.NewReader(src), nil, nil, 0) + if err != nil { + t.Errorf("%s: %s", test[0], err) + continue + } + x := ast.DeclList[0].(*VarDecl).Values + if got := String(x); got != test[1] { + t.Errorf("%s: got %s, want %s", test[0], got, test[1]) + } + } +} diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go new file mode 100644 index 0000000..807d838 --- /dev/null +++ b/src/cmd/compile/internal/syntax/scanner.go @@ -0,0 +1,881 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements scanner, a lexical tokenizer for +// Go source. After initialization, consecutive calls of +// next advance the scanner one token at a time. +// +// This file, source.go, tokens.go, and token_string.go are self-contained +// (`go tool compile scanner.go source.go tokens.go token_string.go` compiles) +// and thus could be made into their own package. + +package syntax + +import ( + "fmt" + "io" + "unicode" + "unicode/utf8" +) + +// The mode flags below control which comments are reported +// by calling the error handler. If no flag is set, comments +// are ignored. +const ( + comments uint = 1 << iota // call handler for all comments + directives // call handler for directives only +) + +type scanner struct { + source + mode uint + nlsemi bool // if set '\n' and EOF translate to ';' + + // current token, valid after calling next() + line, col uint + blank bool // line is blank up to col + tok token + lit string // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF"); may be malformed if bad is true + bad bool // valid if tok is _Literal, true if a syntax error occurred, lit may be malformed + kind LitKind // valid if tok is _Literal + op Operator // valid if tok is _Operator, _Star, _AssignOp, or _IncOp + prec int // valid if tok is _Operator, _Star, _AssignOp, or _IncOp +} + +func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) { + s.source.init(src, errh) + s.mode = mode + s.nlsemi = false +} + +// errorf reports an error at the most recently read character position. +func (s *scanner) errorf(format string, args ...interface{}) { + s.error(fmt.Sprintf(format, args...)) +} + +// errorAtf reports an error at a byte column offset relative to the current token start. 
+func (s *scanner) errorAtf(offset int, format string, args ...interface{}) { + s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...)) +} + +// setLit sets the scanner state for a recognized _Literal token. +func (s *scanner) setLit(kind LitKind, ok bool) { + s.nlsemi = true + s.tok = _Literal + s.lit = string(s.segment()) + s.bad = !ok + s.kind = kind +} + +// next advances the scanner by reading the next token. +// +// If a read, source encoding, or lexical error occurs, next calls +// the installed error handler with the respective error position +// and message. The error message is guaranteed to be non-empty and +// never starts with a '/'. The error handler must exist. +// +// If the scanner mode includes the comments flag and a comment +// (including comments containing directives) is encountered, the +// error handler is also called with each comment position and text +// (including opening /* or // and closing */, but without a newline +// at the end of line comments). Comment text always starts with a / +// which can be used to distinguish these handler calls from errors. +// +// If the scanner mode includes the directives (but not the comments) +// flag, only comments containing a //line, /*line, or //go: directive +// are reported, in the same way as regular comments. 
func (s *scanner) next() {
	// nlsemi records whether the previous token may be terminated by a
	// newline (automatic semicolon insertion); it is consumed here and
	// reset for the token about to be scanned.
	nlsemi := s.nlsemi
	s.nlsemi = false

redo:
	// skip white space
	s.stop()
	startLine, startCol := s.pos()
	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !nlsemi || s.ch == '\r' {
		s.nextch()
	}

	// token start
	s.line, s.col = s.pos()
	// blank: only white space precedes the token on its line
	// (we skipped across a newline, or skipping began at the line start).
	s.blank = s.line > startLine || startCol == colbase
	s.start()
	if isLetter(s.ch) || s.ch >= utf8.RuneSelf && s.atIdentChar(true) {
		s.nextch()
		s.ident()
		return
	}

	switch s.ch {
	case -1: // EOF
		if nlsemi {
			s.lit = "EOF"
			s.tok = _Semi
			break
		}
		s.tok = _EOF

	case '\n':
		// only reached when nlsemi is set (otherwise '\n' is white space)
		s.nextch()
		s.lit = "newline"
		s.tok = _Semi

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		s.number(false)

	case '"':
		s.stdString()

	case '`':
		s.rawString()

	case '\'':
		s.rune()

	case '(':
		s.nextch()
		s.tok = _Lparen

	case '[':
		s.nextch()
		s.tok = _Lbrack

	case '{':
		s.nextch()
		s.tok = _Lbrace

	case ',':
		s.nextch()
		s.tok = _Comma

	case ';':
		s.nextch()
		s.lit = "semicolon"
		s.tok = _Semi

	case ')':
		s.nextch()
		s.nlsemi = true
		s.tok = _Rparen

	case ']':
		s.nextch()
		s.nlsemi = true
		s.tok = _Rbrack

	case '}':
		s.nextch()
		s.nlsemi = true
		s.tok = _Rbrace

	case ':':
		s.nextch()
		if s.ch == '=' {
			s.nextch()
			s.tok = _Define
			break
		}
		s.tok = _Colon

	case '.':
		s.nextch()
		if isDecimal(s.ch) {
			s.number(true)
			break
		}
		if s.ch == '.' {
			s.nextch()
			if s.ch == '.' {
				s.nextch()
				s.tok = _DotDotDot
				break
			}
			// Exactly two dots: back up so the second '.' is
			// rescanned as the start of the next token.
			s.rewind() // now s.ch holds 1st '.'
			s.nextch() // consume 1st '.' again
		}
		s.tok = _Dot

	case '+':
		s.nextch()
		s.op, s.prec = Add, precAdd
		if s.ch != '+' {
			goto assignop
		}
		s.nextch()
		s.nlsemi = true
		s.tok = _IncOp

	case '-':
		s.nextch()
		s.op, s.prec = Sub, precAdd
		if s.ch != '-' {
			goto assignop
		}
		s.nextch()
		s.nlsemi = true
		s.tok = _IncOp

	case '*':
		s.nextch()
		s.op, s.prec = Mul, precMul
		// don't goto assignop - want _Star token
		if s.ch == '=' {
			s.nextch()
			s.tok = _AssignOp
			break
		}
		s.tok = _Star

	case '/':
		s.nextch()
		if s.ch == '/' {
			s.nextch()
			s.lineComment()
			goto redo
		}
		if s.ch == '*' {
			s.nextch()
			s.fullComment()
			if line, _ := s.pos(); line > s.line && nlsemi {
				// A multi-line comment acts like a newline;
				// it translates to a ';' if nlsemi is set.
				s.lit = "newline"
				s.tok = _Semi
				break
			}
			goto redo
		}
		s.op, s.prec = Div, precMul
		goto assignop

	case '%':
		s.nextch()
		s.op, s.prec = Rem, precMul
		goto assignop

	case '&':
		s.nextch()
		if s.ch == '&' {
			s.nextch()
			s.op, s.prec = AndAnd, precAndAnd
			s.tok = _Operator
			break
		}
		s.op, s.prec = And, precMul
		if s.ch == '^' {
			s.nextch()
			s.op = AndNot
		}
		goto assignop

	case '|':
		s.nextch()
		if s.ch == '|' {
			s.nextch()
			s.op, s.prec = OrOr, precOrOr
			s.tok = _Operator
			break
		}
		s.op, s.prec = Or, precAdd
		goto assignop

	case '^':
		s.nextch()
		s.op, s.prec = Xor, precAdd
		goto assignop

	case '<':
		s.nextch()
		if s.ch == '=' {
			s.nextch()
			s.op, s.prec = Leq, precCmp
			s.tok = _Operator
			break
		}
		if s.ch == '<' {
			s.nextch()
			s.op, s.prec = Shl, precMul
			goto assignop
		}
		if s.ch == '-' {
			s.nextch()
			s.tok = _Arrow
			break
		}
		s.op, s.prec = Lss, precCmp
		s.tok = _Operator

	case '>':
		s.nextch()
		if s.ch == '=' {
			s.nextch()
			s.op, s.prec = Geq, precCmp
			s.tok = _Operator
			break
		}
		if s.ch == '>' {
			s.nextch()
			s.op, s.prec = Shr, precMul
			goto assignop
		}
		s.op, s.prec = Gtr, precCmp
		s.tok = _Operator

	case '=':
		s.nextch()
		if s.ch == '=' {
			s.nextch()
			s.op, s.prec = Eql, precCmp
			s.tok = _Operator
			break
		}
		s.tok = _Assign

	case '!':
		s.nextch()
		if s.ch == '=' {
			s.nextch()
			s.op, s.prec = Neq, precCmp
			s.tok = _Operator
			break
		}
		s.op, s.prec = Not, 0
		s.tok = _Operator

	case '~':
		s.nextch()
		s.op, s.prec = Tilde, 0
		s.tok = _Operator

	default:
		s.errorf("invalid character %#U", s.ch)
		s.nextch()
		goto redo
	}

	return

	// Shared tail for binary operators that may be followed by '='
	// to form an assignment operator (e.g. "+=").
assignop:
	if s.ch == '=' {
		s.nextch()
		s.tok = _AssignOp
		return
	}
	s.tok = _Operator
}

// ident scans the remainder of an identifier (the first character has
// already been consumed) and recognizes keywords via keywordMap.
func (s *scanner) ident() {
	// accelerate common case (7bit ASCII)
	for isLetter(s.ch) || isDecimal(s.ch) {
		s.nextch()
	}

	// general case
	if s.ch >= utf8.RuneSelf {
		for s.atIdentChar(false) {
			s.nextch()
		}
	}

	// possibly a keyword
	lit := s.segment()
	if len(lit) >= 2 {
		if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
			// Only break, continue, fallthrough, and return may be
			// terminated by a newline.
			s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
			s.tok = tok
			return
		}
	}

	s.nlsemi = true
	s.lit = string(lit)
	s.tok = _Name
}

// tokStrFast is a faster version of token.String, which assumes that tok
// is one of the valid tokens - and can thus skip bounds checks.
func tokStrFast(tok token) string {
	return _token_name[_token_index[tok-1]:_token_index[tok]]
}

// atIdentChar reports whether s.ch may be part of an identifier;
// first indicates whether it would be the identifier's first character.
// Invalid characters are reported via the error handler but still
// consumed as part of the identifier (return value true).
func (s *scanner) atIdentChar(first bool) bool {
	switch {
	case unicode.IsLetter(s.ch) || s.ch == '_':
		// ok
	case unicode.IsDigit(s.ch):
		if first {
			s.errorf("identifier cannot begin with digit %#U", s.ch)
		}
	case s.ch >= utf8.RuneSelf:
		s.errorf("invalid character %#U in identifier", s.ch)
	default:
		return false
	}
	return true
}

// hash is a perfect hash function for keywords.
// It assumes that s has at least length 2.
+func hash(s []byte) uint { + return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1) +} + +var keywordMap [1 << 6]token // size must be power of two + +func init() { + // populate keywordMap + for tok := _Break; tok <= _Var; tok++ { + h := hash([]byte(tok.String())) + if keywordMap[h] != 0 { + panic("imperfect hash") + } + keywordMap[h] = tok + } +} + +func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter +func isLetter(ch rune) bool { return 'a' <= lower(ch) && lower(ch) <= 'z' || ch == '_' } +func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' } +func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' } + +// digits accepts the sequence { digit | '_' }. +// If base <= 10, digits accepts any decimal digit but records +// the index (relative to the literal start) of a digit >= base +// in *invalid, if *invalid < 0. +// digits returns a bitset describing whether the sequence contained +// digits (bit 0 is set), or separators '_' (bit 1 is set). 
func (s *scanner) digits(base int, invalid *int) (digsep int) {
	if base <= 10 {
		max := rune('0' + base) // first decimal digit not valid in this base
		for isDecimal(s.ch) || s.ch == '_' {
			ds := 1
			if s.ch == '_' {
				ds = 2
			} else if s.ch >= max && *invalid < 0 {
				_, col := s.pos()
				*invalid = int(col - s.col) // record invalid rune index
			}
			digsep |= ds
			s.nextch()
		}
	} else {
		for isHex(s.ch) || s.ch == '_' {
			ds := 1
			if s.ch == '_' {
				ds = 2
			}
			digsep |= ds
			s.nextch()
		}
	}
	return
}

// number scans a number literal (integer, float, or imaginary).
// seenPoint indicates that the caller already consumed a leading '.'
// followed by a decimal digit, so only the fractional part remains.
func (s *scanner) number(seenPoint bool) {
	ok := true
	kind := IntLit
	base := 10        // number base
	prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
	digsep := 0       // bit 0: digit present, bit 1: '_' present
	invalid := -1     // index of invalid digit in literal, or < 0

	// integer part
	if !seenPoint {
		if s.ch == '0' {
			s.nextch()
			switch lower(s.ch) {
			case 'x':
				s.nextch()
				base, prefix = 16, 'x'
			case 'o':
				s.nextch()
				base, prefix = 8, 'o'
			case 'b':
				s.nextch()
				base, prefix = 2, 'b'
			default:
				base, prefix = 8, '0'
				digsep = 1 // leading 0
			}
		}
		digsep |= s.digits(base, &invalid)
		if s.ch == '.' {
			if prefix == 'o' || prefix == 'b' {
				s.errorf("invalid radix point in %s literal", baseName(base))
				ok = false
			}
			s.nextch()
			seenPoint = true
		}
	}

	// fractional part
	if seenPoint {
		kind = FloatLit
		digsep |= s.digits(base, &invalid)
	}

	if digsep&1 == 0 && ok {
		s.errorf("%s literal has no digits", baseName(base))
		ok = false
	}

	// exponent
	if e := lower(s.ch); e == 'e' || e == 'p' {
		if ok {
			switch {
			case e == 'e' && prefix != 0 && prefix != '0':
				s.errorf("%q exponent requires decimal mantissa", s.ch)
				ok = false
			case e == 'p' && prefix != 'x':
				s.errorf("%q exponent requires hexadecimal mantissa", s.ch)
				ok = false
			}
		}
		s.nextch()
		kind = FloatLit
		if s.ch == '+' || s.ch == '-' {
			s.nextch()
		}
		digsep = s.digits(10, nil) | digsep&2 // don't lose sep bit
		if digsep&1 == 0 && ok {
			s.errorf("exponent has no digits")
			ok = false
		}
	} else if prefix == 'x' && kind == FloatLit && ok {
		s.errorf("hexadecimal mantissa requires a 'p' exponent")
		ok = false
	}

	// suffix 'i'
	if s.ch == 'i' {
		kind = ImagLit
		s.nextch()
	}

	s.setLit(kind, ok) // do this now so we can use s.lit below

	// Errors below are reported after setLit so that the positions are
	// relative to the complete literal text.
	if kind == IntLit && invalid >= 0 && ok {
		s.errorAtf(invalid, "invalid digit %q in %s literal", s.lit[invalid], baseName(base))
		ok = false
	}

	if digsep&2 != 0 && ok {
		if i := invalidSep(s.lit); i >= 0 {
			s.errorAtf(i, "'_' must separate successive digits")
			ok = false
		}
	}

	s.bad = !ok // correct s.bad
}

// baseName returns the English name of a supported number base.
func baseName(base int) string {
	switch base {
	case 2:
		return "binary"
	case 8:
		return "octal"
	case 10:
		return "decimal"
	case 16:
		return "hexadecimal"
	}
	panic("invalid base")
}

// invalidSep returns the index of the first invalid separator in x, or -1.
func invalidSep(x string) int {
	x1 := ' ' // prefix char, we only care if it's 'x'
	d := '.'  // digit, one of '_', '0' (a digit), or '.' (anything else)
	i := 0

	// a prefix counts as a digit
	if len(x) >= 2 && x[0] == '0' {
		x1 = lower(rune(x[1]))
		if x1 == 'x' || x1 == 'o' || x1 == 'b' {
			d = '0'
			i = 2
		}
	}

	// mantissa and exponent
	for ; i < len(x); i++ {
		p := d // previous digit
		d = rune(x[i])
		switch {
		case d == '_':
			if p != '0' {
				return i
			}
		case isDecimal(d) || x1 == 'x' && isHex(d):
			d = '0'
		default:
			if p == '_' {
				return i - 1
			}
			d = '.'
		}
	}
	if d == '_' {
		return len(x) - 1
	}

	return -1
}

// rune scans a rune literal; s.ch holds the opening ' on entry.
func (s *scanner) rune() {
	ok := true
	s.nextch()

	n := 0 // number of characters seen between the quotes
	for ; ; n++ {
		if s.ch == '\'' {
			if ok {
				if n == 0 {
					s.errorf("empty rune literal or unescaped '")
					ok = false
				} else if n != 1 {
					s.errorAtf(0, "more than one character in rune literal")
					ok = false
				}
			}
			s.nextch()
			break
		}
		if s.ch == '\\' {
			s.nextch()
			if !s.escape('\'') {
				ok = false
			}
			continue
		}
		if s.ch == '\n' {
			if ok {
				s.errorf("newline in rune literal")
				ok = false
			}
			break
		}
		if s.ch < 0 {
			if ok {
				s.errorAtf(0, "rune literal not terminated")
				ok = false
			}
			break
		}
		s.nextch()
	}

	s.setLit(RuneLit, ok)
}

// stdString scans an interpreted string literal; s.ch holds the opening " on entry.
func (s *scanner) stdString() {
	ok := true
	s.nextch()

	for {
		if s.ch == '"' {
			s.nextch()
			break
		}
		if s.ch == '\\' {
			s.nextch()
			if !s.escape('"') {
				ok = false
			}
			continue
		}
		if s.ch == '\n' {
			s.errorf("newline in string")
			ok = false
			break
		}
		if s.ch < 0 {
			s.errorAtf(0, "string not terminated")
			ok = false
			break
		}
		s.nextch()
	}

	s.setLit(StringLit, ok)
}

// rawString scans a raw string literal; s.ch holds the opening ` on entry.
func (s *scanner) rawString() {
	ok := true
	s.nextch()

	for {
		if s.ch == '`' {
			s.nextch()
			break
		}
		if s.ch < 0 {
			s.errorAtf(0, "string not terminated")
			ok = false
			break
		}
		s.nextch()
	}
	// We leave CRs in the string since they are part of the
	// literal (even though they are not part of the literal
	// value).

	s.setLit(StringLit, ok)
}

// comment reports the given comment text to the error handler at the
// current token position (comment text starts with a '/').
func (s *scanner) comment(text string) {
	s.errorAtf(0, "%s", text)
}

// skipLine consumes characters up to (but excluding) the next newline or EOF.
func (s *scanner) skipLine() {
	// don't consume '\n' - needed for nlsemi logic
	for s.ch >= 0 && s.ch != '\n' {
		s.nextch()
	}
}

// lineComment consumes a //-style comment, reporting it (or the
// //line / //go: directive it contains) if the scanner mode asks for it.
func (s *scanner) lineComment() {
	// opening has already been consumed

	if s.mode&comments != 0 {
		s.skipLine()
		s.comment(string(s.segment()))
		return
	}

	// are we saving directives? or is this definitely not a directive?
	if s.mode&directives == 0 || (s.ch != 'g' && s.ch != 'l') {
		s.stop()
		s.skipLine()
		return
	}

	// recognize go: or line directives
	prefix := "go:"
	if s.ch == 'l' {
		prefix = "line "
	}
	for _, m := range prefix {
		if s.ch != m {
			s.stop()
			s.skipLine()
			return
		}
		s.nextch()
	}

	// directive text
	s.skipLine()
	s.comment(string(s.segment()))
}

// skipComment consumes the body of a /*-style comment (opening already
// consumed) and reports whether it was properly terminated by */.
func (s *scanner) skipComment() bool {
	for s.ch >= 0 {
		for s.ch == '*' {
			s.nextch()
			if s.ch == '/' {
				s.nextch()
				return true
			}
		}
		s.nextch()
	}
	s.errorAtf(0, "comment not terminated")
	return false
}

// fullComment consumes a /*-style comment, reporting it (or the /*line
// directive it contains) if the scanner mode asks for it.
func (s *scanner) fullComment() {
	/* opening has already been consumed */

	if s.mode&comments != 0 {
		if s.skipComment() {
			s.comment(string(s.segment()))
		}
		return
	}

	if s.mode&directives == 0 || s.ch != 'l' {
		s.stop()
		s.skipComment()
		return
	}

	// recognize line directive
	const prefix = "line "
	for _, m := range prefix {
		if s.ch != m {
			s.stop()
			s.skipComment()
			return
		}
		s.nextch()
	}

	// directive text
	if s.skipComment() {
		s.comment(string(s.segment()))
	}
}

// escape scans an escape sequence (the leading '\\' has been consumed);
// quote is the enclosing quote character. It reports whether the escape
// is valid, reporting errors via the error handler if it is not.
func (s *scanner) escape(quote rune) bool {
	var n int
	var base, max uint32

	switch s.ch {
	case quote, 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
		s.nextch()
		return true
	case '0', '1', '2', '3', '4', '5', '6', '7':
		n, base, max = 3, 8, 255
	case 'x':
		s.nextch()
		n, base, max = 2, 16, 255
	case 'u':
		s.nextch()
		n, base, max = 4, 16, unicode.MaxRune
	case 'U':
		s.nextch()
+ n, base, max = 8, 16, unicode.MaxRune + default: + if s.ch < 0 { + return true // complain in caller about EOF + } + s.errorf("unknown escape") + return false + } + + var x uint32 + for i := n; i > 0; i-- { + if s.ch < 0 { + return true // complain in caller about EOF + } + d := base + if isDecimal(s.ch) { + d = uint32(s.ch) - '0' + } else if 'a' <= lower(s.ch) && lower(s.ch) <= 'f' { + d = uint32(lower(s.ch)) - 'a' + 10 + } + if d >= base { + s.errorf("invalid character %q in %s escape", s.ch, baseName(int(base))) + return false + } + // d < base + x = x*base + d + s.nextch() + } + + if x > max && base == 8 { + s.errorf("octal escape value %d > 255", x) + return false + } + + if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ { + s.errorf("escape is invalid Unicode code point %#U", x) + return false + } + + return true +} diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go new file mode 100644 index 0000000..450ec1f --- /dev/null +++ b/src/cmd/compile/internal/syntax/scanner_test.go @@ -0,0 +1,767 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "bytes" + "fmt" + "os" + "strings" + "testing" +) + +// errh is a default error handler for basic tests. +func errh(line, col uint, msg string) { + panic(fmt.Sprintf("%d:%d: %s", line, col, msg)) +} + +// Don't bother with other tests if TestSmoke doesn't pass. 
+func TestSmoke(t *testing.T) { + const src = "if (+foo\t+=..123/***/0.9_0e-0i'a'`raw`\"string\"..f;//$" + tokens := []token{_If, _Lparen, _Operator, _Name, _AssignOp, _Dot, _Literal, _Literal, _Literal, _Literal, _Literal, _Dot, _Dot, _Name, _Semi, _EOF} + + var got scanner + got.init(strings.NewReader(src), errh, 0) + for _, want := range tokens { + got.next() + if got.tok != want { + t.Errorf("%d:%d: got %s; want %s", got.line, got.col, got.tok, want) + continue + } + } +} + +// Once TestSmoke passes, run TestTokens next. +func TestTokens(t *testing.T) { + var got scanner + for _, want := range sampleTokens { + got.init(strings.NewReader(want.src), func(line, col uint, msg string) { + t.Errorf("%s:%d:%d: %s", want.src, line, col, msg) + }, 0) + got.next() + if got.tok != want.tok { + t.Errorf("%s: got %s; want %s", want.src, got.tok, want.tok) + continue + } + if (got.tok == _Name || got.tok == _Literal) && got.lit != want.src { + t.Errorf("%s: got %q; want %q", want.src, got.lit, want.src) + } + } +} + +func TestScanner(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + + filename := *src_ // can be changed via -src flag + src, err := os.Open(filename) + if err != nil { + t.Fatal(err) + } + defer src.Close() + + var s scanner + s.init(src, errh, 0) + for { + s.next() + if s.tok == _EOF { + break + } + if !testing.Verbose() { + continue + } + switch s.tok { + case _Name, _Literal: + fmt.Printf("%s:%d:%d: %s => %s\n", filename, s.line, s.col, s.tok, s.lit) + case _Operator: + fmt.Printf("%s:%d:%d: %s => %s (prec = %d)\n", filename, s.line, s.col, s.tok, s.op, s.prec) + default: + fmt.Printf("%s:%d:%d: %s\n", filename, s.line, s.col, s.tok) + } + } +} + +func TestEmbeddedTokens(t *testing.T) { + // make source + var buf bytes.Buffer + for i, s := range sampleTokens { + buf.WriteString("\t\t\t\t"[:i&3]) // leading indentation + buf.WriteString(s.src) // token + buf.WriteString(" "[:i&7]) // trailing spaces + fmt.Fprintf(&buf, 
"/*line foo:%d */ // bar\n", i) // comments + newline (don't crash w/o directive handler) + } + + // scan source + var got scanner + var src string + got.init(&buf, func(line, col uint, msg string) { + t.Fatalf("%s:%d:%d: %s", src, line, col, msg) + }, 0) + got.next() + for i, want := range sampleTokens { + src = want.src + nlsemi := false + + if got.line-linebase != uint(i) { + t.Errorf("%s: got line %d; want %d", src, got.line-linebase, i) + } + + if got.tok != want.tok { + t.Errorf("%s: got tok %s; want %s", src, got.tok, want.tok) + continue + } + + switch want.tok { + case _Semi: + if got.lit != "semicolon" { + t.Errorf("%s: got %s; want semicolon", src, got.lit) + } + + case _Name, _Literal: + if got.lit != want.src { + t.Errorf("%s: got lit %q; want %q", src, got.lit, want.src) + continue + } + nlsemi = true + + case _Operator, _AssignOp, _IncOp: + if got.op != want.op { + t.Errorf("%s: got op %s; want %s", src, got.op, want.op) + continue + } + if got.prec != want.prec { + t.Errorf("%s: got prec %d; want %d", src, got.prec, want.prec) + continue + } + nlsemi = want.tok == _IncOp + + case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: + nlsemi = true + } + + if nlsemi { + got.next() + if got.tok != _Semi { + t.Errorf("%s: got tok %s; want ;", src, got.tok) + continue + } + if got.lit != "newline" { + t.Errorf("%s: got %s; want newline", src, got.lit) + } + } + + got.next() + } + + if got.tok != _EOF { + t.Errorf("got %q; want _EOF", got.tok) + } +} + +var sampleTokens = [...]struct { + tok token + src string + op Operator + prec int +}{ + // name samples + {_Name, "x", 0, 0}, + {_Name, "X123", 0, 0}, + {_Name, "foo", 0, 0}, + {_Name, "Foo123", 0, 0}, + {_Name, "foo_bar", 0, 0}, + {_Name, "_", 0, 0}, + {_Name, "_foobar", 0, 0}, + {_Name, "a۰۱۸", 0, 0}, + {_Name, "foo६४", 0, 0}, + {_Name, "bar9876", 0, 0}, + {_Name, "ŝ", 0, 0}, + {_Name, "ŝfoo", 0, 0}, + + // literal samples + {_Literal, "0", 0, 0}, + {_Literal, "1", 0, 0}, + {_Literal, 
"12345", 0, 0}, + {_Literal, "123456789012345678890123456789012345678890", 0, 0}, + {_Literal, "01234567", 0, 0}, + {_Literal, "0_1_234_567", 0, 0}, + {_Literal, "0X0", 0, 0}, + {_Literal, "0xcafebabe", 0, 0}, + {_Literal, "0x_cafe_babe", 0, 0}, + {_Literal, "0O0", 0, 0}, + {_Literal, "0o000", 0, 0}, + {_Literal, "0o_000", 0, 0}, + {_Literal, "0B1", 0, 0}, + {_Literal, "0b01100110", 0, 0}, + {_Literal, "0b_0110_0110", 0, 0}, + {_Literal, "0.", 0, 0}, + {_Literal, "0.e0", 0, 0}, + {_Literal, "0.e-1", 0, 0}, + {_Literal, "0.e+123", 0, 0}, + {_Literal, ".0", 0, 0}, + {_Literal, ".0E00", 0, 0}, + {_Literal, ".0E-0123", 0, 0}, + {_Literal, ".0E+12345678901234567890", 0, 0}, + {_Literal, ".45e1", 0, 0}, + {_Literal, "3.14159265", 0, 0}, + {_Literal, "1e0", 0, 0}, + {_Literal, "1e+100", 0, 0}, + {_Literal, "1e-100", 0, 0}, + {_Literal, "2.71828e-1000", 0, 0}, + {_Literal, "0i", 0, 0}, + {_Literal, "1i", 0, 0}, + {_Literal, "012345678901234567889i", 0, 0}, + {_Literal, "123456789012345678890i", 0, 0}, + {_Literal, "0.i", 0, 0}, + {_Literal, ".0i", 0, 0}, + {_Literal, "3.14159265i", 0, 0}, + {_Literal, "1e0i", 0, 0}, + {_Literal, "1e+100i", 0, 0}, + {_Literal, "1e-100i", 0, 0}, + {_Literal, "2.71828e-1000i", 0, 0}, + {_Literal, "'a'", 0, 0}, + {_Literal, "'\\000'", 0, 0}, + {_Literal, "'\\xFF'", 0, 0}, + {_Literal, "'\\uff16'", 0, 0}, + {_Literal, "'\\U0000ff16'", 0, 0}, + {_Literal, "`foobar`", 0, 0}, + {_Literal, "`foo\tbar`", 0, 0}, + {_Literal, "`\r`", 0, 0}, + + // operators + {_Operator, "!", Not, 0}, + {_Operator, "~", Tilde, 0}, + + {_Operator, "||", OrOr, precOrOr}, + + {_Operator, "&&", AndAnd, precAndAnd}, + + {_Operator, "==", Eql, precCmp}, + {_Operator, "!=", Neq, precCmp}, + {_Operator, "<", Lss, precCmp}, + {_Operator, "<=", Leq, precCmp}, + {_Operator, ">", Gtr, precCmp}, + {_Operator, ">=", Geq, precCmp}, + + {_Operator, "+", Add, precAdd}, + {_Operator, "-", Sub, precAdd}, + {_Operator, "|", Or, precAdd}, + {_Operator, "^", Xor, precAdd}, + + {_Star, "*", 
Mul, precMul}, + {_Operator, "/", Div, precMul}, + {_Operator, "%", Rem, precMul}, + {_Operator, "&", And, precMul}, + {_Operator, "&^", AndNot, precMul}, + {_Operator, "<<", Shl, precMul}, + {_Operator, ">>", Shr, precMul}, + + // assignment operations + {_AssignOp, "+=", Add, precAdd}, + {_AssignOp, "-=", Sub, precAdd}, + {_AssignOp, "|=", Or, precAdd}, + {_AssignOp, "^=", Xor, precAdd}, + + {_AssignOp, "*=", Mul, precMul}, + {_AssignOp, "/=", Div, precMul}, + {_AssignOp, "%=", Rem, precMul}, + {_AssignOp, "&=", And, precMul}, + {_AssignOp, "&^=", AndNot, precMul}, + {_AssignOp, "<<=", Shl, precMul}, + {_AssignOp, ">>=", Shr, precMul}, + + // other operations + {_IncOp, "++", Add, precAdd}, + {_IncOp, "--", Sub, precAdd}, + {_Assign, "=", 0, 0}, + {_Define, ":=", 0, 0}, + {_Arrow, "<-", 0, 0}, + + // delimiters + {_Lparen, "(", 0, 0}, + {_Lbrack, "[", 0, 0}, + {_Lbrace, "{", 0, 0}, + {_Rparen, ")", 0, 0}, + {_Rbrack, "]", 0, 0}, + {_Rbrace, "}", 0, 0}, + {_Comma, ",", 0, 0}, + {_Semi, ";", 0, 0}, + {_Colon, ":", 0, 0}, + {_Dot, ".", 0, 0}, + {_DotDotDot, "...", 0, 0}, + + // keywords + {_Break, "break", 0, 0}, + {_Case, "case", 0, 0}, + {_Chan, "chan", 0, 0}, + {_Const, "const", 0, 0}, + {_Continue, "continue", 0, 0}, + {_Default, "default", 0, 0}, + {_Defer, "defer", 0, 0}, + {_Else, "else", 0, 0}, + {_Fallthrough, "fallthrough", 0, 0}, + {_For, "for", 0, 0}, + {_Func, "func", 0, 0}, + {_Go, "go", 0, 0}, + {_Goto, "goto", 0, 0}, + {_If, "if", 0, 0}, + {_Import, "import", 0, 0}, + {_Interface, "interface", 0, 0}, + {_Map, "map", 0, 0}, + {_Package, "package", 0, 0}, + {_Range, "range", 0, 0}, + {_Return, "return", 0, 0}, + {_Select, "select", 0, 0}, + {_Struct, "struct", 0, 0}, + {_Switch, "switch", 0, 0}, + {_Type, "type", 0, 0}, + {_Var, "var", 0, 0}, +} + +func TestComments(t *testing.T) { + type comment struct { + line, col uint // 0-based + text string + } + + for _, test := range []struct { + src string + want comment + }{ + // no comments + {"no comment 
here", comment{0, 0, ""}}, + {" /", comment{0, 0, ""}}, + {"\n /*/", comment{0, 0, ""}}, + + //-style comments + {"// line comment\n", comment{0, 0, "// line comment"}}, + {"package p // line comment\n", comment{0, 10, "// line comment"}}, + {"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}}, + {"\n\n//\n", comment{2, 0, "//"}}, + {"//", comment{0, 0, "//"}}, + + /*-style comments */ + {"123/* regular comment */", comment{0, 3, "/* regular comment */"}}, + {"package p /* regular comment", comment{0, 0, ""}}, + {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}}, + {"\n\n/**/", comment{2, 0, "/**/"}}, + {"/*", comment{0, 0, ""}}, + } { + var s scanner + var got comment + s.init(strings.NewReader(test.src), func(line, col uint, msg string) { + if msg[0] != '/' { + // error + if msg != "comment not terminated" { + t.Errorf("%q: %s", test.src, msg) + } + return + } + got = comment{line - linebase, col - colbase, msg} // keep last one + }, comments) + + for { + s.next() + if s.tok == _EOF { + break + } + } + + want := test.want + if got.line != want.line || got.col != want.col { + t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col) + } + if got.text != want.text { + t.Errorf("%q: got %q; want %q", test.src, got.text, want.text) + } + } +} + +func TestNumbers(t *testing.T) { + for _, test := range []struct { + kind LitKind + src, tokens, err string + }{ + // binaries + {IntLit, "0b0", "0b0", ""}, + {IntLit, "0b1010", "0b1010", ""}, + {IntLit, "0B1110", "0B1110", ""}, + + {IntLit, "0b", "0b", "binary literal has no digits"}, + {IntLit, "0b0190", "0b0190", "invalid digit '9' in binary literal"}, + {IntLit, "0b01a0", "0b01 a0", ""}, // only accept 0-9 + + {FloatLit, "0b.", "0b.", "invalid radix point in binary literal"}, + {FloatLit, "0b.1", "0b.1", "invalid radix point in binary literal"}, + {FloatLit, "0b1.0", "0b1.0", "invalid radix point in binary literal"}, + {FloatLit, "0b1e10", 
"0b1e10", "'e' exponent requires decimal mantissa"}, + {FloatLit, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"}, + + {ImagLit, "0b10i", "0b10i", ""}, + {ImagLit, "0b10.0i", "0b10.0i", "invalid radix point in binary literal"}, + + // octals + {IntLit, "0o0", "0o0", ""}, + {IntLit, "0o1234", "0o1234", ""}, + {IntLit, "0O1234", "0O1234", ""}, + + {IntLit, "0o", "0o", "octal literal has no digits"}, + {IntLit, "0o8123", "0o8123", "invalid digit '8' in octal literal"}, + {IntLit, "0o1293", "0o1293", "invalid digit '9' in octal literal"}, + {IntLit, "0o12a3", "0o12 a3", ""}, // only accept 0-9 + + {FloatLit, "0o.", "0o.", "invalid radix point in octal literal"}, + {FloatLit, "0o.2", "0o.2", "invalid radix point in octal literal"}, + {FloatLit, "0o1.2", "0o1.2", "invalid radix point in octal literal"}, + {FloatLit, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"}, + {FloatLit, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"}, + + {ImagLit, "0o10i", "0o10i", ""}, + {ImagLit, "0o10e0i", "0o10e0i", "'e' exponent requires decimal mantissa"}, + + // 0-octals + {IntLit, "0", "0", ""}, + {IntLit, "0123", "0123", ""}, + + {IntLit, "08123", "08123", "invalid digit '8' in octal literal"}, + {IntLit, "01293", "01293", "invalid digit '9' in octal literal"}, + {IntLit, "0F.", "0 F .", ""}, // only accept 0-9 + {IntLit, "0123F.", "0123 F .", ""}, + {IntLit, "0123456x", "0123456 x", ""}, + + // decimals + {IntLit, "1", "1", ""}, + {IntLit, "1234", "1234", ""}, + + {IntLit, "1f", "1 f", ""}, // only accept 0-9 + + {ImagLit, "0i", "0i", ""}, + {ImagLit, "0678i", "0678i", ""}, + + // decimal floats + {FloatLit, "0.", "0.", ""}, + {FloatLit, "123.", "123.", ""}, + {FloatLit, "0123.", "0123.", ""}, + + {FloatLit, ".0", ".0", ""}, + {FloatLit, ".123", ".123", ""}, + {FloatLit, ".0123", ".0123", ""}, + + {FloatLit, "0.0", "0.0", ""}, + {FloatLit, "123.123", "123.123", ""}, + {FloatLit, "0123.0123", "0123.0123", ""}, + + {FloatLit, "0e0", "0e0", 
""}, + {FloatLit, "123e+0", "123e+0", ""}, + {FloatLit, "0123E-1", "0123E-1", ""}, + + {FloatLit, "0.e+1", "0.e+1", ""}, + {FloatLit, "123.E-10", "123.E-10", ""}, + {FloatLit, "0123.e123", "0123.e123", ""}, + + {FloatLit, ".0e-1", ".0e-1", ""}, + {FloatLit, ".123E+10", ".123E+10", ""}, + {FloatLit, ".0123E123", ".0123E123", ""}, + + {FloatLit, "0.0e1", "0.0e1", ""}, + {FloatLit, "123.123E-10", "123.123E-10", ""}, + {FloatLit, "0123.0123e+456", "0123.0123e+456", ""}, + + {FloatLit, "0e", "0e", "exponent has no digits"}, + {FloatLit, "0E+", "0E+", "exponent has no digits"}, + {FloatLit, "1e+f", "1e+ f", "exponent has no digits"}, + {FloatLit, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"}, + {FloatLit, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"}, + + {ImagLit, "0.i", "0.i", ""}, + {ImagLit, ".123i", ".123i", ""}, + {ImagLit, "123.123i", "123.123i", ""}, + {ImagLit, "123e+0i", "123e+0i", ""}, + {ImagLit, "123.E-10i", "123.E-10i", ""}, + {ImagLit, ".123E+10i", ".123E+10i", ""}, + + // hexadecimals + {IntLit, "0x0", "0x0", ""}, + {IntLit, "0x1234", "0x1234", ""}, + {IntLit, "0xcafef00d", "0xcafef00d", ""}, + {IntLit, "0XCAFEF00D", "0XCAFEF00D", ""}, + + {IntLit, "0x", "0x", "hexadecimal literal has no digits"}, + {IntLit, "0x1g", "0x1 g", ""}, + + {ImagLit, "0xf00i", "0xf00i", ""}, + + // hexadecimal floats + {FloatLit, "0x0p0", "0x0p0", ""}, + {FloatLit, "0x12efp-123", "0x12efp-123", ""}, + {FloatLit, "0xABCD.p+0", "0xABCD.p+0", ""}, + {FloatLit, "0x.0189P-0", "0x.0189P-0", ""}, + {FloatLit, "0x1.ffffp+1023", "0x1.ffffp+1023", ""}, + + {FloatLit, "0x.", "0x.", "hexadecimal literal has no digits"}, + {FloatLit, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"}, + {FloatLit, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"}, + {FloatLit, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"}, + {FloatLit, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"}, + {FloatLit, 
"0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"}, + {FloatLit, "0x0p", "0x0p", "exponent has no digits"}, + {FloatLit, "0xeP-", "0xeP-", "exponent has no digits"}, + {FloatLit, "0x1234PAB", "0x1234P AB", "exponent has no digits"}, + {FloatLit, "0x1.2p1a", "0x1.2p1 a", ""}, + + {ImagLit, "0xf00.bap+12i", "0xf00.bap+12i", ""}, + + // separators + {IntLit, "0b_1000_0001", "0b_1000_0001", ""}, + {IntLit, "0o_600", "0o_600", ""}, + {IntLit, "0_466", "0_466", ""}, + {IntLit, "1_000", "1_000", ""}, + {FloatLit, "1_000.000_1", "1_000.000_1", ""}, + {ImagLit, "10e+1_2_3i", "10e+1_2_3i", ""}, + {IntLit, "0x_f00d", "0x_f00d", ""}, + {FloatLit, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""}, + + {IntLit, "0b__1000", "0b__1000", "'_' must separate successive digits"}, + {IntLit, "0o60___0", "0o60___0", "'_' must separate successive digits"}, + {IntLit, "0466_", "0466_", "'_' must separate successive digits"}, + {FloatLit, "1_.", "1_.", "'_' must separate successive digits"}, + {FloatLit, "0._1", "0._1", "'_' must separate successive digits"}, + {FloatLit, "2.7_e0", "2.7_e0", "'_' must separate successive digits"}, + {ImagLit, "10e+12_i", "10e+12_i", "'_' must separate successive digits"}, + {IntLit, "0x___0", "0x___0", "'_' must separate successive digits"}, + {FloatLit, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"}, + } { + var s scanner + var err string + s.init(strings.NewReader(test.src), func(_, _ uint, msg string) { + if err == "" { + err = msg + } + }, 0) + + for i, want := range strings.Split(test.tokens, " ") { + err = "" + s.next() + + if err != "" && !s.bad { + t.Errorf("%q: got error but bad not set", test.src) + } + + // compute lit where s.lit is not defined + var lit string + switch s.tok { + case _Name, _Literal: + lit = s.lit + case _Dot: + lit = "." 
+ } + + if i == 0 { + if s.tok != _Literal || s.kind != test.kind { + t.Errorf("%q: got token %s (kind = %d); want literal (kind = %d)", test.src, s.tok, s.kind, test.kind) + } + if err != test.err { + t.Errorf("%q: got error %q; want %q", test.src, err, test.err) + } + } + + if lit != want { + t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, s.tok, want) + } + } + + // make sure we read all + s.next() + if s.tok == _Semi { + s.next() + } + if s.tok != _EOF { + t.Errorf("%q: got %s; want EOF", test.src, s.tok) + } + } +} + +func TestScanErrors(t *testing.T) { + for _, test := range []struct { + src, err string + line, col uint // 0-based + }{ + // Note: Positions for lexical errors are the earliest position + // where the error is apparent, not the beginning of the respective + // token. + + // rune-level errors + {"fo\x00o", "invalid NUL character", 0, 2}, + {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0}, + {"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0}, + + // token-level errors + {"\u00BD" /* ½ */, "invalid character U+00BD '½' in identifier", 0, 0}, + {"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid character U+00BD '½' in identifier", 0, 13 /* byte offset */}, + {"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0}, + {"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid character U+00BD '½' in identifier", 0, 8 /* byte offset */}, + + {"x + #y", "invalid character U+0023 '#'", 0, 4}, + {"foo$bar = 0", "invalid character U+0024 '$'", 0, 3}, + {"0123456789", "invalid digit '8' in octal literal", 0, 8}, + {"0123456789. 
/* foobar", "comment not terminated", 0, 12}, // valid float constant + {"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant + {"var a, b = 09, 07\n", "invalid digit '9' in octal literal", 0, 12}, + + {`''`, "empty rune literal or unescaped '", 0, 1}, + {"'\n", "newline in rune literal", 0, 1}, + {`'\`, "rune literal not terminated", 0, 0}, + {`'\'`, "rune literal not terminated", 0, 0}, + {`'\x`, "rune literal not terminated", 0, 0}, + {`'\x'`, "invalid character '\\'' in hexadecimal escape", 0, 3}, + {`'\y'`, "unknown escape", 0, 2}, + {`'\x0'`, "invalid character '\\'' in hexadecimal escape", 0, 4}, + {`'\00'`, "invalid character '\\'' in octal escape", 0, 4}, + {`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape + {`'\378`, "invalid character '8' in octal escape", 0, 4}, + {`'\400'`, "octal escape value 256 > 255", 0, 5}, + {`'xx`, "rune literal not terminated", 0, 0}, + {`'xx'`, "more than one character in rune literal", 0, 0}, + + {"\n \"foo\n", "newline in string", 1, 7}, + {`"`, "string not terminated", 0, 0}, + {`"foo`, "string not terminated", 0, 0}, + {"`", "string not terminated", 0, 0}, + {"`foo", "string not terminated", 0, 0}, + {"/*/", "comment not terminated", 0, 0}, + {"/*\n\nfoo", "comment not terminated", 0, 0}, + {`"\`, "string not terminated", 0, 0}, + {`"\"`, "string not terminated", 0, 0}, + {`"\x`, "string not terminated", 0, 0}, + {`"\x"`, "invalid character '\"' in hexadecimal escape", 0, 3}, + {`"\y"`, "unknown escape", 0, 2}, + {`"\x0"`, "invalid character '\"' in hexadecimal escape", 0, 4}, + {`"\00"`, "invalid character '\"' in octal escape", 0, 4}, + {`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape + {`"\378"`, "invalid character '8' in octal escape", 0, 4}, + {`"\400"`, "octal escape value 256 > 255", 0, 5}, + + {`s := "foo\z"`, "unknown escape", 0, 10}, + {`s := "foo\z00\nbar"`, "unknown escape", 0, 10}, + {`"\x`, "string not terminated", 0, 0}, + {`"\x"`, 
"invalid character '\"' in hexadecimal escape", 0, 3}, + {`var s string = "\x"`, "invalid character '\"' in hexadecimal escape", 0, 18}, + {`return "\Uffffffff"`, "escape is invalid Unicode code point U+FFFFFFFF", 0, 18}, + + {"0b.0", "invalid radix point in binary literal", 0, 2}, + {"0x.p0\n", "hexadecimal literal has no digits", 0, 3}, + + // former problem cases + {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0}, + } { + var s scanner + var line, col uint + var err string + s.init(strings.NewReader(test.src), func(l, c uint, msg string) { + if err == "" { + line, col = l-linebase, c-colbase + err = msg + } + }, 0) + + for { + s.next() + if s.tok == _EOF { + break + } + } + + if err != "" { + if err != test.err { + t.Errorf("%q: got err = %q; want %q", test.src, err, test.err) + } + if line != test.line { + t.Errorf("%q: got line = %d; want %d", test.src, line, test.line) + } + if col != test.col { + t.Errorf("%q: got col = %d; want %d", test.src, col, test.col) + } + } else { + t.Errorf("%q: got no error; want %q", test.src, test.err) + } + } +} + +func TestDirectives(t *testing.T) { + for _, src := range []string{ + "line", + "// line", + "//line", + "//line foo", + "//line foo%bar", + + "go", + "// go:", + "//go:", + "//go :foo", + "//go:foo", + "//go:foo%bar", + } { + got := "" + var s scanner + s.init(strings.NewReader(src), func(_, col uint, msg string) { + if col != colbase { + t.Errorf("%s: got col = %d; want %d", src, col, colbase) + } + if msg == "" { + t.Errorf("%s: handler called with empty msg", src) + } + got = msg + }, directives) + + s.next() + if strings.HasPrefix(src, "//line ") || strings.HasPrefix(src, "//go:") { + // handler should have been called + if got != src { + t.Errorf("got %s; want %s", got, src) + } + } else { + // handler should not have been called + if got != "" { + t.Errorf("got %s for %s", got, src) + } + } + } +} + +func TestIssue21938(t *testing.T) { + s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" + + var got scanner 
+ got.init(strings.NewReader(s), errh, 0) + got.next() + + if got.tok != _Literal || got.lit != ".5" { + t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5") + } +} + +func TestIssue33961(t *testing.T) { + literals := `08__ 0b.p 0b_._p 0x.e 0x.p` + for _, lit := range strings.Split(literals, " ") { + n := 0 + var got scanner + got.init(strings.NewReader(lit), func(_, _ uint, msg string) { + // fmt.Printf("%s: %s\n", lit, msg) // uncomment for debugging + n++ + }, 0) + got.next() + + if n != 1 { + t.Errorf("%q: got %d errors; want 1", lit, n) + continue + } + + if !got.bad { + t.Errorf("%q: got error but bad not set", lit) + } + } +} diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go new file mode 100644 index 0000000..01b5921 --- /dev/null +++ b/src/cmd/compile/internal/syntax/source.go @@ -0,0 +1,218 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements source, a buffered rune reader +// specialized for scanning Go code: Reading +// ASCII characters, maintaining current (line, col) +// position information, and recording of the most +// recently read source segment are highly optimized. +// This file is self-contained (go tool compile source.go +// compiles) and thus could be made into its own package. + +package syntax + +import ( + "io" + "unicode/utf8" +) + +// The source buffer is accessed using three indices b (begin), +// r (read), and e (end): +// +// - If b >= 0, it points to the beginning of a segment of most +// recently read characters (typically a Go literal). +// +// - r points to the byte immediately following the most recently +// read character ch, which starts at r-chw. +// +// - e points to the byte immediately following the last byte that +// was read into the buffer. 
+// +// The buffer content is terminated at buf[e] with the sentinel +// character utf8.RuneSelf. This makes it possible to test for +// the common case of ASCII characters with a single 'if' (see +// nextch method). +// +// +------ content in use -------+ +// v v +// buf [...read...|...segment...|ch|...unread...|s|...free...] +// ^ ^ ^ ^ +// | | | | +// b r-chw r e +// +// Invariant: -1 <= b < r <= e < len(buf) && buf[e] == sentinel + +type source struct { + in io.Reader + errh func(line, col uint, msg string) + + buf []byte // source buffer + ioerr error // pending I/O error, or nil + b, r, e int // buffer indices (see comment above) + line, col uint // source position of ch (0-based) + ch rune // most recently read character + chw int // width of ch +} + +const sentinel = utf8.RuneSelf + +func (s *source) init(in io.Reader, errh func(line, col uint, msg string)) { + s.in = in + s.errh = errh + + if s.buf == nil { + s.buf = make([]byte, nextSize(0)) + } + s.buf[0] = sentinel + s.ioerr = nil + s.b, s.r, s.e = -1, 0, 0 + s.line, s.col = 0, 0 + s.ch = ' ' + s.chw = 0 +} + +// starting points for line and column numbers +const linebase = 1 +const colbase = 1 + +// pos returns the (line, col) source position of s.ch. +func (s *source) pos() (line, col uint) { + return linebase + s.line, colbase + s.col +} + +// error reports the error msg at source position s.pos(). +func (s *source) error(msg string) { + line, col := s.pos() + s.errh(line, col, msg) +} + +// start starts a new active source segment (including s.ch). +// As long as stop has not been called, the active segment's +// bytes (excluding s.ch) may be retrieved by calling segment. 
+func (s *source) start() { s.b = s.r - s.chw } +func (s *source) stop() { s.b = -1 } +func (s *source) segment() []byte { return s.buf[s.b : s.r-s.chw] } + +// rewind rewinds the scanner's read position and character s.ch +// to the start of the currently active segment, which must not +// contain any newlines (otherwise position information will be +// incorrect). Currently, rewind is only needed for handling the +// source sequence ".."; it must not be called outside an active +// segment. +func (s *source) rewind() { + // ok to verify precondition - rewind is rarely called + if s.b < 0 { + panic("no active segment") + } + s.col -= uint(s.r - s.b) + s.r = s.b + s.nextch() +} + +func (s *source) nextch() { +redo: + s.col += uint(s.chw) + if s.ch == '\n' { + s.line++ + s.col = 0 + } + + // fast common case: at least one ASCII character + if s.ch = rune(s.buf[s.r]); s.ch < sentinel { + s.r++ + s.chw = 1 + if s.ch == 0 { + s.error("invalid NUL character") + goto redo + } + return + } + + // slower general case: add more bytes to buffer if we don't have a full rune + for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil { + s.fill() + } + + // EOF + if s.r == s.e { + if s.ioerr != io.EOF { + // ensure we never start with a '/' (e.g., rooted path) in the error message + s.error("I/O error: " + s.ioerr.Error()) + s.ioerr = nil + } + s.ch = -1 + s.chw = 0 + return + } + + s.ch, s.chw = utf8.DecodeRune(s.buf[s.r:s.e]) + s.r += s.chw + + if s.ch == utf8.RuneError && s.chw == 1 { + s.error("invalid UTF-8 encoding") + goto redo + } + + // BOM's are only allowed as the first character in a file + const BOM = 0xfeff + if s.ch == BOM { + if s.line > 0 || s.col > 0 { + s.error("invalid BOM in the middle of the file") + } + goto redo + } +} + +// fill reads more source bytes into s.buf. +// It returns with at least one more byte in the buffer, or with s.ioerr != nil. 
+func (s *source) fill() { + // determine content to preserve + b := s.r + if s.b >= 0 { + b = s.b + s.b = 0 // after buffer has grown or content has been moved down + } + content := s.buf[b:s.e] + + // grow buffer or move content down + if len(content)*2 > len(s.buf) { + s.buf = make([]byte, nextSize(len(s.buf))) + copy(s.buf, content) + } else if b > 0 { + copy(s.buf, content) + } + s.r -= b + s.e -= b + + // read more data: try a limited number of times + for i := 0; i < 10; i++ { + var n int + n, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1]) // -1 to leave space for sentinel + if n < 0 { + panic("negative read") // incorrect underlying io.Reader implementation + } + if n > 0 || s.ioerr != nil { + s.e += n + s.buf[s.e] = sentinel + return + } + // n == 0 + } + + s.buf[s.e] = sentinel + s.ioerr = io.ErrNoProgress +} + +// nextSize returns the next bigger size for a buffer of a given size. +func nextSize(size int) int { + const min = 4 << 10 // 4K: minimum buffer size + const max = 1 << 20 // 1M: maximum buffer size which is still doubled + if size < min { + return min + } + if size <= max { + return size << 1 + } + return size + max +} diff --git a/src/cmd/compile/internal/syntax/syntax.go b/src/cmd/compile/internal/syntax/syntax.go new file mode 100644 index 0000000..83b102d --- /dev/null +++ b/src/cmd/compile/internal/syntax/syntax.go @@ -0,0 +1,94 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "fmt" + "io" + "os" +) + +// Mode describes the parser mode. +type Mode uint + +// Modes supported by the parser. +const ( + CheckBranches Mode = 1 << iota // check correct use of labels, break, continue, and goto statements +) + +// Error describes a syntax error. Error implements the error interface. 
+type Error struct { + Pos Pos + Msg string +} + +func (err Error) Error() string { + return fmt.Sprintf("%s: %s", err.Pos, err.Msg) +} + +var _ error = Error{} // verify that Error implements error + +// An ErrorHandler is called for each error encountered reading a .go file. +type ErrorHandler func(err error) + +// A Pragma value augments a package, import, const, func, type, or var declaration. +// Its meaning is entirely up to the PragmaHandler, +// except that nil is used to mean “no pragma seen.” +type Pragma interface{} + +// A PragmaHandler is used to process //go: directives while scanning. +// It is passed the current pragma value, which starts out being nil, +// and it returns an updated pragma value. +// The text is the directive, with the "//" prefix stripped. +// The current pragma is saved at each package, import, const, func, type, or var +// declaration, into the File, ImportDecl, ConstDecl, FuncDecl, TypeDecl, or VarDecl node. +// +// If text is the empty string, the pragma is being returned +// to the handler unused, meaning it appeared before a non-declaration. +// The handler may wish to report an error. In this case, pos is the +// current parser position, not the position of the pragma itself. +// Blank specifies whether the line is blank before the pragma. +type PragmaHandler func(pos Pos, blank bool, text string, current Pragma) Pragma + +// Parse parses a single Go source file from src and returns the corresponding +// syntax tree. If there are errors, Parse will return the first error found, +// and a possibly partially constructed syntax tree, or nil. +// +// If errh != nil, it is called with each error encountered, and Parse will +// process as much source as possible. In this case, the returned syntax tree +// is only nil if no correct package clause was found. +// If errh is nil, Parse will terminate immediately upon encountering the first +// error, and the returned syntax tree is nil. 
+// +// If pragh != nil, it is called with each pragma encountered. +func Parse(base *PosBase, src io.Reader, errh ErrorHandler, pragh PragmaHandler, mode Mode) (_ *File, first error) { + defer func() { + if p := recover(); p != nil { + if err, ok := p.(Error); ok { + first = err + return + } + panic(p) + } + }() + + var p parser + p.init(base, src, errh, pragh, mode) + p.next() + return p.fileOrNil(), p.first +} + +// ParseFile behaves like Parse but it reads the source from the named file. +func ParseFile(filename string, errh ErrorHandler, pragh PragmaHandler, mode Mode) (*File, error) { + f, err := os.Open(filename) + if err != nil { + if errh != nil { + errh(err) + } + return nil, err + } + defer f.Close() + return Parse(NewFileBase(filename), f, errh, pragh, mode) +} diff --git a/src/cmd/compile/internal/syntax/testdata/chans.go b/src/cmd/compile/internal/syntax/testdata/chans.go new file mode 100644 index 0000000..d4c4207 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/chans.go @@ -0,0 +1,66 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package chans + +import "runtime" + +// Ranger returns a Sender and a Receiver. The Receiver provides a +// Next method to retrieve values. The Sender provides a Send method +// to send values and a Close method to stop sending values. The Next +// method indicates when the Sender has been closed, and the Send +// method indicates when the Receiver has been freed. +// +// This is a convenient way to exit a goroutine sending values when +// the receiver stops reading them. +func Ranger[T any]() (*Sender[T], *Receiver[T]) { + c := make(chan T) + d := make(chan bool) + s := &Sender[T]{values: c, done: d} + r := &Receiver[T]{values: c, done: d} + runtime.SetFinalizer(r, r.finalize) + return s, r +} + +// A sender is used to send values to a Receiver. 
+type Sender[T any] struct { + values chan<- T + done <-chan bool +} + +// Send sends a value to the receiver. It returns whether any more +// values may be sent; if it returns false the value was not sent. +func (s *Sender[T]) Send(v T) bool { + select { + case s.values <- v: + return true + case <-s.done: + return false + } +} + +// Close tells the receiver that no more values will arrive. +// After Close is called, the Sender may no longer be used. +func (s *Sender[T]) Close() { + close(s.values) +} + +// A Receiver receives values from a Sender. +type Receiver[T any] struct { + values <-chan T + done chan<- bool +} + +// Next returns the next value from the channel. The bool result +// indicates whether the value is valid, or whether the Sender has +// been closed and no more values will be received. +func (r *Receiver[T]) Next() (T, bool) { + v, ok := <-r.values + return v, ok +} + +// finalize is a finalizer for the receiver. +func (r *Receiver[T]) finalize() { + close(r.done) +} diff --git a/src/cmd/compile/internal/syntax/testdata/fallthrough.go b/src/cmd/compile/internal/syntax/testdata/fallthrough.go new file mode 100644 index 0000000..851da81 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/fallthrough.go @@ -0,0 +1,55 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package fallthroughs + +func _() { + var x int + switch x { + case 0: + fallthrough + + case 1: + fallthrough // ERROR fallthrough statement out of place + { + } + + case 2: + { + fallthrough // ERROR fallthrough statement out of place + } + + case 3: + for { + fallthrough // ERROR fallthrough statement out of place + } + + case 4: + fallthrough // trailing empty statements are ok + ; + ; + + case 5: + fallthrough + + default: + fallthrough // ERROR cannot fallthrough final case in switch + } + + fallthrough // ERROR fallthrough statement out of place + + if true { + fallthrough // ERROR fallthrough statement out of place + } + + for { + fallthrough // ERROR fallthrough statement out of place + } + + var t any + switch t.(type) { + case int: + fallthrough // ERROR cannot fallthrough in type switch + } +} diff --git a/src/cmd/compile/internal/syntax/testdata/interface.go b/src/cmd/compile/internal/syntax/testdata/interface.go new file mode 100644 index 0000000..dbc4187 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/interface.go @@ -0,0 +1,74 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains test cases for interfaces containing +// constraint elements. 
+ +package p + +type _ interface { + m() + E +} + +type _ interface { + m() + ~int + int | string + int | ~string + ~int | ~string +} + +type _ interface { + m() + ~int + T[int, string] | string + int | ~T[string, struct{}] + ~int | ~string +} + +type _ interface { + int + []byte + [10]int + struct{} + *int + func() + interface{} + map[string]int + chan T + chan<- T + <-chan T + T[int] +} + +type _ interface { + int | string + []byte | string + [10]int | string + struct{} | string + *int | string + func() | string + interface{} | string + map[string]int | string + chan T | string + chan<- T | string + <-chan T | string + T[int] | string +} + +type _ interface { + ~int | string + ~[]byte | string + ~[10]int | string + ~struct{} | string + ~*int | string + ~func() | string + ~interface{} | string + ~map[string]int | string + ~chan T | string + ~chan<- T | string + ~<-chan T | string + ~T[int] | string +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue20789.go b/src/cmd/compile/internal/syntax/testdata/issue20789.go new file mode 100644 index 0000000..0d5988b --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue20789.go @@ -0,0 +1,9 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Make sure this doesn't crash the compiler. +// Line 9 must end in EOF for this test (no newline). + +package e +func([<-chan<-[func /* ERROR unexpected u */ u){go
\ No newline at end of file diff --git a/src/cmd/compile/internal/syntax/testdata/issue23385.go b/src/cmd/compile/internal/syntax/testdata/issue23385.go new file mode 100644 index 0000000..2459a73 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue23385.go @@ -0,0 +1,17 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Check error message for use of = instead of == . + +package p + +func _() { + if true || 0 /* ERROR cannot use assignment .* as value */ = 1 { + } +} + +func _(a, b string) { + if a == "a" && b /* ERROR cannot use assignment .* as value */ = "b" { + } +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue23434.go b/src/cmd/compile/internal/syntax/testdata/issue23434.go new file mode 100644 index 0000000..5a72a7f --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue23434.go @@ -0,0 +1,31 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test case for issue 23434: Better synchronization of +// parser after missing type. There should be exactly +// one error each time, with no follow-on errors. + +package p + +type T /* ERROR unexpected newline */ + +type Map map[int] /* ERROR unexpected newline */ + +// Examples from #23434: + +func g() { + m := make(map[string] /* ERROR unexpected ! */ !) + for { + x := 1 + print(x) + } +} + +func f() { + m := make(map[string] /* ERROR unexpected \) */ ) + for { + x := 1 + print(x) + } +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue31092.go b/src/cmd/compile/internal/syntax/testdata/issue31092.go new file mode 100644 index 0000000..b1839b8 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue31092.go @@ -0,0 +1,16 @@ +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test cases for issue 31092: Better synchronization of +// parser after seeing an := rather than an = in a const, +// type, or variable declaration. + +package p + +const _ /* ERROR unexpected := */ := 0 +type _ /* ERROR unexpected := */ := int +var _ /* ERROR unexpected := */ := 0 + +const _ int /* ERROR unexpected := */ := 0 +var _ int /* ERROR unexpected := */ := 0 diff --git a/src/cmd/compile/internal/syntax/testdata/issue43527.go b/src/cmd/compile/internal/syntax/testdata/issue43527.go new file mode 100644 index 0000000..dd2c9b1 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue43527.go @@ -0,0 +1,23 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +type ( + // 0 and 1-element []-lists are syntactically valid + _[A, B /* ERROR missing type constraint */ ] int + _[A, /* ERROR type parameters must be named */ interface{}] int + _[A, B, C /* ERROR missing type constraint */ ] int + _[A B, C /* ERROR missing type constraint */ ] int + _[A B, /* ERROR type parameters must be named */ interface{}] int + _[A B, /* ERROR type parameters must be named */ interface{}, C D] int + _[A B, /* ERROR type parameters must be named */ interface{}, C, D] int + _[A B, /* ERROR type parameters must be named */ interface{}, C, interface{}] int + _[A B, C interface{}, D, /* ERROR type parameters must be named */ interface{}] int +) + +// function type parameters use the same parsing routine - just have a couple of tests + +func _[A, B /* ERROR missing type constraint */ ]() {} +func _[A, /* ERROR type parameters must be named */ interface{}]() {} diff --git a/src/cmd/compile/internal/syntax/testdata/issue43674.go b/src/cmd/compile/internal/syntax/testdata/issue43674.go new file mode 100644 index 0000000..51c692a --- /dev/null +++ 
b/src/cmd/compile/internal/syntax/testdata/issue43674.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +func _(... /* ERROR [.][.][.] is missing type */ ) +func _(... /* ERROR [.][.][.] is missing type */ , int) + +func _(a, b ... /* ERROR [.][.][.] is missing type */ ) +func _(a, b ... /* ERROR [.][.][.] is missing type */ , x int) + +func _()(... /* ERROR [.][.][.] is missing type */ ) diff --git a/src/cmd/compile/internal/syntax/testdata/issue46558.go b/src/cmd/compile/internal/syntax/testdata/issue46558.go new file mode 100644 index 0000000..a22b600 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue46558.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +func F(s string) { + switch s[0] { + case 'a': + case s[2] { // ERROR unexpected { + case 'b': + } + } +} // ERROR non-declaration statement diff --git a/src/cmd/compile/internal/syntax/testdata/issue47704.go b/src/cmd/compile/internal/syntax/testdata/issue47704.go new file mode 100644 index 0000000..e4cdad1 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue47704.go @@ -0,0 +1,17 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +func _() { + _ = m[] // ERROR expected operand + _ = m[x,] + _ = m[x /* ERROR unexpected a */ a b c d] +} + +// test case from the issue +func f(m map[int]int) int { + return m[0 // ERROR expected comma, \: or \] + ] +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue48382.go b/src/cmd/compile/internal/syntax/testdata/issue48382.go new file mode 100644 index 0000000..7c024a0 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue48382.go @@ -0,0 +1,16 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +type _ func /* ERROR function type must have no type parameters */ [ /* ERROR empty type parameter list */ ]() +type _ func /* ERROR function type must have no type parameters */ [ x /* ERROR missing type constraint */ ]() +type _ func /* ERROR function type must have no type parameters */ [P any]() + +var _ = (func /* ERROR function type must have no type parameters */ [P any]())(nil) +var _ = func /* ERROR function type must have no type parameters */ [P any]() {} + +type _ interface{ + m /* ERROR interface method must have no type parameters */ [P any]() +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue49205.go b/src/cmd/compile/internal/syntax/testdata/issue49205.go new file mode 100644 index 0000000..bbcc950 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue49205.go @@ -0,0 +1,27 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +// test case from issue + +type _ interface{ + m /* ERROR unexpected int in interface type; possibly missing semicolon or newline or } */ int +} + +// other cases where the fix for this issue affects the error message + +const ( + x int = 10 /* ERROR unexpected literal "foo" in grouped declaration; possibly missing semicolon or newline or \) */ "foo" +) + +var _ = []int{1, 2, 3 /* ERROR unexpected int in composite literal; possibly missing comma or } */ int } + +type _ struct { + x y /* ERROR syntax error: unexpected comma in struct type; possibly missing semicolon or newline or } */ , +} + +func f(a, b c /* ERROR unexpected d in parameter list; possibly missing comma or \) */ d) { + f(a, b, c /* ERROR unexpected d in argument list; possibly missing comma or \) */ d) +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue49482.go b/src/cmd/compile/internal/syntax/testdata/issue49482.go new file mode 100644 index 0000000..1fc303d --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue49482.go @@ -0,0 +1,31 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +type ( + // these need a comma to disambiguate + _[P *T,] struct{} + _[P *T, _ any] struct{} + _[P (*T),] struct{} + _[P (*T), _ any] struct{} + _[P (T),] struct{} + _[P (T), _ any] struct{} + + // these parse as name followed by type + _[P *struct{}] struct{} + _[P (*struct{})] struct{} + _[P ([]int)] struct{} + + // array declarations + _ [P(T)]struct{} + _ [P((T))]struct{} + _ [P * *T] struct{} // this could be a name followed by a type but it makes the rules more complicated + _ [P * T]struct{} + _ [P(*T)]struct{} + _ [P(**T)]struct{} + _ [P * T - T]struct{} + _ [P*T-T /* ERROR unexpected comma */ ,]struct{} + _ [10 /* ERROR unexpected comma */ ,]struct{} +) diff --git a/src/cmd/compile/internal/syntax/testdata/issue52391.go b/src/cmd/compile/internal/syntax/testdata/issue52391.go new file mode 100644 index 0000000..f2098ce --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue52391.go @@ -0,0 +1,17 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +type _ interface { + int + (int) + (*int) + *([]byte) + ~(int) + (int) | (string) + (int) | ~(string) + (/* ERROR unexpected ~ */ ~int) + (int /* ERROR unexpected \| */ | /* ERROR unexpected string */ string /* ERROR unexpected \) */ ) +} diff --git a/src/cmd/compile/internal/syntax/testdata/issue56022.go b/src/cmd/compile/internal/syntax/testdata/issue56022.go new file mode 100644 index 0000000..d28d35c --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/issue56022.go @@ -0,0 +1,10 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +func /* ERROR unexpected {, expected name or \($ */ {} +func (T) /* ERROR unexpected {, expected name$ */ {} +func (T) /* ERROR unexpected \(, expected name$ */ () {} +func (T) /* ERROR unexpected \(, expected name$ */ () diff --git a/src/cmd/compile/internal/syntax/testdata/linalg.go b/src/cmd/compile/internal/syntax/testdata/linalg.go new file mode 100644 index 0000000..822d028 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/linalg.go @@ -0,0 +1,83 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package linalg + +import "math" + +// Numeric is a type bound that matches any numeric type. +// It would likely be in a constraints package in the standard library. +type Numeric interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | + uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | + float32 | ~float64 | + complex64 | ~complex128 +} + +func DotProduct[T Numeric](s1, s2 []T) T { + if len(s1) != len(s2) { + panic("DotProduct: slices of unequal length") + } + var r T + for i := range s1 { + r += s1[i] * s2[i] + } + return r +} + +// NumericAbs matches numeric types with an Abs method. +type NumericAbs[T any] interface { + Numeric + + Abs() T +} + +// AbsDifference computes the absolute value of the difference of +// a and b, where the absolute value is determined by the Abs method. +func AbsDifference[T NumericAbs[T]](a, b T) T { + d := a - b + return d.Abs() +} + +// OrderedNumeric is a type bound that matches numeric types that support the < operator. +type OrderedNumeric interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | + uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | + float32 | ~float64 +} + +// Complex is a type bound that matches the two complex types, which do not have a < operator. 
+type Complex interface { + ~complex64 | ~complex128 +} + +// OrderedAbs is a helper type that defines an Abs method for +// ordered numeric types. +type OrderedAbs[T OrderedNumeric] T + +func (a OrderedAbs[T]) Abs() OrderedAbs[T] { + if a < 0 { + return -a + } + return a +} + +// ComplexAbs is a helper type that defines an Abs method for +// complex types. +type ComplexAbs[T Complex] T + +func (a ComplexAbs[T]) Abs() ComplexAbs[T] { + r := float64(real(a)) + i := float64(imag(a)) + d := math.Sqrt(r * r + i * i) + return ComplexAbs[T](complex(d, 0)) +} + +func OrderedAbsDifference[T OrderedNumeric](a, b T) T { + return T(AbsDifference(OrderedAbs[T](a), OrderedAbs[T](b))) +} + +func ComplexAbsDifference[T Complex](a, b T) T { + return T(AbsDifference(ComplexAbs[T](a), ComplexAbs[T](b))) +} diff --git a/src/cmd/compile/internal/syntax/testdata/map.go b/src/cmd/compile/internal/syntax/testdata/map.go new file mode 100644 index 0000000..a508d21 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/map.go @@ -0,0 +1,112 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package orderedmap provides an ordered map, implemented as a binary tree. +package orderedmap + +import "chans" + +// Map is an ordered map. +type Map[K, V any] struct { + root *node[K, V] + compare func(K, K) int +} + +// node is the type of a node in the binary tree. +type node[K, V any] struct { + key K + val V + left, right *node[K, V] +} + +// New returns a new map. +func New[K, V any](compare func(K, K) int) *Map[K, V] { + return &Map[K, V]{compare: compare} +} + +// find looks up key in the map, and returns either a pointer +// to the node holding key, or a pointer to the location where +// such a node would go. 
+func (m *Map[K, V]) find(key K) **node[K, V] { + pn := &m.root + for *pn != nil { + switch cmp := m.compare(key, (*pn).key); { + case cmp < 0: + pn = &(*pn).left + case cmp > 0: + pn = &(*pn).right + default: + return pn + } + } + return pn +} + +// Insert inserts a new key/value into the map. +// If the key is already present, the value is replaced. +// Returns true if this is a new key, false if already present. +func (m *Map[K, V]) Insert(key K, val V) bool { + pn := m.find(key) + if *pn != nil { + (*pn).val = val + return false + } + *pn = &node[K, V]{key: key, val: val} + return true +} + +// Find returns the value associated with a key, or zero if not present. +// The found result reports whether the key was found. +func (m *Map[K, V]) Find(key K) (V, bool) { + pn := m.find(key) + if *pn == nil { + var zero V // see the discussion of zero values, above + return zero, false + } + return (*pn).val, true +} + +// keyValue is a pair of key and value used when iterating. +type keyValue[K, V any] struct { + key K + val V +} + +// InOrder returns an iterator that does an in-order traversal of the map. +func (m *Map[K, V]) InOrder() *Iterator[K, V] { + sender, receiver := chans.Ranger[keyValue[K, V]]() + var f func(*node[K, V]) bool + f = func(n *node[K, V]) bool { + if n == nil { + return true + } + // Stop sending values if sender.Send returns false, + // meaning that nothing is listening at the receiver end. + return f(n.left) && + sender.Send(keyValue[K, V]{n.key, n.val}) && + f(n.right) + } + go func() { + f(m.root) + sender.Close() + }() + return &Iterator[K, V]{receiver} +} + +// Iterator is used to iterate over the map. +type Iterator[K, V any] struct { + r *chans.Receiver[keyValue[K, V]] +} + +// Next returns the next key and value pair, and a boolean indicating +// whether they are valid or whether we have reached the end. 
+func (it *Iterator[K, V]) Next() (K, V, bool) { + keyval, ok := it.r.Next() + if !ok { + var zerok K + var zerov V + return zerok, zerov, false + } + return keyval.key, keyval.val, true +} diff --git a/src/cmd/compile/internal/syntax/testdata/map2.go b/src/cmd/compile/internal/syntax/testdata/map2.go new file mode 100644 index 0000000..2833445 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/map2.go @@ -0,0 +1,146 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file is like map.go2, but instead if importing chans, it contains +// the necessary functionality at the end of the file. + +// Package orderedmap provides an ordered map, implemented as a binary tree. +package orderedmap + +// Map is an ordered map. +type Map[K, V any] struct { + root *node[K, V] + compare func(K, K) int +} + +// node is the type of a node in the binary tree. +type node[K, V any] struct { + key K + val V + left, right *node[K, V] +} + +// New returns a new map. +func New[K, V any](compare func(K, K) int) *Map[K, V] { + return &Map[K, V]{compare: compare} +} + +// find looks up key in the map, and returns either a pointer +// to the node holding key, or a pointer to the location where +// such a node would go. +func (m *Map[K, V]) find(key K) **node[K, V] { + pn := &m.root + for *pn != nil { + switch cmp := m.compare(key, (*pn).key); { + case cmp < 0: + pn = &(*pn).left + case cmp > 0: + pn = &(*pn).right + default: + return pn + } + } + return pn +} + +// Insert inserts a new key/value into the map. +// If the key is already present, the value is replaced. +// Returns true if this is a new key, false if already present. 
+func (m *Map[K, V]) Insert(key K, val V) bool { + pn := m.find(key) + if *pn != nil { + (*pn).val = val + return false + } + *pn = &node[K, V]{key: key, val: val} + return true +} + +// Find returns the value associated with a key, or zero if not present. +// The found result reports whether the key was found. +func (m *Map[K, V]) Find(key K) (V, bool) { + pn := m.find(key) + if *pn == nil { + var zero V // see the discussion of zero values, above + return zero, false + } + return (*pn).val, true +} + +// keyValue is a pair of key and value used when iterating. +type keyValue[K, V any] struct { + key K + val V +} + +// InOrder returns an iterator that does an in-order traversal of the map. +func (m *Map[K, V]) InOrder() *Iterator[K, V] { + sender, receiver := chans_Ranger[keyValue[K, V]]() + var f func(*node[K, V]) bool + f = func(n *node[K, V]) bool { + if n == nil { + return true + } + // Stop sending values if sender.Send returns false, + // meaning that nothing is listening at the receiver end. + return f(n.left) && + sender.Send(keyValue[K, V]{n.key, n.val}) && + f(n.right) + } + go func() { + f(m.root) + sender.Close() + }() + return &Iterator[K, V]{receiver} +} + +// Iterator is used to iterate over the map. +type Iterator[K, V any] struct { + r *chans_Receiver[keyValue[K, V]] +} + +// Next returns the next key and value pair, and a boolean indicating +// whether they are valid or whether we have reached the end. +func (it *Iterator[K, V]) Next() (K, V, bool) { + keyval, ok := it.r.Next() + if !ok { + var zerok K + var zerov V + return zerok, zerov, false + } + return keyval.key, keyval.val, true +} + +// chans + +func chans_Ranger[T any]() (*chans_Sender[T], *chans_Receiver[T]) + +// A sender is used to send values to a Receiver. 
+type chans_Sender[T any] struct { + values chan<- T + done <-chan bool +} + +func (s *chans_Sender[T]) Send(v T) bool { + select { + case s.values <- v: + return true + case <-s.done: + return false + } +} + +func (s *chans_Sender[T]) Close() { + close(s.values) +} + +type chans_Receiver[T any] struct { + values <-chan T + done chan<- bool +} + +func (r *chans_Receiver[T]) Next() (T, bool) { + v, ok := <-r.values + return v, ok +}
\ No newline at end of file diff --git a/src/cmd/compile/internal/syntax/testdata/sample.go b/src/cmd/compile/internal/syntax/testdata/sample.go new file mode 100644 index 0000000..5a2b4bf --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/sample.go @@ -0,0 +1,33 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a sample test file illustrating the use +// of error comments with the error test harness. + +package p + +// The following are invalid error comments; they are +// silently ignored. The prefix must be exactly one of +// "/* ERROR " or "// ERROR ". +// +/*ERROR*/ +/*ERROR foo*/ +/* ERRORfoo */ +/* ERROR foo */ +//ERROR +// ERROR +// ERRORfoo +// ERROR foo + +// This is a valid error comment; it applies to the +// immediately following token. +import "math" /* ERROR unexpected comma */ , + +// If there are multiple /*-style error comments before +// the next token, only the last one is considered. +type x = /* ERROR ignored */ /* ERROR literal 0 in type declaration */ 0 + +// A //-style error comment matches any error position +// on the same line. +func () foo() // ERROR method has no receiver diff --git a/src/cmd/compile/internal/syntax/testdata/slices.go b/src/cmd/compile/internal/syntax/testdata/slices.go new file mode 100644 index 0000000..9265109 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/slices.go @@ -0,0 +1,68 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package slices implements various slice algorithms. +package slices + +// Map turns a []T1 to a []T2 using a mapping function. 
+func Map[T1, T2 any](s []T1, f func(T1) T2) []T2 { + r := make([]T2, len(s)) + for i, v := range s { + r[i] = f(v) + } + return r +} + +// Reduce reduces a []T1 to a single value using a reduction function. +func Reduce[T1, T2 any](s []T1, initializer T2, f func(T2, T1) T2) T2 { + r := initializer + for _, v := range s { + r = f(r, v) + } + return r +} + +// Filter filters values from a slice using a filter function. +func Filter[T any](s []T, f func(T) bool) []T { + var r []T + for _, v := range s { + if f(v) { + r = append(r, v) + } + } + return r +} + +// Example uses + +func limiter(x int) byte { + switch { + case x < 0: + return 0 + default: + return byte(x) + case x > 255: + return 255 + } +} + +var input = []int{-4, 68954, 7, 44, 0, -555, 6945} +var limited1 = Map[int, byte](input, limiter) +var limited2 = Map(input, limiter) // using type inference + +func reducer(x float64, y int) float64 { + return x + float64(y) +} + +var reduced1 = Reduce[int, float64](input, 0, reducer) +var reduced2 = Reduce(input, 1i, reducer) // using type inference +var reduced3 = Reduce(input, 1, reducer) // using type inference + +func filter(x int) bool { + return x&1 != 0 +} + +var filtered1 = Filter[int](input, filter) +var filtered2 = Filter(input, filter) // using type inference + diff --git a/src/cmd/compile/internal/syntax/testdata/smoketest.go b/src/cmd/compile/internal/syntax/testdata/smoketest.go new file mode 100644 index 0000000..6b3593a --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/smoketest.go @@ -0,0 +1,73 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains basic generic code snippets. 
+ +package p + +// type parameter lists +type B[P any] struct{} +type _[P interface{}] struct{} +type _[P B] struct{} +type _[P B[P]] struct{} + +type _[A, B, C any] struct{} +type _[A, B, C B] struct{} +type _[A, B, C B[A, B, C]] struct{} +type _[A1, A2 B1, A3 B2, A4, A5, A6 B3] struct{} + +type _[A interface{}] struct{} +type _[A, B interface{ m() }] struct{} + +type _[A, B, C any] struct{} + +// in functions +func _[P any]() +func _[P interface{}]() +func _[P B]() +func _[P B[P]]() + +// type instantiations +type _ T[int] + +// in expressions +var _ = T[int]{} + +// in embedded types +type _ struct{ T[int] } + +// interfaces +type _ interface { + m() + ~int +} + +type _ interface { + ~int | ~float | ~string + ~complex128 + underlying(underlying underlying) underlying +} + +type _ interface { + T + T[int] +} + +// tricky cases +func _(T[P], T[P1, P2]) +func _(a [N]T) + +type _ struct { + T[P] + T[P1, P2] + f[N] +} +type _ interface { + m() + + // instantiated types + T[ /* ERROR empty type argument list */ ] + T[P] + T[P1, P2] +} diff --git a/src/cmd/compile/internal/syntax/testdata/tparams.go b/src/cmd/compile/internal/syntax/testdata/tparams.go new file mode 100644 index 0000000..646fbbe --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/tparams.go @@ -0,0 +1,46 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +type t[a, b /* ERROR missing type constraint */ ] struct{} +type t[a t, b t, c /* ERROR missing type constraint */ ] struct{} +type t struct { + t [n]byte + t[a] + t[a, b] +} +type t interface { + t[a] + m /* ERROR method must have no type parameters */ [_ _, /* ERROR mixed */ _]() + t[a, b] +} + +func f[ /* ERROR empty type parameter list */ ]() +func f[a, b /* ERROR missing type constraint */ ]() +func f[a t, b t, c /* ERROR missing type constraint */ ]() + +func f[a b, /* ERROR expected ] */ 0] () + +// issue #49482 +type ( + t[a *[]int] struct{} + t[a *t,] struct{} + t[a *t|[]int] struct{} + t[a *t|t,] struct{} + t[a *t|~t,] struct{} + t[a *struct{}|t] struct{} + t[a *t|struct{}] struct{} + t[a *struct{}|~t] struct{} +) + +// issue #51488 +type ( + t[a *t|t,] struct{} + t[a *t|t, b t] struct{} + t[a *t|t] struct{} + t[a *[]t|t] struct{} + t[a ([]t)] struct{} + t[a ([]t)|t] struct{} +) diff --git a/src/cmd/compile/internal/syntax/testdata/typeset.go b/src/cmd/compile/internal/syntax/testdata/typeset.go new file mode 100644 index 0000000..fe5c3f4 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testdata/typeset.go @@ -0,0 +1,91 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains test cases for typeset-only constraint elements. 
+ +package p + +type ( + _[_ t] t + _[_ ~t] t + _[_ t|t] t + _[_ ~t|t] t + _[_ t|~t] t + _[_ ~t|~t] t + + _[_ t, _, _ t|t] t + _[_ t, _, _ ~t|t] t + _[_ t, _, _ t|~t] t + _[_ t, _, _ ~t|~t] t + + _[_ t.t] t + _[_ ~t.t] t + _[_ t.t|t.t] t + _[_ ~t.t|t.t] t + _[_ t.t|~t.t] t + _[_ ~t.t|~t.t] t + + _[_ t, _, _ t.t|t.t] t + _[_ t, _, _ ~t.t|t.t] t + _[_ t, _, _ t.t|~t.t] t + _[_ t, _, _ ~t.t|~t.t] t + + _[_ struct{}] t + _[_ ~struct{}] t + + _[_ struct{}|t] t + _[_ ~struct{}|t] t + _[_ struct{}|~t] t + _[_ ~struct{}|~t] t + + _[_ t|struct{}] t + _[_ ~t|struct{}] t + _[_ t|~struct{}] t + _[_ ~t|~struct{}] t + + // test cases for issue #49175 + _[_ []t]t + _[_ [1]t]t + _[_ ~[]t]t + _[_ ~[1]t]t + t [ /* ERROR type parameters must be named */ t[0]]t +) + +// test cases for issue #49174 +func _[_ t]() {} +func _[_ []t]() {} +func _[_ [1]t]() {} +func _[_ []t | t]() {} +func _[_ [1]t | t]() {} +func _[_ t | []t]() {} +func _[_ []t | []t]() {} +func _[_ [1]t | [1]t]() {} +func _[_ t[t] | t[t]]() {} + +// Single-expression type parameter lists and those that don't start +// with a (type parameter) name are considered array sizes. +// The term must be a valid expression (it could be a type incl. a +// tilde term) but the type-checker will complain. +type ( + _[t] t + _[t|t] t + + // These are invalid and the type-checker will complain. 
+ _[~t] t + _[~t|t] t + _[t|~t] t + _[~t|~t] t +) + +type ( + _[_ t, t /* ERROR missing type constraint */ ] t + _[_ ~t, t /* ERROR missing type constraint */ ] t + _[_ t, /* ERROR type parameters must be named */ ~t] t + _[_ ~t, /* ERROR type parameters must be named */ ~t] t + + _[_ t|t, /* ERROR type parameters must be named */ t|t] t + _[_ ~t|t, /* ERROR type parameters must be named */ t|t] t + _[_ t|t, /* ERROR type parameters must be named */ ~t|t] t + _[_ ~t|t, /* ERROR type parameters must be named */ ~t|t] t +) diff --git a/src/cmd/compile/internal/syntax/testing.go b/src/cmd/compile/internal/syntax/testing.go new file mode 100644 index 0000000..6a97dc0 --- /dev/null +++ b/src/cmd/compile/internal/syntax/testing.go @@ -0,0 +1,72 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements testing support. + +package syntax + +import ( + "io" + "regexp" + "strings" +) + +// CommentsDo parses the given source and calls the provided handler for each +// comment or error. If the text provided to handler starts with a '/' it is +// the comment text; otherwise it is the error message. +func CommentsDo(src io.Reader, handler func(line, col uint, text string)) { + var s scanner + s.init(src, handler, comments) + for s.tok != _EOF { + s.next() + } +} + +// ERROR comments must start with text `ERROR "msg"` or `ERROR msg`. +// Space around "msg" or msg is ignored. +var errRx = regexp.MustCompile(`^ *ERROR *"?([^"]*)"?`) + +// ErrorMap collects all comments with comment text of the form +// `ERROR "msg"` or `ERROR msg` from the given src and returns them +// as []Error lists in a map indexed by line number. 
The position +// for each Error is the position of the token immediately preceding +// the comment, the Error message is the message msg extracted from +// the comment, with all errors that are on the same line collected +// in a slice, in source order. If there is no preceding token (the +// `ERROR` comment appears in the beginning of the file), then the +// recorded position is unknown (line, col = 0, 0). If there are no +// ERROR comments, the result is nil. +func ErrorMap(src io.Reader) (errmap map[uint][]Error) { + // position of previous token + var base *PosBase + var prev struct{ line, col uint } + + var s scanner + s.init(src, func(_, _ uint, text string) { + if text[0] != '/' { + return // error, ignore + } + if text[1] == '*' { + text = text[:len(text)-2] // strip trailing */ + } + if s := errRx.FindStringSubmatch(text[2:]); len(s) == 2 { + pos := MakePos(base, prev.line, prev.col) + err := Error{pos, strings.TrimSpace(s[1])} + if errmap == nil { + errmap = make(map[uint][]Error) + } + errmap[prev.line] = append(errmap[prev.line], err) + } + }, comments) + + for s.tok != _EOF { + s.next() + if s.tok == _Semi && s.lit != "semicolon" { + continue // ignore automatically inserted semicolons + } + prev.line, prev.col = s.line, s.col + } + + return +} diff --git a/src/cmd/compile/internal/syntax/testing_test.go b/src/cmd/compile/internal/syntax/testing_test.go new file mode 100644 index 0000000..d34e5ea --- /dev/null +++ b/src/cmd/compile/internal/syntax/testing_test.go @@ -0,0 +1,45 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syntax + +import ( + "fmt" + "strings" + "testing" +) + +func TestErrorMap(t *testing.T) { + const src = `/* ERROR 0:0 */ /* ERROR "0:0" */ // ERROR 0:0 +// ERROR "0:0" +x /* ERROR 3:1 */ // ignore automatically inserted semicolon here +/* ERROR 3:1 */ // position of x on previous line + x /* ERROR 5:4 */ ; // do not ignore this semicolon +/* ERROR 5:22 */ // position of ; on previous line + package /* ERROR 7:2 */ // indented with tab + import /* ERROR 8:9 */ // indented with blanks +` + m := ErrorMap(strings.NewReader(src)) + got := 0 // number of errors found + for line, errlist := range m { + for _, err := range errlist { + if err.Pos.Line() != line { + t.Errorf("%v: got map line %d; want %d", err, err.Pos.Line(), line) + continue + } + // err.Pos.Line() == line + msg := fmt.Sprintf("%d:%d", line, err.Pos.Col()) + if err.Msg != msg { + t.Errorf("%v: got msg %q; want %q", err, err.Msg, msg) + continue + } + } + got += len(errlist) + } + + want := strings.Count(src, "ERROR") + if got != want { + t.Errorf("ErrorMap got %d errors; want %d", got, want) + } +} diff --git a/src/cmd/compile/internal/syntax/token_string.go b/src/cmd/compile/internal/syntax/token_string.go new file mode 100644 index 0000000..ef295eb --- /dev/null +++ b/src/cmd/compile/internal/syntax/token_string.go @@ -0,0 +1,70 @@ +// Code generated by "stringer -type token -linecomment tokens.go"; DO NOT EDIT. + +package syntax + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[_EOF-1] + _ = x[_Name-2] + _ = x[_Literal-3] + _ = x[_Operator-4] + _ = x[_AssignOp-5] + _ = x[_IncOp-6] + _ = x[_Assign-7] + _ = x[_Define-8] + _ = x[_Arrow-9] + _ = x[_Star-10] + _ = x[_Lparen-11] + _ = x[_Lbrack-12] + _ = x[_Lbrace-13] + _ = x[_Rparen-14] + _ = x[_Rbrack-15] + _ = x[_Rbrace-16] + _ = x[_Comma-17] + _ = x[_Semi-18] + _ = x[_Colon-19] + _ = x[_Dot-20] + _ = x[_DotDotDot-21] + _ = x[_Break-22] + _ = x[_Case-23] + _ = x[_Chan-24] + _ = x[_Const-25] + _ = x[_Continue-26] + _ = x[_Default-27] + _ = x[_Defer-28] + _ = x[_Else-29] + _ = x[_Fallthrough-30] + _ = x[_For-31] + _ = x[_Func-32] + _ = x[_Go-33] + _ = x[_Goto-34] + _ = x[_If-35] + _ = x[_Import-36] + _ = x[_Interface-37] + _ = x[_Map-38] + _ = x[_Package-39] + _ = x[_Range-40] + _ = x[_Return-41] + _ = x[_Select-42] + _ = x[_Struct-43] + _ = x[_Switch-44] + _ = x[_Type-45] + _ = x[_Var-46] + _ = x[tokenCount-47] +} + +const _token_name = "EOFnameliteralopop=opop=:=<-*([{)]},;:....breakcasechanconstcontinuedefaultdeferelsefallthroughforfuncgogotoifimportinterfacemappackagerangereturnselectstructswitchtypevar" + +var _token_index = [...]uint8{0, 3, 7, 14, 16, 19, 23, 24, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 42, 47, 51, 55, 60, 68, 75, 80, 84, 95, 98, 102, 104, 108, 110, 116, 125, 128, 135, 140, 146, 152, 158, 164, 168, 171, 171} + +func (i token) String() string { + i -= 1 + if i >= token(len(_token_index)-1) { + return "token(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _token_name[_token_index[i]:_token_index[i+1]] +} diff --git a/src/cmd/compile/internal/syntax/tokens.go b/src/cmd/compile/internal/syntax/tokens.go new file mode 100644 index 0000000..6dece1a --- /dev/null +++ b/src/cmd/compile/internal/syntax/tokens.go @@ -0,0 +1,157 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syntax + +type token uint + +//go:generate stringer -type token -linecomment tokens.go + +const ( + _ token = iota + _EOF // EOF + + // names and literals + _Name // name + _Literal // literal + + // operators and operations + // _Operator is excluding '*' (_Star) + _Operator // op + _AssignOp // op= + _IncOp // opop + _Assign // = + _Define // := + _Arrow // <- + _Star // * + + // delimiters + _Lparen // ( + _Lbrack // [ + _Lbrace // { + _Rparen // ) + _Rbrack // ] + _Rbrace // } + _Comma // , + _Semi // ; + _Colon // : + _Dot // . + _DotDotDot // ... + + // keywords + _Break // break + _Case // case + _Chan // chan + _Const // const + _Continue // continue + _Default // default + _Defer // defer + _Else // else + _Fallthrough // fallthrough + _For // for + _Func // func + _Go // go + _Goto // goto + _If // if + _Import // import + _Interface // interface + _Map // map + _Package // package + _Range // range + _Return // return + _Select // select + _Struct // struct + _Switch // switch + _Type // type + _Var // var + + // empty line comment to exclude it from .String + tokenCount // +) + +const ( + // for BranchStmt + Break = _Break + Continue = _Continue + Fallthrough = _Fallthrough + Goto = _Goto + + // for CallStmt + Go = _Go + Defer = _Defer +) + +// Make sure we have at most 64 tokens so we can use them in a set. +const _ uint64 = 1 << (tokenCount - 1) + +// contains reports whether tok is in tokset. +func contains(tokset uint64, tok token) bool { + return tokset&(1<<tok) != 0 +} + +type LitKind uint8 + +// TODO(gri) With the 'i' (imaginary) suffix now permitted on integer +// and floating-point numbers, having a single ImagLit does +// not represent the literal kind well anymore. Remove it? +const ( + IntLit LitKind = iota + FloatLit + ImagLit + RuneLit + StringLit +) + +type Operator uint + +//go:generate stringer -type Operator -linecomment tokens.go + +const ( + _ Operator = iota + + // Def is the : in := + Def // : + Not // ! 
+ Recv // <- + Tilde // ~ + + // precOrOr + OrOr // || + + // precAndAnd + AndAnd // && + + // precCmp + Eql // == + Neq // != + Lss // < + Leq // <= + Gtr // > + Geq // >= + + // precAdd + Add // + + Sub // - + Or // | + Xor // ^ + + // precMul + Mul // * + Div // / + Rem // % + And // & + AndNot // &^ + Shl // << + Shr // >> +) + +// Operator precedences +const ( + _ = iota + precOrOr + precAndAnd + precCmp + precAdd + precMul +) diff --git a/src/cmd/compile/internal/syntax/type.go b/src/cmd/compile/internal/syntax/type.go new file mode 100644 index 0000000..01eab7a --- /dev/null +++ b/src/cmd/compile/internal/syntax/type.go @@ -0,0 +1,73 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import "go/constant" + +// A Type represents a type of Go. +// All types implement the Type interface. +// (This type originally lived in types2. We moved it here +// so we could depend on it from other packages without +// introducing a circularity.) +type Type interface { + // Underlying returns the underlying type of a type. + Underlying() Type + + // String returns a string representation of a type. + String() string +} + +// Expressions in the syntax package provide storage for +// the typechecker to record its results. This interface +// is the mechanism the typechecker uses to record results, +// and clients use to retrieve those results. +type typeInfo interface { + SetTypeInfo(TypeAndValue) + GetTypeInfo() TypeAndValue +} + +// A TypeAndValue records the type information, constant +// value if known, and various other flags associated with +// an expression. +// This type is similar to types2.TypeAndValue, but exposes +// none of types2's internals. 
+type TypeAndValue struct { + Type Type + Value constant.Value + exprFlags +} + +type exprFlags uint8 + +func (f exprFlags) IsVoid() bool { return f&1 != 0 } +func (f exprFlags) IsType() bool { return f&2 != 0 } +func (f exprFlags) IsBuiltin() bool { return f&4 != 0 } +func (f exprFlags) IsValue() bool { return f&8 != 0 } +func (f exprFlags) IsNil() bool { return f&16 != 0 } +func (f exprFlags) Addressable() bool { return f&32 != 0 } +func (f exprFlags) Assignable() bool { return f&64 != 0 } +func (f exprFlags) HasOk() bool { return f&128 != 0 } + +func (f *exprFlags) SetIsVoid() { *f |= 1 } +func (f *exprFlags) SetIsType() { *f |= 2 } +func (f *exprFlags) SetIsBuiltin() { *f |= 4 } +func (f *exprFlags) SetIsValue() { *f |= 8 } +func (f *exprFlags) SetIsNil() { *f |= 16 } +func (f *exprFlags) SetAddressable() { *f |= 32 } +func (f *exprFlags) SetAssignable() { *f |= 64 } +func (f *exprFlags) SetHasOk() { *f |= 128 } + +// a typeAndValue contains the results of typechecking an expression. +// It is embedded in expression nodes. +type typeAndValue struct { + tv TypeAndValue +} + +func (x *typeAndValue) SetTypeInfo(tv TypeAndValue) { + x.tv = tv +} +func (x *typeAndValue) GetTypeInfo() TypeAndValue { + return x.tv +} diff --git a/src/cmd/compile/internal/syntax/walk.go b/src/cmd/compile/internal/syntax/walk.go new file mode 100644 index 0000000..8f1d566 --- /dev/null +++ b/src/cmd/compile/internal/syntax/walk.go @@ -0,0 +1,362 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements syntax tree walking. + +package syntax + +import "fmt" + +// Inspect traverses an AST in pre-order: It starts by calling +// f(node); node must not be nil. If f returns true, Inspect invokes f +// recursively for each of the non-nil children of node, followed by a +// call of f(nil). +// +// See Walk for caveats about shared nodes. 
+func Inspect(root Node, f func(Node) bool) { + Walk(root, inspector(f)) +} + +type inspector func(Node) bool + +func (v inspector) Visit(node Node) Visitor { + if v(node) { + return v + } + return nil +} + +// Crawl traverses a syntax in pre-order: It starts by calling f(root); +// root must not be nil. If f returns false (== "continue"), Crawl calls +// f recursively for each of the non-nil children of that node; if f +// returns true (== "stop"), Crawl does not traverse the respective node's +// children. +// +// See Walk for caveats about shared nodes. +// +// Deprecated: Use Inspect instead. +func Crawl(root Node, f func(Node) bool) { + Inspect(root, func(node Node) bool { + return node != nil && !f(node) + }) +} + +// Walk traverses an AST in pre-order: It starts by calling +// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). +// +// Some nodes may be shared among multiple parent nodes (e.g., types in +// field lists such as type T in "a, b, c T"). Such shared nodes are +// walked multiple times. +// TODO(gri) Revisit this design. It may make sense to walk those nodes +// only once. A place where this matters is types2.TestResolveIdents. +func Walk(root Node, v Visitor) { + walker{v}.node(root) +} + +// A Visitor's Visit method is invoked for each node encountered by Walk. +// If the result visitor w is not nil, Walk visits each of the children +// of node with the visitor w, followed by a call of w.Visit(nil). 
+type Visitor interface { + Visit(node Node) (w Visitor) +} + +type walker struct { + v Visitor +} + +func (w walker) node(n Node) { + if n == nil { + panic("nil node") + } + + w.v = w.v.Visit(n) + if w.v == nil { + return + } + + switch n := n.(type) { + // packages + case *File: + w.node(n.PkgName) + w.declList(n.DeclList) + + // declarations + case *ImportDecl: + if n.LocalPkgName != nil { + w.node(n.LocalPkgName) + } + w.node(n.Path) + + case *ConstDecl: + w.nameList(n.NameList) + if n.Type != nil { + w.node(n.Type) + } + if n.Values != nil { + w.node(n.Values) + } + + case *TypeDecl: + w.node(n.Name) + w.fieldList(n.TParamList) + w.node(n.Type) + + case *VarDecl: + w.nameList(n.NameList) + if n.Type != nil { + w.node(n.Type) + } + if n.Values != nil { + w.node(n.Values) + } + + case *FuncDecl: + if n.Recv != nil { + w.node(n.Recv) + } + w.node(n.Name) + w.fieldList(n.TParamList) + w.node(n.Type) + if n.Body != nil { + w.node(n.Body) + } + + // expressions + case *BadExpr: // nothing to do + case *Name: // nothing to do + case *BasicLit: // nothing to do + + case *CompositeLit: + if n.Type != nil { + w.node(n.Type) + } + w.exprList(n.ElemList) + + case *KeyValueExpr: + w.node(n.Key) + w.node(n.Value) + + case *FuncLit: + w.node(n.Type) + w.node(n.Body) + + case *ParenExpr: + w.node(n.X) + + case *SelectorExpr: + w.node(n.X) + w.node(n.Sel) + + case *IndexExpr: + w.node(n.X) + w.node(n.Index) + + case *SliceExpr: + w.node(n.X) + for _, x := range n.Index { + if x != nil { + w.node(x) + } + } + + case *AssertExpr: + w.node(n.X) + w.node(n.Type) + + case *TypeSwitchGuard: + if n.Lhs != nil { + w.node(n.Lhs) + } + w.node(n.X) + + case *Operation: + w.node(n.X) + if n.Y != nil { + w.node(n.Y) + } + + case *CallExpr: + w.node(n.Fun) + w.exprList(n.ArgList) + + case *ListExpr: + w.exprList(n.ElemList) + + // types + case *ArrayType: + if n.Len != nil { + w.node(n.Len) + } + w.node(n.Elem) + + case *SliceType: + w.node(n.Elem) + + case *DotsType: + w.node(n.Elem) + + 
case *StructType: + w.fieldList(n.FieldList) + for _, t := range n.TagList { + if t != nil { + w.node(t) + } + } + + case *Field: + if n.Name != nil { + w.node(n.Name) + } + w.node(n.Type) + + case *InterfaceType: + w.fieldList(n.MethodList) + + case *FuncType: + w.fieldList(n.ParamList) + w.fieldList(n.ResultList) + + case *MapType: + w.node(n.Key) + w.node(n.Value) + + case *ChanType: + w.node(n.Elem) + + // statements + case *EmptyStmt: // nothing to do + + case *LabeledStmt: + w.node(n.Label) + w.node(n.Stmt) + + case *BlockStmt: + w.stmtList(n.List) + + case *ExprStmt: + w.node(n.X) + + case *SendStmt: + w.node(n.Chan) + w.node(n.Value) + + case *DeclStmt: + w.declList(n.DeclList) + + case *AssignStmt: + w.node(n.Lhs) + if n.Rhs != nil { + w.node(n.Rhs) + } + + case *BranchStmt: + if n.Label != nil { + w.node(n.Label) + } + // Target points to nodes elsewhere in the syntax tree + + case *CallStmt: + w.node(n.Call) + + case *ReturnStmt: + if n.Results != nil { + w.node(n.Results) + } + + case *IfStmt: + if n.Init != nil { + w.node(n.Init) + } + w.node(n.Cond) + w.node(n.Then) + if n.Else != nil { + w.node(n.Else) + } + + case *ForStmt: + if n.Init != nil { + w.node(n.Init) + } + if n.Cond != nil { + w.node(n.Cond) + } + if n.Post != nil { + w.node(n.Post) + } + w.node(n.Body) + + case *SwitchStmt: + if n.Init != nil { + w.node(n.Init) + } + if n.Tag != nil { + w.node(n.Tag) + } + for _, s := range n.Body { + w.node(s) + } + + case *SelectStmt: + for _, s := range n.Body { + w.node(s) + } + + // helper nodes + case *RangeClause: + if n.Lhs != nil { + w.node(n.Lhs) + } + w.node(n.X) + + case *CaseClause: + if n.Cases != nil { + w.node(n.Cases) + } + w.stmtList(n.Body) + + case *CommClause: + if n.Comm != nil { + w.node(n.Comm) + } + w.stmtList(n.Body) + + default: + panic(fmt.Sprintf("internal error: unknown node type %T", n)) + } + + w.v.Visit(nil) +} + +func (w walker) declList(list []Decl) { + for _, n := range list { + w.node(n) + } +} + +func (w walker) 
exprList(list []Expr) { + for _, n := range list { + w.node(n) + } +} + +func (w walker) stmtList(list []Stmt) { + for _, n := range list { + w.node(n) + } +} + +func (w walker) nameList(list []*Name) { + for _, n := range list { + w.node(n) + } +} + +func (w walker) fieldList(list []*Field) { + for _, n := range list { + w.node(n) + } +} |