Diffstat (limited to 'src/cmd/asm/internal/lex')
-rw-r--r--  src/cmd/asm/internal/lex/input.go      486
-rw-r--r--  src/cmd/asm/internal/lex/lex.go        141
-rw-r--r--  src/cmd/asm/internal/lex/lex_test.go   365
-rw-r--r--  src/cmd/asm/internal/lex/slice.go       74
-rw-r--r--  src/cmd/asm/internal/lex/stack.go       61
-rw-r--r--  src/cmd/asm/internal/lex/tokenizer.go  153
6 files changed, 1280 insertions(+), 0 deletions(-)
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go
new file mode 100644
index 0000000..da4ebe6
--- /dev/null
+++ b/src/cmd/asm/internal/lex/input.go
@@ -0,0 +1,486 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "text/scanner"
+
+ "cmd/asm/internal/flags"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+)
+
+// Input is the main input: a stack of readers and some macro definitions.
+// It also handles #include processing (by pushing onto the input stack)
+// and parses and instantiates macro definitions.
+type Input struct {
+ Stack
+ includes []string
+ beginningOfLine bool
+ ifdefStack []bool
+ macros map[string]*Macro
+ text string // Text of last token returned by Next.
+ peek bool
+ peekToken ScanToken
+ peekText string
+}
+
+// NewInput returns an Input from the given path.
+func NewInput(name string) *Input {
+ return &Input{
+ // include directories: look in source dir, then -I directories.
+ includes: append([]string{filepath.Dir(name)}, flags.I...),
+ beginningOfLine: true,
+ macros: predefine(flags.D),
+ }
+}
+
+// predefine installs the macros set by the -D flag on the command line.
+func predefine(defines flags.MultiFlag) map[string]*Macro {
+ macros := make(map[string]*Macro)
+ for _, name := range defines {
+ value := "1"
+ i := strings.IndexRune(name, '=')
+ if i > 0 {
+ name, value = name[:i], name[i+1:]
+ }
+ tokens := Tokenize(name)
+ if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
+ fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
+ flags.Usage()
+ }
+ macros[name] = &Macro{
+ name: name,
+ args: nil,
+ tokens: Tokenize(value),
+ }
+ }
+ return macros
+}
+
+var panicOnError bool // For testing.
+
+func (in *Input) Error(args ...interface{}) {
+ if panicOnError {
+ panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)))
+ }
+ fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
+ os.Exit(1)
+}
+
+// expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
+func (in *Input) expectText(args ...interface{}) {
+ in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...)
+}
+
+// enabled reports whether the input is enabled by an ifdef, or is at the top level.
+func (in *Input) enabled() bool {
+ return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
+}
+
+func (in *Input) expectNewline(directive string) {
+ tok := in.Stack.Next()
+ if tok != '\n' {
+ in.expectText("expected newline after", directive)
+ }
+}
+
+func (in *Input) Next() ScanToken {
+ if in.peek {
+ in.peek = false
+ tok := in.peekToken
+ in.text = in.peekText
+ return tok
+ }
+ // If we cannot generate a token after 100 macro invocations, we're in trouble.
+ // The usual case is caught by Push, below, but be safe.
+ for nesting := 0; nesting < 100; {
+ tok := in.Stack.Next()
+ switch tok {
+ case '#':
+ if !in.beginningOfLine {
+ in.Error("'#' must be first item on line")
+ }
+ in.beginningOfLine = in.hash()
+ in.text = "#"
+ return '#'
+
+ case scanner.Ident:
+ // Is it a macro name?
+ name := in.Stack.Text()
+ macro := in.macros[name]
+ if macro != nil {
+ nesting++
+ in.invokeMacro(macro)
+ continue
+ }
+ fallthrough
+ default:
+ if tok == scanner.EOF && len(in.ifdefStack) > 0 {
+ // We're skipping text but have run out of input with no #endif.
+ in.Error("unclosed #ifdef or #ifndef")
+ }
+ in.beginningOfLine = tok == '\n'
+ if in.enabled() {
+ in.text = in.Stack.Text()
+ return tok
+ }
+ }
+ }
+ in.Error("recursive macro invocation")
+ return 0
+}
+
+func (in *Input) Text() string {
+ return in.text
+}
+
+// hash processes a # preprocessor directive. It reports whether it completes.
+func (in *Input) hash() bool {
+ // We have a '#'; it must be followed by a known word (define, include, etc.).
+ tok := in.Stack.Next()
+ if tok != scanner.Ident {
+ in.expectText("expected identifier after '#'")
+ }
+ if !in.enabled() {
+ // Can only start including again if we are at #else or #endif but also
+ // need to keep track of nested #if[n]defs.
+ // We let #line through because it might affect errors.
+ switch in.Stack.Text() {
+ case "else", "endif", "ifdef", "ifndef", "line":
+ // Press on.
+ default:
+ return false
+ }
+ }
+ switch in.Stack.Text() {
+ case "define":
+ in.define()
+ case "else":
+ in.else_()
+ case "endif":
+ in.endif()
+ case "ifdef":
+ in.ifdef(true)
+ case "ifndef":
+ in.ifdef(false)
+ case "include":
+ in.include()
+ case "line":
+ in.line()
+ case "undef":
+ in.undef()
+ default:
+ in.Error("unexpected token after '#':", in.Stack.Text())
+ }
+ return true
+}
+
+// macroName returns the name for the macro being referenced.
+func (in *Input) macroName() string {
+ // We use the Stack's input method; no macro processing at this stage.
+ tok := in.Stack.Next()
+ if tok != scanner.Ident {
+ in.expectText("expected identifier after # directive")
+ }
+ // Name is alphanumeric by definition.
+ return in.Stack.Text()
+}
+
+// #define processing.
+func (in *Input) define() {
+ name := in.macroName()
+ args, tokens := in.macroDefinition(name)
+ in.defineMacro(name, args, tokens)
+}
+
+// defineMacro stores the macro definition in the Input.
+func (in *Input) defineMacro(name string, args []string, tokens []Token) {
+ if in.macros[name] != nil {
+ in.Error("redefinition of macro:", name)
+ }
+ in.macros[name] = &Macro{
+ name: name,
+ args: args,
+ tokens: tokens,
+ }
+}
+
+// macroDefinition returns the list of formals and the tokens of the definition.
+// The argument list is nil for no parens on the definition; otherwise a list of
+// formal argument names.
+func (in *Input) macroDefinition(name string) ([]string, []Token) {
+ prevCol := in.Stack.Col()
+ tok := in.Stack.Next()
+ if tok == '\n' || tok == scanner.EOF {
+ return nil, nil // No definition for macro
+ }
+ var args []string
+ // The C preprocessor treats
+ // #define A(x)
+ // and
+ // #define A (x)
+ // distinctly: the first is a macro with arguments, the second without.
+ // Distinguish these cases using the column number, since we don't
+ // see the space itself. Note that text/scanner reports the position at the
+ // end of the token. It's where you are now, and you just read this token.
+ if tok == '(' && in.Stack.Col() == prevCol+1 {
+ // Macro has arguments. Scan list of formals.
+ acceptArg := true
+ args = []string{} // Zero length but not nil.
+ Loop:
+ for {
+ tok = in.Stack.Next()
+ switch tok {
+ case ')':
+ tok = in.Stack.Next() // First token of macro definition.
+ break Loop
+ case ',':
+ if acceptArg {
+ in.Error("bad syntax in definition for macro:", name)
+ }
+ acceptArg = true
+ case scanner.Ident:
+ if !acceptArg {
+ in.Error("bad syntax in definition for macro:", name)
+ }
+ arg := in.Stack.Text()
+ if i := lookup(args, arg); i >= 0 {
+ in.Error("duplicate argument", arg, "in definition for macro:", name)
+ }
+ args = append(args, arg)
+ acceptArg = false
+ default:
+ in.Error("bad definition for macro:", name)
+ }
+ }
+ }
+ var tokens []Token
+ // Scan to newline. Backslashes escape newlines.
+ for tok != '\n' {
+ if tok == scanner.EOF {
+ in.Error("missing newline in definition for macro:", name)
+ }
+ if tok == '\\' {
+ tok = in.Stack.Next()
+ if tok != '\n' && tok != '\\' {
+ in.Error(`can only escape \ or \n in definition for macro:`, name)
+ }
+ }
+ tokens = append(tokens, Make(tok, in.Stack.Text()))
+ tok = in.Stack.Next()
+ }
+ return args, tokens
+}
+
+func lookup(args []string, arg string) int {
+ for i, a := range args {
+ if a == arg {
+ return i
+ }
+ }
+ return -1
+}
+
+// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
+// parameters substituted for the formals.
+// Invoking a macro does not touch the PC/line history.
+func (in *Input) invokeMacro(macro *Macro) {
+ // If the macro has no arguments, just substitute the text.
+ if macro.args == nil {
+ in.Push(NewSlice(in.Base(), in.Line(), macro.tokens))
+ return
+ }
+ tok := in.Stack.Next()
+ if tok != '(' {
+ // If the macro has arguments but is invoked without them, all we push is the macro name.
+ // First, put back the token.
+ in.peekToken = tok
+ in.peekText = in.text
+ in.peek = true
+ in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)}))
+ return
+ }
+ actuals := in.argsFor(macro)
+ var tokens []Token
+ for _, tok := range macro.tokens {
+ if tok.ScanToken != scanner.Ident {
+ tokens = append(tokens, tok)
+ continue
+ }
+ substitution := actuals[tok.text]
+ if substitution == nil {
+ tokens = append(tokens, tok)
+ continue
+ }
+ tokens = append(tokens, substitution...)
+ }
+ in.Push(NewSlice(in.Base(), in.Line(), tokens))
+}
+
+// argsFor returns a map from formal name to actual value for this argumented macro invocation.
+// The opening parenthesis has been absorbed.
+func (in *Input) argsFor(macro *Macro) map[string][]Token {
+ var args [][]Token
+ // One macro argument per iteration. Collect them all and check counts afterwards.
+ for argNum := 0; ; argNum++ {
+ tokens, tok := in.collectArgument(macro)
+ args = append(args, tokens)
+ if tok == ')' {
+ break
+ }
+ }
+ // Zero-argument macros are tricky.
+ if len(macro.args) == 0 && len(args) == 1 && args[0] == nil {
+ args = nil
+ } else if len(args) != len(macro.args) {
+ in.Error("wrong arg count for macro", macro.name)
+ }
+ argMap := make(map[string][]Token)
+ for i, arg := range args {
+ argMap[macro.args[i]] = arg
+ }
+ return argMap
+}
+
+// collectArgument returns the actual tokens for a single argument of a macro.
+// It also returns the token that terminated the argument, which will always
+// be either ',' or ')'. The starting '(' has been scanned.
+func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) {
+ nesting := 0
+ var tokens []Token
+ for {
+ tok := in.Stack.Next()
+ if tok == scanner.EOF || tok == '\n' {
+ in.Error("unterminated arg list invoking macro:", macro.name)
+ }
+ if nesting == 0 && (tok == ')' || tok == ',') {
+ return tokens, tok
+ }
+ if tok == '(' {
+ nesting++
+ }
+ if tok == ')' {
+ nesting--
+ }
+ tokens = append(tokens, Make(tok, in.Stack.Text()))
+ }
+}
+
+// #ifdef and #ifndef processing.
+func (in *Input) ifdef(truth bool) {
+ name := in.macroName()
+ in.expectNewline("#if[n]def")
+ if !in.enabled() {
+ truth = false
+ } else if _, defined := in.macros[name]; !defined {
+ truth = !truth
+ }
+ in.ifdefStack = append(in.ifdefStack, truth)
+}
+
+// #else processing
+func (in *Input) else_() {
+ in.expectNewline("#else")
+ if len(in.ifdefStack) == 0 {
+ in.Error("unmatched #else")
+ }
+ if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] {
+ in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
+ }
+}
+
+// #endif processing.
+func (in *Input) endif() {
+ in.expectNewline("#endif")
+ if len(in.ifdefStack) == 0 {
+ in.Error("unmatched #endif")
+ }
+ in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
+}
+
+// #include processing.
+func (in *Input) include() {
+ // Find and parse string.
+ tok := in.Stack.Next()
+ if tok != scanner.String {
+ in.expectText("expected string after #include")
+ }
+ name, err := strconv.Unquote(in.Stack.Text())
+ if err != nil {
+ in.Error("unquoting include file name: ", err)
+ }
+ in.expectNewline("#include")
+ // Push tokenizer for file onto stack.
+ fd, err := os.Open(name)
+ if err != nil {
+ for _, dir := range in.includes {
+ fd, err = os.Open(filepath.Join(dir, name))
+ if err == nil {
+ break
+ }
+ }
+ if err != nil {
+ in.Error("#include:", err)
+ }
+ }
+ in.Push(NewTokenizer(name, fd, fd))
+}
+
+// #line processing.
+func (in *Input) line() {
+ // Only need to handle Plan 9 format: #line 337 "filename"
+ tok := in.Stack.Next()
+ if tok != scanner.Int {
+ in.expectText("expected line number after #line")
+ }
+ line, err := strconv.Atoi(in.Stack.Text())
+ if err != nil {
+ in.Error("error parsing #line (cannot happen):", err)
+ }
+ tok = in.Stack.Next()
+ if tok != scanner.String {
+ in.expectText("expected file name in #line")
+ }
+ file, err := strconv.Unquote(in.Stack.Text())
+ if err != nil {
+ in.Error("unquoting #line file name: ", err)
+ }
+ tok = in.Stack.Next()
+ if tok != '\n' {
+ in.Error("unexpected token at end of #line: ", tok)
+ }
+ pos := src.MakePos(in.Base(), uint(in.Line())+1, 1) // +1 because #line nnn means line nnn starts on next line
+ in.Stack.SetBase(src.NewLinePragmaBase(pos, file, objabi.AbsFile(objabi.WorkingDir(), file, *flags.TrimPath), uint(line), 1))
+}
+
+// #undef processing
+func (in *Input) undef() {
+ name := in.macroName()
+ if in.macros[name] == nil {
+ in.Error("#undef for undefined macro:", name)
+ }
+ // Newline must be next.
+ tok := in.Stack.Next()
+ if tok != '\n' {
+ in.Error("syntax error in #undef for macro:", name)
+ }
+ delete(in.macros, name)
+}
+
+func (in *Input) Push(r TokenReader) {
+ if len(in.tr) > 100 {
+ in.Error("input recursion")
+ }
+ in.Stack.Push(r)
+}
+
+func (in *Input) Close() {
+}
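
Taken together, Input layers macro expansion and conditional processing over the raw token stream. A minimal sketch of driving it directly, in the spirit of the test harness in lex_test.go below; cmd/asm/internal/lex is an internal package, so a sketch like this only compiles when placed inside the cmd/asm tree, and expandSketch and example.s are illustrative names, not part of the change:

package lex

import (
	"fmt"
	"strings"
	"text/scanner"
)

// expandSketch pushes a small program through the preprocessor and
// prints the expanded token texts: MOVQ $ 8 , AX plus the newline token.
func expandSketch() {
	in := NewInput("example.s")
	in.Push(NewTokenizer("example.s", strings.NewReader("#define N 8\nMOVQ $N, AX\n"), nil))
	for {
		tok := in.Next()
		if tok == scanner.EOF {
			break
		}
		if tok == '#' {
			continue // a processed directive still yields a '#' token; skip it as the tests do
		}
		fmt.Printf("%s ", in.Text())
	}
}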
diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go
new file mode 100644
index 0000000..7cd41a5
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lex implements lexical analysis for the assembler.
+package lex
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "strings"
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A ScanToken represents an input item. It is a simple wrapping of rune, as
+// returned by text/scanner.Scanner, plus a couple of extra values.
+type ScanToken rune
+
+const (
+ // Asm defines some two-character lexemes. We make up
+ // a rune/ScanToken value for them - ugly but simple.
+ LSH ScanToken = -1000 - iota // << Left shift.
+ RSH // >> Logical right shift.
+ ARR // -> Used on ARM for shift type 3, arithmetic right shift.
+ ROT // @> Used on ARM for shift type 4, rotate right.
+ Include // included file started here
+ BuildComment // //go:build or +build comment
+ macroName // name of macro that should not be expanded
+)
+
+// IsRegisterShift reports whether the token is one of the ARM register shift operators.
+func IsRegisterShift(r ScanToken) bool {
+ return ROT <= r && r <= LSH // Order looks backwards because these are negative.
+}
+
+func (t ScanToken) String() string {
+ switch t {
+ case scanner.EOF:
+ return "EOF"
+ case scanner.Ident:
+ return "identifier"
+ case scanner.Int:
+ return "integer constant"
+ case scanner.Float:
+ return "float constant"
+ case scanner.Char:
+ return "rune constant"
+ case scanner.String:
+ return "string constant"
+ case scanner.RawString:
+ return "raw string constant"
+ case scanner.Comment:
+ return "comment"
+ default:
+ return fmt.Sprintf("%q", rune(t))
+ }
+}
+
+// NewLexer returns a lexer for the named file and the given link context.
+func NewLexer(name string) TokenReader {
+ input := NewInput(name)
+ fd, err := os.Open(name)
+ if err != nil {
+ log.Fatalf("%s\n", err)
+ }
+ input.Push(NewTokenizer(name, fd, fd))
+ return input
+}
+
+// The other files in this directory each contain an implementation of TokenReader.
+
+// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what
+// the text of the most recently returned token is, and where it was found.
+// The underlying scanner elides all spaces except newline, so the input looks like a stream of
+// Tokens; original spacing is lost but we don't need it.
+type TokenReader interface {
+ // Next returns the next token.
+ Next() ScanToken
+ // The following methods all refer to the most recent token returned by Next.
+ // Text returns the original string representation of the token.
+ Text() string
+ // File reports the source file name of the token.
+ File() string
+ // Base reports the position base of the token.
+ Base() *src.PosBase
+ // SetBase sets the position base.
+ SetBase(*src.PosBase)
+ // Line reports the source line number of the token.
+ Line() int
+ // Col reports the source column number of the token.
+ Col() int
+ // Close does any teardown required.
+ Close()
+}
+
+// A Token is a scan token plus its string value.
+// A macro is stored as a sequence of Tokens with spaces stripped.
+type Token struct {
+ ScanToken
+ text string
+}
+
+// Make returns a Token with the given rune (ScanToken) and text representation.
+func Make(token ScanToken, text string) Token {
+ // If the symbol starts with center dot, as in ·x, rewrite it as ""·x
+ if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") {
+ text = `""` + text
+ }
+ // Substitute the substitutes for . and /.
+ text = strings.Replace(text, "\u00B7", ".", -1)
+ text = strings.Replace(text, "\u2215", "/", -1)
+ return Token{ScanToken: token, text: text}
+}
+
+func (l Token) String() string {
+ return l.text
+}
+
+// A Macro represents the definition of a #defined macro.
+type Macro struct {
+ name string // The #define name.
+ args []string // Formal arguments.
+ tokens []Token // Body of macro.
+}
+
+// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
+func Tokenize(str string) []Token {
+ t := NewTokenizer("command line", strings.NewReader(str), nil)
+ var tokens []Token
+ for {
+ tok := t.Next()
+ if tok == scanner.EOF {
+ break
+ }
+ tokens = append(tokens, Make(tok, t.Text()))
+ }
+ return tokens
+}
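
Make's identifier rewriting is easiest to see through Tokenize. A small in-package sketch under the same caveat (tokenizeSketch is an illustrative name; the middle dot is U+00B7):

package lex

import "fmt"

// tokenizeSketch shows the substitution done by Make: the token text of
// ·count comes back as `"".count`, the form the toolchain's symbol names use.
func tokenizeSketch() {
	for _, tok := range Tokenize("MOVQ ·count(SB), AX") {
		fmt.Printf("%v %q\n", tok.ScanToken, tok.String())
	}
}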
diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go
new file mode 100644
index 0000000..51679d2
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex_test.go
@@ -0,0 +1,365 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+ "text/scanner"
+)
+
+type lexTest struct {
+ name string
+ input string
+ output string
+}
+
+var lexTests = []lexTest{
+ {
+ "empty",
+ "",
+ "",
+ },
+ {
+ "simple",
+ "1 (a)",
+ "1.(.a.)",
+ },
+ {
+ "simple define",
+ lines(
+ "#define A 1234",
+ "A",
+ ),
+ "1234.\n",
+ },
+ {
+ "define without value",
+ "#define A",
+ "",
+ },
+ {
+ "macro without arguments",
+ "#define A() 1234\n" + "A()\n",
+ "1234.\n",
+ },
+ {
+ "macro with just parens as body",
+ "#define A () \n" + "A\n",
+ "(.).\n",
+ },
+ {
+ "macro with parens but no arguments",
+ "#define A (x) \n" + "A\n",
+ "(.x.).\n",
+ },
+ {
+ "macro with arguments",
+ "#define A(x, y, z) x+z+y\n" + "A(1, 2, 3)\n",
+ "1.+.3.+.2.\n",
+ },
+ {
+ "argumented macro invoked without arguments",
+ lines(
+ "#define X() foo ",
+ "X()",
+ "X",
+ ),
+ "foo.\n.X.\n",
+ },
+ {
+ "multiline macro without arguments",
+ lines(
+ "#define A 1\\",
+ "\t2\\",
+ "\t3",
+ "before",
+ "A",
+ "after",
+ ),
+ "before.\n.1.\n.2.\n.3.\n.after.\n",
+ },
+ {
+ "multiline macro with arguments",
+ lines(
+ "#define A(a, b, c) a\\",
+ "\tb\\",
+ "\tc",
+ "before",
+ "A(1, 2, 3)",
+ "after",
+ ),
+ "before.\n.1.\n.2.\n.3.\n.after.\n",
+ },
+ {
+ "LOAD macro",
+ lines(
+ "#define LOAD(off, reg) \\",
+ "\tMOVBLZX (off*4)(R12), reg \\",
+ "\tADDB reg, DX",
+ "",
+ "LOAD(8, AX)",
+ ),
+ "\n.\n.MOVBLZX.(.8.*.4.).(.R12.).,.AX.\n.ADDB.AX.,.DX.\n",
+ },
+ {
+ "nested multiline macro",
+ lines(
+ "#define KEYROUND(xmm, load, off, r1, r2, index) \\",
+ "\tMOVBLZX (BP)(DX*4), R8 \\",
+ "\tload((off+1), r2) \\",
+ "\tMOVB R8, (off*4)(R12) \\",
+ "\tPINSRW $index, (BP)(R8*4), xmm",
+ "#define LOAD(off, reg) \\",
+ "\tMOVBLZX (off*4)(R12), reg \\",
+ "\tADDB reg, DX",
+ "KEYROUND(X0, LOAD, 8, AX, BX, 0)",
+ ),
+ "\n.MOVBLZX.(.BP.).(.DX.*.4.).,.R8.\n.\n.MOVBLZX.(.(.8.+.1.).*.4.).(.R12.).,.BX.\n.ADDB.BX.,.DX.\n.MOVB.R8.,.(.8.*.4.).(.R12.).\n.PINSRW.$.0.,.(.BP.).(.R8.*.4.).,.X0.\n",
+ },
+ {
+ "taken #ifdef",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#define B 1234",
+ "#endif",
+ "B",
+ ),
+ "1234.\n",
+ },
+ {
+ "not taken #ifdef",
+ lines(
+ "#ifdef A",
+ "#define B 1234",
+ "#endif",
+ "B",
+ ),
+ "B.\n",
+ },
+ {
+ "taken #ifdef with else",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#define B 1234",
+ "#else",
+ "#define B 5678",
+ "#endif",
+ "B",
+ ),
+ "1234.\n",
+ },
+ {
+ "not taken #ifdef with else",
+ lines(
+ "#ifdef A",
+ "#define B 1234",
+ "#else",
+ "#define B 5678",
+ "#endif",
+ "B",
+ ),
+ "5678.\n",
+ },
+ {
+ "nested taken/taken #ifdef",
+ lines(
+ "#define A",
+ "#define B",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "1234.\n",
+ },
+ {
+ "nested taken/not-taken #ifdef",
+ lines(
+ "#define A",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "5678.\n",
+ },
+ {
+ "nested not-taken/would-be-taken #ifdef",
+ lines(
+ "#define B",
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "C.\n",
+ },
+ {
+ "nested not-taken/not-taken #ifdef",
+ lines(
+ "#ifdef A",
+ "#ifdef B",
+ "#define C 1234",
+ "#else",
+ "#define C 5678",
+ "#endif",
+ "#endif",
+ "C",
+ ),
+ "C.\n",
+ },
+ {
+ "nested #define",
+ lines(
+ "#define A #define B THIS",
+ "A",
+ "B",
+ ),
+ "THIS.\n",
+ },
+ {
+ "nested #define with args",
+ lines(
+ "#define A #define B(x) x",
+ "A",
+ "B(THIS)",
+ ),
+ "THIS.\n",
+ },
+ /* This one fails. See comment in Slice.Col.
+ {
+ "nested #define with args",
+ lines(
+ "#define A #define B (x) x",
+ "A",
+ "B(THIS)",
+ ),
+ "x.\n",
+ },
+ */
+}
+
+func TestLex(t *testing.T) {
+ for _, test := range lexTests {
+ input := NewInput(test.name)
+ input.Push(NewTokenizer(test.name, strings.NewReader(test.input), nil))
+ result := drain(input)
+ if result != test.output {
+ t.Errorf("%s: got %q expected %q", test.name, result, test.output)
+ }
+ }
+}
+
+// lines joins the arguments together as complete lines.
+func lines(a ...string) string {
+ return strings.Join(a, "\n") + "\n"
+}
+
+// drain returns a single string representing the processed input tokens.
+func drain(input *Input) string {
+ var buf bytes.Buffer
+ for {
+ tok := input.Next()
+ if tok == scanner.EOF {
+ return buf.String()
+ }
+ if tok == '#' {
+ continue
+ }
+ if buf.Len() > 0 {
+ buf.WriteByte('.')
+ }
+ buf.WriteString(input.Text())
+ }
+}
+
+type badLexTest struct {
+ input string
+ error string
+}
+
+var badLexTests = []badLexTest{
+ {
+ "3 #define foo bar\n",
+ "'#' must be first item on line",
+ },
+ {
+ "#ifdef foo\nhello",
+ "unclosed #ifdef or #ifndef",
+ },
+ {
+ "#ifndef foo\nhello",
+ "unclosed #ifdef or #ifndef",
+ },
+ {
+ "#ifdef foo\nhello\n#else\nbye",
+ "unclosed #ifdef or #ifndef",
+ },
+ {
+ "#define A() A()\nA()",
+ "recursive macro invocation",
+ },
+ {
+ "#define A a\n#define A a\n",
+ "redefinition of macro",
+ },
+ {
+ "#define A a",
+ "no newline after macro definition",
+ },
+}
+
+func TestBadLex(t *testing.T) {
+ for _, test := range badLexTests {
+ input := NewInput(test.error)
+ input.Push(NewTokenizer(test.error, strings.NewReader(test.input), nil))
+ err := firstError(input)
+ if err == nil {
+ t.Errorf("%s: got no error", test.error)
+ continue
+ }
+ if !strings.Contains(err.Error(), test.error) {
+ t.Errorf("got error %q expected %q", err.Error(), test.error)
+ }
+ }
+}
+
+// firstError returns the first error value triggered by the input.
+func firstError(input *Input) (err error) {
+ panicOnError = true
+ defer func() {
+ panicOnError = false
+ switch e := recover(); e := e.(type) {
+ case nil:
+ case error:
+ err = e
+ default:
+ panic(e)
+ }
+ }()
+
+ for {
+ tok := input.Next()
+ if tok == scanner.EOF {
+ return
+ }
+ }
+}
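
A note on reading the expected outputs above: drain joins each token's text with a '.' separator, so "1.(.a.)" stands for the four tokens 1, (, a, and ). The suite runs with the usual invocation from a Go checkout:

	go test cmd/asm/internal/lex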
diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go
new file mode 100644
index 0000000..8ee0c70
--- /dev/null
+++ b/src/cmd/asm/internal/lex/slice.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A Slice reads from a slice of Tokens.
+type Slice struct {
+ tokens []Token
+ base *src.PosBase
+ line int
+ pos int
+}
+
+func NewSlice(base *src.PosBase, line int, tokens []Token) *Slice {
+ return &Slice{
+ tokens: tokens,
+ base: base,
+ line: line,
+ pos: -1, // Next will advance to zero.
+ }
+}
+
+func (s *Slice) Next() ScanToken {
+ s.pos++
+ if s.pos >= len(s.tokens) {
+ return scanner.EOF
+ }
+ return s.tokens[s.pos].ScanToken
+}
+
+func (s *Slice) Text() string {
+ return s.tokens[s.pos].text
+}
+
+func (s *Slice) File() string {
+ return s.base.Filename()
+}
+
+func (s *Slice) Base() *src.PosBase {
+ return s.base
+}
+
+func (s *Slice) SetBase(base *src.PosBase) {
+ // Cannot happen because we only have slices of already-scanned text,
+ // but be prepared.
+ s.base = base
+}
+
+func (s *Slice) Line() int {
+ return s.line
+}
+
+func (s *Slice) Col() int {
+ // TODO: Col is only called when defining a macro and all it cares about is increasing
+ // position to discover whether there is a blank before the parenthesis.
+ // We only get here if defining a macro inside a macro.
+ // This imperfect implementation means we cannot tell the difference between
+ // #define A #define B(x) x
+ // and
+ // #define A #define B (x) x
+	// In the first, the definition of B has an argument; in the second, it doesn't. Because we let
+ // text/scanner strip the blanks for us, this is extremely rare, hard to fix, and not worth it.
+ return s.pos
+}
+
+func (s *Slice) Close() {
+}
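
A Slice is what invokeMacro pushes: a replay of already-scanned tokens. A tiny in-package sketch under the same caveat, with a stand-in position base:

package lex

import (
	"fmt"
	"text/scanner"

	"cmd/internal/src"
)

// sliceSketch replays a token slice the way a macro body is replayed.
func sliceSketch() {
	base := src.NewFileBase("f.s", "f.s") // stand-in base, not tied to a real file
	s := NewSlice(base, 1, Tokenize("1 + 2"))
	for tok := s.Next(); tok != scanner.EOF; tok = s.Next() {
		fmt.Print(s.Text(), " ")
	}
	// Prints: 1 + 2
}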
diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go
new file mode 100644
index 0000000..929e528
--- /dev/null
+++ b/src/cmd/asm/internal/lex/stack.go
@@ -0,0 +1,61 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "text/scanner"
+
+ "cmd/internal/src"
+)
+
+// A Stack is a stack of TokenReaders. As the top TokenReader hits EOF,
+// it resumes reading the next one down.
+type Stack struct {
+ tr []TokenReader
+}
+
+// Push adds tr to the top (end) of the input stack. (Popping happens automatically.)
+func (s *Stack) Push(tr TokenReader) {
+ s.tr = append(s.tr, tr)
+}
+
+func (s *Stack) Next() ScanToken {
+ tos := s.tr[len(s.tr)-1]
+ tok := tos.Next()
+ for tok == scanner.EOF && len(s.tr) > 1 {
+ tos.Close()
+ // Pop the topmost item from the stack and resume with the next one down.
+ s.tr = s.tr[:len(s.tr)-1]
+ tok = s.Next()
+ }
+ return tok
+}
+
+func (s *Stack) Text() string {
+ return s.tr[len(s.tr)-1].Text()
+}
+
+func (s *Stack) File() string {
+ return s.Base().Filename()
+}
+
+func (s *Stack) Base() *src.PosBase {
+ return s.tr[len(s.tr)-1].Base()
+}
+
+func (s *Stack) SetBase(base *src.PosBase) {
+ s.tr[len(s.tr)-1].SetBase(base)
+}
+
+func (s *Stack) Line() int {
+ return s.tr[len(s.tr)-1].Line()
+}
+
+func (s *Stack) Col() int {
+ return s.tr[len(s.tr)-1].Col()
+}
+
+func (s *Stack) Close() { // Unused.
+}
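
The automatic pop is the whole point of Stack: the reader pushed last drains first, and its EOF falls through to the one below. An in-package sketch (stackSketch is an illustrative name):

package lex

import (
	"fmt"
	"strings"
	"text/scanner"
)

// stackSketch prints B then A: the second tokenizer is on top, and at
// its EOF Next pops it and resumes with the first.
func stackSketch() {
	var st Stack
	st.Push(NewTokenizer("a", strings.NewReader("A"), nil))
	st.Push(NewTokenizer("b", strings.NewReader("B"), nil))
	for tok := st.Next(); tok != scanner.EOF; tok = st.Next() {
		fmt.Print(st.Text(), " ")
	}
}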
diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go
new file mode 100644
index 0000000..861a2d4
--- /dev/null
+++ b/src/cmd/asm/internal/lex/tokenizer.go
@@ -0,0 +1,153 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "io"
+ "os"
+ "strings"
+ "text/scanner"
+ "unicode"
+
+ "cmd/asm/internal/flags"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+)
+
+// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
+// for our purposes and made a TokenReader. It forms the lowest level,
+// turning text from readers into tokens.
+type Tokenizer struct {
+ tok ScanToken
+ s *scanner.Scanner
+ base *src.PosBase
+ line int
+ file *os.File // If non-nil, file descriptor to close.
+}
+
+func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
+ var s scanner.Scanner
+ s.Init(r)
+ // Newline is like a semicolon; other space characters are fine.
+ s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
+ // Don't skip comments: we need to count newlines.
+ s.Mode = scanner.ScanChars |
+ scanner.ScanFloats |
+ scanner.ScanIdents |
+ scanner.ScanInts |
+ scanner.ScanStrings |
+ scanner.ScanComments
+ s.Position.Filename = name
+ s.IsIdentRune = isIdentRune
+ return &Tokenizer{
+ s: &s,
+ base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
+ line: 1,
+ file: file,
+ }
+}
+
+// We want center dot (·) and division slash (∕) to work as identifier characters.
+func isIdentRune(ch rune, i int) bool {
+ if unicode.IsLetter(ch) {
+ return true
+ }
+ switch ch {
+ case '_': // Underscore; traditional.
+ return true
+ case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
+ return true
+ case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
+ return true
+ }
+ // Digits are OK only after the first character.
+ return i > 0 && unicode.IsDigit(ch)
+}
+
+func (t *Tokenizer) Text() string {
+ switch t.tok {
+ case LSH:
+ return "<<"
+ case RSH:
+ return ">>"
+ case ARR:
+ return "->"
+ case ROT:
+ return "@>"
+ }
+ return t.s.TokenText()
+}
+
+func (t *Tokenizer) File() string {
+ return t.base.Filename()
+}
+
+func (t *Tokenizer) Base() *src.PosBase {
+ return t.base
+}
+
+func (t *Tokenizer) SetBase(base *src.PosBase) {
+ t.base = base
+}
+
+func (t *Tokenizer) Line() int {
+ return t.line
+}
+
+func (t *Tokenizer) Col() int {
+ return t.s.Pos().Column
+}
+
+func (t *Tokenizer) Next() ScanToken {
+ s := t.s
+ for {
+ t.tok = ScanToken(s.Scan())
+ if t.tok != scanner.Comment {
+ break
+ }
+ text := s.TokenText()
+ t.line += strings.Count(text, "\n")
+ // TODO: Use constraint.IsGoBuild once it exists.
+ if strings.HasPrefix(text, "//go:build") {
+ t.tok = BuildComment
+ break
+ }
+ }
+ switch t.tok {
+ case '\n':
+ t.line++
+ case '-':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ARR
+ return ARR
+ }
+ case '@':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ROT
+ return ROT
+ }
+ case '<':
+ if s.Peek() == '<' {
+ s.Next()
+ t.tok = LSH
+ return LSH
+ }
+ case '>':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = RSH
+ return RSH
+ }
+ }
+ return t.tok
+}
+
+func (t *Tokenizer) Close() {
+ if t.file != nil {
+ t.file.Close()
+ }
+}
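
The two-character lexemes declared in lex.go surface here as single tokens. A closing in-package sketch under the same caveat:

package lex

import (
	"fmt"
	"strings"
	"text/scanner"
)

// tokenizerSketch scans ARM-style shifts: << comes back as one LSH token
// with Text "<<", and @> as one ROT token, not as two separate runes.
func tokenizerSketch() {
	t := NewTokenizer("demo", strings.NewReader("R1<<3 @> 2\n"), nil)
	for tok := t.Next(); tok != scanner.EOF; tok = t.Next() {
		fmt.Printf("%v %q\n", tok, t.Text())
	}
}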