Diffstat (limited to 'src/cmd/asm/internal/lex')
-rw-r--r-- | src/cmd/asm/internal/lex/input.go | 486
-rw-r--r-- | src/cmd/asm/internal/lex/lex.go | 141
-rw-r--r-- | src/cmd/asm/internal/lex/lex_test.go | 365
-rw-r--r-- | src/cmd/asm/internal/lex/slice.go | 74
-rw-r--r-- | src/cmd/asm/internal/lex/stack.go | 61
-rw-r--r-- | src/cmd/asm/internal/lex/tokenizer.go | 153 |
6 files changed, 1280 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go new file mode 100644 index 0000000..da4ebe6 --- /dev/null +++ b/src/cmd/asm/internal/lex/input.go @@ -0,0 +1,486 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "text/scanner" + + "cmd/asm/internal/flags" + "cmd/internal/objabi" + "cmd/internal/src" +) + +// Input is the main input: a stack of readers and some macro definitions. +// It also handles #include processing (by pushing onto the input stack) +// and parses and instantiates macro definitions. +type Input struct { + Stack + includes []string + beginningOfLine bool + ifdefStack []bool + macros map[string]*Macro + text string // Text of last token returned by Next. + peek bool + peekToken ScanToken + peekText string +} + +// NewInput returns an Input from the given path. +func NewInput(name string) *Input { + return &Input{ + // include directories: look in source dir, then -I directories. + includes: append([]string{filepath.Dir(name)}, flags.I...), + beginningOfLine: true, + macros: predefine(flags.D), + } +} + +// predefine installs the macros set by the -D flag on the command line. +func predefine(defines flags.MultiFlag) map[string]*Macro { + macros := make(map[string]*Macro) + for _, name := range defines { + value := "1" + i := strings.IndexRune(name, '=') + if i > 0 { + name, value = name[:i], name[i+1:] + } + tokens := Tokenize(name) + if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident { + fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0]) + flags.Usage() + } + macros[name] = &Macro{ + name: name, + args: nil, + tokens: Tokenize(value), + } + } + return macros +} + +var panicOnError bool // For testing. + +func (in *Input) Error(args ...interface{}) { + if panicOnError { + panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))) + } + fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)) + os.Exit(1) +} + +// expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token. +func (in *Input) expectText(args ...interface{}) { + in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...) +} + +// enabled reports whether the input is enabled by an ifdef, or is at the top level. +func (in *Input) enabled() bool { + return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1] +} + +func (in *Input) expectNewline(directive string) { + tok := in.Stack.Next() + if tok != '\n' { + in.expectText("expected newline after", directive) + } +} + +func (in *Input) Next() ScanToken { + if in.peek { + in.peek = false + tok := in.peekToken + in.text = in.peekText + return tok + } + // If we cannot generate a token after 100 macro invocations, we're in trouble. + // The usual case is caught by Push, below, but be safe. + for nesting := 0; nesting < 100; { + tok := in.Stack.Next() + switch tok { + case '#': + if !in.beginningOfLine { + in.Error("'#' must be first item on line") + } + in.beginningOfLine = in.hash() + in.text = "#" + return '#' + + case scanner.Ident: + // Is it a macro name? 
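+ // (If it is, invokeMacro pushes the expansion onto the input stack and the
+ // loop continues, with nesting bumped as a guard against runaway expansion.)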
+ name := in.Stack.Text() + macro := in.macros[name] + if macro != nil { + nesting++ + in.invokeMacro(macro) + continue + } + fallthrough + default: + if tok == scanner.EOF && len(in.ifdefStack) > 0 { + // We're skipping text but have run out of input with no #endif. + in.Error("unclosed #ifdef or #ifndef") + } + in.beginningOfLine = tok == '\n' + if in.enabled() { + in.text = in.Stack.Text() + return tok + } + } + } + in.Error("recursive macro invocation") + return 0 +} + +func (in *Input) Text() string { + return in.text +} + +// hash processes a # preprocessor directive. It reports whether it completes. +func (in *Input) hash() bool { + // We have a '#'; it must be followed by a known word (define, include, etc.). + tok := in.Stack.Next() + if tok != scanner.Ident { + in.expectText("expected identifier after '#'") + } + if !in.enabled() { + // Can only start including again if we are at #else or #endif but also + // need to keep track of nested #if[n]defs. + // We let #line through because it might affect errors. + switch in.Stack.Text() { + case "else", "endif", "ifdef", "ifndef", "line": + // Press on. + default: + return false + } + } + switch in.Stack.Text() { + case "define": + in.define() + case "else": + in.else_() + case "endif": + in.endif() + case "ifdef": + in.ifdef(true) + case "ifndef": + in.ifdef(false) + case "include": + in.include() + case "line": + in.line() + case "undef": + in.undef() + default: + in.Error("unexpected token after '#':", in.Stack.Text()) + } + return true +} + +// macroName returns the name for the macro being referenced. +func (in *Input) macroName() string { + // We use the Stack's input method; no macro processing at this stage. + tok := in.Stack.Next() + if tok != scanner.Ident { + in.expectText("expected identifier after # directive") + } + // Name is alphanumeric by definition. + return in.Stack.Text() +} + +// #define processing. +func (in *Input) define() { + name := in.macroName() + args, tokens := in.macroDefinition(name) + in.defineMacro(name, args, tokens) +} + +// defineMacro stores the macro definition in the Input. +func (in *Input) defineMacro(name string, args []string, tokens []Token) { + if in.macros[name] != nil { + in.Error("redefinition of macro:", name) + } + in.macros[name] = &Macro{ + name: name, + args: args, + tokens: tokens, + } +} + +// macroDefinition returns the list of formals and the tokens of the definition. +// The argument list is nil for no parens on the definition; otherwise a list of +// formal argument names. +func (in *Input) macroDefinition(name string) ([]string, []Token) { + prevCol := in.Stack.Col() + tok := in.Stack.Next() + if tok == '\n' || tok == scanner.EOF { + return nil, nil // No definition for macro + } + var args []string + // The C preprocessor treats + // #define A(x) + // and + // #define A (x) + // distinctly: the first is a macro with arguments, the second without. + // Distinguish these cases using the column number, since we don't + // see the space itself. Note that text/scanner reports the position at the + // end of the token. It's where you are now, and you just read this token. + if tok == '(' && in.Stack.Col() == prevCol+1 { + // Macro has arguments. Scan list of formals. + acceptArg := true + args = []string{} // Zero length but not nil. + Loop: + for { + tok = in.Stack.Next() + switch tok { + case ')': + tok = in.Stack.Next() // First token of macro definition. 
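+ // (Reading ahead here matches the no-paren path above, where tok already
+ // holds the first token of the body when the scan loop below begins.)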
+ break Loop + case ',': + if acceptArg { + in.Error("bad syntax in definition for macro:", name) + } + acceptArg = true + case scanner.Ident: + if !acceptArg { + in.Error("bad syntax in definition for macro:", name) + } + arg := in.Stack.Text() + if i := lookup(args, arg); i >= 0 { + in.Error("duplicate argument", arg, "in definition for macro:", name) + } + args = append(args, arg) + acceptArg = false + default: + in.Error("bad definition for macro:", name) + } + } + } + var tokens []Token + // Scan to newline. Backslashes escape newlines. + for tok != '\n' { + if tok == scanner.EOF { + in.Error("missing newline in definition for macro:", name) + } + if tok == '\\' { + tok = in.Stack.Next() + if tok != '\n' && tok != '\\' { + in.Error(`can only escape \ or \n in definition for macro:`, name) + } + } + tokens = append(tokens, Make(tok, in.Stack.Text())) + tok = in.Stack.Next() + } + return args, tokens +} + +func lookup(args []string, arg string) int { + for i, a := range args { + if a == arg { + return i + } + } + return -1 +} + +// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual +// parameters substituted for the formals. +// Invoking a macro does not touch the PC/line history. +func (in *Input) invokeMacro(macro *Macro) { + // If the macro has no arguments, just substitute the text. + if macro.args == nil { + in.Push(NewSlice(in.Base(), in.Line(), macro.tokens)) + return + } + tok := in.Stack.Next() + if tok != '(' { + // If the macro has arguments but is invoked without them, all we push is the macro name. + // First, put back the token. + in.peekToken = tok + in.peekText = in.text + in.peek = true + in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)})) + return + } + actuals := in.argsFor(macro) + var tokens []Token + for _, tok := range macro.tokens { + if tok.ScanToken != scanner.Ident { + tokens = append(tokens, tok) + continue + } + substitution := actuals[tok.text] + if substitution == nil { + tokens = append(tokens, tok) + continue + } + tokens = append(tokens, substitution...) + } + in.Push(NewSlice(in.Base(), in.Line(), tokens)) +} + +// argsFor returns a map from formal name to actual value for this argumented macro invocation. +// The opening parenthesis has been absorbed. +func (in *Input) argsFor(macro *Macro) map[string][]Token { + var args [][]Token + // One macro argument per iteration. Collect them all and check counts afterwards. + for argNum := 0; ; argNum++ { + tokens, tok := in.collectArgument(macro) + args = append(args, tokens) + if tok == ')' { + break + } + } + // Zero-argument macros are tricky. + if len(macro.args) == 0 && len(args) == 1 && args[0] == nil { + args = nil + } else if len(args) != len(macro.args) { + in.Error("wrong arg count for macro", macro.name) + } + argMap := make(map[string][]Token) + for i, arg := range args { + argMap[macro.args[i]] = arg + } + return argMap +} + +// collectArgument returns the actual tokens for a single argument of a macro. +// It also returns the token that terminated the argument, which will always +// be either ',' or ')'. The starting '(' has been scanned. 
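+// For example, in A(f(x, y), z) the nesting count keeps the comma inside
+// f(x, y) from terminating the first argument; only a top-level ',' or ')' does.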
+func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) { + nesting := 0 + var tokens []Token + for { + tok := in.Stack.Next() + if tok == scanner.EOF || tok == '\n' { + in.Error("unterminated arg list invoking macro:", macro.name) + } + if nesting == 0 && (tok == ')' || tok == ',') { + return tokens, tok + } + if tok == '(' { + nesting++ + } + if tok == ')' { + nesting-- + } + tokens = append(tokens, Make(tok, in.Stack.Text())) + } +} + +// #ifdef and #ifndef processing. +func (in *Input) ifdef(truth bool) { + name := in.macroName() + in.expectNewline("#if[n]def") + if !in.enabled() { + truth = false + } else if _, defined := in.macros[name]; !defined { + truth = !truth + } + in.ifdefStack = append(in.ifdefStack, truth) +} + +// #else processing +func (in *Input) else_() { + in.expectNewline("#else") + if len(in.ifdefStack) == 0 { + in.Error("unmatched #else") + } + if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] { + in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1] + } +} + +// #endif processing. +func (in *Input) endif() { + in.expectNewline("#endif") + if len(in.ifdefStack) == 0 { + in.Error("unmatched #endif") + } + in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1] +} + +// #include processing. +func (in *Input) include() { + // Find and parse string. + tok := in.Stack.Next() + if tok != scanner.String { + in.expectText("expected string after #include") + } + name, err := strconv.Unquote(in.Stack.Text()) + if err != nil { + in.Error("unquoting include file name: ", err) + } + in.expectNewline("#include") + // Push tokenizer for file onto stack. + fd, err := os.Open(name) + if err != nil { + for _, dir := range in.includes { + fd, err = os.Open(filepath.Join(dir, name)) + if err == nil { + break + } + } + if err != nil { + in.Error("#include:", err) + } + } + in.Push(NewTokenizer(name, fd, fd)) +} + +// #line processing. +func (in *Input) line() { + // Only need to handle Plan 9 format: #line 337 "filename" + tok := in.Stack.Next() + if tok != scanner.Int { + in.expectText("expected line number after #line") + } + line, err := strconv.Atoi(in.Stack.Text()) + if err != nil { + in.Error("error parsing #line (cannot happen):", err) + } + tok = in.Stack.Next() + if tok != scanner.String { + in.expectText("expected file name in #line") + } + file, err := strconv.Unquote(in.Stack.Text()) + if err != nil { + in.Error("unquoting #line file name: ", err) + } + tok = in.Stack.Next() + if tok != '\n' { + in.Error("unexpected token at end of #line: ", tok) + } + pos := src.MakePos(in.Base(), uint(in.Line())+1, 1) // +1 because #line nnn means line nnn starts on next line + in.Stack.SetBase(src.NewLinePragmaBase(pos, file, objabi.AbsFile(objabi.WorkingDir(), file, *flags.TrimPath), uint(line), 1)) +} + +// #undef processing +func (in *Input) undef() { + name := in.macroName() + if in.macros[name] == nil { + in.Error("#undef for undefined macro:", name) + } + // Newline must be next. + tok := in.Stack.Next() + if tok != '\n' { + in.Error("syntax error in #undef for macro:", name) + } + delete(in.macros, name) +} + +func (in *Input) Push(r TokenReader) { + if len(in.tr) > 100 { + in.Error("input recursion") + } + in.Stack.Push(r) +} + +func (in *Input) Close() { +} diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go new file mode 100644 index 0000000..7cd41a5 --- /dev/null +++ b/src/cmd/asm/internal/lex/lex.go @@ -0,0 +1,141 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lex implements lexical analysis for the assembler. +package lex + +import ( + "fmt" + "log" + "os" + "strings" + "text/scanner" + + "cmd/internal/src" +) + +// A ScanToken represents an input item. It is a simple wrapping of rune, as +// returned by text/scanner.Scanner, plus a couple of extra values. +type ScanToken rune + +const ( + // Asm defines some two-character lexemes. We make up + // a rune/ScanToken value for them - ugly but simple. + LSH ScanToken = -1000 - iota // << Left shift. + RSH // >> Logical right shift. + ARR // -> Used on ARM for shift type 3, arithmetic right shift. + ROT // @> Used on ARM for shift type 4, rotate right. + Include // included file started here + BuildComment // //go:build or +build comment + macroName // name of macro that should not be expanded +) + +// IsRegisterShift reports whether the token is one of the ARM register shift operators. +func IsRegisterShift(r ScanToken) bool { + return ROT <= r && r <= LSH // Order looks backwards because these are negative. +} + +func (t ScanToken) String() string { + switch t { + case scanner.EOF: + return "EOF" + case scanner.Ident: + return "identifier" + case scanner.Int: + return "integer constant" + case scanner.Float: + return "float constant" + case scanner.Char: + return "rune constant" + case scanner.String: + return "string constant" + case scanner.RawString: + return "raw string constant" + case scanner.Comment: + return "comment" + default: + return fmt.Sprintf("%q", rune(t)) + } +} + +// NewLexer returns a lexer for the named file and the given link context. +func NewLexer(name string) TokenReader { + input := NewInput(name) + fd, err := os.Open(name) + if err != nil { + log.Fatalf("%s\n", err) + } + input.Push(NewTokenizer(name, fd, fd)) + return input +} + +// The other files in this directory each contain an implementation of TokenReader. + +// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what +// the text of the most recently returned token is, and where it was found. +// The underlying scanner elides all spaces except newline, so the input looks like a stream of +// Tokens; original spacing is lost but we don't need it. +type TokenReader interface { + // Next returns the next token. + Next() ScanToken + // The following methods all refer to the most recent token returned by Next. + // Text returns the original string representation of the token. + Text() string + // File reports the source file name of the token. + File() string + // Base reports the position base of the token. + Base() *src.PosBase + // SetBase sets the position base. + SetBase(*src.PosBase) + // Line reports the source line number of the token. + Line() int + // Col reports the source column number of the token. + Col() int + // Close does any teardown required. + Close() +} + +// A Token is a scan token plus its string value. +// A macro is stored as a sequence of Tokens with spaces stripped. +type Token struct { + ScanToken + text string +} + +// Make returns a Token with the given rune (ScanToken) and text representation. +func Make(token ScanToken, text string) Token { + // If the symbol starts with center dot, as in ·x, rewrite it as ""·x + if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") { + text = `""` + text + } + // Substitute the substitutes for . and /. 
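+ // (Assembly source cannot write '.' or '/' inside a symbol name, so it uses
+ // middle dot and division slash instead; they are rewritten here so the
+ // rest of the toolchain sees ordinary '.' and '/'.)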
+ text = strings.Replace(text, "\u00B7", ".", -1) + text = strings.Replace(text, "\u2215", "/", -1) + return Token{ScanToken: token, text: text} +} + +func (l Token) String() string { + return l.text +} + +// A Macro represents the definition of a #defined macro. +type Macro struct { + name string // The #define name. + args []string // Formal arguments. + tokens []Token // Body of macro. +} + +// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests. +func Tokenize(str string) []Token { + t := NewTokenizer("command line", strings.NewReader(str), nil) + var tokens []Token + for { + tok := t.Next() + if tok == scanner.EOF { + break + } + tokens = append(tokens, Make(tok, t.Text())) + } + return tokens +} diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go new file mode 100644 index 0000000..51679d2 --- /dev/null +++ b/src/cmd/asm/internal/lex/lex_test.go @@ -0,0 +1,365 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "bytes" + "strings" + "testing" + "text/scanner" +) + +type lexTest struct { + name string + input string + output string +} + +var lexTests = []lexTest{ + { + "empty", + "", + "", + }, + { + "simple", + "1 (a)", + "1.(.a.)", + }, + { + "simple define", + lines( + "#define A 1234", + "A", + ), + "1234.\n", + }, + { + "define without value", + "#define A", + "", + }, + { + "macro without arguments", + "#define A() 1234\n" + "A()\n", + "1234.\n", + }, + { + "macro with just parens as body", + "#define A () \n" + "A\n", + "(.).\n", + }, + { + "macro with parens but no arguments", + "#define A (x) \n" + "A\n", + "(.x.).\n", + }, + { + "macro with arguments", + "#define A(x, y, z) x+z+y\n" + "A(1, 2, 3)\n", + "1.+.3.+.2.\n", + }, + { + "argumented macro invoked without arguments", + lines( + "#define X() foo ", + "X()", + "X", + ), + "foo.\n.X.\n", + }, + { + "multiline macro without arguments", + lines( + "#define A 1\\", + "\t2\\", + "\t3", + "before", + "A", + "after", + ), + "before.\n.1.\n.2.\n.3.\n.after.\n", + }, + { + "multiline macro with arguments", + lines( + "#define A(a, b, c) a\\", + "\tb\\", + "\tc", + "before", + "A(1, 2, 3)", + "after", + ), + "before.\n.1.\n.2.\n.3.\n.after.\n", + }, + { + "LOAD macro", + lines( + "#define LOAD(off, reg) \\", + "\tMOVBLZX (off*4)(R12), reg \\", + "\tADDB reg, DX", + "", + "LOAD(8, AX)", + ), + "\n.\n.MOVBLZX.(.8.*.4.).(.R12.).,.AX.\n.ADDB.AX.,.DX.\n", + }, + { + "nested multiline macro", + lines( + "#define KEYROUND(xmm, load, off, r1, r2, index) \\", + "\tMOVBLZX (BP)(DX*4), R8 \\", + "\tload((off+1), r2) \\", + "\tMOVB R8, (off*4)(R12) \\", + "\tPINSRW $index, (BP)(R8*4), xmm", + "#define LOAD(off, reg) \\", + "\tMOVBLZX (off*4)(R12), reg \\", + "\tADDB reg, DX", + "KEYROUND(X0, LOAD, 8, AX, BX, 0)", + ), + "\n.MOVBLZX.(.BP.).(.DX.*.4.).,.R8.\n.\n.MOVBLZX.(.(.8.+.1.).*.4.).(.R12.).,.BX.\n.ADDB.BX.,.DX.\n.MOVB.R8.,.(.8.*.4.).(.R12.).\n.PINSRW.$.0.,.(.BP.).(.R8.*.4.).,.X0.\n", + }, + { + "taken #ifdef", + lines( + "#define A", + "#ifdef A", + "#define B 1234", + "#endif", + "B", + ), + "1234.\n", + }, + { + "not taken #ifdef", + lines( + "#ifdef A", + "#define B 1234", + "#endif", + "B", + ), + "B.\n", + }, + { + "taken #ifdef with else", + lines( + "#define A", + "#ifdef A", + "#define B 1234", + "#else", + "#define B 5678", + "#endif", + "B", + ), + "1234.\n", + }, + { + "not taken #ifdef with else", + lines( + "#ifdef 
A", + "#define B 1234", + "#else", + "#define B 5678", + "#endif", + "B", + ), + "5678.\n", + }, + { + "nested taken/taken #ifdef", + lines( + "#define A", + "#define B", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "1234.\n", + }, + { + "nested taken/not-taken #ifdef", + lines( + "#define A", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "5678.\n", + }, + { + "nested not-taken/would-be-taken #ifdef", + lines( + "#define B", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "C.\n", + }, + { + "nested not-taken/not-taken #ifdef", + lines( + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "C.\n", + }, + { + "nested #define", + lines( + "#define A #define B THIS", + "A", + "B", + ), + "THIS.\n", + }, + { + "nested #define with args", + lines( + "#define A #define B(x) x", + "A", + "B(THIS)", + ), + "THIS.\n", + }, + /* This one fails. See comment in Slice.Col. + { + "nested #define with args", + lines( + "#define A #define B (x) x", + "A", + "B(THIS)", + ), + "x.\n", + }, + */ +} + +func TestLex(t *testing.T) { + for _, test := range lexTests { + input := NewInput(test.name) + input.Push(NewTokenizer(test.name, strings.NewReader(test.input), nil)) + result := drain(input) + if result != test.output { + t.Errorf("%s: got %q expected %q", test.name, result, test.output) + } + } +} + +// lines joins the arguments together as complete lines. +func lines(a ...string) string { + return strings.Join(a, "\n") + "\n" +} + +// drain returns a single string representing the processed input tokens. +func drain(input *Input) string { + var buf bytes.Buffer + for { + tok := input.Next() + if tok == scanner.EOF { + return buf.String() + } + if tok == '#' { + continue + } + if buf.Len() > 0 { + buf.WriteByte('.') + } + buf.WriteString(input.Text()) + } +} + +type badLexTest struct { + input string + error string +} + +var badLexTests = []badLexTest{ + { + "3 #define foo bar\n", + "'#' must be first item on line", + }, + { + "#ifdef foo\nhello", + "unclosed #ifdef or #ifndef", + }, + { + "#ifndef foo\nhello", + "unclosed #ifdef or #ifndef", + }, + { + "#ifdef foo\nhello\n#else\nbye", + "unclosed #ifdef or #ifndef", + }, + { + "#define A() A()\nA()", + "recursive macro invocation", + }, + { + "#define A a\n#define A a\n", + "redefinition of macro", + }, + { + "#define A a", + "no newline after macro definition", + }, +} + +func TestBadLex(t *testing.T) { + for _, test := range badLexTests { + input := NewInput(test.error) + input.Push(NewTokenizer(test.error, strings.NewReader(test.input), nil)) + err := firstError(input) + if err == nil { + t.Errorf("%s: got no error", test.error) + continue + } + if !strings.Contains(err.Error(), test.error) { + t.Errorf("got error %q expected %q", err.Error(), test.error) + } + } +} + +// firstError returns the first error value triggered by the input. 
+func firstError(input *Input) (err error) { + panicOnError = true + defer func() { + panicOnError = false + switch e := recover(); e := e.(type) { + case nil: + case error: + err = e + default: + panic(e) + } + }() + + for { + tok := input.Next() + if tok == scanner.EOF { + return + } + } +} diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go new file mode 100644 index 0000000..8ee0c70 --- /dev/null +++ b/src/cmd/asm/internal/lex/slice.go @@ -0,0 +1,74 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "text/scanner" + + "cmd/internal/src" +) + +// A Slice reads from a slice of Tokens. +type Slice struct { + tokens []Token + base *src.PosBase + line int + pos int +} + +func NewSlice(base *src.PosBase, line int, tokens []Token) *Slice { + return &Slice{ + tokens: tokens, + base: base, + line: line, + pos: -1, // Next will advance to zero. + } +} + +func (s *Slice) Next() ScanToken { + s.pos++ + if s.pos >= len(s.tokens) { + return scanner.EOF + } + return s.tokens[s.pos].ScanToken +} + +func (s *Slice) Text() string { + return s.tokens[s.pos].text +} + +func (s *Slice) File() string { + return s.base.Filename() +} + +func (s *Slice) Base() *src.PosBase { + return s.base +} + +func (s *Slice) SetBase(base *src.PosBase) { + // Cannot happen because we only have slices of already-scanned text, + // but be prepared. + s.base = base +} + +func (s *Slice) Line() int { + return s.line +} + +func (s *Slice) Col() int { + // TODO: Col is only called when defining a macro and all it cares about is increasing + // position to discover whether there is a blank before the parenthesis. + // We only get here if defining a macro inside a macro. + // This imperfect implementation means we cannot tell the difference between + // #define A #define B(x) x + // and + // #define A #define B (x) x + // The first has definition of B has an argument, the second doesn't. Because we let + // text/scanner strip the blanks for us, this is extremely rare, hard to fix, and not worth it. + return s.pos +} + +func (s *Slice) Close() { +} diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go new file mode 100644 index 0000000..929e528 --- /dev/null +++ b/src/cmd/asm/internal/lex/stack.go @@ -0,0 +1,61 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "text/scanner" + + "cmd/internal/src" +) + +// A Stack is a stack of TokenReaders. As the top TokenReader hits EOF, +// it resumes reading the next one down. +type Stack struct { + tr []TokenReader +} + +// Push adds tr to the top (end) of the input stack. (Popping happens automatically.) +func (s *Stack) Push(tr TokenReader) { + s.tr = append(s.tr, tr) +} + +func (s *Stack) Next() ScanToken { + tos := s.tr[len(s.tr)-1] + tok := tos.Next() + for tok == scanner.EOF && len(s.tr) > 1 { + tos.Close() + // Pop the topmost item from the stack and resume with the next one down. 
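+ // (The len(s.tr) > 1 guard means EOF from the bottommost reader is returned
+ // to the caller rather than popped.)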
+ s.tr = s.tr[:len(s.tr)-1] + tok = s.Next() + } + return tok +} + +func (s *Stack) Text() string { + return s.tr[len(s.tr)-1].Text() +} + +func (s *Stack) File() string { + return s.Base().Filename() +} + +func (s *Stack) Base() *src.PosBase { + return s.tr[len(s.tr)-1].Base() +} + +func (s *Stack) SetBase(base *src.PosBase) { + s.tr[len(s.tr)-1].SetBase(base) +} + +func (s *Stack) Line() int { + return s.tr[len(s.tr)-1].Line() +} + +func (s *Stack) Col() int { + return s.tr[len(s.tr)-1].Col() +} + +func (s *Stack) Close() { // Unused. +} diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go new file mode 100644 index 0000000..861a2d4 --- /dev/null +++ b/src/cmd/asm/internal/lex/tokenizer.go @@ -0,0 +1,153 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "io" + "os" + "strings" + "text/scanner" + "unicode" + + "cmd/asm/internal/flags" + "cmd/internal/objabi" + "cmd/internal/src" +) + +// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured +// for our purposes and made a TokenReader. It forms the lowest level, +// turning text from readers into tokens. +type Tokenizer struct { + tok ScanToken + s *scanner.Scanner + base *src.PosBase + line int + file *os.File // If non-nil, file descriptor to close. +} + +func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { + var s scanner.Scanner + s.Init(r) + // Newline is like a semicolon; other space characters are fine. + s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' + // Don't skip comments: we need to count newlines. + s.Mode = scanner.ScanChars | + scanner.ScanFloats | + scanner.ScanIdents | + scanner.ScanInts | + scanner.ScanStrings | + scanner.ScanComments + s.Position.Filename = name + s.IsIdentRune = isIdentRune + return &Tokenizer{ + s: &s, + base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)), + line: 1, + file: file, + } +} + +// We want center dot (·) and division slash (∕) to work as identifier characters. +func isIdentRune(ch rune, i int) bool { + if unicode.IsLetter(ch) { + return true + } + switch ch { + case '_': // Underscore; traditional. + return true + case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot + return true + case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash + return true + } + // Digits are OK only after the first character. + return i > 0 && unicode.IsDigit(ch) +} + +func (t *Tokenizer) Text() string { + switch t.tok { + case LSH: + return "<<" + case RSH: + return ">>" + case ARR: + return "->" + case ROT: + return "@>" + } + return t.s.TokenText() +} + +func (t *Tokenizer) File() string { + return t.base.Filename() +} + +func (t *Tokenizer) Base() *src.PosBase { + return t.base +} + +func (t *Tokenizer) SetBase(base *src.PosBase) { + t.base = base +} + +func (t *Tokenizer) Line() int { + return t.line +} + +func (t *Tokenizer) Col() int { + return t.s.Pos().Column +} + +func (t *Tokenizer) Next() ScanToken { + s := t.s + for { + t.tok = ScanToken(s.Scan()) + if t.tok != scanner.Comment { + break + } + text := s.TokenText() + t.line += strings.Count(text, "\n") + // TODO: Use constraint.IsGoBuild once it exists. 
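+ // (Ordinary comments are skipped, with their newlines counted above; a
+ // build comment is surfaced as its own token so callers can act on it.)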
+ if strings.HasPrefix(text, "//go:build") { + t.tok = BuildComment + break + } + } + switch t.tok { + case '\n': + t.line++ + case '-': + if s.Peek() == '>' { + s.Next() + t.tok = ARR + return ARR + } + case '@': + if s.Peek() == '>' { + s.Next() + t.tok = ROT + return ROT + } + case '<': + if s.Peek() == '<' { + s.Next() + t.tok = LSH + return LSH + } + case '>': + if s.Peek() == '>' { + s.Next() + t.tok = RSH + return RSH + } + } + return t.tok +} + +func (t *Tokenizer) Close() { + if t.file != nil { + t.file.Close() + } +} |
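As a usage reference, here is a minimal sketch (not part of the commit) of how these pieces compose: a Tokenizer is pushed onto an Input, and Next/Text drain the macro-processed token stream, much as the drain helper in lex_test.go does. The file name "example.s" and the source text are placeholders, and since cmd/asm/internal/lex is an internal package this only builds inside the Go tree.

package main

import (
	"fmt"
	"strings"
	"text/scanner"

	"cmd/asm/internal/lex"
)

func main() {
	// Define a macro on one line, then use it in an instruction.
	src := "#define N 42\nMOVQ $N, AX\n"
	in := lex.NewInput("example.s")
	in.Push(lex.NewTokenizer("example.s", strings.NewReader(src), nil))
	for tok := in.Next(); tok != scanner.EOF; tok = in.Next() {
		fmt.Printf("%v %q\n", tok, in.Text())
	}
}

The identifier N never reaches the caller: Input.Next sees that it names a defined macro, pushes the macro body onto the stack as a Slice, and returns 42 in its place.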