Diffstat (limited to 'src/cmd/asm/internal/lex')
-rw-r--r-- | src/cmd/asm/internal/lex/input.go | 486
-rw-r--r-- | src/cmd/asm/internal/lex/lex.go | 141
-rw-r--r-- | src/cmd/asm/internal/lex/lex_test.go | 365
-rw-r--r-- | src/cmd/asm/internal/lex/slice.go | 74
-rw-r--r-- | src/cmd/asm/internal/lex/stack.go | 61
-rw-r--r-- | src/cmd/asm/internal/lex/tokenizer.go | 153 |
6 files changed, 1280 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go new file mode 100644 index 0000000..da4ebe6 --- /dev/null +++ b/src/cmd/asm/internal/lex/input.go @@ -0,0 +1,486 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "text/scanner" + + "cmd/asm/internal/flags" + "cmd/internal/objabi" + "cmd/internal/src" +) + +// Input is the main input: a stack of readers and some macro definitions. +// It also handles #include processing (by pushing onto the input stack) +// and parses and instantiates macro definitions. +type Input struct { + Stack + includes []string + beginningOfLine bool + ifdefStack []bool + macros map[string]*Macro + text string // Text of last token returned by Next. + peek bool + peekToken ScanToken + peekText string +} + +// NewInput returns an Input from the given path. +func NewInput(name string) *Input { + return &Input{ + // include directories: look in source dir, then -I directories. + includes: append([]string{filepath.Dir(name)}, flags.I...), + beginningOfLine: true, + macros: predefine(flags.D), + } +} + +// predefine installs the macros set by the -D flag on the command line. +func predefine(defines flags.MultiFlag) map[string]*Macro { + macros := make(map[string]*Macro) + for _, name := range defines { + value := "1" + i := strings.IndexRune(name, '=') + if i > 0 { + name, value = name[:i], name[i+1:] + } + tokens := Tokenize(name) + if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident { + fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0]) + flags.Usage() + } + macros[name] = &Macro{ + name: name, + args: nil, + tokens: Tokenize(value), + } + } + return macros +} + +var panicOnError bool // For testing. + +func (in *Input) Error(args ...interface{}) { + if panicOnError { + panic(fmt.Errorf("%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))) + } + fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...)) + os.Exit(1) +} + +// expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token. +func (in *Input) expectText(args ...interface{}) { + in.Error(append(args, "; got", strconv.Quote(in.Stack.Text()))...) +} + +// enabled reports whether the input is enabled by an ifdef, or is at the top level. +func (in *Input) enabled() bool { + return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1] +} + +func (in *Input) expectNewline(directive string) { + tok := in.Stack.Next() + if tok != '\n' { + in.expectText("expected newline after", directive) + } +} + +func (in *Input) Next() ScanToken { + if in.peek { + in.peek = false + tok := in.peekToken + in.text = in.peekText + return tok + } + // If we cannot generate a token after 100 macro invocations, we're in trouble. + // The usual case is caught by Push, below, but be safe. + for nesting := 0; nesting < 100; { + tok := in.Stack.Next() + switch tok { + case '#': + if !in.beginningOfLine { + in.Error("'#' must be first item on line") + } + in.beginningOfLine = in.hash() + in.text = "#" + return '#' + + case scanner.Ident: + // Is it a macro name? 
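+ // (If it is, invokeMacro pushes the expansion onto the input stack and the
+ // loop continues, with nesting bumped as a guard against runaway expansion.)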
+ name := in.Stack.Text() + macro := in.macros[name] + if macro != nil { + nesting++ + in.invokeMacro(macro) + continue + } + fallthrough + default: + if tok == scanner.EOF && len(in.ifdefStack) > 0 { + // We're skipping text but have run out of input with no #endif. + in.Error("unclosed #ifdef or #ifndef") + } + in.beginningOfLine = tok == '\n' + if in.enabled() { + in.text = in.Stack.Text() + return tok + } + } + } + in.Error("recursive macro invocation") + return 0 +} + +func (in *Input) Text() string { + return in.text +} + +// hash processes a # preprocessor directive. It reports whether it completes. +func (in *Input) hash() bool { + // We have a '#'; it must be followed by a known word (define, include, etc.). + tok := in.Stack.Next() + if tok != scanner.Ident { + in.expectText("expected identifier after '#'") + } + if !in.enabled() { + // Can only start including again if we are at #else or #endif but also + // need to keep track of nested #if[n]defs. + // We let #line through because it might affect errors. + switch in.Stack.Text() { + case "else", "endif", "ifdef", "ifndef", "line": + // Press on. + default: + return false + } + } + switch in.Stack.Text() { + case "define": + in.define() + case "else": + in.else_() + case "endif": + in.endif() + case "ifdef": + in.ifdef(true) + case "ifndef": + in.ifdef(false) + case "include": + in.include() + case "line": + in.line() + case "undef": + in.undef() + default: + in.Error("unexpected token after '#':", in.Stack.Text()) + } + return true +} + +// macroName returns the name for the macro being referenced. +func (in *Input) macroName() string { + // We use the Stack's input method; no macro processing at this stage. + tok := in.Stack.Next() + if tok != scanner.Ident { + in.expectText("expected identifier after # directive") + } + // Name is alphanumeric by definition. + return in.Stack.Text() +} + +// #define processing. +func (in *Input) define() { + name := in.macroName() + args, tokens := in.macroDefinition(name) + in.defineMacro(name, args, tokens) +} + +// defineMacro stores the macro definition in the Input. +func (in *Input) defineMacro(name string, args []string, tokens []Token) { + if in.macros[name] != nil { + in.Error("redefinition of macro:", name) + } + in.macros[name] = &Macro{ + name: name, + args: args, + tokens: tokens, + } +} + +// macroDefinition returns the list of formals and the tokens of the definition. +// The argument list is nil for no parens on the definition; otherwise a list of +// formal argument names. +func (in *Input) macroDefinition(name string) ([]string, []Token) { + prevCol := in.Stack.Col() + tok := in.Stack.Next() + if tok == '\n' || tok == scanner.EOF { + return nil, nil // No definition for macro + } + var args []string + // The C preprocessor treats + // #define A(x) + // and + // #define A (x) + // distinctly: the first is a macro with arguments, the second without. + // Distinguish these cases using the column number, since we don't + // see the space itself. Note that text/scanner reports the position at the + // end of the token. It's where you are now, and you just read this token. + if tok == '(' && in.Stack.Col() == prevCol+1 { + // Macro has arguments. Scan list of formals. + acceptArg := true + args = []string{} // Zero length but not nil. + Loop: + for { + tok = in.Stack.Next() + switch tok { + case ')': + tok = in.Stack.Next() // First token of macro definition. 
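+ // (Reading ahead here matches the no-paren path above, where tok already
+ // holds the first token of the body when the scan loop below begins.)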
+ break Loop + case ',': + if acceptArg { + in.Error("bad syntax in definition for macro:", name) + } + acceptArg = true + case scanner.Ident: + if !acceptArg { + in.Error("bad syntax in definition for macro:", name) + } + arg := in.Stack.Text() + if i := lookup(args, arg); i >= 0 { + in.Error("duplicate argument", arg, "in definition for macro:", name) + } + args = append(args, arg) + acceptArg = false + default: + in.Error("bad definition for macro:", name) + } + } + } + var tokens []Token + // Scan to newline. Backslashes escape newlines. + for tok != '\n' { + if tok == scanner.EOF { + in.Error("missing newline in definition for macro:", name) + } + if tok == '\\' { + tok = in.Stack.Next() + if tok != '\n' && tok != '\\' { + in.Error(`can only escape \ or \n in definition for macro:`, name) + } + } + tokens = append(tokens, Make(tok, in.Stack.Text())) + tok = in.Stack.Next() + } + return args, tokens +} + +func lookup(args []string, arg string) int { + for i, a := range args { + if a == arg { + return i + } + } + return -1 +} + +// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual +// parameters substituted for the formals. +// Invoking a macro does not touch the PC/line history. +func (in *Input) invokeMacro(macro *Macro) { + // If the macro has no arguments, just substitute the text. + if macro.args == nil { + in.Push(NewSlice(in.Base(), in.Line(), macro.tokens)) + return + } + tok := in.Stack.Next() + if tok != '(' { + // If the macro has arguments but is invoked without them, all we push is the macro name. + // First, put back the token. + in.peekToken = tok + in.peekText = in.text + in.peek = true + in.Push(NewSlice(in.Base(), in.Line(), []Token{Make(macroName, macro.name)})) + return + } + actuals := in.argsFor(macro) + var tokens []Token + for _, tok := range macro.tokens { + if tok.ScanToken != scanner.Ident { + tokens = append(tokens, tok) + continue + } + substitution := actuals[tok.text] + if substitution == nil { + tokens = append(tokens, tok) + continue + } + tokens = append(tokens, substitution...) + } + in.Push(NewSlice(in.Base(), in.Line(), tokens)) +} + +// argsFor returns a map from formal name to actual value for this argumented macro invocation. +// The opening parenthesis has been absorbed. +func (in *Input) argsFor(macro *Macro) map[string][]Token { + var args [][]Token + // One macro argument per iteration. Collect them all and check counts afterwards. + for argNum := 0; ; argNum++ { + tokens, tok := in.collectArgument(macro) + args = append(args, tokens) + if tok == ')' { + break + } + } + // Zero-argument macros are tricky. + if len(macro.args) == 0 && len(args) == 1 && args[0] == nil { + args = nil + } else if len(args) != len(macro.args) { + in.Error("wrong arg count for macro", macro.name) + } + argMap := make(map[string][]Token) + for i, arg := range args { + argMap[macro.args[i]] = arg + } + return argMap +} + +// collectArgument returns the actual tokens for a single argument of a macro. +// It also returns the token that terminated the argument, which will always +// be either ',' or ')'. The starting '(' has been scanned. 
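+// For example, in A(f(x, y), z) the nesting count keeps the comma inside
+// f(x, y) from terminating the first argument; only a top-level ',' or ')' does.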
+func (in *Input) collectArgument(macro *Macro) ([]Token, ScanToken) { + nesting := 0 + var tokens []Token + for { + tok := in.Stack.Next() + if tok == scanner.EOF || tok == '\n' { + in.Error("unterminated arg list invoking macro:", macro.name) + } + if nesting == 0 && (tok == ')' || tok == ',') { + return tokens, tok + } + if tok == '(' { + nesting++ + } + if tok == ')' { + nesting-- + } + tokens = append(tokens, Make(tok, in.Stack.Text())) + } +} + +// #ifdef and #ifndef processing. +func (in *Input) ifdef(truth bool) { + name := in.macroName() + in.expectNewline("#if[n]def") + if !in.enabled() { + truth = false + } else if _, defined := in.macros[name]; !defined { + truth = !truth + } + in.ifdefStack = append(in.ifdefStack, truth) +} + +// #else processing +func (in *Input) else_() { + in.expectNewline("#else") + if len(in.ifdefStack) == 0 { + in.Error("unmatched #else") + } + if len(in.ifdefStack) == 1 || in.ifdefStack[len(in.ifdefStack)-2] { + in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1] + } +} + +// #endif processing. +func (in *Input) endif() { + in.expectNewline("#endif") + if len(in.ifdefStack) == 0 { + in.Error("unmatched #endif") + } + in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1] +} + +// #include processing. +func (in *Input) include() { + // Find and parse string. + tok := in.Stack.Next() + if tok != scanner.String { + in.expectText("expected string after #include") + } + name, err := strconv.Unquote(in.Stack.Text()) + if err != nil { + in.Error("unquoting include file name: ", err) + } + in.expectNewline("#include") + // Push tokenizer for file onto stack. + fd, err := os.Open(name) + if err != nil { + for _, dir := range in.includes { + fd, err = os.Open(filepath.Join(dir, name)) + if err == nil { + break + } + } + if err != nil { + in.Error("#include:", err) + } + } + in.Push(NewTokenizer(name, fd, fd)) +} + +// #line processing. +func (in *Input) line() { + // Only need to handle Plan 9 format: #line 337 "filename" + tok := in.Stack.Next() + if tok != scanner.Int { + in.expectText("expected line number after #line") + } + line, err := strconv.Atoi(in.Stack.Text()) + if err != nil { + in.Error("error parsing #line (cannot happen):", err) + } + tok = in.Stack.Next() + if tok != scanner.String { + in.expectText("expected file name in #line") + } + file, err := strconv.Unquote(in.Stack.Text()) + if err != nil { + in.Error("unquoting #line file name: ", err) + } + tok = in.Stack.Next() + if tok != '\n' { + in.Error("unexpected token at end of #line: ", tok) + } + pos := src.MakePos(in.Base(), uint(in.Line())+1, 1) // +1 because #line nnn means line nnn starts on next line + in.Stack.SetBase(src.NewLinePragmaBase(pos, file, objabi.AbsFile(objabi.WorkingDir(), file, *flags.TrimPath), uint(line), 1)) +} + +// #undef processing +func (in *Input) undef() { + name := in.macroName() + if in.macros[name] == nil { + in.Error("#undef for undefined macro:", name) + } + // Newline must be next. + tok := in.Stack.Next() + if tok != '\n' { + in.Error("syntax error in #undef for macro:", name) + } + delete(in.macros, name) +} + +func (in *Input) Push(r TokenReader) { + if len(in.tr) > 100 { + in.Error("input recursion") + } + in.Stack.Push(r) +} + +func (in *Input) Close() { +} diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go new file mode 100644 index 0000000..7cd41a5 --- /dev/null +++ b/src/cmd/asm/internal/lex/lex.go @@ -0,0 +1,141 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lex implements lexical analysis for the assembler. +package lex + +import ( + "fmt" + "log" + "os" + "strings" + "text/scanner" + + "cmd/internal/src" +) + +// A ScanToken represents an input item. It is a simple wrapping of rune, as +// returned by text/scanner.Scanner, plus a couple of extra values. +type ScanToken rune + +const ( + // Asm defines some two-character lexemes. We make up + // a rune/ScanToken value for them - ugly but simple. + LSH ScanToken = -1000 - iota // << Left shift. + RSH // >> Logical right shift. + ARR // -> Used on ARM for shift type 3, arithmetic right shift. + ROT // @> Used on ARM for shift type 4, rotate right. + Include // included file started here + BuildComment // //go:build or +build comment + macroName // name of macro that should not be expanded +) + +// IsRegisterShift reports whether the token is one of the ARM register shift operators. +func IsRegisterShift(r ScanToken) bool { + return ROT <= r && r <= LSH // Order looks backwards because these are negative. +} + +func (t ScanToken) String() string { + switch t { + case scanner.EOF: + return "EOF" + case scanner.Ident: + return "identifier" + case scanner.Int: + return "integer constant" + case scanner.Float: + return "float constant" + case scanner.Char: + return "rune constant" + case scanner.String: + return "string constant" + case scanner.RawString: + return "raw string constant" + case scanner.Comment: + return "comment" + default: + return fmt.Sprintf("%q", rune(t)) + } +} + +// NewLexer returns a lexer for the named file and the given link context. +func NewLexer(name string) TokenReader { + input := NewInput(name) + fd, err := os.Open(name) + if err != nil { + log.Fatalf("%s\n", err) + } + input.Push(NewTokenizer(name, fd, fd)) + return input +} + +// The other files in this directory each contain an implementation of TokenReader. + +// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what +// the text of the most recently returned token is, and where it was found. +// The underlying scanner elides all spaces except newline, so the input looks like a stream of +// Tokens; original spacing is lost but we don't need it. +type TokenReader interface { + // Next returns the next token. + Next() ScanToken + // The following methods all refer to the most recent token returned by Next. + // Text returns the original string representation of the token. + Text() string + // File reports the source file name of the token. + File() string + // Base reports the position base of the token. + Base() *src.PosBase + // SetBase sets the position base. + SetBase(*src.PosBase) + // Line reports the source line number of the token. + Line() int + // Col reports the source column number of the token. + Col() int + // Close does any teardown required. + Close() +} + +// A Token is a scan token plus its string value. +// A macro is stored as a sequence of Tokens with spaces stripped. +type Token struct { + ScanToken + text string +} + +// Make returns a Token with the given rune (ScanToken) and text representation. +func Make(token ScanToken, text string) Token { + // If the symbol starts with center dot, as in ·x, rewrite it as ""·x + if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") { + text = `""` + text + } + // Substitute the substitutes for . and /. 
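+ // (Assembly source cannot write '.' or '/' inside a symbol name, so it uses
+ // middle dot and division slash instead; they are rewritten here so the
+ // rest of the toolchain sees ordinary '.' and '/'.)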
+ text = strings.Replace(text, "\u00B7", ".", -1) + text = strings.Replace(text, "\u2215", "/", -1) + return Token{ScanToken: token, text: text} +} + +func (l Token) String() string { + return l.text +} + +// A Macro represents the definition of a #defined macro. +type Macro struct { + name string // The #define name. + args []string // Formal arguments. + tokens []Token // Body of macro. +} + +// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests. +func Tokenize(str string) []Token { + t := NewTokenizer("command line", strings.NewReader(str), nil) + var tokens []Token + for { + tok := t.Next() + if tok == scanner.EOF { + break + } + tokens = append(tokens, Make(tok, t.Text())) + } + return tokens +} diff --git a/src/cmd/asm/internal/lex/lex_test.go b/src/cmd/asm/internal/lex/lex_test.go new file mode 100644 index 0000000..51679d2 --- /dev/null +++ b/src/cmd/asm/internal/lex/lex_test.go @@ -0,0 +1,365 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "bytes" + "strings" + "testing" + "text/scanner" +) + +type lexTest struct { + name string + input string + output string +} + +var lexTests = []lexTest{ + { + "empty", + "", + "", + }, + { + "simple", + "1 (a)", + "1.(.a.)", + }, + { + "simple define", + lines( + "#define A 1234", + "A", + ), + "1234.\n", + }, + { + "define without value", + "#define A", + "", + }, + { + "macro without arguments", + "#define A() 1234\n" + "A()\n", + "1234.\n", + }, + { + "macro with just parens as body", + "#define A () \n" + "A\n", + "(.).\n", + }, + { + "macro with parens but no arguments", + "#define A (x) \n" + "A\n", + "(.x.).\n", + }, + { + "macro with arguments", + "#define A(x, y, z) x+z+y\n" + "A(1, 2, 3)\n", + "1.+.3.+.2.\n", + }, + { + "argumented macro invoked without arguments", + lines( + "#define X() foo ", + "X()", + "X", + ), + "foo.\n.X.\n", + }, + { + "multiline macro without arguments", + lines( + "#define A 1\\", + "\t2\\", + "\t3", + "before", + "A", + "after", + ), + "before.\n.1.\n.2.\n.3.\n.after.\n", + }, + { + "multiline macro with arguments", + lines( + "#define A(a, b, c) a\\", + "\tb\\", + "\tc", + "before", + "A(1, 2, 3)", + "after", + ), + "before.\n.1.\n.2.\n.3.\n.after.\n", + }, + { + "LOAD macro", + lines( + "#define LOAD(off, reg) \\", + "\tMOVBLZX (off*4)(R12), reg \\", + "\tADDB reg, DX", + "", + "LOAD(8, AX)", + ), + "\n.\n.MOVBLZX.(.8.*.4.).(.R12.).,.AX.\n.ADDB.AX.,.DX.\n", + }, + { + "nested multiline macro", + lines( + "#define KEYROUND(xmm, load, off, r1, r2, index) \\", + "\tMOVBLZX (BP)(DX*4), R8 \\", + "\tload((off+1), r2) \\", + "\tMOVB R8, (off*4)(R12) \\", + "\tPINSRW $index, (BP)(R8*4), xmm", + "#define LOAD(off, reg) \\", + "\tMOVBLZX (off*4)(R12), reg \\", + "\tADDB reg, DX", + "KEYROUND(X0, LOAD, 8, AX, BX, 0)", + ), + "\n.MOVBLZX.(.BP.).(.DX.*.4.).,.R8.\n.\n.MOVBLZX.(.(.8.+.1.).*.4.).(.R12.).,.BX.\n.ADDB.BX.,.DX.\n.MOVB.R8.,.(.8.*.4.).(.R12.).\n.PINSRW.$.0.,.(.BP.).(.R8.*.4.).,.X0.\n", + }, + { + "taken #ifdef", + lines( + "#define A", + "#ifdef A", + "#define B 1234", + "#endif", + "B", + ), + "1234.\n", + }, + { + "not taken #ifdef", + lines( + "#ifdef A", + "#define B 1234", + "#endif", + "B", + ), + "B.\n", + }, + { + "taken #ifdef with else", + lines( + "#define A", + "#ifdef A", + "#define B 1234", + "#else", + "#define B 5678", + "#endif", + "B", + ), + "1234.\n", + }, + { + "not taken #ifdef with else", + lines( + "#ifdef 
A", + "#define B 1234", + "#else", + "#define B 5678", + "#endif", + "B", + ), + "5678.\n", + }, + { + "nested taken/taken #ifdef", + lines( + "#define A", + "#define B", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "1234.\n", + }, + { + "nested taken/not-taken #ifdef", + lines( + "#define A", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "5678.\n", + }, + { + "nested not-taken/would-be-taken #ifdef", + lines( + "#define B", + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "C.\n", + }, + { + "nested not-taken/not-taken #ifdef", + lines( + "#ifdef A", + "#ifdef B", + "#define C 1234", + "#else", + "#define C 5678", + "#endif", + "#endif", + "C", + ), + "C.\n", + }, + { + "nested #define", + lines( + "#define A #define B THIS", + "A", + "B", + ), + "THIS.\n", + }, + { + "nested #define with args", + lines( + "#define A #define B(x) x", + "A", + "B(THIS)", + ), + "THIS.\n", + }, + /* This one fails. See comment in Slice.Col. + { + "nested #define with args", + lines( + "#define A #define B (x) x", + "A", + "B(THIS)", + ), + "x.\n", + }, + */ +} + +func TestLex(t *testing.T) { + for _, test := range lexTests { + input := NewInput(test.name) + input.Push(NewTokenizer(test.name, strings.NewReader(test.input), nil)) + result := drain(input) + if result != test.output { + t.Errorf("%s: got %q expected %q", test.name, result, test.output) + } + } +} + +// lines joins the arguments together as complete lines. +func lines(a ...string) string { + return strings.Join(a, "\n") + "\n" +} + +// drain returns a single string representing the processed input tokens. +func drain(input *Input) string { + var buf bytes.Buffer + for { + tok := input.Next() + if tok == scanner.EOF { + return buf.String() + } + if tok == '#' { + continue + } + if buf.Len() > 0 { + buf.WriteByte('.') + } + buf.WriteString(input.Text()) + } +} + +type badLexTest struct { + input string + error string +} + +var badLexTests = []badLexTest{ + { + "3 #define foo bar\n", + "'#' must be first item on line", + }, + { + "#ifdef foo\nhello", + "unclosed #ifdef or #ifndef", + }, + { + "#ifndef foo\nhello", + "unclosed #ifdef or #ifndef", + }, + { + "#ifdef foo\nhello\n#else\nbye", + "unclosed #ifdef or #ifndef", + }, + { + "#define A() A()\nA()", + "recursive macro invocation", + }, + { + "#define A a\n#define A a\n", + "redefinition of macro", + }, + { + "#define A a", + "no newline after macro definition", + }, +} + +func TestBadLex(t *testing.T) { + for _, test := range badLexTests { + input := NewInput(test.error) + input.Push(NewTokenizer(test.error, strings.NewReader(test.input), nil)) + err := firstError(input) + if err == nil { + t.Errorf("%s: got no error", test.error) + continue + } + if !strings.Contains(err.Error(), test.error) { + t.Errorf("got error %q expected %q", err.Error(), test.error) + } + } +} + +// firstError returns the first error value triggered by the input. 
+func firstError(input *Input) (err error) { + panicOnError = true + defer func() { + panicOnError = false + switch e := recover(); e := e.(type) { + case nil: + case error: + err = e + default: + panic(e) + } + }() + + for { + tok := input.Next() + if tok == scanner.EOF { + return + } + } +} diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go new file mode 100644 index 0000000..8ee0c70 --- /dev/null +++ b/src/cmd/asm/internal/lex/slice.go @@ -0,0 +1,74 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "text/scanner" + + "cmd/internal/src" +) + +// A Slice reads from a slice of Tokens. +type Slice struct { + tokens []Token + base *src.PosBase + line int + pos int +} + +func NewSlice(base *src.PosBase, line int, tokens []Token) *Slice { + return &Slice{ + tokens: tokens, + base: base, + line: line, + pos: -1, // Next will advance to zero. + } +} + +func (s *Slice) Next() ScanToken { + s.pos++ + if s.pos >= len(s.tokens) { + return scanner.EOF + } + return s.tokens[s.pos].ScanToken +} + +func (s *Slice) Text() string { + return s.tokens[s.pos].text +} + +func (s *Slice) File() string { + return s.base.Filename() +} + +func (s *Slice) Base() *src.PosBase { + return s.base +} + +func (s *Slice) SetBase(base *src.PosBase) { + // Cannot happen because we only have slices of already-scanned text, + // but be prepared. + s.base = base +} + +func (s *Slice) Line() int { + return s.line +} + +func (s *Slice) Col() int { + // TODO: Col is only called when defining a macro and all it cares about is increasing + // position to discover whether there is a blank before the parenthesis. + // We only get here if defining a macro inside a macro. + // This imperfect implementation means we cannot tell the difference between + // #define A #define B(x) x + // and + // #define A #define B (x) x + // The first has definition of B has an argument, the second doesn't. Because we let + // text/scanner strip the blanks for us, this is extremely rare, hard to fix, and not worth it. + return s.pos +} + +func (s *Slice) Close() { +} diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go new file mode 100644 index 0000000..929e528 --- /dev/null +++ b/src/cmd/asm/internal/lex/stack.go @@ -0,0 +1,61 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "text/scanner" + + "cmd/internal/src" +) + +// A Stack is a stack of TokenReaders. As the top TokenReader hits EOF, +// it resumes reading the next one down. +type Stack struct { + tr []TokenReader +} + +// Push adds tr to the top (end) of the input stack. (Popping happens automatically.) +func (s *Stack) Push(tr TokenReader) { + s.tr = append(s.tr, tr) +} + +func (s *Stack) Next() ScanToken { + tos := s.tr[len(s.tr)-1] + tok := tos.Next() + for tok == scanner.EOF && len(s.tr) > 1 { + tos.Close() + // Pop the topmost item from the stack and resume with the next one down. 
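+ // (The len(s.tr) > 1 guard means EOF from the bottommost reader is returned
+ // to the caller rather than popped.)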
+ s.tr = s.tr[:len(s.tr)-1] + tok = s.Next() + } + return tok +} + +func (s *Stack) Text() string { + return s.tr[len(s.tr)-1].Text() +} + +func (s *Stack) File() string { + return s.Base().Filename() +} + +func (s *Stack) Base() *src.PosBase { + return s.tr[len(s.tr)-1].Base() +} + +func (s *Stack) SetBase(base *src.PosBase) { + s.tr[len(s.tr)-1].SetBase(base) +} + +func (s *Stack) Line() int { + return s.tr[len(s.tr)-1].Line() +} + +func (s *Stack) Col() int { + return s.tr[len(s.tr)-1].Col() +} + +func (s *Stack) Close() { // Unused. +} diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go new file mode 100644 index 0000000..861a2d4 --- /dev/null +++ b/src/cmd/asm/internal/lex/tokenizer.go @@ -0,0 +1,153 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lex + +import ( + "io" + "os" + "strings" + "text/scanner" + "unicode" + + "cmd/asm/internal/flags" + "cmd/internal/objabi" + "cmd/internal/src" +) + +// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured +// for our purposes and made a TokenReader. It forms the lowest level, +// turning text from readers into tokens. +type Tokenizer struct { + tok ScanToken + s *scanner.Scanner + base *src.PosBase + line int + file *os.File // If non-nil, file descriptor to close. +} + +func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { + var s scanner.Scanner + s.Init(r) + // Newline is like a semicolon; other space characters are fine. + s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' + // Don't skip comments: we need to count newlines. + s.Mode = scanner.ScanChars | + scanner.ScanFloats | + scanner.ScanIdents | + scanner.ScanInts | + scanner.ScanStrings | + scanner.ScanComments + s.Position.Filename = name + s.IsIdentRune = isIdentRune + return &Tokenizer{ + s: &s, + base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)), + line: 1, + file: file, + } +} + +// We want center dot (·) and division slash (∕) to work as identifier characters. +func isIdentRune(ch rune, i int) bool { + if unicode.IsLetter(ch) { + return true + } + switch ch { + case '_': // Underscore; traditional. + return true + case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot + return true + case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash + return true + } + // Digits are OK only after the first character. + return i > 0 && unicode.IsDigit(ch) +} + +func (t *Tokenizer) Text() string { + switch t.tok { + case LSH: + return "<<" + case RSH: + return ">>" + case ARR: + return "->" + case ROT: + return "@>" + } + return t.s.TokenText() +} + +func (t *Tokenizer) File() string { + return t.base.Filename() +} + +func (t *Tokenizer) Base() *src.PosBase { + return t.base +} + +func (t *Tokenizer) SetBase(base *src.PosBase) { + t.base = base +} + +func (t *Tokenizer) Line() int { + return t.line +} + +func (t *Tokenizer) Col() int { + return t.s.Pos().Column +} + +func (t *Tokenizer) Next() ScanToken { + s := t.s + for { + t.tok = ScanToken(s.Scan()) + if t.tok != scanner.Comment { + break + } + text := s.TokenText() + t.line += strings.Count(text, "\n") + // TODO: Use constraint.IsGoBuild once it exists. 
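+ // (Ordinary comments are skipped, with their newlines counted above; a
+ // build comment is surfaced as its own token so callers can act on it.)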
+ if strings.HasPrefix(text, "//go:build") { + t.tok = BuildComment + break + } + } + switch t.tok { + case '\n': + t.line++ + case '-': + if s.Peek() == '>' { + s.Next() + t.tok = ARR + return ARR + } + case '@': + if s.Peek() == '>' { + s.Next() + t.tok = ROT + return ROT + } + case '<': + if s.Peek() == '<' { + s.Next() + t.tok = LSH + return LSH + } + case '>': + if s.Peek() == '>' { + s.Next() + t.tok = RSH + return RSH + } + } + return t.tok +} + +func (t *Tokenizer) Close() { + if t.file != nil { + t.file.Close() + } +} |
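As a usage reference, here is a minimal sketch (not part of the commit) of how these pieces compose: a Tokenizer is pushed onto an Input, and Next/Text drain the macro-processed token stream, much as the drain helper in lex_test.go does. The file name "example.s" and the source text are placeholders, and since cmd/asm/internal/lex is an internal package this only builds inside the Go tree.

package main

import (
	"fmt"
	"strings"
	"text/scanner"

	"cmd/asm/internal/lex"
)

func main() {
	// Define a macro on one line, then use it in an instruction.
	src := "#define N 42\nMOVQ $N, AX\n"
	in := lex.NewInput("example.s")
	in.Push(lex.NewTokenizer("example.s", strings.NewReader(src), nil))
	for tok := in.Next(); tok != scanner.EOF; tok = in.Next() {
		fmt.Printf("%v %q\n", tok, in.Text())
	}
}

The identifier N never reaches the caller: Input.Next sees that it names a defined macro, pushes the macro body onto the stack as a Slice, and returns 42 in its place.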