summaryrefslogtreecommitdiffstats
path: root/src/cmd/asm/internal/lex/tokenizer.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/asm/internal/lex/tokenizer.go')
-rw-r--r--src/cmd/asm/internal/lex/tokenizer.go153
1 files changed, 153 insertions, 0 deletions
diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go
new file mode 100644
index 0000000..f60f7a1
--- /dev/null
+++ b/src/cmd/asm/internal/lex/tokenizer.go
@@ -0,0 +1,153 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+ "go/build/constraint"
+ "io"
+ "os"
+ "strings"
+ "text/scanner"
+ "unicode"
+
+ "cmd/asm/internal/flags"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+)
+
// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	tok  ScanToken        // most recently scanned token
	s    *scanner.Scanner // underlying scanner
	base *src.PosBase     // position base for the file being tokenized
	line int              // current line number, maintained by Next
	file *os.File         // If non-nil, file descriptor to close.
}
+
+func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
+ var s scanner.Scanner
+ s.Init(r)
+ // Newline is like a semicolon; other space characters are fine.
+ s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
+ // Don't skip comments: we need to count newlines.
+ s.Mode = scanner.ScanChars |
+ scanner.ScanFloats |
+ scanner.ScanIdents |
+ scanner.ScanInts |
+ scanner.ScanStrings |
+ scanner.ScanComments
+ s.Position.Filename = name
+ s.IsIdentRune = isIdentRune
+ return &Tokenizer{
+ s: &s,
+ base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
+ line: 1,
+ file: file,
+ }
+}
+
// isIdentRune reports whether ch may appear at position i of an
// identifier. Besides letters and underscore, center dot (·) and
// division slash (∕) are accepted so that assembly names such as
// runtime·exit and runtime∕debug·setGCPercent can be written.
// Digits are accepted everywhere except the first position.
func isIdentRune(ch rune, i int) bool {
	switch {
	case unicode.IsLetter(ch):
		return true
	case ch == '_': // Underscore; traditional.
		return true
	case ch == '\u00B7': // Middle dot '·', standing in for the period.
		return true
	case ch == '\u2215': // Division slash '∕', standing in for the slash.
		return true
	case unicode.IsDigit(ch):
		// Digits are OK only after the first character.
		return i > 0
	default:
		return false
	}
}
+
+func (t *Tokenizer) Text() string {
+ switch t.tok {
+ case LSH:
+ return "<<"
+ case RSH:
+ return ">>"
+ case ARR:
+ return "->"
+ case ROT:
+ return "@>"
+ }
+ return t.s.TokenText()
+}
+
// File returns the name of the file being tokenized.
func (t *Tokenizer) File() string {
	return t.base.Filename()
}
+
// Base returns the position base used to resolve source positions.
func (t *Tokenizer) Base() *src.PosBase {
	return t.base
}
+
// SetBase sets the position base used to resolve source positions.
func (t *Tokenizer) SetBase(base *src.PosBase) {
	t.base = base
}
+
// Line returns the current line number, as maintained by Next.
func (t *Tokenizer) Line() int {
	return t.line
}
+
// Col returns the current column, as reported by the underlying scanner.
func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}
+
+func (t *Tokenizer) Next() ScanToken {
+ s := t.s
+ for {
+ t.tok = ScanToken(s.Scan())
+ if t.tok != scanner.Comment {
+ break
+ }
+ text := s.TokenText()
+ t.line += strings.Count(text, "\n")
+ if constraint.IsGoBuild(text) {
+ t.tok = BuildComment
+ break
+ }
+ }
+ switch t.tok {
+ case '\n':
+ t.line++
+ case '-':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ARR
+ return ARR
+ }
+ case '@':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = ROT
+ return ROT
+ }
+ case '<':
+ if s.Peek() == '<' {
+ s.Next()
+ t.tok = LSH
+ return LSH
+ }
+ case '>':
+ if s.Peek() == '>' {
+ s.Next()
+ t.tok = RSH
+ return RSH
+ }
+ }
+ return t.tok
+}
+
+func (t *Tokenizer) Close() {
+ if t.file != nil {
+ t.file.Close()
+ }
+}