1 files changed, 218 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go
new file mode 100644
index 0000000..01b5921
--- /dev/null
+++ b/src/cmd/compile/internal/syntax/source.go
@@ -0,0 +1,218 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements source, a buffered rune reader
+// specialized for scanning Go code: Reading
+// ASCII characters, maintaining current (line, col)
+// position information, and recording of the most
+// recently read source segment are highly optimized.
+// This file is self-contained (go tool compile source.go
+// compiles) and thus could be made into its own package.
+
+package syntax
+
+import (
+	"io"
+	"unicode/utf8"
+)
+
+// The source buffer is accessed using three indices b (begin),
+// r (read), and e (end):
+//
+// - If b >= 0, it points to the beginning of a segment of most
+//   recently read characters (typically a Go literal).
+//
+// - r points to the byte immediately following the most recently
+//   read character ch, which starts at r-chw.
+//
+// - e points to the byte immediately following the last byte that
+//   was read into the buffer.
+//
+// The buffer content is terminated at buf[e] with the sentinel
+// character utf8.RuneSelf. This makes it possible to test for
+// the common case of ASCII characters with a single 'if' (see
+// nextch method).
+//
+//                +------ content in use -------+
+//                v                             v
+// buf [...read...|...segment...|ch|...unread...|s|...free...]
+//                ^             ^  ^            ^
+//                |             |  |            |
+//                b         r-chw  r            e
+//
+// Invariant: -1 <= b < r <= e < len(buf) && buf[e] == sentinel
+
+type source struct {
+	in   io.Reader
+	errh func(line, col uint, msg string)
+
+	buf       []byte // source buffer
+	ioerr     error  // pending I/O error, or nil
+	b, r, e   int    // buffer indices (see comment above)
+	line, col uint   // source position of ch (0-based)
+	ch        rune   // most recently read character
+	chw       int    // width of ch
+}
+
+const sentinel = utf8.RuneSelf
+
+func (s *source) init(in io.Reader, errh func(line, col uint, msg string)) {
+	s.in = in
+	s.errh = errh
+
+	if s.buf == nil {
+		s.buf = make([]byte, nextSize(0))
+	}
+	s.buf[0] = sentinel
+	s.ioerr = nil
+	s.b, s.r, s.e = -1, 0, 0
+	s.line, s.col = 0, 0
+	s.ch = ' '
+	s.chw = 0
+}
+
+// starting points for line and column numbers
+const linebase = 1
+const colbase = 1
+
+// pos returns the (line, col) source position of s.ch.
+func (s *source) pos() (line, col uint) {
+	return linebase + s.line, colbase + s.col
+}
+
+// error reports the error msg at source position s.pos().
+func (s *source) error(msg string) {
+	line, col := s.pos()
+	s.errh(line, col, msg)
+}
+
+// start starts a new active source segment (including s.ch).
+// As long as stop has not been called, the active segment's
+// bytes (excluding s.ch) may be retrieved by calling segment.
+func (s *source) start()          { s.b = s.r - s.chw }
+func (s *source) stop()           { s.b = -1 }
+func (s *source) segment() []byte { return s.buf[s.b : s.r-s.chw] }
+
+// rewind rewinds the scanner's read position and character s.ch
+// to the start of the currently active segment, which must not
+// contain any newlines (otherwise position information will be
+// incorrect). Currently, rewind is only needed for handling the
+// source sequence ".."; it must not be called outside an active
+// segment.
+func (s *source) rewind() {
+	// ok to verify precondition - rewind is rarely called
+	if s.b < 0 {
+		panic("no active segment")
+	}
+	s.col -= uint(s.r - s.b)
+	s.r = s.b
+	s.nextch()
+}
+
+func (s *source) nextch() {
+redo:
+	s.col += uint(s.chw)
+	if s.ch == '\n' {
+		s.line++
+		s.col = 0
+	}
+
+	// fast common case: at least one ASCII character
+	if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
+		s.r++
+		s.chw = 1
+		if s.ch == 0 {
+			s.error("invalid NUL character")
+			goto redo
+		}
+		return
+	}
+
+	// slower general case: add more bytes to buffer if we don't have a full rune
+	for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
+		s.fill()
+	}
+
+	// EOF
+	if s.r == s.e {
+		if s.ioerr != io.EOF {
+			// ensure we never start with a '/' (e.g., rooted path) in the error message
+			s.error("I/O error: " + s.ioerr.Error())
+			s.ioerr = nil
+		}
+		s.ch = -1
+		s.chw = 0
+		return
+	}
+
+	s.ch, s.chw = utf8.DecodeRune(s.buf[s.r:s.e])
+	s.r += s.chw
+
+	if s.ch == utf8.RuneError && s.chw == 1 {
+		s.error("invalid UTF-8 encoding")
+		goto redo
+	}
+
+	// BOM's are only allowed as the first character in a file
+	const BOM = 0xfeff
+	if s.ch == BOM {
+		if s.line > 0 || s.col > 0 {
+			s.error("invalid BOM in the middle of the file")
+		}
+		goto redo
+	}
+}
+
+// fill reads more source bytes into s.buf.
+// It returns with at least one more byte in the buffer, or with s.ioerr != nil.
+func (s *source) fill() {
+	// determine content to preserve
+	b := s.r
+	if s.b >= 0 {
+		b = s.b
+		s.b = 0 // after buffer has grown or content has been moved down
+	}
+	content := s.buf[b:s.e]
+
+	// grow buffer or move content down
+	if len(content)*2 > len(s.buf) {
+		s.buf = make([]byte, nextSize(len(s.buf)))
+		copy(s.buf, content)
+	} else if b > 0 {
+		copy(s.buf, content)
+	}
+	s.r -= b
+	s.e -= b
+
+	// read more data: try a limited number of times
+	for i := 0; i < 10; i++ {
+		var n int
+		n, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1]) // -1 to leave space for sentinel
+		if n < 0 {
+			panic("negative read") // incorrect underlying io.Reader implementation
+		}
+		if n > 0 || s.ioerr != nil {
+			s.e += n
+			s.buf[s.e] = sentinel
+			return
+		}
+		// n == 0
+	}
+
+	s.buf[s.e] = sentinel
+	s.ioerr = io.ErrNoProgress
+}
+
+// nextSize returns the next bigger size for a buffer of a given size.
+func nextSize(size int) int {
+	const min = 4 << 10 // 4K: minimum buffer size
+	const max = 1 << 20 // 1M: maximum buffer size which is still doubled
+	if size < min {
+		return min
+	}
+	if size <= max {
+		return size << 1
+	}
+	return size + max
+}