diff options
Diffstat (limited to 'src/bufio')
-rw-r--r-- | src/bufio/bufio.go | 782 | ||||
-rw-r--r-- | src/bufio/bufio_test.go | 1760 | ||||
-rw-r--r-- | src/bufio/example_test.go | 127 | ||||
-rw-r--r-- | src/bufio/export_test.go | 29 | ||||
-rw-r--r-- | src/bufio/scan.go | 419 | ||||
-rw-r--r-- | src/bufio/scan_test.go | 596 |
6 files changed, 3713 insertions, 0 deletions
diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go new file mode 100644 index 0000000..6baf9b9 --- /dev/null +++ b/src/bufio/bufio.go @@ -0,0 +1,782 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package bufio implements buffered I/O. It wraps an io.Reader or io.Writer +// object, creating another object (Reader or Writer) that also implements +// the interface but provides buffering and some help for textual I/O. +package bufio + +import ( + "bytes" + "errors" + "io" + "strings" + "unicode/utf8" +) + +const ( + defaultBufSize = 4096 +) + +var ( + ErrInvalidUnreadByte = errors.New("bufio: invalid use of UnreadByte") + ErrInvalidUnreadRune = errors.New("bufio: invalid use of UnreadRune") + ErrBufferFull = errors.New("bufio: buffer full") + ErrNegativeCount = errors.New("bufio: negative count") +) + +// Buffered input. + +// Reader implements buffering for an io.Reader object. +type Reader struct { + buf []byte + rd io.Reader // reader provided by the client + r, w int // buf read and write positions + err error + lastByte int // last byte read for UnreadByte; -1 means invalid + lastRuneSize int // size of last rune read for UnreadRune; -1 means invalid +} + +const minReadBufferSize = 16 +const maxConsecutiveEmptyReads = 100 + +// NewReaderSize returns a new Reader whose buffer has at least the specified +// size. If the argument io.Reader is already a Reader with large enough +// size, it returns the underlying Reader. +func NewReaderSize(rd io.Reader, size int) *Reader { + // Is it already a Reader? + b, ok := rd.(*Reader) + if ok && len(b.buf) >= size { + return b + } + if size < minReadBufferSize { + size = minReadBufferSize + } + r := new(Reader) + r.reset(make([]byte, size), rd) + return r +} + +// NewReader returns a new Reader whose buffer has the default size. +func NewReader(rd io.Reader) *Reader { + return NewReaderSize(rd, defaultBufSize) +} + +// Size returns the size of the underlying buffer in bytes. +func (b *Reader) Size() int { return len(b.buf) } + +// Reset discards any buffered data, resets all state, and switches +// the buffered reader to read from r. +func (b *Reader) Reset(r io.Reader) { + b.reset(b.buf, r) +} + +func (b *Reader) reset(buf []byte, r io.Reader) { + *b = Reader{ + buf: buf, + rd: r, + lastByte: -1, + lastRuneSize: -1, + } +} + +var errNegativeRead = errors.New("bufio: reader returned negative count from Read") + +// fill reads a new chunk into the buffer. +func (b *Reader) fill() { + // Slide existing data to beginning. + if b.r > 0 { + copy(b.buf, b.buf[b.r:b.w]) + b.w -= b.r + b.r = 0 + } + + if b.w >= len(b.buf) { + panic("bufio: tried to fill full buffer") + } + + // Read new data: try a limited number of times. + for i := maxConsecutiveEmptyReads; i > 0; i-- { + n, err := b.rd.Read(b.buf[b.w:]) + if n < 0 { + panic(errNegativeRead) + } + b.w += n + if err != nil { + b.err = err + return + } + if n > 0 { + return + } + } + b.err = io.ErrNoProgress +} + +func (b *Reader) readErr() error { + err := b.err + b.err = nil + return err +} + +// Peek returns the next n bytes without advancing the reader. The bytes stop +// being valid at the next read call. If Peek returns fewer than n bytes, it +// also returns an error explaining why the read is short. The error is +// ErrBufferFull if n is larger than b's buffer size. +// +// Calling Peek prevents a UnreadByte or UnreadRune call from succeeding +// until the next read operation. +func (b *Reader) Peek(n int) ([]byte, error) { + if n < 0 { + return nil, ErrNegativeCount + } + + b.lastByte = -1 + b.lastRuneSize = -1 + + for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil { + b.fill() // b.w-b.r < len(b.buf) => buffer is not full + } + + if n > len(b.buf) { + return b.buf[b.r:b.w], ErrBufferFull + } + + // 0 <= n <= len(b.buf) + var err error + if avail := b.w - b.r; avail < n { + // not enough data in buffer + n = avail + err = b.readErr() + if err == nil { + err = ErrBufferFull + } + } + return b.buf[b.r : b.r+n], err +} + +// Discard skips the next n bytes, returning the number of bytes discarded. +// +// If Discard skips fewer than n bytes, it also returns an error. +// If 0 <= n <= b.Buffered(), Discard is guaranteed to succeed without +// reading from the underlying io.Reader. +func (b *Reader) Discard(n int) (discarded int, err error) { + if n < 0 { + return 0, ErrNegativeCount + } + if n == 0 { + return + } + remain := n + for { + skip := b.Buffered() + if skip == 0 { + b.fill() + skip = b.Buffered() + } + if skip > remain { + skip = remain + } + b.r += skip + remain -= skip + if remain == 0 { + return n, nil + } + if b.err != nil { + return n - remain, b.readErr() + } + } +} + +// Read reads data into p. +// It returns the number of bytes read into p. +// The bytes are taken from at most one Read on the underlying Reader, +// hence n may be less than len(p). +// To read exactly len(p) bytes, use io.ReadFull(b, p). +// At EOF, the count will be zero and err will be io.EOF. +func (b *Reader) Read(p []byte) (n int, err error) { + n = len(p) + if n == 0 { + if b.Buffered() > 0 { + return 0, nil + } + return 0, b.readErr() + } + if b.r == b.w { + if b.err != nil { + return 0, b.readErr() + } + if len(p) >= len(b.buf) { + // Large read, empty buffer. + // Read directly into p to avoid copy. + n, b.err = b.rd.Read(p) + if n < 0 { + panic(errNegativeRead) + } + if n > 0 { + b.lastByte = int(p[n-1]) + b.lastRuneSize = -1 + } + return n, b.readErr() + } + // One read. + // Do not use b.fill, which will loop. + b.r = 0 + b.w = 0 + n, b.err = b.rd.Read(b.buf) + if n < 0 { + panic(errNegativeRead) + } + if n == 0 { + return 0, b.readErr() + } + b.w += n + } + + // copy as much as we can + n = copy(p, b.buf[b.r:b.w]) + b.r += n + b.lastByte = int(b.buf[b.r-1]) + b.lastRuneSize = -1 + return n, nil +} + +// ReadByte reads and returns a single byte. +// If no byte is available, returns an error. +func (b *Reader) ReadByte() (byte, error) { + b.lastRuneSize = -1 + for b.r == b.w { + if b.err != nil { + return 0, b.readErr() + } + b.fill() // buffer is empty + } + c := b.buf[b.r] + b.r++ + b.lastByte = int(c) + return c, nil +} + +// UnreadByte unreads the last byte. Only the most recently read byte can be unread. +// +// UnreadByte returns an error if the most recent method called on the +// Reader was not a read operation. Notably, Peek is not considered a +// read operation. +func (b *Reader) UnreadByte() error { + if b.lastByte < 0 || b.r == 0 && b.w > 0 { + return ErrInvalidUnreadByte + } + // b.r > 0 || b.w == 0 + if b.r > 0 { + b.r-- + } else { + // b.r == 0 && b.w == 0 + b.w = 1 + } + b.buf[b.r] = byte(b.lastByte) + b.lastByte = -1 + b.lastRuneSize = -1 + return nil +} + +// ReadRune reads a single UTF-8 encoded Unicode character and returns the +// rune and its size in bytes. If the encoded rune is invalid, it consumes one byte +// and returns unicode.ReplacementChar (U+FFFD) with a size of 1. +func (b *Reader) ReadRune() (r rune, size int, err error) { + for b.r+utf8.UTFMax > b.w && !utf8.FullRune(b.buf[b.r:b.w]) && b.err == nil && b.w-b.r < len(b.buf) { + b.fill() // b.w-b.r < len(buf) => buffer is not full + } + b.lastRuneSize = -1 + if b.r == b.w { + return 0, 0, b.readErr() + } + r, size = rune(b.buf[b.r]), 1 + if r >= utf8.RuneSelf { + r, size = utf8.DecodeRune(b.buf[b.r:b.w]) + } + b.r += size + b.lastByte = int(b.buf[b.r-1]) + b.lastRuneSize = size + return r, size, nil +} + +// UnreadRune unreads the last rune. If the most recent method called on +// the Reader was not a ReadRune, UnreadRune returns an error. (In this +// regard it is stricter than UnreadByte, which will unread the last byte +// from any read operation.) +func (b *Reader) UnreadRune() error { + if b.lastRuneSize < 0 || b.r < b.lastRuneSize { + return ErrInvalidUnreadRune + } + b.r -= b.lastRuneSize + b.lastByte = -1 + b.lastRuneSize = -1 + return nil +} + +// Buffered returns the number of bytes that can be read from the current buffer. +func (b *Reader) Buffered() int { return b.w - b.r } + +// ReadSlice reads until the first occurrence of delim in the input, +// returning a slice pointing at the bytes in the buffer. +// The bytes stop being valid at the next read. +// If ReadSlice encounters an error before finding a delimiter, +// it returns all the data in the buffer and the error itself (often io.EOF). +// ReadSlice fails with error ErrBufferFull if the buffer fills without a delim. +// Because the data returned from ReadSlice will be overwritten +// by the next I/O operation, most clients should use +// ReadBytes or ReadString instead. +// ReadSlice returns err != nil if and only if line does not end in delim. +func (b *Reader) ReadSlice(delim byte) (line []byte, err error) { + s := 0 // search start index + for { + // Search buffer. + if i := bytes.IndexByte(b.buf[b.r+s:b.w], delim); i >= 0 { + i += s + line = b.buf[b.r : b.r+i+1] + b.r += i + 1 + break + } + + // Pending error? + if b.err != nil { + line = b.buf[b.r:b.w] + b.r = b.w + err = b.readErr() + break + } + + // Buffer full? + if b.Buffered() >= len(b.buf) { + b.r = b.w + line = b.buf + err = ErrBufferFull + break + } + + s = b.w - b.r // do not rescan area we scanned before + + b.fill() // buffer is not full + } + + // Handle last byte, if any. + if i := len(line) - 1; i >= 0 { + b.lastByte = int(line[i]) + b.lastRuneSize = -1 + } + + return +} + +// ReadLine is a low-level line-reading primitive. Most callers should use +// ReadBytes('\n') or ReadString('\n') instead or use a Scanner. +// +// ReadLine tries to return a single line, not including the end-of-line bytes. +// If the line was too long for the buffer then isPrefix is set and the +// beginning of the line is returned. The rest of the line will be returned +// from future calls. isPrefix will be false when returning the last fragment +// of the line. The returned buffer is only valid until the next call to +// ReadLine. ReadLine either returns a non-nil line or it returns an error, +// never both. +// +// The text returned from ReadLine does not include the line end ("\r\n" or "\n"). +// No indication or error is given if the input ends without a final line end. +// Calling UnreadByte after ReadLine will always unread the last byte read +// (possibly a character belonging to the line end) even if that byte is not +// part of the line returned by ReadLine. +func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) { + line, err = b.ReadSlice('\n') + if err == ErrBufferFull { + // Handle the case where "\r\n" straddles the buffer. + if len(line) > 0 && line[len(line)-1] == '\r' { + // Put the '\r' back on buf and drop it from line. + // Let the next call to ReadLine check for "\r\n". + if b.r == 0 { + // should be unreachable + panic("bufio: tried to rewind past start of buffer") + } + b.r-- + line = line[:len(line)-1] + } + return line, true, nil + } + + if len(line) == 0 { + if err != nil { + line = nil + } + return + } + err = nil + + if line[len(line)-1] == '\n' { + drop := 1 + if len(line) > 1 && line[len(line)-2] == '\r' { + drop = 2 + } + line = line[:len(line)-drop] + } + return +} + +// collectFragments reads until the first occurrence of delim in the input. It +// returns (slice of full buffers, remaining bytes before delim, total number +// of bytes in the combined first two elements, error). +// The complete result is equal to +// `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a +// length of `totalLen`. The result is structured in this way to allow callers +// to minimize allocations and copies. +func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) { + var frag []byte + // Use ReadSlice to look for delim, accumulating full buffers. + for { + var e error + frag, e = b.ReadSlice(delim) + if e == nil { // got final fragment + break + } + if e != ErrBufferFull { // unexpected error + err = e + break + } + + // Make a copy of the buffer. + buf := make([]byte, len(frag)) + copy(buf, frag) + fullBuffers = append(fullBuffers, buf) + totalLen += len(buf) + } + + totalLen += len(frag) + return fullBuffers, frag, totalLen, err +} + +// ReadBytes reads until the first occurrence of delim in the input, +// returning a slice containing the data up to and including the delimiter. +// If ReadBytes encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadBytes returns err != nil if and only if the returned data does not end in +// delim. +// For simple uses, a Scanner may be more convenient. +func (b *Reader) ReadBytes(delim byte) ([]byte, error) { + full, frag, n, err := b.collectFragments(delim) + // Allocate new buffer to hold the full pieces and the fragment. + buf := make([]byte, n) + n = 0 + // Copy full pieces and fragment in. + for i := range full { + n += copy(buf[n:], full[i]) + } + copy(buf[n:], frag) + return buf, err +} + +// ReadString reads until the first occurrence of delim in the input, +// returning a string containing the data up to and including the delimiter. +// If ReadString encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadString returns err != nil if and only if the returned data does not end in +// delim. +// For simple uses, a Scanner may be more convenient. +func (b *Reader) ReadString(delim byte) (string, error) { + full, frag, n, err := b.collectFragments(delim) + // Allocate new buffer to hold the full pieces and the fragment. + var buf strings.Builder + buf.Grow(n) + // Copy full pieces and fragment in. + for _, fb := range full { + buf.Write(fb) + } + buf.Write(frag) + return buf.String(), err +} + +// WriteTo implements io.WriterTo. +// This may make multiple calls to the Read method of the underlying Reader. +// If the underlying reader supports the WriteTo method, +// this calls the underlying WriteTo without buffering. +func (b *Reader) WriteTo(w io.Writer) (n int64, err error) { + n, err = b.writeBuf(w) + if err != nil { + return + } + + if r, ok := b.rd.(io.WriterTo); ok { + m, err := r.WriteTo(w) + n += m + return n, err + } + + if w, ok := w.(io.ReaderFrom); ok { + m, err := w.ReadFrom(b.rd) + n += m + return n, err + } + + if b.w-b.r < len(b.buf) { + b.fill() // buffer not full + } + + for b.r < b.w { + // b.r < b.w => buffer is not empty + m, err := b.writeBuf(w) + n += m + if err != nil { + return n, err + } + b.fill() // buffer is empty + } + + if b.err == io.EOF { + b.err = nil + } + + return n, b.readErr() +} + +var errNegativeWrite = errors.New("bufio: writer returned negative count from Write") + +// writeBuf writes the Reader's buffer to the writer. +func (b *Reader) writeBuf(w io.Writer) (int64, error) { + n, err := w.Write(b.buf[b.r:b.w]) + if n < 0 { + panic(errNegativeWrite) + } + b.r += n + return int64(n), err +} + +// buffered output + +// Writer implements buffering for an io.Writer object. +// If an error occurs writing to a Writer, no more data will be +// accepted and all subsequent writes, and Flush, will return the error. +// After all data has been written, the client should call the +// Flush method to guarantee all data has been forwarded to +// the underlying io.Writer. +type Writer struct { + err error + buf []byte + n int + wr io.Writer +} + +// NewWriterSize returns a new Writer whose buffer has at least the specified +// size. If the argument io.Writer is already a Writer with large enough +// size, it returns the underlying Writer. +func NewWriterSize(w io.Writer, size int) *Writer { + // Is it already a Writer? + b, ok := w.(*Writer) + if ok && len(b.buf) >= size { + return b + } + if size <= 0 { + size = defaultBufSize + } + return &Writer{ + buf: make([]byte, size), + wr: w, + } +} + +// NewWriter returns a new Writer whose buffer has the default size. +func NewWriter(w io.Writer) *Writer { + return NewWriterSize(w, defaultBufSize) +} + +// Size returns the size of the underlying buffer in bytes. +func (b *Writer) Size() int { return len(b.buf) } + +// Reset discards any unflushed buffered data, clears any error, and +// resets b to write its output to w. +func (b *Writer) Reset(w io.Writer) { + b.err = nil + b.n = 0 + b.wr = w +} + +// Flush writes any buffered data to the underlying io.Writer. +func (b *Writer) Flush() error { + if b.err != nil { + return b.err + } + if b.n == 0 { + return nil + } + n, err := b.wr.Write(b.buf[0:b.n]) + if n < b.n && err == nil { + err = io.ErrShortWrite + } + if err != nil { + if n > 0 && n < b.n { + copy(b.buf[0:b.n-n], b.buf[n:b.n]) + } + b.n -= n + b.err = err + return err + } + b.n = 0 + return nil +} + +// Available returns how many bytes are unused in the buffer. +func (b *Writer) Available() int { return len(b.buf) - b.n } + +// Buffered returns the number of bytes that have been written into the current buffer. +func (b *Writer) Buffered() int { return b.n } + +// Write writes the contents of p into the buffer. +// It returns the number of bytes written. +// If nn < len(p), it also returns an error explaining +// why the write is short. +func (b *Writer) Write(p []byte) (nn int, err error) { + for len(p) > b.Available() && b.err == nil { + var n int + if b.Buffered() == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, b.err = b.wr.Write(p) + } else { + n = copy(b.buf[b.n:], p) + b.n += n + b.Flush() + } + nn += n + p = p[n:] + } + if b.err != nil { + return nn, b.err + } + n := copy(b.buf[b.n:], p) + b.n += n + nn += n + return nn, nil +} + +// WriteByte writes a single byte. +func (b *Writer) WriteByte(c byte) error { + if b.err != nil { + return b.err + } + if b.Available() <= 0 && b.Flush() != nil { + return b.err + } + b.buf[b.n] = c + b.n++ + return nil +} + +// WriteRune writes a single Unicode code point, returning +// the number of bytes written and any error. +func (b *Writer) WriteRune(r rune) (size int, err error) { + if r < utf8.RuneSelf { + err = b.WriteByte(byte(r)) + if err != nil { + return 0, err + } + return 1, nil + } + if b.err != nil { + return 0, b.err + } + n := b.Available() + if n < utf8.UTFMax { + if b.Flush(); b.err != nil { + return 0, b.err + } + n = b.Available() + if n < utf8.UTFMax { + // Can only happen if buffer is silly small. + return b.WriteString(string(r)) + } + } + size = utf8.EncodeRune(b.buf[b.n:], r) + b.n += size + return size, nil +} + +// WriteString writes a string. +// It returns the number of bytes written. +// If the count is less than len(s), it also returns an error explaining +// why the write is short. +func (b *Writer) WriteString(s string) (int, error) { + nn := 0 + for len(s) > b.Available() && b.err == nil { + n := copy(b.buf[b.n:], s) + b.n += n + nn += n + s = s[n:] + b.Flush() + } + if b.err != nil { + return nn, b.err + } + n := copy(b.buf[b.n:], s) + b.n += n + nn += n + return nn, nil +} + +// ReadFrom implements io.ReaderFrom. If the underlying writer +// supports the ReadFrom method, and b has no buffered data yet, +// this calls the underlying ReadFrom without buffering. +func (b *Writer) ReadFrom(r io.Reader) (n int64, err error) { + if b.err != nil { + return 0, b.err + } + if b.Buffered() == 0 { + if w, ok := b.wr.(io.ReaderFrom); ok { + n, err = w.ReadFrom(r) + b.err = err + return n, err + } + } + var m int + for { + if b.Available() == 0 { + if err1 := b.Flush(); err1 != nil { + return n, err1 + } + } + nr := 0 + for nr < maxConsecutiveEmptyReads { + m, err = r.Read(b.buf[b.n:]) + if m != 0 || err != nil { + break + } + nr++ + } + if nr == maxConsecutiveEmptyReads { + return n, io.ErrNoProgress + } + b.n += m + n += int64(m) + if err != nil { + break + } + } + if err == io.EOF { + // If we filled the buffer exactly, flush preemptively. + if b.Available() == 0 { + err = b.Flush() + } else { + err = nil + } + } + return n, err +} + +// buffered input and output + +// ReadWriter stores pointers to a Reader and a Writer. +// It implements io.ReadWriter. +type ReadWriter struct { + *Reader + *Writer +} + +// NewReadWriter allocates a new ReadWriter that dispatches to r and w. +func NewReadWriter(r *Reader, w *Writer) *ReadWriter { + return &ReadWriter{r, w} +} diff --git a/src/bufio/bufio_test.go b/src/bufio/bufio_test.go new file mode 100644 index 0000000..d7b34bd --- /dev/null +++ b/src/bufio/bufio_test.go @@ -0,0 +1,1760 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + . "bufio" + "bytes" + "errors" + "fmt" + "io" + "strings" + "testing" + "testing/iotest" + "time" + "unicode/utf8" +) + +// Reads from a reader and rot13s the result. +type rot13Reader struct { + r io.Reader +} + +func newRot13Reader(r io.Reader) *rot13Reader { + r13 := new(rot13Reader) + r13.r = r + return r13 +} + +func (r13 *rot13Reader) Read(p []byte) (int, error) { + n, err := r13.r.Read(p) + for i := 0; i < n; i++ { + c := p[i] | 0x20 // lowercase byte + if 'a' <= c && c <= 'm' { + p[i] += 13 + } else if 'n' <= c && c <= 'z' { + p[i] -= 13 + } + } + return n, err +} + +// Call ReadByte to accumulate the text of a file +func readBytes(buf *Reader) string { + var b [1000]byte + nb := 0 + for { + c, err := buf.ReadByte() + if err == io.EOF { + break + } + if err == nil { + b[nb] = c + nb++ + } else if err != iotest.ErrTimeout { + panic("Data: " + err.Error()) + } + } + return string(b[0:nb]) +} + +func TestReaderSimple(t *testing.T) { + data := "hello world" + b := NewReader(strings.NewReader(data)) + if s := readBytes(b); s != "hello world" { + t.Errorf("simple hello world test failed: got %q", s) + } + + b = NewReader(newRot13Reader(strings.NewReader(data))) + if s := readBytes(b); s != "uryyb jbeyq" { + t.Errorf("rot13 hello world test failed: got %q", s) + } +} + +type readMaker struct { + name string + fn func(io.Reader) io.Reader +} + +var readMakers = []readMaker{ + {"full", func(r io.Reader) io.Reader { return r }}, + {"byte", iotest.OneByteReader}, + {"half", iotest.HalfReader}, + {"data+err", iotest.DataErrReader}, + {"timeout", iotest.TimeoutReader}, +} + +// Call ReadString (which ends up calling everything else) +// to accumulate the text of a file. +func readLines(b *Reader) string { + s := "" + for { + s1, err := b.ReadString('\n') + if err == io.EOF { + break + } + if err != nil && err != iotest.ErrTimeout { + panic("GetLines: " + err.Error()) + } + s += s1 + } + return s +} + +// Call Read to accumulate the text of a file +func reads(buf *Reader, m int) string { + var b [1000]byte + nb := 0 + for { + n, err := buf.Read(b[nb : nb+m]) + nb += n + if err == io.EOF { + break + } + } + return string(b[0:nb]) +} + +type bufReader struct { + name string + fn func(*Reader) string +} + +var bufreaders = []bufReader{ + {"1", func(b *Reader) string { return reads(b, 1) }}, + {"2", func(b *Reader) string { return reads(b, 2) }}, + {"3", func(b *Reader) string { return reads(b, 3) }}, + {"4", func(b *Reader) string { return reads(b, 4) }}, + {"5", func(b *Reader) string { return reads(b, 5) }}, + {"7", func(b *Reader) string { return reads(b, 7) }}, + {"bytes", readBytes}, + {"lines", readLines}, +} + +const minReadBufferSize = 16 + +var bufsizes = []int{ + 0, minReadBufferSize, 23, 32, 46, 64, 93, 128, 1024, 4096, +} + +func TestReader(t *testing.T) { + var texts [31]string + str := "" + all := "" + for i := 0; i < len(texts)-1; i++ { + texts[i] = str + "\n" + all += texts[i] + str += string(rune(i%26 + 'a')) + } + texts[len(texts)-1] = all + + for h := 0; h < len(texts); h++ { + text := texts[h] + for i := 0; i < len(readMakers); i++ { + for j := 0; j < len(bufreaders); j++ { + for k := 0; k < len(bufsizes); k++ { + readmaker := readMakers[i] + bufreader := bufreaders[j] + bufsize := bufsizes[k] + read := readmaker.fn(strings.NewReader(text)) + buf := NewReaderSize(read, bufsize) + s := bufreader.fn(buf) + if s != text { + t.Errorf("reader=%s fn=%s bufsize=%d want=%q got=%q", + readmaker.name, bufreader.name, bufsize, text, s) + } + } + } + } + } +} + +type zeroReader struct{} + +func (zeroReader) Read(p []byte) (int, error) { + return 0, nil +} + +func TestZeroReader(t *testing.T) { + var z zeroReader + r := NewReader(z) + + c := make(chan error) + go func() { + _, err := r.ReadByte() + c <- err + }() + + select { + case err := <-c: + if err == nil { + t.Error("error expected") + } else if err != io.ErrNoProgress { + t.Error("unexpected error:", err) + } + case <-time.After(time.Second): + t.Error("test timed out (endless loop in ReadByte?)") + } +} + +// A StringReader delivers its data one string segment at a time via Read. +type StringReader struct { + data []string + step int +} + +func (r *StringReader) Read(p []byte) (n int, err error) { + if r.step < len(r.data) { + s := r.data[r.step] + n = copy(p, s) + r.step++ + } else { + err = io.EOF + } + return +} + +func readRuneSegments(t *testing.T, segments []string) { + got := "" + want := strings.Join(segments, "") + r := NewReader(&StringReader{data: segments}) + for { + r, _, err := r.ReadRune() + if err != nil { + if err != io.EOF { + return + } + break + } + got += string(r) + } + if got != want { + t.Errorf("segments=%v got=%s want=%s", segments, got, want) + } +} + +var segmentList = [][]string{ + {}, + {""}, + {"日", "本語"}, + {"\u65e5", "\u672c", "\u8a9e"}, + {"\U000065e5", "\U0000672c", "\U00008a9e"}, + {"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"}, + {"Hello", ", ", "World", "!"}, + {"Hello", ", ", "", "World", "!"}, +} + +func TestReadRune(t *testing.T) { + for _, s := range segmentList { + readRuneSegments(t, s) + } +} + +func TestUnreadRune(t *testing.T) { + segments := []string{"Hello, world:", "日本語"} + r := NewReader(&StringReader{data: segments}) + got := "" + want := strings.Join(segments, "") + // Normal execution. + for { + r1, _, err := r.ReadRune() + if err != nil { + if err != io.EOF { + t.Error("unexpected error on ReadRune:", err) + } + break + } + got += string(r1) + // Put it back and read it again. + if err = r.UnreadRune(); err != nil { + t.Fatal("unexpected error on UnreadRune:", err) + } + r2, _, err := r.ReadRune() + if err != nil { + t.Fatal("unexpected error reading after unreading:", err) + } + if r1 != r2 { + t.Fatalf("incorrect rune after unread: got %c, want %c", r1, r2) + } + } + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestNoUnreadRuneAfterPeek(t *testing.T) { + br := NewReader(strings.NewReader("example")) + br.ReadRune() + br.Peek(1) + if err := br.UnreadRune(); err == nil { + t.Error("UnreadRune didn't fail after Peek") + } +} + +func TestNoUnreadByteAfterPeek(t *testing.T) { + br := NewReader(strings.NewReader("example")) + br.ReadByte() + br.Peek(1) + if err := br.UnreadByte(); err == nil { + t.Error("UnreadByte didn't fail after Peek") + } +} + +func TestUnreadByte(t *testing.T) { + segments := []string{"Hello, ", "world"} + r := NewReader(&StringReader{data: segments}) + got := "" + want := strings.Join(segments, "") + // Normal execution. + for { + b1, err := r.ReadByte() + if err != nil { + if err != io.EOF { + t.Error("unexpected error on ReadByte:", err) + } + break + } + got += string(b1) + // Put it back and read it again. + if err = r.UnreadByte(); err != nil { + t.Fatal("unexpected error on UnreadByte:", err) + } + b2, err := r.ReadByte() + if err != nil { + t.Fatal("unexpected error reading after unreading:", err) + } + if b1 != b2 { + t.Fatalf("incorrect byte after unread: got %q, want %q", b1, b2) + } + } + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestUnreadByteMultiple(t *testing.T) { + segments := []string{"Hello, ", "world"} + data := strings.Join(segments, "") + for n := 0; n <= len(data); n++ { + r := NewReader(&StringReader{data: segments}) + // Read n bytes. + for i := 0; i < n; i++ { + b, err := r.ReadByte() + if err != nil { + t.Fatalf("n = %d: unexpected error on ReadByte: %v", n, err) + } + if b != data[i] { + t.Fatalf("n = %d: incorrect byte returned from ReadByte: got %q, want %q", n, b, data[i]) + } + } + // Unread one byte if there is one. + if n > 0 { + if err := r.UnreadByte(); err != nil { + t.Errorf("n = %d: unexpected error on UnreadByte: %v", n, err) + } + } + // Test that we cannot unread any further. + if err := r.UnreadByte(); err == nil { + t.Errorf("n = %d: expected error on UnreadByte", n) + } + } +} + +func TestUnreadByteOthers(t *testing.T) { + // A list of readers to use in conjunction with UnreadByte. + var readers = []func(*Reader, byte) ([]byte, error){ + (*Reader).ReadBytes, + (*Reader).ReadSlice, + func(r *Reader, delim byte) ([]byte, error) { + data, err := r.ReadString(delim) + return []byte(data), err + }, + // ReadLine doesn't fit the data/pattern easily + // so we leave it out. It should be covered via + // the ReadSlice test since ReadLine simply calls + // ReadSlice, and it's that function that handles + // the last byte. + } + + // Try all readers with UnreadByte. + for rno, read := range readers { + // Some input data that is longer than the minimum reader buffer size. + const n = 10 + var buf bytes.Buffer + for i := 0; i < n; i++ { + buf.WriteString("abcdefg") + } + + r := NewReaderSize(&buf, minReadBufferSize) + readTo := func(delim byte, want string) { + data, err := read(r, delim) + if err != nil { + t.Fatalf("#%d: unexpected error reading to %c: %v", rno, delim, err) + } + if got := string(data); got != want { + t.Fatalf("#%d: got %q, want %q", rno, got, want) + } + } + + // Read the data with occasional UnreadByte calls. + for i := 0; i < n; i++ { + readTo('d', "abcd") + for j := 0; j < 3; j++ { + if err := r.UnreadByte(); err != nil { + t.Fatalf("#%d: unexpected error on UnreadByte: %v", rno, err) + } + readTo('d', "d") + } + readTo('g', "efg") + } + + // All data should have been read. + _, err := r.ReadByte() + if err != io.EOF { + t.Errorf("#%d: got error %v; want EOF", rno, err) + } + } +} + +// Test that UnreadRune fails if the preceding operation was not a ReadRune. +func TestUnreadRuneError(t *testing.T) { + buf := make([]byte, 3) // All runes in this test are 3 bytes long + r := NewReader(&StringReader{data: []string{"日本語日本語日本語"}}) + if r.UnreadRune() == nil { + t.Error("expected error on UnreadRune from fresh buffer") + } + _, _, err := r.ReadRune() + if err != nil { + t.Error("unexpected error on ReadRune (1):", err) + } + if err = r.UnreadRune(); err != nil { + t.Error("unexpected error on UnreadRune (1):", err) + } + if r.UnreadRune() == nil { + t.Error("expected error after UnreadRune (1)") + } + // Test error after Read. + _, _, err = r.ReadRune() // reset state + if err != nil { + t.Error("unexpected error on ReadRune (2):", err) + } + _, err = r.Read(buf) + if err != nil { + t.Error("unexpected error on Read (2):", err) + } + if r.UnreadRune() == nil { + t.Error("expected error after Read (2)") + } + // Test error after ReadByte. + _, _, err = r.ReadRune() // reset state + if err != nil { + t.Error("unexpected error on ReadRune (2):", err) + } + for range buf { + _, err = r.ReadByte() + if err != nil { + t.Error("unexpected error on ReadByte (2):", err) + } + } + if r.UnreadRune() == nil { + t.Error("expected error after ReadByte") + } + // Test error after UnreadByte. + _, _, err = r.ReadRune() // reset state + if err != nil { + t.Error("unexpected error on ReadRune (3):", err) + } + _, err = r.ReadByte() + if err != nil { + t.Error("unexpected error on ReadByte (3):", err) + } + err = r.UnreadByte() + if err != nil { + t.Error("unexpected error on UnreadByte (3):", err) + } + if r.UnreadRune() == nil { + t.Error("expected error after UnreadByte (3)") + } + // Test error after ReadSlice. + _, _, err = r.ReadRune() // reset state + if err != nil { + t.Error("unexpected error on ReadRune (4):", err) + } + _, err = r.ReadSlice(0) + if err != io.EOF { + t.Error("unexpected error on ReadSlice (4):", err) + } + if r.UnreadRune() == nil { + t.Error("expected error after ReadSlice (4)") + } +} + +func TestUnreadRuneAtEOF(t *testing.T) { + // UnreadRune/ReadRune should error at EOF (was a bug; used to panic) + r := NewReader(strings.NewReader("x")) + r.ReadRune() + r.ReadRune() + r.UnreadRune() + _, _, err := r.ReadRune() + if err == nil { + t.Error("expected error at EOF") + } else if err != io.EOF { + t.Error("expected EOF; got", err) + } +} + +func TestReadWriteRune(t *testing.T) { + const NRune = 1000 + byteBuf := new(bytes.Buffer) + w := NewWriter(byteBuf) + // Write the runes out using WriteRune + buf := make([]byte, utf8.UTFMax) + for r := rune(0); r < NRune; r++ { + size := utf8.EncodeRune(buf, r) + nbytes, err := w.WriteRune(r) + if err != nil { + t.Fatalf("WriteRune(0x%x) error: %s", r, err) + } + if nbytes != size { + t.Fatalf("WriteRune(0x%x) expected %d, got %d", r, size, nbytes) + } + } + w.Flush() + + r := NewReader(byteBuf) + // Read them back with ReadRune + for r1 := rune(0); r1 < NRune; r1++ { + size := utf8.EncodeRune(buf, r1) + nr, nbytes, err := r.ReadRune() + if nr != r1 || nbytes != size || err != nil { + t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", r1, nr, nbytes, r1, size, err) + } + } +} + +func TestReadStringAllocs(t *testing.T) { + r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n") + buf := NewReader(r) + allocs := testing.AllocsPerRun(100, func() { + r.Seek(0, io.SeekStart) + buf.Reset(r) + + _, err := buf.ReadString('\n') + if err != nil { + t.Fatal(err) + } + }) + if allocs != 1 { + t.Errorf("Unexpected number of allocations, got %f, want 1", allocs) + } +} + +func TestWriter(t *testing.T) { + var data [8192]byte + + for i := 0; i < len(data); i++ { + data[i] = byte(' ' + i%('~'-' ')) + } + w := new(bytes.Buffer) + for i := 0; i < len(bufsizes); i++ { + for j := 0; j < len(bufsizes); j++ { + nwrite := bufsizes[i] + bs := bufsizes[j] + + // Write nwrite bytes using buffer size bs. + // Check that the right amount makes it out + // and that the data is correct. + + w.Reset() + buf := NewWriterSize(w, bs) + context := fmt.Sprintf("nwrite=%d bufsize=%d", nwrite, bs) + n, e1 := buf.Write(data[0:nwrite]) + if e1 != nil || n != nwrite { + t.Errorf("%s: buf.Write %d = %d, %v", context, nwrite, n, e1) + continue + } + if e := buf.Flush(); e != nil { + t.Errorf("%s: buf.Flush = %v", context, e) + } + + written := w.Bytes() + if len(written) != nwrite { + t.Errorf("%s: %d bytes written", context, len(written)) + } + for l := 0; l < len(written); l++ { + if written[l] != data[l] { + t.Errorf("wrong bytes written") + t.Errorf("want=%q", data[0:len(written)]) + t.Errorf("have=%q", written) + } + } + } + } +} + +// Check that write errors are returned properly. + +type errorWriterTest struct { + n, m int + err error + expect error +} + +func (w errorWriterTest) Write(p []byte) (int, error) { + return len(p) * w.n / w.m, w.err +} + +var errorWriterTests = []errorWriterTest{ + {0, 1, nil, io.ErrShortWrite}, + {1, 2, nil, io.ErrShortWrite}, + {1, 1, nil, nil}, + {0, 1, io.ErrClosedPipe, io.ErrClosedPipe}, + {1, 2, io.ErrClosedPipe, io.ErrClosedPipe}, + {1, 1, io.ErrClosedPipe, io.ErrClosedPipe}, +} + +func TestWriteErrors(t *testing.T) { + for _, w := range errorWriterTests { + buf := NewWriter(w) + _, e := buf.Write([]byte("hello world")) + if e != nil { + t.Errorf("Write hello to %v: %v", w, e) + continue + } + // Two flushes, to verify the error is sticky. + for i := 0; i < 2; i++ { + e = buf.Flush() + if e != w.expect { + t.Errorf("Flush %d/2 %v: got %v, wanted %v", i+1, w, e, w.expect) + } + } + } +} + +func TestNewReaderSizeIdempotent(t *testing.T) { + const BufSize = 1000 + b := NewReaderSize(strings.NewReader("hello world"), BufSize) + // Does it recognize itself? + b1 := NewReaderSize(b, BufSize) + if b1 != b { + t.Error("NewReaderSize did not detect underlying Reader") + } + // Does it wrap if existing buffer is too small? + b2 := NewReaderSize(b, 2*BufSize) + if b2 == b { + t.Error("NewReaderSize did not enlarge buffer") + } +} + +func TestNewWriterSizeIdempotent(t *testing.T) { + const BufSize = 1000 + b := NewWriterSize(new(bytes.Buffer), BufSize) + // Does it recognize itself? + b1 := NewWriterSize(b, BufSize) + if b1 != b { + t.Error("NewWriterSize did not detect underlying Writer") + } + // Does it wrap if existing buffer is too small? + b2 := NewWriterSize(b, 2*BufSize) + if b2 == b { + t.Error("NewWriterSize did not enlarge buffer") + } +} + +func TestWriteString(t *testing.T) { + const BufSize = 8 + buf := new(bytes.Buffer) + b := NewWriterSize(buf, BufSize) + b.WriteString("0") // easy + b.WriteString("123456") // still easy + b.WriteString("7890") // easy after flush + b.WriteString("abcdefghijklmnopqrstuvwxy") // hard + b.WriteString("z") + if err := b.Flush(); err != nil { + t.Error("WriteString", err) + } + s := "01234567890abcdefghijklmnopqrstuvwxyz" + if string(buf.Bytes()) != s { + t.Errorf("WriteString wants %q gets %q", s, string(buf.Bytes())) + } +} + +func TestBufferFull(t *testing.T) { + const longString = "And now, hello, world! It is the time for all good men to come to the aid of their party" + buf := NewReaderSize(strings.NewReader(longString), minReadBufferSize) + line, err := buf.ReadSlice('!') + if string(line) != "And now, hello, " || err != ErrBufferFull { + t.Errorf("first ReadSlice(,) = %q, %v", line, err) + } + line, err = buf.ReadSlice('!') + if string(line) != "world!" || err != nil { + t.Errorf("second ReadSlice(,) = %q, %v", line, err) + } +} + +func TestPeek(t *testing.T) { + p := make([]byte, 10) + // string is 16 (minReadBufferSize) long. + buf := NewReaderSize(strings.NewReader("abcdefghijklmnop"), minReadBufferSize) + if s, err := buf.Peek(1); string(s) != "a" || err != nil { + t.Fatalf("want %q got %q, err=%v", "a", string(s), err) + } + if s, err := buf.Peek(4); string(s) != "abcd" || err != nil { + t.Fatalf("want %q got %q, err=%v", "abcd", string(s), err) + } + if _, err := buf.Peek(-1); err != ErrNegativeCount { + t.Fatalf("want ErrNegativeCount got %v", err) + } + if s, err := buf.Peek(32); string(s) != "abcdefghijklmnop" || err != ErrBufferFull { + t.Fatalf("want %q, ErrBufFull got %q, err=%v", "abcdefghijklmnop", string(s), err) + } + if _, err := buf.Read(p[0:3]); string(p[0:3]) != "abc" || err != nil { + t.Fatalf("want %q got %q, err=%v", "abc", string(p[0:3]), err) + } + if s, err := buf.Peek(1); string(s) != "d" || err != nil { + t.Fatalf("want %q got %q, err=%v", "d", string(s), err) + } + if s, err := buf.Peek(2); string(s) != "de" || err != nil { + t.Fatalf("want %q got %q, err=%v", "de", string(s), err) + } + if _, err := buf.Read(p[0:3]); string(p[0:3]) != "def" || err != nil { + t.Fatalf("want %q got %q, err=%v", "def", string(p[0:3]), err) + } + if s, err := buf.Peek(4); string(s) != "ghij" || err != nil { + t.Fatalf("want %q got %q, err=%v", "ghij", string(s), err) + } + if _, err := buf.Read(p[0:]); string(p[0:]) != "ghijklmnop" || err != nil { + t.Fatalf("want %q got %q, err=%v", "ghijklmnop", string(p[0:minReadBufferSize]), err) + } + if s, err := buf.Peek(0); string(s) != "" || err != nil { + t.Fatalf("want %q got %q, err=%v", "", string(s), err) + } + if _, err := buf.Peek(1); err != io.EOF { + t.Fatalf("want EOF got %v", err) + } + + // Test for issue 3022, not exposing a reader's error on a successful Peek. + buf = NewReaderSize(dataAndEOFReader("abcd"), 32) + if s, err := buf.Peek(2); string(s) != "ab" || err != nil { + t.Errorf(`Peek(2) on "abcd", EOF = %q, %v; want "ab", nil`, string(s), err) + } + if s, err := buf.Peek(4); string(s) != "abcd" || err != nil { + t.Errorf(`Peek(4) on "abcd", EOF = %q, %v; want "abcd", nil`, string(s), err) + } + if n, err := buf.Read(p[0:5]); string(p[0:n]) != "abcd" || err != nil { + t.Fatalf("Read after peek = %q, %v; want abcd, EOF", p[0:n], err) + } + if n, err := buf.Read(p[0:1]); string(p[0:n]) != "" || err != io.EOF { + t.Fatalf(`second Read after peek = %q, %v; want "", EOF`, p[0:n], err) + } +} + +type dataAndEOFReader string + +func (r dataAndEOFReader) Read(p []byte) (int, error) { + return copy(p, r), io.EOF +} + +func TestPeekThenUnreadRune(t *testing.T) { + // This sequence used to cause a crash. + r := NewReader(strings.NewReader("x")) + r.ReadRune() + r.Peek(1) + r.UnreadRune() + r.ReadRune() // Used to panic here +} + +var testOutput = []byte("0123456789abcdefghijklmnopqrstuvwxy") +var testInput = []byte("012\n345\n678\n9ab\ncde\nfgh\nijk\nlmn\nopq\nrst\nuvw\nxy") +var testInputrn = []byte("012\r\n345\r\n678\r\n9ab\r\ncde\r\nfgh\r\nijk\r\nlmn\r\nopq\r\nrst\r\nuvw\r\nxy\r\n\n\r\n") + +// TestReader wraps a []byte and returns reads of a specific length. +type testReader struct { + data []byte + stride int +} + +func (t *testReader) Read(buf []byte) (n int, err error) { + n = t.stride + if n > len(t.data) { + n = len(t.data) + } + if n > len(buf) { + n = len(buf) + } + copy(buf, t.data) + t.data = t.data[n:] + if len(t.data) == 0 { + err = io.EOF + } + return +} + +func testReadLine(t *testing.T, input []byte) { + //for stride := 1; stride < len(input); stride++ { + for stride := 1; stride < 2; stride++ { + done := 0 + reader := testReader{input, stride} + l := NewReaderSize(&reader, len(input)+1) + for { + line, isPrefix, err := l.ReadLine() + if len(line) > 0 && err != nil { + t.Errorf("ReadLine returned both data and error: %s", err) + } + if isPrefix { + t.Errorf("ReadLine returned prefix") + } + if err != nil { + if err != io.EOF { + t.Fatalf("Got unknown error: %s", err) + } + break + } + if want := testOutput[done : done+len(line)]; !bytes.Equal(want, line) { + t.Errorf("Bad line at stride %d: want: %x got: %x", stride, want, line) + } + done += len(line) + } + if done != len(testOutput) { + t.Errorf("ReadLine didn't return everything: got: %d, want: %d (stride: %d)", done, len(testOutput), stride) + } + } +} + +func TestReadLine(t *testing.T) { + testReadLine(t, testInput) + testReadLine(t, testInputrn) +} + +func TestLineTooLong(t *testing.T) { + data := make([]byte, 0) + for i := 0; i < minReadBufferSize*5/2; i++ { + data = append(data, '0'+byte(i%10)) + } + buf := bytes.NewReader(data) + l := NewReaderSize(buf, minReadBufferSize) + line, isPrefix, err := l.ReadLine() + if !isPrefix || !bytes.Equal(line, data[:minReadBufferSize]) || err != nil { + t.Errorf("bad result for first line: got %q want %q %v", line, data[:minReadBufferSize], err) + } + data = data[len(line):] + line, isPrefix, err = l.ReadLine() + if !isPrefix || !bytes.Equal(line, data[:minReadBufferSize]) || err != nil { + t.Errorf("bad result for second line: got %q want %q %v", line, data[:minReadBufferSize], err) + } + data = data[len(line):] + line, isPrefix, err = l.ReadLine() + if isPrefix || !bytes.Equal(line, data[:minReadBufferSize/2]) || err != nil { + t.Errorf("bad result for third line: got %q want %q %v", line, data[:minReadBufferSize/2], err) + } + line, isPrefix, err = l.ReadLine() + if isPrefix || err == nil { + t.Errorf("expected no more lines: %x %s", line, err) + } +} + +func TestReadAfterLines(t *testing.T) { + line1 := "this is line1" + restData := "this is line2\nthis is line 3\n" + inbuf := bytes.NewReader([]byte(line1 + "\n" + restData)) + outbuf := new(bytes.Buffer) + maxLineLength := len(line1) + len(restData)/2 + l := NewReaderSize(inbuf, maxLineLength) + line, isPrefix, err := l.ReadLine() + if isPrefix || err != nil || string(line) != line1 { + t.Errorf("bad result for first line: isPrefix=%v err=%v line=%q", isPrefix, err, string(line)) + } + n, err := io.Copy(outbuf, l) + if int(n) != len(restData) || err != nil { + t.Errorf("bad result for Read: n=%d err=%v", n, err) + } + if outbuf.String() != restData { + t.Errorf("bad result for Read: got %q; expected %q", outbuf.String(), restData) + } +} + +func TestReadEmptyBuffer(t *testing.T) { + l := NewReaderSize(new(bytes.Buffer), minReadBufferSize) + line, isPrefix, err := l.ReadLine() + if err != io.EOF { + t.Errorf("expected EOF from ReadLine, got '%s' %t %s", line, isPrefix, err) + } +} + +func TestLinesAfterRead(t *testing.T) { + l := NewReaderSize(bytes.NewReader([]byte("foo")), minReadBufferSize) + _, err := io.ReadAll(l) + if err != nil { + t.Error(err) + return + } + + line, isPrefix, err := l.ReadLine() + if err != io.EOF { + t.Errorf("expected EOF from ReadLine, got '%s' %t %s", line, isPrefix, err) + } +} + +func TestReadLineNonNilLineOrError(t *testing.T) { + r := NewReader(strings.NewReader("line 1\n")) + for i := 0; i < 2; i++ { + l, _, err := r.ReadLine() + if l != nil && err != nil { + t.Fatalf("on line %d/2; ReadLine=%#v, %v; want non-nil line or Error, but not both", + i+1, l, err) + } + } +} + +type readLineResult struct { + line []byte + isPrefix bool + err error +} + +var readLineNewlinesTests = []struct { + input string + expect []readLineResult +}{ + {"012345678901234\r\n012345678901234\r\n", []readLineResult{ + {[]byte("012345678901234"), true, nil}, + {nil, false, nil}, + {[]byte("012345678901234"), true, nil}, + {nil, false, nil}, + {nil, false, io.EOF}, + }}, + {"0123456789012345\r012345678901234\r", []readLineResult{ + {[]byte("0123456789012345"), true, nil}, + {[]byte("\r012345678901234"), true, nil}, + {[]byte("\r"), false, nil}, + {nil, false, io.EOF}, + }}, +} + +func TestReadLineNewlines(t *testing.T) { + for _, e := range readLineNewlinesTests { + testReadLineNewlines(t, e.input, e.expect) + } +} + +func testReadLineNewlines(t *testing.T, input string, expect []readLineResult) { + b := NewReaderSize(strings.NewReader(input), minReadBufferSize) + for i, e := range expect { + line, isPrefix, err := b.ReadLine() + if !bytes.Equal(line, e.line) { + t.Errorf("%q call %d, line == %q, want %q", input, i, line, e.line) + return + } + if isPrefix != e.isPrefix { + t.Errorf("%q call %d, isPrefix == %v, want %v", input, i, isPrefix, e.isPrefix) + return + } + if err != e.err { + t.Errorf("%q call %d, err == %v, want %v", input, i, err, e.err) + return + } + } +} + +func createTestInput(n int) []byte { + input := make([]byte, n) + for i := range input { + // 101 and 251 are arbitrary prime numbers. + // The idea is to create an input sequence + // which doesn't repeat too frequently. + input[i] = byte(i % 251) + if i%101 == 0 { + input[i] ^= byte(i / 101) + } + } + return input +} + +func TestReaderWriteTo(t *testing.T) { + input := createTestInput(8192) + r := NewReader(onlyReader{bytes.NewReader(input)}) + w := new(bytes.Buffer) + if n, err := r.WriteTo(w); err != nil || n != int64(len(input)) { + t.Fatalf("r.WriteTo(w) = %d, %v, want %d, nil", n, err, len(input)) + } + + for i, val := range w.Bytes() { + if val != input[i] { + t.Errorf("after write: out[%d] = %#x, want %#x", i, val, input[i]) + } + } +} + +type errorWriterToTest struct { + rn, wn int + rerr, werr error + expected error +} + +func (r errorWriterToTest) Read(p []byte) (int, error) { + return len(p) * r.rn, r.rerr +} + +func (w errorWriterToTest) Write(p []byte) (int, error) { + return len(p) * w.wn, w.werr +} + +var errorWriterToTests = []errorWriterToTest{ + {1, 0, nil, io.ErrClosedPipe, io.ErrClosedPipe}, + {0, 1, io.ErrClosedPipe, nil, io.ErrClosedPipe}, + {0, 0, io.ErrUnexpectedEOF, io.ErrClosedPipe, io.ErrClosedPipe}, + {0, 1, io.EOF, nil, nil}, +} + +func TestReaderWriteToErrors(t *testing.T) { + for i, rw := range errorWriterToTests { + r := NewReader(rw) + if _, err := r.WriteTo(rw); err != rw.expected { + t.Errorf("r.WriteTo(errorWriterToTests[%d]) = _, %v, want _,%v", i, err, rw.expected) + } + } +} + +func TestWriterReadFrom(t *testing.T) { + ws := []func(io.Writer) io.Writer{ + func(w io.Writer) io.Writer { return onlyWriter{w} }, + func(w io.Writer) io.Writer { return w }, + } + + rs := []func(io.Reader) io.Reader{ + iotest.DataErrReader, + func(r io.Reader) io.Reader { return r }, + } + + for ri, rfunc := range rs { + for wi, wfunc := range ws { + input := createTestInput(8192) + b := new(bytes.Buffer) + w := NewWriter(wfunc(b)) + r := rfunc(bytes.NewReader(input)) + if n, err := w.ReadFrom(r); err != nil || n != int64(len(input)) { + t.Errorf("ws[%d],rs[%d]: w.ReadFrom(r) = %d, %v, want %d, nil", wi, ri, n, err, len(input)) + continue + } + if err := w.Flush(); err != nil { + t.Errorf("Flush returned %v", err) + continue + } + if got, want := b.String(), string(input); got != want { + t.Errorf("ws[%d], rs[%d]:\ngot %q\nwant %q\n", wi, ri, got, want) + } + } + } +} + +type errorReaderFromTest struct { + rn, wn int + rerr, werr error + expected error +} + +func (r errorReaderFromTest) Read(p []byte) (int, error) { + return len(p) * r.rn, r.rerr +} + +func (w errorReaderFromTest) Write(p []byte) (int, error) { + return len(p) * w.wn, w.werr +} + +var errorReaderFromTests = []errorReaderFromTest{ + {0, 1, io.EOF, nil, nil}, + {1, 1, io.EOF, nil, nil}, + {0, 1, io.ErrClosedPipe, nil, io.ErrClosedPipe}, + {0, 0, io.ErrClosedPipe, io.ErrShortWrite, io.ErrClosedPipe}, + {1, 0, nil, io.ErrShortWrite, io.ErrShortWrite}, +} + +func TestWriterReadFromErrors(t *testing.T) { + for i, rw := range errorReaderFromTests { + w := NewWriter(rw) + if _, err := w.ReadFrom(rw); err != rw.expected { + t.Errorf("w.ReadFrom(errorReaderFromTests[%d]) = _, %v, want _,%v", i, err, rw.expected) + } + } +} + +// TestWriterReadFromCounts tests that using io.Copy to copy into a +// bufio.Writer does not prematurely flush the buffer. For example, when +// buffering writes to a network socket, excessive network writes should be +// avoided. +func TestWriterReadFromCounts(t *testing.T) { + var w0 writeCountingDiscard + b0 := NewWriterSize(&w0, 1234) + b0.WriteString(strings.Repeat("x", 1000)) + if w0 != 0 { + t.Fatalf("write 1000 'x's: got %d writes, want 0", w0) + } + b0.WriteString(strings.Repeat("x", 200)) + if w0 != 0 { + t.Fatalf("write 1200 'x's: got %d writes, want 0", w0) + } + io.Copy(b0, onlyReader{strings.NewReader(strings.Repeat("x", 30))}) + if w0 != 0 { + t.Fatalf("write 1230 'x's: got %d writes, want 0", w0) + } + io.Copy(b0, onlyReader{strings.NewReader(strings.Repeat("x", 9))}) + if w0 != 1 { + t.Fatalf("write 1239 'x's: got %d writes, want 1", w0) + } + + var w1 writeCountingDiscard + b1 := NewWriterSize(&w1, 1234) + b1.WriteString(strings.Repeat("x", 1200)) + b1.Flush() + if w1 != 1 { + t.Fatalf("flush 1200 'x's: got %d writes, want 1", w1) + } + b1.WriteString(strings.Repeat("x", 89)) + if w1 != 1 { + t.Fatalf("write 1200 + 89 'x's: got %d writes, want 1", w1) + } + io.Copy(b1, onlyReader{strings.NewReader(strings.Repeat("x", 700))}) + if w1 != 1 { + t.Fatalf("write 1200 + 789 'x's: got %d writes, want 1", w1) + } + io.Copy(b1, onlyReader{strings.NewReader(strings.Repeat("x", 600))}) + if w1 != 2 { + t.Fatalf("write 1200 + 1389 'x's: got %d writes, want 2", w1) + } + b1.Flush() + if w1 != 3 { + t.Fatalf("flush 1200 + 1389 'x's: got %d writes, want 3", w1) + } +} + +// A writeCountingDiscard is like io.Discard and counts the number of times +// Write is called on it. +type writeCountingDiscard int + +func (w *writeCountingDiscard) Write(p []byte) (int, error) { + *w++ + return len(p), nil +} + +type negativeReader int + +func (r *negativeReader) Read([]byte) (int, error) { return -1, nil } + +func TestNegativeRead(t *testing.T) { + // should panic with a description pointing at the reader, not at itself. + // (should NOT panic with slice index error, for example.) + b := NewReader(new(negativeReader)) + defer func() { + switch err := recover().(type) { + case nil: + t.Fatal("read did not panic") + case error: + if !strings.Contains(err.Error(), "reader returned negative count from Read") { + t.Fatalf("wrong panic: %v", err) + } + default: + t.Fatalf("unexpected panic value: %T(%v)", err, err) + } + }() + b.Read(make([]byte, 100)) +} + +var errFake = errors.New("fake error") + +type errorThenGoodReader struct { + didErr bool + nread int +} + +func (r *errorThenGoodReader) Read(p []byte) (int, error) { + r.nread++ + if !r.didErr { + r.didErr = true + return 0, errFake + } + return len(p), nil +} + +func TestReaderClearError(t *testing.T) { + r := &errorThenGoodReader{} + b := NewReader(r) + buf := make([]byte, 1) + if _, err := b.Read(nil); err != nil { + t.Fatalf("1st nil Read = %v; want nil", err) + } + if _, err := b.Read(buf); err != errFake { + t.Fatalf("1st Read = %v; want errFake", err) + } + if _, err := b.Read(nil); err != nil { + t.Fatalf("2nd nil Read = %v; want nil", err) + } + if _, err := b.Read(buf); err != nil { + t.Fatalf("3rd Read with buffer = %v; want nil", err) + } + if r.nread != 2 { + t.Errorf("num reads = %d; want 2", r.nread) + } +} + +// Test for golang.org/issue/5947 +func TestWriterReadFromWhileFull(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriterSize(buf, 10) + + // Fill buffer exactly. + n, err := w.Write([]byte("0123456789")) + if n != 10 || err != nil { + t.Fatalf("Write returned (%v, %v), want (10, nil)", n, err) + } + + // Use ReadFrom to read in some data. + n2, err := w.ReadFrom(strings.NewReader("abcdef")) + if n2 != 6 || err != nil { + t.Fatalf("ReadFrom returned (%v, %v), want (6, nil)", n2, err) + } +} + +type emptyThenNonEmptyReader struct { + r io.Reader + n int +} + +func (r *emptyThenNonEmptyReader) Read(p []byte) (int, error) { + if r.n <= 0 { + return r.r.Read(p) + } + r.n-- + return 0, nil +} + +// Test for golang.org/issue/7611 +func TestWriterReadFromUntilEOF(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriterSize(buf, 5) + + // Partially fill buffer + n, err := w.Write([]byte("0123")) + if n != 4 || err != nil { + t.Fatalf("Write returned (%v, %v), want (4, nil)", n, err) + } + + // Use ReadFrom to read in some data. + r := &emptyThenNonEmptyReader{r: strings.NewReader("abcd"), n: 3} + n2, err := w.ReadFrom(r) + if n2 != 4 || err != nil { + t.Fatalf("ReadFrom returned (%v, %v), want (4, nil)", n2, err) + } + w.Flush() + if got, want := string(buf.Bytes()), "0123abcd"; got != want { + t.Fatalf("buf.Bytes() returned %q, want %q", got, want) + } +} + +func TestWriterReadFromErrNoProgress(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriterSize(buf, 5) + + // Partially fill buffer + n, err := w.Write([]byte("0123")) + if n != 4 || err != nil { + t.Fatalf("Write returned (%v, %v), want (4, nil)", n, err) + } + + // Use ReadFrom to read in some data. + r := &emptyThenNonEmptyReader{r: strings.NewReader("abcd"), n: 100} + n2, err := w.ReadFrom(r) + if n2 != 0 || err != io.ErrNoProgress { + t.Fatalf("buf.Bytes() returned (%v, %v), want (0, io.ErrNoProgress)", n2, err) + } +} + +func TestReadZero(t *testing.T) { + for _, size := range []int{100, 2} { + t.Run(fmt.Sprintf("bufsize=%d", size), func(t *testing.T) { + r := io.MultiReader(strings.NewReader("abc"), &emptyThenNonEmptyReader{r: strings.NewReader("def"), n: 1}) + br := NewReaderSize(r, size) + want := func(s string, wantErr error) { + p := make([]byte, 50) + n, err := br.Read(p) + if err != wantErr || n != len(s) || string(p[:n]) != s { + t.Fatalf("read(%d) = %q, %v, want %q, %v", len(p), string(p[:n]), err, s, wantErr) + } + t.Logf("read(%d) = %q, %v", len(p), string(p[:n]), err) + } + want("abc", nil) + want("", nil) + want("def", nil) + want("", io.EOF) + }) + } +} + +func TestReaderReset(t *testing.T) { + r := NewReader(strings.NewReader("foo foo")) + buf := make([]byte, 3) + r.Read(buf) + if string(buf) != "foo" { + t.Errorf("buf = %q; want foo", buf) + } + r.Reset(strings.NewReader("bar bar")) + all, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + if string(all) != "bar bar" { + t.Errorf("ReadAll = %q; want bar bar", all) + } +} + +func TestWriterReset(t *testing.T) { + var buf1, buf2 bytes.Buffer + w := NewWriter(&buf1) + w.WriteString("foo") + w.Reset(&buf2) // and not flushed + w.WriteString("bar") + w.Flush() + if buf1.String() != "" { + t.Errorf("buf1 = %q; want empty", buf1.String()) + } + if buf2.String() != "bar" { + t.Errorf("buf2 = %q; want bar", buf2.String()) + } +} + +func TestReaderDiscard(t *testing.T) { + tests := []struct { + name string + r io.Reader + bufSize int // 0 means 16 + peekSize int + + n int // input to Discard + + want int // from Discard + wantErr error // from Discard + + wantBuffered int + }{ + { + name: "normal case", + r: strings.NewReader("abcdefghijklmnopqrstuvwxyz"), + peekSize: 16, + n: 6, + want: 6, + wantBuffered: 10, + }, + { + name: "discard causing read", + r: strings.NewReader("abcdefghijklmnopqrstuvwxyz"), + n: 6, + want: 6, + wantBuffered: 10, + }, + { + name: "discard all without peek", + r: strings.NewReader("abcdefghijklmnopqrstuvwxyz"), + n: 26, + want: 26, + wantBuffered: 0, + }, + { + name: "discard more than end", + r: strings.NewReader("abcdefghijklmnopqrstuvwxyz"), + n: 27, + want: 26, + wantErr: io.EOF, + wantBuffered: 0, + }, + // Any error from filling shouldn't show up until we + // get past the valid bytes. Here we return we return 5 valid bytes at the same time + // as an error, but test that we don't see the error from Discard. + { + name: "fill error, discard less", + r: newScriptedReader(func(p []byte) (n int, err error) { + if len(p) < 5 { + panic("unexpected small read") + } + return 5, errors.New("5-then-error") + }), + n: 4, + want: 4, + wantErr: nil, + wantBuffered: 1, + }, + { + name: "fill error, discard equal", + r: newScriptedReader(func(p []byte) (n int, err error) { + if len(p) < 5 { + panic("unexpected small read") + } + return 5, errors.New("5-then-error") + }), + n: 5, + want: 5, + wantErr: nil, + wantBuffered: 0, + }, + { + name: "fill error, discard more", + r: newScriptedReader(func(p []byte) (n int, err error) { + if len(p) < 5 { + panic("unexpected small read") + } + return 5, errors.New("5-then-error") + }), + n: 6, + want: 5, + wantErr: errors.New("5-then-error"), + wantBuffered: 0, + }, + // Discard of 0 shouldn't cause a read: + { + name: "discard zero", + r: newScriptedReader(), // will panic on Read + n: 0, + want: 0, + wantErr: nil, + wantBuffered: 0, + }, + { + name: "discard negative", + r: newScriptedReader(), // will panic on Read + n: -1, + want: 0, + wantErr: ErrNegativeCount, + wantBuffered: 0, + }, + } + for _, tt := range tests { + br := NewReaderSize(tt.r, tt.bufSize) + if tt.peekSize > 0 { + peekBuf, err := br.Peek(tt.peekSize) + if err != nil { + t.Errorf("%s: Peek(%d): %v", tt.name, tt.peekSize, err) + continue + } + if len(peekBuf) != tt.peekSize { + t.Errorf("%s: len(Peek(%d)) = %v; want %v", tt.name, tt.peekSize, len(peekBuf), tt.peekSize) + continue + } + } + discarded, err := br.Discard(tt.n) + if ge, we := fmt.Sprint(err), fmt.Sprint(tt.wantErr); discarded != tt.want || ge != we { + t.Errorf("%s: Discard(%d) = (%v, %v); want (%v, %v)", tt.name, tt.n, discarded, ge, tt.want, we) + continue + } + if bn := br.Buffered(); bn != tt.wantBuffered { + t.Errorf("%s: after Discard, Buffered = %d; want %d", tt.name, bn, tt.wantBuffered) + } + } + +} + +func TestReaderSize(t *testing.T) { + if got, want := NewReader(nil).Size(), DefaultBufSize; got != want { + t.Errorf("NewReader's Reader.Size = %d; want %d", got, want) + } + if got, want := NewReaderSize(nil, 1234).Size(), 1234; got != want { + t.Errorf("NewReaderSize's Reader.Size = %d; want %d", got, want) + } +} + +func TestWriterSize(t *testing.T) { + if got, want := NewWriter(nil).Size(), DefaultBufSize; got != want { + t.Errorf("NewWriter's Writer.Size = %d; want %d", got, want) + } + if got, want := NewWriterSize(nil, 1234).Size(), 1234; got != want { + t.Errorf("NewWriterSize's Writer.Size = %d; want %d", got, want) + } +} + +// An onlyReader only implements io.Reader, no matter what other methods the underlying implementation may have. +type onlyReader struct { + io.Reader +} + +// An onlyWriter only implements io.Writer, no matter what other methods the underlying implementation may have. +type onlyWriter struct { + io.Writer +} + +// A scriptedReader is an io.Reader that executes its steps sequentially. +type scriptedReader []func(p []byte) (n int, err error) + +func (sr *scriptedReader) Read(p []byte) (n int, err error) { + if len(*sr) == 0 { + panic("too many Read calls on scripted Reader. No steps remain.") + } + step := (*sr)[0] + *sr = (*sr)[1:] + return step(p) +} + +func newScriptedReader(steps ...func(p []byte) (n int, err error)) io.Reader { + sr := scriptedReader(steps) + return &sr +} + +// eofReader returns the number of bytes read and io.EOF for the read that consumes the last of the content. +type eofReader struct { + buf []byte +} + +func (r *eofReader) Read(p []byte) (int, error) { + read := copy(p, r.buf) + r.buf = r.buf[read:] + + switch read { + case 0, len(r.buf): + // As allowed in the documentation, this will return io.EOF + // in the same call that consumes the last of the data. + // https://godoc.org/io#Reader + return read, io.EOF + } + + return read, nil +} + +func TestPartialReadEOF(t *testing.T) { + src := make([]byte, 10) + eofR := &eofReader{buf: src} + r := NewReader(eofR) + + // Start by reading 5 of the 10 available bytes. + dest := make([]byte, 5) + read, err := r.Read(dest) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if n := len(dest); read != n { + t.Fatalf("read %d bytes; wanted %d bytes", read, n) + } + + // The Reader should have buffered all the content from the io.Reader. + if n := len(eofR.buf); n != 0 { + t.Fatalf("got %d bytes left in bufio.Reader source; want 0 bytes", n) + } + // To prove the point, check that there are still 5 bytes available to read. + if n := r.Buffered(); n != 5 { + t.Fatalf("got %d bytes buffered in bufio.Reader; want 5 bytes", n) + } + + // This is the second read of 0 bytes. + read, err = r.Read([]byte{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if read != 0 { + t.Fatalf("read %d bytes; want 0 bytes", read) + } +} + +type writerWithReadFromError struct{} + +func (w writerWithReadFromError) ReadFrom(r io.Reader) (int64, error) { + return 0, errors.New("writerWithReadFromError error") +} + +func (w writerWithReadFromError) Write(b []byte) (n int, err error) { + return 10, nil +} + +func TestWriterReadFromMustSetUnderlyingError(t *testing.T) { + var wr = NewWriter(writerWithReadFromError{}) + if _, err := wr.ReadFrom(strings.NewReader("test2")); err == nil { + t.Fatal("expected ReadFrom returns error, got nil") + } + if _, err := wr.Write([]byte("123")); err == nil { + t.Fatal("expected Write returns error, got nil") + } +} + +type writeErrorOnlyWriter struct{} + +func (w writeErrorOnlyWriter) Write(p []byte) (n int, err error) { + return 0, errors.New("writeErrorOnlyWriter error") +} + +// Ensure that previous Write errors are immediately returned +// on any ReadFrom. See golang.org/issue/35194. +func TestWriterReadFromMustReturnUnderlyingError(t *testing.T) { + var wr = NewWriter(writeErrorOnlyWriter{}) + s := "test1" + wantBuffered := len(s) + if _, err := wr.WriteString(s); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if err := wr.Flush(); err == nil { + t.Error("expected flush error, got nil") + } + if _, err := wr.ReadFrom(strings.NewReader("test2")); err == nil { + t.Fatal("expected error, got nil") + } + if buffered := wr.Buffered(); buffered != wantBuffered { + t.Fatalf("Buffered = %v; want %v", buffered, wantBuffered) + } +} + +func BenchmarkReaderCopyOptimal(b *testing.B) { + // Optimal case is where the underlying reader implements io.WriterTo + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + src := NewReader(srcBuf) + dstBuf := new(bytes.Buffer) + dst := onlyWriter{dstBuf} + for i := 0; i < b.N; i++ { + srcBuf.Reset() + src.Reset(srcBuf) + dstBuf.Reset() + io.Copy(dst, src) + } +} + +func BenchmarkReaderCopyUnoptimal(b *testing.B) { + // Unoptimal case is where the underlying reader doesn't implement io.WriterTo + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + src := NewReader(onlyReader{srcBuf}) + dstBuf := new(bytes.Buffer) + dst := onlyWriter{dstBuf} + for i := 0; i < b.N; i++ { + srcBuf.Reset() + src.Reset(onlyReader{srcBuf}) + dstBuf.Reset() + io.Copy(dst, src) + } +} + +func BenchmarkReaderCopyNoWriteTo(b *testing.B) { + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + srcReader := NewReader(srcBuf) + src := onlyReader{srcReader} + dstBuf := new(bytes.Buffer) + dst := onlyWriter{dstBuf} + for i := 0; i < b.N; i++ { + srcBuf.Reset() + srcReader.Reset(srcBuf) + dstBuf.Reset() + io.Copy(dst, src) + } +} + +func BenchmarkReaderWriteToOptimal(b *testing.B) { + const bufSize = 16 << 10 + buf := make([]byte, bufSize) + r := bytes.NewReader(buf) + srcReader := NewReaderSize(onlyReader{r}, 1<<10) + if _, ok := io.Discard.(io.ReaderFrom); !ok { + b.Fatal("io.Discard doesn't support ReaderFrom") + } + for i := 0; i < b.N; i++ { + r.Seek(0, io.SeekStart) + srcReader.Reset(onlyReader{r}) + n, err := srcReader.WriteTo(io.Discard) + if err != nil { + b.Fatal(err) + } + if n != bufSize { + b.Fatalf("n = %d; want %d", n, bufSize) + } + } +} + +func BenchmarkReaderReadString(b *testing.B) { + r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n") + buf := NewReader(r) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + r.Seek(0, io.SeekStart) + buf.Reset(r) + + _, err := buf.ReadString('\n') + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkWriterCopyOptimal(b *testing.B) { + // Optimal case is where the underlying writer implements io.ReaderFrom + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + src := onlyReader{srcBuf} + dstBuf := new(bytes.Buffer) + dst := NewWriter(dstBuf) + for i := 0; i < b.N; i++ { + srcBuf.Reset() + dstBuf.Reset() + dst.Reset(dstBuf) + io.Copy(dst, src) + } +} + +func BenchmarkWriterCopyUnoptimal(b *testing.B) { + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + src := onlyReader{srcBuf} + dstBuf := new(bytes.Buffer) + dst := NewWriter(onlyWriter{dstBuf}) + for i := 0; i < b.N; i++ { + srcBuf.Reset() + dstBuf.Reset() + dst.Reset(onlyWriter{dstBuf}) + io.Copy(dst, src) + } +} + +func BenchmarkWriterCopyNoReadFrom(b *testing.B) { + srcBuf := bytes.NewBuffer(make([]byte, 8192)) + src := onlyReader{srcBuf} + dstBuf := new(bytes.Buffer) + dstWriter := NewWriter(dstBuf) + dst := onlyWriter{dstWriter} + for i := 0; i < b.N; i++ { + srcBuf.Reset() + dstBuf.Reset() + dstWriter.Reset(dstBuf) + io.Copy(dst, src) + } +} + +func BenchmarkReaderEmpty(b *testing.B) { + b.ReportAllocs() + str := strings.Repeat("x", 16<<10) + for i := 0; i < b.N; i++ { + br := NewReader(strings.NewReader(str)) + n, err := io.Copy(io.Discard, br) + if err != nil { + b.Fatal(err) + } + if n != int64(len(str)) { + b.Fatal("wrong length") + } + } +} + +func BenchmarkWriterEmpty(b *testing.B) { + b.ReportAllocs() + str := strings.Repeat("x", 1<<10) + bs := []byte(str) + for i := 0; i < b.N; i++ { + bw := NewWriter(io.Discard) + bw.Flush() + bw.WriteByte('a') + bw.Flush() + bw.WriteRune('B') + bw.Flush() + bw.Write(bs) + bw.Flush() + bw.WriteString(str) + bw.Flush() + } +} + +func BenchmarkWriterFlush(b *testing.B) { + b.ReportAllocs() + bw := NewWriter(io.Discard) + str := strings.Repeat("x", 50) + for i := 0; i < b.N; i++ { + bw.WriteString(str) + bw.Flush() + } +} diff --git a/src/bufio/example_test.go b/src/bufio/example_test.go new file mode 100644 index 0000000..8885d40 --- /dev/null +++ b/src/bufio/example_test.go @@ -0,0 +1,127 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + "bufio" + "fmt" + "os" + "strconv" + "strings" +) + +func ExampleWriter() { + w := bufio.NewWriter(os.Stdout) + fmt.Fprint(w, "Hello, ") + fmt.Fprint(w, "world!") + w.Flush() // Don't forget to flush! + // Output: Hello, world! +} + +// The simplest use of a Scanner, to read standard input as a set of lines. +func ExampleScanner_lines() { + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + fmt.Println(scanner.Text()) // Println will add back the final '\n' + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "reading standard input:", err) + } +} + +// Return the most recent call to Scan as a []byte. +func ExampleScanner_Bytes() { + scanner := bufio.NewScanner(strings.NewReader("gopher")) + for scanner.Scan() { + fmt.Println(len(scanner.Bytes()) == 6) + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "shouldn't see an error scanning a string") + } + // Output: + // true +} + +// Use a Scanner to implement a simple word-count utility by scanning the +// input as a sequence of space-delimited tokens. +func ExampleScanner_words() { + // An artificial input source. + const input = "Now is the winter of our discontent,\nMade glorious summer by this sun of York.\n" + scanner := bufio.NewScanner(strings.NewReader(input)) + // Set the split function for the scanning operation. + scanner.Split(bufio.ScanWords) + // Count the words. + count := 0 + for scanner.Scan() { + count++ + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "reading input:", err) + } + fmt.Printf("%d\n", count) + // Output: 15 +} + +// Use a Scanner with a custom split function (built by wrapping ScanWords) to validate +// 32-bit decimal input. +func ExampleScanner_custom() { + // An artificial input source. + const input = "1234 5678 1234567901234567890" + scanner := bufio.NewScanner(strings.NewReader(input)) + // Create a custom split function by wrapping the existing ScanWords function. + split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + advance, token, err = bufio.ScanWords(data, atEOF) + if err == nil && token != nil { + _, err = strconv.ParseInt(string(token), 10, 32) + } + return + } + // Set the split function for the scanning operation. + scanner.Split(split) + // Validate the input + for scanner.Scan() { + fmt.Printf("%s\n", scanner.Text()) + } + + if err := scanner.Err(); err != nil { + fmt.Printf("Invalid input: %s", err) + } + // Output: + // 1234 + // 5678 + // Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range +} + +// Use a Scanner with a custom split function to parse a comma-separated +// list with an empty final value. +func ExampleScanner_emptyFinalToken() { + // Comma-separated list; last entry is empty. + const input = "1,2,3,4," + scanner := bufio.NewScanner(strings.NewReader(input)) + // Define a split function that separates on commas. + onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + for i := 0; i < len(data); i++ { + if data[i] == ',' { + return i + 1, data[:i], nil + } + } + if !atEOF { + return 0, nil, nil + } + // There is one final token to be delivered, which may be the empty string. + // Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this + // but does not trigger an error to be returned from Scan itself. + return 0, data, bufio.ErrFinalToken + } + scanner.Split(onComma) + // Scan. + for scanner.Scan() { + fmt.Printf("%q ", scanner.Text()) + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "reading input:", err) + } + // Output: "1" "2" "3" "4" "" +} diff --git a/src/bufio/export_test.go b/src/bufio/export_test.go new file mode 100644 index 0000000..1667f01 --- /dev/null +++ b/src/bufio/export_test.go @@ -0,0 +1,29 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +// Exported for testing only. +import ( + "unicode/utf8" +) + +var IsSpace = isSpace + +const DefaultBufSize = defaultBufSize + +func (s *Scanner) MaxTokenSize(n int) { + if n < utf8.UTFMax || n > 1e9 { + panic("bad max token size") + } + if n < len(s.buf) { + s.buf = make([]byte, n) + } + s.maxTokenSize = n +} + +// ErrOrEOF is like Err, but returns EOF. Used to test a corner case. +func (s *Scanner) ErrOrEOF() error { + return s.err +} diff --git a/src/bufio/scan.go b/src/bufio/scan.go new file mode 100644 index 0000000..af46a14 --- /dev/null +++ b/src/bufio/scan.go @@ -0,0 +1,419 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +import ( + "bytes" + "errors" + "io" + "unicode/utf8" +) + +// Scanner provides a convenient interface for reading data such as +// a file of newline-delimited lines of text. Successive calls to +// the Scan method will step through the 'tokens' of a file, skipping +// the bytes between the tokens. The specification of a token is +// defined by a split function of type SplitFunc; the default split +// function breaks the input into lines with line termination stripped. Split +// functions are defined in this package for scanning a file into +// lines, bytes, UTF-8-encoded runes, and space-delimited words. The +// client may instead provide a custom split function. +// +// Scanning stops unrecoverably at EOF, the first I/O error, or a token too +// large to fit in the buffer. When a scan stops, the reader may have +// advanced arbitrarily far past the last token. Programs that need more +// control over error handling or large tokens, or must run sequential scans +// on a reader, should use bufio.Reader instead. +// +type Scanner struct { + r io.Reader // The reader provided by the client. + split SplitFunc // The function to split the tokens. + maxTokenSize int // Maximum size of a token; modified by tests. + token []byte // Last token returned by split. + buf []byte // Buffer used as argument to split. + start int // First non-processed byte in buf. + end int // End of data in buf. + err error // Sticky error. + empties int // Count of successive empty tokens. + scanCalled bool // Scan has been called; buffer is in use. + done bool // Scan has finished. +} + +// SplitFunc is the signature of the split function used to tokenize the +// input. The arguments are an initial substring of the remaining unprocessed +// data and a flag, atEOF, that reports whether the Reader has no more data +// to give. The return values are the number of bytes to advance the input +// and the next token to return to the user, if any, plus an error, if any. +// +// Scanning stops if the function returns an error, in which case some of +// the input may be discarded. +// +// Otherwise, the Scanner advances the input. If the token is not nil, +// the Scanner returns it to the user. If the token is nil, the +// Scanner reads more data and continues scanning; if there is no more +// data--if atEOF was true--the Scanner returns. If the data does not +// yet hold a complete token, for instance if it has no newline while +// scanning lines, a SplitFunc can return (0, nil, nil) to signal the +// Scanner to read more data into the slice and try again with a +// longer slice starting at the same point in the input. +// +// The function is never called with an empty data slice unless atEOF +// is true. If atEOF is true, however, data may be non-empty and, +// as always, holds unprocessed text. +type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error) + +// Errors returned by Scanner. +var ( + ErrTooLong = errors.New("bufio.Scanner: token too long") + ErrNegativeAdvance = errors.New("bufio.Scanner: SplitFunc returns negative advance count") + ErrAdvanceTooFar = errors.New("bufio.Scanner: SplitFunc returns advance count beyond input") + ErrBadReadCount = errors.New("bufio.Scanner: Read returned impossible count") +) + +const ( + // MaxScanTokenSize is the maximum size used to buffer a token + // unless the user provides an explicit buffer with Scanner.Buffer. + // The actual maximum token size may be smaller as the buffer + // may need to include, for instance, a newline. + MaxScanTokenSize = 64 * 1024 + + startBufSize = 4096 // Size of initial allocation for buffer. +) + +// NewScanner returns a new Scanner to read from r. +// The split function defaults to ScanLines. +func NewScanner(r io.Reader) *Scanner { + return &Scanner{ + r: r, + split: ScanLines, + maxTokenSize: MaxScanTokenSize, + } +} + +// Err returns the first non-EOF error that was encountered by the Scanner. +func (s *Scanner) Err() error { + if s.err == io.EOF { + return nil + } + return s.err +} + +// Bytes returns the most recent token generated by a call to Scan. +// The underlying array may point to data that will be overwritten +// by a subsequent call to Scan. It does no allocation. +func (s *Scanner) Bytes() []byte { + return s.token +} + +// Text returns the most recent token generated by a call to Scan +// as a newly allocated string holding its bytes. +func (s *Scanner) Text() string { + return string(s.token) +} + +// ErrFinalToken is a special sentinel error value. It is intended to be +// returned by a Split function to indicate that the token being delivered +// with the error is the last token and scanning should stop after this one. +// After ErrFinalToken is received by Scan, scanning stops with no error. +// The value is useful to stop processing early or when it is necessary to +// deliver a final empty token. One could achieve the same behavior +// with a custom error value but providing one here is tidier. +// See the emptyFinalToken example for a use of this value. +var ErrFinalToken = errors.New("final token") + +// Scan advances the Scanner to the next token, which will then be +// available through the Bytes or Text method. It returns false when the +// scan stops, either by reaching the end of the input or an error. +// After Scan returns false, the Err method will return any error that +// occurred during scanning, except that if it was io.EOF, Err +// will return nil. +// Scan panics if the split function returns too many empty +// tokens without advancing the input. This is a common error mode for +// scanners. +func (s *Scanner) Scan() bool { + if s.done { + return false + } + s.scanCalled = true + // Loop until we have a token. + for { + // See if we can get a token with what we already have. + // If we've run out of data but have an error, give the split function + // a chance to recover any remaining, possibly empty token. + if s.end > s.start || s.err != nil { + advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil) + if err != nil { + if err == ErrFinalToken { + s.token = token + s.done = true + return true + } + s.setErr(err) + return false + } + if !s.advance(advance) { + return false + } + s.token = token + if token != nil { + if s.err == nil || advance > 0 { + s.empties = 0 + } else { + // Returning tokens not advancing input at EOF. + s.empties++ + if s.empties > maxConsecutiveEmptyReads { + panic("bufio.Scan: too many empty tokens without progressing") + } + } + return true + } + } + // We cannot generate a token with what we are holding. + // If we've already hit EOF or an I/O error, we are done. + if s.err != nil { + // Shut it down. + s.start = 0 + s.end = 0 + return false + } + // Must read more data. + // First, shift data to beginning of buffer if there's lots of empty space + // or space is needed. + if s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf)/2) { + copy(s.buf, s.buf[s.start:s.end]) + s.end -= s.start + s.start = 0 + } + // Is the buffer full? If so, resize. + if s.end == len(s.buf) { + // Guarantee no overflow in the multiplication below. + const maxInt = int(^uint(0) >> 1) + if len(s.buf) >= s.maxTokenSize || len(s.buf) > maxInt/2 { + s.setErr(ErrTooLong) + return false + } + newSize := len(s.buf) * 2 + if newSize == 0 { + newSize = startBufSize + } + if newSize > s.maxTokenSize { + newSize = s.maxTokenSize + } + newBuf := make([]byte, newSize) + copy(newBuf, s.buf[s.start:s.end]) + s.buf = newBuf + s.end -= s.start + s.start = 0 + } + // Finally we can read some input. Make sure we don't get stuck with + // a misbehaving Reader. Officially we don't need to do this, but let's + // be extra careful: Scanner is for safe, simple jobs. + for loop := 0; ; { + n, err := s.r.Read(s.buf[s.end:len(s.buf)]) + if n < 0 || len(s.buf)-s.end < n { + s.setErr(ErrBadReadCount) + break + } + s.end += n + if err != nil { + s.setErr(err) + break + } + if n > 0 { + s.empties = 0 + break + } + loop++ + if loop > maxConsecutiveEmptyReads { + s.setErr(io.ErrNoProgress) + break + } + } + } +} + +// advance consumes n bytes of the buffer. It reports whether the advance was legal. +func (s *Scanner) advance(n int) bool { + if n < 0 { + s.setErr(ErrNegativeAdvance) + return false + } + if n > s.end-s.start { + s.setErr(ErrAdvanceTooFar) + return false + } + s.start += n + return true +} + +// setErr records the first error encountered. +func (s *Scanner) setErr(err error) { + if s.err == nil || s.err == io.EOF { + s.err = err + } +} + +// Buffer sets the initial buffer to use when scanning and the maximum +// size of buffer that may be allocated during scanning. The maximum +// token size is the larger of max and cap(buf). If max <= cap(buf), +// Scan will use this buffer only and do no allocation. +// +// By default, Scan uses an internal buffer and sets the +// maximum token size to MaxScanTokenSize. +// +// Buffer panics if it is called after scanning has started. +func (s *Scanner) Buffer(buf []byte, max int) { + if s.scanCalled { + panic("Buffer called after Scan") + } + s.buf = buf[0:cap(buf)] + s.maxTokenSize = max +} + +// Split sets the split function for the Scanner. +// The default split function is ScanLines. +// +// Split panics if it is called after scanning has started. +func (s *Scanner) Split(split SplitFunc) { + if s.scanCalled { + panic("Split called after Scan") + } + s.split = split +} + +// Split functions + +// ScanBytes is a split function for a Scanner that returns each byte as a token. +func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + return 1, data[0:1], nil +} + +var errorRune = []byte(string(utf8.RuneError)) + +// ScanRunes is a split function for a Scanner that returns each +// UTF-8-encoded rune as a token. The sequence of runes returned is +// equivalent to that from a range loop over the input as a string, which +// means that erroneous UTF-8 encodings translate to U+FFFD = "\xef\xbf\xbd". +// Because of the Scan interface, this makes it impossible for the client to +// distinguish correctly encoded replacement runes from encoding errors. +func ScanRunes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + // Fast path 1: ASCII. + if data[0] < utf8.RuneSelf { + return 1, data[0:1], nil + } + + // Fast path 2: Correct UTF-8 decode without error. + _, width := utf8.DecodeRune(data) + if width > 1 { + // It's a valid encoding. Width cannot be one for a correctly encoded + // non-ASCII rune. + return width, data[0:width], nil + } + + // We know it's an error: we have width==1 and implicitly r==utf8.RuneError. + // Is the error because there wasn't a full rune to be decoded? + // FullRune distinguishes correctly between erroneous and incomplete encodings. + if !atEOF && !utf8.FullRune(data) { + // Incomplete; get more bytes. + return 0, nil, nil + } + + // We have a real UTF-8 encoding error. Return a properly encoded error rune + // but advance only one byte. This matches the behavior of a range loop over + // an incorrectly encoded string. + return 1, errorRune, nil +} + +// dropCR drops a terminal \r from the data. +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0 : len(data)-1] + } + return data +} + +// ScanLines is a split function for a Scanner that returns each line of +// text, stripped of any trailing end-of-line marker. The returned line may +// be empty. The end-of-line marker is one optional carriage return followed +// by one mandatory newline. In regular expression notation, it is `\r?\n`. +// The last non-empty line of input will be returned even if it has no +// newline. +func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + // We have a full newline-terminated line. + return i + 1, dropCR(data[0:i]), nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), dropCR(data), nil + } + // Request more data. + return 0, nil, nil +} + +// isSpace reports whether the character is a Unicode white space character. +// We avoid dependency on the unicode package, but check validity of the implementation +// in the tests. +func isSpace(r rune) bool { + if r <= '\u00FF' { + // Obvious ASCII ones: \t through \r plus space. Plus two Latin-1 oddballs. + switch r { + case ' ', '\t', '\n', '\v', '\f', '\r': + return true + case '\u0085', '\u00A0': + return true + } + return false + } + // High-valued ones. + if '\u2000' <= r && r <= '\u200a' { + return true + } + switch r { + case '\u1680', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000': + return true + } + return false +} + +// ScanWords is a split function for a Scanner that returns each +// space-separated word of text, with surrounding spaces deleted. It will +// never return an empty string. The definition of space is set by +// unicode.IsSpace. +func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { + // Skip leading spaces. + start := 0 + for width := 0; start < len(data); start += width { + var r rune + r, width = utf8.DecodeRune(data[start:]) + if !isSpace(r) { + break + } + } + // Scan until space, marking end of word. + for width, i := 0, start; i < len(data); i += width { + var r rune + r, width = utf8.DecodeRune(data[i:]) + if isSpace(r) { + return i + width, data[start:i], nil + } + } + // If we're at EOF, we have a final, non-empty, non-terminated word. Return it. + if atEOF && len(data) > start { + return len(data), data[start:], nil + } + // Request more data. + return start, nil, nil +} diff --git a/src/bufio/scan_test.go b/src/bufio/scan_test.go new file mode 100644 index 0000000..e99b09f --- /dev/null +++ b/src/bufio/scan_test.go @@ -0,0 +1,596 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + . "bufio" + "bytes" + "errors" + "io" + "strings" + "testing" + "unicode" + "unicode/utf8" +) + +const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + +// Test white space table matches the Unicode definition. +func TestSpace(t *testing.T) { + for r := rune(0); r <= utf8.MaxRune; r++ { + if IsSpace(r) != unicode.IsSpace(r) { + t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r)) + } + } +} + +var scanTests = []string{ + "", + "a", + "¼", + "☹", + "\x81", // UTF-8 error + "\uFFFD", // correctly encoded RuneError + "abcdefgh", + "abc def\n\t\tgh ", + "abc¼☹\x81\uFFFD日本語\x82abc", +} + +func TestScanByte(t *testing.T) { + for n, test := range scanTests { + buf := strings.NewReader(test) + s := NewScanner(buf) + s.Split(ScanBytes) + var i int + for i = 0; s.Scan(); i++ { + if b := s.Bytes(); len(b) != 1 || b[0] != test[i] { + t.Errorf("#%d: %d: expected %q got %q", n, i, test, b) + } + } + if i != len(test) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// Test that the rune splitter returns same sequence of runes (not bytes) as for range string. +func TestScanRune(t *testing.T) { + for n, test := range scanTests { + buf := strings.NewReader(test) + s := NewScanner(buf) + s.Split(ScanRunes) + var i, runeCount int + var expect rune + // Use a string range loop to validate the sequence of runes. + for i, expect = range string(test) { + if !s.Scan() { + break + } + runeCount++ + got, _ := utf8.DecodeRune(s.Bytes()) + if got != expect { + t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + testRuneCount := utf8.RuneCountInString(test) + if runeCount != testRuneCount { + t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +var wordScanTests = []string{ + "", + " ", + "\n", + "a", + " a ", + "abc def", + " abc def ", + " abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n", +} + +// Test that the word splitter returns the same data as strings.Fields. +func TestScanWords(t *testing.T) { + for n, test := range wordScanTests { + buf := strings.NewReader(test) + s := NewScanner(buf) + s.Split(ScanWords) + words := strings.Fields(test) + var wordCount int + for wordCount = 0; wordCount < len(words); wordCount++ { + if !s.Scan() { + break + } + got := s.Text() + if got != words[wordCount] { + t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + if wordCount != len(words) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// slowReader is a reader that returns only a few bytes at a time, to test the incremental +// reads in Scanner.Scan. +type slowReader struct { + max int + buf io.Reader +} + +func (sr *slowReader) Read(p []byte) (n int, err error) { + if len(p) > sr.max { + p = p[0:sr.max] + } + return sr.buf.Read(p) +} + +// genLine writes to buf a predictable but non-trivial line of text of length +// n, including the terminal newline and an occasional carriage return. +// If addNewline is false, the \r and \n are not emitted. +func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { + buf.Reset() + doCR := lineNum%5 == 0 + if doCR { + n-- + } + for i := 0; i < n-1; i++ { // Stop early for \n. + c := 'a' + byte(lineNum+i) + if c == '\n' || c == '\r' { // Don't confuse us. + c = 'N' + } + buf.WriteByte(c) + } + if addNewline { + if doCR { + buf.WriteByte('\r') + } + buf.WriteByte('\n') + } +} + +// Test the line splitter, including some carriage returns but no long lines. +func TestScanLongLines(t *testing.T) { + // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. + tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{1, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.String() // We use the string-valued token here, for variety. + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +// Test that the line splitter errors out on a long line. +func TestScanLineTooLong(t *testing.T) { + const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. + tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + j++ + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{3, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.Bytes() + if !bytes.Equal(s.Bytes(), line) { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != ErrTooLong { + t.Fatalf("expected ErrTooLong; got %s", err) + } +} + +// Test that the line splitter handles a final line without a newline. +func testNoNewline(text string, lines []string, t *testing.T) { + buf := strings.NewReader(text) + s := NewScanner(&slowReader{7, buf}) + s.Split(ScanLines) + for lineNum := 0; s.Scan(); lineNum++ { + line := lines[lineNum] + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +// Test that the line splitter handles a final line without a newline. +func TestScanLineNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final line with a carriage return but no newline. +func TestScanLineReturnButNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line. +func TestScanLineEmptyFinalLine(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\n" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line with a carriage return but no newline. +func TestScanLineEmptyFinalLineWithCR(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +var testError = errors.New("testError") + +// Test the correct error is returned when the split function errors out. +func TestSplitError(t *testing.T) { + // Create a split function that delivers a little data, then a predictable error. + numSplits := 0 + const okCount = 7 + errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF { + panic("didn't get enough data") + } + if numSplits >= okCount { + return 0, nil, testError + } + numSplits++ + return 1, data[0:1], nil + } + // Read the data. + const text = "abcdefghijklmnopqrstuvwxyz" + buf := strings.NewReader(text) + s := NewScanner(&slowReader{1, buf}) + s.Split(errorSplit) + var i int + for i = 0; s.Scan(); i++ { + if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] { + t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0]) + } + } + // Check correct termination location and error. + if i != okCount { + t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i) + } + err := s.Err() + if err != testError { + t.Fatalf("expected %q got %v", testError, err) + } +} + +// Test that an EOF is overridden by a user-generated scan error. +func TestErrAtEOF(t *testing.T) { + s := NewScanner(strings.NewReader("1 2 33")) + // This splitter will fail on last entry, after s.err==EOF. + split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + advance, token, err = ScanWords(data, atEOF) + if len(token) > 1 { + if s.ErrOrEOF() != io.EOF { + t.Fatal("not testing EOF") + } + err = testError + } + return + } + s.Split(split) + for s.Scan() { + } + if s.Err() != testError { + t.Fatal("wrong error:", s.Err()) + } +} + +// Test for issue 5268. +type alwaysError struct{} + +func (alwaysError) Read(p []byte) (int, error) { + return 0, io.ErrUnexpectedEOF +} + +func TestNonEOFWithEmptyRead(t *testing.T) { + scanner := NewScanner(alwaysError{}) + for scanner.Scan() { + t.Fatal("read should fail") + } + err := scanner.Err() + if err != io.ErrUnexpectedEOF { + t.Errorf("unexpected error: %v", err) + } +} + +// Test that Scan finishes if we have endless empty reads. +type endlessZeros struct{} + +func (endlessZeros) Read(p []byte) (int, error) { + return 0, nil +} + +func TestBadReader(t *testing.T) { + scanner := NewScanner(endlessZeros{}) + for scanner.Scan() { + t.Fatal("read should fail") + } + err := scanner.Err() + if err != io.ErrNoProgress { + t.Errorf("unexpected error: %v", err) + } +} + +func TestScanWordsExcessiveWhiteSpace(t *testing.T) { + const word = "ipsum" + s := strings.Repeat(" ", 4*smallMaxTokenSize) + word + scanner := NewScanner(strings.NewReader(s)) + scanner.MaxTokenSize(smallMaxTokenSize) + scanner.Split(ScanWords) + if !scanner.Scan() { + t.Fatalf("scan failed: %v", scanner.Err()) + } + if token := scanner.Text(); token != word { + t.Fatalf("unexpected token: %v", token) + } +} + +// Test that empty tokens, including at end of line or end of file, are found by the scanner. +// Issue 8672: Could miss final empty token. + +func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + for i := 0; i < len(data); i++ { + if data[i] == ',' { + return i + 1, data[:i], nil + } + } + return 0, data, ErrFinalToken +} + +func testEmptyTokens(t *testing.T, text string, values []string) { + s := NewScanner(strings.NewReader(text)) + s.Split(commaSplit) + var i int + for i = 0; s.Scan(); i++ { + if i >= len(values) { + t.Fatalf("got %d fields, expected %d", i+1, len(values)) + } + if s.Text() != values[i] { + t.Errorf("%d: expected %q got %q", i, values[i], s.Text()) + } + } + if i != len(values) { + t.Fatalf("got %d fields, expected %d", i, len(values)) + } + if err := s.Err(); err != nil { + t.Fatal(err) + } +} + +func TestEmptyTokens(t *testing.T) { + testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""}) +} + +func TestWithNoEmptyTokens(t *testing.T) { + testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"}) +} + +func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + if len(data) > 0 { + return 1, data[:1], nil + } + return 0, data, nil +} + +func TestDontLoopForever(t *testing.T) { + s := NewScanner(strings.NewReader("abc")) + s.Split(loopAtEOFSplit) + // Expect a panic + defer func() { + err := recover() + if err == nil { + t.Fatal("should have panicked") + } + if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") { + panic(err) + } + }() + for count := 0; s.Scan(); count++ { + if count > 1000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +func TestBlankLines(t *testing.T) { + s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000))) + for count := 0; s.Scan(); count++ { + if count > 2000 { + t.Fatal("looping") + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +type countdown int + +func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) { + if *c > 0 { + *c-- + return 1, data[:1], nil + } + return 0, nil, nil +} + +// Check that the looping-at-EOF check doesn't trigger for merely empty tokens. +func TestEmptyLinesOK(t *testing.T) { + c := countdown(10000) + s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000))) + s.Split(c.split) + for s.Scan() { + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } + if c != 0 { + t.Fatalf("stopped with %d left to process", c) + } +} + +// Make sure we can read a huge token if a big enough buffer is provided. +func TestHugeBuffer(t *testing.T) { + text := strings.Repeat("x", 2*MaxScanTokenSize) + s := NewScanner(strings.NewReader(text + "\n")) + s.Buffer(make([]byte, 100), 3*MaxScanTokenSize) + for s.Scan() { + token := s.Text() + if token != text { + t.Errorf("scan got incorrect token of length %d", len(token)) + } + } + if s.Err() != nil { + t.Fatal("after scan:", s.Err()) + } +} + +// negativeEOFReader returns an invalid -1 at the end, as though it +// were wrapping the read system call. +type negativeEOFReader int + +func (r *negativeEOFReader) Read(p []byte) (int, error) { + if *r > 0 { + c := int(*r) + if c > len(p) { + c = len(p) + } + for i := 0; i < c; i++ { + p[i] = 'a' + } + p[c-1] = '\n' + *r -= negativeEOFReader(c) + return c, nil + } + return -1, io.EOF +} + +// Test that the scanner doesn't panic and returns ErrBadReadCount +// on a reader that returns a negative count of bytes read (issue 38053). +func TestNegativeEOFReader(t *testing.T) { + r := negativeEOFReader(10) + scanner := NewScanner(&r) + c := 0 + for scanner.Scan() { + c++ + if c > 1 { + t.Error("read too many lines") + break + } + } + if got, want := scanner.Err(), ErrBadReadCount; got != want { + t.Errorf("scanner.Err: got %v, want %v", got, want) + } +} + +// largeReader returns an invalid count that is larger than the number +// of bytes requested. +type largeReader struct{} + +func (largeReader) Read(p []byte) (int, error) { + return len(p) + 1, nil +} + +// Test that the scanner doesn't panic and returns ErrBadReadCount +// on a reader that returns an impossibly large count of bytes read (issue 38053). +func TestLargeReader(t *testing.T) { + scanner := NewScanner(largeReader{}) + for scanner.Scan() { + } + if got, want := scanner.Err(), ErrBadReadCount; got != want { + t.Errorf("scanner.Err: got %v, want %v", got, want) + } +} |