summaryrefslogtreecommitdiffstats
path: root/src/net/textproto/reader.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/net/textproto/reader.go800
1 files changed, 800 insertions, 0 deletions
diff --git a/src/net/textproto/reader.go b/src/net/textproto/reader.go
new file mode 100644
index 0000000..5c3084f
--- /dev/null
+++ b/src/net/textproto/reader.go
@@ -0,0 +1,800 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package textproto
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "sync"
+)
+
+// A Reader implements convenience methods for reading requests
+// or responses from a text protocol network connection.
+type Reader struct {
+ R *bufio.Reader
+ dot *dotReader
+ buf []byte // a re-usable buffer for readContinuedLineSlice
+}
+
+// NewReader returns a new Reader reading from r.
+//
+// To avoid denial of service attacks, the provided bufio.Reader
+// should be reading from an io.LimitReader or similar Reader to bound
+// the size of responses.
+func NewReader(r *bufio.Reader) *Reader {
+ commonHeaderOnce.Do(initCommonHeader)
+ return &Reader{R: r}
+}
+
+// ReadLine reads a single line from r,
+// eliding the final \n or \r\n from the returned string.
+func (r *Reader) ReadLine() (string, error) {
+ line, err := r.readLineSlice()
+ return string(line), err
+}
+
+// ReadLineBytes is like ReadLine but returns a []byte instead of a string.
+func (r *Reader) ReadLineBytes() ([]byte, error) {
+ line, err := r.readLineSlice()
+ if line != nil {
+ buf := make([]byte, len(line))
+ copy(buf, line)
+ line = buf
+ }
+ return line, err
+}
+
+func (r *Reader) readLineSlice() ([]byte, error) {
+ r.closeDot()
+ var line []byte
+ for {
+ l, more, err := r.R.ReadLine()
+ if err != nil {
+ return nil, err
+ }
+ // Avoid the copy if the first call produced a full line.
+ if line == nil && !more {
+ return l, nil
+ }
+ line = append(line, l...)
+ if !more {
+ break
+ }
+ }
+ return line, nil
+}
+
+// ReadContinuedLine reads a possibly continued line from r,
+// eliding the final trailing ASCII white space.
+// Lines after the first are considered continuations if they
+// begin with a space or tab character. In the returned data,
+// continuation lines are separated from the previous line
+// only by a single space: the newline and leading white space
+// are removed.
+//
+// For example, consider this input:
+//
+// Line 1
+// continued...
+// Line 2
+//
+// The first call to ReadContinuedLine will return "Line 1 continued..."
+// and the second will return "Line 2".
+//
+// Empty lines are never continued.
+//
+func (r *Reader) ReadContinuedLine() (string, error) {
+ line, err := r.readContinuedLineSlice(noValidation)
+ return string(line), err
+}
+
+// trim returns s with leading and trailing spaces and tabs removed.
+// It does not assume Unicode or UTF-8.
+func trim(s []byte) []byte {
+ i := 0
+ for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
+ i++
+ }
+ n := len(s)
+ for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
+ n--
+ }
+ return s[i:n]
+}
+
+// ReadContinuedLineBytes is like ReadContinuedLine but
+// returns a []byte instead of a string.
+func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
+ line, err := r.readContinuedLineSlice(noValidation)
+ if line != nil {
+ buf := make([]byte, len(line))
+ copy(buf, line)
+ line = buf
+ }
+ return line, err
+}
+
+// readContinuedLineSlice reads continued lines from the reader buffer,
+// returning a byte slice with all lines. The validateFirstLine function
+// is run on the first read line, and if it returns an error then this
+// error is returned from readContinuedLineSlice.
+func (r *Reader) readContinuedLineSlice(validateFirstLine func([]byte) error) ([]byte, error) {
+ if validateFirstLine == nil {
+ return nil, fmt.Errorf("missing validateFirstLine func")
+ }
+
+ // Read the first line.
+ line, err := r.readLineSlice()
+ if err != nil {
+ return nil, err
+ }
+ if len(line) == 0 { // blank line - no continuation
+ return line, nil
+ }
+
+ if err := validateFirstLine(line); err != nil {
+ return nil, err
+ }
+
+ // Optimistically assume that we have started to buffer the next line
+ // and it starts with an ASCII letter (the next header key), or a blank
+ // line, so we can avoid copying that buffered data around in memory
+ // and skipping over non-existent whitespace.
+ if r.R.Buffered() > 1 {
+ peek, _ := r.R.Peek(2)
+ if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
+ len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
+ return trim(line), nil
+ }
+ }
+
+ // ReadByte or the next readLineSlice will flush the read buffer;
+ // copy the slice into buf.
+ r.buf = append(r.buf[:0], trim(line)...)
+
+ // Read continuation lines.
+ for r.skipSpace() > 0 {
+ line, err := r.readLineSlice()
+ if err != nil {
+ break
+ }
+ r.buf = append(r.buf, ' ')
+ r.buf = append(r.buf, trim(line)...)
+ }
+ return r.buf, nil
+}
+
+// skipSpace skips R over all spaces and returns the number of bytes skipped.
+func (r *Reader) skipSpace() int {
+ n := 0
+ for {
+ c, err := r.R.ReadByte()
+ if err != nil {
+ // Bufio will keep err until next read.
+ break
+ }
+ if c != ' ' && c != '\t' {
+ r.R.UnreadByte()
+ break
+ }
+ n++
+ }
+ return n
+}
+
+func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
+ line, err := r.ReadLine()
+ if err != nil {
+ return
+ }
+ return parseCodeLine(line, expectCode)
+}
+
+func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
+ if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
+ err = ProtocolError("short response: " + line)
+ return
+ }
+ continued = line[3] == '-'
+ code, err = strconv.Atoi(line[0:3])
+ if err != nil || code < 100 {
+ err = ProtocolError("invalid response code: " + line)
+ return
+ }
+ message = line[4:]
+ if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
+ 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
+ 100 <= expectCode && expectCode < 1000 && code != expectCode {
+ err = &Error{code, message}
+ }
+ return
+}
+
+// ReadCodeLine reads a response code line of the form
+// code message
+// where code is a three-digit status code and the message
+// extends to the rest of the line. An example of such a line is:
+// 220 plan9.bell-labs.com ESMTP
+//
+// If the prefix of the status does not match the digits in expectCode,
+// ReadCodeLine returns with err set to &Error{code, message}.
+// For example, if expectCode is 31, an error will be returned if
+// the status is not in the range [310,319].
+//
+// If the response is multi-line, ReadCodeLine returns an error.
+//
+// An expectCode <= 0 disables the check of the status code.
+//
+func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
+ code, continued, message, err := r.readCodeLine(expectCode)
+ if err == nil && continued {
+ err = ProtocolError("unexpected multi-line response: " + message)
+ }
+ return
+}
+
+// ReadResponse reads a multi-line response of the form:
+//
+// code-message line 1
+// code-message line 2
+// ...
+// code message line n
+//
+// where code is a three-digit status code. The first line starts with the
+// code and a hyphen. The response is terminated by a line that starts
+// with the same code followed by a space. Each line in message is
+// separated by a newline (\n).
+//
+// See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
+// details of another form of response accepted:
+//
+// code-message line 1
+// message line 2
+// ...
+// code message line n
+//
+// If the prefix of the status does not match the digits in expectCode,
+// ReadResponse returns with err set to &Error{code, message}.
+// For example, if expectCode is 31, an error will be returned if
+// the status is not in the range [310,319].
+//
+// An expectCode <= 0 disables the check of the status code.
+//
+func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
+ code, continued, message, err := r.readCodeLine(expectCode)
+ multi := continued
+ for continued {
+ line, err := r.ReadLine()
+ if err != nil {
+ return 0, "", err
+ }
+
+ var code2 int
+ var moreMessage string
+ code2, continued, moreMessage, err = parseCodeLine(line, 0)
+ if err != nil || code2 != code {
+ message += "\n" + strings.TrimRight(line, "\r\n")
+ continued = true
+ continue
+ }
+ message += "\n" + moreMessage
+ }
+ if err != nil && multi && message != "" {
+ // replace one line error message with all lines (full message)
+ err = &Error{code, message}
+ }
+ return
+}
+
+// DotReader returns a new Reader that satisfies Reads using the
+// decoded text of a dot-encoded block read from r.
+// The returned Reader is only valid until the next call
+// to a method on r.
+//
+// Dot encoding is a common framing used for data blocks
+// in text protocols such as SMTP. The data consists of a sequence
+// of lines, each of which ends in "\r\n". The sequence itself
+// ends at a line containing just a dot: ".\r\n". Lines beginning
+// with a dot are escaped with an additional dot to avoid
+// looking like the end of the sequence.
+//
+// The decoded form returned by the Reader's Read method
+// rewrites the "\r\n" line endings into the simpler "\n",
+// removes leading dot escapes if present, and stops with error io.EOF
+// after consuming (and discarding) the end-of-sequence line.
+func (r *Reader) DotReader() io.Reader {
+ r.closeDot()
+ r.dot = &dotReader{r: r}
+ return r.dot
+}
+
+type dotReader struct {
+ r *Reader
+ state int
+}
+
+// Read satisfies reads by decoding dot-encoded data read from d.r.
+func (d *dotReader) Read(b []byte) (n int, err error) {
+ // Run data through a simple state machine to
+ // elide leading dots, rewrite trailing \r\n into \n,
+ // and detect ending .\r\n line.
+ const (
+ stateBeginLine = iota // beginning of line; initial state; must be zero
+ stateDot // read . at beginning of line
+ stateDotCR // read .\r at beginning of line
+ stateCR // read \r (possibly at end of line)
+ stateData // reading data in middle of line
+ stateEOF // reached .\r\n end marker line
+ )
+ br := d.r.R
+ for n < len(b) && d.state != stateEOF {
+ var c byte
+ c, err = br.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ break
+ }
+ switch d.state {
+ case stateBeginLine:
+ if c == '.' {
+ d.state = stateDot
+ continue
+ }
+ if c == '\r' {
+ d.state = stateCR
+ continue
+ }
+ d.state = stateData
+
+ case stateDot:
+ if c == '\r' {
+ d.state = stateDotCR
+ continue
+ }
+ if c == '\n' {
+ d.state = stateEOF
+ continue
+ }
+ d.state = stateData
+
+ case stateDotCR:
+ if c == '\n' {
+ d.state = stateEOF
+ continue
+ }
+ // Not part of .\r\n.
+ // Consume leading dot and emit saved \r.
+ br.UnreadByte()
+ c = '\r'
+ d.state = stateData
+
+ case stateCR:
+ if c == '\n' {
+ d.state = stateBeginLine
+ break
+ }
+ // Not part of \r\n. Emit saved \r
+ br.UnreadByte()
+ c = '\r'
+ d.state = stateData
+
+ case stateData:
+ if c == '\r' {
+ d.state = stateCR
+ continue
+ }
+ if c == '\n' {
+ d.state = stateBeginLine
+ }
+ }
+ b[n] = c
+ n++
+ }
+ if err == nil && d.state == stateEOF {
+ err = io.EOF
+ }
+ if err != nil && d.r.dot == d {
+ d.r.dot = nil
+ }
+ return
+}
+
+// closeDot drains the current DotReader if any,
+// making sure that it reads until the ending dot line.
+func (r *Reader) closeDot() {
+ if r.dot == nil {
+ return
+ }
+ buf := make([]byte, 128)
+ for r.dot != nil {
+ // When Read reaches EOF or an error,
+ // it will set r.dot == nil.
+ r.dot.Read(buf)
+ }
+}
+
+// ReadDotBytes reads a dot-encoding and returns the decoded data.
+//
+// See the documentation for the DotReader method for details about dot-encoding.
+func (r *Reader) ReadDotBytes() ([]byte, error) {
+ return io.ReadAll(r.DotReader())
+}
+
+// ReadDotLines reads a dot-encoding and returns a slice
+// containing the decoded lines, with the final \r\n or \n elided from each.
+//
+// See the documentation for the DotReader method for details about dot-encoding.
+func (r *Reader) ReadDotLines() ([]string, error) {
+ // We could use ReadDotBytes and then Split it,
+ // but reading a line at a time avoids needing a
+ // large contiguous block of memory and is simpler.
+ var v []string
+ var err error
+ for {
+ var line string
+ line, err = r.ReadLine()
+ if err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ break
+ }
+
+ // Dot by itself marks end; otherwise cut one dot.
+ if len(line) > 0 && line[0] == '.' {
+ if len(line) == 1 {
+ break
+ }
+ line = line[1:]
+ }
+ v = append(v, line)
+ }
+ return v, err
+}
+
+// ReadMIMEHeader reads a MIME-style header from r.
+// The header is a sequence of possibly continued Key: Value lines
+// ending in a blank line.
+// The returned map m maps CanonicalMIMEHeaderKey(key) to a
+// sequence of values in the same order encountered in the input.
+//
+// For example, consider this input:
+//
+// My-Key: Value 1
+// Long-Key: Even
+// Longer Value
+// My-Key: Value 2
+//
+// Given that input, ReadMIMEHeader returns the map:
+//
+// map[string][]string{
+// "My-Key": {"Value 1", "Value 2"},
+// "Long-Key": {"Even Longer Value"},
+// }
+//
+func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
+ // Avoid lots of small slice allocations later by allocating one
+ // large one ahead of time which we'll cut up into smaller
+ // slices. If this isn't big enough later, we allocate small ones.
+ var strs []string
+ hint := r.upcomingHeaderNewlines()
+ if hint > 0 {
+ strs = make([]string, hint)
+ }
+
+ m := make(MIMEHeader, hint)
+
+ // The first line cannot start with a leading space.
+ if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
+ line, err := r.readLineSlice()
+ if err != nil {
+ return m, err
+ }
+ return m, ProtocolError("malformed MIME header initial line: " + string(line))
+ }
+
+ for {
+ kv, err := r.readContinuedLineSlice(mustHaveFieldNameColon)
+ if len(kv) == 0 {
+ return m, err
+ }
+
+ // Key ends at first colon.
+ i := bytes.IndexByte(kv, ':')
+ if i < 0 {
+ return m, ProtocolError("malformed MIME header line: " + string(kv))
+ }
+ key := canonicalMIMEHeaderKey(kv[:i])
+
+ // As per RFC 7230 field-name is a token, tokens consist of one or more chars.
+ // We could return a ProtocolError here, but better to be liberal in what we
+ // accept, so if we get an empty key, skip it.
+ if key == "" {
+ continue
+ }
+
+ // Skip initial spaces in value.
+ i++ // skip colon
+ for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
+ i++
+ }
+ value := string(kv[i:])
+
+ vv := m[key]
+ if vv == nil && len(strs) > 0 {
+ // More than likely this will be a single-element key.
+ // Most headers aren't multi-valued.
+ // Set the capacity on strs[0] to 1, so any future append
+ // won't extend the slice into the other strings.
+ vv, strs = strs[:1:1], strs[1:]
+ vv[0] = value
+ m[key] = vv
+ } else {
+ m[key] = append(vv, value)
+ }
+
+ if err != nil {
+ return m, err
+ }
+ }
+}
+
+// noValidation is a no-op validation func for readContinuedLineSlice
+// that permits any lines.
+func noValidation(_ []byte) error { return nil }
+
+// mustHaveFieldNameColon ensures that, per RFC 7230, the
+// field-name is on a single line, so the first line must
+// contain a colon.
+func mustHaveFieldNameColon(line []byte) error {
+ if bytes.IndexByte(line, ':') < 0 {
+ return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
+ }
+ return nil
+}
+
+// upcomingHeaderNewlines returns an approximation of the number of newlines
+// that will be in this header. If it gets confused, it returns 0.
+func (r *Reader) upcomingHeaderNewlines() (n int) {
+ // Try to determine the 'hint' size.
+ r.R.Peek(1) // force a buffer load if empty
+ s := r.R.Buffered()
+ if s == 0 {
+ return
+ }
+ peek, _ := r.R.Peek(s)
+ for len(peek) > 0 {
+ i := bytes.IndexByte(peek, '\n')
+ if i < 3 {
+ // Not present (-1) or found within the next few bytes,
+ // implying we're at the end ("\r\n\r\n" or "\n\n")
+ return
+ }
+ n++
+ peek = peek[i+1:]
+ }
+ return
+}
+
+// CanonicalMIMEHeaderKey returns the canonical format of the
+// MIME header key s. The canonicalization converts the first
+// letter and any letter following a hyphen to upper case;
+// the rest are converted to lowercase. For example, the
+// canonical key for "accept-encoding" is "Accept-Encoding".
+// MIME header keys are assumed to be ASCII only.
+// If s contains a space or invalid header field bytes, it is
+// returned without modifications.
+func CanonicalMIMEHeaderKey(s string) string {
+ commonHeaderOnce.Do(initCommonHeader)
+
+ // Quick check for canonical encoding.
+ upper := true
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if !validHeaderFieldByte(c) {
+ return s
+ }
+ if upper && 'a' <= c && c <= 'z' {
+ return canonicalMIMEHeaderKey([]byte(s))
+ }
+ if !upper && 'A' <= c && c <= 'Z' {
+ return canonicalMIMEHeaderKey([]byte(s))
+ }
+ upper = c == '-'
+ }
+ return s
+}
+
+const toLower = 'a' - 'A'
+
+// validHeaderFieldByte reports whether b is a valid byte in a header
+// field name. RFC 7230 says:
+// header-field = field-name ":" OWS field-value OWS
+// field-name = token
+// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+// token = 1*tchar
+func validHeaderFieldByte(b byte) bool {
+ return int(b) < len(isTokenTable) && isTokenTable[b]
+}
+
+// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
+// allowed to mutate the provided byte slice before returning the
+// string.
+//
+// For invalid inputs (if a contains spaces or non-token bytes), a
+// is unchanged and a string copy is returned.
+func canonicalMIMEHeaderKey(a []byte) string {
+ // See if a looks like a header key. If not, return it unchanged.
+ for _, c := range a {
+ if validHeaderFieldByte(c) {
+ continue
+ }
+ // Don't canonicalize.
+ return string(a)
+ }
+
+ upper := true
+ for i, c := range a {
+ // Canonicalize: first letter upper case
+ // and upper case after each dash.
+ // (Host, User-Agent, If-Modified-Since).
+ // MIME headers are ASCII only, so no Unicode issues.
+ if upper && 'a' <= c && c <= 'z' {
+ c -= toLower
+ } else if !upper && 'A' <= c && c <= 'Z' {
+ c += toLower
+ }
+ a[i] = c
+ upper = c == '-' // for next time
+ }
+ // The compiler recognizes m[string(byteSlice)] as a special
+ // case, so a copy of a's bytes into a new string does not
+ // happen in this map lookup:
+ if v := commonHeader[string(a)]; v != "" {
+ return v
+ }
+ return string(a)
+}
+
+// commonHeader interns common header strings.
+var commonHeader map[string]string
+
+var commonHeaderOnce sync.Once
+
+func initCommonHeader() {
+ commonHeader = make(map[string]string)
+ for _, v := range []string{
+ "Accept",
+ "Accept-Charset",
+ "Accept-Encoding",
+ "Accept-Language",
+ "Accept-Ranges",
+ "Cache-Control",
+ "Cc",
+ "Connection",
+ "Content-Id",
+ "Content-Language",
+ "Content-Length",
+ "Content-Transfer-Encoding",
+ "Content-Type",
+ "Cookie",
+ "Date",
+ "Dkim-Signature",
+ "Etag",
+ "Expires",
+ "From",
+ "Host",
+ "If-Modified-Since",
+ "If-None-Match",
+ "In-Reply-To",
+ "Last-Modified",
+ "Location",
+ "Message-Id",
+ "Mime-Version",
+ "Pragma",
+ "Received",
+ "Return-Path",
+ "Server",
+ "Set-Cookie",
+ "Subject",
+ "To",
+ "User-Agent",
+ "Via",
+ "X-Forwarded-For",
+ "X-Imforwards",
+ "X-Powered-By",
+ } {
+ commonHeader[v] = v
+ }
+}
+
+// isTokenTable is a copy of net/http/lex.go's isTokenTable.
+// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
+var isTokenTable = [127]bool{
+ '!': true,
+ '#': true,
+ '$': true,
+ '%': true,
+ '&': true,
+ '\'': true,
+ '*': true,
+ '+': true,
+ '-': true,
+ '.': true,
+ '0': true,
+ '1': true,
+ '2': true,
+ '3': true,
+ '4': true,
+ '5': true,
+ '6': true,
+ '7': true,
+ '8': true,
+ '9': true,
+ 'A': true,
+ 'B': true,
+ 'C': true,
+ 'D': true,
+ 'E': true,
+ 'F': true,
+ 'G': true,
+ 'H': true,
+ 'I': true,
+ 'J': true,
+ 'K': true,
+ 'L': true,
+ 'M': true,
+ 'N': true,
+ 'O': true,
+ 'P': true,
+ 'Q': true,
+ 'R': true,
+ 'S': true,
+ 'T': true,
+ 'U': true,
+ 'W': true,
+ 'V': true,
+ 'X': true,
+ 'Y': true,
+ 'Z': true,
+ '^': true,
+ '_': true,
+ '`': true,
+ 'a': true,
+ 'b': true,
+ 'c': true,
+ 'd': true,
+ 'e': true,
+ 'f': true,
+ 'g': true,
+ 'h': true,
+ 'i': true,
+ 'j': true,
+ 'k': true,
+ 'l': true,
+ 'm': true,
+ 'n': true,
+ 'o': true,
+ 'p': true,
+ 'q': true,
+ 'r': true,
+ 's': true,
+ 't': true,
+ 'u': true,
+ 'v': true,
+ 'w': true,
+ 'x': true,
+ 'y': true,
+ 'z': true,
+ '|': true,
+ '~': true,
+}