diff options
Diffstat (limited to 'src/mime/multipart/multipart.go')
-rw-r--r-- | src/mime/multipart/multipart.go | 487 |
1 files changed, 487 insertions, 0 deletions
diff --git a/src/mime/multipart/multipart.go b/src/mime/multipart/multipart.go new file mode 100644 index 0000000..066aa70 --- /dev/null +++ b/src/mime/multipart/multipart.go @@ -0,0 +1,487 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// + +/* +Package multipart implements MIME multipart parsing, as defined in RFC +2046. + +The implementation is sufficient for HTTP (RFC 2388) and the multipart +bodies generated by popular browsers. + +# Limits + +To protect against malicious inputs, this package sets limits on the size +of the MIME data it processes. + +Reader.NextPart and Reader.NextRawPart limit the number of headers in a +part to 10000 and Reader.ReadForm limits the total number of headers in all +FileHeaders to 10000. +These limits may be adjusted with the GODEBUG=multipartmaxheaders=<values> +setting. + +Reader.ReadForm further limits the number of parts in a form to 1000. +This limit may be adjusted with the GODEBUG=multipartmaxparts=<value> +setting. +*/ +package multipart + +import ( + "bufio" + "bytes" + "fmt" + "internal/godebug" + "io" + "mime" + "mime/quotedprintable" + "net/textproto" + "path/filepath" + "strconv" + "strings" +) + +var emptyParams = make(map[string]string) + +// This constant needs to be at least 76 for this package to work correctly. +// This is because \r\n--separator_of_len_70- would fill the buffer and it +// wouldn't be safe to consume a single byte from it. +const peekBufferSize = 4096 + +// A Part represents a single part in a multipart body. +type Part struct { + // The headers of the body, if any, with the keys canonicalized + // in the same fashion that the Go http.Request headers are. + // For example, "foo-bar" changes case to "Foo-Bar" + Header textproto.MIMEHeader + + mr *Reader + + disposition string + dispositionParams map[string]string + + // r is either a reader directly reading from mr, or it's a + // wrapper around such a reader, decoding the + // Content-Transfer-Encoding + r io.Reader + + n int // known data bytes waiting in mr.bufReader + total int64 // total data bytes read already + err error // error to return when n == 0 + readErr error // read error observed from mr.bufReader +} + +// FormName returns the name parameter if p has a Content-Disposition +// of type "form-data". Otherwise it returns the empty string. +func (p *Part) FormName() string { + // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF + // of Content-Disposition value format. + if p.dispositionParams == nil { + p.parseContentDisposition() + } + if p.disposition != "form-data" { + return "" + } + return p.dispositionParams["name"] +} + +// FileName returns the filename parameter of the Part's Content-Disposition +// header. If not empty, the filename is passed through filepath.Base (which is +// platform dependent) before being returned. +func (p *Part) FileName() string { + if p.dispositionParams == nil { + p.parseContentDisposition() + } + filename := p.dispositionParams["filename"] + if filename == "" { + return "" + } + // RFC 7578, Section 4.2 requires that if a filename is provided, the + // directory path information must not be used. + return filepath.Base(filename) +} + +func (p *Part) parseContentDisposition() { + v := p.Header.Get("Content-Disposition") + var err error + p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) + if err != nil { + p.dispositionParams = emptyParams + } +} + +// NewReader creates a new multipart Reader reading from r using the +// given MIME boundary. +// +// The boundary is usually obtained from the "boundary" parameter of +// the message's "Content-Type" header. Use mime.ParseMediaType to +// parse such headers. +func NewReader(r io.Reader, boundary string) *Reader { + b := []byte("\r\n--" + boundary + "--") + return &Reader{ + bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), + nl: b[:2], + nlDashBoundary: b[:len(b)-2], + dashBoundaryDash: b[2:], + dashBoundary: b[2 : len(b)-2], + } +} + +// stickyErrorReader is an io.Reader which never calls Read on its +// underlying Reader once an error has been seen. (the io.Reader +// interface's contract promises nothing about the return values of +// Read calls after an error, yet this package does do multiple Reads +// after error) +type stickyErrorReader struct { + r io.Reader + err error +} + +func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { + if r.err != nil { + return 0, r.err + } + n, r.err = r.r.Read(p) + return n, r.err +} + +func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { + bp := &Part{ + Header: make(map[string][]string), + mr: mr, + } + if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil { + return nil, err + } + bp.r = partReader{bp} + + // rawPart is used to switch between Part.NextPart and Part.NextRawPart. + if !rawPart { + const cte = "Content-Transfer-Encoding" + if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { + bp.Header.Del(cte) + bp.r = quotedprintable.NewReader(bp.r) + } + } + return bp, nil +} + +func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error { + r := textproto.NewReader(p.mr.bufReader) + header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders) + if err == nil { + p.Header = header + } + // TODO: Add a distinguishable error to net/textproto. + if err != nil && err.Error() == "message too large" { + err = ErrMessageTooLarge + } + return err +} + +// Read reads the body of a part, after its headers and before the +// next part (if any) begins. +func (p *Part) Read(d []byte) (n int, err error) { + return p.r.Read(d) +} + +// partReader implements io.Reader by reading raw bytes directly from the +// wrapped *Part, without doing any Transfer-Encoding decoding. +type partReader struct { + p *Part +} + +func (pr partReader) Read(d []byte) (int, error) { + p := pr.p + br := p.mr.bufReader + + // Read into buffer until we identify some data to return, + // or we find a reason to stop (boundary or read error). + for p.n == 0 && p.err == nil { + peek, _ := br.Peek(br.Buffered()) + p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) + if p.n == 0 && p.err == nil { + // Force buffered I/O to read more into buffer. + _, p.readErr = br.Peek(len(peek) + 1) + if p.readErr == io.EOF { + p.readErr = io.ErrUnexpectedEOF + } + } + } + + // Read out from "data to return" part of buffer. + if p.n == 0 { + return 0, p.err + } + n := len(d) + if n > p.n { + n = p.n + } + n, _ = br.Read(d[:n]) + p.total += int64(n) + p.n -= n + if p.n == 0 { + return n, p.err + } + return n, nil +} + +// scanUntilBoundary scans buf to identify how much of it can be safely +// returned as part of the Part body. +// dashBoundary is "--boundary". +// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. +// The comments below (and the name) assume "\n--boundary", but either is accepted. +// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. +// readErr is the read error, if any, that followed reading the bytes in buf. +// scanUntilBoundary returns the number of data bytes from buf that can be +// returned as part of the Part body and also the error to return (if any) +// once those data bytes are done. +func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { + if total == 0 { + // At beginning of body, allow dashBoundary. + if bytes.HasPrefix(buf, dashBoundary) { + switch matchAfterPrefix(buf, dashBoundary, readErr) { + case -1: + return len(dashBoundary), nil + case 0: + return 0, nil + case +1: + return 0, io.EOF + } + } + if bytes.HasPrefix(dashBoundary, buf) { + return 0, readErr + } + } + + // Search for "\n--boundary". + if i := bytes.Index(buf, nlDashBoundary); i >= 0 { + switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { + case -1: + return i + len(nlDashBoundary), nil + case 0: + return i, nil + case +1: + return i, io.EOF + } + } + if bytes.HasPrefix(nlDashBoundary, buf) { + return 0, readErr + } + + // Otherwise, anything up to the final \n is not part of the boundary + // and so must be part of the body. + // Also if the section from the final \n onward is not a prefix of the boundary, + // it too must be part of the body. + i := bytes.LastIndexByte(buf, nlDashBoundary[0]) + if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { + return i, nil + } + return len(buf), readErr +} + +// matchAfterPrefix checks whether buf should be considered to match the boundary. +// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", +// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. +// +// matchAfterPrefix returns +1 if the buffer does match the boundary, +// meaning the prefix is followed by a double dash, space, tab, cr, nl, +// or end of input. +// It returns -1 if the buffer definitely does NOT match the boundary, +// meaning the prefix is followed by some other character. +// For example, "--foobar" does not match "--foo". +// It returns 0 more input needs to be read to make the decision, +// meaning that len(buf) == len(prefix) and readErr == nil. +func matchAfterPrefix(buf, prefix []byte, readErr error) int { + if len(buf) == len(prefix) { + if readErr != nil { + return +1 + } + return 0 + } + c := buf[len(prefix)] + + if c == ' ' || c == '\t' || c == '\r' || c == '\n' { + return +1 + } + + // Try to detect boundaryDash + if c == '-' { + if len(buf) == len(prefix)+1 { + if readErr != nil { + // Prefix + "-" does not match + return -1 + } + return 0 + } + if buf[len(prefix)+1] == '-' { + return +1 + } + } + + return -1 +} + +func (p *Part) Close() error { + io.Copy(io.Discard, p) + return nil +} + +// Reader is an iterator over parts in a MIME multipart body. +// Reader's underlying parser consumes its input as needed. Seeking +// isn't supported. +type Reader struct { + bufReader *bufio.Reader + tempDir string // used in tests + + currentPart *Part + partsRead int + + nl []byte // "\r\n" or "\n" (set after seeing first boundary line) + nlDashBoundary []byte // nl + "--boundary" + dashBoundaryDash []byte // "--boundary--" + dashBoundary []byte // "--boundary" +} + +// maxMIMEHeaderSize is the maximum size of a MIME header we will parse, +// including header keys, values, and map overhead. +const maxMIMEHeaderSize = 10 << 20 + +// multipartMaxHeaders is the maximum number of header entries NextPart will return, +// as well as the maximum combined total of header entries Reader.ReadForm will return +// in FileHeaders. +var multipartMaxHeaders = godebug.New("multipartmaxheaders") + +func maxMIMEHeaders() int64 { + if s := multipartMaxHeaders.Value(); s != "" { + if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 { + return v + } + } + return 10000 +} + +// NextPart returns the next part in the multipart or an error. +// When there are no more parts, the error io.EOF is returned. +// +// As a special case, if the "Content-Transfer-Encoding" header +// has a value of "quoted-printable", that header is instead +// hidden and the body is transparently decoded during Read calls. +func (r *Reader) NextPart() (*Part, error) { + return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders()) +} + +// NextRawPart returns the next part in the multipart or an error. +// When there are no more parts, the error io.EOF is returned. +// +// Unlike NextPart, it does not have special handling for +// "Content-Transfer-Encoding: quoted-printable". +func (r *Reader) NextRawPart() (*Part, error) { + return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders()) +} + +func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { + if r.currentPart != nil { + r.currentPart.Close() + } + if string(r.dashBoundary) == "--" { + return nil, fmt.Errorf("multipart: boundary is empty") + } + expectNewPart := false + for { + line, err := r.bufReader.ReadSlice('\n') + + if err == io.EOF && r.isFinalBoundary(line) { + // If the buffer ends in "--boundary--" without the + // trailing "\r\n", ReadSlice will return an error + // (since it's missing the '\n'), but this is a valid + // multipart EOF so we need to return io.EOF instead of + // a fmt-wrapped one. + return nil, io.EOF + } + if err != nil { + return nil, fmt.Errorf("multipart: NextPart: %w", err) + } + + if r.isBoundaryDelimiterLine(line) { + r.partsRead++ + bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders) + if err != nil { + return nil, err + } + r.currentPart = bp + return bp, nil + } + + if r.isFinalBoundary(line) { + // Expected EOF + return nil, io.EOF + } + + if expectNewPart { + return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) + } + + if r.partsRead == 0 { + // skip line + continue + } + + // Consume the "\n" or "\r\n" separator between the + // body of the previous part and the boundary line we + // now expect will follow. (either a new part or the + // end boundary) + if bytes.Equal(line, r.nl) { + expectNewPart = true + continue + } + + return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) + } +} + +// isFinalBoundary reports whether line is the final boundary line +// indicating that all parts are over. +// It matches `^--boundary--[ \t]*(\r\n)?$` +func (r *Reader) isFinalBoundary(line []byte) bool { + if !bytes.HasPrefix(line, r.dashBoundaryDash) { + return false + } + rest := line[len(r.dashBoundaryDash):] + rest = skipLWSPChar(rest) + return len(rest) == 0 || bytes.Equal(rest, r.nl) +} + +func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { + // https://tools.ietf.org/html/rfc2046#section-5.1 + // The boundary delimiter line is then defined as a line + // consisting entirely of two hyphen characters ("-", + // decimal value 45) followed by the boundary parameter + // value from the Content-Type header field, optional linear + // whitespace, and a terminating CRLF. + if !bytes.HasPrefix(line, r.dashBoundary) { + return false + } + rest := line[len(r.dashBoundary):] + rest = skipLWSPChar(rest) + + // On the first part, see our lines are ending in \n instead of \r\n + // and switch into that mode if so. This is a violation of the spec, + // but occurs in practice. + if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { + r.nl = r.nl[1:] + r.nlDashBoundary = r.nlDashBoundary[1:] + } + return bytes.Equal(rest, r.nl) +} + +// skipLWSPChar returns b with leading spaces and tabs removed. +// RFC 822 defines: +// +// LWSP-char = SPACE / HTAB +func skipLWSPChar(b []byte) []byte { + for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { + b = b[1:] + } + return b +} |