diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/mime | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
32 files changed, 5987 insertions, 0 deletions
diff --git a/src/mime/encodedword.go b/src/mime/encodedword.go new file mode 100644 index 0000000..e6b470b --- /dev/null +++ b/src/mime/encodedword.go @@ -0,0 +1,414 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "bytes" + "encoding/base64" + "errors" + "fmt" + "io" + "strings" + "unicode" + "unicode/utf8" +) + +// A WordEncoder is an RFC 2047 encoded-word encoder. +type WordEncoder byte + +const ( + // BEncoding represents Base64 encoding scheme as defined by RFC 2045. + BEncoding = WordEncoder('b') + // QEncoding represents the Q-encoding scheme as defined by RFC 2047. + QEncoding = WordEncoder('q') +) + +var ( + errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word") +) + +// Encode returns the encoded-word form of s. If s is ASCII without special +// characters, it is returned unchanged. The provided charset is the IANA +// charset name of s. It is case insensitive. +func (e WordEncoder) Encode(charset, s string) string { + if !needsEncoding(s) { + return s + } + return e.encodeWord(charset, s) +} + +func needsEncoding(s string) bool { + for _, b := range s { + if (b < ' ' || b > '~') && b != '\t' { + return true + } + } + return false +} + +// encodeWord encodes a string into an encoded-word. +func (e WordEncoder) encodeWord(charset, s string) string { + var buf strings.Builder + // Could use a hint like len(s)*3, but that's not enough for cases + // with word splits and too much for simpler inputs. + // 48 is close to maxEncodedWordLen/2, but adjusted to allocator size class. + buf.Grow(48) + + e.openWord(&buf, charset) + if e == BEncoding { + e.bEncode(&buf, charset, s) + } else { + e.qEncode(&buf, charset, s) + } + closeWord(&buf) + + return buf.String() +} + +const ( + // The maximum length of an encoded-word is 75 characters. + // See RFC 2047, section 2. 
+ maxEncodedWordLen = 75 + // maxContentLen is how much content can be encoded, ignoring the header and + // 2-byte footer. + maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=") +) + +var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen) + +// bEncode encodes s using base64 encoding and writes it to buf. +func (e WordEncoder) bEncode(buf *strings.Builder, charset, s string) { + w := base64.NewEncoder(base64.StdEncoding, buf) + // If the charset is not UTF-8 or if the content is short, do not bother + // splitting the encoded-word. + if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen { + io.WriteString(w, s) + w.Close() + return + } + + var currentLen, last, runeLen int + for i := 0; i < len(s); i += runeLen { + // Multi-byte characters must not be split across encoded-words. + // See RFC 2047, section 5.3. + _, runeLen = utf8.DecodeRuneInString(s[i:]) + + if currentLen+runeLen <= maxBase64Len { + currentLen += runeLen + } else { + io.WriteString(w, s[last:i]) + w.Close() + e.splitWord(buf, charset) + last = i + currentLen = runeLen + } + } + io.WriteString(w, s[last:]) + w.Close() +} + +// qEncode encodes s using Q encoding and writes it to buf. It splits the +// encoded-words when necessary. +func (e WordEncoder) qEncode(buf *strings.Builder, charset, s string) { + // We only split encoded-words when the charset is UTF-8. + if !isUTF8(charset) { + writeQString(buf, s) + return + } + + var currentLen, runeLen int + for i := 0; i < len(s); i += runeLen { + b := s[i] + // Multi-byte characters must not be split across encoded-words. + // See RFC 2047, section 5.3. + var encLen int + if b >= ' ' && b <= '~' && b != '=' && b != '?' 
&& b != '_' { + runeLen, encLen = 1, 1 + } else { + _, runeLen = utf8.DecodeRuneInString(s[i:]) + encLen = 3 * runeLen + } + + if currentLen+encLen > maxContentLen { + e.splitWord(buf, charset) + currentLen = 0 + } + writeQString(buf, s[i:i+runeLen]) + currentLen += encLen + } +} + +// writeQString encodes s using Q encoding and writes it to buf. +func writeQString(buf *strings.Builder, s string) { + for i := 0; i < len(s); i++ { + switch b := s[i]; { + case b == ' ': + buf.WriteByte('_') + case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_': + buf.WriteByte(b) + default: + buf.WriteByte('=') + buf.WriteByte(upperhex[b>>4]) + buf.WriteByte(upperhex[b&0x0f]) + } + } +} + +// openWord writes the beginning of an encoded-word into buf. +func (e WordEncoder) openWord(buf *strings.Builder, charset string) { + buf.WriteString("=?") + buf.WriteString(charset) + buf.WriteByte('?') + buf.WriteByte(byte(e)) + buf.WriteByte('?') +} + +// closeWord writes the end of an encoded-word into buf. +func closeWord(buf *strings.Builder) { + buf.WriteString("?=") +} + +// splitWord closes the current encoded-word and opens a new one. +func (e WordEncoder) splitWord(buf *strings.Builder, charset string) { + closeWord(buf) + buf.WriteByte(' ') + e.openWord(buf, charset) +} + +func isUTF8(charset string) bool { + return strings.EqualFold(charset, "UTF-8") +} + +const upperhex = "0123456789ABCDEF" + +// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words. +type WordDecoder struct { + // CharsetReader, if non-nil, defines a function to generate + // charset-conversion readers, converting from the provided + // charset into UTF-8. + // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets + // are handled by default. + // One of the CharsetReader's result values must be non-nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) +} + +// Decode decodes an RFC 2047 encoded-word. 
+func (d *WordDecoder) Decode(word string) (string, error) { + // See https://tools.ietf.org/html/rfc2047#section-2 for details. + // Our decoder is permissive, we accept empty encoded-text. + if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 { + return "", errInvalidWord + } + word = word[2 : len(word)-2] + + // split word "UTF-8?q?text" into "UTF-8", 'q', and "text" + charset, text, _ := strings.Cut(word, "?") + if charset == "" { + return "", errInvalidWord + } + encoding, text, _ := strings.Cut(text, "?") + if len(encoding) != 1 { + return "", errInvalidWord + } + + content, err := decode(encoding[0], text) + if err != nil { + return "", err + } + + var buf strings.Builder + if err := d.convert(&buf, charset, content); err != nil { + return "", err + } + return buf.String(), nil +} + +// DecodeHeader decodes all encoded-words of the given string. It returns an +// error if and only if CharsetReader of d returns an error. +func (d *WordDecoder) DecodeHeader(header string) (string, error) { + // If there is no encoded-word, returns before creating a buffer. + i := strings.Index(header, "=?") + if i == -1 { + return header, nil + } + + var buf strings.Builder + + buf.WriteString(header[:i]) + header = header[i:] + + betweenWords := false + for { + start := strings.Index(header, "=?") + if start == -1 { + break + } + cur := start + len("=?") + + i := strings.Index(header[cur:], "?") + if i == -1 { + break + } + charset := header[cur : cur+i] + cur += i + len("?") + + if len(header) < cur+len("Q??=") { + break + } + encoding := header[cur] + cur++ + + if header[cur] != '?' 
{ + break + } + cur++ + + j := strings.Index(header[cur:], "?=") + if j == -1 { + break + } + text := header[cur : cur+j] + end := cur + j + len("?=") + + content, err := decode(encoding, text) + if err != nil { + betweenWords = false + buf.WriteString(header[:start+2]) + header = header[start+2:] + continue + } + + // Write characters before the encoded-word. White-space and newline + // characters separating two encoded-words must be deleted. + if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { + buf.WriteString(header[:start]) + } + + if err := d.convert(&buf, charset, content); err != nil { + return "", err + } + + header = header[end:] + betweenWords = true + } + + if len(header) > 0 { + buf.WriteString(header) + } + + return buf.String(), nil +} + +func decode(encoding byte, text string) ([]byte, error) { + switch encoding { + case 'B', 'b': + return base64.StdEncoding.DecodeString(text) + case 'Q', 'q': + return qDecode(text) + default: + return nil, errInvalidWord + } +} + +func (d *WordDecoder) convert(buf *strings.Builder, charset string, content []byte) error { + switch { + case strings.EqualFold("utf-8", charset): + buf.Write(content) + case strings.EqualFold("iso-8859-1", charset): + for _, c := range content { + buf.WriteRune(rune(c)) + } + case strings.EqualFold("us-ascii", charset): + for _, c := range content { + if c >= utf8.RuneSelf { + buf.WriteRune(unicode.ReplacementChar) + } else { + buf.WriteByte(c) + } + } + default: + if d.CharsetReader == nil { + return fmt.Errorf("mime: unhandled charset %q", charset) + } + r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) + if err != nil { + return err + } + if _, err = io.Copy(buf, r); err != nil { + return err + } + } + return nil +} + +// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least +// one byte of non-whitespace. 
+func hasNonWhitespace(s string) bool { + for _, b := range s { + switch b { + // Encoded-words can only be separated by linear white spaces which does + // not include vertical tabs (\v). + case ' ', '\t', '\n', '\r': + default: + return true + } + } + return false +} + +// qDecode decodes a Q encoded string. +func qDecode(s string) ([]byte, error) { + dec := make([]byte, len(s)) + n := 0 + for i := 0; i < len(s); i++ { + switch c := s[i]; { + case c == '_': + dec[n] = ' ' + case c == '=': + if i+2 >= len(s) { + return nil, errInvalidWord + } + b, err := readHexByte(s[i+1], s[i+2]) + if err != nil { + return nil, err + } + dec[n] = b + i += 2 + case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': + dec[n] = c + default: + return nil, errInvalidWord + } + n++ + } + + return dec[:n], nil +} + +// readHexByte returns the byte from its quoted-printable representation. +func readHexByte(a, b byte) (byte, error) { + var hb, lb byte + var err error + if hb, err = fromHex(a); err != nil { + return 0, err + } + if lb, err = fromHex(b); err != nil { + return 0, err + } + return hb<<4 | lb, nil +} + +func fromHex(b byte) (byte, error) { + switch { + case b >= '0' && b <= '9': + return b - '0', nil + case b >= 'A' && b <= 'F': + return b - 'A' + 10, nil + // Accept badly encoded bytes. + case b >= 'a' && b <= 'f': + return b - 'a' + 10, nil + } + return 0, fmt.Errorf("mime: invalid hex byte %#02x", b) +} diff --git a/src/mime/encodedword_test.go b/src/mime/encodedword_test.go new file mode 100644 index 0000000..2a98794 --- /dev/null +++ b/src/mime/encodedword_test.go @@ -0,0 +1,239 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package mime + +import ( + "errors" + "io" + "strings" + "testing" +) + +func TestEncodeWord(t *testing.T) { + utf8, iso88591 := "utf-8", "iso-8859-1" + tests := []struct { + enc WordEncoder + charset string + src, exp string + }{ + {QEncoding, utf8, "François-Jérôme", "=?utf-8?q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?="}, + {BEncoding, utf8, "Café", "=?utf-8?b?Q2Fmw6k=?="}, + {QEncoding, iso88591, "La Seleção", "=?iso-8859-1?q?La_Sele=C3=A7=C3=A3o?="}, + {QEncoding, utf8, "", ""}, + {QEncoding, utf8, "A", "A"}, + {QEncoding, iso88591, "a", "a"}, + {QEncoding, utf8, "123 456", "123 456"}, + {QEncoding, utf8, "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~"}, + {QEncoding, utf8, strings.Repeat("é", 10), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?="}, + {QEncoding, utf8, strings.Repeat("é", 11), "=?utf-8?q?" + strings.Repeat("=C3=A9", 10) + "?= =?utf-8?q?=C3=A9?="}, + {QEncoding, iso88591, strings.Repeat("\xe9", 22), "=?iso-8859-1?q?" + strings.Repeat("=E9", 22) + "?="}, + {QEncoding, utf8, strings.Repeat("\x80", 22), "=?utf-8?q?" + strings.Repeat("=80", 21) + "?= =?utf-8?q?=80?="}, + {BEncoding, iso88591, strings.Repeat("\xe9", 45), "=?iso-8859-1?b?" + strings.Repeat("6enp", 15) + "?="}, + {BEncoding, utf8, strings.Repeat("\x80", 48), "=?utf-8?b?" 
+ strings.Repeat("gICA", 15) + "?= =?utf-8?b?gICA?="}, + } + + for _, test := range tests { + if s := test.enc.Encode(test.charset, test.src); s != test.exp { + t.Errorf("Encode(%q) = %q, want %q", test.src, s, test.exp) + } + } +} + +func TestEncodedWordLength(t *testing.T) { + tests := []struct { + enc WordEncoder + src string + }{ + {QEncoding, strings.Repeat("à", 30)}, + {QEncoding, strings.Repeat("é", 60)}, + {BEncoding, strings.Repeat("ï", 25)}, + {BEncoding, strings.Repeat("ô", 37)}, + {BEncoding, strings.Repeat("\x80", 50)}, + {QEncoding, "{$firstname} Bienvendio a Apostolica, aquà inicia el camino de tu"}, + } + + for _, test := range tests { + s := test.enc.Encode("utf-8", test.src) + wordLen := 0 + for i := 0; i < len(s); i++ { + if s[i] == ' ' { + wordLen = 0 + continue + } + + wordLen++ + if wordLen > maxEncodedWordLen { + t.Errorf("Encode(%q) has more than %d characters: %q", + test.src, maxEncodedWordLen, s) + } + } + } +} + +func TestDecodeWord(t *testing.T) { + tests := []struct { + src, exp string + hasErr bool + }{ + {"=?UTF-8?Q?=C2=A1Hola,_se=C3=B1or!?=", "¡Hola, señor!", false}, + {"=?UTF-8?Q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?=", "François-Jérôme", false}, + {"=?UTF-8?q?ascii?=", "ascii", false}, + {"=?utf-8?B?QW5kcsOp?=", "André", false}, + {"=?ISO-8859-1?Q?Rapha=EBl_Dupont?=", "Raphaël Dupont", false}, + {"=?utf-8?b?IkFudG9uaW8gSm9zw6kiIDxqb3NlQGV4YW1wbGUub3JnPg==?=", `"Antonio José" <jose@example.org>`, false}, + {"=?UTF-8?A?Test?=", "", true}, + {"=?UTF-8?Q?A=B?=", "", true}, + {"=?UTF-8?Q?=A?=", "", true}, + {"=?UTF-8?A?A?=", "", true}, + {"=????=", "", true}, + {"=?UTF-8???=", "", true}, + {"=?UTF-8?Q??=", "", false}, + } + + for _, test := range tests { + dec := new(WordDecoder) + s, err := dec.Decode(test.src) + if test.hasErr && err == nil { + t.Errorf("Decode(%q) should return an error", test.src) + continue + } + if !test.hasErr && err != nil { + t.Errorf("Decode(%q): %v", test.src, err) + continue + } + if s != test.exp { + 
t.Errorf("Decode(%q) = %q, want %q", test.src, s, test.exp) + } + } +} + +func TestDecodeHeader(t *testing.T) { + tests := []struct { + src, exp string + }{ + {"=?UTF-8?Q?=C2=A1Hola,_se=C3=B1or!?=", "¡Hola, señor!"}, + {"=?UTF-8?Q?Fran=C3=A7ois-J=C3=A9r=C3=B4me?=", "François-Jérôme"}, + {"=?UTF-8?q?ascii?=", "ascii"}, + {"=?utf-8?B?QW5kcsOp?=", "André"}, + {"=?ISO-8859-1?Q?Rapha=EBl_Dupont?=", "Raphaël Dupont"}, + {"Jean", "Jean"}, + {"=?utf-8?b?IkFudG9uaW8gSm9zw6kiIDxqb3NlQGV4YW1wbGUub3JnPg==?=", `"Antonio José" <jose@example.org>`}, + {"=?UTF-8?A?Test?=", "=?UTF-8?A?Test?="}, + {"=?UTF-8?Q?A=B?=", "=?UTF-8?Q?A=B?="}, + {"=?UTF-8?Q?=A?=", "=?UTF-8?Q?=A?="}, + {"=?UTF-8?A?A?=", "=?UTF-8?A?A?="}, + // Incomplete words + {"=?", "=?"}, + {"=?UTF-8?", "=?UTF-8?"}, + {"=?UTF-8?=", "=?UTF-8?="}, + {"=?UTF-8?Q", "=?UTF-8?Q"}, + {"=?UTF-8?Q?", "=?UTF-8?Q?"}, + {"=?UTF-8?Q?=", "=?UTF-8?Q?="}, + {"=?UTF-8?Q?A", "=?UTF-8?Q?A"}, + {"=?UTF-8?Q?A?", "=?UTF-8?Q?A?"}, + // Tests from RFC 2047 + {"=?ISO-8859-1?Q?a?=", "a"}, + {"=?ISO-8859-1?Q?a?= b", "a b"}, + {"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=", "ab"}, + {"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=", "ab"}, + {"=?ISO-8859-1?Q?a?= \r\n\t =?ISO-8859-1?Q?b?=", "ab"}, + {"=?ISO-8859-1?Q?a_b?=", "a b"}, + } + + for _, test := range tests { + dec := new(WordDecoder) + s, err := dec.DecodeHeader(test.src) + if err != nil { + t.Errorf("DecodeHeader(%q): %v", test.src, err) + } + if s != test.exp { + t.Errorf("DecodeHeader(%q) = %q, want %q", test.src, s, test.exp) + } + } +} + +func TestCharsetDecoder(t *testing.T) { + tests := []struct { + src string + want string + charsets []string + content []string + }{ + {"=?utf-8?b?Q2Fmw6k=?=", "Café", nil, nil}, + {"=?ISO-8859-1?Q?caf=E9?=", "café", nil, nil}, + {"=?US-ASCII?Q?foo_bar?=", "foo bar", nil, nil}, + {"=?utf-8?Q?=?=", "=?utf-8?Q?=?=", nil, nil}, + {"=?utf-8?Q?=A?=", "=?utf-8?Q?=A?=", nil, nil}, + { + "=?ISO-8859-15?Q?f=F5=F6?= =?windows-1252?Q?b=E0r?=", + "f\xf5\xf6b\xe0r", + 
[]string{"iso-8859-15", "windows-1252"}, + []string{"f\xf5\xf6", "b\xe0r"}, + }, + } + + for _, test := range tests { + i := 0 + dec := &WordDecoder{ + CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { + if charset != test.charsets[i] { + t.Errorf("DecodeHeader(%q), got charset %q, want %q", test.src, charset, test.charsets[i]) + } + content, err := io.ReadAll(input) + if err != nil { + t.Errorf("DecodeHeader(%q), error in reader: %v", test.src, err) + } + got := string(content) + if got != test.content[i] { + t.Errorf("DecodeHeader(%q), got content %q, want %q", test.src, got, test.content[i]) + } + i++ + + return strings.NewReader(got), nil + }, + } + got, err := dec.DecodeHeader(test.src) + if err != nil { + t.Errorf("DecodeHeader(%q): %v", test.src, err) + } + if got != test.want { + t.Errorf("DecodeHeader(%q) = %q, want %q", test.src, got, test.want) + } + } +} + +func TestCharsetDecoderError(t *testing.T) { + dec := &WordDecoder{ + CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { + return nil, errors.New("Test error") + }, + } + + if _, err := dec.DecodeHeader("=?charset?Q?foo?="); err == nil { + t.Error("DecodeHeader should return an error") + } +} + +func BenchmarkQEncodeWord(b *testing.B) { + for i := 0; i < b.N; i++ { + QEncoding.Encode("UTF-8", "¡Hola, señor!") + } +} + +func BenchmarkQDecodeWord(b *testing.B) { + dec := new(WordDecoder) + + for i := 0; i < b.N; i++ { + dec.Decode("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=") + } +} + +func BenchmarkQDecodeHeader(b *testing.B) { + dec := new(WordDecoder) + + for i := 0; i < b.N; i++ { + dec.DecodeHeader("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=") + } +} diff --git a/src/mime/example_test.go b/src/mime/example_test.go new file mode 100644 index 0000000..8a96873 --- /dev/null +++ b/src/mime/example_test.go @@ -0,0 +1,123 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime_test + +import ( + "bytes" + "fmt" + "io" + "mime" +) + +func ExampleWordEncoder_Encode() { + fmt.Println(mime.QEncoding.Encode("utf-8", "¡Hola, señor!")) + fmt.Println(mime.QEncoding.Encode("utf-8", "Hello!")) + fmt.Println(mime.BEncoding.Encode("UTF-8", "¡Hola, señor!")) + fmt.Println(mime.QEncoding.Encode("ISO-8859-1", "Caf\xE9")) + // Output: + // =?utf-8?q?=C2=A1Hola,_se=C3=B1or!?= + // Hello! + // =?UTF-8?b?wqFIb2xhLCBzZcOxb3Ih?= + // =?ISO-8859-1?q?Caf=E9?= +} + +func ExampleWordDecoder_Decode() { + dec := new(mime.WordDecoder) + header, err := dec.Decode("=?utf-8?q?=C2=A1Hola,_se=C3=B1or!?=") + if err != nil { + panic(err) + } + fmt.Println(header) + + dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + switch charset { + case "x-case": + // Fake character set for example. + // Real use would integrate with packages such + // as code.google.com/p/go-charset + content, err := io.ReadAll(input) + if err != nil { + return nil, err + } + return bytes.NewReader(bytes.ToUpper(content)), nil + default: + return nil, fmt.Errorf("unhandled charset %q", charset) + } + } + header, err = dec.Decode("=?x-case?q?hello!?=") + if err != nil { + panic(err) + } + fmt.Println(header) + // Output: + // ¡Hola, señor! + // HELLO! +} + +func ExampleWordDecoder_DecodeHeader() { + dec := new(mime.WordDecoder) + header, err := dec.DecodeHeader("=?utf-8?q?=C3=89ric?= <eric@example.org>, =?utf-8?q?Ana=C3=AFs?= <anais@example.org>") + if err != nil { + panic(err) + } + fmt.Println(header) + + header, err = dec.DecodeHeader("=?utf-8?q?=C2=A1Hola,?= =?utf-8?q?_se=C3=B1or!?=") + if err != nil { + panic(err) + } + fmt.Println(header) + + dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + switch charset { + case "x-case": + // Fake character set for example. 
+ // Real use would integrate with packages such + // as code.google.com/p/go-charset + content, err := io.ReadAll(input) + if err != nil { + return nil, err + } + return bytes.NewReader(bytes.ToUpper(content)), nil + default: + return nil, fmt.Errorf("unhandled charset %q", charset) + } + } + header, err = dec.DecodeHeader("=?x-case?q?hello_?= =?x-case?q?world!?=") + if err != nil { + panic(err) + } + fmt.Println(header) + // Output: + // Éric <eric@example.org>, Anaïs <anais@example.org> + // ¡Hola, señor! + // HELLO WORLD! +} + +func ExampleFormatMediaType() { + mediatype := "text/html" + params := map[string]string{ + "charset": "utf-8", + } + + result := mime.FormatMediaType(mediatype, params) + + fmt.Println("result:", result) + // Output: + // result: text/html; charset=utf-8 +} + +func ExampleParseMediaType() { + mediatype, params, err := mime.ParseMediaType("text/html; charset=utf-8") + if err != nil { + panic(err) + } + + fmt.Println("type:", mediatype) + fmt.Println("charset:", params["charset"]) + // Output: + // type: text/html + // charset: utf-8 +} diff --git a/src/mime/grammar.go b/src/mime/grammar.go new file mode 100644 index 0000000..6a6f71d --- /dev/null +++ b/src/mime/grammar.go @@ -0,0 +1,32 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "strings" +) + +// isTSpecial reports whether rune is in 'tspecials' as defined by RFC +// 1521 and RFC 2045. +func isTSpecial(r rune) bool { + return strings.ContainsRune(`()<>@,;:\"/[]?=`, r) +} + +// isTokenChar reports whether rune is in 'token' as defined by RFC +// 1521 and RFC 2045. +func isTokenChar(r rune) bool { + // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, + // or tspecials> + return r > 0x20 && r < 0x7f && !isTSpecial(r) +} + +// isToken reports whether s is a 'token' as defined by RFC 1521 +// and RFC 2045. 
+func isToken(s string) bool { + if s == "" { + return false + } + return strings.IndexFunc(s, isNotTokenChar) < 0 +} diff --git a/src/mime/mediatype.go b/src/mime/mediatype.go new file mode 100644 index 0000000..bc8d417 --- /dev/null +++ b/src/mime/mediatype.go @@ -0,0 +1,410 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "errors" + "fmt" + "sort" + "strings" + "unicode" +) + +// FormatMediaType serializes mediatype t and the parameters +// param as a media type conforming to RFC 2045 and RFC 2616. +// The type and parameter names are written in lower-case. +// When any of the arguments result in a standard violation then +// FormatMediaType returns the empty string. +func FormatMediaType(t string, param map[string]string) string { + var b strings.Builder + if major, sub, ok := strings.Cut(t, "/"); !ok { + if !isToken(t) { + return "" + } + b.WriteString(strings.ToLower(t)) + } else { + if !isToken(major) || !isToken(sub) { + return "" + } + b.WriteString(strings.ToLower(major)) + b.WriteByte('/') + b.WriteString(strings.ToLower(sub)) + } + + attrs := make([]string, 0, len(param)) + for a := range param { + attrs = append(attrs, a) + } + sort.Strings(attrs) + + for _, attribute := range attrs { + value := param[attribute] + b.WriteByte(';') + b.WriteByte(' ') + if !isToken(attribute) { + return "" + } + b.WriteString(strings.ToLower(attribute)) + + needEnc := needsEncoding(value) + if needEnc { + // RFC 2231 section 4 + b.WriteByte('*') + } + b.WriteByte('=') + + if needEnc { + b.WriteString("utf-8''") + + offset := 0 + for index := 0; index < len(value); index++ { + ch := value[index] + // {RFC 2231 section 7} + // attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials> + if ch <= ' ' || ch >= 0x7F || + ch == '*' || ch == '\'' || ch == '%' || + isTSpecial(rune(ch)) { + + 
b.WriteString(value[offset:index]) + offset = index + 1 + + b.WriteByte('%') + b.WriteByte(upperhex[ch>>4]) + b.WriteByte(upperhex[ch&0x0F]) + } + } + b.WriteString(value[offset:]) + continue + } + + if isToken(value) { + b.WriteString(value) + continue + } + + b.WriteByte('"') + offset := 0 + for index := 0; index < len(value); index++ { + character := value[index] + if character == '"' || character == '\\' { + b.WriteString(value[offset:index]) + offset = index + b.WriteByte('\\') + } + } + b.WriteString(value[offset:]) + b.WriteByte('"') + } + return b.String() +} + +func checkMediaTypeDisposition(s string) error { + typ, rest := consumeToken(s) + if typ == "" { + return errors.New("mime: no media type") + } + if rest == "" { + return nil + } + if !strings.HasPrefix(rest, "/") { + return errors.New("mime: expected slash after first token") + } + subtype, rest := consumeToken(rest[1:]) + if subtype == "" { + return errors.New("mime: expected token after slash") + } + if rest != "" { + return errors.New("mime: unexpected content after media subtype") + } + return nil +} + +// ErrInvalidMediaParameter is returned by ParseMediaType if +// the media type value was found but there was an error parsing +// the optional parameters +var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter") + +// ParseMediaType parses a media type value and any optional +// parameters, per RFC 1521. Media types are the values in +// Content-Type and Content-Disposition headers (RFC 2183). +// On success, ParseMediaType returns the media type converted +// to lowercase and trimmed of white space and a non-nil map. +// If there is an error parsing the optional parameter, +// the media type will be returned along with the error +// ErrInvalidMediaParameter. +// The returned map, params, maps from the lowercase +// attribute to the attribute value with its case preserved. 
+func ParseMediaType(v string) (mediatype string, params map[string]string, err error) { + base, _, _ := strings.Cut(v, ";") + mediatype = strings.TrimSpace(strings.ToLower(base)) + + err = checkMediaTypeDisposition(mediatype) + if err != nil { + return "", nil, err + } + + params = make(map[string]string) + + // Map of base parameter name -> parameter name -> value + // for parameters containing a '*' character. + // Lazily initialized. + var continuation map[string]map[string]string + + v = v[len(base):] + for len(v) > 0 { + v = strings.TrimLeftFunc(v, unicode.IsSpace) + if len(v) == 0 { + break + } + key, value, rest := consumeMediaParam(v) + if key == "" { + if strings.TrimSpace(rest) == ";" { + // Ignore trailing semicolons. + // Not an error. + break + } + // Parse error. + return mediatype, nil, ErrInvalidMediaParameter + } + + pmap := params + if baseName, _, ok := strings.Cut(key, "*"); ok { + if continuation == nil { + continuation = make(map[string]map[string]string) + } + var ok bool + if pmap, ok = continuation[baseName]; !ok { + continuation[baseName] = make(map[string]string) + pmap = continuation[baseName] + } + } + if v, exists := pmap[key]; exists && v != value { + // Duplicate parameter names are incorrect, but we allow them if they are equal. + return "", nil, errors.New("mime: duplicate parameter name") + } + pmap[key] = value + v = rest + } + + // Stitch together any continuations or things with stars + // (i.e. 
RFC 2231 things with stars: "foo*0" or "foo*") + var buf strings.Builder + for key, pieceMap := range continuation { + singlePartKey := key + "*" + if v, ok := pieceMap[singlePartKey]; ok { + if decv, ok := decode2231Enc(v); ok { + params[key] = decv + } + continue + } + + buf.Reset() + valid := false + for n := 0; ; n++ { + simplePart := fmt.Sprintf("%s*%d", key, n) + if v, ok := pieceMap[simplePart]; ok { + valid = true + buf.WriteString(v) + continue + } + encodedPart := simplePart + "*" + v, ok := pieceMap[encodedPart] + if !ok { + break + } + valid = true + if n == 0 { + if decv, ok := decode2231Enc(v); ok { + buf.WriteString(decv) + } + } else { + decv, _ := percentHexUnescape(v) + buf.WriteString(decv) + } + } + if valid { + params[key] = buf.String() + } + } + + return +} + +func decode2231Enc(v string) (string, bool) { + sv := strings.SplitN(v, "'", 3) + if len(sv) != 3 { + return "", false + } + // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll + // need to decide how to expose it in the API. But I'm not sure + // anybody uses it in practice. + charset := strings.ToLower(sv[0]) + if len(charset) == 0 { + return "", false + } + if charset != "us-ascii" && charset != "utf-8" { + // TODO: unsupported encoding + return "", false + } + encv, err := percentHexUnescape(sv[2]) + if err != nil { + return "", false + } + return encv, true +} + +func isNotTokenChar(r rune) bool { + return !isTokenChar(r) +} + +// consumeToken consumes a token from the beginning of provided +// string, per RFC 2045 section 5.1 (referenced from 2183), and return +// the token consumed and the rest of the string. Returns ("", v) on +// failure to consume at least one character. 
+func consumeToken(v string) (token, rest string) { + notPos := strings.IndexFunc(v, isNotTokenChar) + if notPos == -1 { + return v, "" + } + if notPos == 0 { + return "", v + } + return v[0:notPos], v[notPos:] +} + +// consumeValue consumes a "value" per RFC 2045, where a value is +// either a 'token' or a 'quoted-string'. On success, consumeValue +// returns the value consumed (and de-quoted/escaped, if a +// quoted-string) and the rest of the string. On failure, returns +// ("", v). +func consumeValue(v string) (value, rest string) { + if v == "" { + return + } + if v[0] != '"' { + return consumeToken(v) + } + + // parse a quoted-string + buffer := new(strings.Builder) + for i := 1; i < len(v); i++ { + r := v[i] + if r == '"' { + return buffer.String(), v[i+1:] + } + // When MSIE sends a full file path (in "intranet mode"), it does not + // escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt". + // + // No known MIME generators emit unnecessary backslash escapes + // for simple token characters like numbers and letters. + // + // If we see an unnecessary backslash escape, assume it is from MSIE + // and intended as a literal backslash. This makes Go servers deal better + // with MSIE without affecting the way they handle conforming MIME + // generators. + if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) { + buffer.WriteByte(v[i+1]) + i++ + continue + } + if r == '\r' || r == '\n' { + return "", v + } + buffer.WriteByte(v[i]) + } + // Did not find end quote. 
+ return "", v +} + +func consumeMediaParam(v string) (param, value, rest string) { + rest = strings.TrimLeftFunc(v, unicode.IsSpace) + if !strings.HasPrefix(rest, ";") { + return "", "", v + } + + rest = rest[1:] // consume semicolon + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + param, rest = consumeToken(rest) + param = strings.ToLower(param) + if param == "" { + return "", "", v + } + + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + if !strings.HasPrefix(rest, "=") { + return "", "", v + } + rest = rest[1:] // consume equals sign + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + value, rest2 := consumeValue(rest) + if value == "" && rest2 == rest { + return "", "", v + } + rest = rest2 + return param, value, rest +} + +func percentHexUnescape(s string) (string, error) { + // Count %, check that they're well-formed. + percents := 0 + for i := 0; i < len(s); { + if s[i] != '%' { + i++ + continue + } + percents++ + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + s = s[i:] + if len(s) > 3 { + s = s[0:3] + } + return "", fmt.Errorf("mime: bogus characters after %%: %q", s) + } + i += 3 + } + if percents == 0 { + return s, nil + } + + t := make([]byte, len(s)-2*percents) + j := 0 + for i := 0; i < len(s); { + switch s[i] { + case '%': + t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) + j++ + i += 3 + default: + t[j] = s[i] + j++ + i++ + } + } + return string(t), nil +} + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} + +func unhex(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 0 +} diff --git a/src/mime/mediatype_test.go b/src/mime/mediatype_test.go new file mode 100644 index 0000000..1458cdb --- /dev/null +++ b/src/mime/mediatype_test.go @@ -0,0 +1,540 @@ +// Copyright 
2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "reflect" + "strings" + "testing" +) + +func TestConsumeToken(t *testing.T) { + tests := [...][3]string{ + {"foo bar", "foo", " bar"}, + {"bar", "bar", ""}, + {"", "", ""}, + {" foo", "", " foo"}, + } + for _, test := range tests { + token, rest := consumeToken(test[0]) + expectedToken := test[1] + expectedRest := test[2] + if token != expectedToken { + t.Errorf("expected to consume token '%s', not '%s' from '%s'", + expectedToken, token, test[0]) + } else if rest != expectedRest { + t.Errorf("expected to have left '%s', not '%s' after reading token '%s' from '%s'", + expectedRest, rest, token, test[0]) + } + } +} + +func TestConsumeValue(t *testing.T) { + tests := [...][3]string{ + {"foo bar", "foo", " bar"}, + {"bar", "bar", ""}, + {" bar ", "", " bar "}, + {`"My value"end`, "My value", "end"}, + {`"My value" end`, "My value", " end"}, + {`"\\" rest`, "\\", " rest"}, + {`"My \" value"end`, "My \" value", "end"}, + {`"\" rest`, "", `"\" rest`}, + {`"C:\dev\go\robots.txt"`, `C:\dev\go\robots.txt`, ""}, + {`"C:\新建文件夹\中文第二次测试.mp4"`, `C:\新建文件夹\中文第二次测试.mp4`, ""}, + } + for _, test := range tests { + value, rest := consumeValue(test[0]) + expectedValue := test[1] + expectedRest := test[2] + if value != expectedValue { + t.Errorf("expected to consume value [%s], not [%s] from [%s]", + expectedValue, value, test[0]) + } else if rest != expectedRest { + t.Errorf("expected to have left [%s], not [%s] after reading value [%s] from [%s]", + expectedRest, rest, value, test[0]) + } + } +} + +func TestConsumeMediaParam(t *testing.T) { + tests := [...][4]string{ + {" ; foo=bar", "foo", "bar", ""}, + {"; foo=bar", "foo", "bar", ""}, + {";foo=bar", "foo", "bar", ""}, + {";FOO=bar", "foo", "bar", ""}, + {`;foo="bar"`, "foo", "bar", ""}, + {`;foo="bar"; `, "foo", "bar", "; "}, + {`;foo="bar"; foo=baz`, "foo", 
"bar", "; foo=baz"}, + {` ; boundary=----CUT;`, "boundary", "----CUT", ";"}, + {` ; key=value; blah="value";name="foo" `, "key", "value", `; blah="value";name="foo" `}, + {`; blah="value";name="foo" `, "blah", "value", `;name="foo" `}, + {`;name="foo" `, "name", "foo", ` `}, + } + for _, test := range tests { + param, value, rest := consumeMediaParam(test[0]) + expectedParam := test[1] + expectedValue := test[2] + expectedRest := test[3] + if param != expectedParam { + t.Errorf("expected to consume param [%s], not [%s] from [%s]", + expectedParam, param, test[0]) + } else if value != expectedValue { + t.Errorf("expected to consume value [%s], not [%s] from [%s]", + expectedValue, value, test[0]) + } else if rest != expectedRest { + t.Errorf("expected to have left [%s], not [%s] after reading [%s/%s] from [%s]", + expectedRest, rest, param, value, test[0]) + } + } +} + +type mediaTypeTest struct { + in string + t string + p map[string]string +} + +func TestParseMediaType(t *testing.T) { + // Convenience map initializer + m := func(s ...string) map[string]string { + sm := make(map[string]string) + for i := 0; i < len(s); i += 2 { + sm[s[i]] = s[i+1] + } + return sm + } + + nameFoo := map[string]string{"name": "foo"} + tests := []mediaTypeTest{ + {`form-data; name="foo"`, "form-data", nameFoo}, + {` form-data ; name=foo`, "form-data", nameFoo}, + {`FORM-DATA;name="foo"`, "form-data", nameFoo}, + {` FORM-DATA ; name="foo"`, "form-data", nameFoo}, + {` FORM-DATA ; name="foo"`, "form-data", nameFoo}, + + {`form-data; key=value; blah="value";name="foo" `, + "form-data", + m("key", "value", "blah", "value", "name", "foo")}, + + {`foo; key=val1; key=the-key-appears-again-which-is-bogus`, + "", m()}, + + // From RFC 2231: + {`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`, + "application/x-stuff", + m("title", "This is ***fun***")}, + + {`message/external-body; access-type=URL; ` + + `URL*0="ftp://";` + + 
`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`, + "message/external-body", + m("access-type", "URL", + "url", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar")}, + + {`application/x-stuff; ` + + `title*0*=us-ascii'en'This%20is%20even%20more%20; ` + + `title*1*=%2A%2A%2Afun%2A%2A%2A%20; ` + + `title*2="isn't it!"`, + "application/x-stuff", + m("title", "This is even more ***fun*** isn't it!")}, + + // Tests from http://greenbytes.de/tech/tc2231/ + // Note: Backslash escape handling is a bit loose, like MSIE. + + // #attonly + {`attachment`, + "attachment", + m()}, + // #attonlyucase + {`ATTACHMENT`, + "attachment", + m()}, + // #attwithasciifilename + {`attachment; filename="foo.html"`, + "attachment", + m("filename", "foo.html")}, + // #attwithasciifilename25 + {`attachment; filename="0000000000111111111122222"`, + "attachment", + m("filename", "0000000000111111111122222")}, + // #attwithasciifilename35 + {`attachment; filename="00000000001111111111222222222233333"`, + "attachment", + m("filename", "00000000001111111111222222222233333")}, + // #attwithasciifnescapedchar + {`attachment; filename="f\oo.html"`, + "attachment", + m("filename", "f\\oo.html")}, + // #attwithasciifnescapedquote + {`attachment; filename="\"quoting\" tested.html"`, + "attachment", + m("filename", `"quoting" tested.html`)}, + // #attwithquotedsemicolon + {`attachment; filename="Here's a semicolon;.html"`, + "attachment", + m("filename", "Here's a semicolon;.html")}, + // #attwithfilenameandextparam + {`attachment; foo="bar"; filename="foo.html"`, + "attachment", + m("foo", "bar", "filename", "foo.html")}, + // #attwithfilenameandextparamescaped + {`attachment; foo="\"\\";filename="foo.html"`, + "attachment", + m("foo", "\"\\", "filename", "foo.html")}, + // #attwithasciifilenameucase + {`attachment; FILENAME="foo.html"`, + "attachment", + m("filename", "foo.html")}, + // #attwithasciifilenamenq + {`attachment; filename=foo.html`, + "attachment", + m("filename", 
"foo.html")}, + // #attwithasciifilenamenqs + {`attachment; filename=foo.html ;`, + "attachment", + m("filename", "foo.html")}, + // #attwithfntokensq + {`attachment; filename='foo.html'`, + "attachment", + m("filename", "'foo.html'")}, + // #attwithisofnplain + {`attachment; filename="foo-ä.html"`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithutf8fnplain + {`attachment; filename="foo-ä.html"`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithfnrawpctenca + {`attachment; filename="foo-%41.html"`, + "attachment", + m("filename", "foo-%41.html")}, + // #attwithfnusingpct + {`attachment; filename="50%.html"`, + "attachment", + m("filename", "50%.html")}, + // #attwithfnrawpctencaq + {`attachment; filename="foo-%\41.html"`, + "attachment", + m("filename", "foo-%\\41.html")}, + // #attwithnamepct + {`attachment; name="foo-%41.html"`, + "attachment", + m("name", "foo-%41.html")}, + // #attwithfilenamepctandiso + {`attachment; name="ä-%41.html"`, + "attachment", + m("name", "ä-%41.html")}, + // #attwithfnrawpctenclong + {`attachment; filename="foo-%c3%a4-%e2%82%ac.html"`, + "attachment", + m("filename", "foo-%c3%a4-%e2%82%ac.html")}, + // #attwithasciifilenamews1 + {`attachment; filename ="foo.html"`, + "attachment", + m("filename", "foo.html")}, + // #attmissingdisposition + {`filename=foo.html`, + "", m()}, + // #attmissingdisposition2 + {`x=y; filename=foo.html`, + "", m()}, + // #attmissingdisposition3 + {`"foo; filename=bar;baz"; filename=qux`, + "", m()}, + // #attmissingdisposition4 + {`filename=foo.html, filename=bar.html`, + "", m()}, + // #emptydisposition + {`; filename=foo.html`, + "", m()}, + // #doublecolon + {`: inline; attachment; filename=foo.html`, + "", m()}, + // #attandinline + {`inline; attachment; filename=foo.html`, + "", m()}, + // #attandinline2 + {`attachment; inline; filename=foo.html`, + "", m()}, + // #attbrokenquotedfn + {`attachment; filename="foo.html".txt`, + "", m()}, + // #attbrokenquotedfn2 + {`attachment; 
filename="bar`, + "", m()}, + // #attbrokenquotedfn3 + {`attachment; filename=foo"bar;baz"qux`, + "", m()}, + // #attmultinstances + {`attachment; filename=foo.html, attachment; filename=bar.html`, + "", m()}, + // #attmissingdelim + {`attachment; foo=foo filename=bar`, + "", m()}, + // #attmissingdelim2 + {`attachment; filename=bar foo=foo`, + "", m()}, + // #attmissingdelim3 + {`attachment filename=bar`, + "", m()}, + // #attreversed + {`filename=foo.html; attachment`, + "", m()}, + // #attconfusedparam + {`attachment; xfilename=foo.html`, + "attachment", + m("xfilename", "foo.html")}, + // #attcdate + {`attachment; creation-date="Wed, 12 Feb 1997 16:29:51 -0500"`, + "attachment", + m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")}, + // #attmdate + {`attachment; modification-date="Wed, 12 Feb 1997 16:29:51 -0500"`, + "attachment", + m("modification-date", "Wed, 12 Feb 1997 16:29:51 -0500")}, + // #dispext + {`foobar`, "foobar", m()}, + // #dispextbadfn + {`attachment; example="filename=example.txt"`, + "attachment", + m("example", "filename=example.txt")}, + // #attwithfn2231utf8 + {`attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html`, + "attachment", + m("filename", "foo-ä-€.html")}, + // #attwithfn2231noc + {`attachment; filename*=''foo-%c3%a4-%e2%82%ac.html`, + "attachment", + m()}, + // #attwithfn2231utf8comp + {`attachment; filename*=UTF-8''foo-a%cc%88.html`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithfn2231ws2 + {`attachment; filename*= UTF-8''foo-%c3%a4.html`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithfn2231ws3 + {`attachment; filename* =UTF-8''foo-%c3%a4.html`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithfn2231quot + {`attachment; filename*="UTF-8''foo-%c3%a4.html"`, + "attachment", + m("filename", "foo-ä.html")}, + // #attwithfn2231quot2 + {`attachment; filename*="foo%20bar.html"`, + "attachment", + m()}, + // #attwithfn2231singleqmissing + {`attachment; filename*=UTF-8'foo-%c3%a4.html`, + 
"attachment", + m()}, + // #attwithfn2231nbadpct1 + {`attachment; filename*=UTF-8''foo%`, + "attachment", + m()}, + // #attwithfn2231nbadpct2 + {`attachment; filename*=UTF-8''f%oo.html`, + "attachment", + m()}, + // #attwithfn2231dpct + {`attachment; filename*=UTF-8''A-%2541.html`, + "attachment", + m("filename", "A-%41.html")}, + // #attfncont + {`attachment; filename*0="foo."; filename*1="html"`, + "attachment", + m("filename", "foo.html")}, + // #attfncontenc + {`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`, + "attachment", + m("filename", "foo-ä.html")}, + // #attfncontlz + {`attachment; filename*0="foo"; filename*01="bar"`, + "attachment", + m("filename", "foo")}, + // #attfncontnc + {`attachment; filename*0="foo"; filename*2="bar"`, + "attachment", + m("filename", "foo")}, + // #attfnconts1 + {`attachment; filename*1="foo."; filename*2="html"`, + "attachment", m()}, + // #attfncontord + {`attachment; filename*1="bar"; filename*0="foo"`, + "attachment", + m("filename", "foobar")}, + // #attfnboth + {`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`, + "attachment", + m("filename", "foo-ä.html")}, + // #attfnboth2 + {`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`, + "attachment", + m("filename", "foo-ä.html")}, + // #attfnboth3 + {`attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4`, + "attachment", + m()}, + // #attnewandfn + {`attachment; foobar=x; filename="foo.html"`, + "attachment", + m("foobar", "x", "filename", "foo.html")}, + + // Browsers also just send UTF-8 directly without RFC 2231, + // at least when the source page is served with UTF-8. + {`form-data; firstname="Брэд"; lastname="Фицпатрик"`, + "form-data", + m("firstname", "Брэд", "lastname", "Фицпатрик")}, + + // Empty string used to be mishandled. + {`foo; bar=""`, "foo", m("bar", "")}, + + // Microsoft browsers in intranet mode do not think they need to escape \ in file name. 
+ {`form-data; name="file"; filename="C:\dev\go\robots.txt"`, "form-data", m("name", "file", "filename", `C:\dev\go\robots.txt`)}, + {`form-data; name="file"; filename="C:\新建文件夹\中文第二次测试.mp4"`, "form-data", m("name", "file", "filename", `C:\新建文件夹\中文第二次测试.mp4`)}, + + // issue #46323 (https://github.com/golang/go/issues/46323) + { + // example from rfc2231-p.3 (https://datatracker.ietf.org/doc/html/rfc2231) + `message/external-body; access-type=URL; + URL*0="ftp://"; + URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar";`, // <-- trailing semicolon + `message/external-body`, + m("access-type", "URL", "url", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"), + }, + + // Issue #48866: duplicate parameters containing equal values should be allowed + {`text; charset=utf-8; charset=utf-8; format=fixed`, "text", m("charset", "utf-8", "format", "fixed")}, + {`text; charset=utf-8; format=flowed; charset=utf-8`, "text", m("charset", "utf-8", "format", "flowed")}, + } + for _, test := range tests { + mt, params, err := ParseMediaType(test.in) + if err != nil { + if test.t != "" { + t.Errorf("for input %#q, unexpected error: %v", test.in, err) + continue + } + continue + } + if g, e := mt, test.t; g != e { + t.Errorf("for input %#q, expected type %q, got %q", + test.in, e, g) + continue + } + if len(params) == 0 && len(test.p) == 0 { + continue + } + if !reflect.DeepEqual(params, test.p) { + t.Errorf("for input %#q, wrong params.\n"+ + "expected: %#v\n"+ + " got: %#v", + test.in, test.p, params) + } + } +} + +type badMediaTypeTest struct { + in string + mt string + err string +} + +var badMediaTypeTests = []badMediaTypeTest{ + {"bogus ;=========", "bogus", "mime: invalid media parameter"}, + // The following example is from real email delivered by gmail (error: missing semicolon) + // and it is there to check behavior described in #19498 + {"application/pdf; x-mac-type=\"3F3F3F3F\"; x-mac-creator=\"3F3F3F3F\" name=\"a.pdf\";", + "application/pdf", "mime: invalid 
media parameter"}, + {"bogus/<script>alert</script>", "", "mime: expected token after slash"}, + {"bogus/bogus<script>alert</script>", "", "mime: unexpected content after media subtype"}, + // Tests from http://greenbytes.de/tech/tc2231/ + {`"attachment"`, "attachment", "mime: no media type"}, + {"attachment; filename=foo,bar.html", "attachment", "mime: invalid media parameter"}, + {"attachment; ;filename=foo", "attachment", "mime: invalid media parameter"}, + {"attachment; filename=foo bar.html", "attachment", "mime: invalid media parameter"}, + {`attachment; filename="foo.html"; filename="bar.html"`, "attachment", "mime: duplicate parameter name"}, + {"attachment; filename=foo[1](2).html", "attachment", "mime: invalid media parameter"}, + {"attachment; filename=foo-ä.html", "attachment", "mime: invalid media parameter"}, + {"attachment; filename=foo-ä.html", "attachment", "mime: invalid media parameter"}, + {`attachment; filename *=UTF-8''foo-%c3%a4.html`, "attachment", "mime: invalid media parameter"}, +} + +func TestParseMediaTypeBogus(t *testing.T) { + for _, tt := range badMediaTypeTests { + mt, params, err := ParseMediaType(tt.in) + if err == nil { + t.Errorf("ParseMediaType(%q) = nil error; want parse error", tt.in) + continue + } + if err.Error() != tt.err { + t.Errorf("ParseMediaType(%q) = err %q; want %q", tt.in, err.Error(), tt.err) + } + if params != nil { + t.Errorf("ParseMediaType(%q): got non-nil params on error", tt.in) + } + if err != ErrInvalidMediaParameter && mt != "" { + t.Errorf("ParseMediaType(%q): got unexpected non-empty media type string", tt.in) + } + if err == ErrInvalidMediaParameter && mt != tt.mt { + t.Errorf("ParseMediaType(%q): in case of invalid parameters: expected type %q, got %q", tt.in, tt.mt, mt) + } + } +} + +type formatTest struct { + typ string + params map[string]string + want string +} + +var formatTests = []formatTest{ + {"noslash", map[string]string{"X": "Y"}, "noslash; x=Y"}, // e.g. 
Content-Disposition values (RFC 2183); issue 11289 + {"foo bar/baz", nil, ""}, + {"foo/bar baz", nil, ""}, + {"attachment", map[string]string{"filename": "ĄĄŽŽČČŠŠ"}, "attachment; filename*=utf-8''%C4%84%C4%84%C5%BD%C5%BD%C4%8C%C4%8C%C5%A0%C5%A0"}, + {"attachment", map[string]string{"filename": "ÁÁÊÊÇÇÎÎ"}, "attachment; filename*=utf-8''%C3%81%C3%81%C3%8A%C3%8A%C3%87%C3%87%C3%8E%C3%8E"}, + {"attachment", map[string]string{"filename": "数据统计.png"}, "attachment; filename*=utf-8''%E6%95%B0%E6%8D%AE%E7%BB%9F%E8%AE%A1.png"}, + {"foo/BAR", nil, "foo/bar"}, + {"foo/BAR", map[string]string{"X": "Y"}, "foo/bar; x=Y"}, + {"foo/BAR", map[string]string{"space": "With space"}, `foo/bar; space="With space"`}, + {"foo/BAR", map[string]string{"quote": `With "quote`}, `foo/bar; quote="With \"quote"`}, + {"foo/BAR", map[string]string{"bslash": `With \backslash`}, `foo/bar; bslash="With \\backslash"`}, + {"foo/BAR", map[string]string{"both": `With \backslash and "quote`}, `foo/bar; both="With \\backslash and \"quote"`}, + {"foo/BAR", map[string]string{"": "empty attribute"}, ""}, + {"foo/BAR", map[string]string{"bad attribute": "baz"}, ""}, + {"foo/BAR", map[string]string{"nonascii": "not an ascii character: ä"}, "foo/bar; nonascii*=utf-8''not%20an%20ascii%20character%3A%20%C3%A4"}, + {"foo/BAR", map[string]string{"ctl": "newline: \n nil: \000"}, "foo/bar; ctl*=utf-8''newline%3A%20%0A%20nil%3A%20%00"}, + {"foo/bar", map[string]string{"a": "av", "b": "bv", "c": "cv"}, "foo/bar; a=av; b=bv; c=cv"}, + {"foo/bar", map[string]string{"0": "'", "9": "'"}, "foo/bar; 0='; 9='"}, + {"foo", map[string]string{"bar": ""}, `foo; bar=""`}, +} + +func TestFormatMediaType(t *testing.T) { + for i, tt := range formatTests { + got := FormatMediaType(tt.typ, tt.params) + if got != tt.want { + t.Errorf("%d. FormatMediaType(%q, %v) = %q; want %q", i, tt.typ, tt.params, got, tt.want) + } + if got == "" { + continue + } + typ, params, err := ParseMediaType(got) + if err != nil { + t.Errorf("%d. 
ParseMediaType(%q) err: %v", i, got, err) + } + if typ != strings.ToLower(tt.typ) { + t.Errorf("%d. ParseMediaType(%q) typ = %q; want %q", i, got, typ, tt.typ) + } + for k, v := range tt.params { + k = strings.ToLower(k) + if params[k] != v { + t.Errorf("%d. ParseMediaType(%q) params[%s] = %q; want %q", i, got, k, params[k], v) + } + } + } +} diff --git a/src/mime/multipart/example_test.go b/src/mime/multipart/example_test.go new file mode 100644 index 0000000..fe154ac --- /dev/null +++ b/src/mime/multipart/example_test.go @@ -0,0 +1,52 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package multipart_test + +import ( + "fmt" + "io" + "log" + "mime" + "mime/multipart" + "net/mail" + "strings" +) + +func ExampleNewReader() { + msg := &mail.Message{ + Header: map[string][]string{ + "Content-Type": {"multipart/mixed; boundary=foo"}, + }, + Body: strings.NewReader( + "--foo\r\nFoo: one\r\n\r\nA section\r\n" + + "--foo\r\nFoo: two\r\n\r\nAnd another\r\n" + + "--foo--\r\n"), + } + mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type")) + if err != nil { + log.Fatal(err) + } + if strings.HasPrefix(mediaType, "multipart/") { + mr := multipart.NewReader(msg.Body, params["boundary"]) + for { + p, err := mr.NextPart() + if err == io.EOF { + return + } + if err != nil { + log.Fatal(err) + } + slurp, err := io.ReadAll(p) + if err != nil { + log.Fatal(err) + } + fmt.Printf("Part %q: %q\n", p.Header.Get("Foo"), slurp) + } + } + + // Output: + // Part "one": "A section" + // Part "two": "And another" +} diff --git a/src/mime/multipart/formdata.go b/src/mime/multipart/formdata.go new file mode 100644 index 0000000..267cfe8 --- /dev/null +++ b/src/mime/multipart/formdata.go @@ -0,0 +1,301 @@ +// Copyright 2011 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package multipart + +import ( + "bytes" + "errors" + "internal/godebug" + "io" + "math" + "net/textproto" + "os" + "strconv" +) + +// ErrMessageTooLarge is returned by ReadForm if the message form +// data is too large to be processed. +var ErrMessageTooLarge = errors.New("multipart: message too large") + +// TODO(adg,bradfitz): find a way to unify the DoS-prevention strategy here +// with that of the http package's ParseForm. + +// ReadForm parses an entire multipart message whose parts have +// a Content-Disposition of "form-data". +// It stores up to maxMemory bytes + 10MB (reserved for non-file parts) +// in memory. File parts which can't be stored in memory will be stored on +// disk in temporary files. +// It returns ErrMessageTooLarge if all non-file parts can't be stored in +// memory. +func (r *Reader) ReadForm(maxMemory int64) (*Form, error) { + return r.readForm(maxMemory) +} + +var ( + multipartFiles = godebug.New("multipartfiles") + multipartMaxParts = godebug.New("multipartmaxparts") +) + +func (r *Reader) readForm(maxMemory int64) (_ *Form, err error) { + form := &Form{make(map[string][]string), make(map[string][]*FileHeader)} + var ( + file *os.File + fileOff int64 + ) + numDiskFiles := 0 + combineFiles := true + if multipartFiles.Value() == "distinct" { + combineFiles = false + } + maxParts := 1000 + if s := multipartMaxParts.Value(); s != "" { + if v, err := strconv.Atoi(s); err == nil && v >= 0 { + maxParts = v + } + } + maxHeaders := maxMIMEHeaders() + + defer func() { + if file != nil { + if cerr := file.Close(); err == nil { + err = cerr + } + } + if combineFiles && numDiskFiles > 1 { + for _, fhs := range form.File { + for _, fh := range fhs { + fh.tmpshared = true + } + } + } + if err != nil { + form.RemoveAll() + if file != nil { + os.Remove(file.Name()) + } + } + }() + + // maxFileMemoryBytes is the maximum bytes of file data we 
will store in memory. + // Data past this limit is written to disk. + // This limit strictly applies to content, not metadata (filenames, MIME headers, etc.), + // since metadata is always stored in memory, not disk. + // + // maxMemoryBytes is the maximum bytes we will store in memory, including file content, + // non-file part values, metdata, and map entry overhead. + // + // We reserve an additional 10 MB in maxMemoryBytes for non-file data. + // + // The relationship between these parameters, as well as the overly-large and + // unconfigurable 10 MB added on to maxMemory, is unfortunate but difficult to change + // within the constraints of the API as documented. + maxFileMemoryBytes := maxMemory + maxMemoryBytes := maxMemory + int64(10<<20) + if maxMemoryBytes <= 0 { + if maxMemory < 0 { + maxMemoryBytes = 0 + } else { + maxMemoryBytes = math.MaxInt64 + } + } + var copyBuf []byte + for { + p, err := r.nextPart(false, maxMemoryBytes, maxHeaders) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if maxParts <= 0 { + return nil, ErrMessageTooLarge + } + maxParts-- + + name := p.FormName() + if name == "" { + continue + } + filename := p.FileName() + + // Multiple values for the same key (one map entry, longer slice) are cheaper + // than the same number of values for different keys (many map entries), but + // using a consistent per-value cost for overhead is simpler. + const mapEntryOverhead = 200 + maxMemoryBytes -= int64(len(name)) + maxMemoryBytes -= mapEntryOverhead + if maxMemoryBytes < 0 { + // We can't actually take this path, since nextPart would already have + // rejected the MIME headers for being too large. Check anyway. 
+ return nil, ErrMessageTooLarge + } + + var b bytes.Buffer + + if filename == "" { + // value, store as string in memory + n, err := io.CopyN(&b, p, maxMemoryBytes+1) + if err != nil && err != io.EOF { + return nil, err + } + maxMemoryBytes -= n + if maxMemoryBytes < 0 { + return nil, ErrMessageTooLarge + } + form.Value[name] = append(form.Value[name], b.String()) + continue + } + + // file, store in memory or on disk + const fileHeaderSize = 100 + maxMemoryBytes -= mimeHeaderSize(p.Header) + maxMemoryBytes -= mapEntryOverhead + maxMemoryBytes -= fileHeaderSize + if maxMemoryBytes < 0 { + return nil, ErrMessageTooLarge + } + for _, v := range p.Header { + maxHeaders -= int64(len(v)) + } + fh := &FileHeader{ + Filename: filename, + Header: p.Header, + } + n, err := io.CopyN(&b, p, maxFileMemoryBytes+1) + if err != nil && err != io.EOF { + return nil, err + } + if n > maxFileMemoryBytes { + if file == nil { + file, err = os.CreateTemp(r.tempDir, "multipart-") + if err != nil { + return nil, err + } + } + numDiskFiles++ + if _, err := file.Write(b.Bytes()); err != nil { + return nil, err + } + if copyBuf == nil { + copyBuf = make([]byte, 32*1024) // same buffer size as io.Copy uses + } + // os.File.ReadFrom will allocate its own copy buffer if we let io.Copy use it. 
+ type writerOnly struct{ io.Writer } + remainingSize, err := io.CopyBuffer(writerOnly{file}, p, copyBuf) + if err != nil { + return nil, err + } + fh.tmpfile = file.Name() + fh.Size = int64(b.Len()) + remainingSize + fh.tmpoff = fileOff + fileOff += fh.Size + if !combineFiles { + if err := file.Close(); err != nil { + return nil, err + } + file = nil + } + } else { + fh.content = b.Bytes() + fh.Size = int64(len(fh.content)) + maxFileMemoryBytes -= n + maxMemoryBytes -= n + } + form.File[name] = append(form.File[name], fh) + } + + return form, nil +} + +func mimeHeaderSize(h textproto.MIMEHeader) (size int64) { + size = 400 + for k, vs := range h { + size += int64(len(k)) + size += 200 // map entry overhead + for _, v := range vs { + size += int64(len(v)) + } + } + return size +} + +// Form is a parsed multipart form. +// Its File parts are stored either in memory or on disk, +// and are accessible via the *FileHeader's Open method. +// Its Value parts are stored as strings. +// Both are keyed by field name. +type Form struct { + Value map[string][]string + File map[string][]*FileHeader +} + +// RemoveAll removes any temporary files associated with a Form. +func (f *Form) RemoveAll() error { + var err error + for _, fhs := range f.File { + for _, fh := range fhs { + if fh.tmpfile != "" { + e := os.Remove(fh.tmpfile) + if e != nil && !errors.Is(e, os.ErrNotExist) && err == nil { + err = e + } + } + } + } + return err +} + +// A FileHeader describes a file part of a multipart request. +type FileHeader struct { + Filename string + Header textproto.MIMEHeader + Size int64 + + content []byte + tmpfile string + tmpoff int64 + tmpshared bool +} + +// Open opens and returns the FileHeader's associated File. 
+func (fh *FileHeader) Open() (File, error) { + if b := fh.content; b != nil { + r := io.NewSectionReader(bytes.NewReader(b), 0, int64(len(b))) + return sectionReadCloser{r, nil}, nil + } + if fh.tmpshared { + f, err := os.Open(fh.tmpfile) + if err != nil { + return nil, err + } + r := io.NewSectionReader(f, fh.tmpoff, fh.Size) + return sectionReadCloser{r, f}, nil + } + return os.Open(fh.tmpfile) +} + +// File is an interface to access the file part of a multipart message. +// Its contents may be either stored in memory or on disk. +// If stored on disk, the File's underlying concrete type will be an *os.File. +type File interface { + io.Reader + io.ReaderAt + io.Seeker + io.Closer +} + +// helper types to turn a []byte into a File + +type sectionReadCloser struct { + *io.SectionReader + io.Closer +} + +func (rc sectionReadCloser) Close() error { + if rc.Closer != nil { + return rc.Closer.Close() + } + return nil +} diff --git a/src/mime/multipart/formdata_test.go b/src/mime/multipart/formdata_test.go new file mode 100644 index 0000000..a618a64 --- /dev/null +++ b/src/mime/multipart/formdata_test.go @@ -0,0 +1,471 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 

package multipart

import (
	"bytes"
	"fmt"
	"io"
	"math"
	"net/textproto"
	"os"
	"strings"
	"testing"
)

// TestReadForm parses the four-part test message with a 25-byte file
// memory budget. The 20-byte filea fits in memory; the 18-byte fileb
// no longer fits in what is left and must end up on disk.
func TestReadForm(t *testing.T) {
	b := strings.NewReader(strings.ReplaceAll(message, "\n", "\r\n"))
	r := NewReader(b, boundary)
	f, err := r.ReadForm(25)
	if err != nil {
		t.Fatal("ReadForm:", err)
	}
	defer f.RemoveAll()
	if g, e := f.Value["texta"][0], textaValue; g != e {
		t.Errorf("texta value = %q, want %q", g, e)
	}
	if g, e := f.Value["textb"][0], textbValue; g != e {
		t.Errorf("textb value = %q, want %q", g, e)
	}
	fd := testFile(t, f.File["filea"][0], "filea.txt", fileaContents)
	if _, ok := fd.(*os.File); ok {
		t.Error("file is *os.File, should not be")
	}
	fd.Close()
	fd = testFile(t, f.File["fileb"][0], "fileb.txt", filebContents)
	if _, ok := fd.(*os.File); !ok {
		t.Errorf("file has unexpected underlying type %T", fd)
	}
	fd.Close()
}

// TestReadFormWithNamelessFile checks that a part whose filename is the
// empty string is stored as an ordinary value, not as a file.
func TestReadFormWithNamelessFile(t *testing.T) {
	b := strings.NewReader(strings.ReplaceAll(messageWithFileWithoutName, "\n", "\r\n"))
	r := NewReader(b, boundary)
	f, err := r.ReadForm(25)
	if err != nil {
		t.Fatal("ReadForm:", err)
	}
	defer f.RemoveAll()

	if g, e := f.Value["hiddenfile"][0], filebContents; g != e {
		t.Errorf("hiddenfile value = %q, want %q", g, e)
	}
}

// Issue 40430: Handle ReadForm(math.MaxInt64)
func TestReadFormMaxMemoryOverflow(t *testing.T) {
	b := strings.NewReader(strings.ReplaceAll(messageWithTextContentType, "\n", "\r\n"))
	r := NewReader(b, boundary)
	f, err := r.ReadForm(math.MaxInt64)
	if err != nil {
		t.Fatalf("ReadForm(MaxInt64): %v", err)
	}
	if f == nil {
		t.Fatal("ReadForm(MaxInt64): missing form")
	}
	// Clean up like the other ReadForm tests do.
	defer f.RemoveAll()
}

+func TestReadFormWithTextContentType(t *testing.T) { + // From https://github.com/golang/go/issues/24041 + b := strings.NewReader(strings.ReplaceAll(messageWithTextContentType, "\n", "\r\n")) + r := NewReader(b, boundary) + f, err := r.ReadForm(25) + if err != nil { + t.Fatal("ReadForm:", err) + } + defer
f.RemoveAll() + + if g, e := f.Value["texta"][0], textaValue; g != e { + t.Errorf("texta value = %q, want %q", g, e) + } +} + +func testFile(t *testing.T, fh *FileHeader, efn, econtent string) File { + if fh.Filename != efn { + t.Errorf("filename = %q, want %q", fh.Filename, efn) + } + if fh.Size != int64(len(econtent)) { + t.Errorf("size = %d, want %d", fh.Size, len(econtent)) + } + f, err := fh.Open() + if err != nil { + t.Fatal("opening file:", err) + } + b := new(strings.Builder) + _, err = io.Copy(b, f) + if err != nil { + t.Fatal("copying contents:", err) + } + if g := b.String(); g != econtent { + t.Errorf("contents = %q, want %q", g, econtent) + } + return f +} + +const ( + fileaContents = "This is a test file." + filebContents = "Another test file." + textaValue = "foo" + textbValue = "bar" + boundary = `MyBoundary` +) + +const messageWithFileWithoutName = ` +--MyBoundary +Content-Disposition: form-data; name="hiddenfile"; filename="" +Content-Type: text/plain + +` + filebContents + ` +--MyBoundary-- +` + +const messageWithTextContentType = ` +--MyBoundary +Content-Disposition: form-data; name="texta" +Content-Type: text/plain + +` + textaValue + ` +--MyBoundary +` + +const message = ` +--MyBoundary +Content-Disposition: form-data; name="filea"; filename="filea.txt" +Content-Type: text/plain + +` + fileaContents + ` +--MyBoundary +Content-Disposition: form-data; name="fileb"; filename="fileb.txt" +Content-Type: text/plain + +` + filebContents + ` +--MyBoundary +Content-Disposition: form-data; name="texta" + +` + textaValue + ` +--MyBoundary +Content-Disposition: form-data; name="textb" + +` + textbValue + ` +--MyBoundary-- +` + +func TestReadForm_NoReadAfterEOF(t *testing.T) { + maxMemory := int64(32) << 20 + boundary := `---------------------------8d345eef0d38dc9` + body := ` +-----------------------------8d345eef0d38dc9 +Content-Disposition: form-data; name="version" + +171 +-----------------------------8d345eef0d38dc9--` + + mr := 
NewReader(&failOnReadAfterErrorReader{t: t, r: strings.NewReader(body)}, boundary) + + f, err := mr.ReadForm(maxMemory) + if err != nil { + t.Fatal(err) + } + t.Logf("Got: %#v", f) +} + +// failOnReadAfterErrorReader is an io.Reader wrapping r. +// It fails t if any Read is called after a failing Read. +type failOnReadAfterErrorReader struct { + t *testing.T + r io.Reader + sawErr error +} + +func (r *failOnReadAfterErrorReader) Read(p []byte) (n int, err error) { + if r.sawErr != nil { + r.t.Fatalf("unexpected Read on Reader after previous read saw error %v", r.sawErr) + } + n, err = r.r.Read(p) + r.sawErr = err + return +} + +// TestReadForm_NonFileMaxMemory asserts that the ReadForm maxMemory limit is applied +// while processing non-file form data as well as file form data. +func TestReadForm_NonFileMaxMemory(t *testing.T) { + if testing.Short() { + t.Skip("skipping in -short mode") + } + n := 10 << 20 + largeTextValue := strings.Repeat("1", n) + message := `--MyBoundary +Content-Disposition: form-data; name="largetext" + +` + largeTextValue + ` +--MyBoundary-- +` + testBody := strings.ReplaceAll(message, "\n", "\r\n") + // Try parsing the form with increasing maxMemory values. + // Changes in how we account for non-file form data may cause the exact point + // where we change from rejecting the form as too large to accepting it to vary, + // but we should see both successes and failures. 
+ const failWhenMaxMemoryLessThan = 128 + for maxMemory := int64(0); maxMemory < failWhenMaxMemoryLessThan*2; maxMemory += 16 { + b := strings.NewReader(testBody) + r := NewReader(b, boundary) + f, err := r.ReadForm(maxMemory) + if err != nil { + continue + } + if g := f.Value["largetext"][0]; g != largeTextValue { + t.Errorf("largetext mismatch: got size: %v, expected size: %v", len(g), len(largeTextValue)) + } + f.RemoveAll() + if maxMemory < failWhenMaxMemoryLessThan { + t.Errorf("ReadForm(%v): no error, expect to hit memory limit when maxMemory < %v", maxMemory, failWhenMaxMemoryLessThan) + } + return + } + t.Errorf("ReadForm(x) failed for x < 1024, expect success") +} + +// TestReadForm_MetadataTooLarge verifies that we account for the size of field names, +// MIME headers, and map entry overhead while limiting the memory consumption of parsed forms. +func TestReadForm_MetadataTooLarge(t *testing.T) { + for _, test := range []struct { + name string + f func(*Writer) + }{{ + name: "large name", + f: func(fw *Writer) { + name := strings.Repeat("a", 10<<20) + w, _ := fw.CreateFormField(name) + w.Write([]byte("value")) + }, + }, { + name: "large MIME header", + f: func(fw *Writer) { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", `form-data; name="a"`) + h.Set("X-Foo", strings.Repeat("a", 10<<20)) + w, _ := fw.CreatePart(h) + w.Write([]byte("value")) + }, + }, { + name: "many parts", + f: func(fw *Writer) { + for i := 0; i < 110000; i++ { + w, _ := fw.CreateFormField("f") + w.Write([]byte("v")) + } + }, + }} { + t.Run(test.name, func(t *testing.T) { + var buf bytes.Buffer + fw := NewWriter(&buf) + test.f(fw) + if err := fw.Close(); err != nil { + t.Fatal(err) + } + fr := NewReader(&buf, fw.Boundary()) + _, err := fr.ReadForm(0) + if err != ErrMessageTooLarge { + t.Errorf("fr.ReadForm() = %v, want ErrMessageTooLarge", err) + } + }) + } +} + +// TestReadForm_ManyFiles_Combined tests that a multipart form containing many files only +// results in a 
single on-disk file. +func TestReadForm_ManyFiles_Combined(t *testing.T) { + const distinct = false + testReadFormManyFiles(t, distinct) +} + +// TestReadForm_ManyFiles_Distinct tests that setting GODEBUG=multipartfiles=distinct +// results in every file in a multipart form being placed in a distinct on-disk file. +func TestReadForm_ManyFiles_Distinct(t *testing.T) { + t.Setenv("GODEBUG", "multipartfiles=distinct") + const distinct = true + testReadFormManyFiles(t, distinct) +} + +func testReadFormManyFiles(t *testing.T, distinct bool) { + var buf bytes.Buffer + fw := NewWriter(&buf) + const numFiles = 10 + for i := 0; i < numFiles; i++ { + name := fmt.Sprint(i) + w, err := fw.CreateFormFile(name, name) + if err != nil { + t.Fatal(err) + } + w.Write([]byte(name)) + } + if err := fw.Close(); err != nil { + t.Fatal(err) + } + fr := NewReader(&buf, fw.Boundary()) + fr.tempDir = t.TempDir() + form, err := fr.ReadForm(0) + if err != nil { + t.Fatal(err) + } + for i := 0; i < numFiles; i++ { + name := fmt.Sprint(i) + if got := len(form.File[name]); got != 1 { + t.Fatalf("form.File[%q] has %v entries, want 1", name, got) + } + fh := form.File[name][0] + file, err := fh.Open() + if err != nil { + t.Fatalf("form.File[%q].Open() = %v", name, err) + } + if distinct { + if _, ok := file.(*os.File); !ok { + t.Fatalf("form.File[%q].Open: %T, want *os.File", name, file) + } + } + got, err := io.ReadAll(file) + file.Close() + if string(got) != name || err != nil { + t.Fatalf("read form.File[%q]: %q, %v; want %q, nil", name, string(got), err, name) + } + } + dir, err := os.Open(fr.tempDir) + if err != nil { + t.Fatal(err) + } + defer dir.Close() + names, err := dir.Readdirnames(0) + if err != nil { + t.Fatal(err) + } + wantNames := 1 + if distinct { + wantNames = numFiles + } + if len(names) != wantNames { + t.Fatalf("temp dir contains %v files; want 1", len(names)) + } + if err := form.RemoveAll(); err != nil { + t.Fatalf("form.RemoveAll() = %v", err) + } + names, err = 
dir.Readdirnames(0) + if err != nil { + t.Fatal(err) + } + if len(names) != 0 { + t.Fatalf("temp dir contains %v files; want 0", len(names)) + } +} + +func TestReadFormLimits(t *testing.T) { + for _, test := range []struct { + values int + files int + extraKeysPerFile int + wantErr error + godebug string + }{ + {values: 1000}, + {values: 1001, wantErr: ErrMessageTooLarge}, + {values: 500, files: 500}, + {values: 501, files: 500, wantErr: ErrMessageTooLarge}, + {files: 1000}, + {files: 1001, wantErr: ErrMessageTooLarge}, + {files: 1, extraKeysPerFile: 9998}, // plus Content-Disposition and Content-Type + {files: 1, extraKeysPerFile: 10000, wantErr: ErrMessageTooLarge}, + {godebug: "multipartmaxparts=100", values: 100}, + {godebug: "multipartmaxparts=100", values: 101, wantErr: ErrMessageTooLarge}, + {godebug: "multipartmaxheaders=100", files: 2, extraKeysPerFile: 48}, + {godebug: "multipartmaxheaders=100", files: 2, extraKeysPerFile: 50, wantErr: ErrMessageTooLarge}, + } { + name := fmt.Sprintf("values=%v/files=%v/extraKeysPerFile=%v", test.values, test.files, test.extraKeysPerFile) + if test.godebug != "" { + name += fmt.Sprintf("/godebug=%v", test.godebug) + } + t.Run(name, func(t *testing.T) { + if test.godebug != "" { + t.Setenv("GODEBUG", test.godebug) + } + var buf bytes.Buffer + fw := NewWriter(&buf) + for i := 0; i < test.values; i++ { + w, _ := fw.CreateFormField(fmt.Sprintf("field%v", i)) + fmt.Fprintf(w, "value %v", i) + } + for i := 0; i < test.files; i++ { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", + fmt.Sprintf(`form-data; name="file%v"; filename="file%v"`, i, i)) + h.Set("Content-Type", "application/octet-stream") + for j := 0; j < test.extraKeysPerFile; j++ { + h.Set(fmt.Sprintf("k%v", j), "v") + } + w, _ := fw.CreatePart(h) + fmt.Fprintf(w, "value %v", i) + } + if err := fw.Close(); err != nil { + t.Fatal(err) + } + fr := NewReader(bytes.NewReader(buf.Bytes()), fw.Boundary()) + form, err := fr.ReadForm(1 << 10) + if err == nil 
{ + defer form.RemoveAll() + } + if err != test.wantErr { + t.Errorf("ReadForm = %v, want %v", err, test.wantErr) + } + }) + } +} + +func BenchmarkReadForm(b *testing.B) { + for _, test := range []struct { + name string + form func(fw *Writer, count int) + }{{ + name: "fields", + form: func(fw *Writer, count int) { + for i := 0; i < count; i++ { + w, _ := fw.CreateFormField(fmt.Sprintf("field%v", i)) + fmt.Fprintf(w, "value %v", i) + } + }, + }, { + name: "files", + form: func(fw *Writer, count int) { + for i := 0; i < count; i++ { + w, _ := fw.CreateFormFile(fmt.Sprintf("field%v", i), fmt.Sprintf("file%v", i)) + fmt.Fprintf(w, "value %v", i) + } + }, + }} { + b.Run(test.name, func(b *testing.B) { + for _, maxMemory := range []int64{ + 0, + 1 << 20, + } { + var buf bytes.Buffer + fw := NewWriter(&buf) + test.form(fw, 10) + if err := fw.Close(); err != nil { + b.Fatal(err) + } + b.Run(fmt.Sprintf("maxMemory=%v", maxMemory), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + fr := NewReader(bytes.NewReader(buf.Bytes()), fw.Boundary()) + form, err := fr.ReadForm(maxMemory) + if err != nil { + b.Fatal(err) + } + form.RemoveAll() + } + + }) + } + }) + } +} diff --git a/src/mime/multipart/multipart.go b/src/mime/multipart/multipart.go new file mode 100644 index 0000000..066aa70 --- /dev/null +++ b/src/mime/multipart/multipart.go @@ -0,0 +1,487 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// + +/* +Package multipart implements MIME multipart parsing, as defined in RFC +2046. + +The implementation is sufficient for HTTP (RFC 2388) and the multipart +bodies generated by popular browsers. + +# Limits + +To protect against malicious inputs, this package sets limits on the size +of the MIME data it processes. 
+ +Reader.NextPart and Reader.NextRawPart limit the number of headers in a +part to 10000 and Reader.ReadForm limits the total number of headers in all +FileHeaders to 10000. +These limits may be adjusted with the GODEBUG=multipartmaxheaders=<values> +setting. + +Reader.ReadForm further limits the number of parts in a form to 1000. +This limit may be adjusted with the GODEBUG=multipartmaxparts=<value> +setting. +*/ +package multipart + +import ( + "bufio" + "bytes" + "fmt" + "internal/godebug" + "io" + "mime" + "mime/quotedprintable" + "net/textproto" + "path/filepath" + "strconv" + "strings" +) + +var emptyParams = make(map[string]string) + +// This constant needs to be at least 76 for this package to work correctly. +// This is because \r\n--separator_of_len_70- would fill the buffer and it +// wouldn't be safe to consume a single byte from it. +const peekBufferSize = 4096 + +// A Part represents a single part in a multipart body. +type Part struct { + // The headers of the body, if any, with the keys canonicalized + // in the same fashion that the Go http.Request headers are. + // For example, "foo-bar" changes case to "Foo-Bar" + Header textproto.MIMEHeader + + mr *Reader + + disposition string + dispositionParams map[string]string + + // r is either a reader directly reading from mr, or it's a + // wrapper around such a reader, decoding the + // Content-Transfer-Encoding + r io.Reader + + n int // known data bytes waiting in mr.bufReader + total int64 // total data bytes read already + err error // error to return when n == 0 + readErr error // read error observed from mr.bufReader +} + +// FormName returns the name parameter if p has a Content-Disposition +// of type "form-data". Otherwise it returns the empty string. +func (p *Part) FormName() string { + // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF + // of Content-Disposition value format. 
+ if p.dispositionParams == nil { + p.parseContentDisposition() + } + if p.disposition != "form-data" { + return "" + } + return p.dispositionParams["name"] +} + +// FileName returns the filename parameter of the Part's Content-Disposition +// header. If not empty, the filename is passed through filepath.Base (which is +// platform dependent) before being returned. +func (p *Part) FileName() string { + if p.dispositionParams == nil { + p.parseContentDisposition() + } + filename := p.dispositionParams["filename"] + if filename == "" { + return "" + } + // RFC 7578, Section 4.2 requires that if a filename is provided, the + // directory path information must not be used. + return filepath.Base(filename) +} + +func (p *Part) parseContentDisposition() { + v := p.Header.Get("Content-Disposition") + var err error + p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) + if err != nil { + p.dispositionParams = emptyParams + } +} + +// NewReader creates a new multipart Reader reading from r using the +// given MIME boundary. +// +// The boundary is usually obtained from the "boundary" parameter of +// the message's "Content-Type" header. Use mime.ParseMediaType to +// parse such headers. +func NewReader(r io.Reader, boundary string) *Reader { + b := []byte("\r\n--" + boundary + "--") + return &Reader{ + bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), + nl: b[:2], + nlDashBoundary: b[:len(b)-2], + dashBoundaryDash: b[2:], + dashBoundary: b[2 : len(b)-2], + } +} + +// stickyErrorReader is an io.Reader which never calls Read on its +// underlying Reader once an error has been seen. 
// (the io.Reader
// interface's contract promises nothing about the return values of
// Read calls after an error, yet this package does do multiple Reads
// after error)
type stickyErrorReader struct {
	r   io.Reader // underlying reader
	err error     // first error returned by r.Read, replayed on every later call
}

// Read forwards to the underlying reader until it reports an error.
// From then on it returns (0, err) with that same error and never
// calls the underlying reader again.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err != nil {
		return 0, r.err
	}
	n, r.err = r.r.Read(p)
	return n, r.err
}

// newPart parses the headers of the part that starts at the current
// position of mr's buffered reader and returns a Part whose body is read
// through a partReader.
//
// maxMIMEHeaderSize and maxMIMEHeaders are passed through to
// populateHeaders. Unless rawPart is set, a Content-Transfer-Encoding of
// "quoted-printable" (compared case-insensitively) is removed from the
// header and the body reader is wrapped in a quoted-printable decoder.
func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) {
	bp := &Part{
		Header: make(map[string][]string),
		mr:     mr,
	}
	if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil {
		return nil, err
	}
	bp.r = partReader{bp}

	// rawPart is used to switch between Reader.NextPart and Reader.NextRawPart.
	if !rawPart {
		const cte = "Content-Transfer-Encoding"
		if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
			bp.Header.Del(cte)
			bp.r = quotedprintable.NewReader(bp.r)
		}
	}
	return bp, nil
}

// populateHeaders reads the part's MIME header from the Reader's buffered
// input via readMIMEHeader and, on success, stores it in p.Header.
// An error whose text is exactly "message too large" is replaced by
// ErrMessageTooLarge; any other error is returned unchanged.
func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error {
	r := textproto.NewReader(p.mr.bufReader)
	header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders)
	if err == nil {
		p.Header = header
	}
	// TODO: Add a distinguishable error to net/textproto.
	if err != nil && err.Error() == "message too large" {
		err = ErrMessageTooLarge
	}
	return err
}

// Read reads the body of a part, after its headers and before the
// next part (if any) begins.
func (p *Part) Read(d []byte) (n int, err error) {
	return p.r.Read(d)
}

// partReader implements io.Reader by reading raw bytes directly from the
// wrapped *Part, without doing any Transfer-Encoding decoding.
type partReader struct {
	p *Part
}

// Read copies up to len(d) body bytes of the wrapped Part into d.
// It scans the bytes buffered in the Reader for the boundary, hands out
// only the bytes that scanUntilBoundary reports as body data, and returns
// the pending error (p.err) together with the last of those bytes.
func (pr partReader) Read(d []byte) (int, error) {
	p := pr.p
	br := p.mr.bufReader

	// Read into buffer until we identify some data to return,
	// or we find a reason to stop (boundary or read error).
	for p.n == 0 && p.err == nil {
		// Peeking exactly what is already buffered cannot trigger a read.
		peek, _ := br.Peek(br.Buffered())
		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
		if p.n == 0 && p.err == nil {
			// Force buffered I/O to read more into buffer.
			_, p.readErr = br.Peek(len(peek) + 1)
			// Running out of input before a boundary was found is
			// recorded as an unexpected EOF rather than a clean one.
			if p.readErr == io.EOF {
				p.readErr = io.ErrUnexpectedEOF
			}
		}
	}

	// Read out from "data to return" part of buffer.
	if p.n == 0 {
		return 0, p.err
	}
	n := len(d)
	if n > p.n {
		n = p.n
	}
	// The error from br.Read is discarded; any pending error is carried
	// in p.err and reported once the counted bytes are used up.
	n, _ = br.Read(d[:n])
	p.total += int64(n)
	p.n -= n
	if p.n == 0 {
		return n, p.err
	}
	return n, nil
}

// scanUntilBoundary scans buf to identify how much of it can be safely
// returned as part of the Part body.
// dashBoundary is "--boundary".
// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
// The comments below (and the name) assume "\n--boundary", but either is accepted.
// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
// readErr is the read error, if any, that followed reading the bytes in buf.
// scanUntilBoundary returns the number of data bytes from buf that can be
// returned as part of the Part body and also the error to return (if any)
// once those data bytes are done.
func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
	if total == 0 {
		// At beginning of body, allow dashBoundary.
		if bytes.HasPrefix(buf, dashBoundary) {
			switch matchAfterPrefix(buf, dashBoundary, readErr) {
			case -1:
				// Not a boundary after all: the prefix itself is body data.
				return len(dashBoundary), nil
			case 0:
				// Undecided: report no data and no error yet.
				return 0, nil
			case +1:
				// Boundary at the very start: the body is empty.
				return 0, io.EOF
			}
		}
		// buf is a (possibly empty) prefix of dashBoundary: nothing can be
		// released yet, so surface the read error, if any.
		if bytes.HasPrefix(dashBoundary, buf) {
			return 0, readErr
		}
	}

	// Search for "\n--boundary".
	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
		case -1:
			// Look-alike only: everything through it is body data.
			return i + len(nlDashBoundary), nil
		case 0:
			// Undecided: release only the bytes before the candidate.
			return i, nil
		case +1:
			// Real boundary: the body ends just before it.
			return i, io.EOF
		}
	}
	// buf is a (possibly empty) prefix of nlDashBoundary: no byte can be
	// released yet.
	if bytes.HasPrefix(nlDashBoundary, buf) {
		return 0, readErr
	}

	// Otherwise, anything up to the final \n is not part of the boundary
	// and so must be part of the body.
	// Also if the section from the final \n onward is not a prefix of the boundary,
	// it too must be part of the body.
	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
		return i, nil
	}
	return len(buf), readErr
}

// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a double dash, space, tab, cr, nl,
// or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that readErr == nil and buf ends right after the prefix or
// right after the prefix plus a single dash.
+func matchAfterPrefix(buf, prefix []byte, readErr error) int { + if len(buf) == len(prefix) { + if readErr != nil { + return +1 + } + return 0 + } + c := buf[len(prefix)] + + if c == ' ' || c == '\t' || c == '\r' || c == '\n' { + return +1 + } + + // Try to detect boundaryDash + if c == '-' { + if len(buf) == len(prefix)+1 { + if readErr != nil { + // Prefix + "-" does not match + return -1 + } + return 0 + } + if buf[len(prefix)+1] == '-' { + return +1 + } + } + + return -1 +} + +func (p *Part) Close() error { + io.Copy(io.Discard, p) + return nil +} + +// Reader is an iterator over parts in a MIME multipart body. +// Reader's underlying parser consumes its input as needed. Seeking +// isn't supported. +type Reader struct { + bufReader *bufio.Reader + tempDir string // used in tests + + currentPart *Part + partsRead int + + nl []byte // "\r\n" or "\n" (set after seeing first boundary line) + nlDashBoundary []byte // nl + "--boundary" + dashBoundaryDash []byte // "--boundary--" + dashBoundary []byte // "--boundary" +} + +// maxMIMEHeaderSize is the maximum size of a MIME header we will parse, +// including header keys, values, and map overhead. +const maxMIMEHeaderSize = 10 << 20 + +// multipartMaxHeaders is the maximum number of header entries NextPart will return, +// as well as the maximum combined total of header entries Reader.ReadForm will return +// in FileHeaders. +var multipartMaxHeaders = godebug.New("multipartmaxheaders") + +func maxMIMEHeaders() int64 { + if s := multipartMaxHeaders.Value(); s != "" { + if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 { + return v + } + } + return 10000 +} + +// NextPart returns the next part in the multipart or an error. +// When there are no more parts, the error io.EOF is returned. +// +// As a special case, if the "Content-Transfer-Encoding" header +// has a value of "quoted-printable", that header is instead +// hidden and the body is transparently decoded during Read calls. 
+func (r *Reader) NextPart() (*Part, error) { + return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders()) +} + +// NextRawPart returns the next part in the multipart or an error. +// When there are no more parts, the error io.EOF is returned. +// +// Unlike NextPart, it does not have special handling for +// "Content-Transfer-Encoding: quoted-printable". +func (r *Reader) NextRawPart() (*Part, error) { + return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders()) +} + +func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) { + if r.currentPart != nil { + r.currentPart.Close() + } + if string(r.dashBoundary) == "--" { + return nil, fmt.Errorf("multipart: boundary is empty") + } + expectNewPart := false + for { + line, err := r.bufReader.ReadSlice('\n') + + if err == io.EOF && r.isFinalBoundary(line) { + // If the buffer ends in "--boundary--" without the + // trailing "\r\n", ReadSlice will return an error + // (since it's missing the '\n'), but this is a valid + // multipart EOF so we need to return io.EOF instead of + // a fmt-wrapped one. + return nil, io.EOF + } + if err != nil { + return nil, fmt.Errorf("multipart: NextPart: %w", err) + } + + if r.isBoundaryDelimiterLine(line) { + r.partsRead++ + bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders) + if err != nil { + return nil, err + } + r.currentPart = bp + return bp, nil + } + + if r.isFinalBoundary(line) { + // Expected EOF + return nil, io.EOF + } + + if expectNewPart { + return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) + } + + if r.partsRead == 0 { + // skip line + continue + } + + // Consume the "\n" or "\r\n" separator between the + // body of the previous part and the boundary line we + // now expect will follow. 
(either a new part or the + // end boundary) + if bytes.Equal(line, r.nl) { + expectNewPart = true + continue + } + + return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) + } +} + +// isFinalBoundary reports whether line is the final boundary line +// indicating that all parts are over. +// It matches `^--boundary--[ \t]*(\r\n)?$` +func (r *Reader) isFinalBoundary(line []byte) bool { + if !bytes.HasPrefix(line, r.dashBoundaryDash) { + return false + } + rest := line[len(r.dashBoundaryDash):] + rest = skipLWSPChar(rest) + return len(rest) == 0 || bytes.Equal(rest, r.nl) +} + +func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { + // https://tools.ietf.org/html/rfc2046#section-5.1 + // The boundary delimiter line is then defined as a line + // consisting entirely of two hyphen characters ("-", + // decimal value 45) followed by the boundary parameter + // value from the Content-Type header field, optional linear + // whitespace, and a terminating CRLF. + if !bytes.HasPrefix(line, r.dashBoundary) { + return false + } + rest := line[len(r.dashBoundary):] + rest = skipLWSPChar(rest) + + // On the first part, see our lines are ending in \n instead of \r\n + // and switch into that mode if so. This is a violation of the spec, + // but occurs in practice. + if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { + r.nl = r.nl[1:] + r.nlDashBoundary = r.nlDashBoundary[1:] + } + return bytes.Equal(rest, r.nl) +} + +// skipLWSPChar returns b with leading spaces and tabs removed. +// RFC 822 defines: +// +// LWSP-char = SPACE / HTAB +func skipLWSPChar(b []byte) []byte { + for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { + b = b[1:] + } + return b +} diff --git a/src/mime/multipart/multipart_test.go b/src/mime/multipart/multipart_test.go new file mode 100644 index 0000000..e0cb768 --- /dev/null +++ b/src/mime/multipart/multipart_test.go @@ -0,0 +1,1020 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package multipart + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/textproto" + "os" + "reflect" + "strings" + "testing" +) + +func TestBoundaryLine(t *testing.T) { + mr := NewReader(strings.NewReader(""), "myBoundary") + if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) { + t.Error("expected") + } + if !mr.isBoundaryDelimiterLine([]byte("--myBoundary \r\n")) { + t.Error("expected") + } + if !mr.isBoundaryDelimiterLine([]byte("--myBoundary \n")) { + t.Error("expected") + } + if mr.isBoundaryDelimiterLine([]byte("--myBoundary bogus \n")) { + t.Error("expected fail") + } + if mr.isBoundaryDelimiterLine([]byte("--myBoundary bogus--")) { + t.Error("expected fail") + } +} + +func escapeString(v string) string { + bytes, _ := json.Marshal(v) + return string(bytes) +} + +func expectEq(t *testing.T, expected, actual, what string) { + if expected == actual { + return + } + t.Errorf("Unexpected value for %s; got %s (len %d) but expected: %s (len %d)", + what, escapeString(actual), len(actual), escapeString(expected), len(expected)) +} + +func TestNameAccessors(t *testing.T) { + tests := [...][3]string{ + {`form-data; name="foo"`, "foo", ""}, + {` form-data ; name=foo`, "foo", ""}, + {`FORM-DATA;name="foo"`, "foo", ""}, + {` FORM-DATA ; name="foo"`, "foo", ""}, + {` FORM-DATA ; name="foo"`, "foo", ""}, + {` FORM-DATA ; name=foo`, "foo", ""}, + {` FORM-DATA ; filename="foo.txt"; name=foo; baz=quux`, "foo", "foo.txt"}, + {` not-form-data ; filename="bar.txt"; name=foo; baz=quux`, "", "bar.txt"}, + } + for i, test := range tests { + p := &Part{Header: make(map[string][]string)} + p.Header.Set("Content-Disposition", test[0]) + if g, e := p.FormName(), test[1]; g != e { + t.Errorf("test %d: FormName() = %q; want %q", i, g, e) + } + if g, e := p.FileName(), test[2]; g != e { + t.Errorf("test %d: FileName() = %q; want %q", i, g, e) + } + } +} + +var 
longLine = strings.Repeat("\n\n\r\r\r\n\r\000", (1<<20)/8) + +func testMultipartBody(sep string) string { + testBody := ` +This is a multi-part message. This line is ignored. +--MyBoundary +Header1: value1 +HEADER2: value2 +foo-bar: baz + +My value +The end. +--MyBoundary +name: bigsection + +[longline] +--MyBoundary +Header1: value1b +HEADER2: value2b +foo-bar: bazb + +Line 1 +Line 2 +Line 3 ends in a newline, but just one. + +--MyBoundary + +never read data +--MyBoundary-- + + +useless trailer +` + testBody = strings.ReplaceAll(testBody, "\n", sep) + return strings.Replace(testBody, "[longline]", longLine, 1) +} + +func TestMultipart(t *testing.T) { + bodyReader := strings.NewReader(testMultipartBody("\r\n")) + testMultipart(t, bodyReader, false) +} + +func TestMultipartOnlyNewlines(t *testing.T) { + bodyReader := strings.NewReader(testMultipartBody("\n")) + testMultipart(t, bodyReader, true) +} + +func TestMultipartSlowInput(t *testing.T) { + bodyReader := strings.NewReader(testMultipartBody("\r\n")) + testMultipart(t, &slowReader{bodyReader}, false) +} + +func testMultipart(t *testing.T, r io.Reader, onlyNewlines bool) { + t.Parallel() + reader := NewReader(r, "MyBoundary") + buf := new(strings.Builder) + + // Part1 + part, err := reader.NextPart() + if part == nil || err != nil { + t.Error("Expected part1") + return + } + if x := part.Header.Get("Header1"); x != "value1" { + t.Errorf("part.Header.Get(%q) = %q, want %q", "Header1", x, "value1") + } + if x := part.Header.Get("foo-bar"); x != "baz" { + t.Errorf("part.Header.Get(%q) = %q, want %q", "foo-bar", x, "baz") + } + if x := part.Header.Get("Foo-Bar"); x != "baz" { + t.Errorf("part.Header.Get(%q) = %q, want %q", "Foo-Bar", x, "baz") + } + buf.Reset() + if _, err := io.Copy(buf, part); err != nil { + t.Errorf("part 1 copy: %v", err) + } + + adjustNewlines := func(s string) string { + if onlyNewlines { + return strings.ReplaceAll(s, "\r\n", "\n") + } + return s + } + + expectEq(t, adjustNewlines("My 
value\r\nThe end."), buf.String(), "Value of first part") + + // Part2 + part, err = reader.NextPart() + if err != nil { + t.Fatalf("Expected part2; got: %v", err) + return + } + if e, g := "bigsection", part.Header.Get("name"); e != g { + t.Errorf("part2's name header: expected %q, got %q", e, g) + } + buf.Reset() + if _, err := io.Copy(buf, part); err != nil { + t.Errorf("part 2 copy: %v", err) + } + s := buf.String() + if len(s) != len(longLine) { + t.Errorf("part2 body expected long line of length %d; got length %d", + len(longLine), len(s)) + } + if s != longLine { + t.Errorf("part2 long body didn't match") + } + + // Part3 + part, err = reader.NextPart() + if part == nil || err != nil { + t.Error("Expected part3") + return + } + if part.Header.Get("foo-bar") != "bazb" { + t.Error("Expected foo-bar: bazb") + } + buf.Reset() + if _, err := io.Copy(buf, part); err != nil { + t.Errorf("part 3 copy: %v", err) + } + expectEq(t, adjustNewlines("Line 1\r\nLine 2\r\nLine 3 ends in a newline, but just one.\r\n"), + buf.String(), "body of part 3") + + // Part4 + part, err = reader.NextPart() + if part == nil || err != nil { + t.Error("Expected part 4 without errors") + return + } + + // Non-existent part5 + part, err = reader.NextPart() + if part != nil { + t.Error("Didn't expect a fifth part.") + } + if err != io.EOF { + t.Errorf("On fifth part expected io.EOF; got %v", err) + } +} + +func TestVariousTextLineEndings(t *testing.T) { + tests := [...]string{ + "Foo\nBar", + "Foo\nBar\n", + "Foo\r\nBar", + "Foo\r\nBar\r\n", + "Foo\rBar", + "Foo\rBar\r", + "\x00\x01\x02\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10", + } + + for testNum, expectedBody := range tests { + body := "--BOUNDARY\r\n" + + "Content-Disposition: form-data; name=\"value\"\r\n" + + "\r\n" + + expectedBody + + "\r\n--BOUNDARY--\r\n" + bodyReader := strings.NewReader(body) + + reader := NewReader(bodyReader, "BOUNDARY") + buf := new(bytes.Buffer) + part, err := reader.NextPart() + if part == nil { + t.Errorf("Expected 
a body part on text %d", testNum) + continue + } + if err != nil { + t.Errorf("Unexpected error on text %d: %v", testNum, err) + continue + } + written, err := io.Copy(buf, part) + expectEq(t, expectedBody, buf.String(), fmt.Sprintf("test %d", testNum)) + if err != nil { + t.Errorf("Error copying multipart; bytes=%v, error=%v", written, err) + } + + part, err = reader.NextPart() + if part != nil { + t.Errorf("Unexpected part in test %d", testNum) + } + if err != io.EOF { + t.Errorf("On test %d expected io.EOF; got %v", testNum, err) + } + + } +} + +type maliciousReader struct { + t *testing.T + n int +} + +const maxReadThreshold = 1 << 20 + +func (mr *maliciousReader) Read(b []byte) (n int, err error) { + mr.n += len(b) + if mr.n >= maxReadThreshold { + mr.t.Fatal("too much was read") + return 0, io.EOF + } + return len(b), nil +} + +func TestLineLimit(t *testing.T) { + mr := &maliciousReader{t: t} + r := NewReader(mr, "fooBoundary") + part, err := r.NextPart() + if part != nil { + t.Errorf("unexpected part read") + } + if err == nil { + t.Errorf("expected an error") + } + if mr.n >= maxReadThreshold { + t.Errorf("expected to read < %d bytes; read %d", maxReadThreshold, mr.n) + } +} + +func TestMultipartTruncated(t *testing.T) { + for _, body := range []string{ + ` +This is a multi-part message. This line is ignored. +--MyBoundary +foo-bar: baz + +Oh no, premature EOF! +`, + ` +This is a multi-part message. This line is ignored. +--MyBoundary +foo-bar: baz + +Oh no, premature EOF! 
+--MyBoundary-`, + } { + body = strings.ReplaceAll(body, "\n", "\r\n") + bodyReader := strings.NewReader(body) + r := NewReader(bodyReader, "MyBoundary") + + part, err := r.NextPart() + if err != nil { + t.Fatalf("didn't get a part") + } + _, err = io.Copy(io.Discard, part) + if err != io.ErrUnexpectedEOF { + t.Fatalf("expected error io.ErrUnexpectedEOF; got %v", err) + } + } +} + +type slowReader struct { + r io.Reader +} + +func (s *slowReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return s.r.Read(p) + } + return s.r.Read(p[:1]) +} + +type sentinelReader struct { + // done is closed when this reader is read from. + done chan struct{} +} + +func (s *sentinelReader) Read([]byte) (int, error) { + if s.done != nil { + close(s.done) + s.done = nil + } + return 0, io.EOF +} + +// TestMultipartStreamReadahead tests that PartReader does not block +// on reading past the end of a part, ensuring that it can be used on +// a stream like multipart/x-mixed-replace. See golang.org/issue/15431 +func TestMultipartStreamReadahead(t *testing.T) { + testBody1 := ` +This is a multi-part message. This line is ignored. 
+--MyBoundary +foo-bar: baz + +Body +--MyBoundary +` + testBody2 := `foo-bar: bop + +Body 2 +--MyBoundary-- +` + done1 := make(chan struct{}) + reader := NewReader( + io.MultiReader( + strings.NewReader(testBody1), + &sentinelReader{done1}, + strings.NewReader(testBody2)), + "MyBoundary") + + var i int + readPart := func(hdr textproto.MIMEHeader, body string) { + part, err := reader.NextPart() + if part == nil || err != nil { + t.Fatalf("Part %d: NextPart failed: %v", i, err) + } + + if !reflect.DeepEqual(part.Header, hdr) { + t.Errorf("Part %d: part.Header = %v, want %v", i, part.Header, hdr) + } + data, err := io.ReadAll(part) + expectEq(t, body, string(data), fmt.Sprintf("Part %d body", i)) + if err != nil { + t.Fatalf("Part %d: ReadAll failed: %v", i, err) + } + i++ + } + + readPart(textproto.MIMEHeader{"Foo-Bar": {"baz"}}, "Body") + + select { + case <-done1: + t.Errorf("Reader read past second boundary") + default: + } + + readPart(textproto.MIMEHeader{"Foo-Bar": {"bop"}}, "Body 2") +} + +func TestLineContinuation(t *testing.T) { + // This body, extracted from an email, contains headers that span multiple + // lines. + + // TODO: The original mail ended with a double-newline before the + // final delimiter; this was manually edited to use a CRLF. 
+ testBody := + "\n--Apple-Mail-2-292336769\nContent-Transfer-Encoding: 7bit\nContent-Type: text/plain;\n\tcharset=US-ASCII;\n\tdelsp=yes;\n\tformat=flowed\n\nI'm finding the same thing happening on my system (10.4.1).\n\n\n--Apple-Mail-2-292336769\nContent-Transfer-Encoding: quoted-printable\nContent-Type: text/html;\n\tcharset=ISO-8859-1\n\n<HTML><BODY>I'm finding the same thing =\nhappening on my system (10.4.1).=A0 But I built it with XCode =\n2.0.</BODY></=\nHTML>=\n\r\n--Apple-Mail-2-292336769--\n" + + r := NewReader(strings.NewReader(testBody), "Apple-Mail-2-292336769") + + for i := 0; i < 2; i++ { + part, err := r.NextPart() + if err != nil { + t.Fatalf("didn't get a part") + } + var buf strings.Builder + n, err := io.Copy(&buf, part) + if err != nil { + t.Errorf("error reading part: %v\nread so far: %q", err, buf.String()) + } + if n <= 0 { + t.Errorf("read %d bytes; expected >0", n) + } + } +} + +func TestQuotedPrintableEncoding(t *testing.T) { + for _, cte := range []string{"quoted-printable", "Quoted-PRINTABLE"} { + t.Run(cte, func(t *testing.T) { + testQuotedPrintableEncoding(t, cte) + }) + } +} + +func testQuotedPrintableEncoding(t *testing.T, cte string) { + // From https://golang.org/issue/4411 + body := "--0016e68ee29c5d515f04cedf6733\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=text\r\nContent-Transfer-Encoding: " + cte + "\r\n\r\nwords words words words words words words words words words words words wor=\r\nds words words words words words words words words words words words words =\r\nwords words words words words words words words words words words words wor=\r\nds words words words words words words words words words words words words =\r\nwords words words words words words words words words\r\n--0016e68ee29c5d515f04cedf6733\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--0016e68ee29c5d515f04cedf6733--" + r := 
NewReader(strings.NewReader(body), "0016e68ee29c5d515f04cedf6733") + part, err := r.NextPart() + if err != nil { + t.Fatal(err) + } + if te, ok := part.Header["Content-Transfer-Encoding"]; ok { + t.Errorf("unexpected Content-Transfer-Encoding of %q", te) + } + var buf strings.Builder + _, err = io.Copy(&buf, part) + if err != nil { + t.Error(err) + } + got := buf.String() + want := "words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words words" + if got != want { + t.Errorf("wrong part value:\n got: %q\nwant: %q", got, want) + } +} + +func TestRawPart(t *testing.T) { + // https://github.com/golang/go/issues/29090 + + body := strings.Replace(`--0016e68ee29c5d515f04cedf6733 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +<div dir=3D"ltr">Hello World.</div> +--0016e68ee29c5d515f04cedf6733 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +<div dir=3D"ltr">Hello World.</div> +--0016e68ee29c5d515f04cedf6733--`, "\n", "\r\n", -1) + + r := NewReader(strings.NewReader(body), "0016e68ee29c5d515f04cedf6733") + + // This part is expected to be raw, bypassing the automatic handling + // of quoted-printable. + part, err := r.NextRawPart() + if err != nil { + t.Fatal(err) + } + if _, ok := part.Header["Content-Transfer-Encoding"]; !ok { + t.Errorf("missing Content-Transfer-Encoding") + } + var buf strings.Builder + _, err = io.Copy(&buf, part) + if err != nil { + t.Error(err) + } + got := buf.String() + // Data is still quoted-printable. 
+ want := `<div dir=3D"ltr">Hello World.</div>` + if got != want { + t.Errorf("wrong part value:\n got: %q\nwant: %q", got, want) + } + + // This part is expected to have automatic decoding of quoted-printable. + part, err = r.NextPart() + if err != nil { + t.Fatal(err) + } + if te, ok := part.Header["Content-Transfer-Encoding"]; ok { + t.Errorf("unexpected Content-Transfer-Encoding of %q", te) + } + + buf.Reset() + _, err = io.Copy(&buf, part) + if err != nil { + t.Error(err) + } + got = buf.String() + // QP data has been decoded. + want = `<div dir="ltr">Hello World.</div>` + if got != want { + t.Errorf("wrong part value:\n got: %q\nwant: %q", got, want) + } +} + +// Test parsing an image attachment from gmail, which previously failed. +func TestNested(t *testing.T) { + // nested-mime is the body part of a multipart/mixed email + // with boundary e89a8ff1c1e83553e304be640612 + f, err := os.Open("testdata/nested-mime") + if err != nil { + t.Fatal(err) + } + defer f.Close() + mr := NewReader(f, "e89a8ff1c1e83553e304be640612") + p, err := mr.NextPart() + if err != nil { + t.Fatalf("error reading first section (alternative): %v", err) + } + + // Read the inner text/plain and text/html sections of the multipart/alternative. + mr2 := NewReader(p, "e89a8ff1c1e83553e004be640610") + p, err = mr2.NextPart() + if err != nil { + t.Fatalf("reading text/plain part: %v", err) + } + if b, err := io.ReadAll(p); string(b) != "*body*\r\n" || err != nil { + t.Fatalf("reading text/plain part: got %q, %v", b, err) + } + p, err = mr2.NextPart() + if err != nil { + t.Fatalf("reading text/html part: %v", err) + } + if b, err := io.ReadAll(p); string(b) != "<b>body</b>\r\n" || err != nil { + t.Fatalf("reading text/html part: got %q, %v", b, err) + } + + p, err = mr2.NextPart() + if err != io.EOF { + t.Fatalf("final inner NextPart = %v; want io.EOF", err) + } + + // Back to the outer multipart/mixed, reading the image attachment. 
+ _, err = mr.NextPart() + if err != nil { + t.Fatalf("error reading the image attachment at the end: %v", err) + } + + _, err = mr.NextPart() + if err != io.EOF { + t.Fatalf("final outer NextPart = %v; want io.EOF", err) + } +} + +type headerBody struct { + header textproto.MIMEHeader + body string +} + +func formData(key, value string) headerBody { + return headerBody{ + textproto.MIMEHeader{ + "Content-Type": {"text/plain; charset=ISO-8859-1"}, + "Content-Disposition": {"form-data; name=" + key}, + }, + value, + } +} + +type parseTest struct { + name string + in, sep string + want []headerBody +} + +var parseTests = []parseTest{ + // Actual body from App Engine on a blob upload. The final part (the + // Content-Type: message/external-body) is what App Engine replaces + // the uploaded file with. The other form fields (prefixed with + // "other" in their form-data name) are unchanged. A bug was + // reported with blob uploads failing when the other fields were + // empty. This was the MIME POST body that previously failed. 
+ { + name: "App Engine post", + sep: "00151757727e9583fd04bfbca4c6", + in: "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--", + want: 
[]headerBody{ + formData("otherEmpty1", ""), + formData("otherFoo1", "foo"), + formData("otherFoo2", "foo"), + formData("otherEmpty2", ""), + formData("otherRepeatFoo", "foo"), + formData("otherRepeatFoo", "foo"), + formData("otherRepeatEmpty", ""), + formData("otherRepeatEmpty", ""), + formData("submit", "Submit"), + {textproto.MIMEHeader{ + "Content-Type": {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"}, + "Content-Disposition": {"form-data; name=file; filename=\"fall.png\""}, + }, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"}, + }, + }, + + // Single empty part, ended with --boundary immediately after headers. + { + name: "single empty part, --boundary", + sep: "abc", + in: "--abc\r\nFoo: bar\r\n\r\n--abc--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + }, + }, + + // Single empty part, ended with \r\n--boundary immediately after headers. + { + name: "single empty part, \r\n--boundary", + sep: "abc", + in: "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + }, + }, + + // Final part empty. + { + name: "final part empty", + sep: "abc", + in: "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + {textproto.MIMEHeader{"Foo2": {"bar2"}}, ""}, + }, + }, + + // Final part empty with newlines after final separator. 
+ { + name: "final part empty then crlf", + sep: "abc", + in: "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + }, + }, + + // Final part empty with lwsp-chars after final separator. + { + name: "final part empty then lwsp", + sep: "abc", + in: "--abc\r\nFoo: bar\r\n\r\n--abc-- \t", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + }, + }, + + // No parts (empty form as submitted by Chrome) + { + name: "no parts", + sep: "----WebKitFormBoundaryQfEAfzFOiSemeHfA", + in: "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n", + want: []headerBody{}, + }, + + // Part containing data starting with the boundary, but with additional suffix. + { + name: "fake separator as data", + sep: "sep", + in: "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"}, + }, + }, + + // Part containing a boundary with whitespace following it. + { + name: "boundary with whitespace", + sep: "sep", + in: "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, "text"}, + }, + }, + + // With ignored leading line. + { + name: "leading line", + sep: "MyBoundary", + in: strings.Replace(`This is a multi-part message. This line is ignored. +--MyBoundary +foo: bar + + +--MyBoundary--`, "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, ""}, + }, + }, + + // Issue 10616; minimal + { + name: "issue 10616 minimal", + sep: "sep", + in: "--sep \r\nFoo: bar\r\n\r\n" + + "a\r\n" + + "--sep_alt\r\n" + + "b\r\n" + + "\r\n--sep--", + want: []headerBody{ + {textproto.MIMEHeader{"Foo": {"bar"}}, "a\r\n--sep_alt\r\nb\r\n"}, + }, + }, + + // Issue 10616; full example from bug. 
+ { + name: "nested separator prefix is outer separator", + sep: "----=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9", + in: strings.Replace(`------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9 +Content-Type: multipart/alternative; boundary="----=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt" + +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +This is a multi-part message in MIME format. + +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: 8bit + +html things +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt-- +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9--`, "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`multipart/alternative; boundary="----=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt"`}}, + strings.Replace(`------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit + +This is a multi-part message in MIME format. 
+ +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: 8bit + +html things +------=_NextPart_4c2fbafd7ec4c8bf08034fe724b608d9_alt--`, "\n", "\r\n", -1), + }, + }, + }, + + // Issue 12662: Check that we don't consume the leading \r if the peekBuffer + // ends in '\r\n--separator-' + { + name: "peek buffer boundary condition", + sep: "00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db", + in: strings.Replace(`--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db +Content-Disposition: form-data; name="block"; filename="block" +Content-Type: application/octet-stream + +`+strings.Repeat("A", peekBufferSize-65)+"\n--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--", "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}}, + strings.Repeat("A", peekBufferSize-65), + }, + }, + }, + + // Issue 12662: Same test as above with \r\n at the end + { + name: "peek buffer boundary condition", + sep: "00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db", + in: strings.Replace(`--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db +Content-Disposition: form-data; name="block"; filename="block" +Content-Type: application/octet-stream + +`+strings.Repeat("A", peekBufferSize-65)+"\n--00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--\n", "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}}, + strings.Repeat("A", peekBufferSize-65), + }, + }, + }, + + // Issue 12662v2: We want to make sure that for short buffers that end with + // '\r\n--separator-' we always consume at least one (valid) symbol from the + // peekBuffer + { + name: "peek buffer boundary condition", + sep: 
"aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db", + in: strings.Replace(`--aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db +Content-Disposition: form-data; name="block"; filename="block" +Content-Type: application/octet-stream + +`+strings.Repeat("A", peekBufferSize)+"\n--aaaaaaaaaa00ffded004d4dd0fdf945fbdef9d9050cfd6a13a821846299b27fc71b9db--", "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="block"; filename="block"`}}, + strings.Repeat("A", peekBufferSize), + }, + }, + }, + + // Context: https://github.com/camlistore/camlistore/issues/642 + // If the file contents in the form happens to have a size such as: + // size = peekBufferSize - (len("\n--") + len(boundary) + len("\r") + 1), (modulo peekBufferSize) + // then peekBufferSeparatorIndex was wrongly returning (-1, false), which was leading to an nCopy + // cut such as: + // "somedata\r| |\n--Boundary\r" (instead of "somedata| |\r\n--Boundary\r"), which was making the + // subsequent Read miss the boundary. 
+ { + name: "safeCount off by one", + sep: "08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74", + in: strings.Replace(`--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74 +Content-Disposition: form-data; name="myfile"; filename="my-file.txt" +Content-Type: application/octet-stream + +`, "\n", "\r\n", -1) + + strings.Repeat("A", peekBufferSize-(len("\n--")+len("08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74")+len("\r")+1)) + + strings.Replace(` +--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74 +Content-Disposition: form-data; name="key" + +val +--08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74-- +`, "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`application/octet-stream`}, "Content-Disposition": {`form-data; name="myfile"; filename="my-file.txt"`}}, + strings.Repeat("A", peekBufferSize-(len("\n--")+len("08b84578eabc563dcba967a945cdf0d9f613864a8f4a716f0e81caa71a74")+len("\r")+1)), + }, + {textproto.MIMEHeader{"Content-Disposition": {`form-data; name="key"`}}, + "val", + }, + }, + }, + + // Issue 46042; a nested multipart uses the outer separator followed by + // a dash. + { + name: "nested separator prefix is outer separator followed by a dash", + sep: "foo", + in: strings.Replace(`--foo +Content-Type: multipart/alternative; boundary="foo-bar" + +--foo-bar + +Body +--foo-bar + +Body2 +--foo-bar-- +--foo--`, "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`multipart/alternative; boundary="foo-bar"`}}, + strings.Replace(`--foo-bar + +Body +--foo-bar + +Body2 +--foo-bar--`, "\n", "\r\n", -1), + }, + }, + }, + + // A nested boundary cannot be the outer separator followed by double dash. 
+ { + name: "nested separator prefix is outer separator followed by double dash", + sep: "foo", + in: strings.Replace(`--foo +Content-Type: multipart/alternative; boundary="foo--" + +--foo-- + +Body + +--foo--`, "\n", "\r\n", -1), + want: []headerBody{ + {textproto.MIMEHeader{"Content-Type": {`multipart/alternative; boundary="foo--"`}}, ""}, + }, + }, + + roundTripParseTest(), +} + +func TestParse(t *testing.T) { +Cases: + for _, tt := range parseTests { + r := NewReader(strings.NewReader(tt.in), tt.sep) + got := []headerBody{} + for { + p, err := r.NextPart() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("in test %q, NextPart: %v", tt.name, err) + continue Cases + } + pbody, err := io.ReadAll(p) + if err != nil { + t.Errorf("in test %q, error reading part: %v", tt.name, err) + continue Cases + } + got = append(got, headerBody{p.Header, string(pbody)}) + } + if !reflect.DeepEqual(tt.want, got) { + t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want) + if len(tt.want) != len(got) { + t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want)) + } else if len(got) > 1 { + for pi, wantPart := range tt.want { + if !reflect.DeepEqual(wantPart, got[pi]) { + t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart) + } + } + } + } + } +} + +func partsFromReader(r *Reader) ([]headerBody, error) { + got := []headerBody{} + for { + p, err := r.NextPart() + if err == io.EOF { + return got, nil + } + if err != nil { + return nil, fmt.Errorf("NextPart: %v", err) + } + pbody, err := io.ReadAll(p) + if err != nil { + return nil, fmt.Errorf("error reading part: %v", err) + } + got = append(got, headerBody{p.Header, string(pbody)}) + } +} + +func TestParseAllSizes(t *testing.T) { + t.Parallel() + maxSize := 5 << 10 + if testing.Short() { + maxSize = 512 + } + var buf bytes.Buffer + body := strings.Repeat("a", maxSize) + bodyb := []byte(body) + for size := 0; size < maxSize; size++ { + buf.Reset() + w := 
NewWriter(&buf) + part, _ := w.CreateFormField("f") + part.Write(bodyb[:size]) + part, _ = w.CreateFormField("key") + part.Write([]byte("val")) + w.Close() + r := NewReader(&buf, w.Boundary()) + got, err := partsFromReader(r) + if err != nil { + t.Errorf("For size %d: %v", size, err) + continue + } + if len(got) != 2 { + t.Errorf("For size %d, num parts = %d; want 2", size, len(got)) + continue + } + if got[0].body != body[:size] { + t.Errorf("For size %d, got unexpected len %d: %q", size, len(got[0].body), got[0].body) + } + } +} + +func roundTripParseTest() parseTest { + t := parseTest{ + name: "round trip", + want: []headerBody{ + formData("empty", ""), + formData("lf", "\n"), + formData("cr", "\r"), + formData("crlf", "\r\n"), + formData("foo", "bar"), + }, + } + var buf strings.Builder + w := NewWriter(&buf) + for _, p := range t.want { + pw, err := w.CreatePart(p.header) + if err != nil { + panic(err) + } + _, err = pw.Write([]byte(p.body)) + if err != nil { + panic(err) + } + } + w.Close() + t.in = buf.String() + t.sep = w.Boundary() + return t +} + +func TestNoBoundary(t *testing.T) { + mr := NewReader(strings.NewReader(""), "") + _, err := mr.NextPart() + if got, want := fmt.Sprint(err), "multipart: boundary is empty"; got != want { + t.Errorf("NextPart error = %v; want %v", got, want) + } +} diff --git a/src/mime/multipart/readmimeheader.go b/src/mime/multipart/readmimeheader.go new file mode 100644 index 0000000..25aa6e2 --- /dev/null +++ b/src/mime/multipart/readmimeheader.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package multipart + +import ( + "net/textproto" + _ "unsafe" // for go:linkname +) + +// readMIMEHeader is defined in package net/textproto. 
+// +//go:linkname readMIMEHeader net/textproto.readMIMEHeader +func readMIMEHeader(r *textproto.Reader, maxMemory, maxHeaders int64) (textproto.MIMEHeader, error) diff --git a/src/mime/multipart/testdata/nested-mime b/src/mime/multipart/testdata/nested-mime new file mode 100644 index 0000000..71c238e --- /dev/null +++ b/src/mime/multipart/testdata/nested-mime @@ -0,0 +1,29 @@ +--e89a8ff1c1e83553e304be640612
+Content-Type: multipart/alternative; boundary=e89a8ff1c1e83553e004be640610
+
+--e89a8ff1c1e83553e004be640610
+Content-Type: text/plain; charset=UTF-8
+
+*body*
+
+--e89a8ff1c1e83553e004be640610
+Content-Type: text/html; charset=UTF-8
+
+<b>body</b>
+
+--e89a8ff1c1e83553e004be640610--
+--e89a8ff1c1e83553e304be640612
+Content-Type: image/png; name="x.png"
+Content-Disposition: attachment;
+ filename="x.png"
+Content-Transfer-Encoding: base64
+X-Attachment-Id: f_h1edgigu0
+
+iVBORw0KGgoAAAANSUhEUgAAAagAAADrCAIAAACza5XhAAAKMWlDQ1BJQ0MgUHJvZmlsZQAASImd
+lndUU9kWh8+9N71QkhCKlNBraFICSA29SJEuKjEJEErAkAAiNkRUcERRkaYIMijggKNDkbEiioUB
+8b2kqeGaj4aTNftesu5mob4pr07ecMywRwLBvDCJOksqlUyldAZD7g9fxIZRWWPMvXRNJROJRBIG
+Y7Vx0mva1HAwYqibdKONXye3dW4iUonhWFJnqK7OaanU1gGkErFYEgaj0cg8wK+zVPh2ziwnHy07
+U8lYTNapezSzOuevRwLB7CFkqQQCwaJDiBQIBIJFhwh8AoFg0SHUqQUCASRJKkwkhMy/JfODWPEJ
+BIJFhwh8AoFg0TFnQqQ55GtPFopcJsN97e1nYtNuIBYeGBgYCmYrmE3jZ05iaGAoMX0xzxkWz6Hv
+yO7WvrlwzA0uLzrD+VkKqViwl9IfTBVNFMyc/x9alloiPPlqhQAAAABJRU5ErkJggg==
+--e89a8ff1c1e83553e304be640612--
diff --git a/src/mime/multipart/writer.go b/src/mime/multipart/writer.go new file mode 100644 index 0000000..d1ff151 --- /dev/null +++ b/src/mime/multipart/writer.go @@ -0,0 +1,201 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package multipart + +import ( + "bytes" + "crypto/rand" + "errors" + "fmt" + "io" + "net/textproto" + "sort" + "strings" +) + +// A Writer generates multipart messages. +type Writer struct { + w io.Writer + boundary string + lastpart *part +} + +// NewWriter returns a new multipart Writer with a random boundary, +// writing to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + boundary: randomBoundary(), + } +} + +// Boundary returns the Writer's boundary. +func (w *Writer) Boundary() string { + return w.boundary +} + +// SetBoundary overrides the Writer's default randomly-generated +// boundary separator with an explicit value. +// +// SetBoundary must be called before any parts are created, may only +// contain certain ASCII characters, and must be non-empty and +// at most 70 bytes long. +func (w *Writer) SetBoundary(boundary string) error { + if w.lastpart != nil { + return errors.New("mime: SetBoundary called after write") + } + // rfc2046#section-5.1.1 + if len(boundary) < 1 || len(boundary) > 70 { + return errors.New("mime: invalid boundary length") + } + end := len(boundary) - 1 + for i, b := range boundary { + if 'A' <= b && b <= 'Z' || 'a' <= b && b <= 'z' || '0' <= b && b <= '9' { + continue + } + switch b { + case '\'', '(', ')', '+', '_', ',', '-', '.', '/', ':', '=', '?': + continue + case ' ': + if i != end { + continue + } + } + return errors.New("mime: invalid boundary character") + } + w.boundary = boundary + return nil +} + +// FormDataContentType returns the Content-Type for an HTTP +// multipart/form-data with this Writer's Boundary. 
+func (w *Writer) FormDataContentType() string { + b := w.boundary + // We must quote the boundary if it contains any of the + // tspecials characters defined by RFC 2045, or space. + if strings.ContainsAny(b, `()<>@,;:\"/[]?= `) { + b = `"` + b + `"` + } + return "multipart/form-data; boundary=" + b +} + +func randomBoundary() string { + var buf [30]byte + _, err := io.ReadFull(rand.Reader, buf[:]) + if err != nil { + panic(err) + } + return fmt.Sprintf("%x", buf[:]) +} + +// CreatePart creates a new multipart section with the provided +// header. The body of the part should be written to the returned +// Writer. After calling CreatePart, any previous part may no longer +// be written to. +func (w *Writer) CreatePart(header textproto.MIMEHeader) (io.Writer, error) { + if w.lastpart != nil { + if err := w.lastpart.close(); err != nil { + return nil, err + } + } + var b bytes.Buffer + if w.lastpart != nil { + fmt.Fprintf(&b, "\r\n--%s\r\n", w.boundary) + } else { + fmt.Fprintf(&b, "--%s\r\n", w.boundary) + } + + keys := make([]string, 0, len(header)) + for k := range header { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + for _, v := range header[k] { + fmt.Fprintf(&b, "%s: %s\r\n", k, v) + } + } + fmt.Fprintf(&b, "\r\n") + _, err := io.Copy(w.w, &b) + if err != nil { + return nil, err + } + p := &part{ + mw: w, + } + w.lastpart = p + return p, nil +} + +var quoteEscaper = strings.NewReplacer("\\", "\\\\", `"`, "\\\"") + +func escapeQuotes(s string) string { + return quoteEscaper.Replace(s) +} + +// CreateFormFile is a convenience wrapper around CreatePart. It creates +// a new form-data header with the provided field name and file name. 
+func (w *Writer) CreateFormFile(fieldname, filename string) (io.Writer, error) { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", + fmt.Sprintf(`form-data; name="%s"; filename="%s"`, + escapeQuotes(fieldname), escapeQuotes(filename))) + h.Set("Content-Type", "application/octet-stream") + return w.CreatePart(h) +} + +// CreateFormField calls CreatePart with a header using the +// given field name. +func (w *Writer) CreateFormField(fieldname string) (io.Writer, error) { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", + fmt.Sprintf(`form-data; name="%s"`, escapeQuotes(fieldname))) + return w.CreatePart(h) +} + +// WriteField calls CreateFormField and then writes the given value. +func (w *Writer) WriteField(fieldname, value string) error { + p, err := w.CreateFormField(fieldname) + if err != nil { + return err + } + _, err = p.Write([]byte(value)) + return err +} + +// Close finishes the multipart message and writes the trailing +// boundary end line to the output. +func (w *Writer) Close() error { + if w.lastpart != nil { + if err := w.lastpart.close(); err != nil { + return err + } + w.lastpart = nil + } + _, err := fmt.Fprintf(w.w, "\r\n--%s--\r\n", w.boundary) + return err +} + +type part struct { + mw *Writer + closed bool + we error // last error that occurred writing +} + +func (p *part) close() error { + p.closed = true + return p.we +} + +func (p *part) Write(d []byte) (n int, err error) { + if p.closed { + return 0, errors.New("multipart: can't write to finished part") + } + n, err = p.mw.w.Write(d) + if err != nil { + p.we = err + } + return +} diff --git a/src/mime/multipart/writer_test.go b/src/mime/multipart/writer_test.go new file mode 100644 index 0000000..9e0f131 --- /dev/null +++ b/src/mime/multipart/writer_test.go @@ -0,0 +1,174 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package multipart + +import ( + "bytes" + "io" + "mime" + "net/textproto" + "strings" + "testing" +) + +func TestWriter(t *testing.T) { + fileContents := []byte("my file contents") + + var b bytes.Buffer + w := NewWriter(&b) + { + part, err := w.CreateFormFile("myfile", "my-file.txt") + if err != nil { + t.Fatalf("CreateFormFile: %v", err) + } + part.Write(fileContents) + err = w.WriteField("key", "val") + if err != nil { + t.Fatalf("WriteField: %v", err) + } + part.Write([]byte("val")) + err = w.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + s := b.String() + if len(s) == 0 { + t.Fatal("String: unexpected empty result") + } + if s[0] == '\r' || s[0] == '\n' { + t.Fatal("String: unexpected newline") + } + } + + r := NewReader(&b, w.Boundary()) + + part, err := r.NextPart() + if err != nil { + t.Fatalf("part 1: %v", err) + } + if g, e := part.FormName(), "myfile"; g != e { + t.Errorf("part 1: want form name %q, got %q", e, g) + } + slurp, err := io.ReadAll(part) + if err != nil { + t.Fatalf("part 1: ReadAll: %v", err) + } + if e, g := string(fileContents), string(slurp); e != g { + t.Errorf("part 1: want contents %q, got %q", e, g) + } + + part, err = r.NextPart() + if err != nil { + t.Fatalf("part 2: %v", err) + } + if g, e := part.FormName(), "key"; g != e { + t.Errorf("part 2: want form name %q, got %q", e, g) + } + slurp, err = io.ReadAll(part) + if err != nil { + t.Fatalf("part 2: ReadAll: %v", err) + } + if e, g := "val", string(slurp); e != g { + t.Errorf("part 2: want contents %q, got %q", e, g) + } + + part, err = r.NextPart() + if part != nil || err == nil { + t.Fatalf("expected end of parts; got %v, %v", part, err) + } +} + +func TestWriterSetBoundary(t *testing.T) { + tests := []struct { + b string + ok bool + }{ + {"abc", true}, + {"", false}, + {"ungültig", false}, + {"!", false}, + {strings.Repeat("x", 70), true}, + {strings.Repeat("x", 71), false}, + {"bad!ascii!", false}, + {"my-separator", true}, + {"with space", true}, + 
{"badspace ", false}, + {"(boundary)", true}, + } + for i, tt := range tests { + var b strings.Builder + w := NewWriter(&b) + err := w.SetBoundary(tt.b) + got := err == nil + if got != tt.ok { + t.Errorf("%d. boundary %q = %v (%v); want %v", i, tt.b, got, err, tt.ok) + } else if tt.ok { + got := w.Boundary() + if got != tt.b { + t.Errorf("boundary = %q; want %q", got, tt.b) + } + + ct := w.FormDataContentType() + mt, params, err := mime.ParseMediaType(ct) + if err != nil { + t.Errorf("could not parse Content-Type %q: %v", ct, err) + } else if mt != "multipart/form-data" { + t.Errorf("unexpected media type %q; want %q", mt, "multipart/form-data") + } else if b := params["boundary"]; b != tt.b { + t.Errorf("unexpected boundary parameter %q; want %q", b, tt.b) + } + + w.Close() + wantSub := "\r\n--" + tt.b + "--\r\n" + if got := b.String(); !strings.Contains(got, wantSub) { + t.Errorf("expected %q in output. got: %q", wantSub, got) + } + } + } +} + +func TestWriterBoundaryGoroutines(t *testing.T) { + // Verify there's no data race accessing any lazy boundary if it's used by + // different goroutines. 
This was previously broken by + // https://codereview.appspot.com/95760043/ and reverted in + // https://codereview.appspot.com/117600043/ + w := NewWriter(io.Discard) + done := make(chan int) + go func() { + w.CreateFormField("foo") + done <- 1 + }() + w.Boundary() + <-done +} + +func TestSortedHeader(t *testing.T) { + var buf strings.Builder + w := NewWriter(&buf) + if err := w.SetBoundary("MIMEBOUNDARY"); err != nil { + t.Fatalf("Error setting mime boundary: %v", err) + } + + header := textproto.MIMEHeader{ + "A": {"2"}, + "B": {"5", "7", "6"}, + "C": {"4"}, + "M": {"3"}, + "Z": {"1"}, + } + + part, err := w.CreatePart(header) + if err != nil { + t.Fatalf("Unable to create part: %v", err) + } + part.Write([]byte("foo")) + + w.Close() + + want := "--MIMEBOUNDARY\r\nA: 2\r\nB: 5\r\nB: 7\r\nB: 6\r\nC: 4\r\nM: 3\r\nZ: 1\r\n\r\nfoo\r\n--MIMEBOUNDARY--\r\n" + if want != buf.String() { + t.Fatalf("\n got: %q\nwant: %q\n", buf.String(), want) + } +} diff --git a/src/mime/quotedprintable/example_test.go b/src/mime/quotedprintable/example_test.go new file mode 100644 index 0000000..e5a479a --- /dev/null +++ b/src/mime/quotedprintable/example_test.go @@ -0,0 +1,37 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package quotedprintable_test + +import ( + "fmt" + "io" + "mime/quotedprintable" + "os" + "strings" +) + +func ExampleNewReader() { + for _, s := range []string{ + `=48=65=6C=6C=6F=2C=20=47=6F=70=68=65=72=73=21`, + `invalid escape: <b style="font-size: 200%">hello</b>`, + "Hello, Gophers! This symbol will be unescaped: =3D and this will be written in =\r\none line.", + } { + b, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(s))) + fmt.Printf("%s %v\n", b, err) + } + // Output: + // Hello, Gophers! <nil> + // invalid escape: <b style="font-size: 200%">hello</b> <nil> + // Hello, Gophers! 
This symbol will be unescaped: = and this will be written in one line. <nil> +} + +func ExampleNewWriter() { + w := quotedprintable.NewWriter(os.Stdout) + w.Write([]byte("These symbols will be escaped: = \t")) + w.Close() + + // Output: + // These symbols will be escaped: =3D =09 +} diff --git a/src/mime/quotedprintable/reader.go b/src/mime/quotedprintable/reader.go new file mode 100644 index 0000000..4239625 --- /dev/null +++ b/src/mime/quotedprintable/reader.go @@ -0,0 +1,139 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package quotedprintable implements quoted-printable encoding as specified by +// RFC 2045. +package quotedprintable + +import ( + "bufio" + "bytes" + "fmt" + "io" +) + +// Reader is a quoted-printable decoder. +type Reader struct { + br *bufio.Reader + rerr error // last read error + line []byte // to be consumed before more of br +} + +// NewReader returns a quoted-printable reader, decoding from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + br: bufio.NewReader(r), + } +} + +func fromHex(b byte) (byte, error) { + switch { + case b >= '0' && b <= '9': + return b - '0', nil + case b >= 'A' && b <= 'F': + return b - 'A' + 10, nil + // Accept badly encoded bytes. 
+ case b >= 'a' && b <= 'f': + return b - 'a' + 10, nil + } + return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b) +} + +func readHexByte(v []byte) (b byte, err error) { + if len(v) < 2 { + return 0, io.ErrUnexpectedEOF + } + var hb, lb byte + if hb, err = fromHex(v[0]); err != nil { + return 0, err + } + if lb, err = fromHex(v[1]); err != nil { + return 0, err + } + return hb<<4 | lb, nil +} + +func isQPDiscardWhitespace(r rune) bool { + switch r { + case '\n', '\r', ' ', '\t': + return true + } + return false +} + +var ( + crlf = []byte("\r\n") + lf = []byte("\n") + softSuffix = []byte("=") +) + +// Read reads and decodes quoted-printable data from the underlying reader. +func (r *Reader) Read(p []byte) (n int, err error) { + // Deviations from RFC 2045: + // 1. in addition to "=\r\n", "=\n" is also treated as soft line break. + // 2. it will pass through a '\r' or '\n' not preceded by '=', consistent + // with other broken QP encoders & decoders. + // 3. it accepts soft line-break (=) at end of message (issue 15486); i.e. + // the final byte read from the underlying reader is allowed to be '=', + // and it will be silently ignored. + // 4. it takes = as literal = if not followed by two hex digits + // but not at end of line (issue 13219). + for len(p) > 0 { + if len(r.line) == 0 { + if r.rerr != nil { + return n, r.rerr + } + r.line, r.rerr = r.br.ReadSlice('\n') + + // Does the line end in CRLF instead of just LF? 
+ hasLF := bytes.HasSuffix(r.line, lf) + hasCR := bytes.HasSuffix(r.line, crlf) + wholeLine := r.line + r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace) + if bytes.HasSuffix(r.line, softSuffix) { + rightStripped := wholeLine[len(r.line):] + r.line = r.line[:len(r.line)-1] + if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) && + !(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) { + r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped) + } + } else if hasLF { + if hasCR { + r.line = append(r.line, '\r', '\n') + } else { + r.line = append(r.line, '\n') + } + } + continue + } + b := r.line[0] + + switch { + case b == '=': + b, err = readHexByte(r.line[1:]) + if err != nil { + if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' { + // Take the = as a literal =. + b = '=' + break + } + return n, err + } + r.line = r.line[2:] // 2 of the 3; other 1 is done below + case b == '\t' || b == '\r' || b == '\n': + break + case b >= 0x80: + // As an extension to RFC 2045, we accept + // values >= 0x80 without complaint. Issue 22597. + break + case b < ' ' || b > '~': + return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b) + } + p[0] = b + p = p[1:] + r.line = r.line[1:] + n++ + } + return n, nil +} diff --git a/src/mime/quotedprintable/reader_test.go b/src/mime/quotedprintable/reader_test.go new file mode 100644 index 0000000..0af1e5f --- /dev/null +++ b/src/mime/quotedprintable/reader_test.go @@ -0,0 +1,216 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package quotedprintable + +import ( + "bufio" + "errors" + "flag" + "fmt" + "io" + "os/exec" + "regexp" + "sort" + "strings" + "testing" + "time" +) + +func TestReader(t *testing.T) { + tests := []struct { + in, want string + err any + }{ + {in: "", want: ""}, + {in: "foo bar", want: "foo bar"}, + {in: "foo bar=3D", want: "foo bar="}, + {in: "foo bar=3d", want: "foo bar="}, // lax. + {in: "foo bar=\n", want: "foo bar"}, + {in: "foo bar\n", want: "foo bar\n"}, // somewhat lax. + {in: "foo bar=0", want: "foo bar=0"}, // lax + {in: "foo bar=0D=0A", want: "foo bar\r\n"}, + {in: " A B \r\n C ", want: " A B\r\n C"}, + {in: " A B =\r\n C ", want: " A B C"}, + {in: " A B =\n C ", want: " A B C"}, // lax. treating LF as CRLF + {in: "foo=\nbar", want: "foobar"}, + {in: "foo\x00bar", want: "foo", err: "quotedprintable: invalid unescaped byte 0x00 in body"}, + {in: "foo bar\xff", want: "foo bar\xff"}, + + // Equal sign. + {in: "=3D30\n", want: "=30\n"}, + {in: "=00=FF0=\n", want: "\x00\xff0"}, + + // Trailing whitespace + {in: "foo \n", want: "foo\n"}, + {in: "foo \n\nfoo =\n\nfoo=20\n\n", want: "foo\n\nfoo \nfoo \n\n"}, + + // Tests that we allow bare \n and \r through, despite it being strictly + // not permitted per RFC 2045, Section 6.7 Page 22 bullet (4). + {in: "foo\nbar", want: "foo\nbar"}, + {in: "foo\rbar", want: "foo\rbar"}, + {in: "foo\r\nbar", want: "foo\r\nbar"}, + + // Different types of soft line-breaks. + {in: "foo=\r\nbar", want: "foobar"}, + {in: "foo=\nbar", want: "foobar"}, + {in: "foo=\rbar", want: "foo", err: "quotedprintable: invalid hex byte 0x0d"}, + {in: "foo=\r\r\r \nbar", want: "foo", err: `quotedprintable: invalid bytes after =: "\r\r\r \n"`}, + // Issue 15486, accept trailing soft line-break at end of input. 
+ {in: "foo=", want: "foo"}, + {in: "=", want: "", err: `quotedprintable: invalid bytes after =: ""`}, + + // Example from RFC 2045: + {in: "Now's the time =\n" + "for all folk to come=\n" + " to the aid of their country.", + want: "Now's the time for all folk to come to the aid of their country."}, + {in: "accept UTF-8 right quotation mark: ’", + want: "accept UTF-8 right quotation mark: ’"}, + } + for _, tt := range tests { + var buf strings.Builder + _, err := io.Copy(&buf, NewReader(strings.NewReader(tt.in))) + if got := buf.String(); got != tt.want { + t.Errorf("for %q, got %q; want %q", tt.in, got, tt.want) + } + switch verr := tt.err.(type) { + case nil: + if err != nil { + t.Errorf("for %q, got unexpected error: %v", tt.in, err) + } + case string: + if got := fmt.Sprint(err); got != verr { + t.Errorf("for %q, got error %q; want %q", tt.in, got, verr) + } + case error: + if err != verr { + t.Errorf("for %q, got error %q; want %q", tt.in, err, verr) + } + } + } + +} + +func everySequence(base, alpha string, length int, fn func(string)) { + if len(base) == length { + fn(base) + return + } + for i := 0; i < len(alpha); i++ { + everySequence(base+alpha[i:i+1], alpha, length, fn) + } +} + +var useQprint = flag.Bool("qprint", false, "Compare against the 'qprint' program.") + +var badSoftRx = regexp.MustCompile(`=([^\r\n]+?\n)|([^\r\n]+$)|(\r$)|(\r[^\n]+\n)|( \r\n)`) + +func TestExhaustive(t *testing.T) { + if *useQprint { + _, err := exec.LookPath("qprint") + if err != nil { + t.Fatalf("Error looking for qprint: %v", err) + } + } + + var buf strings.Builder + res := make(map[string]int) + n := 6 + if testing.Short() { + n = 4 + } + everySequence("", "0A \r\n=", n, func(s string) { + if strings.HasSuffix(s, "=") || strings.Contains(s, "==") { + return + } + buf.Reset() + _, err := io.Copy(&buf, NewReader(strings.NewReader(s))) + if err != nil { + errStr := err.Error() + if strings.Contains(errStr, "invalid bytes after =:") { + errStr = "invalid bytes after =" + } + 
res[errStr]++ + if strings.Contains(errStr, "invalid hex byte ") { + if strings.HasSuffix(errStr, "0x20") && (strings.Contains(s, "=0 ") || strings.Contains(s, "=A ") || strings.Contains(s, "= ")) { + return + } + if strings.HasSuffix(errStr, "0x3d") && (strings.Contains(s, "=0=") || strings.Contains(s, "=A=")) { + return + } + if strings.HasSuffix(errStr, "0x0a") || strings.HasSuffix(errStr, "0x0d") { + // bunch of cases; since whitespace at the end of a line before \n is removed. + return + } + } + if strings.Contains(errStr, "unexpected EOF") { + return + } + if errStr == "invalid bytes after =" && badSoftRx.MatchString(s) { + return + } + t.Errorf("decode(%q) = %v", s, err) + return + } + if *useQprint { + cmd := exec.Command("qprint", "-d") + cmd.Stdin = strings.NewReader(s) + stderr, err := cmd.StderrPipe() + if err != nil { + panic(err) + } + qpres := make(chan any, 2) + go func() { + br := bufio.NewReader(stderr) + s, _ := br.ReadString('\n') + if s != "" { + qpres <- errors.New(s) + if cmd.Process != nil { + // It can get stuck on invalid input, like: + // echo -n "0000= " | qprint -d + cmd.Process.Kill() + } + } + }() + go func() { + want, err := cmd.Output() + if err == nil { + qpres <- want + } + }() + select { + case got := <-qpres: + if want, ok := got.([]byte); ok { + if string(want) != buf.String() { + t.Errorf("go decode(%q) = %q; qprint = %q", s, want, buf.String()) + } + } else { + t.Logf("qprint -d(%q) = %v", s, got) + } + case <-time.After(5 * time.Second): + t.Logf("qprint timeout on %q", s) + } + } + res["OK"]++ + }) + var outcomes []string + for k, v := range res { + outcomes = append(outcomes, fmt.Sprintf("%v: %d", k, v)) + } + sort.Strings(outcomes) + got := strings.Join(outcomes, "\n") + want := `OK: 28934 +invalid bytes after =: 3949 +quotedprintable: invalid hex byte 0x0d: 2048 +unexpected EOF: 194` + if testing.Short() { + want = `OK: 896 +invalid bytes after =: 100 +quotedprintable: invalid hex byte 0x0d: 26 +unexpected EOF: 3` + } + 
+ if got != want { + t.Errorf("Got:\n%s\nWant:\n%s", got, want) + } +} diff --git a/src/mime/quotedprintable/writer.go b/src/mime/quotedprintable/writer.go new file mode 100644 index 0000000..16ea0bf --- /dev/null +++ b/src/mime/quotedprintable/writer.go @@ -0,0 +1,172 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package quotedprintable + +import "io" + +const lineMaxLen = 76 + +// A Writer is a quoted-printable writer that implements io.WriteCloser. +type Writer struct { + // Binary mode treats the writer's input as pure binary and processes end of + // line bytes as binary data. + Binary bool + + w io.Writer + i int + line [78]byte + cr bool +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w} +} + +// Write encodes p using quoted-printable encoding and writes it to the +// underlying io.Writer. It limits line length to 76 characters. The encoded +// bytes are not necessarily flushed until the Writer is closed. +func (w *Writer) Write(p []byte) (n int, err error) { + for i, b := range p { + switch { + // Simple writes are done in batch. + case b >= '!' && b <= '~' && b != '=': + continue + case isWhitespace(b) || !w.Binary && (b == '\n' || b == '\r'): + continue + } + + if i > n { + if err := w.write(p[n:i]); err != nil { + return n, err + } + n = i + } + + if err := w.encode(b); err != nil { + return n, err + } + n++ + } + + if n == len(p) { + return n, nil + } + + if err := w.write(p[n:]); err != nil { + return n, err + } + + return len(p), nil +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. 
+func (w *Writer) Close() error { + if err := w.checkLastByte(); err != nil { + return err + } + + return w.flush() +} + +// write limits text encoded in quoted-printable to 76 characters per line. +func (w *Writer) write(p []byte) error { + for _, b := range p { + if b == '\n' || b == '\r' { + // If the previous byte was \r, the CRLF has already been inserted. + if w.cr && b == '\n' { + w.cr = false + continue + } + + if b == '\r' { + w.cr = true + } + + if err := w.checkLastByte(); err != nil { + return err + } + if err := w.insertCRLF(); err != nil { + return err + } + continue + } + + if w.i == lineMaxLen-1 { + if err := w.insertSoftLineBreak(); err != nil { + return err + } + } + + w.line[w.i] = b + w.i++ + w.cr = false + } + + return nil +} + +func (w *Writer) encode(b byte) error { + if lineMaxLen-1-w.i < 3 { + if err := w.insertSoftLineBreak(); err != nil { + return err + } + } + + w.line[w.i] = '=' + w.line[w.i+1] = upperhex[b>>4] + w.line[w.i+2] = upperhex[b&0x0f] + w.i += 3 + + return nil +} + +const upperhex = "0123456789ABCDEF" + +// checkLastByte encodes the last buffered byte if it is a space or a tab. 
+func (w *Writer) checkLastByte() error { + if w.i == 0 { + return nil + } + + b := w.line[w.i-1] + if isWhitespace(b) { + w.i-- + if err := w.encode(b); err != nil { + return err + } + } + + return nil +} + +func (w *Writer) insertSoftLineBreak() error { + w.line[w.i] = '=' + w.i++ + + return w.insertCRLF() +} + +func (w *Writer) insertCRLF() error { + w.line[w.i] = '\r' + w.line[w.i+1] = '\n' + w.i += 2 + + return w.flush() +} + +func (w *Writer) flush() error { + if _, err := w.w.Write(w.line[:w.i]); err != nil { + return err + } + + w.i = 0 + return nil +} + +func isWhitespace(b byte) bool { + return b == ' ' || b == '\t' +} diff --git a/src/mime/quotedprintable/writer_test.go b/src/mime/quotedprintable/writer_test.go new file mode 100644 index 0000000..07411fe --- /dev/null +++ b/src/mime/quotedprintable/writer_test.go @@ -0,0 +1,158 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package quotedprintable + +import ( + "bytes" + "io" + "strings" + "testing" +) + +func TestWriter(t *testing.T) { + testWriter(t, false) +} + +func TestWriterBinary(t *testing.T) { + testWriter(t, true) +} + +func testWriter(t *testing.T, binary bool) { + tests := []struct { + in, want, wantB string + }{ + {in: "", want: ""}, + {in: "foo bar", want: "foo bar"}, + {in: "foo bar=", want: "foo bar=3D"}, + {in: "foo bar\r", want: "foo bar\r\n", wantB: "foo bar=0D"}, + {in: "foo bar\r\r", want: "foo bar\r\n\r\n", wantB: "foo bar=0D=0D"}, + {in: "foo bar\n", want: "foo bar\r\n", wantB: "foo bar=0A"}, + {in: "foo bar\r\n", want: "foo bar\r\n", wantB: "foo bar=0D=0A"}, + {in: "foo bar\r\r\n", want: "foo bar\r\n\r\n", wantB: "foo bar=0D=0D=0A"}, + {in: "foo bar ", want: "foo bar=20"}, + {in: "foo bar\t", want: "foo bar=09"}, + {in: "foo bar ", want: "foo bar =20"}, + {in: "foo bar \n", want: "foo bar=20\r\n", wantB: "foo bar =0A"}, + {in: "foo bar \r", want: "foo bar=20\r\n", wantB: "foo bar =0D"}, + {in: "foo bar \r\n", want: "foo bar=20\r\n", wantB: "foo bar =0D=0A"}, + {in: "foo bar \n", want: "foo bar =20\r\n", wantB: "foo bar =0A"}, + {in: "foo bar \n ", want: "foo bar =20\r\n=20", wantB: "foo bar =0A=20"}, + {in: "¡Hola Señor!", want: "=C2=A1Hola Se=C3=B1or!"}, + { + in: "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", + want: "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", + }, + { + in: strings.Repeat("a", 75), + want: strings.Repeat("a", 75), + }, + { + in: strings.Repeat("a", 76), + want: strings.Repeat("a", 75) + "=\r\na", + }, + { + in: strings.Repeat("a", 72) + "=", + want: strings.Repeat("a", 72) + "=3D", + }, + { + in: strings.Repeat("a", 73) + "=", + want: strings.Repeat("a", 73) + "=\r\n=3D", + }, + { + in: strings.Repeat("a", 74) + "=", + want: strings.Repeat("a", 74) + "=\r\n=3D", + }, + { + in: strings.Repeat("a", 75) + "=", + want: strings.Repeat("a", 75) + "=\r\n=3D", + }, + { + in: strings.Repeat(" ", 73), + want: strings.Repeat(" ", 72) + "=20", + }, + { + in: 
strings.Repeat(" ", 74), + want: strings.Repeat(" ", 73) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 75), + want: strings.Repeat(" ", 74) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 76), + want: strings.Repeat(" ", 75) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 77), + want: strings.Repeat(" ", 75) + "=\r\n =20", + }, + } + + for _, tt := range tests { + buf := new(strings.Builder) + w := NewWriter(buf) + + want := tt.want + if binary { + w.Binary = true + if tt.wantB != "" { + want = tt.wantB + } + } + + if _, err := w.Write([]byte(tt.in)); err != nil { + t.Errorf("Write(%q): %v", tt.in, err) + continue + } + if err := w.Close(); err != nil { + t.Errorf("Close(): %v", err) + continue + } + got := buf.String() + if got != want { + t.Errorf("Write(%q), got:\n%q\nwant:\n%q", tt.in, got, want) + } + } +} + +func TestRoundTrip(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriter(buf) + if _, err := w.Write(testMsg); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + + r := NewReader(buf) + gotBytes, err := io.ReadAll(r) + if err != nil { + t.Fatalf("Error while reading from Reader: %v", err) + } + got := string(gotBytes) + if got != string(testMsg) { + t.Errorf("Encoding and decoding changed the message, got:\n%s", got) + } +} + +// From https://fr.wikipedia.org/wiki/Quoted-Printable +var testMsg = []byte("Quoted-Printable (QP) est un format d'encodage de données codées sur 8 bits, qui utilise exclusivement les caractères alphanumériques imprimables du code ASCII (7 bits).\r\n" + + "\r\n" + + "En effet, les différents codages comprennent de nombreux caractères qui ne sont pas représentables en ASCII (par exemple les caractères accentués), ainsi que des caractères dits « non-imprimables ».\r\n" + + "\r\n" + + "L'encodage Quoted-Printable permet de remédier à ce problème, en procédant de la manière suivante :\r\n" + + "\r\n" + + "Un octet correspondant à un caractère imprimable de 
l'ASCII sauf le signe égal (donc un caractère de code ASCII entre 33 et 60 ou entre 62 et 126) ou aux caractères de saut de ligne (codes ASCII 13 et 10) ou une suite de tabulations et espaces non situées en fin de ligne (de codes ASCII respectifs 9 et 32) est représenté tel quel.\r\n" + + "Un octet qui ne correspond pas à la définition ci-dessus (caractère non imprimable de l'ASCII, tabulation ou espaces non suivies d'un caractère imprimable avant la fin de la ligne ou signe égal) est représenté par un signe égal, suivi de son numéro, exprimé en hexadécimal.\r\n" + + "Enfin, un signe égal suivi par un saut de ligne (donc la suite des trois caractères de codes ASCII 61, 13 et 10) peut être inséré n'importe où, afin de limiter la taille des lignes produites si nécessaire. Une limite de 76 caractères par ligne est généralement respectée.\r\n") + +func BenchmarkWriter(b *testing.B) { + for i := 0; i < b.N; i++ { + w := NewWriter(io.Discard) + w.Write(testMsg) + w.Close() + } +} diff --git a/src/mime/testdata/test.types b/src/mime/testdata/test.types new file mode 100644 index 0000000..9b040ed --- /dev/null +++ b/src/mime/testdata/test.types @@ -0,0 +1,8 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + + + # mime package test +application/test t1 # Simple test +text/test t2 # Text test diff --git a/src/mime/testdata/test.types.globs2 b/src/mime/testdata/test.types.globs2 new file mode 100644 index 0000000..4606d98 --- /dev/null +++ b/src/mime/testdata/test.types.globs2 @@ -0,0 +1,14 @@ +# Copyright 2021 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ + +# mime package test for globs2 +50:document/test:*.t3 +50:example/test:*.t4 +50:text/plain:*,v +50:application/x-trash:*~ +30:example/do-not-use:*.t4 +10:example/glob-question-mark:*.foo?ar +10:example/glob-asterisk:*.foo*r +10:example/glob-range:*.foo[1-3] diff --git a/src/mime/testdata/test.types.plan9 b/src/mime/testdata/test.types.plan9 new file mode 100644 index 0000000..19dbf41 --- /dev/null +++ b/src/mime/testdata/test.types.plan9 @@ -0,0 +1,8 @@ +# Copyright 2013 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + + + # mime package test +.t1 application test - y # Simple test +.t2 text test - y # Text test diff --git a/src/mime/type.go b/src/mime/type.go new file mode 100644 index 0000000..465ecf0 --- /dev/null +++ b/src/mime/type.go @@ -0,0 +1,202 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package mime implements parts of the MIME spec. +package mime + +import ( + "fmt" + "sort" + "strings" + "sync" +) + +var ( + mimeTypes sync.Map // map[string]string; ".Z" => "application/x-compress" + mimeTypesLower sync.Map // map[string]string; ".z" => "application/x-compress" + + // extensions maps from MIME type to list of lowercase file + // extensions: "image/jpeg" => [".jpg", ".jpeg"] + extensionsMu sync.Mutex // Guards stores (but not loads) on extensions. + extensions sync.Map // map[string][]string; slice values are append-only. +) + +func clearSyncMap(m *sync.Map) { + m.Range(func(k, _ any) bool { + m.Delete(k) + return true + }) +} + +// setMimeTypes is used by initMime's non-test path, and by tests. 
+func setMimeTypes(lowerExt, mixExt map[string]string) { + clearSyncMap(&mimeTypes) + clearSyncMap(&mimeTypesLower) + clearSyncMap(&extensions) + + for k, v := range lowerExt { + mimeTypesLower.Store(k, v) + } + for k, v := range mixExt { + mimeTypes.Store(k, v) + } + + extensionsMu.Lock() + defer extensionsMu.Unlock() + for k, v := range lowerExt { + justType, _, err := ParseMediaType(v) + if err != nil { + panic(err) + } + var exts []string + if ei, ok := extensions.Load(justType); ok { + exts = ei.([]string) + } + extensions.Store(justType, append(exts, k)) + } +} + +var builtinTypesLower = map[string]string{ + ".avif": "image/avif", + ".css": "text/css; charset=utf-8", + ".gif": "image/gif", + ".htm": "text/html; charset=utf-8", + ".html": "text/html; charset=utf-8", + ".jpeg": "image/jpeg", + ".jpg": "image/jpeg", + ".js": "text/javascript; charset=utf-8", + ".json": "application/json", + ".mjs": "text/javascript; charset=utf-8", + ".pdf": "application/pdf", + ".png": "image/png", + ".svg": "image/svg+xml", + ".wasm": "application/wasm", + ".webp": "image/webp", + ".xml": "text/xml; charset=utf-8", +} + +var once sync.Once // guards initMime + +var testInitMime, osInitMime func() + +func initMime() { + if fn := testInitMime; fn != nil { + fn() + } else { + setMimeTypes(builtinTypesLower, builtinTypesLower) + osInitMime() + } +} + +// TypeByExtension returns the MIME type associated with the file extension ext. +// The extension ext should begin with a leading dot, as in ".html". +// When ext has no associated type, TypeByExtension returns "". +// +// Extensions are looked up first case-sensitively, then case-insensitively. 
+// +// The built-in table is small but on unix it is augmented by the local +// system's MIME-info database or mime.types file(s) if available under one or +// more of these names: +// +// /usr/local/share/mime/globs2 +// /usr/share/mime/globs2 +// /etc/mime.types +// /etc/apache2/mime.types +// /etc/apache/mime.types +// +// On Windows, MIME types are extracted from the registry. +// +// Text types have the charset parameter set to "utf-8" by default. +func TypeByExtension(ext string) string { + once.Do(initMime) + + // Case-sensitive lookup. + if v, ok := mimeTypes.Load(ext); ok { + return v.(string) + } + + // Case-insensitive lookup. + // Optimistically assume a short ASCII extension and be + // allocation-free in that case. + var buf [10]byte + lower := buf[:0] + const utf8RuneSelf = 0x80 // from utf8 package, but not importing it. + for i := 0; i < len(ext); i++ { + c := ext[i] + if c >= utf8RuneSelf { + // Slow path. + si, _ := mimeTypesLower.Load(strings.ToLower(ext)) + s, _ := si.(string) + return s + } + if 'A' <= c && c <= 'Z' { + lower = append(lower, c+('a'-'A')) + } else { + lower = append(lower, c) + } + } + si, _ := mimeTypesLower.Load(string(lower)) + s, _ := si.(string) + return s +} + +// ExtensionsByType returns the extensions known to be associated with the MIME +// type typ. The returned extensions will each begin with a leading dot, as in +// ".html". When typ has no associated extensions, ExtensionsByType returns an +// nil slice. +func ExtensionsByType(typ string) ([]string, error) { + justType, _, err := ParseMediaType(typ) + if err != nil { + return nil, err + } + + once.Do(initMime) + s, ok := extensions.Load(justType) + if !ok { + return nil, nil + } + ret := append([]string(nil), s.([]string)...) + sort.Strings(ret) + return ret, nil +} + +// AddExtensionType sets the MIME type associated with +// the extension ext to typ. The extension should begin with +// a leading dot, as in ".html". 
+func AddExtensionType(ext, typ string) error { + if !strings.HasPrefix(ext, ".") { + return fmt.Errorf("mime: extension %q missing leading dot", ext) + } + once.Do(initMime) + return setExtensionType(ext, typ) +} + +func setExtensionType(extension, mimeType string) error { + justType, param, err := ParseMediaType(mimeType) + if err != nil { + return err + } + if strings.HasPrefix(mimeType, "text/") && param["charset"] == "" { + param["charset"] = "utf-8" + mimeType = FormatMediaType(mimeType, param) + } + extLower := strings.ToLower(extension) + + mimeTypes.Store(extension, mimeType) + mimeTypesLower.Store(extLower, mimeType) + + extensionsMu.Lock() + defer extensionsMu.Unlock() + var exts []string + if ei, ok := extensions.Load(justType); ok { + exts = ei.([]string) + } + for _, v := range exts { + if v == extLower { + return nil + } + } + extensions.Store(justType, append(exts, extLower)) + return nil +} diff --git a/src/mime/type_dragonfly.go b/src/mime/type_dragonfly.go new file mode 100644 index 0000000..d09d74a --- /dev/null +++ b/src/mime/type_dragonfly.go @@ -0,0 +1,9 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +func init() { + typeFiles = append(typeFiles, "/usr/local/etc/mime.types") +} diff --git a/src/mime/type_freebsd.go b/src/mime/type_freebsd.go new file mode 100644 index 0000000..d09d74a --- /dev/null +++ b/src/mime/type_freebsd.go @@ -0,0 +1,9 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +func init() { + typeFiles = append(typeFiles, "/usr/local/etc/mime.types") +} diff --git a/src/mime/type_openbsd.go b/src/mime/type_openbsd.go new file mode 100644 index 0000000..c3b1abb --- /dev/null +++ b/src/mime/type_openbsd.go @@ -0,0 +1,9 @@ +// Copyright 2015 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +func init() { + typeFiles = append(typeFiles, "/usr/share/misc/mime.types") +} diff --git a/src/mime/type_plan9.go b/src/mime/type_plan9.go new file mode 100644 index 0000000..14ff973 --- /dev/null +++ b/src/mime/type_plan9.go @@ -0,0 +1,57 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "bufio" + "os" + "strings" +) + +func init() { + osInitMime = initMimePlan9 +} + +func initMimePlan9() { + for _, filename := range typeFiles { + loadMimeFile(filename) + } +} + +var typeFiles = []string{ + "/sys/lib/mimetype", +} + +func initMimeForTests() map[string]string { + typeFiles = []string{"testdata/test.types.plan9"} + return map[string]string{ + ".t1": "application/test", + ".t2": "text/test; charset=utf-8", + ".pNg": "image/png", + } +} + +func loadMimeFile(filename string) { + f, err := os.Open(filename) + if err != nil { + return + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) <= 2 || fields[0][0] != '.' { + continue + } + if fields[1] == "-" || fields[2] == "-" { + continue + } + setExtensionType(fields[0], fields[1]+"/"+fields[2]) + } + if err := scanner.Err(); err != nil { + panic(err) + } +} diff --git a/src/mime/type_test.go b/src/mime/type_test.go new file mode 100644 index 0000000..d8368e8 --- /dev/null +++ b/src/mime/type_test.go @@ -0,0 +1,220 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package mime + +import ( + "reflect" + "strings" + "sync" + "testing" +) + +func setMimeInit(fn func()) (cleanup func()) { + once = sync.Once{} + testInitMime = fn + return func() { + testInitMime = nil + once = sync.Once{} + } +} + +func clearMimeTypes() { + setMimeTypes(map[string]string{}, map[string]string{}) +} + +func setType(ext, typ string) { + if !strings.HasPrefix(ext, ".") { + panic("missing leading dot") + } + if err := setExtensionType(ext, typ); err != nil { + panic("bad test data: " + err.Error()) + } +} + +func TestTypeByExtension(t *testing.T) { + once = sync.Once{} + // initMimeForTests returns the platform-specific extension => + // type tests. On Unix and Plan 9, this also tests the parsing + // of MIME text files (in testdata/*). On Windows, we test the + // real registry on the machine and assume that ".png" exists + // there, which empirically it always has, for all versions of + // Windows. + typeTests := initMimeForTests() + + for ext, want := range typeTests { + val := TypeByExtension(ext) + if val != want { + t.Errorf("TypeByExtension(%q) = %q, want %q", ext, val, want) + } + } +} + +func TestTypeByExtension_LocalData(t *testing.T) { + cleanup := setMimeInit(func() { + clearMimeTypes() + setType(".foo", "x/foo") + setType(".bar", "x/bar") + setType(".Bar", "x/bar; capital=1") + }) + defer cleanup() + + tests := map[string]string{ + ".foo": "x/foo", + ".bar": "x/bar", + ".Bar": "x/bar; capital=1", + ".sdlkfjskdlfj": "", + ".t1": "", // testdata shouldn't be used + } + + for ext, want := range tests { + val := TypeByExtension(ext) + if val != want { + t.Errorf("TypeByExtension(%q) = %q, want %q", ext, val, want) + } + } +} + +func TestTypeByExtensionCase(t *testing.T) { + const custom = "test/test; charset=iso-8859-1" + const caps = "test/test; WAS=ALLCAPS" + + cleanup := setMimeInit(func() { + clearMimeTypes() + setType(".TEST", caps) + setType(".tesT", custom) + }) + defer cleanup() + + // case-sensitive lookup + if got := 
TypeByExtension(".tesT"); got != custom { + t.Fatalf("for .tesT, got %q; want %q", got, custom) + } + if got := TypeByExtension(".TEST"); got != caps { + t.Fatalf("for .TEST, got %q; want %s", got, caps) + } + + // case-insensitive + if got := TypeByExtension(".TesT"); got != custom { + t.Fatalf("for .TesT, got %q; want %q", got, custom) + } +} + +func TestExtensionsByType(t *testing.T) { + cleanup := setMimeInit(func() { + clearMimeTypes() + setType(".gif", "image/gif") + setType(".a", "foo/letter") + setType(".b", "foo/letter") + setType(".B", "foo/letter") + setType(".PNG", "image/png") + }) + defer cleanup() + + tests := []struct { + typ string + want []string + wantErr string + }{ + {typ: "image/gif", want: []string{".gif"}}, + {typ: "image/png", want: []string{".png"}}, // lowercase + {typ: "foo/letter", want: []string{".a", ".b"}}, + {typ: "x/unknown", want: nil}, + } + + for _, tt := range tests { + got, err := ExtensionsByType(tt.typ) + if err != nil && tt.wantErr != "" && strings.Contains(err.Error(), tt.wantErr) { + continue + } + if err != nil { + t.Errorf("ExtensionsByType(%q) error: %v", tt.typ, err) + continue + } + if tt.wantErr != "" { + t.Errorf("ExtensionsByType(%q) = %q, %v; want error substring %q", tt.typ, got, err, tt.wantErr) + continue + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ExtensionsByType(%q) = %q; want %q", tt.typ, got, tt.want) + } + } +} + +func TestLookupMallocs(t *testing.T) { + n := testing.AllocsPerRun(10000, func() { + TypeByExtension(".html") + TypeByExtension(".HtML") + }) + if n > 0 { + t.Errorf("allocs = %v; want 0", n) + } +} + +func BenchmarkTypeByExtension(b *testing.B) { + initMime() + b.ResetTimer() + + for _, ext := range []string{ + ".html", + ".HTML", + ".unused", + } { + b.Run(ext, func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + TypeByExtension(ext) + } + }) + }) + } +} + +func BenchmarkExtensionsByType(b *testing.B) { + initMime() + b.ResetTimer() + + for _, typ := 
range []string{ + "text/html", + "text/html; charset=utf-8", + "application/octet-stream", + } { + b.Run(typ, func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := ExtensionsByType(typ); err != nil { + b.Fatal(err) + } + } + }) + }) + } +} + +func TestExtensionsByType2(t *testing.T) { + cleanup := setMimeInit(func() { + clearMimeTypes() + // Initialize built-in types like in type.go before osInitMime. + setMimeTypes(builtinTypesLower, builtinTypesLower) + }) + defer cleanup() + + tests := []struct { + typ string + want []string + }{ + {typ: "image/jpeg", want: []string{".jpeg", ".jpg"}}, + } + + for _, tt := range tests { + got, err := ExtensionsByType(tt.typ) + if err != nil { + t.Errorf("ExtensionsByType(%q): %v", tt.typ, err) + continue + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ExtensionsByType(%q) = %q; want %q", tt.typ, got, tt.want) + } + } +} diff --git a/src/mime/type_unix.go b/src/mime/type_unix.go new file mode 100644 index 0000000..649d900 --- /dev/null +++ b/src/mime/type_unix.go @@ -0,0 +1,126 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build unix || (js && wasm) + +package mime + +import ( + "bufio" + "os" + "strings" +) + +func init() { + osInitMime = initMimeUnix +} + +// See https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.21.html +// for the FreeDesktop Shared MIME-info Database specification. +var mimeGlobs = []string{ + "/usr/local/share/mime/globs2", + "/usr/share/mime/globs2", +} + +// Common locations for mime.types files on unix. 
+var typeFiles = []string{ + "/etc/mime.types", + "/etc/apache2/mime.types", + "/etc/apache/mime.types", + "/etc/httpd/conf/mime.types", +} + +func loadMimeGlobsFile(filename string) error { + f, err := os.Open(filename) + if err != nil { + return err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + // Each line should be of format: weight:mimetype:glob[:morefields...] + fields := strings.Split(scanner.Text(), ":") + if len(fields) < 3 || len(fields[0]) < 1 || len(fields[2]) < 3 { + continue + } else if fields[0][0] == '#' || fields[2][0] != '*' || fields[2][1] != '.' { + continue + } + + extension := fields[2][1:] + if strings.ContainsAny(extension, "?*[") { + // Not a bare extension, but a glob. Ignore for now: + // - we do not have an implementation for this glob + // syntax (translation to path/filepath.Match could + // be possible) + // - support for globs with weight ordering would have + // performance impact to all lookups to support the + // rarely seen glob entries + // - trying to match glob metacharacters literally is + // not useful + continue + } + if _, ok := mimeTypes.Load(extension); ok { + // We've already seen this extension. + // The file is in weight order, so we keep + // the first entry that we see. 
+ continue + } + + setExtensionType(extension, fields[1]) + } + if err := scanner.Err(); err != nil { + panic(err) + } + return nil +} + +func loadMimeFile(filename string) { + f, err := os.Open(filename) + if err != nil { + return + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) <= 1 || fields[0][0] == '#' { + continue + } + mimeType := fields[0] + for _, ext := range fields[1:] { + if ext[0] == '#' { + break + } + setExtensionType("."+ext, mimeType) + } + } + if err := scanner.Err(); err != nil { + panic(err) + } +} + +func initMimeUnix() { + for _, filename := range mimeGlobs { + if err := loadMimeGlobsFile(filename); err == nil { + return // Stop checking more files if mimetype database is found. + } + } + + // Fallback if no system-generated mimetype database exists. + for _, filename := range typeFiles { + loadMimeFile(filename) + } +} + +func initMimeForTests() map[string]string { + mimeGlobs = []string{""} + typeFiles = []string{"testdata/test.types"} + return map[string]string{ + ".T1": "application/test", + ".t2": "text/test; charset=utf-8", + ".png": "image/png", + } +} diff --git a/src/mime/type_unix_test.go b/src/mime/type_unix_test.go new file mode 100644 index 0000000..7b8db79 --- /dev/null +++ b/src/mime/type_unix_test.go @@ -0,0 +1,44 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build unix || (js && wasm) + +package mime + +import ( + "testing" +) + +func initMimeUnixTest(t *testing.T) { + once.Do(initMime) + err := loadMimeGlobsFile("testdata/test.types.globs2") + if err != nil { + t.Fatal(err) + } + + loadMimeFile("testdata/test.types") +} + +func TestTypeByExtensionUNIX(t *testing.T) { + initMimeUnixTest(t) + typeTests := map[string]string{ + ".T1": "application/test", + ".t2": "text/test; charset=utf-8", + ".t3": "document/test", + ".t4": "example/test", + ".png": "image/png", + ",v": "", + "~": "", + ".foo?ar": "", + ".foo*r": "", + ".foo[1-3]": "", + } + + for ext, want := range typeTests { + val := TypeByExtension(ext) + if val != want { + t.Errorf("TypeByExtension(%q) = %q, want %q", ext, val, want) + } + } +} diff --git a/src/mime/type_windows.go b/src/mime/type_windows.go new file mode 100644 index 0000000..9380214 --- /dev/null +++ b/src/mime/type_windows.go @@ -0,0 +1,52 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "internal/syscall/windows/registry" +) + +func init() { + osInitMime = initMimeWindows +} + +func initMimeWindows() { + names, err := registry.CLASSES_ROOT.ReadSubKeyNames() + if err != nil { + return + } + for _, name := range names { + if len(name) < 2 || name[0] != '.' { // looking for extensions only + continue + } + k, err := registry.OpenKey(registry.CLASSES_ROOT, name, registry.READ) + if err != nil { + continue + } + v, _, err := k.GetStringValue("Content Type") + k.Close() + if err != nil { + continue + } + + // There is a long-standing problem on Windows: the + // registry sometimes records that the ".js" extension + // should be "text/plain". See issue #32350. While + // normally local configuration should override + // defaults, this problem is common enough that we + // handle it here by ignoring that registry setting. 
+ if name == ".js" && (v == "text/plain" || v == "text/plain; charset=utf-8") { + continue + } + + setExtensionType(name, v) + } +} + +func initMimeForTests() map[string]string { + return map[string]string{ + ".PnG": "image/png", + } +} |