diff options
Diffstat (limited to 'src/mime/quotedprintable')
-rw-r--r-- | src/mime/quotedprintable/example_test.go | 37 | ||||
-rw-r--r-- | src/mime/quotedprintable/reader.go | 139 | ||||
-rw-r--r-- | src/mime/quotedprintable/reader_test.go | 216 | ||||
-rw-r--r-- | src/mime/quotedprintable/writer.go | 172 | ||||
-rw-r--r-- | src/mime/quotedprintable/writer_test.go | 158 |
5 files changed, 722 insertions, 0 deletions
// ===== File: src/mime/quotedprintable/example_test.go =====
// Diff header: diff --git a/src/mime/quotedprintable/example_test.go b/src/mime/quotedprintable/example_test.go new file mode 100644 index 0000000..e5a479a --- /dev/null +++ b/src/mime/quotedprintable/example_test.go @@ -0,0 +1,37 @@
//
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Package clause: package quotedprintable_test
// Imports: "fmt", "io", "mime/quotedprintable", "os", "strings"

// ExampleNewReader decodes three sample inputs: a fully escaped string, a
// string whose '=' is not followed by two hex digits (kept as a literal '='),
// and a string that contains both an escaped '=' and a soft line break.
func ExampleNewReader() {
	inputs := []string{
		`=48=65=6C=6C=6F=2C=20=47=6F=70=68=65=72=73=21`,
		`invalid escape: <b style="font-size: 200%">hello</b>`,
		"Hello, Gophers! This symbol will be unescaped: =3D and this will be written in =\r\none line.",
	}
	for i := range inputs {
		dec := quotedprintable.NewReader(strings.NewReader(inputs[i]))
		decoded, err := io.ReadAll(dec)
		fmt.Printf("%s %v\n", decoded, err)
	}
	// Output:
	// Hello, Gophers! <nil>
	// invalid escape: <b style="font-size: 200%">hello</b> <nil>
	// Hello, Gophers! This symbol will be unescaped: = and this will be written in one line. <nil>
}

// ExampleNewWriter encodes a short message to standard output; the '=' and
// the trailing tab are the bytes that end up escaped.
func ExampleNewWriter() {
	msg := []byte("These symbols will be escaped: = \t")

	enc := quotedprintable.NewWriter(os.Stdout)
	enc.Write(msg)
	enc.Close()

	// Output:
	// These symbols will be escaped: =3D =09
}

// ===== File: src/mime/quotedprintable/reader.go =====
// Diff header: diff --git a/src/mime/quotedprintable/reader.go b/src/mime/quotedprintable/reader.go new file mode 100644 index 0000000..4239625 --- /dev/null +++ b/src/mime/quotedprintable/reader.go @@ -0,0 +1,139 @@
//
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Package quotedprintable implements quoted-printable encoding as specified by
// RFC 2045.
//
// Package clause: package quotedprintable
// Imports: "bufio", "bytes", "fmt", "io"

// Reader is a quoted-printable decoder.
type Reader struct {
	br   *bufio.Reader
	rerr error  // last read error; reported once line has been consumed
	line []byte // to be consumed before more of br
}

// NewReader returns a quoted-printable reader, decoding from r.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		br: bufio.NewReader(r),
	}
}

// fromHex returns the value of the hexadecimal digit b. Lower-case digits
// are accepted even though RFC 2045 only allows upper case.
func fromHex(b byte) (byte, error) {
	switch {
	case b >= '0' && b <= '9':
		return b - '0', nil
	case b >= 'A' && b <= 'F':
		return b - 'A' + 10, nil
	// Accept badly encoded bytes.
	case b >= 'a' && b <= 'f':
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
}

// readHexByte decodes the two hex digits at the start of v into one byte.
// It returns io.ErrUnexpectedEOF if v holds fewer than two bytes.
func readHexByte(v []byte) (b byte, err error) {
	if len(v) < 2 {
		return 0, io.ErrUnexpectedEOF
	}
	var hb, lb byte
	if hb, err = fromHex(v[0]); err != nil {
		return 0, err
	}
	if lb, err = fromHex(v[1]); err != nil {
		return 0, err
	}
	return hb<<4 | lb, nil
}

// isQPDiscardWhitespace reports whether r is whitespace that is dropped
// when it appears at the end of an encoded line.
func isQPDiscardWhitespace(r rune) bool {
	switch r {
	case '\n', '\r', ' ', '\t':
		return true
	}
	return false
}

var (
	crlf       = []byte("\r\n")
	lf         = []byte("\n")
	softSuffix = []byte("=")
)

// readLine returns the next physical line from r.br, including its
// terminating '\n' if there is one.
//
// A line that fits in the bufio.Reader's buffer is returned as a slice of
// that buffer. A longer line makes ReadSlice fail with bufio.ErrBufferFull;
// such a line is accumulated into a freshly allocated slice instead, so the
// caller always sees a whole line and never sees bufio.ErrBufferFull.
// Note that an over-long line is therefore held in memory in full.
func (r *Reader) readLine() ([]byte, error) {
	line, err := r.br.ReadSlice('\n')
	if err != bufio.ErrBufferFull {
		return line, err
	}
	long := append([]byte(nil), line...)
	for err == bufio.ErrBufferFull {
		line, err = r.br.ReadSlice('\n')
		long = append(long, line...)
	}
	return long, err
}

// Read reads and decodes quoted-printable data from the underlying reader.
//
// It returns the number of bytes stored in p. Once a decoding or read error
// has been hit, the bytes decoded before it are returned first and the error
// is returned by that or a later call.
func (r *Reader) Read(p []byte) (n int, err error) {
	// Deviations from RFC 2045:
	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
	//    with other broken QP encoders & decoders.
	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
	//    the final byte read from the underlying reader is allowed to be '=',
	//    and it will be silently ignored.
	// 4. it takes = as literal = if not followed by two hex digits
	//    but not at end of line (issue 13219).
	// 5. it accepts physical lines of any length, not just the 76 bytes the
	//    RFC allows.
	for len(p) > 0 {
		if len(r.line) == 0 {
			if r.rerr != nil {
				return n, r.rerr
			}
			// Always fetch a complete line: trailing-whitespace trimming and
			// soft-break detection below are only meaningful at a real line
			// end, never at an arbitrary buffer boundary.
			r.line, r.rerr = r.readLine()

			// Does the line end in CRLF instead of just LF?
			hasLF := bytes.HasSuffix(r.line, lf)
			hasCR := bytes.HasSuffix(r.line, crlf)
			wholeLine := r.line
			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
			if bytes.HasSuffix(r.line, softSuffix) {
				rightStripped := wholeLine[len(r.line):]
				r.line = r.line[:len(r.line)-1]
				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
				}
			} else if hasLF {
				// Put the line ending back. The trimmed slice still has the
				// capacity of wholeLine, so this overwrites the discarded
				// whitespace in place.
				if hasCR {
					r.line = append(r.line, '\r', '\n')
				} else {
					r.line = append(r.line, '\n')
				}
			}
			continue
		}
		b := r.line[0]

		switch {
		case b == '=':
			b, err = readHexByte(r.line[1:])
			if err != nil {
				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
					// Take the = as a literal =.
					b = '='
					break
				}
				return n, err
			}
			r.line = r.line[2:] // 2 of the 3; other 1 is done below
		case b == '\t' || b == '\r' || b == '\n':
			break
		case b >= 0x80:
			// As an extension to RFC 2045, we accept
			// values >= 0x80 without complaint. Issue 22597.
			break
		case b < ' ' || b > '~':
			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
		}
		p[0] = b
		p = p[1:]
		r.line = r.line[1:]
		n++
	}
	return n, nil
}

// ===== File: src/mime/quotedprintable/reader_test.go =====
// Diff header: diff --git a/src/mime/quotedprintable/reader_test.go b/src/mime/quotedprintable/reader_test.go new file mode 100644 index 0000000..0af1e5f --- /dev/null +++ b/src/mime/quotedprintable/reader_test.go @@ -0,0 +1,216 @@
//
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package quotedprintable + +import ( + "bufio" + "errors" + "flag" + "fmt" + "io" + "os/exec" + "regexp" + "sort" + "strings" + "testing" + "time" +) + +func TestReader(t *testing.T) { + tests := []struct { + in, want string + err any + }{ + {in: "", want: ""}, + {in: "foo bar", want: "foo bar"}, + {in: "foo bar=3D", want: "foo bar="}, + {in: "foo bar=3d", want: "foo bar="}, // lax. + {in: "foo bar=\n", want: "foo bar"}, + {in: "foo bar\n", want: "foo bar\n"}, // somewhat lax. + {in: "foo bar=0", want: "foo bar=0"}, // lax + {in: "foo bar=0D=0A", want: "foo bar\r\n"}, + {in: " A B \r\n C ", want: " A B\r\n C"}, + {in: " A B =\r\n C ", want: " A B C"}, + {in: " A B =\n C ", want: " A B C"}, // lax. treating LF as CRLF + {in: "foo=\nbar", want: "foobar"}, + {in: "foo\x00bar", want: "foo", err: "quotedprintable: invalid unescaped byte 0x00 in body"}, + {in: "foo bar\xff", want: "foo bar\xff"}, + + // Equal sign. + {in: "=3D30\n", want: "=30\n"}, + {in: "=00=FF0=\n", want: "\x00\xff0"}, + + // Trailing whitespace + {in: "foo \n", want: "foo\n"}, + {in: "foo \n\nfoo =\n\nfoo=20\n\n", want: "foo\n\nfoo \nfoo \n\n"}, + + // Tests that we allow bare \n and \r through, despite it being strictly + // not permitted per RFC 2045, Section 6.7 Page 22 bullet (4). + {in: "foo\nbar", want: "foo\nbar"}, + {in: "foo\rbar", want: "foo\rbar"}, + {in: "foo\r\nbar", want: "foo\r\nbar"}, + + // Different types of soft line-breaks. + {in: "foo=\r\nbar", want: "foobar"}, + {in: "foo=\nbar", want: "foobar"}, + {in: "foo=\rbar", want: "foo", err: "quotedprintable: invalid hex byte 0x0d"}, + {in: "foo=\r\r\r \nbar", want: "foo", err: `quotedprintable: invalid bytes after =: "\r\r\r \n"`}, + // Issue 15486, accept trailing soft line-break at end of input. 
+ {in: "foo=", want: "foo"}, + {in: "=", want: "", err: `quotedprintable: invalid bytes after =: ""`}, + + // Example from RFC 2045: + {in: "Now's the time =\n" + "for all folk to come=\n" + " to the aid of their country.", + want: "Now's the time for all folk to come to the aid of their country."}, + {in: "accept UTF-8 right quotation mark: ’", + want: "accept UTF-8 right quotation mark: ’"}, + } + for _, tt := range tests { + var buf strings.Builder + _, err := io.Copy(&buf, NewReader(strings.NewReader(tt.in))) + if got := buf.String(); got != tt.want { + t.Errorf("for %q, got %q; want %q", tt.in, got, tt.want) + } + switch verr := tt.err.(type) { + case nil: + if err != nil { + t.Errorf("for %q, got unexpected error: %v", tt.in, err) + } + case string: + if got := fmt.Sprint(err); got != verr { + t.Errorf("for %q, got error %q; want %q", tt.in, got, verr) + } + case error: + if err != verr { + t.Errorf("for %q, got error %q; want %q", tt.in, err, verr) + } + } + } + +} + +func everySequence(base, alpha string, length int, fn func(string)) { + if len(base) == length { + fn(base) + return + } + for i := 0; i < len(alpha); i++ { + everySequence(base+alpha[i:i+1], alpha, length, fn) + } +} + +var useQprint = flag.Bool("qprint", false, "Compare against the 'qprint' program.") + +var badSoftRx = regexp.MustCompile(`=([^\r\n]+?\n)|([^\r\n]+$)|(\r$)|(\r[^\n]+\n)|( \r\n)`) + +func TestExhaustive(t *testing.T) { + if *useQprint { + _, err := exec.LookPath("qprint") + if err != nil { + t.Fatalf("Error looking for qprint: %v", err) + } + } + + var buf strings.Builder + res := make(map[string]int) + n := 6 + if testing.Short() { + n = 4 + } + everySequence("", "0A \r\n=", n, func(s string) { + if strings.HasSuffix(s, "=") || strings.Contains(s, "==") { + return + } + buf.Reset() + _, err := io.Copy(&buf, NewReader(strings.NewReader(s))) + if err != nil { + errStr := err.Error() + if strings.Contains(errStr, "invalid bytes after =:") { + errStr = "invalid bytes after =" + } + 
res[errStr]++ + if strings.Contains(errStr, "invalid hex byte ") { + if strings.HasSuffix(errStr, "0x20") && (strings.Contains(s, "=0 ") || strings.Contains(s, "=A ") || strings.Contains(s, "= ")) { + return + } + if strings.HasSuffix(errStr, "0x3d") && (strings.Contains(s, "=0=") || strings.Contains(s, "=A=")) { + return + } + if strings.HasSuffix(errStr, "0x0a") || strings.HasSuffix(errStr, "0x0d") { + // bunch of cases; since whitespace at the end of a line before \n is removed. + return + } + } + if strings.Contains(errStr, "unexpected EOF") { + return + } + if errStr == "invalid bytes after =" && badSoftRx.MatchString(s) { + return + } + t.Errorf("decode(%q) = %v", s, err) + return + } + if *useQprint { + cmd := exec.Command("qprint", "-d") + cmd.Stdin = strings.NewReader(s) + stderr, err := cmd.StderrPipe() + if err != nil { + panic(err) + } + qpres := make(chan any, 2) + go func() { + br := bufio.NewReader(stderr) + s, _ := br.ReadString('\n') + if s != "" { + qpres <- errors.New(s) + if cmd.Process != nil { + // It can get stuck on invalid input, like: + // echo -n "0000= " | qprint -d + cmd.Process.Kill() + } + } + }() + go func() { + want, err := cmd.Output() + if err == nil { + qpres <- want + } + }() + select { + case got := <-qpres: + if want, ok := got.([]byte); ok { + if string(want) != buf.String() { + t.Errorf("go decode(%q) = %q; qprint = %q", s, want, buf.String()) + } + } else { + t.Logf("qprint -d(%q) = %v", s, got) + } + case <-time.After(5 * time.Second): + t.Logf("qprint timeout on %q", s) + } + } + res["OK"]++ + }) + var outcomes []string + for k, v := range res { + outcomes = append(outcomes, fmt.Sprintf("%v: %d", k, v)) + } + sort.Strings(outcomes) + got := strings.Join(outcomes, "\n") + want := `OK: 28934 +invalid bytes after =: 3949 +quotedprintable: invalid hex byte 0x0d: 2048 +unexpected EOF: 194` + if testing.Short() { + want = `OK: 896 +invalid bytes after =: 100 +quotedprintable: invalid hex byte 0x0d: 26 +unexpected EOF: 3` + } + 
+ if got != want { + t.Errorf("Got:\n%s\nWant:\n%s", got, want) + } +} diff --git a/src/mime/quotedprintable/writer.go b/src/mime/quotedprintable/writer.go new file mode 100644 index 0000000..16ea0bf --- /dev/null +++ b/src/mime/quotedprintable/writer.go @@ -0,0 +1,172 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package quotedprintable + +import "io" + +const lineMaxLen = 76 + +// A Writer is a quoted-printable writer that implements io.WriteCloser. +type Writer struct { + // Binary mode treats the writer's input as pure binary and processes end of + // line bytes as binary data. + Binary bool + + w io.Writer + i int + line [78]byte + cr bool +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w} +} + +// Write encodes p using quoted-printable encoding and writes it to the +// underlying io.Writer. It limits line length to 76 characters. The encoded +// bytes are not necessarily flushed until the Writer is closed. +func (w *Writer) Write(p []byte) (n int, err error) { + for i, b := range p { + switch { + // Simple writes are done in batch. + case b >= '!' && b <= '~' && b != '=': + continue + case isWhitespace(b) || !w.Binary && (b == '\n' || b == '\r'): + continue + } + + if i > n { + if err := w.write(p[n:i]); err != nil { + return n, err + } + n = i + } + + if err := w.encode(b); err != nil { + return n, err + } + n++ + } + + if n == len(p) { + return n, nil + } + + if err := w.write(p[n:]); err != nil { + return n, err + } + + return len(p), nil +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. 
+func (w *Writer) Close() error { + if err := w.checkLastByte(); err != nil { + return err + } + + return w.flush() +} + +// write limits text encoded in quoted-printable to 76 characters per line. +func (w *Writer) write(p []byte) error { + for _, b := range p { + if b == '\n' || b == '\r' { + // If the previous byte was \r, the CRLF has already been inserted. + if w.cr && b == '\n' { + w.cr = false + continue + } + + if b == '\r' { + w.cr = true + } + + if err := w.checkLastByte(); err != nil { + return err + } + if err := w.insertCRLF(); err != nil { + return err + } + continue + } + + if w.i == lineMaxLen-1 { + if err := w.insertSoftLineBreak(); err != nil { + return err + } + } + + w.line[w.i] = b + w.i++ + w.cr = false + } + + return nil +} + +func (w *Writer) encode(b byte) error { + if lineMaxLen-1-w.i < 3 { + if err := w.insertSoftLineBreak(); err != nil { + return err + } + } + + w.line[w.i] = '=' + w.line[w.i+1] = upperhex[b>>4] + w.line[w.i+2] = upperhex[b&0x0f] + w.i += 3 + + return nil +} + +const upperhex = "0123456789ABCDEF" + +// checkLastByte encodes the last buffered byte if it is a space or a tab. 
+func (w *Writer) checkLastByte() error { + if w.i == 0 { + return nil + } + + b := w.line[w.i-1] + if isWhitespace(b) { + w.i-- + if err := w.encode(b); err != nil { + return err + } + } + + return nil +} + +func (w *Writer) insertSoftLineBreak() error { + w.line[w.i] = '=' + w.i++ + + return w.insertCRLF() +} + +func (w *Writer) insertCRLF() error { + w.line[w.i] = '\r' + w.line[w.i+1] = '\n' + w.i += 2 + + return w.flush() +} + +func (w *Writer) flush() error { + if _, err := w.w.Write(w.line[:w.i]); err != nil { + return err + } + + w.i = 0 + return nil +} + +func isWhitespace(b byte) bool { + return b == ' ' || b == '\t' +} diff --git a/src/mime/quotedprintable/writer_test.go b/src/mime/quotedprintable/writer_test.go new file mode 100644 index 0000000..07411fe --- /dev/null +++ b/src/mime/quotedprintable/writer_test.go @@ -0,0 +1,158 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package quotedprintable + +import ( + "bytes" + "io" + "strings" + "testing" +) + +func TestWriter(t *testing.T) { + testWriter(t, false) +} + +func TestWriterBinary(t *testing.T) { + testWriter(t, true) +} + +func testWriter(t *testing.T, binary bool) { + tests := []struct { + in, want, wantB string + }{ + {in: "", want: ""}, + {in: "foo bar", want: "foo bar"}, + {in: "foo bar=", want: "foo bar=3D"}, + {in: "foo bar\r", want: "foo bar\r\n", wantB: "foo bar=0D"}, + {in: "foo bar\r\r", want: "foo bar\r\n\r\n", wantB: "foo bar=0D=0D"}, + {in: "foo bar\n", want: "foo bar\r\n", wantB: "foo bar=0A"}, + {in: "foo bar\r\n", want: "foo bar\r\n", wantB: "foo bar=0D=0A"}, + {in: "foo bar\r\r\n", want: "foo bar\r\n\r\n", wantB: "foo bar=0D=0D=0A"}, + {in: "foo bar ", want: "foo bar=20"}, + {in: "foo bar\t", want: "foo bar=09"}, + {in: "foo bar ", want: "foo bar =20"}, + {in: "foo bar \n", want: "foo bar=20\r\n", wantB: "foo bar =0A"}, + {in: "foo bar \r", want: "foo bar=20\r\n", wantB: "foo bar =0D"}, + {in: "foo bar \r\n", want: "foo bar=20\r\n", wantB: "foo bar =0D=0A"}, + {in: "foo bar \n", want: "foo bar =20\r\n", wantB: "foo bar =0A"}, + {in: "foo bar \n ", want: "foo bar =20\r\n=20", wantB: "foo bar =0A=20"}, + {in: "¡Hola Señor!", want: "=C2=A1Hola Se=C3=B1or!"}, + { + in: "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", + want: "\t !\"#$%&'()*+,-./ :;<>?@[\\]^_`{|}~", + }, + { + in: strings.Repeat("a", 75), + want: strings.Repeat("a", 75), + }, + { + in: strings.Repeat("a", 76), + want: strings.Repeat("a", 75) + "=\r\na", + }, + { + in: strings.Repeat("a", 72) + "=", + want: strings.Repeat("a", 72) + "=3D", + }, + { + in: strings.Repeat("a", 73) + "=", + want: strings.Repeat("a", 73) + "=\r\n=3D", + }, + { + in: strings.Repeat("a", 74) + "=", + want: strings.Repeat("a", 74) + "=\r\n=3D", + }, + { + in: strings.Repeat("a", 75) + "=", + want: strings.Repeat("a", 75) + "=\r\n=3D", + }, + { + in: strings.Repeat(" ", 73), + want: strings.Repeat(" ", 72) + "=20", + }, + { + in: 
strings.Repeat(" ", 74), + want: strings.Repeat(" ", 73) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 75), + want: strings.Repeat(" ", 74) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 76), + want: strings.Repeat(" ", 75) + "=\r\n=20", + }, + { + in: strings.Repeat(" ", 77), + want: strings.Repeat(" ", 75) + "=\r\n =20", + }, + } + + for _, tt := range tests { + buf := new(strings.Builder) + w := NewWriter(buf) + + want := tt.want + if binary { + w.Binary = true + if tt.wantB != "" { + want = tt.wantB + } + } + + if _, err := w.Write([]byte(tt.in)); err != nil { + t.Errorf("Write(%q): %v", tt.in, err) + continue + } + if err := w.Close(); err != nil { + t.Errorf("Close(): %v", err) + continue + } + got := buf.String() + if got != want { + t.Errorf("Write(%q), got:\n%q\nwant:\n%q", tt.in, got, want) + } + } +} + +func TestRoundTrip(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriter(buf) + if _, err := w.Write(testMsg); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + + r := NewReader(buf) + gotBytes, err := io.ReadAll(r) + if err != nil { + t.Fatalf("Error while reading from Reader: %v", err) + } + got := string(gotBytes) + if got != string(testMsg) { + t.Errorf("Encoding and decoding changed the message, got:\n%s", got) + } +} + +// From https://fr.wikipedia.org/wiki/Quoted-Printable +var testMsg = []byte("Quoted-Printable (QP) est un format d'encodage de données codées sur 8 bits, qui utilise exclusivement les caractères alphanumériques imprimables du code ASCII (7 bits).\r\n" + + "\r\n" + + "En effet, les différents codages comprennent de nombreux caractères qui ne sont pas représentables en ASCII (par exemple les caractères accentués), ainsi que des caractères dits « non-imprimables ».\r\n" + + "\r\n" + + "L'encodage Quoted-Printable permet de remédier à ce problème, en procédant de la manière suivante :\r\n" + + "\r\n" + + "Un octet correspondant à un caractère imprimable de 
l'ASCII sauf le signe égal (donc un caractère de code ASCII entre 33 et 60 ou entre 62 et 126) ou aux caractères de saut de ligne (codes ASCII 13 et 10) ou une suite de tabulations et espaces non situées en fin de ligne (de codes ASCII respectifs 9 et 32) est représenté tel quel.\r\n" + + "Un octet qui ne correspond pas à la définition ci-dessus (caractère non imprimable de l'ASCII, tabulation ou espaces non suivies d'un caractère imprimable avant la fin de la ligne ou signe égal) est représenté par un signe égal, suivi de son numéro, exprimé en hexadécimal.\r\n" + + "Enfin, un signe égal suivi par un saut de ligne (donc la suite des trois caractères de codes ASCII 61, 13 et 10) peut être inséré n'importe où, afin de limiter la taille des lignes produites si nécessaire. Une limite de 76 caractères par ligne est généralement respectée.\r\n") + +func BenchmarkWriter(b *testing.B) { + for i := 0; i < b.N; i++ { + w := NewWriter(io.Discard) + w.Write(testMsg) + w.Close() + } +} |