diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
commit | f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch) | |
tree | 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/net/mail | |
parent | Initial commit. (diff) | |
download | golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip |
Adding upstream version 1.22.1. upstream/1.22.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/net/mail')
-rw-r--r-- | src/net/mail/example_test.go | 77 | ||||
-rw-r--r-- | src/net/mail/message.go | 915 | ||||
-rw-r--r-- | src/net/mail/message_test.go | 1219 |
3 files changed, 2211 insertions, 0 deletions
diff --git a/src/net/mail/example_test.go b/src/net/mail/example_test.go new file mode 100644 index 0000000..d325dc7 --- /dev/null +++ b/src/net/mail/example_test.go @@ -0,0 +1,77 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mail_test + +import ( + "fmt" + "io" + "log" + "net/mail" + "strings" +) + +func ExampleParseAddressList() { + const list = "Alice <alice@example.com>, Bob <bob@example.com>, Eve <eve@example.com>" + emails, err := mail.ParseAddressList(list) + if err != nil { + log.Fatal(err) + } + + for _, v := range emails { + fmt.Println(v.Name, v.Address) + } + + // Output: + // Alice alice@example.com + // Bob bob@example.com + // Eve eve@example.com +} + +func ExampleParseAddress() { + e, err := mail.ParseAddress("Alice <alice@example.com>") + if err != nil { + log.Fatal(err) + } + + fmt.Println(e.Name, e.Address) + + // Output: + // Alice alice@example.com +} + +func ExampleReadMessage() { + msg := `Date: Mon, 23 Jun 2015 11:40:36 -0400 +From: Gopher <from@example.com> +To: Another Gopher <to@example.com> +Subject: Gophers at Gophercon + +Message body +` + + r := strings.NewReader(msg) + m, err := mail.ReadMessage(r) + if err != nil { + log.Fatal(err) + } + + header := m.Header + fmt.Println("Date:", header.Get("Date")) + fmt.Println("From:", header.Get("From")) + fmt.Println("To:", header.Get("To")) + fmt.Println("Subject:", header.Get("Subject")) + + body, err := io.ReadAll(m.Body) + if err != nil { + log.Fatal(err) + } + fmt.Printf("%s", body) + + // Output: + // Date: Mon, 23 Jun 2015 11:40:36 -0400 + // From: Gopher <from@example.com> + // To: Another Gopher <to@example.com> + // Subject: Gophers at Gophercon + // Message body +} diff --git a/src/net/mail/message.go b/src/net/mail/message.go new file mode 100644 index 0000000..fc2a9e4 --- /dev/null +++ b/src/net/mail/message.go @@ -0,0 +1,915 @@ +// Copyright 2011 The 
Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package mail implements parsing of mail messages. + +For the most part, this package follows the syntax as specified by RFC 5322 and +extended by RFC 6532. +Notable divergences: + - Obsolete address formats are not parsed, including addresses with + embedded route information. + - The full range of spacing (the CFWS syntax element) is not supported, + such as breaking addresses across lines. + - No unicode normalization is performed. + - The special characters ()[]:;@\, are allowed to appear unquoted in names. + - A leading From line is permitted, as in mbox format (RFC 4155). +*/ +package mail + +import ( + "bufio" + "errors" + "fmt" + "io" + "log" + "mime" + "net/textproto" + "strings" + "sync" + "time" + "unicode/utf8" +) + +var debug = debugT(false) + +type debugT bool + +func (d debugT) Printf(format string, args ...any) { + if d { + log.Printf(format, args...) + } +} + +// A Message represents a parsed mail message. +type Message struct { + Header Header + Body io.Reader +} + +// ReadMessage reads a message from r. +// The headers are parsed, and the body of the message will be available +// for reading from msg.Body. +func ReadMessage(r io.Reader) (msg *Message, err error) { + tp := textproto.NewReader(bufio.NewReader(r)) + + hdr, err := readHeader(tp) + if err != nil && (err != io.EOF || len(hdr) == 0) { + return nil, err + } + + return &Message{ + Header: Header(hdr), + Body: tp.R, + }, nil +} + +// readHeader reads the message headers from r. +// This is like textproto.ReadMIMEHeader, but doesn't validate. +// The fix for issue #53188 tightened up net/textproto to enforce +// restrictions of RFC 7230. +// This package implements RFC 5322, which does not have those restrictions. +// This function copies the relevant code from net/textproto, +// simplified for RFC 5322. 
+func readHeader(r *textproto.Reader) (map[string][]string, error) { + m := make(map[string][]string) + + // The first line cannot start with a leading space. + if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') { + line, err := r.ReadLine() + if err != nil { + return m, err + } + return m, errors.New("malformed initial line: " + line) + } + + for { + kv, err := r.ReadContinuedLine() + if kv == "" { + return m, err + } + + // Key ends at first colon. + k, v, ok := strings.Cut(kv, ":") + if !ok { + return m, errors.New("malformed header line: " + kv) + } + key := textproto.CanonicalMIMEHeaderKey(k) + + // Permit empty key, because that is what we did in the past. + if key == "" { + continue + } + + // Skip initial spaces in value. + value := strings.TrimLeft(v, " \t") + + m[key] = append(m[key], value) + + if err != nil { + return m, err + } + } +} + +// Layouts suitable for passing to time.Parse. +// These are tried in order. +var ( + dateLayoutsBuildOnce sync.Once + dateLayouts []string +) + +func buildDateLayouts() { + // Generate layouts based on RFC 5322, section 3.3. + + dows := [...]string{"", "Mon, "} // day-of-week + days := [...]string{"2", "02"} // day = 1*2DIGIT + years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT + seconds := [...]string{":05", ""} // second + // "-0700 (MST)" is not in RFC 5322, but is common. + zones := [...]string{"-0700", "MST", "UT"} // zone = (("+" / "-") 4DIGIT) / "UT" / "GMT" / ... + + for _, dow := range dows { + for _, day := range days { + for _, year := range years { + for _, second := range seconds { + for _, zone := range zones { + s := dow + day + " Jan " + year + " 15:04" + second + " " + zone + dateLayouts = append(dateLayouts, s) + } + } + } + } + } +} + +// ParseDate parses an RFC 5322 date string. +func ParseDate(date string) (time.Time, error) { + dateLayoutsBuildOnce.Do(buildDateLayouts) + // CR and LF must match and are tolerated anywhere in the date field. 
+ date = strings.ReplaceAll(date, "\r\n", "") + if strings.Contains(date, "\r") { + return time.Time{}, errors.New("mail: header has a CR without LF") + } + // Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII + p := addrParser{date, nil} + p.skipSpace() + + // RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone + // zone length is always 5 chars unless obsolete (obs-zone) + if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 { + date = p.s[:ind+5] + p.s = p.s[ind+5:] + } else { + ind := strings.Index(p.s, "T") + if ind == 0 { + // In this case we have the following date formats: + // * Thu, 20 Nov 1997 09:55:06 MDT + // * Thu, 20 Nov 1997 09:55:06 MDT (MDT) + // * Thu, 20 Nov 1997 09:55:06 MDT (This comment) + ind = strings.Index(p.s[1:], "T") + if ind != -1 { + ind++ + } + } + + if ind != -1 && len(p.s) >= ind+5 { + // The last letter T of the obsolete time zone is checked when no standard time zone is found. + // If T is misplaced, the date to parse is garbage. + date = p.s[:ind+1] + p.s = p.s[ind+1:] + } + } + if !p.skipCFWS() { + return time.Time{}, errors.New("mail: misformatted parenthetical comment") + } + for _, layout := range dateLayouts { + t, err := time.Parse(layout, date) + if err == nil { + return t, nil + } + } + return time.Time{}, errors.New("mail: header could not be parsed") +} + +// A Header represents the key-value pairs in a mail message header. +type Header map[string][]string + +// Get gets the first value associated with the given key. +// It is case insensitive; CanonicalMIMEHeaderKey is used +// to canonicalize the provided key. +// If there are no values associated with the key, Get returns "". +// To access multiple values of a key, or to use non-canonical keys, +// access the map directly. 
+func (h Header) Get(key string) string { + return textproto.MIMEHeader(h).Get(key) +} + +var ErrHeaderNotPresent = errors.New("mail: header not in message") + +// Date parses the Date header field. +func (h Header) Date() (time.Time, error) { + hdr := h.Get("Date") + if hdr == "" { + return time.Time{}, ErrHeaderNotPresent + } + return ParseDate(hdr) +} + +// AddressList parses the named header field as a list of addresses. +func (h Header) AddressList(key string) ([]*Address, error) { + hdr := h.Get(key) + if hdr == "" { + return nil, ErrHeaderNotPresent + } + return ParseAddressList(hdr) +} + +// Address represents a single mail address. +// An address such as "Barry Gibbs <bg@example.com>" is represented +// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}. +type Address struct { + Name string // Proper name; may be empty. + Address string // user@domain +} + +// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" +func ParseAddress(address string) (*Address, error) { + return (&addrParser{s: address}).parseSingleAddress() +} + +// ParseAddressList parses the given string as a list of addresses. +func ParseAddressList(list string) ([]*Address, error) { + return (&addrParser{s: list}).parseAddressList() +} + +// An AddressParser is an RFC 5322 address parser. +type AddressParser struct { + // WordDecoder optionally specifies a decoder for RFC 2047 encoded-words. + WordDecoder *mime.WordDecoder +} + +// Parse parses a single RFC 5322 address of the +// form "Gogh Fir <gf@example.com>" or "foo@example.com". +func (p *AddressParser) Parse(address string) (*Address, error) { + return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() +} + +// ParseList parses the given string as a list of comma-separated addresses +// of the form "Gogh Fir <gf@example.com>" or "foo@example.com". 
+func (p *AddressParser) ParseList(list string) ([]*Address, error) { + return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList() +} + +// String formats the address as a valid RFC 5322 address. +// If the address's name contains non-ASCII characters +// the name will be rendered according to RFC 2047. +func (a *Address) String() string { + // Format address local@domain + at := strings.LastIndex(a.Address, "@") + var local, domain string + if at < 0 { + // This is a malformed address ("@" is required in addr-spec); + // treat the whole address as local-part. + local = a.Address + } else { + local, domain = a.Address[:at], a.Address[at+1:] + } + + // Add quotes if needed + quoteLocal := false + for i, r := range local { + if isAtext(r, false) { + continue + } + if r == '.' { + // Dots are okay if they are surrounded by atext. + // We only need to check that the previous byte is + // not a dot, and this isn't the end of the string. + if i > 0 && local[i-1] != '.' && i < len(local)-1 { + continue + } + } + quoteLocal = true + break + } + if quoteLocal { + local = quoteString(local) + + } + + s := "<" + local + "@" + domain + ">" + + if a.Name == "" { + return s + } + + // If every character is printable ASCII, quoting is simple. + allPrintable := true + for _, r := range a.Name { + // isWSP here should actually be isFWS, + // but we don't support folding yet. + if !isVchar(r) && !isWSP(r) || isMultibyte(r) { + allPrintable = false + break + } + } + if allPrintable { + return quoteString(a.Name) + " " + s + } + + // Text in an encoded-word in a display-name must not contain certain + // characters like quotes or parentheses (see RFC 2047 section 5.3). + // When this is the case encode the name using base64 encoding. 
+ if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") { + return mime.BEncoding.Encode("utf-8", a.Name) + " " + s + } + return mime.QEncoding.Encode("utf-8", a.Name) + " " + s +} + +type addrParser struct { + s string + dec *mime.WordDecoder // may be nil +} + +func (p *addrParser) parseAddressList() ([]*Address, error) { + var list []*Address + for { + p.skipSpace() + + // allow skipping empty entries (RFC5322 obs-addr-list) + if p.consume(',') { + continue + } + + addrs, err := p.parseAddress(true) + if err != nil { + return nil, err + } + list = append(list, addrs...) + + if !p.skipCFWS() { + return nil, errors.New("mail: misformatted parenthetical comment") + } + if p.empty() { + break + } + if p.peek() != ',' { + return nil, errors.New("mail: expected comma") + } + + // Skip empty entries for obs-addr-list. + for p.consume(',') { + p.skipSpace() + } + if p.empty() { + break + } + } + return list, nil +} + +func (p *addrParser) parseSingleAddress() (*Address, error) { + addrs, err := p.parseAddress(true) + if err != nil { + return nil, err + } + if !p.skipCFWS() { + return nil, errors.New("mail: misformatted parenthetical comment") + } + if !p.empty() { + return nil, fmt.Errorf("mail: expected single address, got %q", p.s) + } + if len(addrs) == 0 { + return nil, errors.New("mail: empty group") + } + if len(addrs) > 1 { + return nil, errors.New("mail: group with multiple addresses") + } + return addrs[0], nil +} + +// parseAddress parses a single RFC 5322 address at the start of p. +func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) { + debug.Printf("parseAddress: %q", p.s) + p.skipSpace() + if p.empty() { + return nil, errors.New("mail: no address") + } + + // address = mailbox / group + // mailbox = name-addr / addr-spec + // group = display-name ":" [group-list] ";" [CFWS] + + // addr-spec has a more restricted grammar than name-addr, + // so try parsing it first, and fallback to name-addr. 
+ // TODO(dsymonds): Is this really correct? + spec, err := p.consumeAddrSpec() + if err == nil { + var displayName string + p.skipSpace() + if !p.empty() && p.peek() == '(' { + displayName, err = p.consumeDisplayNameComment() + if err != nil { + return nil, err + } + } + + return []*Address{{ + Name: displayName, + Address: spec, + }}, err + } + debug.Printf("parseAddress: not an addr-spec: %v", err) + debug.Printf("parseAddress: state is now %q", p.s) + + // display-name + var displayName string + if p.peek() != '<' { + displayName, err = p.consumePhrase() + if err != nil { + return nil, err + } + } + debug.Printf("parseAddress: displayName=%q", displayName) + + p.skipSpace() + if handleGroup { + if p.consume(':') { + return p.consumeGroupList() + } + } + // angle-addr = "<" addr-spec ">" + if !p.consume('<') { + atext := true + for _, r := range displayName { + if !isAtext(r, true) { + atext = false + break + } + } + if atext { + // The input is like "foo.bar"; it's possible the input + // meant to be "foo.bar@domain", or "foo.bar <...>". + return nil, errors.New("mail: missing '@' or angle-addr") + } + // The input is like "Full Name", which couldn't possibly be a + // valid email address if followed by "@domain"; the input + // likely meant to be "Full Name <...>". + return nil, errors.New("mail: no angle-addr") + } + spec, err = p.consumeAddrSpec() + if err != nil { + return nil, err + } + if !p.consume('>') { + return nil, errors.New("mail: unclosed angle-addr") + } + debug.Printf("parseAddress: spec=%q", spec) + + return []*Address{{ + Name: displayName, + Address: spec, + }}, nil +} + +func (p *addrParser) consumeGroupList() ([]*Address, error) { + var group []*Address + // handle empty group. + p.skipSpace() + if p.consume(';') { + if !p.skipCFWS() { + return nil, errors.New("mail: misformatted parenthetical comment") + } + return group, nil + } + + for { + p.skipSpace() + // embedded groups not allowed. 
+ addrs, err := p.parseAddress(false) + if err != nil { + return nil, err + } + group = append(group, addrs...) + + if !p.skipCFWS() { + return nil, errors.New("mail: misformatted parenthetical comment") + } + if p.consume(';') { + if !p.skipCFWS() { + return nil, errors.New("mail: misformatted parenthetical comment") + } + break + } + if !p.consume(',') { + return nil, errors.New("mail: expected comma") + } + } + return group, nil +} + +// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. +func (p *addrParser) consumeAddrSpec() (spec string, err error) { + debug.Printf("consumeAddrSpec: %q", p.s) + + orig := *p + defer func() { + if err != nil { + *p = orig + } + }() + + // local-part = dot-atom / quoted-string + var localPart string + p.skipSpace() + if p.empty() { + return "", errors.New("mail: no addr-spec") + } + if p.peek() == '"' { + // quoted-string + debug.Printf("consumeAddrSpec: parsing quoted-string") + localPart, err = p.consumeQuotedString() + if localPart == "" { + err = errors.New("mail: empty quoted string in addr-spec") + } + } else { + // dot-atom + debug.Printf("consumeAddrSpec: parsing dot-atom") + localPart, err = p.consumeAtom(true, false) + } + if err != nil { + debug.Printf("consumeAddrSpec: failed: %v", err) + return "", err + } + + if !p.consume('@') { + return "", errors.New("mail: missing @ in addr-spec") + } + + // domain = dot-atom / domain-literal + var domain string + p.skipSpace() + if p.empty() { + return "", errors.New("mail: no domain in addr-spec") + } + // TODO(dsymonds): Handle domain-literal + domain, err = p.consumeAtom(true, false) + if err != nil { + return "", err + } + + return localPart + "@" + domain, nil +} + +// consumePhrase parses the RFC 5322 phrase at the start of p. 
+func (p *addrParser) consumePhrase() (phrase string, err error) { + debug.Printf("consumePhrase: [%s]", p.s) + // phrase = 1*word + var words []string + var isPrevEncoded bool + for { + // obs-phrase allows CFWS after one word + if len(words) > 0 { + if !p.skipCFWS() { + return "", errors.New("mail: misformatted parenthetical comment") + } + } + // word = atom / quoted-string + var word string + p.skipSpace() + if p.empty() { + break + } + isEncoded := false + if p.peek() == '"' { + // quoted-string + word, err = p.consumeQuotedString() + } else { + // atom + // We actually parse dot-atom here to be more permissive + // than what RFC 5322 specifies. + word, err = p.consumeAtom(true, true) + if err == nil { + word, isEncoded, err = p.decodeRFC2047Word(word) + } + } + + if err != nil { + break + } + debug.Printf("consumePhrase: consumed %q", word) + if isPrevEncoded && isEncoded { + words[len(words)-1] += word + } else { + words = append(words, word) + } + isPrevEncoded = isEncoded + } + // Ignore any error if we got at least one word. + if err != nil && len(words) == 0 { + debug.Printf("consumePhrase: hit err: %v", err) + return "", fmt.Errorf("mail: missing word in phrase: %v", err) + } + phrase = strings.Join(words, " ") + return phrase, nil +} + +// consumeQuotedString parses the quoted string at the start of p. +func (p *addrParser) consumeQuotedString() (qs string, err error) { + // Assume first byte is '"'. 
+ i := 1 + qsb := make([]rune, 0, 10) + + escaped := false + +Loop: + for { + r, size := utf8.DecodeRuneInString(p.s[i:]) + + switch { + case size == 0: + return "", errors.New("mail: unclosed quoted-string") + + case size == 1 && r == utf8.RuneError: + return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) + + case escaped: + // quoted-pair = ("\" (VCHAR / WSP)) + + if !isVchar(r) && !isWSP(r) { + return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) + } + + qsb = append(qsb, r) + escaped = false + + case isQtext(r) || isWSP(r): + // qtext (printable US-ASCII excluding " and \), or + // FWS (almost; we're ignoring CRLF) + qsb = append(qsb, r) + + case r == '"': + break Loop + + case r == '\\': + escaped = true + + default: + return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) + + } + + i += size + } + p.s = p.s[i+1:] + return string(qsb), nil +} + +// consumeAtom parses an RFC 5322 atom at the start of p. +// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
+// If permissive is true, consumeAtom will not fail on: +// - leading/trailing/double dots in the atom (see golang.org/issue/4938) +func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { + i := 0 + +Loop: + for { + r, size := utf8.DecodeRuneInString(p.s[i:]) + switch { + case size == 1 && r == utf8.RuneError: + return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) + + case size == 0 || !isAtext(r, dot): + break Loop + + default: + i += size + + } + } + + if i == 0 { + return "", errors.New("mail: invalid string") + } + atom, p.s = p.s[:i], p.s[i:] + if !permissive { + if strings.HasPrefix(atom, ".") { + return "", errors.New("mail: leading dot in atom") + } + if strings.Contains(atom, "..") { + return "", errors.New("mail: double dot in atom") + } + if strings.HasSuffix(atom, ".") { + return "", errors.New("mail: trailing dot in atom") + } + } + return atom, nil +} + +func (p *addrParser) consumeDisplayNameComment() (string, error) { + if !p.consume('(') { + return "", errors.New("mail: comment does not start with (") + } + comment, ok := p.consumeComment() + if !ok { + return "", errors.New("mail: misformatted parenthetical comment") + } + + // TODO(stapelberg): parse quoted-string within comment + words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' }) + for idx, word := range words { + decoded, isEncoded, err := p.decodeRFC2047Word(word) + if err != nil { + return "", err + } + if isEncoded { + words[idx] = decoded + } + } + + return strings.Join(words, " "), nil +} + +func (p *addrParser) consume(c byte) bool { + if p.empty() || p.peek() != c { + return false + } + p.s = p.s[1:] + return true +} + +// skipSpace skips the leading space and tab characters. 
+func (p *addrParser) skipSpace() { + p.s = strings.TrimLeft(p.s, " \t") +} + +func (p *addrParser) peek() byte { + return p.s[0] +} + +func (p *addrParser) empty() bool { + return p.len() == 0 +} + +func (p *addrParser) len() int { + return len(p.s) +} + +// skipCFWS skips CFWS as defined in RFC5322. +func (p *addrParser) skipCFWS() bool { + p.skipSpace() + + for { + if !p.consume('(') { + break + } + + if _, ok := p.consumeComment(); !ok { + return false + } + + p.skipSpace() + } + + return true +} + +func (p *addrParser) consumeComment() (string, bool) { + // '(' already consumed. + depth := 1 + + var comment string + for { + if p.empty() || depth == 0 { + break + } + + if p.peek() == '\\' && p.len() > 1 { + p.s = p.s[1:] + } else if p.peek() == '(' { + depth++ + } else if p.peek() == ')' { + depth-- + } + if depth > 0 { + comment += p.s[:1] + } + p.s = p.s[1:] + } + + return comment, depth == 0 +} + +func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) { + dec := p.dec + if dec == nil { + dec = &rfc2047Decoder + } + + // Substitute our own CharsetReader function so that we can tell + // whether an error from the Decode method was due to the + // CharsetReader (meaning the charset is invalid). + // We used to look for the charsetError type in the error result, + // but that behaves badly with CharsetReaders other than the + // one in rfc2047Decoder. 
+ adec := *dec + charsetReaderError := false + adec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + if dec.CharsetReader == nil { + charsetReaderError = true + return nil, charsetError(charset) + } + r, err := dec.CharsetReader(charset, input) + if err != nil { + charsetReaderError = true + } + return r, err + } + word, err = adec.Decode(s) + if err == nil { + return word, true, nil + } + + // If the error came from the character set reader + // (meaning the character set itself is invalid + // but the decoding worked fine until then), + // return the original text and the error, + // with isEncoded=true. + if charsetReaderError { + return s, true, err + } + + // Ignore invalid RFC 2047 encoded-word errors. + return s, false, nil +} + +var rfc2047Decoder = mime.WordDecoder{ + CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { + return nil, charsetError(charset) + }, +} + +type charsetError string + +func (e charsetError) Error() string { + return fmt.Sprintf("charset not supported: %q", string(e)) +} + +// isAtext reports whether r is an RFC 5322 atext character. +// If dot is true, period is included. +func isAtext(r rune, dot bool) bool { + switch r { + case '.': + return dot + + // RFC 5322 3.2.3. specials + case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials + return false + } + return isVchar(r) +} + +// isQtext reports whether r is an RFC 5322 qtext character. +func isQtext(r rune) bool { + // Printable US-ASCII, excluding backslash or quote. + if r == '\\' || r == '"' { + return false + } + return isVchar(r) +} + +// quoteString renders a string as an RFC 5322 quoted-string. 
+func quoteString(s string) string { + var b strings.Builder + b.WriteByte('"') + for _, r := range s { + if isQtext(r) || isWSP(r) { + b.WriteRune(r) + } else if isVchar(r) { + b.WriteByte('\\') + b.WriteRune(r) + } + } + b.WriteByte('"') + return b.String() +} + +// isVchar reports whether r is an RFC 5322 VCHAR character. +func isVchar(r rune) bool { + // Visible (printing) characters. + return '!' <= r && r <= '~' || isMultibyte(r) +} + +// isMultibyte reports whether r is a multi-byte UTF-8 character +// as supported by RFC 6532. +func isMultibyte(r rune) bool { + return r >= utf8.RuneSelf +} + +// isWSP reports whether r is a WSP (white space). +// WSP is a space or horizontal tab (RFC 5234 Appendix B). +func isWSP(r rune) bool { + return r == ' ' || r == '\t' +} diff --git a/src/net/mail/message_test.go b/src/net/mail/message_test.go new file mode 100644 index 0000000..1f2f62a --- /dev/null +++ b/src/net/mail/message_test.go @@ -0,0 +1,1219 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mail + +import ( + "bytes" + "io" + "mime" + "reflect" + "strings" + "testing" + "time" +) + +var parseTests = []struct { + in string + header Header + body string +}{ + { + // RFC 5322, Appendix A.1.1 + in: `From: John Doe <jdoe@machine.example> +To: Mary Smith <mary@example.net> +Subject: Saying Hello +Date: Fri, 21 Nov 1997 09:55:06 -0600 +Message-ID: <1234@local.machine.example> + +This is a message just to say hello. +So, "Hello". 
+`, + header: Header{ + "From": []string{"John Doe <jdoe@machine.example>"}, + "To": []string{"Mary Smith <mary@example.net>"}, + "Subject": []string{"Saying Hello"}, + "Date": []string{"Fri, 21 Nov 1997 09:55:06 -0600"}, + "Message-Id": []string{"<1234@local.machine.example>"}, + }, + body: "This is a message just to say hello.\nSo, \"Hello\".\n", + }, + { + // RFC 5965, Appendix B.1, a part of the multipart message (a header-only sub message) + in: `Feedback-Type: abuse +User-Agent: SomeGenerator/1.0 +Version: 1 +`, + header: Header{ + "Feedback-Type": []string{"abuse"}, + "User-Agent": []string{"SomeGenerator/1.0"}, + "Version": []string{"1"}, + }, + body: "", + }, + { + // RFC 5322 permits any printable ASCII character, + // except colon, in a header key. Issue #58862. + in: `From: iant@golang.org +Custom/Header: v + +Body +`, + header: Header{ + "From": []string{"iant@golang.org"}, + "Custom/Header": []string{"v"}, + }, + body: "Body\n", + }, + { + // RFC 4155 mbox format. We've historically permitted this, + // so we continue to permit it. Issue #60332. + in: `From iant@golang.org Mon Jun 19 00:00:00 2023 +From: iant@golang.org + +Hello, gophers! 
+`, + header: Header{ + "From": []string{"iant@golang.org"}, + "From iant@golang.org Mon Jun 19 00": []string{"00:00 2023"}, + }, + body: "Hello, gophers!\n", + }, +} + +func TestParsing(t *testing.T) { + for i, test := range parseTests { + msg, err := ReadMessage(bytes.NewBuffer([]byte(test.in))) + if err != nil { + t.Errorf("test #%d: Failed parsing message: %v", i, err) + continue + } + if !headerEq(msg.Header, test.header) { + t.Errorf("test #%d: Incorrectly parsed message header.\nGot:\n%+v\nWant:\n%+v", + i, msg.Header, test.header) + } + body, err := io.ReadAll(msg.Body) + if err != nil { + t.Errorf("test #%d: Failed reading body: %v", i, err) + continue + } + bodyStr := string(body) + if bodyStr != test.body { + t.Errorf("test #%d: Incorrectly parsed message body.\nGot:\n%+v\nWant:\n%+v", + i, bodyStr, test.body) + } + } +} + +func headerEq(a, b Header) bool { + if len(a) != len(b) { + return false + } + for k, as := range a { + bs, ok := b[k] + if !ok { + return false + } + if !reflect.DeepEqual(as, bs) { + return false + } + } + return true +} + +func TestDateParsing(t *testing.T) { + tests := []struct { + dateStr string + exp time.Time + }{ + // RFC 5322, Appendix A.1.1 + { + "Fri, 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + }, + // RFC 5322, Appendix A.6.2 + // Obsolete date. + { + "21 Nov 97 09:55:06 GMT", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("GMT", 0)), + }, + // Commonly found format not specified by RFC 5322. 
+ { + "Fri, 21 Nov 1997 09:55:06 -0600 (MDT)", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + }, + { + "Thu, 20 Nov 1997 09:55:06 -0600 (MDT)", + time.Date(1997, 11, 20, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + }, + { + "Thu, 20 Nov 1997 09:55:06 GMT (GMT)", + time.Date(1997, 11, 20, 9, 55, 6, 0, time.UTC), + }, + { + "Fri, 21 Nov 1997 09:55:06 +1300 (TOT)", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", +13*60*60)), + }, + } + for _, test := range tests { + hdr := Header{ + "Date": []string{test.dateStr}, + } + date, err := hdr.Date() + if err != nil { + t.Errorf("Header(Date: %s).Date(): %v", test.dateStr, err) + } else if !date.Equal(test.exp) { + t.Errorf("Header(Date: %s).Date() = %+v, want %+v", test.dateStr, date, test.exp) + } + + date, err = ParseDate(test.dateStr) + if err != nil { + t.Errorf("ParseDate(%s): %v", test.dateStr, err) + } else if !date.Equal(test.exp) { + t.Errorf("ParseDate(%s) = %+v, want %+v", test.dateStr, date, test.exp) + } + } +} + +func TestDateParsingCFWS(t *testing.T) { + tests := []struct { + dateStr string + exp time.Time + valid bool + }{ + // FWS-only. No date. + { + " ", + // nil is not allowed + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // FWS is allowed before optional day of week. + { + " Fri, 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + { + "21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + { + "Fri 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, // missing , + }, + // FWS is allowed before day of month but HTAB fails. + { + "Fri, 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + // FWS is allowed before and after year but HTAB fails. 
+ { + "Fri, 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + // FWS is allowed before zone but HTAB is not handled. Obsolete timezone is handled. + { + "Fri, 21 Nov 1997 09:55:06 CST", + time.Time{}, + true, + }, + // FWS is allowed after date and a CRLF is already replaced. + { + "Fri, 21 Nov 1997 09:55:06 CST (no leading FWS and a trailing CRLF) \r\n", + time.Time{}, + true, + }, + // CFWS is a reduced set of US-ASCII where space and accentuated are obsolete. No error. + { + "Fri, 21 Nov 1997 09:55:06 -0600 (MDT and non-US-ASCII signs éèç )", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + // CFWS is allowed after zone including a nested comment. + // Trailing FWS is allowed. + { + "Fri, 21 Nov 1997 09:55:06 -0600 \r\n (thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, + }, + // CRLF is incomplete and misplaced. + { + "Fri, 21 Nov 1997 \r 09:55:06 -0600 \r\n (thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // CRLF is complete but misplaced. No error is returned. + { + "Fri, 21 Nov 199\r\n7 09:55:06 -0600 \r\n (thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + true, // should be false in the strict interpretation of RFC 5322. + }, + // Invalid ASCII in date. + { + "Fri, 21 Nov 1997 ù 09:55:06 -0600 \r\n (thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // CFWS chars () in date. + { + "Fri, 21 Nov () 1997 09:55:06 -0600 \r\n (thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // Timezone is invalid but T is found in comment. 
+ { + "Fri, 21 Nov 1997 09:55:06 -060 \r\n (Thisisa(valid)cfws) \t ", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // Date has no month. + { + "Fri, 21 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // Invalid month : OCT iso Oct + { + "Fri, 21 OCT 1997 09:55:06 CST", + time.Time{}, + false, + }, + // A too short time zone. + { + "Fri, 21 Nov 1997 09:55:06 -060", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // A too short obsolete time zone. + { + "Fri, 21 1997 09:55:06 GT", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + false, + }, + // Ensure that the presence of "T" in the date + // doesn't trip out ParseDate, as per issue 39260. + { + "Tue, 26 May 2020 14:04:40 GMT", + time.Date(2020, 05, 26, 14, 04, 40, 0, time.UTC), + true, + }, + { + "Tue, 26 May 2020 14:04:40 UT", + time.Date(2020, 05, 26, 14, 04, 40, 0, time.UTC), + true, + }, + { + "Thu, 21 May 2020 14:04:40 UT", + time.Date(2020, 05, 21, 14, 04, 40, 0, time.UTC), + true, + }, + { + "Tue, 26 May 2020 14:04:40 XT", + time.Date(2020, 05, 26, 14, 04, 40, 0, time.UTC), + false, + }, + { + "Thu, 21 May 2020 14:04:40 XT", + time.Date(2020, 05, 21, 14, 04, 40, 0, time.UTC), + false, + }, + { + "Thu, 21 May 2020 14:04:40 UTC", + time.Date(2020, 05, 21, 14, 04, 40, 0, time.UTC), + true, + }, + { + "Fri, 21 Nov 1997 09:55:06 GMT (GMT)", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.UTC), + true, + }, + } + for _, test := range tests { + hdr := Header{ + "Date": []string{test.dateStr}, + } + date, err := hdr.Date() + if err != nil && test.valid { + t.Errorf("Header(Date: %s).Date(): %v", test.dateStr, err) + } else if err == nil && test.exp.IsZero() { + // OK. Used when exact result depends on the + // system's local zoneinfo. 
+ } else if err == nil && !date.Equal(test.exp) && test.valid { + t.Errorf("Header(Date: %s).Date() = %+v, want %+v", test.dateStr, date, test.exp) + } else if err == nil && !test.valid { // an invalid expression was tested + t.Errorf("Header(Date: %s).Date() did not return an error but %v", test.dateStr, date) + } + + date, err = ParseDate(test.dateStr) + if err != nil && test.valid { + t.Errorf("ParseDate(%s): %v", test.dateStr, err) + } else if err == nil && test.exp.IsZero() { + // OK. Used when exact result depends on the + // system's local zoneinfo. + } else if err == nil && !test.valid { // an invalid expression was tested + t.Errorf("ParseDate(%s) did not return an error but %v", test.dateStr, date) + } else if err == nil && test.valid && !date.Equal(test.exp) { + t.Errorf("ParseDate(%s) = %+v, want %+v", test.dateStr, date, test.exp) + } + } +} + +func TestAddressParsingError(t *testing.T) { + mustErrTestCases := [...]struct { + text string + wantErrText string + }{ + 0: {"=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>", "charset not supported"}, + 1: {"a@gmail.com b@gmail.com", "expected single address"}, + 2: {string([]byte{0xed, 0xa0, 0x80}) + " <micro@example.net>", "invalid utf-8 in address"}, + 3: {"\"" + string([]byte{0xed, 0xa0, 0x80}) + "\" <half-surrogate@example.com>", "invalid utf-8 in quoted-string"}, + 4: {"\"\\" + string([]byte{0x80}) + "\" <escaped-invalid-unicode@example.net>", "invalid utf-8 in quoted-string"}, + 5: {"\"\x00\" <null@example.net>", "bad character in quoted-string"}, + 6: {"\"\\\x00\" <escaped-null@example.net>", "bad character in quoted-string"}, + 7: {"John Doe", "no angle-addr"}, + 8: {`<jdoe#machine.example>`, "missing @ in addr-spec"}, + 9: {`John <middle> Doe <jdoe@machine.example>`, "missing @ in addr-spec"}, + 10: {"cfws@example.com (", "misformatted parenthetical comment"}, + 11: {"empty group: ;", "empty group"}, + 12: {"root group: embed group: null@example.com;", "no angle-addr"}, + 13: {"group not 
closed: null@example.com", "expected comma"}, + 14: {"group: first@example.com, second@example.com;", "group with multiple addresses"}, + 15: {"john.doe", "missing '@' or angle-addr"}, + 16: {"john.doe@", "missing '@' or angle-addr"}, + 17: {"John Doe@foo.bar", "no angle-addr"}, + 18: {" group: null@example.com; (asd", "misformatted parenthetical comment"}, + 19: {" group: ; (asd", "misformatted parenthetical comment"}, + 20: {`(John) Doe <jdoe@machine.example>`, "missing word in phrase:"}, + } + + for i, tc := range mustErrTestCases { + _, err := ParseAddress(tc.text) + if err == nil || !strings.Contains(err.Error(), tc.wantErrText) { + t.Errorf(`mail.ParseAddress(%q) #%d want %q, got %v`, tc.text, i, tc.wantErrText, err) + } + } + + t.Run("CustomWordDecoder", func(t *testing.T) { + p := &AddressParser{WordDecoder: &mime.WordDecoder{}} + for i, tc := range mustErrTestCases { + _, err := p.Parse(tc.text) + if err == nil || !strings.Contains(err.Error(), tc.wantErrText) { + t.Errorf(`p.Parse(%q) #%d want %q, got %v`, tc.text, i, tc.wantErrText, err) + } + } + }) + +} + +func TestAddressParsing(t *testing.T) { + tests := []struct { + addrsStr string + exp []*Address + }{ + // Bare address + { + `jdoe@machine.example`, + []*Address{{ + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.1 + { + `John Doe <jdoe@machine.example>`, + []*Address{{ + Name: "John Doe", + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.2 + { + `"Joe Q. Public" <john.q.public@example.com>`, + []*Address{{ + Name: "Joe Q. 
Public", + Address: "john.q.public@example.com", + }}, + }, + // Comment in display name + { + `John (middle) Doe <jdoe@machine.example>`, + []*Address{{ + Name: "John Doe", + Address: "jdoe@machine.example", + }}, + }, + // Display name is quoted string, so comment is not a comment + { + `"John (middle) Doe" <jdoe@machine.example>`, + []*Address{{ + Name: "John (middle) Doe", + Address: "jdoe@machine.example", + }}, + }, + { + `"John <middle> Doe" <jdoe@machine.example>`, + []*Address{{ + Name: "John <middle> Doe", + Address: "jdoe@machine.example", + }}, + }, + { + `Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`, + []*Address{ + { + Name: "Mary Smith", + Address: "mary@x.test", + }, + { + Address: "jdoe@example.org", + }, + { + Name: "Who?", + Address: "one@y.test", + }, + }, + }, + { + `<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`, + []*Address{ + { + Address: "boss@nil.test", + }, + { + Name: `Giant; "Big" Box`, + Address: "sysservices@example.net", + }, + }, + }, + // RFC 5322, Appendix A.6.1 + { + `Joe Q. Public <john.q.public@example.com>`, + []*Address{{ + Name: "Joe Q. 
Public", + Address: "john.q.public@example.com", + }}, + }, + // RFC 5322, Appendix A.1.3 + { + `group1: groupaddr1@example.com;`, + []*Address{ + { + Name: "", + Address: "groupaddr1@example.com", + }, + }, + }, + { + `empty group: ;`, + []*Address(nil), + }, + { + `A Group:Ed Jones <c@a.test>,joe@where.test,John <jdoe@one.test>;`, + []*Address{ + { + Name: "Ed Jones", + Address: "c@a.test", + }, + { + Name: "", + Address: "joe@where.test", + }, + { + Name: "John", + Address: "jdoe@one.test", + }, + }, + }, + // RFC5322 4.4 obs-addr-list + { + ` , joe@where.test,,John <jdoe@one.test>,`, + []*Address{ + { + Name: "", + Address: "joe@where.test", + }, + { + Name: "John", + Address: "jdoe@one.test", + }, + }, + }, + { + ` , joe@where.test,,John <jdoe@one.test>,,`, + []*Address{ + { + Name: "", + Address: "joe@where.test", + }, + { + Name: "John", + Address: "jdoe@one.test", + }, + }, + }, + { + `Group1: <addr1@example.com>;, Group 2: addr2@example.com;, John <addr3@example.com>`, + []*Address{ + { + Name: "", + Address: "addr1@example.com", + }, + { + Name: "", + Address: "addr2@example.com", + }, + { + Name: "John", + Address: "addr3@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded ISO-8859-1 address. + { + `=?iso-8859-1?q?J=F6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded US-ASCII address. Dumb but legal. + { + `=?us-ascii?q?J=6Frg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jorg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded UTF-8 address. + { + `=?utf-8?q?J=C3=B6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded UTF-8 address with multiple encoded-words. + { + `=?utf-8?q?J=C3=B6rg?= =?utf-8?q?Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `JörgDoe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047, Section 8. 
+ { + `=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>`, + []*Address{ + { + Name: `André Pirard`, + Address: "PIRARD@vm1.ulg.ac.be", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded ISO-8859-1 address. + { + `=?ISO-8859-1?B?SvZyZw==?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded UTF-8 address. + { + `=?UTF-8?B?SsO2cmc=?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example with "." in name. For issue 4938 + { + `Asem H. <noreply@example.com>`, + []*Address{ + { + Name: `Asem H.`, + Address: "noreply@example.com", + }, + }, + }, + // RFC 6532 3.2.3, qtext /= UTF8-non-ascii + { + `"Gø Pher" <gopher@example.com>`, + []*Address{ + { + Name: `Gø Pher`, + Address: "gopher@example.com", + }, + }, + }, + // RFC 6532 3.2, atext /= UTF8-non-ascii + { + `µ <micro@example.com>`, + []*Address{ + { + Name: `µ`, + Address: "micro@example.com", + }, + }, + }, + // RFC 6532 3.2.2, local address parts allow UTF-8 + { + `Micro <µ@example.com>`, + []*Address{ + { + Name: `Micro`, + Address: "µ@example.com", + }, + }, + }, + // RFC 6532 3.2.4, domains parts allow UTF-8 + { + `Micro <micro@µ.example.com>`, + []*Address{ + { + Name: `Micro`, + Address: "micro@µ.example.com", + }, + }, + }, + // Issue 14866 + { + `"" <emptystring@example.com>`, + []*Address{ + { + Name: "", + Address: "emptystring@example.com", + }, + }, + }, + // CFWS + { + `<cfws@example.com> (CFWS (cfws)) (another comment)`, + []*Address{ + { + Name: "", + Address: "cfws@example.com", + }, + }, + }, + { + `<cfws@example.com> () (another comment), <cfws2@example.com> (another)`, + []*Address{ + { + Name: "", + Address: "cfws@example.com", + }, + { + Name: "", + Address: "cfws2@example.com", + }, + }, + }, + // Comment as display name + { + `john@example.com (John Doe)`, + []*Address{ + { + Name: "John Doe", + Address: 
"john@example.com", + }, + }, + }, + // Comment and display name + { + `John Doe <john@example.com> (Joey)`, + []*Address{ + { + Name: "John Doe", + Address: "john@example.com", + }, + }, + }, + // Comment as display name, no space + { + `john@example.com(John Doe)`, + []*Address{ + { + Name: "John Doe", + Address: "john@example.com", + }, + }, + }, + // Comment as display name, Q-encoded + { + `asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`, + []*Address{ + { + Name: "Adam Sjøgren", + Address: "asjo@example.com", + }, + }, + }, + // Comment as display name, Q-encoded and tab-separated + { + `asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?=)`, + []*Address{ + { + Name: "Adam Sjøgren", + Address: "asjo@example.com", + }, + }, + }, + // Nested comment as display name, Q-encoded + { + `asjo@example.com (Adam =?utf-8?Q?Sj=C3=B8gren?= (Debian))`, + []*Address{ + { + Name: "Adam Sjøgren (Debian)", + Address: "asjo@example.com", + }, + }, + }, + // Comment in group display name + { + `group (comment:): a@example.com, b@example.com;`, + []*Address{ + { + Address: "a@example.com", + }, + { + Address: "b@example.com", + }, + }, + }, + { + `x(:"):"@a.example;("@b.example;`, + []*Address{ + { + Address: `@a.example;(@b.example`, + }, + }, + }, + } + for _, test := range tests { + if len(test.exp) == 1 { + addr, err := ParseAddress(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (single) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual([]*Address{addr}, test.exp) { + t.Errorf("Parse (single) of %q: got %+v, want %+v", test.addrsStr, addr, test.exp) + } + } + + addrs, err := ParseAddressList(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (list) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual(addrs, test.exp) { + t.Errorf("Parse (list) of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp) + } + } +} + +func TestAddressParser(t *testing.T) { + tests := []struct { + addrsStr string + exp []*Address + }{ + // 
Bare address + { + `jdoe@machine.example`, + []*Address{{ + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.1 + { + `John Doe <jdoe@machine.example>`, + []*Address{{ + Name: "John Doe", + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.2 + { + `"Joe Q. Public" <john.q.public@example.com>`, + []*Address{{ + Name: "Joe Q. Public", + Address: "john.q.public@example.com", + }}, + }, + { + `Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`, + []*Address{ + { + Name: "Mary Smith", + Address: "mary@x.test", + }, + { + Address: "jdoe@example.org", + }, + { + Name: "Who?", + Address: "one@y.test", + }, + }, + }, + { + `<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`, + []*Address{ + { + Address: "boss@nil.test", + }, + { + Name: `Giant; "Big" Box`, + Address: "sysservices@example.net", + }, + }, + }, + // RFC 2047 "Q"-encoded ISO-8859-1 address. + { + `=?iso-8859-1?q?J=F6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded US-ASCII address. Dumb but legal. + { + `=?us-ascii?q?J=6Frg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jorg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded ISO-8859-15 address. + { + `=?ISO-8859-15?Q?J=F6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded windows-1252 address (the encoded-word uses ?q?). + { + `=?windows-1252?q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>`, + []*Address{ + { + Name: `André Pirard`, + Address: "PIRARD@vm1.ulg.ac.be", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded ISO-8859-15 address. + { + `=?ISO-8859-15?B?SvZyZw==?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded UTF-8 address. 
+ { + `=?UTF-8?B?SsO2cmc=?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example with "." in name. For issue 4938 + { + `Asem H. <noreply@example.com>`, + []*Address{ + { + Name: `Asem H.`, + Address: "noreply@example.com", + }, + }, + }, + } + + ap := AddressParser{WordDecoder: &mime.WordDecoder{ + CharsetReader: func(charset string, input io.Reader) (io.Reader, error) { + in, err := io.ReadAll(input) + if err != nil { + return nil, err + } + + switch charset { + case "iso-8859-15": + in = bytes.ReplaceAll(in, []byte("\xf6"), []byte("ö")) + case "windows-1252": + in = bytes.ReplaceAll(in, []byte("\xe9"), []byte("é")) + } + + return bytes.NewReader(in), nil + }, + }} + + for _, test := range tests { + if len(test.exp) == 1 { + addr, err := ap.Parse(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (single) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual([]*Address{addr}, test.exp) { + t.Errorf("Parse (single) of %q: got %+v, want %+v", test.addrsStr, addr, test.exp) + } + } + + addrs, err := ap.ParseList(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (list) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual(addrs, test.exp) { + t.Errorf("Parse (list) of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp) + } + } +} + +func TestAddressString(t *testing.T) { + tests := []struct { + addr *Address + exp string + }{ + { + &Address{Address: "bob@example.com"}, + "<bob@example.com>", + }, + { // quoted local parts: RFC 5322, 3.4.1. and 3.2.4. 
+ &Address{Address: `my@idiot@address@example.com`}, + `<"my@idiot@address"@example.com>`, + }, + { // quoted local parts + &Address{Address: ` @example.com`}, + `<" "@example.com>`, + }, + { + &Address{Name: "Bob", Address: "bob@example.com"}, + `"Bob" <bob@example.com>`, + }, + { + // note the ö (o with an umlaut) + &Address{Name: "Böb", Address: "bob@example.com"}, + `=?utf-8?q?B=C3=B6b?= <bob@example.com>`, + }, + { + &Address{Name: "Bob Jane", Address: "bob@example.com"}, + `"Bob Jane" <bob@example.com>`, + }, + { + &Address{Name: "Böb Jacöb", Address: "bob@example.com"}, + `=?utf-8?q?B=C3=B6b_Jac=C3=B6b?= <bob@example.com>`, + }, + { // https://golang.org/issue/12098 + &Address{Name: "Rob", Address: ""}, + `"Rob" <@>`, + }, + { // https://golang.org/issue/12098 + &Address{Name: "Rob", Address: "@"}, + `"Rob" <@>`, + }, + { + &Address{Name: "Böb, Jacöb", Address: "bob@example.com"}, + `=?utf-8?b?QsO2YiwgSmFjw7Zi?= <bob@example.com>`, + }, + { + &Address{Name: "=??Q?x?=", Address: "hello@world.com"}, + `"=??Q?x?=" <hello@world.com>`, + }, + { + &Address{Name: "=?hello", Address: "hello@world.com"}, + `"=?hello" <hello@world.com>`, + }, + { + &Address{Name: "world?=", Address: "hello@world.com"}, + `"world?=" <hello@world.com>`, + }, + { + // should q-encode even for invalid utf-8. + &Address{Name: string([]byte{0xed, 0xa0, 0x80}), Address: "invalid-utf8@example.net"}, + "=?utf-8?q?=ED=A0=80?= <invalid-utf8@example.net>", + }, + } + for _, test := range tests { + s := test.addr.String() + if s != test.exp { + t.Errorf("Address%+v.String() = %v, want %v", *test.addr, s, test.exp) + continue + } + + // Check round-trip. 
+ if test.addr.Address != "" && test.addr.Address != "@" { + a, err := ParseAddress(test.exp) + if err != nil { + t.Errorf("ParseAddress(%#q): %v", test.exp, err) + continue + } + if a.Name != test.addr.Name || a.Address != test.addr.Address { + t.Errorf("ParseAddress(%#q) = %#v, want %#v", test.exp, a, test.addr) + } + } + } +} + +// TestAddressParsingAndFormatting checks that every valid address can be +// parsed, formatted with String, and parsed again with an unchanged result, +// and that every malformed address is rejected by ParseAddress. +func TestAddressParsingAndFormatting(t *testing.T) { + + // Should pass + tests := []string{ + `<Bob@example.com>`, + `<bob.bob@example.com>`, + `<".bob"@example.com>`, + `<" "@example.com>`, + `<some.mail-with-dash@example.com>`, + `<"dot.and space"@example.com>`, + `<"very.unusual.@.unusual.com"@example.com>`, + `<admin@mailserver1>`, + `<postmaster@localhost>`, + "<#!$%&'*+-/=?^_`{}|~@example.org>", + `<"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com>`, // escaped quotes + `<"()<>[]:,;@\\\"!#$%&'*+-/=?^_{}| ~.a"@example.org>`, // escaped backslashes + `<"Abc\\@def"@example.com>`, + `<"Joe\\Blow"@example.com>`, + `<test1/test2=test3@example.com>`, + `<def!xyz%abc@example.com>`, + `<_somename@example.com>`, + `<joe@uk>`, + `<~@example.com>`, + `<"..."@test.com>`, + `<"john..doe"@example.com>`, + `<"john.doe."@example.com>`, + `<".john.doe"@example.com>`, + `<"."@example.com>`, + `<".."@example.com>`, + `<"0:"@0>`, + } + + for _, test := range tests { + addr, err := ParseAddress(test) + if err != nil { + t.Errorf("Couldn't parse address %s: %s", test, err.Error()) + continue + } + str := addr.String() + addr, err = ParseAddress(str) + if err != nil { + // Report str, the formatted address that was actually re-parsed, + // alongside the original test input it was derived from. 
+ t.Errorf("ParseAddress(%q) (re-parse of %q) error: %v", str, test, err) + continue + } + + if addr.String() != test { + t.Errorf("String() round-trip = %q; want %q", addr, test) + continue + } + + } + + // Should fail + badTests := []string{ + `<Abc.example.com>`, + `<A@b@c@example.com>`, + `<a"b(c)d,e:f;g<h>i[j\k]l@example.com>`, + `<just"not"right@example.com>`, + `<this is"not\allowed@example.com>`, + `<this\ 
still\"not\\allowed@example.com>`, + `<john..doe@example.com>`, + `<john.doe@example..com>`, + `<john.doe@example..com>`, + `<john.doe.@example.com>`, + `<john.doe.@.example.com>`, + `<.john.doe@example.com>`, + `<@example.com>`, + `<.@example.com>`, + `<test@.>`, + `< @example.com>`, + `<""test""blah""@example.com>`, + `<""@0>`, + } + + for _, test := range badTests { + _, err := ParseAddress(test) + if err == nil { + t.Errorf("Should have failed to parse address: %s", test) + continue + } + + } + +} + +// TestAddressFormattingAndParsing formats each Address with String and +// checks that ParseAddress recovers the same Name and Address from it. +func TestAddressFormattingAndParsing(t *testing.T) { + tests := []*Address{ + {Name: "@lïce", Address: "alice@example.com"}, + {Name: "Böb O'Connor", Address: "bob@example.com"}, + {Name: "???", Address: "bob@example.com"}, + {Name: "Böb ???", Address: "bob@example.com"}, + {Name: "Böb (Jacöb)", Address: "bob@example.com"}, + {Name: "à#$%&'(),.:;<>@[]^`{|}~'", Address: "bob@example.com"}, + // https://golang.org/issue/11292 + {Name: "\"\\\x1f,\"", Address: "0@0"}, + // https://golang.org/issue/12782 + {Name: "naé, mée", Address: "test.mail@gmail.com"}, + } + + for i, test := range tests { + parsed, err := ParseAddress(test.String()) + if err != nil { + t.Errorf("test #%d: ParseAddr(%q) error: %v", i, test.String(), err) + continue + } + if parsed.Name != test.Name { + t.Errorf("test #%d: Parsed name = %q; want %q", i, parsed.Name, test.Name) + } + if parsed.Address != test.Address { + t.Errorf("test #%d: Parsed address = %q; want %q", i, parsed.Address, test.Address) + } + } +} + +// TestEmptyAddress checks that ParseAddress("") returns a nil address and an +// error, and that ParseAddressList returns no addresses and an error for the +// inputs "", "," and "a@b c@d". 
+func TestEmptyAddress(t *testing.T) { + parsed, err := ParseAddress("") + if parsed != nil || err == nil { + t.Errorf(`ParseAddress("") = %v, %v, want nil, error`, parsed, err) + } + list, err := ParseAddressList("") + if len(list) > 0 || err == nil { + t.Errorf(`ParseAddressList("") = %v, %v, want nil, error`, list, err) + } + list, err = ParseAddressList(",") + if len(list) > 0 || err == nil { + // Name the input that was actually parsed so a failure is attributable. + t.Errorf(`ParseAddressList(",") = %v, %v, want nil, error`, list, err) + } + list, err = 
ParseAddressList("a@b c@d") + if len(list) > 0 || err == nil { + // Name the input that was actually parsed so a failure is attributable. + t.Errorf(`ParseAddressList("a@b c@d") = %v, %v, want nil, error`, list, err) + } +} |