diff options
Diffstat (limited to 'src/encoding')
87 files changed, 43555 insertions, 0 deletions
diff --git a/src/encoding/ascii85/ascii85.go b/src/encoding/ascii85/ascii85.go new file mode 100644 index 0000000..18bf9f0 --- /dev/null +++ b/src/encoding/ascii85/ascii85.go @@ -0,0 +1,307 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package ascii85 implements the ascii85 data encoding +// as used in the btoa tool and Adobe's PostScript and PDF document formats. +package ascii85 + +import ( + "io" + "strconv" +) + +/* + * Encoder + */ + +// Encode encodes src into at most [MaxEncodedLen](len(src)) +// bytes of dst, returning the actual number of bytes written. +// +// The encoding handles 4-byte chunks, using a special encoding +// for the last fragment, so Encode is not appropriate for use on +// individual blocks of a large data stream. Use [NewEncoder] instead. +// +// Often, ascii85-encoded data is wrapped in <~ and ~> symbols. +// Encode does not add these. +func Encode(dst, src []byte) int { + if len(src) == 0 { + return 0 + } + + n := 0 + for len(src) > 0 { + dst[0] = 0 + dst[1] = 0 + dst[2] = 0 + dst[3] = 0 + dst[4] = 0 + + // Unpack 4 bytes into uint32 to repack into base 85 5-byte. + var v uint32 + switch len(src) { + default: + v |= uint32(src[3]) + fallthrough + case 3: + v |= uint32(src[2]) << 8 + fallthrough + case 2: + v |= uint32(src[1]) << 16 + fallthrough + case 1: + v |= uint32(src[0]) << 24 + } + + // Special case: zero (!!!!!) shortens to z. + if v == 0 && len(src) >= 4 { + dst[0] = 'z' + dst = dst[1:] + src = src[4:] + n++ + continue + } + + // Otherwise, 5 base 85 digits starting at !. + for i := 4; i >= 0; i-- { + dst[i] = '!' + byte(v%85) + v /= 85 + } + + // If src was short, discard the low destination bytes. + m := 5 + if len(src) < 4 { + m -= 4 - len(src) + src = nil + } else { + src = src[4:] + } + dst = dst[m:] + n += m + } + return n +} + +// MaxEncodedLen returns the maximum length of an encoding of n source bytes. +func MaxEncodedLen(n int) int { return (n + 3) / 4 * 5 } + +// NewEncoder returns a new ascii85 stream encoder. Data written to +// the returned writer will be encoded and then written to w. +// Ascii85 encodings operate in 32-bit blocks; when finished +// writing, the caller must Close the returned encoder to flush any +// trailing partial block. +func NewEncoder(w io.Writer) io.WriteCloser { return &encoder{w: w} } + +type encoder struct { + err error + w io.Writer + buf [4]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 4; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 4 { + return + } + nout := Encode(e.out[0:], e.buf[0:]) + if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. + for len(p) >= 4 { + nn := len(e.out) / 5 * 4 + if nn > len(p) { + nn = len(p) + } + nn -= nn % 4 + if nn > 0 { + nout := Encode(e.out[0:], p[0:nn]) + if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil { + return n, e.err + } + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + nout := Encode(e.out[0:], e.buf[0:e.nbuf]) + e.nbuf = 0 + _, e.err = e.w.Write(e.out[0:nout]) + } + return e.err +} + +/* + * Decoder + */ + +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal ascii85 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// Decode decodes src into dst, returning both the number +// of bytes written to dst and the number consumed from src. +// If src contains invalid ascii85 data, Decode will return the +// number of bytes successfully written and a [CorruptInputError]. +// Decode ignores space and control characters in src. +// Often, ascii85-encoded data is wrapped in <~ and ~> symbols. +// Decode expects these to have been stripped by the caller. +// +// If flush is true, Decode assumes that src represents the +// end of the input stream and processes it completely rather +// than wait for the completion of another 32-bit block. +// +// [NewDecoder] wraps an [io.Reader] interface around Decode. +func Decode(dst, src []byte, flush bool) (ndst, nsrc int, err error) { + var v uint32 + var nb int + for i, b := range src { + if len(dst)-ndst < 4 { + return + } + switch { + case b <= ' ': + continue + case b == 'z' && nb == 0: + nb = 5 + v = 0 + case '!' <= b && b <= 'u': + v = v*85 + uint32(b-'!') + nb++ + default: + return 0, 0, CorruptInputError(i) + } + if nb == 5 { + nsrc = i + 1 + dst[ndst] = byte(v >> 24) + dst[ndst+1] = byte(v >> 16) + dst[ndst+2] = byte(v >> 8) + dst[ndst+3] = byte(v) + ndst += 4 + nb = 0 + v = 0 + } + } + if flush { + nsrc = len(src) + if nb > 0 { + // The number of output bytes in the last fragment + // is the number of leftover input bytes - 1: + // the extra byte provides enough bits to cover + // the inefficiency of the encoding for the block. + if nb == 1 { + return 0, 0, CorruptInputError(len(src)) + } + for i := nb; i < 5; i++ { + // The short encoding truncated the output value. + // We have to assume the worst case values (digit 84) + // in order to ensure that the top bits are correct. + v = v*85 + 84 + } + for i := 0; i < nb-1; i++ { + dst[ndst] = byte(v >> 24) + v <<= 8 + ndst++ + } + } + } + return +} + +// NewDecoder constructs a new ascii85 stream decoder. +func NewDecoder(r io.Reader) io.Reader { return &decoder{r: r} } + +type decoder struct { + err error + readErr error + r io.Reader + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024]byte +} + +func (d *decoder) Read(p []byte) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + if d.err != nil { + return 0, d.err + } + + for { + // Copy leftover output from last decode. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + return + } + + // Decode leftover input from last read. + var nn, nsrc, ndst int + if d.nbuf > 0 { + ndst, nsrc, d.err = Decode(d.outbuf[0:], d.buf[0:d.nbuf], d.readErr != nil) + if ndst > 0 { + d.out = d.outbuf[0:ndst] + d.nbuf = copy(d.buf[0:], d.buf[nsrc:d.nbuf]) + continue // copy out and return + } + if ndst == 0 && d.err == nil { + // Special case: input buffer is mostly filled with non-data bytes. + // Filter out such bytes to make room for more input. + off := 0 + for i := 0; i < d.nbuf; i++ { + if d.buf[i] > ' ' { + d.buf[off] = d.buf[i] + off++ + } + } + d.nbuf = off + } + } + + // Out of input, out of decoded output. Check errors. + if d.err != nil { + return 0, d.err + } + if d.readErr != nil { + d.err = d.readErr + return 0, d.err + } + + // Read more data. + nn, d.readErr = d.r.Read(d.buf[d.nbuf:]) + d.nbuf += nn + } +} diff --git a/src/encoding/ascii85/ascii85_test.go b/src/encoding/ascii85/ascii85_test.go new file mode 100644 index 0000000..578829e --- /dev/null +++ b/src/encoding/ascii85/ascii85_test.go @@ -0,0 +1,214 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ascii85 + +import ( + "bytes" + "io" + "strings" + "testing" +) + +type testpair struct { + decoded, encoded string +} + +var bigtest = testpair{ + "Man is distinguished, not only by his reason, but by this singular passion from " + + "other animals, which is a lust of the mind, that by a perseverance of delight in " + + "the continued and indefatigable generation of knowledge, exceeds the short " + + "vehemence of any carnal pleasure.", + "9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,\n" + + "O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF\"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY\n" + + "i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa\n" + + "l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G\n" + + ">uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c\n", +} + +var pairs = []testpair{ + // Encode returns 0 when len(src) is 0 + { + "", + "", + }, + // Wikipedia example + bigtest, + // Special case when shortening !!!!! to z. + { + "\000\000\000\000", + "z", + }, +} + +func testEqual(t *testing.T, msg string, args ...any) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) + return false + } + return true +} + +func strip85(s string) string { + t := make([]byte, len(s)) + w := 0 + for r := 0; r < len(s); r++ { + c := s[r] + if c > ' ' { + t[w] = c + w++ + } + } + return string(t[0:w]) +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + buf := make([]byte, MaxEncodedLen(len(p.decoded))) + n := Encode(buf, []byte(p.decoded)) + buf = buf[0:n] + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, strip85(string(buf)), strip85(p.encoded)) + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, strip85(bb.String()), strip85(p.encoded)) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, strip85(bb.String()), strip85(bigtest.encoded)) + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + dbuf := make([]byte, 4*len(p.encoded)) + ndst, nsrc, err := Decode(dbuf, []byte(p.encoded), true) + testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "Decode(%q) = nsrc %v, want %v", p.encoded, nsrc, len(p.encoded)) + testEqual(t, "Decode(%q) = ndst %v, want %v", p.encoded, ndst, len(p.decoded)) + testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:ndst]), p.decoded) + } +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(strings.NewReader(p.encoded)) + dbuf, err := io.ReadAll(decoder) + if err != nil { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, len(dbuf), len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf), p.decoded) + if err != nil { + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + type corrupt struct { + e string + p int + } + examples := []corrupt{ + {"v", 0}, + {"!z!!!!!!!!!", 1}, + } + + for _, e := range examples { + dbuf := make([]byte, 4*len(e.e)) + _, _, err := Decode(dbuf, []byte(e.e), true) + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", e.e, int(err), e.p) + default: + t.Error("Decoder failed to detect corruption in", e) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func TestDecoderInternalWhitespace(t *testing.T) { + s := strings.Repeat(" ", 2048) + "z" + decoded, err := io.ReadAll(NewDecoder(strings.NewReader(s))) + if err != nil { + t.Errorf("Decode gave error %v", err) + } + if want := []byte("\000\000\000\000"); !bytes.Equal(want, decoded) { + t.Errorf("Decode failed: got %v, want %v", decoded, want) + } +} diff --git a/src/encoding/asn1/asn1.go b/src/encoding/asn1/asn1.go new file mode 100644 index 0000000..781ab87 --- /dev/null +++ b/src/encoding/asn1/asn1.go @@ -0,0 +1,1124 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package asn1 implements parsing of DER-encoded ASN.1 data structures, +// as defined in ITU-T Rec X.690. +// +// See also “A Layman's Guide to a Subset of ASN.1, BER, and DER,” +// http://luca.ntop.org/Teaching/Appunti/asn1.html. +package asn1 + +// ASN.1 is a syntax for specifying abstract objects and BER, DER, PER, XER etc +// are different encoding formats for those objects. Here, we'll be dealing +// with DER, the Distinguished Encoding Rules. DER is used in X.509 because +// it's fast to parse and, unlike BER, has a unique encoding for every object. +// When calculating hashes over objects, it's important that the resulting +// bytes be the same at both ends and DER removes this margin of error. +// +// ASN.1 is very complex and this package doesn't attempt to implement +// everything by any means. + +import ( + "errors" + "fmt" + "math" + "math/big" + "reflect" + "strconv" + "strings" + "time" + "unicode/utf16" + "unicode/utf8" +) + +// A StructuralError suggests that the ASN.1 data is valid, but the Go type +// which is receiving it doesn't match. +type StructuralError struct { + Msg string +} + +func (e StructuralError) Error() string { return "asn1: structure error: " + e.Msg } + +// A SyntaxError suggests that the ASN.1 data is invalid. +type SyntaxError struct { + Msg string +} + +func (e SyntaxError) Error() string { return "asn1: syntax error: " + e.Msg } + +// We start by dealing with each of the primitive types in turn. + +// BOOLEAN + +func parseBool(bytes []byte) (ret bool, err error) { + if len(bytes) != 1 { + err = SyntaxError{"invalid boolean"} + return + } + + // DER demands that "If the encoding represents the boolean value TRUE, + // its single contents octet shall have all eight bits set to one." + // Thus only 0 and 255 are valid encoded values. + switch bytes[0] { + case 0: + ret = false + case 0xff: + ret = true + default: + err = SyntaxError{"invalid boolean"} + } + + return +} + +// INTEGER + +// checkInteger returns nil if the given bytes are a valid DER-encoded +// INTEGER and an error otherwise. +func checkInteger(bytes []byte) error { + if len(bytes) == 0 { + return StructuralError{"empty integer"} + } + if len(bytes) == 1 { + return nil + } + if (bytes[0] == 0 && bytes[1]&0x80 == 0) || (bytes[0] == 0xff && bytes[1]&0x80 == 0x80) { + return StructuralError{"integer not minimally-encoded"} + } + return nil +} + +// parseInt64 treats the given bytes as a big-endian, signed integer and +// returns the result. +func parseInt64(bytes []byte) (ret int64, err error) { + err = checkInteger(bytes) + if err != nil { + return + } + if len(bytes) > 8 { + // We'll overflow an int64 in this case. + err = StructuralError{"integer too large"} + return + } + for bytesRead := 0; bytesRead < len(bytes); bytesRead++ { + ret <<= 8 + ret |= int64(bytes[bytesRead]) + } + + // Shift up and down in order to sign extend the result. + ret <<= 64 - uint8(len(bytes))*8 + ret >>= 64 - uint8(len(bytes))*8 + return +} + +// parseInt32 treats the given bytes as a big-endian, signed integer and returns +// the result. +func parseInt32(bytes []byte) (int32, error) { + if err := checkInteger(bytes); err != nil { + return 0, err + } + ret64, err := parseInt64(bytes) + if err != nil { + return 0, err + } + if ret64 != int64(int32(ret64)) { + return 0, StructuralError{"integer too large"} + } + return int32(ret64), nil +} + +var bigOne = big.NewInt(1) + +// parseBigInt treats the given bytes as a big-endian, signed integer and returns +// the result. +func parseBigInt(bytes []byte) (*big.Int, error) { + if err := checkInteger(bytes); err != nil { + return nil, err + } + ret := new(big.Int) + if len(bytes) > 0 && bytes[0]&0x80 == 0x80 { + // This is a negative number. + notBytes := make([]byte, len(bytes)) + for i := range notBytes { + notBytes[i] = ^bytes[i] + } + ret.SetBytes(notBytes) + ret.Add(ret, bigOne) + ret.Neg(ret) + return ret, nil + } + ret.SetBytes(bytes) + return ret, nil +} + +// BIT STRING + +// BitString is the structure to use when you want an ASN.1 BIT STRING type. A +// bit string is padded up to the nearest byte in memory and the number of +// valid bits is recorded. Padding bits will be zero. +type BitString struct { + Bytes []byte // bits packed into bytes. + BitLength int // length in bits. +} + +// At returns the bit at the given index. If the index is out of range it +// returns 0. +func (b BitString) At(i int) int { + if i < 0 || i >= b.BitLength { + return 0 + } + x := i / 8 + y := 7 - uint(i%8) + return int(b.Bytes[x]>>y) & 1 +} + +// RightAlign returns a slice where the padding bits are at the beginning. The +// slice may share memory with the BitString. +func (b BitString) RightAlign() []byte { + shift := uint(8 - (b.BitLength % 8)) + if shift == 8 || len(b.Bytes) == 0 { + return b.Bytes + } + + a := make([]byte, len(b.Bytes)) + a[0] = b.Bytes[0] >> shift + for i := 1; i < len(b.Bytes); i++ { + a[i] = b.Bytes[i-1] << (8 - shift) + a[i] |= b.Bytes[i] >> shift + } + + return a +} + +// parseBitString parses an ASN.1 bit string from the given byte slice and returns it. +func parseBitString(bytes []byte) (ret BitString, err error) { + if len(bytes) == 0 { + err = SyntaxError{"zero length BIT STRING"} + return + } + paddingBits := int(bytes[0]) + if paddingBits > 7 || + len(bytes) == 1 && paddingBits > 0 || + bytes[len(bytes)-1]&((1<<bytes[0])-1) != 0 { + err = SyntaxError{"invalid padding bits in BIT STRING"} + return + } + ret.BitLength = (len(bytes)-1)*8 - paddingBits + ret.Bytes = bytes[1:] + return +} + +// NULL + +// NullRawValue is a [RawValue] with its Tag set to the ASN.1 NULL type tag (5). +var NullRawValue = RawValue{Tag: TagNull} + +// NullBytes contains bytes representing the DER-encoded ASN.1 NULL type. +var NullBytes = []byte{TagNull, 0} + +// OBJECT IDENTIFIER + +// An ObjectIdentifier represents an ASN.1 OBJECT IDENTIFIER. +type ObjectIdentifier []int + +// Equal reports whether oi and other represent the same identifier. +func (oi ObjectIdentifier) Equal(other ObjectIdentifier) bool { + if len(oi) != len(other) { + return false + } + for i := 0; i < len(oi); i++ { + if oi[i] != other[i] { + return false + } + } + + return true +} + +func (oi ObjectIdentifier) String() string { + var s strings.Builder + s.Grow(32) + + buf := make([]byte, 0, 19) + for i, v := range oi { + if i > 0 { + s.WriteByte('.') + } + s.Write(strconv.AppendInt(buf, int64(v), 10)) + } + + return s.String() +} + +// parseObjectIdentifier parses an OBJECT IDENTIFIER from the given bytes and +// returns it. An object identifier is a sequence of variable length integers +// that are assigned in a hierarchy. +func parseObjectIdentifier(bytes []byte) (s ObjectIdentifier, err error) { + if len(bytes) == 0 { + err = SyntaxError{"zero length OBJECT IDENTIFIER"} + return + } + + // In the worst case, we get two elements from the first byte (which is + // encoded differently) and then every varint is a single byte long. + s = make([]int, len(bytes)+1) + + // The first varint is 40*value1 + value2: + // According to this packing, value1 can take the values 0, 1 and 2 only. + // When value1 = 0 or value1 = 1, then value2 is <= 39. When value1 = 2, + // then there are no restrictions on value2. + v, offset, err := parseBase128Int(bytes, 0) + if err != nil { + return + } + if v < 80 { + s[0] = v / 40 + s[1] = v % 40 + } else { + s[0] = 2 + s[1] = v - 80 + } + + i := 2 + for ; offset < len(bytes); i++ { + v, offset, err = parseBase128Int(bytes, offset) + if err != nil { + return + } + s[i] = v + } + s = s[0:i] + return +} + +// ENUMERATED + +// An Enumerated is represented as a plain int. +type Enumerated int + +// FLAG + +// A Flag accepts any data and is set to true if present. +type Flag bool + +// parseBase128Int parses a base-128 encoded int from the given offset in the +// given byte slice. It returns the value and the new offset. +func parseBase128Int(bytes []byte, initOffset int) (ret, offset int, err error) { + offset = initOffset + var ret64 int64 + for shifted := 0; offset < len(bytes); shifted++ { + // 5 * 7 bits per byte == 35 bits of data + // Thus the representation is either non-minimal or too large for an int32 + if shifted == 5 { + err = StructuralError{"base 128 integer too large"} + return + } + ret64 <<= 7 + b := bytes[offset] + // integers should be minimally encoded, so the leading octet should + // never be 0x80 + if shifted == 0 && b == 0x80 { + err = SyntaxError{"integer is not minimally encoded"} + return + } + ret64 |= int64(b & 0x7f) + offset++ + if b&0x80 == 0 { + ret = int(ret64) + // Ensure that the returned value fits in an int on all platforms + if ret64 > math.MaxInt32 { + err = StructuralError{"base 128 integer too large"} + } + return + } + } + err = SyntaxError{"truncated base 128 integer"} + return +} + +// UTCTime + +func parseUTCTime(bytes []byte) (ret time.Time, err error) { + s := string(bytes) + + formatStr := "0601021504Z0700" + ret, err = time.Parse(formatStr, s) + if err != nil { + formatStr = "060102150405Z0700" + ret, err = time.Parse(formatStr, s) + } + if err != nil { + return + } + + if serialized := ret.Format(formatStr); serialized != s { + err = fmt.Errorf("asn1: time did not serialize back to the original value and may be invalid: given %q, but serialized as %q", s, serialized) + return + } + + if ret.Year() >= 2050 { + // UTCTime only encodes times prior to 2050. See https://tools.ietf.org/html/rfc5280#section-4.1.2.5.1 + ret = ret.AddDate(-100, 0, 0) + } + + return +} + +// parseGeneralizedTime parses the GeneralizedTime from the given byte slice +// and returns the resulting time. +func parseGeneralizedTime(bytes []byte) (ret time.Time, err error) { + const formatStr = "20060102150405.999999999Z0700" + s := string(bytes) + + if ret, err = time.Parse(formatStr, s); err != nil { + return + } + + if serialized := ret.Format(formatStr); serialized != s { + err = fmt.Errorf("asn1: time did not serialize back to the original value and may be invalid: given %q, but serialized as %q", s, serialized) + } + + return +} + +// NumericString + +// parseNumericString parses an ASN.1 NumericString from the given byte array +// and returns it. +func parseNumericString(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if !isNumeric(b) { + return "", SyntaxError{"NumericString contains invalid character"} + } + } + return string(bytes), nil +} + +// isNumeric reports whether the given b is in the ASN.1 NumericString set. +func isNumeric(b byte) bool { + return '0' <= b && b <= '9' || + b == ' ' +} + +// PrintableString + +// parsePrintableString parses an ASN.1 PrintableString from the given byte +// array and returns it. +func parsePrintableString(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if !isPrintable(b, allowAsterisk, allowAmpersand) { + err = SyntaxError{"PrintableString contains invalid character"} + return + } + } + ret = string(bytes) + return +} + +type asteriskFlag bool +type ampersandFlag bool + +const ( + allowAsterisk asteriskFlag = true + rejectAsterisk asteriskFlag = false + + allowAmpersand ampersandFlag = true + rejectAmpersand ampersandFlag = false +) + +// isPrintable reports whether the given b is in the ASN.1 PrintableString set. +// If asterisk is allowAsterisk then '*' is also allowed, reflecting existing +// practice. If ampersand is allowAmpersand then '&' is allowed as well. +func isPrintable(b byte, asterisk asteriskFlag, ampersand ampersandFlag) bool { + return 'a' <= b && b <= 'z' || + 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || + '\'' <= b && b <= ')' || + '+' <= b && b <= '/' || + b == ' ' || + b == ':' || + b == '=' || + b == '?' || + // This is technically not allowed in a PrintableString. + // However, x509 certificates with wildcard strings don't + // always use the correct string type so we permit it. + (bool(asterisk) && b == '*') || + // This is not technically allowed either. However, not + // only is it relatively common, but there are also a + // handful of CA certificates that contain it. At least + // one of which will not expire until 2027. + (bool(ampersand) && b == '&') +} + +// IA5String + +// parseIA5String parses an ASN.1 IA5String (ASCII string) from the given +// byte slice and returns it. +func parseIA5String(bytes []byte) (ret string, err error) { + for _, b := range bytes { + if b >= utf8.RuneSelf { + err = SyntaxError{"IA5String contains invalid character"} + return + } + } + ret = string(bytes) + return +} + +// T61String + +// parseT61String parses an ASN.1 T61String (8-bit clean string) from the given +// byte slice and returns it. +func parseT61String(bytes []byte) (ret string, err error) { + return string(bytes), nil +} + +// UTF8String + +// parseUTF8String parses an ASN.1 UTF8String (raw UTF-8) from the given byte +// array and returns it. +func parseUTF8String(bytes []byte) (ret string, err error) { + if !utf8.Valid(bytes) { + return "", errors.New("asn1: invalid UTF-8 string") + } + return string(bytes), nil +} + +// BMPString + +// parseBMPString parses an ASN.1 BMPString (Basic Multilingual Plane of +// ISO/IEC/ITU 10646-1) from the given byte slice and returns it. +func parseBMPString(bmpString []byte) (string, error) { + if len(bmpString)%2 != 0 { + return "", errors.New("pkcs12: odd-length BMP string") + } + + // Strip terminator if present. + if l := len(bmpString); l >= 2 && bmpString[l-1] == 0 && bmpString[l-2] == 0 { + bmpString = bmpString[:l-2] + } + + s := make([]uint16, 0, len(bmpString)/2) + for len(bmpString) > 0 { + s = append(s, uint16(bmpString[0])<<8+uint16(bmpString[1])) + bmpString = bmpString[2:] + } + + return string(utf16.Decode(s)), nil +} + +// A RawValue represents an undecoded ASN.1 object. +type RawValue struct { + Class, Tag int + IsCompound bool + Bytes []byte + FullBytes []byte // includes the tag and length +} + +// RawContent is used to signal that the undecoded, DER data needs to be +// preserved for a struct. To use it, the first field of the struct must have +// this type. It's an error for any of the other fields to have this type. +type RawContent []byte + +// Tagging + +// parseTagAndLength parses an ASN.1 tag and length pair from the given offset +// into a byte slice. It returns the parsed data and the new offset. SET and +// SET OF (tag 17) are mapped to SEQUENCE and SEQUENCE OF (tag 16) since we +// don't distinguish between ordered and unordered objects in this code. +func parseTagAndLength(bytes []byte, initOffset int) (ret tagAndLength, offset int, err error) { + offset = initOffset + // parseTagAndLength should not be called without at least a single + // byte to read. Thus this check is for robustness: + if offset >= len(bytes) { + err = errors.New("asn1: internal error in parseTagAndLength") + return + } + b := bytes[offset] + offset++ + ret.class = int(b >> 6) + ret.isCompound = b&0x20 == 0x20 + ret.tag = int(b & 0x1f) + + // If the bottom five bits are set, then the tag number is actually base 128 + // encoded afterwards + if ret.tag == 0x1f { + ret.tag, offset, err = parseBase128Int(bytes, offset) + if err != nil { + return + } + // Tags should be encoded in minimal form. + if ret.tag < 0x1f { + err = SyntaxError{"non-minimal tag"} + return + } + } + if offset >= len(bytes) { + err = SyntaxError{"truncated tag or length"} + return + } + b = bytes[offset] + offset++ + if b&0x80 == 0 { + // The length is encoded in the bottom 7 bits. + ret.length = int(b & 0x7f) + } else { + // Bottom 7 bits give the number of length bytes to follow. + numBytes := int(b & 0x7f) + if numBytes == 0 { + err = SyntaxError{"indefinite length found (not DER)"} + return + } + ret.length = 0 + for i := 0; i < numBytes; i++ { + if offset >= len(bytes) { + err = SyntaxError{"truncated tag or length"} + return + } + b = bytes[offset] + offset++ + if ret.length >= 1<<23 { + // We can't shift ret.length up without + // overflowing. + err = StructuralError{"length too large"} + return + } + ret.length <<= 8 + ret.length |= int(b) + if ret.length == 0 { + // DER requires that lengths be minimal. + err = StructuralError{"superfluous leading zeros in length"} + return + } + } + // Short lengths must be encoded in short form. + if ret.length < 0x80 { + err = StructuralError{"non-minimal length"} + return + } + } + + return +} + +// parseSequenceOf is used for SEQUENCE OF and SET OF values. It tries to parse +// a number of ASN.1 values from the given byte slice and returns them as a +// slice of Go values of the given type. +func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type) (ret reflect.Value, err error) { + matchAny, expectedTag, compoundType, ok := getUniversalType(elemType) + if !ok { + err = StructuralError{"unknown Go type for slice"} + return + } + + // First we iterate over the input and count the number of elements, + // checking that the types are correct in each case. + numElements := 0 + for offset := 0; offset < len(bytes); { + var t tagAndLength + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + switch t.tag { + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString, TagBMPString: + // We pretend that various other string types are + // PRINTABLE STRINGs so that a sequence of them can be + // parsed into a []string. + t.tag = TagPrintableString + case TagGeneralizedTime, TagUTCTime: + // Likewise, both time types are treated the same. + t.tag = TagUTCTime + } + + if !matchAny && (t.class != ClassUniversal || t.isCompound != compoundType || t.tag != expectedTag) { + err = StructuralError{"sequence tag mismatch"} + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"truncated sequence"} + return + } + offset += t.length + numElements++ + } + ret = reflect.MakeSlice(sliceType, numElements, numElements) + params := fieldParameters{} + offset := 0 + for i := 0; i < numElements; i++ { + offset, err = parseField(ret.Index(i), bytes, offset, params) + if err != nil { + return + } + } + return +} + +var ( + bitStringType = reflect.TypeFor[BitString]() + objectIdentifierType = reflect.TypeFor[ObjectIdentifier]() + enumeratedType = reflect.TypeFor[Enumerated]() + flagType = reflect.TypeFor[Flag]() + timeType = reflect.TypeFor[time.Time]() + rawValueType = reflect.TypeFor[RawValue]() + rawContentsType = reflect.TypeFor[RawContent]() + bigIntType = reflect.TypeFor[*big.Int]() +) + +// invalidLength reports whether offset + length > sliceLength, or if the +// addition would overflow. +func invalidLength(offset, length, sliceLength int) bool { + return offset+length < offset || offset+length > sliceLength +} + +// parseField is the main parsing function. Given a byte slice and an offset +// into the array, it will try to parse a suitable ASN.1 value out and store it +// in the given Value. +func parseField(v reflect.Value, bytes []byte, initOffset int, params fieldParameters) (offset int, err error) { + offset = initOffset + fieldType := v.Type() + + // If we have run out of data, it may be that there are optional elements at the end. + if offset == len(bytes) { + if !setDefaultValue(v, params) { + err = SyntaxError{"sequence truncated"} + } + return + } + + // Deal with the ANY type. + if ifaceType := fieldType; ifaceType.Kind() == reflect.Interface && ifaceType.NumMethod() == 0 { + var t tagAndLength + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"data truncated"} + return + } + var result any + if !t.isCompound && t.class == ClassUniversal { + innerBytes := bytes[offset : offset+t.length] + switch t.tag { + case TagPrintableString: + result, err = parsePrintableString(innerBytes) + case TagNumericString: + result, err = parseNumericString(innerBytes) + case TagIA5String: + result, err = parseIA5String(innerBytes) + case TagT61String: + result, err = parseT61String(innerBytes) + case TagUTF8String: + result, err = parseUTF8String(innerBytes) + case TagInteger: + result, err = parseInt64(innerBytes) + case TagBitString: + result, err = parseBitString(innerBytes) + case TagOID: + result, err = parseObjectIdentifier(innerBytes) + case TagUTCTime: + result, err = parseUTCTime(innerBytes) + case TagGeneralizedTime: + result, err = parseGeneralizedTime(innerBytes) + case TagOctetString: + result = innerBytes + case TagBMPString: + result, err = parseBMPString(innerBytes) + default: + // If we don't know how to handle the type, we just leave Value as nil. + } + } + offset += t.length + if err != nil { + return + } + if result != nil { + v.Set(reflect.ValueOf(result)) + } + return + } + + t, offset, err := parseTagAndLength(bytes, offset) + if err != nil { + return + } + if params.explicit { + expectedClass := ClassContextSpecific + if params.application { + expectedClass = ClassApplication + } + if offset == len(bytes) { + err = StructuralError{"explicit tag has no child"} + return + } + if t.class == expectedClass && t.tag == *params.tag && (t.length == 0 || t.isCompound) { + if fieldType == rawValueType { + // The inner element should not be parsed for RawValues. + } else if t.length > 0 { + t, offset, err = parseTagAndLength(bytes, offset) + if err != nil { + return + } + } else { + if fieldType != flagType { + err = StructuralError{"zero length explicit tag was not an asn1.Flag"} + return + } + v.SetBool(true) + return + } + } else { + // The tags didn't match, it might be an optional element. + ok := setDefaultValue(v, params) + if ok { + offset = initOffset + } else { + err = StructuralError{"explicitly tagged member didn't match"} + } + return + } + } + + matchAny, universalTag, compoundType, ok1 := getUniversalType(fieldType) + if !ok1 { + err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)} + return + } + + // Special case for strings: all the ASN.1 string types map to the Go + // type string. getUniversalType returns the tag for PrintableString + // when it sees a string, so if we see a different string type on the + // wire, we change the universal type to match. + if universalTag == TagPrintableString { + if t.class == ClassUniversal { + switch t.tag { + case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString, TagBMPString: + universalTag = t.tag + } + } else if params.stringType != 0 { + universalTag = params.stringType + } + } + + // Special case for time: UTCTime and GeneralizedTime both map to the + // Go type time.Time. + if universalTag == TagUTCTime && t.tag == TagGeneralizedTime && t.class == ClassUniversal { + universalTag = TagGeneralizedTime + } + + if params.set { + universalTag = TagSet + } + + matchAnyClassAndTag := matchAny + expectedClass := ClassUniversal + expectedTag := universalTag + + if !params.explicit && params.tag != nil { + expectedClass = ClassContextSpecific + expectedTag = *params.tag + matchAnyClassAndTag = false + } + + if !params.explicit && params.application && params.tag != nil { + expectedClass = ClassApplication + expectedTag = *params.tag + matchAnyClassAndTag = false + } + + if !params.explicit && params.private && params.tag != nil { + expectedClass = ClassPrivate + expectedTag = *params.tag + matchAnyClassAndTag = false + } + + // We have unwrapped any explicit tagging at this point. + if !matchAnyClassAndTag && (t.class != expectedClass || t.tag != expectedTag) || + (!matchAny && t.isCompound != compoundType) { + // Tags don't match. Again, it could be an optional element. + ok := setDefaultValue(v, params) + if ok { + offset = initOffset + } else { + err = StructuralError{fmt.Sprintf("tags don't match (%d vs %+v) %+v %s @%d", expectedTag, t, params, fieldType.Name(), offset)} + } + return + } + if invalidLength(offset, t.length, len(bytes)) { + err = SyntaxError{"data truncated"} + return + } + innerBytes := bytes[offset : offset+t.length] + offset += t.length + + // We deal with the structures defined in this package first. + switch v := v.Addr().Interface().(type) { + case *RawValue: + *v = RawValue{t.class, t.tag, t.isCompound, innerBytes, bytes[initOffset:offset]} + return + case *ObjectIdentifier: + *v, err = parseObjectIdentifier(innerBytes) + return + case *BitString: + *v, err = parseBitString(innerBytes) + return + case *time.Time: + if universalTag == TagUTCTime { + *v, err = parseUTCTime(innerBytes) + return + } + *v, err = parseGeneralizedTime(innerBytes) + return + case *Enumerated: + parsedInt, err1 := parseInt32(innerBytes) + if err1 == nil { + *v = Enumerated(parsedInt) + } + err = err1 + return + case *Flag: + *v = true + return + case **big.Int: + parsedInt, err1 := parseBigInt(innerBytes) + if err1 == nil { + *v = parsedInt + } + err = err1 + return + } + switch val := v; val.Kind() { + case reflect.Bool: + parsedBool, err1 := parseBool(innerBytes) + if err1 == nil { + val.SetBool(parsedBool) + } + err = err1 + return + case reflect.Int, reflect.Int32, reflect.Int64: + if val.Type().Size() == 4 { + parsedInt, err1 := parseInt32(innerBytes) + if err1 == nil { + val.SetInt(int64(parsedInt)) + } + err = err1 + } else { + parsedInt, err1 := parseInt64(innerBytes) + if err1 == nil { + val.SetInt(parsedInt) + } + err = err1 + } + return + // TODO(dfc) Add support for the remaining integer types + case reflect.Struct: + structType := fieldType + + for i := 0; i < structType.NumField(); i++ { + if !structType.Field(i).IsExported() { + err = StructuralError{"struct contains unexported fields"} + return + } + } + + if structType.NumField() > 0 && + structType.Field(0).Type == rawContentsType { + bytes := bytes[initOffset:offset] + val.Field(0).Set(reflect.ValueOf(RawContent(bytes))) + } + + innerOffset := 0 + for i := 0; i < structType.NumField(); i++ { + field := structType.Field(i) + if i == 0 && field.Type == rawContentsType { + continue + } + innerOffset, err = parseField(val.Field(i), innerBytes, innerOffset, parseFieldParameters(field.Tag.Get("asn1"))) + if err != nil { + return + } + } + // We allow extra bytes at the end of the SEQUENCE because + // adding elements to the end has been used in X.509 as the + // version numbers have increased. + return + case reflect.Slice: + sliceType := fieldType + if sliceType.Elem().Kind() == reflect.Uint8 { + val.Set(reflect.MakeSlice(sliceType, len(innerBytes), len(innerBytes))) + reflect.Copy(val, reflect.ValueOf(innerBytes)) + return + } + newSlice, err1 := parseSequenceOf(innerBytes, sliceType, sliceType.Elem()) + if err1 == nil { + val.Set(newSlice) + } + err = err1 + return + case reflect.String: + var v string + switch universalTag { + case TagPrintableString: + v, err = parsePrintableString(innerBytes) + case TagNumericString: + v, err = parseNumericString(innerBytes) + case TagIA5String: + v, err = parseIA5String(innerBytes) + case TagT61String: + v, err = parseT61String(innerBytes) + case TagUTF8String: + v, err = parseUTF8String(innerBytes) + case TagGeneralString: + // GeneralString is specified in ISO-2022/ECMA-35, + // A brief review suggests that it includes structures + // that allow the encoding to change midstring and + // such. We give up and pass it as an 8-bit string. + v, err = parseT61String(innerBytes) + case TagBMPString: + v, err = parseBMPString(innerBytes) + + default: + err = SyntaxError{fmt.Sprintf("internal error: unknown string type %d", universalTag)} + } + if err == nil { + val.SetString(v) + } + return + } + err = StructuralError{"unsupported: " + v.Type().String()} + return +} + +// canHaveDefaultValue reports whether k is a Kind that we will set a default +// value for. (A signed integer, essentially.) +func canHaveDefaultValue(k reflect.Kind) bool { + switch k { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return true + } + + return false +} + +// setDefaultValue is used to install a default value, from a tag string, into +// a Value. It is successful if the field was optional, even if a default value +// wasn't provided or it failed to install it into the Value. +func setDefaultValue(v reflect.Value, params fieldParameters) (ok bool) { + if !params.optional { + return + } + ok = true + if params.defaultValue == nil { + return + } + if canHaveDefaultValue(v.Kind()) { + v.SetInt(*params.defaultValue) + } + return +} + +// Unmarshal parses the DER-encoded ASN.1 data structure b +// and uses the reflect package to fill in an arbitrary value pointed at by val. +// Because Unmarshal uses the reflect package, the structs +// being written to must use upper case field names. If val +// is nil or not a pointer, Unmarshal returns an error. +// +// After parsing b, any bytes that were leftover and not used to fill +// val will be returned in rest. When parsing a SEQUENCE into a struct, +// any trailing elements of the SEQUENCE that do not have matching +// fields in val will not be included in rest, as these are considered +// valid elements of the SEQUENCE and not trailing data. +// +// - An ASN.1 INTEGER can be written to an int, int32, int64, +// or *[big.Int]. +// If the encoded value does not fit in the Go type, +// Unmarshal returns a parse error. +// +// - An ASN.1 BIT STRING can be written to a [BitString]. +// +// - An ASN.1 OCTET STRING can be written to a []byte. +// +// - An ASN.1 OBJECT IDENTIFIER can be written to an [ObjectIdentifier]. +// +// - An ASN.1 ENUMERATED can be written to an [Enumerated]. +// +// - An ASN.1 UTCTIME or GENERALIZEDTIME can be written to a [time.Time]. +// +// - An ASN.1 PrintableString, IA5String, or NumericString can be written to a string. +// +// - Any of the above ASN.1 values can be written to an interface{}. +// The value stored in the interface has the corresponding Go type. +// For integers, that type is int64. +// +// - An ASN.1 SEQUENCE OF x or SET OF x can be written +// to a slice if an x can be written to the slice's element type. +// +// - An ASN.1 SEQUENCE or SET can be written to a struct +// if each of the elements in the sequence can be +// written to the corresponding element in the struct. +// +// The following tags on struct fields have special meaning to Unmarshal: +// +// application specifies that an APPLICATION tag is used +// private specifies that a PRIVATE tag is used +// default:x sets the default value for optional integer fields (only used if optional is also present) +// explicit specifies that an additional, explicit tag wraps the implicit one +// optional marks the field as ASN.1 OPTIONAL +// set causes a SET, rather than a SEQUENCE type to be expected +// tag:x specifies the ASN.1 tag number; implies ASN.1 CONTEXT SPECIFIC +// +// When decoding an ASN.1 value with an IMPLICIT tag into a string field, +// Unmarshal will default to a PrintableString, which doesn't support +// characters such as '@' and '&'. To force other encodings, use the following +// tags: +// +// ia5 causes strings to be unmarshaled as ASN.1 IA5String values +// numeric causes strings to be unmarshaled as ASN.1 NumericString values +// utf8 causes strings to be unmarshaled as ASN.1 UTF8String values +// +// If the type of the first field of a structure is RawContent then the raw +// ASN1 contents of the struct will be stored in it. +// +// If the name of a slice type ends with "SET" then it's treated as if +// the "set" tag was set on it. This results in interpreting the type as a +// SET OF x rather than a SEQUENCE OF x. This can be used with nested slices +// where a struct tag cannot be given. +// +// Other ASN.1 types are not supported; if it encounters them, +// Unmarshal returns a parse error. +func Unmarshal(b []byte, val any) (rest []byte, err error) { + return UnmarshalWithParams(b, val, "") +} + +// An invalidUnmarshalError describes an invalid argument passed to Unmarshal. +// (The argument to Unmarshal must be a non-nil pointer.) +type invalidUnmarshalError struct { + Type reflect.Type +} + +func (e *invalidUnmarshalError) Error() string { + if e.Type == nil { + return "asn1: Unmarshal recipient value is nil" + } + + if e.Type.Kind() != reflect.Pointer { + return "asn1: Unmarshal recipient value is non-pointer " + e.Type.String() + } + return "asn1: Unmarshal recipient value is nil " + e.Type.String() +} + +// UnmarshalWithParams allows field parameters to be specified for the +// top-level element. The form of the params is the same as the field tags. +func UnmarshalWithParams(b []byte, val any, params string) (rest []byte, err error) { + v := reflect.ValueOf(val) + if v.Kind() != reflect.Pointer || v.IsNil() { + return nil, &invalidUnmarshalError{reflect.TypeOf(val)} + } + offset, err := parseField(v.Elem(), b, 0, parseFieldParameters(params)) + if err != nil { + return nil, err + } + return b[offset:], nil +} diff --git a/src/encoding/asn1/asn1_test.go b/src/encoding/asn1/asn1_test.go new file mode 100644 index 0000000..9a605e2 --- /dev/null +++ b/src/encoding/asn1/asn1_test.go @@ -0,0 +1,1177 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "encoding/hex" + "fmt" + "math" + "math/big" + "reflect" + "strings" + "testing" + "time" +) + +type boolTest struct { + in []byte + ok bool + out bool +} + +var boolTestData = []boolTest{ + {[]byte{0x00}, true, false}, + {[]byte{0xff}, true, true}, + {[]byte{0x00, 0x00}, false, false}, + {[]byte{0xff, 0xff}, false, false}, + {[]byte{0x01}, false, false}, +} + +func TestParseBool(t *testing.T) { + for i, test := range boolTestData { + ret, err := parseBool(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok && ret != test.out { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } +} + +type int64Test struct { + in []byte + ok bool + out int64 +} + +var int64TestData = []int64Test{ + {[]byte{0x00}, true, 0}, + {[]byte{0x7f}, true, 127}, + {[]byte{0x00, 0x80}, true, 128}, + {[]byte{0x01, 0x00}, true, 256}, + {[]byte{0x80}, true, -128}, + {[]byte{0xff, 0x7f}, true, -129}, + {[]byte{0xff}, true, -1}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, true, -9223372036854775808}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, false, 0}, + {[]byte{}, false, 0}, + {[]byte{0x00, 0x7f}, false, 0}, + {[]byte{0xff, 0xf0}, false, 0}, +} + +func TestParseInt64(t *testing.T) { + for i, test := range int64TestData { + ret, err := parseInt64(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok && ret != test.out { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } +} + +type int32Test struct { + in []byte + ok bool + out int32 +} + +var int32TestData = []int32Test{ + {[]byte{0x00}, true, 0}, + {[]byte{0x7f}, true, 127}, + {[]byte{0x00, 0x80}, true, 128}, + {[]byte{0x01, 0x00}, true, 256}, + {[]byte{0x80}, true, -128}, + {[]byte{0xff, 0x7f}, true, -129}, + {[]byte{0xff}, true, -1}, + {[]byte{0x80, 0x00, 0x00, 0x00}, true, -2147483648}, + {[]byte{0x80, 0x00, 0x00, 0x00, 0x00}, false, 0}, + {[]byte{}, false, 0}, + {[]byte{0x00, 0x7f}, false, 0}, + {[]byte{0xff, 0xf0}, false, 0}, +} + +func TestParseInt32(t *testing.T) { + for i, test := range int32TestData { + ret, err := parseInt32(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok && ret != test.out { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } +} + +var bigIntTests = []struct { + in []byte + ok bool + base10 string +}{ + {[]byte{0xff}, true, "-1"}, + {[]byte{0x00}, true, "0"}, + {[]byte{0x01}, true, "1"}, + {[]byte{0x00, 0xff}, true, "255"}, + {[]byte{0xff, 0x00}, true, "-256"}, + {[]byte{0x01, 0x00}, true, "256"}, + {[]byte{}, false, ""}, + {[]byte{0x00, 0x7f}, false, ""}, + {[]byte{0xff, 0xf0}, false, ""}, +} + +func TestParseBigInt(t *testing.T) { + for i, test := range bigIntTests { + ret, err := parseBigInt(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if test.ok { + if ret.String() != test.base10 { + t.Errorf("#%d: bad result from %x, got %s want %s", i, test.in, ret.String(), test.base10) + } + e, err := makeBigInt(ret) + if err != nil { + t.Errorf("%d: err=%q", i, err) + continue + } + result := make([]byte, e.Len()) + e.Encode(result) + if !bytes.Equal(result, test.in) { + t.Errorf("#%d: got %x from marshaling %s, want %x", i, result, ret, test.in) + } + } + } +} + +type bitStringTest struct { + in []byte + ok bool + out []byte + bitLength int +} + +var bitStringTestData = []bitStringTest{ + {[]byte{}, false, []byte{}, 0}, + {[]byte{0x00}, true, []byte{}, 0}, + {[]byte{0x07, 0x00}, true, []byte{0x00}, 1}, + {[]byte{0x07, 0x01}, false, []byte{}, 0}, + {[]byte{0x07, 0x40}, false, []byte{}, 0}, + {[]byte{0x08, 0x00}, false, []byte{}, 0}, +} + +func TestBitString(t *testing.T) { + for i, test := range bitStringTestData { + ret, err := parseBitString(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if test.bitLength != ret.BitLength || !bytes.Equal(ret.Bytes, test.out) { + t.Errorf("#%d: Bad result: %v (expected %v %v)", i, ret, test.out, test.bitLength) + } + } + } +} + +func TestBitStringAt(t *testing.T) { + bs := BitString{[]byte{0x82, 0x40}, 16} + if bs.At(0) != 1 { + t.Error("#1: Failed") + } + if bs.At(1) != 0 { + t.Error("#2: Failed") + } + if bs.At(6) != 1 { + t.Error("#3: Failed") + } + if bs.At(9) != 1 { + t.Error("#4: Failed") + } + if bs.At(-1) != 0 { + t.Error("#5: Failed") + } + if bs.At(17) != 0 { + t.Error("#6: Failed") + } +} + +type bitStringRightAlignTest struct { + in []byte + inlen int + out []byte +} + +var bitStringRightAlignTests = []bitStringRightAlignTest{ + {[]byte{0x80}, 1, []byte{0x01}}, + {[]byte{0x80, 0x80}, 9, []byte{0x01, 0x01}}, + {[]byte{}, 0, []byte{}}, + {[]byte{0xce}, 8, []byte{0xce}}, + {[]byte{0xce, 0x47}, 16, []byte{0xce, 0x47}}, + {[]byte{0x34, 0x50}, 12, []byte{0x03, 0x45}}, +} + +func TestBitStringRightAlign(t *testing.T) { + for i, test := range bitStringRightAlignTests { + bs := BitString{test.in, test.inlen} + out := bs.RightAlign() + if !bytes.Equal(out, test.out) { + t.Errorf("#%d got: %x want: %x", i, out, test.out) + } + } +} + +type objectIdentifierTest struct { + in []byte + ok bool + out ObjectIdentifier // has base type[]int +} + +var objectIdentifierTestData = []objectIdentifierTest{ + {[]byte{}, false, []int{}}, + {[]byte{85}, true, []int{2, 5}}, + {[]byte{85, 0x02}, true, []int{2, 5, 2}}, + {[]byte{85, 0x02, 0xc0, 0x00}, true, []int{2, 5, 2, 0x2000}}, + {[]byte{0x81, 0x34, 0x03}, true, []int{2, 100, 3}}, + {[]byte{85, 0x02, 0xc0, 0x80, 0x80, 0x80, 0x80}, false, []int{}}, +} + +func TestObjectIdentifier(t *testing.T) { + for i, test := range objectIdentifierTestData { + ret, err := parseObjectIdentifier(test.in) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if !reflect.DeepEqual(test.out, ret) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, ret, test.out) + } + } + } + + if s := ObjectIdentifier([]int{1, 2, 3, 4}).String(); s != "1.2.3.4" { + t.Errorf("bad ObjectIdentifier.String(). Got %s, want 1.2.3.4", s) + } +} + +type timeTest struct { + in string + ok bool + out time.Time +} + +var utcTestData = []timeTest{ + {"910506164540-0700", true, time.Date(1991, 05, 06, 16, 45, 40, 0, time.FixedZone("", -7*60*60))}, + {"910506164540+0730", true, time.Date(1991, 05, 06, 16, 45, 40, 0, time.FixedZone("", 7*60*60+30*60))}, + {"910506234540Z", true, time.Date(1991, 05, 06, 23, 45, 40, 0, time.UTC)}, + {"9105062345Z", true, time.Date(1991, 05, 06, 23, 45, 0, 0, time.UTC)}, + {"5105062345Z", true, time.Date(1951, 05, 06, 23, 45, 0, 0, time.UTC)}, + {"a10506234540Z", false, time.Time{}}, + {"91a506234540Z", false, time.Time{}}, + {"9105a6234540Z", false, time.Time{}}, + {"910506a34540Z", false, time.Time{}}, + {"910506334a40Z", false, time.Time{}}, + {"91050633444aZ", false, time.Time{}}, + {"910506334461Z", false, time.Time{}}, + {"910506334400Za", false, time.Time{}}, + /* These are invalid times. However, the time package normalises times + * and they were accepted in some versions. See #11134. */ + {"000100000000Z", false, time.Time{}}, + {"101302030405Z", false, time.Time{}}, + {"100002030405Z", false, time.Time{}}, + {"100100030405Z", false, time.Time{}}, + {"100132030405Z", false, time.Time{}}, + {"100231030405Z", false, time.Time{}}, + {"100102240405Z", false, time.Time{}}, + {"100102036005Z", false, time.Time{}}, + {"100102030460Z", false, time.Time{}}, + {"-100102030410Z", false, time.Time{}}, + {"10-0102030410Z", false, time.Time{}}, + {"10-0002030410Z", false, time.Time{}}, + {"1001-02030410Z", false, time.Time{}}, + {"100102-030410Z", false, time.Time{}}, + {"10010203-0410Z", false, time.Time{}}, + {"1001020304-10Z", false, time.Time{}}, +} + +func TestUTCTime(t *testing.T) { + for i, test := range utcTestData { + ret, err := parseUTCTime([]byte(test.in)) + if err != nil { + if test.ok { + t.Errorf("#%d: parseUTCTime(%q) = error %v", i, test.in, err) + } + continue + } + if !test.ok { + t.Errorf("#%d: parseUTCTime(%q) succeeded, should have failed", i, test.in) + continue + } + const format = "Jan _2 15:04:05 -0700 2006" // ignore zone name, just offset + have := ret.Format(format) + want := test.out.Format(format) + if have != want { + t.Errorf("#%d: parseUTCTime(%q) = %s, want %s", i, test.in, have, want) + } + } +} + +var generalizedTimeTestData = []timeTest{ + {"20100102030405Z", true, time.Date(2010, 01, 02, 03, 04, 05, 0, time.UTC)}, + {"20100102030405", false, time.Time{}}, + {"20100102030405.123456Z", true, time.Date(2010, 01, 02, 03, 04, 05, 123456e3, time.UTC)}, + {"20100102030405.123456", false, time.Time{}}, + {"20100102030405.Z", false, time.Time{}}, + {"20100102030405.", false, time.Time{}}, + {"20100102030405+0607", true, time.Date(2010, 01, 02, 03, 04, 05, 0, time.FixedZone("", 6*60*60+7*60))}, + {"20100102030405-0607", true, time.Date(2010, 01, 02, 03, 04, 05, 0, time.FixedZone("", -6*60*60-7*60))}, + /* These are invalid times. However, the time package normalises times + * and they were accepted in some versions. See #11134. */ + {"00000100000000Z", false, time.Time{}}, + {"20101302030405Z", false, time.Time{}}, + {"20100002030405Z", false, time.Time{}}, + {"20100100030405Z", false, time.Time{}}, + {"20100132030405Z", false, time.Time{}}, + {"20100231030405Z", false, time.Time{}}, + {"20100102240405Z", false, time.Time{}}, + {"20100102036005Z", false, time.Time{}}, + {"20100102030460Z", false, time.Time{}}, + {"-20100102030410Z", false, time.Time{}}, + {"2010-0102030410Z", false, time.Time{}}, + {"2010-0002030410Z", false, time.Time{}}, + {"201001-02030410Z", false, time.Time{}}, + {"20100102-030410Z", false, time.Time{}}, + {"2010010203-0410Z", false, time.Time{}}, + {"201001020304-10Z", false, time.Time{}}, +} + +func TestGeneralizedTime(t *testing.T) { + for i, test := range generalizedTimeTestData { + ret, err := parseGeneralizedTime([]byte(test.in)) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil { + if !reflect.DeepEqual(test.out, ret) { + t.Errorf("#%d: Bad result: %q → %v (expected %v)", i, test.in, ret, test.out) + } + } + } +} + +type tagAndLengthTest struct { + in []byte + ok bool + out tagAndLength +} + +var tagAndLengthData = []tagAndLengthTest{ + {[]byte{0x80, 0x01}, true, tagAndLength{2, 0, 1, false}}, + {[]byte{0xa0, 0x01}, true, tagAndLength{2, 0, 1, true}}, + {[]byte{0x02, 0x00}, true, tagAndLength{0, 2, 0, false}}, + {[]byte{0xfe, 0x00}, true, tagAndLength{3, 30, 0, true}}, + {[]byte{0x1f, 0x1f, 0x00}, true, tagAndLength{0, 31, 0, false}}, + {[]byte{0x1f, 0x81, 0x00, 0x00}, true, tagAndLength{0, 128, 0, false}}, + {[]byte{0x1f, 0x81, 0x80, 0x01, 0x00}, true, tagAndLength{0, 0x4001, 0, false}}, + {[]byte{0x00, 0x81, 0x80}, true, tagAndLength{0, 0, 128, false}}, + {[]byte{0x00, 0x82, 0x01, 0x00}, true, tagAndLength{0, 0, 256, false}}, + {[]byte{0x00, 0x83, 0x01, 0x00}, false, tagAndLength{}}, + {[]byte{0x1f, 0x85}, false, tagAndLength{}}, + {[]byte{0x30, 0x80}, false, tagAndLength{}}, + // Superfluous zeros in the length should be an error. + {[]byte{0xa0, 0x82, 0x00, 0xff}, false, tagAndLength{}}, + // Lengths up to the maximum size of an int should work. + {[]byte{0xa0, 0x84, 0x7f, 0xff, 0xff, 0xff}, true, tagAndLength{2, 0, 0x7fffffff, true}}, + // Lengths that would overflow an int should be rejected. + {[]byte{0xa0, 0x84, 0x80, 0x00, 0x00, 0x00}, false, tagAndLength{}}, + // Long length form may not be used for lengths that fit in short form. + {[]byte{0xa0, 0x81, 0x7f}, false, tagAndLength{}}, + // Tag numbers which would overflow int32 are rejected. (The value below is 2^31.) + {[]byte{0x1f, 0x88, 0x80, 0x80, 0x80, 0x00, 0x00}, false, tagAndLength{}}, + // Tag numbers that fit in an int32 are valid. (The value below is 2^31 - 1.) + {[]byte{0x1f, 0x87, 0xFF, 0xFF, 0xFF, 0x7F, 0x00}, true, tagAndLength{tag: math.MaxInt32}}, + // Long tag number form may not be used for tags that fit in short form. + {[]byte{0x1f, 0x1e, 0x00}, false, tagAndLength{}}, +} + +func TestParseTagAndLength(t *testing.T) { + for i, test := range tagAndLengthData { + tagAndLength, _, err := parseTagAndLength(test.in, 0) + if (err == nil) != test.ok { + t.Errorf("#%d: Incorrect error result (did pass? %v, expected: %v)", i, err == nil, test.ok) + } + if err == nil && !reflect.DeepEqual(test.out, tagAndLength) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, tagAndLength, test.out) + } + } +} + +type parseFieldParametersTest struct { + in string + out fieldParameters +} + +func newInt(n int) *int { return &n } + +func newInt64(n int64) *int64 { return &n } + +func newString(s string) *string { return &s } + +func newBool(b bool) *bool { return &b } + +var parseFieldParametersTestData []parseFieldParametersTest = []parseFieldParametersTest{ + {"", fieldParameters{}}, + {"ia5", fieldParameters{stringType: TagIA5String}}, + {"generalized", fieldParameters{timeType: TagGeneralizedTime}}, + {"utc", fieldParameters{timeType: TagUTCTime}}, + {"printable", fieldParameters{stringType: TagPrintableString}}, + {"numeric", fieldParameters{stringType: TagNumericString}}, + {"optional", fieldParameters{optional: true}}, + {"explicit", fieldParameters{explicit: true, tag: new(int)}}, + {"application", fieldParameters{application: true, tag: new(int)}}, + {"private", fieldParameters{private: true, tag: new(int)}}, + {"optional,explicit", fieldParameters{optional: true, explicit: true, tag: new(int)}}, + {"default:42", fieldParameters{defaultValue: newInt64(42)}}, + {"tag:17", fieldParameters{tag: newInt(17)}}, + {"optional,explicit,default:42,tag:17", fieldParameters{optional: true, explicit: true, defaultValue: newInt64(42), tag: newInt(17)}}, + {"optional,explicit,default:42,tag:17,rubbish1", fieldParameters{optional: true, explicit: true, application: false, defaultValue: newInt64(42), tag: newInt(17), stringType: 0, timeType: 0, set: false, omitEmpty: false}}, + {"set", fieldParameters{set: true}}, +} + +func TestParseFieldParameters(t *testing.T) { + for i, test := range parseFieldParametersTestData { + f := parseFieldParameters(test.in) + if !reflect.DeepEqual(f, test.out) { + t.Errorf("#%d: Bad result: %v (expected %v)", i, f, test.out) + } + } +} + +type TestObjectIdentifierStruct struct { + OID ObjectIdentifier +} + +type TestContextSpecificTags struct { + A int `asn1:"tag:1"` +} + +type TestContextSpecificTags2 struct { + A int `asn1:"explicit,tag:1"` + B int +} + +type TestContextSpecificTags3 struct { + S string `asn1:"tag:1,utf8"` +} + +type TestElementsAfterString struct { + S string + A, B int +} + +type TestBigInt struct { + X *big.Int +} + +type TestSet struct { + Ints []int `asn1:"set"` +} + +var unmarshalTestData = []struct { + in []byte + out any +}{ + {[]byte{0x02, 0x01, 0x42}, newInt(0x42)}, + {[]byte{0x05, 0x00}, &RawValue{0, 5, false, []byte{}, []byte{0x05, 0x00}}}, + {[]byte{0x30, 0x08, 0x06, 0x06, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d}, &TestObjectIdentifierStruct{[]int{1, 2, 840, 113549}}}, + {[]byte{0x03, 0x04, 0x06, 0x6e, 0x5d, 0xc0}, &BitString{[]byte{110, 93, 192}, 18}}, + {[]byte{0x30, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &[]int{1, 2, 3}}, + {[]byte{0x02, 0x01, 0x10}, newInt(16)}, + {[]byte{0x13, 0x04, 't', 'e', 's', 't'}, newString("test")}, + {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, newString("test")}, + // Ampersand is allowed in PrintableString due to mistakes by major CAs. + {[]byte{0x13, 0x05, 't', 'e', 's', 't', '&'}, newString("test&")}, + {[]byte{0x16, 0x04, 't', 'e', 's', 't'}, &RawValue{0, 22, false, []byte("test"), []byte("\x16\x04test")}}, + {[]byte{0x04, 0x04, 1, 2, 3, 4}, &RawValue{0, 4, false, []byte{1, 2, 3, 4}, []byte{4, 4, 1, 2, 3, 4}}}, + {[]byte{0x30, 0x03, 0x81, 0x01, 0x01}, &TestContextSpecificTags{1}}, + {[]byte{0x30, 0x08, 0xa1, 0x03, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02}, &TestContextSpecificTags2{1, 2}}, + {[]byte{0x30, 0x03, 0x81, 0x01, '@'}, &TestContextSpecificTags3{"@"}}, + {[]byte{0x01, 0x01, 0x00}, newBool(false)}, + {[]byte{0x01, 0x01, 0xff}, newBool(true)}, + {[]byte{0x30, 0x0b, 0x13, 0x03, 0x66, 0x6f, 0x6f, 0x02, 0x01, 0x22, 0x02, 0x01, 0x33}, &TestElementsAfterString{"foo", 0x22, 0x33}}, + {[]byte{0x30, 0x05, 0x02, 0x03, 0x12, 0x34, 0x56}, &TestBigInt{big.NewInt(0x123456)}}, + {[]byte{0x30, 0x0b, 0x31, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &TestSet{Ints: []int{1, 2, 3}}}, + {[]byte{0x12, 0x0b, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' '}, newString("0123456789 ")}, +} + +func TestUnmarshal(t *testing.T) { + for i, test := range unmarshalTestData { + pv := reflect.New(reflect.TypeOf(test.out).Elem()) + val := pv.Interface() + _, err := Unmarshal(test.in, val) + if err != nil { + t.Errorf("Unmarshal failed at index %d %v", i, err) + } + if !reflect.DeepEqual(val, test.out) { + t.Errorf("#%d:\nhave %#v\nwant %#v", i, val, test.out) + } + } +} + +func TestUnmarshalWithNilOrNonPointer(t *testing.T) { + tests := []struct { + b []byte + v any + want string + }{ + {b: []byte{0x05, 0x00}, v: nil, want: "asn1: Unmarshal recipient value is nil"}, + {b: []byte{0x05, 0x00}, v: RawValue{}, want: "asn1: Unmarshal recipient value is non-pointer asn1.RawValue"}, + {b: []byte{0x05, 0x00}, v: (*RawValue)(nil), want: "asn1: Unmarshal recipient value is nil *asn1.RawValue"}, + } + + for _, test := range tests { + _, err := Unmarshal(test.b, test.v) + if err == nil { + t.Errorf("Unmarshal expecting error, got nil") + continue + } + if g, w := err.Error(), test.want; g != w { + t.Errorf("InvalidUnmarshalError mismatch\nGot: %q\nWant: %q", g, w) + } + } +} + +type Certificate struct { + TBSCertificate TBSCertificate + SignatureAlgorithm AlgorithmIdentifier + SignatureValue BitString +} + +type TBSCertificate struct { + Version int `asn1:"optional,explicit,default:0,tag:0"` + SerialNumber RawValue + SignatureAlgorithm AlgorithmIdentifier + Issuer RDNSequence + Validity Validity + Subject RDNSequence + PublicKey PublicKeyInfo +} + +type AlgorithmIdentifier struct { + Algorithm ObjectIdentifier +} + +type RDNSequence []RelativeDistinguishedNameSET + +type RelativeDistinguishedNameSET []AttributeTypeAndValue + +type AttributeTypeAndValue struct { + Type ObjectIdentifier + Value any +} + +type Validity struct { + NotBefore, NotAfter time.Time +} + +type PublicKeyInfo struct { + Algorithm AlgorithmIdentifier + PublicKey BitString +} + +func TestCertificate(t *testing.T) { + // This is a minimal, self-signed certificate that should parse correctly. + var cert Certificate + if _, err := Unmarshal(derEncodedSelfSignedCertBytes, &cert); err != nil { + t.Errorf("Unmarshal failed: %v", err) + } + if !reflect.DeepEqual(cert, derEncodedSelfSignedCert) { + t.Errorf("Bad result:\ngot: %+v\nwant: %+v", cert, derEncodedSelfSignedCert) + } +} + +func TestCertificateWithNUL(t *testing.T) { + // This is the paypal NUL-hack certificate. It should fail to parse because + // NUL isn't a permitted character in a PrintableString. + + var cert Certificate + if _, err := Unmarshal(derEncodedPaypalNULCertBytes, &cert); err == nil { + t.Error("Unmarshal succeeded, should not have") + } +} + +type rawStructTest struct { + Raw RawContent + A int +} + +func TestRawStructs(t *testing.T) { + var s rawStructTest + input := []byte{0x30, 0x03, 0x02, 0x01, 0x50} + + rest, err := Unmarshal(input, &s) + if len(rest) != 0 { + t.Errorf("incomplete parse: %x", rest) + return + } + if err != nil { + t.Error(err) + return + } + if s.A != 0x50 { + t.Errorf("bad value for A: got %d want %d", s.A, 0x50) + } + if !bytes.Equal([]byte(s.Raw), input) { + t.Errorf("bad value for Raw: got %x want %x", s.Raw, input) + } +} + +type oiEqualTest struct { + first ObjectIdentifier + second ObjectIdentifier + same bool +} + +var oiEqualTests = []oiEqualTest{ + { + ObjectIdentifier{1, 2, 3}, + ObjectIdentifier{1, 2, 3}, + true, + }, + { + ObjectIdentifier{1}, + ObjectIdentifier{1, 2, 3}, + false, + }, + { + ObjectIdentifier{1, 2, 3}, + ObjectIdentifier{10, 11, 12}, + false, + }, +} + +func TestObjectIdentifierEqual(t *testing.T) { + for _, o := range oiEqualTests { + if s := o.first.Equal(o.second); s != o.same { + t.Errorf("ObjectIdentifier.Equal: got: %t want: %t", s, o.same) + } + } +} + +var derEncodedSelfSignedCert = Certificate{ + TBSCertificate: TBSCertificate{ + Version: 0, + SerialNumber: RawValue{Class: 0, Tag: 2, IsCompound: false, Bytes: []uint8{0x0, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, 0x98}, FullBytes: []byte{2, 9, 0x0, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, 0x98}}, + SignatureAlgorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 5}}, + Issuer: RDNSequence{ + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 6}, Value: "XX"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 8}, Value: "Some-State"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 7}, Value: "City"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 10}, Value: "Internet Widgits Pty Ltd"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 3}, Value: "false.example.com"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{1, 2, 840, 113549, 1, 9, 1}, Value: "false@example.com"}}, + }, + Validity: Validity{ + NotBefore: time.Date(2009, 10, 8, 00, 25, 53, 0, time.UTC), + NotAfter: time.Date(2010, 10, 8, 00, 25, 53, 0, time.UTC), + }, + Subject: RDNSequence{ + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 6}, Value: "XX"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 8}, Value: "Some-State"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 7}, Value: "City"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 10}, Value: "Internet Widgits Pty Ltd"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{2, 5, 4, 3}, Value: "false.example.com"}}, + RelativeDistinguishedNameSET{AttributeTypeAndValue{Type: ObjectIdentifier{1, 2, 840, 113549, 1, 9, 1}, Value: "false@example.com"}}, + }, + PublicKey: PublicKeyInfo{ + Algorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 1}}, + PublicKey: BitString{ + Bytes: []uint8{ + 0x30, 0x48, 0x2, 0x41, 0x0, 0xcd, 0xb7, + 0x63, 0x9c, 0x32, 0x78, 0xf0, 0x6, 0xaa, 0x27, 0x7f, 0x6e, 0xaf, 0x42, + 0x90, 0x2b, 0x59, 0x2d, 0x8c, 0xbc, 0xbe, 0x38, 0xa1, 0xc9, 0x2b, 0xa4, + 0x69, 0x5a, 0x33, 0x1b, 0x1d, 0xea, 0xde, 0xad, 0xd8, 0xe9, 0xa5, 0xc2, + 0x7e, 0x8c, 0x4c, 0x2f, 0xd0, 0xa8, 0x88, 0x96, 0x57, 0x72, 0x2a, 0x4f, + 0x2a, 0xf7, 0x58, 0x9c, 0xf2, 0xc7, 0x70, 0x45, 0xdc, 0x8f, 0xde, 0xec, + 0x35, 0x7d, 0x2, 0x3, 0x1, 0x0, 0x1, + }, + BitLength: 592, + }, + }, + }, + SignatureAlgorithm: AlgorithmIdentifier{Algorithm: ObjectIdentifier{1, 2, 840, 113549, 1, 1, 5}}, + SignatureValue: BitString{ + Bytes: []uint8{ + 0xa6, 0x7b, 0x6, 0xec, 0x5e, 0xce, + 0x92, 0x77, 0x2c, 0xa4, 0x13, 0xcb, 0xa3, 0xca, 0x12, 0x56, 0x8f, 0xdc, 0x6c, + 0x7b, 0x45, 0x11, 0xcd, 0x40, 0xa7, 0xf6, 0x59, 0x98, 0x4, 0x2, 0xdf, 0x2b, + 0x99, 0x8b, 0xb9, 0xa4, 0xa8, 0xcb, 0xeb, 0x34, 0xc0, 0xf0, 0xa7, 0x8c, 0xf8, + 0xd9, 0x1e, 0xde, 0x14, 0xa5, 0xed, 0x76, 0xbf, 0x11, 0x6f, 0xe3, 0x60, 0xaa, + 0xfa, 0x88, 0x21, 0x49, 0x4, 0x35, + }, + BitLength: 512, + }, +} + +var derEncodedSelfSignedCertBytes = []byte{ + 0x30, 0x82, 0x02, 0x18, 0x30, + 0x82, 0x01, 0xc2, 0x02, 0x09, 0x00, 0x8c, 0xc3, 0x37, 0x92, 0x10, 0xec, 0x2c, + 0x98, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, + 0x05, 0x05, 0x00, 0x30, 0x81, 0x92, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, + 0x04, 0x06, 0x13, 0x02, 0x58, 0x58, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, + 0x04, 0x08, 0x13, 0x0a, 0x53, 0x6f, 0x6d, 0x65, 0x2d, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x04, 0x43, + 0x69, 0x74, 0x79, 0x31, 0x21, 0x30, 0x1f, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, + 0x18, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, 0x20, 0x57, 0x69, 0x64, + 0x67, 0x69, 0x74, 0x73, 0x20, 0x50, 0x74, 0x79, 0x20, 0x4c, 0x74, 0x64, 0x31, + 0x1a, 0x30, 0x18, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x11, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x2e, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, + 0x6d, 0x31, 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, + 0x01, 0x09, 0x01, 0x16, 0x11, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x40, 0x65, 0x78, + 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x1e, 0x17, 0x0d, + 0x30, 0x39, 0x31, 0x30, 0x30, 0x38, 0x30, 0x30, 0x32, 0x35, 0x35, 0x33, 0x5a, + 0x17, 0x0d, 0x31, 0x30, 0x31, 0x30, 0x30, 0x38, 0x30, 0x30, 0x32, 0x35, 0x35, + 0x33, 0x5a, 0x30, 0x81, 0x92, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, + 0x06, 0x13, 0x02, 0x58, 0x58, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, + 0x08, 0x13, 0x0a, 0x53, 0x6f, 0x6d, 0x65, 0x2d, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x04, 0x43, 0x69, + 0x74, 0x79, 0x31, 0x21, 0x30, 0x1f, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, 0x18, + 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, 0x20, 0x57, 0x69, 0x64, 0x67, + 0x69, 0x74, 0x73, 0x20, 0x50, 0x74, 0x79, 0x20, 0x4c, 0x74, 0x64, 0x31, 0x1a, + 0x30, 0x18, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x11, 0x66, 0x61, 0x6c, 0x73, + 0x65, 0x2e, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, + 0x31, 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, + 0x09, 0x01, 0x16, 0x11, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x40, 0x65, 0x78, 0x61, + 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x5c, 0x30, 0x0d, 0x06, + 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00, 0x03, + 0x4b, 0x00, 0x30, 0x48, 0x02, 0x41, 0x00, 0xcd, 0xb7, 0x63, 0x9c, 0x32, 0x78, + 0xf0, 0x06, 0xaa, 0x27, 0x7f, 0x6e, 0xaf, 0x42, 0x90, 0x2b, 0x59, 0x2d, 0x8c, + 0xbc, 0xbe, 0x38, 0xa1, 0xc9, 0x2b, 0xa4, 0x69, 0x5a, 0x33, 0x1b, 0x1d, 0xea, + 0xde, 0xad, 0xd8, 0xe9, 0xa5, 0xc2, 0x7e, 0x8c, 0x4c, 0x2f, 0xd0, 0xa8, 0x88, + 0x96, 0x57, 0x72, 0x2a, 0x4f, 0x2a, 0xf7, 0x58, 0x9c, 0xf2, 0xc7, 0x70, 0x45, + 0xdc, 0x8f, 0xde, 0xec, 0x35, 0x7d, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, + 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, 0x05, 0x00, + 0x03, 0x41, 0x00, 0xa6, 0x7b, 0x06, 0xec, 0x5e, 0xce, 0x92, 0x77, 0x2c, 0xa4, + 0x13, 0xcb, 0xa3, 0xca, 0x12, 0x56, 0x8f, 0xdc, 0x6c, 0x7b, 0x45, 0x11, 0xcd, + 0x40, 0xa7, 0xf6, 0x59, 0x98, 0x04, 0x02, 0xdf, 0x2b, 0x99, 0x8b, 0xb9, 0xa4, + 0xa8, 0xcb, 0xeb, 0x34, 0xc0, 0xf0, 0xa7, 0x8c, 0xf8, 0xd9, 0x1e, 0xde, 0x14, + 0xa5, 0xed, 0x76, 0xbf, 0x11, 0x6f, 0xe3, 0x60, 0xaa, 0xfa, 0x88, 0x21, 0x49, + 0x04, 0x35, +} + +var derEncodedPaypalNULCertBytes = []byte{ + 0x30, 0x82, 0x06, 0x44, 0x30, + 0x82, 0x05, 0xad, 0xa0, 0x03, 0x02, 0x01, 0x02, 0x02, 0x03, 0x00, 0xf0, 0x9b, + 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, + 0x05, 0x00, 0x30, 0x82, 0x01, 0x12, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, + 0x04, 0x06, 0x13, 0x02, 0x45, 0x53, 0x31, 0x12, 0x30, 0x10, 0x06, 0x03, 0x55, + 0x04, 0x08, 0x13, 0x09, 0x42, 0x61, 0x72, 0x63, 0x65, 0x6c, 0x6f, 0x6e, 0x61, + 0x31, 0x12, 0x30, 0x10, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x09, 0x42, 0x61, + 0x72, 0x63, 0x65, 0x6c, 0x6f, 0x6e, 0x61, 0x31, 0x29, 0x30, 0x27, 0x06, 0x03, + 0x55, 0x04, 0x0a, 0x13, 0x20, 0x49, 0x50, 0x53, 0x20, 0x43, 0x65, 0x72, 0x74, + 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, + 0x68, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x20, 0x73, 0x2e, 0x6c, 0x2e, 0x31, 0x2e, + 0x30, 0x2c, 0x06, 0x03, 0x55, 0x04, 0x0a, 0x14, 0x25, 0x67, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x6c, 0x40, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, + 0x20, 0x43, 0x2e, 0x49, 0x2e, 0x46, 0x2e, 0x20, 0x20, 0x42, 0x2d, 0x42, 0x36, + 0x32, 0x32, 0x31, 0x30, 0x36, 0x39, 0x35, 0x31, 0x2e, 0x30, 0x2c, 0x06, 0x03, + 0x55, 0x04, 0x0b, 0x13, 0x25, 0x69, 0x70, 0x73, 0x43, 0x41, 0x20, 0x43, 0x4c, + 0x41, 0x53, 0x45, 0x41, 0x31, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, + 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, + 0x69, 0x74, 0x79, 0x31, 0x2e, 0x30, 0x2c, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, + 0x25, 0x69, 0x70, 0x73, 0x43, 0x41, 0x20, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, + 0x31, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x69, 0x74, 0x79, 0x31, + 0x20, 0x30, 0x1e, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x09, + 0x01, 0x16, 0x11, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x40, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x1e, 0x17, 0x0d, 0x30, 0x39, + 0x30, 0x32, 0x32, 0x34, 0x32, 0x33, 0x30, 0x34, 0x31, 0x37, 0x5a, 0x17, 0x0d, + 0x31, 0x31, 0x30, 0x32, 0x32, 0x34, 0x32, 0x33, 0x30, 0x34, 0x31, 0x37, 0x5a, + 0x30, 0x81, 0x94, 0x31, 0x0b, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13, + 0x02, 0x55, 0x53, 0x31, 0x13, 0x30, 0x11, 0x06, 0x03, 0x55, 0x04, 0x08, 0x13, + 0x0a, 0x43, 0x61, 0x6c, 0x69, 0x66, 0x6f, 0x72, 0x6e, 0x69, 0x61, 0x31, 0x16, + 0x30, 0x14, 0x06, 0x03, 0x55, 0x04, 0x07, 0x13, 0x0d, 0x53, 0x61, 0x6e, 0x20, + 0x46, 0x72, 0x61, 0x6e, 0x63, 0x69, 0x73, 0x63, 0x6f, 0x31, 0x11, 0x30, 0x0f, + 0x06, 0x03, 0x55, 0x04, 0x0a, 0x13, 0x08, 0x53, 0x65, 0x63, 0x75, 0x72, 0x69, + 0x74, 0x79, 0x31, 0x14, 0x30, 0x12, 0x06, 0x03, 0x55, 0x04, 0x0b, 0x13, 0x0b, + 0x53, 0x65, 0x63, 0x75, 0x72, 0x65, 0x20, 0x55, 0x6e, 0x69, 0x74, 0x31, 0x2f, + 0x30, 0x2d, 0x06, 0x03, 0x55, 0x04, 0x03, 0x13, 0x26, 0x77, 0x77, 0x77, 0x2e, + 0x70, 0x61, 0x79, 0x70, 0x61, 0x6c, 0x2e, 0x63, 0x6f, 0x6d, 0x00, 0x73, 0x73, + 0x6c, 0x2e, 0x73, 0x65, 0x63, 0x75, 0x72, 0x65, 0x63, 0x6f, 0x6e, 0x6e, 0x65, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x63, 0x63, 0x30, 0x81, 0x9f, 0x30, 0x0d, + 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00, + 0x03, 0x81, 0x8d, 0x00, 0x30, 0x81, 0x89, 0x02, 0x81, 0x81, 0x00, 0xd2, 0x69, + 0xfa, 0x6f, 0x3a, 0x00, 0xb4, 0x21, 0x1b, 0xc8, 0xb1, 0x02, 0xd7, 0x3f, 0x19, + 0xb2, 0xc4, 0x6d, 0xb4, 0x54, 0xf8, 0x8b, 0x8a, 0xcc, 0xdb, 0x72, 0xc2, 0x9e, + 0x3c, 0x60, 0xb9, 0xc6, 0x91, 0x3d, 0x82, 0xb7, 0x7d, 0x99, 0xff, 0xd1, 0x29, + 0x84, 0xc1, 0x73, 0x53, 0x9c, 0x82, 0xdd, 0xfc, 0x24, 0x8c, 0x77, 0xd5, 0x41, + 0xf3, 0xe8, 0x1e, 0x42, 0xa1, 0xad, 0x2d, 0x9e, 0xff, 0x5b, 0x10, 0x26, 0xce, + 0x9d, 0x57, 0x17, 0x73, 0x16, 0x23, 0x38, 0xc8, 0xd6, 0xf1, 0xba, 0xa3, 0x96, + 0x5b, 0x16, 0x67, 0x4a, 0x4f, 0x73, 0x97, 0x3a, 0x4d, 0x14, 0xa4, 0xf4, 0xe2, + 0x3f, 0x8b, 0x05, 0x83, 0x42, 0xd1, 0xd0, 0xdc, 0x2f, 0x7a, 0xe5, 0xb6, 0x10, + 0xb2, 0x11, 0xc0, 0xdc, 0x21, 0x2a, 0x90, 0xff, 0xae, 0x97, 0x71, 0x5a, 0x49, + 0x81, 0xac, 0x40, 0xf3, 0x3b, 0xb8, 0x59, 0xb2, 0x4f, 0x02, 0x03, 0x01, 0x00, + 0x01, 0xa3, 0x82, 0x03, 0x21, 0x30, 0x82, 0x03, 0x1d, 0x30, 0x09, 0x06, 0x03, + 0x55, 0x1d, 0x13, 0x04, 0x02, 0x30, 0x00, 0x30, 0x11, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x01, 0x04, 0x04, 0x03, 0x02, 0x06, 0x40, + 0x30, 0x0b, 0x06, 0x03, 0x55, 0x1d, 0x0f, 0x04, 0x04, 0x03, 0x02, 0x03, 0xf8, + 0x30, 0x13, 0x06, 0x03, 0x55, 0x1d, 0x25, 0x04, 0x0c, 0x30, 0x0a, 0x06, 0x08, + 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, 0x03, 0x01, 0x30, 0x1d, 0x06, 0x03, 0x55, + 0x1d, 0x0e, 0x04, 0x16, 0x04, 0x14, 0x61, 0x8f, 0x61, 0x34, 0x43, 0x55, 0x14, + 0x7f, 0x27, 0x09, 0xce, 0x4c, 0x8b, 0xea, 0x9b, 0x7b, 0x19, 0x25, 0xbc, 0x6e, + 0x30, 0x1f, 0x06, 0x03, 0x55, 0x1d, 0x23, 0x04, 0x18, 0x30, 0x16, 0x80, 0x14, + 0x0e, 0x07, 0x60, 0xd4, 0x39, 0xc9, 0x1b, 0x5b, 0x5d, 0x90, 0x7b, 0x23, 0xc8, + 0xd2, 0x34, 0x9d, 0x4a, 0x9a, 0x46, 0x39, 0x30, 0x09, 0x06, 0x03, 0x55, 0x1d, + 0x11, 0x04, 0x02, 0x30, 0x00, 0x30, 0x1c, 0x06, 0x03, 0x55, 0x1d, 0x12, 0x04, + 0x15, 0x30, 0x13, 0x81, 0x11, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x40, + 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x72, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x0d, 0x04, 0x65, 0x16, 0x63, + 0x4f, 0x72, 0x67, 0x61, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, + 0x49, 0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x4e, + 0x4f, 0x54, 0x20, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x45, 0x44, 0x2e, + 0x20, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x20, 0x53, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x20, 0x43, 0x65, 0x72, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, + 0x65, 0x20, 0x69, 0x73, 0x73, 0x75, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x68, + 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x30, 0x2f, 0x06, 0x09, 0x60, + 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x02, 0x04, 0x22, 0x16, 0x20, 0x68, + 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x32, 0x30, 0x30, 0x32, 0x2f, 0x30, 0x43, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, + 0x86, 0xf8, 0x42, 0x01, 0x04, 0x04, 0x36, 0x16, 0x34, 0x68, 0x74, 0x74, 0x70, + 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, + 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x43, 0x4c, + 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, 0x72, 0x6c, 0x30, 0x46, 0x06, 0x09, + 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x03, 0x04, 0x39, 0x16, 0x37, + 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, + 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, + 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x72, 0x65, 0x76, 0x6f, 0x63, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x68, 0x74, + 0x6d, 0x6c, 0x3f, 0x30, 0x43, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x86, 0xf8, + 0x42, 0x01, 0x07, 0x04, 0x36, 0x16, 0x34, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, + 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, + 0x72, 0x65, 0x6e, 0x65, 0x77, 0x61, 0x6c, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, + 0x31, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x3f, 0x30, 0x41, 0x06, 0x09, 0x60, 0x86, + 0x48, 0x01, 0x86, 0xf8, 0x42, 0x01, 0x08, 0x04, 0x34, 0x16, 0x32, 0x68, 0x74, + 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69, 0x70, 0x73, + 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, + 0x30, 0x30, 0x32, 0x2f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x43, 0x4c, 0x41, + 0x53, 0x45, 0x41, 0x31, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x30, 0x81, 0x83, 0x06, + 0x03, 0x55, 0x1d, 0x1f, 0x04, 0x7c, 0x30, 0x7a, 0x30, 0x39, 0xa0, 0x37, 0xa0, + 0x35, 0x86, 0x33, 0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, + 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, + 0x73, 0x63, 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, + 0x32, 0x30, 0x30, 0x32, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, + 0x72, 0x6c, 0x30, 0x3d, 0xa0, 0x3b, 0xa0, 0x39, 0x86, 0x37, 0x68, 0x74, 0x74, + 0x70, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x62, 0x61, 0x63, 0x6b, 0x2e, 0x69, + 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x69, 0x70, 0x73, 0x63, + 0x61, 0x32, 0x30, 0x30, 0x32, 0x2f, 0x69, 0x70, 0x73, 0x63, 0x61, 0x32, 0x30, + 0x30, 0x32, 0x43, 0x4c, 0x41, 0x53, 0x45, 0x41, 0x31, 0x2e, 0x63, 0x72, 0x6c, + 0x30, 0x32, 0x06, 0x08, 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, 0x01, 0x01, 0x04, + 0x26, 0x30, 0x24, 0x30, 0x22, 0x06, 0x08, 0x2b, 0x06, 0x01, 0x05, 0x05, 0x07, + 0x30, 0x01, 0x86, 0x16, 0x68, 0x74, 0x74, 0x70, 0x3a, 0x2f, 0x2f, 0x6f, 0x63, + 0x73, 0x70, 0x2e, 0x69, 0x70, 0x73, 0x63, 0x61, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x05, + 0x05, 0x00, 0x03, 0x81, 0x81, 0x00, 0x68, 0xee, 0x79, 0x97, 0x97, 0xdd, 0x3b, + 0xef, 0x16, 0x6a, 0x06, 0xf2, 0x14, 0x9a, 0x6e, 0xcd, 0x9e, 0x12, 0xf7, 0xaa, + 0x83, 0x10, 0xbd, 0xd1, 0x7c, 0x98, 0xfa, 0xc7, 0xae, 0xd4, 0x0e, 0x2c, 0x9e, + 0x38, 0x05, 0x9d, 0x52, 0x60, 0xa9, 0x99, 0x0a, 0x81, 0xb4, 0x98, 0x90, 0x1d, + 0xae, 0xbb, 0x4a, 0xd7, 0xb9, 0xdc, 0x88, 0x9e, 0x37, 0x78, 0x41, 0x5b, 0xf7, + 0x82, 0xa5, 0xf2, 0xba, 0x41, 0x25, 0x5a, 0x90, 0x1a, 0x1e, 0x45, 0x38, 0xa1, + 0x52, 0x58, 0x75, 0x94, 0x26, 0x44, 0xfb, 0x20, 0x07, 0xba, 0x44, 0xcc, 0xe5, + 0x4a, 0x2d, 0x72, 0x3f, 0x98, 0x47, 0xf6, 0x26, 0xdc, 0x05, 0x46, 0x05, 0x07, + 0x63, 0x21, 0xab, 0x46, 0x9b, 0x9c, 0x78, 0xd5, 0x54, 0x5b, 0x3d, 0x0c, 0x1e, + 0xc8, 0x64, 0x8c, 0xb5, 0x50, 0x23, 0x82, 0x6f, 0xdb, 0xb8, 0x22, 0x1c, 0x43, + 0x96, 0x07, 0xa8, 0xbb, +} + +var stringSliceTestData = [][]string{ + {"foo", "bar"}, + {"foo", "\\bar"}, + {"foo", "\"bar\""}, + {"foo", "åäö"}, +} + +func TestStringSlice(t *testing.T) { + for _, test := range stringSliceTestData { + bs, err := Marshal(test) + if err != nil { + t.Error(err) + } + + var res []string + _, err = Unmarshal(bs, &res) + if err != nil { + t.Error(err) + } + + if fmt.Sprintf("%v", res) != fmt.Sprintf("%v", test) { + t.Errorf("incorrect marshal/unmarshal; %v != %v", res, test) + } + } +} + +type explicitTaggedTimeTest struct { + Time time.Time `asn1:"explicit,tag:0"` +} + +var explicitTaggedTimeTestData = []struct { + in []byte + out explicitTaggedTimeTest +}{ + {[]byte{0x30, 0x11, 0xa0, 0xf, 0x17, 0xd, '9', '1', '0', '5', '0', '6', '1', '6', '4', '5', '4', '0', 'Z'}, + explicitTaggedTimeTest{time.Date(1991, 05, 06, 16, 45, 40, 0, time.UTC)}}, + {[]byte{0x30, 0x17, 0xa0, 0xf, 0x18, 0x13, '2', '0', '1', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '+', '0', '6', '0', '7'}, + explicitTaggedTimeTest{time.Date(2010, 01, 02, 03, 04, 05, 0, time.FixedZone("", 6*60*60+7*60))}}, +} + +func TestExplicitTaggedTime(t *testing.T) { + // Test that a time.Time will match either tagUTCTime or + // tagGeneralizedTime. + for i, test := range explicitTaggedTimeTestData { + var got explicitTaggedTimeTest + _, err := Unmarshal(test.in, &got) + if err != nil { + t.Errorf("Unmarshal failed at index %d %v", i, err) + } + if !got.Time.Equal(test.out.Time) { + t.Errorf("#%d: got %v, want %v", i, got.Time, test.out.Time) + } + } +} + +type implicitTaggedTimeTest struct { + Time time.Time `asn1:"tag:24"` +} + +func TestImplicitTaggedTime(t *testing.T) { + // An implicitly tagged time value, that happens to have an implicit + // tag equal to a GENERALIZEDTIME, should still be parsed as a UTCTime. + // (There's no "timeType" in fieldParameters to determine what type of + // time should be expected when implicitly tagged.) + der := []byte{0x30, 0x0f, 0x80 | 24, 0xd, '9', '1', '0', '5', '0', '6', '1', '6', '4', '5', '4', '0', 'Z'} + var result implicitTaggedTimeTest + if _, err := Unmarshal(der, &result); err != nil { + t.Fatalf("Error while parsing: %s", err) + } + if expected := time.Date(1991, 05, 06, 16, 45, 40, 0, time.UTC); !result.Time.Equal(expected) { + t.Errorf("Wrong result. Got %v, want %v", result.Time, expected) + } +} + +type truncatedExplicitTagTest struct { + Test int `asn1:"explicit,tag:0"` +} + +func TestTruncatedExplicitTag(t *testing.T) { + // This crashed Unmarshal in the past. See #11154. + der := []byte{ + 0x30, // SEQUENCE + 0x02, // two bytes long + 0xa0, // context-specific, tag 0 + 0x30, // 48 bytes long + } + + var result truncatedExplicitTagTest + if _, err := Unmarshal(der, &result); err == nil { + t.Error("Unmarshal returned without error") + } +} + +type invalidUTF8Test struct { + Str string `asn1:"utf8"` +} + +func TestUnmarshalInvalidUTF8(t *testing.T) { + data := []byte("0\x05\f\x03a\xc9c") + var result invalidUTF8Test + _, err := Unmarshal(data, &result) + + const expectedSubstring = "UTF" + if err == nil { + t.Fatal("Successfully unmarshaled invalid UTF-8 data") + } else if !strings.Contains(err.Error(), expectedSubstring) { + t.Fatalf("Expected error to mention %q but error was %q", expectedSubstring, err.Error()) + } +} + +func TestMarshalNilValue(t *testing.T) { + nilValueTestData := []any{ + nil, + struct{ V any }{}, + } + for i, test := range nilValueTestData { + if _, err := Marshal(test); err == nil { + t.Fatalf("#%d: successfully marshaled nil value", i) + } + } +} + +type unexported struct { + X int + y int +} + +type exported struct { + X int + Y int +} + +func TestUnexportedStructField(t *testing.T) { + want := StructuralError{"struct contains unexported fields"} + + _, err := Marshal(unexported{X: 5, y: 1}) + if err != want { + t.Errorf("got %v, want %v", err, want) + } + + bs, err := Marshal(exported{X: 5, Y: 1}) + if err != nil { + t.Fatal(err) + } + var u unexported + _, err = Unmarshal(bs, &u) + if err != want { + t.Errorf("got %v, want %v", err, want) + } +} + +func TestNull(t *testing.T) { + marshaled, err := Marshal(NullRawValue) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(NullBytes, marshaled) { + t.Errorf("Expected Marshal of NullRawValue to yield %x, got %x", NullBytes, marshaled) + } + + unmarshaled := RawValue{} + if _, err := Unmarshal(NullBytes, &unmarshaled); err != nil { + t.Fatal(err) + } + + unmarshaled.FullBytes = NullRawValue.FullBytes + if len(unmarshaled.Bytes) == 0 { + // DeepEqual considers a nil slice and an empty slice to be different. + unmarshaled.Bytes = NullRawValue.Bytes + } + + if !reflect.DeepEqual(NullRawValue, unmarshaled) { + t.Errorf("Expected Unmarshal of NullBytes to yield %v, got %v", NullRawValue, unmarshaled) + } +} + +func TestExplicitTagRawValueStruct(t *testing.T) { + type foo struct { + A RawValue `asn1:"optional,explicit,tag:5"` + B []byte `asn1:"optional,explicit,tag:6"` + } + before := foo{B: []byte{1, 2, 3}} + derBytes, err := Marshal(before) + if err != nil { + t.Fatal(err) + } + + var after foo + if rest, err := Unmarshal(derBytes, &after); err != nil || len(rest) != 0 { + t.Fatal(err) + } + + got := fmt.Sprintf("%#v", after) + want := fmt.Sprintf("%#v", before) + if got != want { + t.Errorf("got %s, want %s (DER: %x)", got, want, derBytes) + } +} + +func TestTaggedRawValue(t *testing.T) { + type taggedRawValue struct { + A RawValue `asn1:"tag:5"` + } + type untaggedRawValue struct { + A RawValue + } + const isCompound = 0x20 + const tag = 5 + + tests := []struct { + shouldMatch bool + derBytes []byte + }{ + {false, []byte{0x30, 3, TagInteger, 1, 1}}, + {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag, 1, 1}}, + {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag | isCompound, 1, 1}}, + {false, []byte{0x30, 3, (ClassApplication << 6) | tag | isCompound, 1, 1}}, + {false, []byte{0x30, 3, (ClassPrivate << 6) | tag | isCompound, 1, 1}}, + } + + for i, test := range tests { + var tagged taggedRawValue + if _, err := Unmarshal(test.derBytes, &tagged); (err == nil) != test.shouldMatch { + t.Errorf("#%d: unexpected result parsing %x: %s", i, test.derBytes, err) + } + + // An untagged RawValue should accept anything. + var untagged untaggedRawValue + if _, err := Unmarshal(test.derBytes, &untagged); err != nil { + t.Errorf("#%d: unexpected failure parsing %x with untagged RawValue: %s", i, test.derBytes, err) + } + } +} + +var bmpStringTests = []struct { + decoded string + encodedHex string +}{ + {"", "0000"}, + // Example from https://tools.ietf.org/html/rfc7292#appendix-B. + {"Beavis", "0042006500610076006900730000"}, + // Some characters from the "Letterlike Symbols Unicode block". + {"\u2115 - Double-struck N", "21150020002d00200044006f00750062006c0065002d00730074007200750063006b0020004e0000"}, +} + +func TestBMPString(t *testing.T) { + for i, test := range bmpStringTests { + encoded, err := hex.DecodeString(test.encodedHex) + if err != nil { + t.Fatalf("#%d: failed to decode from hex string", i) + } + + decoded, err := parseBMPString(encoded) + + if err != nil { + t.Errorf("#%d: decoding output gave an error: %s", i, err) + continue + } + + if decoded != test.decoded { + t.Errorf("#%d: decoding output resulted in %q, but it should have been %q", i, decoded, test.decoded) + continue + } + } +} + +func TestNonMinimalEncodedOID(t *testing.T) { + h, err := hex.DecodeString("060a2a80864886f70d01010b") + if err != nil { + t.Fatalf("failed to decode from hex string: %s", err) + } + var oid ObjectIdentifier + _, err = Unmarshal(h, &oid) + if err == nil { + t.Fatalf("accepted non-minimally encoded oid") + } +} + +func BenchmarkObjectIdentifierString(b *testing.B) { + oidPublicKeyRSA := ObjectIdentifier{1, 2, 840, 113549, 1, 1, 1} + for i := 0; i < b.N; i++ { + _ = oidPublicKeyRSA.String() + } +} diff --git a/src/encoding/asn1/common.go b/src/encoding/asn1/common.go new file mode 100644 index 0000000..40115df --- /dev/null +++ b/src/encoding/asn1/common.go @@ -0,0 +1,185 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "reflect" + "strconv" + "strings" +) + +// ASN.1 objects have metadata preceding them: +// the tag: the type of the object +// a flag denoting if this object is compound or not +// the class type: the namespace of the tag +// the length of the object, in bytes + +// Here are some standard tags and classes + +// ASN.1 tags represent the type of the following object. +const ( + TagBoolean = 1 + TagInteger = 2 + TagBitString = 3 + TagOctetString = 4 + TagNull = 5 + TagOID = 6 + TagEnum = 10 + TagUTF8String = 12 + TagSequence = 16 + TagSet = 17 + TagNumericString = 18 + TagPrintableString = 19 + TagT61String = 20 + TagIA5String = 22 + TagUTCTime = 23 + TagGeneralizedTime = 24 + TagGeneralString = 27 + TagBMPString = 30 +) + +// ASN.1 class types represent the namespace of the tag. +const ( + ClassUniversal = 0 + ClassApplication = 1 + ClassContextSpecific = 2 + ClassPrivate = 3 +) + +type tagAndLength struct { + class, tag, length int + isCompound bool +} + +// ASN.1 has IMPLICIT and EXPLICIT tags, which can be translated as "instead +// of" and "in addition to". When not specified, every primitive type has a +// default tag in the UNIVERSAL class. +// +// For example: a BIT STRING is tagged [UNIVERSAL 3] by default (although ASN.1 +// doesn't actually have a UNIVERSAL keyword). However, by saying [IMPLICIT +// CONTEXT-SPECIFIC 42], that means that the tag is replaced by another. +// +// On the other hand, if it said [EXPLICIT CONTEXT-SPECIFIC 10], then an +// /additional/ tag would wrap the default tag. This explicit tag will have the +// compound flag set. +// +// (This is used in order to remove ambiguity with optional elements.) +// +// You can layer EXPLICIT and IMPLICIT tags to an arbitrary depth, however we +// don't support that here. We support a single layer of EXPLICIT or IMPLICIT +// tagging with tag strings on the fields of a structure. + +// fieldParameters is the parsed representation of tag string from a structure field. +type fieldParameters struct { + optional bool // true iff the field is OPTIONAL + explicit bool // true iff an EXPLICIT tag is in use. + application bool // true iff an APPLICATION tag is in use. + private bool // true iff a PRIVATE tag is in use. + defaultValue *int64 // a default value for INTEGER typed fields (maybe nil). + tag *int // the EXPLICIT or IMPLICIT tag (maybe nil). + stringType int // the string tag to use when marshaling. + timeType int // the time tag to use when marshaling. + set bool // true iff this should be encoded as a SET + omitEmpty bool // true iff this should be omitted if empty when marshaling. + + // Invariants: + // if explicit is set, tag is non-nil. +} + +// Given a tag string with the format specified in the package comment, +// parseFieldParameters will parse it into a fieldParameters structure, +// ignoring unknown parts of the string. +func parseFieldParameters(str string) (ret fieldParameters) { + var part string + for len(str) > 0 { + part, str, _ = strings.Cut(str, ",") + switch { + case part == "optional": + ret.optional = true + case part == "explicit": + ret.explicit = true + if ret.tag == nil { + ret.tag = new(int) + } + case part == "generalized": + ret.timeType = TagGeneralizedTime + case part == "utc": + ret.timeType = TagUTCTime + case part == "ia5": + ret.stringType = TagIA5String + case part == "printable": + ret.stringType = TagPrintableString + case part == "numeric": + ret.stringType = TagNumericString + case part == "utf8": + ret.stringType = TagUTF8String + case strings.HasPrefix(part, "default:"): + i, err := strconv.ParseInt(part[8:], 10, 64) + if err == nil { + ret.defaultValue = new(int64) + *ret.defaultValue = i + } + case strings.HasPrefix(part, "tag:"): + i, err := strconv.Atoi(part[4:]) + if err == nil { + ret.tag = new(int) + *ret.tag = i + } + case part == "set": + ret.set = true + case part == "application": + ret.application = true + if ret.tag == nil { + ret.tag = new(int) + } + case part == "private": + ret.private = true + if ret.tag == nil { + ret.tag = new(int) + } + case part == "omitempty": + ret.omitEmpty = true + } + } + return +} + +// Given a reflected Go type, getUniversalType returns the default tag number +// and expected compound flag. +func getUniversalType(t reflect.Type) (matchAny bool, tagNumber int, isCompound, ok bool) { + switch t { + case rawValueType: + return true, -1, false, true + case objectIdentifierType: + return false, TagOID, false, true + case bitStringType: + return false, TagBitString, false, true + case timeType: + return false, TagUTCTime, false, true + case enumeratedType: + return false, TagEnum, false, true + case bigIntType: + return false, TagInteger, false, true + } + switch t.Kind() { + case reflect.Bool: + return false, TagBoolean, false, true + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return false, TagInteger, false, true + case reflect.Struct: + return false, TagSequence, true, true + case reflect.Slice: + if t.Elem().Kind() == reflect.Uint8 { + return false, TagOctetString, false, true + } + if strings.HasSuffix(t.Name(), "SET") { + return false, TagSet, true, true + } + return false, TagSequence, true, true + case reflect.String: + return false, TagPrintableString, false, true + } + return false, 0, false, false +} diff --git a/src/encoding/asn1/marshal.go b/src/encoding/asn1/marshal.go new file mode 100644 index 0000000..d8c8fe1 --- /dev/null +++ b/src/encoding/asn1/marshal.go @@ -0,0 +1,747 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "errors" + "fmt" + "math/big" + "reflect" + "sort" + "time" + "unicode/utf8" +) + +var ( + byte00Encoder encoder = byteEncoder(0x00) + byteFFEncoder encoder = byteEncoder(0xff) +) + +// encoder represents an ASN.1 element that is waiting to be marshaled. +type encoder interface { + // Len returns the number of bytes needed to marshal this element. + Len() int + // Encode encodes this element by writing Len() bytes to dst. + Encode(dst []byte) +} + +type byteEncoder byte + +func (c byteEncoder) Len() int { + return 1 +} + +func (c byteEncoder) Encode(dst []byte) { + dst[0] = byte(c) +} + +type bytesEncoder []byte + +func (b bytesEncoder) Len() int { + return len(b) +} + +func (b bytesEncoder) Encode(dst []byte) { + if copy(dst, b) != len(b) { + panic("internal error") + } +} + +type stringEncoder string + +func (s stringEncoder) Len() int { + return len(s) +} + +func (s stringEncoder) Encode(dst []byte) { + if copy(dst, s) != len(s) { + panic("internal error") + } +} + +type multiEncoder []encoder + +func (m multiEncoder) Len() int { + var size int + for _, e := range m { + size += e.Len() + } + return size +} + +func (m multiEncoder) Encode(dst []byte) { + var off int + for _, e := range m { + e.Encode(dst[off:]) + off += e.Len() + } +} + +type setEncoder []encoder + +func (s setEncoder) Len() int { + var size int + for _, e := range s { + size += e.Len() + } + return size +} + +func (s setEncoder) Encode(dst []byte) { + // Per X690 Section 11.6: The encodings of the component values of a + // set-of value shall appear in ascending order, the encodings being + // compared as octet strings with the shorter components being padded + // at their trailing end with 0-octets. + // + // First we encode each element to its TLV encoding and then use + // octetSort to get the ordering expected by X690 DER rules before + // writing the sorted encodings out to dst. + l := make([][]byte, len(s)) + for i, e := range s { + l[i] = make([]byte, e.Len()) + e.Encode(l[i]) + } + + sort.Slice(l, func(i, j int) bool { + // Since we are using bytes.Compare to compare TLV encodings we + // don't need to right pad s[i] and s[j] to the same length as + // suggested in X690. If len(s[i]) < len(s[j]) the length octet of + // s[i], which is the first determining byte, will inherently be + // smaller than the length octet of s[j]. This lets us skip the + // padding step. + return bytes.Compare(l[i], l[j]) < 0 + }) + + var off int + for _, b := range l { + copy(dst[off:], b) + off += len(b) + } +} + +type taggedEncoder struct { + // scratch contains temporary space for encoding the tag and length of + // an element in order to avoid extra allocations. + scratch [8]byte + tag encoder + body encoder +} + +func (t *taggedEncoder) Len() int { + return t.tag.Len() + t.body.Len() +} + +func (t *taggedEncoder) Encode(dst []byte) { + t.tag.Encode(dst) + t.body.Encode(dst[t.tag.Len():]) +} + +type int64Encoder int64 + +func (i int64Encoder) Len() int { + n := 1 + + for i > 127 { + n++ + i >>= 8 + } + + for i < -128 { + n++ + i >>= 8 + } + + return n +} + +func (i int64Encoder) Encode(dst []byte) { + n := i.Len() + + for j := 0; j < n; j++ { + dst[j] = byte(i >> uint((n-1-j)*8)) + } +} + +func base128IntLength(n int64) int { + if n == 0 { + return 1 + } + + l := 0 + for i := n; i > 0; i >>= 7 { + l++ + } + + return l +} + +func appendBase128Int(dst []byte, n int64) []byte { + l := base128IntLength(n) + + for i := l - 1; i >= 0; i-- { + o := byte(n >> uint(i*7)) + o &= 0x7f + if i != 0 { + o |= 0x80 + } + + dst = append(dst, o) + } + + return dst +} + +func makeBigInt(n *big.Int) (encoder, error) { + if n == nil { + return nil, StructuralError{"empty integer"} + } + + if n.Sign() < 0 { + // A negative number has to be converted to two's-complement + // form. So we'll invert and subtract 1. If the + // most-significant-bit isn't set then we'll need to pad the + // beginning with 0xff in order to keep the number negative. + nMinus1 := new(big.Int).Neg(n) + nMinus1.Sub(nMinus1, bigOne) + bytes := nMinus1.Bytes() + for i := range bytes { + bytes[i] ^= 0xff + } + if len(bytes) == 0 || bytes[0]&0x80 == 0 { + return multiEncoder([]encoder{byteFFEncoder, bytesEncoder(bytes)}), nil + } + return bytesEncoder(bytes), nil + } else if n.Sign() == 0 { + // Zero is written as a single 0 zero rather than no bytes. + return byte00Encoder, nil + } else { + bytes := n.Bytes() + if len(bytes) > 0 && bytes[0]&0x80 != 0 { + // We'll have to pad this with 0x00 in order to stop it + // looking like a negative number. + return multiEncoder([]encoder{byte00Encoder, bytesEncoder(bytes)}), nil + } + return bytesEncoder(bytes), nil + } +} + +func appendLength(dst []byte, i int) []byte { + n := lengthLength(i) + + for ; n > 0; n-- { + dst = append(dst, byte(i>>uint((n-1)*8))) + } + + return dst +} + +func lengthLength(i int) (numBytes int) { + numBytes = 1 + for i > 255 { + numBytes++ + i >>= 8 + } + return +} + +func appendTagAndLength(dst []byte, t tagAndLength) []byte { + b := uint8(t.class) << 6 + if t.isCompound { + b |= 0x20 + } + if t.tag >= 31 { + b |= 0x1f + dst = append(dst, b) + dst = appendBase128Int(dst, int64(t.tag)) + } else { + b |= uint8(t.tag) + dst = append(dst, b) + } + + if t.length >= 128 { + l := lengthLength(t.length) + dst = append(dst, 0x80|byte(l)) + dst = appendLength(dst, t.length) + } else { + dst = append(dst, byte(t.length)) + } + + return dst +} + +type bitStringEncoder BitString + +func (b bitStringEncoder) Len() int { + return len(b.Bytes) + 1 +} + +func (b bitStringEncoder) Encode(dst []byte) { + dst[0] = byte((8 - b.BitLength%8) % 8) + if copy(dst[1:], b.Bytes) != len(b.Bytes) { + panic("internal error") + } +} + +type oidEncoder []int + +func (oid oidEncoder) Len() int { + l := base128IntLength(int64(oid[0]*40 + oid[1])) + for i := 2; i < len(oid); i++ { + l += base128IntLength(int64(oid[i])) + } + return l +} + +func (oid oidEncoder) Encode(dst []byte) { + dst = appendBase128Int(dst[:0], int64(oid[0]*40+oid[1])) + for i := 2; i < len(oid); i++ { + dst = appendBase128Int(dst, int64(oid[i])) + } +} + +func makeObjectIdentifier(oid []int) (e encoder, err error) { + if len(oid) < 2 || oid[0] > 2 || (oid[0] < 2 && oid[1] >= 40) { + return nil, StructuralError{"invalid object identifier"} + } + + return oidEncoder(oid), nil +} + +func makePrintableString(s string) (e encoder, err error) { + for i := 0; i < len(s); i++ { + // The asterisk is often used in PrintableString, even though + // it is invalid. If a PrintableString was specifically + // requested then the asterisk is permitted by this code. + // Ampersand is allowed in parsing due a handful of CA + // certificates, however when making new certificates + // it is rejected. + if !isPrintable(s[i], allowAsterisk, rejectAmpersand) { + return nil, StructuralError{"PrintableString contains invalid character"} + } + } + + return stringEncoder(s), nil +} + +func makeIA5String(s string) (e encoder, err error) { + for i := 0; i < len(s); i++ { + if s[i] > 127 { + return nil, StructuralError{"IA5String contains invalid character"} + } + } + + return stringEncoder(s), nil +} + +func makeNumericString(s string) (e encoder, err error) { + for i := 0; i < len(s); i++ { + if !isNumeric(s[i]) { + return nil, StructuralError{"NumericString contains invalid character"} + } + } + + return stringEncoder(s), nil +} + +func makeUTF8String(s string) encoder { + return stringEncoder(s) +} + +func appendTwoDigits(dst []byte, v int) []byte { + return append(dst, byte('0'+(v/10)%10), byte('0'+v%10)) +} + +func appendFourDigits(dst []byte, v int) []byte { + var bytes [4]byte + for i := range bytes { + bytes[3-i] = '0' + byte(v%10) + v /= 10 + } + return append(dst, bytes[:]...) +} + +func outsideUTCRange(t time.Time) bool { + year := t.Year() + return year < 1950 || year >= 2050 +} + +func makeUTCTime(t time.Time) (e encoder, err error) { + dst := make([]byte, 0, 18) + + dst, err = appendUTCTime(dst, t) + if err != nil { + return nil, err + } + + return bytesEncoder(dst), nil +} + +func makeGeneralizedTime(t time.Time) (e encoder, err error) { + dst := make([]byte, 0, 20) + + dst, err = appendGeneralizedTime(dst, t) + if err != nil { + return nil, err + } + + return bytesEncoder(dst), nil +} + +func appendUTCTime(dst []byte, t time.Time) (ret []byte, err error) { + year := t.Year() + + switch { + case 1950 <= year && year < 2000: + dst = appendTwoDigits(dst, year-1900) + case 2000 <= year && year < 2050: + dst = appendTwoDigits(dst, year-2000) + default: + return nil, StructuralError{"cannot represent time as UTCTime"} + } + + return appendTimeCommon(dst, t), nil +} + +func appendGeneralizedTime(dst []byte, t time.Time) (ret []byte, err error) { + year := t.Year() + if year < 0 || year > 9999 { + return nil, StructuralError{"cannot represent time as GeneralizedTime"} + } + + dst = appendFourDigits(dst, year) + + return appendTimeCommon(dst, t), nil +} + +func appendTimeCommon(dst []byte, t time.Time) []byte { + _, month, day := t.Date() + + dst = appendTwoDigits(dst, int(month)) + dst = appendTwoDigits(dst, day) + + hour, min, sec := t.Clock() + + dst = appendTwoDigits(dst, hour) + dst = appendTwoDigits(dst, min) + dst = appendTwoDigits(dst, sec) + + _, offset := t.Zone() + + switch { + case offset/60 == 0: + return append(dst, 'Z') + case offset > 0: + dst = append(dst, '+') + case offset < 0: + dst = append(dst, '-') + } + + offsetMinutes := offset / 60 + if offsetMinutes < 0 { + offsetMinutes = -offsetMinutes + } + + dst = appendTwoDigits(dst, offsetMinutes/60) + dst = appendTwoDigits(dst, offsetMinutes%60) + + return dst +} + +func stripTagAndLength(in []byte) []byte { + _, offset, err := parseTagAndLength(in, 0) + if err != nil { + return in + } + return in[offset:] +} + +func makeBody(value reflect.Value, params fieldParameters) (e encoder, err error) { + switch value.Type() { + case flagType: + return bytesEncoder(nil), nil + case timeType: + t := value.Interface().(time.Time) + if params.timeType == TagGeneralizedTime || outsideUTCRange(t) { + return makeGeneralizedTime(t) + } + return makeUTCTime(t) + case bitStringType: + return bitStringEncoder(value.Interface().(BitString)), nil + case objectIdentifierType: + return makeObjectIdentifier(value.Interface().(ObjectIdentifier)) + case bigIntType: + return makeBigInt(value.Interface().(*big.Int)) + } + + switch v := value; v.Kind() { + case reflect.Bool: + if v.Bool() { + return byteFFEncoder, nil + } + return byte00Encoder, nil + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return int64Encoder(v.Int()), nil + case reflect.Struct: + t := v.Type() + + for i := 0; i < t.NumField(); i++ { + if !t.Field(i).IsExported() { + return nil, StructuralError{"struct contains unexported fields"} + } + } + + startingField := 0 + + n := t.NumField() + if n == 0 { + return bytesEncoder(nil), nil + } + + // If the first element of the structure is a non-empty + // RawContents, then we don't bother serializing the rest. + if t.Field(0).Type == rawContentsType { + s := v.Field(0) + if s.Len() > 0 { + bytes := s.Bytes() + /* The RawContents will contain the tag and + * length fields but we'll also be writing + * those ourselves, so we strip them out of + * bytes */ + return bytesEncoder(stripTagAndLength(bytes)), nil + } + + startingField = 1 + } + + switch n1 := n - startingField; n1 { + case 0: + return bytesEncoder(nil), nil + case 1: + return makeField(v.Field(startingField), parseFieldParameters(t.Field(startingField).Tag.Get("asn1"))) + default: + m := make([]encoder, n1) + for i := 0; i < n1; i++ { + m[i], err = makeField(v.Field(i+startingField), parseFieldParameters(t.Field(i+startingField).Tag.Get("asn1"))) + if err != nil { + return nil, err + } + } + + return multiEncoder(m), nil + } + case reflect.Slice: + sliceType := v.Type() + if sliceType.Elem().Kind() == reflect.Uint8 { + return bytesEncoder(v.Bytes()), nil + } + + var fp fieldParameters + + switch l := v.Len(); l { + case 0: + return bytesEncoder(nil), nil + case 1: + return makeField(v.Index(0), fp) + default: + m := make([]encoder, l) + + for i := 0; i < l; i++ { + m[i], err = makeField(v.Index(i), fp) + if err != nil { + return nil, err + } + } + + if params.set { + return setEncoder(m), nil + } + return multiEncoder(m), nil + } + case reflect.String: + switch params.stringType { + case TagIA5String: + return makeIA5String(v.String()) + case TagPrintableString: + return makePrintableString(v.String()) + case TagNumericString: + return makeNumericString(v.String()) + default: + return makeUTF8String(v.String()), nil + } + } + + return nil, StructuralError{"unknown Go type"} +} + +func makeField(v reflect.Value, params fieldParameters) (e encoder, err error) { + if !v.IsValid() { + return nil, fmt.Errorf("asn1: cannot marshal nil value") + } + // If the field is an interface{} then recurse into it. + if v.Kind() == reflect.Interface && v.Type().NumMethod() == 0 { + return makeField(v.Elem(), params) + } + + if v.Kind() == reflect.Slice && v.Len() == 0 && params.omitEmpty { + return bytesEncoder(nil), nil + } + + if params.optional && params.defaultValue != nil && canHaveDefaultValue(v.Kind()) { + defaultValue := reflect.New(v.Type()).Elem() + defaultValue.SetInt(*params.defaultValue) + + if reflect.DeepEqual(v.Interface(), defaultValue.Interface()) { + return bytesEncoder(nil), nil + } + } + + // If no default value is given then the zero value for the type is + // assumed to be the default value. This isn't obviously the correct + // behavior, but it's what Go has traditionally done. + if params.optional && params.defaultValue == nil { + if reflect.DeepEqual(v.Interface(), reflect.Zero(v.Type()).Interface()) { + return bytesEncoder(nil), nil + } + } + + if v.Type() == rawValueType { + rv := v.Interface().(RawValue) + if len(rv.FullBytes) != 0 { + return bytesEncoder(rv.FullBytes), nil + } + + t := new(taggedEncoder) + + t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{rv.Class, rv.Tag, len(rv.Bytes), rv.IsCompound})) + t.body = bytesEncoder(rv.Bytes) + + return t, nil + } + + matchAny, tag, isCompound, ok := getUniversalType(v.Type()) + if !ok || matchAny { + return nil, StructuralError{fmt.Sprintf("unknown Go type: %v", v.Type())} + } + + if params.timeType != 0 && tag != TagUTCTime { + return nil, StructuralError{"explicit time type given to non-time member"} + } + + if params.stringType != 0 && tag != TagPrintableString { + return nil, StructuralError{"explicit string type given to non-string member"} + } + + switch tag { + case TagPrintableString: + if params.stringType == 0 { + // This is a string without an explicit string type. We'll use + // a PrintableString if the character set in the string is + // sufficiently limited, otherwise we'll use a UTF8String. + for _, r := range v.String() { + if r >= utf8.RuneSelf || !isPrintable(byte(r), rejectAsterisk, rejectAmpersand) { + if !utf8.ValidString(v.String()) { + return nil, errors.New("asn1: string not valid UTF-8") + } + tag = TagUTF8String + break + } + } + } else { + tag = params.stringType + } + case TagUTCTime: + if params.timeType == TagGeneralizedTime || outsideUTCRange(v.Interface().(time.Time)) { + tag = TagGeneralizedTime + } + } + + if params.set { + if tag != TagSequence { + return nil, StructuralError{"non sequence tagged as set"} + } + tag = TagSet + } + + // makeField can be called for a slice that should be treated as a SET + // but doesn't have params.set set, for instance when using a slice + // with the SET type name suffix. In this case getUniversalType returns + // TagSet, but makeBody doesn't know about that so will treat the slice + // as a sequence. To work around this we set params.set. + if tag == TagSet && !params.set { + params.set = true + } + + t := new(taggedEncoder) + + t.body, err = makeBody(v, params) + if err != nil { + return nil, err + } + + bodyLen := t.body.Len() + + class := ClassUniversal + if params.tag != nil { + if params.application { + class = ClassApplication + } else if params.private { + class = ClassPrivate + } else { + class = ClassContextSpecific + } + + if params.explicit { + t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{ClassUniversal, tag, bodyLen, isCompound})) + + tt := new(taggedEncoder) + + tt.body = t + + tt.tag = bytesEncoder(appendTagAndLength(tt.scratch[:0], tagAndLength{ + class: class, + tag: *params.tag, + length: bodyLen + t.tag.Len(), + isCompound: true, + })) + + return tt, nil + } + + // implicit tag. + tag = *params.tag + } + + t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{class, tag, bodyLen, isCompound})) + + return t, nil +} + +// Marshal returns the ASN.1 encoding of val. +// +// In addition to the struct tags recognized by Unmarshal, the following can be +// used: +// +// ia5: causes strings to be marshaled as ASN.1, IA5String values +// omitempty: causes empty slices to be skipped +// printable: causes strings to be marshaled as ASN.1, PrintableString values +// utf8: causes strings to be marshaled as ASN.1, UTF8String values +// utc: causes time.Time to be marshaled as ASN.1, UTCTime values +// generalized: causes time.Time to be marshaled as ASN.1, GeneralizedTime values +func Marshal(val any) ([]byte, error) { + return MarshalWithParams(val, "") +} + +// MarshalWithParams allows field parameters to be specified for the +// top-level element. The form of the params is the same as the field tags. +func MarshalWithParams(val any, params string) ([]byte, error) { + e, err := makeField(reflect.ValueOf(val), parseFieldParameters(params)) + if err != nil { + return nil, err + } + b := make([]byte, e.Len()) + e.Encode(b) + return b, nil +} diff --git a/src/encoding/asn1/marshal_test.go b/src/encoding/asn1/marshal_test.go new file mode 100644 index 0000000..d9c3cf4 --- /dev/null +++ b/src/encoding/asn1/marshal_test.go @@ -0,0 +1,406 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asn1 + +import ( + "bytes" + "encoding/hex" + "math/big" + "reflect" + "strings" + "testing" + "time" +) + +type intStruct struct { + A int +} + +type twoIntStruct struct { + A int + B int +} + +type bigIntStruct struct { + A *big.Int +} + +type nestedStruct struct { + A intStruct +} + +type rawContentsStruct struct { + Raw RawContent + A int +} + +type implicitTagTest struct { + A int `asn1:"implicit,tag:5"` +} + +type explicitTagTest struct { + A int `asn1:"explicit,tag:5"` +} + +type flagTest struct { + A Flag `asn1:"tag:0,optional"` +} + +type generalizedTimeTest struct { + A time.Time `asn1:"generalized"` +} + +type ia5StringTest struct { + A string `asn1:"ia5"` +} + +type printableStringTest struct { + A string `asn1:"printable"` +} + +type genericStringTest struct { + A string +} + +type optionalRawValueTest struct { + A RawValue `asn1:"optional"` +} + +type omitEmptyTest struct { + A []string `asn1:"omitempty"` +} + +type defaultTest struct { + A int `asn1:"optional,default:1"` +} + +type applicationTest struct { + A int `asn1:"application,tag:0"` + B int `asn1:"application,tag:1,explicit"` +} + +type privateTest struct { + A int `asn1:"private,tag:0"` + B int `asn1:"private,tag:1,explicit"` + C int `asn1:"private,tag:31"` // tag size should be 2 octet + D int `asn1:"private,tag:128"` // tag size should be 3 octet +} + +type numericStringTest struct { + A string `asn1:"numeric"` +} + +type testSET []int + +var PST = time.FixedZone("PST", -8*60*60) + +type marshalTest struct { + in any + out string // hex encoded +} + +func farFuture() time.Time { + t, err := time.Parse(time.RFC3339, "2100-04-05T12:01:01Z") + if err != nil { + panic(err) + } + return t +} + +var marshalTests = []marshalTest{ + {10, "02010a"}, + {127, "02017f"}, + {128, "02020080"}, + {-128, "020180"}, + {-129, "0202ff7f"}, + {intStruct{64}, "3003020140"}, + {bigIntStruct{big.NewInt(0x123456)}, "30050203123456"}, + {twoIntStruct{64, 65}, "3006020140020141"}, + {nestedStruct{intStruct{127}}, "3005300302017f"}, + {[]byte{1, 2, 3}, "0403010203"}, + {implicitTagTest{64}, "3003850140"}, + {explicitTagTest{64}, "3005a503020140"}, + {flagTest{true}, "30028000"}, + {flagTest{false}, "3000"}, + {time.Unix(0, 0).UTC(), "170d3730303130313030303030305a"}, + {time.Unix(1258325776, 0).UTC(), "170d3039313131353232353631365a"}, + {time.Unix(1258325776, 0).In(PST), "17113039313131353134353631362d30383030"}, + {farFuture(), "180f32313030303430353132303130315a"}, + {generalizedTimeTest{time.Unix(1258325776, 0).UTC()}, "3011180f32303039313131353232353631365a"}, + {BitString{[]byte{0x80}, 1}, "03020780"}, + {BitString{[]byte{0x81, 0xf0}, 12}, "03030481f0"}, + {ObjectIdentifier([]int{1, 2, 3, 4}), "06032a0304"}, + {ObjectIdentifier([]int{1, 2, 840, 133549, 1, 1, 5}), "06092a864888932d010105"}, + {ObjectIdentifier([]int{2, 100, 3}), "0603813403"}, + {"test", "130474657374"}, + { + "" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", // This is 127 times 'x' + "137f" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "78787878787878787878787878787878787878787878787878787878787878", + }, + { + "" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", // This is 128 times 'x' + "138180" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878" + + "7878787878787878787878787878787878787878787878787878787878787878", + }, + {ia5StringTest{"test"}, "3006160474657374"}, + {optionalRawValueTest{}, "3000"}, + {printableStringTest{"test"}, "3006130474657374"}, + {printableStringTest{"test*"}, "30071305746573742a"}, + {genericStringTest{"test"}, "3006130474657374"}, + {genericStringTest{"test*"}, "30070c05746573742a"}, + {genericStringTest{"test&"}, "30070c057465737426"}, + {rawContentsStruct{nil, 64}, "3003020140"}, + {rawContentsStruct{[]byte{0x30, 3, 1, 2, 3}, 64}, "3003010203"}, + {RawValue{Tag: 1, Class: 2, IsCompound: false, Bytes: []byte{1, 2, 3}}, "8103010203"}, + {testSET([]int{10}), "310302010a"}, + {omitEmptyTest{[]string{}}, "3000"}, + {omitEmptyTest{[]string{"1"}}, "30053003130131"}, + {"Σ", "0c02cea3"}, + {defaultTest{0}, "3003020100"}, + {defaultTest{1}, "3000"}, + {defaultTest{2}, "3003020102"}, + {applicationTest{1, 2}, "30084001016103020102"}, + {privateTest{1, 2, 3, 4}, "3011c00101e103020102df1f0103df81000104"}, + {numericStringTest{"1 9"}, "30051203312039"}, +} + +func TestMarshal(t *testing.T) { + for i, test := range marshalTests { + data, err := Marshal(test.in) + if err != nil { + t.Errorf("#%d failed: %s", i, err) + } + out, _ := hex.DecodeString(test.out) + if !bytes.Equal(out, data) { + t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out) + + } + } +} + +type marshalWithParamsTest struct { + in any + params string + out string // hex encoded +} + +var marshalWithParamsTests = []marshalWithParamsTest{ + {intStruct{10}, "set", "310302010a"}, + {intStruct{10}, "application", "600302010a"}, + {intStruct{10}, "private", "e00302010a"}, +} + +func TestMarshalWithParams(t *testing.T) { + for i, test := range marshalWithParamsTests { + data, err := MarshalWithParams(test.in, test.params) + if err != nil { + t.Errorf("#%d failed: %s", i, err) + } + out, _ := hex.DecodeString(test.out) + if !bytes.Equal(out, data) { + t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out) + + } + } +} + +type marshalErrTest struct { + in any + err string +} + +var marshalErrTests = []marshalErrTest{ + {bigIntStruct{nil}, "empty integer"}, + {numericStringTest{"a"}, "invalid character"}, + {ia5StringTest{"\xb0"}, "invalid character"}, + {printableStringTest{"!"}, "invalid character"}, +} + +func TestMarshalError(t *testing.T) { + for i, test := range marshalErrTests { + _, err := Marshal(test.in) + if err == nil { + t.Errorf("#%d should fail, but success", i) + continue + } + + if !strings.Contains(err.Error(), test.err) { + t.Errorf("#%d got: %v want %v", i, err, test.err) + } + } +} + +func TestInvalidUTF8(t *testing.T) { + _, err := Marshal(string([]byte{0xff, 0xff})) + if err == nil { + t.Errorf("invalid UTF8 string was accepted") + } +} + +func TestMarshalOID(t *testing.T) { + var marshalTestsOID = []marshalTest{ + {[]byte("\x06\x01\x30"), "0403060130"}, // bytes format returns a byte sequence \x04 + // {ObjectIdentifier([]int{0}), "060100"}, // returns an error as OID 0.0 has the same encoding + {[]byte("\x06\x010"), "0403060130"}, // same as above "\x06\x010" = "\x06\x01" + "0" + {ObjectIdentifier([]int{2, 999, 3}), "0603883703"}, // Example of ITU-T X.690 + {ObjectIdentifier([]int{0, 0}), "060100"}, // zero OID + } + for i, test := range marshalTestsOID { + data, err := Marshal(test.in) + if err != nil { + t.Errorf("#%d failed: %s", i, err) + } + out, _ := hex.DecodeString(test.out) + if !bytes.Equal(out, data) { + t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out) + } + } +} + +func TestIssue11130(t *testing.T) { + data := []byte("\x06\x010") // == \x06\x01\x30 == OID = 0 (the figure) + var v any + // v has Zero value here and Elem() would panic + _, err := Unmarshal(data, &v) + if err != nil { + t.Errorf("%v", err) + return + } + if reflect.TypeOf(v).String() != reflect.TypeOf(ObjectIdentifier{}).String() { + t.Errorf("marshal OID returned an invalid type") + return + } + + data1, err := Marshal(v) + if err != nil { + t.Errorf("%v", err) + return + } + + if !bytes.Equal(data, data1) { + t.Errorf("got: %q, want: %q \n", data1, data) + return + } + + var v1 any + _, err = Unmarshal(data1, &v1) + if err != nil { + t.Errorf("%v", err) + return + } + if !reflect.DeepEqual(v, v1) { + t.Errorf("got: %#v data=%q, want : %#v data=%q\n ", v1, data1, v, data) + } +} + +func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + for _, test := range marshalTests { + Marshal(test.in) + } + } +} + +func TestSetEncoder(t *testing.T) { + testStruct := struct { + Strings []string `asn1:"set"` + }{ + Strings: []string{"a", "aa", "b", "bb", "c", "cc"}, + } + + // Expected ordering of the SET should be: + // a, b, c, aa, bb, cc + + output, err := Marshal(testStruct) + if err != nil { + t.Errorf("%v", err) + } + + expectedOrder := []string{"a", "b", "c", "aa", "bb", "cc"} + var resultStruct struct { + Strings []string `asn1:"set"` + } + rest, err := Unmarshal(output, &resultStruct) + if err != nil { + t.Errorf("%v", err) + } + if len(rest) != 0 { + t.Error("Unmarshal returned extra garbage") + } + if !reflect.DeepEqual(expectedOrder, resultStruct.Strings) { + t.Errorf("Unexpected SET content. got: %s, want: %s", resultStruct.Strings, expectedOrder) + } +} + +func TestSetEncoderSETSliceSuffix(t *testing.T) { + type testSetSET []string + testSet := testSetSET{"a", "aa", "b", "bb", "c", "cc"} + + // Expected ordering of the SET should be: + // a, b, c, aa, bb, cc + + output, err := Marshal(testSet) + if err != nil { + t.Errorf("%v", err) + } + + expectedOrder := testSetSET{"a", "b", "c", "aa", "bb", "cc"} + var resultSet testSetSET + rest, err := Unmarshal(output, &resultSet) + if err != nil { + t.Errorf("%v", err) + } + if len(rest) != 0 { + t.Error("Unmarshal returned extra garbage") + } + if !reflect.DeepEqual(expectedOrder, resultSet) { + t.Errorf("Unexpected SET content. got: %s, want: %s", resultSet, expectedOrder) + } +} + +func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() + + type testCase struct { + in []byte + out any + } + var testData []testCase + for _, test := range unmarshalTestData { + pv := reflect.New(reflect.TypeOf(test.out).Elem()) + inCopy := make([]byte, len(test.in)) + copy(inCopy, test.in) + outCopy := pv.Interface() + + testData = append(testData, testCase{ + in: inCopy, + out: outCopy, + }) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, testCase := range testData { + _, _ = Unmarshal(testCase.in, testCase.out) + } + } +} diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go new file mode 100644 index 0000000..4a61199 --- /dev/null +++ b/src/encoding/base32/base32.go @@ -0,0 +1,583 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package base32 implements base32 encoding as specified by RFC 4648. +package base32 + +import ( + "io" + "slices" + "strconv" +) + +/* + * Encodings + */ + +// An Encoding is a radix 32 encoding/decoding scheme, defined by a +// 32-character alphabet. The most common is the "base32" encoding +// introduced for SASL GSSAPI and standardized in RFC 4648. +// The alternate "base32hex" encoding is used in DNSSEC. +type Encoding struct { + encode [32]byte // mapping of symbol index to symbol byte value + decodeMap [256]uint8 // mapping of symbol byte value to symbol index + padChar rune +} + +const ( + StdPadding rune = '=' // Standard padding character + NoPadding rune = -1 // No padding +) + +const ( + decodeMapInitialize = "" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + invalidIndex = '\xff' +) + +// NewEncoding returns a new padded Encoding defined by the given alphabet, +// which must be a 32-byte string that contains unique byte values and +// does not contain the padding character or CR / LF ('\r', '\n'). +// The alphabet is treated as a sequence of byte values +// without any special treatment for multi-byte UTF-8. +// The resulting Encoding uses the default padding character ('='), +// which may be changed or disabled via [Encoding.WithPadding]. +func NewEncoding(encoder string) *Encoding { + if len(encoder) != 32 { + panic("encoding alphabet is not 32-bytes long") + } + + e := new(Encoding) + e.padChar = StdPadding + copy(e.encode[:], encoder) + copy(e.decodeMap[:], decodeMapInitialize) + + for i := 0; i < len(encoder); i++ { + // Note: While we document that the alphabet cannot contain + // the padding character, we do not enforce it since we do not know + // if the caller intends to switch the padding from StdPadding later. + switch { + case encoder[i] == '\n' || encoder[i] == '\r': + panic("encoding alphabet contains newline character") + case e.decodeMap[encoder[i]] != invalidIndex: + panic("encoding alphabet includes duplicate symbols") + } + e.decodeMap[encoder[i]] = uint8(i) + } + return e +} + +// StdEncoding is the standard base32 encoding, as defined in RFC 4648. +var StdEncoding = NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567") + +// HexEncoding is the “Extended Hex Alphabet” defined in RFC 4648. +// It is typically used in DNS. +var HexEncoding = NewEncoding("0123456789ABCDEFGHIJKLMNOPQRSTUV") + +// WithPadding creates a new encoding identical to enc except +// with a specified padding character, or NoPadding to disable padding. +// The padding character must not be '\r' or '\n', +// must not be contained in the encoding's alphabet, +// must not be negative, and must be a rune equal or below '\xff'. +// Padding characters above '\x7f' are encoded as their exact byte value +// rather than using the UTF-8 representation of the codepoint. +func (enc Encoding) WithPadding(padding rune) *Encoding { + switch { + case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff: + panic("invalid padding") + case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex: + panic("padding contained in alphabet") + } + enc.padChar = padding + return &enc +} + +/* + * Encoder + */ + +// Encode encodes src using the encoding enc, +// writing [Encoding.EncodedLen](len(src)) bytes to dst. +// +// The encoding pads the output to a multiple of 8 bytes, +// so Encode is not appropriate for use on individual blocks +// of a large data stream. Use [NewEncoder] instead. +func (enc *Encoding) Encode(dst, src []byte) { + if len(src) == 0 { + return + } + // enc is a pointer receiver, so the use of enc.encode within the hot + // loop below means a nil check at every operation. Lift that nil check + // outside of the loop to speed up the encoder. + _ = enc.encode + + di, si := 0, 0 + n := (len(src) / 5) * 5 + for si < n { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]) + lo := hi<<8 | uint32(src[si+4]) + + dst[di+0] = enc.encode[(hi>>27)&0x1F] + dst[di+1] = enc.encode[(hi>>22)&0x1F] + dst[di+2] = enc.encode[(hi>>17)&0x1F] + dst[di+3] = enc.encode[(hi>>12)&0x1F] + dst[di+4] = enc.encode[(hi>>7)&0x1F] + dst[di+5] = enc.encode[(hi>>2)&0x1F] + dst[di+6] = enc.encode[(lo>>5)&0x1F] + dst[di+7] = enc.encode[(lo)&0x1F] + + si += 5 + di += 8 + } + + // Add the remaining small block + remain := len(src) - si + if remain == 0 { + return + } + + // Encode the remaining bytes in reverse order. + val := uint32(0) + switch remain { + case 4: + val |= uint32(src[si+3]) + dst[di+6] = enc.encode[val<<3&0x1F] + dst[di+5] = enc.encode[val>>2&0x1F] + fallthrough + case 3: + val |= uint32(src[si+2]) << 8 + dst[di+4] = enc.encode[val>>7&0x1F] + fallthrough + case 2: + val |= uint32(src[si+1]) << 16 + dst[di+3] = enc.encode[val>>12&0x1F] + dst[di+2] = enc.encode[val>>17&0x1F] + fallthrough + case 1: + val |= uint32(src[si+0]) << 24 + dst[di+1] = enc.encode[val>>22&0x1F] + dst[di+0] = enc.encode[val>>27&0x1F] + } + + // Pad the final quantum + if enc.padChar != NoPadding { + nPad := (remain * 8 / 5) + 1 + for i := nPad; i < 8; i++ { + dst[di+i] = byte(enc.padChar) + } + } +} + +// AppendEncode appends the base32 encoded src to dst +// and returns the extended buffer. +func (enc *Encoding) AppendEncode(dst, src []byte) []byte { + n := enc.EncodedLen(len(src)) + dst = slices.Grow(dst, n) + enc.Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// EncodeToString returns the base32 encoding of src. +func (enc *Encoding) EncodeToString(src []byte) string { + buf := make([]byte, enc.EncodedLen(len(src))) + enc.Encode(buf, src) + return string(buf) +} + +type encoder struct { + err error + enc *Encoding + w io.Writer + buf [5]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 5; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 5 { + return + } + e.enc.Encode(e.out[0:], e.buf[0:]) + if _, e.err = e.w.Write(e.out[0:8]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. + for len(p) >= 5 { + nn := len(e.out) / 8 * 5 + if nn > len(p) { + nn = len(p) + nn -= nn % 5 + } + e.enc.Encode(e.out[0:], p[0:nn]) + if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil { + return n, e.err + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + e.enc.Encode(e.out[0:], e.buf[0:e.nbuf]) + encodedLen := e.enc.EncodedLen(e.nbuf) + e.nbuf = 0 + _, e.err = e.w.Write(e.out[0:encodedLen]) + } + return e.err +} + +// NewEncoder returns a new base32 stream encoder. Data written to +// the returned writer will be encoded using enc and then written to w. +// Base32 encodings operate in 5-byte blocks; when finished +// writing, the caller must Close the returned encoder to flush any +// partially written blocks. +func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { + return &encoder{enc: enc, w: w} +} + +// EncodedLen returns the length in bytes of the base32 encoding +// of an input buffer of length n. +func (enc *Encoding) EncodedLen(n int) int { + if enc.padChar == NoPadding { + return n/5*8 + (n%5*8+4)/5 + } + return (n + 4) / 5 * 8 +} + +/* + * Decoder + */ + +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal base32 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// decode is like Decode but returns an additional 'end' value, which +// indicates if end-of-message padding was encountered and thus any +// additional data is an error. This method assumes that src has been +// stripped of all supported whitespace ('\r' and '\n'). +func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { + // Lift the nil check outside of the loop. + _ = enc.decodeMap + + dsti := 0 + olen := len(src) + + for len(src) > 0 && !end { + // Decode quantum using the base32 alphabet + var dbuf [8]byte + dlen := 8 + + for j := 0; j < 8; { + + if len(src) == 0 { + if enc.padChar != NoPadding { + // We have reached the end and are missing padding + return n, false, CorruptInputError(olen - len(src) - j) + } + // We have reached the end and are not expecting any padding + dlen, end = j, true + break + } + in := src[0] + src = src[1:] + if in == byte(enc.padChar) && j >= 2 && len(src) < 8 { + // We've reached the end and there's padding + if len(src)+j < 8-1 { + // not enough padding + return n, false, CorruptInputError(olen) + } + for k := 0; k < 8-1-j; k++ { + if len(src) > k && src[k] != byte(enc.padChar) { + // incorrect padding + return n, false, CorruptInputError(olen - len(src) + k - 1) + } + } + dlen, end = j, true + // 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not + // valid dlen values. See RFC 4648 Section 6 "Base 32 Encoding" listing + // the five valid padding lengths, and Section 9 "Illustrations and + // Examples" for an illustration for how the 1st, 3rd and 6th base32 + // src bytes do not yield enough information to decode a dst byte. + if dlen == 1 || dlen == 3 || dlen == 6 { + return n, false, CorruptInputError(olen - len(src) - 1) + } + break + } + dbuf[j] = enc.decodeMap[in] + if dbuf[j] == 0xFF { + return n, false, CorruptInputError(olen - len(src) - 1) + } + j++ + } + + // Pack 8x 5-bit source blocks into 5 byte destination + // quantum + switch dlen { + case 8: + dst[dsti+4] = dbuf[6]<<5 | dbuf[7] + n++ + fallthrough + case 7: + dst[dsti+3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 + n++ + fallthrough + case 5: + dst[dsti+2] = dbuf[3]<<4 | dbuf[4]>>1 + n++ + fallthrough + case 4: + dst[dsti+1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 + n++ + fallthrough + case 2: + dst[dsti+0] = dbuf[0]<<3 | dbuf[1]>>2 + n++ + } + dsti += 5 + } + return n, end, nil +} + +// Decode decodes src using the encoding enc. It writes at most +// [Encoding.DecodedLen](len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid base32 data, it will return the +// number of bytes successfully written and [CorruptInputError]. +// Newline characters (\r and \n) are ignored. +func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + buf := make([]byte, len(src)) + l := stripNewlines(buf, src) + n, _, err = enc.decode(dst, buf[:l]) + return +} + +// AppendDecode appends the base32 decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) { + // Compute the output size without padding to avoid over allocating. + n := len(src) + for n > 0 && rune(src[n-1]) == enc.padChar { + n-- + } + n = decodedLen(n, NoPadding) + + dst = slices.Grow(dst, n) + n, err := enc.Decode(dst[len(dst):][:n], src) + return dst[:len(dst)+n], err +} + +// DecodeString returns the bytes represented by the base32 string s. +func (enc *Encoding) DecodeString(s string) ([]byte, error) { + buf := []byte(s) + l := stripNewlines(buf, buf) + n, _, err := enc.decode(buf, buf[:l]) + return buf[:n], err +} + +type decoder struct { + err error + enc *Encoding + r io.Reader + end bool // saw end of message + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024 / 8 * 5]byte +} + +func readEncodedData(r io.Reader, buf []byte, min int, expectsPadding bool) (n int, err error) { + for n < min && err == nil { + var nn int + nn, err = r.Read(buf[n:]) + n += nn + } + // data was read, less than min bytes could be read + if n < min && n > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + // no data was read, the buffer already contains some data + // when padding is disabled this is not an error, as the message can be of + // any length + if expectsPadding && min < 8 && n == 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Use leftover decoded output from last read. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + if len(d.out) == 0 { + return n, d.err + } + return n, nil + } + + if d.err != nil { + return 0, d.err + } + + // Read a chunk. + nn := len(p) / 5 * 8 + if nn < 8 { + nn = 8 + } + if nn > len(d.buf) { + nn = len(d.buf) + } + + // Minimum amount of bytes that needs to be read each cycle + var min int + var expectsPadding bool + if d.enc.padChar == NoPadding { + min = 1 + expectsPadding = false + } else { + min = 8 - d.nbuf + expectsPadding = true + } + + nn, d.err = readEncodedData(d.r, d.buf[d.nbuf:nn], min, expectsPadding) + d.nbuf += nn + if d.nbuf < min { + return 0, d.err + } + if nn > 0 && d.end { + return 0, CorruptInputError(0) + } + + // Decode chunk into p, or d.out and then p if p is too small. + var nr int + if d.enc.padChar == NoPadding { + nr = d.nbuf + } else { + nr = d.nbuf / 8 * 8 + } + nw := d.enc.DecodedLen(d.nbuf) + + if nw > len(p) { + nw, d.end, err = d.enc.decode(d.outbuf[0:], d.buf[0:nr]) + d.out = d.outbuf[0:nw] + n = copy(p, d.out) + d.out = d.out[n:] + } else { + n, d.end, err = d.enc.decode(p, d.buf[0:nr]) + } + d.nbuf -= nr + for i := 0; i < d.nbuf; i++ { + d.buf[i] = d.buf[i+nr] + } + + if err != nil && (d.err == nil || d.err == io.EOF) { + d.err = err + } + + if len(d.out) > 0 { + // We cannot return all the decoded bytes to the caller in this + // invocation of Read, so we return a nil error to ensure that Read + // will be called again. The error stored in d.err, if any, will be + // returned with the last set of decoded bytes. + return n, nil + } + + return n, d.err +} + +type newlineFilteringReader struct { + wrapped io.Reader +} + +// stripNewlines removes newline characters and returns the number +// of non-newline characters copied to dst. +func stripNewlines(dst, src []byte) int { + offset := 0 + for _, b := range src { + if b == '\r' || b == '\n' { + continue + } + dst[offset] = b + offset++ + } + return offset +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + s := p[0:n] + offset := stripNewlines(s, s) + if err != nil || offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} + +// NewDecoder constructs a new base32 stream decoder. +func NewDecoder(enc *Encoding, r io.Reader) io.Reader { + return &decoder{enc: enc, r: &newlineFilteringReader{r}} +} + +// DecodedLen returns the maximum length in bytes of the decoded data +// corresponding to n bytes of base32-encoded data. +func (enc *Encoding) DecodedLen(n int) int { + return decodedLen(n, enc.padChar) +} + +func decodedLen(n int, padChar rune) int { + if padChar == NoPadding { + return n/8*5 + n%8*5/8 + } + return n / 8 * 5 +} diff --git a/src/encoding/base32/base32_test.go b/src/encoding/base32/base32_test.go new file mode 100644 index 0000000..33638ad --- /dev/null +++ b/src/encoding/base32/base32_test.go @@ -0,0 +1,867 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package base32 + +import ( + "bytes" + "errors" + "io" + "math" + "strconv" + "strings" + "testing" +) + +type testpair struct { + decoded, encoded string +} + +var pairs = []testpair{ + // RFC 4648 examples + {"", ""}, + {"f", "MY======"}, + {"fo", "MZXQ===="}, + {"foo", "MZXW6==="}, + {"foob", "MZXW6YQ="}, + {"fooba", "MZXW6YTB"}, + {"foobar", "MZXW6YTBOI======"}, + + // Wikipedia examples, converted to base32 + {"sure.", "ON2XEZJO"}, + {"sure", "ON2XEZI="}, + {"sur", "ON2XE==="}, + {"su", "ON2Q===="}, + {"leasure.", "NRSWC43VOJSS4==="}, + {"easure.", "MVQXG5LSMUXA===="}, + {"asure.", "MFZXK4TFFY======"}, + {"sure.", "ON2XEZJO"}, +} + +var bigtest = testpair{ + "Twas brillig, and the slithy toves", + "KR3WC4ZAMJZGS3DMNFTSYIDBNZSCA5DIMUQHG3DJORUHSIDUN53GK4Y=", +} + +func testEqual(t *testing.T, msg string, args ...any) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) + return false + } + return true +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + got := StdEncoding.EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, p.encoded) + dst := StdEncoding.AppendEncode([]byte("lead"), []byte(p.decoded)) + testEqual(t, `AppendEncode("lead", %q) = %q, want %q`, p.decoded, string(dst), "lead"+p.encoded) + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, bb.String(), p.encoded) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, bb.String(), bigtest.encoded) + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) + count, end, err := StdEncoding.decode(dbuf, []byte(p.encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", p.encoded, count, len(p.decoded)) + if len(p.encoded) > 0 { + testEqual(t, "Decode(%q) = end %v, want %v", p.encoded, end, (p.encoded[len(p.encoded)-1] == '=')) + } + testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + + dbuf, err = StdEncoding.DecodeString(p.encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", p.encoded, string(dbuf), p.decoded) + + dst, err := StdEncoding.AppendDecode([]byte("lead"), []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("lead", %q) = %q, want %q`, p.encoded, string(dst), "lead"+p.decoded) + + dst2, err := StdEncoding.AppendDecode(dst[:0:len(p.decoded)], []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("", %q) = %q, want %q`, p.encoded, string(dst2), p.decoded) + if len(dst) > 0 && len(dst2) > 0 && &dst[0] != &dst2[0] { + t.Errorf("unexpected capacity growth: got %d, want %d", cap(dst2), cap(dst)) + } + } +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(StdEncoding, strings.NewReader(p.encoded)) + dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) + count, err := decoder.Read(dbuf) + if err != nil && err != io.EOF { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + if err != io.EOF { + _, err = decoder.Read(dbuf) + } + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } +} + +type badReader struct { + data []byte + errs []error + called int + limit int +} + +// Populates p with data, returns a count of the bytes written and an +// error. The error returned is taken from badReader.errs, with each +// invocation of Read returning the next error in this slice, or io.EOF, +// if all errors from the slice have already been returned. The +// number of bytes returned is determined by the size of the input buffer +// the test passes to decoder.Read and will be a multiple of 8, unless +// badReader.limit is non zero. +func (b *badReader) Read(p []byte) (int, error) { + lim := len(p) + if b.limit != 0 && b.limit < lim { + lim = b.limit + } + if len(b.data) < lim { + lim = len(b.data) + } + for i := range p[:lim] { + p[i] = b.data[i] + } + b.data = b.data[lim:] + err := io.EOF + if b.called < len(b.errs) { + err = b.errs[b.called] + } + b.called++ + return lim, err +} + +// TestIssue20044 tests that decoder.Read behaves correctly when the caller +// supplied reader returns an error. +func TestIssue20044(t *testing.T) { + badErr := errors.New("bad reader error") + testCases := []struct { + r badReader + res string + err error + dbuflen int + }{ + // Check valid input data accompanied by an error is processed and the error is propagated. + {r: badReader{data: []byte("MY======"), errs: []error{badErr}}, + res: "f", err: badErr}, + // Check a read error accompanied by input data consisting of newlines only is propagated. + {r: badReader{data: []byte("\n\n\n\n\n\n\n\n"), errs: []error{badErr, nil}}, + res: "", err: badErr}, + // Reader will be called twice. The first time it will return 8 newline characters. The + // second time valid base32 encoded data and an error. The data should be decoded + // correctly and the error should be propagated. + {r: badReader{data: []byte("\n\n\n\n\n\n\n\nMY======"), errs: []error{nil, badErr}}, + res: "f", err: badErr, dbuflen: 8}, + // Reader returns invalid input data (too short) and an error. Verify the reader + // error is returned. + {r: badReader{data: []byte("MY====="), errs: []error{badErr}}, + res: "", err: badErr}, + // Reader returns invalid input data (too short) but no error. Verify io.ErrUnexpectedEOF + // is returned. + {r: badReader{data: []byte("MY====="), errs: []error{nil}}, + res: "", err: io.ErrUnexpectedEOF}, + // Reader returns invalid input data and an error. Verify the reader and not the + // decoder error is returned. + {r: badReader{data: []byte("Ma======"), errs: []error{badErr}}, + res: "", err: badErr}, + // Reader returns valid data and io.EOF. Check data is decoded and io.EOF is propagated. + {r: badReader{data: []byte("MZXW6YTB"), errs: []error{io.EOF}}, + res: "fooba", err: io.EOF}, + // Check errors are properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but an error on the second call. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, badErr}}, + res: "leasure.", err: badErr, dbuflen: 1}, + // Check io.EOF is properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but io.EOF on the second call. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 1}, + // The following two test cases check that errors are propagated correctly when more than + // 8 bytes are read at a time. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 11}, + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{badErr}}, + res: "leasure.", err: badErr, dbuflen: 11}, + // Check that errors are correctly propagated when the reader returns valid bytes in + // groups that are not divisible by 8. The first read will return 11 bytes and no + // error. The second will return 7 and an error. The data should be decoded correctly + // and the error should be propagated. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, badErr}, limit: 11}, + res: "leasure.", err: badErr}, + } + + for _, tc := range testCases { + input := tc.r.data + decoder := NewDecoder(StdEncoding, &tc.r) + var dbuflen int + if tc.dbuflen > 0 { + dbuflen = tc.dbuflen + } else { + dbuflen = StdEncoding.DecodedLen(len(input)) + } + dbuf := make([]byte, dbuflen) + var err error + var res []byte + for err == nil { + var n int + n, err = decoder.Read(dbuf) + if n > 0 { + res = append(res, dbuf[:n]...) + } + } + + testEqual(t, "Decoding of %q = %q, want %q", string(input), string(res), tc.res) + testEqual(t, "Decoding of %q err = %v, expected %v", string(input), err, tc.err) + } +} + +// TestDecoderError verifies decode errors are propagated when there are no read +// errors. +func TestDecoderError(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTb" + dbuf := make([]byte, StdEncoding.DecodedLen(len(input))) + br := badReader{data: []byte(input), errs: []error{readErr}} + decoder := NewDecoder(StdEncoding, &br) + n, err := decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + if _, ok := err.(CorruptInputError); !ok { + t.Errorf("Corrupt input error expected. Found %T", err) + } + } +} + +// TestReaderEOF ensures decoder.Read behaves correctly when input data is +// exhausted. +func TestReaderEOF(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTB" + br := badReader{data: []byte(input), errs: []error{nil, readErr}} + decoder := NewDecoder(StdEncoding, &br) + dbuf := make([]byte, StdEncoding.DecodedLen(len(input))) + n, err := decoder.Read(dbuf) + testEqual(t, "Decoding of %q err = %v, expected %v", input, err, error(nil)) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(StdEncoding, strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, + {"!!!!", 0}, + {"x===", 0}, + {"AA=A====", 2}, + {"AAA=AAAA", 3}, + {"MMMMMMMMM", 8}, + {"MMMMMM", 0}, + {"A=", 1}, + {"AA=", 3}, + {"AA==", 4}, + {"AA===", 5}, + {"AAAA=", 5}, + {"AAAA==", 6}, + {"AAAAA=", 6}, + {"AAAAA==", 7}, + {"A=======", 1}, + {"AA======", -1}, + {"AAA=====", 3}, + {"AAAA====", -1}, + {"AAAAA===", -1}, + {"AAAAAA==", 6}, + {"AAAAAAA=", -1}, + {"AAAAAAAA", -1}, + } + for _, tc := range testCases { + dbuf := make([]byte, StdEncoding.DecodedLen(len(tc.input))) + _, err := StdEncoding.Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected corruption in", tc.input) + } + continue + } + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) + default: + t.Error("Decoder failed to detect corruption in", tc) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(StdEncoding, encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(StdEncoding, encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func testStringEncoding(t *testing.T, expected string, examples []string) { + for _, e := range examples { + buf, err := StdEncoding.DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } + if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + +func TestNewLineCharacters(t *testing.T) { + // Each of these should decode to the string "sure", without errors. + examples := []string{ + "ON2XEZI=", + "ON2XEZI=\r", + "ON2XEZI=\n", + "ON2XEZI=\r\n", + "ON2XEZ\r\nI=", + "ON2X\rEZ\nI=", + "ON2X\nEZ\rI=", + "ON2XEZ\nI=", + "ON2XEZI\n=", + } + testStringEncoding(t, "sure", examples) + + // Each of these should decode to the string "foobar", without errors. + examples = []string{ + "MZXW6YTBOI======", + "MZXW6YTBOI=\r\n=====", + } + testStringEncoding(t, "foobar", examples) +} + +func TestDecoderIssue4779(t *testing.T) { + encoded := `JRXXEZLNEBUXA43VNUQGI33MN5ZCA43JOQQGC3LFOQWCAY3PNZZWKY3UMV2HK4 +RAMFSGS4DJONUWG2LOM4QGK3DJOQWCA43FMQQGI3YKMVUXK43NN5SCA5DFNVYG64RANFXGG2LENFSH +K3TUEB2XIIDMMFRG64TFEBSXIIDEN5WG64TFEBWWCZ3OMEQGC3DJOF2WCLRAKV2CAZLONFWQUYLEEB +WWS3TJNUQHMZLONFQW2LBAOF2WS4ZANZXXG5DSOVSCAZLYMVZGG2LUMF2GS33OEB2WY3DBNVRW6IDM +MFRG64TJOMQG42LTNEQHK5AKMFWGS4LVNFYCAZLYEBSWCIDDN5WW233EN4QGG33OONSXC5LBOQXCAR +DVNFZSAYLVORSSA2LSOVZGKIDEN5WG64RANFXAU4TFOBZGK2DFNZSGK4TJOQQGS3RAOZXWY5LQORQX +IZJAOZSWY2LUEBSXG43FEBRWS3DMOVWSAZDPNRXXEZJAMV2SAZTVM5UWC5BANZ2WY3DBBJYGC4TJMF +2HK4ROEBCXQY3FOB2GK5LSEBZWS3TUEBXWGY3BMVRWC5BAMN2XA2LEMF2GC5BANZXW4IDQOJXWSZDF +NZ2CYIDTOVXHIIDJNYFGG5LMOBQSA4LVNEQG6ZTGNFRWSYJAMRSXGZLSOVXHIIDNN5WGY2LUEBQW42 +LNEBUWIIDFON2CA3DBMJXXE5LNFY== +====` + encodedShort := strings.ReplaceAll(encoded, "\n", "") + + dec := NewDecoder(StdEncoding, strings.NewReader(encoded)) + res1, err := io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + dec = NewDecoder(StdEncoding, strings.NewReader(encodedShort)) + var res2 []byte + res2, err = io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + if !bytes.Equal(res1, res2) { + t.Error("Decoded results not equal") + } +} + +func BenchmarkEncode(b *testing.B) { + data := make([]byte, 8192) + buf := make([]byte, StdEncoding.EncodedLen(len(data))) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Encode(buf, data) + } +} + +func BenchmarkEncodeToString(b *testing.B) { + data := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.EncodeToString(data) + } +} + +func BenchmarkDecode(b *testing.B) { + data := make([]byte, StdEncoding.EncodedLen(8192)) + StdEncoding.Encode(data, make([]byte, 8192)) + buf := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Decode(buf, data) + } +} +func BenchmarkDecodeString(b *testing.B) { + data := StdEncoding.EncodeToString(make([]byte, 8192)) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.DecodeString(data) + } +} + +func TestWithCustomPadding(t *testing.T) { + for _, testcase := range pairs { + defaultPadding := StdEncoding.EncodeToString([]byte(testcase.decoded)) + customPadding := StdEncoding.WithPadding('@').EncodeToString([]byte(testcase.decoded)) + expected := strings.ReplaceAll(defaultPadding, "=", "@") + + if expected != customPadding { + t.Errorf("Expected custom %s, got %s", expected, customPadding) + } + if testcase.encoded != defaultPadding { + t.Errorf("Expected %s, got %s", testcase.encoded, defaultPadding) + } + } +} + +func TestWithoutPadding(t *testing.T) { + for _, testcase := range pairs { + defaultPadding := StdEncoding.EncodeToString([]byte(testcase.decoded)) + customPadding := StdEncoding.WithPadding(NoPadding).EncodeToString([]byte(testcase.decoded)) + expected := strings.TrimRight(defaultPadding, "=") + + if expected != customPadding { + t.Errorf("Expected custom %s, got %s", expected, customPadding) + } + if testcase.encoded != defaultPadding { + t.Errorf("Expected %s, got %s", testcase.encoded, defaultPadding) + } + } +} + +func TestDecodeWithPadding(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding('-'), + StdEncoding.WithPadding(NoPadding), + } + + for i, enc := range encodings { + for _, pair := range pairs { + + input := pair.decoded + encoded := enc.EncodeToString([]byte(input)) + + decoded, err := enc.DecodeString(encoded) + if err != nil { + t.Errorf("DecodeString Error for encoding %d (%q): %v", i, input, err) + } + + if input != string(decoded) { + t.Errorf("Unexpected result for encoding %d: got %q; want %q", i, decoded, input) + } + } + } +} + +func TestDecodeWithWrongPadding(t *testing.T) { + encoded := StdEncoding.EncodeToString([]byte("foobar")) + + _, err := StdEncoding.WithPadding('-').DecodeString(encoded) + if err == nil { + t.Error("expected error") + } + + _, err = StdEncoding.WithPadding(NoPadding).DecodeString(encoded) + if err == nil { + t.Error("expected error") + } +} + +func TestBufferedDecodingSameError(t *testing.T) { + testcases := []struct { + prefix string + chunkCombinations [][]string + expected error + }{ + // NBSWY3DPO5XXE3DE == helloworld + // Test with "ZZ" as extra input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE", "ZZ"}, + {"NBSWY3DPO5XXE3DE", "ZZ"}, + {"NBSWY3DPO5XXE3DEZZ"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "EZZ"}, + {"NBSWY3DPO5XXE3", "DEZZ"}, + }, io.ErrUnexpectedEOF}, + + // Test with "ZZY" as extra input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE", "ZZY"}, + {"NBSWY3DPO5XXE3DE", "ZZY"}, + {"NBSWY3DPO5XXE3DEZZY"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "EZZY"}, + {"NBSWY3DPO5XXE3", "DEZZY"}, + }, io.ErrUnexpectedEOF}, + + // Normal case, this is valid input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE"}, + {"NBSWY3DPO5XXE3DE"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "E"}, + {"NBSWY3DPO5XXE3", "DE"}, + }, nil}, + + // MZXW6YTB = fooba + {"fooba", [][]string{ + {"MZXW6YTBZZ"}, + {"MZXW6YTBZ", "Z"}, + {"MZXW6YTB", "ZZ"}, + {"MZXW6YT", "BZZ"}, + {"MZXW6Y", "TBZZ"}, + {"MZXW6Y", "TB", "ZZ"}, + {"MZXW6", "YTBZZ"}, + {"MZXW6", "YTB", "ZZ"}, + {"MZXW6", "YT", "BZZ"}, + }, io.ErrUnexpectedEOF}, + + // Normal case, this is valid input + {"fooba", [][]string{ + {"MZXW6YTB"}, + {"MZXW6YT", "B"}, + {"MZXW6Y", "TB"}, + {"MZXW6", "YTB"}, + {"MZXW6", "YT", "B"}, + {"MZXW", "6YTB"}, + {"MZXW", "6Y", "TB"}, + }, nil}, + } + + for _, testcase := range testcases { + for _, chunks := range testcase.chunkCombinations { + pr, pw := io.Pipe() + + // Write the encoded chunks into the pipe + go func() { + for _, chunk := range chunks { + pw.Write([]byte(chunk)) + } + pw.Close() + }() + + decoder := NewDecoder(StdEncoding, pr) + _, err := io.ReadAll(decoder) + + if err != testcase.expected { + t.Errorf("Expected %v, got %v; case %s %+v", testcase.expected, err, testcase.prefix, chunks) + } + } + } +} + +func TestBufferedDecodingPadding(t *testing.T) { + testcases := []struct { + chunks []string + expectedError string + }{ + {[]string{ + "I4======", + "==", + }, "unexpected EOF"}, + + {[]string{ + "I4======N4======", + }, "illegal base32 data at input byte 2"}, + + {[]string{ + "I4======", + "N4======", + }, "illegal base32 data at input byte 0"}, + + {[]string{ + "I4======", + "========", + }, "illegal base32 data at input byte 0"}, + + {[]string{ + "I4I4I4I4", + "I4======", + "I4======", + }, "illegal base32 data at input byte 0"}, + } + + for _, testcase := range testcases { + testcase := testcase + pr, pw := io.Pipe() + go func() { + for _, chunk := range testcase.chunks { + _, _ = pw.Write([]byte(chunk)) + } + _ = pw.Close() + }() + + decoder := NewDecoder(StdEncoding, pr) + _, err := io.ReadAll(decoder) + + if err == nil && len(testcase.expectedError) != 0 { + t.Errorf("case %q: got nil error, want %v", testcase.chunks, testcase.expectedError) + } else if err.Error() != testcase.expectedError { + t.Errorf("case %q: got %v, want %v", testcase.chunks, err, testcase.expectedError) + } + } +} + +func TestEncodedLen(t *testing.T) { + var rawStdEncoding = StdEncoding.WithPadding(NoPadding) + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {StdEncoding, 0, 0}, + {StdEncoding, 1, 8}, + {StdEncoding, 2, 8}, + {StdEncoding, 3, 8}, + {StdEncoding, 4, 8}, + {StdEncoding, 5, 8}, + {StdEncoding, 6, 16}, + {StdEncoding, 10, 16}, + {StdEncoding, 11, 24}, + {rawStdEncoding, 0, 0}, + {rawStdEncoding, 1, 2}, + {rawStdEncoding, 2, 4}, + {rawStdEncoding, 3, 5}, + {rawStdEncoding, 4, 7}, + {rawStdEncoding, 5, 8}, + {rawStdEncoding, 6, 10}, + {rawStdEncoding, 7, 12}, + {rawStdEncoding, 10, 16}, + {rawStdEncoding, 11, 18}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{rawStdEncoding, (math.MaxInt-4)/8 + 1, 429496730}) + tests = append(tests, test{rawStdEncoding, math.MaxInt/8*5 + 4, math.MaxInt}) + case 64: + tests = append(tests, test{rawStdEncoding, (math.MaxInt-4)/8 + 1, 1844674407370955162}) + tests = append(tests, test{rawStdEncoding, math.MaxInt/8*5 + 4, math.MaxInt}) + } + for _, tt := range tests { + if got := tt.enc.EncodedLen(tt.n); int64(got) != tt.want { + t.Errorf("EncodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestDecodedLen(t *testing.T) { + var rawStdEncoding = StdEncoding.WithPadding(NoPadding) + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {StdEncoding, 0, 0}, + {StdEncoding, 8, 5}, + {StdEncoding, 16, 10}, + {StdEncoding, 24, 15}, + {rawStdEncoding, 0, 0}, + {rawStdEncoding, 2, 1}, + {rawStdEncoding, 4, 2}, + {rawStdEncoding, 5, 3}, + {rawStdEncoding, 7, 4}, + {rawStdEncoding, 8, 5}, + {rawStdEncoding, 10, 6}, + {rawStdEncoding, 12, 7}, + {rawStdEncoding, 16, 10}, + {rawStdEncoding, 18, 11}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{rawStdEncoding, math.MaxInt/5 + 1, 268435456}) + tests = append(tests, test{rawStdEncoding, math.MaxInt, 1342177279}) + case 64: + tests = append(tests, test{rawStdEncoding, math.MaxInt/5 + 1, 1152921504606846976}) + tests = append(tests, test{rawStdEncoding, math.MaxInt, 5764607523034234879}) + } + for _, tt := range tests { + if got := tt.enc.DecodedLen(tt.n); int64(got) != tt.want { + t.Errorf("DecodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestWithoutPaddingClose(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for _, encoding := range encodings { + for _, testpair := range pairs { + + var buf strings.Builder + encoder := NewEncoder(encoding, &buf) + encoder.Write([]byte(testpair.decoded)) + encoder.Close() + + expected := testpair.encoded + if encoding.padChar == NoPadding { + expected = strings.ReplaceAll(expected, "=", "") + } + + res := buf.String() + + if res != expected { + t.Errorf("Expected %s got %s; padChar=%d", expected, res, encoding.padChar) + } + } + } +} + +func TestDecodeReadAll(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for _, pair := range pairs { + for encIndex, encoding := range encodings { + encoded := pair.encoded + if encoding.padChar == NoPadding { + encoded = strings.ReplaceAll(encoded, "=", "") + } + + decReader, err := io.ReadAll(NewDecoder(encoding, strings.NewReader(encoded))) + if err != nil { + t.Errorf("NewDecoder error: %v", err) + } + + if pair.decoded != string(decReader) { + t.Errorf("Expected %s got %s; Encoding %d", pair.decoded, decReader, encIndex) + } + } + } +} + +func TestDecodeSmallBuffer(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for bufferSize := 1; bufferSize < 200; bufferSize++ { + for _, pair := range pairs { + for encIndex, encoding := range encodings { + encoded := pair.encoded + if encoding.padChar == NoPadding { + encoded = strings.ReplaceAll(encoded, "=", "") + } + + decoder := NewDecoder(encoding, strings.NewReader(encoded)) + + var allRead []byte + + for { + buf := make([]byte, bufferSize) + n, err := decoder.Read(buf) + allRead = append(allRead, buf[0:n]...) + if err == io.EOF { + break + } + if err != nil { + t.Error(err) + } + } + + if pair.decoded != string(allRead) { + t.Errorf("Expected %s got %s; Encoding %d; bufferSize %d", pair.decoded, allRead, encIndex, bufferSize) + } + } + } + } +} diff --git a/src/encoding/base32/example_test.go b/src/encoding/base32/example_test.go new file mode 100644 index 0000000..251624f --- /dev/null +++ b/src/encoding/base32/example_test.go @@ -0,0 +1,68 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Keep in sync with ../base64/example_test.go. + +package base32_test + +import ( + "encoding/base32" + "fmt" + "os" +) + +func ExampleEncoding_EncodeToString() { + data := []byte("any + old & data") + str := base32.StdEncoding.EncodeToString(data) + fmt.Println(str) + // Output: + // MFXHSIBLEBXWYZBAEYQGIYLUME====== +} + +func ExampleEncoding_Encode() { + data := []byte("Hello, world!") + dst := make([]byte, base32.StdEncoding.EncodedLen(len(data))) + base32.StdEncoding.Encode(dst, data) + fmt.Println(string(dst)) + // Output: + // JBSWY3DPFQQHO33SNRSCC=== +} + +func ExampleEncoding_DecodeString() { + str := "ONXW2ZJAMRQXIYJAO5UXI2BAAAQGC3TEEDX3XPY=" + data, err := base32.StdEncoding.DecodeString(str) + if err != nil { + fmt.Println("error:", err) + return + } + fmt.Printf("%q\n", data) + // Output: + // "some data with \x00 and \ufeff" +} + +func ExampleEncoding_Decode() { + str := "JBSWY3DPFQQHO33SNRSCC===" + dst := make([]byte, base32.StdEncoding.DecodedLen(len(str))) + n, err := base32.StdEncoding.Decode(dst, []byte(str)) + if err != nil { + fmt.Println("decode error:", err) + return + } + dst = dst[:n] + fmt.Printf("%q\n", dst) + // Output: + // "Hello, world!" +} + +func ExampleNewEncoder() { + input := []byte("foo\x00bar") + encoder := base32.NewEncoder(base32.StdEncoding, os.Stdout) + encoder.Write(input) + // Must close the encoder when finished to flush any partial blocks. + // If you comment out the following line, the last partial block "r" + // won't be encoded. + encoder.Close() + // Output: + // MZXW6ADCMFZA==== +} diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go new file mode 100644 index 0000000..87f4586 --- /dev/null +++ b/src/encoding/base64/base64.go @@ -0,0 +1,661 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package base64 implements base64 encoding as specified by RFC 4648. +package base64 + +import ( + "encoding/binary" + "io" + "slices" + "strconv" +) + +/* + * Encodings + */ + +// An Encoding is a radix 64 encoding/decoding scheme, defined by a +// 64-character alphabet. The most common encoding is the "base64" +// encoding defined in RFC 4648 and used in MIME (RFC 2045) and PEM +// (RFC 1421). RFC 4648 also defines an alternate encoding, which is +// the standard encoding with - and _ substituted for + and /. +type Encoding struct { + encode [64]byte // mapping of symbol index to symbol byte value + decodeMap [256]uint8 // mapping of symbol byte value to symbol index + padChar rune + strict bool +} + +const ( + StdPadding rune = '=' // Standard padding character + NoPadding rune = -1 // No padding +) + +const ( + decodeMapInitialize = "" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + invalidIndex = '\xff' +) + +// NewEncoding returns a new padded Encoding defined by the given alphabet, +// which must be a 64-byte string that contains unique byte values and +// does not contain the padding character or CR / LF ('\r', '\n'). +// The alphabet is treated as a sequence of byte values +// without any special treatment for multi-byte UTF-8. +// The resulting Encoding uses the default padding character ('='), +// which may be changed or disabled via [Encoding.WithPadding]. +func NewEncoding(encoder string) *Encoding { + if len(encoder) != 64 { + panic("encoding alphabet is not 64-bytes long") + } + + e := new(Encoding) + e.padChar = StdPadding + copy(e.encode[:], encoder) + copy(e.decodeMap[:], decodeMapInitialize) + + for i := 0; i < len(encoder); i++ { + // Note: While we document that the alphabet cannot contain + // the padding character, we do not enforce it since we do not know + // if the caller intends to switch the padding from StdPadding later. + switch { + case encoder[i] == '\n' || encoder[i] == '\r': + panic("encoding alphabet contains newline character") + case e.decodeMap[encoder[i]] != invalidIndex: + panic("encoding alphabet includes duplicate symbols") + } + e.decodeMap[encoder[i]] = uint8(i) + } + return e +} + +// WithPadding creates a new encoding identical to enc except +// with a specified padding character, or [NoPadding] to disable padding. +// The padding character must not be '\r' or '\n', +// must not be contained in the encoding's alphabet, +// must not be negative, and must be a rune equal or below '\xff'. +// Padding characters above '\x7f' are encoded as their exact byte value +// rather than using the UTF-8 representation of the codepoint. +func (enc Encoding) WithPadding(padding rune) *Encoding { + switch { + case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff: + panic("invalid padding") + case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex: + panic("padding contained in alphabet") + } + enc.padChar = padding + return &enc +} + +// Strict creates a new encoding identical to enc except with +// strict decoding enabled. In this mode, the decoder requires that +// trailing padding bits are zero, as described in RFC 4648 section 3.5. +// +// Note that the input is still malleable, as new line characters +// (CR and LF) are still ignored. +func (enc Encoding) Strict() *Encoding { + enc.strict = true + return &enc +} + +// StdEncoding is the standard base64 encoding, as defined in RFC 4648. +var StdEncoding = NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + +// URLEncoding is the alternate base64 encoding defined in RFC 4648. +// It is typically used in URLs and file names. +var URLEncoding = NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_") + +// RawStdEncoding is the standard raw, unpadded base64 encoding, +// as defined in RFC 4648 section 3.2. +// This is the same as [StdEncoding] but omits padding characters. +var RawStdEncoding = StdEncoding.WithPadding(NoPadding) + +// RawURLEncoding is the unpadded alternate base64 encoding defined in RFC 4648. +// It is typically used in URLs and file names. +// This is the same as [URLEncoding] but omits padding characters. +var RawURLEncoding = URLEncoding.WithPadding(NoPadding) + +/* + * Encoder + */ + +// Encode encodes src using the encoding enc, +// writing [Encoding.EncodedLen](len(src)) bytes to dst. +// +// The encoding pads the output to a multiple of 4 bytes, +// so Encode is not appropriate for use on individual blocks +// of a large data stream. Use [NewEncoder] instead. +func (enc *Encoding) Encode(dst, src []byte) { + if len(src) == 0 { + return + } + // enc is a pointer receiver, so the use of enc.encode within the hot + // loop below means a nil check at every operation. Lift that nil check + // outside of the loop to speed up the encoder. + _ = enc.encode + + di, si := 0, 0 + n := (len(src) / 3) * 3 + for si < n { + // Convert 3x 8bit source bytes into 4 bytes + val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2]) + + dst[di+0] = enc.encode[val>>18&0x3F] + dst[di+1] = enc.encode[val>>12&0x3F] + dst[di+2] = enc.encode[val>>6&0x3F] + dst[di+3] = enc.encode[val&0x3F] + + si += 3 + di += 4 + } + + remain := len(src) - si + if remain == 0 { + return + } + // Add the remaining small block + val := uint(src[si+0]) << 16 + if remain == 2 { + val |= uint(src[si+1]) << 8 + } + + dst[di+0] = enc.encode[val>>18&0x3F] + dst[di+1] = enc.encode[val>>12&0x3F] + + switch remain { + case 2: + dst[di+2] = enc.encode[val>>6&0x3F] + if enc.padChar != NoPadding { + dst[di+3] = byte(enc.padChar) + } + case 1: + if enc.padChar != NoPadding { + dst[di+2] = byte(enc.padChar) + dst[di+3] = byte(enc.padChar) + } + } +} + +// AppendEncode appends the base64 encoded src to dst +// and returns the extended buffer. +func (enc *Encoding) AppendEncode(dst, src []byte) []byte { + n := enc.EncodedLen(len(src)) + dst = slices.Grow(dst, n) + enc.Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// EncodeToString returns the base64 encoding of src. +func (enc *Encoding) EncodeToString(src []byte) string { + buf := make([]byte, enc.EncodedLen(len(src))) + enc.Encode(buf, src) + return string(buf) +} + +type encoder struct { + err error + enc *Encoding + w io.Writer + buf [3]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 3; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 3 { + return + } + e.enc.Encode(e.out[:], e.buf[:]) + if _, e.err = e.w.Write(e.out[:4]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. + for len(p) >= 3 { + nn := len(e.out) / 4 * 3 + if nn > len(p) { + nn = len(p) + nn -= nn % 3 + } + e.enc.Encode(e.out[:], p[:nn]) + if _, e.err = e.w.Write(e.out[0 : nn/3*4]); e.err != nil { + return n, e.err + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + e.enc.Encode(e.out[:], e.buf[:e.nbuf]) + _, e.err = e.w.Write(e.out[:e.enc.EncodedLen(e.nbuf)]) + e.nbuf = 0 + } + return e.err +} + +// NewEncoder returns a new base64 stream encoder. Data written to +// the returned writer will be encoded using enc and then written to w. +// Base64 encodings operate in 4-byte blocks; when finished +// writing, the caller must Close the returned encoder to flush any +// partially written blocks. +func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { + return &encoder{enc: enc, w: w} +} + +// EncodedLen returns the length in bytes of the base64 encoding +// of an input buffer of length n. +func (enc *Encoding) EncodedLen(n int) int { + if enc.padChar == NoPadding { + return n/3*4 + (n%3*8+5)/6 // minimum # chars at 6 bits per char + } + return (n + 2) / 3 * 4 // minimum # 4-char quanta, 3 bytes each +} + +/* + * Decoder + */ + +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// decodeQuantum decodes up to 4 base64 bytes. The received parameters are +// the destination buffer dst, the source buffer src and an index in the +// source buffer si. +// It returns the number of bytes read from src, the number of bytes written +// to dst, and an error, if any. +func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) { + // Decode quantum using the base64 alphabet + var dbuf [4]byte + dlen := 4 + + // Lift the nil check outside of the loop. + _ = enc.decodeMap + + for j := 0; j < len(dbuf); j++ { + if len(src) == si { + switch { + case j == 0: + return si, 0, nil + case j == 1, enc.padChar != NoPadding: + return si, 0, CorruptInputError(si - j) + } + dlen = j + break + } + in := src[si] + si++ + + out := enc.decodeMap[in] + if out != 0xff { + dbuf[j] = out + continue + } + + if in == '\n' || in == '\r' { + j-- + continue + } + + if rune(in) != enc.padChar { + return si, 0, CorruptInputError(si - 1) + } + + // We've reached the end and there's padding + switch j { + case 0, 1: + // incorrect padding + return si, 0, CorruptInputError(si - 1) + case 2: + // "==" is expected, the first "=" is already consumed. + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + if si == len(src) { + // not enough padding + return si, 0, CorruptInputError(len(src)) + } + if rune(src[si]) != enc.padChar { + // incorrect padding + return si, 0, CorruptInputError(si - 1) + } + + si++ + } + + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } + if si < len(src) { + // trailing garbage + err = CorruptInputError(si) + } + dlen = j + break + } + + // Convert 4x 6bit source bytes into 3 bytes + val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3]) + dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16) + switch dlen { + case 4: + dst[2] = dbuf[2] + dbuf[2] = 0 + fallthrough + case 3: + dst[1] = dbuf[1] + if enc.strict && dbuf[2] != 0 { + return si, 0, CorruptInputError(si - 1) + } + dbuf[1] = 0 + fallthrough + case 2: + dst[0] = dbuf[0] + if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) { + return si, 0, CorruptInputError(si - 2) + } + } + + return si, dlen - 1, err +} + +// AppendDecode appends the base64 decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) { + // Compute the output size without padding to avoid over allocating. + n := len(src) + for n > 0 && rune(src[n-1]) == enc.padChar { + n-- + } + n = decodedLen(n, NoPadding) + + dst = slices.Grow(dst, n) + n, err := enc.Decode(dst[len(dst):][:n], src) + return dst[:len(dst)+n], err +} + +// DecodeString returns the bytes represented by the base64 string s. +func (enc *Encoding) DecodeString(s string) ([]byte, error) { + dbuf := make([]byte, enc.DecodedLen(len(s))) + n, err := enc.Decode(dbuf, []byte(s)) + return dbuf[:n], err +} + +type decoder struct { + err error + readErr error // error from r.Read + enc *Encoding + r io.Reader + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024 / 4 * 3]byte +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Use leftover decoded output from last read. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + return n, nil + } + + if d.err != nil { + return 0, d.err + } + + // This code assumes that d.r strips supported whitespace ('\r' and '\n'). + + // Refill buffer. + for d.nbuf < 4 && d.readErr == nil { + nn := len(p) / 3 * 4 + if nn < 4 { + nn = 4 + } + if nn > len(d.buf) { + nn = len(d.buf) + } + nn, d.readErr = d.r.Read(d.buf[d.nbuf:nn]) + d.nbuf += nn + } + + if d.nbuf < 4 { + if d.enc.padChar == NoPadding && d.nbuf > 0 { + // Decode final fragment, without padding. + var nw int + nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf]) + d.nbuf = 0 + d.out = d.outbuf[:nw] + n = copy(p, d.out) + d.out = d.out[n:] + if n > 0 || len(p) == 0 && len(d.out) > 0 { + return n, nil + } + if d.err != nil { + return 0, d.err + } + } + d.err = d.readErr + if d.err == io.EOF && d.nbuf > 0 { + d.err = io.ErrUnexpectedEOF + } + return 0, d.err + } + + // Decode chunk into p, or d.out and then p if p is too small. + nr := d.nbuf / 4 * 4 + nw := d.nbuf / 4 * 3 + if nw > len(p) { + nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr]) + d.out = d.outbuf[:nw] + n = copy(p, d.out) + d.out = d.out[n:] + } else { + n, d.err = d.enc.Decode(p, d.buf[:nr]) + } + d.nbuf -= nr + copy(d.buf[:d.nbuf], d.buf[nr:]) + return n, d.err +} + +// Decode decodes src using the encoding enc. It writes at most +// [Encoding.DecodedLen](len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid base64 data, it will return the +// number of bytes successfully written and [CorruptInputError]. +// New line characters (\r and \n) are ignored. +func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + if len(src) == 0 { + return 0, nil + } + + // Lift the nil check outside of the loop. enc.decodeMap is directly + // used later in this function, to let the compiler know that the + // receiver can't be nil. + _ = enc.decodeMap + + si := 0 + for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 { + src2 := src[si : si+8] + if dn, ok := assemble64( + enc.decodeMap[src2[0]], + enc.decodeMap[src2[1]], + enc.decodeMap[src2[2]], + enc.decodeMap[src2[3]], + enc.decodeMap[src2[4]], + enc.decodeMap[src2[5]], + enc.decodeMap[src2[6]], + enc.decodeMap[src2[7]], + ); ok { + binary.BigEndian.PutUint64(dst[n:], dn) + n += 6 + si += 8 + } else { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + } + + for len(src)-si >= 4 && len(dst)-n >= 4 { + src2 := src[si : si+4] + if dn, ok := assemble32( + enc.decodeMap[src2[0]], + enc.decodeMap[src2[1]], + enc.decodeMap[src2[2]], + enc.decodeMap[src2[3]], + ); ok { + binary.BigEndian.PutUint32(dst[n:], dn) + n += 3 + si += 4 + } else { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + } + + for si < len(src) { + var ninc int + si, ninc, err = enc.decodeQuantum(dst[n:], src, si) + n += ninc + if err != nil { + return n, err + } + } + return n, err +} + +// assemble32 assembles 4 base64 digits into 3 bytes. +// Each digit comes from the decode map, and will be 0xff +// if it came from an invalid character. +func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) { + // Check that all the digits are valid. If any of them was 0xff, their + // bitwise OR will be 0xff. + if n1|n2|n3|n4 == 0xff { + return 0, false + } + return uint32(n1)<<26 | + uint32(n2)<<20 | + uint32(n3)<<14 | + uint32(n4)<<8, + true +} + +// assemble64 assembles 8 base64 digits into 6 bytes. +// Each digit comes from the decode map, and will be 0xff +// if it came from an invalid character. +func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) { + // Check that all the digits are valid. If any of them was 0xff, their + // bitwise OR will be 0xff. + if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff { + return 0, false + } + return uint64(n1)<<58 | + uint64(n2)<<52 | + uint64(n3)<<46 | + uint64(n4)<<40 | + uint64(n5)<<34 | + uint64(n6)<<28 | + uint64(n7)<<22 | + uint64(n8)<<16, + true +} + +type newlineFilteringReader struct { + wrapped io.Reader +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + offset := 0 + for i, b := range p[:n] { + if b != '\r' && b != '\n' { + if i != offset { + p[offset] = b + } + offset++ + } + } + if offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} + +// NewDecoder constructs a new base64 stream decoder. +func NewDecoder(enc *Encoding, r io.Reader) io.Reader { + return &decoder{enc: enc, r: &newlineFilteringReader{r}} +} + +// DecodedLen returns the maximum length in bytes of the decoded data +// corresponding to n bytes of base64-encoded data. +func (enc *Encoding) DecodedLen(n int) int { + return decodedLen(n, enc.padChar) +} + +func decodedLen(n int, padChar rune) int { + if padChar == NoPadding { + // Unpadded data may end with partial block of 2-3 characters. + return n/4*3 + n%4*6/8 + } + // Padded base64 should always be a multiple of 4 characters in length. + return n / 4 * 3 +} diff --git a/src/encoding/base64/base64_test.go b/src/encoding/base64/base64_test.go new file mode 100644 index 0000000..7f5ebd8 --- /dev/null +++ b/src/encoding/base64/base64_test.go @@ -0,0 +1,578 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package base64 + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "reflect" + "runtime/debug" + "strconv" + "strings" + "testing" + "time" +) + +type testpair struct { + decoded, encoded string +} + +var pairs = []testpair{ + // RFC 3548 examples + {"\x14\xfb\x9c\x03\xd9\x7e", "FPucA9l+"}, + {"\x14\xfb\x9c\x03\xd9", "FPucA9k="}, + {"\x14\xfb\x9c\x03", "FPucAw=="}, + + // RFC 4648 examples + {"", ""}, + {"f", "Zg=="}, + {"fo", "Zm8="}, + {"foo", "Zm9v"}, + {"foob", "Zm9vYg=="}, + {"fooba", "Zm9vYmE="}, + {"foobar", "Zm9vYmFy"}, + + // Wikipedia examples + {"sure.", "c3VyZS4="}, + {"sure", "c3VyZQ=="}, + {"sur", "c3Vy"}, + {"su", "c3U="}, + {"leasure.", "bGVhc3VyZS4="}, + {"easure.", "ZWFzdXJlLg=="}, + {"asure.", "YXN1cmUu"}, + {"sure.", "c3VyZS4="}, +} + +// Do nothing to a reference base64 string (leave in standard format) +func stdRef(ref string) string { + return ref +} + +// Convert a reference string to URL-encoding +func urlRef(ref string) string { + ref = strings.ReplaceAll(ref, "+", "-") + ref = strings.ReplaceAll(ref, "/", "_") + return ref +} + +// Convert a reference string to raw, unpadded format +func rawRef(ref string) string { + return strings.TrimRight(ref, "=") +} + +// Both URL and unpadding conversions +func rawURLRef(ref string) string { + return rawRef(urlRef(ref)) +} + +const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + +// A nonstandard encoding with a funny padding character, for testing +var funnyEncoding = NewEncoding(encodeStd).WithPadding(rune('@')) + +func funnyRef(ref string) string { + return strings.ReplaceAll(ref, "=", "@") +} + +type encodingTest struct { + enc *Encoding // Encoding to test + conv func(string) string // Reference string converter +} + +var encodingTests = []encodingTest{ + {StdEncoding, stdRef}, + {URLEncoding, urlRef}, + {RawStdEncoding, rawRef}, + {RawURLEncoding, rawURLRef}, + {funnyEncoding, funnyRef}, + {StdEncoding.Strict(), stdRef}, + {URLEncoding.Strict(), urlRef}, + {RawStdEncoding.Strict(), rawRef}, + {RawURLEncoding.Strict(), rawURLRef}, + {funnyEncoding.Strict(), funnyRef}, +} + +var bigtest = testpair{ + "Twas brillig, and the slithy toves", + "VHdhcyBicmlsbGlnLCBhbmQgdGhlIHNsaXRoeSB0b3Zlcw==", +} + +func testEqual(t *testing.T, msg string, args ...any) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) + return false + } + return true +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + for _, tt := range encodingTests { + got := tt.enc.EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, tt.conv(p.encoded)) + dst := tt.enc.AppendEncode([]byte("lead"), []byte(p.decoded)) + testEqual(t, `AppendEncode("lead", %q) = %q, want %q`, p.decoded, string(dst), "lead"+tt.conv(p.encoded)) + } + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, bb.String(), p.encoded) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, bb.String(), bigtest.encoded) + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + for _, tt := range encodingTests { + encoded := tt.conv(p.encoded) + dbuf := make([]byte, tt.enc.DecodedLen(len(encoded))) + count, err := tt.enc.Decode(dbuf, []byte(encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded)) + testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded) + + dbuf, err = tt.enc.DecodeString(encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", encoded, string(dbuf), p.decoded) + + dst, err := tt.enc.AppendDecode([]byte("lead"), []byte(encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("lead", %q) = %q, want %q`, p.encoded, string(dst), "lead"+p.decoded) + + dst2, err := tt.enc.AppendDecode(dst[:0:len(p.decoded)], []byte(encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("", %q) = %q, want %q`, p.encoded, string(dst2), p.decoded) + if len(dst) > 0 && len(dst2) > 0 && &dst[0] != &dst2[0] { + t.Errorf("unexpected capacity growth: got %d, want %d", cap(dst2), cap(dst)) + } + } + } +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(StdEncoding, strings.NewReader(p.encoded)) + dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) + count, err := decoder.Read(dbuf) + if err != nil && err != io.EOF { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + if err != io.EOF { + _, err = decoder.Read(dbuf) + } + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(StdEncoding, strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, + {"\n", -1}, + {"AAA=\n", -1}, + {"AAAA\n", -1}, + {"!!!!", 0}, + {"====", 0}, + {"x===", 1}, + {"=AAA", 0}, + {"A=AA", 1}, + {"AA=A", 2}, + {"AA==A", 4}, + {"AAA=AAAA", 4}, + {"AAAAA", 4}, + {"AAAAAA", 4}, + {"A=", 1}, + {"A==", 1}, + {"AA=", 3}, + {"AA==", -1}, + {"AAA=", -1}, + {"AAAA", -1}, + {"AAAAAA=", 7}, + {"YWJjZA=====", 8}, + {"A!\n", 1}, + {"A=\n", 1}, + } + for _, tc := range testCases { + dbuf := make([]byte, StdEncoding.DecodedLen(len(tc.input))) + _, err := StdEncoding.Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected corruption in", tc.input) + } + continue + } + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) + default: + t.Error("Decoder failed to detect corruption in", tc) + } + } +} + +func TestDecodeBounds(t *testing.T) { + var buf [32]byte + s := StdEncoding.EncodeToString(buf[:]) + defer func() { + if err := recover(); err != nil { + t.Fatalf("Decode panicked unexpectedly: %v\n%s", err, debug.Stack()) + } + }() + n, err := StdEncoding.Decode(buf[:], []byte(s)) + if n != len(buf) || err != nil { + t.Fatalf("StdEncoding.Decode = %d, %v, want %d, nil", n, err, len(buf)) + } +} + +func TestEncodedLen(t *testing.T) { + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {RawStdEncoding, 0, 0}, + {RawStdEncoding, 1, 2}, + {RawStdEncoding, 2, 3}, + {RawStdEncoding, 3, 4}, + {RawStdEncoding, 7, 10}, + {StdEncoding, 0, 0}, + {StdEncoding, 1, 4}, + {StdEncoding, 2, 4}, + {StdEncoding, 3, 4}, + {StdEncoding, 4, 8}, + {StdEncoding, 7, 12}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{RawStdEncoding, (math.MaxInt-5)/8 + 1, 357913942}) + tests = append(tests, test{RawStdEncoding, math.MaxInt/4*3 + 2, math.MaxInt}) + case 64: + tests = append(tests, test{RawStdEncoding, (math.MaxInt-5)/8 + 1, 1537228672809129302}) + tests = append(tests, test{RawStdEncoding, math.MaxInt/4*3 + 2, math.MaxInt}) + } + for _, tt := range tests { + if got := tt.enc.EncodedLen(tt.n); int64(got) != tt.want { + t.Errorf("EncodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestDecodedLen(t *testing.T) { + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {RawStdEncoding, 0, 0}, + {RawStdEncoding, 2, 1}, + {RawStdEncoding, 3, 2}, + {RawStdEncoding, 4, 3}, + {RawStdEncoding, 10, 7}, + {StdEncoding, 0, 0}, + {StdEncoding, 4, 3}, + {StdEncoding, 8, 6}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{RawStdEncoding, math.MaxInt/6 + 1, 268435456}) + tests = append(tests, test{RawStdEncoding, math.MaxInt, 1610612735}) + case 64: + tests = append(tests, test{RawStdEncoding, math.MaxInt/6 + 1, 1152921504606846976}) + tests = append(tests, test{RawStdEncoding, math.MaxInt, 6917529027641081855}) + } + for _, tt := range tests { + if got := tt.enc.DecodedLen(tt.n); int64(got) != tt.want { + t.Errorf("DecodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(StdEncoding, encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(StdEncoding, encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func TestNewLineCharacters(t *testing.T) { + // Each of these should decode to the string "sure", without errors. + const expected = "sure" + examples := []string{ + "c3VyZQ==", + "c3VyZQ==\r", + "c3VyZQ==\n", + "c3VyZQ==\r\n", + "c3VyZ\r\nQ==", + "c3V\ryZ\nQ==", + "c3V\nyZ\rQ==", + "c3VyZ\nQ==", + "c3VyZQ\n==", + "c3VyZQ=\n=", + "c3VyZQ=\r\n\r\n=", + } + for _, e := range examples { + buf, err := StdEncoding.DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } + if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + +type nextRead struct { + n int // bytes to return + err error // error to return +} + +// faultInjectReader returns data from source, rate-limited +// and with the errors as written to nextc. +type faultInjectReader struct { + source string + nextc <-chan nextRead +} + +func (r *faultInjectReader) Read(p []byte) (int, error) { + nr := <-r.nextc + if len(p) > nr.n { + p = p[:nr.n] + } + n := copy(p, r.source) + r.source = r.source[n:] + return n, nr.err +} + +// tests that we don't ignore errors from our underlying reader +func TestDecoderIssue3577(t *testing.T) { + next := make(chan nextRead, 10) + wantErr := errors.New("my error") + next <- nextRead{5, nil} + next <- nextRead{10, wantErr} + next <- nextRead{0, wantErr} + d := NewDecoder(StdEncoding, &faultInjectReader{ + source: "VHdhcyBicmlsbGlnLCBhbmQgdGhlIHNsaXRoeSB0b3Zlcw==", // twas brillig... + nextc: next, + }) + errc := make(chan error, 1) + go func() { + _, err := io.ReadAll(d) + errc <- err + }() + select { + case err := <-errc: + if err != wantErr { + t.Errorf("got error %v; want %v", err, wantErr) + } + case <-time.After(5 * time.Second): + t.Errorf("timeout; Decoder blocked without returning an error") + } +} + +func TestDecoderIssue4779(t *testing.T) { + encoded := `CP/EAT8AAAEF +AQEBAQEBAAAAAAAAAAMAAQIEBQYHCAkKCwEAAQUBAQEBAQEAAAAAAAAAAQACAwQFBgcICQoLEAAB +BAEDAgQCBQcGCAUDDDMBAAIRAwQhEjEFQVFhEyJxgTIGFJGhsUIjJBVSwWIzNHKC0UMHJZJT8OHx +Y3M1FqKygyZEk1RkRcKjdDYX0lXiZfKzhMPTdePzRieUpIW0lcTU5PSltcXV5fVWZnaGlqa2xtbm +9jdHV2d3h5ent8fX5/cRAAICAQIEBAMEBQYHBwYFNQEAAhEDITESBEFRYXEiEwUygZEUobFCI8FS +0fAzJGLhcoKSQ1MVY3M08SUGFqKygwcmNcLSRJNUoxdkRVU2dGXi8rOEw9N14/NGlKSFtJXE1OT0 +pbXF1eX1VmZ2hpamtsbW5vYnN0dXZ3eHl6e3x//aAAwDAQACEQMRAD8A9VSSSSUpJJJJSkkkJ+Tj +1kiy1jCJJDnAcCTykpKkuQ6p/jN6FgmxlNduXawwAzaGH+V6jn/R/wCt71zdn+N/qL3kVYFNYB4N +ji6PDVjWpKp9TSXnvTf8bFNjg3qOEa2n6VlLpj/rT/pf567DpX1i6L1hs9Py67X8mqdtg/rUWbbf ++gkp0kkkklKSSSSUpJJJJT//0PVUkkklKVLq3WMDpGI7KzrNjADtYNXvI/Mqr/Pd/q9W3vaxjnvM +NaCXE9gNSvGPrf8AWS3qmba5jjsJhoB0DAf0NDf6sevf+/lf8Hj0JJATfWT6/dV6oXU1uOLQeKKn +EQP+Hubtfe/+R7Mf/g7f5xcocp++Z11JMCJPgFBxOg7/AOuqDx8I/ikpkXkmSdU8mJIJA/O8EMAy +j+mSARB/17pKVXYWHXjsj7yIex0PadzXMO1zT5KHoNA3HT8ietoGhgjsfA+CSnvvqh/jJtqsrwOv +2b6NGNzXfTYexzJ+nU7/ALkf4P8Awv6P9KvTQQ4AgyDqCF85Pho3CTB7eHwXoH+LT65uZbX9X+o2 +bqbPb06551Y4 +` + encodedShort := strings.ReplaceAll(encoded, "\n", "") + + dec := NewDecoder(StdEncoding, strings.NewReader(encoded)) + res1, err := io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + dec = NewDecoder(StdEncoding, strings.NewReader(encodedShort)) + var res2 []byte + res2, err = io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + if !bytes.Equal(res1, res2) { + t.Error("Decoded results not equal") + } +} + +func TestDecoderIssue7733(t *testing.T) { + s, err := StdEncoding.DecodeString("YWJjZA=====") + want := CorruptInputError(8) + if !reflect.DeepEqual(want, err) { + t.Errorf("Error = %v; want CorruptInputError(8)", err) + } + if string(s) != "abcd" { + t.Errorf("DecodeString = %q; want abcd", s) + } +} + +func TestDecoderIssue15656(t *testing.T) { + _, err := StdEncoding.Strict().DecodeString("WvLTlMrX9NpYDQlEIFlnDB==") + want := CorruptInputError(22) + if !reflect.DeepEqual(want, err) { + t.Errorf("Error = %v; want CorruptInputError(22)", err) + } + _, err = StdEncoding.Strict().DecodeString("WvLTlMrX9NpYDQlEIFlnDA==") + if err != nil { + t.Errorf("Error = %v; want nil", err) + } + _, err = StdEncoding.DecodeString("WvLTlMrX9NpYDQlEIFlnDB==") + if err != nil { + t.Errorf("Error = %v; want nil", err) + } +} + +func BenchmarkEncodeToString(b *testing.B) { + data := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.EncodeToString(data) + } +} + +func BenchmarkDecodeString(b *testing.B) { + sizes := []int{2, 4, 8, 64, 8192} + benchFunc := func(b *testing.B, benchSize int) { + data := StdEncoding.EncodeToString(make([]byte, benchSize)) + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + StdEncoding.DecodeString(data) + } + } + for _, size := range sizes { + b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { + benchFunc(b, size) + }) + } +} + +func BenchmarkNewEncoding(b *testing.B) { + b.SetBytes(int64(len(Encoding{}.decodeMap))) + for i := 0; i < b.N; i++ { + e := NewEncoding(encodeStd) + for _, v := range e.decodeMap { + _ = v + } + } +} + +func TestDecoderRaw(t *testing.T) { + source := "AAAAAA" + want := []byte{0, 0, 0, 0} + + // Direct. + dec1, err := RawURLEncoding.DecodeString(source) + if err != nil || !bytes.Equal(dec1, want) { + t.Errorf("RawURLEncoding.DecodeString(%q) = %x, %v, want %x, nil", source, dec1, err, want) + } + + // Through reader. Used to fail. + r := NewDecoder(RawURLEncoding, bytes.NewReader([]byte(source))) + dec2, err := io.ReadAll(io.LimitReader(r, 100)) + if err != nil || !bytes.Equal(dec2, want) { + t.Errorf("reading NewDecoder(RawURLEncoding, %q) = %x, %v, want %x, nil", source, dec2, err, want) + } + + // Should work with padding. + r = NewDecoder(URLEncoding, bytes.NewReader([]byte(source+"=="))) + dec3, err := io.ReadAll(r) + if err != nil || !bytes.Equal(dec3, want) { + t.Errorf("reading NewDecoder(URLEncoding, %q) = %x, %v, want %x, nil", source+"==", dec3, err, want) + } +} diff --git a/src/encoding/base64/example_test.go b/src/encoding/base64/example_test.go new file mode 100644 index 0000000..61a3adc --- /dev/null +++ b/src/encoding/base64/example_test.go @@ -0,0 +1,83 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Keep in sync with ../base32/example_test.go. + +package base64_test + +import ( + "encoding/base64" + "fmt" + "os" +) + +func Example() { + msg := "Hello, 世界" + encoded := base64.StdEncoding.EncodeToString([]byte(msg)) + fmt.Println(encoded) + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + fmt.Println("decode error:", err) + return + } + fmt.Println(string(decoded)) + // Output: + // SGVsbG8sIOS4lueVjA== + // Hello, 世界 +} + +func ExampleEncoding_EncodeToString() { + data := []byte("any + old & data") + str := base64.StdEncoding.EncodeToString(data) + fmt.Println(str) + // Output: + // YW55ICsgb2xkICYgZGF0YQ== +} + +func ExampleEncoding_Encode() { + data := []byte("Hello, world!") + dst := make([]byte, base64.StdEncoding.EncodedLen(len(data))) + base64.StdEncoding.Encode(dst, data) + fmt.Println(string(dst)) + // Output: + // SGVsbG8sIHdvcmxkIQ== +} + +func ExampleEncoding_DecodeString() { + str := "c29tZSBkYXRhIHdpdGggACBhbmQg77u/" + data, err := base64.StdEncoding.DecodeString(str) + if err != nil { + fmt.Println("error:", err) + return + } + fmt.Printf("%q\n", data) + // Output: + // "some data with \x00 and \ufeff" +} + +func ExampleEncoding_Decode() { + str := "SGVsbG8sIHdvcmxkIQ==" + dst := make([]byte, base64.StdEncoding.DecodedLen(len(str))) + n, err := base64.StdEncoding.Decode(dst, []byte(str)) + if err != nil { + fmt.Println("decode error:", err) + return + } + dst = dst[:n] + fmt.Printf("%q\n", dst) + // Output: + // "Hello, world!" +} + +func ExampleNewEncoder() { + input := []byte("foo\x00bar") + encoder := base64.NewEncoder(base64.StdEncoding, os.Stdout) + encoder.Write(input) + // Must close the encoder when finished to flush any partial blocks. + // If you comment out the following line, the last partial block "r" + // won't be encoded. + encoder.Close() + // Output: + // Zm9vAGJhcg== +} diff --git a/src/encoding/binary/binary.go b/src/encoding/binary/binary.go new file mode 100644 index 0000000..f001be8 --- /dev/null +++ b/src/encoding/binary/binary.go @@ -0,0 +1,815 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package binary implements simple translation between numbers and byte +// sequences and encoding and decoding of varints. +// +// Numbers are translated by reading and writing fixed-size values. +// A fixed-size value is either a fixed-size arithmetic +// type (bool, int8, uint8, int16, float32, complex64, ...) +// or an array or struct containing only fixed-size values. +// +// The varint functions encode and decode single integer values using +// a variable-length encoding; smaller values require fewer bytes. +// For a specification, see +// https://developers.google.com/protocol-buffers/docs/encoding. +// +// This package favors simplicity over efficiency. Clients that require +// high-performance serialization, especially for large data structures, +// should look at more advanced solutions such as the [encoding/gob] +// package or [google.golang.org/protobuf] for protocol buffers. +package binary + +import ( + "errors" + "io" + "math" + "reflect" + "sync" +) + +// A ByteOrder specifies how to convert byte slices into +// 16-, 32-, or 64-bit unsigned integers. +// +// It is implemented by [LittleEndian], [BigEndian], and [NativeEndian]. +type ByteOrder interface { + Uint16([]byte) uint16 + Uint32([]byte) uint32 + Uint64([]byte) uint64 + PutUint16([]byte, uint16) + PutUint32([]byte, uint32) + PutUint64([]byte, uint64) + String() string +} + +// AppendByteOrder specifies how to append 16-, 32-, or 64-bit unsigned integers +// into a byte slice. +// +// It is implemented by [LittleEndian], [BigEndian], and [NativeEndian]. +type AppendByteOrder interface { + AppendUint16([]byte, uint16) []byte + AppendUint32([]byte, uint32) []byte + AppendUint64([]byte, uint64) []byte + String() string +} + +// LittleEndian is the little-endian implementation of [ByteOrder] and [AppendByteOrder]. +var LittleEndian littleEndian + +// BigEndian is the big-endian implementation of [ByteOrder] and [AppendByteOrder]. +var BigEndian bigEndian + +type littleEndian struct{} + +func (littleEndian) Uint16(b []byte) uint16 { + _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 + return uint16(b[0]) | uint16(b[1])<<8 +} + +func (littleEndian) PutUint16(b []byte, v uint16) { + _ = b[1] // early bounds check to guarantee safety of writes below + b[0] = byte(v) + b[1] = byte(v >> 8) +} + +func (littleEndian) AppendUint16(b []byte, v uint16) []byte { + return append(b, + byte(v), + byte(v>>8), + ) +} + +func (littleEndian) Uint32(b []byte) uint32 { + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func (littleEndian) PutUint32(b []byte, v uint32) { + _ = b[3] // early bounds check to guarantee safety of writes below + b[0] = byte(v) + b[1] = byte(v >> 8) + b[2] = byte(v >> 16) + b[3] = byte(v >> 24) +} + +func (littleEndian) AppendUint32(b []byte, v uint32) []byte { + return append(b, + byte(v), + byte(v>>8), + byte(v>>16), + byte(v>>24), + ) +} + +func (littleEndian) Uint64(b []byte) uint64 { + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func (littleEndian) PutUint64(b []byte, v uint64) { + _ = b[7] // early bounds check to guarantee safety of writes below + b[0] = byte(v) + b[1] = byte(v >> 8) + b[2] = byte(v >> 16) + b[3] = byte(v >> 24) + b[4] = byte(v >> 32) + b[5] = byte(v >> 40) + b[6] = byte(v >> 48) + b[7] = byte(v >> 56) +} + +func (littleEndian) AppendUint64(b []byte, v uint64) []byte { + return append(b, + byte(v), + byte(v>>8), + byte(v>>16), + byte(v>>24), + byte(v>>32), + byte(v>>40), + byte(v>>48), + byte(v>>56), + ) +} + +func (littleEndian) String() string { return "LittleEndian" } + +func (littleEndian) GoString() string { return "binary.LittleEndian" } + +type bigEndian struct{} + +func (bigEndian) Uint16(b []byte) uint16 { + _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 + return uint16(b[1]) | uint16(b[0])<<8 +} + +func (bigEndian) PutUint16(b []byte, v uint16) { + _ = b[1] // early bounds check to guarantee safety of writes below + b[0] = byte(v >> 8) + b[1] = byte(v) +} + +func (bigEndian) AppendUint16(b []byte, v uint16) []byte { + return append(b, + byte(v>>8), + byte(v), + ) +} + +func (bigEndian) Uint32(b []byte) uint32 { + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 +} + +func (bigEndian) PutUint32(b []byte, v uint32) { + _ = b[3] // early bounds check to guarantee safety of writes below + b[0] = byte(v >> 24) + b[1] = byte(v >> 16) + b[2] = byte(v >> 8) + b[3] = byte(v) +} + +func (bigEndian) AppendUint32(b []byte, v uint32) []byte { + return append(b, + byte(v>>24), + byte(v>>16), + byte(v>>8), + byte(v), + ) +} + +func (bigEndian) Uint64(b []byte) uint64 { + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | + uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56 +} + +func (bigEndian) PutUint64(b []byte, v uint64) { + _ = b[7] // early bounds check to guarantee safety of writes below + b[0] = byte(v >> 56) + b[1] = byte(v >> 48) + b[2] = byte(v >> 40) + b[3] = byte(v >> 32) + b[4] = byte(v >> 24) + b[5] = byte(v >> 16) + b[6] = byte(v >> 8) + b[7] = byte(v) +} + +func (bigEndian) AppendUint64(b []byte, v uint64) []byte { + return append(b, + byte(v>>56), + byte(v>>48), + byte(v>>40), + byte(v>>32), + byte(v>>24), + byte(v>>16), + byte(v>>8), + byte(v), + ) +} + +func (bigEndian) String() string { return "BigEndian" } + +func (bigEndian) GoString() string { return "binary.BigEndian" } + +func (nativeEndian) String() string { return "NativeEndian" } + +func (nativeEndian) GoString() string { return "binary.NativeEndian" } + +// Read reads structured binary data from r into data. +// Data must be a pointer to a fixed-size value or a slice +// of fixed-size values. +// Bytes read from r are decoded using the specified byte order +// and written to successive fields of the data. +// When decoding boolean values, a zero byte is decoded as false, and +// any other non-zero byte is decoded as true. +// When reading into structs, the field data for fields with +// blank (_) field names is skipped; i.e., blank field names +// may be used for padding. +// When reading into a struct, all non-blank fields must be exported +// or Read may panic. +// +// The error is [io.EOF] only if no bytes were read. +// If an [io.EOF] happens after reading some but not all the bytes, +// Read returns [io.ErrUnexpectedEOF]. +func Read(r io.Reader, order ByteOrder, data any) error { + // Fast path for basic types and slices. + if n := intDataSize(data); n != 0 { + bs := make([]byte, n) + if _, err := io.ReadFull(r, bs); err != nil { + return err + } + switch data := data.(type) { + case *bool: + *data = bs[0] != 0 + case *int8: + *data = int8(bs[0]) + case *uint8: + *data = bs[0] + case *int16: + *data = int16(order.Uint16(bs)) + case *uint16: + *data = order.Uint16(bs) + case *int32: + *data = int32(order.Uint32(bs)) + case *uint32: + *data = order.Uint32(bs) + case *int64: + *data = int64(order.Uint64(bs)) + case *uint64: + *data = order.Uint64(bs) + case *float32: + *data = math.Float32frombits(order.Uint32(bs)) + case *float64: + *data = math.Float64frombits(order.Uint64(bs)) + case []bool: + for i, x := range bs { // Easier to loop over the input for 8-bit values. + data[i] = x != 0 + } + case []int8: + for i, x := range bs { + data[i] = int8(x) + } + case []uint8: + copy(data, bs) + case []int16: + for i := range data { + data[i] = int16(order.Uint16(bs[2*i:])) + } + case []uint16: + for i := range data { + data[i] = order.Uint16(bs[2*i:]) + } + case []int32: + for i := range data { + data[i] = int32(order.Uint32(bs[4*i:])) + } + case []uint32: + for i := range data { + data[i] = order.Uint32(bs[4*i:]) + } + case []int64: + for i := range data { + data[i] = int64(order.Uint64(bs[8*i:])) + } + case []uint64: + for i := range data { + data[i] = order.Uint64(bs[8*i:]) + } + case []float32: + for i := range data { + data[i] = math.Float32frombits(order.Uint32(bs[4*i:])) + } + case []float64: + for i := range data { + data[i] = math.Float64frombits(order.Uint64(bs[8*i:])) + } + default: + n = 0 // fast path doesn't apply + } + if n != 0 { + return nil + } + } + + // Fallback to reflect-based decoding. + v := reflect.ValueOf(data) + size := -1 + switch v.Kind() { + case reflect.Pointer: + v = v.Elem() + size = dataSize(v) + case reflect.Slice: + size = dataSize(v) + } + if size < 0 { + return errors.New("binary.Read: invalid type " + reflect.TypeOf(data).String()) + } + d := &decoder{order: order, buf: make([]byte, size)} + if _, err := io.ReadFull(r, d.buf); err != nil { + return err + } + d.value(v) + return nil +} + +// Write writes the binary representation of data into w. +// Data must be a fixed-size value or a slice of fixed-size +// values, or a pointer to such data. +// Boolean values encode as one byte: 1 for true, and 0 for false. +// Bytes written to w are encoded using the specified byte order +// and read from successive fields of the data. +// When writing structs, zero values are written for fields +// with blank (_) field names. +func Write(w io.Writer, order ByteOrder, data any) error { + // Fast path for basic types and slices. + if n := intDataSize(data); n != 0 { + bs := make([]byte, n) + switch v := data.(type) { + case *bool: + if *v { + bs[0] = 1 + } else { + bs[0] = 0 + } + case bool: + if v { + bs[0] = 1 + } else { + bs[0] = 0 + } + case []bool: + for i, x := range v { + if x { + bs[i] = 1 + } else { + bs[i] = 0 + } + } + case *int8: + bs[0] = byte(*v) + case int8: + bs[0] = byte(v) + case []int8: + for i, x := range v { + bs[i] = byte(x) + } + case *uint8: + bs[0] = *v + case uint8: + bs[0] = v + case []uint8: + bs = v + case *int16: + order.PutUint16(bs, uint16(*v)) + case int16: + order.PutUint16(bs, uint16(v)) + case []int16: + for i, x := range v { + order.PutUint16(bs[2*i:], uint16(x)) + } + case *uint16: + order.PutUint16(bs, *v) + case uint16: + order.PutUint16(bs, v) + case []uint16: + for i, x := range v { + order.PutUint16(bs[2*i:], x) + } + case *int32: + order.PutUint32(bs, uint32(*v)) + case int32: + order.PutUint32(bs, uint32(v)) + case []int32: + for i, x := range v { + order.PutUint32(bs[4*i:], uint32(x)) + } + case *uint32: + order.PutUint32(bs, *v) + case uint32: + order.PutUint32(bs, v) + case []uint32: + for i, x := range v { + order.PutUint32(bs[4*i:], x) + } + case *int64: + order.PutUint64(bs, uint64(*v)) + case int64: + order.PutUint64(bs, uint64(v)) + case []int64: + for i, x := range v { + order.PutUint64(bs[8*i:], uint64(x)) + } + case *uint64: + order.PutUint64(bs, *v) + case uint64: + order.PutUint64(bs, v) + case []uint64: + for i, x := range v { + order.PutUint64(bs[8*i:], x) + } + case *float32: + order.PutUint32(bs, math.Float32bits(*v)) + case float32: + order.PutUint32(bs, math.Float32bits(v)) + case []float32: + for i, x := range v { + order.PutUint32(bs[4*i:], math.Float32bits(x)) + } + case *float64: + order.PutUint64(bs, math.Float64bits(*v)) + case float64: + order.PutUint64(bs, math.Float64bits(v)) + case []float64: + for i, x := range v { + order.PutUint64(bs[8*i:], math.Float64bits(x)) + } + } + _, err := w.Write(bs) + return err + } + + // Fallback to reflect-based encoding. + v := reflect.Indirect(reflect.ValueOf(data)) + size := dataSize(v) + if size < 0 { + return errors.New("binary.Write: some values are not fixed-sized in type " + reflect.TypeOf(data).String()) + } + buf := make([]byte, size) + e := &encoder{order: order, buf: buf} + e.value(v) + _, err := w.Write(buf) + return err +} + +// Size returns how many bytes [Write] would generate to encode the value v, which +// must be a fixed-size value or a slice of fixed-size values, or a pointer to such data. +// If v is neither of these, Size returns -1. +func Size(v any) int { + return dataSize(reflect.Indirect(reflect.ValueOf(v))) +} + +var structSize sync.Map // map[reflect.Type]int + +// dataSize returns the number of bytes the actual data represented by v occupies in memory. +// For compound structures, it sums the sizes of the elements. Thus, for instance, for a slice +// it returns the length of the slice times the element size and does not count the memory +// occupied by the header. If the type of v is not acceptable, dataSize returns -1. +func dataSize(v reflect.Value) int { + switch v.Kind() { + case reflect.Slice: + if s := sizeof(v.Type().Elem()); s >= 0 { + return s * v.Len() + } + + case reflect.Struct: + t := v.Type() + if size, ok := structSize.Load(t); ok { + return size.(int) + } + size := sizeof(t) + structSize.Store(t, size) + return size + + default: + if v.IsValid() { + return sizeof(v.Type()) + } + } + + return -1 +} + +// sizeof returns the size >= 0 of variables for the given type or -1 if the type is not acceptable. +func sizeof(t reflect.Type) int { + switch t.Kind() { + case reflect.Array: + if s := sizeof(t.Elem()); s >= 0 { + return s * t.Len() + } + + case reflect.Struct: + sum := 0 + for i, n := 0, t.NumField(); i < n; i++ { + s := sizeof(t.Field(i).Type) + if s < 0 { + return -1 + } + sum += s + } + return sum + + case reflect.Bool, + reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return int(t.Size()) + } + + return -1 +} + +type coder struct { + order ByteOrder + buf []byte + offset int +} + +type decoder coder +type encoder coder + +func (d *decoder) bool() bool { + x := d.buf[d.offset] + d.offset++ + return x != 0 +} + +func (e *encoder) bool(x bool) { + if x { + e.buf[e.offset] = 1 + } else { + e.buf[e.offset] = 0 + } + e.offset++ +} + +func (d *decoder) uint8() uint8 { + x := d.buf[d.offset] + d.offset++ + return x +} + +func (e *encoder) uint8(x uint8) { + e.buf[e.offset] = x + e.offset++ +} + +func (d *decoder) uint16() uint16 { + x := d.order.Uint16(d.buf[d.offset : d.offset+2]) + d.offset += 2 + return x +} + +func (e *encoder) uint16(x uint16) { + e.order.PutUint16(e.buf[e.offset:e.offset+2], x) + e.offset += 2 +} + +func (d *decoder) uint32() uint32 { + x := d.order.Uint32(d.buf[d.offset : d.offset+4]) + d.offset += 4 + return x +} + +func (e *encoder) uint32(x uint32) { + e.order.PutUint32(e.buf[e.offset:e.offset+4], x) + e.offset += 4 +} + +func (d *decoder) uint64() uint64 { + x := d.order.Uint64(d.buf[d.offset : d.offset+8]) + d.offset += 8 + return x +} + +func (e *encoder) uint64(x uint64) { + e.order.PutUint64(e.buf[e.offset:e.offset+8], x) + e.offset += 8 +} + +func (d *decoder) int8() int8 { return int8(d.uint8()) } + +func (e *encoder) int8(x int8) { e.uint8(uint8(x)) } + +func (d *decoder) int16() int16 { return int16(d.uint16()) } + +func (e *encoder) int16(x int16) { e.uint16(uint16(x)) } + +func (d *decoder) int32() int32 { return int32(d.uint32()) } + +func (e *encoder) int32(x int32) { e.uint32(uint32(x)) } + +func (d *decoder) int64() int64 { return int64(d.uint64()) } + +func (e *encoder) int64(x int64) { e.uint64(uint64(x)) } + +func (d *decoder) value(v reflect.Value) { + switch v.Kind() { + case reflect.Array: + l := v.Len() + for i := 0; i < l; i++ { + d.value(v.Index(i)) + } + + case reflect.Struct: + t := v.Type() + l := v.NumField() + for i := 0; i < l; i++ { + // Note: Calling v.CanSet() below is an optimization. + // It would be sufficient to check the field name, + // but creating the StructField info for each field is + // costly (run "go test -bench=ReadStruct" and compare + // results when making changes to this code). + if v := v.Field(i); v.CanSet() || t.Field(i).Name != "_" { + d.value(v) + } else { + d.skip(v) + } + } + + case reflect.Slice: + l := v.Len() + for i := 0; i < l; i++ { + d.value(v.Index(i)) + } + + case reflect.Bool: + v.SetBool(d.bool()) + + case reflect.Int8: + v.SetInt(int64(d.int8())) + case reflect.Int16: + v.SetInt(int64(d.int16())) + case reflect.Int32: + v.SetInt(int64(d.int32())) + case reflect.Int64: + v.SetInt(d.int64()) + + case reflect.Uint8: + v.SetUint(uint64(d.uint8())) + case reflect.Uint16: + v.SetUint(uint64(d.uint16())) + case reflect.Uint32: + v.SetUint(uint64(d.uint32())) + case reflect.Uint64: + v.SetUint(d.uint64()) + + case reflect.Float32: + v.SetFloat(float64(math.Float32frombits(d.uint32()))) + case reflect.Float64: + v.SetFloat(math.Float64frombits(d.uint64())) + + case reflect.Complex64: + v.SetComplex(complex( + float64(math.Float32frombits(d.uint32())), + float64(math.Float32frombits(d.uint32())), + )) + case reflect.Complex128: + v.SetComplex(complex( + math.Float64frombits(d.uint64()), + math.Float64frombits(d.uint64()), + )) + } +} + +func (e *encoder) value(v reflect.Value) { + switch v.Kind() { + case reflect.Array: + l := v.Len() + for i := 0; i < l; i++ { + e.value(v.Index(i)) + } + + case reflect.Struct: + t := v.Type() + l := v.NumField() + for i := 0; i < l; i++ { + // see comment for corresponding code in decoder.value() + if v := v.Field(i); v.CanSet() || t.Field(i).Name != "_" { + e.value(v) + } else { + e.skip(v) + } + } + + case reflect.Slice: + l := v.Len() + for i := 0; i < l; i++ { + e.value(v.Index(i)) + } + + case reflect.Bool: + e.bool(v.Bool()) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + switch v.Type().Kind() { + case reflect.Int8: + e.int8(int8(v.Int())) + case reflect.Int16: + e.int16(int16(v.Int())) + case reflect.Int32: + e.int32(int32(v.Int())) + case reflect.Int64: + e.int64(v.Int()) + } + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + switch v.Type().Kind() { + case reflect.Uint8: + e.uint8(uint8(v.Uint())) + case reflect.Uint16: + e.uint16(uint16(v.Uint())) + case reflect.Uint32: + e.uint32(uint32(v.Uint())) + case reflect.Uint64: + e.uint64(v.Uint()) + } + + case reflect.Float32, reflect.Float64: + switch v.Type().Kind() { + case reflect.Float32: + e.uint32(math.Float32bits(float32(v.Float()))) + case reflect.Float64: + e.uint64(math.Float64bits(v.Float())) + } + + case reflect.Complex64, reflect.Complex128: + switch v.Type().Kind() { + case reflect.Complex64: + x := v.Complex() + e.uint32(math.Float32bits(float32(real(x)))) + e.uint32(math.Float32bits(float32(imag(x)))) + case reflect.Complex128: + x := v.Complex() + e.uint64(math.Float64bits(real(x))) + e.uint64(math.Float64bits(imag(x))) + } + } +} + +func (d *decoder) skip(v reflect.Value) { + d.offset += dataSize(v) +} + +func (e *encoder) skip(v reflect.Value) { + n := dataSize(v) + zero := e.buf[e.offset : e.offset+n] + for i := range zero { + zero[i] = 0 + } + e.offset += n +} + +// intDataSize returns the size of the data required to represent the data when encoded. +// It returns zero if the type cannot be implemented by the fast path in Read or Write. +func intDataSize(data any) int { + switch data := data.(type) { + case bool, int8, uint8, *bool, *int8, *uint8: + return 1 + case []bool: + return len(data) + case []int8: + return len(data) + case []uint8: + return len(data) + case int16, uint16, *int16, *uint16: + return 2 + case []int16: + return 2 * len(data) + case []uint16: + return 2 * len(data) + case int32, uint32, *int32, *uint32: + return 4 + case []int32: + return 4 * len(data) + case []uint32: + return 4 * len(data) + case int64, uint64, *int64, *uint64: + return 8 + case []int64: + return 8 * len(data) + case []uint64: + return 8 * len(data) + case float32, *float32: + return 4 + case float64, *float64: + return 8 + case []float32: + return 4 * len(data) + case []float64: + return 8 * len(data) + } + return 0 +} diff --git a/src/encoding/binary/binary_test.go b/src/encoding/binary/binary_test.go new file mode 100644 index 0000000..4b22b28 --- /dev/null +++ b/src/encoding/binary/binary_test.go @@ -0,0 +1,887 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +import ( + "bytes" + "fmt" + "io" + "math" + "reflect" + "strings" + "sync" + "testing" + "unsafe" +) + +type Struct struct { + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Float32 float32 + Float64 float64 + Complex64 complex64 + Complex128 complex128 + Array [4]uint8 + Bool bool + BoolArray [4]bool +} + +type T struct { + Int int + Uint uint + Uintptr uintptr + Array [4]int +} + +var s = Struct{ + 0x01, + 0x0203, + 0x04050607, + 0x08090a0b0c0d0e0f, + 0x10, + 0x1112, + 0x13141516, + 0x1718191a1b1c1d1e, + + math.Float32frombits(0x1f202122), + math.Float64frombits(0x232425262728292a), + complex( + math.Float32frombits(0x2b2c2d2e), + math.Float32frombits(0x2f303132), + ), + complex( + math.Float64frombits(0x333435363738393a), + math.Float64frombits(0x3b3c3d3e3f404142), + ), + + [4]uint8{0x43, 0x44, 0x45, 0x46}, + + true, + [4]bool{true, false, true, false}, +} + +var big = []byte{ + 1, + 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, + 17, 18, + 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + + 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, + + 67, 68, 69, 70, + + 1, + 1, 0, 1, 0, +} + +var little = []byte{ + 1, + 3, 2, + 7, 6, 5, 4, + 15, 14, 13, 12, 11, 10, 9, 8, + 16, + 18, 17, + 22, 21, 20, 19, + 30, 29, 28, 27, 26, 25, 24, 23, + + 34, 33, 32, 31, + 42, 41, 40, 39, 38, 37, 36, 35, + 46, 45, 44, 43, 50, 49, 48, 47, + 58, 57, 56, 55, 54, 53, 52, 51, 66, 65, 64, 63, 62, 61, 60, 59, + + 67, 68, 69, 70, + + 1, + 1, 0, 1, 0, +} + +var src = []byte{1, 2, 3, 4, 5, 6, 7, 8} +var res = []int32{0x01020304, 0x05060708} +var putbuf = []byte{0, 0, 0, 0, 0, 0, 0, 0} + +func checkResult(t *testing.T, dir string, order ByteOrder, err error, have, want any) { + if err != nil { + t.Errorf("%v %v: %v", dir, order, err) + return + } + if !reflect.DeepEqual(have, want) { + t.Errorf("%v %v:\n\thave %+v\n\twant %+v", dir, order, have, want) + } +} + +func testRead(t *testing.T, order ByteOrder, b []byte, s1 any) { + var s2 Struct + err := Read(bytes.NewReader(b), order, &s2) + checkResult(t, "Read", order, err, s2, s1) +} + +func testWrite(t *testing.T, order ByteOrder, b []byte, s1 any) { + buf := new(bytes.Buffer) + err := Write(buf, order, s1) + checkResult(t, "Write", order, err, buf.Bytes(), b) +} + +func TestLittleEndianRead(t *testing.T) { testRead(t, LittleEndian, little, s) } +func TestLittleEndianWrite(t *testing.T) { testWrite(t, LittleEndian, little, s) } +func TestLittleEndianPtrWrite(t *testing.T) { testWrite(t, LittleEndian, little, &s) } + +func TestBigEndianRead(t *testing.T) { testRead(t, BigEndian, big, s) } +func TestBigEndianWrite(t *testing.T) { testWrite(t, BigEndian, big, s) } +func TestBigEndianPtrWrite(t *testing.T) { testWrite(t, BigEndian, big, &s) } + +func TestReadSlice(t *testing.T) { + slice := make([]int32, 2) + err := Read(bytes.NewReader(src), BigEndian, slice) + checkResult(t, "ReadSlice", BigEndian, err, slice, res) +} + +func TestWriteSlice(t *testing.T) { + buf := new(bytes.Buffer) + err := Write(buf, BigEndian, res) + checkResult(t, "WriteSlice", BigEndian, err, buf.Bytes(), src) +} + +func TestReadBool(t *testing.T) { + var res bool + var err error + err = Read(bytes.NewReader([]byte{0}), BigEndian, &res) + checkResult(t, "ReadBool", BigEndian, err, res, false) + res = false + err = Read(bytes.NewReader([]byte{1}), BigEndian, &res) + checkResult(t, "ReadBool", BigEndian, err, res, true) + res = false + err = Read(bytes.NewReader([]byte{2}), BigEndian, &res) + checkResult(t, "ReadBool", BigEndian, err, res, true) +} + +func TestReadBoolSlice(t *testing.T) { + slice := make([]bool, 4) + err := Read(bytes.NewReader([]byte{0, 1, 2, 255}), BigEndian, slice) + checkResult(t, "ReadBoolSlice", BigEndian, err, slice, []bool{false, true, true, true}) +} + +// Addresses of arrays are easier to manipulate with reflection than are slices. +var intArrays = []any{ + &[100]int8{}, + &[100]int16{}, + &[100]int32{}, + &[100]int64{}, + &[100]uint8{}, + &[100]uint16{}, + &[100]uint32{}, + &[100]uint64{}, +} + +func TestSliceRoundTrip(t *testing.T) { + buf := new(bytes.Buffer) + for _, array := range intArrays { + src := reflect.ValueOf(array).Elem() + unsigned := false + switch src.Index(0).Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + unsigned = true + } + for i := 0; i < src.Len(); i++ { + if unsigned { + src.Index(i).SetUint(uint64(i * 0x07654321)) + } else { + src.Index(i).SetInt(int64(i * 0x07654321)) + } + } + buf.Reset() + srcSlice := src.Slice(0, src.Len()) + err := Write(buf, BigEndian, srcSlice.Interface()) + if err != nil { + t.Fatal(err) + } + dst := reflect.New(src.Type()).Elem() + dstSlice := dst.Slice(0, dst.Len()) + err = Read(buf, BigEndian, dstSlice.Interface()) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(src.Interface(), dst.Interface()) { + t.Fatal(src) + } + } +} + +func TestWriteT(t *testing.T) { + buf := new(bytes.Buffer) + ts := T{} + if err := Write(buf, BigEndian, ts); err == nil { + t.Errorf("WriteT: have err == nil, want non-nil") + } + + tv := reflect.Indirect(reflect.ValueOf(ts)) + for i, n := 0, tv.NumField(); i < n; i++ { + typ := tv.Field(i).Type().String() + if typ == "[4]int" { + typ = "int" // the problem is int, not the [4] + } + if err := Write(buf, BigEndian, tv.Field(i).Interface()); err == nil { + t.Errorf("WriteT.%v: have err == nil, want non-nil", tv.Field(i).Type()) + } else if !strings.Contains(err.Error(), typ) { + t.Errorf("WriteT: have err == %q, want it to mention %s", err, typ) + } + } +} + +type BlankFields struct { + A uint32 + _ int32 + B float64 + _ [4]int16 + C byte + _ [7]byte + _ struct { + f [8]float32 + } +} + +type BlankFieldsProbe struct { + A uint32 + P0 int32 + B float64 + P1 [4]int16 + C byte + P2 [7]byte + P3 struct { + F [8]float32 + } +} + +func TestBlankFields(t *testing.T) { + buf := new(bytes.Buffer) + b1 := BlankFields{A: 1234567890, B: 2.718281828, C: 42} + if err := Write(buf, LittleEndian, &b1); err != nil { + t.Error(err) + } + + // zero values must have been written for blank fields + var p BlankFieldsProbe + if err := Read(buf, LittleEndian, &p); err != nil { + t.Error(err) + } + + // quick test: only check first value of slices + if p.P0 != 0 || p.P1[0] != 0 || p.P2[0] != 0 || p.P3.F[0] != 0 { + t.Errorf("non-zero values for originally blank fields: %#v", p) + } + + // write p and see if we can probe only some fields + if err := Write(buf, LittleEndian, &p); err != nil { + t.Error(err) + } + + // read should ignore blank fields in b2 + var b2 BlankFields + if err := Read(buf, LittleEndian, &b2); err != nil { + t.Error(err) + } + if b1.A != b2.A || b1.B != b2.B || b1.C != b2.C { + t.Errorf("%#v != %#v", b1, b2) + } +} + +func TestSizeStructCache(t *testing.T) { + // Reset the cache, otherwise multiple test runs fail. + structSize = sync.Map{} + + count := func() int { + var i int + structSize.Range(func(_, _ any) bool { + i++ + return true + }) + return i + } + + var total int + added := func() int { + delta := count() - total + total += delta + return delta + } + + type foo struct { + A uint32 + } + + type bar struct { + A Struct + B foo + C Struct + } + + testcases := []struct { + val any + want int + }{ + {new(foo), 1}, + {new(bar), 1}, + {new(bar), 0}, + {new(struct{ A Struct }), 1}, + {new(struct{ A Struct }), 0}, + } + + for _, tc := range testcases { + if Size(tc.val) == -1 { + t.Fatalf("Can't get the size of %T", tc.val) + } + + if n := added(); n != tc.want { + t.Errorf("Sizing %T added %d entries to the cache, want %d", tc.val, n, tc.want) + } + } +} + +func TestSizeInvalid(t *testing.T) { + testcases := []any{ + int(0), + new(int), + (*int)(nil), + [1]uint{}, + new([1]uint), + (*[1]uint)(nil), + []int{}, + []int(nil), + new([]int), + (*[]int)(nil), + } + for _, tc := range testcases { + if got := Size(tc); got != -1 { + t.Errorf("Size(%T) = %d, want -1", tc, got) + } + } +} + +// An attempt to read into a struct with an unexported field will +// panic. This is probably not the best choice, but at this point +// anything else would be an API change. + +type Unexported struct { + a int32 +} + +func TestUnexportedRead(t *testing.T) { + var buf bytes.Buffer + u1 := Unexported{a: 1} + if err := Write(&buf, LittleEndian, &u1); err != nil { + t.Fatal(err) + } + + defer func() { + if recover() == nil { + t.Fatal("did not panic") + } + }() + var u2 Unexported + Read(&buf, LittleEndian, &u2) +} + +func TestReadErrorMsg(t *testing.T) { + var buf bytes.Buffer + read := func(data any) { + err := Read(&buf, LittleEndian, data) + want := "binary.Read: invalid type " + reflect.TypeOf(data).String() + if err == nil { + t.Errorf("%T: got no error; want %q", data, want) + return + } + if got := err.Error(); got != want { + t.Errorf("%T: got %q; want %q", data, got, want) + } + } + read(0) + s := new(struct{}) + read(&s) + p := &s + read(&p) +} + +func TestReadTruncated(t *testing.T) { + const data = "0123456789abcdef" + + var b1 = make([]int32, 4) + var b2 struct { + A, B, C, D byte + E int32 + F float64 + } + + for i := 0; i <= len(data); i++ { + var errWant error + switch i { + case 0: + errWant = io.EOF + case len(data): + errWant = nil + default: + errWant = io.ErrUnexpectedEOF + } + + if err := Read(strings.NewReader(data[:i]), LittleEndian, &b1); err != errWant { + t.Errorf("Read(%d) with slice: got %v, want %v", i, err, errWant) + } + if err := Read(strings.NewReader(data[:i]), LittleEndian, &b2); err != errWant { + t.Errorf("Read(%d) with struct: got %v, want %v", i, err, errWant) + } + } +} + +func testUint64SmallSliceLengthPanics() (panicked bool) { + defer func() { + panicked = recover() != nil + }() + b := [8]byte{1, 2, 3, 4, 5, 6, 7, 8} + LittleEndian.Uint64(b[:4]) + return false +} + +func testPutUint64SmallSliceLengthPanics() (panicked bool) { + defer func() { + panicked = recover() != nil + }() + b := [8]byte{} + LittleEndian.PutUint64(b[:4], 0x0102030405060708) + return false +} + +func TestByteOrder(t *testing.T) { + type byteOrder interface { + ByteOrder + AppendByteOrder + } + buf := make([]byte, 8) + for _, order := range []byteOrder{LittleEndian, BigEndian} { + const offset = 3 + for _, value := range []uint64{ + 0x0000000000000000, + 0x0123456789abcdef, + 0xfedcba9876543210, + 0xffffffffffffffff, + 0xaaaaaaaaaaaaaaaa, + math.Float64bits(math.Pi), + math.Float64bits(math.E), + } { + want16 := uint16(value) + order.PutUint16(buf[:2], want16) + if got := order.Uint16(buf[:2]); got != want16 { + t.Errorf("PutUint16: Uint16 = %v, want %v", got, want16) + } + buf = order.AppendUint16(buf[:offset], want16) + if got := order.Uint16(buf[offset:]); got != want16 { + t.Errorf("AppendUint16: Uint16 = %v, want %v", got, want16) + } + if len(buf) != offset+2 { + t.Errorf("AppendUint16: len(buf) = %d, want %d", len(buf), offset+2) + } + + want32 := uint32(value) + order.PutUint32(buf[:4], want32) + if got := order.Uint32(buf[:4]); got != want32 { + t.Errorf("PutUint32: Uint32 = %v, want %v", got, want32) + } + buf = order.AppendUint32(buf[:offset], want32) + if got := order.Uint32(buf[offset:]); got != want32 { + t.Errorf("AppendUint32: Uint32 = %v, want %v", got, want32) + } + if len(buf) != offset+4 { + t.Errorf("AppendUint32: len(buf) = %d, want %d", len(buf), offset+4) + } + + want64 := uint64(value) + order.PutUint64(buf[:8], want64) + if got := order.Uint64(buf[:8]); got != want64 { + t.Errorf("PutUint64: Uint64 = %v, want %v", got, want64) + } + buf = order.AppendUint64(buf[:offset], want64) + if got := order.Uint64(buf[offset:]); got != want64 { + t.Errorf("AppendUint64: Uint64 = %v, want %v", got, want64) + } + if len(buf) != offset+8 { + t.Errorf("AppendUint64: len(buf) = %d, want %d", len(buf), offset+8) + } + } + } +} + +func TestEarlyBoundsChecks(t *testing.T) { + if testUint64SmallSliceLengthPanics() != true { + t.Errorf("binary.LittleEndian.Uint64 expected to panic for small slices, but didn't") + } + if testPutUint64SmallSliceLengthPanics() != true { + t.Errorf("binary.LittleEndian.PutUint64 expected to panic for small slices, but didn't") + } +} + +func TestReadInvalidDestination(t *testing.T) { + testReadInvalidDestination(t, BigEndian) + testReadInvalidDestination(t, LittleEndian) +} + +func testReadInvalidDestination(t *testing.T, order ByteOrder) { + destinations := []any{ + int8(0), + int16(0), + int32(0), + int64(0), + + uint8(0), + uint16(0), + uint32(0), + uint64(0), + + bool(false), + } + + for _, dst := range destinations { + err := Read(bytes.NewReader([]byte{1, 2, 3, 4, 5, 6, 7, 8}), order, dst) + want := fmt.Sprintf("binary.Read: invalid type %T", dst) + if err == nil || err.Error() != want { + t.Fatalf("for type %T: got %q; want %q", dst, err, want) + } + } +} + +func TestNoFixedSize(t *testing.T) { + type Person struct { + Age int + Weight float64 + Height float64 + } + + person := Person{ + Age: 27, + Weight: 67.3, + Height: 177.8, + } + + buf := new(bytes.Buffer) + err := Write(buf, LittleEndian, &person) + if err == nil { + t.Fatal("binary.Write: unexpected success as size of type *binary.Person is not fixed") + } + errs := "binary.Write: some values are not fixed-sized in type *binary.Person" + if err.Error() != errs { + t.Fatalf("got %q, want %q", err, errs) + } +} + +type byteSliceReader struct { + remain []byte +} + +func (br *byteSliceReader) Read(p []byte) (int, error) { + n := copy(p, br.remain) + br.remain = br.remain[n:] + return n, nil +} + +func BenchmarkReadSlice1000Int32s(b *testing.B) { + bsr := &byteSliceReader{} + slice := make([]int32, 1000) + buf := make([]byte, len(slice)*4) + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = buf + Read(bsr, BigEndian, slice) + } +} + +func BenchmarkReadStruct(b *testing.B) { + bsr := &byteSliceReader{} + var buf bytes.Buffer + Write(&buf, BigEndian, &s) + b.SetBytes(int64(dataSize(reflect.ValueOf(s)))) + t := s + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = buf.Bytes() + Read(bsr, BigEndian, &t) + } + b.StopTimer() + if b.N > 0 && !reflect.DeepEqual(s, t) { + b.Fatalf("struct doesn't match:\ngot %v;\nwant %v", t, s) + } +} + +func BenchmarkWriteStruct(b *testing.B) { + b.SetBytes(int64(Size(&s))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Write(io.Discard, BigEndian, &s) + } +} + +func BenchmarkReadInts(b *testing.B) { + var ls Struct + bsr := &byteSliceReader{} + var r io.Reader = bsr + b.SetBytes(2 * (1 + 2 + 4 + 8)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = big + Read(r, BigEndian, &ls.Int8) + Read(r, BigEndian, &ls.Int16) + Read(r, BigEndian, &ls.Int32) + Read(r, BigEndian, &ls.Int64) + Read(r, BigEndian, &ls.Uint8) + Read(r, BigEndian, &ls.Uint16) + Read(r, BigEndian, &ls.Uint32) + Read(r, BigEndian, &ls.Uint64) + } + b.StopTimer() + want := s + want.Float32 = 0 + want.Float64 = 0 + want.Complex64 = 0 + want.Complex128 = 0 + want.Array = [4]uint8{0, 0, 0, 0} + want.Bool = false + want.BoolArray = [4]bool{false, false, false, false} + if b.N > 0 && !reflect.DeepEqual(ls, want) { + b.Fatalf("struct doesn't match:\ngot %v;\nwant %v", ls, want) + } +} + +func BenchmarkWriteInts(b *testing.B) { + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(2 * (1 + 2 + 4 + 8)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, s.Int8) + Write(w, BigEndian, s.Int16) + Write(w, BigEndian, s.Int32) + Write(w, BigEndian, s.Int64) + Write(w, BigEndian, s.Uint8) + Write(w, BigEndian, s.Uint16) + Write(w, BigEndian, s.Uint32) + Write(w, BigEndian, s.Uint64) + } + b.StopTimer() + if b.N > 0 && !bytes.Equal(buf.Bytes(), big[:30]) { + b.Fatalf("first half doesn't match: %x %x", buf.Bytes(), big[:30]) + } +} + +func BenchmarkWriteSlice1000Int32s(b *testing.B) { + slice := make([]int32, 1000) + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(4 * 1000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, slice) + } + b.StopTimer() +} + +func BenchmarkPutUint16(b *testing.B) { + b.SetBytes(2) + for i := 0; i < b.N; i++ { + BigEndian.PutUint16(putbuf[:2], uint16(i)) + } +} + +func BenchmarkAppendUint16(b *testing.B) { + b.SetBytes(2) + for i := 0; i < b.N; i++ { + putbuf = BigEndian.AppendUint16(putbuf[:0], uint16(i)) + } +} + +func BenchmarkPutUint32(b *testing.B) { + b.SetBytes(4) + for i := 0; i < b.N; i++ { + BigEndian.PutUint32(putbuf[:4], uint32(i)) + } +} + +func BenchmarkAppendUint32(b *testing.B) { + b.SetBytes(4) + for i := 0; i < b.N; i++ { + putbuf = BigEndian.AppendUint32(putbuf[:0], uint32(i)) + } +} + +func BenchmarkPutUint64(b *testing.B) { + b.SetBytes(8) + for i := 0; i < b.N; i++ { + BigEndian.PutUint64(putbuf[:8], uint64(i)) + } +} + +func BenchmarkAppendUint64(b *testing.B) { + b.SetBytes(8) + for i := 0; i < b.N; i++ { + putbuf = BigEndian.AppendUint64(putbuf[:0], uint64(i)) + } +} + +func BenchmarkLittleEndianPutUint16(b *testing.B) { + b.SetBytes(2) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint16(putbuf[:2], uint16(i)) + } +} + +func BenchmarkLittleEndianAppendUint16(b *testing.B) { + b.SetBytes(2) + for i := 0; i < b.N; i++ { + putbuf = LittleEndian.AppendUint16(putbuf[:0], uint16(i)) + } +} + +func BenchmarkLittleEndianPutUint32(b *testing.B) { + b.SetBytes(4) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint32(putbuf[:4], uint32(i)) + } +} + +func BenchmarkLittleEndianAppendUint32(b *testing.B) { + b.SetBytes(4) + for i := 0; i < b.N; i++ { + putbuf = LittleEndian.AppendUint32(putbuf[:0], uint32(i)) + } +} + +func BenchmarkLittleEndianPutUint64(b *testing.B) { + b.SetBytes(8) + for i := 0; i < b.N; i++ { + LittleEndian.PutUint64(putbuf[:8], uint64(i)) + } +} + +func BenchmarkLittleEndianAppendUint64(b *testing.B) { + b.SetBytes(8) + for i := 0; i < b.N; i++ { + putbuf = LittleEndian.AppendUint64(putbuf[:0], uint64(i)) + } +} + +func BenchmarkReadFloats(b *testing.B) { + var ls Struct + bsr := &byteSliceReader{} + var r io.Reader = bsr + b.SetBytes(4 + 8) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = big[30:] + Read(r, BigEndian, &ls.Float32) + Read(r, BigEndian, &ls.Float64) + } + b.StopTimer() + want := s + want.Int8 = 0 + want.Int16 = 0 + want.Int32 = 0 + want.Int64 = 0 + want.Uint8 = 0 + want.Uint16 = 0 + want.Uint32 = 0 + want.Uint64 = 0 + want.Complex64 = 0 + want.Complex128 = 0 + want.Array = [4]uint8{0, 0, 0, 0} + want.Bool = false + want.BoolArray = [4]bool{false, false, false, false} + if b.N > 0 && !reflect.DeepEqual(ls, want) { + b.Fatalf("struct doesn't match:\ngot %v;\nwant %v", ls, want) + } +} + +func BenchmarkWriteFloats(b *testing.B) { + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(4 + 8) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, s.Float32) + Write(w, BigEndian, s.Float64) + } + b.StopTimer() + if b.N > 0 && !bytes.Equal(buf.Bytes(), big[30:30+4+8]) { + b.Fatalf("first half doesn't match: %x %x", buf.Bytes(), big[30:30+4+8]) + } +} + +func BenchmarkReadSlice1000Float32s(b *testing.B) { + bsr := &byteSliceReader{} + slice := make([]float32, 1000) + buf := make([]byte, len(slice)*4) + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = buf + Read(bsr, BigEndian, slice) + } +} + +func BenchmarkWriteSlice1000Float32s(b *testing.B) { + slice := make([]float32, 1000) + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(4 * 1000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, slice) + } + b.StopTimer() +} + +func BenchmarkReadSlice1000Uint8s(b *testing.B) { + bsr := &byteSliceReader{} + slice := make([]uint8, 1000) + buf := make([]byte, len(slice)) + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + bsr.remain = buf + Read(bsr, BigEndian, slice) + } +} + +func BenchmarkWriteSlice1000Uint8s(b *testing.B) { + slice := make([]uint8, 1000) + buf := new(bytes.Buffer) + var w io.Writer = buf + b.SetBytes(1000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + buf.Reset() + Write(w, BigEndian, slice) + } +} + +func TestNativeEndian(t *testing.T) { + const val = 0x12345678 + i := uint32(val) + s := unsafe.Slice((*byte)(unsafe.Pointer(&i)), unsafe.Sizeof(i)) + if v := NativeEndian.Uint32(s); v != val { + t.Errorf("NativeEndian.Uint32 returned %#x, expected %#x", v, val) + } +} diff --git a/src/encoding/binary/example_test.go b/src/encoding/binary/example_test.go new file mode 100644 index 0000000..4c10daa --- /dev/null +++ b/src/encoding/binary/example_test.go @@ -0,0 +1,187 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary_test + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" +) + +func ExampleWrite() { + buf := new(bytes.Buffer) + var pi float64 = math.Pi + err := binary.Write(buf, binary.LittleEndian, pi) + if err != nil { + fmt.Println("binary.Write failed:", err) + } + fmt.Printf("% x", buf.Bytes()) + // Output: 18 2d 44 54 fb 21 09 40 +} + +func ExampleWrite_multi() { + buf := new(bytes.Buffer) + var data = []any{ + uint16(61374), + int8(-54), + uint8(254), + } + for _, v := range data { + err := binary.Write(buf, binary.LittleEndian, v) + if err != nil { + fmt.Println("binary.Write failed:", err) + } + } + fmt.Printf("%x", buf.Bytes()) + // Output: beefcafe +} + +func ExampleRead() { + var pi float64 + b := []byte{0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40} + buf := bytes.NewReader(b) + err := binary.Read(buf, binary.LittleEndian, &pi) + if err != nil { + fmt.Println("binary.Read failed:", err) + } + fmt.Print(pi) + // Output: 3.141592653589793 +} + +func ExampleRead_multi() { + b := []byte{0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40, 0xff, 0x01, 0x02, 0x03, 0xbe, 0xef} + r := bytes.NewReader(b) + + var data struct { + PI float64 + Uate uint8 + Mine [3]byte + Too uint16 + } + + if err := binary.Read(r, binary.LittleEndian, &data); err != nil { + fmt.Println("binary.Read failed:", err) + } + + fmt.Println(data.PI) + fmt.Println(data.Uate) + fmt.Printf("% x\n", data.Mine) + fmt.Println(data.Too) + // Output: + // 3.141592653589793 + // 255 + // 01 02 03 + // 61374 +} + +func ExampleByteOrder_put() { + b := make([]byte, 4) + binary.LittleEndian.PutUint16(b[0:], 0x03e8) + binary.LittleEndian.PutUint16(b[2:], 0x07d0) + fmt.Printf("% x\n", b) + // Output: + // e8 03 d0 07 +} + +func ExampleByteOrder_get() { + b := []byte{0xe8, 0x03, 0xd0, 0x07} + x1 := binary.LittleEndian.Uint16(b[0:]) + x2 := binary.LittleEndian.Uint16(b[2:]) + fmt.Printf("%#04x %#04x\n", x1, x2) + // Output: + // 0x03e8 0x07d0 +} + +func ExamplePutUvarint() { + buf := make([]byte, binary.MaxVarintLen64) + + for _, x := range []uint64{1, 2, 127, 128, 255, 256} { + n := binary.PutUvarint(buf, x) + fmt.Printf("%x\n", buf[:n]) + } + // Output: + // 01 + // 02 + // 7f + // 8001 + // ff01 + // 8002 +} + +func ExamplePutVarint() { + buf := make([]byte, binary.MaxVarintLen64) + + for _, x := range []int64{-65, -64, -2, -1, 0, 1, 2, 63, 64} { + n := binary.PutVarint(buf, x) + fmt.Printf("%x\n", buf[:n]) + } + // Output: + // 8101 + // 7f + // 03 + // 01 + // 00 + // 02 + // 04 + // 7e + // 8001 +} + +func ExampleUvarint() { + inputs := [][]byte{ + {0x01}, + {0x02}, + {0x7f}, + {0x80, 0x01}, + {0xff, 0x01}, + {0x80, 0x02}, + } + for _, b := range inputs { + x, n := binary.Uvarint(b) + if n != len(b) { + fmt.Println("Uvarint did not consume all of in") + } + fmt.Println(x) + } + // Output: + // 1 + // 2 + // 127 + // 128 + // 255 + // 256 +} + +func ExampleVarint() { + inputs := [][]byte{ + {0x81, 0x01}, + {0x7f}, + {0x03}, + {0x01}, + {0x00}, + {0x02}, + {0x04}, + {0x7e}, + {0x80, 0x01}, + } + for _, b := range inputs { + x, n := binary.Varint(b) + if n != len(b) { + fmt.Println("Varint did not consume all of in") + } + fmt.Println(x) + } + // Output: + // -65 + // -64 + // -2 + // -1 + // 0 + // 1 + // 2 + // 63 + // 64 +} diff --git a/src/encoding/binary/native_endian_big.go b/src/encoding/binary/native_endian_big.go new file mode 100644 index 0000000..bcc8e30 --- /dev/null +++ b/src/encoding/binary/native_endian_big.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64 + +package binary + +type nativeEndian struct { + bigEndian +} + +// NativeEndian is the native-endian implementation of [ByteOrder] and [AppendByteOrder]. +var NativeEndian nativeEndian diff --git a/src/encoding/binary/native_endian_little.go b/src/encoding/binary/native_endian_little.go new file mode 100644 index 0000000..38d3e9b --- /dev/null +++ b/src/encoding/binary/native_endian_little.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm + +package binary + +type nativeEndian struct { + littleEndian +} + +// NativeEndian is the native-endian implementation of [ByteOrder] and [AppendByteOrder]. +var NativeEndian nativeEndian diff --git a/src/encoding/binary/varint.go b/src/encoding/binary/varint.go new file mode 100644 index 0000000..64dd9d6 --- /dev/null +++ b/src/encoding/binary/varint.go @@ -0,0 +1,166 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +// This file implements "varint" encoding of 64-bit integers. +// The encoding is: +// - unsigned integers are serialized 7 bits at a time, starting with the +// least significant bits +// - the most significant bit (msb) in each output byte indicates if there +// is a continuation byte (msb = 1) +// - signed integers are mapped to unsigned integers using "zig-zag" +// encoding: Positive values x are written as 2*x + 0, negative values +// are written as 2*(^x) + 1; that is, negative numbers are complemented +// and whether to complement is encoded in bit 0. +// +// Design note: +// At most 10 bytes are needed for 64-bit values. The encoding could +// be more dense: a full 64-bit value needs an extra byte just to hold bit 63. +// Instead, the msb of the previous byte could be used to hold bit 63 since we +// know there can't be more than 64 bits. This is a trivial improvement and +// would reduce the maximum encoding length to 9 bytes. However, it breaks the +// invariant that the msb is always the "continuation bit" and thus makes the +// format incompatible with a varint encoding for larger numbers (say 128-bit). + +import ( + "errors" + "io" +) + +// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer. +const ( + MaxVarintLen16 = 3 + MaxVarintLen32 = 5 + MaxVarintLen64 = 10 +) + +// AppendUvarint appends the varint-encoded form of x, +// as generated by [PutUvarint], to buf and returns the extended buffer. +func AppendUvarint(buf []byte, x uint64) []byte { + for x >= 0x80 { + buf = append(buf, byte(x)|0x80) + x >>= 7 + } + return append(buf, byte(x)) +} + +// PutUvarint encodes a uint64 into buf and returns the number of bytes written. +// If the buffer is too small, PutUvarint will panic. +func PutUvarint(buf []byte, x uint64) int { + i := 0 + for x >= 0x80 { + buf[i] = byte(x) | 0x80 + x >>= 7 + i++ + } + buf[i] = byte(x) + return i + 1 +} + +// Uvarint decodes a uint64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +func Uvarint(buf []byte) (uint64, int) { + var x uint64 + var s uint + for i, b := range buf { + if i == MaxVarintLen64 { + // Catch byte reads past MaxVarintLen64. + // See issue https://golang.org/issues/41185 + return 0, -(i + 1) // overflow + } + if b < 0x80 { + if i == MaxVarintLen64-1 && b > 1 { + return 0, -(i + 1) // overflow + } + return x | uint64(b)<<s, i + 1 + } + x |= uint64(b&0x7f) << s + s += 7 + } + return 0, 0 +} + +// AppendVarint appends the varint-encoded form of x, +// as generated by [PutVarint], to buf and returns the extended buffer. +func AppendVarint(buf []byte, x int64) []byte { + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + return AppendUvarint(buf, ux) +} + +// PutVarint encodes an int64 into buf and returns the number of bytes written. +// If the buffer is too small, PutVarint will panic. +func PutVarint(buf []byte, x int64) int { + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + return PutUvarint(buf, ux) +} + +// Varint decodes an int64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 with the following meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +func Varint(buf []byte) (int64, int) { + ux, n := Uvarint(buf) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, n +} + +var errOverflow = errors.New("binary: varint overflows a 64-bit integer") + +// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64. +// The error is [io.EOF] only if no bytes were read. +// If an [io.EOF] happens after reading some but not all the bytes, +// ReadUvarint returns [io.ErrUnexpectedEOF]. +func ReadUvarint(r io.ByteReader) (uint64, error) { + var x uint64 + var s uint + for i := 0; i < MaxVarintLen64; i++ { + b, err := r.ReadByte() + if err != nil { + if i > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return x, err + } + if b < 0x80 { + if i == MaxVarintLen64-1 && b > 1 { + return x, errOverflow + } + return x | uint64(b)<<s, nil + } + x |= uint64(b&0x7f) << s + s += 7 + } + return x, errOverflow +} + +// ReadVarint reads an encoded signed integer from r and returns it as an int64. +// The error is [io.EOF] only if no bytes were read. +// If an [io.EOF] happens after reading some but not all the bytes, +// ReadVarint returns [io.ErrUnexpectedEOF]. +func ReadVarint(r io.ByteReader) (int64, error) { + ux, err := ReadUvarint(r) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, err +} diff --git a/src/encoding/binary/varint_test.go b/src/encoding/binary/varint_test.go new file mode 100644 index 0000000..5c3ea31 --- /dev/null +++ b/src/encoding/binary/varint_test.go @@ -0,0 +1,247 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +import ( + "bytes" + "io" + "math" + "testing" +) + +func testConstant(t *testing.T, w uint, max int) { + buf := make([]byte, MaxVarintLen64) + n := PutUvarint(buf, 1<<w-1) + if n != max { + t.Errorf("MaxVarintLen%d = %d; want %d", w, max, n) + } +} + +func TestConstants(t *testing.T) { + testConstant(t, 16, MaxVarintLen16) + testConstant(t, 32, MaxVarintLen32) + testConstant(t, 64, MaxVarintLen64) +} + +func testVarint(t *testing.T, x int64) { + buf := make([]byte, MaxVarintLen64) + n := PutVarint(buf, x) + y, m := Varint(buf[0:n]) + if x != y { + t.Errorf("Varint(%d): got %d", x, y) + } + if n != m { + t.Errorf("Varint(%d): got n = %d; want %d", x, m, n) + } + + buf2 := []byte("prefix") + buf2 = AppendVarint(buf2, x) + if string(buf2) != "prefix"+string(buf[:n]) { + t.Errorf("AppendVarint(%d): got %q, want %q", x, buf2, "prefix"+string(buf[:n])) + } + + y, err := ReadVarint(bytes.NewReader(buf)) + if err != nil { + t.Errorf("ReadVarint(%d): %s", x, err) + } + if x != y { + t.Errorf("ReadVarint(%d): got %d", x, y) + } +} + +func testUvarint(t *testing.T, x uint64) { + buf := make([]byte, MaxVarintLen64) + n := PutUvarint(buf, x) + y, m := Uvarint(buf[0:n]) + if x != y { + t.Errorf("Uvarint(%d): got %d", x, y) + } + if n != m { + t.Errorf("Uvarint(%d): got n = %d; want %d", x, m, n) + } + + buf2 := []byte("prefix") + buf2 = AppendUvarint(buf2, x) + if string(buf2) != "prefix"+string(buf[:n]) { + t.Errorf("AppendUvarint(%d): got %q, want %q", x, buf2, "prefix"+string(buf[:n])) + } + + y, err := ReadUvarint(bytes.NewReader(buf)) + if err != nil { + t.Errorf("ReadUvarint(%d): %s", x, err) + } + if x != y { + t.Errorf("ReadUvarint(%d): got %d", x, y) + } +} + +var tests = []int64{ + -1 << 63, + -1<<63 + 1, + -1, + 0, + 1, + 2, + 10, + 20, + 63, + 64, + 65, + 127, + 128, + 129, + 255, + 256, + 257, + 1<<63 - 1, +} + +func TestVarint(t *testing.T) { + for _, x := range tests { + testVarint(t, x) + testVarint(t, -x) + } + for x := int64(0x7); x != 0; x <<= 1 { + testVarint(t, x) + testVarint(t, -x) + } +} + +func TestUvarint(t *testing.T) { + for _, x := range tests { + testUvarint(t, uint64(x)) + } + for x := uint64(0x7); x != 0; x <<= 1 { + testUvarint(t, x) + } +} + +func TestBufferTooSmall(t *testing.T) { + buf := []byte{0x80, 0x80, 0x80, 0x80} + for i := 0; i <= len(buf); i++ { + buf := buf[0:i] + x, n := Uvarint(buf) + if x != 0 || n != 0 { + t.Errorf("Uvarint(%v): got x = %d, n = %d", buf, x, n) + } + + x, err := ReadUvarint(bytes.NewReader(buf)) + wantErr := io.EOF + if i > 0 { + wantErr = io.ErrUnexpectedEOF + } + if x != 0 || err != wantErr { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err) + } + } +} + +// Ensure that we catch overflows of bytes going past MaxVarintLen64. +// See issue https://golang.org/issues/41185 +func TestBufferTooBigWithOverflow(t *testing.T) { + tests := []struct { + in []byte + name string + wantN int + wantValue uint64 + }{ + { + name: "invalid: 1000 bytes", + in: func() []byte { + b := make([]byte, 1000) + for i := range b { + b[i] = 0xff + } + b[999] = 0 + return b + }(), + wantN: -11, + wantValue: 0, + }, + { + name: "valid: math.MaxUint64-40", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + wantValue: math.MaxUint64 - 40, + wantN: 10, + }, + { + name: "invalid: with more than MaxVarintLen64 bytes", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + wantN: -11, + wantValue: 0, + }, + { + name: "invalid: 10th byte", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f}, + wantN: -10, + wantValue: 0, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + value, n := Uvarint(tt.in) + if g, w := n, tt.wantN; g != w { + t.Errorf("bytes returned=%d, want=%d", g, w) + } + if g, w := value, tt.wantValue; g != w { + t.Errorf("value=%d, want=%d", g, w) + } + }) + } +} + +func testOverflow(t *testing.T, buf []byte, x0 uint64, n0 int, err0 error) { + x, n := Uvarint(buf) + if x != 0 || n != n0 { + t.Errorf("Uvarint(% X): got x = %d, n = %d; want 0, %d", buf, x, n, n0) + } + + r := bytes.NewReader(buf) + len := r.Len() + x, err := ReadUvarint(r) + if x != x0 || err != err0 { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want %d, %s", buf, x, err, x0, err0) + } + if read := len - r.Len(); read > MaxVarintLen64 { + t.Errorf("ReadUvarint(%v): read more than MaxVarintLen64 bytes, got %d", buf, read) + } +} + +func TestOverflow(t *testing.T) { + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, 0, -10, errOverflow) + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, 0, -11, errOverflow) + testOverflow(t, []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 1<<64-1, -11, errOverflow) // 11 bytes, should overflow +} + +func TestNonCanonicalZero(t *testing.T) { + buf := []byte{0x80, 0x80, 0x80, 0} + x, n := Uvarint(buf) + if x != 0 || n != 4 { + t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n) + + } +} + +func BenchmarkPutUvarint32(b *testing.B) { + buf := make([]byte, MaxVarintLen32) + b.SetBytes(4) + for i := 0; i < b.N; i++ { + for j := uint(0); j < MaxVarintLen32; j++ { + PutUvarint(buf, 1<<(j*7)) + } + } +} + +func BenchmarkPutUvarint64(b *testing.B) { + buf := make([]byte, MaxVarintLen64) + b.SetBytes(8) + for i := 0; i < b.N; i++ { + for j := uint(0); j < MaxVarintLen64; j++ { + PutUvarint(buf, 1<<(j*7)) + } + } +} diff --git a/src/encoding/csv/example_test.go b/src/encoding/csv/example_test.go new file mode 100644 index 0000000..dc227d4 --- /dev/null +++ b/src/encoding/csv/example_test.go @@ -0,0 +1,131 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv_test + +import ( + "encoding/csv" + "fmt" + "io" + "log" + "os" + "strings" +) + +func ExampleReader() { + in := `first_name,last_name,username +"Rob","Pike",rob +Ken,Thompson,ken +"Robert","Griesemer","gri" +` + r := csv.NewReader(strings.NewReader(in)) + + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + + fmt.Println(record) + } + // Output: + // [first_name last_name username] + // [Rob Pike rob] + // [Ken Thompson ken] + // [Robert Griesemer gri] +} + +// This example shows how csv.Reader can be configured to handle other +// types of CSV files. +func ExampleReader_options() { + in := `first_name;last_name;username +"Rob";"Pike";rob +# lines beginning with a # character are ignored +Ken;Thompson;ken +"Robert";"Griesemer";"gri" +` + r := csv.NewReader(strings.NewReader(in)) + r.Comma = ';' + r.Comment = '#' + + records, err := r.ReadAll() + if err != nil { + log.Fatal(err) + } + + fmt.Print(records) + // Output: + // [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]] +} + +func ExampleReader_ReadAll() { + in := `first_name,last_name,username +"Rob","Pike",rob +Ken,Thompson,ken +"Robert","Griesemer","gri" +` + r := csv.NewReader(strings.NewReader(in)) + + records, err := r.ReadAll() + if err != nil { + log.Fatal(err) + } + + fmt.Print(records) + // Output: + // [[first_name last_name username] [Rob Pike rob] [Ken Thompson ken] [Robert Griesemer gri]] +} + +func ExampleWriter() { + records := [][]string{ + {"first_name", "last_name", "username"}, + {"Rob", "Pike", "rob"}, + {"Ken", "Thompson", "ken"}, + {"Robert", "Griesemer", "gri"}, + } + + w := csv.NewWriter(os.Stdout) + + for _, record := range records { + if err := w.Write(record); err != nil { + log.Fatalln("error writing record to csv:", err) + } + } + + // Write any buffered data to the underlying writer (standard output). + w.Flush() + + if err := w.Error(); err != nil { + log.Fatal(err) + } + // Output: + // first_name,last_name,username + // Rob,Pike,rob + // Ken,Thompson,ken + // Robert,Griesemer,gri +} + +func ExampleWriter_WriteAll() { + records := [][]string{ + {"first_name", "last_name", "username"}, + {"Rob", "Pike", "rob"}, + {"Ken", "Thompson", "ken"}, + {"Robert", "Griesemer", "gri"}, + } + + w := csv.NewWriter(os.Stdout) + w.WriteAll(records) // calls Flush internally + + if err := w.Error(); err != nil { + log.Fatalln("error writing csv:", err) + } + // Output: + // first_name,last_name,username + // Rob,Pike,rob + // Ken,Thompson,ken + // Robert,Griesemer,gri +} diff --git a/src/encoding/csv/fuzz.go b/src/encoding/csv/fuzz.go new file mode 100644 index 0000000..5f5cdfc --- /dev/null +++ b/src/encoding/csv/fuzz.go @@ -0,0 +1,70 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build gofuzz + +package csv + +import ( + "bytes" + "fmt" + "reflect" +) + +func Fuzz(data []byte) int { + score := 0 + buf := new(bytes.Buffer) + + for _, tt := range []Reader{ + {}, + {Comma: ';'}, + {Comma: '\t'}, + {LazyQuotes: true}, + {TrimLeadingSpace: true}, + {Comment: '#'}, + {Comment: ';'}, + } { + r := NewReader(bytes.NewReader(data)) + r.Comma = tt.Comma + r.Comment = tt.Comment + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + + records, err := r.ReadAll() + if err != nil { + continue + } + score = 1 + + buf.Reset() + w := NewWriter(buf) + w.Comma = tt.Comma + err = w.WriteAll(records) + if err != nil { + fmt.Printf("writer = %#v\n", w) + fmt.Printf("records = %v\n", records) + panic(err) + } + + r = NewReader(buf) + r.Comma = tt.Comma + r.Comment = tt.Comment + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + result, err := r.ReadAll() + if err != nil { + fmt.Printf("reader = %#v\n", r) + fmt.Printf("records = %v\n", records) + panic(err) + } + + if !reflect.DeepEqual(records, result) { + fmt.Println("records = \n", records) + fmt.Println("result = \n", records) + panic("not equal") + } + } + + return score +} diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go new file mode 100644 index 0000000..d9cab86 --- /dev/null +++ b/src/encoding/csv/reader.go @@ -0,0 +1,466 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package csv reads and writes comma-separated values (CSV) files. +// There are many kinds of CSV files; this package supports the format +// described in RFC 4180. +// +// A csv file contains zero or more records of one or more fields per record. +// Each record is separated by the newline character. The final record may +// optionally be followed by a newline character. +// +// field1,field2,field3 +// +// White space is considered part of a field. +// +// Carriage returns before newline characters are silently removed. +// +// Blank lines are ignored. A line with only whitespace characters (excluding +// the ending newline character) is not considered a blank line. +// +// Fields which start and stop with the quote character " are called +// quoted-fields. The beginning and ending quote are not part of the +// field. +// +// The source: +// +// normal string,"quoted-field" +// +// results in the fields +// +// {`normal string`, `quoted-field`} +// +// Within a quoted-field a quote character followed by a second quote +// character is considered a single quote. +// +// "the ""word"" is true","a ""quoted-field""" +// +// results in +// +// {`the "word" is true`, `a "quoted-field"`} +// +// Newlines and commas may be included in a quoted-field +// +// "Multi-line +// field","comma is ," +// +// results in +// +// {`Multi-line +// field`, `comma is ,`} +package csv + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "unicode" + "unicode/utf8" +) + +// A ParseError is returned for parsing errors. +// Line and column numbers are 1-indexed. +type ParseError struct { + StartLine int // Line where the record starts + Line int // Line where the error occurred + Column int // Column (1-based byte index) where the error occurred + Err error // The actual error +} + +func (e *ParseError) Error() string { + if e.Err == ErrFieldCount { + return fmt.Sprintf("record on line %d: %v", e.Line, e.Err) + } + if e.StartLine != e.Line { + return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err) + } + return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err) +} + +func (e *ParseError) Unwrap() error { return e.Err } + +// These are the errors that can be returned in [ParseError.Err]. +var ( + ErrBareQuote = errors.New("bare \" in non-quoted-field") + ErrQuote = errors.New("extraneous or missing \" in quoted-field") + ErrFieldCount = errors.New("wrong number of fields") + + // Deprecated: ErrTrailingComma is no longer used. + ErrTrailingComma = errors.New("extra delimiter at end of line") +) + +var errInvalidDelim = errors.New("csv: invalid field or comment delimiter") + +func validDelim(r rune) bool { + return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError +} + +// A Reader reads records from a CSV-encoded file. +// +// As returned by [NewReader], a Reader expects input conforming to RFC 4180. +// The exported fields can be changed to customize the details before the +// first call to [Reader.Read] or [Reader.ReadAll]. +// +// The Reader converts all \r\n sequences in its input to plain \n, +// including in multiline field values, so that the returned data does +// not depend on which line-ending convention an input file uses. +type Reader struct { + // Comma is the field delimiter. + // It is set to comma (',') by NewReader. + // Comma must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + Comma rune + + // Comment, if not 0, is the comment character. Lines beginning with the + // Comment character without preceding whitespace are ignored. + // With leading whitespace the Comment character becomes part of the + // field, even if TrimLeadingSpace is true. + // Comment must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + // It must also not be equal to Comma. + Comment rune + + // FieldsPerRecord is the number of expected fields per record. + // If FieldsPerRecord is positive, Read requires each record to + // have the given number of fields. If FieldsPerRecord is 0, Read sets it to + // the number of fields in the first record, so that future records must + // have the same field count. If FieldsPerRecord is negative, no check is + // made and records may have a variable number of fields. + FieldsPerRecord int + + // If LazyQuotes is true, a quote may appear in an unquoted field and a + // non-doubled quote may appear in a quoted field. + LazyQuotes bool + + // If TrimLeadingSpace is true, leading white space in a field is ignored. + // This is done even if the field delimiter, Comma, is white space. + TrimLeadingSpace bool + + // ReuseRecord controls whether calls to Read may return a slice sharing + // the backing array of the previous call's returned slice for performance. + // By default, each call to Read returns newly allocated memory owned by the caller. + ReuseRecord bool + + // Deprecated: TrailingComma is no longer used. + TrailingComma bool + + r *bufio.Reader + + // numLine is the current line being read in the CSV file. + numLine int + + // offset is the input stream byte offset of the current reader position. + offset int64 + + // rawBuffer is a line buffer only used by the readLine method. + rawBuffer []byte + + // recordBuffer holds the unescaped fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` + // and fieldIndexes will contain the indexes [1, 2, 5, 6]. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int + + // fieldPositions is an index of field positions for the + // last record returned by Read. + fieldPositions []position + + // lastRecord is a record cache and only used when ReuseRecord == true. + lastRecord []string +} + +// NewReader returns a new Reader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + Comma: ',', + r: bufio.NewReader(r), + } +} + +// Read reads one record (a slice of fields) from r. +// If the record has an unexpected number of fields, +// Read returns the record along with the error [ErrFieldCount]. +// If the record contains a field that cannot be parsed, +// Read returns a partial record along with the parse error. +// The partial record contains all fields read before the error. +// If there is no data left to be read, Read returns nil, [io.EOF]. +// If [Reader.ReuseRecord] is true, the returned slice may be shared +// between multiple calls to Read. +func (r *Reader) Read() (record []string, err error) { + if r.ReuseRecord { + record, err = r.readRecord(r.lastRecord) + r.lastRecord = record + } else { + record, err = r.readRecord(nil) + } + return record, err +} + +// FieldPos returns the line and column corresponding to +// the start of the field with the given index in the slice most recently +// returned by [Reader.Read]. Numbering of lines and columns starts at 1; +// columns are counted in bytes, not runes. +// +// If this is called with an out-of-bounds index, it panics. +func (r *Reader) FieldPos(field int) (line, column int) { + if field < 0 || field >= len(r.fieldPositions) { + panic("out of range index passed to FieldPos") + } + p := &r.fieldPositions[field] + return p.line, p.col +} + +// InputOffset returns the input stream byte offset of the current reader +// position. The offset gives the location of the end of the most recently +// read row and the beginning of the next row. +func (r *Reader) InputOffset() int64 { + return r.offset +} + +// pos holds the position of a field in the current line. +type position struct { + line, col int +} + +// ReadAll reads all the remaining records from r. +// Each record is a slice of fields. +// A successful call returns err == nil, not err == [io.EOF]. Because ReadAll is +// defined to read until EOF, it does not treat end of file as an error to be +// reported. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.readRecord(nil) + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } +} + +// readLine reads the next line (with the trailing endline). +// If EOF is hit without a trailing endline, it will be omitted. +// If some bytes were read, then the error is never [io.EOF]. +// The result is only valid until the next call to readLine. +func (r *Reader) readLine() ([]byte, error) { + line, err := r.r.ReadSlice('\n') + if err == bufio.ErrBufferFull { + r.rawBuffer = append(r.rawBuffer[:0], line...) + for err == bufio.ErrBufferFull { + line, err = r.r.ReadSlice('\n') + r.rawBuffer = append(r.rawBuffer, line...) + } + line = r.rawBuffer + } + readSize := len(line) + if readSize > 0 && err == io.EOF { + err = nil + // For backwards compatibility, drop trailing \r before EOF. + if line[readSize-1] == '\r' { + line = line[:readSize-1] + } + } + r.numLine++ + r.offset += int64(readSize) + // Normalize \r\n to \n on all input lines. + if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { + line[n-2] = '\n' + line = line[:n-1] + } + return line, err +} + +// lengthNL reports the number of bytes for the trailing \n. +func lengthNL(b []byte) int { + if len(b) > 0 && b[len(b)-1] == '\n' { + return 1 + } + return 0 +} + +// nextRune returns the next rune in b or utf8.RuneError. +func nextRune(b []byte) rune { + r, _ := utf8.DecodeRune(b) + return r +} + +func (r *Reader) readRecord(dst []string) ([]string, error) { + if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { + return nil, errInvalidDelim + } + + // Read line (automatically skipping past empty lines and any comments). + var line []byte + var errRead error + for errRead == nil { + line, errRead = r.readLine() + if r.Comment != 0 && nextRune(line) == r.Comment { + line = nil + continue // Skip comment lines + } + if errRead == nil && len(line) == lengthNL(line) { + line = nil + continue // Skip empty lines + } + break + } + if errRead == io.EOF { + return nil, errRead + } + + // Parse each field in the record. + var err error + const quoteLen = len(`"`) + commaLen := utf8.RuneLen(r.Comma) + recLine := r.numLine // Starting line for record + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] + r.fieldPositions = r.fieldPositions[:0] + pos := position{line: r.numLine, col: 1} +parseField: + for { + if r.TrimLeadingSpace { + i := bytes.IndexFunc(line, func(r rune) bool { + return !unicode.IsSpace(r) + }) + if i < 0 { + i = len(line) + pos.col -= lengthNL(line) + } + line = line[i:] + pos.col += i + } + if len(line) == 0 || line[0] != '"' { + // Non-quoted string field + i := bytes.IndexRune(line, r.Comma) + field := line + if i >= 0 { + field = field[:i] + } else { + field = field[:len(field)-lengthNL(field)] + } + // Check to make sure a quote does not appear in field. + if !r.LazyQuotes { + if j := bytes.IndexByte(field, '"'); j >= 0 { + col := pos.col + j + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} + break parseField + } + } + r.recordBuffer = append(r.recordBuffer, field...) + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, pos) + if i >= 0 { + line = line[i+commaLen:] + pos.col += i + commaLen + continue parseField + } + break parseField + } else { + // Quoted string field + fieldPos := pos + line = line[quoteLen:] + pos.col += quoteLen + for { + i := bytes.IndexByte(line, '"') + if i >= 0 { + // Hit next quote. + r.recordBuffer = append(r.recordBuffer, line[:i]...) + line = line[i+quoteLen:] + pos.col += i + quoteLen + switch rn := nextRune(line); { + case rn == '"': + // `""` sequence (append quote). + r.recordBuffer = append(r.recordBuffer, '"') + line = line[quoteLen:] + pos.col += quoteLen + case rn == r.Comma: + // `",` sequence (end of field). + line = line[commaLen:] + pos.col += commaLen + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + continue parseField + case lengthNL(line) == len(line): + // `"\n` sequence (end of line). + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + case r.LazyQuotes: + // `"` sequence (bare quote). + r.recordBuffer = append(r.recordBuffer, '"') + default: + // `"*` sequence (invalid non-escaped quote). + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote} + break parseField + } + } else if len(line) > 0 { + // Hit end of line (copy all data so far). + r.recordBuffer = append(r.recordBuffer, line...) + if errRead != nil { + break parseField + } + pos.col += len(line) + line, errRead = r.readLine() + if len(line) > 0 { + pos.line++ + pos.col = 1 + } + if errRead == io.EOF { + errRead = nil + } + } else { + // Abrupt end of file (EOF or error). + if !r.LazyQuotes && errRead == nil { + err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote} + break parseField + } + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + } + } + } + } + if err == nil { + err = errRead + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx + } + + // Check or update the expected fields per record. + if r.FieldsPerRecord > 0 { + if len(dst) != r.FieldsPerRecord && err == nil { + err = &ParseError{ + StartLine: recLine, + Line: recLine, + Column: 1, + Err: ErrFieldCount, + } + } + } else if r.FieldsPerRecord == 0 { + r.FieldsPerRecord = len(dst) + } + return dst, err +} diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go new file mode 100644 index 0000000..2e5d623 --- /dev/null +++ b/src/encoding/csv/reader_test.go @@ -0,0 +1,657 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "errors" + "fmt" + "io" + "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type readTest struct { + Name string + Input string + Output [][]string + Positions [][][2]int + Errors []error + + // These fields are copied into the Reader + Comma rune + Comment rune + UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 + FieldsPerRecord int + LazyQuotes bool + TrimLeadingSpace bool + ReuseRecord bool +} + +// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote +// the start of a field, a record boundary and the position of an error respectively. +// They are removed before parsing and are used to verify the position +// information reported by FieldPos. + +var readTests = []readTest{{ + Name: "Simple", + Input: "§a,§b,§c\n", + Output: [][]string{{"a", "b", "c"}}, +}, { + Name: "CRLF", + Input: "§a,§b\r\n¶§c,§d\r\n", + Output: [][]string{{"a", "b"}, {"c", "d"}}, +}, { + Name: "BareCR", + Input: "§a,§b\rc,§d\r\n", + Output: [][]string{{"a", "b\rc", "d"}}, +}, { + Name: "RFC4180test", + Input: `§#field1,§field2,§field3 +¶§"aaa",§"bb +b",§"ccc" +¶§"a,a",§"b""bb",§"ccc" +¶§zzz,§yyy,§xxx +`, + Output: [][]string{ + {"#field1", "field2", "field3"}, + {"aaa", "bb\nb", "ccc"}, + {"a,a", `b"bb`, "ccc"}, + {"zzz", "yyy", "xxx"}, + }, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "NoEOLTest", + Input: "§a,§b,§c", + Output: [][]string{{"a", "b", "c"}}, +}, { + Name: "Semicolon", + Input: "§a;§b;§c\n", + Output: [][]string{{"a", "b", "c"}}, + Comma: ';', +}, { + Name: "MultiLine", + Input: `§"two +line",§"one line",§"three +line +field"`, + Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, +}, { + Name: "BlankLine", + Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n", + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, +}, { + Name: "BlankLineFieldCount", + Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n", + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "TrimSpace", + Input: " §a, §b, §c\n", + Output: [][]string{{"a", "b", "c"}}, + TrimLeadingSpace: true, +}, { + Name: "LeadingSpace", + Input: "§ a,§ b,§ c\n", + Output: [][]string{{" a", " b", " c"}}, +}, { + Name: "Comment", + Input: "#1,2,3\n§a,§b,§c\n#comment", + Output: [][]string{{"a", "b", "c"}}, + Comment: '#', +}, { + Name: "NoComment", + Input: "§#1,§2,§3\n¶§a,§b,§c", + Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, +}, { + Name: "LazyQuotes", + Input: `§a "word",§"1"2",§a",§"b`, + Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, + LazyQuotes: true, +}, { + Name: "BareQuotes", + Input: `§a "word",§"1"2",§a"`, + Output: [][]string{{`a "word"`, `1"2`, `a"`}}, + LazyQuotes: true, +}, { + Name: "BareDoubleQuotes", + Input: `§a""b,§c`, + Output: [][]string{{`a""b`, `c`}}, + LazyQuotes: true, +}, { + Name: "BadDoubleQuotes", + Input: `§a∑""b,c`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "TrimQuote", + Input: ` §"a",§" b",§c`, + Output: [][]string{{"a", " b", "c"}}, + TrimLeadingSpace: true, +}, { + Name: "BadBareQuote", + Input: `§a ∑"word","b"`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "BadTrailingQuote", + Input: `§"a word",b∑"`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "ExtraneousQuote", + Input: `§"a ∑"word","b"`, + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "BadFieldCount", + Input: "§a,§b,§c\n¶∑§d,§e", + Errors: []error{nil, &ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "BadFieldCountMultiple", + Input: "§a,§b,§c\n¶∑§d,§e\n¶∑§f", + Errors: []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "BadFieldCount1", + Input: `§∑a,§b,§c`, + Errors: []error{&ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 2, +}, { + Name: "FieldCount", + Input: "§a,§b,§c\n¶§d,§e", + Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, +}, { + Name: "TrailingCommaEOF", + Input: "§a,§b,§c,§", + Output: [][]string{{"a", "b", "c", ""}}, +}, { + Name: "TrailingCommaEOL", + Input: "§a,§b,§c,§\n", + Output: [][]string{{"a", "b", "c", ""}}, +}, { + Name: "TrailingCommaSpaceEOF", + Input: "§a,§b,§c, §", + Output: [][]string{{"a", "b", "c", ""}}, + TrimLeadingSpace: true, +}, { + Name: "TrailingCommaSpaceEOL", + Input: "§a,§b,§c, §\n", + Output: [][]string{{"a", "b", "c", ""}}, + TrimLeadingSpace: true, +}, { + Name: "TrailingCommaLine3", + Input: "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§", + Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, + TrimLeadingSpace: true, +}, { + Name: "NotTrailingComma3", + Input: "§a,§b,§c,§ \n", + Output: [][]string{{"a", "b", "c", " "}}, +}, { + Name: "CommaFieldTest", + Input: `§x,§y,§z,§w +¶§x,§y,§z,§ +¶§x,§y,§,§ +¶§x,§,§,§ +¶§,§,§,§ +¶§"x",§"y",§"z",§"w" +¶§"x",§"y",§"z",§"" +¶§"x",§"y",§"",§"" +¶§"x",§"",§"",§"" +¶§"",§"",§"",§"" +`, + Output: [][]string{ + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + }, +}, { + Name: "TrailingCommaIneffective1", + Input: "§a,§b,§\n¶§c,§d,§e", + Output: [][]string{ + {"a", "b", ""}, + {"c", "d", "e"}, + }, + TrimLeadingSpace: true, +}, { + Name: "ReadAllReuseRecord", + Input: "§a,§b\n¶§c,§d", + Output: [][]string{ + {"a", "b"}, + {"c", "d"}, + }, + ReuseRecord: true, +}, { + Name: "StartLine1", // Issue 19019 + Input: "§a,\"b\nc∑\"d,e", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "StartLine2", + Input: "§a,§b\n¶§\"d\n\n,e∑", + Errors: []error{nil, &ParseError{Err: ErrQuote}}, + Output: [][]string{{"a", "b"}}, +}, { + Name: "CRLFInQuotedField", // Issue 21201 + Input: "§A,§\"Hello\r\nHi\",§B\r\n", + Output: [][]string{ + {"A", "Hello\nHi", "B"}, + }, +}, { + Name: "BinaryBlobField", // Issue 19410 + Input: "§x09\x41\xb4\x1c,§aktau", + Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, +}, { + Name: "TrailingCR", + Input: "§field1,§field2\r", + Output: [][]string{{"field1", "field2"}}, +}, { + Name: "QuotedTrailingCR", + Input: "§\"field\"\r", + Output: [][]string{{"field"}}, +}, { + Name: "QuotedTrailingCRCR", + Input: "§\"field∑\"\r\r", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "FieldCR", + Input: "§field\rfield\r", + Output: [][]string{{"field\rfield"}}, +}, { + Name: "FieldCRCR", + Input: "§field\r\rfield\r\r", + Output: [][]string{{"field\r\rfield\r"}}, +}, { + Name: "FieldCRCRLF", + Input: "§field\r\r\n¶§field\r\r\n", + Output: [][]string{{"field\r"}, {"field\r"}}, +}, { + Name: "FieldCRCRLFCR", + Input: "§field\r\r\n¶§\rfield\r\r\n\r", + Output: [][]string{{"field\r"}, {"\rfield\r"}}, +}, { + Name: "FieldCRCRLFCRCR", + Input: "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r", + Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, +}, { + Name: "MultiFieldCRCRLFCRCR", + Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§", + Output: [][]string{ + {"field1", "field2\r"}, + {"\r\rfield1", "field2\r"}, + {"\r\r", ""}, + }, +}, { + Name: "NonASCIICommaAndComment", + Input: "§a£§b,c£ \t§d,e\n€ comment\n", + Output: [][]string{{"a", "b,c", "d,e"}}, + TrimLeadingSpace: true, + Comma: '£', + Comment: '€', +}, { + Name: "NonASCIICommaAndCommentWithQuotes", + Input: "§a€§\" b,\"€§ c\nλ comment\n", + Output: [][]string{{"a", " b,", " c"}}, + Comma: '€', + Comment: 'λ', +}, { + // λ and θ start with the same byte. + // This tests that the parser doesn't confuse such characters. + Name: "NonASCIICommaConfusion", + Input: "§\"abθcd\"λ§efθgh", + Output: [][]string{{"abθcd", "efθgh"}}, + Comma: 'λ', + Comment: '€', +}, { + Name: "NonASCIICommentConfusion", + Input: "§λ\n¶§λ\nθ\n¶§λ\n", + Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, + Comment: 'θ', +}, { + Name: "QuotedFieldMultipleLF", + Input: "§\"\n\n\n\n\"", + Output: [][]string{{"\n\n\n\n"}}, +}, { + Name: "MultipleCRLF", + Input: "\r\n\r\n\r\n\r\n", +}, { + // The implementation may read each line in several chunks if it doesn't fit entirely + // in the read buffer, so we should test the code to handle that condition. + Name: "HugeLines", + Input: strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000), + Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, + Comment: '#', +}, { + Name: "QuoteWithTrailingCRLF", + Input: "§\"foo∑\"bar\"\r\n", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "LazyQuoteWithTrailingCRLF", + Input: "§\"foo\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, + LazyQuotes: true, +}, { + Name: "DoubleQuoteWithTrailingCRLF", + Input: "§\"foo\"\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, +}, { + Name: "EvenQuotes", + Input: `§""""""""`, + Output: [][]string{{`"""`}}, +}, { + Name: "OddQuotes", + Input: `§"""""""∑`, + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "LazyOddQuotes", + Input: `§"""""""`, + Output: [][]string{{`"""`}}, + LazyQuotes: true, +}, { + Name: "BadComma1", + Comma: '\n', + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComma2", + Comma: '\r', + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComma3", + Comma: '"', + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComma4", + Comma: utf8.RuneError, + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComment1", + Comment: '\n', + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComment2", + Comment: '\r', + Errors: []error{errInvalidDelim}, +}, { + Name: "BadComment3", + Comment: utf8.RuneError, + Errors: []error{errInvalidDelim}, +}, { + Name: "BadCommaComment", + Comma: 'X', + Comment: 'X', + Errors: []error{errInvalidDelim}, +}} + +func TestRead(t *testing.T) { + newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int, string) { + positions, errPositions, input := makePositions(tt.Input) + r := NewReader(strings.NewReader(input)) + + if tt.Comma != 0 { + r.Comma = tt.Comma + } + r.Comment = tt.Comment + if tt.UseFieldsPerRecord { + r.FieldsPerRecord = tt.FieldsPerRecord + } else { + r.FieldsPerRecord = -1 + } + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + r.ReuseRecord = tt.ReuseRecord + return r, positions, errPositions, input + } + + for _, tt := range readTests { + t.Run(tt.Name, func(t *testing.T) { + r, positions, errPositions, input := newReader(tt) + out, err := r.ReadAll() + if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil { + if !reflect.DeepEqual(err, wantErr) { + t.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr) + } + if out != nil { + t.Fatalf("ReadAll() output:\ngot %q\nwant nil", out) + } + } else { + if err != nil { + t.Fatalf("unexpected Readall() error: %v", err) + } + if !reflect.DeepEqual(out, tt.Output) { + t.Fatalf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + } + } + + // Check input offset after call ReadAll() + inputByteSize := len(input) + inputOffset := r.InputOffset() + if err == nil && int64(inputByteSize) != inputOffset { + t.Errorf("wrong input offset after call ReadAll():\ngot: %d\nwant: %d\ninput: %s", inputOffset, inputByteSize, input) + } + + // Check field and error positions. + r, _, _, _ = newReader(tt) + for recNum := 0; ; recNum++ { + rec, err := r.Read() + var wantErr error + if recNum < len(tt.Errors) && tt.Errors[recNum] != nil { + wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions) + } else if recNum >= len(tt.Output) { + wantErr = io.EOF + } + if !reflect.DeepEqual(err, wantErr) { + t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr) + } + // ErrFieldCount is explicitly non-fatal. + if err != nil && !errors.Is(err, ErrFieldCount) { + if recNum < len(tt.Output) { + t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output)) + } + break + } + if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) { + t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want) + } + pos := positions[recNum] + if len(pos) != len(rec) { + t.Fatalf("mismatched position length at record %d", recNum) + } + for i := range rec { + line, col := r.FieldPos(i) + if got, want := [2]int{line, col}, pos[i]; got != want { + t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want) + } + } + } + }) + } +} + +// firstError returns the first non-nil error in errs, +// with the position adjusted according to the error's +// index inside positions. +func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error { + for i, err := range errs { + if err != nil { + return errorWithPosition(err, i, positions, errPositions) + } + } + return nil +} + +func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error { + parseErr, ok := err.(*ParseError) + if !ok { + return err + } + if recNum >= len(positions) { + panic(fmt.Errorf("no positions found for error at record %d", recNum)) + } + errPos, ok := errPositions[recNum] + if !ok { + panic(fmt.Errorf("no error position found for error at record %d", recNum)) + } + parseErr1 := *parseErr + parseErr1.StartLine = positions[recNum][0][0] + parseErr1.Line = errPos[0] + parseErr1.Column = errPos[1] + return &parseErr1 +} + +// makePositions returns the expected field positions of all +// the fields in text, the positions of any errors, and the text with the position markers +// removed. +// +// The start of each field is marked with a § symbol; +// CSV lines are separated by ¶ symbols; +// Error positions are marked with ∑ symbols. +func makePositions(text string) ([][][2]int, map[int][2]int, string) { + buf := make([]byte, 0, len(text)) + var positions [][][2]int + errPositions := make(map[int][2]int) + line, col := 1, 1 + recNum := 0 + + for len(text) > 0 { + r, size := utf8.DecodeRuneInString(text) + switch r { + case '\n': + line++ + col = 1 + buf = append(buf, '\n') + case '§': + if len(positions) == 0 { + positions = append(positions, [][2]int{}) + } + positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col}) + case '¶': + positions = append(positions, [][2]int{}) + recNum++ + case '∑': + errPositions[recNum] = [2]int{line, col} + default: + buf = append(buf, text[:size]...) + col += size + } + text = text[size:] + } + return positions, errPositions, string(buf) +} + +// nTimes is an io.Reader which yields the string s n times. +type nTimes struct { + s string + n int + off int +} + +func (r *nTimes) Read(p []byte) (n int, err error) { + for { + if r.n <= 0 || r.s == "" { + return n, io.EOF + } + n0 := copy(p, r.s[r.off:]) + p = p[n0:] + n += n0 + r.off += n0 + if r.off == len(r.s) { + r.off = 0 + r.n-- + } + if len(p) == 0 { + return + } + } +} + +// benchmarkRead measures reading the provided CSV rows data. +// initReader, if non-nil, modifies the Reader before it's used. +func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { + b.ReportAllocs() + r := NewReader(&nTimes{s: rows, n: b.N}) + if initReader != nil { + initReader(r) + } + for { + _, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + b.Fatal(err) + } + } +} + +const benchmarkCSVData = `x,y,z,w +x,y,z, +x,y,, +x,,, +,,, +"x","y","z","w" +"x","y","z","" +"x","y","","" +"x","","","" +"","","","" +` + +func BenchmarkRead(b *testing.B) { + benchmarkRead(b, nil, benchmarkCSVData) +} + +func BenchmarkReadWithFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) +} + +func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) +} + +func BenchmarkReadLargeFields(b *testing.B) { + benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv +,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +`, 3)) +} + +func BenchmarkReadReuseRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordLargeFields(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv +,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +`, 3)) +} diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go new file mode 100644 index 0000000..ff3142f --- /dev/null +++ b/src/encoding/csv/writer.go @@ -0,0 +1,184 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bufio" + "io" + "strings" + "unicode" + "unicode/utf8" +) + +// A Writer writes records using CSV encoding. +// +// As returned by [NewWriter], a Writer writes records terminated by a +// newline and uses ',' as the field delimiter. The exported fields can be +// changed to customize the details before +// the first call to [Writer.Write] or [Writer.WriteAll]. +// +// [Writer.Comma] is the field delimiter. +// +// If [Writer.UseCRLF] is true, +// the Writer ends each output line with \r\n instead of \n. +// +// The writes of individual records are buffered. +// After all data has been written, the client should call the +// [Writer.Flush] method to guarantee all data has been forwarded to +// the underlying [io.Writer]. Any errors that occurred should +// be checked by calling the [Writer.Error] method. +type Writer struct { + Comma rune // Field delimiter (set to ',' by NewWriter) + UseCRLF bool // True to use \r\n as the line terminator + w *bufio.Writer +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + Comma: ',', + w: bufio.NewWriter(w), + } +} + +// Write writes a single CSV record to w along with any necessary quoting. +// A record is a slice of strings with each string being one field. +// Writes are buffered, so [Writer.Flush] must eventually be called to ensure +// that the record is written to the underlying [io.Writer]. +func (w *Writer) Write(record []string) error { + if !validDelim(w.Comma) { + return errInvalidDelim + } + + for n, field := range record { + if n > 0 { + if _, err := w.w.WriteRune(w.Comma); err != nil { + return err + } + } + + // If we don't have to have a quoted field then just + // write out the field and continue to the next field. + if !w.fieldNeedsQuotes(field) { + if _, err := w.w.WriteString(field); err != nil { + return err + } + continue + } + + if err := w.w.WriteByte('"'); err != nil { + return err + } + for len(field) > 0 { + // Search for special characters. + i := strings.IndexAny(field, "\"\r\n") + if i < 0 { + i = len(field) + } + + // Copy verbatim everything before the special character. + if _, err := w.w.WriteString(field[:i]); err != nil { + return err + } + field = field[i:] + + // Encode the special character. + if len(field) > 0 { + var err error + switch field[0] { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + } + field = field[1:] + if err != nil { + return err + } + } + } + if err := w.w.WriteByte('"'); err != nil { + return err + } + } + var err error + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + return err +} + +// Flush writes any buffered data to the underlying [io.Writer]. +// To check if an error occurred during Flush, call [Writer.Error]. +func (w *Writer) Flush() { + w.w.Flush() +} + +// Error reports any error that has occurred during +// a previous [Writer.Write] or [Writer.Flush]. +func (w *Writer) Error() error { + _, err := w.w.Write(nil) + return err +} + +// WriteAll writes multiple CSV records to w using [Writer.Write] and +// then calls [Writer.Flush], returning any error from the Flush. +func (w *Writer) WriteAll(records [][]string) error { + for _, record := range records { + err := w.Write(record) + if err != nil { + return err + } + } + return w.w.Flush() +} + +// fieldNeedsQuotes reports whether our field must be enclosed in quotes. +// Fields with a Comma, fields with a quote or newline, and +// fields which start with a space must be enclosed in quotes. +// We used to quote empty strings, but we do not anymore (as of Go 1.4). +// The two representations should be equivalent, but Postgres distinguishes +// quoted vs non-quoted empty string during database imports, and it has +// an option to force the quoted behavior for non-quoted CSV but it has +// no option to force the non-quoted behavior for quoted CSV, making +// CSV with quoted empty strings strictly less useful. +// Not quoting the empty string also makes this package match the behavior +// of Microsoft Excel and Google Drive. +// For Postgres, quote the data terminating string `\.`. +func (w *Writer) fieldNeedsQuotes(field string) bool { + if field == "" { + return false + } + + if field == `\.` { + return true + } + + if w.Comma < utf8.RuneSelf { + for i := 0; i < len(field); i++ { + c := field[i] + if c == '\n' || c == '\r' || c == '"' || c == byte(w.Comma) { + return true + } + } + } else { + if strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") { + return true + } + } + + r1, _ := utf8.DecodeRuneInString(field) + return unicode.IsSpace(r1) +} diff --git a/src/encoding/csv/writer_test.go b/src/encoding/csv/writer_test.go new file mode 100644 index 0000000..de02347 --- /dev/null +++ b/src/encoding/csv/writer_test.go @@ -0,0 +1,113 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "bytes" + "errors" + "strings" + "testing" +) + +var writeTests = []struct { + Input [][]string + Output string + Error error + UseCRLF bool + Comma rune +}{ + {Input: [][]string{{"abc"}}, Output: "abc\n"}, + {Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true}, + {Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"}, + {Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"}, + {Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"}, + {Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"}, + {Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"}, + {Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"}, + {Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true}, + {Input: [][]string{{"abc\rdef"}}, Output: "\"abcdef\"\r\n", UseCRLF: true}, + {Input: [][]string{{"abc\rdef"}}, Output: "\"abc\rdef\"\n", UseCRLF: false}, + {Input: [][]string{{""}}, Output: "\n"}, + {Input: [][]string{{"", ""}}, Output: ",\n"}, + {Input: [][]string{{"", "", ""}}, Output: ",,\n"}, + {Input: [][]string{{"", "", "a"}}, Output: ",,a\n"}, + {Input: [][]string{{"", "a", ""}}, Output: ",a,\n"}, + {Input: [][]string{{"", "a", "a"}}, Output: ",a,a\n"}, + {Input: [][]string{{"a", "", ""}}, Output: "a,,\n"}, + {Input: [][]string{{"a", "", "a"}}, Output: "a,,a\n"}, + {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, + {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, + {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, + {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"}, + {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"}, + {Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'}, + {Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'}, + {Input: [][]string{{"foo"}}, Comma: '"', Error: errInvalidDelim}, +} + +func TestWrite(t *testing.T) { + for n, tt := range writeTests { + b := &strings.Builder{} + f := NewWriter(b) + f.UseCRLF = tt.UseCRLF + if tt.Comma != 0 { + f.Comma = tt.Comma + } + err := f.WriteAll(tt.Input) + if err != tt.Error { + t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error) + } + out := b.String() + if out != tt.Output { + t.Errorf("#%d: out=%q want %q", n, out, tt.Output) + } + } +} + +type errorWriter struct{} + +func (e errorWriter) Write(b []byte) (int, error) { + return 0, errors.New("Test") +} + +func TestError(t *testing.T) { + b := &bytes.Buffer{} + f := NewWriter(b) + f.Write([]string{"abc"}) + f.Flush() + err := f.Error() + + if err != nil { + t.Errorf("Unexpected error: %s\n", err) + } + + f = NewWriter(errorWriter{}) + f.Write([]string{"abc"}) + f.Flush() + err = f.Error() + + if err == nil { + t.Error("Error should not be nil") + } +} + +var benchmarkWriteData = [][]string{ + {"abc", "def", "12356", "1234567890987654311234432141542132"}, + {"abc", "def", "12356", "1234567890987654311234432141542132"}, + {"abc", "def", "12356", "1234567890987654311234432141542132"}, +} + +func BenchmarkWrite(b *testing.B) { + for i := 0; i < b.N; i++ { + w := NewWriter(&bytes.Buffer{}) + err := w.WriteAll(benchmarkWriteData) + if err != nil { + b.Fatal(err) + } + w.Flush() + } +} diff --git a/src/encoding/encoding.go b/src/encoding/encoding.go new file mode 100644 index 0000000..50acf3c --- /dev/null +++ b/src/encoding/encoding.go @@ -0,0 +1,54 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package encoding defines interfaces shared by other packages that +// convert data to and from byte-level and textual representations. +// Packages that check for these interfaces include encoding/gob, +// encoding/json, and encoding/xml. As a result, implementing an +// interface once can make a type useful in multiple encodings. +// Standard types that implement these interfaces include time.Time and net.IP. +// The interfaces come in pairs that produce and consume encoded data. +// +// Adding encoding/decoding methods to existing types may constitute a breaking change, +// as they can be used for serialization in communicating with programs +// written with different library versions. +// The policy for packages maintained by the Go project is to only allow +// the addition of marshaling functions if no existing, reasonable marshaling exists. +package encoding + +// BinaryMarshaler is the interface implemented by an object that can +// marshal itself into a binary form. +// +// MarshalBinary encodes the receiver into a binary form and returns the result. +type BinaryMarshaler interface { + MarshalBinary() (data []byte, err error) +} + +// BinaryUnmarshaler is the interface implemented by an object that can +// unmarshal a binary representation of itself. +// +// UnmarshalBinary must be able to decode the form generated by MarshalBinary. +// UnmarshalBinary must copy the data if it wishes to retain the data +// after returning. +type BinaryUnmarshaler interface { + UnmarshalBinary(data []byte) error +} + +// TextMarshaler is the interface implemented by an object that can +// marshal itself into a textual form. +// +// MarshalText encodes the receiver into UTF-8-encoded text and returns the result. +type TextMarshaler interface { + MarshalText() (text []byte, err error) +} + +// TextUnmarshaler is the interface implemented by an object that can +// unmarshal a textual representation of itself. +// +// UnmarshalText must be able to decode the form generated by MarshalText. +// UnmarshalText must copy the text if it wishes to retain the text +// after returning. +type TextUnmarshaler interface { + UnmarshalText(text []byte) error +} diff --git a/src/encoding/gob/codec_test.go b/src/encoding/gob/codec_test.go new file mode 100644 index 0000000..ec56ad5 --- /dev/null +++ b/src/encoding/gob/codec_test.go @@ -0,0 +1,1630 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "errors" + "flag" + "math" + "math/rand" + "reflect" + "strings" + "testing" + "time" + "unsafe" +) + +var doFuzzTests = flag.Bool("gob.fuzz", false, "run the fuzz tests, which are large and very slow") + +// Guarantee encoding format by comparing some encodings to hand-written values +type EncodeT struct { + x uint64 + b []byte +} + +var encodeT = []EncodeT{ + {0x00, []byte{0x00}}, + {0x0F, []byte{0x0F}}, + {0xFF, []byte{0xFF, 0xFF}}, + {0xFFFF, []byte{0xFE, 0xFF, 0xFF}}, + {0xFFFFFF, []byte{0xFD, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFF, []byte{0xFC, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFF, []byte{0xFB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFF, []byte{0xFA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFFFF, []byte{0xF9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0xFFFFFFFFFFFFFFFF, []byte{0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}, + {0x1111, []byte{0xFE, 0x11, 0x11}}, + {0x1111111111111111, []byte{0xF8, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + {0x8888888888888888, []byte{0xF8, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88}}, + {1 << 63, []byte{0xF8, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}, +} + +// testError is meant to be used as a deferred function to turn a panic(gobError) into a +// plain test.Error call. +func testError(t *testing.T) { + if e := recover(); e != nil { + t.Error(e.(gobError).err) // Will re-panic if not one of our errors, such as a runtime error. + } +} + +func newDecBuffer(data []byte) *decBuffer { + return &decBuffer{ + data: data, + } +} + +// Test basic encode/decode routines for unsigned integers +func TestUintCodec(t *testing.T) { + defer testError(t) + b := new(encBuffer) + encState := newEncoderState(b) + for _, tt := range encodeT { + b.Reset() + encState.encodeUint(tt.x) + if !bytes.Equal(tt.b, b.Bytes()) { + t.Errorf("encodeUint: %#x encode: expected % x got % x", tt.x, tt.b, b.Bytes()) + } + } + for u := uint64(0); ; u = (u + 1) * 7 { + b.Reset() + encState.encodeUint(u) + decState := newDecodeState(newDecBuffer(b.Bytes())) + v := decState.decodeUint() + if u != v { + t.Errorf("Encode/Decode: sent %#x received %#x", u, v) + } + if u&(1<<63) != 0 { + break + } + } +} + +func verifyInt(i int64, t *testing.T) { + defer testError(t) + var b = new(encBuffer) + encState := newEncoderState(b) + encState.encodeInt(i) + decState := newDecodeState(newDecBuffer(b.Bytes())) + j := decState.decodeInt() + if i != j { + t.Errorf("Encode/Decode: sent %#x received %#x", uint64(i), uint64(j)) + } +} + +// Test basic encode/decode routines for signed integers +func TestIntCodec(t *testing.T) { + for u := uint64(0); ; u = (u + 1) * 7 { + // Do positive and negative values + i := int64(u) + verifyInt(i, t) + verifyInt(-i, t) + verifyInt(^i, t) + if u&(1<<63) != 0 { + break + } + } + verifyInt(-1<<63, t) // a tricky case +} + +// The result of encoding a true boolean with field number 7 +var boolResult = []byte{0x07, 0x01} + +// The result of encoding a number 17 with field number 7 +var signedResult = []byte{0x07, 2 * 17} +var unsignedResult = []byte{0x07, 17} +var floatResult = []byte{0x07, 0xFE, 0x31, 0x40} + +// The result of encoding a number 17+19i with field number 7 +var complexResult = []byte{0x07, 0xFE, 0x31, 0x40, 0xFE, 0x33, 0x40} + +// The result of encoding "hello" with field number 7 +var bytesResult = []byte{0x07, 0x05, 'h', 'e', 'l', 'l', 'o'} + +func newDecodeState(buf *decBuffer) *decoderState { + d := new(decoderState) + d.b = buf + return d +} + +func newEncoderState(b *encBuffer) *encoderState { + b.Reset() + state := &encoderState{enc: nil, b: b} + state.fieldnum = -1 + return state +} + +// Test instruction execution for encoding. +// Do not run the machine yet; instead do individual instructions crafted by hand. +func TestScalarEncInstructions(t *testing.T) { + var b = new(encBuffer) + + // bool + { + var data bool = true + instr := &encInstr{encBool, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(boolResult, b.Bytes()) { + t.Errorf("bool enc instructions: expected % x got % x", boolResult, b.Bytes()) + } + } + + // int + { + b.Reset() + var data int = 17 + instr := &encInstr{encInt, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint + { + b.Reset() + var data uint = 17 + instr := &encInstr{encUint, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int8 + { + b.Reset() + var data int8 = 17 + instr := &encInstr{encInt, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int8 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint8 + { + b.Reset() + var data uint8 = 17 + instr := &encInstr{encUint, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint8 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int16 + { + b.Reset() + var data int16 = 17 + instr := &encInstr{encInt, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int16 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint16 + { + b.Reset() + var data uint16 = 17 + instr := &encInstr{encUint, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint16 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int32 + { + b.Reset() + var data int32 = 17 + instr := &encInstr{encInt, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int32 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint32 + { + b.Reset() + var data uint32 = 17 + instr := &encInstr{encUint, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint32 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // int64 + { + b.Reset() + var data int64 = 17 + instr := &encInstr{encInt, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(signedResult, b.Bytes()) { + t.Errorf("int64 enc instructions: expected % x got % x", signedResult, b.Bytes()) + } + } + + // uint64 + { + b.Reset() + var data uint64 = 17 + instr := &encInstr{encUint, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(unsignedResult, b.Bytes()) { + t.Errorf("uint64 enc instructions: expected % x got % x", unsignedResult, b.Bytes()) + } + } + + // float32 + { + b.Reset() + var data float32 = 17 + instr := &encInstr{encFloat, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(floatResult, b.Bytes()) { + t.Errorf("float32 enc instructions: expected % x got % x", floatResult, b.Bytes()) + } + } + + // float64 + { + b.Reset() + var data float64 = 17 + instr := &encInstr{encFloat, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(floatResult, b.Bytes()) { + t.Errorf("float64 enc instructions: expected % x got % x", floatResult, b.Bytes()) + } + } + + // bytes == []uint8 + { + b.Reset() + data := []byte("hello") + instr := &encInstr{encUint8Array, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(bytesResult, b.Bytes()) { + t.Errorf("bytes enc instructions: expected % x got % x", bytesResult, b.Bytes()) + } + } + + // string + { + b.Reset() + var data string = "hello" + instr := &encInstr{encString, 6, nil, 0} + state := newEncoderState(b) + instr.op(instr, state, reflect.ValueOf(data)) + if !bytes.Equal(bytesResult, b.Bytes()) { + t.Errorf("string enc instructions: expected % x got % x", bytesResult, b.Bytes()) + } + } +} + +func execDec(instr *decInstr, state *decoderState, t *testing.T, value reflect.Value) { + defer testError(t) + v := int(state.decodeUint()) + if v+state.fieldnum != 6 { + t.Fatalf("decoding field number %d, got %d", 6, v+state.fieldnum) + } + instr.op(instr, state, value.Elem()) + state.fieldnum = 6 +} + +func newDecodeStateFromData(data []byte) *decoderState { + b := newDecBuffer(data) + state := newDecodeState(b) + state.fieldnum = -1 + return state +} + +// Test instruction execution for decoding. +// Do not run the machine yet; instead do individual instructions crafted by hand. +func TestScalarDecInstructions(t *testing.T) { + ovfl := errors.New("overflow") + + // bool + { + var data bool + instr := &decInstr{decBool, 6, nil, ovfl} + state := newDecodeStateFromData(boolResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != true { + t.Errorf("bool a = %v not true", data) + } + } + // int + { + var data int + instr := &decInstr{decOpTable[reflect.Int], 6, nil, ovfl} + state := newDecodeStateFromData(signedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("int a = %v not 17", data) + } + } + + // uint + { + var data uint + instr := &decInstr{decOpTable[reflect.Uint], 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uint a = %v not 17", data) + } + } + + // int8 + { + var data int8 + instr := &decInstr{decInt8, 6, nil, ovfl} + state := newDecodeStateFromData(signedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("int8 a = %v not 17", data) + } + } + + // uint8 + { + var data uint8 + instr := &decInstr{decUint8, 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uint8 a = %v not 17", data) + } + } + + // int16 + { + var data int16 + instr := &decInstr{decInt16, 6, nil, ovfl} + state := newDecodeStateFromData(signedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("int16 a = %v not 17", data) + } + } + + // uint16 + { + var data uint16 + instr := &decInstr{decUint16, 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uint16 a = %v not 17", data) + } + } + + // int32 + { + var data int32 + instr := &decInstr{decInt32, 6, nil, ovfl} + state := newDecodeStateFromData(signedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("int32 a = %v not 17", data) + } + } + + // uint32 + { + var data uint32 + instr := &decInstr{decUint32, 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uint32 a = %v not 17", data) + } + } + + // uintptr + { + var data uintptr + instr := &decInstr{decOpTable[reflect.Uintptr], 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uintptr a = %v not 17", data) + } + } + + // int64 + { + var data int64 + instr := &decInstr{decInt64, 6, nil, ovfl} + state := newDecodeStateFromData(signedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("int64 a = %v not 17", data) + } + } + + // uint64 + { + var data uint64 + instr := &decInstr{decUint64, 6, nil, ovfl} + state := newDecodeStateFromData(unsignedResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("uint64 a = %v not 17", data) + } + } + + // float32 + { + var data float32 + instr := &decInstr{decFloat32, 6, nil, ovfl} + state := newDecodeStateFromData(floatResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("float32 a = %v not 17", data) + } + } + + // float64 + { + var data float64 + instr := &decInstr{decFloat64, 6, nil, ovfl} + state := newDecodeStateFromData(floatResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17 { + t.Errorf("float64 a = %v not 17", data) + } + } + + // complex64 + { + var data complex64 + instr := &decInstr{decOpTable[reflect.Complex64], 6, nil, ovfl} + state := newDecodeStateFromData(complexResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17+19i { + t.Errorf("complex a = %v not 17+19i", data) + } + } + + // complex128 + { + var data complex128 + instr := &decInstr{decOpTable[reflect.Complex128], 6, nil, ovfl} + state := newDecodeStateFromData(complexResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != 17+19i { + t.Errorf("complex a = %v not 17+19i", data) + } + } + + // bytes == []uint8 + { + var data []byte + instr := &decInstr{decUint8Slice, 6, nil, ovfl} + state := newDecodeStateFromData(bytesResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if string(data) != "hello" { + t.Errorf(`bytes a = %q not "hello"`, string(data)) + } + } + + // string + { + var data string + instr := &decInstr{decString, 6, nil, ovfl} + state := newDecodeStateFromData(bytesResult) + execDec(instr, state, t, reflect.ValueOf(&data)) + if data != "hello" { + t.Errorf(`bytes a = %q not "hello"`, data) + } + } +} + +func TestEndToEnd(t *testing.T) { + type T2 struct { + T string + } + type T3 struct { + X float64 + Z *int + } + type T1 struct { + A, B, C int + M map[string]*float64 + M2 map[int]T3 + Mstring map[string]string + Mintptr map[int]*int + Mcomp map[complex128]complex128 + Marr map[[2]string][2]*float64 + EmptyMap map[string]int // to check that we receive a non-nil map. + N *[3]float64 + Strs *[2]string + Int64s *[]int64 + RI complex64 + S string + Y []byte + T *T2 + } + pi := 3.14159 + e := 2.71828 + two := 2.0 + meaning := 42 + fingers := 5 + s1 := "string1" + s2 := "string2" + var comp1 complex128 = complex(1.0, 1.0) + var comp2 complex128 = complex(1.0, 1.0) + var arr1 [2]string + arr1[0] = s1 + arr1[1] = s2 + var arr2 [2]string + arr2[0] = s2 + arr2[1] = s1 + var floatArr1 [2]*float64 + floatArr1[0] = &pi + floatArr1[1] = &e + var floatArr2 [2]*float64 + floatArr2[0] = &e + floatArr2[1] = &two + t1 := &T1{ + A: 17, + B: 18, + C: -5, + M: map[string]*float64{"pi": &pi, "e": &e}, + M2: map[int]T3{4: {X: pi, Z: &meaning}, 10: {X: e, Z: &fingers}}, + Mstring: map[string]string{"pi": "3.14", "e": "2.71"}, + Mintptr: map[int]*int{meaning: &fingers, fingers: &meaning}, + Mcomp: map[complex128]complex128{comp1: comp2, comp2: comp1}, + Marr: map[[2]string][2]*float64{arr1: floatArr1, arr2: floatArr2}, + EmptyMap: make(map[string]int), + N: &[3]float64{1.5, 2.5, 3.5}, + Strs: &[2]string{s1, s2}, + Int64s: &[]int64{77, 89, 123412342134}, + RI: 17 - 23i, + S: "Now is the time", + Y: []byte("hello, sailor"), + T: &T2{"this is T2"}, + } + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(t1) + if err != nil { + t.Error("encode:", err) + } + var _t1 T1 + err = NewDecoder(b).Decode(&_t1) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(t1, &_t1) { + t.Errorf("encode expected %v got %v", *t1, _t1) + } + // Be absolutely sure the received map is non-nil. + if t1.EmptyMap == nil { + t.Errorf("nil map sent") + } + if _t1.EmptyMap == nil { + t.Errorf("nil map received") + } +} + +func TestOverflow(t *testing.T) { + type inputT struct { + Maxi int64 + Mini int64 + Maxu uint64 + Maxf float64 + Minf float64 + Maxc complex128 + Minc complex128 + } + var it inputT + var err error + b := new(bytes.Buffer) + enc := NewEncoder(b) + dec := NewDecoder(b) + + // int8 + b.Reset() + it = inputT{ + Maxi: math.MaxInt8 + 1, + } + type outi8 struct { + Maxi int8 + Mini int8 + } + var o1 outi8 + enc.Encode(it) + err = dec.Decode(&o1) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int8:", err) + } + it = inputT{ + Mini: math.MinInt8 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o1) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int8:", err) + } + + // int16 + b.Reset() + it = inputT{ + Maxi: math.MaxInt16 + 1, + } + type outi16 struct { + Maxi int16 + Mini int16 + } + var o2 outi16 + enc.Encode(it) + err = dec.Decode(&o2) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int16:", err) + } + it = inputT{ + Mini: math.MinInt16 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o2) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int16:", err) + } + + // int32 + b.Reset() + it = inputT{ + Maxi: math.MaxInt32 + 1, + } + type outi32 struct { + Maxi int32 + Mini int32 + } + var o3 outi32 + enc.Encode(it) + err = dec.Decode(&o3) + if err == nil || err.Error() != `value for "Maxi" out of range` { + t.Error("wrong overflow error for int32:", err) + } + it = inputT{ + Mini: math.MinInt32 - 1, + } + b.Reset() + enc.Encode(it) + err = dec.Decode(&o3) + if err == nil || err.Error() != `value for "Mini" out of range` { + t.Error("wrong underflow error for int32:", err) + } + + // uint8 + b.Reset() + it = inputT{ + Maxu: math.MaxUint8 + 1, + } + type outu8 struct { + Maxu uint8 + } + var o4 outu8 + enc.Encode(it) + err = dec.Decode(&o4) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint8:", err) + } + + // uint16 + b.Reset() + it = inputT{ + Maxu: math.MaxUint16 + 1, + } + type outu16 struct { + Maxu uint16 + } + var o5 outu16 + enc.Encode(it) + err = dec.Decode(&o5) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint16:", err) + } + + // uint32 + b.Reset() + it = inputT{ + Maxu: math.MaxUint32 + 1, + } + type outu32 struct { + Maxu uint32 + } + var o6 outu32 + enc.Encode(it) + err = dec.Decode(&o6) + if err == nil || err.Error() != `value for "Maxu" out of range` { + t.Error("wrong overflow error for uint32:", err) + } + + // float32 + b.Reset() + it = inputT{ + Maxf: math.MaxFloat32 * 2, + } + type outf32 struct { + Maxf float32 + Minf float32 + } + var o7 outf32 + enc.Encode(it) + err = dec.Decode(&o7) + if err == nil || err.Error() != `value for "Maxf" out of range` { + t.Error("wrong overflow error for float32:", err) + } + + // complex64 + b.Reset() + it = inputT{ + Maxc: complex(math.MaxFloat32*2, math.MaxFloat32*2), + } + type outc64 struct { + Maxc complex64 + Minc complex64 + } + var o8 outc64 + enc.Encode(it) + err = dec.Decode(&o8) + if err == nil || err.Error() != `value for "Maxc" out of range` { + t.Error("wrong overflow error for complex64:", err) + } +} + +func TestNesting(t *testing.T) { + type RT struct { + A string + Next *RT + } + rt := new(RT) + rt.A = "level1" + rt.Next = new(RT) + rt.Next.A = "level2" + b := new(bytes.Buffer) + NewEncoder(b).Encode(rt) + var drt RT + dec := NewDecoder(b) + err := dec.Decode(&drt) + if err != nil { + t.Fatal("decoder error:", err) + } + if drt.A != rt.A { + t.Errorf("nesting: encode expected %v got %v", *rt, drt) + } + if drt.Next == nil { + t.Errorf("nesting: recursion failed") + } + if drt.Next.A != rt.Next.A { + t.Errorf("nesting: encode expected %v got %v", *rt.Next, *drt.Next) + } +} + +// These three structures have the same data with different indirections +type T0 struct { + A int + B int + C int + D int +} +type T1 struct { + A int + B *int + C **int + D ***int +} +type T2 struct { + A ***int + B **int + C *int + D int +} + +func TestAutoIndirection(t *testing.T) { + // First transfer t1 into t0 + var t1 T1 + t1.A = 17 + t1.B = new(int) + *t1.B = 177 + t1.C = new(*int) + *t1.C = new(int) + **t1.C = 1777 + t1.D = new(**int) + *t1.D = new(*int) + **t1.D = new(int) + ***t1.D = 17777 + b := new(bytes.Buffer) + enc := NewEncoder(b) + enc.Encode(t1) + dec := NewDecoder(b) + var t0 T0 + dec.Decode(&t0) + if t0.A != 17 || t0.B != 177 || t0.C != 1777 || t0.D != 17777 { + t.Errorf("t1->t0: expected {17 177 1777 17777}; got %v", t0) + } + + // Now transfer t2 into t0 + var t2 T2 + t2.D = 17777 + t2.C = new(int) + *t2.C = 1777 + t2.B = new(*int) + *t2.B = new(int) + **t2.B = 177 + t2.A = new(**int) + *t2.A = new(*int) + **t2.A = new(int) + ***t2.A = 17 + b.Reset() + enc.Encode(t2) + t0 = T0{} + dec.Decode(&t0) + if t0.A != 17 || t0.B != 177 || t0.C != 1777 || t0.D != 17777 { + t.Errorf("t2->t0 expected {17 177 1777 17777}; got %v", t0) + } + + // Now transfer t0 into t1 + t0 = T0{17, 177, 1777, 17777} + b.Reset() + enc.Encode(t0) + t1 = T1{} + dec.Decode(&t1) + if t1.A != 17 || *t1.B != 177 || **t1.C != 1777 || ***t1.D != 17777 { + t.Errorf("t0->t1 expected {17 177 1777 17777}; got {%d %d %d %d}", t1.A, *t1.B, **t1.C, ***t1.D) + } + + // Now transfer t0 into t2 + b.Reset() + enc.Encode(t0) + t2 = T2{} + dec.Decode(&t2) + if ***t2.A != 17 || **t2.B != 177 || *t2.C != 1777 || t2.D != 17777 { + t.Errorf("t0->t2 expected {17 177 1777 17777}; got {%d %d %d %d}", ***t2.A, **t2.B, *t2.C, t2.D) + } + + // Now do t2 again but without pre-allocated pointers. + b.Reset() + enc.Encode(t0) + ***t2.A = 0 + **t2.B = 0 + *t2.C = 0 + t2.D = 0 + dec.Decode(&t2) + if ***t2.A != 17 || **t2.B != 177 || *t2.C != 1777 || t2.D != 17777 { + t.Errorf("t0->t2 expected {17 177 1777 17777}; got {%d %d %d %d}", ***t2.A, **t2.B, *t2.C, t2.D) + } +} + +type RT0 struct { + A int + B string + C float64 +} +type RT1 struct { + C float64 + B string + A int + NotSet string +} + +func TestReorderedFields(t *testing.T) { + var rt0 RT0 + rt0.A = 17 + rt0.B = "hello" + rt0.C = 3.14159 + b := new(bytes.Buffer) + NewEncoder(b).Encode(rt0) + dec := NewDecoder(b) + var rt1 RT1 + // Wire type is RT0, local type is RT1. + err := dec.Decode(&rt1) + if err != nil { + t.Fatal("decode error:", err) + } + if rt0.A != rt1.A || rt0.B != rt1.B || rt0.C != rt1.C { + t.Errorf("rt1->rt0: expected %v; got %v", rt0, rt1) + } +} + +// Like an RT0 but with fields we'll ignore on the decode side. +type IT0 struct { + A int64 + B string + Ignore_d []int + Ignore_e [3]float64 + Ignore_f bool + Ignore_g string + Ignore_h []byte + Ignore_i *RT1 + Ignore_m map[string]int + C float64 +} + +func TestIgnoredFields(t *testing.T) { + var it0 IT0 + it0.A = 17 + it0.B = "hello" + it0.C = 3.14159 + it0.Ignore_d = []int{1, 2, 3} + it0.Ignore_e[0] = 1.0 + it0.Ignore_e[1] = 2.0 + it0.Ignore_e[2] = 3.0 + it0.Ignore_f = true + it0.Ignore_g = "pay no attention" + it0.Ignore_h = []byte("to the curtain") + it0.Ignore_i = &RT1{3.1, "hi", 7, "hello"} + it0.Ignore_m = map[string]int{"one": 1, "two": 2} + + b := new(bytes.Buffer) + NewEncoder(b).Encode(it0) + dec := NewDecoder(b) + var rt1 RT1 + // Wire type is IT0, local type is RT1. + err := dec.Decode(&rt1) + if err != nil { + t.Error("error: ", err) + } + if int(it0.A) != rt1.A || it0.B != rt1.B || it0.C != rt1.C { + t.Errorf("rt0->rt1: expected %v; got %v", it0, rt1) + } +} + +func TestBadRecursiveType(t *testing.T) { + type Rec ***Rec + var rec Rec + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(&rec) + if err == nil { + t.Error("expected error; got none") + } else if !strings.Contains(err.Error(), "recursive") { + t.Error("expected recursive type error; got", err) + } + // Can't test decode easily because we can't encode one, so we can't pass one to a Decoder. +} + +type Indirect struct { + A ***[3]int + S ***[]int + M ****map[string]int +} + +type Direct struct { + A [3]int + S []int + M map[string]int +} + +func TestIndirectSliceMapArray(t *testing.T) { + // Marshal indirect, unmarshal to direct. + i := new(Indirect) + i.A = new(**[3]int) + *i.A = new(*[3]int) + **i.A = new([3]int) + ***i.A = [3]int{1, 2, 3} + i.S = new(**[]int) + *i.S = new(*[]int) + **i.S = new([]int) + ***i.S = []int{4, 5, 6} + i.M = new(***map[string]int) + *i.M = new(**map[string]int) + **i.M = new(*map[string]int) + ***i.M = new(map[string]int) + ****i.M = map[string]int{"one": 1, "two": 2, "three": 3} + b := new(bytes.Buffer) + NewEncoder(b).Encode(i) + dec := NewDecoder(b) + var d Direct + err := dec.Decode(&d) + if err != nil { + t.Error("error: ", err) + } + if len(d.A) != 3 || d.A[0] != 1 || d.A[1] != 2 || d.A[2] != 3 { + t.Errorf("indirect to direct: d.A is %v not %v", d.A, ***i.A) + } + if len(d.S) != 3 || d.S[0] != 4 || d.S[1] != 5 || d.S[2] != 6 { + t.Errorf("indirect to direct: d.S is %v not %v", d.S, ***i.S) + } + if len(d.M) != 3 || d.M["one"] != 1 || d.M["two"] != 2 || d.M["three"] != 3 { + t.Errorf("indirect to direct: d.M is %v not %v", d.M, ***i.M) + } + // Marshal direct, unmarshal to indirect. + d.A = [3]int{11, 22, 33} + d.S = []int{44, 55, 66} + d.M = map[string]int{"four": 4, "five": 5, "six": 6} + i = new(Indirect) + b.Reset() + NewEncoder(b).Encode(d) + dec = NewDecoder(b) + err = dec.Decode(&i) + if err != nil { + t.Fatal("error: ", err) + } + if len(***i.A) != 3 || (***i.A)[0] != 11 || (***i.A)[1] != 22 || (***i.A)[2] != 33 { + t.Errorf("direct to indirect: ***i.A is %v not %v", ***i.A, d.A) + } + if len(***i.S) != 3 || (***i.S)[0] != 44 || (***i.S)[1] != 55 || (***i.S)[2] != 66 { + t.Errorf("direct to indirect: ***i.S is %v not %v", ***i.S, ***i.S) + } + if len(****i.M) != 3 || (****i.M)["four"] != 4 || (****i.M)["five"] != 5 || (****i.M)["six"] != 6 { + t.Errorf("direct to indirect: ****i.M is %v not %v", ****i.M, d.M) + } +} + +// An interface with several implementations +type Squarer interface { + Square() int +} + +type Int int + +func (i Int) Square() int { + return int(i * i) +} + +type Float float64 + +func (f Float) Square() int { + return int(f * f) +} + +type Vector []int + +func (v Vector) Square() int { + sum := 0 + for _, x := range v { + sum += x * x + } + return sum +} + +type Point struct { + X, Y int +} + +func (p Point) Square() int { + return p.X*p.X + p.Y*p.Y +} + +// A struct with interfaces in it. +type InterfaceItem struct { + I int + Sq1, Sq2, Sq3 Squarer + F float64 + Sq []Squarer +} + +// The same struct without interfaces +type NoInterfaceItem struct { + I int + F float64 +} + +func TestInterface(t *testing.T) { + iVal := Int(3) + fVal := Float(5) + // Sending a Vector will require that the receiver define a type in the middle of + // receiving the value for item2. + vVal := Vector{1, 2, 3} + b := new(bytes.Buffer) + item1 := &InterfaceItem{1, iVal, fVal, vVal, 11.5, []Squarer{iVal, fVal, nil, vVal}} + // Register the types. + Register(Int(0)) + Register(Float(0)) + Register(Vector{}) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := InterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if item2.I != item1.I { + t.Error("normal int did not decode correctly") + } + if item2.Sq1 == nil || item2.Sq1.Square() != iVal.Square() { + t.Error("Int did not decode correctly") + } + if item2.Sq2 == nil || item2.Sq2.Square() != fVal.Square() { + t.Error("Float did not decode correctly") + } + if item2.Sq3 == nil || item2.Sq3.Square() != vVal.Square() { + t.Error("Vector did not decode correctly") + } + if item2.F != item1.F { + t.Error("normal float did not decode correctly") + } + // Now check that we received a slice of Squarers correctly, including a nil element + if len(item1.Sq) != len(item2.Sq) { + t.Fatalf("[]Squarer length wrong: got %d; expected %d", len(item2.Sq), len(item1.Sq)) + } + for i, v1 := range item1.Sq { + v2 := item2.Sq[i] + if v1 == nil || v2 == nil { + if v1 != nil || v2 != nil { + t.Errorf("item %d inconsistent nils", i) + } + } else if v1.Square() != v2.Square() { + t.Errorf("item %d inconsistent values: %v %v", i, v1, v2) + } + } +} + +// A struct with all basic types, stored in interfaces. +type BasicInterfaceItem struct { + Int, Int8, Int16, Int32, Int64 any + Uint, Uint8, Uint16, Uint32, Uint64 any + Float32, Float64 any + Complex64, Complex128 any + Bool any + String any + Bytes any +} + +func TestInterfaceBasic(t *testing.T) { + b := new(bytes.Buffer) + item1 := &BasicInterfaceItem{ + int(1), int8(1), int16(1), int32(1), int64(1), + uint(1), uint8(1), uint16(1), uint32(1), uint64(1), + float32(1), 1.0, + complex64(1i), complex128(1i), + true, + "hello", + []byte("sailor"), + } + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := &BasicInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(item1, item2) { + t.Errorf("encode expected %v got %v", item1, item2) + } + // Hand check a couple for correct types. + if v, ok := item2.Bool.(bool); !ok || !v { + t.Error("boolean should be true") + } + if v, ok := item2.String.(string); !ok || v != item1.String.(string) { + t.Errorf("string should be %v is %v", item1.String, v) + } +} + +type String string + +type PtrInterfaceItem struct { + Str1 any // basic + Str2 any // derived +} + +// We'll send pointers; should receive values. +// Also check that we can register T but send *T. +func TestInterfacePointer(t *testing.T) { + b := new(bytes.Buffer) + str1 := "howdy" + str2 := String("kiddo") + item1 := &PtrInterfaceItem{ + &str1, + &str2, + } + // Register the type. + Register(str2) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := &PtrInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + // Hand test for correct types and values. + if v, ok := item2.Str1.(string); !ok || v != str1 { + t.Errorf("basic string failed: %q should be %q", v, str1) + } + if v, ok := item2.Str2.(String); !ok || v != str2 { + t.Errorf("derived type String failed: %q should be %q", v, str2) + } +} + +func TestIgnoreInterface(t *testing.T) { + iVal := Int(3) + fVal := Float(5) + // Sending a Point will require that the receiver define a type in the middle of + // receiving the value for item2. + pVal := Point{2, 3} + b := new(bytes.Buffer) + item1 := &InterfaceItem{1, iVal, fVal, pVal, 11.5, nil} + // Register the types. + Register(Int(0)) + Register(Float(0)) + Register(Point{}) + err := NewEncoder(b).Encode(item1) + if err != nil { + t.Error("expected no encode error; got", err) + } + + item2 := NoInterfaceItem{} + err = NewDecoder(b).Decode(&item2) + if err != nil { + t.Fatal("decode:", err) + } + if item2.I != item1.I { + t.Error("normal int did not decode correctly") + } + if item2.F != item1.F { + t.Error("normal float did not decode correctly") + } +} + +type U struct { + A int + B string + c float64 + D uint +} + +func TestUnexportedFields(t *testing.T) { + var u0 U + u0.A = 17 + u0.B = "hello" + u0.c = 3.14159 + u0.D = 23 + b := new(bytes.Buffer) + NewEncoder(b).Encode(u0) + dec := NewDecoder(b) + var u1 U + u1.c = 1234. + err := dec.Decode(&u1) + if err != nil { + t.Fatal("decode error:", err) + } + if u0.A != u1.A || u0.B != u1.B || u0.D != u1.D { + t.Errorf("u1->u0: expected %v; got %v", u0, u1) + } + if u1.c != 1234. { + t.Error("u1.c modified") + } +} + +var singletons = []any{ + true, + 7, + uint(10), + 3.2, + "hello", + [3]int{11, 22, 33}, + []float32{0.5, 0.25, 0.125}, + map[string]int{"one": 1, "two": 2}, +} + +func TestDebugSingleton(t *testing.T) { + if debugFunc == nil { + return + } + b := new(bytes.Buffer) + // Accumulate a number of values and print them out all at once. + for _, x := range singletons { + err := NewEncoder(b).Encode(x) + if err != nil { + t.Fatal("encode:", err) + } + } + debugFunc(b) +} + +// A type that won't be defined in the gob until we send it in an interface value. +type OnTheFly struct { + A int +} + +type DT struct { + // X OnTheFly + A int + B string + C float64 + I any + J any + I_nil any + M map[string]int + T [3]int + S []string +} + +func newDT() DT { + var dt DT + dt.A = 17 + dt.B = "hello" + dt.C = 3.14159 + dt.I = 271828 + dt.J = OnTheFly{3} + dt.I_nil = nil + dt.M = map[string]int{"one": 1, "two": 2} + dt.T = [3]int{11, 22, 33} + dt.S = []string{"hi", "joe"} + return dt +} + +func TestDebugStruct(t *testing.T) { + if debugFunc == nil { + return + } + Register(OnTheFly{}) + dt := newDT() + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(dt) + if err != nil { + t.Fatal("encode:", err) + } + debugBuffer := bytes.NewBuffer(b.Bytes()) + dt2 := &DT{} + err = NewDecoder(b).Decode(&dt2) + if err != nil { + t.Error("decode:", err) + } + debugFunc(debugBuffer) +} + +func encFuzzDec(rng *rand.Rand, in any) error { + buf := new(bytes.Buffer) + enc := NewEncoder(buf) + if err := enc.Encode(&in); err != nil { + return err + } + + b := buf.Bytes() + for i, bi := range b { + if rng.Intn(10) < 3 { + b[i] = bi + uint8(rng.Intn(256)) + } + } + + dec := NewDecoder(buf) + var e any + if err := dec.Decode(&e); err != nil { + return err + } + return nil +} + +// This does some "fuzz testing" by attempting to decode a sequence of random bytes. +func TestFuzz(t *testing.T) { + if !*doFuzzTests { + t.Skipf("disabled; run with -gob.fuzz to enable") + } + + // all possible inputs + input := []any{ + new(int), + new(float32), + new(float64), + new(complex128), + &ByteStruct{255}, + &ArrayStruct{}, + &StringStruct{"hello"}, + &GobTest1{0, &StringStruct{"hello"}}, + } + testFuzz(t, time.Now().UnixNano(), 100, input...) +} + +func TestFuzzRegressions(t *testing.T) { + if !*doFuzzTests { + t.Skipf("disabled; run with -gob.fuzz to enable") + } + + // An instance triggering a type name of length ~102 GB. + testFuzz(t, 1328492090837718000, 100, new(float32)) + // An instance triggering a type name of 1.6 GB. + // Note: can take several minutes to run. + testFuzz(t, 1330522872628565000, 100, new(int)) +} + +func testFuzz(t *testing.T, seed int64, n int, input ...any) { + for _, e := range input { + t.Logf("seed=%d n=%d e=%T", seed, n, e) + rng := rand.New(rand.NewSource(seed)) + for i := 0; i < n; i++ { + encFuzzDec(rng, e) + } + } +} + +// TestFuzzOneByte tries to decode corrupted input sequences +// and checks that no panic occurs. +func TestFuzzOneByte(t *testing.T) { + if !*doFuzzTests { + t.Skipf("disabled; run with -gob.fuzz to enable") + } + + buf := new(strings.Builder) + Register(OnTheFly{}) + dt := newDT() + if err := NewEncoder(buf).Encode(dt); err != nil { + t.Fatal(err) + } + s := buf.String() + + indices := make([]int, 0, len(s)) + for i := 0; i < len(s); i++ { + switch i { + case 14, 167, 231, 265: // a slice length, corruptions are not handled yet. + continue + case 248: + // Large map size, which currently causes an out of memory panic. + // See golang.org/issue/24308 and golang.org/issue/20221. + continue + } + indices = append(indices, i) + } + if testing.Short() { + indices = []int{1, 111, 178} // known fixed panics + } + for _, i := range indices { + for j := 0; j < 256; j += 3 { + b := []byte(s) + b[i] ^= byte(j) + var e DT + func() { + defer func() { + if p := recover(); p != nil { + t.Errorf("crash for b[%d] ^= 0x%x", i, j) + panic(p) + } + }() + err := NewDecoder(bytes.NewReader(b)).Decode(&e) + _ = err + }() + } + } +} + +// Don't crash, just give error with invalid type id. +// Issue 9649. +func TestErrorInvalidTypeId(t *testing.T) { + data := []byte{0x01, 0x00, 0x01, 0x00} + d := NewDecoder(bytes.NewReader(data)) + // When running d.Decode(&foo) the first time the decoder stops + // after []byte{0x01, 0x00} and reports an errBadType. Running + // d.Decode(&foo) again on exactly the same input sequence should + // give another errBadType, but instead caused a panic because + // decoderMap wasn't cleaned up properly after the first error. + for i := 0; i < 2; i++ { + var foo struct{} + err := d.Decode(&foo) + if err != errBadType { + t.Fatalf("decode: expected %s, got %s", errBadType, err) + } + } +} + +type LargeSliceByte struct { + S []byte +} + +type LargeSliceInt8 struct { + S []int8 +} + +type StringPair struct { + A, B string +} + +type LargeSliceStruct struct { + S []StringPair +} + +type LargeSliceString struct { + S []string +} + +func testEncodeDecode(t *testing.T, in, out any) { + t.Helper() + var b bytes.Buffer + err := NewEncoder(&b).Encode(in) + if err != nil { + t.Fatal("encode:", err) + } + err = NewDecoder(&b).Decode(out) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(in, out) { + t.Errorf("output mismatch") + } +} + +func TestLargeSlice(t *testing.T) { + t.Run("byte", func(t *testing.T) { + if unsafe.Sizeof(uintptr(0)) > 4 { + t.Parallel() // Only run in parallel in a large address space + } + s := make([]byte, 10<<21) + for i := range s { + s[i] = byte(i) + } + st := &LargeSliceByte{S: s} + rt := &LargeSliceByte{} + testEncodeDecode(t, st, rt) + }) + t.Run("int8", func(t *testing.T) { + if unsafe.Sizeof(uintptr(0)) > 4 { + t.Parallel() + } + s := make([]int8, 10<<21) + for i := range s { + s[i] = int8(i) + } + st := &LargeSliceInt8{S: s} + rt := &LargeSliceInt8{} + testEncodeDecode(t, st, rt) + }) + t.Run("struct", func(t *testing.T) { + if unsafe.Sizeof(uintptr(0)) > 4 { + t.Parallel() + } + s := make([]StringPair, 1<<21) + for i := range s { + s[i].A = string(rune(i)) + s[i].B = s[i].A + } + st := &LargeSliceStruct{S: s} + rt := &LargeSliceStruct{} + testEncodeDecode(t, st, rt) + }) + t.Run("string", func(t *testing.T) { + if unsafe.Sizeof(uintptr(0)) > 4 { + t.Parallel() + } + s := make([]string, 1<<21) + for i := range s { + s[i] = string(rune(i)) + } + st := &LargeSliceString{S: s} + rt := &LargeSliceString{} + testEncodeDecode(t, st, rt) + }) +} + +func TestLocalRemoteTypesMismatch(t *testing.T) { + // Test data is from https://go.dev/issue/62117. + testData := []byte{9, 127, 3, 1, 2, 255, 128, 0, 0, 0, 3, 255, 128, 0} + + var v []*struct{} + buf := bytes.NewBuffer(testData) + err := NewDecoder(buf).Decode(&v) + if err == nil { + t.Error("Encode/Decode: expected error but got err == nil") + } +} diff --git a/src/encoding/gob/debug.go b/src/encoding/gob/debug.go new file mode 100644 index 0000000..a6b1a74 --- /dev/null +++ b/src/encoding/gob/debug.go @@ -0,0 +1,731 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Delete the next line to include in the gob package. +// +//go:build ignore + +package gob + +// This file is not normally included in the gob package. Used only for debugging the package itself. +// Except for reading uints, it is an implementation of a reader that is independent of +// the one implemented by Decoder. +// To enable the Debug function, delete the +build ignore line above and do +// go install + +import ( + "bytes" + "fmt" + "io" + "os" + "strings" + "sync" +) + +var dumpBytes = false // If true, print the remaining bytes in the input buffer at each item. + +// Init installs the debugging facility. If this file is not compiled in the +// package, the tests in codec_test.go are no-ops. +func init() { + debugFunc = Debug +} + +var ( + blanks = bytes.Repeat([]byte{' '}, 3*10) + empty = []byte(": <empty>\n") + tabs = strings.Repeat("\t", 100) +) + +// tab indents itself when printed. +type tab int + +func (t tab) String() string { + n := int(t) + if n > len(tabs) { + n = len(tabs) + } + return tabs[0:n] +} + +func (t tab) print() { + fmt.Fprint(os.Stderr, t) +} + +// A peekReader wraps an io.Reader, allowing one to peek ahead to see +// what's coming without stealing the data from the client of the Reader. +type peekReader struct { + r io.Reader + data []byte // read-ahead data +} + +// newPeekReader returns a peekReader that wraps r. +func newPeekReader(r io.Reader) *peekReader { + return &peekReader{r: r} +} + +// Read is the usual method. It will first take data that has been read ahead. +func (p *peekReader) Read(b []byte) (n int, err error) { + if len(p.data) == 0 { + return p.r.Read(b) + } + // Satisfy what's possible from the read-ahead data. + n = copy(b, p.data) + // Move data down to beginning of slice, to avoid endless growth + copy(p.data, p.data[n:]) + p.data = p.data[:len(p.data)-n] + return +} + +// peek returns as many bytes as possible from the unread +// portion of the stream, up to the length of b. +func (p *peekReader) peek(b []byte) (n int, err error) { + if len(p.data) > 0 { + n = copy(b, p.data) + if n == len(b) { + return + } + b = b[n:] + } + if len(b) == 0 { + return + } + m, e := io.ReadFull(p.r, b) + if m > 0 { + p.data = append(p.data, b[:m]...) + } + n += m + if e == io.ErrUnexpectedEOF { + // That means m > 0 but we reached EOF. If we got data + // we won't complain about not being able to peek enough. + if n > 0 { + e = nil + } else { + e = io.EOF + } + } + return n, e +} + +type debugger struct { + mutex sync.Mutex + remain int // the number of bytes known to remain in the input + remainingKnown bool // the value of 'remain' is valid + r *peekReader + wireType map[typeId]*wireType + tmp []byte // scratch space for decoding uints. +} + +// dump prints the next nBytes of the input. +// It arranges to print the output aligned from call to +// call, to make it easy to see what has been consumed. +func (deb *debugger) dump(format string, args ...any) { + if !dumpBytes { + return + } + fmt.Fprintf(os.Stderr, format+" ", args...) + if !deb.remainingKnown { + return + } + if deb.remain < 0 { + fmt.Fprintf(os.Stderr, "remaining byte count is negative! %d\n", deb.remain) + return + } + data := make([]byte, deb.remain) + n, _ := deb.r.peek(data) + if n == 0 { + os.Stderr.Write(empty) + return + } + b := new(bytes.Buffer) + fmt.Fprintf(b, "[%d]{\n", deb.remain) + // Blanks until first byte + lineLength := 0 + if n := len(data); n%10 != 0 { + lineLength = 10 - n%10 + fmt.Fprintf(b, "\t%s", blanks[:lineLength*3]) + } + // 10 bytes per line + for len(data) > 0 { + if lineLength == 0 { + fmt.Fprint(b, "\t") + } + m := 10 - lineLength + lineLength = 0 + if m > len(data) { + m = len(data) + } + fmt.Fprintf(b, "% x\n", data[:m]) + data = data[m:] + } + fmt.Fprint(b, "}\n") + os.Stderr.Write(b.Bytes()) +} + +// Debug prints a human-readable representation of the gob data read from r. +// It is a no-op unless debugging was enabled when the package was built. +func Debug(r io.Reader) { + err := debug(r) + if err != nil { + fmt.Fprintf(os.Stderr, "gob debug: %s\n", err) + } +} + +// debug implements Debug, but catches panics and returns +// them as errors to be printed by Debug. +func debug(r io.Reader) (err error) { + defer catchError(&err) + fmt.Fprintln(os.Stderr, "Start of debugging") + deb := &debugger{ + r: newPeekReader(r), + wireType: make(map[typeId]*wireType), + tmp: make([]byte, 16), + } + if b, ok := r.(*bytes.Buffer); ok { + deb.remain = b.Len() + deb.remainingKnown = true + } + deb.gobStream() + return +} + +// note that we've consumed some bytes +func (deb *debugger) consumed(n int) { + if deb.remainingKnown { + deb.remain -= n + } +} + +// int64 decodes and returns the next integer, which must be present. +// Don't call this if you could be at EOF. +func (deb *debugger) int64() int64 { + return toInt(deb.uint64()) +} + +// uint64 returns and decodes the next unsigned integer, which must be present. +// Don't call this if you could be at EOF. +// TODO: handle errors better. +func (deb *debugger) uint64() uint64 { + n, w, err := decodeUintReader(deb.r, deb.tmp) + if err != nil { + errorf("debug: read error: %s", err) + } + deb.consumed(w) + return n +} + +// GobStream: +// +// DelimitedMessage* (until EOF) +func (deb *debugger) gobStream() { + // Make sure we're single-threaded through here. + deb.mutex.Lock() + defer deb.mutex.Unlock() + + for deb.delimitedMessage(0) { + } +} + +// DelimitedMessage: +// +// uint(lengthOfMessage) Message +func (deb *debugger) delimitedMessage(indent tab) bool { + for { + n := deb.loadBlock(true) + if n < 0 { + return false + } + deb.dump("Delimited message of length %d", n) + deb.message(indent) + } + return true +} + +// loadBlock preps us to read a message +// of the length specified next in the input. It returns +// the length of the block. The argument tells whether +// an EOF is acceptable now. If it is and one is found, +// the return value is negative. +func (deb *debugger) loadBlock(eofOK bool) int { + n64, w, err := decodeUintReader(deb.r, deb.tmp) // deb.uint64 will error at EOF + if err != nil { + if eofOK && err == io.EOF { + return -1 + } + errorf("debug: unexpected error: %s", err) + } + deb.consumed(w) + n := int(n64) + if n < 0 { + errorf("huge value for message length: %d", n64) + } + return int(n) +} + +// Message: +// +// TypeSequence TypedValue +// +// TypeSequence +// +// (TypeDefinition DelimitedTypeDefinition*)? +// +// DelimitedTypeDefinition: +// +// uint(lengthOfTypeDefinition) TypeDefinition +// +// TypedValue: +// +// int(typeId) Value +func (deb *debugger) message(indent tab) bool { + for { + // Convert the uint64 to a signed integer typeId + uid := deb.int64() + id := typeId(uid) + deb.dump("type id=%d", id) + if id < 0 { + deb.typeDefinition(indent, -id) + n := deb.loadBlock(false) + deb.dump("Message of length %d", n) + continue + } else { + deb.value(indent, id) + break + } + } + return true +} + +// Helper methods to make it easy to scan a type descriptor. + +// common returns the CommonType at the input point. +func (deb *debugger) common() CommonType { + fieldNum := -1 + name := "" + id := typeId(0) + for { + delta := deb.delta(-1) + if delta == 0 { + break + } + fieldNum += delta + switch fieldNum { + case 0: + name = deb.string() + case 1: + // Id typeId + id = deb.typeId() + default: + errorf("corrupted CommonType, delta is %d fieldNum is %d", delta, fieldNum) + } + } + return CommonType{name, id} +} + +// uint returns the unsigned int at the input point, as a uint (not uint64). +func (deb *debugger) uint() uint { + return uint(deb.uint64()) +} + +// int returns the signed int at the input point, as an int (not int64). +func (deb *debugger) int() int { + return int(deb.int64()) +} + +// typeId returns the type id at the input point. +func (deb *debugger) typeId() typeId { + return typeId(deb.int64()) +} + +// string returns the string at the input point. +func (deb *debugger) string() string { + x := int(deb.uint64()) + b := make([]byte, x) + nb, _ := deb.r.Read(b) + if nb != x { + errorf("corrupted type") + } + deb.consumed(nb) + return string(b) +} + +// delta returns the field delta at the input point. The expect argument, +// if non-negative, identifies what the value should be. +func (deb *debugger) delta(expect int) int { + delta := int(deb.uint64()) + if delta < 0 || (expect >= 0 && delta != expect) { + errorf("decode: corrupted type: delta %d expected %d", delta, expect) + } + return delta +} + +// TypeDefinition: +// +// [int(-typeId) (already read)] encodingOfWireType +func (deb *debugger) typeDefinition(indent tab, id typeId) { + deb.dump("type definition for id %d", id) + // Encoding is of a wireType. Decode the structure as usual + fieldNum := -1 + wire := new(wireType) + // A wireType defines a single field. + delta := deb.delta(-1) + fieldNum += delta + switch fieldNum { + case 0: // array type, one field of {{Common}, elem, length} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of elem + deb.delta(1) + id := deb.typeId() + // Field number 3 is length + deb.delta(1) + length := deb.int() + wire.ArrayT = &arrayType{com, id, length} + + case 1: // slice type, one field of {{Common}, elem} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of elem + deb.delta(1) + id := deb.typeId() + wire.SliceT = &sliceType{com, id} + + case 2: // struct type, one field of {{Common}, []fieldType} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is slice of FieldType + deb.delta(1) + numField := int(deb.uint()) + field := make([]*fieldType, numField) + for i := 0; i < numField; i++ { + field[i] = new(fieldType) + deb.delta(1) // field 0 of fieldType: name + field[i].Name = deb.string() + deb.delta(1) // field 1 of fieldType: id + field[i].Id = deb.typeId() + deb.delta(0) // end of fieldType + } + wire.StructT = &structType{com, field} + + case 3: // map type, one field of {{Common}, key, elem} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + // Field number 1 is type Id of key + deb.delta(1) + keyId := deb.typeId() + // Field number 2 is type Id of elem + deb.delta(1) + elemId := deb.typeId() + wire.MapT = &mapType{com, keyId, elemId} + case 4: // GobEncoder type, one field of {{Common}} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + wire.GobEncoderT = &gobEncoderType{com} + case 5: // BinaryMarshaler type, one field of {{Common}} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + wire.BinaryMarshalerT = &gobEncoderType{com} + case 6: // TextMarshaler type, one field of {{Common}} + // Field number 0 is CommonType + deb.delta(1) + com := deb.common() + wire.TextMarshalerT = &gobEncoderType{com} + default: + errorf("bad field in type %d", fieldNum) + } + deb.printWireType(indent, wire) + deb.delta(0) // end inner type (arrayType, etc.) + deb.delta(0) // end wireType + // Remember we've seen this type. + deb.wireType[id] = wire +} + +// Value: +// +// SingletonValue | StructValue +func (deb *debugger) value(indent tab, id typeId) { + wire, ok := deb.wireType[id] + if ok && wire.StructT != nil { + deb.structValue(indent, id) + } else { + deb.singletonValue(indent, id) + } +} + +// SingletonValue: +// +// uint(0) FieldValue +func (deb *debugger) singletonValue(indent tab, id typeId) { + deb.dump("Singleton value") + // is it a builtin type? + wire := deb.wireType[id] + if builtinIdToType(id) == nil && wire == nil { + errorf("type id %d not defined", id) + } + m := deb.uint64() + if m != 0 { + errorf("expected zero; got %d", m) + } + deb.fieldValue(indent, id) +} + +// InterfaceValue: +// +// NilInterfaceValue | NonNilInterfaceValue +func (deb *debugger) interfaceValue(indent tab) { + deb.dump("Start of interface value") + if nameLen := deb.uint64(); nameLen == 0 { + deb.nilInterfaceValue(indent) + } else { + deb.nonNilInterfaceValue(indent, int(nameLen)) + } +} + +// NilInterfaceValue: +// +// uint(0) [already read] +func (deb *debugger) nilInterfaceValue(indent tab) int { + fmt.Fprintf(os.Stderr, "%snil interface\n", indent) + return 0 +} + +// NonNilInterfaceValue: +// +// ConcreteTypeName TypeSequence InterfaceContents +// +// ConcreteTypeName: +// +// uint(lengthOfName) [already read=n] name +// +// InterfaceContents: +// +// int(concreteTypeId) DelimitedValue +// +// DelimitedValue: +// +// uint(length) Value +func (deb *debugger) nonNilInterfaceValue(indent tab, nameLen int) { + // ConcreteTypeName + b := make([]byte, nameLen) + deb.r.Read(b) // TODO: CHECK THESE READS!! + deb.consumed(nameLen) + name := string(b) + + for { + id := deb.typeId() + if id < 0 { + deb.typeDefinition(indent, -id) + n := deb.loadBlock(false) + deb.dump("Nested message of length %d", n) + } else { + // DelimitedValue + x := deb.uint64() // in case we want to ignore the value; we don't. + fmt.Fprintf(os.Stderr, "%sinterface value, type %q id=%d; valueLength %d\n", indent, name, id, x) + deb.value(indent, id) + break + } + } +} + +// printCommonType prints a common type; used by printWireType. +func (deb *debugger) printCommonType(indent tab, kind string, common *CommonType) { + indent.print() + fmt.Fprintf(os.Stderr, "%s %q id=%d\n", kind, common.Name, common.Id) +} + +// printWireType prints the contents of a wireType. +func (deb *debugger) printWireType(indent tab, wire *wireType) { + fmt.Fprintf(os.Stderr, "%stype definition {\n", indent) + indent++ + switch { + case wire.ArrayT != nil: + deb.printCommonType(indent, "array", &wire.ArrayT.CommonType) + fmt.Fprintf(os.Stderr, "%slen %d\n", indent+1, wire.ArrayT.Len) + fmt.Fprintf(os.Stderr, "%selemid %d\n", indent+1, wire.ArrayT.Elem) + case wire.MapT != nil: + deb.printCommonType(indent, "map", &wire.MapT.CommonType) + fmt.Fprintf(os.Stderr, "%skey id=%d\n", indent+1, wire.MapT.Key) + fmt.Fprintf(os.Stderr, "%selem id=%d\n", indent+1, wire.MapT.Elem) + case wire.SliceT != nil: + deb.printCommonType(indent, "slice", &wire.SliceT.CommonType) + fmt.Fprintf(os.Stderr, "%selem id=%d\n", indent+1, wire.SliceT.Elem) + case wire.StructT != nil: + deb.printCommonType(indent, "struct", &wire.StructT.CommonType) + for i, field := range wire.StructT.Field { + fmt.Fprintf(os.Stderr, "%sfield %d:\t%s\tid=%d\n", indent+1, i, field.Name, field.Id) + } + case wire.GobEncoderT != nil: + deb.printCommonType(indent, "GobEncoder", &wire.GobEncoderT.CommonType) + } + indent-- + fmt.Fprintf(os.Stderr, "%s}\n", indent) +} + +// fieldValue prints a value of any type, such as a struct field. +// FieldValue: +// +// builtinValue | ArrayValue | MapValue | SliceValue | StructValue | InterfaceValue +func (deb *debugger) fieldValue(indent tab, id typeId) { + if builtinIdToType(id) != nil { + if id == tInterface { + deb.interfaceValue(indent) + } else { + deb.printBuiltin(indent, id) + } + return + } + wire, ok := deb.wireType[id] + if !ok { + errorf("type id %d not defined", id) + } + switch { + case wire.ArrayT != nil: + deb.arrayValue(indent, wire) + case wire.MapT != nil: + deb.mapValue(indent, wire) + case wire.SliceT != nil: + deb.sliceValue(indent, wire) + case wire.StructT != nil: + deb.structValue(indent, id) + case wire.GobEncoderT != nil: + deb.gobEncoderValue(indent, id) + default: + panic("bad wire type for field") + } +} + +// printBuiltin prints a value not of a fundamental type, that is, +// one whose type is known to gobs at bootstrap time. +func (deb *debugger) printBuiltin(indent tab, id typeId) { + switch id { + case tBool: + x := deb.int64() + if x == 0 { + fmt.Fprintf(os.Stderr, "%sfalse\n", indent) + } else { + fmt.Fprintf(os.Stderr, "%strue\n", indent) + } + case tInt: + x := deb.int64() + fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) + case tUint: + x := deb.uint64() + fmt.Fprintf(os.Stderr, "%s%d\n", indent, x) + case tFloat: + x := deb.uint64() + fmt.Fprintf(os.Stderr, "%s%g\n", indent, float64FromBits(x)) + case tComplex: + r := deb.uint64() + i := deb.uint64() + fmt.Fprintf(os.Stderr, "%s%g+%gi\n", indent, float64FromBits(r), float64FromBits(i)) + case tBytes: + x := int(deb.uint64()) + b := make([]byte, x) + deb.r.Read(b) + deb.consumed(x) + fmt.Fprintf(os.Stderr, "%s{% x}=%q\n", indent, b, b) + case tString: + x := int(deb.uint64()) + b := make([]byte, x) + deb.r.Read(b) + deb.consumed(x) + fmt.Fprintf(os.Stderr, "%s%q\n", indent, b) + default: + panic("unknown builtin") + } +} + +// ArrayValue: +// +// uint(n) FieldValue*n +func (deb *debugger) arrayValue(indent tab, wire *wireType) { + elemId := wire.ArrayT.Elem + u := deb.uint64() + length := int(u) + for i := 0; i < length; i++ { + deb.fieldValue(indent, elemId) + } + if length != wire.ArrayT.Len { + fmt.Fprintf(os.Stderr, "%s(wrong length for array: %d should be %d)\n", indent, length, wire.ArrayT.Len) + } +} + +// MapValue: +// +// uint(n) (FieldValue FieldValue)*n [n (key, value) pairs] +func (deb *debugger) mapValue(indent tab, wire *wireType) { + keyId := wire.MapT.Key + elemId := wire.MapT.Elem + u := deb.uint64() + length := int(u) + for i := 0; i < length; i++ { + deb.fieldValue(indent+1, keyId) + deb.fieldValue(indent+1, elemId) + } +} + +// SliceValue: +// +// uint(n) (n FieldValue) +func (deb *debugger) sliceValue(indent tab, wire *wireType) { + elemId := wire.SliceT.Elem + u := deb.uint64() + length := int(u) + deb.dump("Start of slice of length %d", length) + + for i := 0; i < length; i++ { + deb.fieldValue(indent, elemId) + } +} + +// StructValue: +// +// (uint(fieldDelta) FieldValue)* +func (deb *debugger) structValue(indent tab, id typeId) { + deb.dump("Start of struct value of %q id=%d\n<<\n", id.name(), id) + fmt.Fprintf(os.Stderr, "%s%s struct {\n", indent, id.name()) + wire, ok := deb.wireType[id] + if !ok { + errorf("type id %d not defined", id) + } + strct := wire.StructT + fieldNum := -1 + indent++ + for { + delta := deb.uint64() + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + fieldNum += int(delta) + if fieldNum < 0 || fieldNum >= len(strct.Field) { + deb.dump("field number out of range: prevField=%d delta=%d", fieldNum-int(delta), delta) + break + } + fmt.Fprintf(os.Stderr, "%sfield %d:\t%s\n", indent, fieldNum, wire.StructT.Field[fieldNum].Name) + deb.fieldValue(indent+1, strct.Field[fieldNum].Id) + } + indent-- + fmt.Fprintf(os.Stderr, "%s} // end %s struct\n", indent, id.name()) + deb.dump(">> End of struct value of type %d %q", id, id.name()) +} + +// GobEncoderValue: +// +// uint(n) byte*n +func (deb *debugger) gobEncoderValue(indent tab, id typeId) { + len := deb.uint64() + deb.dump("GobEncoder value of %q id=%d, length %d\n", id.name(), id, len) + fmt.Fprintf(os.Stderr, "%s%s (implements GobEncoder)\n", indent, id.name()) + data := make([]byte, len) + _, err := deb.r.Read(data) + if err != nil { + errorf("gobEncoder data read: %s", err) + } + fmt.Fprintf(os.Stderr, "%s[% .2x]\n", indent+1, data) +} diff --git a/src/encoding/gob/dec_helpers.go b/src/encoding/gob/dec_helpers.go new file mode 100644 index 0000000..44a74e2 --- /dev/null +++ b/src/encoding/gob/dec_helpers.go @@ -0,0 +1,548 @@ +// Code generated by go run decgen.go -output dec_helpers.go; DO NOT EDIT. + +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "math" + "reflect" +) + +var decArrayHelper = map[reflect.Kind]decHelper{ + reflect.Bool: decBoolArray, + reflect.Complex64: decComplex64Array, + reflect.Complex128: decComplex128Array, + reflect.Float32: decFloat32Array, + reflect.Float64: decFloat64Array, + reflect.Int: decIntArray, + reflect.Int16: decInt16Array, + reflect.Int32: decInt32Array, + reflect.Int64: decInt64Array, + reflect.Int8: decInt8Array, + reflect.String: decStringArray, + reflect.Uint: decUintArray, + reflect.Uint16: decUint16Array, + reflect.Uint32: decUint32Array, + reflect.Uint64: decUint64Array, + reflect.Uintptr: decUintptrArray, +} + +var decSliceHelper = map[reflect.Kind]decHelper{ + reflect.Bool: decBoolSlice, + reflect.Complex64: decComplex64Slice, + reflect.Complex128: decComplex128Slice, + reflect.Float32: decFloat32Slice, + reflect.Float64: decFloat64Slice, + reflect.Int: decIntSlice, + reflect.Int16: decInt16Slice, + reflect.Int32: decInt32Slice, + reflect.Int64: decInt64Slice, + reflect.Int8: decInt8Slice, + reflect.String: decStringSlice, + reflect.Uint: decUintSlice, + reflect.Uint16: decUint16Slice, + reflect.Uint32: decUint32Slice, + reflect.Uint64: decUint64Slice, + reflect.Uintptr: decUintptrSlice, +} + +func decBoolArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decBoolSlice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decBoolSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]bool) + if !ok { + // It is kind bool but not type bool. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding bool array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + slice[i] = state.decodeUint() != 0 + } + return true +} + +func decComplex64Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decComplex64Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decComplex64Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]complex64) + if !ok { + // It is kind complex64 but not type complex64. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding complex64 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + real := float32FromBits(state.decodeUint(), ovfl) + imag := float32FromBits(state.decodeUint(), ovfl) + slice[i] = complex(float32(real), float32(imag)) + } + return true +} + +func decComplex128Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decComplex128Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decComplex128Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]complex128) + if !ok { + // It is kind complex128 but not type complex128. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding complex128 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + real := float64FromBits(state.decodeUint()) + imag := float64FromBits(state.decodeUint()) + slice[i] = complex(real, imag) + } + return true +} + +func decFloat32Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decFloat32Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decFloat32Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]float32) + if !ok { + // It is kind float32 but not type float32. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding float32 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + slice[i] = float32(float32FromBits(state.decodeUint(), ovfl)) + } + return true +} + +func decFloat64Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decFloat64Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decFloat64Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]float64) + if !ok { + // It is kind float64 but not type float64. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding float64 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + slice[i] = float64FromBits(state.decodeUint()) + } + return true +} + +func decIntArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decIntSlice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decIntSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]int) + if !ok { + // It is kind int but not type int. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding int array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeInt() + // MinInt and MaxInt + if x < ^int64(^uint(0)>>1) || int64(^uint(0)>>1) < x { + error_(ovfl) + } + slice[i] = int(x) + } + return true +} + +func decInt16Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decInt16Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decInt16Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]int16) + if !ok { + // It is kind int16 but not type int16. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding int16 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeInt() + if x < math.MinInt16 || math.MaxInt16 < x { + error_(ovfl) + } + slice[i] = int16(x) + } + return true +} + +func decInt32Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decInt32Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decInt32Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]int32) + if !ok { + // It is kind int32 but not type int32. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding int32 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeInt() + if x < math.MinInt32 || math.MaxInt32 < x { + error_(ovfl) + } + slice[i] = int32(x) + } + return true +} + +func decInt64Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decInt64Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decInt64Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]int64) + if !ok { + // It is kind int64 but not type int64. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding int64 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + slice[i] = state.decodeInt() + } + return true +} + +func decInt8Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decInt8Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decInt8Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]int8) + if !ok { + // It is kind int8 but not type int8. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding int8 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeInt() + if x < math.MinInt8 || math.MaxInt8 < x { + error_(ovfl) + } + slice[i] = int8(x) + } + return true +} + +func decStringArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decStringSlice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decStringSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]string) + if !ok { + // It is kind string but not type string. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding string array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + u := state.decodeUint() + n := int(u) + if n < 0 || uint64(n) != u || n > state.b.Len() { + errorf("length of string exceeds input size (%d bytes)", u) + } + if n > state.b.Len() { + errorf("string data too long for buffer: %d", n) + } + // Read the data. + data := state.b.Bytes() + if len(data) < n { + errorf("invalid string length %d: exceeds input size %d", n, len(data)) + } + slice[i] = string(data[:n]) + state.b.Drop(n) + } + return true +} + +func decUintArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decUintSlice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decUintSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]uint) + if !ok { + // It is kind uint but not type uint. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding uint array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeUint() + /*TODO if math.MaxUint32 < x { + error_(ovfl) + }*/ + slice[i] = uint(x) + } + return true +} + +func decUint16Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decUint16Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decUint16Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]uint16) + if !ok { + // It is kind uint16 but not type uint16. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding uint16 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeUint() + if math.MaxUint16 < x { + error_(ovfl) + } + slice[i] = uint16(x) + } + return true +} + +func decUint32Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decUint32Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decUint32Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]uint32) + if !ok { + // It is kind uint32 but not type uint32. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding uint32 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeUint() + if math.MaxUint32 < x { + error_(ovfl) + } + slice[i] = uint32(x) + } + return true +} + +func decUint64Array(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decUint64Slice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decUint64Slice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]uint64) + if !ok { + // It is kind uint64 but not type uint64. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding uint64 array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + slice[i] = state.decodeUint() + } + return true +} + +func decUintptrArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return decUintptrSlice(state, v.Slice(0, v.Len()), length, ovfl) +} + +func decUintptrSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]uintptr) + if !ok { + // It is kind uintptr but not type uintptr. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding uintptr array or slice: length exceeds input size (%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + x := state.decodeUint() + if uint64(^uintptr(0)) < x { + error_(ovfl) + } + slice[i] = uintptr(x) + } + return true +} + +// growSlice is called for a slice that we only partially allocated, +// to grow it up to length. +func growSlice[E any](v reflect.Value, ps *[]E, length int) { + var zero E + s := *ps + s = append(s, zero) + cp := cap(s) + if cp > length { + cp = length + } + s = s[:cp] + v.Set(reflect.ValueOf(s)) + *ps = s +} diff --git a/src/encoding/gob/decgen.go b/src/encoding/gob/decgen.go new file mode 100644 index 0000000..27a30ea --- /dev/null +++ b/src/encoding/gob/decgen.go @@ -0,0 +1,265 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// encgen writes the helper functions for encoding. Intended to be +// used with go generate; see the invocation in encode.go. + +// TODO: We could do more by being unsafe. Add a -unsafe flag? + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "log" + "os" +) + +var output = flag.String("output", "dec_helpers.go", "file name to write") + +type Type struct { + lower string + upper string + decoder string +} + +var types = []Type{ + { + "bool", + "Bool", + `slice[i] = state.decodeUint() != 0`, + }, + { + "complex64", + "Complex64", + `real := float32FromBits(state.decodeUint(), ovfl) + imag := float32FromBits(state.decodeUint(), ovfl) + slice[i] = complex(float32(real), float32(imag))`, + }, + { + "complex128", + "Complex128", + `real := float64FromBits(state.decodeUint()) + imag := float64FromBits(state.decodeUint()) + slice[i] = complex(real, imag)`, + }, + { + "float32", + "Float32", + `slice[i] = float32(float32FromBits(state.decodeUint(), ovfl))`, + }, + { + "float64", + "Float64", + `slice[i] = float64FromBits(state.decodeUint())`, + }, + { + "int", + "Int", + `x := state.decodeInt() + // MinInt and MaxInt + if x < ^int64(^uint(0)>>1) || int64(^uint(0)>>1) < x { + error_(ovfl) + } + slice[i] = int(x)`, + }, + { + "int16", + "Int16", + `x := state.decodeInt() + if x < math.MinInt16 || math.MaxInt16 < x { + error_(ovfl) + } + slice[i] = int16(x)`, + }, + { + "int32", + "Int32", + `x := state.decodeInt() + if x < math.MinInt32 || math.MaxInt32 < x { + error_(ovfl) + } + slice[i] = int32(x)`, + }, + { + "int64", + "Int64", + `slice[i] = state.decodeInt()`, + }, + { + "int8", + "Int8", + `x := state.decodeInt() + if x < math.MinInt8 || math.MaxInt8 < x { + error_(ovfl) + } + slice[i] = int8(x)`, + }, + { + "string", + "String", + `u := state.decodeUint() + n := int(u) + if n < 0 || uint64(n) != u || n > state.b.Len() { + errorf("length of string exceeds input size (%d bytes)", u) + } + if n > state.b.Len() { + errorf("string data too long for buffer: %d", n) + } + // Read the data. + data := state.b.Bytes() + if len(data) < n { + errorf("invalid string length %d: exceeds input size %d", n, len(data)) + } + slice[i] = string(data[:n]) + state.b.Drop(n)`, + }, + { + "uint", + "Uint", + `x := state.decodeUint() + /*TODO if math.MaxUint32 < x { + error_(ovfl) + }*/ + slice[i] = uint(x)`, + }, + { + "uint16", + "Uint16", + `x := state.decodeUint() + if math.MaxUint16 < x { + error_(ovfl) + } + slice[i] = uint16(x)`, + }, + { + "uint32", + "Uint32", + `x := state.decodeUint() + if math.MaxUint32 < x { + error_(ovfl) + } + slice[i] = uint32(x)`, + }, + { + "uint64", + "Uint64", + `slice[i] = state.decodeUint()`, + }, + { + "uintptr", + "Uintptr", + `x := state.decodeUint() + if uint64(^uintptr(0)) < x { + error_(ovfl) + } + slice[i] = uintptr(x)`, + }, + // uint8 Handled separately. +} + +func main() { + log.SetFlags(0) + log.SetPrefix("decgen: ") + flag.Parse() + if flag.NArg() != 0 { + log.Fatal("usage: decgen [--output filename]") + } + var b bytes.Buffer + fmt.Fprintf(&b, "// Code generated by go run decgen.go -output %s; DO NOT EDIT.\n", *output) + fmt.Fprint(&b, header) + printMaps(&b, "Array") + fmt.Fprint(&b, "\n") + printMaps(&b, "Slice") + for _, t := range types { + fmt.Fprintf(&b, arrayHelper, t.lower, t.upper) + fmt.Fprintf(&b, sliceHelper, t.lower, t.upper, t.decoder) + } + fmt.Fprintf(&b, trailer) + source, err := format.Source(b.Bytes()) + if err != nil { + log.Fatal("source format error:", err) + } + fd, err := os.Create(*output) + if err != nil { + log.Fatal(err) + } + if _, err := fd.Write(source); err != nil { + log.Fatal(err) + } +} + +func printMaps(b *bytes.Buffer, upperClass string) { + fmt.Fprintf(b, "var dec%sHelper = map[reflect.Kind]decHelper{\n", upperClass) + for _, t := range types { + fmt.Fprintf(b, "reflect.%s: dec%s%s,\n", t.upper, t.upper, upperClass) + } + fmt.Fprintf(b, "}\n") +} + +const header = ` +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "math" + "reflect" +) + +` + +const arrayHelper = ` +func dec%[2]sArray(state *decoderState, v reflect.Value, length int, ovfl error) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return dec%[2]sSlice(state, v.Slice(0, v.Len()), length, ovfl) +} +` + +const sliceHelper = ` +func dec%[2]sSlice(state *decoderState, v reflect.Value, length int, ovfl error) bool { + slice, ok := v.Interface().([]%[1]s) + if !ok { + // It is kind %[1]s but not type %[1]s. TODO: We can handle this unsafely. + return false + } + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding %[1]s array or slice: length exceeds input size (%%d elements)", length) + } + if i >= len(slice) { + // This is a slice that we only partially allocated. + growSlice(v, &slice, length) + } + %[3]s + } + return true +} +` + +const trailer = ` +// growSlice is called for a slice that we only partially allocated, +// to grow it up to length. +func growSlice[E any](v reflect.Value, ps *[]E, length int) { + var zero E + s := *ps + s = append(s, zero) + cp := cap(s) + if cp > length { + cp = length + } + s = s[:cp] + v.Set(reflect.ValueOf(s)) + *ps = s +} +` diff --git a/src/encoding/gob/decode.go b/src/encoding/gob/decode.go new file mode 100644 index 0000000..d178b2b --- /dev/null +++ b/src/encoding/gob/decode.go @@ -0,0 +1,1306 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run decgen.go -output dec_helpers.go + +package gob + +import ( + "encoding" + "errors" + "internal/saferio" + "io" + "math" + "math/bits" + "reflect" +) + +var ( + errBadUint = errors.New("gob: encoded unsigned integer out of range") + errBadType = errors.New("gob: unknown type id or corrupted data") + errRange = errors.New("gob: bad data: field numbers out of bounds") +) + +type decHelper func(state *decoderState, v reflect.Value, length int, ovfl error) bool + +// decoderState is the execution state of an instance of the decoder. A new state +// is created for nested objects. +type decoderState struct { + dec *Decoder + // The buffer is stored with an extra indirection because it may be replaced + // if we load a type during decode (when reading an interface value). + b *decBuffer + fieldnum int // the last field number read. + next *decoderState // for free list +} + +// decBuffer is an extremely simple, fast implementation of a read-only byte buffer. +// It is initialized by calling Size and then copying the data into the slice returned by Bytes(). +type decBuffer struct { + data []byte + offset int // Read offset. +} + +func (d *decBuffer) Read(p []byte) (int, error) { + n := copy(p, d.data[d.offset:]) + if n == 0 && len(p) != 0 { + return 0, io.EOF + } + d.offset += n + return n, nil +} + +func (d *decBuffer) Drop(n int) { + if n > d.Len() { + panic("drop") + } + d.offset += n +} + +func (d *decBuffer) ReadByte() (byte, error) { + if d.offset >= len(d.data) { + return 0, io.EOF + } + c := d.data[d.offset] + d.offset++ + return c, nil +} + +func (d *decBuffer) Len() int { + return len(d.data) - d.offset +} + +func (d *decBuffer) Bytes() []byte { + return d.data[d.offset:] +} + +// SetBytes sets the buffer to the bytes, discarding any existing data. +func (d *decBuffer) SetBytes(data []byte) { + d.data = data + d.offset = 0 +} + +func (d *decBuffer) Reset() { + d.data = d.data[0:0] + d.offset = 0 +} + +// We pass the bytes.Buffer separately for easier testing of the infrastructure +// without requiring a full Decoder. +func (dec *Decoder) newDecoderState(buf *decBuffer) *decoderState { + d := dec.freeList + if d == nil { + d = new(decoderState) + d.dec = dec + } else { + dec.freeList = d.next + } + d.b = buf + return d +} + +func (dec *Decoder) freeDecoderState(d *decoderState) { + d.next = dec.freeList + dec.freeList = d +} + +func overflow(name string) error { + return errors.New(`value for "` + name + `" out of range`) +} + +// decodeUintReader reads an encoded unsigned integer from an io.Reader. +// Used only by the Decoder to read the message length. +func decodeUintReader(r io.Reader, buf []byte) (x uint64, width int, err error) { + width = 1 + n, err := io.ReadFull(r, buf[0:width]) + if n == 0 { + return + } + b := buf[0] + if b <= 0x7f { + return uint64(b), width, nil + } + n = -int(int8(b)) + if n > uint64Size { + err = errBadUint + return + } + width, err = io.ReadFull(r, buf[0:n]) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return + } + // Could check that the high byte is zero but it's not worth it. + for _, b := range buf[0:width] { + x = x<<8 | uint64(b) + } + width++ // +1 for length byte + return +} + +// decodeUint reads an encoded unsigned integer from state.r. +// Does not check for overflow. +func (state *decoderState) decodeUint() (x uint64) { + b, err := state.b.ReadByte() + if err != nil { + error_(err) + } + if b <= 0x7f { + return uint64(b) + } + n := -int(int8(b)) + if n > uint64Size { + error_(errBadUint) + } + buf := state.b.Bytes() + if len(buf) < n { + errorf("invalid uint data length %d: exceeds input size %d", n, len(buf)) + } + // Don't need to check error; it's safe to loop regardless. + // Could check that the high byte is zero but it's not worth it. + for _, b := range buf[0:n] { + x = x<<8 | uint64(b) + } + state.b.Drop(n) + return x +} + +// decodeInt reads an encoded signed integer from state.r. +// Does not check for overflow. +func (state *decoderState) decodeInt() int64 { + x := state.decodeUint() + if x&1 != 0 { + return ^int64(x >> 1) + } + return int64(x >> 1) +} + +// getLength decodes the next uint and makes sure it is a possible +// size for a data item that follows, which means it must fit in a +// non-negative int and fit in the buffer. +func (state *decoderState) getLength() (int, bool) { + n := int(state.decodeUint()) + if n < 0 || state.b.Len() < n || tooBig <= n { + return 0, false + } + return n, true +} + +// decOp is the signature of a decoding operator for a given type. +type decOp func(i *decInstr, state *decoderState, v reflect.Value) + +// The 'instructions' of the decoding machine +type decInstr struct { + op decOp + field int // field number of the wire type + index []int // field access indices for destination type + ovfl error // error message for overflow/underflow (for arrays, of the elements) +} + +// ignoreUint discards a uint value with no destination. +func ignoreUint(i *decInstr, state *decoderState, v reflect.Value) { + state.decodeUint() +} + +// ignoreTwoUints discards a uint value with no destination. It's used to skip +// complex values. +func ignoreTwoUints(i *decInstr, state *decoderState, v reflect.Value) { + state.decodeUint() + state.decodeUint() +} + +// Since the encoder writes no zeros, if we arrive at a decoder we have +// a value to extract and store. The field number has already been read +// (it's how we knew to call this decoder). +// Each decoder is responsible for handling any indirections associated +// with the data structure. If any pointer so reached is nil, allocation must +// be done. + +// decAlloc takes a value and returns a settable value that can +// be assigned to. If the value is a pointer, decAlloc guarantees it points to storage. +// The callers to the individual decoders are expected to have used decAlloc. +// The individual decoders don't need it. +func decAlloc(v reflect.Value) reflect.Value { + for v.Kind() == reflect.Pointer { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + return v +} + +// decBool decodes a uint and stores it as a boolean in value. +func decBool(i *decInstr, state *decoderState, value reflect.Value) { + value.SetBool(state.decodeUint() != 0) +} + +// decInt8 decodes an integer and stores it as an int8 in value. +func decInt8(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeInt() + if v < math.MinInt8 || math.MaxInt8 < v { + error_(i.ovfl) + } + value.SetInt(v) +} + +// decUint8 decodes an unsigned integer and stores it as a uint8 in value. +func decUint8(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeUint() + if math.MaxUint8 < v { + error_(i.ovfl) + } + value.SetUint(v) +} + +// decInt16 decodes an integer and stores it as an int16 in value. +func decInt16(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeInt() + if v < math.MinInt16 || math.MaxInt16 < v { + error_(i.ovfl) + } + value.SetInt(v) +} + +// decUint16 decodes an unsigned integer and stores it as a uint16 in value. +func decUint16(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeUint() + if math.MaxUint16 < v { + error_(i.ovfl) + } + value.SetUint(v) +} + +// decInt32 decodes an integer and stores it as an int32 in value. +func decInt32(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeInt() + if v < math.MinInt32 || math.MaxInt32 < v { + error_(i.ovfl) + } + value.SetInt(v) +} + +// decUint32 decodes an unsigned integer and stores it as a uint32 in value. +func decUint32(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeUint() + if math.MaxUint32 < v { + error_(i.ovfl) + } + value.SetUint(v) +} + +// decInt64 decodes an integer and stores it as an int64 in value. +func decInt64(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeInt() + value.SetInt(v) +} + +// decUint64 decodes an unsigned integer and stores it as a uint64 in value. +func decUint64(i *decInstr, state *decoderState, value reflect.Value) { + v := state.decodeUint() + value.SetUint(v) +} + +// Floating-point numbers are transmitted as uint64s holding the bits +// of the underlying representation. They are sent byte-reversed, with +// the exponent end coming out first, so integer floating point numbers +// (for example) transmit more compactly. This routine does the +// unswizzling. +func float64FromBits(u uint64) float64 { + v := bits.ReverseBytes64(u) + return math.Float64frombits(v) +} + +// float32FromBits decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and returns it. It's a helper function for float32 and complex64. +// It returns a float64 because that's what reflection needs, but its return +// value is known to be accurately representable in a float32. +func float32FromBits(u uint64, ovfl error) float64 { + v := float64FromBits(u) + av := v + if av < 0 { + av = -av + } + // +Inf is OK in both 32- and 64-bit floats. Underflow is always OK. + if math.MaxFloat32 < av && av <= math.MaxFloat64 { + error_(ovfl) + } + return v +} + +// decFloat32 decodes an unsigned integer, treats it as a 32-bit floating-point +// number, and stores it in value. +func decFloat32(i *decInstr, state *decoderState, value reflect.Value) { + value.SetFloat(float32FromBits(state.decodeUint(), i.ovfl)) +} + +// decFloat64 decodes an unsigned integer, treats it as a 64-bit floating-point +// number, and stores it in value. +func decFloat64(i *decInstr, state *decoderState, value reflect.Value) { + value.SetFloat(float64FromBits(state.decodeUint())) +} + +// decComplex64 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex64 in value. +// The real part comes first. +func decComplex64(i *decInstr, state *decoderState, value reflect.Value) { + real := float32FromBits(state.decodeUint(), i.ovfl) + imag := float32FromBits(state.decodeUint(), i.ovfl) + value.SetComplex(complex(real, imag)) +} + +// decComplex128 decodes a pair of unsigned integers, treats them as a +// pair of floating point numbers, and stores them as a complex128 in value. +// The real part comes first. +func decComplex128(i *decInstr, state *decoderState, value reflect.Value) { + real := float64FromBits(state.decodeUint()) + imag := float64FromBits(state.decodeUint()) + value.SetComplex(complex(real, imag)) +} + +// decUint8Slice decodes a byte slice and stores in value a slice header +// describing the data. +// uint8 slices are encoded as an unsigned count followed by the raw bytes. +func decUint8Slice(i *decInstr, state *decoderState, value reflect.Value) { + n, ok := state.getLength() + if !ok { + errorf("bad %s slice length: %d", value.Type(), n) + } + if value.Cap() < n { + safe := saferio.SliceCap[byte](uint64(n)) + if safe < 0 { + errorf("%s slice too big: %d elements", value.Type(), n) + } + value.Set(reflect.MakeSlice(value.Type(), safe, safe)) + ln := safe + i := 0 + for i < n { + if i >= ln { + // We didn't allocate the entire slice, + // due to using saferio.SliceCap. + // Grow the slice for one more element. + // The slice is full, so this should + // bump up the capacity. + value.Grow(1) + } + // Copy into s up to the capacity or n, + // whichever is less. + ln = value.Cap() + if ln > n { + ln = n + } + value.SetLen(ln) + sub := value.Slice(i, ln) + if _, err := state.b.Read(sub.Bytes()); err != nil { + errorf("error decoding []byte at %d: %s", i, err) + } + i = ln + } + } else { + value.SetLen(n) + if _, err := state.b.Read(value.Bytes()); err != nil { + errorf("error decoding []byte: %s", err) + } + } +} + +// decString decodes byte array and stores in value a string header +// describing the data. +// Strings are encoded as an unsigned count followed by the raw bytes. +func decString(i *decInstr, state *decoderState, value reflect.Value) { + n, ok := state.getLength() + if !ok { + errorf("bad %s slice length: %d", value.Type(), n) + } + // Read the data. + data := state.b.Bytes() + if len(data) < n { + errorf("invalid string length %d: exceeds input size %d", n, len(data)) + } + s := string(data[:n]) + state.b.Drop(n) + value.SetString(s) +} + +// ignoreUint8Array skips over the data for a byte slice value with no destination. +func ignoreUint8Array(i *decInstr, state *decoderState, value reflect.Value) { + n, ok := state.getLength() + if !ok { + errorf("slice length too large") + } + bn := state.b.Len() + if bn < n { + errorf("invalid slice length %d: exceeds input size %d", n, bn) + } + state.b.Drop(n) +} + +// Execution engine + +// The encoder engine is an array of instructions indexed by field number of the incoming +// decoder. It is executed with random access according to field number. +type decEngine struct { + instr []decInstr + numInstr int // the number of active instructions +} + +// decodeSingle decodes a top-level value that is not a struct and stores it in value. +// Such values are preceded by a zero, making them have the memory layout of a +// struct field (although with an illegal field number). +func (dec *Decoder) decodeSingle(engine *decEngine, value reflect.Value) { + state := dec.newDecoderState(&dec.buf) + defer dec.freeDecoderState(state) + state.fieldnum = singletonField + if state.decodeUint() != 0 { + errorf("decode: corrupted data: non-zero delta for singleton") + } + instr := &engine.instr[singletonField] + instr.op(instr, state, value) +} + +// decodeStruct decodes a top-level struct and stores it in value. +// Indir is for the value, not the type. At the time of the call it may +// differ from ut.indir, which was computed when the engine was built. +// This state cannot arise for decodeSingle, which is called directly +// from the user's value, not from the innards of an engine. +func (dec *Decoder) decodeStruct(engine *decEngine, value reflect.Value) { + state := dec.newDecoderState(&dec.buf) + defer dec.freeDecoderState(state) + state.fieldnum = -1 + for state.b.Len() > 0 { + delta := int(state.decodeUint()) + if delta < 0 { + errorf("decode: corrupted data: negative delta") + } + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + if state.fieldnum >= len(engine.instr)-delta { // subtract to compare without overflow + error_(errRange) + } + fieldnum := state.fieldnum + delta + instr := &engine.instr[fieldnum] + var field reflect.Value + if instr.index != nil { + // Otherwise the field is unknown to us and instr.op is an ignore op. + field = value.FieldByIndex(instr.index) + if field.Kind() == reflect.Pointer { + field = decAlloc(field) + } + } + instr.op(instr, state, field) + state.fieldnum = fieldnum + } +} + +var noValue reflect.Value + +// ignoreStruct discards the data for a struct with no destination. +func (dec *Decoder) ignoreStruct(engine *decEngine) { + state := dec.newDecoderState(&dec.buf) + defer dec.freeDecoderState(state) + state.fieldnum = -1 + for state.b.Len() > 0 { + delta := int(state.decodeUint()) + if delta < 0 { + errorf("ignore decode: corrupted data: negative delta") + } + if delta == 0 { // struct terminator is zero delta fieldnum + break + } + fieldnum := state.fieldnum + delta + if fieldnum >= len(engine.instr) { + error_(errRange) + } + instr := &engine.instr[fieldnum] + instr.op(instr, state, noValue) + state.fieldnum = fieldnum + } +} + +// ignoreSingle discards the data for a top-level non-struct value with no +// destination. It's used when calling Decode with a nil value. +func (dec *Decoder) ignoreSingle(engine *decEngine) { + state := dec.newDecoderState(&dec.buf) + defer dec.freeDecoderState(state) + state.fieldnum = singletonField + delta := int(state.decodeUint()) + if delta != 0 { + errorf("decode: corrupted data: non-zero delta for singleton") + } + instr := &engine.instr[singletonField] + instr.op(instr, state, noValue) +} + +// decodeArrayHelper does the work for decoding arrays and slices. +func (dec *Decoder) decodeArrayHelper(state *decoderState, value reflect.Value, elemOp decOp, length int, ovfl error, helper decHelper) { + if helper != nil && helper(state, value, length, ovfl) { + return + } + instr := &decInstr{elemOp, 0, nil, ovfl} + isPtr := value.Type().Elem().Kind() == reflect.Pointer + ln := value.Len() + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding array or slice: length exceeds input size (%d elements)", length) + } + if i >= ln { + // This is a slice that we only partially allocated. + // Grow it up to length. + value.Grow(1) + cp := value.Cap() + if cp > length { + cp = length + } + value.SetLen(cp) + ln = cp + } + v := value.Index(i) + if isPtr { + v = decAlloc(v) + } + elemOp(instr, state, v) + } +} + +// decodeArray decodes an array and stores it in value. +// The length is an unsigned integer preceding the elements. Even though the length is redundant +// (it's part of the type), it's a useful check and is included in the encoding. +func (dec *Decoder) decodeArray(state *decoderState, value reflect.Value, elemOp decOp, length int, ovfl error, helper decHelper) { + if n := state.decodeUint(); n != uint64(length) { + errorf("length mismatch in decodeArray") + } + dec.decodeArrayHelper(state, value, elemOp, length, ovfl, helper) +} + +// decodeIntoValue is a helper for map decoding. +func decodeIntoValue(state *decoderState, op decOp, isPtr bool, value reflect.Value, instr *decInstr) reflect.Value { + v := value + if isPtr { + v = decAlloc(value) + } + + op(instr, state, v) + return value +} + +// decodeMap decodes a map and stores it in value. +// Maps are encoded as a length followed by key:value pairs. +// Because the internals of maps are not visible to us, we must +// use reflection rather than pointer magic. +func (dec *Decoder) decodeMap(mtyp reflect.Type, state *decoderState, value reflect.Value, keyOp, elemOp decOp, ovfl error) { + n := int(state.decodeUint()) + if value.IsNil() { + value.Set(reflect.MakeMapWithSize(mtyp, n)) + } + keyIsPtr := mtyp.Key().Kind() == reflect.Pointer + elemIsPtr := mtyp.Elem().Kind() == reflect.Pointer + keyInstr := &decInstr{keyOp, 0, nil, ovfl} + elemInstr := &decInstr{elemOp, 0, nil, ovfl} + keyP := reflect.New(mtyp.Key()) + elemP := reflect.New(mtyp.Elem()) + for i := 0; i < n; i++ { + key := decodeIntoValue(state, keyOp, keyIsPtr, keyP.Elem(), keyInstr) + elem := decodeIntoValue(state, elemOp, elemIsPtr, elemP.Elem(), elemInstr) + value.SetMapIndex(key, elem) + keyP.Elem().SetZero() + elemP.Elem().SetZero() + } +} + +// ignoreArrayHelper does the work for discarding arrays and slices. +func (dec *Decoder) ignoreArrayHelper(state *decoderState, elemOp decOp, length int) { + instr := &decInstr{elemOp, 0, nil, errors.New("no error")} + for i := 0; i < length; i++ { + if state.b.Len() == 0 { + errorf("decoding array or slice: length exceeds input size (%d elements)", length) + } + elemOp(instr, state, noValue) + } +} + +// ignoreArray discards the data for an array value with no destination. +func (dec *Decoder) ignoreArray(state *decoderState, elemOp decOp, length int) { + if n := state.decodeUint(); n != uint64(length) { + errorf("length mismatch in ignoreArray") + } + dec.ignoreArrayHelper(state, elemOp, length) +} + +// ignoreMap discards the data for a map value with no destination. +func (dec *Decoder) ignoreMap(state *decoderState, keyOp, elemOp decOp) { + n := int(state.decodeUint()) + keyInstr := &decInstr{keyOp, 0, nil, errors.New("no error")} + elemInstr := &decInstr{elemOp, 0, nil, errors.New("no error")} + for i := 0; i < n; i++ { + keyOp(keyInstr, state, noValue) + elemOp(elemInstr, state, noValue) + } +} + +// decodeSlice decodes a slice and stores it in value. +// Slices are encoded as an unsigned length followed by the elements. +func (dec *Decoder) decodeSlice(state *decoderState, value reflect.Value, elemOp decOp, ovfl error, helper decHelper) { + u := state.decodeUint() + typ := value.Type() + size := uint64(typ.Elem().Size()) + nBytes := u * size + n := int(u) + // Take care with overflow in this calculation. + if n < 0 || uint64(n) != u || nBytes > tooBig || (size > 0 && nBytes/size != u) { + // We don't check n against buffer length here because if it's a slice + // of interfaces, there will be buffer reloads. + errorf("%s slice too big: %d elements of %d bytes", typ.Elem(), u, size) + } + if value.Cap() < n { + safe := saferio.SliceCapWithSize(size, uint64(n)) + if safe < 0 { + errorf("%s slice too big: %d elements of %d bytes", typ.Elem(), u, size) + } + value.Set(reflect.MakeSlice(typ, safe, safe)) + } else { + value.SetLen(n) + } + dec.decodeArrayHelper(state, value, elemOp, n, ovfl, helper) +} + +// ignoreSlice skips over the data for a slice value with no destination. +func (dec *Decoder) ignoreSlice(state *decoderState, elemOp decOp) { + dec.ignoreArrayHelper(state, elemOp, int(state.decodeUint())) +} + +// decodeInterface decodes an interface value and stores it in value. +// Interfaces are encoded as the name of a concrete type followed by a value. +// If the name is empty, the value is nil and no value is sent. +func (dec *Decoder) decodeInterface(ityp reflect.Type, state *decoderState, value reflect.Value) { + // Read the name of the concrete type. + nr := state.decodeUint() + if nr > 1<<31 { // zero is permissible for anonymous types + errorf("invalid type name length %d", nr) + } + if nr > uint64(state.b.Len()) { + errorf("invalid type name length %d: exceeds input size", nr) + } + n := int(nr) + name := state.b.Bytes()[:n] + state.b.Drop(n) + // Allocate the destination interface value. + if len(name) == 0 { + // Copy the nil interface value to the target. + value.SetZero() + return + } + if len(name) > 1024 { + errorf("name too long (%d bytes): %.20q...", len(name), name) + } + // The concrete type must be registered. + typi, ok := nameToConcreteType.Load(string(name)) + if !ok { + errorf("name not registered for interface: %q", name) + } + typ := typi.(reflect.Type) + + // Read the type id of the concrete value. + concreteId := dec.decodeTypeSequence(true) + if concreteId < 0 { + error_(dec.err) + } + // Byte count of value is next; we don't care what it is (it's there + // in case we want to ignore the value by skipping it completely). + state.decodeUint() + // Read the concrete value. + v := allocValue(typ) + dec.decodeValue(concreteId, v) + if dec.err != nil { + error_(dec.err) + } + // Assign the concrete value to the interface. + // Tread carefully; it might not satisfy the interface. + if !typ.AssignableTo(ityp) { + errorf("%s is not assignable to type %s", typ, ityp) + } + // Copy the interface value to the target. + value.Set(v) +} + +// ignoreInterface discards the data for an interface value with no destination. +func (dec *Decoder) ignoreInterface(state *decoderState) { + // Read the name of the concrete type. + n, ok := state.getLength() + if !ok { + errorf("bad interface encoding: name too large for buffer") + } + bn := state.b.Len() + if bn < n { + errorf("invalid interface value length %d: exceeds input size %d", n, bn) + } + state.b.Drop(n) + id := dec.decodeTypeSequence(true) + if id < 0 { + error_(dec.err) + } + // At this point, the decoder buffer contains a delimited value. Just toss it. + n, ok = state.getLength() + if !ok { + errorf("bad interface encoding: data length too large for buffer") + } + state.b.Drop(n) +} + +// decodeGobDecoder decodes something implementing the GobDecoder interface. +// The data is encoded as a byte slice. +func (dec *Decoder) decodeGobDecoder(ut *userTypeInfo, state *decoderState, value reflect.Value) { + // Read the bytes for the value. + n, ok := state.getLength() + if !ok { + errorf("GobDecoder: length too large for buffer") + } + b := state.b.Bytes() + if len(b) < n { + errorf("GobDecoder: invalid data length %d: exceeds input size %d", n, len(b)) + } + b = b[:n] + state.b.Drop(n) + var err error + // We know it's one of these. + switch ut.externalDec { + case xGob: + err = value.Interface().(GobDecoder).GobDecode(b) + case xBinary: + err = value.Interface().(encoding.BinaryUnmarshaler).UnmarshalBinary(b) + case xText: + err = value.Interface().(encoding.TextUnmarshaler).UnmarshalText(b) + } + if err != nil { + error_(err) + } +} + +// ignoreGobDecoder discards the data for a GobDecoder value with no destination. +func (dec *Decoder) ignoreGobDecoder(state *decoderState) { + // Read the bytes for the value. + n, ok := state.getLength() + if !ok { + errorf("GobDecoder: length too large for buffer") + } + bn := state.b.Len() + if bn < n { + errorf("GobDecoder: invalid data length %d: exceeds input size %d", n, bn) + } + state.b.Drop(n) +} + +// Index by Go types. +var decOpTable = [...]decOp{ + reflect.Bool: decBool, + reflect.Int8: decInt8, + reflect.Int16: decInt16, + reflect.Int32: decInt32, + reflect.Int64: decInt64, + reflect.Uint8: decUint8, + reflect.Uint16: decUint16, + reflect.Uint32: decUint32, + reflect.Uint64: decUint64, + reflect.Float32: decFloat32, + reflect.Float64: decFloat64, + reflect.Complex64: decComplex64, + reflect.Complex128: decComplex128, + reflect.String: decString, +} + +// Indexed by gob types. tComplex will be added during type.init(). +var decIgnoreOpMap = map[typeId]decOp{ + tBool: ignoreUint, + tInt: ignoreUint, + tUint: ignoreUint, + tFloat: ignoreUint, + tBytes: ignoreUint8Array, + tString: ignoreUint8Array, + tComplex: ignoreTwoUints, +} + +// decOpFor returns the decoding op for the base type under rt and +// the indirection count to reach it. +func (dec *Decoder) decOpFor(wireId typeId, rt reflect.Type, name string, inProgress map[reflect.Type]*decOp) *decOp { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.externalDec != 0 { + return dec.gobDecodeOpFor(ut) + } + + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[rt]; opPtr != nil { + return opPtr + } + typ := ut.base + var op decOp + k := typ.Kind() + if int(k) < len(decOpTable) { + op = decOpTable[k] + } + if op == nil { + inProgress[rt] = &op + // Special cases + switch t := typ; t.Kind() { + case reflect.Array: + name = "element of " + name + elemId := dec.wireType[wireId].ArrayT.Elem + elemOp := dec.decOpFor(elemId, t.Elem(), name, inProgress) + ovfl := overflow(name) + helper := decArrayHelper[t.Elem().Kind()] + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.decodeArray(state, value, *elemOp, t.Len(), ovfl, helper) + } + + case reflect.Map: + keyId := dec.wireType[wireId].MapT.Key + elemId := dec.wireType[wireId].MapT.Elem + keyOp := dec.decOpFor(keyId, t.Key(), "key of "+name, inProgress) + elemOp := dec.decOpFor(elemId, t.Elem(), "element of "+name, inProgress) + ovfl := overflow(name) + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.decodeMap(t, state, value, *keyOp, *elemOp, ovfl) + } + + case reflect.Slice: + name = "element of " + name + if t.Elem().Kind() == reflect.Uint8 { + op = decUint8Slice + break + } + var elemId typeId + if tt := builtinIdToType(wireId); tt != nil { + elemId = tt.(*sliceType).Elem + } else { + elemId = dec.wireType[wireId].SliceT.Elem + } + elemOp := dec.decOpFor(elemId, t.Elem(), name, inProgress) + ovfl := overflow(name) + helper := decSliceHelper[t.Elem().Kind()] + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.decodeSlice(state, value, *elemOp, ovfl, helper) + } + + case reflect.Struct: + // Generate a closure that calls out to the engine for the nested type. + ut := userType(typ) + enginePtr, err := dec.getDecEnginePtr(wireId, ut) + if err != nil { + error_(err) + } + op = func(i *decInstr, state *decoderState, value reflect.Value) { + // indirect through enginePtr to delay evaluation for recursive structs. + dec.decodeStruct(*enginePtr, value) + } + case reflect.Interface: + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.decodeInterface(t, state, value) + } + } + } + if op == nil { + errorf("decode can't handle type %s", rt) + } + return &op +} + +var maxIgnoreNestingDepth = 10000 + +// decIgnoreOpFor returns the decoding op for a field that has no destination. +func (dec *Decoder) decIgnoreOpFor(wireId typeId, inProgress map[typeId]*decOp, depth int) *decOp { + if depth > maxIgnoreNestingDepth { + error_(errors.New("invalid nesting depth")) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[wireId]; opPtr != nil { + return opPtr + } + op, ok := decIgnoreOpMap[wireId] + if !ok { + inProgress[wireId] = &op + if wireId == tInterface { + // Special case because it's a method: the ignored item might + // define types and we need to record their state in the decoder. + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.ignoreInterface(state) + } + return &op + } + // Special cases + wire := dec.wireType[wireId] + switch { + case wire == nil: + errorf("bad data: undefined type %s", wireId.string()) + case wire.ArrayT != nil: + elemId := wire.ArrayT.Elem + elemOp := dec.decIgnoreOpFor(elemId, inProgress, depth+1) + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.ignoreArray(state, *elemOp, wire.ArrayT.Len) + } + + case wire.MapT != nil: + keyId := dec.wireType[wireId].MapT.Key + elemId := dec.wireType[wireId].MapT.Elem + keyOp := dec.decIgnoreOpFor(keyId, inProgress, depth+1) + elemOp := dec.decIgnoreOpFor(elemId, inProgress, depth+1) + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.ignoreMap(state, *keyOp, *elemOp) + } + + case wire.SliceT != nil: + elemId := wire.SliceT.Elem + elemOp := dec.decIgnoreOpFor(elemId, inProgress, depth+1) + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.ignoreSlice(state, *elemOp) + } + + case wire.StructT != nil: + // Generate a closure that calls out to the engine for the nested type. + enginePtr, err := dec.getIgnoreEnginePtr(wireId) + if err != nil { + error_(err) + } + op = func(i *decInstr, state *decoderState, value reflect.Value) { + // indirect through enginePtr to delay evaluation for recursive structs + state.dec.ignoreStruct(*enginePtr) + } + + case wire.GobEncoderT != nil, wire.BinaryMarshalerT != nil, wire.TextMarshalerT != nil: + op = func(i *decInstr, state *decoderState, value reflect.Value) { + state.dec.ignoreGobDecoder(state) + } + } + } + if op == nil { + errorf("bad data: ignore can't handle type %s", wireId.string()) + } + return &op +} + +// gobDecodeOpFor returns the op for a type that is known to implement +// GobDecoder. +func (dec *Decoder) gobDecodeOpFor(ut *userTypeInfo) *decOp { + rcvrType := ut.user + if ut.decIndir == -1 { + rcvrType = reflect.PointerTo(rcvrType) + } else if ut.decIndir > 0 { + for i := int8(0); i < ut.decIndir; i++ { + rcvrType = rcvrType.Elem() + } + } + var op decOp + op = func(i *decInstr, state *decoderState, value reflect.Value) { + // We now have the base type. We need its address if the receiver is a pointer. + if value.Kind() != reflect.Pointer && rcvrType.Kind() == reflect.Pointer { + value = value.Addr() + } + state.dec.decodeGobDecoder(ut, state, value) + } + return &op +} + +// compatibleType asks: Are these two gob Types compatible? +// Answers the question for basic types, arrays, maps and slices, plus +// GobEncoder/Decoder pairs. +// Structs are considered ok; fields will be checked later. +func (dec *Decoder) compatibleType(fr reflect.Type, fw typeId, inProgress map[reflect.Type]typeId) bool { + if rhs, ok := inProgress[fr]; ok { + return rhs == fw + } + inProgress[fr] = fw + ut := userType(fr) + wire, ok := dec.wireType[fw] + // If wire was encoded with an encoding method, fr must have that method. + // And if not, it must not. + // At most one of the booleans in ut is set. + // We could possibly relax this constraint in the future in order to + // choose the decoding method using the data in the wireType. + // The parentheses look odd but are correct. + if (ut.externalDec == xGob) != (ok && wire.GobEncoderT != nil) || + (ut.externalDec == xBinary) != (ok && wire.BinaryMarshalerT != nil) || + (ut.externalDec == xText) != (ok && wire.TextMarshalerT != nil) { + return false + } + if ut.externalDec != 0 { // This test trumps all others. + return true + } + switch t := ut.base; t.Kind() { + default: + // chan, etc: cannot handle. + return false + case reflect.Bool: + return fw == tBool + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return fw == tInt + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return fw == tUint + case reflect.Float32, reflect.Float64: + return fw == tFloat + case reflect.Complex64, reflect.Complex128: + return fw == tComplex + case reflect.String: + return fw == tString + case reflect.Interface: + return fw == tInterface + case reflect.Array: + if !ok || wire.ArrayT == nil { + return false + } + array := wire.ArrayT + return t.Len() == array.Len && dec.compatibleType(t.Elem(), array.Elem, inProgress) + case reflect.Map: + if !ok || wire.MapT == nil { + return false + } + MapType := wire.MapT + return dec.compatibleType(t.Key(), MapType.Key, inProgress) && dec.compatibleType(t.Elem(), MapType.Elem, inProgress) + case reflect.Slice: + // Is it an array of bytes? + if t.Elem().Kind() == reflect.Uint8 { + return fw == tBytes + } + // Extract and compare element types. + var sw *sliceType + if tt := builtinIdToType(fw); tt != nil { + sw, _ = tt.(*sliceType) + } else if wire != nil { + sw = wire.SliceT + } + elem := userType(t.Elem()).base + return sw != nil && dec.compatibleType(elem, sw.Elem, inProgress) + case reflect.Struct: + return true + } +} + +// typeString returns a human-readable description of the type identified by remoteId. +func (dec *Decoder) typeString(remoteId typeId) string { + typeLock.Lock() + defer typeLock.Unlock() + if t := idToType(remoteId); t != nil { + // globally known type. + return t.string() + } + return dec.wireType[remoteId].string() +} + +// compileSingle compiles the decoder engine for a non-struct top-level value, including +// GobDecoders. +func (dec *Decoder) compileSingle(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err error) { + rt := ut.user + engine = new(decEngine) + engine.instr = make([]decInstr, 1) // one item + name := rt.String() // best we can do + if !dec.compatibleType(rt, remoteId, make(map[reflect.Type]typeId)) { + remoteType := dec.typeString(remoteId) + // Common confusing case: local interface type, remote concrete type. + if ut.base.Kind() == reflect.Interface && remoteId != tInterface { + return nil, errors.New("gob: local interface type " + name + " can only be decoded from remote interface type; received concrete type " + remoteType) + } + return nil, errors.New("gob: decoding into local type " + name + ", received remote type " + remoteType) + } + op := dec.decOpFor(remoteId, rt, name, make(map[reflect.Type]*decOp)) + ovfl := errors.New(`value for "` + name + `" out of range`) + engine.instr[singletonField] = decInstr{*op, singletonField, nil, ovfl} + engine.numInstr = 1 + return +} + +// compileIgnoreSingle compiles the decoder engine for a non-struct top-level value that will be discarded. +func (dec *Decoder) compileIgnoreSingle(remoteId typeId) *decEngine { + engine := new(decEngine) + engine.instr = make([]decInstr, 1) // one item + op := dec.decIgnoreOpFor(remoteId, make(map[typeId]*decOp), 0) + ovfl := overflow(dec.typeString(remoteId)) + engine.instr[0] = decInstr{*op, 0, nil, ovfl} + engine.numInstr = 1 + return engine +} + +// compileDec compiles the decoder engine for a value. If the value is not a struct, +// it calls out to compileSingle. +func (dec *Decoder) compileDec(remoteId typeId, ut *userTypeInfo) (engine *decEngine, err error) { + defer catchError(&err) + rt := ut.base + srt := rt + if srt.Kind() != reflect.Struct || ut.externalDec != 0 { + return dec.compileSingle(remoteId, ut) + } + var wireStruct *structType + // Builtin types can come from global pool; the rest must be defined by the decoder. + // Also we know we're decoding a struct now, so the client must have sent one. + if t := builtinIdToType(remoteId); t != nil { + wireStruct, _ = t.(*structType) + } else { + wire := dec.wireType[remoteId] + if wire == nil { + error_(errBadType) + } + wireStruct = wire.StructT + } + if wireStruct == nil { + errorf("type mismatch in decoder: want struct type %s; got non-struct", rt) + } + engine = new(decEngine) + engine.instr = make([]decInstr, len(wireStruct.Field)) + seen := make(map[reflect.Type]*decOp) + // Loop over the fields of the wire type. + for fieldnum := 0; fieldnum < len(wireStruct.Field); fieldnum++ { + wireField := wireStruct.Field[fieldnum] + if wireField.Name == "" { + errorf("empty name for remote field of type %s", wireStruct.Name) + } + ovfl := overflow(wireField.Name) + // Find the field of the local type with the same name. + localField, present := srt.FieldByName(wireField.Name) + // TODO(r): anonymous names + if !present || !isExported(wireField.Name) { + op := dec.decIgnoreOpFor(wireField.Id, make(map[typeId]*decOp), 0) + engine.instr[fieldnum] = decInstr{*op, fieldnum, nil, ovfl} + continue + } + if !dec.compatibleType(localField.Type, wireField.Id, make(map[reflect.Type]typeId)) { + errorf("wrong type (%s) for received field %s.%s", localField.Type, wireStruct.Name, wireField.Name) + } + op := dec.decOpFor(wireField.Id, localField.Type, localField.Name, seen) + engine.instr[fieldnum] = decInstr{*op, fieldnum, localField.Index, ovfl} + engine.numInstr++ + } + return +} + +// getDecEnginePtr returns the engine for the specified type. +func (dec *Decoder) getDecEnginePtr(remoteId typeId, ut *userTypeInfo) (enginePtr **decEngine, err error) { + rt := ut.user + decoderMap, ok := dec.decoderCache[rt] + if !ok { + decoderMap = make(map[typeId]**decEngine) + dec.decoderCache[rt] = decoderMap + } + if enginePtr, ok = decoderMap[remoteId]; !ok { + // To handle recursive types, mark this engine as underway before compiling. + enginePtr = new(*decEngine) + decoderMap[remoteId] = enginePtr + *enginePtr, err = dec.compileDec(remoteId, ut) + if err != nil { + delete(decoderMap, remoteId) + } + } + return +} + +// emptyStruct is the type we compile into when ignoring a struct value. +type emptyStruct struct{} + +var emptyStructType = reflect.TypeFor[emptyStruct]() + +// getIgnoreEnginePtr returns the engine for the specified type when the value is to be discarded. +func (dec *Decoder) getIgnoreEnginePtr(wireId typeId) (enginePtr **decEngine, err error) { + var ok bool + if enginePtr, ok = dec.ignorerCache[wireId]; !ok { + // To handle recursive types, mark this engine as underway before compiling. + enginePtr = new(*decEngine) + dec.ignorerCache[wireId] = enginePtr + wire := dec.wireType[wireId] + if wire != nil && wire.StructT != nil { + *enginePtr, err = dec.compileDec(wireId, userType(emptyStructType)) + } else { + *enginePtr = dec.compileIgnoreSingle(wireId) + } + if err != nil { + delete(dec.ignorerCache, wireId) + } + } + return +} + +// decodeValue decodes the data stream representing a value and stores it in value. +func (dec *Decoder) decodeValue(wireId typeId, value reflect.Value) { + defer catchError(&dec.err) + // If the value is nil, it means we should just ignore this item. + if !value.IsValid() { + dec.decodeIgnoredValue(wireId) + return + } + // Dereference down to the underlying type. + ut := userType(value.Type()) + base := ut.base + var enginePtr **decEngine + enginePtr, dec.err = dec.getDecEnginePtr(wireId, ut) + if dec.err != nil { + return + } + value = decAlloc(value) + engine := *enginePtr + if st := base; st.Kind() == reflect.Struct && ut.externalDec == 0 { + wt := dec.wireType[wireId] + if engine.numInstr == 0 && st.NumField() > 0 && + wt != nil && len(wt.StructT.Field) > 0 { + name := base.Name() + errorf("type mismatch: no fields matched compiling decoder for %s", name) + } + dec.decodeStruct(engine, value) + } else { + dec.decodeSingle(engine, value) + } +} + +// decodeIgnoredValue decodes the data stream representing a value of the specified type and discards it. +func (dec *Decoder) decodeIgnoredValue(wireId typeId) { + var enginePtr **decEngine + enginePtr, dec.err = dec.getIgnoreEnginePtr(wireId) + if dec.err != nil { + return + } + wire := dec.wireType[wireId] + if wire != nil && wire.StructT != nil { + dec.ignoreStruct(*enginePtr) + } else { + dec.ignoreSingle(*enginePtr) + } +} + +const ( + intBits = 32 << (^uint(0) >> 63) + uintptrBits = 32 << (^uintptr(0) >> 63) +) + +func init() { + var iop, uop decOp + switch intBits { + case 32: + iop = decInt32 + uop = decUint32 + case 64: + iop = decInt64 + uop = decUint64 + default: + panic("gob: unknown size of int/uint") + } + decOpTable[reflect.Int] = iop + decOpTable[reflect.Uint] = uop + + // Finally uintptr + switch uintptrBits { + case 32: + uop = decUint32 + case 64: + uop = decUint64 + default: + panic("gob: unknown size of uintptr") + } + decOpTable[reflect.Uintptr] = uop +} + +// Gob depends on being able to take the address +// of zeroed Values it creates, so use this wrapper instead +// of the standard reflect.Zero. +// Each call allocates once. +func allocValue(t reflect.Type) reflect.Value { + return reflect.New(t).Elem() +} diff --git a/src/encoding/gob/decoder.go b/src/encoding/gob/decoder.go new file mode 100644 index 0000000..c4b6088 --- /dev/null +++ b/src/encoding/gob/decoder.go @@ -0,0 +1,237 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bufio" + "errors" + "internal/saferio" + "io" + "reflect" + "sync" +) + +// tooBig provides a sanity check for sizes; used in several places. Upper limit +// of is 1GB on 32-bit systems, 8GB on 64-bit, allowing room to grow a little +// without overflow. +const tooBig = (1 << 30) << (^uint(0) >> 62) + +// A Decoder manages the receipt of type and data information read from the +// remote side of a connection. It is safe for concurrent use by multiple +// goroutines. +// +// The Decoder does only basic sanity checking on decoded input sizes, +// and its limits are not configurable. Take caution when decoding gob data +// from untrusted sources. +type Decoder struct { + mutex sync.Mutex // each item must be received atomically + r io.Reader // source of the data + buf decBuffer // buffer for more efficient i/o from r + wireType map[typeId]*wireType // map from remote ID to local description + decoderCache map[reflect.Type]map[typeId]**decEngine // cache of compiled engines + ignorerCache map[typeId]**decEngine // ditto for ignored objects + freeList *decoderState // list of free decoderStates; avoids reallocation + countBuf []byte // used for decoding integers while parsing messages + err error +} + +// NewDecoder returns a new decoder that reads from the [io.Reader]. +// If r does not also implement [io.ByteReader], it will be wrapped in a +// [bufio.Reader]. +func NewDecoder(r io.Reader) *Decoder { + dec := new(Decoder) + // We use the ability to read bytes as a plausible surrogate for buffering. + if _, ok := r.(io.ByteReader); !ok { + r = bufio.NewReader(r) + } + dec.r = r + dec.wireType = make(map[typeId]*wireType) + dec.decoderCache = make(map[reflect.Type]map[typeId]**decEngine) + dec.ignorerCache = make(map[typeId]**decEngine) + dec.countBuf = make([]byte, 9) // counts may be uint64s (unlikely!), require 9 bytes + + return dec +} + +// recvType loads the definition of a type. +func (dec *Decoder) recvType(id typeId) { + // Have we already seen this type? That's an error + if id < firstUserId || dec.wireType[id] != nil { + dec.err = errors.New("gob: duplicate type received") + return + } + + // Type: + wire := new(wireType) + dec.decodeValue(tWireType, reflect.ValueOf(wire)) + if dec.err != nil { + return + } + // Remember we've seen this type. + dec.wireType[id] = wire +} + +var errBadCount = errors.New("invalid message length") + +// recvMessage reads the next count-delimited item from the input. It is the converse +// of Encoder.writeMessage. It returns false on EOF or other error reading the message. +func (dec *Decoder) recvMessage() bool { + // Read a count. + nbytes, _, err := decodeUintReader(dec.r, dec.countBuf) + if err != nil { + dec.err = err + return false + } + if nbytes >= tooBig { + dec.err = errBadCount + return false + } + dec.readMessage(int(nbytes)) + return dec.err == nil +} + +// readMessage reads the next nbytes bytes from the input. +func (dec *Decoder) readMessage(nbytes int) { + if dec.buf.Len() != 0 { + // The buffer should always be empty now. + panic("non-empty decoder buffer") + } + // Read the data + var buf []byte + buf, dec.err = saferio.ReadData(dec.r, uint64(nbytes)) + dec.buf.SetBytes(buf) + if dec.err == io.EOF { + dec.err = io.ErrUnexpectedEOF + } +} + +// toInt turns an encoded uint64 into an int, according to the marshaling rules. +func toInt(x uint64) int64 { + i := int64(x >> 1) + if x&1 != 0 { + i = ^i + } + return i +} + +func (dec *Decoder) nextInt() int64 { + n, _, err := decodeUintReader(&dec.buf, dec.countBuf) + if err != nil { + dec.err = err + } + return toInt(n) +} + +func (dec *Decoder) nextUint() uint64 { + n, _, err := decodeUintReader(&dec.buf, dec.countBuf) + if err != nil { + dec.err = err + } + return n +} + +// decodeTypeSequence parses: +// TypeSequence +// +// (TypeDefinition DelimitedTypeDefinition*)? +// +// and returns the type id of the next value. It returns -1 at +// EOF. Upon return, the remainder of dec.buf is the value to be +// decoded. If this is an interface value, it can be ignored by +// resetting that buffer. +func (dec *Decoder) decodeTypeSequence(isInterface bool) typeId { + firstMessage := true + for dec.err == nil { + if dec.buf.Len() == 0 { + if !dec.recvMessage() { + // We can only return io.EOF if the input was empty. + // If we read one or more type spec messages, + // require a data item message to follow. + // If we hit an EOF before that, then give ErrUnexpectedEOF. + if !firstMessage && dec.err == io.EOF { + dec.err = io.ErrUnexpectedEOF + } + break + } + } + // Receive a type id. + id := typeId(dec.nextInt()) + if id >= 0 { + // Value follows. + return id + } + // Type definition for (-id) follows. + dec.recvType(-id) + if dec.err != nil { + break + } + // When decoding an interface, after a type there may be a + // DelimitedValue still in the buffer. Skip its count. + // (Alternatively, the buffer is empty and the byte count + // will be absorbed by recvMessage.) + if dec.buf.Len() > 0 { + if !isInterface { + dec.err = errors.New("extra data in buffer") + break + } + dec.nextUint() + } + firstMessage = false + } + return -1 +} + +// Decode reads the next value from the input stream and stores +// it in the data represented by the empty interface value. +// If e is nil, the value will be discarded. Otherwise, +// the value underlying e must be a pointer to the +// correct type for the next data item received. +// If the input is at EOF, Decode returns [io.EOF] and +// does not modify e. +func (dec *Decoder) Decode(e any) error { + if e == nil { + return dec.DecodeValue(reflect.Value{}) + } + value := reflect.ValueOf(e) + // If e represents a value as opposed to a pointer, the answer won't + // get back to the caller. Make sure it's a pointer. + if value.Type().Kind() != reflect.Pointer { + dec.err = errors.New("gob: attempt to decode into a non-pointer") + return dec.err + } + return dec.DecodeValue(value) +} + +// DecodeValue reads the next value from the input stream. +// If v is the zero reflect.Value (v.Kind() == Invalid), DecodeValue discards the value. +// Otherwise, it stores the value into v. In that case, v must represent +// a non-nil pointer to data or be an assignable reflect.Value (v.CanSet()) +// If the input is at EOF, DecodeValue returns [io.EOF] and +// does not modify v. +func (dec *Decoder) DecodeValue(v reflect.Value) error { + if v.IsValid() { + if v.Kind() == reflect.Pointer && !v.IsNil() { + // That's okay, we'll store through the pointer. + } else if !v.CanSet() { + return errors.New("gob: DecodeValue of unassignable value") + } + } + // Make sure we're single-threaded through here. + dec.mutex.Lock() + defer dec.mutex.Unlock() + + dec.buf.Reset() // In case data lingers from previous invocation. + dec.err = nil + id := dec.decodeTypeSequence(false) + if dec.err == nil { + dec.decodeValue(id, v) + } + return dec.err +} + +// If debug.go is compiled into the program, debugFunc prints a human-readable +// representation of the gob data read from r by calling that file's Debug function. +// Otherwise it is nil. +var debugFunc func(io.Reader) diff --git a/src/encoding/gob/doc.go b/src/encoding/gob/doc.go new file mode 100644 index 0000000..3f26ed8 --- /dev/null +++ b/src/encoding/gob/doc.go @@ -0,0 +1,423 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package gob manages streams of gobs - binary values exchanged between an +[Encoder] (transmitter) and a [Decoder] (receiver). A typical use is transporting +arguments and results of remote procedure calls (RPCs) such as those provided by +[net/rpc]. + +The implementation compiles a custom codec for each data type in the stream and +is most efficient when a single [Encoder] is used to transmit a stream of values, +amortizing the cost of compilation. + +# Basics + +A stream of gobs is self-describing. Each data item in the stream is preceded by +a specification of its type, expressed in terms of a small set of predefined +types. Pointers are not transmitted, but the things they point to are +transmitted; that is, the values are flattened. Nil pointers are not permitted, +as they have no value. Recursive types work fine, but +recursive values (data with cycles) are problematic. This may change. + +To use gobs, create an [Encoder] and present it with a series of data items as +values or addresses that can be dereferenced to values. The [Encoder] makes sure +all type information is sent before it is needed. At the receive side, a +[Decoder] retrieves values from the encoded stream and unpacks them into local +variables. + +# Types and Values + +The source and destination values/types need not correspond exactly. For structs, +fields (identified by name) that are in the source but absent from the receiving +variable will be ignored. Fields that are in the receiving variable but missing +from the transmitted type or value will be ignored in the destination. If a field +with the same name is present in both, their types must be compatible. Both the +receiver and transmitter will do all necessary indirection and dereferencing to +convert between gobs and actual Go values. For instance, a gob type that is +schematically, + + struct { A, B int } + +can be sent from or received into any of these Go types: + + struct { A, B int } // the same + *struct { A, B int } // extra indirection of the struct + struct { *A, **B int } // extra indirection of the fields + struct { A, B int64 } // different concrete value type; see below + +It may also be received into any of these: + + struct { A, B int } // the same + struct { B, A int } // ordering doesn't matter; matching is by name + struct { A, B, C int } // extra field (C) ignored + struct { B int } // missing field (A) ignored; data will be dropped + struct { B, C int } // missing field (A) ignored; extra field (C) ignored. + +Attempting to receive into these types will draw a decode error: + + struct { A int; B uint } // change of signedness for B + struct { A int; B float } // change of type for B + struct { } // no field names in common + struct { C, D int } // no field names in common + +Integers are transmitted two ways: arbitrary precision signed integers or +arbitrary precision unsigned integers. There is no int8, int16 etc. +discrimination in the gob format; there are only signed and unsigned integers. As +described below, the transmitter sends the value in a variable-length encoding; +the receiver accepts the value and stores it in the destination variable. +Floating-point numbers are always sent using IEEE-754 64-bit precision (see +below). + +Signed integers may be received into any signed integer variable: int, int16, etc.; +unsigned integers may be received into any unsigned integer variable; and floating +point values may be received into any floating point variable. However, +the destination variable must be able to represent the value or the decode +operation will fail. + +Structs, arrays and slices are also supported. Structs encode and decode only +exported fields. Strings and arrays of bytes are supported with a special, +efficient representation (see below). When a slice is decoded, if the existing +slice has capacity the slice will be extended in place; if not, a new array is +allocated. Regardless, the length of the resulting slice reports the number of +elements decoded. + +In general, if allocation is required, the decoder will allocate memory. If not, +it will update the destination variables with values read from the stream. It does +not initialize them first, so if the destination is a compound value such as a +map, struct, or slice, the decoded values will be merged elementwise into the +existing variables. + +Functions and channels will not be sent in a gob. Attempting to encode such a value +at the top level will fail. A struct field of chan or func type is treated exactly +like an unexported field and is ignored. + +Gob can encode a value of any type implementing the [GobEncoder] or +[encoding.BinaryMarshaler] interfaces by calling the corresponding method, +in that order of preference. + +Gob can decode a value of any type implementing the [GobDecoder] or +[encoding.BinaryUnmarshaler] interfaces by calling the corresponding method, +again in that order of preference. + +# Encoding Details + +This section documents the encoding, details that are not important for most +users. Details are presented bottom-up. + +An unsigned integer is sent one of two ways. If it is less than 128, it is sent +as a byte with that value. Otherwise it is sent as a minimal-length big-endian +(high byte first) byte stream holding the value, preceded by one byte holding the +byte count, negated. Thus 0 is transmitted as (00), 7 is transmitted as (07) and +256 is transmitted as (FE 01 00). + +A boolean is encoded within an unsigned integer: 0 for false, 1 for true. + +A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1 +upward contain the value; bit 0 says whether they should be complemented upon +receipt. The encode algorithm looks like this: + + var u uint + if i < 0 { + u = (^uint(i) << 1) | 1 // complement i, bit 0 is 1 + } else { + u = (uint(i) << 1) // do not complement i, bit 0 is 0 + } + encodeUnsigned(u) + +The low bit is therefore analogous to a sign bit, but making it the complement bit +instead guarantees that the largest negative integer is not a special case. For +example, -129=^128=(^256>>1) encodes as (FE 01 01). + +Floating-point numbers are always sent as a representation of a float64 value. +That value is converted to a uint64 using [math.Float64bits]. The uint64 is then +byte-reversed and sent as a regular unsigned integer. The byte-reversal means the +exponent and high-precision part of the mantissa go first. Since the low bits are +often zero, this can save encoding bytes. For instance, 17.0 is encoded in only +three bytes (FE 31 40). + +Strings and slices of bytes are sent as an unsigned count followed by that many +uninterpreted bytes of the value. + +All other slices and arrays are sent as an unsigned count followed by that many +elements using the standard gob encoding for their type, recursively. + +Maps are sent as an unsigned count followed by that many key, element +pairs. Empty but non-nil maps are sent, so if the receiver has not allocated +one already, one will always be allocated on receipt unless the transmitted map +is nil and not at the top level. + +In slices and arrays, as well as maps, all elements, even zero-valued elements, +are transmitted, even if all the elements are zero. + +Structs are sent as a sequence of (field number, field value) pairs. The field +value is sent using the standard gob encoding for its type, recursively. If a +field has the zero value for its type (except for arrays; see above), it is omitted +from the transmission. The field number is defined by the type of the encoded +struct: the first field of the encoded type is field 0, the second is field 1, +etc. When encoding a value, the field numbers are delta encoded for efficiency +and the fields are always sent in order of increasing field number; the deltas are +therefore unsigned. The initialization for the delta encoding sets the field +number to -1, so an unsigned integer field 0 with value 7 is transmitted as unsigned +delta = 1, unsigned value = 7 or (01 07). Finally, after all the fields have been +sent a terminating mark denotes the end of the struct. That mark is a delta=0 +value, which has representation (00). + +Interface types are not checked for compatibility; all interface types are +treated, for transmission, as members of a single "interface" type, analogous to +int or []byte - in effect they're all treated as interface{}. Interface values +are transmitted as a string identifying the concrete type being sent (a name +that must be pre-defined by calling [Register]), followed by a byte count of the +length of the following data (so the value can be skipped if it cannot be +stored), followed by the usual encoding of concrete (dynamic) value stored in +the interface value. (A nil interface value is identified by the empty string +and transmits no value.) Upon receipt, the decoder verifies that the unpacked +concrete item satisfies the interface of the receiving variable. + +If a value is passed to [Encoder.Encode] and the type is not a struct (or pointer to struct, +etc.), for simplicity of processing it is represented as a struct of one field. +The only visible effect of this is to encode a zero byte after the value, just as +after the last field of an encoded struct, so that the decode algorithm knows when +the top-level value is complete. + +The representation of types is described below. When a type is defined on a given +connection between an [Encoder] and [Decoder], it is assigned a signed integer type +id. When [Encoder.Encode](v) is called, it makes sure there is an id assigned for +the type of v and all its elements and then it sends the pair (typeid, encoded-v) +where typeid is the type id of the encoded type of v and encoded-v is the gob +encoding of the value v. + +To define a type, the encoder chooses an unused, positive type id and sends the +pair (-type id, encoded-type) where encoded-type is the gob encoding of a wireType +description, constructed from these types: + + type wireType struct { + ArrayT *ArrayType + SliceT *SliceType + StructT *StructType + MapT *MapType + GobEncoderT *gobEncoderType + BinaryMarshalerT *gobEncoderType + TextMarshalerT *gobEncoderType + + } + type arrayType struct { + CommonType + Elem typeId + Len int + } + type CommonType struct { + Name string // the name of the struct type + Id int // the id of the type, repeated so it's inside the type + } + type sliceType struct { + CommonType + Elem typeId + } + type structType struct { + CommonType + Field []*fieldType // the fields of the struct. + } + type fieldType struct { + Name string // the name of the field. + Id int // the type id of the field, which must be already defined + } + type mapType struct { + CommonType + Key typeId + Elem typeId + } + type gobEncoderType struct { + CommonType + } + +If there are nested type ids, the types for all inner type ids must be defined +before the top-level type id is used to describe an encoded-v. + +For simplicity in setup, the connection is defined to understand these types a +priori, as well as the basic gob types int, uint, etc. Their ids are: + + bool 1 + int 2 + uint 3 + float 4 + []byte 5 + string 6 + complex 7 + interface 8 + // gap for reserved ids. + WireType 16 + ArrayType 17 + CommonType 18 + SliceType 19 + StructType 20 + FieldType 21 + // 22 is slice of fieldType. + MapType 23 + +Finally, each message created by a call to Encode is preceded by an encoded +unsigned integer count of the number of bytes remaining in the message. After +the initial type name, interface values are wrapped the same way; in effect, the +interface value acts like a recursive invocation of Encode. + +In summary, a gob stream looks like + + (byteCount (-type id, encoding of a wireType)* (type id, encoding of a value))* + +where * signifies zero or more repetitions and the type id of a value must +be predefined or be defined before the value in the stream. + +Compatibility: Any future changes to the package will endeavor to maintain +compatibility with streams encoded using previous versions. That is, any released +version of this package should be able to decode data written with any previously +released version, subject to issues such as security fixes. See the Go compatibility +document for background: https://golang.org/doc/go1compat + +See "Gobs of data" for a design discussion of the gob wire format: +https://blog.golang.org/gobs-of-data + +# Security + +This package is not designed to be hardened against adversarial inputs, and is +outside the scope of https://go.dev/security/policy. In particular, the [Decoder] +does only basic sanity checking on decoded input sizes, and its limits are not +configurable. Care should be taken when decoding gob data from untrusted +sources, which may consume significant resources. +*/ +package gob + +/* +Grammar: + +Tokens starting with a lower case letter are terminals; int(n) +and uint(n) represent the signed/unsigned encodings of the value n. + +GobStream: + DelimitedMessage* +DelimitedMessage: + uint(lengthOfMessage) Message +Message: + TypeSequence TypedValue +TypeSequence + (TypeDefinition DelimitedTypeDefinition*)? +DelimitedTypeDefinition: + uint(lengthOfTypeDefinition) TypeDefinition +TypedValue: + int(typeId) Value +TypeDefinition: + int(-typeId) encodingOfWireType +Value: + SingletonValue | StructValue +SingletonValue: + uint(0) FieldValue +FieldValue: + builtinValue | ArrayValue | MapValue | SliceValue | StructValue | InterfaceValue +InterfaceValue: + NilInterfaceValue | NonNilInterfaceValue +NilInterfaceValue: + uint(0) +NonNilInterfaceValue: + ConcreteTypeName TypeSequence InterfaceContents +ConcreteTypeName: + uint(lengthOfName) [already read=n] name +InterfaceContents: + int(concreteTypeId) DelimitedValue +DelimitedValue: + uint(length) Value +ArrayValue: + uint(n) FieldValue*n [n elements] +MapValue: + uint(n) (FieldValue FieldValue)*n [n (key, value) pairs] +SliceValue: + uint(n) FieldValue*n [n elements] +StructValue: + (uint(fieldDelta) FieldValue)* +*/ + +/* +For implementers and the curious, here is an encoded example. Given + type Point struct {X, Y int} +and the value + p := Point{22, 33} +the bytes transmitted that encode p will be: + 1f ff 81 03 01 01 05 50 6f 69 6e 74 01 ff 82 00 + 01 02 01 01 58 01 04 00 01 01 59 01 04 00 00 00 + 07 ff 82 01 2c 01 42 00 +They are determined as follows. + +Since this is the first transmission of type Point, the type descriptor +for Point itself must be sent before the value. This is the first type +we've sent on this Encoder, so it has type id 65 (0 through 64 are +reserved). + + 1f // This item (a type descriptor) is 31 bytes long. + ff 81 // The negative of the id for the type we're defining, -65. + // This is one byte (indicated by FF = -1) followed by + // ^-65<<1 | 1. The low 1 bit signals to complement the + // rest upon receipt. + + // Now we send a type descriptor, which is itself a struct (wireType). + // The type of wireType itself is known (it's built in, as is the type of + // all its components), so we just need to send a *value* of type wireType + // that represents type "Point". + // Here starts the encoding of that value. + // Set the field number implicitly to -1; this is done at the beginning + // of every struct, including nested structs. + 03 // Add 3 to field number; now 2 (wireType.structType; this is a struct). + // structType starts with an embedded CommonType, which appears + // as a regular structure here too. + 01 // add 1 to field number (now 0); start of embedded CommonType. + 01 // add 1 to field number (now 0, the name of the type) + 05 // string is (unsigned) 5 bytes long + 50 6f 69 6e 74 // wireType.structType.CommonType.name = "Point" + 01 // add 1 to field number (now 1, the id of the type) + ff 82 // wireType.structType.CommonType._id = 65 + 00 // end of embedded wiretype.structType.CommonType struct + 01 // add 1 to field number (now 1, the field array in wireType.structType) + 02 // There are two fields in the type (len(structType.field)) + 01 // Start of first field structure; add 1 to get field number 0: field[0].name + 01 // 1 byte + 58 // structType.field[0].name = "X" + 01 // Add 1 to get field number 1: field[0].id + 04 // structType.field[0].typeId is 2 (signed int). + 00 // End of structType.field[0]; start structType.field[1]; set field number to -1. + 01 // Add 1 to get field number 0: field[1].name + 01 // 1 byte + 59 // structType.field[1].name = "Y" + 01 // Add 1 to get field number 1: field[1].id + 04 // struct.Type.field[1].typeId is 2 (signed int). + 00 // End of structType.field[1]; end of structType.field. + 00 // end of wireType.structType structure + 00 // end of wireType structure + +Now we can send the Point value. Again the field number resets to -1: + + 07 // this value is 7 bytes long + ff 82 // the type number, 65 (1 byte (-FF) followed by 65<<1) + 01 // add one to field number, yielding field 0 + 2c // encoding of signed "22" (0x2c = 44 = 22<<1); Point.x = 22 + 01 // add one to field number, yielding field 1 + 42 // encoding of signed "33" (0x42 = 66 = 33<<1); Point.y = 33 + 00 // end of structure + +The type encoding is long and fairly intricate but we send it only once. +If p is transmitted a second time, the type is already known so the +output will be just: + + 07 ff 82 01 2c 01 42 00 + +A single non-struct value at top level is transmitted like a field with +delta tag 0. For instance, a signed integer with value 3 presented as +the argument to Encode will emit: + + 03 04 00 06 + +Which represents: + + 03 // this value is 3 bytes long + 04 // the type number, 2, represents an integer + 00 // tag delta 0 + 06 // value 3 + +*/ diff --git a/src/encoding/gob/dump.go b/src/encoding/gob/dump.go new file mode 100644 index 0000000..f4b1beb --- /dev/null +++ b/src/encoding/gob/dump.go @@ -0,0 +1,29 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +package main + +// Need to compile package gob with debug.go to build this program. +// See comments in debug.go for how to do this. + +import ( + "encoding/gob" + "fmt" + "os" +) + +func main() { + var err error + file := os.Stdin + if len(os.Args) > 1 { + file, err = os.Open(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "dump: %s\n", err) + os.Exit(1) + } + } + gob.Debug(file) +} diff --git a/src/encoding/gob/enc_helpers.go b/src/encoding/gob/enc_helpers.go new file mode 100644 index 0000000..c3b4ca8 --- /dev/null +++ b/src/encoding/gob/enc_helpers.go @@ -0,0 +1,414 @@ +// Code generated by go run encgen.go -output enc_helpers.go; DO NOT EDIT. + +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "reflect" +) + +var encArrayHelper = map[reflect.Kind]encHelper{ + reflect.Bool: encBoolArray, + reflect.Complex64: encComplex64Array, + reflect.Complex128: encComplex128Array, + reflect.Float32: encFloat32Array, + reflect.Float64: encFloat64Array, + reflect.Int: encIntArray, + reflect.Int16: encInt16Array, + reflect.Int32: encInt32Array, + reflect.Int64: encInt64Array, + reflect.Int8: encInt8Array, + reflect.String: encStringArray, + reflect.Uint: encUintArray, + reflect.Uint16: encUint16Array, + reflect.Uint32: encUint32Array, + reflect.Uint64: encUint64Array, + reflect.Uintptr: encUintptrArray, +} + +var encSliceHelper = map[reflect.Kind]encHelper{ + reflect.Bool: encBoolSlice, + reflect.Complex64: encComplex64Slice, + reflect.Complex128: encComplex128Slice, + reflect.Float32: encFloat32Slice, + reflect.Float64: encFloat64Slice, + reflect.Int: encIntSlice, + reflect.Int16: encInt16Slice, + reflect.Int32: encInt32Slice, + reflect.Int64: encInt64Slice, + reflect.Int8: encInt8Slice, + reflect.String: encStringSlice, + reflect.Uint: encUintSlice, + reflect.Uint16: encUint16Slice, + reflect.Uint32: encUint32Slice, + reflect.Uint64: encUint64Slice, + reflect.Uintptr: encUintptrSlice, +} + +func encBoolArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encBoolSlice(state, v.Slice(0, v.Len())) +} + +func encBoolSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]bool) + if !ok { + // It is kind bool but not type bool. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != false || state.sendZero { + if x { + state.encodeUint(1) + } else { + state.encodeUint(0) + } + } + } + return true +} + +func encComplex64Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encComplex64Slice(state, v.Slice(0, v.Len())) +} + +func encComplex64Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]complex64) + if !ok { + // It is kind complex64 but not type complex64. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0+0i || state.sendZero { + rpart := floatBits(float64(real(x))) + ipart := floatBits(float64(imag(x))) + state.encodeUint(rpart) + state.encodeUint(ipart) + } + } + return true +} + +func encComplex128Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encComplex128Slice(state, v.Slice(0, v.Len())) +} + +func encComplex128Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]complex128) + if !ok { + // It is kind complex128 but not type complex128. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0+0i || state.sendZero { + rpart := floatBits(real(x)) + ipart := floatBits(imag(x)) + state.encodeUint(rpart) + state.encodeUint(ipart) + } + } + return true +} + +func encFloat32Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encFloat32Slice(state, v.Slice(0, v.Len())) +} + +func encFloat32Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]float32) + if !ok { + // It is kind float32 but not type float32. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + bits := floatBits(float64(x)) + state.encodeUint(bits) + } + } + return true +} + +func encFloat64Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encFloat64Slice(state, v.Slice(0, v.Len())) +} + +func encFloat64Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]float64) + if !ok { + // It is kind float64 but not type float64. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + bits := floatBits(x) + state.encodeUint(bits) + } + } + return true +} + +func encIntArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encIntSlice(state, v.Slice(0, v.Len())) +} + +func encIntSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]int) + if !ok { + // It is kind int but not type int. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeInt(int64(x)) + } + } + return true +} + +func encInt16Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encInt16Slice(state, v.Slice(0, v.Len())) +} + +func encInt16Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]int16) + if !ok { + // It is kind int16 but not type int16. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeInt(int64(x)) + } + } + return true +} + +func encInt32Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encInt32Slice(state, v.Slice(0, v.Len())) +} + +func encInt32Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]int32) + if !ok { + // It is kind int32 but not type int32. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeInt(int64(x)) + } + } + return true +} + +func encInt64Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encInt64Slice(state, v.Slice(0, v.Len())) +} + +func encInt64Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]int64) + if !ok { + // It is kind int64 but not type int64. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeInt(x) + } + } + return true +} + +func encInt8Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encInt8Slice(state, v.Slice(0, v.Len())) +} + +func encInt8Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]int8) + if !ok { + // It is kind int8 but not type int8. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeInt(int64(x)) + } + } + return true +} + +func encStringArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encStringSlice(state, v.Slice(0, v.Len())) +} + +func encStringSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]string) + if !ok { + // It is kind string but not type string. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != "" || state.sendZero { + state.encodeUint(uint64(len(x))) + state.b.WriteString(x) + } + } + return true +} + +func encUintArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encUintSlice(state, v.Slice(0, v.Len())) +} + +func encUintSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]uint) + if !ok { + // It is kind uint but not type uint. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeUint(uint64(x)) + } + } + return true +} + +func encUint16Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encUint16Slice(state, v.Slice(0, v.Len())) +} + +func encUint16Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]uint16) + if !ok { + // It is kind uint16 but not type uint16. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeUint(uint64(x)) + } + } + return true +} + +func encUint32Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encUint32Slice(state, v.Slice(0, v.Len())) +} + +func encUint32Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]uint32) + if !ok { + // It is kind uint32 but not type uint32. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeUint(uint64(x)) + } + } + return true +} + +func encUint64Array(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encUint64Slice(state, v.Slice(0, v.Len())) +} + +func encUint64Slice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]uint64) + if !ok { + // It is kind uint64 but not type uint64. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeUint(x) + } + } + return true +} + +func encUintptrArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return encUintptrSlice(state, v.Slice(0, v.Len())) +} + +func encUintptrSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]uintptr) + if !ok { + // It is kind uintptr but not type uintptr. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != 0 || state.sendZero { + state.encodeUint(uint64(x)) + } + } + return true +} diff --git a/src/encoding/gob/encgen.go b/src/encoding/gob/encgen.go new file mode 100644 index 0000000..e5f6878 --- /dev/null +++ b/src/encoding/gob/encgen.go @@ -0,0 +1,220 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// encgen writes the helper functions for encoding. Intended to be +// used with go generate; see the invocation in encode.go. + +// TODO: We could do more by being unsafe. Add a -unsafe flag? + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "log" + "os" +) + +var output = flag.String("output", "enc_helpers.go", "file name to write") + +type Type struct { + lower string + upper string + zero string + encoder string +} + +var types = []Type{ + { + "bool", + "Bool", + "false", + `if x { + state.encodeUint(1) + } else { + state.encodeUint(0) + }`, + }, + { + "complex64", + "Complex64", + "0+0i", + `rpart := floatBits(float64(real(x))) + ipart := floatBits(float64(imag(x))) + state.encodeUint(rpart) + state.encodeUint(ipart)`, + }, + { + "complex128", + "Complex128", + "0+0i", + `rpart := floatBits(real(x)) + ipart := floatBits(imag(x)) + state.encodeUint(rpart) + state.encodeUint(ipart)`, + }, + { + "float32", + "Float32", + "0", + `bits := floatBits(float64(x)) + state.encodeUint(bits)`, + }, + { + "float64", + "Float64", + "0", + `bits := floatBits(x) + state.encodeUint(bits)`, + }, + { + "int", + "Int", + "0", + `state.encodeInt(int64(x))`, + }, + { + "int16", + "Int16", + "0", + `state.encodeInt(int64(x))`, + }, + { + "int32", + "Int32", + "0", + `state.encodeInt(int64(x))`, + }, + { + "int64", + "Int64", + "0", + `state.encodeInt(x)`, + }, + { + "int8", + "Int8", + "0", + `state.encodeInt(int64(x))`, + }, + { + "string", + "String", + `""`, + `state.encodeUint(uint64(len(x))) + state.b.WriteString(x)`, + }, + { + "uint", + "Uint", + "0", + `state.encodeUint(uint64(x))`, + }, + { + "uint16", + "Uint16", + "0", + `state.encodeUint(uint64(x))`, + }, + { + "uint32", + "Uint32", + "0", + `state.encodeUint(uint64(x))`, + }, + { + "uint64", + "Uint64", + "0", + `state.encodeUint(x)`, + }, + { + "uintptr", + "Uintptr", + "0", + `state.encodeUint(uint64(x))`, + }, + // uint8 Handled separately. +} + +func main() { + log.SetFlags(0) + log.SetPrefix("encgen: ") + flag.Parse() + if flag.NArg() != 0 { + log.Fatal("usage: encgen [--output filename]") + } + var b bytes.Buffer + fmt.Fprintf(&b, "// Code generated by go run encgen.go -output %s; DO NOT EDIT.\n", *output) + fmt.Fprint(&b, header) + printMaps(&b, "Array") + fmt.Fprint(&b, "\n") + printMaps(&b, "Slice") + for _, t := range types { + fmt.Fprintf(&b, arrayHelper, t.lower, t.upper) + fmt.Fprintf(&b, sliceHelper, t.lower, t.upper, t.zero, t.encoder) + } + source, err := format.Source(b.Bytes()) + if err != nil { + log.Fatal("source format error:", err) + } + fd, err := os.Create(*output) + if err != nil { + log.Fatal(err) + } + if _, err := fd.Write(source); err != nil { + log.Fatal(err) + } +} + +func printMaps(b *bytes.Buffer, upperClass string) { + fmt.Fprintf(b, "var enc%sHelper = map[reflect.Kind]encHelper{\n", upperClass) + for _, t := range types { + fmt.Fprintf(b, "reflect.%s: enc%s%s,\n", t.upper, t.upper, upperClass) + } + fmt.Fprintf(b, "}\n") +} + +const header = ` +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "reflect" +) + +` + +const arrayHelper = ` +func enc%[2]sArray(state *encoderState, v reflect.Value) bool { + // Can only slice if it is addressable. + if !v.CanAddr() { + return false + } + return enc%[2]sSlice(state, v.Slice(0, v.Len())) +} +` + +const sliceHelper = ` +func enc%[2]sSlice(state *encoderState, v reflect.Value) bool { + slice, ok := v.Interface().([]%[1]s) + if !ok { + // It is kind %[1]s but not type %[1]s. TODO: We can handle this unsafely. + return false + } + for _, x := range slice { + if x != %[3]s || state.sendZero { + %[4]s + } + } + return true +} +` diff --git a/src/encoding/gob/encode.go b/src/encoding/gob/encode.go new file mode 100644 index 0000000..5f4d253 --- /dev/null +++ b/src/encoding/gob/encode.go @@ -0,0 +1,670 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run encgen.go -output enc_helpers.go + +package gob + +import ( + "encoding" + "encoding/binary" + "math" + "math/bits" + "reflect" + "sync" +) + +const uint64Size = 8 + +type encHelper func(state *encoderState, v reflect.Value) bool + +// encoderState is the global execution state of an instance of the encoder. +// Field numbers are delta encoded and always increase. The field +// number is initialized to -1 so 0 comes out as delta(1). A delta of +// 0 terminates the structure. +type encoderState struct { + enc *Encoder + b *encBuffer + sendZero bool // encoding an array element or map key/value pair; send zero values + fieldnum int // the last field number written. + buf [1 + uint64Size]byte // buffer used by the encoder; here to avoid allocation. + next *encoderState // for free list +} + +// encBuffer is an extremely simple, fast implementation of a write-only byte buffer. +// It never returns a non-nil error, but Write returns an error value so it matches io.Writer. +type encBuffer struct { + data []byte + scratch [64]byte +} + +var encBufferPool = sync.Pool{ + New: func() any { + e := new(encBuffer) + e.data = e.scratch[0:0] + return e + }, +} + +func (e *encBuffer) writeByte(c byte) { + e.data = append(e.data, c) +} + +func (e *encBuffer) Write(p []byte) (int, error) { + e.data = append(e.data, p...) + return len(p), nil +} + +func (e *encBuffer) WriteString(s string) { + e.data = append(e.data, s...) +} + +func (e *encBuffer) Len() int { + return len(e.data) +} + +func (e *encBuffer) Bytes() []byte { + return e.data +} + +func (e *encBuffer) Reset() { + if len(e.data) >= tooBig { + e.data = e.scratch[0:0] + } else { + e.data = e.data[0:0] + } +} + +func (enc *Encoder) newEncoderState(b *encBuffer) *encoderState { + e := enc.freeList + if e == nil { + e = new(encoderState) + e.enc = enc + } else { + enc.freeList = e.next + } + e.sendZero = false + e.fieldnum = 0 + e.b = b + if len(b.data) == 0 { + b.data = b.scratch[0:0] + } + return e +} + +func (enc *Encoder) freeEncoderState(e *encoderState) { + e.next = enc.freeList + enc.freeList = e +} + +// Unsigned integers have a two-state encoding. If the number is less +// than 128 (0 through 0x7F), its value is written directly. +// Otherwise the value is written in big-endian byte order preceded +// by the byte length, negated. + +// encodeUint writes an encoded unsigned integer to state.b. +func (state *encoderState) encodeUint(x uint64) { + if x <= 0x7F { + state.b.writeByte(uint8(x)) + return + } + + binary.BigEndian.PutUint64(state.buf[1:], x) + bc := bits.LeadingZeros64(x) >> 3 // 8 - bytelen(x) + state.buf[bc] = uint8(bc - uint64Size) // and then we subtract 8 to get -bytelen(x) + + state.b.Write(state.buf[bc : uint64Size+1]) +} + +// encodeInt writes an encoded signed integer to state.w. +// The low bit of the encoding says whether to bit complement the (other bits of the) +// uint to recover the int. +func (state *encoderState) encodeInt(i int64) { + var x uint64 + if i < 0 { + x = uint64(^i<<1) | 1 + } else { + x = uint64(i << 1) + } + state.encodeUint(x) +} + +// encOp is the signature of an encoding operator for a given type. +type encOp func(i *encInstr, state *encoderState, v reflect.Value) + +// The 'instructions' of the encoding machine +type encInstr struct { + op encOp + field int // field number in input + index []int // struct index + indir int // how many pointer indirections to reach the value in the struct +} + +// update emits a field number and updates the state to record its value for delta encoding. +// If the instruction pointer is nil, it does nothing +func (state *encoderState) update(instr *encInstr) { + if instr != nil { + state.encodeUint(uint64(instr.field - state.fieldnum)) + state.fieldnum = instr.field + } +} + +// Each encoder for a composite is responsible for handling any +// indirections associated with the elements of the data structure. +// If any pointer so reached is nil, no bytes are written. If the +// data item is zero, no bytes are written. Single values - ints, +// strings etc. - are indirected before calling their encoders. +// Otherwise, the output (for a scalar) is the field number, as an +// encoded integer, followed by the field data in its appropriate +// format. + +// encIndirect dereferences pv indir times and returns the result. +func encIndirect(pv reflect.Value, indir int) reflect.Value { + for ; indir > 0; indir-- { + if pv.IsNil() { + break + } + pv = pv.Elem() + } + return pv +} + +// encBool encodes the bool referenced by v as an unsigned 0 or 1. +func encBool(i *encInstr, state *encoderState, v reflect.Value) { + b := v.Bool() + if b || state.sendZero { + state.update(i) + if b { + state.encodeUint(1) + } else { + state.encodeUint(0) + } + } +} + +// encInt encodes the signed integer (int int8 int16 int32 int64) referenced by v. +func encInt(i *encInstr, state *encoderState, v reflect.Value) { + value := v.Int() + if value != 0 || state.sendZero { + state.update(i) + state.encodeInt(value) + } +} + +// encUint encodes the unsigned integer (uint uint8 uint16 uint32 uint64 uintptr) referenced by v. +func encUint(i *encInstr, state *encoderState, v reflect.Value) { + value := v.Uint() + if value != 0 || state.sendZero { + state.update(i) + state.encodeUint(value) + } +} + +// floatBits returns a uint64 holding the bits of a floating-point number. +// Floating-point numbers are transmitted as uint64s holding the bits +// of the underlying representation. They are sent byte-reversed, with +// the exponent end coming out first, so integer floating point numbers +// (for example) transmit more compactly. This routine does the +// swizzling. +func floatBits(f float64) uint64 { + u := math.Float64bits(f) + return bits.ReverseBytes64(u) +} + +// encFloat encodes the floating point value (float32 float64) referenced by v. +func encFloat(i *encInstr, state *encoderState, v reflect.Value) { + f := v.Float() + if f != 0 || state.sendZero { + bits := floatBits(f) + state.update(i) + state.encodeUint(bits) + } +} + +// encComplex encodes the complex value (complex64 complex128) referenced by v. +// Complex numbers are just a pair of floating-point numbers, real part first. +func encComplex(i *encInstr, state *encoderState, v reflect.Value) { + c := v.Complex() + if c != 0+0i || state.sendZero { + rpart := floatBits(real(c)) + ipart := floatBits(imag(c)) + state.update(i) + state.encodeUint(rpart) + state.encodeUint(ipart) + } +} + +// encUint8Array encodes the byte array referenced by v. +// Byte arrays are encoded as an unsigned count followed by the raw bytes. +func encUint8Array(i *encInstr, state *encoderState, v reflect.Value) { + b := v.Bytes() + if len(b) > 0 || state.sendZero { + state.update(i) + state.encodeUint(uint64(len(b))) + state.b.Write(b) + } +} + +// encString encodes the string referenced by v. +// Strings are encoded as an unsigned count followed by the raw bytes. +func encString(i *encInstr, state *encoderState, v reflect.Value) { + s := v.String() + if len(s) > 0 || state.sendZero { + state.update(i) + state.encodeUint(uint64(len(s))) + state.b.WriteString(s) + } +} + +// encStructTerminator encodes the end of an encoded struct +// as delta field number of 0. +func encStructTerminator(i *encInstr, state *encoderState, v reflect.Value) { + state.encodeUint(0) +} + +// Execution engine + +// encEngine an array of instructions indexed by field number of the encoding +// data, typically a struct. It is executed top to bottom, walking the struct. +type encEngine struct { + instr []encInstr +} + +const singletonField = 0 + +// valid reports whether the value is valid and a non-nil pointer. +// (Slices, maps, and chans take care of themselves.) +func valid(v reflect.Value) bool { + switch v.Kind() { + case reflect.Invalid: + return false + case reflect.Pointer: + return !v.IsNil() + } + return true +} + +// encodeSingle encodes a single top-level non-struct value. +func (enc *Encoder) encodeSingle(b *encBuffer, engine *encEngine, value reflect.Value) { + state := enc.newEncoderState(b) + defer enc.freeEncoderState(state) + state.fieldnum = singletonField + // There is no surrounding struct to frame the transmission, so we must + // generate data even if the item is zero. To do this, set sendZero. + state.sendZero = true + instr := &engine.instr[singletonField] + if instr.indir > 0 { + value = encIndirect(value, instr.indir) + } + if valid(value) { + instr.op(instr, state, value) + } +} + +// encodeStruct encodes a single struct value. +func (enc *Encoder) encodeStruct(b *encBuffer, engine *encEngine, value reflect.Value) { + if !valid(value) { + return + } + state := enc.newEncoderState(b) + defer enc.freeEncoderState(state) + state.fieldnum = -1 + for i := 0; i < len(engine.instr); i++ { + instr := &engine.instr[i] + if i >= value.NumField() { + // encStructTerminator + instr.op(instr, state, reflect.Value{}) + break + } + field := value.FieldByIndex(instr.index) + if instr.indir > 0 { + field = encIndirect(field, instr.indir) + // TODO: Is field guaranteed valid? If so we could avoid this check. + if !valid(field) { + continue + } + } + instr.op(instr, state, field) + } +} + +// encodeArray encodes an array. +func (enc *Encoder) encodeArray(b *encBuffer, value reflect.Value, op encOp, elemIndir int, length int, helper encHelper) { + state := enc.newEncoderState(b) + defer enc.freeEncoderState(state) + state.fieldnum = -1 + state.sendZero = true + state.encodeUint(uint64(length)) + if helper != nil && helper(state, value) { + return + } + for i := 0; i < length; i++ { + elem := value.Index(i) + if elemIndir > 0 { + elem = encIndirect(elem, elemIndir) + // TODO: Is elem guaranteed valid? If so we could avoid this check. + if !valid(elem) { + errorf("encodeArray: nil element") + } + } + op(nil, state, elem) + } +} + +// encodeReflectValue is a helper for maps. It encodes the value v. +func encodeReflectValue(state *encoderState, v reflect.Value, op encOp, indir int) { + for i := 0; i < indir && v.IsValid(); i++ { + v = reflect.Indirect(v) + } + if !v.IsValid() { + errorf("encodeReflectValue: nil element") + } + op(nil, state, v) +} + +// encodeMap encodes a map as unsigned count followed by key:value pairs. +func (enc *Encoder) encodeMap(b *encBuffer, mv reflect.Value, keyOp, elemOp encOp, keyIndir, elemIndir int) { + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.sendZero = true + state.encodeUint(uint64(mv.Len())) + mi := mv.MapRange() + for mi.Next() { + encodeReflectValue(state, mi.Key(), keyOp, keyIndir) + encodeReflectValue(state, mi.Value(), elemOp, elemIndir) + } + enc.freeEncoderState(state) +} + +// encodeInterface encodes the interface value iv. +// To send an interface, we send a string identifying the concrete type, followed +// by the type identifier (which might require defining that type right now), followed +// by the concrete value. A nil value gets sent as the empty string for the name, +// followed by no value. +func (enc *Encoder) encodeInterface(b *encBuffer, iv reflect.Value) { + // Gobs can encode nil interface values but not typed interface + // values holding nil pointers, since nil pointers point to no value. + elem := iv.Elem() + if elem.Kind() == reflect.Pointer && elem.IsNil() { + errorf("gob: cannot encode nil pointer of type %s inside interface", iv.Elem().Type()) + } + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.sendZero = true + if iv.IsNil() { + state.encodeUint(0) + return + } + + ut := userType(iv.Elem().Type()) + namei, ok := concreteTypeToName.Load(ut.base) + if !ok { + errorf("type not registered for interface: %s", ut.base) + } + name := namei.(string) + + // Send the name. + state.encodeUint(uint64(len(name))) + state.b.WriteString(name) + // Define the type id if necessary. + enc.sendTypeDescriptor(enc.writer(), state, ut) + // Send the type id. + enc.sendTypeId(state, ut) + // Encode the value into a new buffer. Any nested type definitions + // should be written to b, before the encoded value. + enc.pushWriter(b) + data := encBufferPool.Get().(*encBuffer) + data.Write(spaceForLength) + enc.encode(data, elem, ut) + if enc.err != nil { + error_(enc.err) + } + enc.popWriter() + enc.writeMessage(b, data) + data.Reset() + encBufferPool.Put(data) + if enc.err != nil { + error_(enc.err) + } + enc.freeEncoderState(state) +} + +// encodeGobEncoder encodes a value that implements the GobEncoder interface. +// The data is sent as a byte array. +func (enc *Encoder) encodeGobEncoder(b *encBuffer, ut *userTypeInfo, v reflect.Value) { + // TODO: should we catch panics from the called method? + + var data []byte + var err error + // We know it's one of these. + switch ut.externalEnc { + case xGob: + data, err = v.Interface().(GobEncoder).GobEncode() + case xBinary: + data, err = v.Interface().(encoding.BinaryMarshaler).MarshalBinary() + case xText: + data, err = v.Interface().(encoding.TextMarshaler).MarshalText() + } + if err != nil { + error_(err) + } + state := enc.newEncoderState(b) + state.fieldnum = -1 + state.encodeUint(uint64(len(data))) + state.b.Write(data) + enc.freeEncoderState(state) +} + +var encOpTable = [...]encOp{ + reflect.Bool: encBool, + reflect.Int: encInt, + reflect.Int8: encInt, + reflect.Int16: encInt, + reflect.Int32: encInt, + reflect.Int64: encInt, + reflect.Uint: encUint, + reflect.Uint8: encUint, + reflect.Uint16: encUint, + reflect.Uint32: encUint, + reflect.Uint64: encUint, + reflect.Uintptr: encUint, + reflect.Float32: encFloat, + reflect.Float64: encFloat, + reflect.Complex64: encComplex, + reflect.Complex128: encComplex, + reflect.String: encString, +} + +// encOpFor returns (a pointer to) the encoding op for the base type under rt and +// the indirection count to reach it. +func encOpFor(rt reflect.Type, inProgress map[reflect.Type]*encOp, building map[*typeInfo]bool) (*encOp, int) { + ut := userType(rt) + // If the type implements GobEncoder, we handle it without further processing. + if ut.externalEnc != 0 { + return gobEncodeOpFor(ut) + } + // If this type is already in progress, it's a recursive type (e.g. map[string]*T). + // Return the pointer to the op we're already building. + if opPtr := inProgress[rt]; opPtr != nil { + return opPtr, ut.indir + } + typ := ut.base + indir := ut.indir + k := typ.Kind() + var op encOp + if int(k) < len(encOpTable) { + op = encOpTable[k] + } + if op == nil { + inProgress[rt] = &op + // Special cases + switch t := typ; t.Kind() { + case reflect.Slice: + if t.Elem().Kind() == reflect.Uint8 { + op = encUint8Array + break + } + // Slices have a header; we decode it to find the underlying array. + elemOp, elemIndir := encOpFor(t.Elem(), inProgress, building) + helper := encSliceHelper[t.Elem().Kind()] + op = func(i *encInstr, state *encoderState, slice reflect.Value) { + if !state.sendZero && slice.Len() == 0 { + return + } + state.update(i) + state.enc.encodeArray(state.b, slice, *elemOp, elemIndir, slice.Len(), helper) + } + case reflect.Array: + // True arrays have size in the type. + elemOp, elemIndir := encOpFor(t.Elem(), inProgress, building) + helper := encArrayHelper[t.Elem().Kind()] + op = func(i *encInstr, state *encoderState, array reflect.Value) { + state.update(i) + state.enc.encodeArray(state.b, array, *elemOp, elemIndir, array.Len(), helper) + } + case reflect.Map: + keyOp, keyIndir := encOpFor(t.Key(), inProgress, building) + elemOp, elemIndir := encOpFor(t.Elem(), inProgress, building) + op = func(i *encInstr, state *encoderState, mv reflect.Value) { + // We send zero-length (but non-nil) maps because the + // receiver might want to use the map. (Maps don't use append.) + if !state.sendZero && mv.IsNil() { + return + } + state.update(i) + state.enc.encodeMap(state.b, mv, *keyOp, *elemOp, keyIndir, elemIndir) + } + case reflect.Struct: + // Generate a closure that calls out to the engine for the nested type. + getEncEngine(userType(typ), building) + info := mustGetTypeInfo(typ) + op = func(i *encInstr, state *encoderState, sv reflect.Value) { + state.update(i) + // indirect through info to delay evaluation for recursive structs + enc := info.encoder.Load() + state.enc.encodeStruct(state.b, enc, sv) + } + case reflect.Interface: + op = func(i *encInstr, state *encoderState, iv reflect.Value) { + if !state.sendZero && (!iv.IsValid() || iv.IsNil()) { + return + } + state.update(i) + state.enc.encodeInterface(state.b, iv) + } + } + } + if op == nil { + errorf("can't happen: encode type %s", rt) + } + return &op, indir +} + +// gobEncodeOpFor returns the op for a type that is known to implement GobEncoder. +func gobEncodeOpFor(ut *userTypeInfo) (*encOp, int) { + rt := ut.user + if ut.encIndir == -1 { + rt = reflect.PointerTo(rt) + } else if ut.encIndir > 0 { + for i := int8(0); i < ut.encIndir; i++ { + rt = rt.Elem() + } + } + var op encOp + op = func(i *encInstr, state *encoderState, v reflect.Value) { + if ut.encIndir == -1 { + // Need to climb up one level to turn value into pointer. + if !v.CanAddr() { + errorf("unaddressable value of type %s", rt) + } + v = v.Addr() + } + if !state.sendZero && v.IsZero() { + return + } + state.update(i) + state.enc.encodeGobEncoder(state.b, ut, v) + } + return &op, int(ut.encIndir) // encIndir: op will get called with p == address of receiver. +} + +// compileEnc returns the engine to compile the type. +func compileEnc(ut *userTypeInfo, building map[*typeInfo]bool) *encEngine { + srt := ut.base + engine := new(encEngine) + seen := make(map[reflect.Type]*encOp) + rt := ut.base + if ut.externalEnc != 0 { + rt = ut.user + } + if ut.externalEnc == 0 && srt.Kind() == reflect.Struct { + for fieldNum, wireFieldNum := 0, 0; fieldNum < srt.NumField(); fieldNum++ { + f := srt.Field(fieldNum) + if !isSent(&f) { + continue + } + op, indir := encOpFor(f.Type, seen, building) + engine.instr = append(engine.instr, encInstr{*op, wireFieldNum, f.Index, indir}) + wireFieldNum++ + } + if srt.NumField() > 0 && len(engine.instr) == 0 { + errorf("type %s has no exported fields", rt) + } + engine.instr = append(engine.instr, encInstr{encStructTerminator, 0, nil, 0}) + } else { + engine.instr = make([]encInstr, 1) + op, indir := encOpFor(rt, seen, building) + engine.instr[0] = encInstr{*op, singletonField, nil, indir} + } + return engine +} + +// getEncEngine returns the engine to compile the type. +func getEncEngine(ut *userTypeInfo, building map[*typeInfo]bool) *encEngine { + info, err := getTypeInfo(ut) + if err != nil { + error_(err) + } + enc := info.encoder.Load() + if enc == nil { + enc = buildEncEngine(info, ut, building) + } + return enc +} + +func buildEncEngine(info *typeInfo, ut *userTypeInfo, building map[*typeInfo]bool) *encEngine { + // Check for recursive types. + if building != nil && building[info] { + return nil + } + info.encInit.Lock() + defer info.encInit.Unlock() + enc := info.encoder.Load() + if enc == nil { + if building == nil { + building = make(map[*typeInfo]bool) + } + building[info] = true + enc = compileEnc(ut, building) + info.encoder.Store(enc) + } + return enc +} + +func (enc *Encoder) encode(b *encBuffer, value reflect.Value, ut *userTypeInfo) { + defer catchError(&enc.err) + engine := getEncEngine(ut, nil) + indir := ut.indir + if ut.externalEnc != 0 { + indir = int(ut.encIndir) + } + for i := 0; i < indir; i++ { + value = reflect.Indirect(value) + } + if ut.externalEnc == 0 && value.Type().Kind() == reflect.Struct { + enc.encodeStruct(b, engine, value) + } else { + enc.encodeSingle(b, engine, value) + } +} diff --git a/src/encoding/gob/encoder.go b/src/encoding/gob/encoder.go new file mode 100644 index 0000000..7d46152 --- /dev/null +++ b/src/encoding/gob/encoder.go @@ -0,0 +1,258 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "errors" + "io" + "reflect" + "sync" +) + +// An Encoder manages the transmission of type and data information to the +// other side of a connection. It is safe for concurrent use by multiple +// goroutines. +type Encoder struct { + mutex sync.Mutex // each item must be sent atomically + w []io.Writer // where to send the data + sent map[reflect.Type]typeId // which types we've already sent + countState *encoderState // stage for writing counts + freeList *encoderState // list of free encoderStates; avoids reallocation + byteBuf encBuffer // buffer for top-level encoderState + err error +} + +// Before we encode a message, we reserve space at the head of the +// buffer in which to encode its length. This means we can use the +// buffer to assemble the message without another allocation. +const maxLength = 9 // Maximum size of an encoded length. +var spaceForLength = make([]byte, maxLength) + +// NewEncoder returns a new encoder that will transmit on the [io.Writer]. +func NewEncoder(w io.Writer) *Encoder { + enc := new(Encoder) + enc.w = []io.Writer{w} + enc.sent = make(map[reflect.Type]typeId) + enc.countState = enc.newEncoderState(new(encBuffer)) + return enc +} + +// writer returns the innermost writer the encoder is using. +func (enc *Encoder) writer() io.Writer { + return enc.w[len(enc.w)-1] +} + +// pushWriter adds a writer to the encoder. +func (enc *Encoder) pushWriter(w io.Writer) { + enc.w = append(enc.w, w) +} + +// popWriter pops the innermost writer. +func (enc *Encoder) popWriter() { + enc.w = enc.w[0 : len(enc.w)-1] +} + +func (enc *Encoder) setError(err error) { + if enc.err == nil { // remember the first. + enc.err = err + } +} + +// writeMessage sends the data item preceded by an unsigned count of its length. +func (enc *Encoder) writeMessage(w io.Writer, b *encBuffer) { + // Space has been reserved for the length at the head of the message. + // This is a little dirty: we grab the slice from the bytes.Buffer and massage + // it by hand. + message := b.Bytes() + messageLen := len(message) - maxLength + // Length cannot be bigger than the decoder can handle. + if messageLen >= tooBig { + enc.setError(errors.New("gob: encoder: message too big")) + return + } + // Encode the length. + enc.countState.b.Reset() + enc.countState.encodeUint(uint64(messageLen)) + // Copy the length to be a prefix of the message. + offset := maxLength - enc.countState.b.Len() + copy(message[offset:], enc.countState.b.Bytes()) + // Write the data. + _, err := w.Write(message[offset:]) + // Drain the buffer and restore the space at the front for the count of the next message. + b.Reset() + b.Write(spaceForLength) + if err != nil { + enc.setError(err) + } +} + +// sendActualType sends the requested type, without further investigation, unless +// it's been sent before. +func (enc *Encoder) sendActualType(w io.Writer, state *encoderState, ut *userTypeInfo, actual reflect.Type) (sent bool) { + if _, alreadySent := enc.sent[actual]; alreadySent { + return false + } + info, err := getTypeInfo(ut) + if err != nil { + enc.setError(err) + return + } + // Send the pair (-id, type) + // Id: + state.encodeInt(-int64(info.id)) + // Type: + enc.encode(state.b, reflect.ValueOf(info.wire), wireTypeUserInfo) + enc.writeMessage(w, state.b) + if enc.err != nil { + return + } + + // Remember we've sent this type, both what the user gave us and the base type. + enc.sent[ut.base] = info.id + if ut.user != ut.base { + enc.sent[ut.user] = info.id + } + // Now send the inner types + switch st := actual; st.Kind() { + case reflect.Struct: + for i := 0; i < st.NumField(); i++ { + if isExported(st.Field(i).Name) { + enc.sendType(w, state, st.Field(i).Type) + } + } + case reflect.Array, reflect.Slice: + enc.sendType(w, state, st.Elem()) + case reflect.Map: + enc.sendType(w, state, st.Key()) + enc.sendType(w, state, st.Elem()) + } + return true +} + +// sendType sends the type info to the other side, if necessary. +func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Type) (sent bool) { + ut := userType(origt) + if ut.externalEnc != 0 { + // The rules are different: regardless of the underlying type's representation, + // we need to tell the other side that the base type is a GobEncoder. + return enc.sendActualType(w, state, ut, ut.base) + } + + // It's a concrete value, so drill down to the base type. + switch rt := ut.base; rt.Kind() { + default: + // Basic types and interfaces do not need to be described. + return + case reflect.Slice: + // If it's []uint8, don't send; it's considered basic. + if rt.Elem().Kind() == reflect.Uint8 { + return + } + // Otherwise we do send. + break + case reflect.Array: + // arrays must be sent so we know their lengths and element types. + break + case reflect.Map: + // maps must be sent so we know their lengths and key/value types. + break + case reflect.Struct: + // structs must be sent so we know their fields. + break + case reflect.Chan, reflect.Func: + // If we get here, it's a field of a struct; ignore it. + return + } + + return enc.sendActualType(w, state, ut, ut.base) +} + +// Encode transmits the data item represented by the empty interface value, +// guaranteeing that all necessary type information has been transmitted first. +// Passing a nil pointer to Encoder will panic, as they cannot be transmitted by gob. +func (enc *Encoder) Encode(e any) error { + return enc.EncodeValue(reflect.ValueOf(e)) +} + +// sendTypeDescriptor makes sure the remote side knows about this type. +// It will send a descriptor if this is the first time the type has been +// sent. +func (enc *Encoder) sendTypeDescriptor(w io.Writer, state *encoderState, ut *userTypeInfo) { + // Make sure the type is known to the other side. + // First, have we already sent this type? + rt := ut.base + if ut.externalEnc != 0 { + rt = ut.user + } + if _, alreadySent := enc.sent[rt]; !alreadySent { + // No, so send it. + sent := enc.sendType(w, state, rt) + if enc.err != nil { + return + } + // If the type info has still not been transmitted, it means we have + // a singleton basic type (int, []byte etc.) at top level. We don't + // need to send the type info but we do need to update enc.sent. + if !sent { + info, err := getTypeInfo(ut) + if err != nil { + enc.setError(err) + return + } + enc.sent[rt] = info.id + } + } +} + +// sendTypeId sends the id, which must have already been defined. +func (enc *Encoder) sendTypeId(state *encoderState, ut *userTypeInfo) { + // Identify the type of this top-level value. + state.encodeInt(int64(enc.sent[ut.base])) +} + +// EncodeValue transmits the data item represented by the reflection value, +// guaranteeing that all necessary type information has been transmitted first. +// Passing a nil pointer to EncodeValue will panic, as they cannot be transmitted by gob. +func (enc *Encoder) EncodeValue(value reflect.Value) error { + if value.Kind() == reflect.Invalid { + return errors.New("gob: cannot encode nil value") + } + if value.Kind() == reflect.Pointer && value.IsNil() { + panic("gob: cannot encode nil pointer of type " + value.Type().String()) + } + + // Make sure we're single-threaded through here, so multiple + // goroutines can share an encoder. + enc.mutex.Lock() + defer enc.mutex.Unlock() + + // Remove any nested writers remaining due to previous errors. + enc.w = enc.w[0:1] + + ut, err := validUserType(value.Type()) + if err != nil { + return err + } + + enc.err = nil + enc.byteBuf.Reset() + enc.byteBuf.Write(spaceForLength) + state := enc.newEncoderState(&enc.byteBuf) + + enc.sendTypeDescriptor(enc.writer(), state, ut) + enc.sendTypeId(state, ut) + if enc.err != nil { + return enc.err + } + + // Encode the object. + enc.encode(state.b, value, ut) + if enc.err == nil { + enc.writeMessage(enc.writer(), state.b) + } + + enc.freeEncoderState(state) + return enc.err +} diff --git a/src/encoding/gob/encoder_test.go b/src/encoding/gob/encoder_test.go new file mode 100644 index 0000000..d99b071 --- /dev/null +++ b/src/encoding/gob/encoder_test.go @@ -0,0 +1,1280 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "encoding/hex" + "fmt" + "io" + "math" + "reflect" + "sort" + "strings" + "testing" +) + +// Test basic operations in a safe manner. +func TestBasicEncoderDecoder(t *testing.T) { + var values = []any{ + true, + int(123), + int8(123), + int16(-12345), + int32(123456), + int64(-1234567), + uint(123), + uint8(123), + uint16(12345), + uint32(123456), + uint64(1234567), + uintptr(12345678), + float32(1.2345), + float64(1.2345678), + complex64(1.2345 + 2.3456i), + complex128(1.2345678 + 2.3456789i), + []byte("hello"), + string("hello"), + } + for _, value := range values { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(value) + if err != nil { + t.Error("encoder fail:", err) + } + dec := NewDecoder(b) + result := reflect.New(reflect.TypeOf(value)) + err = dec.Decode(result.Interface()) + if err != nil { + t.Fatalf("error decoding %T: %v:", reflect.TypeOf(value), err) + } + if !reflect.DeepEqual(value, result.Elem().Interface()) { + t.Fatalf("%T: expected %v got %v", value, value, result.Elem().Interface()) + } + } +} + +func TestEncodeIntSlice(t *testing.T) { + + s8 := []int8{1, 5, 12, 22, 35, 51, 70, 92, 117} + s16 := []int16{145, 176, 210, 247, 287, 330, 376, 425, 477} + s32 := []int32{532, 590, 651, 715, 782, 852, 925, 1001, 1080} + s64 := []int64{1162, 1247, 1335, 1426, 1520, 1617, 1717, 1820, 1926} + + t.Run("int8", func(t *testing.T) { + var sink bytes.Buffer + enc := NewEncoder(&sink) + enc.Encode(s8) + + dec := NewDecoder(&sink) + res := make([]int8, 9) + dec.Decode(&res) + + if !reflect.DeepEqual(s8, res) { + t.Fatalf("EncodeIntSlice: expected %v, got %v", s8, res) + } + }) + + t.Run("int16", func(t *testing.T) { + var sink bytes.Buffer + enc := NewEncoder(&sink) + enc.Encode(s16) + + dec := NewDecoder(&sink) + res := make([]int16, 9) + dec.Decode(&res) + + if !reflect.DeepEqual(s16, res) { + t.Fatalf("EncodeIntSlice: expected %v, got %v", s16, res) + } + }) + + t.Run("int32", func(t *testing.T) { + var sink bytes.Buffer + enc := NewEncoder(&sink) + enc.Encode(s32) + + dec := NewDecoder(&sink) + res := make([]int32, 9) + dec.Decode(&res) + + if !reflect.DeepEqual(s32, res) { + t.Fatalf("EncodeIntSlice: expected %v, got %v", s32, res) + } + }) + + t.Run("int64", func(t *testing.T) { + var sink bytes.Buffer + enc := NewEncoder(&sink) + enc.Encode(s64) + + dec := NewDecoder(&sink) + res := make([]int64, 9) + dec.Decode(&res) + + if !reflect.DeepEqual(s64, res) { + t.Fatalf("EncodeIntSlice: expected %v, got %v", s64, res) + } + }) + +} + +type ET0 struct { + A int + B string +} + +type ET2 struct { + X string +} + +type ET1 struct { + A int + Et2 *ET2 + Next *ET1 +} + +// Like ET1 but with a different name for a field +type ET3 struct { + A int + Et2 *ET2 + DifferentNext *ET1 +} + +// Like ET1 but with a different type for a field +type ET4 struct { + A int + Et2 float64 + Next int +} + +func TestEncoderDecoder(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + et0 := new(ET0) + et0.A = 7 + et0.B = "gobs of fun" + err := enc.Encode(et0) + if err != nil { + t.Error("encoder fail:", err) + } + //fmt.Printf("% x %q\n", b, b) + //Debug(b) + dec := NewDecoder(b) + newEt0 := new(ET0) + err = dec.Decode(newEt0) + if err != nil { + t.Fatal("error decoding ET0:", err) + } + + if !reflect.DeepEqual(et0, newEt0) { + t.Fatalf("invalid data for et0: expected %+v; got %+v", *et0, *newEt0) + } + if b.Len() != 0 { + t.Error("not at eof;", b.Len(), "bytes left") + } + // t.FailNow() + + b = new(bytes.Buffer) + enc = NewEncoder(b) + et1 := new(ET1) + et1.A = 7 + et1.Et2 = new(ET2) + err = enc.Encode(et1) + if err != nil { + t.Error("encoder fail:", err) + } + dec = NewDecoder(b) + newEt1 := new(ET1) + err = dec.Decode(newEt1) + if err != nil { + t.Fatal("error decoding ET1:", err) + } + + if !reflect.DeepEqual(et1, newEt1) { + t.Fatalf("invalid data for et1: expected %+v; got %+v", *et1, *newEt1) + } + if b.Len() != 0 { + t.Error("not at eof;", b.Len(), "bytes left") + } + + enc.Encode(et1) + newEt1 = new(ET1) + err = dec.Decode(newEt1) + if err != nil { + t.Fatal("round 2: error decoding ET1:", err) + } + if !reflect.DeepEqual(et1, newEt1) { + t.Fatalf("round 2: invalid data for et1: expected %+v; got %+v", *et1, *newEt1) + } + if b.Len() != 0 { + t.Error("round 2: not at eof;", b.Len(), "bytes left") + } + + // Now test with a running encoder/decoder pair that we recognize a type mismatch. + err = enc.Encode(et1) + if err != nil { + t.Error("round 3: encoder fail:", err) + } + newEt2 := new(ET2) + err = dec.Decode(newEt2) + if err == nil { + t.Fatal("round 3: expected `bad type' error decoding ET2") + } +} + +// Run one value through the encoder/decoder, but use the wrong type. +// Input is always an ET1; we compare it to whatever is under 'e'. +func badTypeCheck(e any, shouldFail bool, msg string, t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + et1 := new(ET1) + et1.A = 7 + et1.Et2 = new(ET2) + err := enc.Encode(et1) + if err != nil { + t.Error("encoder fail:", err) + } + dec := NewDecoder(b) + err = dec.Decode(e) + if shouldFail && err == nil { + t.Error("expected error for", msg) + } + if !shouldFail && err != nil { + t.Error("unexpected error for", msg, err) + } +} + +// Test that we recognize a bad type the first time. +func TestWrongTypeDecoder(t *testing.T) { + badTypeCheck(new(ET2), true, "no fields in common", t) + badTypeCheck(new(ET3), false, "different name of field", t) + badTypeCheck(new(ET4), true, "different type of field", t) +} + +// Types not supported at top level by the Encoder. +var unsupportedValues = []any{ + make(chan int), + func(a int) bool { return true }, +} + +func TestUnsupported(t *testing.T) { + var b bytes.Buffer + enc := NewEncoder(&b) + for _, v := range unsupportedValues { + err := enc.Encode(v) + if err == nil { + t.Errorf("expected error for %T; got none", v) + } + } +} + +func encAndDec(in, out any) error { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(in) + if err != nil { + return err + } + dec := NewDecoder(b) + err = dec.Decode(out) + if err != nil { + return err + } + return nil +} + +func TestTypeToPtrType(t *testing.T) { + // Encode a T, decode a *T + type Type0 struct { + A int + } + t0 := Type0{7} + t0p := new(Type0) + if err := encAndDec(t0, t0p); err != nil { + t.Error(err) + } +} + +func TestPtrTypeToType(t *testing.T) { + // Encode a *T, decode a T + type Type1 struct { + A uint + } + t1p := &Type1{17} + var t1 Type1 + if err := encAndDec(t1, t1p); err != nil { + t.Error(err) + } +} + +func TestTypeToPtrPtrPtrPtrType(t *testing.T) { + type Type2 struct { + A ****float64 + } + t2 := Type2{} + t2.A = new(***float64) + *t2.A = new(**float64) + **t2.A = new(*float64) + ***t2.A = new(float64) + ****t2.A = 27.4 + t2pppp := new(***Type2) + if err := encAndDec(t2, t2pppp); err != nil { + t.Fatal(err) + } + if ****(****t2pppp).A != ****t2.A { + t.Errorf("wrong value after decode: %g not %g", ****(****t2pppp).A, ****t2.A) + } +} + +func TestSlice(t *testing.T) { + type Type3 struct { + A []string + } + t3p := &Type3{[]string{"hello", "world"}} + var t3 Type3 + if err := encAndDec(t3, t3p); err != nil { + t.Error(err) + } +} + +func TestValueError(t *testing.T) { + // Encode a *T, decode a T + type Type4 struct { + A int + } + t4p := &Type4{3} + var t4 Type4 // note: not a pointer. + if err := encAndDec(t4p, t4); err == nil || !strings.Contains(err.Error(), "pointer") { + t.Error("expected error about pointer; got", err) + } +} + +func TestArray(t *testing.T) { + type Type5 struct { + A [3]string + B [3]byte + } + type Type6 struct { + A [2]string // can't hold t5.a + } + t5 := Type5{[3]string{"hello", ",", "world"}, [3]byte{1, 2, 3}} + var t5p Type5 + if err := encAndDec(t5, &t5p); err != nil { + t.Error(err) + } + var t6 Type6 + if err := encAndDec(t5, &t6); err == nil { + t.Error("should fail with mismatched array sizes") + } +} + +func TestRecursiveMapType(t *testing.T) { + type recursiveMap map[string]recursiveMap + r1 := recursiveMap{"A": recursiveMap{"B": nil, "C": nil}, "D": nil} + r2 := make(recursiveMap) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + +func TestRecursiveSliceType(t *testing.T) { + type recursiveSlice []recursiveSlice + r1 := recursiveSlice{0: recursiveSlice{0: nil}, 1: nil} + r2 := make(recursiveSlice, 0) + if err := encAndDec(r1, &r2); err != nil { + t.Error(err) + } +} + +// Regression test for bug: must send zero values inside arrays +func TestDefaultsInArray(t *testing.T) { + type Type7 struct { + B []bool + I []int + S []string + F []float64 + } + t7 := Type7{ + []bool{false, false, true}, + []int{0, 0, 1}, + []string{"hi", "", "there"}, + []float64{0, 0, 1}, + } + var t7p Type7 + if err := encAndDec(t7, &t7p); err != nil { + t.Error(err) + } +} + +var testInt int +var testFloat32 float32 +var testString string +var testSlice []string +var testMap map[string]int +var testArray [7]int + +type SingleTest struct { + in any + out any + err string +} + +var singleTests = []SingleTest{ + {17, &testInt, ""}, + {float32(17.5), &testFloat32, ""}, + {"bike shed", &testString, ""}, + {[]string{"bike", "shed", "paint", "color"}, &testSlice, ""}, + {map[string]int{"seven": 7, "twelve": 12}, &testMap, ""}, + {[7]int{4, 55, 0, 0, 0, 0, 0}, &testArray, ""}, // case that once triggered a bug + {[7]int{4, 55, 1, 44, 22, 66, 1234}, &testArray, ""}, + + // Decode errors + {172, &testFloat32, "type"}, +} + +func TestSingletons(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + dec := NewDecoder(b) + for _, test := range singleTests { + b.Reset() + err := enc.Encode(test.in) + if err != nil { + t.Errorf("error encoding %v: %s", test.in, err) + continue + } + err = dec.Decode(test.out) + switch { + case err != nil && test.err == "": + t.Errorf("error decoding %v: %s", test.in, err) + continue + case err == nil && test.err != "": + t.Errorf("expected error decoding %v: %s", test.in, test.err) + continue + case err != nil && test.err != "": + if !strings.Contains(err.Error(), test.err) { + t.Errorf("wrong error decoding %v: wanted %s, got %v", test.in, test.err, err) + } + continue + } + // Get rid of the pointer in the rhs + val := reflect.ValueOf(test.out).Elem().Interface() + if !reflect.DeepEqual(test.in, val) { + t.Errorf("decoding singleton: expected %v got %v", test.in, val) + } + } +} + +func TestStructNonStruct(t *testing.T) { + type Struct struct { + A string + } + type NonStruct string + s := Struct{"hello"} + var sp Struct + if err := encAndDec(s, &sp); err != nil { + t.Error(err) + } + var ns NonStruct + if err := encAndDec(s, &ns); err == nil { + t.Error("should get error for struct/non-struct") + } else if !strings.Contains(err.Error(), "type") { + t.Error("for struct/non-struct expected type error; got", err) + } + // Now try the other way + var nsp NonStruct + if err := encAndDec(ns, &nsp); err != nil { + t.Error(err) + } + if err := encAndDec(ns, &s); err == nil { + t.Error("should get error for non-struct/struct") + } else if !strings.Contains(err.Error(), "type") { + t.Error("for non-struct/struct expected type error; got", err) + } +} + +type interfaceIndirectTestI interface { + F() bool +} + +type interfaceIndirectTestT struct{} + +func (this *interfaceIndirectTestT) F() bool { + return true +} + +// A version of a bug reported on golang-nuts. Also tests top-level +// slice of interfaces. The issue was registering *T caused T to be +// stored as the concrete type. +func TestInterfaceIndirect(t *testing.T) { + Register(&interfaceIndirectTestT{}) + b := new(bytes.Buffer) + w := []interfaceIndirectTestI{&interfaceIndirectTestT{}} + err := NewEncoder(b).Encode(w) + if err != nil { + t.Fatal("encode error:", err) + } + + var r []interfaceIndirectTestI + err = NewDecoder(b).Decode(&r) + if err != nil { + t.Fatal("decode error:", err) + } +} + +// Now follow various tests that decode into things that can't represent the +// encoded value, all of which should be legal. + +// Also, when the ignored object contains an interface value, it may define +// types. Make sure that skipping the value still defines the types by using +// the encoder/decoder pair to send a value afterwards. If an interface +// is sent, its type in the test is always NewType0, so this checks that the +// encoder and decoder don't skew with respect to type definitions. + +type Struct0 struct { + I any +} + +type NewType0 struct { + S string +} + +type ignoreTest struct { + in, out any +} + +var ignoreTests = []ignoreTest{ + // Decode normal struct into an empty struct + {&struct{ A int }{23}, &struct{}{}}, + // Decode normal struct into a nil. + {&struct{ A int }{23}, nil}, + // Decode singleton string into a nil. + {"hello, world", nil}, + // Decode singleton slice into a nil. + {[]int{1, 2, 3, 4}, nil}, + // Decode struct containing an interface into a nil. + {&Struct0{&NewType0{"value0"}}, nil}, + // Decode singleton slice of interfaces into a nil. + {[]any{"hi", &NewType0{"value1"}, 23}, nil}, +} + +func TestDecodeIntoNothing(t *testing.T) { + Register(new(NewType0)) + for i, test := range ignoreTests { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(test.in) + if err != nil { + t.Errorf("%d: encode error %s:", i, err) + continue + } + dec := NewDecoder(b) + err = dec.Decode(test.out) + if err != nil { + t.Errorf("%d: decode error: %s", i, err) + continue + } + // Now see if the encoder and decoder are in a consistent state. + str := fmt.Sprintf("Value %d", i) + err = enc.Encode(&NewType0{str}) + if err != nil { + t.Fatalf("%d: NewType0 encode error: %s", i, err) + } + ns := new(NewType0) + err = dec.Decode(ns) + if err != nil { + t.Fatalf("%d: NewType0 decode error: %s", i, err) + } + if ns.S != str { + t.Fatalf("%d: expected %q got %q", i, str, ns.S) + } + } +} + +func TestIgnoreRecursiveType(t *testing.T) { + // It's hard to build a self-contained test for this because + // we can't build compatible types in one package with + // different items so something is ignored. Here is + // some data that represents, according to debug.go: + // type definition { + // slice "recursiveSlice" id=106 + // elem id=106 + // } + data := []byte{ + 0x1d, 0xff, 0xd3, 0x02, 0x01, 0x01, 0x0e, 0x72, + 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, + 0x53, 0x6c, 0x69, 0x63, 0x65, 0x01, 0xff, 0xd4, + 0x00, 0x01, 0xff, 0xd4, 0x00, 0x00, 0x07, 0xff, + 0xd4, 0x00, 0x02, 0x01, 0x00, 0x00, + } + dec := NewDecoder(bytes.NewReader(data)) + // Issue 10415: This caused infinite recursion. + err := dec.Decode(nil) + if err != nil { + t.Fatal(err) + } +} + +// Another bug from golang-nuts, involving nested interfaces. +type Bug0Outer struct { + Bug0Field any +} + +type Bug0Inner struct { + A int +} + +func TestNestedInterfaces(t *testing.T) { + var buf bytes.Buffer + e := NewEncoder(&buf) + d := NewDecoder(&buf) + Register(new(Bug0Outer)) + Register(new(Bug0Inner)) + f := &Bug0Outer{&Bug0Outer{&Bug0Inner{7}}} + var v any = f + err := e.Encode(&v) + if err != nil { + t.Fatal("Encode:", err) + } + err = d.Decode(&v) + if err != nil { + t.Fatal("Decode:", err) + } + // Make sure it decoded correctly. + outer1, ok := v.(*Bug0Outer) + if !ok { + t.Fatalf("v not Bug0Outer: %T", v) + } + outer2, ok := outer1.Bug0Field.(*Bug0Outer) + if !ok { + t.Fatalf("v.Bug0Field not Bug0Outer: %T", outer1.Bug0Field) + } + inner, ok := outer2.Bug0Field.(*Bug0Inner) + if !ok { + t.Fatalf("v.Bug0Field.Bug0Field not Bug0Inner: %T", outer2.Bug0Field) + } + if inner.A != 7 { + t.Fatalf("final value %d; expected %d", inner.A, 7) + } +} + +// The bugs keep coming. We forgot to send map subtypes before the map. + +type Bug1Elem struct { + Name string + Id int +} + +type Bug1StructMap map[string]Bug1Elem + +func TestMapBug1(t *testing.T) { + in := make(Bug1StructMap) + in["val1"] = Bug1Elem{"elem1", 1} + in["val2"] = Bug1Elem{"elem2", 2} + + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(in) + if err != nil { + t.Fatal("encode:", err) + } + dec := NewDecoder(b) + out := make(Bug1StructMap) + err = dec.Decode(&out) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(in, out) { + t.Errorf("mismatch: %v %v", in, out) + } +} + +func TestGobMapInterfaceEncode(t *testing.T) { + m := map[string]any{ + "up": uintptr(0), + "i0": []int{-1}, + "i1": []int8{-1}, + "i2": []int16{-1}, + "i3": []int32{-1}, + "i4": []int64{-1}, + "u0": []uint{1}, + "u1": []uint8{1}, + "u2": []uint16{1}, + "u3": []uint32{1}, + "u4": []uint64{1}, + "f0": []float32{1}, + "f1": []float64{1}, + "c0": []complex64{complex(2, -2)}, + "c1": []complex128{complex(2, float64(-2))}, + "us": []uintptr{0}, + "bo": []bool{false}, + "st": []string{"s"}, + } + enc := NewEncoder(new(bytes.Buffer)) + err := enc.Encode(m) + if err != nil { + t.Errorf("encode map: %s", err) + } +} + +func TestSliceReusesMemory(t *testing.T) { + buf := new(bytes.Buffer) + // Bytes + { + x := []byte("abcd") + enc := NewEncoder(buf) + err := enc.Encode(x) + if err != nil { + t.Errorf("bytes: encode: %s", err) + } + // Decode into y, which is big enough. + y := []byte("ABCDE") + addr := &y[0] + dec := NewDecoder(buf) + err = dec.Decode(&y) + if err != nil { + t.Fatal("bytes: decode:", err) + } + if !bytes.Equal(x, y) { + t.Errorf("bytes: expected %q got %q\n", x, y) + } + if addr != &y[0] { + t.Errorf("bytes: unnecessary reallocation") + } + } + // general slice + { + x := []rune("abcd") + enc := NewEncoder(buf) + err := enc.Encode(x) + if err != nil { + t.Errorf("ints: encode: %s", err) + } + // Decode into y, which is big enough. + y := []rune("ABCDE") + addr := &y[0] + dec := NewDecoder(buf) + err = dec.Decode(&y) + if err != nil { + t.Fatal("ints: decode:", err) + } + if !reflect.DeepEqual(x, y) { + t.Errorf("ints: expected %q got %q\n", x, y) + } + if addr != &y[0] { + t.Errorf("ints: unnecessary reallocation") + } + } +} + +// Used to crash: negative count in recvMessage. +func TestBadCount(t *testing.T) { + b := []byte{0xfb, 0xa5, 0x82, 0x2f, 0xca, 0x1} + if err := NewDecoder(bytes.NewReader(b)).Decode(nil); err == nil { + t.Error("expected error from bad count") + } else if err.Error() != errBadCount.Error() { + t.Error("expected bad count error; got", err) + } +} + +// Verify that sequential Decoders built on a single input will +// succeed if the input implements ReadByte and there is no +// type information in the stream. +func TestSequentialDecoder(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + const count = 10 + for i := 0; i < count; i++ { + s := fmt.Sprintf("%d", i) + if err := enc.Encode(s); err != nil { + t.Error("encoder fail:", err) + } + } + for i := 0; i < count; i++ { + dec := NewDecoder(b) + var s string + if err := dec.Decode(&s); err != nil { + t.Fatal("decoder fail:", err) + } + if s != fmt.Sprintf("%d", i) { + t.Fatalf("decode expected %d got %s", i, s) + } + } +} + +// Should be able to have unrepresentable fields (chan, func, *chan etc.); we just ignore them. +type Bug2 struct { + A int + C chan int + CP *chan int + F func() + FPP **func() +} + +func TestChanFuncIgnored(t *testing.T) { + c := make(chan int) + f := func() {} + fp := &f + b0 := Bug2{23, c, &c, f, &fp} + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(b0); err != nil { + t.Fatal("error encoding:", err) + } + var b1 Bug2 + err := NewDecoder(&buf).Decode(&b1) + if err != nil { + t.Fatal("decode:", err) + } + if b1.A != b0.A { + t.Fatalf("got %d want %d", b1.A, b0.A) + } + if b1.C != nil || b1.CP != nil || b1.F != nil || b1.FPP != nil { + t.Fatal("unexpected value for chan or func") + } +} + +func TestSliceIncompatibility(t *testing.T) { + var in = []byte{1, 2, 3} + var out []int + if err := encAndDec(in, &out); err == nil { + t.Error("expected compatibility error") + } +} + +// Mutually recursive slices of structs caused problems. +type Bug3 struct { + Num int + Children []*Bug3 +} + +func TestGobPtrSlices(t *testing.T) { + in := []*Bug3{ + {1, nil}, + {2, nil}, + } + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(&in) + if err != nil { + t.Fatal("encode:", err) + } + + var out []*Bug3 + err = NewDecoder(b).Decode(&out) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(in, out) { + t.Fatalf("got %v; wanted %v", out, in) + } +} + +// getDecEnginePtr cached engine for ut.base instead of ut.user so we passed +// a *map and then tried to reuse its engine to decode the inner map. +func TestPtrToMapOfMap(t *testing.T) { + Register(make(map[string]any)) + subdata := make(map[string]any) + subdata["bar"] = "baz" + data := make(map[string]any) + data["foo"] = subdata + + b := new(bytes.Buffer) + err := NewEncoder(b).Encode(data) + if err != nil { + t.Fatal("encode:", err) + } + var newData map[string]any + err = NewDecoder(b).Decode(&newData) + if err != nil { + t.Fatal("decode:", err) + } + if !reflect.DeepEqual(data, newData) { + t.Fatalf("expected %v got %v", data, newData) + } +} + +// Test that untyped nils generate an error, not a panic. +// See Issue 16204. +func TestCatchInvalidNilValue(t *testing.T) { + encodeErr, panicErr := encodeAndRecover(nil) + if panicErr != nil { + t.Fatalf("panicErr=%v, should not panic encoding untyped nil", panicErr) + } + if encodeErr == nil { + t.Errorf("got err=nil, want non-nil error when encoding untyped nil value") + } else if !strings.Contains(encodeErr.Error(), "nil value") { + t.Errorf("expected 'nil value' error; got err=%v", encodeErr) + } +} + +// A top-level nil pointer generates a panic with a helpful string-valued message. +func TestTopLevelNilPointer(t *testing.T) { + var ip *int + encodeErr, panicErr := encodeAndRecover(ip) + if encodeErr != nil { + t.Fatal("error in encode:", encodeErr) + } + if panicErr == nil { + t.Fatal("top-level nil pointer did not panic") + } + errMsg := panicErr.Error() + if !strings.Contains(errMsg, "nil pointer") { + t.Fatal("expected nil pointer error, got:", errMsg) + } +} + +func encodeAndRecover(value any) (encodeErr, panicErr error) { + defer func() { + e := recover() + if e != nil { + switch err := e.(type) { + case error: + panicErr = err + default: + panicErr = fmt.Errorf("%v", err) + } + } + }() + + encodeErr = NewEncoder(io.Discard).Encode(value) + return +} + +func TestNilPointerPanics(t *testing.T) { + var ( + nilStringPtr *string + intMap = make(map[int]int) + intMapPtr = &intMap + nilIntMapPtr *map[int]int + zero int + nilBoolChannel chan bool + nilBoolChannelPtr *chan bool + nilStringSlice []string + stringSlice = make([]string, 1) + nilStringSlicePtr *[]string + ) + + testCases := []struct { + value any + mustPanic bool + }{ + {nilStringPtr, true}, + {intMap, false}, + {intMapPtr, false}, + {nilIntMapPtr, true}, + {zero, false}, + {nilStringSlice, false}, + {stringSlice, false}, + {nilStringSlicePtr, true}, + {nilBoolChannel, false}, + {nilBoolChannelPtr, true}, + } + + for _, tt := range testCases { + _, panicErr := encodeAndRecover(tt.value) + if tt.mustPanic { + if panicErr == nil { + t.Errorf("expected panic with input %#v, did not panic", tt.value) + } + continue + } + if panicErr != nil { + t.Fatalf("expected no panic with input %#v, got panic=%v", tt.value, panicErr) + } + } +} + +func TestNilPointerInsideInterface(t *testing.T) { + var ip *int + si := struct { + I any + }{ + I: ip, + } + buf := new(bytes.Buffer) + err := NewEncoder(buf).Encode(si) + if err == nil { + t.Fatal("expected error, got none") + } + errMsg := err.Error() + if !strings.Contains(errMsg, "nil pointer") || !strings.Contains(errMsg, "interface") { + t.Fatal("expected error about nil pointer and interface, got:", errMsg) + } +} + +type Bug4Public struct { + Name string + Secret Bug4Secret +} + +type Bug4Secret struct { + a int // error: no exported fields. +} + +// Test that a failed compilation doesn't leave around an executable encoder. +// Issue 3723. +func TestMultipleEncodingsOfBadType(t *testing.T) { + x := Bug4Public{ + Name: "name", + Secret: Bug4Secret{1}, + } + buf := new(bytes.Buffer) + enc := NewEncoder(buf) + err := enc.Encode(x) + if err == nil { + t.Fatal("first encoding: expected error") + } + buf.Reset() + enc = NewEncoder(buf) + err = enc.Encode(x) + if err == nil { + t.Fatal("second encoding: expected error") + } + if !strings.Contains(err.Error(), "no exported fields") { + t.Errorf("expected error about no exported fields; got %v", err) + } +} + +// There was an error check comparing the length of the input with the +// length of the slice being decoded. It was wrong because the next +// thing in the input might be a type definition, which would lead to +// an incorrect length check. This test reproduces the corner case. + +type Z struct { +} + +func Test29ElementSlice(t *testing.T) { + Register(Z{}) + src := make([]any, 100) // Size needs to be bigger than size of type definition. + for i := range src { + src[i] = Z{} + } + buf := new(bytes.Buffer) + err := NewEncoder(buf).Encode(src) + if err != nil { + t.Fatalf("encode: %v", err) + return + } + + var dst []any + err = NewDecoder(buf).Decode(&dst) + if err != nil { + t.Errorf("decode: %v", err) + return + } +} + +// Don't crash, just give error when allocating a huge slice. +// Issue 8084. +func TestErrorForHugeSlice(t *testing.T) { + // Encode an int slice. + buf := new(bytes.Buffer) + slice := []int{1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + err := NewEncoder(buf).Encode(slice) + if err != nil { + t.Fatal("encode:", err) + } + // Reach into the buffer and smash the count to make the encoded slice very long. + buf.Bytes()[buf.Len()-len(slice)-1] = 0xfa + // Decode and see error. + err = NewDecoder(buf).Decode(&slice) + if err == nil { + t.Fatal("decode: no error") + } + if !strings.Contains(err.Error(), "slice too big") { + t.Fatalf("decode: expected slice too big error, got %s", err.Error()) + } +} + +type badDataTest struct { + input string // The input encoded as a hex string. + error string // A substring of the error that should result. + data any // What to decode into. +} + +var badDataTests = []badDataTest{ + {"", "EOF", nil}, + {"7F6869", "unexpected EOF", nil}, + {"036e6f77206973207468652074696d6520666f7220616c6c20676f6f64206d656e", "unknown type id", new(ET2)}, + {"0424666f6f", "field numbers out of bounds", new(ET2)}, // Issue 6323. + {"05100028557b02027f8302", "interface encoding", nil}, // Issue 10270. + // Issue 10273. + {"130a00fb5dad0bf8ff020263e70002fa28020202a89859", "slice length too large", nil}, + {"0f1000fb285d003316020735ff023a65c5", "interface encoding", nil}, + {"03fffb0616fffc00f902ff02ff03bf005d02885802a311a8120228022c028ee7", "GobDecoder", nil}, + // Issue 10491. + {"10fe010f020102fe01100001fe010e000016fe010d030102fe010e00010101015801fe01100000000bfe011000f85555555555555555", "exceeds input size", nil}, +} + +// TestBadData tests that various problems caused by malformed input +// are caught as errors and do not cause panics. +func TestBadData(t *testing.T) { + for i, test := range badDataTests { + data, err := hex.DecodeString(test.input) + if err != nil { + t.Fatalf("#%d: hex error: %s", i, err) + } + d := NewDecoder(bytes.NewReader(data)) + err = d.Decode(test.data) + if err == nil { + t.Errorf("decode: no error") + continue + } + if !strings.Contains(err.Error(), test.error) { + t.Errorf("#%d: decode: expected %q error, got %s", i, test.error, err.Error()) + } + } +} + +func TestDecodeErrorMultipleTypes(t *testing.T) { + type Test struct { + A string + B int + } + var b bytes.Buffer + NewEncoder(&b).Encode(Test{"one", 1}) + + var result, result2 Test + dec := NewDecoder(&b) + err := dec.Decode(&result) + if err != nil { + t.Errorf("decode: unexpected error %v", err) + } + + b.Reset() + NewEncoder(&b).Encode(Test{"two", 2}) + err = dec.Decode(&result2) + if err == nil { + t.Errorf("decode: expected duplicate type error, got nil") + } else if !strings.Contains(err.Error(), "duplicate type") { + t.Errorf("decode: expected duplicate type error, got %s", err.Error()) + } +} + +// Issue 24075 +func TestMarshalFloatMap(t *testing.T) { + nan1 := math.NaN() + nan2 := math.Float64frombits(math.Float64bits(nan1) ^ 1) // A different NaN in the same class. + + in := map[float64]string{ + nan1: "a", + nan1: "b", + nan2: "c", + } + + var b bytes.Buffer + enc := NewEncoder(&b) + if err := enc.Encode(in); err != nil { + t.Errorf("Encode : %v", err) + } + + out := map[float64]string{} + dec := NewDecoder(&b) + if err := dec.Decode(&out); err != nil { + t.Fatalf("Decode : %v", err) + } + + type mapEntry struct { + keyBits uint64 + value string + } + readMap := func(m map[float64]string) (entries []mapEntry) { + for k, v := range m { + entries = append(entries, mapEntry{math.Float64bits(k), v}) + } + sort.Slice(entries, func(i, j int) bool { + ei, ej := entries[i], entries[j] + if ei.keyBits != ej.keyBits { + return ei.keyBits < ej.keyBits + } + return ei.value < ej.value + }) + return entries + } + + got := readMap(out) + want := readMap(in) + if !reflect.DeepEqual(got, want) { + t.Fatalf("\nEncode: %v\nDecode: %v", want, got) + } +} + +func TestDecodePartial(t *testing.T) { + type T struct { + X []int + Y string + } + + var buf bytes.Buffer + t1 := T{X: []int{1, 2, 3}, Y: "foo"} + t2 := T{X: []int{4, 5, 6}, Y: "bar"} + enc := NewEncoder(&buf) + + t1start := 0 + if err := enc.Encode(&t1); err != nil { + t.Fatal(err) + } + + t2start := buf.Len() + if err := enc.Encode(&t2); err != nil { + t.Fatal(err) + } + + data := buf.Bytes() + for i := 0; i <= len(data); i++ { + bufr := bytes.NewReader(data[:i]) + + // Decode both values, stopping at the first error. + var t1b, t2b T + dec := NewDecoder(bufr) + var err error + err = dec.Decode(&t1b) + if err == nil { + err = dec.Decode(&t2b) + } + + switch i { + case t1start, t2start: + // Either the first or the second Decode calls had zero input. + if err != io.EOF { + t.Errorf("%d/%d: expected io.EOF: %v", i, len(data), err) + } + case len(data): + // We reached the end of the entire input. + if err != nil { + t.Errorf("%d/%d: unexpected error: %v", i, len(data), err) + } + if !reflect.DeepEqual(t1b, t1) { + t.Fatalf("t1 value mismatch: got %v, want %v", t1b, t1) + } + if !reflect.DeepEqual(t2b, t2) { + t.Fatalf("t2 value mismatch: got %v, want %v", t2b, t2) + } + default: + // In between, we must see io.ErrUnexpectedEOF. + // The decoder used to erroneously return io.EOF in some cases here, + // such as if the input was cut off right after some type specs, + // but before any value was actually transmitted. + if err != io.ErrUnexpectedEOF { + t.Errorf("%d/%d: expected io.ErrUnexpectedEOF: %v", i, len(data), err) + } + } + } +} + +func TestDecoderOverflow(t *testing.T) { + // Issue 55337. + dec := NewDecoder(bytes.NewReader([]byte{ + 0x12, 0xff, 0xff, 0x2, 0x2, 0x20, 0x0, 0xf8, 0x7f, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x20, 0x20, 0x20, 0x20, 0x20, + })) + var r interface{} + err := dec.Decode(r) + if err == nil { + t.Fatalf("expected an error") + } +} diff --git a/src/encoding/gob/error.go b/src/encoding/gob/error.go new file mode 100644 index 0000000..9c614e3 --- /dev/null +++ b/src/encoding/gob/error.go @@ -0,0 +1,42 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import "fmt" + +// Errors in decoding and encoding are handled using panic and recover. +// Panics caused by user error (that is, everything except run-time panics +// such as "index out of bounds" errors) do not leave the file that caused +// them, but are instead turned into plain error returns. Encoding and +// decoding functions and methods that do not return an error either use +// panic to report an error or are guaranteed error-free. + +// A gobError is used to distinguish errors (panics) generated in this package. +type gobError struct { + err error +} + +// errorf is like error_ but takes Printf-style arguments to construct an error. +// It always prefixes the message with "gob: ". +func errorf(format string, args ...any) { + error_(fmt.Errorf("gob: "+format, args...)) +} + +// error_ wraps the argument error and uses it as the argument to panic. +func error_(err error) { + panic(gobError{err}) +} + +// catchError is meant to be used as a deferred function to turn a panic(gobError) into a +// plain error. It overwrites the error return of the function that deferred its call. +func catchError(err *error) { + if e := recover(); e != nil { + ge, ok := e.(gobError) + if !ok { + panic(e) + } + *err = ge.err + } +} diff --git a/src/encoding/gob/example_encdec_test.go b/src/encoding/gob/example_encdec_test.go new file mode 100644 index 0000000..e45ad4c --- /dev/null +++ b/src/encoding/gob/example_encdec_test.go @@ -0,0 +1,61 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob_test + +import ( + "bytes" + "encoding/gob" + "fmt" + "log" +) + +// The Vector type has unexported fields, which the package cannot access. +// We therefore write a BinaryMarshal/BinaryUnmarshal method pair to allow us +// to send and receive the type with the gob package. These interfaces are +// defined in the "encoding" package. +// We could equivalently use the locally defined GobEncode/GobDecoder +// interfaces. +type Vector struct { + x, y, z int +} + +func (v Vector) MarshalBinary() ([]byte, error) { + // A simple encoding: plain text. + var b bytes.Buffer + fmt.Fprintln(&b, v.x, v.y, v.z) + return b.Bytes(), nil +} + +// UnmarshalBinary modifies the receiver so it must take a pointer receiver. +func (v *Vector) UnmarshalBinary(data []byte) error { + // A simple encoding: plain text. + b := bytes.NewBuffer(data) + _, err := fmt.Fscanln(b, &v.x, &v.y, &v.z) + return err +} + +// This example transmits a value that implements the custom encoding and decoding methods. +func Example_encodeDecode() { + var network bytes.Buffer // Stand-in for the network. + + // Create an encoder and send a value. + enc := gob.NewEncoder(&network) + err := enc.Encode(Vector{3, 4, 5}) + if err != nil { + log.Fatal("encode:", err) + } + + // Create a decoder and receive a value. + dec := gob.NewDecoder(&network) + var v Vector + err = dec.Decode(&v) + if err != nil { + log.Fatal("decode:", err) + } + fmt.Println(v) + + // Output: + // {3 4 5} +} diff --git a/src/encoding/gob/example_interface_test.go b/src/encoding/gob/example_interface_test.go new file mode 100644 index 0000000..cf5ba38 --- /dev/null +++ b/src/encoding/gob/example_interface_test.go @@ -0,0 +1,81 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob_test + +import ( + "bytes" + "encoding/gob" + "fmt" + "log" + "math" +) + +type Point struct { + X, Y int +} + +func (p Point) Hypotenuse() float64 { + return math.Hypot(float64(p.X), float64(p.Y)) +} + +type Pythagoras interface { + Hypotenuse() float64 +} + +// This example shows how to encode an interface value. The key +// distinction from regular types is to register the concrete type that +// implements the interface. +func Example_interface() { + var network bytes.Buffer // Stand-in for the network. + + // We must register the concrete type for the encoder and decoder (which would + // normally be on a separate machine from the encoder). On each end, this tells the + // engine which concrete type is being sent that implements the interface. + gob.Register(Point{}) + + // Create an encoder and send some values. + enc := gob.NewEncoder(&network) + for i := 1; i <= 3; i++ { + interfaceEncode(enc, Point{3 * i, 4 * i}) + } + + // Create a decoder and receive some values. + dec := gob.NewDecoder(&network) + for i := 1; i <= 3; i++ { + result := interfaceDecode(dec) + fmt.Println(result.Hypotenuse()) + } + + // Output: + // 5 + // 10 + // 15 +} + +// interfaceEncode encodes the interface value into the encoder. +func interfaceEncode(enc *gob.Encoder, p Pythagoras) { + // The encode will fail unless the concrete type has been + // registered. We registered it in the calling function. + + // Pass pointer to interface so Encode sees (and hence sends) a value of + // interface type. If we passed p directly it would see the concrete type instead. + // See the blog post, "The Laws of Reflection" for background. + err := enc.Encode(&p) + if err != nil { + log.Fatal("encode:", err) + } +} + +// interfaceDecode decodes the next interface value from the stream and returns it. +func interfaceDecode(dec *gob.Decoder) Pythagoras { + // The decode will fail unless the concrete type on the wire has been + // registered. We registered it in the calling function. + var p Pythagoras + err := dec.Decode(&p) + if err != nil { + log.Fatal("decode:", err) + } + return p +} diff --git a/src/encoding/gob/example_test.go b/src/encoding/gob/example_test.go new file mode 100644 index 0000000..16b7123 --- /dev/null +++ b/src/encoding/gob/example_test.go @@ -0,0 +1,60 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob_test + +import ( + "bytes" + "encoding/gob" + "fmt" + "log" +) + +type P struct { + X, Y, Z int + Name string +} + +type Q struct { + X, Y *int32 + Name string +} + +// This example shows the basic usage of the package: Create an encoder, +// transmit some values, receive them with a decoder. +func Example_basic() { + // Initialize the encoder and decoder. Normally enc and dec would be + // bound to network connections and the encoder and decoder would + // run in different processes. + var network bytes.Buffer // Stand-in for a network connection + enc := gob.NewEncoder(&network) // Will write to network. + dec := gob.NewDecoder(&network) // Will read from network. + + // Encode (send) some values. + err := enc.Encode(P{3, 4, 5, "Pythagoras"}) + if err != nil { + log.Fatal("encode error:", err) + } + err = enc.Encode(P{1782, 1841, 1922, "Treehouse"}) + if err != nil { + log.Fatal("encode error:", err) + } + + // Decode (receive) and print the values. + var q Q + err = dec.Decode(&q) + if err != nil { + log.Fatal("decode error 1:", err) + } + fmt.Printf("%q: {%d, %d}\n", q.Name, *q.X, *q.Y) + err = dec.Decode(&q) + if err != nil { + log.Fatal("decode error 2:", err) + } + fmt.Printf("%q: {%d, %d}\n", q.Name, *q.X, *q.Y) + + // Output: + // "Pythagoras": {3, 4} + // "Treehouse": {1782, 1841} +} diff --git a/src/encoding/gob/gobencdec_test.go b/src/encoding/gob/gobencdec_test.go new file mode 100644 index 0000000..ae806fc --- /dev/null +++ b/src/encoding/gob/gobencdec_test.go @@ -0,0 +1,822 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains tests of the GobEncoder/GobDecoder support. + +package gob + +import ( + "bytes" + "errors" + "fmt" + "io" + "net" + "reflect" + "strings" + "testing" + "time" +) + +// Types that implement the GobEncoder/Decoder interfaces. + +type ByteStruct struct { + a byte // not an exported field +} + +type StringStruct struct { + s string // not an exported field +} + +type ArrayStruct struct { + a [8192]byte // not an exported field +} + +type Gobber int + +type ValueGobber string // encodes with a value, decodes with a pointer. + +type BinaryGobber int + +type BinaryValueGobber string + +type TextGobber int + +type TextValueGobber string + +// The relevant methods + +func (g *ByteStruct) GobEncode() ([]byte, error) { + b := make([]byte, 3) + b[0] = g.a + b[1] = g.a + 1 + b[2] = g.a + 2 + return b, nil +} + +func (g *ByteStruct) GobDecode(data []byte) error { + if g == nil { + return errors.New("NIL RECEIVER") + } + // Expect N sequential-valued bytes. + if len(data) == 0 { + return io.EOF + } + g.a = data[0] + for i, c := range data { + if c != g.a+byte(i) { + return errors.New("invalid data sequence") + } + } + return nil +} + +func (g *StringStruct) GobEncode() ([]byte, error) { + return []byte(g.s), nil +} + +func (g *StringStruct) GobDecode(data []byte) error { + // Expect N sequential-valued bytes. + if len(data) == 0 { + return io.EOF + } + a := data[0] + for i, c := range data { + if c != a+byte(i) { + return errors.New("invalid data sequence") + } + } + g.s = string(data) + return nil +} + +func (a *ArrayStruct) GobEncode() ([]byte, error) { + return a.a[:], nil +} + +func (a *ArrayStruct) GobDecode(data []byte) error { + if len(data) != len(a.a) { + return errors.New("wrong length in array decode") + } + copy(a.a[:], data) + return nil +} + +func (g *Gobber) GobEncode() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%d", *g)), nil +} + +func (g *Gobber) GobDecode(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%d", (*int)(g)) + return err +} + +func (g *BinaryGobber) MarshalBinary() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%d", *g)), nil +} + +func (g *BinaryGobber) UnmarshalBinary(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%d", (*int)(g)) + return err +} + +func (g *TextGobber) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%d", *g)), nil +} + +func (g *TextGobber) UnmarshalText(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%d", (*int)(g)) + return err +} + +func (v ValueGobber) GobEncode() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%s", v)), nil +} + +func (v *ValueGobber) GobDecode(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%s", (*string)(v)) + return err +} + +func (v BinaryValueGobber) MarshalBinary() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%s", v)), nil +} + +func (v *BinaryValueGobber) UnmarshalBinary(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%s", (*string)(v)) + return err +} + +func (v TextValueGobber) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf("VALUE=%s", v)), nil +} + +func (v *TextValueGobber) UnmarshalText(data []byte) error { + _, err := fmt.Sscanf(string(data), "VALUE=%s", (*string)(v)) + return err +} + +// Structs that include GobEncodable fields. + +type GobTest0 struct { + X int // guarantee we have something in common with GobTest* + G *ByteStruct +} + +type GobTest1 struct { + X int // guarantee we have something in common with GobTest* + G *StringStruct +} + +type GobTest2 struct { + X int // guarantee we have something in common with GobTest* + G string // not a GobEncoder - should give us errors +} + +type GobTest3 struct { + X int // guarantee we have something in common with GobTest* + G *Gobber + B *BinaryGobber + T *TextGobber +} + +type GobTest4 struct { + X int // guarantee we have something in common with GobTest* + V ValueGobber + BV BinaryValueGobber + TV TextValueGobber +} + +type GobTest5 struct { + X int // guarantee we have something in common with GobTest* + V *ValueGobber + BV *BinaryValueGobber + TV *TextValueGobber +} + +type GobTest6 struct { + X int // guarantee we have something in common with GobTest* + V ValueGobber + W *ValueGobber + BV BinaryValueGobber + BW *BinaryValueGobber + TV TextValueGobber + TW *TextValueGobber +} + +type GobTest7 struct { + X int // guarantee we have something in common with GobTest* + V *ValueGobber + W ValueGobber + BV *BinaryValueGobber + BW BinaryValueGobber + TV *TextValueGobber + TW TextValueGobber +} + +type GobTestIgnoreEncoder struct { + X int // guarantee we have something in common with GobTest* +} + +type GobTestValueEncDec struct { + X int // guarantee we have something in common with GobTest* + G StringStruct // not a pointer. +} + +type GobTestIndirectEncDec struct { + X int // guarantee we have something in common with GobTest* + G ***StringStruct // indirections to the receiver. +} + +type GobTestArrayEncDec struct { + X int // guarantee we have something in common with GobTest* + A ArrayStruct // not a pointer. +} + +type GobTestIndirectArrayEncDec struct { + X int // guarantee we have something in common with GobTest* + A ***ArrayStruct // indirections to a large receiver. +} + +func TestGobEncoderField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // Now a field that's not a structure. + b.Reset() + gobber := Gobber(23) + bgobber := BinaryGobber(24) + tgobber := TextGobber(25) + err = enc.Encode(GobTest3{17, &gobber, &bgobber, &tgobber}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest3) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if *y.G != 23 || *y.B != 24 || *y.T != 25 { + t.Errorf("expected '23 got %d", *y.G) + } +} + +// Even though the field is a value, we can still take its address +// and should be able to call the methods. +func TestGobEncoderValueField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(&GobTestValueEncDec{17, StringStruct{"HIJKL"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestValueEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.s != "HIJKL" { + t.Errorf("expected `HIJKL` got %s", x.G.s) + } +} + +// GobEncode/Decode should work even if the value is +// more indirect than the receiver. +func TestGobEncoderIndirectField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + s := &StringStruct{"HIJKL"} + sp := &s + err := enc.Encode(GobTestIndirectEncDec{17, &sp}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIndirectEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if (***x.G).s != "HIJKL" { + t.Errorf("expected `HIJKL` got %s", (***x.G).s) + } +} + +// Test with a large field with methods. +func TestGobEncoderArrayField(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + var a GobTestArrayEncDec + a.X = 17 + for i := range a.A.a { + a.A.a[i] = byte(i) + } + err := enc.Encode(&a) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestArrayEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + for i, v := range x.A.a { + if v != byte(i) { + t.Errorf("expected %x got %x", byte(i), v) + break + } + } +} + +// Test an indirection to a large field with methods. +func TestGobEncoderIndirectArrayField(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + var a GobTestIndirectArrayEncDec + a.X = 17 + var array ArrayStruct + ap := &array + app := &ap + a.A = &app + for i := range array.a { + array.a[i] = byte(i) + } + err := enc.Encode(a) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIndirectArrayEncDec) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + for i, v := range (***x.A).a { + if v != byte(i) { + t.Errorf("expected %x got %x", byte(i), v) + break + } + } +} + +// As long as the fields have the same name and implement the +// interface, we can cross-connect them. Not sure it's useful +// and may even be bad but it works and it's hard to prevent +// without exposing the contents of the object, which would +// defeat the purpose. +func TestGobEncoderFieldsOfDifferentType(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.G.a != 'A' { + t.Errorf("expected 'A' got %c", x.G.a) + } + // now the other direction, byte in field to string in field + b.Reset() + err = enc.Encode(GobTest0{17, &ByteStruct{'X'}}) + if err != nil { + t.Fatal("encode error:", err) + } + y := new(GobTest1) + err = dec.Decode(y) + if err != nil { + t.Fatal("decode error:", err) + } + if y.G.s != "XYZ" { + t.Fatalf("expected `XYZ` got %q", y.G.s) + } +} + +// Test that we can encode a value and decode into a pointer. +func TestGobEncoderValueEncoder(t *testing.T) { + // first, string in field to byte in field + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest4{17, ValueGobber("hello"), BinaryValueGobber("Καλημέρα"), TextValueGobber("こんにちは")}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest5) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if *x.V != "hello" || *x.BV != "Καλημέρα" || *x.TV != "こんにちは" { + t.Errorf("expected `hello` got %s", *x.V) + } +} + +// Test that we can use a value then a pointer type of a GobEncoder +// in the same encoded value. Bug 4647. +func TestGobEncoderValueThenPointer(t *testing.T) { + v := ValueGobber("forty-two") + w := ValueGobber("six-by-nine") + bv := BinaryValueGobber("1nanocentury") + bw := BinaryValueGobber("πseconds") + tv := TextValueGobber("gravitationalacceleration") + tw := TextValueGobber("π²ft/s²") + + // this was a bug: encoding a GobEncoder by value before a GobEncoder + // pointer would cause duplicate type definitions to be sent. + + b := new(bytes.Buffer) + enc := NewEncoder(b) + if err := enc.Encode(GobTest6{42, v, &w, bv, &bw, tv, &tw}); err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest6) + if err := dec.Decode(x); err != nil { + t.Fatal("decode error:", err) + } + + if got, want := x.V, v; got != want { + t.Errorf("v = %q, want %q", got, want) + } + if got, want := x.W, w; got == nil { + t.Errorf("w = nil, want %q", want) + } else if *got != want { + t.Errorf("w = %q, want %q", *got, want) + } + + if got, want := x.BV, bv; got != want { + t.Errorf("bv = %q, want %q", got, want) + } + if got, want := x.BW, bw; got == nil { + t.Errorf("bw = nil, want %q", want) + } else if *got != want { + t.Errorf("bw = %q, want %q", *got, want) + } + + if got, want := x.TV, tv; got != want { + t.Errorf("tv = %q, want %q", got, want) + } + if got, want := x.TW, tw; got == nil { + t.Errorf("tw = nil, want %q", want) + } else if *got != want { + t.Errorf("tw = %q, want %q", *got, want) + } +} + +// Test that we can use a pointer then a value type of a GobEncoder +// in the same encoded value. +func TestGobEncoderPointerThenValue(t *testing.T) { + v := ValueGobber("forty-two") + w := ValueGobber("six-by-nine") + bv := BinaryValueGobber("1nanocentury") + bw := BinaryValueGobber("πseconds") + tv := TextValueGobber("gravitationalacceleration") + tw := TextValueGobber("π²ft/s²") + + b := new(bytes.Buffer) + enc := NewEncoder(b) + if err := enc.Encode(GobTest7{42, &v, w, &bv, bw, &tv, tw}); err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest7) + if err := dec.Decode(x); err != nil { + t.Fatal("decode error:", err) + } + + if got, want := x.V, v; got == nil { + t.Errorf("v = nil, want %q", want) + } else if *got != want { + t.Errorf("v = %q, want %q", *got, want) + } + if got, want := x.W, w; got != want { + t.Errorf("w = %q, want %q", got, want) + } + + if got, want := x.BV, bv; got == nil { + t.Errorf("bv = nil, want %q", want) + } else if *got != want { + t.Errorf("bv = %q, want %q", *got, want) + } + if got, want := x.BW, bw; got != want { + t.Errorf("bw = %q, want %q", got, want) + } + + if got, want := x.TV, tv; got == nil { + t.Errorf("tv = nil, want %q", want) + } else if *got != want { + t.Errorf("tv = %q, want %q", *got, want) + } + if got, want := x.TW, tw; got != want { + t.Errorf("tw = %q, want %q", got, want) + } +} + +func TestGobEncoderFieldTypeError(t *testing.T) { + // GobEncoder to non-decoder: error + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(GobTest1{17, &StringStruct{"ABC"}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := &GobTest2{} + err = dec.Decode(x) + if err == nil { + t.Fatal("expected decode error for mismatched fields (encoder to non-decoder)") + } + if !strings.Contains(err.Error(), "type") { + t.Fatal("expected type error; got", err) + } + // Non-encoder to GobDecoder: error + b.Reset() + err = enc.Encode(GobTest2{17, "ABC"}) + if err != nil { + t.Fatal("encode error:", err) + } + y := &GobTest1{} + err = dec.Decode(y) + if err == nil { + t.Fatal("expected decode error for mismatched fields (non-encoder to decoder)") + } + if !strings.Contains(err.Error(), "type") { + t.Fatal("expected type error; got", err) + } +} + +// Even though ByteStruct is a struct, it's treated as a singleton at the top level. +func TestGobEncoderStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(&ByteStruct{'A'}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(ByteStruct) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.a != 'A' { + t.Errorf("expected 'A' got %c", x.a) + } +} + +func TestGobEncoderNonStructSingleton(t *testing.T) { + b := new(bytes.Buffer) + enc := NewEncoder(b) + var g Gobber = 1234 + err := enc.Encode(&g) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + var x Gobber + err = dec.Decode(&x) + if err != nil { + t.Fatal("decode error:", err) + } + if x != 1234 { + t.Errorf("expected 1234 got %d", x) + } +} + +func TestGobEncoderIgnoreStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{17, &ByteStruct{'A'}}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} + +func TestGobEncoderIgnoreNonStructField(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + gobber := Gobber(23) + bgobber := BinaryGobber(24) + tgobber := TextGobber(25) + err := enc.Encode(GobTest3{17, &gobber, &bgobber, &tgobber}) + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTestIgnoreEncoder) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 17 { + t.Errorf("expected 17 got %c", x.X) + } +} + +func TestGobEncoderIgnoreNilEncoder(t *testing.T) { + b := new(bytes.Buffer) + // First a field that's a structure. + enc := NewEncoder(b) + err := enc.Encode(GobTest0{X: 18}) // G is nil + if err != nil { + t.Fatal("encode error:", err) + } + dec := NewDecoder(b) + x := new(GobTest0) + err = dec.Decode(x) + if err != nil { + t.Fatal("decode error:", err) + } + if x.X != 18 { + t.Errorf("expected x.X = 18, got %v", x.X) + } + if x.G != nil { + t.Errorf("expected x.G = nil, got %v", x.G) + } +} + +type gobDecoderBug0 struct { + foo, bar string +} + +func (br *gobDecoderBug0) String() string { + return br.foo + "-" + br.bar +} + +func (br *gobDecoderBug0) GobEncode() ([]byte, error) { + return []byte(br.String()), nil +} + +func (br *gobDecoderBug0) GobDecode(b []byte) error { + br.foo = "foo" + br.bar = "bar" + return nil +} + +// This was a bug: the receiver has a different indirection level +// than the variable. +func TestGobEncoderExtraIndirect(t *testing.T) { + gdb := &gobDecoderBug0{"foo", "bar"} + buf := new(bytes.Buffer) + e := NewEncoder(buf) + if err := e.Encode(gdb); err != nil { + t.Fatalf("encode: %v", err) + } + d := NewDecoder(buf) + var got *gobDecoderBug0 + if err := d.Decode(&got); err != nil { + t.Fatalf("decode: %v", err) + } + if got.foo != gdb.foo || got.bar != gdb.bar { + t.Errorf("got = %q, want %q", got, gdb) + } +} + +// Another bug: this caused a crash with the new Go1 Time type. +// We throw in a gob-encoding array, to test another case of isZero, +// and a struct containing a nil interface, to test a third. +type isZeroBug struct { + T time.Time + S string + I int + A isZeroBugArray + F isZeroBugInterface +} + +type isZeroBugArray [2]uint8 + +// Receiver is value, not pointer, to test isZero of array. +func (a isZeroBugArray) GobEncode() (b []byte, e error) { + b = append(b, a[:]...) + return b, nil +} + +func (a *isZeroBugArray) GobDecode(data []byte) error { + if len(data) != len(a) { + return io.EOF + } + a[0] = data[0] + a[1] = data[1] + return nil +} + +type isZeroBugInterface struct { + I any +} + +func (i isZeroBugInterface) GobEncode() (b []byte, e error) { + return []byte{}, nil +} + +func (i *isZeroBugInterface) GobDecode(data []byte) error { + return nil +} + +func TestGobEncodeIsZero(t *testing.T) { + x := isZeroBug{time.Unix(1e9, 0), "hello", -55, isZeroBugArray{1, 2}, isZeroBugInterface{}} + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(x) + if err != nil { + t.Fatal("encode:", err) + } + var y isZeroBug + dec := NewDecoder(b) + err = dec.Decode(&y) + if err != nil { + t.Fatal("decode:", err) + } + if x != y { + t.Fatalf("%v != %v", x, y) + } +} + +func TestGobEncodePtrError(t *testing.T) { + var err error + b := new(bytes.Buffer) + enc := NewEncoder(b) + err = enc.Encode(&err) + if err != nil { + t.Fatal("encode:", err) + } + dec := NewDecoder(b) + err2 := fmt.Errorf("foo") + err = dec.Decode(&err2) + if err != nil { + t.Fatal("decode:", err) + } + if err2 != nil { + t.Fatalf("expected nil, got %v", err2) + } +} + +func TestNetIP(t *testing.T) { + // Encoding of net.IP{1,2,3,4} in Go 1.1. + enc := []byte{0x07, 0x0a, 0x00, 0x04, 0x01, 0x02, 0x03, 0x04} + + var ip net.IP + err := NewDecoder(bytes.NewReader(enc)).Decode(&ip) + if err != nil { + t.Fatalf("decode: %v", err) + } + if ip.String() != "1.2.3.4" { + t.Errorf("decoded to %v, want 1.2.3.4", ip.String()) + } +} + +func TestIgnoreDepthLimit(t *testing.T) { + // We don't test the actual depth limit because it requires building an + // extremely large message, which takes quite a while. + oldNestingDepth := maxIgnoreNestingDepth + maxIgnoreNestingDepth = 100 + defer func() { maxIgnoreNestingDepth = oldNestingDepth }() + b := new(bytes.Buffer) + enc := NewEncoder(b) + typ := reflect.TypeFor[int]() + nested := reflect.ArrayOf(1, typ) + for i := 0; i < 100; i++ { + nested = reflect.ArrayOf(1, nested) + } + badStruct := reflect.New(reflect.StructOf([]reflect.StructField{{Name: "F", Type: nested}})) + enc.Encode(badStruct.Interface()) + dec := NewDecoder(b) + var output struct{ Hello int } + expectedErr := "invalid nesting depth" + if err := dec.Decode(&output); err == nil || err.Error() != expectedErr { + t.Errorf("Decode didn't fail with depth limit of 100: want %q, got %q", expectedErr, err) + } +} diff --git a/src/encoding/gob/timing_test.go b/src/encoding/gob/timing_test.go new file mode 100644 index 0000000..84349fb --- /dev/null +++ b/src/encoding/gob/timing_test.go @@ -0,0 +1,332 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "io" + "os" + "reflect" + "runtime" + "testing" +) + +type Bench struct { + A int + B float64 + C string + D []byte +} + +func benchmarkEndToEnd(b *testing.B, ctor func() any, pipe func() (r io.Reader, w io.Writer, err error)) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + r, w, err := pipe() + if err != nil { + b.Fatal("can't get pipe:", err) + } + v := ctor() + enc := NewEncoder(w) + dec := NewDecoder(r) + for pb.Next() { + if err := enc.Encode(v); err != nil { + b.Fatal("encode error:", err) + } + if err := dec.Decode(v); err != nil { + b.Fatal("decode error:", err) + } + } + }) +} + +func BenchmarkEndToEndPipe(b *testing.B) { + benchmarkEndToEnd(b, func() any { + return &Bench{7, 3.2, "now is the time", bytes.Repeat([]byte("for all good men"), 100)} + }, func() (r io.Reader, w io.Writer, err error) { + r, w, err = os.Pipe() + return + }) +} + +func BenchmarkEndToEndByteBuffer(b *testing.B) { + benchmarkEndToEnd(b, func() any { + return &Bench{7, 3.2, "now is the time", bytes.Repeat([]byte("for all good men"), 100)} + }, func() (r io.Reader, w io.Writer, err error) { + var buf bytes.Buffer + return &buf, &buf, nil + }) +} + +func BenchmarkEndToEndSliceByteBuffer(b *testing.B) { + benchmarkEndToEnd(b, func() any { + v := &Bench{7, 3.2, "now is the time", nil} + Register(v) + arr := make([]any, 100) + for i := range arr { + arr[i] = v + } + return &arr + }, func() (r io.Reader, w io.Writer, err error) { + var buf bytes.Buffer + return &buf, &buf, nil + }) +} + +func TestCountEncodeMallocs(t *testing.T) { + if testing.Short() { + t.Skip("skipping malloc count in short mode") + } + if runtime.GOMAXPROCS(0) > 1 { + t.Skip("skipping; GOMAXPROCS>1") + } + + const N = 1000 + + var buf bytes.Buffer + enc := NewEncoder(&buf) + bench := &Bench{7, 3.2, "now is the time", []byte("for all good men")} + + allocs := testing.AllocsPerRun(N, func() { + err := enc.Encode(bench) + if err != nil { + t.Fatal("encode:", err) + } + }) + if allocs != 0 { + t.Fatalf("mallocs per encode of type Bench: %v; wanted 0\n", allocs) + } +} + +func TestCountDecodeMallocs(t *testing.T) { + if testing.Short() { + t.Skip("skipping malloc count in short mode") + } + if runtime.GOMAXPROCS(0) > 1 { + t.Skip("skipping; GOMAXPROCS>1") + } + + const N = 1000 + + var buf bytes.Buffer + enc := NewEncoder(&buf) + bench := &Bench{7, 3.2, "now is the time", []byte("for all good men")} + + // Fill the buffer with enough to decode + testing.AllocsPerRun(N, func() { + err := enc.Encode(bench) + if err != nil { + t.Fatal("encode:", err) + } + }) + + dec := NewDecoder(&buf) + allocs := testing.AllocsPerRun(N, func() { + *bench = Bench{} + err := dec.Decode(&bench) + if err != nil { + t.Fatal("decode:", err) + } + }) + if allocs != 3 { + t.Fatalf("mallocs per decode of type Bench: %v; wanted 3\n", allocs) + } +} + +func benchmarkEncodeSlice(b *testing.B, a any) { + b.ResetTimer() + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + + for pb.Next() { + buf.Reset() + err := enc.Encode(a) + if err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkEncodeComplex128Slice(b *testing.B) { + a := make([]complex128, 1000) + for i := range a { + a[i] = 1.2 + 3.4i + } + benchmarkEncodeSlice(b, a) +} + +func BenchmarkEncodeFloat64Slice(b *testing.B) { + a := make([]float64, 1000) + for i := range a { + a[i] = 1.23e4 + } + benchmarkEncodeSlice(b, a) +} + +func BenchmarkEncodeInt32Slice(b *testing.B) { + a := make([]int32, 1000) + for i := range a { + a[i] = int32(i * 100) + } + benchmarkEncodeSlice(b, a) +} + +func BenchmarkEncodeStringSlice(b *testing.B) { + a := make([]string, 1000) + for i := range a { + a[i] = "now is the time" + } + benchmarkEncodeSlice(b, a) +} + +func BenchmarkEncodeInterfaceSlice(b *testing.B) { + a := make([]any, 1000) + for i := range a { + a[i] = "now is the time" + } + benchmarkEncodeSlice(b, a) +} + +// benchmarkBuf is a read buffer we can reset +type benchmarkBuf struct { + offset int + data []byte +} + +func (b *benchmarkBuf) Read(p []byte) (n int, err error) { + n = copy(p, b.data[b.offset:]) + if n == 0 { + return 0, io.EOF + } + b.offset += n + return +} + +func (b *benchmarkBuf) ReadByte() (c byte, err error) { + if b.offset >= len(b.data) { + return 0, io.EOF + } + c = b.data[b.offset] + b.offset++ + return +} + +func (b *benchmarkBuf) reset() { + b.offset = 0 +} + +func benchmarkDecodeSlice(b *testing.B, a any) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + err := enc.Encode(a) + if err != nil { + b.Fatal(err) + } + + ra := reflect.ValueOf(a) + rt := ra.Type() + b.ResetTimer() + + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + // TODO(#19025): Move per-thread allocation before ResetTimer. + rp := reflect.New(rt) + rp.Elem().Set(reflect.MakeSlice(rt, ra.Len(), ra.Cap())) + p := rp.Interface() + + bbuf := benchmarkBuf{data: buf.Bytes()} + + for pb.Next() { + bbuf.reset() + dec := NewDecoder(&bbuf) + err := dec.Decode(p) + if err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkDecodeComplex128Slice(b *testing.B) { + a := make([]complex128, 1000) + for i := range a { + a[i] = 1.2 + 3.4i + } + benchmarkDecodeSlice(b, a) +} + +func BenchmarkDecodeFloat64Slice(b *testing.B) { + a := make([]float64, 1000) + for i := range a { + a[i] = 1.23e4 + } + benchmarkDecodeSlice(b, a) +} + +func BenchmarkDecodeInt32Slice(b *testing.B) { + a := make([]int32, 1000) + for i := range a { + a[i] = 1234 + } + benchmarkDecodeSlice(b, a) +} + +func BenchmarkDecodeStringSlice(b *testing.B) { + a := make([]string, 1000) + for i := range a { + a[i] = "now is the time" + } + benchmarkDecodeSlice(b, a) +} +func BenchmarkDecodeStringsSlice(b *testing.B) { + a := make([][]string, 1000) + for i := range a { + a[i] = []string{"now is the time"} + } + benchmarkDecodeSlice(b, a) +} +func BenchmarkDecodeBytesSlice(b *testing.B) { + a := make([][]byte, 1000) + for i := range a { + a[i] = []byte("now is the time") + } + benchmarkDecodeSlice(b, a) +} + +func BenchmarkDecodeInterfaceSlice(b *testing.B) { + a := make([]any, 1000) + for i := range a { + a[i] = "now is the time" + } + benchmarkDecodeSlice(b, a) +} + +func BenchmarkDecodeMap(b *testing.B) { + count := 1000 + m := make(map[int]int, count) + for i := 0; i < count; i++ { + m[i] = i + } + var buf bytes.Buffer + enc := NewEncoder(&buf) + err := enc.Encode(m) + if err != nil { + b.Fatal(err) + } + bbuf := benchmarkBuf{data: buf.Bytes()} + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + var rm map[int]int + bbuf.reset() + dec := NewDecoder(&bbuf) + err := dec.Decode(&rm) + if err != nil { + b.Fatal(i, err) + } + } +} diff --git a/src/encoding/gob/type.go b/src/encoding/gob/type.go new file mode 100644 index 0000000..30d8ca6 --- /dev/null +++ b/src/encoding/gob/type.go @@ -0,0 +1,944 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "encoding" + "errors" + "fmt" + "os" + "reflect" + "sync" + "sync/atomic" + "unicode" + "unicode/utf8" +) + +// userTypeInfo stores the information associated with a type the user has handed +// to the package. It's computed once and stored in a map keyed by reflection +// type. +type userTypeInfo struct { + user reflect.Type // the type the user handed us + base reflect.Type // the base type after all indirections + indir int // number of indirections to reach the base type + externalEnc int // xGob, xBinary, or xText + externalDec int // xGob, xBinary, or xText + encIndir int8 // number of indirections to reach the receiver type; may be negative + decIndir int8 // number of indirections to reach the receiver type; may be negative +} + +// externalEncoding bits +const ( + xGob = 1 + iota // GobEncoder or GobDecoder + xBinary // encoding.BinaryMarshaler or encoding.BinaryUnmarshaler + xText // encoding.TextMarshaler or encoding.TextUnmarshaler +) + +var userTypeCache sync.Map // map[reflect.Type]*userTypeInfo + +// validUserType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, err will be non-nil. To be used when the error handler +// is not set up. +func validUserType(rt reflect.Type) (*userTypeInfo, error) { + if ui, ok := userTypeCache.Load(rt); ok { + return ui.(*userTypeInfo), nil + } + + // Construct a new userTypeInfo and atomically add it to the userTypeCache. + // If we lose the race, we'll waste a little CPU and create a little garbage + // but return the existing value anyway. + + ut := new(userTypeInfo) + ut.base = rt + ut.user = rt + // A type that is just a cycle of pointers (such as type T *T) cannot + // be represented in gobs, which need some concrete data. We use a + // cycle detection algorithm from Knuth, Vol 2, Section 3.1, Ex 6, + // pp 539-540. As we step through indirections, run another type at + // half speed. If they meet up, there's a cycle. + slowpoke := ut.base // walks half as fast as ut.base + for { + pt := ut.base + if pt.Kind() != reflect.Pointer { + break + } + ut.base = pt.Elem() + if ut.base == slowpoke { // ut.base lapped slowpoke + // recursive pointer type. + return nil, errors.New("can't represent recursive pointer type " + ut.base.String()) + } + if ut.indir%2 == 0 { + slowpoke = slowpoke.Elem() + } + ut.indir++ + } + + if ok, indir := implementsInterface(ut.user, gobEncoderInterfaceType); ok { + ut.externalEnc, ut.encIndir = xGob, indir + } else if ok, indir := implementsInterface(ut.user, binaryMarshalerInterfaceType); ok { + ut.externalEnc, ut.encIndir = xBinary, indir + } + + // NOTE(rsc): Would like to allow MarshalText here, but results in incompatibility + // with older encodings for net.IP. See golang.org/issue/6760. + // } else if ok, indir := implementsInterface(ut.user, textMarshalerInterfaceType); ok { + // ut.externalEnc, ut.encIndir = xText, indir + // } + + if ok, indir := implementsInterface(ut.user, gobDecoderInterfaceType); ok { + ut.externalDec, ut.decIndir = xGob, indir + } else if ok, indir := implementsInterface(ut.user, binaryUnmarshalerInterfaceType); ok { + ut.externalDec, ut.decIndir = xBinary, indir + } + + // See note above. + // } else if ok, indir := implementsInterface(ut.user, textUnmarshalerInterfaceType); ok { + // ut.externalDec, ut.decIndir = xText, indir + // } + + ui, _ := userTypeCache.LoadOrStore(rt, ut) + return ui.(*userTypeInfo), nil +} + +var ( + gobEncoderInterfaceType = reflect.TypeFor[GobEncoder]() + gobDecoderInterfaceType = reflect.TypeFor[GobDecoder]() + binaryMarshalerInterfaceType = reflect.TypeFor[encoding.BinaryMarshaler]() + binaryUnmarshalerInterfaceType = reflect.TypeFor[encoding.BinaryUnmarshaler]() + textMarshalerInterfaceType = reflect.TypeFor[encoding.TextMarshaler]() + textUnmarshalerInterfaceType = reflect.TypeFor[encoding.TextUnmarshaler]() + + wireTypeType = reflect.TypeFor[wireType]() +) + +// implementsInterface reports whether the type implements the +// gobEncoder/gobDecoder interface. +// It also returns the number of indirections required to get to the +// implementation. +func implementsInterface(typ, gobEncDecType reflect.Type) (success bool, indir int8) { + if typ == nil { + return + } + rt := typ + // The type might be a pointer and we need to keep + // dereferencing to the base type until we find an implementation. + for { + if rt.Implements(gobEncDecType) { + return true, indir + } + if p := rt; p.Kind() == reflect.Pointer { + indir++ + if indir > 100 { // insane number of indirections + return false, 0 + } + rt = p.Elem() + continue + } + break + } + // No luck yet, but if this is a base type (non-pointer), the pointer might satisfy. + if typ.Kind() != reflect.Pointer { + // Not a pointer, but does the pointer work? + if reflect.PointerTo(typ).Implements(gobEncDecType) { + return true, -1 + } + } + return false, 0 +} + +// userType returns, and saves, the information associated with user-provided type rt. +// If the user type is not valid, it calls error. +func userType(rt reflect.Type) *userTypeInfo { + ut, err := validUserType(rt) + if err != nil { + error_(err) + } + return ut +} + +// A typeId represents a gob Type as an integer that can be passed on the wire. +// Internally, typeIds are used as keys to a map to recover the underlying type info. +type typeId int32 + +var typeLock sync.Mutex // set while building a type +const firstUserId = 64 // lowest id number granted to user + +type gobType interface { + id() typeId + setId(id typeId) + name() string + string() string // not public; only for debugging + safeString(seen map[typeId]bool) string +} + +var ( + types = make(map[reflect.Type]gobType, 32) + idToTypeSlice = make([]gobType, 1, firstUserId) + builtinIdToTypeSlice [firstUserId]gobType // set in init() after builtins are established +) + +func idToType(id typeId) gobType { + if id < 0 || int(id) >= len(idToTypeSlice) { + return nil + } + return idToTypeSlice[id] +} + +func builtinIdToType(id typeId) gobType { + if id < 0 || int(id) >= len(builtinIdToTypeSlice) { + return nil + } + return builtinIdToTypeSlice[id] +} + +func setTypeId(typ gobType) { + // When building recursive types, someone may get there before us. + if typ.id() != 0 { + return + } + nextId := typeId(len(idToTypeSlice)) + typ.setId(nextId) + idToTypeSlice = append(idToTypeSlice, typ) +} + +func (t typeId) gobType() gobType { + if t == 0 { + return nil + } + return idToType(t) +} + +// string returns the string representation of the type associated with the typeId. +func (t typeId) string() string { + if t.gobType() == nil { + return "<nil>" + } + return t.gobType().string() +} + +// Name returns the name of the type associated with the typeId. +func (t typeId) name() string { + if t.gobType() == nil { + return "<nil>" + } + return t.gobType().name() +} + +// CommonType holds elements of all types. +// It is a historical artifact, kept for binary compatibility and exported +// only for the benefit of the package's encoding of type descriptors. It is +// not intended for direct use by clients. +type CommonType struct { + Name string + Id typeId +} + +func (t *CommonType) id() typeId { return t.Id } + +func (t *CommonType) setId(id typeId) { t.Id = id } + +func (t *CommonType) string() string { return t.Name } + +func (t *CommonType) safeString(seen map[typeId]bool) string { + return t.Name +} + +func (t *CommonType) name() string { return t.Name } + +// Create and check predefined types +// The string for tBytes is "bytes" not "[]byte" to signify its specialness. + +var ( + // Primordial types, needed during initialization. + // Always passed as pointers so the interface{} type + // goes through without losing its interfaceness. + tBool = bootstrapType("bool", (*bool)(nil)) + tInt = bootstrapType("int", (*int)(nil)) + tUint = bootstrapType("uint", (*uint)(nil)) + tFloat = bootstrapType("float", (*float64)(nil)) + tBytes = bootstrapType("bytes", (*[]byte)(nil)) + tString = bootstrapType("string", (*string)(nil)) + tComplex = bootstrapType("complex", (*complex128)(nil)) + tInterface = bootstrapType("interface", (*any)(nil)) + // Reserve some Ids for compatible expansion + tReserved7 = bootstrapType("_reserved1", (*struct{ r7 int })(nil)) + tReserved6 = bootstrapType("_reserved1", (*struct{ r6 int })(nil)) + tReserved5 = bootstrapType("_reserved1", (*struct{ r5 int })(nil)) + tReserved4 = bootstrapType("_reserved1", (*struct{ r4 int })(nil)) + tReserved3 = bootstrapType("_reserved1", (*struct{ r3 int })(nil)) + tReserved2 = bootstrapType("_reserved1", (*struct{ r2 int })(nil)) + tReserved1 = bootstrapType("_reserved1", (*struct{ r1 int })(nil)) +) + +// Predefined because it's needed by the Decoder +var tWireType = mustGetTypeInfo(wireTypeType).id +var wireTypeUserInfo *userTypeInfo // userTypeInfo of wireType + +func init() { + // Some magic numbers to make sure there are no surprises. + checkId(16, tWireType) + checkId(17, mustGetTypeInfo(reflect.TypeFor[arrayType]()).id) + checkId(18, mustGetTypeInfo(reflect.TypeFor[CommonType]()).id) + checkId(19, mustGetTypeInfo(reflect.TypeFor[sliceType]()).id) + checkId(20, mustGetTypeInfo(reflect.TypeFor[structType]()).id) + checkId(21, mustGetTypeInfo(reflect.TypeFor[fieldType]()).id) + checkId(23, mustGetTypeInfo(reflect.TypeFor[mapType]()).id) + + copy(builtinIdToTypeSlice[:], idToTypeSlice) + + // Move the id space upwards to allow for growth in the predefined world + // without breaking existing files. + if nextId := len(idToTypeSlice); nextId > firstUserId { + panic(fmt.Sprintln("nextId too large:", nextId)) + } + idToTypeSlice = idToTypeSlice[:firstUserId] + registerBasics() + wireTypeUserInfo = userType(wireTypeType) +} + +// Array type +type arrayType struct { + CommonType + Elem typeId + Len int +} + +func newArrayType(name string) *arrayType { + a := &arrayType{CommonType{Name: name}, 0, 0} + return a +} + +func (a *arrayType) init(elem gobType, len int) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(a) + a.Elem = elem.id() + a.Len = len +} + +func (a *arrayType) safeString(seen map[typeId]bool) string { + if seen[a.Id] { + return a.Name + } + seen[a.Id] = true + return fmt.Sprintf("[%d]%s", a.Len, a.Elem.gobType().safeString(seen)) +} + +func (a *arrayType) string() string { return a.safeString(make(map[typeId]bool)) } + +// GobEncoder type (something that implements the GobEncoder interface) +type gobEncoderType struct { + CommonType +} + +func newGobEncoderType(name string) *gobEncoderType { + g := &gobEncoderType{CommonType{Name: name}} + setTypeId(g) + return g +} + +func (g *gobEncoderType) safeString(seen map[typeId]bool) string { + return g.Name +} + +func (g *gobEncoderType) string() string { return g.Name } + +// Map type +type mapType struct { + CommonType + Key typeId + Elem typeId +} + +func newMapType(name string) *mapType { + m := &mapType{CommonType{Name: name}, 0, 0} + return m +} + +func (m *mapType) init(key, elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(m) + m.Key = key.id() + m.Elem = elem.id() +} + +func (m *mapType) safeString(seen map[typeId]bool) string { + if seen[m.Id] { + return m.Name + } + seen[m.Id] = true + key := m.Key.gobType().safeString(seen) + elem := m.Elem.gobType().safeString(seen) + return fmt.Sprintf("map[%s]%s", key, elem) +} + +func (m *mapType) string() string { return m.safeString(make(map[typeId]bool)) } + +// Slice type +type sliceType struct { + CommonType + Elem typeId +} + +func newSliceType(name string) *sliceType { + s := &sliceType{CommonType{Name: name}, 0} + return s +} + +func (s *sliceType) init(elem gobType) { + // Set our type id before evaluating the element's, in case it's our own. + setTypeId(s) + // See the comments about ids in newTypeObject. Only slices and + // structs have mutual recursion. + if elem.id() == 0 { + setTypeId(elem) + } + s.Elem = elem.id() +} + +func (s *sliceType) safeString(seen map[typeId]bool) string { + if seen[s.Id] { + return s.Name + } + seen[s.Id] = true + return fmt.Sprintf("[]%s", s.Elem.gobType().safeString(seen)) +} + +func (s *sliceType) string() string { return s.safeString(make(map[typeId]bool)) } + +// Struct type +type fieldType struct { + Name string + Id typeId +} + +type structType struct { + CommonType + Field []fieldType +} + +func (s *structType) safeString(seen map[typeId]bool) string { + if s == nil { + return "<nil>" + } + if _, ok := seen[s.Id]; ok { + return s.Name + } + seen[s.Id] = true + str := s.Name + " = struct { " + for _, f := range s.Field { + str += fmt.Sprintf("%s %s; ", f.Name, f.Id.gobType().safeString(seen)) + } + str += "}" + return str +} + +func (s *structType) string() string { return s.safeString(make(map[typeId]bool)) } + +func newStructType(name string) *structType { + s := &structType{CommonType{Name: name}, nil} + // For historical reasons we set the id here rather than init. + // See the comment in newTypeObject for details. + setTypeId(s) + return s +} + +// newTypeObject allocates a gobType for the reflection type rt. +// Unless ut represents a GobEncoder, rt should be the base type +// of ut. +// This is only called from the encoding side. The decoding side +// works through typeIds and userTypeInfos alone. +func newTypeObject(name string, ut *userTypeInfo, rt reflect.Type) (gobType, error) { + // Does this type implement GobEncoder? + if ut.externalEnc != 0 { + return newGobEncoderType(name), nil + } + var err error + var type0, type1 gobType + defer func() { + if err != nil { + delete(types, rt) + } + }() + // Install the top-level type before the subtypes (e.g. struct before + // fields) so recursive types can be constructed safely. + switch t := rt; t.Kind() { + // All basic types are easy: they are predefined. + case reflect.Bool: + return tBool.gobType(), nil + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return tInt.gobType(), nil + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return tUint.gobType(), nil + + case reflect.Float32, reflect.Float64: + return tFloat.gobType(), nil + + case reflect.Complex64, reflect.Complex128: + return tComplex.gobType(), nil + + case reflect.String: + return tString.gobType(), nil + + case reflect.Interface: + return tInterface.gobType(), nil + + case reflect.Array: + at := newArrayType(name) + types[rt] = at + type0, err = getBaseType("", t.Elem()) + if err != nil { + return nil, err + } + // Historical aside: + // For arrays, maps, and slices, we set the type id after the elements + // are constructed. This is to retain the order of type id allocation after + // a fix made to handle recursive types, which changed the order in + // which types are built. Delaying the setting in this way preserves + // type ids while allowing recursive types to be described. Structs, + // done below, were already handling recursion correctly so they + // assign the top-level id before those of the field. + at.init(type0, t.Len()) + return at, nil + + case reflect.Map: + mt := newMapType(name) + types[rt] = mt + type0, err = getBaseType("", t.Key()) + if err != nil { + return nil, err + } + type1, err = getBaseType("", t.Elem()) + if err != nil { + return nil, err + } + mt.init(type0, type1) + return mt, nil + + case reflect.Slice: + // []byte == []uint8 is a special case + if t.Elem().Kind() == reflect.Uint8 { + return tBytes.gobType(), nil + } + st := newSliceType(name) + types[rt] = st + type0, err = getBaseType(t.Elem().Name(), t.Elem()) + if err != nil { + return nil, err + } + st.init(type0) + return st, nil + + case reflect.Struct: + st := newStructType(name) + types[rt] = st + idToTypeSlice[st.id()] = st + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if !isSent(&f) { + continue + } + typ := userType(f.Type).base + tname := typ.Name() + if tname == "" { + t := userType(f.Type).base + tname = t.String() + } + gt, err := getBaseType(tname, f.Type) + if err != nil { + return nil, err + } + // Some mutually recursive types can cause us to be here while + // still defining the element. Fix the element type id here. + // We could do this more neatly by setting the id at the start of + // building every type, but that would break binary compatibility. + if gt.id() == 0 { + setTypeId(gt) + } + st.Field = append(st.Field, fieldType{f.Name, gt.id()}) + } + return st, nil + + default: + return nil, errors.New("gob NewTypeObject can't handle type: " + rt.String()) + } +} + +// isExported reports whether this is an exported - upper case - name. +func isExported(name string) bool { + rune, _ := utf8.DecodeRuneInString(name) + return unicode.IsUpper(rune) +} + +// isSent reports whether this struct field is to be transmitted. +// It will be transmitted only if it is exported and not a chan or func field +// or pointer to chan or func. +func isSent(field *reflect.StructField) bool { + if !isExported(field.Name) { + return false + } + // If the field is a chan or func or pointer thereto, don't send it. + // That is, treat it like an unexported field. + typ := field.Type + for typ.Kind() == reflect.Pointer { + typ = typ.Elem() + } + if typ.Kind() == reflect.Chan || typ.Kind() == reflect.Func { + return false + } + return true +} + +// getBaseType returns the Gob type describing the given reflect.Type's base type. +// typeLock must be held. +func getBaseType(name string, rt reflect.Type) (gobType, error) { + ut := userType(rt) + return getType(name, ut, ut.base) +} + +// getType returns the Gob type describing the given reflect.Type. +// Should be called only when handling GobEncoders/Decoders, +// which may be pointers. All other types are handled through the +// base type, never a pointer. +// typeLock must be held. +func getType(name string, ut *userTypeInfo, rt reflect.Type) (gobType, error) { + typ, present := types[rt] + if present { + return typ, nil + } + typ, err := newTypeObject(name, ut, rt) + if err == nil { + types[rt] = typ + } + return typ, err +} + +func checkId(want, got typeId) { + if want != got { + fmt.Fprintf(os.Stderr, "checkId: %d should be %d\n", int(got), int(want)) + panic("bootstrap type wrong id: " + got.name() + " " + got.string() + " not " + want.string()) + } +} + +// used for building the basic types; called only from init(). the incoming +// interface always refers to a pointer. +func bootstrapType(name string, e any) typeId { + rt := reflect.TypeOf(e).Elem() + _, present := types[rt] + if present { + panic("bootstrap type already present: " + name + ", " + rt.String()) + } + typ := &CommonType{Name: name} + types[rt] = typ + setTypeId(typ) + return typ.id() +} + +// Representation of the information we send and receive about this type. +// Each value we send is preceded by its type definition: an encoded int. +// However, the very first time we send the value, we first send the pair +// (-id, wireType). +// For bootstrapping purposes, we assume that the recipient knows how +// to decode a wireType; it is exactly the wireType struct here, interpreted +// using the gob rules for sending a structure, except that we assume the +// ids for wireType and structType etc. are known. The relevant pieces +// are built in encode.go's init() function. +// To maintain binary compatibility, if you extend this type, always put +// the new fields last. +type wireType struct { + ArrayT *arrayType + SliceT *sliceType + StructT *structType + MapT *mapType + GobEncoderT *gobEncoderType + BinaryMarshalerT *gobEncoderType + TextMarshalerT *gobEncoderType +} + +func (w *wireType) string() string { + const unknown = "unknown type" + if w == nil { + return unknown + } + switch { + case w.ArrayT != nil: + return w.ArrayT.Name + case w.SliceT != nil: + return w.SliceT.Name + case w.StructT != nil: + return w.StructT.Name + case w.MapT != nil: + return w.MapT.Name + case w.GobEncoderT != nil: + return w.GobEncoderT.Name + case w.BinaryMarshalerT != nil: + return w.BinaryMarshalerT.Name + case w.TextMarshalerT != nil: + return w.TextMarshalerT.Name + } + return unknown +} + +type typeInfo struct { + id typeId + encInit sync.Mutex // protects creation of encoder + encoder atomic.Pointer[encEngine] + wire wireType +} + +// typeInfoMap is an atomic pointer to map[reflect.Type]*typeInfo. +// It's updated copy-on-write. Readers just do an atomic load +// to get the current version of the map. Writers make a full copy of +// the map and atomically update the pointer to point to the new map. +// Under heavy read contention, this is significantly faster than a map +// protected by a mutex. +var typeInfoMap atomic.Value + +// typeInfoMapInit is used instead of typeInfoMap during init time, +// as types are registered sequentially during init and we can save +// the overhead of making map copies. +// It is saved to typeInfoMap and set to nil before init finishes. +var typeInfoMapInit = make(map[reflect.Type]*typeInfo, 16) + +func lookupTypeInfo(rt reflect.Type) *typeInfo { + if m := typeInfoMapInit; m != nil { + return m[rt] + } + m, _ := typeInfoMap.Load().(map[reflect.Type]*typeInfo) + return m[rt] +} + +func getTypeInfo(ut *userTypeInfo) (*typeInfo, error) { + rt := ut.base + if ut.externalEnc != 0 { + // We want the user type, not the base type. + rt = ut.user + } + if info := lookupTypeInfo(rt); info != nil { + return info, nil + } + return buildTypeInfo(ut, rt) +} + +// buildTypeInfo constructs the type information for the type +// and stores it in the type info map. +func buildTypeInfo(ut *userTypeInfo, rt reflect.Type) (*typeInfo, error) { + typeLock.Lock() + defer typeLock.Unlock() + + if info := lookupTypeInfo(rt); info != nil { + return info, nil + } + + gt, err := getBaseType(rt.Name(), rt) + if err != nil { + return nil, err + } + info := &typeInfo{id: gt.id()} + + if ut.externalEnc != 0 { + userType, err := getType(rt.Name(), ut, rt) + if err != nil { + return nil, err + } + gt := userType.id().gobType().(*gobEncoderType) + switch ut.externalEnc { + case xGob: + info.wire.GobEncoderT = gt + case xBinary: + info.wire.BinaryMarshalerT = gt + case xText: + info.wire.TextMarshalerT = gt + } + rt = ut.user + } else { + t := info.id.gobType() + switch typ := rt; typ.Kind() { + case reflect.Array: + info.wire.ArrayT = t.(*arrayType) + case reflect.Map: + info.wire.MapT = t.(*mapType) + case reflect.Slice: + // []byte == []uint8 is a special case handled separately + if typ.Elem().Kind() != reflect.Uint8 { + info.wire.SliceT = t.(*sliceType) + } + case reflect.Struct: + info.wire.StructT = t.(*structType) + } + } + + if m := typeInfoMapInit; m != nil { + m[rt] = info + return info, nil + } + + // Create new map with old contents plus new entry. + m, _ := typeInfoMap.Load().(map[reflect.Type]*typeInfo) + newm := make(map[reflect.Type]*typeInfo, len(m)) + for k, v := range m { + newm[k] = v + } + newm[rt] = info + typeInfoMap.Store(newm) + return info, nil +} + +// Called only when a panic is acceptable and unexpected. +func mustGetTypeInfo(rt reflect.Type) *typeInfo { + t, err := getTypeInfo(userType(rt)) + if err != nil { + panic("getTypeInfo: " + err.Error()) + } + return t +} + +// GobEncoder is the interface describing data that provides its own +// representation for encoding values for transmission to a GobDecoder. +// A type that implements GobEncoder and GobDecoder has complete +// control over the representation of its data and may therefore +// contain things such as private fields, channels, and functions, +// which are not usually transmissible in gob streams. +// +// Note: Since gobs can be stored permanently, it is good design +// to guarantee the encoding used by a GobEncoder is stable as the +// software evolves. For instance, it might make sense for GobEncode +// to include a version number in the encoding. +type GobEncoder interface { + // GobEncode returns a byte slice representing the encoding of the + // receiver for transmission to a GobDecoder, usually of the same + // concrete type. + GobEncode() ([]byte, error) +} + +// GobDecoder is the interface describing data that provides its own +// routine for decoding transmitted values sent by a GobEncoder. +type GobDecoder interface { + // GobDecode overwrites the receiver, which must be a pointer, + // with the value represented by the byte slice, which was written + // by GobEncode, usually for the same concrete type. + GobDecode([]byte) error +} + +var ( + nameToConcreteType sync.Map // map[string]reflect.Type + concreteTypeToName sync.Map // map[reflect.Type]string +) + +// RegisterName is like [Register] but uses the provided name rather than the +// type's default. +func RegisterName(name string, value any) { + if name == "" { + // reserved for nil + panic("attempt to register empty name") + } + + ut := userType(reflect.TypeOf(value)) + + // Check for incompatible duplicates. The name must refer to the + // same user type, and vice versa. + + // Store the name and type provided by the user.... + if t, dup := nameToConcreteType.LoadOrStore(name, reflect.TypeOf(value)); dup && t != ut.user { + panic(fmt.Sprintf("gob: registering duplicate types for %q: %s != %s", name, t, ut.user)) + } + + // but the flattened type in the type table, since that's what decode needs. + if n, dup := concreteTypeToName.LoadOrStore(ut.base, name); dup && n != name { + nameToConcreteType.Delete(name) + panic(fmt.Sprintf("gob: registering duplicate names for %s: %q != %q", ut.user, n, name)) + } +} + +// Register records a type, identified by a value for that type, under its +// internal type name. That name will identify the concrete type of a value +// sent or received as an interface variable. Only types that will be +// transferred as implementations of interface values need to be registered. +// Expecting to be used only during initialization, it panics if the mapping +// between types and names is not a bijection. +func Register(value any) { + // Default to printed representation for unnamed types + rt := reflect.TypeOf(value) + name := rt.String() + + // But for named types (or pointers to them), qualify with import path (but see inner comment). + // Dereference one pointer looking for a named type. + star := "" + if rt.Name() == "" { + if pt := rt; pt.Kind() == reflect.Pointer { + star = "*" + // NOTE: The following line should be rt = pt.Elem() to implement + // what the comment above claims, but fixing it would break compatibility + // with existing gobs. + // + // Given package p imported as "full/p" with these definitions: + // package p + // type T1 struct { ... } + // this table shows the intended and actual strings used by gob to + // name the types: + // + // Type Correct string Actual string + // + // T1 full/p.T1 full/p.T1 + // *T1 *full/p.T1 *p.T1 + // + // The missing full path cannot be fixed without breaking existing gob decoders. + rt = pt + } + } + if rt.Name() != "" { + if rt.PkgPath() == "" { + name = star + rt.Name() + } else { + name = star + rt.PkgPath() + "." + rt.Name() + } + } + + RegisterName(name, value) +} + +func registerBasics() { + Register(int(0)) + Register(int8(0)) + Register(int16(0)) + Register(int32(0)) + Register(int64(0)) + Register(uint(0)) + Register(uint8(0)) + Register(uint16(0)) + Register(uint32(0)) + Register(uint64(0)) + Register(float32(0)) + Register(float64(0)) + Register(complex64(0i)) + Register(complex128(0i)) + Register(uintptr(0)) + Register(false) + Register("") + Register([]byte(nil)) + Register([]int(nil)) + Register([]int8(nil)) + Register([]int16(nil)) + Register([]int32(nil)) + Register([]int64(nil)) + Register([]uint(nil)) + Register([]uint8(nil)) + Register([]uint16(nil)) + Register([]uint32(nil)) + Register([]uint64(nil)) + Register([]float32(nil)) + Register([]float64(nil)) + Register([]complex64(nil)) + Register([]complex128(nil)) + Register([]uintptr(nil)) + Register([]bool(nil)) + Register([]string(nil)) +} + +func init() { + typeInfoMap.Store(typeInfoMapInit) + typeInfoMapInit = nil +} diff --git a/src/encoding/gob/type_test.go b/src/encoding/gob/type_test.go new file mode 100644 index 0000000..8d4c6d7 --- /dev/null +++ b/src/encoding/gob/type_test.go @@ -0,0 +1,262 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gob + +import ( + "bytes" + "reflect" + "sync" + "testing" +) + +type typeT struct { + id typeId + str string +} + +var basicTypes = []typeT{ + {tBool, "bool"}, + {tInt, "int"}, + {tUint, "uint"}, + {tFloat, "float"}, + {tBytes, "bytes"}, + {tString, "string"}, +} + +func getTypeUnlocked(name string, rt reflect.Type) gobType { + typeLock.Lock() + defer typeLock.Unlock() + t, err := getBaseType(name, rt) + if err != nil { + panic("getTypeUnlocked: " + err.Error()) + } + return t +} + +// Sanity checks +func TestBasic(t *testing.T) { + for _, tt := range basicTypes { + if tt.id.string() != tt.str { + t.Errorf("checkType: expected %q got %s", tt.str, tt.id.string()) + } + if tt.id == 0 { + t.Errorf("id for %q is zero", tt.str) + } + } +} + +// Reregister some basic types to check registration is idempotent. +func TestReregistration(t *testing.T) { + newtyp := getTypeUnlocked("int", reflect.TypeFor[int]()) + if newtyp != tInt.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } + newtyp = getTypeUnlocked("uint", reflect.TypeFor[uint]()) + if newtyp != tUint.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } + newtyp = getTypeUnlocked("string", reflect.TypeFor[string]()) + if newtyp != tString.gobType() { + t.Errorf("reregistration of %s got new type", newtyp.string()) + } +} + +func TestArrayType(t *testing.T) { + var a3 [3]int + a3int := getTypeUnlocked("foo", reflect.TypeOf(a3)) + newa3int := getTypeUnlocked("bar", reflect.TypeOf(a3)) + if a3int != newa3int { + t.Errorf("second registration of [3]int creates new type") + } + var a4 [4]int + a4int := getTypeUnlocked("goo", reflect.TypeOf(a4)) + if a3int == a4int { + t.Errorf("registration of [3]int creates same type as [4]int") + } + var b3 [3]bool + a3bool := getTypeUnlocked("", reflect.TypeOf(b3)) + if a3int == a3bool { + t.Errorf("registration of [3]bool creates same type as [3]int") + } + str := a3bool.string() + expected := "[3]bool" + if str != expected { + t.Errorf("array printed as %q; expected %q", str, expected) + } +} + +func TestSliceType(t *testing.T) { + var s []int + sint := getTypeUnlocked("slice", reflect.TypeOf(s)) + var news []int + newsint := getTypeUnlocked("slice1", reflect.TypeOf(news)) + if sint != newsint { + t.Errorf("second registration of []int creates new type") + } + var b []bool + sbool := getTypeUnlocked("", reflect.TypeOf(b)) + if sbool == sint { + t.Errorf("registration of []bool creates same type as []int") + } + str := sbool.string() + expected := "[]bool" + if str != expected { + t.Errorf("slice printed as %q; expected %q", str, expected) + } +} + +func TestMapType(t *testing.T) { + var m map[string]int + mapStringInt := getTypeUnlocked("map", reflect.TypeOf(m)) + var newm map[string]int + newMapStringInt := getTypeUnlocked("map1", reflect.TypeOf(newm)) + if mapStringInt != newMapStringInt { + t.Errorf("second registration of map[string]int creates new type") + } + var b map[string]bool + mapStringBool := getTypeUnlocked("", reflect.TypeOf(b)) + if mapStringBool == mapStringInt { + t.Errorf("registration of map[string]bool creates same type as map[string]int") + } + str := mapStringBool.string() + expected := "map[string]bool" + if str != expected { + t.Errorf("map printed as %q; expected %q", str, expected) + } +} + +type Bar struct { + X string +} + +// This structure has pointers and refers to itself, making it a good test case. +type Foo struct { + A int + B int32 // will become int + C string + D []byte + E *float64 // will become float64 + F ****float64 // will become float64 + G *Bar + H *Bar // should not interpolate the definition of Bar again + I *Foo // will not explode +} + +func TestStructType(t *testing.T) { + sstruct := getTypeUnlocked("Foo", reflect.TypeFor[Foo]()) + str := sstruct.string() + // If we can print it correctly, we built it correctly. + expected := "Foo = struct { A int; B int; C string; D bytes; E float; F float; G Bar = struct { X string; }; H Bar; I Foo; }" + if str != expected { + t.Errorf("struct printed as %q; expected %q", str, expected) + } +} + +// Should be OK to register the same type multiple times, as long as they're +// at the same level of indirection. +func TestRegistration(t *testing.T) { + type T struct{ a int } + Register(new(T)) + Register(new(T)) +} + +type N1 struct{} +type N2 struct{} + +// See comment in type.go/Register. +func TestRegistrationNaming(t *testing.T) { + testCases := []struct { + t any + name string + }{ + {&N1{}, "*gob.N1"}, + {N2{}, "encoding/gob.N2"}, + } + + for _, tc := range testCases { + Register(tc.t) + + tct := reflect.TypeOf(tc.t) + ct, _ := nameToConcreteType.Load(tc.name) + if ct != tct { + t.Errorf("nameToConcreteType[%q] = %v, want %v", tc.name, ct, tct) + } + // concreteTypeToName is keyed off the base type. + if tct.Kind() == reflect.Pointer { + tct = tct.Elem() + } + if n, _ := concreteTypeToName.Load(tct); n != tc.name { + t.Errorf("concreteTypeToName[%v] got %v, want %v", tct, n, tc.name) + } + } +} + +func TestStressParallel(t *testing.T) { + type T2 struct{ A int } + c := make(chan bool) + const N = 10 + for i := 0; i < N; i++ { + go func() { + p := new(T2) + Register(p) + b := new(bytes.Buffer) + enc := NewEncoder(b) + err := enc.Encode(p) + if err != nil { + t.Error("encoder fail:", err) + } + dec := NewDecoder(b) + err = dec.Decode(p) + if err != nil { + t.Error("decoder fail:", err) + } + c <- true + }() + } + for i := 0; i < N; i++ { + <-c + } +} + +// Issue 23328. Note that this test name is known to cmd/dist/test.go. +func TestTypeRace(t *testing.T) { + c := make(chan bool) + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + var buf bytes.Buffer + enc := NewEncoder(&buf) + dec := NewDecoder(&buf) + var x any + switch i { + case 0: + x = &N1{} + case 1: + x = &N2{} + default: + t.Errorf("bad i %d", i) + return + } + m := make(map[string]string) + <-c + if err := enc.Encode(x); err != nil { + t.Error(err) + return + } + if err := enc.Encode(x); err != nil { + t.Error(err) + return + } + if err := dec.Decode(&m); err == nil { + t.Error("decode unexpectedly succeeded") + return + } + }(i) + } + close(c) + wg.Wait() +} diff --git a/src/encoding/hex/example_test.go b/src/encoding/hex/example_test.go new file mode 100644 index 0000000..3580757 --- /dev/null +++ b/src/encoding/hex/example_test.go @@ -0,0 +1,98 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package hex_test + +import ( + "encoding/hex" + "fmt" + "log" + "os" +) + +func ExampleEncode() { + src := []byte("Hello Gopher!") + + dst := make([]byte, hex.EncodedLen(len(src))) + hex.Encode(dst, src) + + fmt.Printf("%s\n", dst) + + // Output: + // 48656c6c6f20476f7068657221 +} + +func ExampleDecode() { + src := []byte("48656c6c6f20476f7068657221") + + dst := make([]byte, hex.DecodedLen(len(src))) + n, err := hex.Decode(dst, src) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%s\n", dst[:n]) + + // Output: + // Hello Gopher! +} + +func ExampleDecodeString() { + const s = "48656c6c6f20476f7068657221" + decoded, err := hex.DecodeString(s) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%s\n", decoded) + + // Output: + // Hello Gopher! +} + +func ExampleDump() { + content := []byte("Go is an open source programming language.") + + fmt.Printf("%s", hex.Dump(content)) + + // Output: + // 00000000 47 6f 20 69 73 20 61 6e 20 6f 70 65 6e 20 73 6f |Go is an open so| + // 00000010 75 72 63 65 20 70 72 6f 67 72 61 6d 6d 69 6e 67 |urce programming| + // 00000020 20 6c 61 6e 67 75 61 67 65 2e | language.| +} + +func ExampleDumper() { + lines := []string{ + "Go is an open source programming language.", + "\n", + "We encourage all Go users to subscribe to golang-announce.", + } + + stdoutDumper := hex.Dumper(os.Stdout) + + defer stdoutDumper.Close() + + for _, line := range lines { + stdoutDumper.Write([]byte(line)) + } + + // Output: + // 00000000 47 6f 20 69 73 20 61 6e 20 6f 70 65 6e 20 73 6f |Go is an open so| + // 00000010 75 72 63 65 20 70 72 6f 67 72 61 6d 6d 69 6e 67 |urce programming| + // 00000020 20 6c 61 6e 67 75 61 67 65 2e 0a 57 65 20 65 6e | language..We en| + // 00000030 63 6f 75 72 61 67 65 20 61 6c 6c 20 47 6f 20 75 |courage all Go u| + // 00000040 73 65 72 73 20 74 6f 20 73 75 62 73 63 72 69 62 |sers to subscrib| + // 00000050 65 20 74 6f 20 67 6f 6c 61 6e 67 2d 61 6e 6e 6f |e to golang-anno| + // 00000060 75 6e 63 65 2e |unce.| +} + +func ExampleEncodeToString() { + src := []byte("Hello") + encodedStr := hex.EncodeToString(src) + + fmt.Printf("%s\n", encodedStr) + + // Output: + // 48656c6c6f +} diff --git a/src/encoding/hex/hex.go b/src/encoding/hex/hex.go new file mode 100644 index 0000000..791d2bd --- /dev/null +++ b/src/encoding/hex/hex.go @@ -0,0 +1,355 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package hex implements hexadecimal encoding and decoding. +package hex + +import ( + "errors" + "fmt" + "io" + "slices" + "strings" +) + +const ( + hextable = "0123456789abcdef" + reverseHexTable = "" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +) + +// EncodedLen returns the length of an encoding of n source bytes. +// Specifically, it returns n * 2. +func EncodedLen(n int) int { return n * 2 } + +// Encode encodes src into [EncodedLen](len(src)) +// bytes of dst. As a convenience, it returns the number +// of bytes written to dst, but this value is always [EncodedLen](len(src)). +// Encode implements hexadecimal encoding. +func Encode(dst, src []byte) int { + j := 0 + for _, v := range src { + dst[j] = hextable[v>>4] + dst[j+1] = hextable[v&0x0f] + j += 2 + } + return len(src) * 2 +} + +// AppendEncode appends the hexadecimally encoded src to dst +// and returns the extended buffer. +func AppendEncode(dst, src []byte) []byte { + n := EncodedLen(len(src)) + dst = slices.Grow(dst, n) + Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// ErrLength reports an attempt to decode an odd-length input +// using [Decode] or [DecodeString]. +// The stream-based Decoder returns [io.ErrUnexpectedEOF] instead of ErrLength. +var ErrLength = errors.New("encoding/hex: odd length hex string") + +// InvalidByteError values describe errors resulting from an invalid byte in a hex string. +type InvalidByteError byte + +func (e InvalidByteError) Error() string { + return fmt.Sprintf("encoding/hex: invalid byte: %#U", rune(e)) +} + +// DecodedLen returns the length of a decoding of x source bytes. +// Specifically, it returns x / 2. +func DecodedLen(x int) int { return x / 2 } + +// Decode decodes src into [DecodedLen](len(src)) bytes, +// returning the actual number of bytes written to dst. +// +// Decode expects that src contains only hexadecimal +// characters and that src has even length. +// If the input is malformed, Decode returns the number +// of bytes decoded before the error. +func Decode(dst, src []byte) (int, error) { + i, j := 0, 1 + for ; j < len(src); j += 2 { + p := src[j-1] + q := src[j] + + a := reverseHexTable[p] + b := reverseHexTable[q] + if a > 0x0f { + return i, InvalidByteError(p) + } + if b > 0x0f { + return i, InvalidByteError(q) + } + dst[i] = (a << 4) | b + i++ + } + if len(src)%2 == 1 { + // Check for invalid char before reporting bad length, + // since the invalid char (if present) is an earlier problem. + if reverseHexTable[src[j-1]] > 0x0f { + return i, InvalidByteError(src[j-1]) + } + return i, ErrLength + } + return i, nil +} + +// AppendDecode appends the hexadecimally decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func AppendDecode(dst, src []byte) ([]byte, error) { + n := DecodedLen(len(src)) + dst = slices.Grow(dst, n) + n, err := Decode(dst[len(dst):][:n], src) + return dst[:len(dst)+n], err +} + +// EncodeToString returns the hexadecimal encoding of src. +func EncodeToString(src []byte) string { + dst := make([]byte, EncodedLen(len(src))) + Encode(dst, src) + return string(dst) +} + +// DecodeString returns the bytes represented by the hexadecimal string s. +// +// DecodeString expects that src contains only hexadecimal +// characters and that src has even length. +// If the input is malformed, DecodeString returns +// the bytes decoded before the error. +func DecodeString(s string) ([]byte, error) { + src := []byte(s) + // We can use the source slice itself as the destination + // because the decode loop increments by one and then the 'seen' byte is not used anymore. + n, err := Decode(src, src) + return src[:n], err +} + +// Dump returns a string that contains a hex dump of the given data. The format +// of the hex dump matches the output of `hexdump -C` on the command line. +func Dump(data []byte) string { + if len(data) == 0 { + return "" + } + + var buf strings.Builder + // Dumper will write 79 bytes per complete 16 byte chunk, and at least + // 64 bytes for whatever remains. Round the allocation up, since only a + // maximum of 15 bytes will be wasted. + buf.Grow((1 + ((len(data) - 1) / 16)) * 79) + + dumper := Dumper(&buf) + dumper.Write(data) + dumper.Close() + return buf.String() +} + +// bufferSize is the number of hexadecimal characters to buffer in encoder and decoder. +const bufferSize = 1024 + +type encoder struct { + w io.Writer + err error + out [bufferSize]byte // output buffer +} + +// NewEncoder returns an [io.Writer] that writes lowercase hexadecimal characters to w. +func NewEncoder(w io.Writer) io.Writer { + return &encoder{w: w} +} + +func (e *encoder) Write(p []byte) (n int, err error) { + for len(p) > 0 && e.err == nil { + chunkSize := bufferSize / 2 + if len(p) < chunkSize { + chunkSize = len(p) + } + + var written int + encoded := Encode(e.out[:], p[:chunkSize]) + written, e.err = e.w.Write(e.out[:encoded]) + n += written / 2 + p = p[chunkSize:] + } + return n, e.err +} + +type decoder struct { + r io.Reader + err error + in []byte // input buffer (encoded form) + arr [bufferSize]byte // backing array for in +} + +// NewDecoder returns an [io.Reader] that decodes hexadecimal characters from r. +// NewDecoder expects that r contain only an even number of hexadecimal characters. +func NewDecoder(r io.Reader) io.Reader { + return &decoder{r: r} +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Fill internal buffer with sufficient bytes to decode + if len(d.in) < 2 && d.err == nil { + var numCopy, numRead int + numCopy = copy(d.arr[:], d.in) // Copies either 0 or 1 bytes + numRead, d.err = d.r.Read(d.arr[numCopy:]) + d.in = d.arr[:numCopy+numRead] + if d.err == io.EOF && len(d.in)%2 != 0 { + + if a := reverseHexTable[d.in[len(d.in)-1]]; a > 0x0f { + d.err = InvalidByteError(d.in[len(d.in)-1]) + } else { + d.err = io.ErrUnexpectedEOF + } + } + } + + // Decode internal buffer into output buffer + if numAvail := len(d.in) / 2; len(p) > numAvail { + p = p[:numAvail] + } + numDec, err := Decode(p, d.in[:len(p)*2]) + d.in = d.in[2*numDec:] + if err != nil { + d.in, d.err = nil, err // Decode error; discard input remainder + } + + if len(d.in) < 2 { + return numDec, d.err // Only expose errors when buffer fully consumed + } + return numDec, nil +} + +// Dumper returns a [io.WriteCloser] that writes a hex dump of all written data to +// w. The format of the dump matches the output of `hexdump -C` on the command +// line. +func Dumper(w io.Writer) io.WriteCloser { + return &dumper{w: w} +} + +type dumper struct { + w io.Writer + rightChars [18]byte + buf [14]byte + used int // number of bytes in the current line + n uint // number of bytes, total + closed bool +} + +func toChar(b byte) byte { + if b < 32 || b > 126 { + return '.' + } + return b +} + +func (h *dumper) Write(data []byte) (n int, err error) { + if h.closed { + return 0, errors.New("encoding/hex: dumper closed") + } + + // Output lines look like: + // 00000010 2e 2f 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d |./0123456789:;<=| + // ^ offset ^ extra space ^ ASCII of line. + for i := range data { + if h.used == 0 { + // At the beginning of a line we print the current + // offset in hex. + h.buf[0] = byte(h.n >> 24) + h.buf[1] = byte(h.n >> 16) + h.buf[2] = byte(h.n >> 8) + h.buf[3] = byte(h.n) + Encode(h.buf[4:], h.buf[:4]) + h.buf[12] = ' ' + h.buf[13] = ' ' + _, err = h.w.Write(h.buf[4:]) + if err != nil { + return + } + } + Encode(h.buf[:], data[i:i+1]) + h.buf[2] = ' ' + l := 3 + if h.used == 7 { + // There's an additional space after the 8th byte. + h.buf[3] = ' ' + l = 4 + } else if h.used == 15 { + // At the end of the line there's an extra space and + // the bar for the right column. + h.buf[3] = ' ' + h.buf[4] = '|' + l = 5 + } + _, err = h.w.Write(h.buf[:l]) + if err != nil { + return + } + n++ + h.rightChars[h.used] = toChar(data[i]) + h.used++ + h.n++ + if h.used == 16 { + h.rightChars[16] = '|' + h.rightChars[17] = '\n' + _, err = h.w.Write(h.rightChars[:]) + if err != nil { + return + } + h.used = 0 + } + } + return +} + +func (h *dumper) Close() (err error) { + // See the comments in Write() for the details of this format. + if h.closed { + return + } + h.closed = true + if h.used == 0 { + return + } + h.buf[0] = ' ' + h.buf[1] = ' ' + h.buf[2] = ' ' + h.buf[3] = ' ' + h.buf[4] = '|' + nBytes := h.used + for h.used < 16 { + l := 3 + if h.used == 7 { + l = 4 + } else if h.used == 15 { + l = 5 + } + _, err = h.w.Write(h.buf[:l]) + if err != nil { + return + } + h.used++ + } + h.rightChars[nBytes] = '|' + h.rightChars[nBytes+1] = '\n' + _, err = h.w.Write(h.rightChars[:nBytes+2]) + return +} diff --git a/src/encoding/hex/hex_test.go b/src/encoding/hex/hex_test.go new file mode 100644 index 0000000..03331ea --- /dev/null +++ b/src/encoding/hex/hex_test.go @@ -0,0 +1,289 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package hex + +import ( + "bytes" + "fmt" + "io" + "strings" + "testing" +) + +type encDecTest struct { + enc string + dec []byte +} + +var encDecTests = []encDecTest{ + {"", []byte{}}, + {"0001020304050607", []byte{0, 1, 2, 3, 4, 5, 6, 7}}, + {"08090a0b0c0d0e0f", []byte{8, 9, 10, 11, 12, 13, 14, 15}}, + {"f0f1f2f3f4f5f6f7", []byte{0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7}}, + {"f8f9fafbfcfdfeff", []byte{0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}}, + {"67", []byte{'g'}}, + {"e3a1", []byte{0xe3, 0xa1}}, +} + +func TestEncode(t *testing.T) { + for i, test := range encDecTests { + dst := make([]byte, EncodedLen(len(test.dec))) + n := Encode(dst, test.dec) + if n != len(dst) { + t.Errorf("#%d: bad return value: got: %d want: %d", i, n, len(dst)) + } + if string(dst) != test.enc { + t.Errorf("#%d: got: %#v want: %#v", i, dst, test.enc) + } + dst = []byte("lead") + dst = AppendEncode(dst, test.dec) + if string(dst) != "lead"+test.enc { + t.Errorf("#%d: got: %#v want: %#v", i, dst, "lead"+test.enc) + } + } +} + +func TestDecode(t *testing.T) { + // Case for decoding uppercase hex characters, since + // Encode always uses lowercase. + decTests := append(encDecTests, encDecTest{"F8F9FAFBFCFDFEFF", []byte{0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}}) + for i, test := range decTests { + dst := make([]byte, DecodedLen(len(test.enc))) + n, err := Decode(dst, []byte(test.enc)) + if err != nil { + t.Errorf("#%d: bad return value: got:%d want:%d", i, n, len(dst)) + } else if !bytes.Equal(dst, test.dec) { + t.Errorf("#%d: got: %#v want: %#v", i, dst, test.dec) + } + dst = []byte("lead") + dst, err = AppendDecode(dst, []byte(test.enc)) + if err != nil { + t.Errorf("#%d: AppendDecode error: %v", i, err) + } else if string(dst) != "lead"+string(test.dec) { + t.Errorf("#%d: got: %#v want: %#v", i, dst, "lead"+string(test.dec)) + } + } +} + +func TestEncodeToString(t *testing.T) { + for i, test := range encDecTests { + s := EncodeToString(test.dec) + if s != test.enc { + t.Errorf("#%d got:%s want:%s", i, s, test.enc) + } + } +} + +func TestDecodeString(t *testing.T) { + for i, test := range encDecTests { + dst, err := DecodeString(test.enc) + if err != nil { + t.Errorf("#%d: unexpected err value: %s", i, err) + continue + } + if !bytes.Equal(dst, test.dec) { + t.Errorf("#%d: got: %#v want: #%v", i, dst, test.dec) + } + } +} + +var errTests = []struct { + in string + out string + err error +}{ + {"", "", nil}, + {"0", "", ErrLength}, + {"zd4aa", "", InvalidByteError('z')}, + {"d4aaz", "\xd4\xaa", InvalidByteError('z')}, + {"30313", "01", ErrLength}, + {"0g", "", InvalidByteError('g')}, + {"00gg", "\x00", InvalidByteError('g')}, + {"0\x01", "", InvalidByteError('\x01')}, + {"ffeed", "\xff\xee", ErrLength}, +} + +func TestDecodeErr(t *testing.T) { + for _, tt := range errTests { + out := make([]byte, len(tt.in)+10) + n, err := Decode(out, []byte(tt.in)) + if string(out[:n]) != tt.out || err != tt.err { + t.Errorf("Decode(%q) = %q, %v, want %q, %v", tt.in, string(out[:n]), err, tt.out, tt.err) + } + } +} + +func TestDecodeStringErr(t *testing.T) { + for _, tt := range errTests { + out, err := DecodeString(tt.in) + if string(out) != tt.out || err != tt.err { + t.Errorf("DecodeString(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, tt.err) + } + } +} + +func TestEncoderDecoder(t *testing.T) { + for _, multiplier := range []int{1, 128, 192} { + for _, test := range encDecTests { + input := bytes.Repeat(test.dec, multiplier) + output := strings.Repeat(test.enc, multiplier) + + var buf bytes.Buffer + enc := NewEncoder(&buf) + r := struct{ io.Reader }{bytes.NewReader(input)} // io.Reader only; not io.WriterTo + if n, err := io.CopyBuffer(enc, r, make([]byte, 7)); n != int64(len(input)) || err != nil { + t.Errorf("encoder.Write(%q*%d) = (%d, %v), want (%d, nil)", test.dec, multiplier, n, err, len(input)) + continue + } + + if encDst := buf.String(); encDst != output { + t.Errorf("buf(%q*%d) = %v, want %v", test.dec, multiplier, encDst, output) + continue + } + + dec := NewDecoder(&buf) + var decBuf bytes.Buffer + w := struct{ io.Writer }{&decBuf} // io.Writer only; not io.ReaderFrom + if _, err := io.CopyBuffer(w, dec, make([]byte, 7)); err != nil || decBuf.Len() != len(input) { + t.Errorf("decoder.Read(%q*%d) = (%d, %v), want (%d, nil)", test.enc, multiplier, decBuf.Len(), err, len(input)) + } + + if !bytes.Equal(decBuf.Bytes(), input) { + t.Errorf("decBuf(%q*%d) = %v, want %v", test.dec, multiplier, decBuf.Bytes(), input) + continue + } + } + } +} + +func TestDecoderErr(t *testing.T) { + for _, tt := range errTests { + dec := NewDecoder(strings.NewReader(tt.in)) + out, err := io.ReadAll(dec) + wantErr := tt.err + // Decoder is reading from stream, so it reports io.ErrUnexpectedEOF instead of ErrLength. + if wantErr == ErrLength { + wantErr = io.ErrUnexpectedEOF + } + if string(out) != tt.out || err != wantErr { + t.Errorf("NewDecoder(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, wantErr) + } + } +} + +func TestDumper(t *testing.T) { + var in [40]byte + for i := range in { + in[i] = byte(i + 30) + } + + for stride := 1; stride < len(in); stride++ { + var out bytes.Buffer + dumper := Dumper(&out) + done := 0 + for done < len(in) { + todo := done + stride + if todo > len(in) { + todo = len(in) + } + dumper.Write(in[done:todo]) + done = todo + } + + dumper.Close() + if !bytes.Equal(out.Bytes(), expectedHexDump) { + t.Errorf("stride: %d failed. got:\n%s\nwant:\n%s", stride, out.Bytes(), expectedHexDump) + } + } +} + +func TestDumper_doubleclose(t *testing.T) { + var out strings.Builder + dumper := Dumper(&out) + + dumper.Write([]byte(`gopher`)) + dumper.Close() + dumper.Close() + dumper.Write([]byte(`gopher`)) + dumper.Close() + + expected := "00000000 67 6f 70 68 65 72 |gopher|\n" + if out.String() != expected { + t.Fatalf("got:\n%#v\nwant:\n%#v", out.String(), expected) + } +} + +func TestDumper_earlyclose(t *testing.T) { + var out strings.Builder + dumper := Dumper(&out) + + dumper.Close() + dumper.Write([]byte(`gopher`)) + + expected := "" + if out.String() != expected { + t.Fatalf("got:\n%#v\nwant:\n%#v", out.String(), expected) + } +} + +func TestDump(t *testing.T) { + var in [40]byte + for i := range in { + in[i] = byte(i + 30) + } + + out := []byte(Dump(in[:])) + if !bytes.Equal(out, expectedHexDump) { + t.Errorf("got:\n%s\nwant:\n%s", out, expectedHexDump) + } +} + +var expectedHexDump = []byte(`00000000 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d |.. !"#$%&'()*+,-| +00000010 2e 2f 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d |./0123456789:;<=| +00000020 3e 3f 40 41 42 43 44 45 |>?@ABCDE| +`) + +var sink []byte + +func BenchmarkEncode(b *testing.B) { + for _, size := range []int{256, 1024, 4096, 16384} { + src := bytes.Repeat([]byte{2, 3, 5, 7, 9, 11, 13, 17}, size/8) + sink = make([]byte, 2*size) + + b.Run(fmt.Sprintf("%v", size), func(b *testing.B) { + b.SetBytes(int64(size)) + for i := 0; i < b.N; i++ { + Encode(sink, src) + } + }) + } +} + +func BenchmarkDecode(b *testing.B) { + for _, size := range []int{256, 1024, 4096, 16384} { + src := bytes.Repeat([]byte{'2', 'b', '7', '4', '4', 'f', 'a', 'a'}, size/8) + sink = make([]byte, size/2) + + b.Run(fmt.Sprintf("%v", size), func(b *testing.B) { + b.SetBytes(int64(size)) + for i := 0; i < b.N; i++ { + Decode(sink, src) + } + }) + } +} + +func BenchmarkDump(b *testing.B) { + for _, size := range []int{256, 1024, 4096, 16384} { + src := bytes.Repeat([]byte{2, 3, 5, 7, 9, 11, 13, 17}, size/8) + + b.Run(fmt.Sprintf("%v", size), func(b *testing.B) { + b.SetBytes(int64(size)) + for i := 0; i < b.N; i++ { + Dump(src) + } + }) + } +} diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go new file mode 100644 index 0000000..f7bcf80 --- /dev/null +++ b/src/encoding/json/bench_test.go @@ -0,0 +1,573 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Large data benchmark. +// The JSON data is a summary of agl's changes in the +// go, webkit, and chromium open source projects. +// We benchmark converting between the JSON form +// and in-memory data structures. + +package json + +import ( + "bytes" + "compress/gzip" + "fmt" + "internal/testenv" + "io" + "os" + "reflect" + "regexp" + "runtime" + "strings" + "sync" + "testing" +) + +type codeResponse struct { + Tree *codeNode `json:"tree"` + Username string `json:"username"` +} + +type codeNode struct { + Name string `json:"name"` + Kids []*codeNode `json:"kids"` + CLWeight float64 `json:"cl_weight"` + Touches int `json:"touches"` + MinT int64 `json:"min_t"` + MaxT int64 `json:"max_t"` + MeanT int64 `json:"mean_t"` +} + +var codeJSON []byte +var codeStruct codeResponse + +func codeInit() { + f, err := os.Open("testdata/code.json.gz") + if err != nil { + panic(err) + } + defer f.Close() + gz, err := gzip.NewReader(f) + if err != nil { + panic(err) + } + data, err := io.ReadAll(gz) + if err != nil { + panic(err) + } + + codeJSON = data + + if err := Unmarshal(codeJSON, &codeStruct); err != nil { + panic("unmarshal code.json: " + err.Error()) + } + + if data, err = Marshal(&codeStruct); err != nil { + panic("marshal code.json: " + err.Error()) + } + + if !bytes.Equal(data, codeJSON) { + println("different lengths", len(data), len(codeJSON)) + for i := 0; i < len(data) && i < len(codeJSON); i++ { + if data[i] != codeJSON[i] { + println("re-marshal: changed at byte", i) + println("orig: ", string(codeJSON[i-10:i+10])) + println("new: ", string(data[i-10:i+10])) + break + } + } + panic("re-marshal code.json: different result") + } +} + +func BenchmarkCodeEncoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeEncoderError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatalf("Encode error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshalError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatalf("Marshal error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func benchMarshalBytes(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + } +} + +func benchMarshalBytesError(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatalf("Marshal error: %v", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("Marshal error: got nil, want non-nil") + } + } + } +} + +func BenchmarkMarshalBytes(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytes(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytes(256)) + // 4096 is large enough that we want to avoid allocating for it. + b.Run("4096", benchMarshalBytes(4096)) +} + +func BenchmarkMarshalBytesError(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytesError(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytesError(256)) + // 4096 is large enough that we want to avoid allocating for it. + b.Run("4096", benchMarshalBytesError(4096)) +} + +func BenchmarkMarshalMap(b *testing.B) { + b.ReportAllocs() + m := map[string]int{ + "key3": 3, + "key2": 2, + "key1": 1, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(m); err != nil { + b.Fatal("Marshal:", err) + } + } + }) +} + +func BenchmarkCodeDecoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + dec := NewDecoder(&buf) + var r codeResponse + for pb.Next() { + buf.Write(codeJSON) + // hide EOF + buf.WriteByte('\n') + buf.WriteByte('\n') + buf.WriteByte('\n') + if err := dec.Decode(&r); err != nil { + b.Fatalf("Decode error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnicodeDecoder(b *testing.B) { + b.ReportAllocs() + j := []byte(`"\uD83D\uDE01"`) + b.SetBytes(int64(len(j))) + r := bytes.NewReader(j) + dec := NewDecoder(r) + var out string + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := dec.Decode(&out); err != nil { + b.Fatalf("Decode error: %v", err) + } + r.Seek(0, 0) + } +} + +func BenchmarkDecoderStream(b *testing.B) { + b.ReportAllocs() + b.StopTimer() + var buf bytes.Buffer + dec := NewDecoder(&buf) + buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n") + var x any + if err := dec.Decode(&x); err != nil { + b.Fatalf("Decode error: %v", err) + } + ones := strings.Repeat(" 1\n", 300000) + "\n\n\n" + b.StartTimer() + for i := 0; i < b.N; i++ { + if i%300000 == 0 { + buf.WriteString(ones) + } + x = nil + switch err := dec.Decode(&x); { + case err != nil: + b.Fatalf("Decode error: %v", err) + case x != 1.0: + b.Fatalf("Decode: got %v want 1.0", i) + } + } +} + +func BenchmarkCodeUnmarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + var r codeResponse + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeUnmarshalReuse(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var r codeResponse + for pb.Next() { + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnmarshalString(b *testing.B) { + b.ReportAllocs() + data := []byte(`"hello, world"`) + b.RunParallel(func(pb *testing.PB) { + var s string + for pb.Next() { + if err := Unmarshal(data, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalFloat64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3.14`) + b.RunParallel(func(pb *testing.PB) { + var f float64 + for pb.Next() { + if err := Unmarshal(data, &f); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalInt64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3`) + b.RunParallel(func(pb *testing.PB) { + var x int64 + for pb.Next() { + if err := Unmarshal(data, &x); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmarshalMap(b *testing.B) { + b.ReportAllocs() + data := []byte(`{"key1":"value1","key2":"value2","key3":"value3"}`) + b.RunParallel(func(pb *testing.PB) { + x := make(map[string]string, 3) + for pb.Next() { + if err := Unmarshal(data, &x); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkIssue10335(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"a":{ }}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkIssue34127(b *testing.B) { + b.ReportAllocs() + j := struct { + Bar string `json:"bar,string"` + }{ + Bar: `foobar`, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&j); err != nil { + b.Fatalf("Marshal error: %v", err) + } + } + }) +} + +func BenchmarkUnmapped(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatalf("Unmarshal error: %v", err) + } + } + }) +} + +func BenchmarkTypeFieldsCache(b *testing.B) { + b.ReportAllocs() + var maxTypes int = 1e6 + if testenv.Builder() != "" { + maxTypes = 1e3 // restrict cache sizes on builders + } + + // Dynamically generate many new types. + types := make([]reflect.Type, maxTypes) + fs := []reflect.StructField{{ + Type: reflect.TypeFor[string](), + Index: []int{0}, + }} + for i := range types { + fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i) + types[i] = reflect.StructOf(fs) + } + + // clearClear clears the cache. Other JSON operations, must not be running. + clearCache := func() { + fieldCache = sync.Map{} + } + + // MissTypes tests the performance of repeated cache misses. + // This measures the time to rebuild a cache of size nt. + for nt := 1; nt <= maxTypes; nt *= 10 { + ts := types[:nt] + b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) { + nc := runtime.GOMAXPROCS(0) + for i := 0; i < b.N; i++ { + clearCache() + var wg sync.WaitGroup + for j := 0; j < nc; j++ { + wg.Add(1) + go func(j int) { + for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] { + cachedTypeFields(t) + } + wg.Done() + }(j) + } + wg.Wait() + } + }) + } + + // HitTypes tests the performance of repeated cache hits. + // This measures the average time of each cache lookup. + for nt := 1; nt <= maxTypes; nt *= 10 { + // Pre-warm a cache of size nt. + clearCache() + for _, t := range types[:nt] { + cachedTypeFields(t) + } + b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + cachedTypeFields(types[0]) + } + }) + }) + } +} + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) +} + +func BenchmarkEncoderEncode(b *testing.B) { + b.ReportAllocs() + type T struct { + X, Y string + } + v := &T{"foo", "bar"} + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if err := NewEncoder(io.Discard).Encode(v); err != nil { + b.Fatalf("Encode error: %v", err) + } + } + }) +} + +func BenchmarkNumberIsValid(b *testing.B) { + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + isValidNumber(s) + } +} + +func BenchmarkNumberIsValidRegexp(b *testing.B) { + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + jsonNumberRegexp.MatchString(s) + } +} diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go new file mode 100644 index 0000000..bc1891f --- /dev/null +++ b/src/encoding/json/decode.go @@ -0,0 +1,1292 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Represents JSON data structure using native Go types: booleans, floats, +// strings, arrays, and maps. + +package json + +import ( + "encoding" + "encoding/base64" + "fmt" + "reflect" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +// Unmarshal parses the JSON-encoded data and stores the result +// in the value pointed to by v. If v is nil or not a pointer, +// Unmarshal returns an [InvalidUnmarshalError]. +// +// Unmarshal uses the inverse of the encodings that +// [Marshal] uses, allocating maps, slices, and pointers as necessary, +// with the following additional rules: +// +// To unmarshal JSON into a pointer, Unmarshal first handles the case of +// the JSON being the JSON literal null. In that case, Unmarshal sets +// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into +// the value pointed at by the pointer. If the pointer is nil, Unmarshal +// allocates a new value for it to point to. +// +// To unmarshal JSON into a value implementing [Unmarshaler], +// Unmarshal calls that value's [Unmarshaler.UnmarshalJSON] method, including +// when the input is a JSON null. +// Otherwise, if the value implements [encoding.TextUnmarshaler] +// and the input is a JSON quoted string, Unmarshal calls +// [encoding.TextUnmarshaler.UnmarshalText] with the unquoted form of the string. +// +// To unmarshal JSON into a struct, Unmarshal matches incoming object +// keys to the keys used by [Marshal] (either the struct field name or its tag), +// preferring an exact match but also accepting a case-insensitive match. By +// default, object keys which don't have a corresponding struct field are +// ignored (see [Decoder.DisallowUnknownFields] for an alternative). +// +// To unmarshal JSON into an interface value, +// Unmarshal stores one of these in the interface value: +// +// - bool, for JSON booleans +// - float64, for JSON numbers +// - string, for JSON strings +// - []interface{}, for JSON arrays +// - map[string]interface{}, for JSON objects +// - nil for JSON null +// +// To unmarshal a JSON array into a slice, Unmarshal resets the slice length +// to zero and then appends each element to the slice. +// As a special case, to unmarshal an empty JSON array into a slice, +// Unmarshal replaces the slice with a new empty slice. +// +// To unmarshal a JSON array into a Go array, Unmarshal decodes +// JSON array elements into corresponding Go array elements. +// If the Go array is smaller than the JSON array, +// the additional JSON array elements are discarded. +// If the JSON array is smaller than the Go array, +// the additional Go array elements are set to zero values. +// +// To unmarshal a JSON object into a map, Unmarshal first establishes a map to +// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal +// reuses the existing map, keeping existing entries. Unmarshal then stores +// key-value pairs from the JSON object into the map. The map's key type must +// either be any string type, an integer, implement [json.Unmarshaler], or +// implement [encoding.TextUnmarshaler]. +// +// If the JSON-encoded data contain a syntax error, Unmarshal returns a [SyntaxError]. +// +// If a JSON value is not appropriate for a given target type, +// or if a JSON number overflows the target type, Unmarshal +// skips that field and completes the unmarshaling as best it can. +// If no more serious errors are encountered, Unmarshal returns +// an [UnmarshalTypeError] describing the earliest such error. In any +// case, it's not guaranteed that all the remaining fields following +// the problematic one will be unmarshaled into the target object. +// +// The JSON null value unmarshals into an interface, map, pointer, or slice +// by setting that Go value to nil. Because null is often used in JSON to mean +// “not present,” unmarshaling a JSON null into any other Go type has no effect +// on the value and produces no error. +// +// When unmarshaling quoted strings, invalid UTF-8 or +// invalid UTF-16 surrogate pairs are not treated as an error. +// Instead, they are replaced by the Unicode replacement +// character U+FFFD. +func Unmarshal(data []byte, v any) error { + // Check for well-formedness. + // Avoids filling out half a data structure + // before discovering a JSON syntax error. + var d decodeState + err := checkValid(data, &d.scan) + if err != nil { + return err + } + + d.init(data) + return d.unmarshal(v) +} + +// Unmarshaler is the interface implemented by types +// that can unmarshal a JSON description of themselves. +// The input can be assumed to be a valid encoding of +// a JSON value. UnmarshalJSON must copy the JSON data +// if it wishes to retain the data after returning. +// +// By convention, to approximate the behavior of [Unmarshal] itself, +// Unmarshalers implement UnmarshalJSON([]byte("null")) as a no-op. +type Unmarshaler interface { + UnmarshalJSON([]byte) error +} + +// An UnmarshalTypeError describes a JSON value that was +// not appropriate for a value of a specific Go type. +type UnmarshalTypeError struct { + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to + Offset int64 // error occurred after reading Offset bytes + Struct string // name of the struct type containing the field + Field string // the full path from root node to the field +} + +func (e *UnmarshalTypeError) Error() string { + if e.Struct != "" || e.Field != "" { + return "json: cannot unmarshal " + e.Value + " into Go struct field " + e.Struct + "." + e.Field + " of type " + e.Type.String() + } + return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() +} + +// An UnmarshalFieldError describes a JSON object key that +// led to an unexported (and therefore unwritable) struct field. +// +// Deprecated: No longer used; kept for compatibility. +type UnmarshalFieldError struct { + Key string + Type reflect.Type + Field reflect.StructField +} + +func (e *UnmarshalFieldError) Error() string { + return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() +} + +// An InvalidUnmarshalError describes an invalid argument passed to [Unmarshal]. +// (The argument to [Unmarshal] must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return "json: Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Pointer { + return "json: Unmarshal(non-pointer " + e.Type.String() + ")" + } + return "json: Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v any) error { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Pointer || rv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + + d.scan.reset() + d.scanWhile(scanSkipSpace) + // We decode rv not rv.Elem because the Unmarshaler interface + // test must be applied at the top level of the value. + err := d.value(rv) + if err != nil { + return d.addErrorContext(err) + } + return d.savedError +} + +// A Number represents a JSON number literal. +type Number string + +// String returns the literal text of the number. +func (n Number) String() string { return string(n) } + +// Float64 returns the number as a float64. +func (n Number) Float64() (float64, error) { + return strconv.ParseFloat(string(n), 64) +} + +// Int64 returns the number as an int64. +func (n Number) Int64() (int64, error) { + return strconv.ParseInt(string(n), 10, 64) +} + +// An errorContext provides context for type errors during decoding. +type errorContext struct { + Struct reflect.Type + FieldStack []string +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // next read offset in data + opcode int // last read result + scan scanner + errorContext *errorContext + savedError error + useNumber bool + disallowUnknownFields bool +} + +// readIndex returns the position of the last byte read. +func (d *decodeState) readIndex() int { + return d.off - 1 +} + +// phasePanicMsg is used as a panic message when we end up with something that +// shouldn't happen. It can indicate a bug in the JSON decoder, or that +// something is editing the data slice while the decoder executes. +const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" + +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + d.savedError = nil + if d.errorContext != nil { + d.errorContext.Struct = nil + // Reuse the allocated space for the FieldStack slice. + d.errorContext.FieldStack = d.errorContext.FieldStack[:0] + } + return d +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = d.addErrorContext(err) + } +} + +// addErrorContext returns a new error enhanced with information from d.errorContext +func (d *decodeState) addErrorContext(err error) error { + if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) { + switch err := err.(type) { + case *UnmarshalTypeError: + err.Struct = d.errorContext.Struct.Name() + err.Field = strings.Join(d.errorContext.FieldStack, ".") + } + } + return err +} + +// skip scans to the end of what was started. +func (d *decodeState) skip() { + s, data, i := &d.scan, d.data, d.off + depth := len(s.parseState) + for { + op := s.step(s, data[i]) + i++ + if len(s.parseState) < depth { + d.off = i + d.opcode = op + return + } + } +} + +// scanNext processes the byte at d.data[d.off]. +func (d *decodeState) scanNext() { + if d.off < len(d.data) { + d.opcode = d.scan.step(&d.scan, d.data[d.off]) + d.off++ + } else { + d.opcode = d.scan.eof() + d.off = len(d.data) + 1 // mark processed EOF with len+1 + } +} + +// scanWhile processes bytes in d.data[d.off:] until it +// receives a scan code not equal to op. +func (d *decodeState) scanWhile(op int) { + s, data, i := &d.scan, d.data, d.off + for i < len(data) { + newOp := s.step(s, data[i]) + i++ + if newOp != op { + d.opcode = newOp + d.off = i + return + } + } + + d.off = len(data) + 1 // mark processed EOF with len+1 + d.opcode = d.scan.eof() +} + +// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the +// common case where we're decoding a literal. The decoder scans the input +// twice, once for syntax errors and to check the length of the value, and the +// second to perform the decoding. +// +// Only in the second step do we use decodeState to tokenize literals, so we +// know there aren't any syntax errors. We can take advantage of that knowledge, +// and scan a literal's bytes much more quickly. +func (d *decodeState) rescanLiteral() { + data, i := d.data, d.off +Switch: + switch data[i-1] { + case '"': // string + for ; i < len(data); i++ { + switch data[i] { + case '\\': + i++ // escaped char + case '"': + i++ // tokenize the closing quote too + break Switch + } + } + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number + for ; i < len(data); i++ { + switch data[i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '.', 'e', 'E', '+', '-': + default: + break Switch + } + } + case 't': // true + i += len("rue") + case 'f': // false + i += len("alse") + case 'n': // null + i += len("ull") + } + if i < len(data) { + d.opcode = stateEndValue(&d.scan, data[i]) + } else { + d.opcode = scanEnd + } + d.off = i + 1 +} + +// value consumes a JSON value from d.data[d.off-1:], decoding into v, and +// reads the following byte ahead. If v is invalid, the value is discarded. +// The first byte of the value has been read already. +func (d *decodeState) value(v reflect.Value) error { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray: + if v.IsValid() { + if err := d.array(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginObject: + if v.IsValid() { + if err := d.object(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginLiteral: + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + if v.IsValid() { + if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { + return err + } + } + } + return nil +} + +type unquotedValue struct{} + +// valueQuoted is like value but decodes a +// quoted string literal or literal null into an interface value. +// If it finds anything other than a quoted string literal or null, +// valueQuoted returns unquotedValue{}. +func (d *decodeState) valueQuoted() any { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray, scanBeginObject: + d.skip() + d.scanNext() + + case scanBeginLiteral: + v := d.literalInterface() + switch v.(type) { + case nil, string: + return v + } + } + return unquotedValue{} +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +// If it encounters an Unmarshaler, indirect stops and returns that. +// If decodingNull is true, indirect stops at the first settable pointer so it +// can be set to nil. +func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { + // Issue #24153 indicates that it is generally not a guaranteed property + // that you may round-trip a reflect.Value by calling Value.Addr().Elem() + // and expect the value to still be settable for values derived from + // unexported embedded struct fields. + // + // The logic below effectively does this when it first addresses the value + // (to satisfy possible pointer methods) and continues to dereference + // subsequent pointers as necessary. + // + // After the first round-trip, we set v back to the original value to + // preserve the original RW flags contained in reflect.Value. + v0 := v + haveAddr := false + + // If v is a named type and is addressable, + // start with its address, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() { + haveAddr = true + v = v.Addr() + } + for { + // Load value from interface, but only if the result will be + // usefully addressable. + if v.Kind() == reflect.Interface && !v.IsNil() { + e := v.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) { + haveAddr = false + v = e + continue + } + } + + if v.Kind() != reflect.Pointer { + break + } + + if decodingNull && v.CanSet() { + break + } + + // Prevent infinite loop if v is an interface pointing to its own address: + // var v interface{} + // v = &v + if v.Elem().Kind() == reflect.Interface && v.Elem().Elem() == v { + v = v.Elem() + break + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + if v.Type().NumMethod() > 0 && v.CanInterface() { + if u, ok := v.Interface().(Unmarshaler); ok { + return u, nil, reflect.Value{} + } + if !decodingNull { + if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { + return nil, u, reflect.Value{} + } + } + } + + if haveAddr { + v = v0 // restore original value after round-trip Value.Addr().Elem() + haveAddr = false + } else { + v = v.Elem() + } + } + return nil, nil, v +} + +// array consumes an array from d.data[d.off-1:], decoding into v. +// The first byte of the array ('[') has been read already. +func (d *decodeState) array(v reflect.Value) error { + // Check for unmarshaler. + u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + + // Check type of target. + switch v.Kind() { + case reflect.Interface: + if v.NumMethod() == 0 { + // Decoding into nil interface? Switch to non-reflect code. + ai := d.arrayInterface() + v.Set(reflect.ValueOf(ai)) + return nil + } + // Otherwise it's invalid. + fallthrough + default: + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + case reflect.Array, reflect.Slice: + break + } + + i := 0 + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + // Expand slice length, growing the slice if necessary. + if v.Kind() == reflect.Slice { + if i >= v.Cap() { + v.Grow(1) + } + if i >= v.Len() { + v.SetLen(i + 1) + } + } + + if i < v.Len() { + // Decode into element. + if err := d.value(v.Index(i)); err != nil { + return err + } + } else { + // Ran out of fixed array: skip. + if err := d.value(reflect.Value{}); err != nil { + return err + } + } + i++ + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + + if i < v.Len() { + if v.Kind() == reflect.Array { + for ; i < v.Len(); i++ { + v.Index(i).SetZero() // zero remainder of array + } + } else { + v.SetLen(i) // truncate the slice + } + } + if i == 0 && v.Kind() == reflect.Slice { + v.Set(reflect.MakeSlice(v.Type(), 0, 0)) + } + return nil +} + +var nullLiteral = []byte("null") +var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() + +// object consumes an object from d.data[d.off-1:], decoding into v. +// The first byte ('{') of the object has been read already. +func (d *decodeState) object(v reflect.Value) error { + // Check for unmarshaler. + u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + t := v.Type() + + // Decoding into nil interface? Switch to non-reflect code. + if v.Kind() == reflect.Interface && v.NumMethod() == 0 { + oi := d.objectInterface() + v.Set(reflect.ValueOf(oi)) + return nil + } + + var fields structFields + + // Check type of target: + // struct or + // map[T1]T2 where T1 is string, an integer type, + // or an encoding.TextUnmarshaler + switch v.Kind() { + case reflect.Map: + // Map key must either have string kind, have an integer kind, + // or be an encoding.TextUnmarshaler. + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) { + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + } + if v.IsNil() { + v.Set(reflect.MakeMap(t)) + } + case reflect.Struct: + fields = cachedTypeFields(t) + // ok + default: + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + + var mapElem reflect.Value + var origErrorContext errorContext + if d.errorContext != nil { + origErrorContext = *d.errorContext + } + + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquoteBytes(item) + if !ok { + panic(phasePanicMsg) + } + + // Figure out field corresponding to key. + var subv reflect.Value + destring := false // whether the value is wrapped in a string to be decoded first + + if v.Kind() == reflect.Map { + elemType := t.Elem() + if !mapElem.IsValid() { + mapElem = reflect.New(elemType).Elem() + } else { + mapElem.SetZero() + } + subv = mapElem + } else { + f := fields.byExactName[string(key)] + if f == nil { + f = fields.byFoldedName[string(foldName(key))] + } + if f != nil { + subv = v + destring = f.quoted + for _, i := range f.index { + if subv.Kind() == reflect.Pointer { + if subv.IsNil() { + // If a struct embeds a pointer to an unexported type, + // it is not possible to set a newly allocated value + // since the field is unexported. + // + // See https://golang.org/issue/21357 + if !subv.CanSet() { + d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) + // Invalidate subv to ensure d.value(subv) skips over + // the JSON value without assigning it to subv. + subv = reflect.Value{} + destring = false + break + } + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + subv = subv.Field(i) + } + if d.errorContext == nil { + d.errorContext = new(errorContext) + } + d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) + d.errorContext.Struct = t + } else if d.disallowUnknownFields { + d.saveError(fmt.Errorf("json: unknown field %q", key)) + } + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + if destring { + switch qv := d.valueQuoted().(type) { + case nil: + if err := d.literalStore(nullLiteral, subv, false); err != nil { + return err + } + case string: + if err := d.literalStore([]byte(qv), subv, true); err != nil { + return err + } + default: + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) + } + } else { + if err := d.value(subv); err != nil { + return err + } + } + + // Write value back to map; + // if using struct, subv points into struct already. + if v.Kind() == reflect.Map { + kt := t.Key() + var kv reflect.Value + if reflect.PointerTo(kt).Implements(textUnmarshalerType) { + kv = reflect.New(kt) + if err := d.literalStore(item, kv, true); err != nil { + return err + } + kv = kv.Elem() + } else { + switch kt.Kind() { + case reflect.String: + kv = reflect.New(kt).Elem() + kv.SetString(string(key)) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + s := string(key) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.New(kt).Elem() + kv.SetInt(n) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + s := string(key) + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.New(kt).Elem() + kv.SetUint(n) + default: + panic("json: Unexpected key type") // should never occur + } + } + if kv.IsValid() { + v.SetMapIndex(kv, subv) + } + } + + // Next token must be , or }. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.errorContext != nil { + // Reset errorContext to its original state. + // Keep the same underlying array for FieldStack, to reuse the + // space and avoid unnecessary allocs. + d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] + d.errorContext.Struct = origErrorContext.Struct + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return nil +} + +// convertNumber converts the number literal s to a float64 or a Number +// depending on the setting of d.useNumber. +func (d *decodeState) convertNumber(s string) (any, error) { + if d.useNumber { + return Number(s), nil + } + f, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeFor[float64](), Offset: int64(d.off)} + } + return f, nil +} + +var numberType = reflect.TypeFor[Number]() + +// literalStore decodes a literal stored in item into v. +// +// fromQuoted indicates whether this literal came from unwrapping a +// string from the ",string" struct tag option. this is used only to +// produce more helpful error messages. +func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error { + // Check for unmarshaler. + if len(item) == 0 { + // Empty string given. + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + isNull := item[0] == 'n' // null + u, ut, pv := indirect(v, isNull) + if u != nil { + return u.UnmarshalJSON(item) + } + if ut != nil { + if item[0] != '"' { + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + val := "number" + switch item[0] { + case 'n': + val = "null" + case 't', 'f': + val = "bool" + } + d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) + return nil + } + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + return ut.UnmarshalText(s) + } + + v = pv + + switch c := item[0]; c { + case 'n': // null + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "null" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: + v.SetZero() + // otherwise, ignore null for primitives/string + } + case 't', 'f': // true, false + value := item[0] == 't' + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "true" && string(item) != "false" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + default: + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + case reflect.Bool: + v.SetBool(value) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(value)) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + case '"': // string + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Slice: + if v.Type().Elem().Kind() != reflect.Uint8 { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) + n, err := base64.StdEncoding.Decode(b, s) + if err != nil { + d.saveError(err) + break + } + v.SetBytes(b[:n]) + case reflect.String: + if v.Type() == numberType && !isValidNumber(string(s)) { + return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) + } + v.SetString(string(s)) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(string(s))) + } else { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + default: // number + if c != '-' && (c < '0' || c > '9') { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + switch v.Kind() { + default: + if v.Kind() == reflect.String && v.Type() == numberType { + // s must be a valid number, because it's + // already been tokenized. + v.SetString(string(item)) + break + } + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Interface: + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + break + } + if v.NumMethod() != 0 { + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.Set(reflect.ValueOf(n)) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(string(item), 10, 64) + if err != nil || v.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetInt(n) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + n, err := strconv.ParseUint(string(item), 10, 64) + if err != nil || v.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetUint(n) + + case reflect.Float32, reflect.Float64: + n, err := strconv.ParseFloat(string(item), v.Type().Bits()) + if err != nil || v.OverflowFloat(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetFloat(n) + } + } + return nil +} + +// The xxxInterface routines build up a value to be stored +// in an empty interface. They are not strictly necessary, +// but they avoid the weight of reflection in this common case. + +// valueInterface is like value but returns interface{} +func (d *decodeState) valueInterface() (val any) { + switch d.opcode { + default: + panic(phasePanicMsg) + case scanBeginArray: + val = d.arrayInterface() + d.scanNext() + case scanBeginObject: + val = d.objectInterface() + d.scanNext() + case scanBeginLiteral: + val = d.literalInterface() + } + return +} + +// arrayInterface is like array but returns []interface{}. +func (d *decodeState) arrayInterface() []any { + var v = make([]any, 0) + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + v = append(v, d.valueInterface()) + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + return v +} + +// objectInterface is like object but returns map[string]interface{}. +func (d *decodeState) objectInterface() map[string]any { + m := make(map[string]any) + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read string key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + // Read value. + m[key] = d.valueInterface() + + // Next token must be , or }. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return m +} + +// literalInterface consumes and returns a literal from d.data[d.off-1:] and +// it reads the following byte ahead. The first byte of the literal has been +// read already (that's how the caller knows it's a literal). +func (d *decodeState) literalInterface() any { + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + item := d.data[start:d.readIndex()] + + switch c := item[0]; c { + case 'n': // null + return nil + + case 't', 'f': // true, false + return c == 't' + + case '"': // string + s, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + return s + + default: // number + if c != '-' && (c < '0' || c > '9') { + panic(phasePanicMsg) + } + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + } + return n + } +} + +// getu4 decodes \uXXXX from the beginning of s, returning the hex value, +// or it returns -1. +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + +// unquote converts a quoted JSON string literal s into an actual string t. +// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go new file mode 100644 index 0000000..a10c1e1 --- /dev/null +++ b/src/encoding/json/decode_test.go @@ -0,0 +1,2620 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "image" + "math" + "math/big" + "net" + "reflect" + "strconv" + "strings" + "testing" + "time" +) + +type T struct { + X string + Y int + Z int `json:"-"` +} + +type U struct { + Alphabet string `json:"alpha"` +} + +type V struct { + F1 any + F2 int32 + F3 Number + F4 *VOuter +} + +type VOuter struct { + V V +} + +type W struct { + S SS +} + +type P struct { + PP PP +} + +type PP struct { + T T + Ts []T +} + +type SS string + +func (*SS) UnmarshalJSON(data []byte) error { + return &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[SS]()} +} + +// ifaceNumAsFloat64/ifaceNumAsNumber are used to test unmarshaling with and +// without UseNumber +var ifaceNumAsFloat64 = map[string]any{ + "k1": float64(1), + "k2": "s", + "k3": []any{float64(1), float64(2.0), float64(3e-3)}, + "k4": map[string]any{"kk1": "s", "kk2": float64(2)}, +} + +var ifaceNumAsNumber = map[string]any{ + "k1": Number("1"), + "k2": "s", + "k3": []any{Number("1"), Number("2.0"), Number("3e-3")}, + "k4": map[string]any{"kk1": "s", "kk2": Number("2")}, +} + +type tx struct { + x int +} + +type u8 uint8 + +// A type that can unmarshal itself. + +type unmarshaler struct { + T bool +} + +func (u *unmarshaler) UnmarshalJSON(b []byte) error { + *u = unmarshaler{true} // All we need to see that UnmarshalJSON is called. + return nil +} + +type ustruct struct { + M unmarshaler +} + +type unmarshalerText struct { + A, B string +} + +// needed for re-marshaling tests +func (u unmarshalerText) MarshalText() ([]byte, error) { + return []byte(u.A + ":" + u.B), nil +} + +func (u *unmarshalerText) UnmarshalText(b []byte) error { + pos := bytes.IndexByte(b, ':') + if pos == -1 { + return errors.New("missing separator") + } + u.A, u.B = string(b[:pos]), string(b[pos+1:]) + return nil +} + +var _ encoding.TextUnmarshaler = (*unmarshalerText)(nil) + +type ustructText struct { + M unmarshalerText +} + +// u8marshal is an integer type that can marshal/unmarshal itself. +type u8marshal uint8 + +func (u8 u8marshal) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf("u%d", u8)), nil +} + +var errMissingU8Prefix = errors.New("missing 'u' prefix") + +func (u8 *u8marshal) UnmarshalText(b []byte) error { + if !bytes.HasPrefix(b, []byte{'u'}) { + return errMissingU8Prefix + } + n, err := strconv.Atoi(string(b[1:])) + if err != nil { + return err + } + *u8 = u8marshal(n) + return nil +} + +var _ encoding.TextUnmarshaler = (*u8marshal)(nil) + +var ( + umtrue = unmarshaler{true} + umslice = []unmarshaler{{true}} + umstruct = ustruct{unmarshaler{true}} + + umtrueXY = unmarshalerText{"x", "y"} + umsliceXY = []unmarshalerText{{"x", "y"}} + umstructXY = ustructText{unmarshalerText{"x", "y"}} + + ummapXY = map[unmarshalerText]bool{{"x", "y"}: true} +) + +// Test data structures for anonymous fields. + +type Point struct { + Z int +} + +type Top struct { + Level0 int + Embed0 + *Embed0a + *Embed0b `json:"e,omitempty"` // treated as named + Embed0c `json:"-"` // ignored + Loop + Embed0p // has Point with X, Y, used + Embed0q // has Point with Z, used + embed // contains exported field +} + +type Embed0 struct { + Level1a int // overridden by Embed0a's Level1a with json tag + Level1b int // used because Embed0a's Level1b is renamed + Level1c int // used because Embed0a's Level1c is ignored + Level1d int // annihilated by Embed0a's Level1d + Level1e int `json:"x"` // annihilated by Embed0a.Level1e +} + +type Embed0a struct { + Level1a int `json:"Level1a,omitempty"` + Level1b int `json:"LEVEL1B,omitempty"` + Level1c int `json:"-"` + Level1d int // annihilated by Embed0's Level1d + Level1f int `json:"x"` // annihilated by Embed0's Level1e +} + +type Embed0b Embed0 + +type Embed0c Embed0 + +type Embed0p struct { + image.Point +} + +type Embed0q struct { + Point +} + +type embed struct { + Q int +} + +type Loop struct { + Loop1 int `json:",omitempty"` + Loop2 int `json:",omitempty"` + *Loop +} + +// From reflect test: +// The X in S6 and S7 annihilate, but they also block the X in S8.S9. +type S5 struct { + S6 + S7 + S8 +} + +type S6 struct { + X int +} + +type S7 S6 + +type S8 struct { + S9 +} + +type S9 struct { + X int + Y int +} + +// From reflect test: +// The X in S11.S6 and S12.S6 annihilate, but they also block the X in S13.S8.S9. +type S10 struct { + S11 + S12 + S13 +} + +type S11 struct { + S6 +} + +type S12 struct { + S6 +} + +type S13 struct { + S8 +} + +type Ambig struct { + // Given "hello", the first match should win. + First int `json:"HELLO"` + Second int `json:"Hello"` +} + +type XYZ struct { + X any + Y any + Z any +} + +type unexportedWithMethods struct{} + +func (unexportedWithMethods) F() {} + +type byteWithMarshalJSON byte + +func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, byte(b))), nil +} + +func (b *byteWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalJSON(i) + return nil +} + +type byteWithPtrMarshalJSON byte + +func (b *byteWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return byteWithMarshalJSON(*b).MarshalJSON() +} + +func (b *byteWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*byteWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type byteWithMarshalText byte + +func (b byteWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, byte(b))), nil +} + +func (b *byteWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalText(i) + return nil +} + +type byteWithPtrMarshalText byte + +func (b *byteWithPtrMarshalText) MarshalText() ([]byte, error) { + return byteWithMarshalText(*b).MarshalText() +} + +func (b *byteWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*byteWithMarshalText)(b).UnmarshalText(data) +} + +type intWithMarshalJSON int + +func (b intWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, int(b))), nil +} + +func (b *intWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalJSON(i) + return nil +} + +type intWithPtrMarshalJSON int + +func (b *intWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return intWithMarshalJSON(*b).MarshalJSON() +} + +func (b *intWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*intWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type intWithMarshalText int + +func (b intWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, int(b))), nil +} + +func (b *intWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalText(i) + return nil +} + +type intWithPtrMarshalText int + +func (b *intWithPtrMarshalText) MarshalText() ([]byte, error) { + return intWithMarshalText(*b).MarshalText() +} + +func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*intWithMarshalText)(b).UnmarshalText(data) +} + +type mapStringToStringData struct { + Data map[string]string `json:"data"` +} + +type B struct { + B bool `json:",string"` +} + +type DoublePtr struct { + I **int + J **int +} + +var unmarshalTests = []struct { + CaseName + in string + ptr any // new(type) + out any + err error + useNumber bool + golden bool + disallowUnknownFields bool +}{ + // basic types + {CaseName: Name(""), in: `true`, ptr: new(bool), out: true}, + {CaseName: Name(""), in: `1`, ptr: new(int), out: 1}, + {CaseName: Name(""), in: `1.2`, ptr: new(float64), out: 1.2}, + {CaseName: Name(""), in: `-5`, ptr: new(int16), out: int16(-5)}, + {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2"), useNumber: true}, + {CaseName: Name(""), in: `2`, ptr: new(Number), out: Number("2")}, + {CaseName: Name(""), in: `2`, ptr: new(any), out: float64(2.0)}, + {CaseName: Name(""), in: `2`, ptr: new(any), out: Number("2"), useNumber: true}, + {CaseName: Name(""), in: `"a\u1234"`, ptr: new(string), out: "a\u1234"}, + {CaseName: Name(""), in: `"http:\/\/"`, ptr: new(string), out: "http://"}, + {CaseName: Name(""), in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"}, + {CaseName: Name(""), in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"}, + {CaseName: Name(""), in: "null", ptr: new(any), out: nil}, + {CaseName: Name(""), in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeFor[string](), 7, "T", "X"}}, + {CaseName: Name(""), in: `{"X": 23}`, ptr: new(T), out: T{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[string](), 8, "T", "X"}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {CaseName: Name(""), in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true}, + {CaseName: Name(""), in: `{"S": 23}`, ptr: new(W), out: W{}, err: &UnmarshalTypeError{"number", reflect.TypeFor[SS](), 0, "W", "S"}}, + {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}}, + {CaseName: Name(""), in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true}, + {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsFloat64}, + {CaseName: Name(""), in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsNumber, useNumber: true}, + + // raw values with whitespace + {CaseName: Name(""), in: "\n true ", ptr: new(bool), out: true}, + {CaseName: Name(""), in: "\t 1 ", ptr: new(int), out: 1}, + {CaseName: Name(""), in: "\r 1.2 ", ptr: new(float64), out: 1.2}, + {CaseName: Name(""), in: "\t -5 \n", ptr: new(int16), out: int16(-5)}, + {CaseName: Name(""), in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"}, + + // Z has a "-" tag. + {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}}, + {CaseName: Name(""), in: `{"Y": 1, "Z": 2}`, ptr: new(T), err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true}, + + {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {CaseName: Name(""), in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + {CaseName: Name(""), in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}}, + {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + + // syntax errors + {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, + {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, + {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, + {CaseName: Name(""), in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: 5}}, + {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), out: V{F3: Number("-")}, err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: 9}}, + + // raw value errors + {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}}, + {CaseName: Name(""), in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}}, + {CaseName: Name(""), in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}}, + {CaseName: Name(""), in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {CaseName: Name(""), in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}}, + + // array tests + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([5]int), out: [5]int{1, 2, 3, 0, 0}}, + {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalJSON), err: errors.New("MustNotUnmarshalJSON was used")}, + + // empty array to interface test + {CaseName: Name(""), in: `[]`, ptr: new([]any), out: []any{}}, + {CaseName: Name(""), in: `null`, ptr: new([]any), out: []any(nil)}, + {CaseName: Name(""), in: `{"T":[]}`, ptr: new(map[string]any), out: map[string]any{"T": []any{}}}, + {CaseName: Name(""), in: `{"T":null}`, ptr: new(map[string]any), out: map[string]any{"T": any(nil)}}, + + // composite tests + {CaseName: Name(""), in: allValueIndent, ptr: new(All), out: allValue}, + {CaseName: Name(""), in: allValueCompact, ptr: new(All), out: allValue}, + {CaseName: Name(""), in: allValueIndent, ptr: new(*All), out: &allValue}, + {CaseName: Name(""), in: allValueCompact, ptr: new(*All), out: &allValue}, + {CaseName: Name(""), in: pallValueIndent, ptr: new(All), out: pallValue}, + {CaseName: Name(""), in: pallValueCompact, ptr: new(All), out: pallValue}, + {CaseName: Name(""), in: pallValueIndent, ptr: new(*All), out: &pallValue}, + {CaseName: Name(""), in: pallValueCompact, ptr: new(*All), out: &pallValue}, + + // unmarshal interface test + {CaseName: Name(""), in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called + {CaseName: Name(""), in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue}, + {CaseName: Name(""), in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice}, + {CaseName: Name(""), in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice}, + {CaseName: Name(""), in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct}, + + // UnmarshalText interface test + {CaseName: Name(""), in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY}, + {CaseName: Name(""), in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY}, + {CaseName: Name(""), in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY}, + {CaseName: Name(""), in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY}, + {CaseName: Name(""), in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY}, + + // integer-keyed map test + { + CaseName: Name(""), + in: `{"-1":"a","0":"b","1":"c"}`, + ptr: new(map[int]string), + out: map[int]string{-1: "a", 0: "b", 1: "c"}, + }, + { + CaseName: Name(""), + in: `{"0":"a","10":"c","9":"b"}`, + ptr: new(map[u8]string), + out: map[u8]string{0: "a", 9: "b", 10: "c"}, + }, + { + CaseName: Name(""), + in: `{"-9223372036854775808":"min","9223372036854775807":"max"}`, + ptr: new(map[int64]string), + out: map[int64]string{math.MinInt64: "min", math.MaxInt64: "max"}, + }, + { + CaseName: Name(""), + in: `{"18446744073709551615":"max"}`, + ptr: new(map[uint64]string), + out: map[uint64]string{math.MaxUint64: "max"}, + }, + { + CaseName: Name(""), + in: `{"0":false,"10":true}`, + ptr: new(map[uintptr]bool), + out: map[uintptr]bool{0: false, 10: true}, + }, + + // Check that MarshalText and UnmarshalText take precedence + // over default integer handling in map keys. + { + CaseName: Name(""), + in: `{"u2":4}`, + ptr: new(map[u8marshal]int), + out: map[u8marshal]int{2: 4}, + }, + { + CaseName: Name(""), + in: `{"2":4}`, + ptr: new(map[u8marshal]int), + err: errMissingU8Prefix, + }, + + // integer-keyed map errors + { + CaseName: Name(""), + in: `{"abc":"abc"}`, + ptr: new(map[int]string), + err: &UnmarshalTypeError{Value: "number abc", Type: reflect.TypeFor[int](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"256":"abc"}`, + ptr: new(map[uint8]string), + err: &UnmarshalTypeError{Value: "number 256", Type: reflect.TypeFor[uint8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"128":"abc"}`, + ptr: new(map[int8]string), + err: &UnmarshalTypeError{Value: "number 128", Type: reflect.TypeFor[int8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"-1":"abc"}`, + ptr: new(map[uint8]string), + err: &UnmarshalTypeError{Value: "number -1", Type: reflect.TypeFor[uint8](), Offset: 2}, + }, + { + CaseName: Name(""), + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[int]int), + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[int](), Offset: 7}, + }, + { + CaseName: Name(""), + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[uint]int), + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeFor[uint](), Offset: 7}, + }, + + // Map keys can be encoding.TextUnmarshalers. + {CaseName: Name(""), in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + // If multiple values for the same key exists, only the most recent value is used. + {CaseName: Name(""), in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + out: Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + }, + }, + { + CaseName: Name(""), + in: `{"hello": 1}`, + ptr: new(Ambig), + out: Ambig{First: 1}, + }, + + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + out: S5{S8: S8{S9: S9{Y: 2}}}, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, + }, + { + CaseName: Name(""), + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{"I": 0, "I": null, "J": null}`, + ptr: new(DoublePtr), + out: DoublePtr{I: nil, J: nil}, + }, + + // invalid UTF-8 is coerced to valid UTF-8. + { + CaseName: Name(""), + in: "\"hello\xffworld\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xc2\xc2world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xc2\xffworld\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800world\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + CaseName: Name(""), + in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"", + ptr: new(string), + out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld", + }, + + // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now. + { + CaseName: Name(""), + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[time.Time]string), + out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"}, + }, + + // issue 8305 + { + CaseName: Name(""), + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[Point]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[Point]string](), Offset: 1}, + }, + { + CaseName: Name(""), + in: `{"asdf": "hello world"}`, + ptr: new(map[unmarshaler]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[map[unmarshaler]string](), Offset: 1}, + }, + + // related to issue 13783. + // Go 1.7 changed marshaling a slice of typed byte to use the methods on the byte type, + // similar to marshaling a slice of typed int. + // These tests check that, assuming the byte type also has valid decoding methods, + // either the old base64 string encoding or the new per-element encoding can be + // successfully unmarshaled. The custom unmarshalers were accessible in earlier + // versions of Go, even though the custom marshaler was not. + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + // ints work with the marshaler but not the base64 []byte case + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalJSON), + out: []intWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalText), + out: []intWithMarshalText{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalJSON), + out: []intWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + CaseName: Name(""), + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalText), + out: []intWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + {CaseName: Name(""), in: `0.000001`, ptr: new(float64), out: 0.000001, golden: true}, + {CaseName: Name(""), in: `1e-7`, ptr: new(float64), out: 1e-7, golden: true}, + {CaseName: Name(""), in: `100000000000000000000`, ptr: new(float64), out: 100000000000000000000.0, golden: true}, + {CaseName: Name(""), in: `1e+21`, ptr: new(float64), out: 1e21, golden: true}, + {CaseName: Name(""), in: `-0.000001`, ptr: new(float64), out: -0.000001, golden: true}, + {CaseName: Name(""), in: `-1e-7`, ptr: new(float64), out: -1e-7, golden: true}, + {CaseName: Name(""), in: `-100000000000000000000`, ptr: new(float64), out: -100000000000000000000.0, golden: true}, + {CaseName: Name(""), in: `-1e+21`, ptr: new(float64), out: -1e21, golden: true}, + {CaseName: Name(""), in: `999999999999999900000`, ptr: new(float64), out: 999999999999999900000.0, golden: true}, + {CaseName: Name(""), in: `9007199254740992`, ptr: new(float64), out: 9007199254740992.0, golden: true}, + {CaseName: Name(""), in: `9007199254740993`, ptr: new(float64), out: 9007199254740992.0, golden: false}, + + { + CaseName: Name(""), + in: `{"V": {"F2": "hello"}}`, + ptr: new(VOuter), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeFor[int32](), + Offset: 20, + }, + }, + { + CaseName: Name(""), + in: `{"V": {"F4": {}, "F2": "hello"}}`, + ptr: new(VOuter), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeFor[int32](), + Offset: 30, + }, + }, + + // issue 15146. + // invalid inputs in wrongStringTests below. + {CaseName: Name(""), in: `{"B":"true"}`, ptr: new(B), out: B{true}, golden: true}, + {CaseName: Name(""), in: `{"B":"false"}`, ptr: new(B), out: B{false}, golden: true}, + {CaseName: Name(""), in: `{"B": "maybe"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "maybe" into bool`)}, + {CaseName: Name(""), in: `{"B": "tru"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "tru" into bool`)}, + {CaseName: Name(""), in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)}, + {CaseName: Name(""), in: `{"B": "null"}`, ptr: new(B), out: B{false}}, + {CaseName: Name(""), in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)}, + {CaseName: Name(""), in: `{"B": [2, 3]}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal unquoted value into bool`)}, + + // additional tests for disallowUnknownFields + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18, + "extra": true + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + { + CaseName: Name(""), + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12, + "extra": null + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + // issue 26444 + // UnmarshalTypeError without field & struct values + { + CaseName: Name(""), + in: `{"data":{"test1": "bob", "test2": 123}}`, + ptr: new(mapStringToStringData), + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 37, Struct: "mapStringToStringData", Field: "data"}, + }, + { + CaseName: Name(""), + in: `{"data":{"test1": 123, "test2": "bob"}}`, + ptr: new(mapStringToStringData), + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeFor[string](), Offset: 21, Struct: "mapStringToStringData", Field: "data"}, + }, + + // trying to decode JSON arrays or objects via TextUnmarshaler + { + CaseName: Name(""), + in: `[1, 2, 3]`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "array", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1}, + }, + { + CaseName: Name(""), + in: `{"foo": "bar"}`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeFor[*MustNotUnmarshalText](), Offset: 1}, + }, + // #22369 + { + CaseName: Name(""), + in: `{"PP": {"T": {"Y": "bad-type"}}}`, + ptr: new(P), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "PP.T.Y", + Type: reflect.TypeFor[int](), + Offset: 29, + }, + }, + { + CaseName: Name(""), + in: `{"Ts": [{"Y": 1}, {"Y": 2}, {"Y": "bad-type"}]}`, + ptr: new(PP), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "Ts.Y", + Type: reflect.TypeFor[int](), + Offset: 29, + }, + }, + // #14702 + { + CaseName: Name(""), + in: `invalid`, + ptr: new(Number), + err: &SyntaxError{ + msg: "invalid character 'i' looking for beginning of value", + Offset: 1, + }, + }, + { + CaseName: Name(""), + in: `"invalid"`, + ptr: new(Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(struct{ A Number }), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(struct { + A Number `json:",string"` + }), + err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`), + }, + { + CaseName: Name(""), + in: `{"A":"invalid"}`, + ptr: new(map[string]Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, +} + +func TestMarshal(t *testing.T) { + b, err := Marshal(allValue) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(b) != allValueCompact { + t.Errorf("Marshal:") + diff(t, b, []byte(allValueCompact)) + return + } + + b, err = Marshal(pallValue) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(b) != pallValueCompact { + t.Errorf("Marshal:") + diff(t, b, []byte(pallValueCompact)) + return + } +} + +func TestMarshalInvalidUTF8(t *testing.T) { + tests := []struct { + CaseName + in string + want string + }{ + {Name(""), "hello\xffworld", `"hello\ufffdworld"`}, + {Name(""), "", `""`}, + {Name(""), "\xff", `"\ufffd"`}, + {Name(""), "\xff\xff", `"\ufffd\ufffd"`}, + {Name(""), "a\xffb", `"a\ufffdb"`}, + {Name(""), "\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got, err := Marshal(tt.in) + if string(got) != tt.want || err != nil { + t.Errorf("%s: Marshal(%q):\n\tgot: (%q, %v)\n\twant: (%q, nil)", tt.Where, tt.in, got, err, tt.want) + } + }) + } +} + +func TestMarshalNumberZeroVal(t *testing.T) { + var n Number + out, err := Marshal(n) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + got := string(out) + if got != "0" { + t.Fatalf("Marshal: got %s, want 0", got) + } +} + +func TestMarshalEmbeds(t *testing.T) { + top := &Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + } + got, err := Marshal(top) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + want := "{\"Level0\":1,\"Level1b\":2,\"Level1c\":3,\"Level1a\":5,\"LEVEL1B\":6,\"e\":{\"Level1a\":8,\"Level1b\":9,\"Level1c\":10,\"Level1d\":11,\"x\":12},\"Loop1\":13,\"Loop2\":14,\"X\":15,\"Y\":16,\"Z\":17,\"Q\":18}" + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func equalError(a, b error) bool { + if a == nil || b == nil { + return a == nil && b == nil + } + return a.Error() == b.Error() +} + +func TestUnmarshal(t *testing.T) { + for _, tt := range unmarshalTests { + t.Run(tt.Name, func(t *testing.T) { + in := []byte(tt.in) + var scan scanner + if err := checkValid(in, &scan); err != nil { + if !equalError(err, tt.err) { + t.Fatalf("%s: checkValid error: %#v", tt.Where, err) + } + } + if tt.ptr == nil { + return + } + + typ := reflect.TypeOf(tt.ptr) + if typ.Kind() != reflect.Pointer { + t.Fatalf("%s: unmarshalTest.ptr %T is not a pointer type", tt.Where, tt.ptr) + } + typ = typ.Elem() + + // v = new(right-type) + v := reflect.New(typ) + + if !reflect.DeepEqual(tt.ptr, v.Interface()) { + // There's no reason for ptr to point to non-zero data, + // as we decode into new(right-type), so the data is + // discarded. + // This can easily mean tests that silently don't test + // what they should. To test decoding into existing + // data, see TestPrefilled. + t.Fatalf("%s: unmarshalTest.ptr %#v is not a pointer to a zero value", tt.Where, tt.ptr) + } + + dec := NewDecoder(bytes.NewReader(in)) + if tt.useNumber { + dec.UseNumber() + } + if tt.disallowUnknownFields { + dec.DisallowUnknownFields() + } + if err := dec.Decode(v.Interface()); !equalError(err, tt.err) { + t.Fatalf("%s: Decode error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } else if err != nil { + return + } + if got := v.Elem().Interface(); !reflect.DeepEqual(got, tt.out) { + gotJSON, _ := Marshal(got) + wantJSON, _ := Marshal(tt.out) + t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s", tt.Where, got, tt.out, gotJSON, wantJSON) + } + + // Check round trip also decodes correctly. + if tt.err == nil { + enc, err := Marshal(v.Interface()) + if err != nil { + t.Fatalf("%s: Marshal error after roundtrip: %v", tt.Where, err) + } + if tt.golden && !bytes.Equal(enc, in) { + t.Errorf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, enc, in) + } + vv := reflect.New(reflect.TypeOf(tt.ptr).Elem()) + dec = NewDecoder(bytes.NewReader(enc)) + if tt.useNumber { + dec.UseNumber() + } + if err := dec.Decode(vv.Interface()); err != nil { + t.Fatalf("%s: Decode(%#q) error after roundtrip: %v", tt.Where, enc, err) + } + if !reflect.DeepEqual(v.Elem().Interface(), vv.Elem().Interface()) { + t.Fatalf("%s: Decode:\n\tgot: %#+v\n\twant: %#+v\n\n\tgotJSON: %s\n\twantJSON: %s", + tt.Where, v.Elem().Interface(), vv.Elem().Interface(), + stripWhitespace(string(enc)), stripWhitespace(string(in))) + } + } + }) + } +} + +func TestUnmarshalMarshal(t *testing.T) { + initBig() + var v any + if err := Unmarshal(jsonBig, &v); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + b, err := Marshal(v) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if !bytes.Equal(jsonBig, b) { + t.Errorf("Marshal:") + diff(t, b, jsonBig) + return + } +} + +// Independent of Decode, basic coverage of the accessors in Number +func TestNumberAccessors(t *testing.T) { + tests := []struct { + CaseName + in string + i int64 + intErr string + f float64 + floatErr string + }{ + {CaseName: Name(""), in: "-1.23e1", intErr: "strconv.ParseInt: parsing \"-1.23e1\": invalid syntax", f: -1.23e1}, + {CaseName: Name(""), in: "-12", i: -12, f: -12.0}, + {CaseName: Name(""), in: "1e1000", intErr: "strconv.ParseInt: parsing \"1e1000\": invalid syntax", floatErr: "strconv.ParseFloat: parsing \"1e1000\": value out of range"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + n := Number(tt.in) + if got := n.String(); got != tt.in { + t.Errorf("%s: Number(%q).String() = %s, want %s", tt.Where, tt.in, got, tt.in) + } + if i, err := n.Int64(); err == nil && tt.intErr == "" && i != tt.i { + t.Errorf("%s: Number(%q).Int64() = %d, want %d", tt.Where, tt.in, i, tt.i) + } else if (err == nil && tt.intErr != "") || (err != nil && err.Error() != tt.intErr) { + t.Errorf("%s: Number(%q).Int64() error:\n\tgot: %v\n\twant: %v", tt.Where, tt.in, err, tt.intErr) + } + if f, err := n.Float64(); err == nil && tt.floatErr == "" && f != tt.f { + t.Errorf("%s: Number(%q).Float64() = %g, want %g", tt.Where, tt.in, f, tt.f) + } else if (err == nil && tt.floatErr != "") || (err != nil && err.Error() != tt.floatErr) { + t.Errorf("%s: Number(%q).Float64() error:\n\tgot %v\n\twant: %v", tt.Where, tt.in, err, tt.floatErr) + } + }) + } +} + +func TestLargeByteSlice(t *testing.T) { + s0 := make([]byte, 2000) + for i := range s0 { + s0[i] = byte(i) + } + b, err := Marshal(s0) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var s1 []byte + if err := Unmarshal(b, &s1); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !bytes.Equal(s0, s1) { + t.Errorf("Marshal:") + diff(t, s0, s1) + } +} + +type Xint struct { + X int +} + +func TestUnmarshalInterface(t *testing.T) { + var xint Xint + var i any = &xint + if err := Unmarshal([]byte(`{"X":1}`), &i); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if xint.X != 1 { + t.Fatalf("xint.X = %d, want 1", xint.X) + } +} + +func TestUnmarshalPtrPtr(t *testing.T) { + var xint Xint + pxint := &xint + if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("xint.X = %d, want 1", xint.X) + } +} + +func TestEscape(t *testing.T) { + const input = `"foobar"<html>` + " [\u2028 \u2029]" + const want = `"\"foobar\"\u003chtml\u003e [\u2028 \u2029]"` + got, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Errorf("Marshal(%#q):\n\tgot: %s\n\twant: %s", input, got, want) + } +} + +// If people misuse the ,string modifier, the error message should be +// helpful, telling the user that they're doing it wrong. +func TestErrorMessageFromMisusedString(t *testing.T) { + // WrongString is a struct that's misusing the ,string modifier. + type WrongString struct { + Message string `json:"result,string"` + } + tests := []struct { + CaseName + in, err string + }{ + {Name(""), `{"result":"x"}`, `json: invalid use of ,string struct tag, trying to unmarshal "x" into string`}, + {Name(""), `{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, + {Name(""), `{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, + {Name(""), `{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, + {Name(""), `{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`}, + {Name(""), `{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r := strings.NewReader(tt.in) + var s WrongString + err := NewDecoder(r).Decode(&s) + got := fmt.Sprintf("%v", err) + if got != tt.err { + t.Errorf("%s: Decode error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.err) + } + }) + } +} + +type All struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + + Foo string `json:"bar"` + Foo2 string `json:"bar2,dummyopt"` + + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + + PBool *bool + PInt *int + PInt8 *int8 + PInt16 *int16 + PInt32 *int32 + PInt64 *int64 + PUint *uint + PUint8 *uint8 + PUint16 *uint16 + PUint32 *uint32 + PUint64 *uint64 + PUintptr *uintptr + PFloat32 *float32 + PFloat64 *float64 + + String string + PString *string + + Map map[string]Small + MapP map[string]*Small + PMap *map[string]Small + PMapP *map[string]*Small + + EmptyMap map[string]Small + NilMap map[string]Small + + Slice []Small + SliceP []*Small + PSlice *[]Small + PSliceP *[]*Small + + EmptySlice []Small + NilSlice []Small + + StringSlice []string + ByteSlice []byte + + Small Small + PSmall *Small + PPSmall **Small + + Interface any + PInterface *any + + unexported int +} + +type Small struct { + Tag string +} + +var allValue = All{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Uintptr: 12, + Float32: 14.1, + Float64: 15.1, + Foo: "foo", + Foo2: "foo2", + IntStr: 42, + UintptrStr: 44, + String: "16", + Map: map[string]Small{ + "17": {Tag: "tag17"}, + "18": {Tag: "tag18"}, + }, + MapP: map[string]*Small{ + "19": {Tag: "tag19"}, + "20": nil, + }, + EmptyMap: map[string]Small{}, + Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}}, + SliceP: []*Small{{Tag: "tag22"}, nil, {Tag: "tag23"}}, + EmptySlice: []Small{}, + StringSlice: []string{"str24", "str25", "str26"}, + ByteSlice: []byte{27, 28, 29}, + Small: Small{Tag: "tag30"}, + PSmall: &Small{Tag: "tag31"}, + Interface: 5.2, +} + +var pallValue = All{ + PBool: &allValue.Bool, + PInt: &allValue.Int, + PInt8: &allValue.Int8, + PInt16: &allValue.Int16, + PInt32: &allValue.Int32, + PInt64: &allValue.Int64, + PUint: &allValue.Uint, + PUint8: &allValue.Uint8, + PUint16: &allValue.Uint16, + PUint32: &allValue.Uint32, + PUint64: &allValue.Uint64, + PUintptr: &allValue.Uintptr, + PFloat32: &allValue.Float32, + PFloat64: &allValue.Float64, + PString: &allValue.String, + PMap: &allValue.Map, + PMapP: &allValue.MapP, + PSlice: &allValue.Slice, + PSliceP: &allValue.SliceP, + PPSmall: &allValue.PSmall, + PInterface: &allValue.Interface, +} + +var allValueIndent = `{ + "Bool": true, + "Int": 2, + "Int8": 3, + "Int16": 4, + "Int32": 5, + "Int64": 6, + "Uint": 7, + "Uint8": 8, + "Uint16": 9, + "Uint32": 10, + "Uint64": 11, + "Uintptr": 12, + "Float32": 14.1, + "Float64": 15.1, + "bar": "foo", + "bar2": "foo2", + "IntStr": "42", + "UintptrStr": "44", + "PBool": null, + "PInt": null, + "PInt8": null, + "PInt16": null, + "PInt32": null, + "PInt64": null, + "PUint": null, + "PUint8": null, + "PUint16": null, + "PUint32": null, + "PUint64": null, + "PUintptr": null, + "PFloat32": null, + "PFloat64": null, + "String": "16", + "PString": null, + "Map": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "MapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "PMap": null, + "PMapP": null, + "EmptyMap": {}, + "NilMap": null, + "Slice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "SliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "PSlice": null, + "PSliceP": null, + "EmptySlice": [], + "NilSlice": null, + "StringSlice": [ + "str24", + "str25", + "str26" + ], + "ByteSlice": "Gxwd", + "Small": { + "Tag": "tag30" + }, + "PSmall": { + "Tag": "tag31" + }, + "PPSmall": null, + "Interface": 5.2, + "PInterface": null +}` + +var allValueCompact = stripWhitespace(allValueIndent) + +var pallValueIndent = `{ + "Bool": false, + "Int": 0, + "Int8": 0, + "Int16": 0, + "Int32": 0, + "Int64": 0, + "Uint": 0, + "Uint8": 0, + "Uint16": 0, + "Uint32": 0, + "Uint64": 0, + "Uintptr": 0, + "Float32": 0, + "Float64": 0, + "bar": "", + "bar2": "", + "IntStr": "0", + "UintptrStr": "0", + "PBool": true, + "PInt": 2, + "PInt8": 3, + "PInt16": 4, + "PInt32": 5, + "PInt64": 6, + "PUint": 7, + "PUint8": 8, + "PUint16": 9, + "PUint32": 10, + "PUint64": 11, + "PUintptr": 12, + "PFloat32": 14.1, + "PFloat64": 15.1, + "String": "", + "PString": "16", + "Map": null, + "MapP": null, + "PMap": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "PMapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "EmptyMap": null, + "NilMap": null, + "Slice": null, + "SliceP": null, + "PSlice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "PSliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "EmptySlice": null, + "NilSlice": null, + "StringSlice": null, + "ByteSlice": null, + "Small": { + "Tag": "" + }, + "PSmall": null, + "PPSmall": { + "Tag": "tag31" + }, + "Interface": null, + "PInterface": 5.2 +}` + +var pallValueCompact = stripWhitespace(pallValueIndent) + +func TestRefUnmarshal(t *testing.T) { + type S struct { + // Ref is defined in encode_test.go. + R0 Ref + R1 *Ref + R2 RefText + R3 *RefText + } + want := S{ + R0: 12, + R1: new(Ref), + R2: 13, + R3: new(RefText), + } + *want.R1 = 12 + *want.R3 = 13 + + var got S + if err := Unmarshal([]byte(`{"R0":"ref","R1":"ref","R2":"ref","R3":"ref"}`), &got); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("Unmarsha:\n\tgot: %+v\n\twant: %+v", got, want) + } +} + +// Test that the empty string doesn't panic decoding when ,string is specified +// Issue 3450 +func TestEmptyString(t *testing.T) { + type T2 struct { + Number1 int `json:",string"` + Number2 int `json:",string"` + } + data := `{"Number1":"1", "Number2":""}` + dec := NewDecoder(strings.NewReader(data)) + var got T2 + switch err := dec.Decode(&got); { + case err == nil: + t.Fatalf("Decode error: got nil, want non-nil") + case got.Number1 != 1: + t.Fatalf("Decode: got.Number1 = %d, want 1", got.Number1) + } +} + +// Test that a null for ,string is not replaced with the previous quoted string (issue 7046). +// It should also not be an error (issue 2540, issue 8587). +func TestNullString(t *testing.T) { + type T struct { + A int `json:",string"` + B int `json:",string"` + C *int `json:",string"` + } + data := []byte(`{"A": "1", "B": null, "C": null}`) + var s T + s.B = 1 + s.C = new(int) + *s.C = 2 + switch err := Unmarshal(data, &s); { + case err != nil: + t.Fatalf("Unmarshal error: %v", err) + case s.B != 1: + t.Fatalf("Unmarshal: s.B = %d, want 1", s.B) + case s.C != nil: + t.Fatalf("Unmarshal: s.C = %d, want non-nil", s.C) + } +} + +func intp(x int) *int { + p := new(int) + *p = x + return p +} + +func intpp(x *int) **int { + pp := new(*int) + *pp = x + return pp +} + +func TestInterfaceSet(t *testing.T) { + tests := []struct { + CaseName + pre any + json string + post any + }{ + {Name(""), "foo", `"bar"`, "bar"}, + {Name(""), "foo", `2`, 2.0}, + {Name(""), "foo", `true`, true}, + {Name(""), "foo", `null`, nil}, + + {Name(""), nil, `null`, nil}, + {Name(""), new(int), `null`, nil}, + {Name(""), (*int)(nil), `null`, nil}, + {Name(""), new(*int), `null`, new(*int)}, + {Name(""), (**int)(nil), `null`, nil}, + {Name(""), intp(1), `null`, nil}, + {Name(""), intpp(nil), `null`, intpp(nil)}, + {Name(""), intpp(intp(1)), `null`, intpp(nil)}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b := struct{ X any }{tt.pre} + blob := `{"X":` + tt.json + `}` + if err := Unmarshal([]byte(blob), &b); err != nil { + t.Fatalf("%s: Unmarshal(%#q) error: %v", tt.Where, blob, err) + } + if !reflect.DeepEqual(b.X, tt.post) { + t.Errorf("%s: Unmarshal(%#q):\n\tpre.X: %#v\n\tgot.X: %#v\n\twant.X: %#v", tt.Where, blob, tt.pre, b.X, tt.post) + } + }) + } +} + +type NullTest struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Float32 float32 + Float64 float64 + String string + PBool *bool + Map map[string]string + Slice []string + Interface any + + PRaw *RawMessage + PTime *time.Time + PBigInt *big.Int + PText *MustNotUnmarshalText + PBuffer *bytes.Buffer // has methods, just not relevant ones + PStruct *struct{} + + Raw RawMessage + Time time.Time + BigInt big.Int + Text MustNotUnmarshalText + Buffer bytes.Buffer + Struct struct{} +} + +// JSON null values should be ignored for primitives and string values instead of resulting in an error. +// Issue 2540 +func TestUnmarshalNulls(t *testing.T) { + // Unmarshal docs: + // The JSON null value unmarshals into an interface, map, pointer, or slice + // by setting that Go value to nil. Because null is often used in JSON to mean + // ``not present,'' unmarshaling a JSON null into any other Go type has no effect + // on the value and produces no error. + + jsonData := []byte(`{ + "Bool" : null, + "Int" : null, + "Int8" : null, + "Int16" : null, + "Int32" : null, + "Int64" : null, + "Uint" : null, + "Uint8" : null, + "Uint16" : null, + "Uint32" : null, + "Uint64" : null, + "Float32" : null, + "Float64" : null, + "String" : null, + "PBool": null, + "Map": null, + "Slice": null, + "Interface": null, + "PRaw": null, + "PTime": null, + "PBigInt": null, + "PText": null, + "PBuffer": null, + "PStruct": null, + "Raw": null, + "Time": null, + "BigInt": null, + "Text": null, + "Buffer": null, + "Struct": null + }`) + nulls := NullTest{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Float32: 12.1, + Float64: 13.1, + String: "14", + PBool: new(bool), + Map: map[string]string{}, + Slice: []string{}, + Interface: new(MustNotUnmarshalJSON), + PRaw: new(RawMessage), + PTime: new(time.Time), + PBigInt: new(big.Int), + PText: new(MustNotUnmarshalText), + PStruct: new(struct{}), + PBuffer: new(bytes.Buffer), + Raw: RawMessage("123"), + Time: time.Unix(123456789, 0), + BigInt: *big.NewInt(123), + } + + before := nulls.Time.String() + + err := Unmarshal(jsonData, &nulls) + if err != nil { + t.Errorf("Unmarshal of null values failed: %v", err) + } + if !nulls.Bool || nulls.Int != 2 || nulls.Int8 != 3 || nulls.Int16 != 4 || nulls.Int32 != 5 || nulls.Int64 != 6 || + nulls.Uint != 7 || nulls.Uint8 != 8 || nulls.Uint16 != 9 || nulls.Uint32 != 10 || nulls.Uint64 != 11 || + nulls.Float32 != 12.1 || nulls.Float64 != 13.1 || nulls.String != "14" { + t.Errorf("Unmarshal of null values affected primitives") + } + + if nulls.PBool != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBool") + } + if nulls.Map != nil { + t.Errorf("Unmarshal of null did not clear nulls.Map") + } + if nulls.Slice != nil { + t.Errorf("Unmarshal of null did not clear nulls.Slice") + } + if nulls.Interface != nil { + t.Errorf("Unmarshal of null did not clear nulls.Interface") + } + if nulls.PRaw != nil { + t.Errorf("Unmarshal of null did not clear nulls.PRaw") + } + if nulls.PTime != nil { + t.Errorf("Unmarshal of null did not clear nulls.PTime") + } + if nulls.PBigInt != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBigInt") + } + if nulls.PText != nil { + t.Errorf("Unmarshal of null did not clear nulls.PText") + } + if nulls.PBuffer != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBuffer") + } + if nulls.PStruct != nil { + t.Errorf("Unmarshal of null did not clear nulls.PStruct") + } + + if string(nulls.Raw) != "null" { + t.Errorf("Unmarshal of RawMessage null did not record null: %v", string(nulls.Raw)) + } + if nulls.Time.String() != before { + t.Errorf("Unmarshal of time.Time null set time to %v", nulls.Time.String()) + } + if nulls.BigInt.String() != "123" { + t.Errorf("Unmarshal of big.Int null set int to %v", nulls.BigInt.String()) + } +} + +type MustNotUnmarshalJSON struct{} + +func (x MustNotUnmarshalJSON) UnmarshalJSON(data []byte) error { + return errors.New("MustNotUnmarshalJSON was used") +} + +type MustNotUnmarshalText struct{} + +func (x MustNotUnmarshalText) UnmarshalText(text []byte) error { + return errors.New("MustNotUnmarshalText was used") +} + +func TestStringKind(t *testing.T) { + type stringKind string + want := map[stringKind]int{"foo": 42} + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got map[stringKind]int + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +// Custom types with []byte as underlying type could not be marshaled +// and then unmarshaled. +// Issue 8962. +func TestByteKind(t *testing.T) { + type byteKind []byte + want := byteKind("hello") + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got byteKind + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +// The fix for issue 8962 introduced a regression. +// Issue 12921. +func TestSliceOfCustomByte(t *testing.T) { + type Uint8 uint8 + want := []Uint8("hello") + data, err := Marshal(want) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got []Uint8 + err = Unmarshal(data, &got) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("Marshal/Unmarshal mismatch:\n\tgot: %v\n\twant: %v", got, want) + } +} + +func TestUnmarshalTypeError(t *testing.T) { + tests := []struct { + CaseName + dest any + in string + }{ + {Name(""), new(string), `{"user": "name"}`}, // issue 4628. + {Name(""), new(error), `{}`}, // issue 4222 + {Name(""), new(error), `[]`}, + {Name(""), new(error), `""`}, + {Name(""), new(error), `123`}, + {Name(""), new(error), `true`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), tt.dest) + if _, ok := err.(*UnmarshalTypeError); !ok { + t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %T\n\twant: %T", + tt.Where, tt.in, tt.dest, err, new(UnmarshalTypeError)) + } + }) + } +} + +func TestUnmarshalSyntax(t *testing.T) { + var x any + tests := []struct { + CaseName + in string + }{ + {Name(""), "tru"}, + {Name(""), "fals"}, + {Name(""), "nul"}, + {Name(""), "123e"}, + {Name(""), `"hello`}, + {Name(""), `[1,2,3`}, + {Name(""), `{"key":1`}, + {Name(""), `{"key":1,`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), &x) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Unmarshal(%#q, any):\n\tgot: %T\n\twant: %T", + tt.Where, tt.in, err, new(SyntaxError)) + } + }) + } +} + +// Test handling of unexported fields that should be ignored. +// Issue 4660 +type unexportedFields struct { + Name string + m map[string]any `json:"-"` + m2 map[string]any `json:"abcd"` + + s []int `json:"-"` +} + +func TestUnmarshalUnexported(t *testing.T) { + input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` + want := &unexportedFields{Name: "Bob"} + + out := &unexportedFields{} + err := Unmarshal([]byte(input), out) + if err != nil { + t.Errorf("Unmarshal error: %v", err) + } + if !reflect.DeepEqual(out, want) { + t.Errorf("Unmarshal:\n\tgot: %+v\n\twant: %+v", out, want) + } +} + +// Time3339 is a time.Time which encodes to and from JSON +// as an RFC 3339 time in UTC. +type Time3339 time.Time + +func (t *Time3339) UnmarshalJSON(b []byte) error { + if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' { + return fmt.Errorf("types: failed to unmarshal non-string value %q as an RFC 3339 time", b) + } + tm, err := time.Parse(time.RFC3339, string(b[1:len(b)-1])) + if err != nil { + return err + } + *t = Time3339(tm) + return nil +} + +func TestUnmarshalJSONLiteralError(t *testing.T) { + var t3 Time3339 + switch err := Unmarshal([]byte(`"0000-00-00T00:00:00Z"`), &t3); { + case err == nil: + t.Fatalf("Unmarshal error: got nil, want non-nil") + case !strings.Contains(err.Error(), "range"): + t.Errorf("Unmarshal error:\n\tgot: %v\n\twant: out of range", err) + } +} + +// Test that extra object elements in an array do not result in a +// "data changing underfoot" error. +// Issue 3717 +func TestSkipArrayObjects(t *testing.T) { + json := `[{}]` + var dest [0]any + + err := Unmarshal([]byte(json), &dest) + if err != nil { + t.Errorf("Unmarshal error: %v", err) + } +} + +// Test semantics of pre-filled data, such as struct fields, map elements, +// slices, and arrays. +// Issues 4900 and 8837, among others. +func TestPrefilled(t *testing.T) { + // Values here change, cannot reuse table across runs. + tests := []struct { + CaseName + in string + ptr any + out any + }{{ + CaseName: Name(""), + in: `{"X": 1, "Y": 2}`, + ptr: &XYZ{X: float32(3), Y: int16(4), Z: 1.5}, + out: &XYZ{X: float64(1), Y: float64(2), Z: 1.5}, + }, { + CaseName: Name(""), + in: `{"X": 1, "Y": 2}`, + ptr: &map[string]any{"X": float32(3), "Y": int16(4), "Z": 1.5}, + out: &map[string]any{"X": float64(1), "Y": float64(2), "Z": 1.5}, + }, { + CaseName: Name(""), + in: `[2]`, + ptr: &[]int{1}, + out: &[]int{2}, + }, { + CaseName: Name(""), + in: `[2, 3]`, + ptr: &[]int{1}, + out: &[]int{2, 3}, + }, { + CaseName: Name(""), + in: `[2, 3]`, + ptr: &[...]int{1}, + out: &[...]int{2}, + }, { + CaseName: Name(""), + in: `[3]`, + ptr: &[...]int{1, 2}, + out: &[...]int{3, 0}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + ptrstr := fmt.Sprintf("%v", tt.ptr) + err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here + if err != nil { + t.Errorf("%s: Unmarshal error: %v", tt.Where, err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %v\n\twant: %v", tt.Where, tt.in, ptrstr, tt.ptr, tt.out) + } + }) + } +} + +func TestInvalidUnmarshal(t *testing.T) { + buf := []byte(`{"a":"1"}`) + tests := []struct { + CaseName + v any + want string + }{ + {Name(""), nil, "json: Unmarshal(nil)"}, + {Name(""), struct{}{}, "json: Unmarshal(non-pointer struct {})"}, + {Name(""), (*int)(nil), "json: Unmarshal(nil *int)"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal(buf, tt.v) + if err == nil { + t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where) + } + if got := err.Error(); got != tt.want { + t.Errorf("%s: Unmarshal error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.want) + } + }) + } +} + +func TestInvalidUnmarshalText(t *testing.T) { + buf := []byte(`123`) + tests := []struct { + CaseName + v any + want string + }{ + {Name(""), nil, "json: Unmarshal(nil)"}, + {Name(""), struct{}{}, "json: Unmarshal(non-pointer struct {})"}, + {Name(""), (*int)(nil), "json: Unmarshal(nil *int)"}, + {Name(""), new(net.IP), "json: cannot unmarshal number into Go value of type *net.IP"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal(buf, tt.v) + if err == nil { + t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where) + } + if got := err.Error(); got != tt.want { + t.Errorf("%s: Unmarshal error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.want) + } + }) + } +} + +// Test that string option is ignored for invalid types. +// Issue 9812. +func TestInvalidStringOption(t *testing.T) { + num := 0 + item := struct { + T time.Time `json:",string"` + M map[string]string `json:",string"` + S []string `json:",string"` + A [1]string `json:",string"` + I any `json:",string"` + P *int `json:",string"` + }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num} + + data, err := Marshal(item) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + + err = Unmarshal(data, &item) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } +} + +// Test unmarshal behavior with regards to embedded unexported structs. +// +// (Issue 21357) If the embedded struct is a pointer and is unallocated, +// this returns an error because unmarshal cannot set the field. +// +// (Issue 24152) If the embedded struct is given an explicit name, +// ensure that the normal unmarshal logic does not panic in reflect. +// +// (Issue 28145) If the embedded struct is given an explicit name and has +// exported methods, don't cause a panic trying to get its value. +func TestUnmarshalEmbeddedUnexported(t *testing.T) { + type ( + embed1 struct{ Q int } + embed2 struct{ Q int } + embed3 struct { + Q int64 `json:",string"` + } + S1 struct { + *embed1 + R int + } + S2 struct { + *embed1 + Q int + } + S3 struct { + embed1 + R int + } + S4 struct { + *embed1 + embed2 + } + S5 struct { + *embed3 + R int + } + S6 struct { + embed1 `json:"embed1"` + } + S7 struct { + embed1 `json:"embed1"` + embed2 + } + S8 struct { + embed1 `json:"embed1"` + embed2 `json:"embed2"` + Q int + } + S9 struct { + unexportedWithMethods `json:"embed"` + } + ) + + tests := []struct { + CaseName + in string + ptr any + out any + err error + }{{ + // Error since we cannot set S1.embed1, but still able to set S1.R. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S1), + out: &S1{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"), + }, { + // The top level Q field takes precedence. + CaseName: Name(""), + in: `{"Q":1}`, + ptr: new(S2), + out: &S2{Q: 1}, + }, { + // No issue with non-pointer variant. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S3), + out: &S3{embed1: embed1{Q: 1}, R: 2}, + }, { + // No error since both embedded structs have field R, which annihilate each other. + // Thus, no attempt is made at setting S4.embed1. + CaseName: Name(""), + in: `{"R":2}`, + ptr: new(S4), + out: new(S4), + }, { + // Error since we cannot set S5.embed1, but still able to set S5.R. + CaseName: Name(""), + in: `{"R":2,"Q":1}`, + ptr: new(S5), + out: &S5{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"), + }, { + // Issue 24152, ensure decodeState.indirect does not panic. + CaseName: Name(""), + in: `{"embed1": {"Q": 1}}`, + ptr: new(S6), + out: &S6{embed1{1}}, + }, { + // Issue 24153, check that we can still set forwarded fields even in + // the presence of a name conflict. + // + // This relies on obscure behavior of reflect where it is possible + // to set a forwarded exported field on an unexported embedded struct + // even though there is a name conflict, even when it would have been + // impossible to do so according to Go visibility rules. + // Go forbids this because it is ambiguous whether S7.Q refers to + // S7.embed1.Q or S7.embed2.Q. Since embed1 and embed2 are unexported, + // it should be impossible for an external package to set either Q. + // + // It is probably okay for a future reflect change to break this. + CaseName: Name(""), + in: `{"embed1": {"Q": 1}, "Q": 2}`, + ptr: new(S7), + out: &S7{embed1{1}, embed2{2}}, + }, { + // Issue 24153, similar to the S7 case. + CaseName: Name(""), + in: `{"embed1": {"Q": 1}, "embed2": {"Q": 2}, "Q": 3}`, + ptr: new(S8), + out: &S8{embed1{1}, embed2{2}, 3}, + }, { + // Issue 228145, similar to the cases above. + CaseName: Name(""), + in: `{"embed": {}}`, + ptr: new(S9), + out: &S9{}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.in), tt.ptr) + if !equalError(err, tt.err) { + t.Errorf("%s: Unmarshal error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("%s: Unmarshal:\n\tgot: %#+v\n\twant: %#+v", tt.Where, tt.ptr, tt.out) + } + }) + } +} + +func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { + tests := []struct { + CaseName + in string + err error + }{{ + CaseName: Name(""), + in: `1 false null :`, + err: &SyntaxError{"invalid character ':' looking for beginning of value", 14}, + }, { + CaseName: Name(""), + in: `1 [] [,]`, + err: &SyntaxError{"invalid character ',' looking for beginning of value", 7}, + }, { + CaseName: Name(""), + in: `1 [] [true:]`, + err: &SyntaxError{"invalid character ':' after array element", 11}, + }, { + CaseName: Name(""), + in: `1 {} {"x"=}`, + err: &SyntaxError{"invalid character '=' after object key", 14}, + }, { + CaseName: Name(""), + in: `falsetruenul#`, + err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13}, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + dec := NewDecoder(strings.NewReader(tt.in)) + var err error + for err == nil { + var v any + err = dec.Decode(&v) + } + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("%s: Decode error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + }) + } +} + +type unmarshalPanic struct{} + +func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) } + +func TestUnmarshalPanic(t *testing.T) { + defer func() { + if got := recover(); !reflect.DeepEqual(got, 0xdead) { + t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) + } + }() + Unmarshal([]byte("{}"), &unmarshalPanic{}) + t.Fatalf("Unmarshal should have panicked") +} + +// The decoder used to hang if decoding into an interface pointing to its own address. +// See golang.org/issues/31740. +func TestUnmarshalRecursivePointer(t *testing.T) { + var v any + v = &v + data := []byte(`{"a": "b"}`) + + if err := Unmarshal(data, v); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } +} + +type textUnmarshalerString string + +func (m *textUnmarshalerString) UnmarshalText(text []byte) error { + *m = textUnmarshalerString(strings.ToLower(string(text))) + return nil +} + +// Test unmarshal to a map, where the map key is a user defined type. +// See golang.org/issues/34437. +func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + + if _, ok := p["foo"]; !ok { + t.Errorf(`key "foo" missing in map: %v`, p) + } +} + +func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { + // See golang.org/issues/38105. + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if _, ok := p["开源"]; !ok { + t.Errorf(`key "开源" missing in map: %v`, p) + } + + // See golang.org/issues/38126. + type T struct { + F1 string `json:"F1,string"` + } + wantT := T{"aaa\tbbb"} + + b, err := Marshal(wantT) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var gotT T + if err := Unmarshal(b, &gotT); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if gotT != wantT { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT) + } + + // See golang.org/issues/39555. + input := map[textUnmarshalerString]string{"FOO": "", `"`: ""} + + encoded, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + var got map[textUnmarshalerString]string + if err := Unmarshal(encoded, &got); err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + want := map[textUnmarshalerString]string{"foo": "", `"`: ""} + if !reflect.DeepEqual(got, want) { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %q\n\twant: %q", gotT, wantT) + } +} + +func TestUnmarshalMaxDepth(t *testing.T) { + tests := []struct { + CaseName + data string + errMaxDepth bool + }{{ + CaseName: Name("ArrayUnderMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`[`, 10000-1) + strings.Repeat(`]`, 10000-1) + `}`, + errMaxDepth: false, + }, { + CaseName: Name("ArrayOverMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`[`, 10000) + strings.Repeat(`]`, 10000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ArrayOverStackDepth"), + data: `{"a":` + strings.Repeat(`[`, 3000000) + strings.Repeat(`]`, 3000000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ObjectUnderMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 10000-1) + `0` + strings.Repeat(`}`, 10000-1) + `}`, + errMaxDepth: false, + }, { + CaseName: Name("ObjectOverMaxNestingDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 10000) + `0` + strings.Repeat(`}`, 10000) + `}`, + errMaxDepth: true, + }, { + CaseName: Name("ObjectOverStackDepth"), + data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`, + errMaxDepth: true, + }} + + targets := []struct { + CaseName + newValue func() any + }{{ + CaseName: Name("unstructured"), + newValue: func() any { + var v any + return &v + }, + }, { + CaseName: Name("typed named field"), + newValue: func() any { + v := struct { + A any `json:"a"` + }{} + return &v + }, + }, { + CaseName: Name("typed missing field"), + newValue: func() any { + v := struct { + B any `json:"b"` + }{} + return &v + }, + }, { + CaseName: Name("custom unmarshaler"), + newValue: func() any { + v := unmarshaler{} + return &v + }, + }} + + for _, tt := range tests { + for _, target := range targets { + t.Run(target.Name+"-"+tt.Name, func(t *testing.T) { + err := Unmarshal([]byte(tt.data), target.newValue()) + if !tt.errMaxDepth { + if err != nil { + t.Errorf("%s: %s: Unmarshal error: %v", tt.Where, target.Where, err) + } + } else { + if err == nil || !strings.Contains(err.Error(), "exceeded max depth") { + t.Errorf("%s: %s: Unmarshal error:\n\tgot: %v\n\twant: exceeded max depth", tt.Where, target.Where, err) + } + } + }) + } + } +} diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go new file mode 100644 index 0000000..d6f6900 --- /dev/null +++ b/src/encoding/json/encode.go @@ -0,0 +1,1280 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package json implements encoding and decoding of JSON as defined in +// RFC 7159. The mapping between JSON and Go values is described +// in the documentation for the Marshal and Unmarshal functions. +// +// See "JSON and Go" for an introduction to this package: +// https://golang.org/doc/articles/json_and_go.html +package json + +import ( + "bytes" + "encoding" + "encoding/base64" + "fmt" + "math" + "reflect" + "slices" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements [Marshaler] +// and is not a nil pointer, Marshal calls [Marshaler.MarshalJSON] +// to produce JSON. If no [Marshaler.MarshalJSON] method is present but the +// value implements [encoding.TextMarshaler] instead, Marshal calls +// [encoding.TextMarshaler.MarshalText] and encodes the result as a JSON string. +// The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// [Unmarshaler.UnmarshalJSON]. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. +// +// Floating point, integer, and [Number] values encode as JSON numbers. +// NaN and +/-Inf values will return an [UnsupportedValueError]. +// +// String values encode as JSON strings coerced to valid UTF-8, +// replacing invalid bytes with the Unicode replacement rune. +// So that the JSON will be safe to embed inside HTML <script> tags, +// the string is encoded using [HTMLEscape], +// which replaces "<", ">", "&", U+2028, and U+2029 are escaped +// to "\u003c","\u003e", "\u0026", "\u2028", and "\u2029". +// This replacement can be disabled when using an [Encoder], +// by calling [Encoder.SetEscapeHTML](false). +// +// Array and slice values encode as JSON arrays, except that +// []byte encodes as a base64-encoded string, and a nil slice +// encodes as the null JSON value. +// +// Struct values encode as JSON objects. +// Each exported struct field becomes a member of the object, using the +// field name as the object key, unless the field is omitted for one of the +// reasons given below. +// +// The encoding of each struct field can be customized by the format string +// stored under the "json" key in the struct field's tag. +// The format string gives the name of the field, possibly followed by a +// comma-separated list of options. The name may be empty in order to +// specify options without overriding the default field name. +// +// The "omitempty" option specifies that the field should be omitted +// from the encoding if the field has an empty value, defined as +// false, 0, a nil pointer, a nil interface value, and any empty array, +// slice, map, or string. +// +// As a special case, if the field tag is "-", the field is always omitted. +// Note that a field with name "-" can still be generated using the tag "-,". +// +// Examples of struct field tags and their meanings: +// +// // Field appears in JSON as key "myName". +// Field int `json:"myName"` +// +// // Field appears in JSON as key "myName" and +// // the field is omitted from the object if its value is empty, +// // as defined above. +// Field int `json:"myName,omitempty"` +// +// // Field appears in JSON as key "Field" (the default), but +// // the field is skipped if empty. +// // Note the leading comma. +// Field int `json:",omitempty"` +// +// // Field is ignored by this package. +// Field int `json:"-"` +// +// // Field appears in JSON as key "-". +// Field int `json:"-,"` +// +// The "string" option signals that a field is stored as JSON inside a +// JSON-encoded string. It applies only to fields of string, floating point, +// integer, or boolean types. This extra level of encoding is sometimes used +// when communicating with JavaScript programs: +// +// Int64String int64 `json:",string"` +// +// The key name will be used if it's a non-empty string consisting of +// only Unicode letters, digits, and ASCII punctuation except quotation +// marks, backslash, and comma. +// +// Embedded struct fields are usually marshaled as if their inner exported fields +// were fields in the outer struct, subject to the usual Go visibility rules amended +// as described in the next paragraph. +// An anonymous struct field with a name given in its JSON tag is treated as +// having that name, rather than being anonymous. +// An anonymous struct field of interface type is treated the same as having +// that type as its name, rather than being anonymous. +// +// The Go visibility rules for struct fields are amended for JSON when +// deciding which field to marshal or unmarshal. If there are +// multiple fields at the same level, and that level is the least +// nested (and would therefore be the nesting level selected by the +// usual Go rules), the following extra rules apply: +// +// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered, +// even if there are multiple untagged fields that would otherwise conflict. +// +// 2) If there is exactly one field (tagged or not according to the first rule), that is selected. +// +// 3) Otherwise there are multiple fields, and all are ignored; no error occurs. +// +// Handling of anonymous struct fields is new in Go 1.1. +// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of +// an anonymous struct field in both current and earlier versions, give the field +// a JSON tag of "-". +// +// Map values encode as JSON objects. The map's key type must either be a +// string, an integer type, or implement [encoding.TextMarshaler]. The map keys +// are sorted and used as JSON object keys by applying the following rules, +// subject to the UTF-8 coercion described for string values above: +// - keys of any string type are used directly +// - [encoding.TextMarshalers] are marshaled +// - integer keys are converted to strings +// +// Pointer values encode as the value pointed to. +// A nil pointer encodes as the null JSON value. +// +// Interface values encode as the value contained in the interface. +// A nil interface value encodes as the null JSON value. +// +// Channel, complex, and function values cannot be encoded in JSON. +// Attempting to encode such a value causes Marshal to return +// an [UnsupportedTypeError]. +// +// JSON cannot represent cyclic data structures and Marshal does not +// handle them. Passing cyclic structures to Marshal will result in +// an error. +func Marshal(v any) ([]byte, error) { + e := newEncodeState() + defer encodeStatePool.Put(e) + + err := e.marshal(v, encOpts{escapeHTML: true}) + if err != nil { + return nil, err + } + buf := append([]byte(nil), e.Bytes()...) + + return buf, nil +} + +// MarshalIndent is like [Marshal] but applies [Indent] to format the output. +// Each JSON element in the output will begin on a new line beginning with prefix +// followed by one or more copies of indent according to the indentation nesting. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + b, err := Marshal(v) + if err != nil { + return nil, err + } + b2 := make([]byte, 0, indentGrowthFactor*len(b)) + b2, err = appendIndent(b2, b, prefix, indent) + if err != nil { + return nil, err + } + return b2, nil +} + +// Marshaler is the interface implemented by types that +// can marshal themselves into valid JSON. +type Marshaler interface { + MarshalJSON() ([]byte, error) +} + +// An UnsupportedTypeError is returned by [Marshal] when attempting +// to encode an unsupported value type. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "json: unsupported type: " + e.Type.String() +} + +// An UnsupportedValueError is returned by [Marshal] when attempting +// to encode an unsupported value. +type UnsupportedValueError struct { + Value reflect.Value + Str string +} + +func (e *UnsupportedValueError) Error() string { + return "json: unsupported value: " + e.Str +} + +// Before Go 1.2, an InvalidUTF8Error was returned by [Marshal] when +// attempting to encode a string value with invalid UTF-8 sequences. +// As of Go 1.2, [Marshal] instead coerces the string to valid UTF-8 by +// replacing invalid bytes with the Unicode replacement rune U+FFFD. +// +// Deprecated: No longer used; kept for compatibility. +type InvalidUTF8Error struct { + S string // the whole string value that caused the error +} + +func (e *InvalidUTF8Error) Error() string { + return "json: invalid UTF-8 in string: " + strconv.Quote(e.S) +} + +// A MarshalerError represents an error from calling a +// [Marshaler.MarshalJSON] or [encoding.TextMarshaler.MarshalText] method. +type MarshalerError struct { + Type reflect.Type + Err error + sourceFunc string +} + +func (e *MarshalerError) Error() string { + srcFunc := e.sourceFunc + if srcFunc == "" { + srcFunc = "MarshalJSON" + } + return "json: error calling " + srcFunc + + " for type " + e.Type.String() + + ": " + e.Err.Error() +} + +// Unwrap returns the underlying error. +func (e *MarshalerError) Unwrap() error { return e.Err } + +const hex = "0123456789abcdef" + +// An encodeState encodes JSON into a bytes.Buffer. +type encodeState struct { + bytes.Buffer // accumulated output + + // Keep track of what pointers we've seen in the current recursive call + // path, to avoid cycles that could lead to a stack overflow. Only do + // the relatively expensive map operations if ptrLevel is larger than + // startDetectingCyclesAfter, so that we skip the work if we're within a + // reasonable amount of nested pointers deep. + ptrLevel uint + ptrSeen map[any]struct{} +} + +const startDetectingCyclesAfter = 1000 + +var encodeStatePool sync.Pool + +func newEncodeState() *encodeState { + if v := encodeStatePool.Get(); v != nil { + e := v.(*encodeState) + e.Reset() + if len(e.ptrSeen) > 0 { + panic("ptrEncoder.encode should have emptied ptrSeen via defers") + } + e.ptrLevel = 0 + return e + } + return &encodeState{ptrSeen: make(map[any]struct{})} +} + +// jsonError is an error wrapper type for internal use only. +// Panics with errors are wrapped in jsonError so that the top-level recover +// can distinguish intentional panics from this package. +type jsonError struct{ error } + +func (e *encodeState) marshal(v any, opts encOpts) (err error) { + defer func() { + if r := recover(); r != nil { + if je, ok := r.(jsonError); ok { + err = je.error + } else { + panic(r) + } + } + }() + e.reflectValue(reflect.ValueOf(v), opts) + return nil +} + +// error aborts the encoding by panicking with err wrapped in jsonError. +func (e *encodeState) error(err error) { + panic(jsonError{err}) +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.Interface, reflect.Pointer: + return v.IsZero() + } + return false +} + +func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) { + valueEncoder(v)(e, v, opts) +} + +type encOpts struct { + // quoted causes primitive fields to be encoded inside JSON strings. + quoted bool + // escapeHTML causes '<', '>', and '&' to be escaped in JSON strings. + escapeHTML bool +} + +type encoderFunc func(e *encodeState, v reflect.Value, opts encOpts) + +var encoderCache sync.Map // map[reflect.Type]encoderFunc + +func valueEncoder(v reflect.Value) encoderFunc { + if !v.IsValid() { + return invalidValueEncoder + } + return typeEncoder(v.Type()) +} + +func typeEncoder(t reflect.Type) encoderFunc { + if fi, ok := encoderCache.Load(t); ok { + return fi.(encoderFunc) + } + + // To deal with recursive types, populate the map with an + // indirect func before we build it. This type waits on the + // real func (f) to be ready and then calls it. This indirect + // func is only used for recursive types. + var ( + wg sync.WaitGroup + f encoderFunc + ) + wg.Add(1) + fi, loaded := encoderCache.LoadOrStore(t, encoderFunc(func(e *encodeState, v reflect.Value, opts encOpts) { + wg.Wait() + f(e, v, opts) + })) + if loaded { + return fi.(encoderFunc) + } + + // Compute the real encoder and replace the indirect func with it. + f = newTypeEncoder(t, true) + wg.Done() + encoderCache.Store(t, f) + return f +} + +var ( + marshalerType = reflect.TypeFor[Marshaler]() + textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() +) + +// newTypeEncoder constructs an encoderFunc for a type. +// The returned encoder only checks CanAddr when allowAddr is true. +func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { + // If we have a non-pointer value whose type implements + // Marshaler with a value receiver, then we're better off taking + // the address of the value - otherwise we end up with an + // allocation as we cast the value to an interface. + if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(marshalerType) { + return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) + } + if t.Implements(marshalerType) { + return marshalerEncoder + } + if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(textMarshalerType) { + return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) + } + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } + + switch t.Kind() { + case reflect.Bool: + return boolEncoder + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return intEncoder + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return uintEncoder + case reflect.Float32: + return float32Encoder + case reflect.Float64: + return float64Encoder + case reflect.String: + return stringEncoder + case reflect.Interface: + return interfaceEncoder + case reflect.Struct: + return newStructEncoder(t) + case reflect.Map: + return newMapEncoder(t) + case reflect.Slice: + return newSliceEncoder(t) + case reflect.Array: + return newArrayEncoder(t) + case reflect.Pointer: + return newPtrEncoder(t) + default: + return unsupportedTypeEncoder + } +} + +func invalidValueEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.WriteString("null") +} + +func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Pointer && v.IsNil() { + e.WriteString("null") + return + } + m, ok := v.Interface().(Marshaler) + if !ok { + e.WriteString("null") + return + } + b, err := m.MarshalJSON() + if err == nil { + e.Grow(len(b)) + out := e.AvailableBuffer() + out, err = appendCompact(out, b, opts.escapeHTML) + e.Buffer.Write(out) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) + } +} + +func addrMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(Marshaler) + b, err := m.MarshalJSON() + if err == nil { + e.Grow(len(b)) + out := e.AvailableBuffer() + out, err = appendCompact(out, b, opts.escapeHTML) + e.Buffer.Write(out) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) + } +} + +func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Pointer && v.IsNil() { + e.WriteString("null") + return + } + m, ok := v.Interface().(encoding.TextMarshaler) + if !ok { + e.WriteString("null") + return + } + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) + } + e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) +} + +func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(encoding.TextMarshaler) + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) + } + e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) +} + +func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendBool(b, v.Bool()) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +func intEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendInt(b, v.Int(), 10) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +func uintEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendUint(b, v.Uint(), 10) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +type floatEncoder int // number of bits + +func (bits floatEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + f := v.Float() + if math.IsInf(f, 0) || math.IsNaN(f) { + e.error(&UnsupportedValueError{v, strconv.FormatFloat(f, 'g', -1, int(bits))}) + } + + // Convert as if by ES6 number to string conversion. + // This matches most other JSON generators. + // See golang.org/issue/6384 and golang.org/issue/14135. + // Like fmt %g, but the exponent cutoffs are different + // and exponents themselves are not padded to two digits. + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + abs := math.Abs(f) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { + fmt = 'e' + } + } + b = strconv.AppendFloat(b, f, fmt, -1, int(bits)) + if fmt == 'e' { + // clean up e-09 to e-9 + n := len(b) + if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' { + b[n-2] = b[n-1] + b = b[:n-1] + } + } + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +var ( + float32Encoder = (floatEncoder(32)).encode + float64Encoder = (floatEncoder(64)).encode +) + +func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Type() == numberType { + numStr := v.String() + // In Go1.5 the empty string encodes to "0", while this is not a valid number literal + // we keep compatibility so check validity after this. + if numStr == "" { + numStr = "0" // Number's zero-val + } + if !isValidNumber(numStr) { + e.error(fmt.Errorf("json: invalid number literal %q", numStr)) + } + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = append(b, numStr...) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) + return + } + if opts.quoted { + b := appendString(nil, v.String(), opts.escapeHTML) + e.Write(appendString(e.AvailableBuffer(), b, false)) // no need to escape again since it is already escaped + } else { + e.Write(appendString(e.AvailableBuffer(), v.String(), opts.escapeHTML)) + } +} + +// isValidNumber reports whether s is a valid JSON number literal. +func isValidNumber(s string) bool { + // This function implements the JSON numbers grammar. + // See https://tools.ietf.org/html/rfc7159#section-6 + // and https://www.json.org/img/number.png + + if s == "" { + return false + } + + // Optional - + if s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + + // Digits + switch { + default: + return false + + case s[0] == '0': + s = s[1:] + + case '1' <= s[0] && s[0] <= '9': + s = s[1:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // . followed by 1 or more digits. + if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { + s = s[2:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // e or E followed by an optional - or + and + // 1 or more digits. + if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { + s = s[1:] + if s[0] == '+' || s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // Make sure we are at the end. + return s == "" +} + +func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + e.reflectValue(v.Elem(), opts) +} + +func unsupportedTypeEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.error(&UnsupportedTypeError{v.Type()}) +} + +type structEncoder struct { + fields structFields +} + +type structFields struct { + list []field + byExactName map[string]*field + byFoldedName map[string]*field +} + +func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + next := byte('{') +FieldLoop: + for i := range se.fields.list { + f := &se.fields.list[i] + + // Find the nested struct field by following f.index. + fv := v + for _, i := range f.index { + if fv.Kind() == reflect.Pointer { + if fv.IsNil() { + continue FieldLoop + } + fv = fv.Elem() + } + fv = fv.Field(i) + } + + if f.omitEmpty && isEmptyValue(fv) { + continue + } + e.WriteByte(next) + next = ',' + if opts.escapeHTML { + e.WriteString(f.nameEscHTML) + } else { + e.WriteString(f.nameNonEsc) + } + opts.quoted = f.quoted + f.encoder(e, fv, opts) + } + if next == '{' { + e.WriteString("{}") + } else { + e.WriteByte('}') + } +} + +func newStructEncoder(t reflect.Type) encoderFunc { + se := structEncoder{fields: cachedTypeFields(t)} + return se.encode +} + +type mapEncoder struct { + elemEnc encoderFunc +} + +func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + ptr := v.UnsafePointer() + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + e.WriteByte('{') + + // Extract and sort the keys. + var ( + sv = make([]reflectWithString, v.Len()) + mi = v.MapRange() + err error + ) + for i := 0; mi.Next(); i++ { + if sv[i].ks, err = resolveKeyName(mi.Key()); err != nil { + e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error())) + } + sv[i].v = mi.Value() + } + slices.SortFunc(sv, func(i, j reflectWithString) int { + return strings.Compare(i.ks, j.ks) + }) + + for i, kv := range sv { + if i > 0 { + e.WriteByte(',') + } + e.Write(appendString(e.AvailableBuffer(), kv.ks, opts.escapeHTML)) + e.WriteByte(':') + me.elemEnc(e, kv.v, opts) + } + e.WriteByte('}') + e.ptrLevel-- +} + +func newMapEncoder(t reflect.Type) encoderFunc { + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !t.Key().Implements(textMarshalerType) { + return unsupportedTypeEncoder + } + } + me := mapEncoder{typeEncoder(t.Elem())} + return me.encode +} + +func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + + s := v.Bytes() + b := e.AvailableBuffer() + b = append(b, '"') + b = base64.StdEncoding.AppendEncode(b, s) + b = append(b, '"') + e.Write(b) +} + +// sliceEncoder just wraps an arrayEncoder, checking to make sure the value isn't nil. +type sliceEncoder struct { + arrayEnc encoderFunc +} + +func (se sliceEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + // Here we use a struct to memorize the pointer to the first element of the slice + // and its length. + ptr := struct { + ptr interface{} // always an unsafe.Pointer, but avoids a dependency on package unsafe + len int + }{v.UnsafePointer(), v.Len()} + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + se.arrayEnc(e, v, opts) + e.ptrLevel-- +} + +func newSliceEncoder(t reflect.Type) encoderFunc { + // Byte slices get special treatment; arrays don't. + if t.Elem().Kind() == reflect.Uint8 { + p := reflect.PointerTo(t.Elem()) + if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) { + return encodeByteSlice + } + } + enc := sliceEncoder{newArrayEncoder(t)} + return enc.encode +} + +type arrayEncoder struct { + elemEnc encoderFunc +} + +func (ae arrayEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + e.WriteByte('[') + n := v.Len() + for i := 0; i < n; i++ { + if i > 0 { + e.WriteByte(',') + } + ae.elemEnc(e, v.Index(i), opts) + } + e.WriteByte(']') +} + +func newArrayEncoder(t reflect.Type) encoderFunc { + enc := arrayEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type ptrEncoder struct { + elemEnc encoderFunc +} + +func (pe ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + ptr := v.Interface() + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + pe.elemEnc(e, v.Elem(), opts) + e.ptrLevel-- +} + +func newPtrEncoder(t reflect.Type) encoderFunc { + enc := ptrEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type condAddrEncoder struct { + canAddrEnc, elseEnc encoderFunc +} + +func (ce condAddrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.CanAddr() { + ce.canAddrEnc(e, v, opts) + } else { + ce.elseEnc(e, v, opts) + } +} + +// newCondAddrEncoder returns an encoder that checks whether its value +// CanAddr and delegates to canAddrEnc if so, else to elseEnc. +func newCondAddrEncoder(canAddrEnc, elseEnc encoderFunc) encoderFunc { + enc := condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} + return enc.encode +} + +func isValidTag(s string) bool { + if s == "" { + return false + } + for _, c := range s { + switch { + case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c): + // Backslash and quote chars are reserved, but + // otherwise any punctuation chars are allowed + // in a tag name. + case !unicode.IsLetter(c) && !unicode.IsDigit(c): + return false + } + } + return true +} + +func typeByIndex(t reflect.Type, index []int) reflect.Type { + for _, i := range index { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + t = t.Field(i).Type + } + return t +} + +type reflectWithString struct { + v reflect.Value + ks string +} + +func resolveKeyName(k reflect.Value) (string, error) { + if k.Kind() == reflect.String { + return k.String(), nil + } + if tm, ok := k.Interface().(encoding.TextMarshaler); ok { + if k.Kind() == reflect.Pointer && k.IsNil() { + return "", nil + } + buf, err := tm.MarshalText() + return string(buf), err + } + switch k.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.FormatInt(k.Int(), 10), nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(k.Uint(), 10), nil + } + panic("unexpected map key type") +} + +func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) []byte { + dst = append(dst, '"') + start := 0 + for i := 0; i < len(src); { + if b := src[i]; b < utf8.RuneSelf { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { + i++ + continue + } + dst = append(dst, src[start:i]...) + switch b { + case '\\', '"': + dst = append(dst, '\\', b) + case '\b': + dst = append(dst, '\\', 'b') + case '\f': + dst = append(dst, '\\', 'f') + case '\n': + dst = append(dst, '\\', 'n') + case '\r': + dst = append(dst, '\\', 'r') + case '\t': + dst = append(dst, '\\', 't') + default: + // This encodes bytes < 0x20 except for \b, \f, \n, \r and \t. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) + } + i++ + start = i + continue + } + // TODO(https://go.dev/issue/56948): Use generic utf8 functionality. + // For now, cast only a small portion of byte slices to a string + // so that it can be stack allocated. This slows down []byte slightly + // due to the extra copy, but keeps string performance roughly the same. + n := len(src) - i + if n > utf8.UTFMax { + n = utf8.UTFMax + } + c, size := utf8.DecodeRuneInString(string(src[i : i+n])) + if c == utf8.RuneError && size == 1 { + dst = append(dst, src[start:i]...) + dst = append(dst, `\ufffd`...) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See https://en.wikipedia.org/wiki/JSON#Safety. + if c == '\u2028' || c == '\u2029' { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '2', '0', '2', hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + dst = append(dst, src[start:]...) + dst = append(dst, '"') + return dst +} + +// A field represents a single field found in a struct. +type field struct { + name string + nameBytes []byte // []byte(name) + + nameNonEsc string // `"` + name + `":` + nameEscHTML string // `"` + HTMLEscape(name) + `":` + + tag bool + index []int + typ reflect.Type + omitEmpty bool + quoted bool + + encoder encoderFunc +} + +// byIndex sorts field by index sequence. +type byIndex []field + +func (x byIndex) Len() int { return len(x) } + +func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +func (x byIndex) Less(i, j int) bool { + for k, xik := range x[i].index { + if k >= len(x[j].index) { + return false + } + if xik != x[j].index[k] { + return xik < x[j].index[k] + } + } + return len(x[i].index) < len(x[j].index) +} + +// typeFields returns a list of fields that JSON should recognize for the given type. +// The algorithm is breadth-first search over the set of structs to include - the top struct +// and then any reachable anonymous structs. +func typeFields(t reflect.Type) structFields { + // Anonymous fields to explore at the current level and the next. + current := []field{} + next := []field{{typ: t}} + + // Count of queued names for current level and the next. + var count, nextCount map[reflect.Type]int + + // Types already visited at an earlier level. + visited := map[reflect.Type]bool{} + + // Fields found. + var fields []field + + // Buffer to run appendHTMLEscape on field names. + var nameEscBuf []byte + + for len(next) > 0 { + current, next = next, current[:0] + count, nextCount = nextCount, map[reflect.Type]int{} + + for _, f := range current { + if visited[f.typ] { + continue + } + visited[f.typ] = true + + // Scan f.typ for fields to include. + for i := 0; i < f.typ.NumField(); i++ { + sf := f.typ.Field(i) + if sf.Anonymous { + t := sf.Type + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if !sf.IsExported() && t.Kind() != reflect.Struct { + // Ignore embedded fields of unexported non-struct types. + continue + } + // Do not ignore embedded fields of unexported struct types + // since they may have exported fields. + } else if !sf.IsExported() { + // Ignore unexported non-embedded fields. + continue + } + tag := sf.Tag.Get("json") + if tag == "-" { + continue + } + name, opts := parseTag(tag) + if !isValidTag(name) { + name = "" + } + index := make([]int, len(f.index)+1) + copy(index, f.index) + index[len(f.index)] = i + + ft := sf.Type + if ft.Name() == "" && ft.Kind() == reflect.Pointer { + // Follow pointer. + ft = ft.Elem() + } + + // Only strings, floats, integers, and booleans can be quoted. + quoted := false + if opts.Contains("string") { + switch ft.Kind() { + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.String: + quoted = true + } + } + + // Record found field and index sequence. + if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { + tagged := name != "" + if name == "" { + name = sf.Name + } + field := field{ + name: name, + tag: tagged, + index: index, + typ: ft, + omitEmpty: opts.Contains("omitempty"), + quoted: quoted, + } + field.nameBytes = []byte(field.name) + + // Build nameEscHTML and nameNonEsc ahead of time. + nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes) + field.nameEscHTML = `"` + string(nameEscBuf) + `":` + field.nameNonEsc = `"` + field.name + `":` + + fields = append(fields, field) + if count[f.typ] > 1 { + // If there were multiple instances, add a second, + // so that the annihilation code will see a duplicate. + // It only cares about the distinction between 1 and 2, + // so don't bother generating any more copies. + fields = append(fields, fields[len(fields)-1]) + } + continue + } + + // Record new anonymous struct to explore in next round. + nextCount[ft]++ + if nextCount[ft] == 1 { + next = append(next, field{name: ft.Name(), index: index, typ: ft}) + } + } + } + } + + sort.Slice(fields, func(i, j int) bool { + x := fields + // sort field by name, breaking ties with depth, then + // breaking ties with "name came from json tag", then + // breaking ties with index sequence. + if x[i].name != x[j].name { + return x[i].name < x[j].name + } + if len(x[i].index) != len(x[j].index) { + return len(x[i].index) < len(x[j].index) + } + if x[i].tag != x[j].tag { + return x[i].tag + } + return byIndex(x).Less(i, j) + }) + + // Delete all fields that are hidden by the Go rules for embedded fields, + // except that fields with JSON tags are promoted. + + // The fields are sorted in primary order of name, secondary order + // of field index length. Loop over names; for each name, delete + // hidden fields by choosing the one dominant field that survives. + out := fields[:0] + for advance, i := 0, 0; i < len(fields); i += advance { + // One iteration per name. + // Find the sequence of fields with the name of this first field. + fi := fields[i] + name := fi.name + for advance = 1; i+advance < len(fields); advance++ { + fj := fields[i+advance] + if fj.name != name { + break + } + } + if advance == 1 { // Only one field with this name + out = append(out, fi) + continue + } + dominant, ok := dominantField(fields[i : i+advance]) + if ok { + out = append(out, dominant) + } + } + + fields = out + sort.Sort(byIndex(fields)) + + for i := range fields { + f := &fields[i] + f.encoder = typeEncoder(typeByIndex(t, f.index)) + } + exactNameIndex := make(map[string]*field, len(fields)) + foldedNameIndex := make(map[string]*field, len(fields)) + for i, field := range fields { + exactNameIndex[field.name] = &fields[i] + // For historical reasons, first folded match takes precedence. + if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok { + foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i] + } + } + return structFields{fields, exactNameIndex, foldedNameIndex} +} + +// dominantField looks through the fields, all of which are known to +// have the same name, to find the single field that dominates the +// others using Go's embedding rules, modified by the presence of +// JSON tags. If there are multiple top-level fields, the boolean +// will be false: This condition is an error in Go and we skip all +// the fields. +func dominantField(fields []field) (field, bool) { + // The fields are sorted in increasing index-length order, then by presence of tag. + // That means that the first field is the dominant one. We need only check + // for error cases: two fields at top level, either both tagged or neither tagged. + if len(fields) > 1 && len(fields[0].index) == len(fields[1].index) && fields[0].tag == fields[1].tag { + return field{}, false + } + return fields[0], true +} + +var fieldCache sync.Map // map[reflect.Type]structFields + +// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. +func cachedTypeFields(t reflect.Type) structFields { + if f, ok := fieldCache.Load(t); ok { + return f.(structFields) + } + f, _ := fieldCache.LoadOrStore(t, typeFields(t)) + return f.(structFields) +} + +func mayAppendQuote(b []byte, quoted bool) []byte { + if quoted { + b = append(b, '"') + } + return b +} diff --git a/src/encoding/json/encode_test.go b/src/encoding/json/encode_test.go new file mode 100644 index 0000000..53259f4 --- /dev/null +++ b/src/encoding/json/encode_test.go @@ -0,0 +1,1221 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "encoding" + "fmt" + "log" + "math" + "reflect" + "regexp" + "runtime/debug" + "strconv" + "testing" +) + +type Optionals struct { + Sr string `json:"sr"` + So string `json:"so,omitempty"` + Sw string `json:"-"` + + Ir int `json:"omitempty"` // actually named omitempty, not an option + Io int `json:"io,omitempty"` + + Slr []string `json:"slr,random"` + Slo []string `json:"slo,omitempty"` + + Mr map[string]any `json:"mr"` + Mo map[string]any `json:",omitempty"` + + Fr float64 `json:"fr"` + Fo float64 `json:"fo,omitempty"` + + Br bool `json:"br"` + Bo bool `json:"bo,omitempty"` + + Ur uint `json:"ur"` + Uo uint `json:"uo,omitempty"` + + Str struct{} `json:"str"` + Sto struct{} `json:"sto,omitempty"` +} + +func TestOmitEmpty(t *testing.T) { + var want = `{ + "sr": "", + "omitempty": 0, + "slr": null, + "mr": {}, + "fr": 0, + "br": false, + "ur": 0, + "str": {}, + "sto": {} +}` + var o Optionals + o.Sw = "something" + o.Mr = map[string]any{} + o.Mo = map[string]any{} + + got, err := MarshalIndent(&o, "", " ") + if err != nil { + t.Fatalf("MarshalIndent error: %v", err) + } + if got := string(got); got != want { + t.Errorf("MarshalIndent:\n\tgot: %s\n\twant: %s\n", indentNewlines(got), indentNewlines(want)) + } +} + +type StringTag struct { + BoolStr bool `json:",string"` + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + StrStr string `json:",string"` + NumberStr Number `json:",string"` +} + +func TestRoundtripStringTag(t *testing.T) { + tests := []struct { + CaseName + in StringTag + want string // empty to just test that we roundtrip + }{{ + CaseName: Name("AllTypes"), + in: StringTag{ + BoolStr: true, + IntStr: 42, + UintptrStr: 44, + StrStr: "xzbit", + NumberStr: "46", + }, + want: `{ + "BoolStr": "true", + "IntStr": "42", + "UintptrStr": "44", + "StrStr": "\"xzbit\"", + "NumberStr": "46" +}`, + }, { + // See golang.org/issues/38173. + CaseName: Name("StringDoubleEscapes"), + in: StringTag{ + StrStr: "\b\f\n\r\t\"\\", + NumberStr: "0", // just to satisfy the roundtrip + }, + want: `{ + "BoolStr": "false", + "IntStr": "0", + "UintptrStr": "0", + "StrStr": "\"\\b\\f\\n\\r\\t\\\"\\\\\"", + "NumberStr": "0" +}`, + }} + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got, err := MarshalIndent(&tt.in, "", "\t") + if err != nil { + t.Fatalf("%s: MarshalIndent error: %v", tt.Where, err) + } + if got := string(got); got != tt.want { + t.Fatalf("%s: MarshalIndent:\n\tgot: %s\n\twant: %s", tt.Where, stripWhitespace(got), stripWhitespace(tt.want)) + } + + // Verify that it round-trips. + var s2 StringTag + if err := Unmarshal(got, &s2); err != nil { + t.Fatalf("%s: Decode error: %v", tt.Where, err) + } + if !reflect.DeepEqual(s2, tt.in) { + t.Fatalf("%s: Decode:\n\tinput: %s\n\tgot: %#v\n\twant: %#v", tt.Where, indentNewlines(string(got)), s2, tt.in) + } + }) + } +} + +// byte slices are special even if they're renamed types. +type renamedByte byte +type renamedByteSlice []byte +type renamedRenamedByteSlice []renamedByte + +func TestEncodeRenamedByteSlice(t *testing.T) { + s := renamedByteSlice("abc") + got, err := Marshal(s) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + want := `"YWJj"` + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } + r := renamedRenamedByteSlice("abc") + got, err = Marshal(r) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +type SamePointerNoCycle struct { + Ptr1, Ptr2 *SamePointerNoCycle +} + +var samePointerNoCycle = &SamePointerNoCycle{} + +type PointerCycle struct { + Ptr *PointerCycle +} + +var pointerCycle = &PointerCycle{} + +type PointerCycleIndirect struct { + Ptrs []any +} + +type RecursiveSlice []RecursiveSlice + +var ( + pointerCycleIndirect = &PointerCycleIndirect{} + mapCycle = make(map[string]any) + sliceCycle = []any{nil} + sliceNoCycle = []any{nil, nil} + recursiveSliceCycle = []RecursiveSlice{nil} +) + +func init() { + ptr := &SamePointerNoCycle{} + samePointerNoCycle.Ptr1 = ptr + samePointerNoCycle.Ptr2 = ptr + + pointerCycle.Ptr = pointerCycle + pointerCycleIndirect.Ptrs = []any{pointerCycleIndirect} + + mapCycle["x"] = mapCycle + sliceCycle[0] = sliceCycle + sliceNoCycle[1] = sliceNoCycle[:1] + for i := startDetectingCyclesAfter; i > 0; i-- { + sliceNoCycle = []any{sliceNoCycle} + } + recursiveSliceCycle[0] = recursiveSliceCycle +} + +func TestSamePointerNoCycle(t *testing.T) { + if _, err := Marshal(samePointerNoCycle); err != nil { + t.Fatalf("Marshal error: %v", err) + } +} + +func TestSliceNoCycle(t *testing.T) { + if _, err := Marshal(sliceNoCycle); err != nil { + t.Fatalf("Marshal error: %v", err) + } +} + +func TestUnsupportedValues(t *testing.T) { + tests := []struct { + CaseName + in any + }{ + {Name(""), math.NaN()}, + {Name(""), math.Inf(-1)}, + {Name(""), math.Inf(1)}, + {Name(""), pointerCycle}, + {Name(""), pointerCycleIndirect}, + {Name(""), mapCycle}, + {Name(""), sliceCycle}, + {Name(""), recursiveSliceCycle}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + if _, err := Marshal(tt.in); err != nil { + if _, ok := err.(*UnsupportedValueError); !ok { + t.Errorf("%s: Marshal error:\n\tgot: %T\n\twant: %T", tt.Where, err, new(UnsupportedValueError)) + } + } else { + t.Errorf("%s: Marshal error: got nil, want non-nil", tt.Where) + } + }) + } +} + +// Issue 43207 +func TestMarshalTextFloatMap(t *testing.T) { + m := map[textfloat]string{ + textfloat(math.NaN()): "1", + textfloat(math.NaN()): "1", + } + got, err := Marshal(m) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + want := `{"TF:NaN":"1","TF:NaN":"1"}` + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +// Ref has Marshaler and Unmarshaler methods with pointer receiver. +type Ref int + +func (*Ref) MarshalJSON() ([]byte, error) { + return []byte(`"ref"`), nil +} + +func (r *Ref) UnmarshalJSON([]byte) error { + *r = 12 + return nil +} + +// Val has Marshaler methods with value receiver. +type Val int + +func (Val) MarshalJSON() ([]byte, error) { + return []byte(`"val"`), nil +} + +// RefText has Marshaler and Unmarshaler methods with pointer receiver. +type RefText int + +func (*RefText) MarshalText() ([]byte, error) { + return []byte(`"ref"`), nil +} + +func (r *RefText) UnmarshalText([]byte) error { + *r = 13 + return nil +} + +// ValText has Marshaler methods with value receiver. +type ValText int + +func (ValText) MarshalText() ([]byte, error) { + return []byte(`"val"`), nil +} + +func TestRefValMarshal(t *testing.T) { + var s = struct { + R0 Ref + R1 *Ref + R2 RefText + R3 *RefText + V0 Val + V1 *Val + V2 ValText + V3 *ValText + }{ + R0: 12, + R1: new(Ref), + R2: 14, + R3: new(RefText), + V0: 13, + V1: new(Val), + V2: 15, + V3: new(ValText), + } + const want = `{"R0":"ref","R1":"ref","R2":"\"ref\"","R3":"\"ref\"","V0":"val","V1":"val","V2":"\"val\"","V3":"\"val\""}` + b, err := Marshal(&s) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if got := string(b); got != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +// C implements Marshaler and returns unescaped JSON. +type C int + +func (C) MarshalJSON() ([]byte, error) { + return []byte(`"<&>"`), nil +} + +// CText implements Marshaler and returns unescaped text. +type CText int + +func (CText) MarshalText() ([]byte, error) { + return []byte(`"<&>"`), nil +} + +func TestMarshalerEscaping(t *testing.T) { + var c C + want := `"\u003c\u0026\u003e"` + b, err := Marshal(c) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if got := string(b); got != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } + + var ct CText + want = `"\"\u003c\u0026\u003e\""` + b, err = Marshal(ct) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if got := string(b); got != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestAnonymousFields(t *testing.T) { + tests := []struct { + CaseName + makeInput func() any // Function to create input value + want string // Expected JSON output + }{{ + // Both S1 and S2 have a field named X. From the perspective of S, + // it is ambiguous which one X refers to. + // This should not serialize either field. + CaseName: Name("AmbiguousField"), + makeInput: func() any { + type ( + S1 struct{ x, X int } + S2 struct{ x, X int } + S struct { + S1 + S2 + } + ) + return S{S1{1, 2}, S2{3, 4}} + }, + want: `{}`, + }, { + CaseName: Name("DominantField"), + // Both S1 and S2 have a field named X, but since S has an X field as + // well, it takes precedence over S1.X and S2.X. + makeInput: func() any { + type ( + S1 struct{ x, X int } + S2 struct{ x, X int } + S struct { + S1 + S2 + x, X int + } + ) + return S{S1{1, 2}, S2{3, 4}, 5, 6} + }, + want: `{"X":6}`, + }, { + // Unexported embedded field of non-struct type should not be serialized. + CaseName: Name("UnexportedEmbeddedInt"), + makeInput: func() any { + type ( + myInt int + S struct{ myInt } + ) + return S{5} + }, + want: `{}`, + }, { + // Exported embedded field of non-struct type should be serialized. + CaseName: Name("ExportedEmbeddedInt"), + makeInput: func() any { + type ( + MyInt int + S struct{ MyInt } + ) + return S{5} + }, + want: `{"MyInt":5}`, + }, { + // Unexported embedded field of pointer to non-struct type + // should not be serialized. + CaseName: Name("UnexportedEmbeddedIntPointer"), + makeInput: func() any { + type ( + myInt int + S struct{ *myInt } + ) + s := S{new(myInt)} + *s.myInt = 5 + return s + }, + want: `{}`, + }, { + // Exported embedded field of pointer to non-struct type + // should be serialized. + CaseName: Name("ExportedEmbeddedIntPointer"), + makeInput: func() any { + type ( + MyInt int + S struct{ *MyInt } + ) + s := S{new(MyInt)} + *s.MyInt = 5 + return s + }, + want: `{"MyInt":5}`, + }, { + // Exported fields of embedded structs should have their + // exported fields be serialized regardless of whether the struct types + // themselves are exported. + CaseName: Name("EmbeddedStruct"), + makeInput: func() any { + type ( + s1 struct{ x, X int } + S2 struct{ y, Y int } + S struct { + s1 + S2 + } + ) + return S{s1{1, 2}, S2{3, 4}} + }, + want: `{"X":2,"Y":4}`, + }, { + // Exported fields of pointers to embedded structs should have their + // exported fields be serialized regardless of whether the struct types + // themselves are exported. + CaseName: Name("EmbeddedStructPointer"), + makeInput: func() any { + type ( + s1 struct{ x, X int } + S2 struct{ y, Y int } + S struct { + *s1 + *S2 + } + ) + return S{&s1{1, 2}, &S2{3, 4}} + }, + want: `{"X":2,"Y":4}`, + }, { + // Exported fields on embedded unexported structs at multiple levels + // of nesting should still be serialized. + CaseName: Name("NestedStructAndInts"), + makeInput: func() any { + type ( + MyInt1 int + MyInt2 int + myInt int + s2 struct { + MyInt2 + myInt + } + s1 struct { + MyInt1 + myInt + s2 + } + S struct { + s1 + myInt + } + ) + return S{s1{1, 2, s2{3, 4}}, 6} + }, + want: `{"MyInt1":1,"MyInt2":3}`, + }, { + // If an anonymous struct pointer field is nil, we should ignore + // the embedded fields behind it. Not properly doing so may + // result in the wrong output or reflect panics. + CaseName: Name("EmbeddedFieldBehindNilPointer"), + makeInput: func() any { + type ( + S2 struct{ Field string } + S struct{ *S2 } + ) + return S{} + }, + want: `{}`, + }} + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b, err := Marshal(tt.makeInput()) + if err != nil { + t.Fatalf("%s: Marshal error: %v", tt.Where, err) + } + if string(b) != tt.want { + t.Fatalf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, b, tt.want) + } + }) + } +} + +type BugA struct { + S string +} + +type BugB struct { + BugA + S string +} + +type BugC struct { + S string +} + +// Legal Go: We never use the repeated embedded field (S). +type BugX struct { + A int + BugA + BugB +} + +// golang.org/issue/16042. +// Even if a nil interface value is passed in, as long as +// it implements Marshaler, it should be marshaled. +type nilJSONMarshaler string + +func (nm *nilJSONMarshaler) MarshalJSON() ([]byte, error) { + if nm == nil { + return Marshal("0zenil0") + } + return Marshal("zenil:" + string(*nm)) +} + +// golang.org/issue/34235. +// Even if a nil interface value is passed in, as long as +// it implements encoding.TextMarshaler, it should be marshaled. +type nilTextMarshaler string + +func (nm *nilTextMarshaler) MarshalText() ([]byte, error) { + if nm == nil { + return []byte("0zenil0"), nil + } + return []byte("zenil:" + string(*nm)), nil +} + +// See golang.org/issue/16042 and golang.org/issue/34235. +func TestNilMarshal(t *testing.T) { + tests := []struct { + CaseName + in any + want string + }{ + {Name(""), nil, `null`}, + {Name(""), new(float64), `0`}, + {Name(""), []any(nil), `null`}, + {Name(""), []string(nil), `null`}, + {Name(""), map[string]string(nil), `null`}, + {Name(""), []byte(nil), `null`}, + {Name(""), struct{ M string }{"gopher"}, `{"M":"gopher"}`}, + {Name(""), struct{ M Marshaler }{}, `{"M":null}`}, + {Name(""), struct{ M Marshaler }{(*nilJSONMarshaler)(nil)}, `{"M":"0zenil0"}`}, + {Name(""), struct{ M any }{(*nilJSONMarshaler)(nil)}, `{"M":null}`}, + {Name(""), struct{ M encoding.TextMarshaler }{}, `{"M":null}`}, + {Name(""), struct{ M encoding.TextMarshaler }{(*nilTextMarshaler)(nil)}, `{"M":"0zenil0"}`}, + {Name(""), struct{ M any }{(*nilTextMarshaler)(nil)}, `{"M":null}`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + switch got, err := Marshal(tt.in); { + case err != nil: + t.Fatalf("%s: Marshal error: %v", tt.Where, err) + case string(got) != tt.want: + t.Fatalf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.want) + } + }) + } +} + +// Issue 5245. +func TestEmbeddedBug(t *testing.T) { + v := BugB{ + BugA{"A"}, + "B", + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal error:", err) + } + want := `{"S":"B"}` + got := string(b) + if got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } + // Now check that the duplicate field, S, does not appear. + x := BugX{ + A: 23, + } + b, err = Marshal(x) + if err != nil { + t.Fatal("Marshal error:", err) + } + want = `{"A":23}` + got = string(b) + if got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +type BugD struct { // Same as BugA after tagging. + XXX string `json:"S"` +} + +// BugD's tagged S field should dominate BugA's. +type BugY struct { + BugA + BugD +} + +// Test that a field with a tag dominates untagged fields. +func TestTaggedFieldDominates(t *testing.T) { + v := BugY{ + BugA{"BugA"}, + BugD{"BugD"}, + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal error:", err) + } + want := `{"S":"BugD"}` + got := string(b) + if got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +// There are no tags here, so S should not appear. +type BugZ struct { + BugA + BugC + BugY // Contains a tagged S field through BugD; should not dominate. +} + +func TestDuplicatedFieldDisappears(t *testing.T) { + v := BugZ{ + BugA{"BugA"}, + BugC{"BugC"}, + BugY{ + BugA{"nested BugA"}, + BugD{"nested BugD"}, + }, + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal error:", err) + } + want := `{}` + got := string(b) + if got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestIssue10281(t *testing.T) { + type Foo struct { + N Number + } + x := Foo{Number(`invalid`)} + + if _, err := Marshal(&x); err == nil { + t.Fatalf("Marshal error: got nil, want non-nil") + } +} + +func TestMarshalErrorAndReuseEncodeState(t *testing.T) { + // Disable the GC temporarily to prevent encodeState's in Pool being cleaned away during the test. + percent := debug.SetGCPercent(-1) + defer debug.SetGCPercent(percent) + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + if _, err := Marshal(dummy); err == nil { + t.Errorf("Marshal error: got nil, want non-nil") + } + + type Data struct { + A string + I int + } + want := Data{A: "a", I: 1} + b, err := Marshal(want) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + + var got Data + if err := Unmarshal(b, &got); err != nil { + t.Errorf("Unmarshal error: %v", err) + } + if got != want { + t.Errorf("Unmarshal:\n\tgot: %v\n\twant: %v", got, want) + } +} + +func TestHTMLEscape(t *testing.T) { + var b, want bytes.Buffer + m := `{"M":"<html>foo &` + "\xe2\x80\xa8 \xe2\x80\xa9" + `</html>"}` + want.Write([]byte(`{"M":"\u003chtml\u003efoo \u0026\u2028 \u2029\u003c/html\u003e"}`)) + HTMLEscape(&b, []byte(m)) + if !bytes.Equal(b.Bytes(), want.Bytes()) { + t.Errorf("HTMLEscape:\n\tgot: %s\n\twant: %s", b.Bytes(), want.Bytes()) + } +} + +// golang.org/issue/8582 +func TestEncodePointerString(t *testing.T) { + type stringPointer struct { + N *int64 `json:"n,string"` + } + var n int64 = 42 + b, err := Marshal(stringPointer{N: &n}) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if got, want := string(b), `{"n":"42"}`; got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } + var back stringPointer + switch err = Unmarshal(b, &back); { + case err != nil: + t.Fatalf("Unmarshal error: %v", err) + case back.N == nil: + t.Fatalf("Unmarshal: back.N = nil, want non-nil") + case *back.N != 42: + t.Fatalf("Unmarshal: *back.N = %d, want 42", *back.N) + } +} + +var encodeStringTests = []struct { + in string + out string +}{ + {"\x00", `"\u0000"`}, + {"\x01", `"\u0001"`}, + {"\x02", `"\u0002"`}, + {"\x03", `"\u0003"`}, + {"\x04", `"\u0004"`}, + {"\x05", `"\u0005"`}, + {"\x06", `"\u0006"`}, + {"\x07", `"\u0007"`}, + {"\x08", `"\b"`}, + {"\x09", `"\t"`}, + {"\x0a", `"\n"`}, + {"\x0b", `"\u000b"`}, + {"\x0c", `"\f"`}, + {"\x0d", `"\r"`}, + {"\x0e", `"\u000e"`}, + {"\x0f", `"\u000f"`}, + {"\x10", `"\u0010"`}, + {"\x11", `"\u0011"`}, + {"\x12", `"\u0012"`}, + {"\x13", `"\u0013"`}, + {"\x14", `"\u0014"`}, + {"\x15", `"\u0015"`}, + {"\x16", `"\u0016"`}, + {"\x17", `"\u0017"`}, + {"\x18", `"\u0018"`}, + {"\x19", `"\u0019"`}, + {"\x1a", `"\u001a"`}, + {"\x1b", `"\u001b"`}, + {"\x1c", `"\u001c"`}, + {"\x1d", `"\u001d"`}, + {"\x1e", `"\u001e"`}, + {"\x1f", `"\u001f"`}, +} + +func TestEncodeString(t *testing.T) { + for _, tt := range encodeStringTests { + b, err := Marshal(tt.in) + if err != nil { + t.Errorf("Marshal(%q) error: %v", tt.in, err) + continue + } + out := string(b) + if out != tt.out { + t.Errorf("Marshal(%q) = %#q, want %#q", tt.in, out, tt.out) + } + } +} + +type jsonbyte byte + +func (b jsonbyte) MarshalJSON() ([]byte, error) { return tenc(`{"JB":%d}`, b) } + +type textbyte byte + +func (b textbyte) MarshalText() ([]byte, error) { return tenc(`TB:%d`, b) } + +type jsonint int + +func (i jsonint) MarshalJSON() ([]byte, error) { return tenc(`{"JI":%d}`, i) } + +type textint int + +func (i textint) MarshalText() ([]byte, error) { return tenc(`TI:%d`, i) } + +func tenc(format string, a ...any) ([]byte, error) { + var buf bytes.Buffer + fmt.Fprintf(&buf, format, a...) + return buf.Bytes(), nil +} + +type textfloat float64 + +func (f textfloat) MarshalText() ([]byte, error) { return tenc(`TF:%0.2f`, f) } + +// Issue 13783 +func TestEncodeBytekind(t *testing.T) { + tests := []struct { + CaseName + in any + want string + }{ + {Name(""), byte(7), "7"}, + {Name(""), jsonbyte(7), `{"JB":7}`}, + {Name(""), textbyte(4), `"TB:4"`}, + {Name(""), jsonint(5), `{"JI":5}`}, + {Name(""), textint(1), `"TI:1"`}, + {Name(""), []byte{0, 1}, `"AAE="`}, + {Name(""), []jsonbyte{0, 1}, `[{"JB":0},{"JB":1}]`}, + {Name(""), [][]jsonbyte{{0, 1}, {3}}, `[[{"JB":0},{"JB":1}],[{"JB":3}]]`}, + {Name(""), []textbyte{2, 3}, `["TB:2","TB:3"]`}, + {Name(""), []jsonint{5, 4}, `[{"JI":5},{"JI":4}]`}, + {Name(""), []textint{9, 3}, `["TI:9","TI:3"]`}, + {Name(""), []int{9, 3}, `[9,3]`}, + {Name(""), []textfloat{12, 3}, `["TF:12.00","TF:3.00"]`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b, err := Marshal(tt.in) + if err != nil { + t.Errorf("%s: Marshal error: %v", tt.Where, err) + } + got, want := string(b), tt.want + if got != want { + t.Errorf("%s: Marshal:\n\tgot: %s\n\twant: %s", tt.Where, got, want) + } + }) + } +} + +func TestTextMarshalerMapKeysAreSorted(t *testing.T) { + got, err := Marshal(map[unmarshalerText]int{ + {"x", "y"}: 1, + {"y", "x"}: 2, + {"a", "z"}: 3, + {"z", "a"}: 4, + }) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + const want = `{"a:z":3,"x:y":1,"y:x":2,"z:a":4}` + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +// https://golang.org/issue/33675 +func TestNilMarshalerTextMapKey(t *testing.T) { + got, err := Marshal(map[*unmarshalerText]int{ + (*unmarshalerText)(nil): 1, + {"A", "B"}: 2, + }) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + const want = `{"":1,"A:B":2}` + if string(got) != want { + t.Errorf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +var re = regexp.MustCompile + +// syntactic checks on form of marshaled floating point numbers. +var badFloatREs = []*regexp.Regexp{ + re(`p`), // no binary exponential notation + re(`^\+`), // no leading + sign + re(`^-?0[^.]`), // no unnecessary leading zeros + re(`^-?\.`), // leading zero required before decimal point + re(`\.(e|$)`), // no trailing decimal + re(`\.[0-9]+0(e|$)`), // no trailing zero in fraction + re(`^-?(0|[0-9]{2,})\..*e`), // exponential notation must have normalized mantissa + re(`e[0-9]`), // positive exponent must be signed + re(`e[+-]0`), // exponent must not have leading zeros + re(`e-[1-6]$`), // not tiny enough for exponential notation + re(`e+(.|1.|20)$`), // not big enough for exponential notation + re(`^-?0\.0000000`), // too tiny, should use exponential notation + re(`^-?[0-9]{22}`), // too big, should use exponential notation + re(`[1-9][0-9]{16}[1-9]`), // too many significant digits in integer + re(`[1-9][0-9.]{17}[1-9]`), // too many significant digits in decimal + // below here for float32 only + re(`[1-9][0-9]{8}[1-9]`), // too many significant digits in integer + re(`[1-9][0-9.]{9}[1-9]`), // too many significant digits in decimal +} + +func TestMarshalFloat(t *testing.T) { + t.Parallel() + nfail := 0 + test := func(f float64, bits int) { + vf := any(f) + if bits == 32 { + f = float64(float32(f)) // round + vf = float32(f) + } + bout, err := Marshal(vf) + if err != nil { + t.Errorf("Marshal(%T(%g)) error: %v", vf, vf, err) + nfail++ + return + } + out := string(bout) + + // result must convert back to the same float + g, err := strconv.ParseFloat(out, bits) + if err != nil { + t.Errorf("ParseFloat(%q) error: %v", out, err) + nfail++ + return + } + if f != g || fmt.Sprint(f) != fmt.Sprint(g) { // fmt.Sprint handles ±0 + t.Errorf("ParseFloat(%q):\n\tgot: %g\n\twant: %g", out, float32(g), vf) + nfail++ + return + } + + bad := badFloatREs + if bits == 64 { + bad = bad[:len(bad)-2] + } + for _, re := range bad { + if re.MatchString(out) { + t.Errorf("Marshal(%T(%g)) = %q; must not match /%s/", vf, vf, out, re) + nfail++ + return + } + } + } + + var ( + bigger = math.Inf(+1) + smaller = math.Inf(-1) + ) + + var digits = "1.2345678901234567890123" + for i := len(digits); i >= 2; i-- { + if testing.Short() && i < len(digits)-4 { + break + } + for exp := -30; exp <= 30; exp++ { + for _, sign := range "+-" { + for bits := 32; bits <= 64; bits += 32 { + s := fmt.Sprintf("%c%se%d", sign, digits[:i], exp) + f, err := strconv.ParseFloat(s, bits) + if err != nil { + log.Fatal(err) + } + next := math.Nextafter + if bits == 32 { + next = func(g, h float64) float64 { + return float64(math.Nextafter32(float32(g), float32(h))) + } + } + test(f, bits) + test(next(f, bigger), bits) + test(next(f, smaller), bits) + if nfail > 50 { + t.Fatalf("stopping test early") + } + } + } + } + } + test(0, 64) + test(math.Copysign(0, -1), 64) + test(0, 32) + test(math.Copysign(0, -1), 32) +} + +func TestMarshalRawMessageValue(t *testing.T) { + type ( + T1 struct { + M RawMessage `json:",omitempty"` + } + T2 struct { + M *RawMessage `json:",omitempty"` + } + ) + + var ( + rawNil = RawMessage(nil) + rawEmpty = RawMessage([]byte{}) + rawText = RawMessage([]byte(`"foo"`)) + ) + + tests := []struct { + CaseName + in any + want string + ok bool + }{ + // Test with nil RawMessage. + {Name(""), rawNil, "null", true}, + {Name(""), &rawNil, "null", true}, + {Name(""), []any{rawNil}, "[null]", true}, + {Name(""), &[]any{rawNil}, "[null]", true}, + {Name(""), []any{&rawNil}, "[null]", true}, + {Name(""), &[]any{&rawNil}, "[null]", true}, + {Name(""), struct{ M RawMessage }{rawNil}, `{"M":null}`, true}, + {Name(""), &struct{ M RawMessage }{rawNil}, `{"M":null}`, true}, + {Name(""), struct{ M *RawMessage }{&rawNil}, `{"M":null}`, true}, + {Name(""), &struct{ M *RawMessage }{&rawNil}, `{"M":null}`, true}, + {Name(""), map[string]any{"M": rawNil}, `{"M":null}`, true}, + {Name(""), &map[string]any{"M": rawNil}, `{"M":null}`, true}, + {Name(""), map[string]any{"M": &rawNil}, `{"M":null}`, true}, + {Name(""), &map[string]any{"M": &rawNil}, `{"M":null}`, true}, + {Name(""), T1{rawNil}, "{}", true}, + {Name(""), T2{&rawNil}, `{"M":null}`, true}, + {Name(""), &T1{rawNil}, "{}", true}, + {Name(""), &T2{&rawNil}, `{"M":null}`, true}, + + // Test with empty, but non-nil, RawMessage. + {Name(""), rawEmpty, "", false}, + {Name(""), &rawEmpty, "", false}, + {Name(""), []any{rawEmpty}, "", false}, + {Name(""), &[]any{rawEmpty}, "", false}, + {Name(""), []any{&rawEmpty}, "", false}, + {Name(""), &[]any{&rawEmpty}, "", false}, + {Name(""), struct{ X RawMessage }{rawEmpty}, "", false}, + {Name(""), &struct{ X RawMessage }{rawEmpty}, "", false}, + {Name(""), struct{ X *RawMessage }{&rawEmpty}, "", false}, + {Name(""), &struct{ X *RawMessage }{&rawEmpty}, "", false}, + {Name(""), map[string]any{"nil": rawEmpty}, "", false}, + {Name(""), &map[string]any{"nil": rawEmpty}, "", false}, + {Name(""), map[string]any{"nil": &rawEmpty}, "", false}, + {Name(""), &map[string]any{"nil": &rawEmpty}, "", false}, + {Name(""), T1{rawEmpty}, "{}", true}, + {Name(""), T2{&rawEmpty}, "", false}, + {Name(""), &T1{rawEmpty}, "{}", true}, + {Name(""), &T2{&rawEmpty}, "", false}, + + // Test with RawMessage with some text. + // + // The tests below marked with Issue6458 used to generate "ImZvbyI=" instead "foo". + // This behavior was intentionally changed in Go 1.8. + // See https://golang.org/issues/14493#issuecomment-255857318 + {Name(""), rawText, `"foo"`, true}, // Issue6458 + {Name(""), &rawText, `"foo"`, true}, + {Name(""), []any{rawText}, `["foo"]`, true}, // Issue6458 + {Name(""), &[]any{rawText}, `["foo"]`, true}, // Issue6458 + {Name(""), []any{&rawText}, `["foo"]`, true}, + {Name(""), &[]any{&rawText}, `["foo"]`, true}, + {Name(""), struct{ M RawMessage }{rawText}, `{"M":"foo"}`, true}, // Issue6458 + {Name(""), &struct{ M RawMessage }{rawText}, `{"M":"foo"}`, true}, + {Name(""), struct{ M *RawMessage }{&rawText}, `{"M":"foo"}`, true}, + {Name(""), &struct{ M *RawMessage }{&rawText}, `{"M":"foo"}`, true}, + {Name(""), map[string]any{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 + {Name(""), &map[string]any{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 + {Name(""), map[string]any{"M": &rawText}, `{"M":"foo"}`, true}, + {Name(""), &map[string]any{"M": &rawText}, `{"M":"foo"}`, true}, + {Name(""), T1{rawText}, `{"M":"foo"}`, true}, // Issue6458 + {Name(""), T2{&rawText}, `{"M":"foo"}`, true}, + {Name(""), &T1{rawText}, `{"M":"foo"}`, true}, + {Name(""), &T2{&rawText}, `{"M":"foo"}`, true}, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b, err := Marshal(tt.in) + if ok := (err == nil); ok != tt.ok { + if err != nil { + t.Errorf("%s: Marshal error: %v", tt.Where, err) + } else { + t.Errorf("%s: Marshal error: got nil, want non-nil", tt.Where) + } + } + if got := string(b); got != tt.want { + t.Errorf("%s: Marshal:\n\tinput: %#v\n\tgot: %s\n\twant: %s", tt.Where, tt.in, got, tt.want) + } + }) + } +} + +type marshalPanic struct{} + +func (marshalPanic) MarshalJSON() ([]byte, error) { panic(0xdead) } + +func TestMarshalPanic(t *testing.T) { + defer func() { + if got := recover(); !reflect.DeepEqual(got, 0xdead) { + t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) + } + }() + Marshal(&marshalPanic{}) + t.Error("Marshal should have panicked") +} + +func TestMarshalUncommonFieldNames(t *testing.T) { + v := struct { + A0, À, Aβ int + }{} + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal error:", err) + } + want := `{"A0":0,"À":0,"Aβ":0}` + got := string(b) + if got != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestMarshalerError(t *testing.T) { + s := "test variable" + st := reflect.TypeOf(s) + errText := "json: test error" + + tests := []struct { + CaseName + err *MarshalerError + want string + }{{ + Name(""), + &MarshalerError{st, fmt.Errorf(errText), ""}, + "json: error calling MarshalJSON for type " + st.String() + ": " + errText, + }, { + Name(""), + &MarshalerError{st, fmt.Errorf(errText), "TestMarshalerError"}, + "json: error calling TestMarshalerError for type " + st.String() + ": " + errText, + }} + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got := tt.err.Error() + if got != tt.want { + t.Errorf("%s: Error:\n\tgot: %s\n\twant: %s", tt.Where, got, tt.want) + } + }) + } +} + +type marshaledValue string + +func (v marshaledValue) MarshalJSON() ([]byte, error) { + return []byte(v), nil +} + +func TestIssue63379(t *testing.T) { + for _, v := range []string{ + "[]<", + "[]>", + "[]&", + "[]\u2028", + "[]\u2029", + "{}<", + "{}>", + "{}&", + "{}\u2028", + "{}\u2029", + } { + _, err := Marshal(marshaledValue(v)) + if err == nil { + t.Errorf("expected error for %q", v) + } + } +} diff --git a/src/encoding/json/example_marshaling_test.go b/src/encoding/json/example_marshaling_test.go new file mode 100644 index 0000000..7f15c74 --- /dev/null +++ b/src/encoding/json/example_marshaling_test.go @@ -0,0 +1,73 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "encoding/json" + "fmt" + "log" + "strings" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalJSON() ([]byte, error) { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + + return json.Marshal(s) +} + +func Example_customMarshalJSON() { + blob := `["gopher","armadillo","zebra","unknown","gopher","bee","gopher","zebra"]` + var zoo []Animal + if err := json.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/src/encoding/json/example_test.go b/src/encoding/json/example_test.go new file mode 100644 index 0000000..2261c77 --- /dev/null +++ b/src/encoding/json/example_test.go @@ -0,0 +1,310 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strings" +) + +func ExampleMarshal() { + type ColorGroup struct { + ID int + Name string + Colors []string + } + group := ColorGroup{ + ID: 1, + Name: "Reds", + Colors: []string{"Crimson", "Red", "Ruby", "Maroon"}, + } + b, err := json.Marshal(group) + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + // Output: + // {"ID":1,"Name":"Reds","Colors":["Crimson","Red","Ruby","Maroon"]} +} + +func ExampleUnmarshal() { + var jsonBlob = []byte(`[ + {"Name": "Platypus", "Order": "Monotremata"}, + {"Name": "Quoll", "Order": "Dasyuromorphia"} +]`) + type Animal struct { + Name string + Order string + } + var animals []Animal + err := json.Unmarshal(jsonBlob, &animals) + if err != nil { + fmt.Println("error:", err) + } + fmt.Printf("%+v", animals) + // Output: + // [{Name:Platypus Order:Monotremata} {Name:Quoll Order:Dasyuromorphia}] +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder() { + const jsonStream = ` + {"Name": "Ed", "Text": "Knock knock."} + {"Name": "Sam", "Text": "Who's there?"} + {"Name": "Ed", "Text": "Go fmt."} + {"Name": "Sam", "Text": "Go fmt who?"} + {"Name": "Ed", "Text": "Go fmt yourself!"} +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + var m Message + if err := dec.Decode(&m); err == io.EOF { + break + } else if err != nil { + log.Fatal(err) + } + fmt.Printf("%s: %s\n", m.Name, m.Text) + } + // Output: + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder_Token() { + const jsonStream = ` + {"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234} +` + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + t, err := dec.Token() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v", t, t) + if dec.More() { + fmt.Printf(" (more)") + } + fmt.Printf("\n") + } + // Output: + // json.Delim: { (more) + // string: Message (more) + // string: Hello (more) + // string: Array (more) + // json.Delim: [ (more) + // float64: 1 (more) + // float64: 2 (more) + // float64: 3 + // json.Delim: ] (more) + // string: Null (more) + // <nil>: <nil> (more) + // string: Number (more) + // float64: 1.234 + // json.Delim: } +} + +// This example uses a Decoder to decode a streaming array of JSON objects. +func ExampleDecoder_Decode_stream() { + const jsonStream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + + // read open bracket + t, err := dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + err := dec.Decode(&m) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%v: %v\n", m.Name, m.Text) + } + + // read closing bracket + t, err = dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // Output: + // json.Delim: [ + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! + // json.Delim: ] +} + +// This example uses RawMessage to delay parsing part of a JSON message. +func ExampleRawMessage_unmarshal() { + type Color struct { + Space string + Point json.RawMessage // delay parsing until we know the color space + } + type RGB struct { + R uint8 + G uint8 + B uint8 + } + type YCbCr struct { + Y uint8 + Cb int8 + Cr int8 + } + + var j = []byte(`[ + {"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}}, + {"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}} +]`) + var colors []Color + err := json.Unmarshal(j, &colors) + if err != nil { + log.Fatalln("error:", err) + } + + for _, c := range colors { + var dst any + switch c.Space { + case "RGB": + dst = new(RGB) + case "YCbCr": + dst = new(YCbCr) + } + err := json.Unmarshal(c.Point, dst) + if err != nil { + log.Fatalln("error:", err) + } + fmt.Println(c.Space, dst) + } + // Output: + // YCbCr &{255 0 -10} + // RGB &{98 218 255} +} + +// This example uses RawMessage to use a precomputed JSON during marshal. +func ExampleRawMessage_marshal() { + h := json.RawMessage(`{"precomputed": true}`) + + c := struct { + Header *json.RawMessage `json:"header"` + Body string `json:"body"` + }{Header: &h, Body: "Hello Gophers!"} + + b, err := json.MarshalIndent(&c, "", "\t") + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + + // Output: + // { + // "header": { + // "precomputed": true + // }, + // "body": "Hello Gophers!" + // } +} + +func ExampleIndent() { + type Road struct { + Name string + Number int + } + roads := []Road{ + {"Diamond Fork", 29}, + {"Sheep Creek", 51}, + } + + b, err := json.Marshal(roads) + if err != nil { + log.Fatal(err) + } + + var out bytes.Buffer + json.Indent(&out, b, "=", "\t") + out.WriteTo(os.Stdout) + // Output: + // [ + // = { + // = "Name": "Diamond Fork", + // = "Number": 29 + // = }, + // = { + // = "Name": "Sheep Creek", + // = "Number": 51 + // = } + // =] +} + +func ExampleMarshalIndent() { + data := map[string]int{ + "a": 1, + "b": 2, + } + + b, err := json.MarshalIndent(data, "<prefix>", "<indent>") + if err != nil { + log.Fatal(err) + } + + fmt.Println(string(b)) + // Output: + // { + // <prefix><indent>"a": 1, + // <prefix><indent>"b": 2 + // <prefix>} +} + +func ExampleValid() { + goodJSON := `{"example": 1}` + badJSON := `{"example":2:]}}` + + fmt.Println(json.Valid([]byte(goodJSON)), json.Valid([]byte(badJSON))) + // Output: + // true false +} + +func ExampleHTMLEscape() { + var out bytes.Buffer + json.HTMLEscape(&out, []byte(`{"Name":"<b>HTML content</b>"}`)) + out.WriteTo(os.Stdout) + // Output: + //{"Name":"\u003cb\u003eHTML content\u003c/b\u003e"} +} diff --git a/src/encoding/json/example_text_marshaling_test.go b/src/encoding/json/example_text_marshaling_test.go new file mode 100644 index 0000000..04c7813 --- /dev/null +++ b/src/encoding/json/example_text_marshaling_test.go @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "encoding/json" + "fmt" + "log" + "strings" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalJSON() { + blob := `["small","regular","large","unrecognized","small","normal","small","large"]` + var inventory []Size + if err := json.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/src/encoding/json/fold.go b/src/encoding/json/fold.go new file mode 100644 index 0000000..c4c671b --- /dev/null +++ b/src/encoding/json/fold.go @@ -0,0 +1,48 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "unicode" + "unicode/utf8" +) + +// foldName returns a folded string such that foldName(x) == foldName(y) +// is identical to bytes.EqualFold(x, y). +func foldName(in []byte) []byte { + // This is inlinable to take advantage of "function outlining". + var arr [32]byte // large enough for most JSON names + return appendFoldedName(arr[:0], in) +} + +func appendFoldedName(out, in []byte) []byte { + for i := 0; i < len(in); { + // Handle single-byte ASCII. + if c := in[i]; c < utf8.RuneSelf { + if 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + out = append(out, c) + i++ + continue + } + // Handle multi-byte Unicode. + r, n := utf8.DecodeRune(in[i:]) + out = utf8.AppendRune(out, foldRune(r)) + i += n + } + return out +} + +// foldRune is returns the smallest rune for all runes in the same fold set. +func foldRune(r rune) rune { + for { + r2 := unicode.SimpleFold(r) + if r2 <= r { + return r2 + } + r = r2 + } +} diff --git a/src/encoding/json/fold_test.go b/src/encoding/json/fold_test.go new file mode 100644 index 0000000..9d6fd05 --- /dev/null +++ b/src/encoding/json/fold_test.go @@ -0,0 +1,50 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "testing" +) + +func FuzzEqualFold(f *testing.F) { + for _, ss := range [][2]string{ + {"", ""}, + {"123abc", "123ABC"}, + {"αβδ", "ΑΒΔ"}, + {"abc", "xyz"}, + {"abc", "XYZ"}, + {"1", "2"}, + {"hello, world!", "hello, world!"}, + {"hello, world!", "Hello, World!"}, + {"hello, world!", "HELLO, WORLD!"}, + {"hello, world!", "jello, world!"}, + {"γειά, κόσμε!", "γειά, κόσμε!"}, + {"γειά, κόσμε!", "Γειά, Κόσμε!"}, + {"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!"}, + {"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!"}, + {"AESKey", "aesKey"}, + {"AESKEY", "aes_key"}, + {"aes_key", "AES_KEY"}, + {"AES_KEY", "aes-key"}, + {"aes-key", "AES-KEY"}, + {"AES-KEY", "aesKey"}, + {"aesKey", "AesKey"}, + {"AesKey", "AESKey"}, + {"AESKey", "aeskey"}, + {"DESKey", "aeskey"}, + {"AES Key", "aeskey"}, + } { + f.Add([]byte(ss[0]), []byte(ss[1])) + } + equalFold := func(x, y []byte) bool { return string(foldName(x)) == string(foldName(y)) } + f.Fuzz(func(t *testing.T, x, y []byte) { + got := equalFold(x, y) + want := bytes.EqualFold(x, y) + if got != want { + t.Errorf("equalFold(%q, %q) = %v, want %v", x, y, got, want) + } + }) +} diff --git a/src/encoding/json/fuzz_test.go b/src/encoding/json/fuzz_test.go new file mode 100644 index 0000000..778664c --- /dev/null +++ b/src/encoding/json/fuzz_test.go @@ -0,0 +1,83 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "io" + "testing" +) + +func FuzzUnmarshalJSON(f *testing.F) { + f.Add([]byte(`{ +"object": { + "slice": [ + 1, + 2.0, + "3", + [4], + {5: {}} + ] +}, +"slice": [[]], +"string": ":)", +"int": 1e5, +"float": 3e-9" +}`)) + + f.Fuzz(func(t *testing.T, b []byte) { + for _, typ := range []func() interface{}{ + func() interface{} { return new(interface{}) }, + func() interface{} { return new(map[string]interface{}) }, + func() interface{} { return new([]interface{}) }, + } { + i := typ() + if err := Unmarshal(b, i); err != nil { + return + } + + encoded, err := Marshal(i) + if err != nil { + t.Fatalf("failed to marshal: %s", err) + } + + if err := Unmarshal(encoded, i); err != nil { + t.Fatalf("failed to roundtrip: %s", err) + } + } + }) +} + +func FuzzDecoderToken(f *testing.F) { + f.Add([]byte(`{ +"object": { + "slice": [ + 1, + 2.0, + "3", + [4], + {5: {}} + ] +}, +"slice": [[]], +"string": ":)", +"int": 1e5, +"float": 3e-9" +}`)) + + f.Fuzz(func(t *testing.T, b []byte) { + r := bytes.NewReader(b) + d := NewDecoder(r) + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + return + } + } + }) +} diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go new file mode 100644 index 0000000..01bfdf6 --- /dev/null +++ b/src/encoding/json/indent.go @@ -0,0 +1,182 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "bytes" + +// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 +// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 +// so that the JSON will be safe to embed inside HTML <script> tags. +// For historical reasons, web browsers don't honor standard HTML +// escaping within <script> tags, so an alternative JSON encoding must be used. +func HTMLEscape(dst *bytes.Buffer, src []byte) { + dst.Grow(len(src)) + dst.Write(appendHTMLEscape(dst.AvailableBuffer(), src)) +} + +func appendHTMLEscape(dst, src []byte) []byte { + // The characters can only appear in string literals, + // so just scan the string one byte at a time. + start := 0 + for i, c := range src { + if c == '<' || c == '>' || c == '&' { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) + start = i + 1 + } + // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). + if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) + start = i + len("\u2029") + } + } + return append(dst, src[start:]...) +} + +// Compact appends to dst the JSON-encoded src with +// insignificant space characters elided. +func Compact(dst *bytes.Buffer, src []byte) error { + dst.Grow(len(src)) + b := dst.AvailableBuffer() + b, err := appendCompact(b, src, false) + dst.Write(b) + return err +} + +func appendCompact(dst, src []byte, escape bool) ([]byte, error) { + origLen := len(dst) + scan := newScanner() + defer freeScanner(scan) + start := 0 + for i, c := range src { + if escape && (c == '<' || c == '>' || c == '&') { + if start < i { + dst = append(dst, src[start:i]...) + } + dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) + start = i + 1 + } + // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). + if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { + if start < i { + dst = append(dst, src[start:i]...) + } + dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) + start = i + 3 + } + v := scan.step(scan, c) + if v >= scanSkipSpace { + if v == scanError { + break + } + if start < i { + dst = append(dst, src[start:i]...) + } + start = i + 1 + } + } + if scan.eof() == scanError { + return dst[:origLen], scan.err + } + if start < len(src) { + dst = append(dst, src[start:]...) + } + return dst, nil +} + +func appendNewline(dst []byte, prefix, indent string, depth int) []byte { + dst = append(dst, '\n') + dst = append(dst, prefix...) + for i := 0; i < depth; i++ { + dst = append(dst, indent...) + } + return dst +} + +// indentGrowthFactor specifies the growth factor of indenting JSON input. +// Empirically, the growth factor was measured to be between 1.4x to 1.8x +// for some set of compacted JSON with the indent being a single tab. +// Specify a growth factor slightly larger than what is observed +// to reduce probability of allocation in appendIndent. +// A factor no higher than 2 ensures that wasted space never exceeds 50%. +const indentGrowthFactor = 2 + +// Indent appends to dst an indented form of the JSON-encoded src. +// Each element in a JSON object or array begins on a new, +// indented line beginning with prefix followed by one or more +// copies of indent according to the indentation nesting. +// The data appended to dst does not begin with the prefix nor +// any indentation, to make it easier to embed inside other formatted JSON data. +// Although leading space characters (space, tab, carriage return, newline) +// at the beginning of src are dropped, trailing space characters +// at the end of src are preserved and copied to dst. +// For example, if src has no trailing spaces, neither will dst; +// if src ends in a trailing newline, so will dst. +func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { + dst.Grow(indentGrowthFactor * len(src)) + b := dst.AvailableBuffer() + b, err := appendIndent(b, src, prefix, indent) + dst.Write(b) + return err +} + +func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) { + origLen := len(dst) + scan := newScanner() + defer freeScanner(scan) + needIndent := false + depth := 0 + for _, c := range src { + scan.bytes++ + v := scan.step(scan, c) + if v == scanSkipSpace { + continue + } + if v == scanError { + break + } + if needIndent && v != scanEndObject && v != scanEndArray { + needIndent = false + depth++ + dst = appendNewline(dst, prefix, indent, depth) + } + + // Emit semantically uninteresting bytes + // (in particular, punctuation in strings) unmodified. + if v == scanContinue { + dst = append(dst, c) + continue + } + + // Add spacing around real punctuation. + switch c { + case '{', '[': + // delay indent so that empty object and array are formatted as {} and []. + needIndent = true + dst = append(dst, c) + case ',': + dst = append(dst, c) + dst = appendNewline(dst, prefix, indent, depth) + case ':': + dst = append(dst, c, ' ') + case '}', ']': + if needIndent { + // suppress indent in empty object/array + needIndent = false + } else { + depth-- + dst = appendNewline(dst, prefix, indent, depth) + } + dst = append(dst, c) + default: + dst = append(dst, c) + } + } + if scan.eof() == scanError { + return dst[:origLen], scan.err + } + return dst, nil +} diff --git a/src/encoding/json/number_test.go b/src/encoding/json/number_test.go new file mode 100644 index 0000000..c82e6de --- /dev/null +++ b/src/encoding/json/number_test.go @@ -0,0 +1,118 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "regexp" + "testing" +) + +func TestNumberIsValid(t *testing.T) { + // From: https://stackoverflow.com/a/13340826 + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + + validTests := []string{ + "0", + "-0", + "1", + "-1", + "0.1", + "-0.1", + "1234", + "-1234", + "12.34", + "-12.34", + "12E0", + "12E1", + "12e34", + "12E-0", + "12e+1", + "12e-34", + "-12E0", + "-12E1", + "-12e34", + "-12E-0", + "-12e+1", + "-12e-34", + "1.2E0", + "1.2E1", + "1.2e34", + "1.2E-0", + "1.2e+1", + "1.2e-34", + "-1.2E0", + "-1.2E1", + "-1.2e34", + "-1.2E-0", + "-1.2e+1", + "-1.2e-34", + "0E0", + "0E1", + "0e34", + "0E-0", + "0e+1", + "0e-34", + "-0E0", + "-0E1", + "-0e34", + "-0E-0", + "-0e+1", + "-0e-34", + } + + for _, test := range validTests { + if !isValidNumber(test) { + t.Errorf("%s should be valid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err != nil { + t.Errorf("%s should be valid but Unmarshal failed: %v", test, err) + } + + if !jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be valid but regexp does not match", test) + } + } + + invalidTests := []string{ + "", + "invalid", + "1.0.1", + "1..1", + "-1-2", + "012a42", + "01.2", + "012", + "12E12.12", + "1e2e3", + "1e+-2", + "1e--23", + "1e", + "e1", + "1e+", + "1ea", + "1a", + "1.a", + "1.", + "01", + "1.e1", + } + + for _, test := range invalidTests { + if isValidNumber(test) { + t.Errorf("%s should be invalid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err == nil { + t.Errorf("%s should be invalid but unmarshal wrote %v", test, f) + } + + if jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be invalid but matches regexp", test) + } + } +} diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go new file mode 100644 index 0000000..da6ea2a --- /dev/null +++ b/src/encoding/json/scanner.go @@ -0,0 +1,610 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +// JSON value parser state machine. +// Just about at the limit of what is reasonable to write by hand. +// Some parts are a bit tedious, but overall it nicely factors out the +// otherwise common code from the multiple scanning functions +// in this package (Compact, Indent, checkValid, etc). +// +// This file starts with two simple examples using the scanner +// before diving into the scanner itself. + +import ( + "strconv" + "sync" +) + +// Valid reports whether data is a valid JSON encoding. +func Valid(data []byte) bool { + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil +} + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +// checkValid returns nil or a SyntaxError. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// A SyntaxError is a description of a JSON syntax error. +// [Unmarshal] will return a SyntaxError if the JSON can't be parsed. +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. +// Callers call scan.reset and then pass bytes in one at a time +// by calling scan.step(&scan, c) for each byte. +// The return value, referred to as an opcode, tells the +// caller about significant parsing events like beginning +// and ending literals, objects, and arrays, so that the +// caller can follow along if it wishes. +// The return value scanEnd indicates that a single top-level +// JSON value has been completed, *before* the byte that +// just got passed in. (The indication must be delayed in order +// to recognize the end of numbers: is 123 a whole value or +// the beginning of 12345e+6?). +type scanner struct { + // The step is a func to be called to execute the next transition. + // Also tried using an integer constant and a single func + // with a switch, but using the func directly was 10% faster + // on a 64-bit Mac Mini, and it's nicer to read. + step func(*scanner, byte) int + + // Reached end of top-level value. + endTop bool + + // Stack of what we're in the middle of - array values, object keys, object values. + parseState []int + + // Error that happened, if any. + err error + + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) + bytes int64 +} + +var scannerPool = sync.Pool{ + New: func() any { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + +// These values are returned by the state transition functions +// assigned to scanner.state and the method scanner.eof. +// They give details about the current state of the scan that +// callers might be interested to know about. +// It is okay to ignore the return value of any particular +// call to scanner.state: if one call returns scanError, +// every subsequent call will return scanError too. +const ( + // Continue. + scanContinue = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + + // Stop. + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. +) + +// These values are stored in the parseState stack. +// They give the current state of a composite value +// being scanned. If the parser is inside a nested value +// the parseState describes the nested state, outermost at entry 0. +const ( + parseObjectKey = iota // parsing object key (before colon) + parseObjectValue // parsing object value (after colon) + parseArrayValue // parsing array value +) + +// This limits the max nesting depth to prevent stack overflow. +// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 +const maxNestingDepth = 10000 + +// reset prepares the scanner for use. +// It must be called before calling s.step. +func (s *scanner) reset() { + s.step = stateBeginValue + s.parseState = s.parseState[0:0] + s.err = nil + s.endTop = false +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() int { + if s.err != nil { + return scanError + } + if s.endTop { + return scanEnd + } + s.step(s, ' ') + if s.endTop { + return scanEnd + } + if s.err == nil { + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + } + return scanError +} + +// pushParseState pushes a new parse state p onto the parse stack. +// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. +func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { + s.parseState = append(s.parseState, newParseState) + if len(s.parseState) <= maxNestingDepth { + return successState + } + return s.error(c, "exceeded max depth") +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.parseState) - 1 + s.parseState = s.parseState[0:n] + if n == 0 { + s.step = stateEndTop + s.endTop = true + } else { + s.step = stateEndValue + } +} + +func isSpace(c byte) bool { + return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') +} + +// stateBeginValueOrEmpty is the state after reading `[`. +func stateBeginValueOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + return s.pushParseState(c, parseObjectKey, scanBeginObject) + case '[': + s.step = stateBeginValueOrEmpty + return s.pushParseState(c, parseArrayValue, scanBeginArray) + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '}' { + n := len(s.parseState) + s.parseState[n-1] = parseObjectValue + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. +func stateEndValue(s *scanner, c byte) int { + n := len(s.parseState) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + s.endTop = true + return stateEndTop(s, c) + } + if isSpace(c) { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.parseState[n-1] + switch ps { + case parseObjectKey: + if c == ':' { + s.parseState[n-1] = parseObjectValue + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case parseObjectValue: + if c == ',' { + s.parseState[n-1] = parseObjectKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case parseArrayValue: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c byte) int { + if !isSpace(c) { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c byte) int { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. +func stateInStringEsc(s *scanner, c byte) int { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + case 'u': + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateNeg is the state after reading `-` during a number. +func stateNeg(s *scanner, c byte) int { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c byte) int { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c byte) int { + if c == '+' || c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. +func stateESign(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c byte) int { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c byte) int { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c byte) int { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c byte) int { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. +func stateFal(s *scanner, c byte) int { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. +func stateFals(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c byte) int { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c byte) int { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c byte) int { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c byte) int { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c byte, context string) int { + s.step = stateError + s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + return scanError +} + +// quoteChar formats c as a quoted character literal. +func quoteChar(c byte) string { + // special cases - different from quoted strings + if c == '\'' { + return `'\''` + } + if c == '"' { + return `'"'` + } + + // use quoted string with different quotation marks + s := strconv.Quote(string(c)) + return "'" + s[1:len(s)-1] + "'" +} diff --git a/src/encoding/json/scanner_test.go b/src/encoding/json/scanner_test.go new file mode 100644 index 0000000..068439d --- /dev/null +++ b/src/encoding/json/scanner_test.go @@ -0,0 +1,304 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "math" + "math/rand" + "reflect" + "strings" + "testing" +) + +func indentNewlines(s string) string { + return strings.Join(strings.Split(s, "\n"), "\n\t") +} + +func stripWhitespace(s string) string { + return strings.Map(func(r rune) rune { + if r == ' ' || r == '\n' || r == '\r' || r == '\t' { + return -1 + } + return r + }, s) +} + +func TestValid(t *testing.T) { + tests := []struct { + CaseName + data string + ok bool + }{ + {Name(""), `foo`, false}, + {Name(""), `}{`, false}, + {Name(""), `{]`, false}, + {Name(""), `{}`, true}, + {Name(""), `{"foo":"bar"}`, true}, + {Name(""), `{"foo":"bar","bar":{"baz":["qux"]}}`, true}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + if ok := Valid([]byte(tt.data)); ok != tt.ok { + t.Errorf("%s: Valid(`%s`) = %v, want %v", tt.Where, tt.data, ok, tt.ok) + } + }) + } +} + +func TestCompactAndIndent(t *testing.T) { + tests := []struct { + CaseName + compact string + indent string + }{ + {Name(""), `1`, `1`}, + {Name(""), `{}`, `{}`}, + {Name(""), `[]`, `[]`}, + {Name(""), `{"":2}`, "{\n\t\"\": 2\n}"}, + {Name(""), `[3]`, "[\n\t3\n]"}, + {Name(""), `[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"}, + {Name(""), `{"x":1}`, "{\n\t\"x\": 1\n}"}, + {Name(""), `[true,false,null,"x",1,1.5,0,-5e+2]`, `[ + true, + false, + null, + "x", + 1, + 1.5, + 0, + -5e+2 +]`}, + {Name(""), "{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070 + } + var buf bytes.Buffer + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + buf.Reset() + if err := Compact(&buf, []byte(tt.compact)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + + buf.Reset() + if err := Compact(&buf, []byte(tt.indent)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil { + t.Errorf("%s: Indent error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.indent { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent)) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil { + t.Errorf("%s: Indent error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.indent { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.indent)) + } + }) + } +} + +func TestCompactSeparators(t *testing.T) { + // U+2028 and U+2029 should be escaped inside strings. + // They should not appear outside strings. + tests := []struct { + CaseName + in, compact string + }{ + {Name(""), "{\"\u2028\": 1}", "{\"\u2028\":1}"}, + {Name(""), "{\"\u2029\" :2}", "{\"\u2029\":2}"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + var buf bytes.Buffer + if err := Compact(&buf, []byte(tt.in)); err != nil { + t.Errorf("%s: Compact error: %v", tt.Where, err) + } else if got := buf.String(); got != tt.compact { + t.Errorf("%s: Compact:\n\tgot: %s\n\twant: %s", tt.Where, indentNewlines(got), indentNewlines(tt.compact)) + } + }) + } +} + +// Tests of a large random structure. + +func TestCompactBig(t *testing.T) { + initBig() + var buf bytes.Buffer + if err := Compact(&buf, jsonBig); err != nil { + t.Fatalf("Compact error: %v", err) + } + b := buf.Bytes() + if !bytes.Equal(b, jsonBig) { + t.Error("Compact:") + diff(t, b, jsonBig) + return + } +} + +func TestIndentBig(t *testing.T) { + t.Parallel() + initBig() + var buf bytes.Buffer + if err := Indent(&buf, jsonBig, "", "\t"); err != nil { + t.Fatalf("Indent error: %v", err) + } + b := buf.Bytes() + if len(b) == len(jsonBig) { + // jsonBig is compact (no unnecessary spaces); + // indenting should make it bigger + t.Fatalf("Indent did not expand the input") + } + + // should be idempotent + var buf1 bytes.Buffer + if err := Indent(&buf1, b, "", "\t"); err != nil { + t.Fatalf("Indent error: %v", err) + } + b1 := buf1.Bytes() + if !bytes.Equal(b1, b) { + t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig):") + diff(t, b1, b) + return + } + + // should get back to original + buf1.Reset() + if err := Compact(&buf1, b); err != nil { + t.Fatalf("Compact error: %v", err) + } + b1 = buf1.Bytes() + if !bytes.Equal(b1, jsonBig) { + t.Error("Compact(Indent(jsonBig)) != jsonBig:") + diff(t, b1, jsonBig) + return + } +} + +func TestIndentErrors(t *testing.T) { + tests := []struct { + CaseName + in string + err error + }{ + {Name(""), `{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}}, + {Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + slice := make([]uint8, 0) + buf := bytes.NewBuffer(slice) + if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { + if !reflect.DeepEqual(err, tt.err) { + t.Fatalf("%s: Indent error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) + } + } + }) + } +} + +func diff(t *testing.T, a, b []byte) { + t.Helper() + for i := 0; ; i++ { + if i >= len(a) || i >= len(b) || a[i] != b[i] { + j := i - 10 + if j < 0 { + j = 0 + } + t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:])) + return + } + } +} + +func trim(b []byte) []byte { + return b[:min(len(b), 20)] +} + +// Generate a random JSON object. + +var jsonBig []byte + +func initBig() { + n := 10000 + if testing.Short() { + n = 100 + } + b, err := Marshal(genValue(n)) + if err != nil { + panic(err) + } + jsonBig = b +} + +func genValue(n int) any { + if n > 1 { + switch rand.Intn(2) { + case 0: + return genArray(n) + case 1: + return genMap(n) + } + } + switch rand.Intn(3) { + case 0: + return rand.Intn(2) == 0 + case 1: + return rand.NormFloat64() + case 2: + return genString(30) + } + panic("unreachable") +} + +func genString(stddev float64) string { + n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2)) + c := make([]rune, n) + for i := range c { + f := math.Abs(rand.NormFloat64()*64 + 32) + if f > 0x10ffff { + f = 0x10ffff + } + c[i] = rune(f) + } + return string(c) +} + +func genArray(n int) []any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if f < 1 { + f = 1 + } + x := make([]any, f) + for i := range x { + x[i] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} + +func genMap(n int) map[string]any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if n > 0 && f == 0 { + f = 1 + } + x := make(map[string]any) + for i := 0; i < f; i++ { + x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go new file mode 100644 index 0000000..5c98d1d --- /dev/null +++ b/src/encoding/json/stream.go @@ -0,0 +1,511 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "errors" + "io" +) + +// A Decoder reads and decodes JSON values from an input stream. +type Decoder struct { + r io.Reader + buf []byte + d decodeState + scanp int // start of unread data in buf + scanned int64 // amount of data already scanned + scan scanner + err error + + tokenState int + tokenStack []int +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may +// read data from r beyond the JSON values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +// UseNumber causes the Decoder to unmarshal a number into an interface{} as a +// [Number] instead of as a float64. +func (dec *Decoder) UseNumber() { dec.d.useNumber = true } + +// DisallowUnknownFields causes the Decoder to return an error when the destination +// is a struct and the input contains object keys which do not match any +// non-ignored, exported fields in the destination. +func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } + +// Decode reads the next JSON-encoded value from its +// input and stores it in the value pointed to by v. +// +// See the documentation for [Unmarshal] for details about +// the conversion of JSON into a Go value. +func (dec *Decoder) Decode(v any) error { + if dec.err != nil { + return dec.err + } + + if err := dec.tokenPrepareForDecode(); err != nil { + return err + } + + if !dec.tokenValueAllowed() { + return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} + } + + // Read whole value into buffer. + n, err := dec.readValue() + if err != nil { + return err + } + dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) + dec.scanp += n + + // Don't save err from unmarshal into dec.err: + // the connection is still usable since we read a complete JSON + // object from it before the error happened. + err = dec.d.unmarshal(v) + + // fixup token streaming state + dec.tokenValueEnd() + + return err +} + +// Buffered returns a reader of the data remaining in the Decoder's +// buffer. The reader is valid until the next call to [Decoder.Decode]. +func (dec *Decoder) Buffered() io.Reader { + return bytes.NewReader(dec.buf[dec.scanp:]) +} + +// readValue reads a JSON value into dec.buf. +// It returns the length of the encoding. +func (dec *Decoder) readValue() (int, error) { + dec.scan.reset() + + scanp := dec.scanp + var err error +Input: + // help the compiler see that scanp is never negative, so it can remove + // some bounds checks below. + for scanp >= 0 { + + // Look in the buffer for a new value. + for ; scanp < len(dec.buf); scanp++ { + c := dec.buf[scanp] + dec.scan.bytes++ + switch dec.scan.step(&dec.scan, c) { + case scanEnd: + // scanEnd is delayed one byte so we decrement + // the scanner bytes count by 1 to ensure that + // this value is correct in the next call of Decode. + dec.scan.bytes-- + break Input + case scanEndObject, scanEndArray: + // scanEnd is delayed one byte. + // We might block trying to get that byte from src, + // so instead invent a space byte. + if stateEndValue(&dec.scan, ' ') == scanEnd { + scanp++ + break Input + } + case scanError: + dec.err = dec.scan.err + return 0, dec.scan.err + } + } + + // Did the last read have an error? + // Delayed until now to allow buffer scan. + if err != nil { + if err == io.EOF { + if dec.scan.step(&dec.scan, ' ') == scanEnd { + break Input + } + if nonSpace(dec.buf) { + err = io.ErrUnexpectedEOF + } + } + dec.err = err + return 0, err + } + + n := scanp - dec.scanp + err = dec.refill() + scanp = dec.scanp + n + } + return scanp - dec.scanp, nil +} + +func (dec *Decoder) refill() error { + // Make room to read more into the buffer. + // First slide down data already consumed. + if dec.scanp > 0 { + dec.scanned += int64(dec.scanp) + n := copy(dec.buf, dec.buf[dec.scanp:]) + dec.buf = dec.buf[:n] + dec.scanp = 0 + } + + // Grow buffer if not large enough. + const minRead = 512 + if cap(dec.buf)-len(dec.buf) < minRead { + newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) + copy(newBuf, dec.buf) + dec.buf = newBuf + } + + // Read. Delay error for next iteration (after scan). + n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) + dec.buf = dec.buf[0 : len(dec.buf)+n] + + return err +} + +func nonSpace(b []byte) bool { + for _, c := range b { + if !isSpace(c) { + return true + } + } + return false +} + +// An Encoder writes JSON values to an output stream. +type Encoder struct { + w io.Writer + err error + escapeHTML bool + + indentBuf []byte + indentPrefix string + indentValue string +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{w: w, escapeHTML: true} +} + +// Encode writes the JSON encoding of v to the stream, +// followed by a newline character. +// +// See the documentation for [Marshal] for details about the +// conversion of Go values to JSON. +func (enc *Encoder) Encode(v any) error { + if enc.err != nil { + return enc.err + } + + e := newEncodeState() + defer encodeStatePool.Put(e) + + err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) + if err != nil { + return err + } + + // Terminate each value with a newline. + // This makes the output look a little nicer + // when debugging, and some kind of space + // is required if the encoded value was a number, + // so that the reader knows there aren't more + // digits coming. + e.WriteByte('\n') + + b := e.Bytes() + if enc.indentPrefix != "" || enc.indentValue != "" { + enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) + if err != nil { + return err + } + b = enc.indentBuf + } + if _, err = enc.w.Write(b); err != nil { + enc.err = err + } + return err +} + +// SetIndent instructs the encoder to format each subsequent encoded +// value as if indented by the package-level function Indent(dst, src, prefix, indent). +// Calling SetIndent("", "") disables indentation. +func (enc *Encoder) SetIndent(prefix, indent string) { + enc.indentPrefix = prefix + enc.indentValue = indent +} + +// SetEscapeHTML specifies whether problematic HTML characters +// should be escaped inside JSON quoted strings. +// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e +// to avoid certain safety problems that can arise when embedding JSON in HTML. +// +// In non-HTML settings where the escaping interferes with the readability +// of the output, SetEscapeHTML(false) disables this behavior. +func (enc *Encoder) SetEscapeHTML(on bool) { + enc.escapeHTML = on +} + +// RawMessage is a raw encoded JSON value. +// It implements [Marshaler] and [Unmarshaler] and can +// be used to delay JSON decoding or precompute a JSON encoding. +type RawMessage []byte + +// MarshalJSON returns m as the JSON encoding of m. +func (m RawMessage) MarshalJSON() ([]byte, error) { + if m == nil { + return []byte("null"), nil + } + return m, nil +} + +// UnmarshalJSON sets *m to a copy of data. +func (m *RawMessage) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") + } + *m = append((*m)[0:0], data...) + return nil +} + +var _ Marshaler = (*RawMessage)(nil) +var _ Unmarshaler = (*RawMessage)(nil) + +// A Token holds a value of one of these types: +// +// - [Delim], for the four JSON delimiters [ ] { } +// - bool, for JSON booleans +// - float64, for JSON numbers +// - [Number], for JSON numbers +// - string, for JSON string literals +// - nil, for JSON null +type Token any + +const ( + tokenTopValue = iota + tokenArrayStart + tokenArrayValue + tokenArrayComma + tokenObjectStart + tokenObjectKey + tokenObjectColon + tokenObjectValue + tokenObjectComma +) + +// advance tokenstate from a separator state to a value state +func (dec *Decoder) tokenPrepareForDecode() error { + // Note: Not calling peek before switch, to avoid + // putting peek into the standard Decode path. + // peek is only called when using the Token API. + switch dec.tokenState { + case tokenArrayComma: + c, err := dec.peek() + if err != nil { + return err + } + if c != ',' { + return &SyntaxError{"expected comma after array element", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenArrayValue + case tokenObjectColon: + c, err := dec.peek() + if err != nil { + return err + } + if c != ':' { + return &SyntaxError{"expected colon after object key", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenObjectValue + } + return nil +} + +func (dec *Decoder) tokenValueAllowed() bool { + switch dec.tokenState { + case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: + return true + } + return false +} + +func (dec *Decoder) tokenValueEnd() { + switch dec.tokenState { + case tokenArrayStart, tokenArrayValue: + dec.tokenState = tokenArrayComma + case tokenObjectValue: + dec.tokenState = tokenObjectComma + } +} + +// A Delim is a JSON array or object delimiter, one of [ ] { or }. +type Delim rune + +func (d Delim) String() string { + return string(d) +} + +// Token returns the next JSON token in the input stream. +// At the end of the input stream, Token returns nil, [io.EOF]. +// +// Token guarantees that the delimiters [ ] { } it returns are +// properly nested and matched: if Token encounters an unexpected +// delimiter in the input, it will return an error. +// +// The input stream consists of basic JSON values—bool, string, +// number, and null—along with delimiters [ ] { } of type [Delim] +// to mark the start and end of arrays and objects. +// Commas and colons are elided. +func (dec *Decoder) Token() (Token, error) { + for { + c, err := dec.peek() + if err != nil { + return nil, err + } + switch c { + case '[': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenArrayStart + return Delim('['), nil + + case ']': + if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim(']'), nil + + case '{': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenObjectStart + return Delim('{'), nil + + case '}': + if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim('}'), nil + + case ':': + if dec.tokenState != tokenObjectColon { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = tokenObjectValue + continue + + case ',': + if dec.tokenState == tokenArrayComma { + dec.scanp++ + dec.tokenState = tokenArrayValue + continue + } + if dec.tokenState == tokenObjectComma { + dec.scanp++ + dec.tokenState = tokenObjectKey + continue + } + return dec.tokenError(c) + + case '"': + if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { + var x string + old := dec.tokenState + dec.tokenState = tokenTopValue + err := dec.Decode(&x) + dec.tokenState = old + if err != nil { + return nil, err + } + dec.tokenState = tokenObjectColon + return x, nil + } + fallthrough + + default: + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + var x any + if err := dec.Decode(&x); err != nil { + return nil, err + } + return x, nil + } + } +} + +func (dec *Decoder) tokenError(c byte) (Token, error) { + var context string + switch dec.tokenState { + case tokenTopValue: + context = " looking for beginning of value" + case tokenArrayStart, tokenArrayValue, tokenObjectValue: + context = " looking for beginning of value" + case tokenArrayComma: + context = " after array element" + case tokenObjectKey: + context = " looking for beginning of object key string" + case tokenObjectColon: + context = " after object key" + case tokenObjectComma: + context = " after object key:value pair" + } + return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} +} + +// More reports whether there is another element in the +// current array or object being parsed. +func (dec *Decoder) More() bool { + c, err := dec.peek() + return err == nil && c != ']' && c != '}' +} + +func (dec *Decoder) peek() (byte, error) { + var err error + for { + for i := dec.scanp; i < len(dec.buf); i++ { + c := dec.buf[i] + if isSpace(c) { + continue + } + dec.scanp = i + return c, nil + } + // buffer has been scanned, now report any error + if err != nil { + return 0, err + } + err = dec.refill() + } +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (dec *Decoder) InputOffset() int64 { + return dec.scanned + int64(dec.scanp) +} diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go new file mode 100644 index 0000000..32ede8c --- /dev/null +++ b/src/encoding/json/stream_test.go @@ -0,0 +1,522 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "fmt" + "io" + "log" + "net" + "net/http" + "net/http/httptest" + "path" + "reflect" + "runtime" + "runtime/debug" + "strings" + "testing" +) + +// TODO(https://go.dev/issue/52751): Replace with native testing support. + +// CaseName is a case name annotated with a file and line. +type CaseName struct { + Name string + Where CasePos +} + +// Name annotates a case name with the file and line of the caller. +func Name(s string) (c CaseName) { + c.Name = s + runtime.Callers(2, c.Where.pc[:]) + return c +} + +// CasePos represents a file and line number. +type CasePos struct{ pc [1]uintptr } + +func (pos CasePos) String() string { + frames := runtime.CallersFrames(pos.pc[:]) + frame, _ := frames.Next() + return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line) +} + +// Test values for the stream test. +// One of each JSON kind. +var streamTest = []any{ + 0.1, + "hello", + nil, + true, + false, + []any{"a", "b", "c"}, + map[string]any{"K": "Kelvin", "ß": "long s"}, + 3.14, // another value to make sure something can follow map +} + +var streamEncoded = `0.1 +"hello" +null +true +false +["a","b","c"] +{"ß":"long s","K":"Kelvin"} +3.14 +` + +func TestEncoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + var buf strings.Builder + enc := NewEncoder(&buf) + // Check that enc.SetIndent("", "") turns off indentation. + enc.SetIndent(">", ".") + enc.SetIndent("", "") + for j, v := range streamTest[0:i] { + if err := enc.Encode(v); err != nil { + t.Fatalf("#%d.%d Encode error: %v", i, j, err) + } + } + if have, want := buf.String(), nlines(streamEncoded, i); have != want { + t.Errorf("encoding %d items: mismatch:", i) + diff(t, []byte(have), []byte(want)) + break + } + } +} + +func TestEncoderErrorAndReuseEncodeState(t *testing.T) { + // Disable the GC temporarily to prevent encodeState's in Pool being cleaned away during the test. + percent := debug.SetGCPercent(-1) + defer debug.SetGCPercent(percent) + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(dummy); err == nil { + t.Errorf("Encode(dummy) error: got nil, want non-nil") + } + + type Data struct { + A string + I int + } + want := Data{A: "a", I: 1} + if err := enc.Encode(want); err != nil { + t.Errorf("Marshal error: %v", err) + } + + var got Data + if err := Unmarshal(buf.Bytes(), &got); err != nil { + t.Errorf("Unmarshal error: %v", err) + } + if got != want { + t.Errorf("Marshal/Unmarshal roundtrip:\n\tgot: %v\n\twant: %v", got, want) + } +} + +var streamEncodedIndent = `0.1 +"hello" +null +true +false +[ +>."a", +>."b", +>."c" +>] +{ +>."ß": "long s", +>."K": "Kelvin" +>} +3.14 +` + +func TestEncoderIndent(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + enc.SetIndent(">", ".") + for _, v := range streamTest { + enc.Encode(v) + } + if have, want := buf.String(), streamEncodedIndent; have != want { + t.Error("Encode mismatch:") + diff(t, []byte(have), []byte(want)) + } +} + +type strMarshaler string + +func (s strMarshaler) MarshalJSON() ([]byte, error) { + return []byte(s), nil +} + +type strPtrMarshaler string + +func (s *strPtrMarshaler) MarshalJSON() ([]byte, error) { + return []byte(*s), nil +} + +func TestEncoderSetEscapeHTML(t *testing.T) { + var c C + var ct CText + var tagStruct struct { + Valid int `json:"<>&#! "` + Invalid int `json:"\\"` + } + + // This case is particularly interesting, as we force the encoder to + // take the address of the Ptr field to use its MarshalJSON method. This + // is why the '&' is important. + marshalerStruct := &struct { + NonPtr strMarshaler + Ptr strPtrMarshaler + }{`"<str>"`, `"<str>"`} + + // https://golang.org/issue/34154 + stringOption := struct { + Bar string `json:"bar,string"` + }{`<html>foobar</html>`} + + tests := []struct { + CaseName + v any + wantEscape string + want string + }{ + {Name("c"), c, `"\u003c\u0026\u003e"`, `"<&>"`}, + {Name("ct"), ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`}, + {Name(`"<&>"`), "<&>", `"\u003c\u0026\u003e"`, `"<&>"`}, + { + Name("tagStruct"), tagStruct, + `{"\u003c\u003e\u0026#! ":0,"Invalid":0}`, + `{"<>&#! ":0,"Invalid":0}`, + }, + { + Name(`"<str>"`), marshalerStruct, + `{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`, + `{"NonPtr":"<str>","Ptr":"<str>"}`, + }, + { + Name("stringOption"), stringOption, + `{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`, + `{"bar":"\"<html>foobar</html>\""}`, + }, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.Encode(tt.v); err != nil { + t.Fatalf("%s: Encode(%s) error: %s", tt.Where, tt.Name, err) + } + if got := strings.TrimSpace(buf.String()); got != tt.wantEscape { + t.Errorf("%s: Encode(%s):\n\tgot: %s\n\twant: %s", tt.Where, tt.Name, got, tt.wantEscape) + } + buf.Reset() + enc.SetEscapeHTML(false) + if err := enc.Encode(tt.v); err != nil { + t.Fatalf("%s: SetEscapeHTML(false) Encode(%s) error: %s", tt.Where, tt.Name, err) + } + if got := strings.TrimSpace(buf.String()); got != tt.want { + t.Errorf("%s: SetEscapeHTML(false) Encode(%s):\n\tgot: %s\n\twant: %s", + tt.Where, tt.Name, got, tt.want) + } + }) + } +} + +func TestDecoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + // Use stream without newlines as input, + // just to stress the decoder even more. + // Our test input does not include back-to-back numbers. + // Otherwise stripping the newlines would + // merge two adjacent JSON values. + var buf bytes.Buffer + for _, c := range nlines(streamEncoded, i) { + if c != '\n' { + buf.WriteRune(c) + } + } + out := make([]any, i) + dec := NewDecoder(&buf) + for j := range out { + if err := dec.Decode(&out[j]); err != nil { + t.Fatalf("decode #%d/%d error: %v", j, i, err) + } + } + if !reflect.DeepEqual(out, streamTest[0:i]) { + t.Errorf("decoding %d items: mismatch:", i) + for j := range out { + if !reflect.DeepEqual(out[j], streamTest[j]) { + t.Errorf("#%d:\n\tgot: %v\n\twant: %v", j, out[j], streamTest[j]) + } + } + break + } + } +} + +func TestDecoderBuffered(t *testing.T) { + r := strings.NewReader(`{"Name": "Gopher"} extra `) + var m struct { + Name string + } + d := NewDecoder(r) + err := d.Decode(&m) + if err != nil { + t.Fatal(err) + } + if m.Name != "Gopher" { + t.Errorf("Name = %s, want Gopher", m.Name) + } + rest, err := io.ReadAll(d.Buffered()) + if err != nil { + t.Fatal(err) + } + if got, want := string(rest), " extra "; got != want { + t.Errorf("Remaining = %s, want %s", got, want) + } +} + +func nlines(s string, n int) string { + if n <= 0 { + return "" + } + for i, c := range s { + if c == '\n' { + if n--; n == 0 { + return s[0 : i+1] + } + } + } + return s +} + +func TestRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + Y float32 + } + const raw = `["\u0056",null]` + const want = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}` + err := Unmarshal([]byte(want), &data) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if string([]byte(data.Id)) != raw { + t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", []byte(data.Id), raw) + } + got, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestNullRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + IdPtr *RawMessage + Y float32 + } + const want = `{"X":0.1,"Id":null,"IdPtr":null,"Y":0.2}` + err := Unmarshal([]byte(want), &data) + if err != nil { + t.Fatalf("Unmarshal error: %v", err) + } + if want, got := "null", string(data.Id); want != got { + t.Fatalf("Unmarshal:\n\tgot: %s\n\twant: %s", got, want) + } + if data.IdPtr != nil { + t.Fatalf("pointer mismatch: got non-nil, want nil") + } + got, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if string(got) != want { + t.Fatalf("Marshal:\n\tgot: %s\n\twant: %s", got, want) + } +} + +func TestBlocking(t *testing.T) { + tests := []struct { + CaseName + in string + }{ + {Name(""), `{"x": 1}`}, + {Name(""), `[1, 2, 3]`}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + r, w := net.Pipe() + go w.Write([]byte(tt.in)) + var val any + + // If Decode reads beyond what w.Write writes above, + // it will block, and the test will deadlock. + if err := NewDecoder(r).Decode(&val); err != nil { + t.Errorf("%s: NewDecoder(%s).Decode error: %v", tt.Where, tt.in, err) + } + r.Close() + w.Close() + }) + } +} + +type decodeThis struct { + v any +} + +func TestDecodeInStream(t *testing.T) { + tests := []struct { + CaseName + json string + expTokens []any + }{ + // streaming token cases + {CaseName: Name(""), json: `10`, expTokens: []any{float64(10)}}, + {CaseName: Name(""), json: ` [10] `, expTokens: []any{ + Delim('['), float64(10), Delim(']')}}, + {CaseName: Name(""), json: ` [false,10,"b"] `, expTokens: []any{ + Delim('['), false, float64(10), "b", Delim(']')}}, + {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", float64(1), Delim('}')}}, + {CaseName: Name(""), json: `{"a": 1, "b":"3"}`, expTokens: []any{ + Delim('{'), "a", float64(1), "b", "3", Delim('}')}}, + {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim('{'), "a", float64(2), Delim('}'), + Delim(']')}}, + {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'), + Delim('}')}}, + {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim(']'), Delim('}')}}, + + // streaming tokens with intermittent Decode() + {CaseName: Name(""), json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", + decodeThis{float64(1)}, + Delim('}')}}, + {CaseName: Name(""), json: ` [ { "a" : 1 } ] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']')}}, + {CaseName: Name(""), json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{map[string]any{"a": float64(2)}}, + Delim(']')}}, + {CaseName: Name(""), json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']'), Delim('}')}}, + + {CaseName: Name(""), json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{map[string]any{"a": float64(1)}}, + Delim('}')}}, + {CaseName: Name(""), json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{[]any{ + map[string]any{"a": float64(1)}, + }}, + Delim('}')}}, + {CaseName: Name(""), json: ` [{"a": 1} {"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{&SyntaxError{"expected comma after array element", 11}}, + }}, + {CaseName: Name(""), json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []any{ + Delim('{'), strings.Repeat("a", 513), + decodeThis{&SyntaxError{"expected colon after object key", 518}}, + }}, + {CaseName: Name(""), json: `{ "\a" }`, expTokens: []any{ + Delim('{'), + &SyntaxError{"invalid character 'a' in string escape code", 3}, + }}, + {CaseName: Name(""), json: ` \a`, expTokens: []any{ + &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1}, + }}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + dec := NewDecoder(strings.NewReader(tt.json)) + for i, want := range tt.expTokens { + var got any + var err error + + if dt, ok := want.(decodeThis); ok { + want = dt.v + err = dec.Decode(&got) + } else { + got, err = dec.Token() + } + if errWant, ok := want.(error); ok { + if err == nil || !reflect.DeepEqual(err, errWant) { + t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: %v", tt.Where, tt.json, err, errWant) + } + break + } else if err != nil { + t.Fatalf("%s:\n\tinput: %s\n\tgot error: %v\n\twant error: nil", tt.Where, tt.json, err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("%s: token %d:\n\tinput: %s\n\tgot: %T(%v)\n\twant: %T(%v)", tt.Where, i, tt.json, got, got, want, want) + } + } + }) + } +} + +// Test from golang.org/issue/11893 +func TestHTTPDecoding(t *testing.T) { + const raw = `{ "foo": "bar" }` + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(raw)) + })) + defer ts.Close() + res, err := http.Get(ts.URL) + if err != nil { + log.Fatalf("http.Get error: %v", err) + } + defer res.Body.Close() + + foo := struct { + Foo string + }{} + + d := NewDecoder(res.Body) + err = d.Decode(&foo) + if err != nil { + t.Fatalf("Decode error: %v", err) + } + if foo.Foo != "bar" { + t.Errorf(`Decode: got %q, want "bar"`, foo.Foo) + } + + // make sure we get the EOF the second time + err = d.Decode(&foo) + if err != io.EOF { + t.Errorf("Decode error:\n\tgot: %v\n\twant: io.EOF", err) + } +} diff --git a/src/encoding/json/tables.go b/src/encoding/json/tables.go new file mode 100644 index 0000000..10acdc1 --- /dev/null +++ b/src/encoding/json/tables.go @@ -0,0 +1,218 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +// htmlSafeSet holds the value true if the ASCII character with the given +// array position can be safely represented inside a JSON string, embedded +// inside of HTML <script> tags, without any additional escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), the backslash character ("\"), HTML opening and closing +// tags ("<" and ">"), and the ampersand ("&"). +var htmlSafeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': false, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': false, + '=': true, + '>': false, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} diff --git a/src/encoding/json/tagkey_test.go b/src/encoding/json/tagkey_test.go new file mode 100644 index 0000000..d432cd7 --- /dev/null +++ b/src/encoding/json/tagkey_test.go @@ -0,0 +1,121 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +type basicLatin2xTag struct { + V string `json:"$%-/"` +} + +type basicLatin3xTag struct { + V string `json:"0123456789"` +} + +type basicLatin4xTag struct { + V string `json:"ABCDEFGHIJKLMO"` +} + +type basicLatin5xTag struct { + V string `json:"PQRSTUVWXYZ_"` +} + +type basicLatin6xTag struct { + V string `json:"abcdefghijklmno"` +} + +type basicLatin7xTag struct { + V string `json:"pqrstuvwxyz"` +} + +type miscPlaneTag struct { + V string `json:"色は匂へど"` +} + +type percentSlashTag struct { + V string `json:"text/html%"` // https://golang.org/issue/2718 +} + +type punctuationTag struct { + V string `json:"!#$%&()*+-./:;<=>?@[]^_{|}~ "` // https://golang.org/issue/3546 +} + +type dashTag struct { + V string `json:"-,"` +} + +type emptyTag struct { + W string +} + +type misnamedTag struct { + X string `jsom:"Misnamed"` +} + +type badFormatTag struct { + Y string `:"BadFormat"` +} + +type badCodeTag struct { + Z string `json:" !\"#&'()*+,."` +} + +type spaceTag struct { + Q string `json:"With space"` +} + +type unicodeTag struct { + W string `json:"Ελλάδα"` +} + +func TestStructTagObjectKey(t *testing.T) { + tests := []struct { + CaseName + raw any + value string + key string + }{ + {Name(""), basicLatin2xTag{"2x"}, "2x", "$%-/"}, + {Name(""), basicLatin3xTag{"3x"}, "3x", "0123456789"}, + {Name(""), basicLatin4xTag{"4x"}, "4x", "ABCDEFGHIJKLMO"}, + {Name(""), basicLatin5xTag{"5x"}, "5x", "PQRSTUVWXYZ_"}, + {Name(""), basicLatin6xTag{"6x"}, "6x", "abcdefghijklmno"}, + {Name(""), basicLatin7xTag{"7x"}, "7x", "pqrstuvwxyz"}, + {Name(""), miscPlaneTag{"いろはにほへと"}, "いろはにほへと", "色は匂へど"}, + {Name(""), dashTag{"foo"}, "foo", "-"}, + {Name(""), emptyTag{"Pour Moi"}, "Pour Moi", "W"}, + {Name(""), misnamedTag{"Animal Kingdom"}, "Animal Kingdom", "X"}, + {Name(""), badFormatTag{"Orfevre"}, "Orfevre", "Y"}, + {Name(""), badCodeTag{"Reliable Man"}, "Reliable Man", "Z"}, + {Name(""), percentSlashTag{"brut"}, "brut", "text/html%"}, + {Name(""), punctuationTag{"Union Rags"}, "Union Rags", "!#$%&()*+-./:;<=>?@[]^_{|}~ "}, + {Name(""), spaceTag{"Perreddu"}, "Perreddu", "With space"}, + {Name(""), unicodeTag{"Loukanikos"}, "Loukanikos", "Ελλάδα"}, + } + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + b, err := Marshal(tt.raw) + if err != nil { + t.Fatalf("%s: Marshal error: %v", tt.Where, err) + } + var f any + err = Unmarshal(b, &f) + if err != nil { + t.Fatalf("%s: Unmarshal error: %v", tt.Where, err) + } + for k, v := range f.(map[string]any) { + if k == tt.key { + if s, ok := v.(string); !ok || s != tt.value { + t.Fatalf("%s: Unmarshal(%#q) value:\n\tgot: %q\n\twant: %q", tt.Where, b, s, tt.value) + } + } else { + t.Fatalf("%s: Unmarshal(%#q): unexpected key: %q", tt.Where, b, k) + } + } + }) + } +} diff --git a/src/encoding/json/tags.go b/src/encoding/json/tags.go new file mode 100644 index 0000000..b490328 --- /dev/null +++ b/src/encoding/json/tags.go @@ -0,0 +1,38 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "strings" +) + +// tagOptions is the string following a comma in a struct field's "json" +// tag, or the empty string. It does not include the leading comma. +type tagOptions string + +// parseTag splits a struct field's json tag into its name and +// comma-separated options. +func parseTag(tag string) (string, tagOptions) { + tag, opt, _ := strings.Cut(tag, ",") + return tag, tagOptions(opt) +} + +// Contains reports whether a comma-separated list of options +// contains a particular substr flag. substr must be surrounded by a +// string boundary or commas. +func (o tagOptions) Contains(optionName string) bool { + if len(o) == 0 { + return false + } + s := string(o) + for s != "" { + var name string + name, s, _ = strings.Cut(s, ",") + if name == optionName { + return true + } + } + return false +} diff --git a/src/encoding/json/tags_test.go b/src/encoding/json/tags_test.go new file mode 100644 index 0000000..1d2323d --- /dev/null +++ b/src/encoding/json/tags_test.go @@ -0,0 +1,28 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +func TestTagParsing(t *testing.T) { + name, opts := parseTag("field,foobar,foo") + if name != "field" { + t.Fatalf("name = %q, want field", name) + } + for _, tt := range []struct { + opt string + want bool + }{ + {"foobar", true}, + {"foo", true}, + {"bar", false}, + } { + if opts.Contains(tt.opt) != tt.want { + t.Errorf("Contains(%q) = %v, want %v", tt.opt, !tt.want, tt.want) + } + } +} diff --git a/src/encoding/json/testdata/code.json.gz b/src/encoding/json/testdata/code.json.gz Binary files differnew file mode 100644 index 0000000..1572a92 --- /dev/null +++ b/src/encoding/json/testdata/code.json.gz diff --git a/src/encoding/pem/example_test.go b/src/encoding/pem/example_test.go new file mode 100644 index 0000000..806e7bb --- /dev/null +++ b/src/encoding/pem/example_test.go @@ -0,0 +1,65 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pem_test + +import ( + "crypto/x509" + "encoding/pem" + "fmt" + "log" + "os" +) + +func ExampleDecode() { + var pubPEMData = []byte(` +-----BEGIN PUBLIC KEY----- +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAlRuRnThUjU8/prwYxbty +WPT9pURI3lbsKMiB6Fn/VHOKE13p4D8xgOCADpdRagdT6n4etr9atzDKUSvpMtR3 +CP5noNc97WiNCggBjVWhs7szEe8ugyqF23XwpHQ6uV1LKH50m92MbOWfCtjU9p/x +qhNpQQ1AZhqNy5Gevap5k8XzRmjSldNAFZMY7Yv3Gi+nyCwGwpVtBUwhuLzgNFK/ +yDtw2WcWmUU7NuC8Q6MWvPebxVtCfVp/iQU6q60yyt6aGOBkhAX0LpKAEhKidixY +nP9PNVBvxgu3XZ4P36gZV6+ummKdBVnc3NqwBLu5+CcdRdusmHPHd5pHf4/38Z3/ +6qU2a/fPvWzceVTEgZ47QjFMTCTmCwNt29cvi7zZeQzjtwQgn4ipN9NibRH/Ax/q +TbIzHfrJ1xa2RteWSdFjwtxi9C20HUkjXSeI4YlzQMH0fPX6KCE7aVePTOnB69I/ +a9/q96DiXZajwlpq3wFctrs1oXqBp5DVrCIj8hU2wNgB7LtQ1mCtsYz//heai0K9 +PhE4X6hiE0YmeAZjR0uHl8M/5aW9xCoJ72+12kKpWAa0SFRWLy6FejNYCYpkupVJ +yecLk/4L1W0l6jQQZnWErXZYe0PNFcmwGXy1Rep83kfBRNKRy5tvocalLlwXLdUk +AIU+2GKjyT3iMuzZxxFxPFMCAwEAAQ== +-----END PUBLIC KEY----- +and some more`) + + block, rest := pem.Decode(pubPEMData) + if block == nil || block.Type != "PUBLIC KEY" { + log.Fatal("failed to decode PEM block containing public key") + } + + pub, err := x509.ParsePKIXPublicKey(block.Bytes) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Got a %T, with remaining data: %q", pub, rest) + // Output: Got a *rsa.PublicKey, with remaining data: "and some more" +} + +func ExampleEncode() { + block := &pem.Block{ + Type: "MESSAGE", + Headers: map[string]string{ + "Animal": "Gopher", + }, + Bytes: []byte("test"), + } + + if err := pem.Encode(os.Stdout, block); err != nil { + log.Fatal(err) + } + // Output: + // -----BEGIN MESSAGE----- + // Animal: Gopher + // + // dGVzdA== + // -----END MESSAGE----- +} diff --git a/src/encoding/pem/pem.go b/src/encoding/pem/pem.go new file mode 100644 index 0000000..4b4f749 --- /dev/null +++ b/src/encoding/pem/pem.go @@ -0,0 +1,316 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pem implements the PEM data encoding, which originated in Privacy +// Enhanced Mail. The most common use of PEM encoding today is in TLS keys and +// certificates. See RFC 1421. +package pem + +import ( + "bytes" + "encoding/base64" + "errors" + "io" + "sort" + "strings" +) + +// A Block represents a PEM encoded structure. +// +// The encoded form is: +// +// -----BEGIN Type----- +// Headers +// base64-encoded Bytes +// -----END Type----- +// +// where [Block.Headers] is a possibly empty sequence of Key: Value lines. +type Block struct { + Type string // The type, taken from the preamble (i.e. "RSA PRIVATE KEY"). + Headers map[string]string // Optional headers. + Bytes []byte // The decoded bytes of the contents. Typically a DER encoded ASN.1 structure. +} + +// getLine results the first \r\n or \n delineated line from the given byte +// array. The line does not include trailing whitespace or the trailing new +// line bytes. The remainder of the byte array (also not including the new line +// bytes) is also returned and this will always be smaller than the original +// argument. +func getLine(data []byte) (line, rest []byte) { + i := bytes.IndexByte(data, '\n') + var j int + if i < 0 { + i = len(data) + j = i + } else { + j = i + 1 + if i > 0 && data[i-1] == '\r' { + i-- + } + } + return bytes.TrimRight(data[0:i], " \t"), data[j:] +} + +// removeSpacesAndTabs returns a copy of its input with all spaces and tabs +// removed, if there were any. Otherwise, the input is returned unchanged. +// +// The base64 decoder already skips newline characters, so we don't need to +// filter them out here. +func removeSpacesAndTabs(data []byte) []byte { + if !bytes.ContainsAny(data, " \t") { + // Fast path; most base64 data within PEM contains newlines, but + // no spaces nor tabs. Skip the extra alloc and work. + return data + } + result := make([]byte, len(data)) + n := 0 + + for _, b := range data { + if b == ' ' || b == '\t' { + continue + } + result[n] = b + n++ + } + + return result[0:n] +} + +var pemStart = []byte("\n-----BEGIN ") +var pemEnd = []byte("\n-----END ") +var pemEndOfLine = []byte("-----") +var colon = []byte(":") + +// Decode will find the next PEM formatted block (certificate, private key +// etc) in the input. It returns that block and the remainder of the input. If +// no PEM data is found, p is nil and the whole of the input is returned in +// rest. +func Decode(data []byte) (p *Block, rest []byte) { + // pemStart begins with a newline. However, at the very beginning of + // the byte array, we'll accept the start string without it. + rest = data + for { + if bytes.HasPrefix(rest, pemStart[1:]) { + rest = rest[len(pemStart)-1:] + } else if _, after, ok := bytes.Cut(rest, pemStart); ok { + rest = after + } else { + return nil, data + } + + var typeLine []byte + typeLine, rest = getLine(rest) + if !bytes.HasSuffix(typeLine, pemEndOfLine) { + continue + } + typeLine = typeLine[0 : len(typeLine)-len(pemEndOfLine)] + + p = &Block{ + Headers: make(map[string]string), + Type: string(typeLine), + } + + for { + // This loop terminates because getLine's second result is + // always smaller than its argument. + if len(rest) == 0 { + return nil, data + } + line, next := getLine(rest) + + key, val, ok := bytes.Cut(line, colon) + if !ok { + break + } + + // TODO(agl): need to cope with values that spread across lines. + key = bytes.TrimSpace(key) + val = bytes.TrimSpace(val) + p.Headers[string(key)] = string(val) + rest = next + } + + var endIndex, endTrailerIndex int + + // If there were no headers, the END line might occur + // immediately, without a leading newline. + if len(p.Headers) == 0 && bytes.HasPrefix(rest, pemEnd[1:]) { + endIndex = 0 + endTrailerIndex = len(pemEnd) - 1 + } else { + endIndex = bytes.Index(rest, pemEnd) + endTrailerIndex = endIndex + len(pemEnd) + } + + if endIndex < 0 { + continue + } + + // After the "-----" of the ending line, there should be the same type + // and then a final five dashes. + endTrailer := rest[endTrailerIndex:] + endTrailerLen := len(typeLine) + len(pemEndOfLine) + if len(endTrailer) < endTrailerLen { + continue + } + + restOfEndLine := endTrailer[endTrailerLen:] + endTrailer = endTrailer[:endTrailerLen] + if !bytes.HasPrefix(endTrailer, typeLine) || + !bytes.HasSuffix(endTrailer, pemEndOfLine) { + continue + } + + // The line must end with only whitespace. + if s, _ := getLine(restOfEndLine); len(s) != 0 { + continue + } + + base64Data := removeSpacesAndTabs(rest[:endIndex]) + p.Bytes = make([]byte, base64.StdEncoding.DecodedLen(len(base64Data))) + n, err := base64.StdEncoding.Decode(p.Bytes, base64Data) + if err != nil { + continue + } + p.Bytes = p.Bytes[:n] + + // the -1 is because we might have only matched pemEnd without the + // leading newline if the PEM block was empty. + _, rest = getLine(rest[endIndex+len(pemEnd)-1:]) + return p, rest + } +} + +const pemLineLength = 64 + +type lineBreaker struct { + line [pemLineLength]byte + used int + out io.Writer +} + +var nl = []byte{'\n'} + +func (l *lineBreaker) Write(b []byte) (n int, err error) { + if l.used+len(b) < pemLineLength { + copy(l.line[l.used:], b) + l.used += len(b) + return len(b), nil + } + + n, err = l.out.Write(l.line[0:l.used]) + if err != nil { + return + } + excess := pemLineLength - l.used + l.used = 0 + + n, err = l.out.Write(b[0:excess]) + if err != nil { + return + } + + n, err = l.out.Write(nl) + if err != nil { + return + } + + return l.Write(b[excess:]) +} + +func (l *lineBreaker) Close() (err error) { + if l.used > 0 { + _, err = l.out.Write(l.line[0:l.used]) + if err != nil { + return + } + _, err = l.out.Write(nl) + } + + return +} + +func writeHeader(out io.Writer, k, v string) error { + _, err := out.Write([]byte(k + ": " + v + "\n")) + return err +} + +// Encode writes the PEM encoding of b to out. +func Encode(out io.Writer, b *Block) error { + // Check for invalid block before writing any output. + for k := range b.Headers { + if strings.Contains(k, ":") { + return errors.New("pem: cannot encode a header key that contains a colon") + } + } + + // All errors below are relayed from underlying io.Writer, + // so it is now safe to write data. + + if _, err := out.Write(pemStart[1:]); err != nil { + return err + } + if _, err := out.Write([]byte(b.Type + "-----\n")); err != nil { + return err + } + + if len(b.Headers) > 0 { + const procType = "Proc-Type" + h := make([]string, 0, len(b.Headers)) + hasProcType := false + for k := range b.Headers { + if k == procType { + hasProcType = true + continue + } + h = append(h, k) + } + // The Proc-Type header must be written first. + // See RFC 1421, section 4.6.1.1 + if hasProcType { + if err := writeHeader(out, procType, b.Headers[procType]); err != nil { + return err + } + } + // For consistency of output, write other headers sorted by key. + sort.Strings(h) + for _, k := range h { + if err := writeHeader(out, k, b.Headers[k]); err != nil { + return err + } + } + if _, err := out.Write(nl); err != nil { + return err + } + } + + var breaker lineBreaker + breaker.out = out + + b64 := base64.NewEncoder(base64.StdEncoding, &breaker) + if _, err := b64.Write(b.Bytes); err != nil { + return err + } + b64.Close() + breaker.Close() + + if _, err := out.Write(pemEnd[1:]); err != nil { + return err + } + _, err := out.Write([]byte(b.Type + "-----\n")) + return err +} + +// EncodeToMemory returns the PEM encoding of b. +// +// If b has invalid headers and cannot be encoded, +// EncodeToMemory returns nil. If it is important to +// report details about this error case, use [Encode] instead. +func EncodeToMemory(b *Block) []byte { + var buf bytes.Buffer + if err := Encode(&buf, b); err != nil { + return nil + } + return buf.Bytes() +} diff --git a/src/encoding/pem/pem_test.go b/src/encoding/pem/pem_test.go new file mode 100644 index 0000000..56a7754 --- /dev/null +++ b/src/encoding/pem/pem_test.go @@ -0,0 +1,640 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pem + +import ( + "bytes" + "io" + "reflect" + "strings" + "testing" + "testing/quick" +) + +type GetLineTest struct { + in, out1, out2 string +} + +var getLineTests = []GetLineTest{ + {"abc", "abc", ""}, + {"abc\r", "abc\r", ""}, + {"abc\n", "abc", ""}, + {"abc\r\n", "abc", ""}, + {"abc\nd", "abc", "d"}, + {"abc\r\nd", "abc", "d"}, + {"\nabc", "", "abc"}, + {"\r\nabc", "", "abc"}, + {"abc\t \nd", "abc", "d"}, + {"\t abc\nd", "\t abc", "d"}, + {"abc\n\t d", "abc", "\t d"}, + {"abc\nd\t ", "abc", "d\t "}, +} + +func TestGetLine(t *testing.T) { + for i, test := range getLineTests { + x, y := getLine([]byte(test.in)) + if string(x) != test.out1 || string(y) != test.out2 { + t.Errorf("#%d got:%+v,%+v want:%s,%s", i, x, y, test.out1, test.out2) + } + } +} + +func TestDecode(t *testing.T) { + result, remainder := Decode([]byte(pemData)) + if !reflect.DeepEqual(result, certificate) { + t.Errorf("#0 got:%#v want:%#v", result, certificate) + } + result, remainder = Decode(remainder) + if !reflect.DeepEqual(result, privateKey) { + t.Errorf("#1 got:%#v want:%#v", result, privateKey) + } + + isEmpty := func(block *Block) bool { + return block != nil && block.Type == "EMPTY" && len(block.Headers) == 0 && len(block.Bytes) == 0 + } + result, remainder = Decode(remainder) + if !isEmpty(result) { + t.Errorf("#2 should be empty but got:%#v", result) + } + result, remainder = Decode(remainder) + if !isEmpty(result) { + t.Errorf("#3 should be empty but got:%#v", result) + } + result, remainder = Decode(remainder) + if !isEmpty(result) { + t.Errorf("#4 should be empty but got:%#v", result) + } + + result, remainder = Decode(remainder) + if result == nil || result.Type != "HEADERS" || len(result.Headers) != 1 { + t.Errorf("#5 expected single header block but got :%v", result) + } + + if len(remainder) != 0 { + t.Errorf("expected nothing remaining of pemData, but found %s", string(remainder)) + } + + result, _ = Decode([]byte(pemPrivateKey2)) + if !reflect.DeepEqual(result, privateKey2) { + t.Errorf("#2 got:%#v want:%#v", result, privateKey2) + } +} + +const pemTooFewEndingDashes = ` +-----BEGIN FOO----- +dGVzdA== +-----END FOO----` + +const pemTooManyEndingDashes = ` +-----BEGIN FOO----- +dGVzdA== +-----END FOO------` + +const pemTrailingNonWhitespace = ` +-----BEGIN FOO----- +dGVzdA== +-----END FOO----- .` + +const pemWrongEndingType = ` +-----BEGIN FOO----- +dGVzdA== +-----END BAR-----` + +const pemMissingEndingSpace = ` +-----BEGIN FOO----- +dGVzdA== +-----ENDBAR-----` + +const pemMissingEndLine = ` +-----BEGIN FOO----- +Header: 1` + +var pemRepeatingBegin = strings.Repeat("-----BEGIN \n", 10) + +var badPEMTests = []struct { + name string + input string +}{ + { + "too few trailing dashes", + pemTooFewEndingDashes, + }, + { + "too many trailing dashes", + pemTooManyEndingDashes, + }, + { + "trailing non-whitespace", + pemTrailingNonWhitespace, + }, + { + "incorrect ending type", + pemWrongEndingType, + }, + { + "missing ending space", + pemMissingEndingSpace, + }, + { + "repeating begin", + pemRepeatingBegin, + }, + { + "missing end line", + pemMissingEndLine, + }, +} + +func TestBadDecode(t *testing.T) { + for _, test := range badPEMTests { + result, rest := Decode([]byte(test.input)) + if result != nil { + t.Errorf("unexpected success while parsing %q", test.name) + } + if string(rest) != test.input { + t.Errorf("unexpected rest: %q; want = %q", rest, test.input) + } + } +} + +func TestCVE202224675(t *testing.T) { + // Prior to CVE-2022-24675, this input would cause a stack overflow. + input := []byte(strings.Repeat("-----BEGIN \n", 10000000)) + result, rest := Decode(input) + if result != nil || !reflect.DeepEqual(rest, input) { + t.Errorf("Encode of %#v decoded as %#v", input, rest) + } +} + +func TestEncode(t *testing.T) { + r := EncodeToMemory(privateKey2) + if string(r) != pemPrivateKey2 { + t.Errorf("got:%s want:%s", r, pemPrivateKey2) + } +} + +type lineBreakerTest struct { + in, out string +} + +const sixtyFourCharString = "0123456789012345678901234567890123456789012345678901234567890123" + +var lineBreakerTests = []lineBreakerTest{ + {"", ""}, + {"a", "a\n"}, + {"ab", "ab\n"}, + {sixtyFourCharString, sixtyFourCharString + "\n"}, + {sixtyFourCharString + "X", sixtyFourCharString + "\nX\n"}, + {sixtyFourCharString + sixtyFourCharString, sixtyFourCharString + "\n" + sixtyFourCharString + "\n"}, +} + +func TestLineBreaker(t *testing.T) { + for i, test := range lineBreakerTests { + buf := new(strings.Builder) + var breaker lineBreaker + breaker.out = buf + _, err := breaker.Write([]byte(test.in)) + if err != nil { + t.Errorf("#%d: error from Write: %s", i, err) + continue + } + err = breaker.Close() + if err != nil { + t.Errorf("#%d: error from Close: %s", i, err) + continue + } + + if got := buf.String(); got != test.out { + t.Errorf("#%d: got:%s want:%s", i, got, test.out) + } + } + + for i, test := range lineBreakerTests { + buf := new(strings.Builder) + var breaker lineBreaker + breaker.out = buf + + for i := 0; i < len(test.in); i++ { + _, err := breaker.Write([]byte(test.in[i : i+1])) + if err != nil { + t.Errorf("#%d: error from Write (byte by byte): %s", i, err) + continue + } + } + err := breaker.Close() + if err != nil { + t.Errorf("#%d: error from Close (byte by byte): %s", i, err) + continue + } + + if got := buf.String(); got != test.out { + t.Errorf("#%d: (byte by byte) got:%s want:%s", i, got, test.out) + } + } +} + +func TestFuzz(t *testing.T) { + // PEM is a text-based format. Assume header fields with leading/trailing spaces + // or embedded newlines will not round trip correctly and don't need to be tested. + isBad := func(s string) bool { + return strings.ContainsAny(s, "\r\n") || strings.TrimSpace(s) != s + } + + testRoundtrip := func(block Block) bool { + // Reject bad Type + // Type with colons will proceed as key/val pair and cause an error. + if isBad(block.Type) || strings.Contains(block.Type, ":") { + return true + } + for key, val := range block.Headers { + // Reject bad key/val. + // Also, keys with colons cannot be encoded, because : is the key: val separator. + if isBad(key) || isBad(val) || strings.Contains(key, ":") { + return true + } + } + + var buf bytes.Buffer + if err := Encode(&buf, &block); err != nil { + t.Errorf("Encode of %#v resulted in error: %s", &block, err) + return false + } + decoded, rest := Decode(buf.Bytes()) + if block.Headers == nil { + // Encoder supports nil Headers but decoder returns initialized. + block.Headers = make(map[string]string) + } + if block.Bytes == nil { + // Encoder supports nil Bytes but decoder returns initialized. + block.Bytes = make([]byte, 0) + } + if !reflect.DeepEqual(decoded, &block) { + t.Errorf("Encode of %#v decoded as %#v", &block, decoded) + return false + } + if len(rest) != 0 { + t.Errorf("Encode of %#v decoded correctly, but with %x left over", block, rest) + return false + } + return true + } + + // Explicitly test the empty block. + if !testRoundtrip(Block{ + Type: "EMPTY", + Headers: make(map[string]string), + Bytes: []byte{}, + }) { + return + } + + quick.Check(testRoundtrip, nil) +} + +func BenchmarkEncode(b *testing.B) { + data := &Block{Bytes: make([]byte, 65536)} + b.SetBytes(int64(len(data.Bytes))) + for i := 0; i < b.N; i++ { + Encode(io.Discard, data) + } +} + +func BenchmarkDecode(b *testing.B) { + block := &Block{Bytes: make([]byte, 65536)} + data := EncodeToMemory(block) + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + Decode(data) + } +} + +var pemData = testingKey(`verify return:0 +-----BEGIN CERTIFICATE----- +sdlfkjskldfj + -----BEGIN CERTIFICATE----- +--- +Certificate chain + 0 s:/C=AU/ST=Somewhere/L=Someplace/O=Foo Bar/CN=foo.example.com + i:/C=ZA/O=CA Inc./CN=CA Inc +-----BEGIN CERTIFICATE----- +testing +-----BEGIN CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIID6TCCA1ICAQEwDQYJKoZIhvcNAQEFBQAwgYsxCzAJBgNVBAYTAlVTMRMwEQYD +VQQIEwpDYWxpZm9ybmlhMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRQwEgYDVQQK +EwtHb29nbGUgSW5jLjEMMAoGA1UECxMDRW5nMQwwCgYDVQQDEwNhZ2wxHTAbBgkq +hkiG9w0BCQEWDmFnbEBnb29nbGUuY29tMB4XDTA5MDkwOTIyMDU0M1oXDTEwMDkw +OTIyMDU0M1owajELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUtU3RhdGUxITAf +BgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEjMCEGA1UEAxMaZXVyb3Bh +LnNmby5jb3JwLmdvb2dsZS5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK +AoICAQC6pgYt7/EibBDumASF+S0qvqdL/f+nouJw2T1Qc8GmXF/iiUcrsgzh/Fd8 +pDhz/T96Qg9IyR4ztuc2MXrmPra+zAuSf5bevFReSqvpIt8Duv0HbDbcqs/XKPfB +uMDe+of7a9GCywvAZ4ZUJcp0thqD9fKTTjUWOBzHY1uNE4RitrhmJCrbBGXbJ249 +bvgmb7jgdInH2PU7PT55hujvOoIsQW2osXBFRur4pF1wmVh4W4lTLD6pjfIMUcML +ICHEXEN73PDic8KS3EtNYCwoIld+tpIBjE1QOb1KOyuJBNW6Esw9ALZn7stWdYcE +qAwvv20egN2tEXqj7Q4/1ccyPZc3PQgC3FJ8Be2mtllM+80qf4dAaQ/fWvCtOrQ5 +pnfe9juQvCo8Y0VGlFcrSys/MzSg9LJ/24jZVgzQved/Qupsp89wVidwIzjt+WdS +fyWfH0/v1aQLvu5cMYuW//C0W2nlYziL5blETntM8My2ybNARy3ICHxCBv2RNtPI +WQVm+E9/W5rwh2IJR4DHn2LHwUVmT/hHNTdBLl5Uhwr4Wc7JhE7AVqb14pVNz1lr +5jxsp//ncIwftb7mZQ3DF03Yna+jJhpzx8CQoeLT6aQCHyzmH68MrHHT4MALPyUs +Pomjn71GNTtDeWAXibjCgdL6iHACCF6Htbl0zGlG0OAK+bdn0QIDAQABMA0GCSqG +SIb3DQEBBQUAA4GBAOKnQDtqBV24vVqvesL5dnmyFpFPXBn3WdFfwD6DzEb21UVG +5krmJiu+ViipORJPGMkgoL6BjU21XI95VQbun5P8vvg8Z+FnFsvRFY3e1CCzAVQY +ZsUkLw2I7zI/dNlWdB8Xp7v+3w9sX5N3J/WuJ1KOO5m26kRlHQo7EzT3974g +-----END CERTIFICATE----- + 1 s:/C=ZA/O=Ca Inc./CN=CA Inc + +-----BEGIN RSA TESTING KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,80C7C7A09690757A + +eQp5ZkH6CyHBz7BZfUPxyLCCmftsBJ7HlqGb8Ld21cSwnzWZ4/SIlhyrUtsfw7VR +2TTwA+odo9ex7GdxOTaH8oZFumIRoiEjHsk8U7Bhntp+ekkPP79xunnN7hb7hkhr +yGDQZgA7s2cQHQ71v3gwT2BACAft26jCjbM1wgNzBnJ8M0Rzn68YWqaPtdBu8qb/ +zVR5JB1mnqvTSbFsfF5yMc6o2WQ9jJCl6KypnMl+BpL+dlvdjYVK4l9lYsB1Hs3d ++zDBbWxos818zzhS8/y6eIfiSG27cqrbhURbmgiSfDXjncK4m/pLcQ7mmBL6mFOr +3Pj4jepzgOiFRL6MKE//h62fZvI1ErYr8VunHEykgKNhChDvb1RO6LEfqKBu+Ivw +TB6fBhW3TCLMnVPYVoYwA+fHNTmZZm8BEonlIMfI+KktjWUg4Oia+NI6vKcPpFox +hSnlGgCtvfEaq5/H4kHJp95eOpnFsLviw2seHNkz/LxJMRP1X428+DpYW/QD/0JU +tJSuC/q9FUHL6RI3u/Asrv8pCb4+D7i1jW/AMIdJTtycOGsbPxQA7yHMWujHmeb1 +BTiHcL3s3KrJu1vDVrshvxfnz71KTeNnZH8UbOqT5i7fPGyXtY1XJddcbI/Q6tXf +wHFsZc20TzSdsVLBtwksUacpbDogcEVMctnNrB8FIrB3vZEv9Q0Z1VeY7nmTpF+6 +a+z2P7acL7j6A6Pr3+q8P9CPiPC7zFonVzuVPyB8GchGR2hytyiOVpuD9+k8hcuw +ZWAaUoVtWIQ52aKS0p19G99hhb+IVANC4akkdHV4SP8i7MVNZhfUmg== +-----END RSA TESTING KEY----- + + +-----BEGIN EMPTY----- +-----END EMPTY----- + +-----BEGIN EMPTY----- + +-----END EMPTY----- + +-----BEGIN EMPTY----- + + +-----END EMPTY----- + +# This shouldn't be recognised because of the missing newline after the +headers. +-----BEGIN HEADERS----- +Header: 1 +-----END HEADERS----- + +# This should be valid, however. +-----BEGIN HEADERS----- +Header: 1 + +-----END HEADERS-----`) + +var certificate = &Block{Type: "CERTIFICATE", + Headers: map[string]string{}, + Bytes: []uint8{0x30, 0x82, 0x3, 0xe9, 0x30, 0x82, 0x3, 0x52, 0x2, 0x1, + 0x1, 0x30, 0xd, 0x6, 0x9, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0xd, + 0x1, 0x1, 0x5, 0x5, 0x0, 0x30, 0x81, 0x8b, 0x31, 0xb, 0x30, + 0x9, 0x6, 0x3, 0x55, 0x4, 0x6, 0x13, 0x2, 0x55, 0x53, 0x31, + 0x13, 0x30, 0x11, 0x6, 0x3, 0x55, 0x4, 0x8, 0x13, 0xa, 0x43, + 0x61, 0x6c, 0x69, 0x66, 0x6f, 0x72, 0x6e, 0x69, 0x61, 0x31, + 0x16, 0x30, 0x14, 0x6, 0x3, 0x55, 0x4, 0x7, 0x13, 0xd, 0x53, + 0x61, 0x6e, 0x20, 0x46, 0x72, 0x61, 0x6e, 0x63, 0x69, 0x73, + 0x63, 0x6f, 0x31, 0x14, 0x30, 0x12, 0x6, 0x3, 0x55, 0x4, 0xa, + 0x13, 0xb, 0x47, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x20, 0x49, + 0x6e, 0x63, 0x2e, 0x31, 0xc, 0x30, 0xa, 0x6, 0x3, 0x55, 0x4, + 0xb, 0x13, 0x3, 0x45, 0x6e, 0x67, 0x31, 0xc, 0x30, 0xa, 0x6, + 0x3, 0x55, 0x4, 0x3, 0x13, 0x3, 0x61, 0x67, 0x6c, 0x31, 0x1d, + 0x30, 0x1b, 0x6, 0x9, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0xd, 0x1, + 0x9, 0x1, 0x16, 0xe, 0x61, 0x67, 0x6c, 0x40, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x30, 0x1e, 0x17, + 0xd, 0x30, 0x39, 0x30, 0x39, 0x30, 0x39, 0x32, 0x32, 0x30, + 0x35, 0x34, 0x33, 0x5a, 0x17, 0xd, 0x31, 0x30, 0x30, 0x39, + 0x30, 0x39, 0x32, 0x32, 0x30, 0x35, 0x34, 0x33, 0x5a, 0x30, + 0x6a, 0x31, 0xb, 0x30, 0x9, 0x6, 0x3, 0x55, 0x4, 0x6, 0x13, + 0x2, 0x41, 0x55, 0x31, 0x13, 0x30, 0x11, 0x6, 0x3, 0x55, 0x4, + 0x8, 0x13, 0xa, 0x53, 0x6f, 0x6d, 0x65, 0x2d, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x31, 0x21, 0x30, 0x1f, 0x6, 0x3, 0x55, 0x4, 0xa, + 0x13, 0x18, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, + 0x20, 0x57, 0x69, 0x64, 0x67, 0x69, 0x74, 0x73, 0x20, 0x50, + 0x74, 0x79, 0x20, 0x4c, 0x74, 0x64, 0x31, 0x23, 0x30, 0x21, + 0x6, 0x3, 0x55, 0x4, 0x3, 0x13, 0x1a, 0x65, 0x75, 0x72, 0x6f, + 0x70, 0x61, 0x2e, 0x73, 0x66, 0x6f, 0x2e, 0x63, 0x6f, 0x72, + 0x70, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x63, + 0x6f, 0x6d, 0x30, 0x82, 0x2, 0x22, 0x30, 0xd, 0x6, 0x9, 0x2a, + 0x86, 0x48, 0x86, 0xf7, 0xd, 0x1, 0x1, 0x1, 0x5, 0x0, 0x3, + 0x82, 0x2, 0xf, 0x0, 0x30, 0x82, 0x2, 0xa, 0x2, 0x82, 0x2, 0x1, + 0x0, 0xba, 0xa6, 0x6, 0x2d, 0xef, 0xf1, 0x22, 0x6c, 0x10, 0xee, + 0x98, 0x4, 0x85, 0xf9, 0x2d, 0x2a, 0xbe, 0xa7, 0x4b, 0xfd, + 0xff, 0xa7, 0xa2, 0xe2, 0x70, 0xd9, 0x3d, 0x50, 0x73, 0xc1, + 0xa6, 0x5c, 0x5f, 0xe2, 0x89, 0x47, 0x2b, 0xb2, 0xc, 0xe1, + 0xfc, 0x57, 0x7c, 0xa4, 0x38, 0x73, 0xfd, 0x3f, 0x7a, 0x42, + 0xf, 0x48, 0xc9, 0x1e, 0x33, 0xb6, 0xe7, 0x36, 0x31, 0x7a, + 0xe6, 0x3e, 0xb6, 0xbe, 0xcc, 0xb, 0x92, 0x7f, 0x96, 0xde, + 0xbc, 0x54, 0x5e, 0x4a, 0xab, 0xe9, 0x22, 0xdf, 0x3, 0xba, + 0xfd, 0x7, 0x6c, 0x36, 0xdc, 0xaa, 0xcf, 0xd7, 0x28, 0xf7, + 0xc1, 0xb8, 0xc0, 0xde, 0xfa, 0x87, 0xfb, 0x6b, 0xd1, 0x82, + 0xcb, 0xb, 0xc0, 0x67, 0x86, 0x54, 0x25, 0xca, 0x74, 0xb6, + 0x1a, 0x83, 0xf5, 0xf2, 0x93, 0x4e, 0x35, 0x16, 0x38, 0x1c, + 0xc7, 0x63, 0x5b, 0x8d, 0x13, 0x84, 0x62, 0xb6, 0xb8, 0x66, + 0x24, 0x2a, 0xdb, 0x4, 0x65, 0xdb, 0x27, 0x6e, 0x3d, 0x6e, + 0xf8, 0x26, 0x6f, 0xb8, 0xe0, 0x74, 0x89, 0xc7, 0xd8, 0xf5, + 0x3b, 0x3d, 0x3e, 0x79, 0x86, 0xe8, 0xef, 0x3a, 0x82, 0x2c, + 0x41, 0x6d, 0xa8, 0xb1, 0x70, 0x45, 0x46, 0xea, 0xf8, 0xa4, + 0x5d, 0x70, 0x99, 0x58, 0x78, 0x5b, 0x89, 0x53, 0x2c, 0x3e, + 0xa9, 0x8d, 0xf2, 0xc, 0x51, 0xc3, 0xb, 0x20, 0x21, 0xc4, 0x5c, + 0x43, 0x7b, 0xdc, 0xf0, 0xe2, 0x73, 0xc2, 0x92, 0xdc, 0x4b, + 0x4d, 0x60, 0x2c, 0x28, 0x22, 0x57, 0x7e, 0xb6, 0x92, 0x1, + 0x8c, 0x4d, 0x50, 0x39, 0xbd, 0x4a, 0x3b, 0x2b, 0x89, 0x4, + 0xd5, 0xba, 0x12, 0xcc, 0x3d, 0x0, 0xb6, 0x67, 0xee, 0xcb, + 0x56, 0x75, 0x87, 0x4, 0xa8, 0xc, 0x2f, 0xbf, 0x6d, 0x1e, 0x80, + 0xdd, 0xad, 0x11, 0x7a, 0xa3, 0xed, 0xe, 0x3f, 0xd5, 0xc7, + 0x32, 0x3d, 0x97, 0x37, 0x3d, 0x8, 0x2, 0xdc, 0x52, 0x7c, 0x5, + 0xed, 0xa6, 0xb6, 0x59, 0x4c, 0xfb, 0xcd, 0x2a, 0x7f, 0x87, + 0x40, 0x69, 0xf, 0xdf, 0x5a, 0xf0, 0xad, 0x3a, 0xb4, 0x39, + 0xa6, 0x77, 0xde, 0xf6, 0x3b, 0x90, 0xbc, 0x2a, 0x3c, 0x63, + 0x45, 0x46, 0x94, 0x57, 0x2b, 0x4b, 0x2b, 0x3f, 0x33, 0x34, + 0xa0, 0xf4, 0xb2, 0x7f, 0xdb, 0x88, 0xd9, 0x56, 0xc, 0xd0, + 0xbd, 0xe7, 0x7f, 0x42, 0xea, 0x6c, 0xa7, 0xcf, 0x70, 0x56, + 0x27, 0x70, 0x23, 0x38, 0xed, 0xf9, 0x67, 0x52, 0x7f, 0x25, + 0x9f, 0x1f, 0x4f, 0xef, 0xd5, 0xa4, 0xb, 0xbe, 0xee, 0x5c, + 0x31, 0x8b, 0x96, 0xff, 0xf0, 0xb4, 0x5b, 0x69, 0xe5, 0x63, + 0x38, 0x8b, 0xe5, 0xb9, 0x44, 0x4e, 0x7b, 0x4c, 0xf0, 0xcc, + 0xb6, 0xc9, 0xb3, 0x40, 0x47, 0x2d, 0xc8, 0x8, 0x7c, 0x42, 0x6, + 0xfd, 0x91, 0x36, 0xd3, 0xc8, 0x59, 0x5, 0x66, 0xf8, 0x4f, + 0x7f, 0x5b, 0x9a, 0xf0, 0x87, 0x62, 0x9, 0x47, 0x80, 0xc7, + 0x9f, 0x62, 0xc7, 0xc1, 0x45, 0x66, 0x4f, 0xf8, 0x47, 0x35, + 0x37, 0x41, 0x2e, 0x5e, 0x54, 0x87, 0xa, 0xf8, 0x59, 0xce, + 0xc9, 0x84, 0x4e, 0xc0, 0x56, 0xa6, 0xf5, 0xe2, 0x95, 0x4d, + 0xcf, 0x59, 0x6b, 0xe6, 0x3c, 0x6c, 0xa7, 0xff, 0xe7, 0x70, + 0x8c, 0x1f, 0xb5, 0xbe, 0xe6, 0x65, 0xd, 0xc3, 0x17, 0x4d, + 0xd8, 0x9d, 0xaf, 0xa3, 0x26, 0x1a, 0x73, 0xc7, 0xc0, 0x90, + 0xa1, 0xe2, 0xd3, 0xe9, 0xa4, 0x2, 0x1f, 0x2c, 0xe6, 0x1f, + 0xaf, 0xc, 0xac, 0x71, 0xd3, 0xe0, 0xc0, 0xb, 0x3f, 0x25, 0x2c, + 0x3e, 0x89, 0xa3, 0x9f, 0xbd, 0x46, 0x35, 0x3b, 0x43, 0x79, + 0x60, 0x17, 0x89, 0xb8, 0xc2, 0x81, 0xd2, 0xfa, 0x88, 0x70, + 0x2, 0x8, 0x5e, 0x87, 0xb5, 0xb9, 0x74, 0xcc, 0x69, 0x46, 0xd0, + 0xe0, 0xa, 0xf9, 0xb7, 0x67, 0xd1, 0x2, 0x3, 0x1, 0x0, 0x1, + 0x30, 0xd, 0x6, 0x9, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0xd, 0x1, + 0x1, 0x5, 0x5, 0x0, 0x3, 0x81, 0x81, 0x0, 0xe2, 0xa7, 0x40, + 0x3b, 0x6a, 0x5, 0x5d, 0xb8, 0xbd, 0x5a, 0xaf, 0x7a, 0xc2, + 0xf9, 0x76, 0x79, 0xb2, 0x16, 0x91, 0x4f, 0x5c, 0x19, 0xf7, + 0x59, 0xd1, 0x5f, 0xc0, 0x3e, 0x83, 0xcc, 0x46, 0xf6, 0xd5, + 0x45, 0x46, 0xe6, 0x4a, 0xe6, 0x26, 0x2b, 0xbe, 0x56, 0x28, + 0xa9, 0x39, 0x12, 0x4f, 0x18, 0xc9, 0x20, 0xa0, 0xbe, 0x81, + 0x8d, 0x4d, 0xb5, 0x5c, 0x8f, 0x79, 0x55, 0x6, 0xee, 0x9f, + 0x93, 0xfc, 0xbe, 0xf8, 0x3c, 0x67, 0xe1, 0x67, 0x16, 0xcb, + 0xd1, 0x15, 0x8d, 0xde, 0xd4, 0x20, 0xb3, 0x1, 0x54, 0x18, + 0x66, 0xc5, 0x24, 0x2f, 0xd, 0x88, 0xef, 0x32, 0x3f, 0x74, + 0xd9, 0x56, 0x74, 0x1f, 0x17, 0xa7, 0xbb, 0xfe, 0xdf, 0xf, + 0x6c, 0x5f, 0x93, 0x77, 0x27, 0xf5, 0xae, 0x27, 0x52, 0x8e, + 0x3b, 0x99, 0xb6, 0xea, 0x44, 0x65, 0x1d, 0xa, 0x3b, 0x13, + 0x34, 0xf7, 0xf7, 0xbe, 0x20, + }, +} + +var privateKey = &Block{Type: "RSA PRIVATE KEY", + Headers: map[string]string{"DEK-Info": "DES-EDE3-CBC,80C7C7A09690757A", "Proc-Type": "4,ENCRYPTED"}, + Bytes: []uint8{0x79, 0xa, 0x79, 0x66, 0x41, 0xfa, 0xb, + 0x21, 0xc1, 0xcf, 0xb0, 0x59, 0x7d, 0x43, 0xf1, 0xc8, 0xb0, + 0x82, 0x99, 0xfb, 0x6c, 0x4, 0x9e, 0xc7, 0x96, 0xa1, 0x9b, + 0xf0, 0xb7, 0x76, 0xd5, 0xc4, 0xb0, 0x9f, 0x35, 0x99, 0xe3, + 0xf4, 0x88, 0x96, 0x1c, 0xab, 0x52, 0xdb, 0x1f, 0xc3, 0xb5, + 0x51, 0xd9, 0x34, 0xf0, 0x3, 0xea, 0x1d, 0xa3, 0xd7, 0xb1, + 0xec, 0x67, 0x71, 0x39, 0x36, 0x87, 0xf2, 0x86, 0x45, 0xba, + 0x62, 0x11, 0xa2, 0x21, 0x23, 0x1e, 0xc9, 0x3c, 0x53, 0xb0, + 0x61, 0x9e, 0xda, 0x7e, 0x7a, 0x49, 0xf, 0x3f, 0xbf, 0x71, + 0xba, 0x79, 0xcd, 0xee, 0x16, 0xfb, 0x86, 0x48, 0x6b, 0xc8, + 0x60, 0xd0, 0x66, 0x0, 0x3b, 0xb3, 0x67, 0x10, 0x1d, 0xe, + 0xf5, 0xbf, 0x78, 0x30, 0x4f, 0x60, 0x40, 0x8, 0x7, 0xed, + 0xdb, 0xa8, 0xc2, 0x8d, 0xb3, 0x35, 0xc2, 0x3, 0x73, 0x6, + 0x72, 0x7c, 0x33, 0x44, 0x73, 0x9f, 0xaf, 0x18, 0x5a, 0xa6, + 0x8f, 0xb5, 0xd0, 0x6e, 0xf2, 0xa6, 0xff, 0xcd, 0x54, 0x79, + 0x24, 0x1d, 0x66, 0x9e, 0xab, 0xd3, 0x49, 0xb1, 0x6c, 0x7c, + 0x5e, 0x72, 0x31, 0xce, 0xa8, 0xd9, 0x64, 0x3d, 0x8c, 0x90, + 0xa5, 0xe8, 0xac, 0xa9, 0x9c, 0xc9, 0x7e, 0x6, 0x92, 0xfe, + 0x76, 0x5b, 0xdd, 0x8d, 0x85, 0x4a, 0xe2, 0x5f, 0x65, 0x62, + 0xc0, 0x75, 0x1e, 0xcd, 0xdd, 0xfb, 0x30, 0xc1, 0x6d, 0x6c, + 0x68, 0xb3, 0xcd, 0x7c, 0xcf, 0x38, 0x52, 0xf3, 0xfc, 0xba, + 0x78, 0x87, 0xe2, 0x48, 0x6d, 0xbb, 0x72, 0xaa, 0xdb, 0x85, + 0x44, 0x5b, 0x9a, 0x8, 0x92, 0x7c, 0x35, 0xe3, 0x9d, 0xc2, + 0xb8, 0x9b, 0xfa, 0x4b, 0x71, 0xe, 0xe6, 0x98, 0x12, 0xfa, + 0x98, 0x53, 0xab, 0xdc, 0xf8, 0xf8, 0x8d, 0xea, 0x73, 0x80, + 0xe8, 0x85, 0x44, 0xbe, 0x8c, 0x28, 0x4f, 0xff, 0x87, 0xad, + 0x9f, 0x66, 0xf2, 0x35, 0x12, 0xb6, 0x2b, 0xf1, 0x5b, 0xa7, + 0x1c, 0x4c, 0xa4, 0x80, 0xa3, 0x61, 0xa, 0x10, 0xef, 0x6f, + 0x54, 0x4e, 0xe8, 0xb1, 0x1f, 0xa8, 0xa0, 0x6e, 0xf8, 0x8b, + 0xf0, 0x4c, 0x1e, 0x9f, 0x6, 0x15, 0xb7, 0x4c, 0x22, 0xcc, + 0x9d, 0x53, 0xd8, 0x56, 0x86, 0x30, 0x3, 0xe7, 0xc7, 0x35, + 0x39, 0x99, 0x66, 0x6f, 0x1, 0x12, 0x89, 0xe5, 0x20, 0xc7, + 0xc8, 0xf8, 0xa9, 0x2d, 0x8d, 0x65, 0x20, 0xe0, 0xe8, 0x9a, + 0xf8, 0xd2, 0x3a, 0xbc, 0xa7, 0xf, 0xa4, 0x5a, 0x31, 0x85, + 0x29, 0xe5, 0x1a, 0x0, 0xad, 0xbd, 0xf1, 0x1a, 0xab, 0x9f, + 0xc7, 0xe2, 0x41, 0xc9, 0xa7, 0xde, 0x5e, 0x3a, 0x99, 0xc5, + 0xb0, 0xbb, 0xe2, 0xc3, 0x6b, 0x1e, 0x1c, 0xd9, 0x33, 0xfc, + 0xbc, 0x49, 0x31, 0x13, 0xf5, 0x5f, 0x8d, 0xbc, 0xf8, 0x3a, + 0x58, 0x5b, 0xf4, 0x3, 0xff, 0x42, 0x54, 0xb4, 0x94, 0xae, + 0xb, 0xfa, 0xbd, 0x15, 0x41, 0xcb, 0xe9, 0x12, 0x37, 0xbb, + 0xf0, 0x2c, 0xae, 0xff, 0x29, 0x9, 0xbe, 0x3e, 0xf, 0xb8, + 0xb5, 0x8d, 0x6f, 0xc0, 0x30, 0x87, 0x49, 0x4e, 0xdc, 0x9c, + 0x38, 0x6b, 0x1b, 0x3f, 0x14, 0x0, 0xef, 0x21, 0xcc, 0x5a, + 0xe8, 0xc7, 0x99, 0xe6, 0xf5, 0x5, 0x38, 0x87, 0x70, 0xbd, + 0xec, 0xdc, 0xaa, 0xc9, 0xbb, 0x5b, 0xc3, 0x56, 0xbb, 0x21, + 0xbf, 0x17, 0xe7, 0xcf, 0xbd, 0x4a, 0x4d, 0xe3, 0x67, 0x64, + 0x7f, 0x14, 0x6c, 0xea, 0x93, 0xe6, 0x2e, 0xdf, 0x3c, 0x6c, + 0x97, 0xb5, 0x8d, 0x57, 0x25, 0xd7, 0x5c, 0x6c, 0x8f, 0xd0, + 0xea, 0xd5, 0xdf, 0xc0, 0x71, 0x6c, 0x65, 0xcd, 0xb4, 0x4f, + 0x34, 0x9d, 0xb1, 0x52, 0xc1, 0xb7, 0x9, 0x2c, 0x51, 0xa7, + 0x29, 0x6c, 0x3a, 0x20, 0x70, 0x45, 0x4c, 0x72, 0xd9, 0xcd, + 0xac, 0x1f, 0x5, 0x22, 0xb0, 0x77, 0xbd, 0x91, 0x2f, 0xf5, + 0xd, 0x19, 0xd5, 0x57, 0x98, 0xee, 0x79, 0x93, 0xa4, 0x5f, + 0xba, 0x6b, 0xec, 0xf6, 0x3f, 0xb6, 0x9c, 0x2f, 0xb8, 0xfa, + 0x3, 0xa3, 0xeb, 0xdf, 0xea, 0xbc, 0x3f, 0xd0, 0x8f, 0x88, + 0xf0, 0xbb, 0xcc, 0x5a, 0x27, 0x57, 0x3b, 0x95, 0x3f, 0x20, + 0x7c, 0x19, 0xc8, 0x46, 0x47, 0x68, 0x72, 0xb7, 0x28, 0x8e, + 0x56, 0x9b, 0x83, 0xf7, 0xe9, 0x3c, 0x85, 0xcb, 0xb0, 0x65, + 0x60, 0x1a, 0x52, 0x85, 0x6d, 0x58, 0x84, 0x39, 0xd9, 0xa2, + 0x92, 0xd2, 0x9d, 0x7d, 0x1b, 0xdf, 0x61, 0x85, 0xbf, 0x88, + 0x54, 0x3, 0x42, 0xe1, 0xa9, 0x24, 0x74, 0x75, 0x78, 0x48, + 0xff, 0x22, 0xec, 0xc5, 0x4d, 0x66, 0x17, 0xd4, 0x9a, + }, +} + +var privateKey2 = &Block{ + Type: "RSA PRIVATE KEY", + Headers: map[string]string{ + "Proc-Type": "4,ENCRYPTED", + "DEK-Info": "AES-128-CBC,BFCD243FEDBB40A4AA6DDAA1335473A4", + "Content-Domain": "RFC822", + }, + Bytes: []uint8{ + 0xa8, 0x35, 0xcc, 0x2b, 0xb9, 0xcb, 0x21, 0xab, 0xc0, + 0x9d, 0x76, 0x61, 0x0, 0xf4, 0x81, 0xad, 0x69, 0xd2, + 0xc0, 0x42, 0x41, 0x3b, 0xe4, 0x3c, 0xaf, 0x59, 0x5e, + 0x6d, 0x2a, 0x3c, 0x9c, 0xa1, 0xa4, 0x5e, 0x68, 0x37, + 0xc4, 0x8c, 0x70, 0x1c, 0xa9, 0x18, 0xe6, 0xc2, 0x2b, + 0x8a, 0x91, 0xdc, 0x2d, 0x1f, 0x8, 0x23, 0x39, 0xf1, + 0x4b, 0x8b, 0x1b, 0x2f, 0x46, 0xb, 0xb2, 0x26, 0xba, + 0x4f, 0x40, 0x80, 0x39, 0xc4, 0xb1, 0xcb, 0x3b, 0xb4, + 0x65, 0x3f, 0x1b, 0xb2, 0xf7, 0x8, 0xd2, 0xc6, 0xd5, + 0xa8, 0x9f, 0x23, 0x69, 0xb6, 0x3d, 0xf9, 0xac, 0x1c, + 0xb3, 0x13, 0x87, 0x64, 0x4, 0x37, 0xdb, 0x40, 0xc8, + 0x82, 0xc, 0xd0, 0xf8, 0x21, 0x7c, 0xdc, 0xbd, 0x9, 0x4, + 0x20, 0x16, 0xb0, 0x97, 0xe2, 0x6d, 0x56, 0x1d, 0xe3, + 0xec, 0xf0, 0xfc, 0xe2, 0x56, 0xad, 0xa4, 0x3, 0x70, + 0x6d, 0x63, 0x3c, 0x1, 0xbe, 0x3e, 0x28, 0x38, 0x6f, + 0xc0, 0xe6, 0xfd, 0x85, 0xd1, 0x53, 0xa8, 0x9b, 0xcb, + 0xd4, 0x4, 0xb1, 0x73, 0xb9, 0x73, 0x32, 0xd6, 0x7a, + 0xc6, 0x29, 0x25, 0xa5, 0xda, 0x17, 0x93, 0x7a, 0x10, + 0xe8, 0x41, 0xfb, 0xa5, 0x17, 0x20, 0xf8, 0x4e, 0xe9, + 0xe3, 0x8f, 0x51, 0x20, 0x13, 0xbb, 0xde, 0xb7, 0x93, + 0xae, 0x13, 0x8a, 0xf6, 0x9, 0xf4, 0xa6, 0x41, 0xe0, + 0x2b, 0x51, 0x1a, 0x30, 0x38, 0xd, 0xb1, 0x3b, 0x67, + 0x87, 0x64, 0xf5, 0xca, 0x32, 0x67, 0xd1, 0xc8, 0xa5, + 0x3d, 0x23, 0x72, 0xc4, 0x6, 0xaf, 0x8f, 0x7b, 0x26, + 0xac, 0x3c, 0x75, 0x91, 0xa1, 0x0, 0x13, 0xc6, 0x5c, + 0x49, 0xd5, 0x3c, 0xe7, 0xb2, 0xb2, 0x99, 0xe0, 0xd5, + 0x25, 0xfa, 0xe2, 0x12, 0x80, 0x37, 0x85, 0xcf, 0x92, + 0xca, 0x1b, 0x9f, 0xf3, 0x4e, 0xd8, 0x80, 0xef, 0x3c, + 0xce, 0xcd, 0xf5, 0x90, 0x9e, 0xf9, 0xa7, 0xb2, 0xc, + 0x49, 0x4, 0xf1, 0x9, 0x8f, 0xea, 0x63, 0xd2, 0x70, + 0xbb, 0x86, 0xbf, 0x34, 0xab, 0xb2, 0x3, 0xb1, 0x59, + 0x33, 0x16, 0x17, 0xb0, 0xdb, 0x77, 0x38, 0xf4, 0xb4, + 0x94, 0xb, 0x25, 0x16, 0x7e, 0x22, 0xd4, 0xf9, 0x22, + 0xb9, 0x78, 0xa3, 0x4, 0x84, 0x4, 0xd2, 0xda, 0x84, + 0x2d, 0x63, 0xdd, 0xf8, 0x50, 0x6a, 0xf6, 0xe3, 0xf5, + 0x65, 0x40, 0x7c, 0xa9, + }, +} + +var pemPrivateKey2 = testingKey(`-----BEGIN RSA TESTING KEY----- +Proc-Type: 4,ENCRYPTED +Content-Domain: RFC822 +DEK-Info: AES-128-CBC,BFCD243FEDBB40A4AA6DDAA1335473A4 + +qDXMK7nLIavAnXZhAPSBrWnSwEJBO+Q8r1lebSo8nKGkXmg3xIxwHKkY5sIripHc +LR8IIznxS4sbL0YLsia6T0CAOcSxyzu0ZT8bsvcI0sbVqJ8jabY9+awcsxOHZAQ3 +20DIggzQ+CF83L0JBCAWsJfibVYd4+zw/OJWraQDcG1jPAG+Pig4b8Dm/YXRU6ib +y9QEsXO5czLWesYpJaXaF5N6EOhB+6UXIPhO6eOPUSATu963k64TivYJ9KZB4CtR +GjA4DbE7Z4dk9coyZ9HIpT0jcsQGr497Jqw8dZGhABPGXEnVPOeyspng1SX64hKA +N4XPksobn/NO2IDvPM7N9ZCe+aeyDEkE8QmP6mPScLuGvzSrsgOxWTMWF7Dbdzj0 +tJQLJRZ+ItT5Irl4owSEBNLahC1j3fhQavbj9WVAfKk= +-----END RSA TESTING KEY----- +`) + +func TestBadEncode(t *testing.T) { + b := &Block{Type: "BAD", Headers: map[string]string{"X:Y": "Z"}} + var buf bytes.Buffer + if err := Encode(&buf, b); err == nil { + t.Fatalf("Encode did not report invalid header") + } + if buf.Len() != 0 { + t.Fatalf("Encode wrote data before reporting invalid header") + } + if data := EncodeToMemory(b); data != nil { + t.Fatalf("EncodeToMemory returned non-nil data") + } +} + +func testingKey(s string) string { return strings.ReplaceAll(s, "TESTING KEY", "PRIVATE KEY") } diff --git a/src/encoding/xml/atom_test.go b/src/encoding/xml/atom_test.go new file mode 100644 index 0000000..f394dab --- /dev/null +++ b/src/encoding/xml/atom_test.go @@ -0,0 +1,56 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import "time" + +var atomValue = &Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Example Feed", + Link: []Link{{Href: "http://example.org/"}}, + Updated: ParseTime("2003-12-13T18:30:02Z"), + Author: Person{Name: "John Doe"}, + ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + + Entry: []Entry{ + { + Title: "Atom-Powered Robots Run Amok", + Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, + ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + Updated: ParseTime("2003-12-13T18:30:02Z"), + Summary: NewText("Some text."), + }, + }, +} + +var atomXML = `` + + `<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` + + `<title>Example Feed</title>` + + `<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` + + `<link href="http://example.org/"></link>` + + `<author><name>John Doe</name><uri></uri><email></email></author>` + + `<entry>` + + `<title>Atom-Powered Robots Run Amok</title>` + + `<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>` + + `<link href="http://example.org/2003/12/13/atom03"></link>` + + `<updated>2003-12-13T18:30:02Z</updated>` + + `<author><name></name><uri></uri><email></email></author>` + + `<summary>Some text.</summary>` + + `</entry>` + + `</feed>` + +func ParseTime(str string) time.Time { + t, err := time.Parse(time.RFC3339, str) + if err != nil { + panic(err) + } + return t +} + +func NewText(text string) Text { + return Text{ + Body: text, + } +} diff --git a/src/encoding/xml/example_marshaling_test.go b/src/encoding/xml/example_marshaling_test.go new file mode 100644 index 0000000..9f9e801 --- /dev/null +++ b/src/encoding/xml/example_marshaling_test.go @@ -0,0 +1,84 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "log" + "strings" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + var s string + if err := d.DecodeElement(&s, &start); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalXML(e *xml.Encoder, start xml.StartElement) error { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + return e.EncodeElement(s, start) +} + +func Example_customMarshalXML() { + blob := ` + <animals> + <animal>gopher</animal> + <animal>armadillo</animal> + <animal>zebra</animal> + <animal>unknown</animal> + <animal>gopher</animal> + <animal>bee</animal> + <animal>gopher</animal> + <animal>zebra</animal> + </animals>` + var zoo struct { + Animals []Animal `xml:"animal"` + } + if err := xml.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo.Animals { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/src/encoding/xml/example_test.go b/src/encoding/xml/example_test.go new file mode 100644 index 0000000..21b48de --- /dev/null +++ b/src/encoding/xml/example_test.go @@ -0,0 +1,151 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "os" +) + +func ExampleMarshalIndent() { + type Address struct { + City, State string + } + type Person struct { + XMLName xml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + output, err := xml.MarshalIndent(v, " ", " ") + if err != nil { + fmt.Printf("error: %v\n", err) + } + + os.Stdout.Write(output) + // Output: + // <person id="13"> + // <name> + // <first>John</first> + // <last>Doe</last> + // </name> + // <age>42</age> + // <Married>false</Married> + // <City>Hanga Roa</City> + // <State>Easter Island</State> + // <!-- Need more details. --> + // </person> +} + +func ExampleEncoder() { + type Address struct { + City, State string + } + type Person struct { + XMLName xml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + enc := xml.NewEncoder(os.Stdout) + enc.Indent(" ", " ") + if err := enc.Encode(v); err != nil { + fmt.Printf("error: %v\n", err) + } + + // Output: + // <person id="13"> + // <name> + // <first>John</first> + // <last>Doe</last> + // </name> + // <age>42</age> + // <Married>false</Married> + // <City>Hanga Roa</City> + // <State>Easter Island</State> + // <!-- Need more details. --> + // </person> +} + +// This example demonstrates unmarshaling an XML excerpt into a value with +// some preset fields. Note that the Phone field isn't modified and that +// the XML <Company> element is ignored. Also, the Groups field is assigned +// considering the element path provided in its tag. +func ExampleUnmarshal() { + type Email struct { + Where string `xml:"where,attr"` + Addr string + } + type Address struct { + City, State string + } + type Result struct { + XMLName xml.Name `xml:"Person"` + Name string `xml:"FullName"` + Phone string + Email []Email + Groups []string `xml:"Group>Value"` + Address + } + v := Result{Name: "none", Phone: "none"} + + data := ` + <Person> + <FullName>Grace R. Emlin</FullName> + <Company>Example Inc.</Company> + <Email where="home"> + <Addr>gre@example.com</Addr> + </Email> + <Email where='work'> + <Addr>gre@work.com</Addr> + </Email> + <Group> + <Value>Friends</Value> + <Value>Squash</Value> + </Group> + <City>Hanga Roa</City> + <State>Easter Island</State> + </Person> + ` + err := xml.Unmarshal([]byte(data), &v) + if err != nil { + fmt.Printf("error: %v", err) + return + } + fmt.Printf("XMLName: %#v\n", v.XMLName) + fmt.Printf("Name: %q\n", v.Name) + fmt.Printf("Phone: %q\n", v.Phone) + fmt.Printf("Email: %v\n", v.Email) + fmt.Printf("Groups: %v\n", v.Groups) + fmt.Printf("Address: %v\n", v.Address) + // Output: + // XMLName: xml.Name{Space:"", Local:"Person"} + // Name: "Grace R. Emlin" + // Phone: "none" + // Email: [{home gre@example.com} {work gre@work.com}] + // Groups: [Friends Squash] + // Address: {Hanga Roa Easter Island} +} diff --git a/src/encoding/xml/example_text_marshaling_test.go b/src/encoding/xml/example_text_marshaling_test.go new file mode 100644 index 0000000..2549cb1 --- /dev/null +++ b/src/encoding/xml/example_text_marshaling_test.go @@ -0,0 +1,79 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "log" + "strings" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalXML() { + blob := ` + <sizes> + <size>small</size> + <size>regular</size> + <size>large</size> + <size>unrecognized</size> + <size>small</size> + <size>normal</size> + <size>small</size> + <size>large</size> + </sizes>` + var inventory struct { + Sizes []Size `xml:"size"` + } + if err := xml.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory.Sizes { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go new file mode 100644 index 0000000..05b5542 --- /dev/null +++ b/src/encoding/xml/marshal.go @@ -0,0 +1,1131 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bufio" + "bytes" + "encoding" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" +) + +const ( + // Header is a generic XML header suitable for use with the output of [Marshal]. + // This is not automatically added to any output of this package, + // it is provided as a convenience. + Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" +) + +// Marshal returns the XML encoding of v. +// +// Marshal handles an array or slice by marshaling each of the elements. +// Marshal handles a pointer by marshaling the value it points at or, if the +// pointer is nil, by writing nothing. Marshal handles an interface value by +// marshaling the value it contains or, if the interface value is nil, by +// writing nothing. Marshal handles all other data by writing one or more XML +// elements containing the data. +// +// The name for the XML elements is taken from, in order of preference: +// - the tag on the XMLName field, if the data is a struct +// - the value of the XMLName field of type [Name] +// - the tag of the struct field used to obtain the data +// - the name of the struct field used to obtain the data +// - the name of the marshaled type +// +// The XML element for a struct contains marshaled elements for each of the +// exported fields of the struct, with these exceptions: +// - the XMLName field, described above, is omitted. +// - a field with tag "-" is omitted. +// - a field with tag "name,attr" becomes an attribute with +// the given name in the XML element. +// - a field with tag ",attr" becomes an attribute with the +// field name in the XML element. +// - a field with tag ",chardata" is written as character data, +// not as an XML element. +// - a field with tag ",cdata" is written as character data +// wrapped in one or more <![CDATA[ ... ]]> tags, not as an XML element. +// - a field with tag ",innerxml" is written verbatim, not subject +// to the usual marshaling procedure. +// - a field with tag ",comment" is written as an XML comment, not +// subject to the usual marshaling procedure. It must not contain +// the "--" string within it. +// - a field with a tag including the "omitempty" option is omitted +// if the field value is empty. The empty values are false, 0, any +// nil pointer or interface value, and any array, slice, map, or +// string of length zero. +// - an anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// - a field implementing [Marshaler] is written by calling its MarshalXML +// method. +// - a field implementing [encoding.TextMarshaler] is written by encoding the +// result of its MarshalText method as text. +// +// If a field uses a tag "a>b>c", then the element c will be nested inside +// parent elements a and b. Fields that appear next to each other that name +// the same parent will be enclosed in one XML element. +// +// If the XML name for a struct field is defined by both the field tag and the +// struct's XMLName field, the names must match. +// +// See [MarshalIndent] for an example. +// +// Marshal will return an error if asked to marshal a channel, function, or map. +func Marshal(v any) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// Marshaler is the interface implemented by objects that can marshal +// themselves into valid XML elements. +// +// MarshalXML encodes the receiver as zero or more XML elements. +// By convention, arrays or slices are typically encoded as a sequence +// of elements, one per entry. +// Using start as the element tag is not required, but doing so +// will enable [Unmarshal] to match the XML elements to the correct +// struct field. +// One common implementation strategy is to construct a separate +// value with a layout corresponding to the desired XML and then +// to encode it using e.EncodeElement. +// Another common strategy is to use repeated calls to e.EncodeToken +// to generate the XML output one token at a time. +// The sequence of encoded tokens must make up zero or more valid +// XML elements. +type Marshaler interface { + MarshalXML(e *Encoder, start StartElement) error +} + +// MarshalerAttr is the interface implemented by objects that can marshal +// themselves into valid XML attributes. +// +// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. +// Using name as the attribute name is not required, but doing so +// will enable [Unmarshal] to match the attribute to the correct +// struct field. +// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute +// will be generated in the output. +// MarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type MarshalerAttr interface { + MarshalXMLAttr(name Name) (Attr, error) +} + +// MarshalIndent works like [Marshal], but each XML element begins on a new +// indented line that starts with prefix and is followed by one or more +// copies of indent according to the nesting depth. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + enc.Indent(prefix, indent) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// An Encoder writes XML data to an output stream. +type Encoder struct { + p printer +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + e := &Encoder{printer{w: bufio.NewWriter(w)}} + e.p.encoder = e + return e +} + +// Indent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (enc *Encoder) Indent(prefix, indent string) { + enc.p.prefix = prefix + enc.p.indent = indent +} + +// Encode writes the XML encoding of v to the stream. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// Encode calls [Encoder.Flush] before returning. +func (enc *Encoder) Encode(v any) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +// EncodeElement writes the XML encoding of v to the stream, +// using start as the outermost tag in the encoding. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// EncodeElement calls [Encoder.Flush] before returning. +func (enc *Encoder) EncodeElement(v any, start StartElement) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +var ( + begComment = []byte("<!--") + endComment = []byte("-->") + endProcInst = []byte("?>") +) + +// EncodeToken writes the given XML token to the stream. +// It returns an error if [StartElement] and [EndElement] tokens are not properly matched. +// +// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation +// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked +// during those), and those will call Flush when finished. +// Callers that create an Encoder and then invoke EncodeToken directly, without +// using Encode or EncodeElement, need to call Flush when finished to ensure +// that the XML is written to the underlying writer. +// +// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token +// in the stream. +func (enc *Encoder) EncodeToken(t Token) error { + + p := &enc.p + switch t := t.(type) { + case StartElement: + if err := p.writeStart(&t); err != nil { + return err + } + case EndElement: + if err := p.writeEnd(t.Name); err != nil { + return err + } + case CharData: + escapeText(p, t, false) + case Comment: + if bytes.Contains(t, endComment) { + return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") + } + p.WriteString("<!--") + p.Write(t) + p.WriteString("-->") + return p.cachedWriteError() + case ProcInst: + // First token to be encoded which is also a ProcInst with target of xml + // is the xml declaration. The only ProcInst where target of xml is allowed. + if t.Target == "xml" && p.w.Buffered() != 0 { + return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") + } + if !isNameString(t.Target) { + return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target") + } + if bytes.Contains(t.Inst, endProcInst) { + return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker") + } + p.WriteString("<?") + p.WriteString(t.Target) + if len(t.Inst) > 0 { + p.WriteByte(' ') + p.Write(t.Inst) + } + p.WriteString("?>") + case Directive: + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") + } + p.WriteString("<!") + p.Write(t) + p.WriteString(">") + default: + return fmt.Errorf("xml: EncodeToken of invalid token type") + + } + return p.cachedWriteError() +} + +// isValidDirective reports whether dir is a valid directive text, +// meaning angle brackets are matched, ignoring comments and strings. +func isValidDirective(dir Directive) bool { + var ( + depth int + inquote uint8 + incomment bool + ) + for i, c := range dir { + switch { + case incomment: + if c == '>' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + +// Flush flushes any buffered XML to the underlying writer. +// See the [Encoder.EncodeToken] documentation for details about when it is necessary. +func (enc *Encoder) Flush() error { + return enc.p.w.Flush() +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (enc *Encoder) Close() error { + return enc.p.Close() +} + +type printer struct { + w *bufio.Writer + encoder *Encoder + seq int + indent string + prefix string + depth int + indentedIn bool + putNewline bool + attrNS map[string]string // map prefix -> name space + attrPrefix map[string]string // map name space -> prefix + prefixes []string + tags []Name + closed bool + err error +} + +// createAttrPrefix finds the name space prefix attribute to use for the given name space, +// defining a new prefix if necessary. It returns the prefix. +func (p *printer) createAttrPrefix(url string) string { + if prefix := p.attrPrefix[url]; prefix != "" { + return prefix + } + + // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" + // and must be referred to that way. + // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", + // but users should not be trying to use that one directly - that's our job.) + if url == xmlURL { + return xmlPrefix + } + + // Need to define a new name space. + if p.attrPrefix == nil { + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) + } + + // Pick a name. We try to use the final element of the path + // but fall back to _. + prefix := strings.TrimRight(url, "/") + if i := strings.LastIndex(prefix, "/"); i >= 0 { + prefix = prefix[i+1:] + } + if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { + prefix = "_" + } + // xmlanything is reserved and any variant of it regardless of + // case should be matched, so: + // (('X'|'x') ('M'|'m') ('L'|'l')) + // See Section 2.3 of https://www.w3.org/TR/REC-xml/ + if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") { + prefix = "_" + prefix + } + if p.attrNS[prefix] != "" { + // Name is taken. Find a better one. + for p.seq++; ; p.seq++ { + if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { + prefix = id + break + } + } + } + + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url + + p.WriteString(`xmlns:`) + p.WriteString(prefix) + p.WriteString(`="`) + EscapeText(p, []byte(url)) + p.WriteString(`" `) + + p.prefixes = append(p.prefixes, prefix) + + return prefix +} + +// deleteAttrPrefix removes an attribute name space prefix. +func (p *printer) deleteAttrPrefix(prefix string) { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) +} + +func (p *printer) markPrefix() { + p.prefixes = append(p.prefixes, "") +} + +func (p *printer) popPrefix() { + for len(p.prefixes) > 0 { + prefix := p.prefixes[len(p.prefixes)-1] + p.prefixes = p.prefixes[:len(p.prefixes)-1] + if prefix == "" { + break + } + p.deleteAttrPrefix(prefix) + } +} + +var ( + marshalerType = reflect.TypeFor[Marshaler]() + marshalerAttrType = reflect.TypeFor[MarshalerAttr]() + textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() +) + +// marshalValue writes one or more XML elements representing val. +// If val was obtained from a struct field, finfo must have its details. +func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error { + if startTemplate != nil && startTemplate.Name.Local == "" { + return fmt.Errorf("xml: EncodeElement of StartElement with missing name") + } + + if !val.IsValid() { + return nil + } + if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) { + return nil + } + + // Drill into interfaces and pointers. + // This can turn into an infinite loop given a cyclic chain, + // but it matches the Go 1 behavior. + for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer { + if val.IsNil() { + return nil + } + val = val.Elem() + } + + kind := val.Kind() + typ := val.Type() + + // Check for marshaler. + if val.CanInterface() && typ.Implements(marshalerType) { + return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerType) { + return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Check for text marshaler. + if val.CanInterface() && typ.Implements(textMarshalerType) { + return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Slices and arrays iterate over the elements. They do not have an enclosing tag. + if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { + for i, n := 0, val.Len(); i < n; i++ { + if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil { + return err + } + } + return nil + } + + tinfo, err := getTypeInfo(typ) + if err != nil { + return err + } + + // Create start element. + // Precedence for the XML element name is: + // 0. startTemplate + // 1. XMLName field in underlying struct; + // 2. field name/tag in the struct field; and + // 3. type name + var start StartElement + + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if tinfo.xmlname != nil { + xmlname := tinfo.xmlname + if xmlname.name != "" { + start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name + } else { + fv := xmlname.value(val, dontInitNilPointers) + if v, ok := fv.Interface().(Name); ok && v.Local != "" { + start.Name = v + } + } + } + if start.Name.Local == "" && finfo != nil { + start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name + } + if start.Name.Local == "" { + name := typ.Name() + if i := strings.IndexByte(name, '['); i >= 0 { + // Truncate generic instantiation name. See issue 48318. + name = name[:i] + } + if name == "" { + return &UnsupportedTypeError{typ} + } + start.Name.Local = name + } + + // Attributes + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr == 0 { + continue + } + fv := finfo.value(val, dontInitNilPointers) + + if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) { + continue + } + + if fv.Kind() == reflect.Interface && fv.IsNil() { + continue + } + + name := Name{Space: finfo.xmlns, Local: finfo.name} + if err := p.marshalAttr(&start, name, fv); err != nil { + return err + } + } + + // If an empty name was found, namespace is overridden with an empty space + if tinfo.xmlname != nil && start.Name.Space == "" && + tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" && + len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" { + start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""}) + } + if err := p.writeStart(&start); err != nil { + return err + } + + if val.Kind() == reflect.Struct { + err = p.marshalStruct(tinfo, val) + } else { + s, b, err1 := p.marshalSimple(typ, val) + if err1 != nil { + err = err1 + } else if b != nil { + EscapeText(p, b) + } else { + p.EscapeString(s) + } + } + if err != nil { + return err + } + + if err := p.writeEnd(start.Name); err != nil { + return err + } + + return p.cachedWriteError() +} + +// marshalAttr marshals an attribute with the given name and value, adding to start.Attr. +func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error { + if val.CanInterface() && val.Type().Implements(marshalerAttrType) { + attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) { + attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + } + + if val.CanInterface() && val.Type().Implements(textMarshalerType) { + text, err := val.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + text, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + } + + // Dereference or skip nil pointer, interface values. + switch val.Kind() { + case reflect.Pointer, reflect.Interface: + if val.IsNil() { + return nil + } + val = val.Elem() + } + + // Walk slices. + if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + n := val.Len() + for i := 0; i < n; i++ { + if err := p.marshalAttr(start, name, val.Index(i)); err != nil { + return err + } + } + return nil + } + + if val.Type() == attrType { + start.Attr = append(start.Attr, val.Interface().(Attr)) + return nil + } + + s, b, err := p.marshalSimple(val.Type(), val) + if err != nil { + return err + } + if b != nil { + s = string(b) + } + start.Attr = append(start.Attr, Attr{name, s}) + return nil +} + +// defaultStart returns the default start element to use, +// given the reflect type, field info, and start template. +func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { + var start StartElement + // Precedence for the XML element name is as above, + // except that we do not look inside structs for the first field. + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if finfo != nil && finfo.name != "" { + start.Name.Local = finfo.name + start.Name.Space = finfo.xmlns + } else if typ.Name() != "" { + start.Name.Local = typ.Name() + } else { + // Must be a pointer to a named type, + // since it has the Marshaler methods. + start.Name.Local = typ.Elem().Name() + } + return start +} + +// marshalInterface marshals a Marshaler interface value. +func (p *printer) marshalInterface(val Marshaler, start StartElement) error { + // Push a marker onto the tag stack so that MarshalXML + // cannot close the XML tags that it did not open. + p.tags = append(p.tags, Name{}) + n := len(p.tags) + + err := val.MarshalXML(p.encoder, start) + if err != nil { + return err + } + + // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark. + if len(p.tags) > n { + return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local) + } + p.tags = p.tags[:n-1] + return nil +} + +// marshalTextInterface marshals a TextMarshaler interface value. +func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error { + if err := p.writeStart(&start); err != nil { + return err + } + text, err := val.MarshalText() + if err != nil { + return err + } + EscapeText(p, text) + return p.writeEnd(start.Name) +} + +// writeStart writes the given start element. +func (p *printer) writeStart(start *StartElement) error { + if start.Name.Local == "" { + return fmt.Errorf("xml: start tag with no name") + } + + p.tags = append(p.tags, start.Name) + p.markPrefix() + + p.writeIndent(1) + p.WriteByte('<') + p.WriteString(start.Name.Local) + + if start.Name.Space != "" { + p.WriteString(` xmlns="`) + p.EscapeString(start.Name.Space) + p.WriteByte('"') + } + + // Attributes + for _, attr := range start.Attr { + name := attr.Name + if name.Local == "" { + continue + } + p.WriteByte(' ') + if name.Space != "" { + p.WriteString(p.createAttrPrefix(name.Space)) + p.WriteByte(':') + } + p.WriteString(name.Local) + p.WriteString(`="`) + p.EscapeString(attr.Value) + p.WriteByte('"') + } + p.WriteByte('>') + return nil +} + +func (p *printer) writeEnd(name Name) error { + if name.Local == "" { + return fmt.Errorf("xml: end tag with no name") + } + if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" { + return fmt.Errorf("xml: end tag </%s> without start tag", name.Local) + } + if top := p.tags[len(p.tags)-1]; top != name { + if top.Local != name.Local { + return fmt.Errorf("xml: end tag </%s> does not match start tag <%s>", name.Local, top.Local) + } + return fmt.Errorf("xml: end tag </%s> in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space) + } + p.tags = p.tags[:len(p.tags)-1] + + p.writeIndent(-1) + p.WriteByte('<') + p.WriteByte('/') + p.WriteString(name.Local) + p.WriteByte('>') + p.popPrefix() + return nil +} + +func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) { + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.FormatInt(val.Int(), 10), nil, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(val.Uint(), 10), nil, nil + case reflect.Float32, reflect.Float64: + return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil + case reflect.String: + return val.String(), nil, nil + case reflect.Bool: + return strconv.FormatBool(val.Bool()), nil, nil + case reflect.Array: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // [...]byte + var bytes []byte + if val.CanAddr() { + bytes = val.Bytes() + } else { + bytes = make([]byte, val.Len()) + reflect.Copy(reflect.ValueOf(bytes), val) + } + return "", bytes, nil + case reflect.Slice: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // []byte + return "", val.Bytes(), nil + } + return "", nil, &UnsupportedTypeError{typ} +} + +var ddBytes = []byte("--") + +// indirect drills into interfaces and pointers, returning the pointed-at value. +// If it encounters a nil interface or pointer, indirect returns that nil value. +// This can turn into an infinite loop given a cyclic chain, +// but it matches the Go 1 behavior. +func indirect(vf reflect.Value) reflect.Value { + for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer { + if vf.IsNil() { + return vf + } + vf = vf.Elem() + } + return vf +} + +func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { + s := parentStack{p: p} + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr != 0 { + continue + } + vf := finfo.value(val, dontInitNilPointers) + if !vf.IsValid() { + // The field is behind an anonymous struct field that's + // nil. Skip it. + continue + } + + switch finfo.flags & fMode { + case fCDATA, fCharData: + emit := EscapeText + if finfo.flags&fMode == fCDATA { + emit = emitCDATA + } + if err := s.trim(finfo.parents); err != nil { + return err + } + if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { + data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + if vf.CanAddr() { + pv := vf.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + data, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + } + + var scratch [64]byte + vf = indirect(vf) + switch vf.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil { + return err + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil { + return err + } + case reflect.Float32, reflect.Float64: + if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil { + return err + } + case reflect.Bool: + if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil { + return err + } + case reflect.String: + if err := emit(p, []byte(vf.String())); err != nil { + return err + } + case reflect.Slice: + if elem, ok := vf.Interface().([]byte); ok { + if err := emit(p, elem); err != nil { + return err + } + } + } + continue + + case fComment: + if err := s.trim(finfo.parents); err != nil { + return err + } + vf = indirect(vf) + k := vf.Kind() + if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { + return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) + } + if vf.Len() == 0 { + continue + } + p.writeIndent(0) + p.WriteString("<!--") + dashDash := false + dashLast := false + switch k { + case reflect.String: + s := vf.String() + dashDash = strings.Contains(s, "--") + dashLast = s[len(s)-1] == '-' + if !dashDash { + p.WriteString(s) + } + case reflect.Slice: + b := vf.Bytes() + dashDash = bytes.Contains(b, ddBytes) + dashLast = b[len(b)-1] == '-' + if !dashDash { + p.Write(b) + } + default: + panic("can't happen") + } + if dashDash { + return fmt.Errorf(`xml: comments must not contain "--"`) + } + if dashLast { + // "--->" is invalid grammar. Make it "- -->" + p.WriteByte(' ') + } + p.WriteString("-->") + continue + + case fInnerXML: + vf = indirect(vf) + iface := vf.Interface() + switch raw := iface.(type) { + case []byte: + p.Write(raw) + continue + case string: + p.WriteString(raw) + continue + } + + case fElement, fElement | fAny: + if err := s.trim(finfo.parents); err != nil { + return err + } + if len(finfo.parents) > len(s.stack) { + if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() { + if err := s.push(finfo.parents[len(s.stack):]); err != nil { + return err + } + } + } + } + if err := p.marshalValue(vf, finfo, nil); err != nil { + return err + } + } + s.trim(nil) + return p.cachedWriteError() +} + +// Write implements io.Writer +func (p *printer) Write(b []byte) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.Write(b) + } + return n, p.err +} + +// WriteString implements io.StringWriter +func (p *printer) WriteString(s string) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.WriteString(s) + } + return n, p.err +} + +// WriteByte implements io.ByteWriter +func (p *printer) WriteByte(c byte) error { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + p.err = p.w.WriteByte(c) + } + return p.err +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (p *printer) Close() error { + if p.closed { + return nil + } + p.closed = true + if err := p.w.Flush(); err != nil { + return err + } + if len(p.tags) > 0 { + return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local) + } + return nil +} + +// return the bufio Writer's cached write error +func (p *printer) cachedWriteError() error { + _, err := p.Write(nil) + return err +} + +func (p *printer) writeIndent(depthDelta int) { + if len(p.prefix) == 0 && len(p.indent) == 0 { + return + } + if depthDelta < 0 { + p.depth-- + if p.indentedIn { + p.indentedIn = false + return + } + p.indentedIn = false + } + if p.putNewline { + p.WriteByte('\n') + } else { + p.putNewline = true + } + if len(p.prefix) > 0 { + p.WriteString(p.prefix) + } + if len(p.indent) > 0 { + for i := 0; i < p.depth; i++ { + p.WriteString(p.indent) + } + } + if depthDelta > 0 { + p.depth++ + p.indentedIn = true + } +} + +type parentStack struct { + p *printer + stack []string +} + +// trim updates the XML context to match the longest common prefix of the stack +// and the given parents. A closing tag will be written for every parent +// popped. Passing a zero slice or nil will close all the elements. +func (s *parentStack) trim(parents []string) error { + split := 0 + for ; split < len(parents) && split < len(s.stack); split++ { + if parents[split] != s.stack[split] { + break + } + } + for i := len(s.stack) - 1; i >= split; i-- { + if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil { + return err + } + } + s.stack = s.stack[:split] + return nil +} + +// push adds parent elements to the stack and writes open tags. +func (s *parentStack) push(parents []string) error { + for i := 0; i < len(parents); i++ { + if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil { + return err + } + } + s.stack = append(s.stack, parents...) + return nil +} + +// UnsupportedTypeError is returned when [Marshal] encounters a type +// that cannot be converted into XML. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "xml: unsupported type: " + e.Type.String() +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.Interface, reflect.Pointer: + return v.IsZero() + } + return false +} diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go new file mode 100644 index 0000000..f6bcc7f --- /dev/null +++ b/src/encoding/xml/marshal_test.go @@ -0,0 +1,2591 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +type DriveType int + +const ( + HyperDrive DriveType = iota + ImprobabilityDrive +) + +type Passenger struct { + Name []string `xml:"name"` + Weight float32 `xml:"weight"` +} + +type Ship struct { + XMLName struct{} `xml:"spaceship"` + + Name string `xml:"name,attr"` + Pilot string `xml:"pilot,attr"` + Drive DriveType `xml:"drive"` + Age uint `xml:"age"` + Passenger []*Passenger `xml:"passenger"` + secret string +} + +type NamedType string + +type Port struct { + XMLName struct{} `xml:"port"` + Type string `xml:"type,attr,omitempty"` + Comment string `xml:",comment"` + Number string `xml:",chardata"` +} + +type Domain struct { + XMLName struct{} `xml:"domain"` + Country string `xml:",attr,omitempty"` + Name []byte `xml:",chardata"` + Comment []byte `xml:",comment"` +} + +type Book struct { + XMLName struct{} `xml:"book"` + Title string `xml:",chardata"` +} + +type Event struct { + XMLName struct{} `xml:"event"` + Year int `xml:",chardata"` +} + +type Movie struct { + XMLName struct{} `xml:"movie"` + Length uint `xml:",chardata"` +} + +type Pi struct { + XMLName struct{} `xml:"pi"` + Approximation float32 `xml:",chardata"` +} + +type Universe struct { + XMLName struct{} `xml:"universe"` + Visible float64 `xml:",chardata"` +} + +type Particle struct { + XMLName struct{} `xml:"particle"` + HasMass bool `xml:",chardata"` +} + +type Departure struct { + XMLName struct{} `xml:"departure"` + When time.Time `xml:",chardata"` +} + +type SecretAgent struct { + XMLName struct{} `xml:"agent"` + Handle string `xml:"handle,attr"` + Identity string + Obfuscate string `xml:",innerxml"` +} + +type NestedItems struct { + XMLName struct{} `xml:"result"` + Items []string `xml:">item"` + Item1 []string `xml:"Items>item1"` +} + +type NestedOrder struct { + XMLName struct{} `xml:"result"` + Field1 string `xml:"parent>c"` + Field2 string `xml:"parent>b"` + Field3 string `xml:"parent>a"` +} + +type MixedNested struct { + XMLName struct{} `xml:"result"` + A string `xml:"parent1>a"` + B string `xml:"b"` + C string `xml:"parent1>parent2>c"` + D string `xml:"parent1>d"` +} + +type NilTest struct { + A any `xml:"parent1>parent2>a"` + B any `xml:"parent1>b"` + C any `xml:"parent1>parent2>c"` +} + +type Service struct { + XMLName struct{} `xml:"service"` + Domain *Domain `xml:"host>domain"` + Port *Port `xml:"host>port"` + Extra1 any + Extra2 any `xml:"host>extra2"` +} + +var nilStruct *Ship + +type EmbedA struct { + EmbedC + EmbedB EmbedB + FieldA string + embedD +} + +type EmbedB struct { + FieldB string + *EmbedC +} + +type EmbedC struct { + FieldA1 string `xml:"FieldA>A1"` + FieldA2 string `xml:"FieldA>A2"` + FieldB string + FieldC string +} + +type embedD struct { + fieldD string + FieldE string // Promoted and visible when embedD is embedded. +} + +type NameCasing struct { + XMLName struct{} `xml:"casing"` + Xy string + XY string + XyA string `xml:"Xy,attr"` + XYA string `xml:"XY,attr"` +} + +type NamePrecedence struct { + XMLName Name `xml:"Parent"` + FromTag XMLNameWithoutTag `xml:"InTag"` + FromNameVal XMLNameWithoutTag + FromNameTag XMLNameWithTag + InFieldName string +} + +type XMLNameWithTag struct { + XMLName Name `xml:"InXMLNameTag"` + Value string `xml:",chardata"` +} + +type XMLNameWithoutTag struct { + XMLName Name + Value string `xml:",chardata"` +} + +type NameInField struct { + Foo Name `xml:"ns foo"` +} + +type AttrTest struct { + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type AttrsTest struct { + Attrs []Attr `xml:",any,attr"` + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type OmitAttrTest struct { + Int int `xml:",attr,omitempty"` + Named int `xml:"int,attr,omitempty"` + Float float64 `xml:",attr,omitempty"` + Uint8 uint8 `xml:",attr,omitempty"` + Bool bool `xml:",attr,omitempty"` + Str string `xml:",attr,omitempty"` + Bytes []byte `xml:",attr,omitempty"` + PStr *string `xml:",attr,omitempty"` +} + +type OmitFieldTest struct { + Int int `xml:",omitempty"` + Named int `xml:"int,omitempty"` + Float float64 `xml:",omitempty"` + Uint8 uint8 `xml:",omitempty"` + Bool bool `xml:",omitempty"` + Str string `xml:",omitempty"` + Bytes []byte `xml:",omitempty"` + PStr *string `xml:",omitempty"` + Ptr *PresenceTest `xml:",omitempty"` +} + +type AnyTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField AnyHolder `xml:",any"` +} + +type AnyOmitTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField *AnyHolder `xml:",any,omitempty"` +} + +type AnySliceTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField []AnyHolder `xml:",any"` +} + +type AnyHolder struct { + XMLName Name + XML string `xml:",innerxml"` +} + +type RecurseA struct { + A string + B *RecurseB +} + +type RecurseB struct { + A *RecurseA + B string +} + +type PresenceTest struct { + Exists *struct{} +} + +type IgnoreTest struct { + PublicSecret string `xml:"-"` +} + +type MyBytes []byte + +type Data struct { + Bytes []byte + Attr []byte `xml:",attr"` + Custom MyBytes +} + +type Plain struct { + V any +} + +type MyInt int + +type EmbedInt struct { + MyInt +} + +type Strings struct { + X []string `xml:"A>B,omitempty"` +} + +type PointerFieldsTest struct { + XMLName Name `xml:"dummy"` + Name *string `xml:"name,attr"` + Age *uint `xml:"age,attr"` + Empty *string `xml:"empty,attr"` + Contents *string `xml:",chardata"` +} + +type ChardataEmptyTest struct { + XMLName Name `xml:"test"` + Contents *string `xml:",chardata"` +} + +type PointerAnonFields struct { + *MyInt + *NamedType +} + +type MyMarshalerTest struct { +} + +var _ Marshaler = (*MyMarshalerTest)(nil) + +func (m *MyMarshalerTest) MarshalXML(e *Encoder, start StartElement) error { + e.EncodeToken(start) + e.EncodeToken(CharData([]byte("hello world"))) + e.EncodeToken(EndElement{start.Name}) + return nil +} + +type MyMarshalerAttrTest struct { +} + +var _ MarshalerAttr = (*MyMarshalerAttrTest)(nil) + +func (m *MyMarshalerAttrTest) MarshalXMLAttr(name Name) (Attr, error) { + return Attr{name, "hello world"}, nil +} + +func (m *MyMarshalerAttrTest) UnmarshalXMLAttr(attr Attr) error { + return nil +} + +type MarshalerStruct struct { + Foo MyMarshalerAttrTest `xml:",attr"` +} + +type InnerStruct struct { + XMLName Name `xml:"testns outer"` +} + +type OuterStruct struct { + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterNamedStruct struct { + InnerStruct + XMLName Name `xml:"outerns test"` + IntAttr int `xml:"int,attr"` +} + +type OuterNamedOrderedStruct struct { + XMLName Name `xml:"outerns test"` + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterOuterStruct struct { + OuterStruct +} + +type NestedAndChardata struct { + AB []string `xml:"A>B"` + Chardata string `xml:",chardata"` +} + +type NestedAndComment struct { + AB []string `xml:"A>B"` + Comment string `xml:",comment"` +} + +type CDataTest struct { + Chardata string `xml:",cdata"` +} + +type NestedAndCData struct { + AB []string `xml:"A>B"` + CDATA string `xml:",cdata"` +} + +func ifaceptr(x any) any { + return &x +} + +func stringptr(x string) *string { + return &x +} + +type T1 struct{} +type T2 struct{} + +type IndirComment struct { + T1 T1 + Comment *string `xml:",comment"` + T2 T2 +} + +type DirectComment struct { + T1 T1 + Comment string `xml:",comment"` + T2 T2 +} + +type IfaceComment struct { + T1 T1 + Comment any `xml:",comment"` + T2 T2 +} + +type IndirChardata struct { + T1 T1 + Chardata *string `xml:",chardata"` + T2 T2 +} + +type DirectChardata struct { + T1 T1 + Chardata string `xml:",chardata"` + T2 T2 +} + +type IfaceChardata struct { + T1 T1 + Chardata any `xml:",chardata"` + T2 T2 +} + +type IndirCDATA struct { + T1 T1 + CDATA *string `xml:",cdata"` + T2 T2 +} + +type DirectCDATA struct { + T1 T1 + CDATA string `xml:",cdata"` + T2 T2 +} + +type IfaceCDATA struct { + T1 T1 + CDATA any `xml:",cdata"` + T2 T2 +} + +type IndirInnerXML struct { + T1 T1 + InnerXML *string `xml:",innerxml"` + T2 T2 +} + +type DirectInnerXML struct { + T1 T1 + InnerXML string `xml:",innerxml"` + T2 T2 +} + +type IfaceInnerXML struct { + T1 T1 + InnerXML any `xml:",innerxml"` + T2 T2 +} + +type IndirElement struct { + T1 T1 + Element *string + T2 T2 +} + +type DirectElement struct { + T1 T1 + Element string + T2 T2 +} + +type IfaceElement struct { + T1 T1 + Element any + T2 T2 +} + +type IndirOmitEmpty struct { + T1 T1 + OmitEmpty *string `xml:",omitempty"` + T2 T2 +} + +type DirectOmitEmpty struct { + T1 T1 + OmitEmpty string `xml:",omitempty"` + T2 T2 +} + +type IfaceOmitEmpty struct { + T1 T1 + OmitEmpty any `xml:",omitempty"` + T2 T2 +} + +type IndirAny struct { + T1 T1 + Any *string `xml:",any"` + T2 T2 +} + +type DirectAny struct { + T1 T1 + Any string `xml:",any"` + T2 T2 +} + +type IfaceAny struct { + T1 T1 + Any any `xml:",any"` + T2 T2 +} + +type Generic[T any] struct { + X T +} + +var ( + nameAttr = "Sarah" + ageAttr = uint(12) + contentsAttr = "lorem ipsum" + empty = "" +) + +// Unless explicitly stated as such (or *Plain), all of the +// tests below are two-way tests. When introducing new tests, +// please try to make them two-way as well to ensure that +// marshaling and unmarshaling are as symmetrical as feasible. +var marshalTests = []struct { + Value any + ExpectXML string + MarshalOnly bool + MarshalError string + UnmarshalOnly bool + UnmarshalError string +}{ + // Test nil marshals to nothing + {Value: nil, ExpectXML: ``, MarshalOnly: true}, + {Value: nilStruct, ExpectXML: ``, MarshalOnly: true}, + + // Test value types + {Value: &Plain{true}, ExpectXML: `<Plain><V>true</V></Plain>`}, + {Value: &Plain{false}, ExpectXML: `<Plain><V>false</V></Plain>`}, + {Value: &Plain{int(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{float32(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`}, + {Value: &Plain{float64(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`}, + {Value: &Plain{uintptr(0xFFDD)}, ExpectXML: `<Plain><V>65501</V></Plain>`}, + {Value: &Plain{"gopher"}, ExpectXML: `<Plain><V>gopher</V></Plain>`}, + {Value: &Plain{[]byte("gopher")}, ExpectXML: `<Plain><V>gopher</V></Plain>`}, + {Value: &Plain{"</>"}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{[]byte("</>")}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{[3]byte{'<', '/', '>'}}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{NamedType("potato")}, ExpectXML: `<Plain><V>potato</V></Plain>`}, + {Value: &Plain{[]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`}, + {Value: &Plain{[3]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`}, + {Value: ifaceptr(true), MarshalOnly: true, ExpectXML: `<bool>true</bool>`}, + + // Test time. + { + Value: &Plain{time.Unix(1e9, 123456789).UTC()}, + ExpectXML: `<Plain><V>2001-09-09T01:46:40.123456789Z</V></Plain>`, + }, + + // A pointer to struct{} may be used to test for an element's presence. + { + Value: &PresenceTest{new(struct{})}, + ExpectXML: `<PresenceTest><Exists></Exists></PresenceTest>`, + }, + { + Value: &PresenceTest{}, + ExpectXML: `<PresenceTest></PresenceTest>`, + }, + + // A []byte field is only nil if the element was not found. + { + Value: &Data{}, + ExpectXML: `<Data></Data>`, + UnmarshalOnly: true, + }, + { + Value: &Data{Bytes: []byte{}, Custom: MyBytes{}, Attr: []byte{}}, + ExpectXML: `<Data Attr=""><Bytes></Bytes><Custom></Custom></Data>`, + UnmarshalOnly: true, + }, + + // Check that []byte works, including named []byte types. + { + Value: &Data{Bytes: []byte("ab"), Custom: MyBytes("cd"), Attr: []byte{'v'}}, + ExpectXML: `<Data Attr="v"><Bytes>ab</Bytes><Custom>cd</Custom></Data>`, + }, + + // Test innerxml + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, + MarshalOnly: true, + }, + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<Identity>James Bond</Identity><redacted/>", + }, + ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, + UnmarshalOnly: true, + }, + + // Test structs + {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `<port type="ssl">443</port>`}, + {Value: &Port{Number: "443"}, ExpectXML: `<port>443</port>`}, + {Value: &Port{Type: "<unix>"}, ExpectXML: `<port type="<unix>"></port>`}, + {Value: &Port{Number: "443", Comment: "https"}, ExpectXML: `<port><!--https-->443</port>`}, + {Value: &Port{Number: "443", Comment: "add space-"}, ExpectXML: `<port><!--add space- -->443</port>`, MarshalOnly: true}, + {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`}, + {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`}, + {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`}, + {Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`}, + {Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`}, + {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`}, + {Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`}, + {Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`}, + {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`}, + {Value: atomValue, ExpectXML: atomXML}, + {Value: &Generic[int]{1}, ExpectXML: `<Generic><X>1</X></Generic>`}, + { + Value: &Ship{ + Name: "Heart of Gold", + Pilot: "Computer", + Age: 1, + Drive: ImprobabilityDrive, + Passenger: []*Passenger{ + { + Name: []string{"Zaphod", "Beeblebrox"}, + Weight: 7.25, + }, + { + Name: []string{"Trisha", "McMillen"}, + Weight: 5.5, + }, + { + Name: []string{"Ford", "Prefect"}, + Weight: 7, + }, + { + Name: []string{"Arthur", "Dent"}, + Weight: 6.75, + }, + }, + }, + ExpectXML: `<spaceship name="Heart of Gold" pilot="Computer">` + + `<drive>` + strconv.Itoa(int(ImprobabilityDrive)) + `</drive>` + + `<age>1</age>` + + `<passenger>` + + `<name>Zaphod</name>` + + `<name>Beeblebrox</name>` + + `<weight>7.25</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Trisha</name>` + + `<name>McMillen</name>` + + `<weight>5.5</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Ford</name>` + + `<name>Prefect</name>` + + `<weight>7</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Arthur</name>` + + `<name>Dent</name>` + + `<weight>6.75</weight>` + + `</passenger>` + + `</spaceship>`, + }, + + // Test a>b + { + Value: &NestedItems{Items: nil, Item1: nil}, + ExpectXML: `<result>` + + `<Items>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{}, Item1: []string{}}, + ExpectXML: `<result>` + + `<Items>` + + `</Items>` + + `</result>`, + MarshalOnly: true, + }, + { + Value: &NestedItems{Items: nil, Item1: []string{"A"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item1>A</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: nil}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `<item1>C</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, + ExpectXML: `<result>` + + `<parent>` + + `<c>C</c>` + + `<b>B</b>` + + `<a>A</a>` + + `</parent>` + + `</result>`, + }, + { + Value: &NilTest{A: "A", B: nil, C: "C"}, + ExpectXML: `<NilTest>` + + `<parent1>` + + `<parent2><a>A</a></parent2>` + + `<parent2><c>C</c></parent2>` + + `</parent1>` + + `</NilTest>`, + MarshalOnly: true, // Uses interface{} + }, + { + Value: &MixedNested{A: "A", B: "B", C: "C", D: "D"}, + ExpectXML: `<result>` + + `<parent1><a>A</a></parent1>` + + `<b>B</b>` + + `<parent1>` + + `<parent2><c>C</c></parent2>` + + `<d>D</d>` + + `</parent1>` + + `</result>`, + }, + { + Value: &Service{Port: &Port{Number: "80"}}, + ExpectXML: `<service><host><port>80</port></host></service>`, + }, + { + Value: &Service{}, + ExpectXML: `<service></service>`, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<Extra1>A</Extra1>` + + `<host><extra2>B</extra2></host>` + + `</service>`, + MarshalOnly: true, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra2: "example"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<host><extra2>example</extra2></host>` + + `</service>`, + MarshalOnly: true, + }, + { + Value: &struct { + XMLName struct{} `xml:"space top"` + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space">` + + `<x><a>a</a><b>b</b><c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName Name + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + XMLName: Name{ + Space: "space0", + Local: "top", + }, + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space0">` + + `<x><a>a</a><b>b</b>` + + `<c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName struct{} `xml:"top"` + B string `xml:"space x>b"` + B1 string `xml:"space1 x>b"` + }{ + B: "b", + B1: "b1", + }, + ExpectXML: `<top>` + + `<x><b xmlns="space">b</b>` + + `<b xmlns="space1">b1</b></x>` + + `</top>`, + }, + + // Test struct embedding + { + Value: &EmbedA{ + EmbedC: EmbedC{ + FieldA1: "", // Shadowed by A.A + FieldA2: "", // Shadowed by A.A + FieldB: "A.C.B", + FieldC: "A.C.C", + }, + EmbedB: EmbedB{ + FieldB: "A.B.B", + EmbedC: &EmbedC{ + FieldA1: "A.B.C.A1", + FieldA2: "A.B.C.A2", + FieldB: "", // Shadowed by A.B.B + FieldC: "A.B.C.C", + }, + }, + FieldA: "A.A", + embedD: embedD{ + FieldE: "A.D.E", + }, + }, + ExpectXML: `<EmbedA>` + + `<FieldB>A.C.B</FieldB>` + + `<FieldC>A.C.C</FieldC>` + + `<EmbedB>` + + `<FieldB>A.B.B</FieldB>` + + `<FieldA>` + + `<A1>A.B.C.A1</A1>` + + `<A2>A.B.C.A2</A2>` + + `</FieldA>` + + `<FieldC>A.B.C.C</FieldC>` + + `</EmbedB>` + + `<FieldA>A.A</FieldA>` + + `<FieldE>A.D.E</FieldE>` + + `</EmbedA>`, + }, + + // Anonymous struct pointer field which is nil + { + Value: &EmbedB{}, + ExpectXML: `<EmbedB><FieldB></FieldB></EmbedB>`, + }, + + // Other kinds of nil anonymous fields + { + Value: &PointerAnonFields{}, + ExpectXML: `<PointerAnonFields></PointerAnonFields>`, + }, + + // Test that name casing matters + { + Value: &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"}, + ExpectXML: `<casing Xy="mixedA" XY="upperA"><Xy>mixed</Xy><XY>upper</XY></casing>`, + }, + + // Test the order in which the XML element name is chosen + { + Value: &NamePrecedence{ + FromTag: XMLNameWithoutTag{Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "InXMLName"}, Value: "B"}, + FromNameTag: XMLNameWithTag{Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `<Parent>` + + `<InTag>A</InTag>` + + `<InXMLName>B</InXMLName>` + + `<InXMLNameTag>C</InXMLNameTag>` + + `<InFieldName>D</InFieldName>` + + `</Parent>`, + MarshalOnly: true, + }, + { + Value: &NamePrecedence{ + XMLName: Name{Local: "Parent"}, + FromTag: XMLNameWithoutTag{XMLName: Name{Local: "InTag"}, Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "FromNameVal"}, Value: "B"}, + FromNameTag: XMLNameWithTag{XMLName: Name{Local: "InXMLNameTag"}, Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `<Parent>` + + `<InTag>A</InTag>` + + `<FromNameVal>B</FromNameVal>` + + `<InXMLNameTag>C</InXMLNameTag>` + + `<InFieldName>D</InFieldName>` + + `</Parent>`, + UnmarshalOnly: true, + }, + + // xml.Name works in a plain field as well. + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`, + }, + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: `<NameInField><foo xmlns="ns"><ignore></ignore></foo></NameInField>`, + UnmarshalOnly: true, + }, + + // Marshaling zero xml.Name uses the tag or field name. + { + Value: &NameInField{}, + ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`, + MarshalOnly: true, + }, + + // Test attributes + { + Value: &AttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: `<AttrTest Int="8" int="9" Float="23.5" Uint8="255"` + + ` Bool="true" Str="str" Bytes="byt"></AttrTest>`, + }, + { + Value: &AttrTest{Bytes: []byte{}}, + ExpectXML: `<AttrTest Int="0" int="0" Float="0" Uint8="0"` + + ` Bool="false" Str="" Bytes=""></AttrTest>`, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + {Name: Name{Local: "Int"}, Value: "8"}, + {Name: Name{Local: "int"}, Value: "9"}, + {Name: Name{Local: "Float"}, Value: "23.5"}, + {Name: Name{Local: "Uint8"}, Value: "255"}, + {Name: Name{Local: "Bool"}, Value: "true"}, + {Name: Name{Local: "Str"}, Value: "str"}, + {Name: Name{Local: "Bytes"}, Value: "byt"}, + }, + }, + ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`, + MarshalOnly: true, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + }, + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt"></AttrsTest>`, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Int"}, Value: "0"}, + {Name: Name{Local: "int"}, Value: "0"}, + {Name: Name{Local: "Float"}, Value: "0"}, + {Name: Name{Local: "Uint8"}, Value: "0"}, + {Name: Name{Local: "Bool"}, Value: "false"}, + {Name: Name{Local: "Str"}}, + {Name: Name{Local: "Bytes"}}, + }, + Bytes: []byte{}, + }, + ExpectXML: `<AttrsTest Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes="" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`, + MarshalOnly: true, + }, + { + Value: &OmitAttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + }, + ExpectXML: `<OmitAttrTest Int="8" int="9" Float="23.5" Uint8="255"` + + ` Bool="true" Str="str" Bytes="byt" PStr=""></OmitAttrTest>`, + }, + { + Value: &OmitAttrTest{}, + ExpectXML: `<OmitAttrTest></OmitAttrTest>`, + }, + + // pointer fields + { + Value: &PointerFieldsTest{Name: &nameAttr, Age: &ageAttr, Contents: &contentsAttr}, + ExpectXML: `<dummy name="Sarah" age="12">lorem ipsum</dummy>`, + MarshalOnly: true, + }, + + // empty chardata pointer field + { + Value: &ChardataEmptyTest{}, + ExpectXML: `<test></test>`, + MarshalOnly: true, + }, + + // omitempty on fields + { + Value: &OmitFieldTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + Ptr: &PresenceTest{}, + }, + ExpectXML: `<OmitFieldTest>` + + `<Int>8</Int>` + + `<int>9</int>` + + `<Float>23.5</Float>` + + `<Uint8>255</Uint8>` + + `<Bool>true</Bool>` + + `<Str>str</Str>` + + `<Bytes>byt</Bytes>` + + `<PStr></PStr>` + + `<Ptr></Ptr>` + + `</OmitFieldTest>`, + }, + { + Value: &OmitFieldTest{}, + ExpectXML: `<OmitFieldTest></OmitFieldTest>`, + }, + + // Test ",any" + { + ExpectXML: `<a><nested><value>known</value></nested><other><sub>unknown</sub></other></a>`, + Value: &AnyTest{ + Nested: "known", + AnyField: AnyHolder{ + XMLName: Name{Local: "other"}, + XML: "<sub>unknown</sub>", + }, + }, + }, + { + Value: &AnyTest{Nested: "known", + AnyField: AnyHolder{ + XML: "<unknown/>", + XMLName: Name{Local: "AnyField"}, + }, + }, + ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnyOmitTest{ + Nested: "b", + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`, + Value: &AnySliceTest{ + Nested: "b", + AnyField: []AnyHolder{ + { + XMLName: Name{Local: "c"}, + XML: "<d>e</d>", + }, + { + XMLName: Name{Space: "f", Local: "g"}, + XML: "<h>i</h>", + }, + }, + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnySliceTest{ + Nested: "b", + }, + }, + + // Test recursive types. + { + Value: &RecurseA{ + A: "a1", + B: &RecurseB{ + A: &RecurseA{"a2", nil}, + B: "b1", + }, + }, + ExpectXML: `<RecurseA><A>a1</A><B><A><A>a2</A></A><B>b1</B></B></RecurseA>`, + }, + + // Test ignoring fields via "-" tag + { + ExpectXML: `<IgnoreTest></IgnoreTest>`, + Value: &IgnoreTest{}, + }, + { + ExpectXML: `<IgnoreTest></IgnoreTest>`, + Value: &IgnoreTest{PublicSecret: "can't tell"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IgnoreTest><PublicSecret>ignore me</PublicSecret></IgnoreTest>`, + Value: &IgnoreTest{}, + UnmarshalOnly: true, + }, + + // Test escaping. + { + ExpectXML: `<a><nested><value>dquote: "; squote: '; ampersand: &; less: <; greater: >;</value></nested><empty></empty></a>`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + AnyField: AnyHolder{XMLName: Name{Local: "empty"}}, + }, + }, + { + ExpectXML: `<a><nested><value>newline: 
; cr: 
; tab: 	;</value></nested><AnyField></AnyField></a>`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}}, + }, + }, + { + ExpectXML: "<a><nested><value>1\r2\r\n3\n\r4\n5</value></nested></a>", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, + { + ExpectXML: `<EmbedInt><MyInt>42</MyInt></EmbedInt>`, + Value: &EmbedInt{ + MyInt: 42, + }, + }, + // Test outputting CDATA-wrapped text. + { + ExpectXML: `<CDataTest></CDataTest>`, + Value: &CDataTest{}, + }, + { + ExpectXML: `<CDataTest><![CDATA[http://example.com/tests/1?foo=1&bar=baz]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "http://example.com/tests/1?foo=1&bar=baz", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[Literal <![CDATA[Nested]]]]><![CDATA[>!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "Literal <![CDATA[Nested]]>!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[Nested]]> Literal!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal! <![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[Nested]]> Literal! <![CDATA[Nested]]> Literal!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[<![CDATA[Nested]]]]><![CDATA[>]]]]><![CDATA[>]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[<![CDATA[Nested]]>]]>", + }, + }, + + // Test omitempty with parent chain; see golang.org/issue/4168. + { + ExpectXML: `<Strings><A></A></Strings>`, + Value: &Strings{}, + }, + // Custom marshalers. + { + ExpectXML: `<MyMarshalerTest>hello world</MyMarshalerTest>`, + Value: &MyMarshalerTest{}, + }, + { + ExpectXML: `<MarshalerStruct Foo="hello world"></MarshalerStruct>`, + Value: &MarshalerStruct{}, + }, + { + ExpectXML: `<outer xmlns="testns" int="10"></outer>`, + Value: &OuterStruct{IntAttr: 10}, + }, + { + ExpectXML: `<test xmlns="outerns" int="10"></test>`, + Value: &OuterNamedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: `<test xmlns="outerns" int="10"></test>`, + Value: &OuterNamedOrderedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: `<outer xmlns="testns" int="10"></outer>`, + Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}}, + }, + { + ExpectXML: `<NestedAndChardata><A><B></B><B></B></A>test</NestedAndChardata>`, + Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"}, + }, + { + ExpectXML: `<NestedAndComment><A><B></B><B></B></A><!--test--></NestedAndComment>`, + Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"}, + }, + { + ExpectXML: `<NestedAndCData><A><B></B><B></B></A><![CDATA[test]]></NestedAndCData>`, + Value: &NestedAndCData{AB: make([]string, 2), CDATA: "test"}, + }, + // Test pointer indirection in various kinds of fields. + // https://golang.org/issue/19063 + { + ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: stringptr("hi")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IndirComment", + }, + { + ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IfaceComment", + }, + { + ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectComment><T1></T1><!--hi--><T2></T2></DirectComment>`, + Value: &DirectComment{Comment: string("hi")}, + }, + { + ExpectXML: `<DirectComment><T1></T1><T2></T2></DirectComment>`, + Value: &DirectComment{Comment: string("")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1>hi<T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1><![CDATA[hi]]><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: nil}, + MarshalOnly: true, // unmarshal leaves Chardata=stringptr("") + }, + { + ExpectXML: `<IfaceChardata><T1></T1>hi<T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><![CDATA[hi]]><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<DirectChardata><T1></T1>hi<T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("hi")}, + }, + { + ExpectXML: `<DirectChardata><T1></T1><![CDATA[hi]]><T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: `<DirectChardata><T1></T1><T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1><![CDATA[hi]]><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1>hi<T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: nil}, + MarshalOnly: true, // unmarshal leaves CDATA=stringptr("") + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><![CDATA[hi]]><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1>hi<T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<DirectCDATA><T1></T1><![CDATA[hi]]><T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("hi")}, + }, + { + ExpectXML: `<DirectCDATA><T1></T1>hi<T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: `<DirectCDATA><T1></T1><T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("")}, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: stringptr("<hi/>")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: nil}, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: "<hi/>"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<hi/>")}, + MarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<T1></T1><hi/><T2></T2>")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<T1></T1><T2></T2>")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirElement><T1></T1><Element>hi</Element><T2></T2></IndirElement>`, + Value: &IndirElement{Element: stringptr("hi")}, + }, + { + ExpectXML: `<IndirElement><T1></T1><Element></Element><T2></T2></IndirElement>`, + Value: &IndirElement{Element: stringptr("")}, + }, + { + ExpectXML: `<IndirElement><T1></T1><T2></T2></IndirElement>`, + Value: &IndirElement{Element: nil}, + }, + { + ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + }, + { + ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectElement><T1></T1><Element>hi</Element><T2></T2></DirectElement>`, + Value: &DirectElement{Element: string("hi")}, + }, + { + ExpectXML: `<DirectElement><T1></T1><Element></Element><T2></T2></DirectElement>`, + Value: &DirectElement{Element: string("")}, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("hi")}, + }, + { + // Note: Changed in Go 1.8 to include <OmitEmpty> element (because x.OmitEmpty != nil). + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></DirectOmitEmpty>`, + Value: &DirectOmitEmpty{OmitEmpty: string("hi")}, + }, + { + ExpectXML: `<DirectOmitEmpty><T1></T1><T2></T2></DirectOmitEmpty>`, + Value: &DirectOmitEmpty{OmitEmpty: string("")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><Any>hi</Any><T2></T2></IndirAny>`, + Value: &IndirAny{Any: stringptr("hi")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><Any></Any><T2></T2></IndirAny>`, + Value: &IndirAny{Any: stringptr("")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><T2></T2></IndirAny>`, + Value: &IndirAny{Any: nil}, + }, + { + ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + }, + { + ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectAny><T1></T1><Any>hi</Any><T2></T2></DirectAny>`, + Value: &DirectAny{Any: string("hi")}, + }, + { + ExpectXML: `<DirectAny><T1></T1><Any></Any><T2></T2></DirectAny>`, + Value: &DirectAny{Any: string("")}, + }, + { + ExpectXML: `<IndirFoo><T1></T1><Foo>hi</Foo><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: stringptr("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirFoo><T1></T1><Foo></Foo><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirFoo><T1></T1><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><Foo>hi</Foo><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectFoo><T1></T1><Foo>hi</Foo><T2></T2></DirectFoo>`, + Value: &DirectAny{Any: string("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectFoo><T1></T1><Foo></Foo><T2></T2></DirectFoo>`, + Value: &DirectAny{Any: string("")}, + UnmarshalOnly: true, + }, +} + +func TestMarshal(t *testing.T) { + for idx, test := range marshalTests { + if test.UnmarshalOnly { + continue + } + + t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + data, err := Marshal(test.Value) + if err != nil { + if test.MarshalError == "" { + t.Errorf("marshal(%#v): %s", test.Value, err) + return + } + if !strings.Contains(err.Error(), test.MarshalError) { + t.Errorf("marshal(%#v): %s, want %q", test.Value, err, test.MarshalError) + } + return + } + if test.MarshalError != "" { + t.Errorf("Marshal succeeded, want error %q", test.MarshalError) + return + } + if got, want := string(data), test.ExpectXML; got != want { + if strings.Contains(want, "\n") { + t.Errorf("marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", test.Value, got, want) + } else { + t.Errorf("marshal(%#v):\nhave %#q\nwant %#q", test.Value, got, want) + } + } + }) + } +} + +type AttrParent struct { + X string `xml:"X>Y,attr"` +} + +type BadAttr struct { + Name map[string]string `xml:"name,attr"` +} + +var marshalErrorTests = []struct { + Value any + Err string + Kind reflect.Kind +}{ + { + Value: make(chan bool), + Err: "xml: unsupported type: chan bool", + Kind: reflect.Chan, + }, + { + Value: map[string]string{ + "question": "What do you get when you multiply six by nine?", + "answer": "42", + }, + Err: "xml: unsupported type: map[string]string", + Kind: reflect.Map, + }, + { + Value: map[*Ship]bool{nil: false}, + Err: "xml: unsupported type: map[*xml.Ship]bool", + Kind: reflect.Map, + }, + { + Value: &Domain{Comment: []byte("f--bar")}, + Err: `xml: comments must not contain "--"`, + }, + // Reject parent chain with attr, never worked; see golang.org/issue/5033. + { + Value: &AttrParent{}, + Err: `xml: X>Y chain not valid with attr flag`, + }, + { + Value: BadAttr{map[string]string{"X": "Y"}}, + Err: `xml: unsupported type: map[string]string`, + }, +} + +var marshalIndentTests = []struct { + Value any + Prefix string + Indent string + ExpectXML string +}{ + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + Prefix: "", + Indent: "\t", + ExpectXML: fmt.Sprintf("<agent handle=\"007\">\n\t<Identity>James Bond</Identity><redacted/>\n</agent>"), + }, +} + +func TestMarshalErrors(t *testing.T) { + for idx, test := range marshalErrorTests { + data, err := Marshal(test.Value) + if err == nil { + t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err) + continue + } + if err.Error() != test.Err { + t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) + } + if test.Kind != reflect.Invalid { + if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { + t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) + } + } + } +} + +// Do invertibility testing on the various structures that we test +func TestUnmarshal(t *testing.T) { + for i, test := range marshalTests { + if test.MarshalOnly { + continue + } + if _, ok := test.Value.(*Plain); ok { + continue + } + if test.ExpectXML == `<top>`+ + `<x><b xmlns="space">b</b>`+ + `<b xmlns="space1">b1</b></x>`+ + `</top>` { + // TODO(rogpeppe): re-enable this test in + // https://go-review.googlesource.com/#/c/5910/ + continue + } + + vt := reflect.TypeOf(test.Value) + dest := reflect.New(vt.Elem()).Interface() + err := Unmarshal([]byte(test.ExpectXML), dest) + + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + switch fix := dest.(type) { + case *Feed: + fix.Author.InnerXML = "" + for i := range fix.Entry { + fix.Entry[i].Author.InnerXML = "" + } + } + + if err != nil { + if test.UnmarshalError == "" { + t.Errorf("unmarshal(%#v): %s", test.ExpectXML, err) + return + } + if !strings.Contains(err.Error(), test.UnmarshalError) { + t.Errorf("unmarshal(%#v): %s, want %q", test.ExpectXML, err, test.UnmarshalError) + } + return + } + if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { + t.Errorf("unmarshal(%q):\nhave %#v\nwant %#v", test.ExpectXML, got, want) + } + }) + } +} + +func TestMarshalIndent(t *testing.T) { + for i, test := range marshalIndentTests { + data, err := MarshalIndent(test.Value, test.Prefix, test.Indent) + if err != nil { + t.Errorf("#%d: Error: %s", i, err) + continue + } + if got, want := string(data), test.ExpectXML; got != want { + t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want) + } + } +} + +type limitedBytesWriter struct { + w io.Writer + remain int // until writes fail +} + +func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) { + if lw.remain <= 0 { + println("error") + return 0, errors.New("write limit hit") + } + if len(p) > lw.remain { + p = p[:lw.remain] + n, _ = lw.w.Write(p) + lw.remain = 0 + return n, errors.New("write limit hit") + } + n, err = lw.w.Write(p) + lw.remain -= n + return n, err +} + +func TestMarshalWriteErrors(t *testing.T) { + var buf bytes.Buffer + const writeCap = 1024 + w := &limitedBytesWriter{&buf, writeCap} + enc := NewEncoder(w) + var err error + var i int + const n = 4000 + for i = 1; i <= n; i++ { + err = enc.Encode(&Passenger{ + Name: []string{"Alice", "Bob"}, + Weight: 5, + }) + if err != nil { + break + } + } + if err == nil { + t.Error("expected an error") + } + if i == n { + t.Errorf("expected to fail before the end") + } + if buf.Len() != writeCap { + t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap) + } +} + +func TestMarshalWriteIOErrors(t *testing.T) { + enc := NewEncoder(errWriter{}) + + expectErr := "unwritable" + err := enc.Encode(&Passenger{}) + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} + +func TestMarshalFlush(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(CharData("hello world")); err != nil { + t.Fatalf("enc.EncodeToken: %v", err) + } + if buf.Len() > 0 { + t.Fatalf("enc.EncodeToken caused actual write: %q", buf.String()) + } + if err := enc.Flush(); err != nil { + t.Fatalf("enc.Flush: %v", err) + } + if buf.String() != "hello world" { + t.Fatalf("after enc.Flush, buf.String() = %q, want %q", buf.String(), "hello world") + } +} + +func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Marshal(atomValue) + } + }) +} + +func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() + xml := []byte(atomXML) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Unmarshal(xml, &Feed{}) + } + }) +} + +// golang.org/issue/6556 +func TestStructPointerMarshal(t *testing.T) { + type A struct { + XMLName string `xml:"a"` + B []any + } + type C struct { + XMLName Name + Value string `xml:"value"` + } + + a := new(A) + a.B = append(a.B, &C{ + XMLName: Name{Local: "c"}, + Value: "x", + }) + + b, err := Marshal(a) + if err != nil { + t.Fatal(err) + } + if x := string(b); x != "<a><c><value>x</value></c></a>" { + t.Fatal(x) + } + var v A + err = Unmarshal(b, &v) + if err != nil { + t.Fatal(err) + } +} + +var encodeTokenTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "start element with name space", + toks: []Token{ + StartElement{Name{"space", "local"}, nil}, + }, + want: `<local xmlns="space">`, +}, { + desc: "start element with no name", + toks: []Token{ + StartElement{Name{"space", ""}, nil}, + }, + err: "xml: start tag with no name", +}, { + desc: "end element with no name", + toks: []Token{ + EndElement{Name{"space", ""}}, + }, + err: "xml: end tag with no name", +}, { + desc: "char data", + toks: []Token{ + CharData("foo"), + }, + want: `foo`, +}, { + desc: "char data with escaped chars", + toks: []Token{ + CharData(" \t\n"), + }, + want: " 	\n", +}, { + desc: "comment", + toks: []Token{ + Comment("foo"), + }, + want: `<!--foo-->`, +}, { + desc: "comment with invalid content", + toks: []Token{ + Comment("foo-->"), + }, + err: "xml: EncodeToken of Comment containing --> marker", +}, { + desc: "proc instruction", + toks: []Token{ + ProcInst{"Target", []byte("Instruction")}, + }, + want: `<?Target Instruction?>`, +}, { + desc: "proc instruction with empty target", + toks: []Token{ + ProcInst{"", []byte("Instruction")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "proc instruction with bad content", + toks: []Token{ + ProcInst{"", []byte("Instruction?>")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: `<!foo>`, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"), + }, + want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`, +}, { + desc: "directive instruction with bad name", + toks: []Token{ + Directive("foo>"), + }, + err: "xml: EncodeToken of Directive containing wrong < or > markers", +}, { + desc: "end tag without start tag", + toks: []Token{ + EndElement{Name{"foo", "bar"}}, + }, + err: "xml: end tag </bar> without start tag", +}, { + desc: "mismatching end tag local name", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "bar"}}, + }, + err: "xml: end tag </bar> does not match start tag <foo>", + want: `<foo>`, +}, { + desc: "mismatching end tag namespace", + toks: []Token{ + StartElement{Name{"space", "foo"}, nil}, + EndElement{Name{"another", "foo"}}, + }, + err: "xml: end tag </foo> in namespace another does not match start tag <foo> in namespace space", + want: `<foo xmlns="space">`, +}, { + desc: "start element with explicit namespace", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value">`, +}, { + desc: "start element with explicit namespace and colliding prefix", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + {Name{"x", "bar"}, "other"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value" xmlns:x="x" x:bar="other">`, +}, { + desc: "start element using previously defined namespace", + toks: []Token{ + StartElement{Name{"", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"space", "x"}, "y"}, + }}, + }, + want: `<local xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns:space="space" space:x="y">`, +}, { + desc: "nested name space with same prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space1"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space2"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + EndElement{Name{"", "foo"}}, + EndElement{Name{"", "foo"}}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space1"><foo _xmlns:x="space2"><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value"></foo></foo><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value">`, +}, { + desc: "start element defining several prefixes for the same name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "a"}, "space"}, + {Name{"xmlns", "b"}, "space"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:a="space" _xmlns:b="space" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element redefines name space", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element creates alias for default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element defines default name space with existing prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element uses empty attribute name space when default ns defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" attr="value">`, +}, { + desc: "redefine xmlns", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"foo", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns:foo="foo" foo:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #1", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xml", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xml="xml" _xml:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #2", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{xmlURL, "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" xml:xmlns="space">`, +}, { + desc: "empty name space declaration is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "foo"}, ""}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:foo="">`, +}, { + desc: "attribute with no name is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", ""}, "value"}, + }}, + }, + want: `<foo>`, +}, { + desc: "namespace URL with non-valid name", + toks: []Token{ + StartElement{Name{"/34", "foo"}, []Attr{ + {Name{"/34", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="/34" xmlns:_="/34" _:x="value">`, +}, { + desc: "nested element resets default namespace to empty", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, ""}, + {Name{"", "x"}, "value"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="" x="value" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element requires empty default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, nil}, + }, + want: `<foo xmlns="space" xmlns="space"><foo>`, +}, { + desc: "attribute uses name space from xmlns", + toks: []Token{ + StartElement{Name{"some/space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + {Name{"some/space", "other"}, "other value"}, + }}, + }, + want: `<foo xmlns="some/space" attr="value" xmlns:space="some/space" space:other="other value">`, +}, { + desc: "default name space should not be used by attributes", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"xmlns", "bar"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:_xmlns="xmlns" _xmlns:bar="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "default name space not used by attributes, not explicitly defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "impossible xmlns declaration", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "bar"}, []Attr{ + {Name{"space", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space"><bar xmlns="space" xmlns:space="space" space:attr="value">`, +}, { + desc: "reserved namespace prefix -- all lower case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/xmlSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_xmlSchema-instance="http://www.w3.org/2001/xmlSchema-instance" _xmlSchema-instance:nil="true">`, +}, { + desc: "reserved namespace prefix -- all upper case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XMLSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" _XMLSchema-instance:nil="true">`, +}, { + desc: "reserved namespace prefix -- all mixed case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XmLSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_XmLSchema-instance="http://www.w3.org/2001/XmLSchema-instance" _XmLSchema-instance:nil="true">`, +}} + +func TestEncodeToken(t *testing.T) { +loop: + for i, tt := range encodeTokenTests { + var buf strings.Builder + enc := NewEncoder(&buf) + var err error + for j, tok := range tt.toks { + err = enc.EncodeToken(tok) + if err != nil && j < len(tt.toks)-1 { + t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err) + continue loop + } + } + errorf := func(f string, a ...any) { + t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...)) + } + switch { + case tt.err != "" && err == nil: + errorf(" expected error; got none") + continue + case tt.err == "" && err != nil: + errorf(" got error: %v", err) + continue + case tt.err != "" && err != nil && tt.err != err.Error(): + errorf(" error mismatch; got %v, want %v", err, tt.err) + continue + } + if err := enc.Flush(); err != nil { + errorf(" %v", err) + continue + } + if got := buf.String(); got != tt.want { + errorf("\ngot %v\nwant %v", got, tt.want) + continue + } + } +} + +func TestProcInstEncodeToken(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err) + } + + if err := enc.EncodeToken(ProcInst{"Target", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to add non-xml target ProcInst") + } + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err == nil { + t.Fatalf("enc.EncodeToken: expected to not be allowed to encode xml target ProcInst when not first token") + } +} + +func TestDecodeEncode(t *testing.T) { + var in, out bytes.Buffer + in.WriteString(`<?xml version="1.0" encoding="UTF-8"?> +<?Target Instruction?> +<root> +</root> +`) + dec := NewDecoder(&in) + enc := NewEncoder(&out) + for tok, err := dec.Token(); err == nil; tok, err = dec.Token() { + err = enc.EncodeToken(tok) + if err != nil { + t.Fatalf("enc.EncodeToken: Unable to encode token (%#v), %v", tok, err) + } + } +} + +// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race. +func TestRace9796(t *testing.T) { + type A struct{} + type B struct { + C []A `xml:"X>Y"` + } + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + Marshal(B{[]A{{}}}) + wg.Done() + }() + } + wg.Wait() +} + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "<!DOCTYPE '<' '>' '>' <!--nothing-->>", + "<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>", + "<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>", + "<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >", + } + testKO := []string{ + "<", + ">", + "<!--", + "-->", + "< > > < < >", + "<!dummy <!-- > -->", + "<!DOCTYPE doc '>", + "<!DOCTYPE doc '>'", + "<!DOCTYPE doc <!--comment>", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +} + +// Issue 11719. EncodeToken used to silently eat tokens with an invalid type. +func TestSimpleUseOfEncodeToken(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: StartElement %s", err) + } + if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: EndElement %s", err) + } + if err := enc.EncodeToken(Universe{}); err == nil { + t.Errorf("enc.EncodeToken: invalid type not caught") + } + if err := enc.Flush(); err != nil { + t.Errorf("enc.Flush: %s", err) + } + if buf.Len() == 0 { + t.Errorf("enc.EncodeToken: empty buffer") + } + want := "<object2></object2>" + if buf.String() != want { + t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String()) + } +} + +// Issue 16158. Decoder.unmarshalAttr ignores the return value of copyValue. +func TestIssue16158(t *testing.T) { + const data = `<foo b="HELLOWORLD"></foo>` + err := Unmarshal([]byte(data), &struct { + B byte `xml:"b,attr,omitempty"` + }{}) + if err == nil { + t.Errorf("Unmarshal: expected error, got nil") + } +} + +// Issue 20953. Crash on invalid XMLName attribute. + +type InvalidXMLName struct { + XMLName Name `xml:"error"` + Type struct { + XMLName Name `xml:"type,attr"` + } +} + +func TestInvalidXMLName(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(InvalidXMLName{}); err == nil { + t.Error("unexpected success") + } else if want := "invalid tag"; !strings.Contains(err.Error(), want) { + t.Errorf("error %q does not contain %q", err, want) + } +} + +// Issue 50164. Crash on zero value XML attribute. +type LayerOne struct { + XMLName Name `xml:"l1"` + + Value *float64 `xml:"value,omitempty"` + *LayerTwo `xml:",omitempty"` +} + +type LayerTwo struct { + ValueTwo *int `xml:"value_two,attr,omitempty"` +} + +func TestMarshalZeroValue(t *testing.T) { + proofXml := `<l1><value>1.2345</value></l1>` + var l1 LayerOne + err := Unmarshal([]byte(proofXml), &l1) + if err != nil { + t.Fatalf("unmarshal XML error: %v", err) + } + want := float64(1.2345) + got := *l1.Value + if got != want { + t.Fatalf("unexpected unmarshal result, want %f but got %f", want, got) + } + + // Marshal again (or Encode again) + // In issue 50164, here `Marshal(l1)` will panic because of the zero value of xml attribute ValueTwo `value_two`. + anotherXML, err := Marshal(l1) + if err != nil { + t.Fatalf("marshal XML error: %v", err) + } + if string(anotherXML) != proofXml { + t.Fatalf("unexpected unmarshal result, want %q but got %q", proofXml, anotherXML) + } +} + +var closeTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "unclosed start element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + }, + want: `<foo>`, + err: "unclosed tag <foo>", +}, { + desc: "closed element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "foo"}}, + }, + want: `<foo></foo>`, +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: `<!foo>`, +}} + +func TestClose(t *testing.T) { + for _, tt := range closeTests { + tt := tt + t.Run(tt.desc, func(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&out) + for j, tok := range tt.toks { + if err := enc.EncodeToken(tok); err != nil { + t.Fatalf("token #%d: %v", j, err) + } + } + err := enc.Close() + switch { + case tt.err != "" && err == nil: + t.Error(" expected error; got none") + case tt.err == "" && err != nil: + t.Errorf(" got error: %v", err) + case tt.err != "" && err != nil && tt.err != err.Error(): + t.Errorf(" error mismatch; got %v, want %v", err, tt.err) + } + if got := out.String(); got != tt.want { + t.Errorf("\ngot %v\nwant %v", got, tt.want) + } + t.Log(enc.p.closed) + if err := enc.EncodeToken(Directive("foo")); err == nil { + t.Errorf("unexpected success when encoding after Close") + } + }) + } +} diff --git a/src/encoding/xml/read.go b/src/encoding/xml/read.go new file mode 100644 index 0000000..3cc4968 --- /dev/null +++ b/src/encoding/xml/read.go @@ -0,0 +1,777 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "reflect" + "runtime" + "strconv" + "strings" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See [encoding/json] for a textual representation more suitable +// to data structures. + +// Unmarshal parses the XML-encoded data and stores the result in +// the value pointed to by v, which must be an arbitrary struct, +// slice, or string. Well-formed data that does not fit into v is +// discarded. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-sensitive +// comparison to match XML element names to tag values and struct +// field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// - If the struct has a field of type []byte or string with tag +// ",innerxml", Unmarshal accumulates the raw XML nested inside the +// element in that field. The rest of the rules still apply. +// +// - If the struct has a field named XMLName of type Name, +// Unmarshal records the element name in that field. +// +// - If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// - If the XML element has an attribute whose name matches a +// struct field name with an associated tag containing ",attr" or +// the explicit name in a struct field tag of the form "name,attr", +// Unmarshal records the attribute value in that field. +// +// - If the XML element has an attribute not handled by the previous +// rule and the struct has a field with an associated tag containing +// ",any,attr", Unmarshal records the attribute value in the first +// such field. +// +// - If the XML element contains character data, that data is +// accumulated in the first struct field that has tag ",chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// - If the XML element contains comments, they are accumulated in +// the first struct field that has tag ",comment". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// - If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a" or "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct +// field. A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// - If the XML element contains a sub-element whose name matches +// a struct field's XMLName tag and the struct field has no +// explicit name tag as per the previous rule, unmarshal maps +// the sub-element to that struct field. +// +// - If the XML element contains a sub-element whose name matches a +// field without any mode flags (",attr", ",chardata", etc), Unmarshal +// maps the sub-element to that struct field. +// +// - If the XML element contains a sub-element that hasn't matched any +// of the above rules and the struct has a field with tag ",any", +// unmarshal maps the sub-element to that struct field. +// +// - An anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// +// - A struct field with tag "-" is never unmarshaled into. +// +// If Unmarshal encounters a field type that implements the Unmarshaler +// interface, Unmarshal calls its UnmarshalXML method to produce the value from +// the XML element. Otherwise, if the value implements +// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. The saved []byte is never nil. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an attribute value to an [Attr] by saving the attribute, +// including its name, in the Attr. +// +// Unmarshal maps an XML element or attribute value to a slice by +// extending the length of the slice and mapping the element or attribute +// to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. Whitespace +// is trimmed and ignored. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. Whitespace is trimmed and ignored. +// +// Unmarshal maps an XML element to a Name by recording the element +// name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +// A missing element or empty attribute value will be unmarshaled as a zero value. +// If the field is a slice, a zero value will be appended to the field. Otherwise, the +// field will be set to its zero value. +func Unmarshal(data []byte, v any) error { + return NewDecoder(bytes.NewReader(data)).Decode(v) +} + +// Decode works like [Unmarshal], except it reads the decoder +// stream to find the start element. +func (d *Decoder) Decode(v any) error { + return d.DecodeElement(v, nil) +} + +// DecodeElement works like [Unmarshal] except that it takes +// a pointer to the start XML element to decode into v. +// It is useful when a client reads some raw XML tokens itself +// but also wants to defer to [Unmarshal] for some elements. +func (d *Decoder) DecodeElement(v any, start *StartElement) error { + val := reflect.ValueOf(v) + if val.Kind() != reflect.Pointer { + return errors.New("non-pointer passed to Unmarshal") + } + + if val.IsNil() { + return errors.New("nil pointer passed to Unmarshal") + } + return d.unmarshal(val.Elem(), start, 0) +} + +// An UnmarshalError represents an error in the unmarshaling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// Unmarshaler is the interface implemented by objects that can unmarshal +// an XML element description of themselves. +// +// UnmarshalXML decodes a single XML element +// beginning with the given start element. +// If it returns an error, the outer call to Unmarshal stops and +// returns that error. +// UnmarshalXML must consume exactly one XML element. +// One common implementation strategy is to unmarshal into +// a separate value with a layout matching the expected XML +// using d.DecodeElement, and then to copy the data from +// that value into the receiver. +// Another common strategy is to use d.Token to process the +// XML object one token at a time. +// UnmarshalXML may not use d.RawToken. +type Unmarshaler interface { + UnmarshalXML(d *Decoder, start StartElement) error +} + +// UnmarshalerAttr is the interface implemented by objects that can unmarshal +// an XML attribute description of themselves. +// +// UnmarshalXMLAttr decodes a single XML attribute. +// If it returns an error, the outer call to [Unmarshal] stops and +// returns that error. +// UnmarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type UnmarshalerAttr interface { + UnmarshalXMLAttr(attr Attr) error +} + +// receiverType returns the receiver type to use in an expression like "%s.MethodName". +func receiverType(val any) string { + t := reflect.TypeOf(val) + if t.Name() != "" { + return t.String() + } + return "(" + t.String() + ")" +} + +// unmarshalInterface unmarshals a single XML element into val. +// start is the opening tag of the element. +func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { + // Record that decoder must stop at end tag corresponding to start. + d.pushEOF() + + d.unmarshalDepth++ + err := val.UnmarshalXML(d, *start) + d.unmarshalDepth-- + if err != nil { + d.popEOF() + return err + } + + if !d.popEOF() { + return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) + } + + return nil +} + +// unmarshalTextInterface unmarshals a single XML element into val. +// The chardata contained in the element (but not its children) +// is passed to the text unmarshaler. +func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { + var buf []byte + depth := 1 + for depth > 0 { + t, err := d.Token() + if err != nil { + return err + } + switch t := t.(type) { + case CharData: + if depth == 1 { + buf = append(buf, t...) + } + case StartElement: + depth++ + case EndElement: + depth-- + } + } + return val.UnmarshalText(buf) +} + +// unmarshalAttr unmarshals a single XML attribute into val. +func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { + return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + } + + // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + } + + if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + // Slice of element values. + // Grow slice. + n := val.Len() + val.Grow(1) + val.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshalAttr(val.Index(n), attr); err != nil { + val.SetLen(n) + return err + } + return nil + } + + if val.Type() == attrType { + val.Set(reflect.ValueOf(attr)) + return nil + } + + return copyValue(val, []byte(attr.Value)) +} + +var ( + attrType = reflect.TypeFor[Attr]() + unmarshalerType = reflect.TypeFor[Unmarshaler]() + unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]() + textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() +) + +const ( + maxUnmarshalDepth = 10000 + maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498 +) + +var errUnmarshalDepth = errors.New("exceeded max depth") + +// Unmarshal a single XML element into val. +func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error { + if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm { + return errUnmarshalDepth + } + // Find start element if we need it. + if start == nil { + for { + tok, err := d.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + // Load value from interface, but only if the result will be + // usefully addressable. + if val.Kind() == reflect.Interface && !val.IsNil() { + e := val.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() { + val = e + } + } + + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + + if val.CanInterface() && val.Type().Implements(unmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return d.unmarshalInterface(val.Interface().(Unmarshaler), start) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { + return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) + } + } + + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) + } + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + saveAny reflect.Value + sv reflect.Value + tinfo *typeInfo + err error + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Interface: + // TODO: For now, simply ignore the field. In the near + // future we may choose to unmarshal the start + // element on it, if not nil. + return d.Skip() + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + v.Grow(1) + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshal(v.Index(n), start, depth+1); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + typ := v.Type() + if typ == nameType { + v.Set(reflect.ValueOf(start.Name)) + break + } + + sv = v + tinfo, err = getTypeInfo(typ) + if err != nil { + return err + } + + // Validate and assign element name. + if tinfo.xmlname != nil { + finfo := tinfo.xmlname + if finfo.name != "" && finfo.name != start.Name.Local { + return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") + } + if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + fv := finfo.value(sv, initNilPointers) + if _, ok := fv.Interface().(Name); ok { + fv.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + for _, a := range start.Attr { + handled := false + any := -1 + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fAttr: + strv := finfo.value(sv, initNilPointers) + if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + handled = true + } + + case fAny | fAttr: + if any == -1 { + any = i + } + } + } + if !handled && any >= 0 { + finfo := &tinfo.fields[any] + strv := finfo.value(sv, initNilPointers) + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + } + } + + // Determine whether we need to save character data or comments. + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fCDATA, fCharData: + if !saveData.IsValid() { + saveData = finfo.value(sv, initNilPointers) + } + + case fComment: + if !saveComment.IsValid() { + saveComment = finfo.value(sv, initNilPointers) + } + + case fAny, fAny | fElement: + if !saveAny.IsValid() { + saveAny = finfo.value(sv, initNilPointers) + } + + case fInnerXML: + if !saveXML.IsValid() { + saveXML = finfo.value(sv, initNilPointers) + if d.saved == nil { + saveXMLIndex = 0 + d.saved = new(bytes.Buffer) + } else { + saveXMLIndex = d.savedOffset() + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = d.savedOffset() + } + tok, err := d.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + consumed := false + if sv.IsValid() { + // unmarshalPath can call unmarshal, so we need to pass the depth through so that + // we can continue to enforce the maximum recursion limit. + consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth) + if err != nil { + return err + } + if !consumed && saveAny.IsValid() { + consumed = true + if err := d.unmarshal(saveAny, &t, depth+1); err != nil { + return err + } + } + } + if !consumed { + if err := d.Skip(); err != nil { + return err + } + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + d.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { + if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + + if saveData.IsValid() && saveData.CanAddr() { + pv := saveData.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + if t.Type().Elem().Kind() == reflect.Uint8 { + t.Set(reflect.ValueOf(saveXMLData)) + } + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + dst0 := dst + + if dst.Kind() == reflect.Pointer { + if dst.IsNil() { + dst.Set(reflect.New(dst.Type().Elem())) + } + dst = dst.Elem() + } + + // Save accumulated data. + switch dst.Kind() { + case reflect.Invalid: + // Probably a comment. + default: + return errors.New("cannot unmarshal into " + dst0.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if len(src) == 0 { + dst.SetInt(0) + return nil + } + itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if len(src) == 0 { + dst.SetUint(0) + return nil + } + utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if len(src) == 0 { + dst.SetFloat(0) + return nil + } + ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) + if err != nil { + return err + } + dst.SetFloat(ftmp) + case reflect.Bool: + if len(src) == 0 { + dst.SetBool(false) + return nil + } + value, err := strconv.ParseBool(strings.TrimSpace(string(src))) + if err != nil { + return err + } + dst.SetBool(value) + case reflect.String: + dst.SetString(string(src)) + case reflect.Slice: + if len(src) == 0 { + // non-nil to flag presence + src = []byte{} + } + dst.SetBytes(src) + } + return nil +} + +// unmarshalPath walks down an XML structure looking for wanted +// paths, and calls unmarshal on them. +// The consumed result tells whether XML elements have been consumed +// from the Decoder until start's matching end element, or if it's +// still untouched because start is uninteresting for sv's fields. +func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) { + recurse := false +Loop: + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + continue + } + for j := range parents { + if parents[j] != finfo.parents[j] { + continue Loop + } + } + if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { + // It's a perfect match, unmarshal the field. + return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1) + } + if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { + // It's a prefix for the field. Break and recurse + // since it's not ok for one field path to be itself + // the prefix for another field path. + recurse = true + + // We can reuse the same slice as long as we + // don't try to append to it. + parents = finfo.parents[:len(parents)+1] + break + } + } + if !recurse { + // We have no business with this element. + return false, nil + } + // The element is not a perfect match for any field, but one + // or more fields have the path to this element as a parent + // prefix. Recurse and attempt to match these. + for { + var tok Token + tok, err = d.Token() + if err != nil { + return true, err + } + switch t := tok.(type) { + case StartElement: + // the recursion depth of unmarshalPath is limited to the path length specified + // by the struct field tag, so we don't increment the depth here. + consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth) + if err != nil { + return true, err + } + if !consumed2 { + if err := d.Skip(); err != nil { + return true, err + } + } + case EndElement: + return true, nil + } + } +} + +// Skip reads tokens until it has consumed the end element +// matching the most recent start element already consumed, +// skipping nested structures. +// It returns nil if it finds an end element matching the start +// element; otherwise it returns an error describing the problem. +func (d *Decoder) Skip() error { + var depth int64 + for { + tok, err := d.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + depth++ + case EndElement: + if depth == 0 { + return nil + } + depth-- + } + } +} diff --git a/src/encoding/xml/read_test.go b/src/encoding/xml/read_test.go new file mode 100644 index 0000000..ce99894 --- /dev/null +++ b/src/encoding/xml/read_test.go @@ -0,0 +1,1128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "io" + "reflect" + "runtime" + "strings" + "testing" + "time" +) + +// Stripped down Atom feed data structures. + +func TestUnmarshalFeed(t *testing.T) { + var f Feed + if err := Unmarshal([]byte(atomFeedString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) + } +} + +// hget http://codereview.appspot.com/rss/mine/rsc +const atomFeedString = ` +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us" updated="2009-10-04T01:35:58+00:00"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><author><name>rietveld<></name></author><entry><title>rietveld: an attempt at pubsubhubbub +</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can&#39;t quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed&#39;s actual URL in +the link rel=&quot;self&quot;, but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +</summary></entry><entry><title>rietveld: correct tab handling +</title><link href="http://codereview.appspot.com/124106" rel="alternate"></link><updated>2009-10-03T23:02:17+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:0a2a4f19bb815101f0ba2904aed7c35a</id><summary type="html"> + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn&#39;t know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +</summary></entry></feed> ` + +type Feed struct { + XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated,attr"` + Author Person `xml:"author"` + Entry []Entry `xml:"entry"` +} + +type Entry struct { + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated"` + Author Person `xml:"author"` + Summary Text `xml:"summary"` +} + +type Link struct { + Rel string `xml:"rel,attr,omitempty"` + Href string `xml:"href,attr"` +} + +type Person struct { + Name string `xml:"name"` + URI string `xml:"uri"` + Email string `xml:"email"` + InnerXML string `xml:",innerxml"` +} + +type Text struct { + Type string `xml:"type,attr,omitempty"` + Body string `xml:",chardata"` +} + +var atomFeed = Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Code Review - My issues", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/"}, + {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, + }, + ID: "http://codereview.appspot.com/", + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "rietveld<>", + InnerXML: "<name>rietveld<></name>", + }, + Entry: []Entry{ + { + Title: "rietveld: an attempt at pubsubhubbub\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, + }, + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + Summary: Text{ + Type: "html", + Body: ` + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +`, + }, + }, + { + Title: "rietveld: correct tab handling\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, + }, + Updated: ParseTime("2009-10-03T23:02:17+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + Summary: Text{ + Type: "html", + Body: ` + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +`, + }, + }, + }, +} + +const pathTestString = ` +<Result> + <Before>1</Before> + <Items> + <Item1> + <Value>A</Value> + </Item1> + <Item2> + <Value>B</Value> + </Item2> + <Item1> + <Value>C</Value> + <Value>D</Value> + </Item1> + <_> + <Value>E</Value> + </_> + </Items> + <After>2</After> +</Result> +` + +type PathTestItem struct { + Value string +} + +type PathTestA struct { + Items []PathTestItem `xml:">Item1"` + Before, After string +} + +type PathTestB struct { + Other []PathTestItem `xml:"Items>Item1"` + Before, After string +} + +type PathTestC struct { + Values1 []string `xml:"Items>Item1>Value"` + Values2 []string `xml:"Items>Item2>Value"` + Before, After string +} + +type PathTestSet struct { + Item1 []PathTestItem +} + +type PathTestD struct { + Other PathTestSet `xml:"Items"` + Before, After string +} + +type PathTestE struct { + Underline string `xml:"Items>_>Value"` + Before, After string +} + +var pathTests = []any{ + &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestE{Underline: "E", Before: "1", After: "2"}, +} + +func TestUnmarshalPaths(t *testing.T) { + for _, pt := range pathTests { + v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() + if err := Unmarshal([]byte(pathTestString), v); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(v, pt) { + t.Fatalf("have %#v\nwant %#v", v, pt) + } + } +} + +type BadPathTestA struct { + First string `xml:"items>item1"` + Other string `xml:"items>item2"` + Second string `xml:"items"` +} + +type BadPathTestB struct { + Other string `xml:"items>item2>value"` + First string `xml:"items>item1"` + Second string `xml:"items>item1>value"` +} + +type BadPathTestC struct { + First string + Second string `xml:"First"` +} + +type BadPathTestD struct { + BadPathEmbeddedA + BadPathEmbeddedB +} + +type BadPathEmbeddedA struct { + First string +} + +type BadPathEmbeddedB struct { + Second string `xml:"First"` +} + +var badPathTests = []struct { + v, e any +}{ + {&BadPathTestA{}, &TagPathError{reflect.TypeFor[BadPathTestA](), "First", "items>item1", "Second", "items"}}, + {&BadPathTestB{}, &TagPathError{reflect.TypeFor[BadPathTestB](), "First", "items>item1", "Second", "items>item1>value"}}, + {&BadPathTestC{}, &TagPathError{reflect.TypeFor[BadPathTestC](), "First", "", "Second", "First"}}, + {&BadPathTestD{}, &TagPathError{reflect.TypeFor[BadPathTestD](), "First", "", "Second", "First"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal([]byte(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e) + } + } +} + +const OK = "OK" +const withoutNameTypeData = ` +<?xml version="1.0" charset="utf-8"?> +<Test3 Attr="OK" />` + +type TestThree struct { + XMLName Name `xml:"Test3"` + Attr string `xml:",attr"` +} + +func TestUnmarshalWithoutNameType(t *testing.T) { + var x TestThree + if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if x.Attr != OK { + t.Fatalf("have %v\nwant %v", x.Attr, OK) + } +} + +func TestUnmarshalAttr(t *testing.T) { + type ParamVal struct { + Int int `xml:"int,attr"` + } + + type ParamPtr struct { + Int *int `xml:"int,attr"` + } + + type ParamStringPtr struct { + Int *string `xml:"int,attr"` + } + + x := []byte(`<Param int="1" />`) + + p1 := &ParamPtr{} + if err := Unmarshal(x, p1); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p1.Int == nil { + t.Fatalf("Unmarshal failed in to *int field") + } else if *p1.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1) + } + + p2 := &ParamVal{} + if err := Unmarshal(x, p2); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p2.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1) + } + + p3 := &ParamStringPtr{} + if err := Unmarshal(x, p3); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p3.Int == nil { + t.Fatalf("Unmarshal failed in to *string field") + } else if *p3.Int != "1" { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) + } +} + +type Tables struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table"` + FTable string `xml:"http://www.w3schools.com/furniture table"` +} + +var tables = []struct { + xml string + tab Tables + ns string +}{ + { + xml: `<Tables>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/">` + + `<f:table>world</f:table>` + + `<h:table>hello</h:table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table>bogus</table>` + + `</Tables>`, + tab: Tables{}, + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{HTable: "only"}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{FTable: "only"}, + ns: "http://www.w3schools.com/furniture", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNS(t *testing.T) { + for i, tt := range tables { + var dst Tables + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNS(t *testing.T) { + dst := Tables{"hello", "world"} + data, err := Marshal(&dst) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<Tables><table xmlns="http://www.w3.org/TR/html4/">hello</table><table xmlns="http://www.w3schools.com/furniture">world</table></Tables>` + str := string(data) + if str != want { + t.Errorf("have: %q\nwant: %q\n", str, want) + } +} + +type TableAttrs struct { + TAttr TAttr +} + +type TAttr struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table,attr"` + FTable string `xml:"http://www.w3schools.com/furniture table,attr"` + Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr,omitempty"` + Other1 string `xml:"http://golang.org/xml/ other,attr,omitempty"` + Other2 string `xml:"http://golang.org/xmlfoo/ other,attr,omitempty"` + Other3 string `xml:"http://golang.org/json/ other,attr,omitempty"` + Other4 string `xml:"http://golang.org/2/json/ other,attr,omitempty"` +} + +var tableAttrs = []struct { + xml string + tab TableAttrs + ns string +}{ + { + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `h:table="hello" f:table="world" xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr xmlns="http://www.w3.org/TR/html4/" ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + ns: "http://www.w3schools.com/furniture", + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNSAttr(t *testing.T) { + for i, tt := range tableAttrs { + var dst TableAttrs + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNSAttr(t *testing.T) { + src := TableAttrs{TAttr{"hello", "world", "en_US", "other1", "other2", "other3", "other4"}} + data, err := Marshal(&src) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<TableAttrs><TAttr xmlns:html4="http://www.w3.org/TR/html4/" html4:table="hello" xmlns:furniture="http://www.w3schools.com/furniture" furniture:table="world" xml:lang="en_US" xmlns:_xml="http://golang.org/xml/" _xml:other="other1" xmlns:_xmlfoo="http://golang.org/xmlfoo/" _xmlfoo:other="other2" xmlns:json="http://golang.org/json/" json:other="other3" xmlns:json_1="http://golang.org/2/json/" json_1:other="other4"></TAttr></TableAttrs>` + str := string(data) + if str != want { + t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want) + } + + var dst TableAttrs + if err := Unmarshal(data, &dst); err != nil { + t.Errorf("Unmarshal: %v", err) + } + + if dst != src { + t.Errorf("Unmarshal = %q, want %q", dst, src) + } +} + +type MyCharData struct { + body string +} + +func (m *MyCharData) UnmarshalXML(d *Decoder, start StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { // found end of element + break + } + if err != nil { + return err + } + if char, ok := t.(CharData); ok { + m.body += string(char) + } + } + return nil +} + +var _ Unmarshaler = (*MyCharData)(nil) + +func (m *MyCharData) UnmarshalXMLAttr(attr Attr) error { + panic("must not call") +} + +type MyAttr struct { + attr string +} + +func (m *MyAttr) UnmarshalXMLAttr(attr Attr) error { + m.attr = attr.Value + return nil +} + +var _ UnmarshalerAttr = (*MyAttr)(nil) + +type MyStruct struct { + Data *MyCharData + Attr *MyAttr `xml:",attr"` + + Data2 MyCharData + Attr2 MyAttr `xml:",attr"` +} + +func TestUnmarshaler(t *testing.T) { + xml := `<?xml version="1.0" encoding="utf-8"?> + <MyStruct Attr="attr1" Attr2="attr2"> + <Data>hello <!-- comment -->world</Data> + <Data2>howdy <!-- comment -->world</Data2> + </MyStruct> + ` + + var m MyStruct + if err := Unmarshal([]byte(xml), &m); err != nil { + t.Fatal(err) + } + + if m.Data == nil || m.Attr == nil || m.Data.body != "hello world" || m.Attr.attr != "attr1" || m.Data2.body != "howdy world" || m.Attr2.attr != "attr2" { + t.Errorf("m=%#+v\n", m) + } +} + +type Pea struct { + Cotelydon string +} + +type Pod struct { + Pea any `xml:"Pea"` +} + +// https://golang.org/issue/6836 +func TestUnmarshalIntoInterface(t *testing.T) { + pod := new(Pod) + pod.Pea = new(Pea) + xml := `<Pod><Pea><Cotelydon>Green stuff</Cotelydon></Pea></Pod>` + err := Unmarshal([]byte(xml), pod) + if err != nil { + t.Fatalf("failed to unmarshal %q: %v", xml, err) + } + pea, ok := pod.Pea.(*Pea) + if !ok { + t.Fatalf("unmarshaled into wrong type: have %T want *Pea", pod.Pea) + } + have, want := pea.Cotelydon, "Green stuff" + if have != want { + t.Errorf("failed to unmarshal into interface, have %q want %q", have, want) + } +} + +type X struct { + D string `xml:",comment"` +} + +// Issue 11112. Unmarshal must reject invalid comments. +func TestMalformedComment(t *testing.T) { + testData := []string{ + "<X><!-- a---></X>", + "<X><!-- -- --></X>", + "<X><!-- a--b --></X>", + "<X><!------></X>", + } + for i, test := range testData { + data := []byte(test) + v := new(X) + if err := Unmarshal(data, v); err == nil { + t.Errorf("%d: unmarshal should reject invalid comments", i) + } + } +} + +type IXField struct { + Five int `xml:"five"` + NotInnerXML []string `xml:",innerxml"` +} + +// Issue 15600. ",innerxml" on a field that can't hold it. +func TestInvalidInnerXMLType(t *testing.T) { + v := new(IXField) + if err := Unmarshal([]byte(`<tag><five>5</five><innertag/></tag>`), v); err != nil { + t.Errorf("Unmarshal failed: got %v", err) + } + if v.Five != 5 { + t.Errorf("Five = %v, want 5", v.Five) + } + if v.NotInnerXML != nil { + t.Errorf("NotInnerXML = %v, want nil", v.NotInnerXML) + } +} + +type Child struct { + G struct { + I int + } +} + +type ChildToEmbed struct { + X bool +} + +type Parent struct { + I int + IPtr *int + Is []int + IPtrs []*int + F float32 + FPtr *float32 + Fs []float32 + FPtrs []*float32 + B bool + BPtr *bool + Bs []bool + BPtrs []*bool + Bytes []byte + BytesPtr *[]byte + S string + SPtr *string + Ss []string + SPtrs []*string + MyI MyInt + Child Child + Children []Child + ChildPtr *Child + ChildToEmbed +} + +const ( + emptyXML = ` +<Parent> + <I></I> + <IPtr></IPtr> + <Is></Is> + <IPtrs></IPtrs> + <F></F> + <FPtr></FPtr> + <Fs></Fs> + <FPtrs></FPtrs> + <B></B> + <BPtr></BPtr> + <Bs></Bs> + <BPtrs></BPtrs> + <Bytes></Bytes> + <BytesPtr></BytesPtr> + <S></S> + <SPtr></SPtr> + <Ss></Ss> + <SPtrs></SPtrs> + <MyI></MyI> + <Child></Child> + <Children></Children> + <ChildPtr></ChildPtr> + <X></X> +</Parent> +` +) + +// golang.org/issues/13417 +func TestUnmarshalEmptyValues(t *testing.T) { + // Test first with a zero-valued dst. + v := new(Parent) + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("zero: Unmarshal failed: got %v", err) + } + + zBytes, zInt, zStr, zFloat, zBool := []byte{}, 0, "", float32(0), false + want := &Parent{ + IPtr: &zInt, + Is: []int{zInt}, + IPtrs: []*int{&zInt}, + FPtr: &zFloat, + Fs: []float32{zFloat}, + FPtrs: []*float32{&zFloat}, + BPtr: &zBool, + Bs: []bool{zBool}, + BPtrs: []*bool{&zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{zStr}, + SPtrs: []*string{&zStr}, + Children: []Child{{}}, + ChildPtr: new(Child), + ChildToEmbed: ChildToEmbed{}, + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("zero: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } + + // Test with a pre-populated dst. + // Multiple addressable copies, as pointer-to fields will replace value during unmarshal. + vBytes0, vInt0, vStr0, vFloat0, vBool0 := []byte("x"), 1, "x", float32(1), true + vBytes1, vInt1, vStr1, vFloat1, vBool1 := []byte("x"), 1, "x", float32(1), true + vInt2, vStr2, vFloat2, vBool2 := 1, "x", float32(1), true + v = &Parent{ + I: vInt0, + IPtr: &vInt1, + Is: []int{vInt0}, + IPtrs: []*int{&vInt2}, + F: vFloat0, + FPtr: &vFloat1, + Fs: []float32{vFloat0}, + FPtrs: []*float32{&vFloat2}, + B: vBool0, + BPtr: &vBool1, + Bs: []bool{vBool0}, + BPtrs: []*bool{&vBool2}, + Bytes: vBytes0, + BytesPtr: &vBytes1, + S: vStr0, + SPtr: &vStr1, + Ss: []string{vStr0}, + SPtrs: []*string{&vStr2}, + MyI: MyInt(vInt0), + Child: Child{G: struct{ I int }{I: vInt0}}, + Children: []Child{{G: struct{ I int }{I: vInt0}}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, + ChildToEmbed: ChildToEmbed{X: vBool0}, + } + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("populated: Unmarshal failed: got %v", err) + } + + want = &Parent{ + IPtr: &zInt, + Is: []int{vInt0, zInt}, + IPtrs: []*int{&vInt0, &zInt}, + FPtr: &zFloat, + Fs: []float32{vFloat0, zFloat}, + FPtrs: []*float32{&vFloat0, &zFloat}, + BPtr: &zBool, + Bs: []bool{vBool0, zBool}, + BPtrs: []*bool{&vBool0, &zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{vStr0, zStr}, + SPtrs: []*string{&vStr0, &zStr}, + Child: Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + Children: []Child{{G: struct{ I int }{I: vInt0}}, {}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceValuesParent struct { + BFalse bool + BTrue bool + I int + INeg int + I8 int8 + I8Neg int8 + I16 int16 + I16Neg int16 + I32 int32 + I32Neg int32 + I64 int64 + I64Neg int64 + UI uint + UI8 uint8 + UI16 uint16 + UI32 uint32 + UI64 uint64 + F32 float32 + F32Neg float32 + F64 float64 + F64Neg float64 +} + +const whitespaceValuesXML = ` +<WhitespaceValuesParent> + <BFalse> false </BFalse> + <BTrue> true </BTrue> + <I> 266703 </I> + <INeg> -266703 </INeg> + <I8> 112 </I8> + <I8Neg> -112 </I8Neg> + <I16> 6703 </I16> + <I16Neg> -6703 </I16Neg> + <I32> 266703 </I32> + <I32Neg> -266703 </I32Neg> + <I64> 266703 </I64> + <I64Neg> -266703 </I64Neg> + <UI> 266703 </UI> + <UI8> 112 </UI8> + <UI16> 6703 </UI16> + <UI32> 266703 </UI32> + <UI64> 266703 </UI64> + <F32> 266.703 </F32> + <F32Neg> -266.703 </F32Neg> + <F64> 266.703 </F64> + <F64Neg> -266.703 </F64Neg> +</WhitespaceValuesParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceValues(t *testing.T) { + v := WhitespaceValuesParent{} + if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil { + t.Fatalf("whitespace values: Unmarshal failed: got %v", err) + } + + want := WhitespaceValuesParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceAttrsParent struct { + BFalse bool `xml:",attr"` + BTrue bool `xml:",attr"` + I int `xml:",attr"` + INeg int `xml:",attr"` + I8 int8 `xml:",attr"` + I8Neg int8 `xml:",attr"` + I16 int16 `xml:",attr"` + I16Neg int16 `xml:",attr"` + I32 int32 `xml:",attr"` + I32Neg int32 `xml:",attr"` + I64 int64 `xml:",attr"` + I64Neg int64 `xml:",attr"` + UI uint `xml:",attr"` + UI8 uint8 `xml:",attr"` + UI16 uint16 `xml:",attr"` + UI32 uint32 `xml:",attr"` + UI64 uint64 `xml:",attr"` + F32 float32 `xml:",attr"` + F32Neg float32 `xml:",attr"` + F64 float64 `xml:",attr"` + F64Neg float64 `xml:",attr"` +} + +const whitespaceAttrsXML = ` +<WhitespaceAttrsParent + BFalse=" false " + BTrue=" true " + I=" 266703 " + INeg=" -266703 " + I8=" 112 " + I8Neg=" -112 " + I16=" 6703 " + I16Neg=" -6703 " + I32=" 266703 " + I32Neg=" -266703 " + I64=" 266703 " + I64Neg=" -266703 " + UI=" 266703 " + UI8=" 112 " + UI16=" 6703 " + UI32=" 266703 " + UI64=" 266703 " + F32=" 266.703 " + F32Neg=" -266.703 " + F64=" 266.703 " + F64Neg=" -266.703 " +> +</WhitespaceAttrsParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceAttrs(t *testing.T) { + v := WhitespaceAttrsParent{} + if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil { + t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err) + } + + want := WhitespaceAttrsParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +// golang.org/issues/53350 +func TestUnmarshalIntoNil(t *testing.T) { + type T struct { + A int `xml:"A"` + } + + var nilPointer *T + err := Unmarshal([]byte("<T><A>1</A></T>"), nilPointer) + + if err == nil { + t.Fatalf("no error in unmarshalling") + } + +} + +func TestCVE202228131(t *testing.T) { + type nested struct { + Parent *nested `xml:",any"` + } + var n nested + err := Unmarshal(bytes.Repeat([]byte("<a>"), maxUnmarshalDepth+1), &n) + if err == nil { + t.Fatal("Unmarshal did not fail") + } else if !errors.Is(err, errUnmarshalDepth) { + t.Fatalf("Unmarshal unexpected error: got %q, want %q", err, errUnmarshalDepth) + } +} + +func TestCVE202230633(t *testing.T) { + if testing.Short() || runtime.GOARCH == "wasm" { + t.Skip("test requires significant memory") + } + defer func() { + p := recover() + if p != nil { + t.Fatal("Unmarshal panicked") + } + }() + var example struct { + Things []string + } + Unmarshal(bytes.Repeat([]byte("<a>"), 17_000_000), &example) +} diff --git a/src/encoding/xml/typeinfo.go b/src/encoding/xml/typeinfo.go new file mode 100644 index 0000000..b18ed28 --- /dev/null +++ b/src/encoding/xml/typeinfo.go @@ -0,0 +1,367 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "fmt" + "reflect" + "strings" + "sync" +) + +// typeInfo holds details for the xml representation of a type. +type typeInfo struct { + xmlname *fieldInfo + fields []fieldInfo +} + +// fieldInfo holds details for the xml representation of a single field. +type fieldInfo struct { + idx []int + name string + xmlns string + flags fieldFlags + parents []string +} + +type fieldFlags int + +const ( + fElement fieldFlags = 1 << iota + fAttr + fCDATA + fCharData + fInnerXML + fComment + fAny + + fOmitEmpty + + fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny + + xmlName = "XMLName" +) + +var tinfoMap sync.Map // map[reflect.Type]*typeInfo + +var nameType = reflect.TypeFor[Name]() + +// getTypeInfo returns the typeInfo structure with details necessary +// for marshaling and unmarshaling typ. +func getTypeInfo(typ reflect.Type) (*typeInfo, error) { + if ti, ok := tinfoMap.Load(typ); ok { + return ti.(*typeInfo), nil + } + + tinfo := &typeInfo{} + if typ.Kind() == reflect.Struct && typ != nameType { + n := typ.NumField() + for i := 0; i < n; i++ { + f := typ.Field(i) + if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" { + continue // Private field + } + + // For embedded structs, embed its fields. + if f.Anonymous { + t := f.Type + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if t.Kind() == reflect.Struct { + inner, err := getTypeInfo(t) + if err != nil { + return nil, err + } + if tinfo.xmlname == nil { + tinfo.xmlname = inner.xmlname + } + for _, finfo := range inner.fields { + finfo.idx = append([]int{i}, finfo.idx...) + if err := addFieldInfo(typ, tinfo, &finfo); err != nil { + return nil, err + } + } + continue + } + } + + finfo, err := structFieldInfo(typ, &f) + if err != nil { + return nil, err + } + + if f.Name == xmlName { + tinfo.xmlname = finfo + continue + } + + // Add the field if it doesn't conflict with other fields. + if err := addFieldInfo(typ, tinfo, finfo); err != nil { + return nil, err + } + } + } + + ti, _ := tinfoMap.LoadOrStore(typ, tinfo) + return ti.(*typeInfo), nil +} + +// structFieldInfo builds and returns a fieldInfo for f. +func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) { + finfo := &fieldInfo{idx: f.Index} + + // Split the tag from the xml namespace if necessary. + tag := f.Tag.Get("xml") + if ns, t, ok := strings.Cut(tag, " "); ok { + finfo.xmlns, tag = ns, t + } + + // Parse flags. + tokens := strings.Split(tag, ",") + if len(tokens) == 1 { + finfo.flags = fElement + } else { + tag = tokens[0] + for _, flag := range tokens[1:] { + switch flag { + case "attr": + finfo.flags |= fAttr + case "cdata": + finfo.flags |= fCDATA + case "chardata": + finfo.flags |= fCharData + case "innerxml": + finfo.flags |= fInnerXML + case "comment": + finfo.flags |= fComment + case "any": + finfo.flags |= fAny + case "omitempty": + finfo.flags |= fOmitEmpty + } + } + + // Validate the flags used. + valid := true + switch mode := finfo.flags & fMode; mode { + case 0: + finfo.flags |= fElement + case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr: + if f.Name == xmlName || tag != "" && mode != fAttr { + valid = false + } + default: + // This will also catch multiple modes in a single field. + valid = false + } + if finfo.flags&fMode == fAny { + finfo.flags |= fElement + } + if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 { + valid = false + } + if !valid { + return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + } + + // Use of xmlns without a name is not allowed. + if finfo.xmlns != "" && tag == "" { + return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + + if f.Name == xmlName { + // The XMLName field records the XML element name. Don't + // process it as usual because its name should default to + // empty rather than to the field name. + finfo.name = tag + return finfo, nil + } + + if tag == "" { + // If the name part of the tag is completely empty, get + // default from XMLName of underlying struct if feasible, + // or field name otherwise. + if xmlname := lookupXMLName(f.Type); xmlname != nil { + finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name + } else { + finfo.name = f.Name + } + return finfo, nil + } + + // Prepare field name and parents. + parents := strings.Split(tag, ">") + if parents[0] == "" { + parents[0] = f.Name + } + if parents[len(parents)-1] == "" { + return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ) + } + finfo.name = parents[len(parents)-1] + if len(parents) > 1 { + if (finfo.flags & fElement) == 0 { + return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ",")) + } + finfo.parents = parents[:len(parents)-1] + } + + // If the field type has an XMLName field, the names must match + // so that the behavior of both marshaling and unmarshaling + // is straightforward and unambiguous. + if finfo.flags&fElement != 0 { + ftyp := f.Type + xmlname := lookupXMLName(ftyp) + if xmlname != nil && xmlname.name != finfo.name { + return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName", + finfo.name, typ, f.Name, xmlname.name, ftyp) + } + } + return finfo, nil +} + +// lookupXMLName returns the fieldInfo for typ's XMLName field +// in case it exists and has a valid xml field tag, otherwise +// it returns nil. +func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) { + for typ.Kind() == reflect.Pointer { + typ = typ.Elem() + } + if typ.Kind() != reflect.Struct { + return nil + } + for i, n := 0, typ.NumField(); i < n; i++ { + f := typ.Field(i) + if f.Name != xmlName { + continue + } + finfo, err := structFieldInfo(typ, &f) + if err == nil && finfo.name != "" { + return finfo + } + // Also consider errors as a non-existent field tag + // and let getTypeInfo itself report the error. + break + } + return nil +} + +// addFieldInfo adds finfo to tinfo.fields if there are no +// conflicts, or if conflicts arise from previous fields that were +// obtained from deeper embedded structures than finfo. In the latter +// case, the conflicting entries are dropped. +// A conflict occurs when the path (parent + name) to a field is +// itself a prefix of another path, or when two paths match exactly. +// It is okay for field paths to share a common, shorter prefix. +func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error { + var conflicts []int +Loop: + // First, figure all conflicts. Most working code will have none. + for i := range tinfo.fields { + oldf := &tinfo.fields[i] + if oldf.flags&fMode != newf.flags&fMode { + continue + } + if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns { + continue + } + minl := min(len(newf.parents), len(oldf.parents)) + for p := 0; p < minl; p++ { + if oldf.parents[p] != newf.parents[p] { + continue Loop + } + } + if len(oldf.parents) > len(newf.parents) { + if oldf.parents[len(newf.parents)] == newf.name { + conflicts = append(conflicts, i) + } + } else if len(oldf.parents) < len(newf.parents) { + if newf.parents[len(oldf.parents)] == oldf.name { + conflicts = append(conflicts, i) + } + } else { + if newf.name == oldf.name && newf.xmlns == oldf.xmlns { + conflicts = append(conflicts, i) + } + } + } + // Without conflicts, add the new field and return. + if conflicts == nil { + tinfo.fields = append(tinfo.fields, *newf) + return nil + } + + // If any conflict is shallower, ignore the new field. + // This matches the Go field resolution on embedding. + for _, i := range conflicts { + if len(tinfo.fields[i].idx) < len(newf.idx) { + return nil + } + } + + // Otherwise, if any of them is at the same depth level, it's an error. + for _, i := range conflicts { + oldf := &tinfo.fields[i] + if len(oldf.idx) == len(newf.idx) { + f1 := typ.FieldByIndex(oldf.idx) + f2 := typ.FieldByIndex(newf.idx) + return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} + } + } + + // Otherwise, the new field is shallower, and thus takes precedence, + // so drop the conflicting fields from tinfo and append the new one. + for c := len(conflicts) - 1; c >= 0; c-- { + i := conflicts[c] + copy(tinfo.fields[i:], tinfo.fields[i+1:]) + tinfo.fields = tinfo.fields[:len(tinfo.fields)-1] + } + tinfo.fields = append(tinfo.fields, *newf) + return nil +} + +// A TagPathError represents an error in the unmarshaling process +// caused by the use of field tags with conflicting paths. +type TagPathError struct { + Struct reflect.Type + Field1, Tag1 string + Field2, Tag2 string +} + +func (e *TagPathError) Error() string { + return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) +} + +const ( + initNilPointers = true + dontInitNilPointers = false +) + +// value returns v's field value corresponding to finfo. +// It's equivalent to v.FieldByIndex(finfo.idx), but when passed +// initNilPointers, it initializes and dereferences pointers as necessary. +// When passed dontInitNilPointers and a nil pointer is reached, the function +// returns a zero reflect.Value. +func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value { + for i, x := range finfo.idx { + if i > 0 { + t := v.Type() + if t.Kind() == reflect.Pointer && t.Elem().Kind() == reflect.Struct { + if v.IsNil() { + if !shouldInitNilPointers { + return reflect.Value{} + } + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + } + v = v.Field(x) + } + return v +} diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go new file mode 100644 index 0000000..73eedad --- /dev/null +++ b/src/encoding/xml/xml.go @@ -0,0 +1,2060 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package xml implements a simple XML 1.0 parser that +// understands XML name spaces. +package xml + +// References: +// Annotated XML spec: https://www.xml.com/axml/testaxml.htm +// XML name spaces: https://www.w3.org/TR/REC-xml-names/ + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A SyntaxError represents a syntax error in the XML input stream. +type SyntaxError struct { + Msg string + Line int +} + +func (e *SyntaxError) Error() string { + return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg +} + +// A Name represents an XML name (Local) annotated +// with a name space identifier (Space). +// In tokens returned by [Decoder.Token], the Space identifier +// is given as a canonical URL, not the short prefix used +// in the document being parsed. +type Name struct { + Space, Local string +} + +// An Attr represents an attribute in an XML element (Name=Value). +type Attr struct { + Name Name + Value string +} + +// A Token is an interface holding one of the token types: +// [StartElement], [EndElement], [CharData], [Comment], [ProcInst], or [Directive]. +type Token any + +// A StartElement represents an XML start element. +type StartElement struct { + Name Name + Attr []Attr +} + +// Copy creates a new copy of StartElement. +func (e StartElement) Copy() StartElement { + attrs := make([]Attr, len(e.Attr)) + copy(attrs, e.Attr) + e.Attr = attrs + return e +} + +// End returns the corresponding XML end element. +func (e StartElement) End() EndElement { + return EndElement{e.Name} +} + +// An EndElement represents an XML end element. +type EndElement struct { + Name Name +} + +// A CharData represents XML character data (raw text), +// in which XML escape sequences have been replaced by +// the characters they represent. +type CharData []byte + +// Copy creates a new copy of CharData. +func (c CharData) Copy() CharData { return CharData(bytes.Clone(c)) } + +// A Comment represents an XML comment of the form <!--comment-->. +// The bytes do not include the <!-- and --> comment markers. +type Comment []byte + +// Copy creates a new copy of Comment. +func (c Comment) Copy() Comment { return Comment(bytes.Clone(c)) } + +// A ProcInst represents an XML processing instruction of the form <?target inst?> +type ProcInst struct { + Target string + Inst []byte +} + +// Copy creates a new copy of ProcInst. +func (p ProcInst) Copy() ProcInst { + p.Inst = bytes.Clone(p.Inst) + return p +} + +// A Directive represents an XML directive of the form <!text>. +// The bytes do not include the <! and > markers. +type Directive []byte + +// Copy creates a new copy of Directive. +func (d Directive) Copy() Directive { return Directive(bytes.Clone(d)) } + +// CopyToken returns a copy of a Token. +func CopyToken(t Token) Token { + switch v := t.(type) { + case CharData: + return v.Copy() + case Comment: + return v.Copy() + case Directive: + return v.Copy() + case ProcInst: + return v.Copy() + case StartElement: + return v.Copy() + } + return t +} + +// A TokenReader is anything that can decode a stream of XML tokens, including a +// [Decoder]. +// +// When Token encounters an error or end-of-file condition after successfully +// reading a token, it returns the token. It may return the (non-nil) error from +// the same call or return the error (and a nil token) from a subsequent call. +// An instance of this general case is that a TokenReader returning a non-nil +// token at the end of the token stream may return either io.EOF or a nil error. +// The next Read should return nil, [io.EOF]. +// +// Implementations of Token are discouraged from returning a nil token with a +// nil error. Callers should treat a return of nil, nil as indicating that +// nothing happened; in particular it does not indicate EOF. +type TokenReader interface { + Token() (Token, error) +} + +// A Decoder represents an XML parser reading a particular input stream. +// The parser assumes that its input is encoded in UTF-8. +type Decoder struct { + // Strict defaults to true, enforcing the requirements + // of the XML specification. + // If set to false, the parser allows input containing common + // mistakes: + // * If an element is missing an end tag, the parser invents + // end tags as necessary to keep the return values from Token + // properly balanced. + // * In attribute values and character data, unknown or malformed + // character entities (sequences beginning with &) are left alone. + // + // Setting: + // + // d.Strict = false + // d.AutoClose = xml.HTMLAutoClose + // d.Entity = xml.HTMLEntity + // + // creates a parser that can handle typical HTML. + // + // Strict mode does not enforce the requirements of the XML name spaces TR. + // In particular it does not reject name space tags using undefined prefixes. + // Such tags are recorded with the unknown prefix as the name space URL. + Strict bool + + // When Strict == false, AutoClose indicates a set of elements to + // consider closed immediately after they are opened, regardless + // of whether an end element is present. + AutoClose []string + + // Entity can be used to map non-standard entity names to string replacements. + // The parser behaves as if these standard mappings are present in the map, + // regardless of the actual map content: + // + // "lt": "<", + // "gt": ">", + // "amp": "&", + // "apos": "'", + // "quot": `"`, + Entity map[string]string + + // CharsetReader, if non-nil, defines a function to generate + // charset-conversion readers, converting from the provided + // non-UTF-8 charset into UTF-8. If CharsetReader is nil or + // returns an error, parsing stops with an error. One of the + // CharsetReader's result values must be non-nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) + + // DefaultSpace sets the default name space used for unadorned tags, + // as if the entire XML stream were wrapped in an element containing + // the attribute xmlns="DefaultSpace". + DefaultSpace string + + r io.ByteReader + t TokenReader + buf bytes.Buffer + saved *bytes.Buffer + stk *stack + free *stack + needClose bool + toClose Name + nextToken Token + nextByte int + ns map[string]string + err error + line int + linestart int64 + offset int64 + unmarshalDepth int +} + +// NewDecoder creates a new XML parser reading from r. +// If r does not implement [io.ByteReader], NewDecoder will +// do its own buffering. +func NewDecoder(r io.Reader) *Decoder { + d := &Decoder{ + ns: make(map[string]string), + nextByte: -1, + line: 1, + Strict: true, + } + d.switchToReader(r) + return d +} + +// NewTokenDecoder creates a new XML parser using an underlying token stream. +func NewTokenDecoder(t TokenReader) *Decoder { + // Is it already a Decoder? + if d, ok := t.(*Decoder); ok { + return d + } + d := &Decoder{ + ns: make(map[string]string), + t: t, + nextByte: -1, + line: 1, + Strict: true, + } + return d +} + +// Token returns the next XML token in the input stream. +// At the end of the input stream, Token returns nil, [io.EOF]. +// +// Slices of bytes in the returned token data refer to the +// parser's internal buffer and remain valid only until the next +// call to Token. To acquire a copy of the bytes, call [CopyToken] +// or the token's Copy method. +// +// Token expands self-closing elements such as <br> +// into separate start and end elements returned by successive calls. +// +// Token guarantees that the [StartElement] and [EndElement] +// tokens it returns are properly nested and matched: +// if Token encounters an unexpected end element +// or EOF before all expected end elements, +// it will return an error. +// +// If [Decoder.CharsetReader] is called and returns an error, +// the error is wrapped and returned. +// +// Token implements XML name spaces as described by +// https://www.w3.org/TR/REC-xml-names/. Each of the +// [Name] structures contained in the Token has the Space +// set to the URL identifying its name space when known. +// If Token encounters an unrecognized name space prefix, +// it uses the prefix as the Space rather than report an error. +func (d *Decoder) Token() (Token, error) { + var t Token + var err error + if d.stk != nil && d.stk.kind == stkEOF { + return nil, io.EOF + } + if d.nextToken != nil { + t = d.nextToken + d.nextToken = nil + } else { + if t, err = d.rawToken(); t == nil && err != nil { + if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + err = d.syntaxError("unexpected EOF") + } + return nil, err + } + // We still have a token to process, so clear any + // errors (e.g. EOF) and proceed. + err = nil + } + if !d.Strict { + if t1, ok := d.autoClose(t); ok { + d.nextToken = t + t = t1 + } + } + switch t1 := t.(type) { + case StartElement: + // In XML name spaces, the translations listed in the + // attributes apply to the element name and + // to the other attribute names, so process + // the translations first. + for _, a := range t1.Attr { + if a.Name.Space == xmlnsPrefix { + v, ok := d.ns[a.Name.Local] + d.pushNs(a.Name.Local, v, ok) + d.ns[a.Name.Local] = a.Value + } + if a.Name.Space == "" && a.Name.Local == xmlnsPrefix { + // Default space for untagged names + v, ok := d.ns[""] + d.pushNs("", v, ok) + d.ns[""] = a.Value + } + } + + d.pushElement(t1.Name) + d.translate(&t1.Name, true) + for i := range t1.Attr { + d.translate(&t1.Attr[i].Name, false) + } + t = t1 + + case EndElement: + if !d.popElement(&t1) { + return nil, d.err + } + t = t1 + } + return t, err +} + +const ( + xmlURL = "http://www.w3.org/XML/1998/namespace" + xmlnsPrefix = "xmlns" + xmlPrefix = "xml" +) + +// Apply name space translation to name n. +// The default name space (for Space=="") +// applies only to element names, not to attribute names. +func (d *Decoder) translate(n *Name, isElementName bool) { + switch { + case n.Space == xmlnsPrefix: + return + case n.Space == "" && !isElementName: + return + case n.Space == xmlPrefix: + n.Space = xmlURL + case n.Space == "" && n.Local == xmlnsPrefix: + return + } + if v, ok := d.ns[n.Space]; ok { + n.Space = v + } else if n.Space == "" { + n.Space = d.DefaultSpace + } +} + +func (d *Decoder) switchToReader(r io.Reader) { + // Get efficient byte at a time reader. + // Assume that if reader has its own + // ReadByte, it's efficient enough. + // Otherwise, use bufio. + if rb, ok := r.(io.ByteReader); ok { + d.r = rb + } else { + d.r = bufio.NewReader(r) + } +} + +// Parsing state - stack holds old name space translations +// and the current set of open elements. The translations to pop when +// ending a given tag are *below* it on the stack, which is +// more work but forced on us by XML. +type stack struct { + next *stack + kind int + name Name + ok bool +} + +const ( + stkStart = iota + stkNs + stkEOF +) + +func (d *Decoder) push(kind int) *stack { + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.next = d.stk + s.kind = kind + d.stk = s + return s +} + +func (d *Decoder) pop() *stack { + s := d.stk + if s != nil { + d.stk = s.next + s.next = d.free + d.free = s + } + return s +} + +// Record that after the current element is finished +// (that element is already pushed on the stack) +// Token should return EOF until popEOF is called. +func (d *Decoder) pushEOF() { + // Walk down stack to find Start. + // It might not be the top, because there might be stkNs + // entries above it. + start := d.stk + for start.kind != stkStart { + start = start.next + } + // The stkNs entries below a start are associated with that + // element too; skip over them. + for start.next != nil && start.next.kind == stkNs { + start = start.next + } + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.kind = stkEOF + s.next = start.next + start.next = s +} + +// Undo a pushEOF. +// The element must have been finished, so the EOF should be at the top of the stack. +func (d *Decoder) popEOF() bool { + if d.stk == nil || d.stk.kind != stkEOF { + return false + } + d.pop() + return true +} + +// Record that we are starting an element with the given name. +func (d *Decoder) pushElement(name Name) { + s := d.push(stkStart) + s.name = name +} + +// Record that we are changing the value of ns[local]. +// The old value is url, ok. +func (d *Decoder) pushNs(local string, url string, ok bool) { + s := d.push(stkNs) + s.name.Local = local + s.name.Space = url + s.ok = ok +} + +// Creates a SyntaxError with the current line number. +func (d *Decoder) syntaxError(msg string) error { + return &SyntaxError{Msg: msg, Line: d.line} +} + +// Record that we are ending an element with the given name. +// The name must match the record at the top of the stack, +// which must be a pushElement record. +// After popping the element, apply any undo records from +// the stack to restore the name translations that existed +// before we saw this element. +func (d *Decoder) popElement(t *EndElement) bool { + s := d.pop() + name := t.Name + switch { + case s == nil || s.kind != stkStart: + d.err = d.syntaxError("unexpected end element </" + name.Local + ">") + return false + case s.name.Local != name.Local: + if !d.Strict { + d.needClose = true + d.toClose = t.Name + t.Name = s.name + return true + } + d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") + return false + case s.name.Space != name.Space: + d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + + " closed by </" + name.Local + "> in space " + name.Space) + return false + } + + d.translate(&t.Name, true) + + // Pop stack until a Start or EOF is on the top, undoing the + // translations that were associated with the element we just closed. + for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF { + s := d.pop() + if s.ok { + d.ns[s.name.Local] = s.name.Space + } else { + delete(d.ns, s.name.Local) + } + } + + return true +} + +// If the top element on the stack is autoclosing and +// t is not the end tag, invent the end tag. +func (d *Decoder) autoClose(t Token) (Token, bool) { + if d.stk == nil || d.stk.kind != stkStart { + return nil, false + } + for _, s := range d.AutoClose { + if strings.EqualFold(s, d.stk.name.Local) { + // This one should be auto closed if t doesn't close it. + et, ok := t.(EndElement) + if !ok || !strings.EqualFold(et.Name.Local, d.stk.name.Local) { + return EndElement{d.stk.name}, true + } + break + } + } + return nil, false +} + +var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method") + +// RawToken is like [Decoder.Token] but does not verify that +// start and end elements match and does not translate +// name space prefixes to their corresponding URLs. +func (d *Decoder) RawToken() (Token, error) { + if d.unmarshalDepth > 0 { + return nil, errRawToken + } + return d.rawToken() +} + +func (d *Decoder) rawToken() (Token, error) { + if d.t != nil { + return d.t.Token() + } + if d.err != nil { + return nil, d.err + } + if d.needClose { + // The last element we read was self-closing and + // we returned just the StartElement half. + // Return the EndElement half now. + d.needClose = false + return EndElement{d.toClose}, nil + } + + b, ok := d.getc() + if !ok { + return nil, d.err + } + + if b != '<' { + // Text section. + d.ungetc(b) + data := d.text(-1, false) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + switch b { + case '/': + // </: End element + var name Name + if name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected element name after </") + } + return nil, d.err + } + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '>' { + d.err = d.syntaxError("invalid characters between </" + name.Local + " and >") + return nil, d.err + } + return EndElement{name}, nil + + case '?': + // <?: Processing instruction. + var target string + if target, ok = d.name(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected target name after <?") + } + return nil, d.err + } + d.space() + d.buf.Reset() + var b0 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + d.buf.WriteByte(b) + if b0 == '?' && b == '>' { + break + } + b0 = b + } + data := d.buf.Bytes() + data = data[0 : len(data)-2] // chop ?> + + if target == "xml" { + content := string(data) + ver := procInst("version", content) + if ver != "" && ver != "1.0" { + d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver) + return nil, d.err + } + enc := procInst("encoding", content) + if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") { + if d.CharsetReader == nil { + d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc) + return nil, d.err + } + newr, err := d.CharsetReader(enc, d.r.(io.Reader)) + if err != nil { + d.err = fmt.Errorf("xml: opening charset %q: %w", enc, err) + return nil, d.err + } + if newr == nil { + panic("CharsetReader returned a nil Reader for charset " + enc) + } + d.switchToReader(newr) + } + } + return ProcInst{target, data}, nil + + case '!': + // <!: Maybe comment, maybe CDATA. + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + switch b { + case '-': // <!- + // Probably <!-- for a comment. + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '-' { + d.err = d.syntaxError("invalid sequence <!- not part of <!--") + return nil, d.err + } + // Look for terminator. + d.buf.Reset() + var b0, b1 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + d.buf.WriteByte(b) + if b0 == '-' && b1 == '-' { + if b != '>' { + d.err = d.syntaxError( + `invalid sequence "--" not allowed in comments`) + return nil, d.err + } + break + } + b0, b1 = b1, b + } + data := d.buf.Bytes() + data = data[0 : len(data)-3] // chop --> + return Comment(data), nil + + case '[': // <![ + // Probably <![CDATA[. + for i := 0; i < 6; i++ { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != "CDATA["[i] { + d.err = d.syntaxError("invalid <![ sequence") + return nil, d.err + } + } + // Have <![CDATA[. Read text until ]]>. + data := d.text(-1, true) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. + // We don't care, but accumulate for caller. Quoted angle + // brackets do not count for nesting. + d.buf.Reset() + d.buf.WriteByte(b) + inquote := uint8(0) + depth := 0 + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if inquote == 0 && b == '>' && depth == 0 { + break + } + HandleB: + d.buf.WriteByte(b) + switch { + case b == inquote: + inquote = 0 + + case inquote != 0: + // in quotes, no special action + + case b == '\'' || b == '"': + inquote = b + + case b == '>' && inquote == 0: + depth-- + + case b == '<' && inquote == 0: + // Look for <!-- to begin comment. + s := "!--" + for i := 0; i < len(s); i++ { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != s[i] { + for j := 0; j < i; j++ { + d.buf.WriteByte(s[j]) + } + depth++ + goto HandleB + } + } + + // Remove < that was written above. + d.buf.Truncate(d.buf.Len() - 1) + + // Look for terminator. + var b0, b1 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b0 == '-' && b1 == '-' && b == '>' { + break + } + b0, b1 = b1, b + } + + // Replace the comment with a space in the returned Directive + // body, so that markup parts that were separated by the comment + // (like a "<" and a "!") don't get joined when re-encoding the + // Directive, taking new semantic meaning. + d.buf.WriteByte(' ') + } + } + return Directive(d.buf.Bytes()), nil + } + + // Must be an open element like <a href="foo"> + d.ungetc(b) + + var ( + name Name + empty bool + attr []Attr + ) + if name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected element name after <") + } + return nil, d.err + } + + attr = []Attr{} + for { + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b == '/' { + empty = true + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '>' { + d.err = d.syntaxError("expected /> in element") + return nil, d.err + } + break + } + if b == '>' { + break + } + d.ungetc(b) + + a := Attr{} + if a.Name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected attribute name in element") + } + return nil, d.err + } + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '=' { + if d.Strict { + d.err = d.syntaxError("attribute name without = in element") + return nil, d.err + } + d.ungetc(b) + a.Value = a.Name.Local + } else { + d.space() + data := d.attrval() + if data == nil { + return nil, d.err + } + a.Value = string(data) + } + attr = append(attr, a) + } + if empty { + d.needClose = true + d.toClose = name + } + return StartElement{name, attr}, nil +} + +func (d *Decoder) attrval() []byte { + b, ok := d.mustgetc() + if !ok { + return nil + } + // Handle quoted attribute values + if b == '"' || b == '\'' { + return d.text(int(b), false) + } + // Handle unquoted attribute values for strict parsers + if d.Strict { + d.err = d.syntaxError("unquoted or missing attribute value in element") + return nil + } + // Handle unquoted attribute values for unstrict parsers + d.ungetc(b) + d.buf.Reset() + for { + b, ok = d.mustgetc() + if !ok { + return nil + } + // https://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 + if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { + d.buf.WriteByte(b) + } else { + d.ungetc(b) + break + } + } + return d.buf.Bytes() +} + +// Skip spaces if any +func (d *Decoder) space() { + for { + b, ok := d.getc() + if !ok { + return + } + switch b { + case ' ', '\r', '\n', '\t': + default: + d.ungetc(b) + return + } + } +} + +// Read a single byte. +// If there is no byte to read, return ok==false +// and leave the error in d.err. +// Maintain line number. +func (d *Decoder) getc() (b byte, ok bool) { + if d.err != nil { + return 0, false + } + if d.nextByte >= 0 { + b = byte(d.nextByte) + d.nextByte = -1 + } else { + b, d.err = d.r.ReadByte() + if d.err != nil { + return 0, false + } + if d.saved != nil { + d.saved.WriteByte(b) + } + } + if b == '\n' { + d.line++ + d.linestart = d.offset + 1 + } + d.offset++ + return b, true +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (d *Decoder) InputOffset() int64 { + return d.offset +} + +// InputPos returns the line of the current decoder position and the 1 based +// input position of the line. The position gives the location of the end of the +// most recently returned token. +func (d *Decoder) InputPos() (line, column int) { + return d.line, int(d.offset-d.linestart) + 1 +} + +// Return saved offset. +// If we did ungetc (nextByte >= 0), have to back up one. +func (d *Decoder) savedOffset() int { + n := d.saved.Len() + if d.nextByte >= 0 { + n-- + } + return n +} + +// Must read a single byte. +// If there is no byte to read, +// set d.err to SyntaxError("unexpected EOF") +// and return ok==false +func (d *Decoder) mustgetc() (b byte, ok bool) { + if b, ok = d.getc(); !ok { + if d.err == io.EOF { + d.err = d.syntaxError("unexpected EOF") + } + } + return +} + +// Unread a single byte. +func (d *Decoder) ungetc(b byte) { + if b == '\n' { + d.line-- + } + d.nextByte = int(b) + d.offset-- +} + +var entity = map[string]rune{ + "lt": '<', + "gt": '>', + "amp": '&', + "apos": '\'', + "quot": '"', +} + +// Read plain text section (XML calls it character data). +// If quote >= 0, we are in a quoted string and need to find the matching quote. +// If cdata == true, we are in a <![CDATA[ section and need to find ]]>. +// On failure return nil and leave the error in d.err. +func (d *Decoder) text(quote int, cdata bool) []byte { + var b0, b1 byte + var trunc int + d.buf.Reset() +Input: + for { + b, ok := d.getc() + if !ok { + if cdata { + if d.err == io.EOF { + d.err = d.syntaxError("unexpected EOF in CDATA section") + } + return nil + } + break Input + } + + // <![CDATA[ section ends with ]]>. + // It is an error for ]]> to appear in ordinary text. + if b0 == ']' && b1 == ']' && b == '>' { + if cdata { + trunc = 2 + break Input + } + d.err = d.syntaxError("unescaped ]]> not in CDATA section") + return nil + } + + // Stop reading text if we see a <. + if b == '<' && !cdata { + if quote >= 0 { + d.err = d.syntaxError("unescaped < inside quoted string") + return nil + } + d.ungetc('<') + break Input + } + if quote >= 0 && b == byte(quote) { + break Input + } + if b == '&' && !cdata { + // Read escaped character expression up to semicolon. + // XML in all its glory allows a document to define and use + // its own character names with <!ENTITY ...> directives. + // Parsers are required to recognize lt, gt, amp, apos, and quot + // even if they have not been declared. + before := d.buf.Len() + d.buf.WriteByte('&') + var ok bool + var text string + var haveText bool + if b, ok = d.mustgetc(); !ok { + return nil + } + if b == '#' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + base := 10 + if b == 'x' { + base = 16 + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + } + start := d.buf.Len() + for '0' <= b && b <= '9' || + base == 16 && 'a' <= b && b <= 'f' || + base == 16 && 'A' <= b && b <= 'F' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + } + if b != ';' { + d.ungetc(b) + } else { + s := string(d.buf.Bytes()[start:]) + d.buf.WriteByte(';') + n, err := strconv.ParseUint(s, base, 64) + if err == nil && n <= unicode.MaxRune { + text = string(rune(n)) + haveText = true + } + } + } else { + d.ungetc(b) + if !d.readName() { + if d.err != nil { + return nil + } + } + if b, ok = d.mustgetc(); !ok { + return nil + } + if b != ';' { + d.ungetc(b) + } else { + name := d.buf.Bytes()[before+1:] + d.buf.WriteByte(';') + if isName(name) { + s := string(name) + if r, ok := entity[s]; ok { + text = string(r) + haveText = true + } else if d.Entity != nil { + text, haveText = d.Entity[s] + } + } + } + } + + if haveText { + d.buf.Truncate(before) + d.buf.WriteString(text) + b0, b1 = 0, 0 + continue Input + } + if !d.Strict { + b0, b1 = 0, 0 + continue Input + } + ent := string(d.buf.Bytes()[before:]) + if ent[len(ent)-1] != ';' { + ent += " (no semicolon)" + } + d.err = d.syntaxError("invalid character entity " + ent) + return nil + } + + // We must rewrite unescaped \r and \r\n into \n. + if b == '\r' { + d.buf.WriteByte('\n') + } else if b1 == '\r' && b == '\n' { + // Skip \r\n--we already wrote \n. + } else { + d.buf.WriteByte(b) + } + + b0, b1 = b1, b + } + data := d.buf.Bytes() + data = data[0 : len(data)-trunc] + + // Inspect each rune for being a disallowed character. + buf := data + for len(buf) > 0 { + r, size := utf8.DecodeRune(buf) + if r == utf8.RuneError && size == 1 { + d.err = d.syntaxError("invalid UTF-8") + return nil + } + buf = buf[size:] + if !isInCharacterRange(r) { + d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r)) + return nil + } + } + + return data +} + +// Decide whether the given rune is in the XML Character Range, per +// the Char production of https://www.xml.com/axml/testaxml.htm, +// Section 2.2 Characters. +func isInCharacterRange(r rune) (inrange bool) { + return r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xD7FF || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF +} + +// Get name space name: name with a : stuck in the middle. +// The part before the : is the name space identifier. +func (d *Decoder) nsname() (name Name, ok bool) { + s, ok := d.name() + if !ok { + return + } + if strings.Count(s, ":") > 1 { + return name, false + } else if space, local, ok := strings.Cut(s, ":"); !ok || space == "" || local == "" { + name.Local = s + } else { + name.Space = space + name.Local = local + } + return name, true +} + +// Get name: /first(first|second)*/ +// Do not set d.err if the name is missing (unless unexpected EOF is received): +// let the caller provide better context. +func (d *Decoder) name() (s string, ok bool) { + d.buf.Reset() + if !d.readName() { + return "", false + } + + // Now we check the characters. + b := d.buf.Bytes() + if !isName(b) { + d.err = d.syntaxError("invalid XML name: " + string(b)) + return "", false + } + return string(b), true +} + +// Read a name and append its bytes to d.buf. +// The name is delimited by any single-byte character not valid in names. +// All multi-byte characters are accepted; the caller must check their validity. +func (d *Decoder) readName() (ok bool) { + var b byte + if b, ok = d.mustgetc(); !ok { + return + } + if b < utf8.RuneSelf && !isNameByte(b) { + d.ungetc(b) + return false + } + d.buf.WriteByte(b) + + for { + if b, ok = d.mustgetc(); !ok { + return + } + if b < utf8.RuneSelf && !isNameByte(b) { + d.ungetc(b) + break + } + d.buf.WriteByte(b) + } + return true +} + +func isNameByte(c byte) bool { + return 'A' <= c && c <= 'Z' || + 'a' <= c && c <= 'z' || + '0' <= c && c <= '9' || + c == '_' || c == ':' || c == '.' || c == '-' +} + +func isName(s []byte) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + +func isNameString(s string) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + +// These tables were generated by cut and paste from Appendix B of +// the XML spec at https://www.xml.com/axml/testaxml.htm +// and then reformatting. First corresponds to (Letter | '_' | ':') +// and second corresponds to NameChar. + +var first = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x003A, 0x003A, 1}, + {0x0041, 0x005A, 1}, + {0x005F, 0x005F, 1}, + {0x0061, 0x007A, 1}, + {0x00C0, 0x00D6, 1}, + {0x00D8, 0x00F6, 1}, + {0x00F8, 0x00FF, 1}, + {0x0100, 0x0131, 1}, + {0x0134, 0x013E, 1}, + {0x0141, 0x0148, 1}, + {0x014A, 0x017E, 1}, + {0x0180, 0x01C3, 1}, + {0x01CD, 0x01F0, 1}, + {0x01F4, 0x01F5, 1}, + {0x01FA, 0x0217, 1}, + {0x0250, 0x02A8, 1}, + {0x02BB, 0x02C1, 1}, + {0x0386, 0x0386, 1}, + {0x0388, 0x038A, 1}, + {0x038C, 0x038C, 1}, + {0x038E, 0x03A1, 1}, + {0x03A3, 0x03CE, 1}, + {0x03D0, 0x03D6, 1}, + {0x03DA, 0x03E0, 2}, + {0x03E2, 0x03F3, 1}, + {0x0401, 0x040C, 1}, + {0x040E, 0x044F, 1}, + {0x0451, 0x045C, 1}, + {0x045E, 0x0481, 1}, + {0x0490, 0x04C4, 1}, + {0x04C7, 0x04C8, 1}, + {0x04CB, 0x04CC, 1}, + {0x04D0, 0x04EB, 1}, + {0x04EE, 0x04F5, 1}, + {0x04F8, 0x04F9, 1}, + {0x0531, 0x0556, 1}, + {0x0559, 0x0559, 1}, + {0x0561, 0x0586, 1}, + {0x05D0, 0x05EA, 1}, + {0x05F0, 0x05F2, 1}, + {0x0621, 0x063A, 1}, + {0x0641, 0x064A, 1}, + {0x0671, 0x06B7, 1}, + {0x06BA, 0x06BE, 1}, + {0x06C0, 0x06CE, 1}, + {0x06D0, 0x06D3, 1}, + {0x06D5, 0x06D5, 1}, + {0x06E5, 0x06E6, 1}, + {0x0905, 0x0939, 1}, + {0x093D, 0x093D, 1}, + {0x0958, 0x0961, 1}, + {0x0985, 0x098C, 1}, + {0x098F, 0x0990, 1}, + {0x0993, 0x09A8, 1}, + {0x09AA, 0x09B0, 1}, + {0x09B2, 0x09B2, 1}, + {0x09B6, 0x09B9, 1}, + {0x09DC, 0x09DD, 1}, + {0x09DF, 0x09E1, 1}, + {0x09F0, 0x09F1, 1}, + {0x0A05, 0x0A0A, 1}, + {0x0A0F, 0x0A10, 1}, + {0x0A13, 0x0A28, 1}, + {0x0A2A, 0x0A30, 1}, + {0x0A32, 0x0A33, 1}, + {0x0A35, 0x0A36, 1}, + {0x0A38, 0x0A39, 1}, + {0x0A59, 0x0A5C, 1}, + {0x0A5E, 0x0A5E, 1}, + {0x0A72, 0x0A74, 1}, + {0x0A85, 0x0A8B, 1}, + {0x0A8D, 0x0A8D, 1}, + {0x0A8F, 0x0A91, 1}, + {0x0A93, 0x0AA8, 1}, + {0x0AAA, 0x0AB0, 1}, + {0x0AB2, 0x0AB3, 1}, + {0x0AB5, 0x0AB9, 1}, + {0x0ABD, 0x0AE0, 0x23}, + {0x0B05, 0x0B0C, 1}, + {0x0B0F, 0x0B10, 1}, + {0x0B13, 0x0B28, 1}, + {0x0B2A, 0x0B30, 1}, + {0x0B32, 0x0B33, 1}, + {0x0B36, 0x0B39, 1}, + {0x0B3D, 0x0B3D, 1}, + {0x0B5C, 0x0B5D, 1}, + {0x0B5F, 0x0B61, 1}, + {0x0B85, 0x0B8A, 1}, + {0x0B8E, 0x0B90, 1}, + {0x0B92, 0x0B95, 1}, + {0x0B99, 0x0B9A, 1}, + {0x0B9C, 0x0B9C, 1}, + {0x0B9E, 0x0B9F, 1}, + {0x0BA3, 0x0BA4, 1}, + {0x0BA8, 0x0BAA, 1}, + {0x0BAE, 0x0BB5, 1}, + {0x0BB7, 0x0BB9, 1}, + {0x0C05, 0x0C0C, 1}, + {0x0C0E, 0x0C10, 1}, + {0x0C12, 0x0C28, 1}, + {0x0C2A, 0x0C33, 1}, + {0x0C35, 0x0C39, 1}, + {0x0C60, 0x0C61, 1}, + {0x0C85, 0x0C8C, 1}, + {0x0C8E, 0x0C90, 1}, + {0x0C92, 0x0CA8, 1}, + {0x0CAA, 0x0CB3, 1}, + {0x0CB5, 0x0CB9, 1}, + {0x0CDE, 0x0CDE, 1}, + {0x0CE0, 0x0CE1, 1}, + {0x0D05, 0x0D0C, 1}, + {0x0D0E, 0x0D10, 1}, + {0x0D12, 0x0D28, 1}, + {0x0D2A, 0x0D39, 1}, + {0x0D60, 0x0D61, 1}, + {0x0E01, 0x0E2E, 1}, + {0x0E30, 0x0E30, 1}, + {0x0E32, 0x0E33, 1}, + {0x0E40, 0x0E45, 1}, + {0x0E81, 0x0E82, 1}, + {0x0E84, 0x0E84, 1}, + {0x0E87, 0x0E88, 1}, + {0x0E8A, 0x0E8D, 3}, + {0x0E94, 0x0E97, 1}, + {0x0E99, 0x0E9F, 1}, + {0x0EA1, 0x0EA3, 1}, + {0x0EA5, 0x0EA7, 2}, + {0x0EAA, 0x0EAB, 1}, + {0x0EAD, 0x0EAE, 1}, + {0x0EB0, 0x0EB0, 1}, + {0x0EB2, 0x0EB3, 1}, + {0x0EBD, 0x0EBD, 1}, + {0x0EC0, 0x0EC4, 1}, + {0x0F40, 0x0F47, 1}, + {0x0F49, 0x0F69, 1}, + {0x10A0, 0x10C5, 1}, + {0x10D0, 0x10F6, 1}, + {0x1100, 0x1100, 1}, + {0x1102, 0x1103, 1}, + {0x1105, 0x1107, 1}, + {0x1109, 0x1109, 1}, + {0x110B, 0x110C, 1}, + {0x110E, 0x1112, 1}, + {0x113C, 0x1140, 2}, + {0x114C, 0x1150, 2}, + {0x1154, 0x1155, 1}, + {0x1159, 0x1159, 1}, + {0x115F, 0x1161, 1}, + {0x1163, 0x1169, 2}, + {0x116D, 0x116E, 1}, + {0x1172, 0x1173, 1}, + {0x1175, 0x119E, 0x119E - 0x1175}, + {0x11A8, 0x11AB, 0x11AB - 0x11A8}, + {0x11AE, 0x11AF, 1}, + {0x11B7, 0x11B8, 1}, + {0x11BA, 0x11BA, 1}, + {0x11BC, 0x11C2, 1}, + {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, + {0x11F9, 0x11F9, 1}, + {0x1E00, 0x1E9B, 1}, + {0x1EA0, 0x1EF9, 1}, + {0x1F00, 0x1F15, 1}, + {0x1F18, 0x1F1D, 1}, + {0x1F20, 0x1F45, 1}, + {0x1F48, 0x1F4D, 1}, + {0x1F50, 0x1F57, 1}, + {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, + {0x1F5D, 0x1F5D, 1}, + {0x1F5F, 0x1F7D, 1}, + {0x1F80, 0x1FB4, 1}, + {0x1FB6, 0x1FBC, 1}, + {0x1FBE, 0x1FBE, 1}, + {0x1FC2, 0x1FC4, 1}, + {0x1FC6, 0x1FCC, 1}, + {0x1FD0, 0x1FD3, 1}, + {0x1FD6, 0x1FDB, 1}, + {0x1FE0, 0x1FEC, 1}, + {0x1FF2, 0x1FF4, 1}, + {0x1FF6, 0x1FFC, 1}, + {0x2126, 0x2126, 1}, + {0x212A, 0x212B, 1}, + {0x212E, 0x212E, 1}, + {0x2180, 0x2182, 1}, + {0x3007, 0x3007, 1}, + {0x3021, 0x3029, 1}, + {0x3041, 0x3094, 1}, + {0x30A1, 0x30FA, 1}, + {0x3105, 0x312C, 1}, + {0x4E00, 0x9FA5, 1}, + {0xAC00, 0xD7A3, 1}, + }, +} + +var second = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x002D, 0x002E, 1}, + {0x0030, 0x0039, 1}, + {0x00B7, 0x00B7, 1}, + {0x02D0, 0x02D1, 1}, + {0x0300, 0x0345, 1}, + {0x0360, 0x0361, 1}, + {0x0387, 0x0387, 1}, + {0x0483, 0x0486, 1}, + {0x0591, 0x05A1, 1}, + {0x05A3, 0x05B9, 1}, + {0x05BB, 0x05BD, 1}, + {0x05BF, 0x05BF, 1}, + {0x05C1, 0x05C2, 1}, + {0x05C4, 0x0640, 0x0640 - 0x05C4}, + {0x064B, 0x0652, 1}, + {0x0660, 0x0669, 1}, + {0x0670, 0x0670, 1}, + {0x06D6, 0x06DC, 1}, + {0x06DD, 0x06DF, 1}, + {0x06E0, 0x06E4, 1}, + {0x06E7, 0x06E8, 1}, + {0x06EA, 0x06ED, 1}, + {0x06F0, 0x06F9, 1}, + {0x0901, 0x0903, 1}, + {0x093C, 0x093C, 1}, + {0x093E, 0x094C, 1}, + {0x094D, 0x094D, 1}, + {0x0951, 0x0954, 1}, + {0x0962, 0x0963, 1}, + {0x0966, 0x096F, 1}, + {0x0981, 0x0983, 1}, + {0x09BC, 0x09BC, 1}, + {0x09BE, 0x09BF, 1}, + {0x09C0, 0x09C4, 1}, + {0x09C7, 0x09C8, 1}, + {0x09CB, 0x09CD, 1}, + {0x09D7, 0x09D7, 1}, + {0x09E2, 0x09E3, 1}, + {0x09E6, 0x09EF, 1}, + {0x0A02, 0x0A3C, 0x3A}, + {0x0A3E, 0x0A3F, 1}, + {0x0A40, 0x0A42, 1}, + {0x0A47, 0x0A48, 1}, + {0x0A4B, 0x0A4D, 1}, + {0x0A66, 0x0A6F, 1}, + {0x0A70, 0x0A71, 1}, + {0x0A81, 0x0A83, 1}, + {0x0ABC, 0x0ABC, 1}, + {0x0ABE, 0x0AC5, 1}, + {0x0AC7, 0x0AC9, 1}, + {0x0ACB, 0x0ACD, 1}, + {0x0AE6, 0x0AEF, 1}, + {0x0B01, 0x0B03, 1}, + {0x0B3C, 0x0B3C, 1}, + {0x0B3E, 0x0B43, 1}, + {0x0B47, 0x0B48, 1}, + {0x0B4B, 0x0B4D, 1}, + {0x0B56, 0x0B57, 1}, + {0x0B66, 0x0B6F, 1}, + {0x0B82, 0x0B83, 1}, + {0x0BBE, 0x0BC2, 1}, + {0x0BC6, 0x0BC8, 1}, + {0x0BCA, 0x0BCD, 1}, + {0x0BD7, 0x0BD7, 1}, + {0x0BE7, 0x0BEF, 1}, + {0x0C01, 0x0C03, 1}, + {0x0C3E, 0x0C44, 1}, + {0x0C46, 0x0C48, 1}, + {0x0C4A, 0x0C4D, 1}, + {0x0C55, 0x0C56, 1}, + {0x0C66, 0x0C6F, 1}, + {0x0C82, 0x0C83, 1}, + {0x0CBE, 0x0CC4, 1}, + {0x0CC6, 0x0CC8, 1}, + {0x0CCA, 0x0CCD, 1}, + {0x0CD5, 0x0CD6, 1}, + {0x0CE6, 0x0CEF, 1}, + {0x0D02, 0x0D03, 1}, + {0x0D3E, 0x0D43, 1}, + {0x0D46, 0x0D48, 1}, + {0x0D4A, 0x0D4D, 1}, + {0x0D57, 0x0D57, 1}, + {0x0D66, 0x0D6F, 1}, + {0x0E31, 0x0E31, 1}, + {0x0E34, 0x0E3A, 1}, + {0x0E46, 0x0E46, 1}, + {0x0E47, 0x0E4E, 1}, + {0x0E50, 0x0E59, 1}, + {0x0EB1, 0x0EB1, 1}, + {0x0EB4, 0x0EB9, 1}, + {0x0EBB, 0x0EBC, 1}, + {0x0EC6, 0x0EC6, 1}, + {0x0EC8, 0x0ECD, 1}, + {0x0ED0, 0x0ED9, 1}, + {0x0F18, 0x0F19, 1}, + {0x0F20, 0x0F29, 1}, + {0x0F35, 0x0F39, 2}, + {0x0F3E, 0x0F3F, 1}, + {0x0F71, 0x0F84, 1}, + {0x0F86, 0x0F8B, 1}, + {0x0F90, 0x0F95, 1}, + {0x0F97, 0x0F97, 1}, + {0x0F99, 0x0FAD, 1}, + {0x0FB1, 0x0FB7, 1}, + {0x0FB9, 0x0FB9, 1}, + {0x20D0, 0x20DC, 1}, + {0x20E1, 0x3005, 0x3005 - 0x20E1}, + {0x302A, 0x302F, 1}, + {0x3031, 0x3035, 1}, + {0x3099, 0x309A, 1}, + {0x309D, 0x309E, 1}, + {0x30FC, 0x30FE, 1}, + }, +} + +// HTMLEntity is an entity map containing translations for the +// standard HTML entity characters. +// +// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation. +var HTMLEntity map[string]string = htmlEntity + +var htmlEntity = map[string]string{ + /* + hget http://www.w3.org/TR/html4/sgml/entities.html | + ssam ' + ,y /\>/ x/\<(.|\n)+/ s/\n/ /g + ,x v/^\<!ENTITY/d + ,s/\<!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/ "\1": "\\u\2",/g + ' + */ + "nbsp": "\u00A0", + "iexcl": "\u00A1", + "cent": "\u00A2", + "pound": "\u00A3", + "curren": "\u00A4", + "yen": "\u00A5", + "brvbar": "\u00A6", + "sect": "\u00A7", + "uml": "\u00A8", + "copy": "\u00A9", + "ordf": "\u00AA", + "laquo": "\u00AB", + "not": "\u00AC", + "shy": "\u00AD", + "reg": "\u00AE", + "macr": "\u00AF", + "deg": "\u00B0", + "plusmn": "\u00B1", + "sup2": "\u00B2", + "sup3": "\u00B3", + "acute": "\u00B4", + "micro": "\u00B5", + "para": "\u00B6", + "middot": "\u00B7", + "cedil": "\u00B8", + "sup1": "\u00B9", + "ordm": "\u00BA", + "raquo": "\u00BB", + "frac14": "\u00BC", + "frac12": "\u00BD", + "frac34": "\u00BE", + "iquest": "\u00BF", + "Agrave": "\u00C0", + "Aacute": "\u00C1", + "Acirc": "\u00C2", + "Atilde": "\u00C3", + "Auml": "\u00C4", + "Aring": "\u00C5", + "AElig": "\u00C6", + "Ccedil": "\u00C7", + "Egrave": "\u00C8", + "Eacute": "\u00C9", + "Ecirc": "\u00CA", + "Euml": "\u00CB", + "Igrave": "\u00CC", + "Iacute": "\u00CD", + "Icirc": "\u00CE", + "Iuml": "\u00CF", + "ETH": "\u00D0", + "Ntilde": "\u00D1", + "Ograve": "\u00D2", + "Oacute": "\u00D3", + "Ocirc": "\u00D4", + "Otilde": "\u00D5", + "Ouml": "\u00D6", + "times": "\u00D7", + "Oslash": "\u00D8", + "Ugrave": "\u00D9", + "Uacute": "\u00DA", + "Ucirc": "\u00DB", + "Uuml": "\u00DC", + "Yacute": "\u00DD", + "THORN": "\u00DE", + "szlig": "\u00DF", + "agrave": "\u00E0", + "aacute": "\u00E1", + "acirc": "\u00E2", + "atilde": "\u00E3", + "auml": "\u00E4", + "aring": "\u00E5", + "aelig": "\u00E6", + "ccedil": "\u00E7", + "egrave": "\u00E8", + "eacute": "\u00E9", + "ecirc": "\u00EA", + "euml": "\u00EB", + "igrave": "\u00EC", + "iacute": "\u00ED", + "icirc": "\u00EE", + "iuml": "\u00EF", + "eth": "\u00F0", + "ntilde": "\u00F1", + "ograve": "\u00F2", + "oacute": "\u00F3", + "ocirc": "\u00F4", + "otilde": "\u00F5", + "ouml": "\u00F6", + "divide": "\u00F7", + "oslash": "\u00F8", + "ugrave": "\u00F9", + "uacute": "\u00FA", + "ucirc": "\u00FB", + "uuml": "\u00FC", + "yacute": "\u00FD", + "thorn": "\u00FE", + "yuml": "\u00FF", + "fnof": "\u0192", + "Alpha": "\u0391", + "Beta": "\u0392", + "Gamma": "\u0393", + "Delta": "\u0394", + "Epsilon": "\u0395", + "Zeta": "\u0396", + "Eta": "\u0397", + "Theta": "\u0398", + "Iota": "\u0399", + "Kappa": "\u039A", + "Lambda": "\u039B", + "Mu": "\u039C", + "Nu": "\u039D", + "Xi": "\u039E", + "Omicron": "\u039F", + "Pi": "\u03A0", + "Rho": "\u03A1", + "Sigma": "\u03A3", + "Tau": "\u03A4", + "Upsilon": "\u03A5", + "Phi": "\u03A6", + "Chi": "\u03A7", + "Psi": "\u03A8", + "Omega": "\u03A9", + "alpha": "\u03B1", + "beta": "\u03B2", + "gamma": "\u03B3", + "delta": "\u03B4", + "epsilon": "\u03B5", + "zeta": "\u03B6", + "eta": "\u03B7", + "theta": "\u03B8", + "iota": "\u03B9", + "kappa": "\u03BA", + "lambda": "\u03BB", + "mu": "\u03BC", + "nu": "\u03BD", + "xi": "\u03BE", + "omicron": "\u03BF", + "pi": "\u03C0", + "rho": "\u03C1", + "sigmaf": "\u03C2", + "sigma": "\u03C3", + "tau": "\u03C4", + "upsilon": "\u03C5", + "phi": "\u03C6", + "chi": "\u03C7", + "psi": "\u03C8", + "omega": "\u03C9", + "thetasym": "\u03D1", + "upsih": "\u03D2", + "piv": "\u03D6", + "bull": "\u2022", + "hellip": "\u2026", + "prime": "\u2032", + "Prime": "\u2033", + "oline": "\u203E", + "frasl": "\u2044", + "weierp": "\u2118", + "image": "\u2111", + "real": "\u211C", + "trade": "\u2122", + "alefsym": "\u2135", + "larr": "\u2190", + "uarr": "\u2191", + "rarr": "\u2192", + "darr": "\u2193", + "harr": "\u2194", + "crarr": "\u21B5", + "lArr": "\u21D0", + "uArr": "\u21D1", + "rArr": "\u21D2", + "dArr": "\u21D3", + "hArr": "\u21D4", + "forall": "\u2200", + "part": "\u2202", + "exist": "\u2203", + "empty": "\u2205", + "nabla": "\u2207", + "isin": "\u2208", + "notin": "\u2209", + "ni": "\u220B", + "prod": "\u220F", + "sum": "\u2211", + "minus": "\u2212", + "lowast": "\u2217", + "radic": "\u221A", + "prop": "\u221D", + "infin": "\u221E", + "ang": "\u2220", + "and": "\u2227", + "or": "\u2228", + "cap": "\u2229", + "cup": "\u222A", + "int": "\u222B", + "there4": "\u2234", + "sim": "\u223C", + "cong": "\u2245", + "asymp": "\u2248", + "ne": "\u2260", + "equiv": "\u2261", + "le": "\u2264", + "ge": "\u2265", + "sub": "\u2282", + "sup": "\u2283", + "nsub": "\u2284", + "sube": "\u2286", + "supe": "\u2287", + "oplus": "\u2295", + "otimes": "\u2297", + "perp": "\u22A5", + "sdot": "\u22C5", + "lceil": "\u2308", + "rceil": "\u2309", + "lfloor": "\u230A", + "rfloor": "\u230B", + "lang": "\u2329", + "rang": "\u232A", + "loz": "\u25CA", + "spades": "\u2660", + "clubs": "\u2663", + "hearts": "\u2665", + "diams": "\u2666", + "quot": "\u0022", + "amp": "\u0026", + "lt": "\u003C", + "gt": "\u003E", + "OElig": "\u0152", + "oelig": "\u0153", + "Scaron": "\u0160", + "scaron": "\u0161", + "Yuml": "\u0178", + "circ": "\u02C6", + "tilde": "\u02DC", + "ensp": "\u2002", + "emsp": "\u2003", + "thinsp": "\u2009", + "zwnj": "\u200C", + "zwj": "\u200D", + "lrm": "\u200E", + "rlm": "\u200F", + "ndash": "\u2013", + "mdash": "\u2014", + "lsquo": "\u2018", + "rsquo": "\u2019", + "sbquo": "\u201A", + "ldquo": "\u201C", + "rdquo": "\u201D", + "bdquo": "\u201E", + "dagger": "\u2020", + "Dagger": "\u2021", + "permil": "\u2030", + "lsaquo": "\u2039", + "rsaquo": "\u203A", + "euro": "\u20AC", +} + +// HTMLAutoClose is the set of HTML elements that +// should be considered to close automatically. +// +// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation. +var HTMLAutoClose []string = htmlAutoClose + +var htmlAutoClose = []string{ + /* + hget http://www.w3.org/TR/html4/loose.dtd | + 9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/ "\1",/p' | tr A-Z a-z + */ + "basefont", + "br", + "area", + "link", + "img", + "param", + "hr", + "input", + "col", + "frame", + "isindex", + "base", + "meta", +} + +var ( + escQuot = []byte(""") // shorter than """ + escApos = []byte("'") // shorter than "'" + escAmp = []byte("&") + escLT = []byte("<") + escGT = []byte(">") + escTab = []byte("	") + escNL = []byte("
") + escCR = []byte("
") + escFFFD = []byte("\uFFFD") // Unicode replacement character +) + +// EscapeText writes to w the properly escaped XML equivalent +// of the plain text data s. +func EscapeText(w io.Writer, s []byte) error { + return escapeText(w, s, true) +} + +// escapeText writes to w the properly escaped XML equivalent +// of the plain text data s. If escapeNewline is true, newline +// characters will be escaped. +func escapeText(w io.Writer, s []byte, escapeNewline bool) error { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRune(s[i:]) + i += width + switch r { + case '"': + esc = escQuot + case '\'': + esc = escApos + case '&': + esc = escAmp + case '<': + esc = escLT + case '>': + esc = escGT + case '\t': + esc = escTab + case '\n': + if !escapeNewline { + continue + } + esc = escNL + case '\r': + esc = escCR + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = escFFFD + break + } + continue + } + if _, err := w.Write(s[last : i-width]); err != nil { + return err + } + if _, err := w.Write(esc); err != nil { + return err + } + last = i + } + _, err := w.Write(s[last:]) + return err +} + +// EscapeString writes to p the properly escaped XML equivalent +// of the plain text data s. +func (p *printer) EscapeString(s string) { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRuneInString(s[i:]) + i += width + switch r { + case '"': + esc = escQuot + case '\'': + esc = escApos + case '&': + esc = escAmp + case '<': + esc = escLT + case '>': + esc = escGT + case '\t': + esc = escTab + case '\n': + esc = escNL + case '\r': + esc = escCR + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = escFFFD + break + } + continue + } + p.WriteString(s[last : i-width]) + p.Write(esc) + last = i + } + p.WriteString(s[last:]) +} + +// Escape is like [EscapeText] but omits the error return value. +// It is provided for backwards compatibility with Go 1.0. +// Code targeting Go 1.1 or later should use [EscapeText]. +func Escape(w io.Writer, s []byte) { + EscapeText(w, s) +} + +var ( + cdataStart = []byte("<![CDATA[") + cdataEnd = []byte("]]>") + cdataEscape = []byte("]]]]><![CDATA[>") +) + +// emitCDATA writes to w the CDATA-wrapped plain text data s. +// It escapes CDATA directives nested in s. +func emitCDATA(w io.Writer, s []byte) error { + if len(s) == 0 { + return nil + } + if _, err := w.Write(cdataStart); err != nil { + return err + } + + for { + before, after, ok := bytes.Cut(s, cdataEnd) + if !ok { + break + } + // Found a nested CDATA directive end. + if _, err := w.Write(before); err != nil { + return err + } + if _, err := w.Write(cdataEscape); err != nil { + return err + } + s = after + } + + if _, err := w.Write(s); err != nil { + return err + } + + _, err := w.Write(cdataEnd) + return err +} + +// procInst parses the `param="..."` or `param='...'` +// value out of the provided string, returning "" if not found. +func procInst(param, s string) string { + // TODO: this parsing is somewhat lame and not exact. + // It works for all actual cases, though. + param = param + "=" + _, v, _ := strings.Cut(s, param) + if v == "" { + return "" + } + if v[0] != '\'' && v[0] != '"' { + return "" + } + unquote, _, ok := strings.Cut(v[1:], v[:1]) + if !ok { + return "" + } + return unquote +} diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go new file mode 100644 index 0000000..42f5f5f --- /dev/null +++ b/src/encoding/xml/xml_test.go @@ -0,0 +1,1479 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + tests := []struct { + name string + tokens []Token + ok bool + }{ + { + name: "OK", + tokens: []Token{ + start, + start.End(), + }, + ok: true, + }, + { + name: "Malformed", + tokens: []Token{ + start, + StartElement{Name: Name{Local: "bad"}}, + start.End(), + }, + ok: false, + }, + } + for _, tc := range tests { + for _, eof := range []bool{true, false} { + name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) + t.Run(name, func(t *testing.T) { + d := NewTokenDecoder(&toks{ + earlyEOF: eof, + t: tc.tokens, + }) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if tc.ok && err != nil { + t.Fatalf("d.Decode: expected nil error, got %v", err) + } + if _, ok := err.(*SyntaxError); !tc.ok && !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } + } +} + +type toksNil struct { + returnEOF bool + t []Token +} + +func (t *toksNil) Token() (Token, error) { + if len(t.t) == 0 { + if !t.returnEOF { + // Return nil, nil before returning an EOF. It's legal, but + // discouraged. + t.returnEOF = true + return nil, nil + } + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + return tok, nil +} + +func TestDecodeNilToken(t *testing.T) { + for _, strict := range []bool{true, false} { + name := fmt.Sprintf("Strict=%v", strict) + t.Run(name, func(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + bad := StartElement{Name: Name{Local: "bad"}} + d := NewTokenDecoder(&toksNil{ + // Malformed + t: []Token{start, bad, start.End()}, + }) + d.Strict = strict + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } +} + +const testInput = ` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + + "\r\n\t" + ` > + <hello lang="en">World <>'" 白鵬翔</hello> + <query>&何; &is-it;</query> + <goodbye /> + <outer foo:attr="value" xmlns:tag="ns4"> + <inner/> + </outer> + <tag:name> + <![CDATA[Some text here.]]> + </tag:name> +</body><!-- missing final newline -->` + +var testEntity = map[string]string{"何": "What", "is-it": "is it?"} + +var rawTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"", "hello"}}, + CharData("\n "), + StartElement{Name{"", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"", "query"}}, + CharData("\n "), + StartElement{Name{"", "goodbye"}, []Attr{}}, + EndElement{Name{"", "goodbye"}}, + CharData("\n "), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"", "inner"}, []Attr{}}, + EndElement{Name{"", "inner"}}, + CharData("\n "), + EndElement{Name{"", "outer"}}, + CharData("\n "), + StartElement{Name{"tag", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"tag", "name"}}, + CharData("\n"), + EndElement{Name{"", "body"}}, + Comment(" missing final newline "), +} + +var cookedTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"ns2", "hello"}}, + CharData("\n "), + StartElement{Name{"ns2", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"ns2", "query"}}, + CharData("\n "), + StartElement{Name{"ns2", "goodbye"}, []Attr{}}, + EndElement{Name{"ns2", "goodbye"}}, + CharData("\n "), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"ns2", "inner"}, []Attr{}}, + EndElement{Name{"ns2", "inner"}}, + CharData("\n "), + EndElement{Name{"ns2", "outer"}}, + CharData("\n "), + StartElement{Name{"ns3", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"ns3", "name"}}, + CharData("\n"), + EndElement{Name{"ns2", "body"}}, + Comment(" missing final newline "), +} + +const testInputAltEncoding = ` +<?xml version="1.0" encoding="x-testing-uppercase"?> +<TAG>VALUE</TAG>` + +var rawTokensAltEncoding = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("value"), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + +func TestRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + testRawToken(t, d, testInput, rawTokens) +} + +const nonStrictInput = ` +<tag>non&entity</tag> +<tag>&unknown;entity</tag> +<tag>{</tag> +<tag>&#zzz;</tag> +<tag>&なまえ3;</tag> +<tag><-gt;</tag> +<tag>&;</tag> +<tag>&0a;</tag> +` + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&なまえ3;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("<-gt;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&0a;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictInput, nonStrictTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + panic("unreachable") +} + +func TestRawTokenAltEncoding(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + token, err := d.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = d.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { + lastEnd := int64(0) + for i, want := range rawTokens { + start := d.InputOffset() + have, err := d.RawToken() + end := d.InputOffset() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) + } + + // Check that InputOffset returned actual token. + switch { + case start < lastEnd: + t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) + case start >= end: + // Special case: EndElement can be synthesized. + if start == end && end == lastEnd { + break + } + t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) + case end > int64(len(raw)): + t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) + default: + text := raw[start:end] + if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { + t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) + } + } + lastEnd = end + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt ">">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "<">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), + CharData("\n"), +} + +func TestNestedDirectives(t *testing.T) { + d := NewDecoder(strings.NewReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + + for i, want := range cookedTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + d := NewDecoder(strings.NewReader(xmlInput[i])) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +func TestInputLinePos(t *testing.T) { + testInput := `<root> +<?pi + ?> <elt +att += +"val"> +<![CDATA[ +]]><!-- + +--></elt> +</root>` + linePos := [][]int{ + {1, 7}, + {2, 1}, + {3, 4}, + {3, 6}, + {6, 7}, + {7, 1}, + {8, 4}, + {10, 4}, + {10, 10}, + {11, 1}, + {11, 8}, + } + dec := NewDecoder(strings.NewReader(testInput)) + for _, want := range linePos { + if _, err := dec.Token(); err != nil { + t.Errorf("Unexpected error: %v", err) + continue + } + + gotLine, gotCol := dec.InputPos() + if gotLine != want[0] || gotCol != want[1] { + t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = `<allscalars> + <True1>true</True1> + <True2>1</True2> + <False1>false</False1> + <False2>0</False2> + <Int>1</Int> + <Int8>-2</Int8> + <Int16>3</Int16> + <Int32>-4</Int32> + <Int64>5</Int64> + <Uint>6</Uint> + <Uint8>7</Uint8> + <Uint16>8</Uint16> + <Uint32>9</Uint32> + <Uint64>10</Uint64> + <Uintptr>11</Uintptr> + <Float>12.0</Float> + <Float32>13.0</Float32> + <Float64>14.0</Float64> + <String>15</String> + <PtrString>16</PtrString> +</allscalars>` + +func TestAllScalars(t *testing.T) { + var a allScalars + err := Unmarshal([]byte(testScalarsInput), &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + FieldA string +} + +func TestIssue569(t *testing.T) { + data := `<item><FieldA>abcd</FieldA></item>` + var i item + err := Unmarshal([]byte(data), &i) + + if err != nil || i.FieldA != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "<tag attr=azAZ09:-_\t>" + d := NewDecoder(strings.NewReader(data)) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"<p nowrap>", "p", "nowrap"}, + {"<p nowrap >", "p", "nowrap"}, + {"<input checked/>", "input", "checked"}, + {"<input checked />", "input", "checked"}, + } + for _, test := range tests { + d := NewDecoder(strings.NewReader(test[0])) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if tok1.(StartElement).Attr[0].Value != "en" { + t.Error("CopyToken overwrote Attr[0]") + } + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenComment(t *testing.T) { + data := []byte("<!-- some comment -->") + var tok1 Token = Comment(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) != Comment") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "<P>Foo<P>\n\n<P>Bar</>\n" + d := NewDecoder(strings.NewReader(testInput)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { + } + if err != io.EOF { + t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `<test><![CDATA[ &val=foo ]]></test>` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +var characterTests = []struct { + in string + err string +}{ + {"\x12<doc/>", "illegal character code U+0012"}, + {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, + {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, + {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, + {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, + {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, + {"<doc>&hello;</doc>", "invalid character entity &hello;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + d := NewDecoder(strings.NewReader(tt.in)) + var err error + + for err == nil { + _, err = d.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) + } + } +} + +func TestIsInCharacterRange(t *testing.T) { + invalid := []rune{ + utf8.MaxRune + 1, + 0xD800, // surrogate min + 0xDFFF, // surrogate max + -1, + } + for _, r := range invalid { + if isInCharacterRange(r) { + t.Errorf("rune %U considered valid", r) + } + } +} + +var procInstTests = []struct { + input string + expect [2]string +}{ + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) + } + } +} + +// Ensure that directives with comments include the complete +// text of any nested directives. + +var directivesWithCommentsInput = ` +<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> +<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), + CharData("\n"), + Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(strings.Builder) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) + } +} + +func TestIssue5880(t *testing.T) { + type T []byte + data, err := Marshal(T{192, 168, 0, 1}) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !utf8.Valid(data) { + t.Errorf("Marshal generated invalid UTF-8: %x", data) + } +} + +func TestIssue8535(t *testing.T) { + + type ExampleConflict struct { + XMLName Name `xml:"example"` + Link string `xml:"link"` + AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space + } + testCase := `<example> + <title>Example</title> + <link>http://example.com/default</link> <!-- not assigned --> + <link>http://example.com/home</link> <!-- not assigned --> + <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> + </example>` + + var dest ExampleConflict + d := NewDecoder(strings.NewReader(testCase)) + if err := d.Decode(&dest); err != nil { + t.Fatal(err) + } +} + +func TestEncodeXMLNS(t *testing.T) { + testCases := []struct { + f func() ([]byte, error) + want string + ok bool + }{ + {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true}, + {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, + } + + for i, tc := range testCases { + if b, err := tc.f(); err == nil { + if got, want := string(b), tc.want; got != want { + t.Errorf("%d: got %s, want %s \n", i, got, want) + } + } else { + t.Errorf("%d: marshal failed with %s", i, err) + } + } +} + +func encodeXMLNS1() ([]byte, error) { + + type T struct { + XMLName Name `xml:"Test"` + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &T{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS2() ([]byte, error) { + + type Test struct { + Body string `xml:"http://example.com/ns body"` + } + + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS3() ([]byte, error) { + + type Test struct { + XMLName Name `xml:"http://example.com/ns Test"` + Body string + } + + //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing + // as documentation states + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS4() ([]byte, error) { + + type Test struct { + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &Test{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func TestIssue11405(t *testing.T) { + testCases := []string{ + "<root>", + "<root><foo>", + "<root><foo></foo>", + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) + } + } +} + +func TestIssue12417(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) + } + } +} + +func TestIssue7113(t *testing.T) { + type C struct { + XMLName Name `xml:""` // Sets empty namespace + } + + type D struct { + XMLName Name `xml:"d"` + } + + type A struct { + XMLName Name `xml:""` + C C `xml:""` + D D + } + + var a A + structSpace := "b" + xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>` + t.Log(xmlTest) + err := Unmarshal([]byte(xmlTest), &a) + if err != nil { + t.Fatal(err) + } + + if a.XMLName.Space != structSpace { + t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(a.C.XMLName.Space) != 0 { + t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space) + } + + var b []byte + b, err = Marshal(&a) + if err != nil { + t.Fatal(err) + } + if len(a.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space) + } + if string(b) != xmlTest { + t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest) + } + var c A + err = Unmarshal(b, &c) + if err != nil { + t.Fatalf("second Unmarshal failed: %s", err) + } + if c.XMLName.Space != "b" { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(c.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } +} + +func TestIssue20396(t *testing.T) { + + var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element") + + testCases := []struct { + s string + wantErr error + }{ + {`<a:te:st xmlns:a="abcd"/>`, // Issue 20396 + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd"/>`, attrError}, + {`<a:te&st xmlns:a="abcd"/>`, attrError}, + {`<a:test xmlns:a="abcd"/>`, nil}, + {`<a:te:st xmlns:a="abcd">1</a:te:st>`, + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError}, + {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError}, + {`<a:test xmlns:a="abcd">1</a:test>`, nil}, + } + + var dest string + for _, tc := range testCases { + if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want { + if got == nil { + t.Errorf("%s: Unexpected success, want %v", tc.s, want) + } else if want == nil { + t.Errorf("%s: Unexpected error, got %v", tc.s, got) + } else if got.Error() != want.Error() { + t.Errorf("%s: got %v, want %v", tc.s, got, want) + } + } + } +} + +func TestIssue20685(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false}, + {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false}, + {`<x:book xmlns:x="abcd">one</y:book>`, false}, + {`<x:book>one</y:book>`, false}, + {`<xbook>one</ybook>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s) + } + } +} + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`<br>`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} + +func testRoundTrip(t *testing.T, input string) { + d := NewDecoder(strings.NewReader(input)) + var tokens []Token + var buf bytes.Buffer + e := NewEncoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("invalid input: %v", err) + } + if err := e.EncodeToken(tok); err != nil { + t.Fatalf("failed to re-encode input: %v", err) + } + tokens = append(tokens, CopyToken(tok)) + } + if err := e.Flush(); err != nil { + t.Fatal(err) + } + + d = NewDecoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to decode output: %v", err) + } + if len(tokens) == 0 { + t.Fatalf("unexpected token: %#v", tok) + } + a, b := tokens[0], tok + if !reflect.DeepEqual(a, b) { + t.Fatalf("token mismatch: %#v vs %#v", a, b) + } + tokens = tokens[1:] + } + if len(tokens) > 0 { + t.Fatalf("lost tokens: %#v", tokens) + } +} + +func TestRoundTrip(t *testing.T) { + tests := map[string]string{ + "trailing colon": `<foo abc:="x"></foo>`, + "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, + } + for name, input := range tests { + t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) + } +} + +func TestParseErrors(t *testing.T) { + withDefaultHeader := func(s string) string { + return `<?xml version="1.0" encoding="UTF-8"?>` + s + } + tests := []struct { + src string + err string + }{ + {withDefaultHeader(`</foo>`), `unexpected end element </foo>`}, + {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`}, + {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`}, + {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`}, + {withDefaultHeader("\xf1"), `invalid UTF-8`}, + + // Header-related errors. + {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`}, + + // Cases below are for "no errors". + {withDefaultHeader(`<?ok?>`), ``}, + {withDefaultHeader(`<?ok version="ok"?>`), ``}, + } + + for _, test := range tests { + d := NewDecoder(strings.NewReader(test.src)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if test.err == "" { + if err != io.EOF { + t.Errorf("parse %s: have %q error, expected none", test.src, err) + } + continue + } + // Inv: err != nil + if err == io.EOF { + t.Errorf("parse %s: unexpected EOF", test.src) + continue + } + if !strings.Contains(err.Error(), test.err) { + t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err) + continue + } + } +} + +const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?> +<br> +<br/><br/> +<br><br> +<br></br> +<BR> +<BR/><BR/> +<Br></Br> +<BR><span id="test">abc</span><br/><br/>` + +func BenchmarkHTMLAutoClose(b *testing.B) { + b.RunParallel(func(p *testing.PB) { + for p.Next() { + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + b.Fatalf("unexpected error: %v", err) + } + } + } + }) +} + +func TestHTMLAutoClose(t *testing.T) { + wantTokens := []Token{ + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "Br"}, []Attr{}}, + EndElement{Name{"", "Br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, + CharData("abc"), + EndElement{Name{"", "span"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + } + + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + var haveTokens []Token + for { + tok, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("unexpected error: %v", err) + } + haveTokens = append(haveTokens, CopyToken(tok)) + } + if len(haveTokens) != len(wantTokens) { + t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) + } + for i, want := range wantTokens { + if i >= len(haveTokens) { + t.Errorf("token[%d] expected %#v, have no token", i, want) + } else { + have := haveTokens[i] + if !reflect.DeepEqual(have, want) { + t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) + } + } + } +} |