diff options
Diffstat (limited to 'src/encoding/json')
22 files changed, 10125 insertions, 0 deletions
diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go new file mode 100644 index 0000000..d3af0dc --- /dev/null +++ b/src/encoding/json/bench_test.go @@ -0,0 +1,541 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Large data benchmark. +// The JSON data is a summary of agl's changes in the +// go, webkit, and chromium open source projects. +// We benchmark converting between the JSON form +// and in-memory data structures. + +package json + +import ( + "bytes" + "compress/gzip" + "fmt" + "internal/testenv" + "io" + "os" + "reflect" + "regexp" + "runtime" + "strings" + "sync" + "testing" +) + +type codeResponse struct { + Tree *codeNode `json:"tree"` + Username string `json:"username"` +} + +type codeNode struct { + Name string `json:"name"` + Kids []*codeNode `json:"kids"` + CLWeight float64 `json:"cl_weight"` + Touches int `json:"touches"` + MinT int64 `json:"min_t"` + MaxT int64 `json:"max_t"` + MeanT int64 `json:"mean_t"` +} + +var codeJSON []byte +var codeStruct codeResponse + +func codeInit() { + f, err := os.Open("testdata/code.json.gz") + if err != nil { + panic(err) + } + defer f.Close() + gz, err := gzip.NewReader(f) + if err != nil { + panic(err) + } + data, err := io.ReadAll(gz) + if err != nil { + panic(err) + } + + codeJSON = data + + if err := Unmarshal(codeJSON, &codeStruct); err != nil { + panic("unmarshal code.json: " + err.Error()) + } + + if data, err = Marshal(&codeStruct); err != nil { + panic("marshal code.json: " + err.Error()) + } + + if !bytes.Equal(data, codeJSON) { + println("different lengths", len(data), len(codeJSON)) + for i := 0; i < len(data) && i < len(codeJSON); i++ { + if data[i] != codeJSON[i] { + println("re-marshal: changed at byte", i) + println("orig: ", string(codeJSON[i-10:i+10])) + println("new: ", string(data[i-10:i+10])) + break + } + } + panic("re-marshal code.json: different result") + } +} + +func BenchmarkCodeEncoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatal("Encode:", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeEncoderError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + for pb.Next() { + if err := enc.Encode(&codeStruct); err != nil { + b.Fatal("Encode:", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("expect an error here") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatal("Marshal:", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeMarshalError(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&codeStruct); err != nil { + b.Fatal("Marshal:", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("expect an error here") + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func benchMarshalBytes(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatal("Marshal:", err) + } + } + } +} + +func benchMarshalBytesError(n int) func(*testing.B) { + sample := []byte("hello world") + // Use a struct pointer, to avoid an allocation when passing it as an + // interface parameter to Marshal. + v := &struct { + Bytes []byte + }{ + bytes.Repeat(sample, (n/len(sample))+1)[:n], + } + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + return func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := Marshal(v); err != nil { + b.Fatal("Marshal:", err) + } + if _, err := Marshal(dummy); err == nil { + b.Fatal("expect an error here") + } + } + } +} + +func BenchmarkMarshalBytes(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytes(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytes(256)) + // 4096 is large enough that we want to avoid allocating for it. + b.Run("4096", benchMarshalBytes(4096)) +} + +func BenchmarkMarshalBytesError(b *testing.B) { + b.ReportAllocs() + // 32 fits within encodeState.scratch. + b.Run("32", benchMarshalBytesError(32)) + // 256 doesn't fit in encodeState.scratch, but is small enough to + // allocate and avoid the slower base64.NewEncoder. + b.Run("256", benchMarshalBytesError(256)) + // 4096 is large enough that we want to avoid allocating for it. + b.Run("4096", benchMarshalBytesError(4096)) +} + +func BenchmarkCodeDecoder(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + dec := NewDecoder(&buf) + var r codeResponse + for pb.Next() { + buf.Write(codeJSON) + // hide EOF + buf.WriteByte('\n') + buf.WriteByte('\n') + buf.WriteByte('\n') + if err := dec.Decode(&r); err != nil { + b.Fatal("Decode:", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnicodeDecoder(b *testing.B) { + b.ReportAllocs() + j := []byte(`"\uD83D\uDE01"`) + b.SetBytes(int64(len(j))) + r := bytes.NewReader(j) + dec := NewDecoder(r) + var out string + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := dec.Decode(&out); err != nil { + b.Fatal("Decode:", err) + } + r.Seek(0, 0) + } +} + +func BenchmarkDecoderStream(b *testing.B) { + b.ReportAllocs() + b.StopTimer() + var buf bytes.Buffer + dec := NewDecoder(&buf) + buf.WriteString(`"` + strings.Repeat("x", 1000000) + `"` + "\n\n\n") + var x any + if err := dec.Decode(&x); err != nil { + b.Fatal("Decode:", err) + } + ones := strings.Repeat(" 1\n", 300000) + "\n\n\n" + b.StartTimer() + for i := 0; i < b.N; i++ { + if i%300000 == 0 { + buf.WriteString(ones) + } + x = nil + if err := dec.Decode(&x); err != nil || x != 1.0 { + b.Fatalf("Decode: %v after %d", err, i) + } + } +} + +func BenchmarkCodeUnmarshal(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + var r codeResponse + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatal("Unmarshal:", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkCodeUnmarshalReuse(b *testing.B) { + b.ReportAllocs() + if codeJSON == nil { + b.StopTimer() + codeInit() + b.StartTimer() + } + b.RunParallel(func(pb *testing.PB) { + var r codeResponse + for pb.Next() { + if err := Unmarshal(codeJSON, &r); err != nil { + b.Fatal("Unmarshal:", err) + } + } + }) + b.SetBytes(int64(len(codeJSON))) +} + +func BenchmarkUnmarshalString(b *testing.B) { + b.ReportAllocs() + data := []byte(`"hello, world"`) + b.RunParallel(func(pb *testing.PB) { + var s string + for pb.Next() { + if err := Unmarshal(data, &s); err != nil { + b.Fatal("Unmarshal:", err) + } + } + }) +} + +func BenchmarkUnmarshalFloat64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3.14`) + b.RunParallel(func(pb *testing.PB) { + var f float64 + for pb.Next() { + if err := Unmarshal(data, &f); err != nil { + b.Fatal("Unmarshal:", err) + } + } + }) +} + +func BenchmarkUnmarshalInt64(b *testing.B) { + b.ReportAllocs() + data := []byte(`3`) + b.RunParallel(func(pb *testing.PB) { + var x int64 + for pb.Next() { + if err := Unmarshal(data, &x); err != nil { + b.Fatal("Unmarshal:", err) + } + } + }) +} + +func BenchmarkIssue10335(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"a":{ }}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkIssue34127(b *testing.B) { + b.ReportAllocs() + j := struct { + Bar string `json:"bar,string"` + }{ + Bar: `foobar`, + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if _, err := Marshal(&j); err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkUnmapped(b *testing.B) { + b.ReportAllocs() + j := []byte(`{"s": "hello", "y": 2, "o": {"x": 0}, "a": [1, 99, {"x": 1}]}`) + b.RunParallel(func(pb *testing.PB) { + var s struct{} + for pb.Next() { + if err := Unmarshal(j, &s); err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkTypeFieldsCache(b *testing.B) { + b.ReportAllocs() + var maxTypes int = 1e6 + if testenv.Builder() != "" { + maxTypes = 1e3 // restrict cache sizes on builders + } + + // Dynamically generate many new types. + types := make([]reflect.Type, maxTypes) + fs := []reflect.StructField{{ + Type: reflect.TypeOf(""), + Index: []int{0}, + }} + for i := range types { + fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i) + types[i] = reflect.StructOf(fs) + } + + // clearClear clears the cache. Other JSON operations, must not be running. + clearCache := func() { + fieldCache = sync.Map{} + } + + // MissTypes tests the performance of repeated cache misses. + // This measures the time to rebuild a cache of size nt. + for nt := 1; nt <= maxTypes; nt *= 10 { + ts := types[:nt] + b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) { + nc := runtime.GOMAXPROCS(0) + for i := 0; i < b.N; i++ { + clearCache() + var wg sync.WaitGroup + for j := 0; j < nc; j++ { + wg.Add(1) + go func(j int) { + for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] { + cachedTypeFields(t) + } + wg.Done() + }(j) + } + wg.Wait() + } + }) + } + + // HitTypes tests the performance of repeated cache hits. + // This measures the average time of each cache lookup. + for nt := 1; nt <= maxTypes; nt *= 10 { + // Pre-warm a cache of size nt. + clearCache() + for _, t := range types[:nt] { + cachedTypeFields(t) + } + b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + cachedTypeFields(types[0]) + } + }) + }) + } +} + +func BenchmarkEncodeMarshaler(b *testing.B) { + b.ReportAllocs() + + m := struct { + A int + B RawMessage + }{} + + b.RunParallel(func(pb *testing.PB) { + enc := NewEncoder(io.Discard) + + for pb.Next() { + if err := enc.Encode(&m); err != nil { + b.Fatal("Encode:", err) + } + } + }) +} + +func BenchmarkEncoderEncode(b *testing.B) { + b.ReportAllocs() + type T struct { + X, Y string + } + v := &T{"foo", "bar"} + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if err := NewEncoder(io.Discard).Encode(v); err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkNumberIsValid(b *testing.B) { + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + isValidNumber(s) + } +} + +func BenchmarkNumberIsValidRegexp(b *testing.B) { + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + s := "-61657.61667E+61673" + for i := 0; i < b.N; i++ { + jsonNumberRegexp.MatchString(s) + } +} diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go new file mode 100644 index 0000000..53470d8 --- /dev/null +++ b/src/encoding/json/decode.go @@ -0,0 +1,1291 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Represents JSON data structure using native Go types: booleans, floats, +// strings, arrays, and maps. + +package json + +import ( + "encoding" + "encoding/base64" + "fmt" + "reflect" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +// Unmarshal parses the JSON-encoded data and stores the result +// in the value pointed to by v. If v is nil or not a pointer, +// Unmarshal returns an InvalidUnmarshalError. +// +// Unmarshal uses the inverse of the encodings that +// Marshal uses, allocating maps, slices, and pointers as necessary, +// with the following additional rules: +// +// To unmarshal JSON into a pointer, Unmarshal first handles the case of +// the JSON being the JSON literal null. In that case, Unmarshal sets +// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into +// the value pointed at by the pointer. If the pointer is nil, Unmarshal +// allocates a new value for it to point to. +// +// To unmarshal JSON into a value implementing the Unmarshaler interface, +// Unmarshal calls that value's UnmarshalJSON method, including +// when the input is a JSON null. +// Otherwise, if the value implements encoding.TextUnmarshaler +// and the input is a JSON quoted string, Unmarshal calls that value's +// UnmarshalText method with the unquoted form of the string. +// +// To unmarshal JSON into a struct, Unmarshal matches incoming object +// keys to the keys used by Marshal (either the struct field name or its tag), +// preferring an exact match but also accepting a case-insensitive match. By +// default, object keys which don't have a corresponding struct field are +// ignored (see Decoder.DisallowUnknownFields for an alternative). +// +// To unmarshal JSON into an interface value, +// Unmarshal stores one of these in the interface value: +// +// bool, for JSON booleans +// float64, for JSON numbers +// string, for JSON strings +// []interface{}, for JSON arrays +// map[string]interface{}, for JSON objects +// nil for JSON null +// +// To unmarshal a JSON array into a slice, Unmarshal resets the slice length +// to zero and then appends each element to the slice. +// As a special case, to unmarshal an empty JSON array into a slice, +// Unmarshal replaces the slice with a new empty slice. +// +// To unmarshal a JSON array into a Go array, Unmarshal decodes +// JSON array elements into corresponding Go array elements. +// If the Go array is smaller than the JSON array, +// the additional JSON array elements are discarded. +// If the JSON array is smaller than the Go array, +// the additional Go array elements are set to zero values. +// +// To unmarshal a JSON object into a map, Unmarshal first establishes a map to +// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal +// reuses the existing map, keeping existing entries. Unmarshal then stores +// key-value pairs from the JSON object into the map. The map's key type must +// either be any string type, an integer, implement json.Unmarshaler, or +// implement encoding.TextUnmarshaler. +// +// If the JSON-encoded data contain a syntax error, Unmarshal returns a SyntaxError. +// +// If a JSON value is not appropriate for a given target type, +// or if a JSON number overflows the target type, Unmarshal +// skips that field and completes the unmarshaling as best it can. +// If no more serious errors are encountered, Unmarshal returns +// an UnmarshalTypeError describing the earliest such error. In any +// case, it's not guaranteed that all the remaining fields following +// the problematic one will be unmarshaled into the target object. +// +// The JSON null value unmarshals into an interface, map, pointer, or slice +// by setting that Go value to nil. Because null is often used in JSON to mean +// “not present,” unmarshaling a JSON null into any other Go type has no effect +// on the value and produces no error. +// +// When unmarshaling quoted strings, invalid UTF-8 or +// invalid UTF-16 surrogate pairs are not treated as an error. +// Instead, they are replaced by the Unicode replacement +// character U+FFFD. +func Unmarshal(data []byte, v any) error { + // Check for well-formedness. + // Avoids filling out half a data structure + // before discovering a JSON syntax error. + var d decodeState + err := checkValid(data, &d.scan) + if err != nil { + return err + } + + d.init(data) + return d.unmarshal(v) +} + +// Unmarshaler is the interface implemented by types +// that can unmarshal a JSON description of themselves. +// The input can be assumed to be a valid encoding of +// a JSON value. UnmarshalJSON must copy the JSON data +// if it wishes to retain the data after returning. +// +// By convention, to approximate the behavior of Unmarshal itself, +// Unmarshalers implement UnmarshalJSON([]byte("null")) as a no-op. +type Unmarshaler interface { + UnmarshalJSON([]byte) error +} + +// An UnmarshalTypeError describes a JSON value that was +// not appropriate for a value of a specific Go type. +type UnmarshalTypeError struct { + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to + Offset int64 // error occurred after reading Offset bytes + Struct string // name of the struct type containing the field + Field string // the full path from root node to the field +} + +func (e *UnmarshalTypeError) Error() string { + if e.Struct != "" || e.Field != "" { + return "json: cannot unmarshal " + e.Value + " into Go struct field " + e.Struct + "." + e.Field + " of type " + e.Type.String() + } + return "json: cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() +} + +// An UnmarshalFieldError describes a JSON object key that +// led to an unexported (and therefore unwritable) struct field. +// +// Deprecated: No longer used; kept for compatibility. +type UnmarshalFieldError struct { + Key string + Type reflect.Type + Field reflect.StructField +} + +func (e *UnmarshalFieldError) Error() string { + return "json: cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() +} + +// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal. +// (The argument to Unmarshal must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return "json: Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Pointer { + return "json: Unmarshal(non-pointer " + e.Type.String() + ")" + } + return "json: Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v any) error { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Pointer || rv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + + d.scan.reset() + d.scanWhile(scanSkipSpace) + // We decode rv not rv.Elem because the Unmarshaler interface + // test must be applied at the top level of the value. + err := d.value(rv) + if err != nil { + return d.addErrorContext(err) + } + return d.savedError +} + +// A Number represents a JSON number literal. +type Number string + +// String returns the literal text of the number. +func (n Number) String() string { return string(n) } + +// Float64 returns the number as a float64. +func (n Number) Float64() (float64, error) { + return strconv.ParseFloat(string(n), 64) +} + +// Int64 returns the number as an int64. +func (n Number) Int64() (int64, error) { + return strconv.ParseInt(string(n), 10, 64) +} + +// An errorContext provides context for type errors during decoding. +type errorContext struct { + Struct reflect.Type + FieldStack []string +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // next read offset in data + opcode int // last read result + scan scanner + errorContext *errorContext + savedError error + useNumber bool + disallowUnknownFields bool +} + +// readIndex returns the position of the last byte read. +func (d *decodeState) readIndex() int { + return d.off - 1 +} + +// phasePanicMsg is used as a panic message when we end up with something that +// shouldn't happen. It can indicate a bug in the JSON decoder, or that +// something is editing the data slice while the decoder executes. +const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" + +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + d.savedError = nil + if d.errorContext != nil { + d.errorContext.Struct = nil + // Reuse the allocated space for the FieldStack slice. + d.errorContext.FieldStack = d.errorContext.FieldStack[:0] + } + return d +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = d.addErrorContext(err) + } +} + +// addErrorContext returns a new error enhanced with information from d.errorContext +func (d *decodeState) addErrorContext(err error) error { + if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) { + switch err := err.(type) { + case *UnmarshalTypeError: + err.Struct = d.errorContext.Struct.Name() + err.Field = strings.Join(d.errorContext.FieldStack, ".") + } + } + return err +} + +// skip scans to the end of what was started. +func (d *decodeState) skip() { + s, data, i := &d.scan, d.data, d.off + depth := len(s.parseState) + for { + op := s.step(s, data[i]) + i++ + if len(s.parseState) < depth { + d.off = i + d.opcode = op + return + } + } +} + +// scanNext processes the byte at d.data[d.off]. +func (d *decodeState) scanNext() { + if d.off < len(d.data) { + d.opcode = d.scan.step(&d.scan, d.data[d.off]) + d.off++ + } else { + d.opcode = d.scan.eof() + d.off = len(d.data) + 1 // mark processed EOF with len+1 + } +} + +// scanWhile processes bytes in d.data[d.off:] until it +// receives a scan code not equal to op. +func (d *decodeState) scanWhile(op int) { + s, data, i := &d.scan, d.data, d.off + for i < len(data) { + newOp := s.step(s, data[i]) + i++ + if newOp != op { + d.opcode = newOp + d.off = i + return + } + } + + d.off = len(data) + 1 // mark processed EOF with len+1 + d.opcode = d.scan.eof() +} + +// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the +// common case where we're decoding a literal. The decoder scans the input +// twice, once for syntax errors and to check the length of the value, and the +// second to perform the decoding. +// +// Only in the second step do we use decodeState to tokenize literals, so we +// know there aren't any syntax errors. We can take advantage of that knowledge, +// and scan a literal's bytes much more quickly. +func (d *decodeState) rescanLiteral() { + data, i := d.data, d.off +Switch: + switch data[i-1] { + case '"': // string + for ; i < len(data); i++ { + switch data[i] { + case '\\': + i++ // escaped char + case '"': + i++ // tokenize the closing quote too + break Switch + } + } + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number + for ; i < len(data); i++ { + switch data[i] { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '.', 'e', 'E', '+', '-': + default: + break Switch + } + } + case 't': // true + i += len("rue") + case 'f': // false + i += len("alse") + case 'n': // null + i += len("ull") + } + if i < len(data) { + d.opcode = stateEndValue(&d.scan, data[i]) + } else { + d.opcode = scanEnd + } + d.off = i + 1 +} + +// value consumes a JSON value from d.data[d.off-1:], decoding into v, and +// reads the following byte ahead. If v is invalid, the value is discarded. +// The first byte of the value has been read already. +func (d *decodeState) value(v reflect.Value) error { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray: + if v.IsValid() { + if err := d.array(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginObject: + if v.IsValid() { + if err := d.object(v); err != nil { + return err + } + } else { + d.skip() + } + d.scanNext() + + case scanBeginLiteral: + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + if v.IsValid() { + if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { + return err + } + } + } + return nil +} + +type unquotedValue struct{} + +// valueQuoted is like value but decodes a +// quoted string literal or literal null into an interface value. +// If it finds anything other than a quoted string literal or null, +// valueQuoted returns unquotedValue{}. +func (d *decodeState) valueQuoted() any { + switch d.opcode { + default: + panic(phasePanicMsg) + + case scanBeginArray, scanBeginObject: + d.skip() + d.scanNext() + + case scanBeginLiteral: + v := d.literalInterface() + switch v.(type) { + case nil, string: + return v + } + } + return unquotedValue{} +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +// If it encounters an Unmarshaler, indirect stops and returns that. +// If decodingNull is true, indirect stops at the first settable pointer so it +// can be set to nil. +func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { + // Issue #24153 indicates that it is generally not a guaranteed property + // that you may round-trip a reflect.Value by calling Value.Addr().Elem() + // and expect the value to still be settable for values derived from + // unexported embedded struct fields. + // + // The logic below effectively does this when it first addresses the value + // (to satisfy possible pointer methods) and continues to dereference + // subsequent pointers as necessary. + // + // After the first round-trip, we set v back to the original value to + // preserve the original RW flags contained in reflect.Value. + v0 := v + haveAddr := false + + // If v is a named type and is addressable, + // start with its address, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() { + haveAddr = true + v = v.Addr() + } + for { + // Load value from interface, but only if the result will be + // usefully addressable. + if v.Kind() == reflect.Interface && !v.IsNil() { + e := v.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) { + haveAddr = false + v = e + continue + } + } + + if v.Kind() != reflect.Pointer { + break + } + + if decodingNull && v.CanSet() { + break + } + + // Prevent infinite loop if v is an interface pointing to its own address: + // var v interface{} + // v = &v + if v.Elem().Kind() == reflect.Interface && v.Elem().Elem() == v { + v = v.Elem() + break + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + if v.Type().NumMethod() > 0 && v.CanInterface() { + if u, ok := v.Interface().(Unmarshaler); ok { + return u, nil, reflect.Value{} + } + if !decodingNull { + if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { + return nil, u, reflect.Value{} + } + } + } + + if haveAddr { + v = v0 // restore original value after round-trip Value.Addr().Elem() + haveAddr = false + } else { + v = v.Elem() + } + } + return nil, nil, v +} + +// array consumes an array from d.data[d.off-1:], decoding into v. +// The first byte of the array ('[') has been read already. +func (d *decodeState) array(v reflect.Value) error { + // Check for unmarshaler. + u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + + // Check type of target. + switch v.Kind() { + case reflect.Interface: + if v.NumMethod() == 0 { + // Decoding into nil interface? Switch to non-reflect code. + ai := d.arrayInterface() + v.Set(reflect.ValueOf(ai)) + return nil + } + // Otherwise it's invalid. + fallthrough + default: + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + case reflect.Array, reflect.Slice: + break + } + + i := 0 + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + // Expand slice length, growing the slice if necessary. + if v.Kind() == reflect.Slice { + if i >= v.Cap() { + v.Grow(1) + } + if i >= v.Len() { + v.SetLen(i + 1) + } + } + + if i < v.Len() { + // Decode into element. + if err := d.value(v.Index(i)); err != nil { + return err + } + } else { + // Ran out of fixed array: skip. + if err := d.value(reflect.Value{}); err != nil { + return err + } + } + i++ + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + + if i < v.Len() { + if v.Kind() == reflect.Array { + for ; i < v.Len(); i++ { + v.Index(i).SetZero() // zero remainder of array + } + } else { + v.SetLen(i) // truncate the slice + } + } + if i == 0 && v.Kind() == reflect.Slice { + v.Set(reflect.MakeSlice(v.Type(), 0, 0)) + } + return nil +} + +var nullLiteral = []byte("null") +var textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() + +// object consumes an object from d.data[d.off-1:], decoding into v. +// The first byte ('{') of the object has been read already. +func (d *decodeState) object(v reflect.Value) error { + // Check for unmarshaler. + u, ut, pv := indirect(v, false) + if u != nil { + start := d.readIndex() + d.skip() + return u.UnmarshalJSON(d.data[start:d.off]) + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.skip() + return nil + } + v = pv + t := v.Type() + + // Decoding into nil interface? Switch to non-reflect code. + if v.Kind() == reflect.Interface && v.NumMethod() == 0 { + oi := d.objectInterface() + v.Set(reflect.ValueOf(oi)) + return nil + } + + var fields structFields + + // Check type of target: + // struct or + // map[T1]T2 where T1 is string, an integer type, + // or an encoding.TextUnmarshaler + switch v.Kind() { + case reflect.Map: + // Map key must either have string kind, have an integer kind, + // or be an encoding.TextUnmarshaler. + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) { + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + } + if v.IsNil() { + v.Set(reflect.MakeMap(t)) + } + case reflect.Struct: + fields = cachedTypeFields(t) + // ok + default: + d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) + d.skip() + return nil + } + + var mapElem reflect.Value + var origErrorContext errorContext + if d.errorContext != nil { + origErrorContext = *d.errorContext + } + + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquoteBytes(item) + if !ok { + panic(phasePanicMsg) + } + + // Figure out field corresponding to key. + var subv reflect.Value + destring := false // whether the value is wrapped in a string to be decoded first + + if v.Kind() == reflect.Map { + elemType := t.Elem() + if !mapElem.IsValid() { + mapElem = reflect.New(elemType).Elem() + } else { + mapElem.SetZero() + } + subv = mapElem + } else { + f := fields.byExactName[string(key)] + if f == nil { + f = fields.byFoldedName[string(foldName(key))] + } + if f != nil { + subv = v + destring = f.quoted + for _, i := range f.index { + if subv.Kind() == reflect.Pointer { + if subv.IsNil() { + // If a struct embeds a pointer to an unexported type, + // it is not possible to set a newly allocated value + // since the field is unexported. + // + // See https://golang.org/issue/21357 + if !subv.CanSet() { + d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) + // Invalidate subv to ensure d.value(subv) skips over + // the JSON value without assigning it to subv. + subv = reflect.Value{} + destring = false + break + } + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + subv = subv.Field(i) + } + if d.errorContext == nil { + d.errorContext = new(errorContext) + } + d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) + d.errorContext.Struct = t + } else if d.disallowUnknownFields { + d.saveError(fmt.Errorf("json: unknown field %q", key)) + } + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + if destring { + switch qv := d.valueQuoted().(type) { + case nil: + if err := d.literalStore(nullLiteral, subv, false); err != nil { + return err + } + case string: + if err := d.literalStore([]byte(qv), subv, true); err != nil { + return err + } + default: + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) + } + } else { + if err := d.value(subv); err != nil { + return err + } + } + + // Write value back to map; + // if using struct, subv points into struct already. + if v.Kind() == reflect.Map { + kt := t.Key() + var kv reflect.Value + switch { + case reflect.PointerTo(kt).Implements(textUnmarshalerType): + kv = reflect.New(kt) + if err := d.literalStore(item, kv, true); err != nil { + return err + } + kv = kv.Elem() + case kt.Kind() == reflect.String: + kv = reflect.ValueOf(key).Convert(kt) + default: + switch kt.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + s := string(key) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.ValueOf(n).Convert(kt) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + s := string(key) + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + break + } + kv = reflect.ValueOf(n).Convert(kt) + default: + panic("json: Unexpected key type") // should never occur + } + } + if kv.IsValid() { + v.SetMapIndex(kv, subv) + } + } + + // Next token must be , or }. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.errorContext != nil { + // Reset errorContext to its original state. + // Keep the same underlying array for FieldStack, to reuse the + // space and avoid unnecessary allocs. + d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] + d.errorContext.Struct = origErrorContext.Struct + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return nil +} + +// convertNumber converts the number literal s to a float64 or a Number +// depending on the setting of d.useNumber. +func (d *decodeState) convertNumber(s string) (any, error) { + if d.useNumber { + return Number(s), nil + } + f, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeOf(0.0), Offset: int64(d.off)} + } + return f, nil +} + +var numberType = reflect.TypeOf(Number("")) + +// literalStore decodes a literal stored in item into v. +// +// fromQuoted indicates whether this literal came from unwrapping a +// string from the ",string" struct tag option. this is used only to +// produce more helpful error messages. +func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error { + // Check for unmarshaler. + if len(item) == 0 { + //Empty string given + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + isNull := item[0] == 'n' // null + u, ut, pv := indirect(v, isNull) + if u != nil { + return u.UnmarshalJSON(item) + } + if ut != nil { + if item[0] != '"' { + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return nil + } + val := "number" + switch item[0] { + case 'n': + val = "null" + case 't', 'f': + val = "bool" + } + d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) + return nil + } + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + return ut.UnmarshalText(s) + } + + v = pv + + switch c := item[0]; c { + case 'n': // null + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "null" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: + v.SetZero() + // otherwise, ignore null for primitives/string + } + case 't', 'f': // true, false + value := item[0] == 't' + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "true" && string(item) != "false" { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + default: + if fromQuoted { + d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + case reflect.Bool: + v.SetBool(value) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(value)) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + case '"': // string + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Slice: + if v.Type().Elem().Kind() != reflect.Uint8 { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) + n, err := base64.StdEncoding.Decode(b, s) + if err != nil { + d.saveError(err) + break + } + v.SetBytes(b[:n]) + case reflect.String: + if v.Type() == numberType && !isValidNumber(string(s)) { + return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) + } + v.SetString(string(s)) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(string(s))) + } else { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) + } + } + + default: // number + if c != '-' && (c < '0' || c > '9') { + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + panic(phasePanicMsg) + } + s := string(item) + switch v.Kind() { + default: + if v.Kind() == reflect.String && v.Type() == numberType { + // s must be a valid number, because it's + // already been tokenized. + v.SetString(s) + break + } + if fromQuoted { + return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) + } + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + case reflect.Interface: + n, err := d.convertNumber(s) + if err != nil { + d.saveError(err) + break + } + if v.NumMethod() != 0 { + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.Set(reflect.ValueOf(n)) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || v.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetInt(n) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || v.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetUint(n) + + case reflect.Float32, reflect.Float64: + n, err := strconv.ParseFloat(s, v.Type().Bits()) + if err != nil || v.OverflowFloat(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) + break + } + v.SetFloat(n) + } + } + return nil +} + +// The xxxInterface routines build up a value to be stored +// in an empty interface. They are not strictly necessary, +// but they avoid the weight of reflection in this common case. + +// valueInterface is like value but returns interface{} +func (d *decodeState) valueInterface() (val any) { + switch d.opcode { + default: + panic(phasePanicMsg) + case scanBeginArray: + val = d.arrayInterface() + d.scanNext() + case scanBeginObject: + val = d.objectInterface() + d.scanNext() + case scanBeginLiteral: + val = d.literalInterface() + } + return +} + +// arrayInterface is like array but returns []interface{}. +func (d *decodeState) arrayInterface() []any { + var v = make([]any, 0) + for { + // Look ahead for ] - can only happen on first iteration. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { + break + } + + v = append(v, d.valueInterface()) + + // Next token must be , or ]. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { + break + } + if d.opcode != scanArrayValue { + panic(phasePanicMsg) + } + } + return v +} + +// objectInterface is like object but returns map[string]interface{}. +func (d *decodeState) objectInterface() map[string]any { + m := make(map[string]any) + for { + // Read opening " of string key or closing }. + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if d.opcode != scanBeginLiteral { + panic(phasePanicMsg) + } + + // Read string key. + start := d.readIndex() + d.rescanLiteral() + item := d.data[start:d.readIndex()] + key, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + + // Read : before value. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode != scanObjectKey { + panic(phasePanicMsg) + } + d.scanWhile(scanSkipSpace) + + // Read value. + m[key] = d.valueInterface() + + // Next token must be , or }. + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndObject { + break + } + if d.opcode != scanObjectValue { + panic(phasePanicMsg) + } + } + return m +} + +// literalInterface consumes and returns a literal from d.data[d.off-1:] and +// it reads the following byte ahead. The first byte of the literal has been +// read already (that's how the caller knows it's a literal). +func (d *decodeState) literalInterface() any { + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.rescanLiteral() + + item := d.data[start:d.readIndex()] + + switch c := item[0]; c { + case 'n': // null + return nil + + case 't', 'f': // true, false + return c == 't' + + case '"': // string + s, ok := unquote(item) + if !ok { + panic(phasePanicMsg) + } + return s + + default: // number + if c != '-' && (c < '0' || c > '9') { + panic(phasePanicMsg) + } + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + } + return n + } +} + +// getu4 decodes \uXXXX from the beginning of s, returning the hex value, +// or it returns -1. +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + +// unquote converts a quoted JSON string literal s into an actual string t. +// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go new file mode 100644 index 0000000..c2c036b --- /dev/null +++ b/src/encoding/json/decode_test.go @@ -0,0 +1,2574 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "image" + "math" + "math/big" + "net" + "reflect" + "strconv" + "strings" + "testing" + "time" +) + +type T struct { + X string + Y int + Z int `json:"-"` +} + +type U struct { + Alphabet string `json:"alpha"` +} + +type V struct { + F1 any + F2 int32 + F3 Number + F4 *VOuter +} + +type VOuter struct { + V V +} + +type W struct { + S SS +} + +type P struct { + PP PP +} + +type PP struct { + T T + Ts []T +} + +type SS string + +func (*SS) UnmarshalJSON(data []byte) error { + return &UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(SS(""))} +} + +// ifaceNumAsFloat64/ifaceNumAsNumber are used to test unmarshaling with and +// without UseNumber +var ifaceNumAsFloat64 = map[string]any{ + "k1": float64(1), + "k2": "s", + "k3": []any{float64(1), float64(2.0), float64(3e-3)}, + "k4": map[string]any{"kk1": "s", "kk2": float64(2)}, +} + +var ifaceNumAsNumber = map[string]any{ + "k1": Number("1"), + "k2": "s", + "k3": []any{Number("1"), Number("2.0"), Number("3e-3")}, + "k4": map[string]any{"kk1": "s", "kk2": Number("2")}, +} + +type tx struct { + x int +} + +type u8 uint8 + +// A type that can unmarshal itself. + +type unmarshaler struct { + T bool +} + +func (u *unmarshaler) UnmarshalJSON(b []byte) error { + *u = unmarshaler{true} // All we need to see that UnmarshalJSON is called. + return nil +} + +type ustruct struct { + M unmarshaler +} + +type unmarshalerText struct { + A, B string +} + +// needed for re-marshaling tests +func (u unmarshalerText) MarshalText() ([]byte, error) { + return []byte(u.A + ":" + u.B), nil +} + +func (u *unmarshalerText) UnmarshalText(b []byte) error { + pos := bytes.IndexByte(b, ':') + if pos == -1 { + return errors.New("missing separator") + } + u.A, u.B = string(b[:pos]), string(b[pos+1:]) + return nil +} + +var _ encoding.TextUnmarshaler = (*unmarshalerText)(nil) + +type ustructText struct { + M unmarshalerText +} + +// u8marshal is an integer type that can marshal/unmarshal itself. +type u8marshal uint8 + +func (u8 u8marshal) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf("u%d", u8)), nil +} + +var errMissingU8Prefix = errors.New("missing 'u' prefix") + +func (u8 *u8marshal) UnmarshalText(b []byte) error { + if !bytes.HasPrefix(b, []byte{'u'}) { + return errMissingU8Prefix + } + n, err := strconv.Atoi(string(b[1:])) + if err != nil { + return err + } + *u8 = u8marshal(n) + return nil +} + +var _ encoding.TextUnmarshaler = (*u8marshal)(nil) + +var ( + umtrue = unmarshaler{true} + umslice = []unmarshaler{{true}} + umstruct = ustruct{unmarshaler{true}} + + umtrueXY = unmarshalerText{"x", "y"} + umsliceXY = []unmarshalerText{{"x", "y"}} + umstructXY = ustructText{unmarshalerText{"x", "y"}} + + ummapXY = map[unmarshalerText]bool{{"x", "y"}: true} +) + +// Test data structures for anonymous fields. + +type Point struct { + Z int +} + +type Top struct { + Level0 int + Embed0 + *Embed0a + *Embed0b `json:"e,omitempty"` // treated as named + Embed0c `json:"-"` // ignored + Loop + Embed0p // has Point with X, Y, used + Embed0q // has Point with Z, used + embed // contains exported field +} + +type Embed0 struct { + Level1a int // overridden by Embed0a's Level1a with json tag + Level1b int // used because Embed0a's Level1b is renamed + Level1c int // used because Embed0a's Level1c is ignored + Level1d int // annihilated by Embed0a's Level1d + Level1e int `json:"x"` // annihilated by Embed0a.Level1e +} + +type Embed0a struct { + Level1a int `json:"Level1a,omitempty"` + Level1b int `json:"LEVEL1B,omitempty"` + Level1c int `json:"-"` + Level1d int // annihilated by Embed0's Level1d + Level1f int `json:"x"` // annihilated by Embed0's Level1e +} + +type Embed0b Embed0 + +type Embed0c Embed0 + +type Embed0p struct { + image.Point +} + +type Embed0q struct { + Point +} + +type embed struct { + Q int +} + +type Loop struct { + Loop1 int `json:",omitempty"` + Loop2 int `json:",omitempty"` + *Loop +} + +// From reflect test: +// The X in S6 and S7 annihilate, but they also block the X in S8.S9. +type S5 struct { + S6 + S7 + S8 +} + +type S6 struct { + X int +} + +type S7 S6 + +type S8 struct { + S9 +} + +type S9 struct { + X int + Y int +} + +// From reflect test: +// The X in S11.S6 and S12.S6 annihilate, but they also block the X in S13.S8.S9. +type S10 struct { + S11 + S12 + S13 +} + +type S11 struct { + S6 +} + +type S12 struct { + S6 +} + +type S13 struct { + S8 +} + +type Ambig struct { + // Given "hello", the first match should win. + First int `json:"HELLO"` + Second int `json:"Hello"` +} + +type XYZ struct { + X any + Y any + Z any +} + +type unexportedWithMethods struct{} + +func (unexportedWithMethods) F() {} + +type byteWithMarshalJSON byte + +func (b byteWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, byte(b))), nil +} + +func (b *byteWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalJSON(i) + return nil +} + +type byteWithPtrMarshalJSON byte + +func (b *byteWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return byteWithMarshalJSON(*b).MarshalJSON() +} + +func (b *byteWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*byteWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type byteWithMarshalText byte + +func (b byteWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, byte(b))), nil +} + +func (b *byteWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = byteWithMarshalText(i) + return nil +} + +type byteWithPtrMarshalText byte + +func (b *byteWithPtrMarshalText) MarshalText() ([]byte, error) { + return byteWithMarshalText(*b).MarshalText() +} + +func (b *byteWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*byteWithMarshalText)(b).UnmarshalText(data) +} + +type intWithMarshalJSON int + +func (b intWithMarshalJSON) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf(`"Z%.2x"`, int(b))), nil +} + +func (b *intWithMarshalJSON) UnmarshalJSON(data []byte) error { + if len(data) != 5 || data[0] != '"' || data[1] != 'Z' || data[4] != '"' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[2:4]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalJSON(i) + return nil +} + +type intWithPtrMarshalJSON int + +func (b *intWithPtrMarshalJSON) MarshalJSON() ([]byte, error) { + return intWithMarshalJSON(*b).MarshalJSON() +} + +func (b *intWithPtrMarshalJSON) UnmarshalJSON(data []byte) error { + return (*intWithMarshalJSON)(b).UnmarshalJSON(data) +} + +type intWithMarshalText int + +func (b intWithMarshalText) MarshalText() ([]byte, error) { + return []byte(fmt.Sprintf(`Z%.2x`, int(b))), nil +} + +func (b *intWithMarshalText) UnmarshalText(data []byte) error { + if len(data) != 3 || data[0] != 'Z' { + return fmt.Errorf("bad quoted string") + } + i, err := strconv.ParseInt(string(data[1:3]), 16, 8) + if err != nil { + return fmt.Errorf("bad hex") + } + *b = intWithMarshalText(i) + return nil +} + +type intWithPtrMarshalText int + +func (b *intWithPtrMarshalText) MarshalText() ([]byte, error) { + return intWithMarshalText(*b).MarshalText() +} + +func (b *intWithPtrMarshalText) UnmarshalText(data []byte) error { + return (*intWithMarshalText)(b).UnmarshalText(data) +} + +type mapStringToStringData struct { + Data map[string]string `json:"data"` +} + +type unmarshalTest struct { + in string + ptr any // new(type) + out any + err error + useNumber bool + golden bool + disallowUnknownFields bool +} + +type B struct { + B bool `json:",string"` +} + +type DoublePtr struct { + I **int + J **int +} + +var unmarshalTests = []unmarshalTest{ + // basic types + {in: `true`, ptr: new(bool), out: true}, + {in: `1`, ptr: new(int), out: 1}, + {in: `1.2`, ptr: new(float64), out: 1.2}, + {in: `-5`, ptr: new(int16), out: int16(-5)}, + {in: `2`, ptr: new(Number), out: Number("2"), useNumber: true}, + {in: `2`, ptr: new(Number), out: Number("2")}, + {in: `2`, ptr: new(any), out: float64(2.0)}, + {in: `2`, ptr: new(any), out: Number("2"), useNumber: true}, + {in: `"a\u1234"`, ptr: new(string), out: "a\u1234"}, + {in: `"http:\/\/"`, ptr: new(string), out: "http://"}, + {in: `"g-clef: \uD834\uDD1E"`, ptr: new(string), out: "g-clef: \U0001D11E"}, + {in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"}, + {in: "null", ptr: new(any), out: nil}, + {in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf(""), 7, "T", "X"}}, + {in: `{"X": 23}`, ptr: new(T), out: T{}, err: &UnmarshalTypeError{"number", reflect.TypeOf(""), 8, "T", "X"}}, {in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {in: `{"x": 1}`, ptr: new(tx), out: tx{}}, + {in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true}, + {in: `{"S": 23}`, ptr: new(W), out: W{}, err: &UnmarshalTypeError{"number", reflect.TypeOf(SS("")), 0, "W", "S"}}, + {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}}, + {in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true}, + {in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsFloat64}, + {in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(any), out: ifaceNumAsNumber, useNumber: true}, + + // raw values with whitespace + {in: "\n true ", ptr: new(bool), out: true}, + {in: "\t 1 ", ptr: new(int), out: 1}, + {in: "\r 1.2 ", ptr: new(float64), out: 1.2}, + {in: "\t -5 \n", ptr: new(int16), out: int16(-5)}, + {in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"}, + + // Z has a "-" tag. + {in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}}, + {in: `{"Y": 1, "Z": 2}`, ptr: new(T), err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true}, + + {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + {in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}}, + {in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}}, + {in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, + + // syntax errors + {in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, + {in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, + {in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, + {in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: 5}}, + {in: `{"F3": -}`, ptr: new(V), out: V{F3: Number("-")}, err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: 9}}, + + // raw value errors + {in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}}, + {in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}}, + {in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}}, + {in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, + {in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}}, + + // array tests + {in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}}, + {in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}}, + {in: `[1, 2, 3]`, ptr: new([5]int), out: [5]int{1, 2, 3, 0, 0}}, + {in: `[1, 2, 3]`, ptr: new(MustNotUnmarshalJSON), err: errors.New("MustNotUnmarshalJSON was used")}, + + // empty array to interface test + {in: `[]`, ptr: new([]any), out: []any{}}, + {in: `null`, ptr: new([]any), out: []any(nil)}, + {in: `{"T":[]}`, ptr: new(map[string]any), out: map[string]any{"T": []any{}}}, + {in: `{"T":null}`, ptr: new(map[string]any), out: map[string]any{"T": any(nil)}}, + + // composite tests + {in: allValueIndent, ptr: new(All), out: allValue}, + {in: allValueCompact, ptr: new(All), out: allValue}, + {in: allValueIndent, ptr: new(*All), out: &allValue}, + {in: allValueCompact, ptr: new(*All), out: &allValue}, + {in: pallValueIndent, ptr: new(All), out: pallValue}, + {in: pallValueCompact, ptr: new(All), out: pallValue}, + {in: pallValueIndent, ptr: new(*All), out: &pallValue}, + {in: pallValueCompact, ptr: new(*All), out: &pallValue}, + + // unmarshal interface test + {in: `{"T":false}`, ptr: new(unmarshaler), out: umtrue}, // use "false" so test will fail if custom unmarshaler is not called + {in: `{"T":false}`, ptr: new(*unmarshaler), out: &umtrue}, + {in: `[{"T":false}]`, ptr: new([]unmarshaler), out: umslice}, + {in: `[{"T":false}]`, ptr: new(*[]unmarshaler), out: &umslice}, + {in: `{"M":{"T":"x:y"}}`, ptr: new(ustruct), out: umstruct}, + + // UnmarshalText interface test + {in: `"x:y"`, ptr: new(unmarshalerText), out: umtrueXY}, + {in: `"x:y"`, ptr: new(*unmarshalerText), out: &umtrueXY}, + {in: `["x:y"]`, ptr: new([]unmarshalerText), out: umsliceXY}, + {in: `["x:y"]`, ptr: new(*[]unmarshalerText), out: &umsliceXY}, + {in: `{"M":"x:y"}`, ptr: new(ustructText), out: umstructXY}, + + // integer-keyed map test + { + in: `{"-1":"a","0":"b","1":"c"}`, + ptr: new(map[int]string), + out: map[int]string{-1: "a", 0: "b", 1: "c"}, + }, + { + in: `{"0":"a","10":"c","9":"b"}`, + ptr: new(map[u8]string), + out: map[u8]string{0: "a", 9: "b", 10: "c"}, + }, + { + in: `{"-9223372036854775808":"min","9223372036854775807":"max"}`, + ptr: new(map[int64]string), + out: map[int64]string{math.MinInt64: "min", math.MaxInt64: "max"}, + }, + { + in: `{"18446744073709551615":"max"}`, + ptr: new(map[uint64]string), + out: map[uint64]string{math.MaxUint64: "max"}, + }, + { + in: `{"0":false,"10":true}`, + ptr: new(map[uintptr]bool), + out: map[uintptr]bool{0: false, 10: true}, + }, + + // Check that MarshalText and UnmarshalText take precedence + // over default integer handling in map keys. + { + in: `{"u2":4}`, + ptr: new(map[u8marshal]int), + out: map[u8marshal]int{2: 4}, + }, + { + in: `{"2":4}`, + ptr: new(map[u8marshal]int), + err: errMissingU8Prefix, + }, + + // integer-keyed map errors + { + in: `{"abc":"abc"}`, + ptr: new(map[int]string), + err: &UnmarshalTypeError{Value: "number abc", Type: reflect.TypeOf(0), Offset: 2}, + }, + { + in: `{"256":"abc"}`, + ptr: new(map[uint8]string), + err: &UnmarshalTypeError{Value: "number 256", Type: reflect.TypeOf(uint8(0)), Offset: 2}, + }, + { + in: `{"128":"abc"}`, + ptr: new(map[int8]string), + err: &UnmarshalTypeError{Value: "number 128", Type: reflect.TypeOf(int8(0)), Offset: 2}, + }, + { + in: `{"-1":"abc"}`, + ptr: new(map[uint8]string), + err: &UnmarshalTypeError{Value: "number -1", Type: reflect.TypeOf(uint8(0)), Offset: 2}, + }, + { + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[int]int), + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeOf(int(0)), Offset: 7}, + }, + { + in: `{"F":{"a":2,"3":4}}`, + ptr: new(map[string]map[uint]int), + err: &UnmarshalTypeError{Value: "number a", Type: reflect.TypeOf(uint(0)), Offset: 7}, + }, + + // Map keys can be encoding.TextUnmarshalers. + {in: `{"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + // If multiple values for the same key exists, only the most recent value is used. + {in: `{"x:y":false,"x:y":true}`, ptr: new(map[unmarshalerText]bool), out: ummapXY}, + + { + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + out: Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + }, + }, + { + in: `{"hello": 1}`, + ptr: new(Ambig), + out: Ambig{First: 1}, + }, + + { + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + out: S5{S8: S8{S9: S9{Y: 2}}}, + }, + { + in: `{"X": 1,"Y":2}`, + ptr: new(S5), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, + }, + { + in: `{"X": 1,"Y":2}`, + ptr: new(S10), + err: fmt.Errorf("json: unknown field \"X\""), + disallowUnknownFields: true, + }, + { + in: `{"I": 0, "I": null, "J": null}`, + ptr: new(DoublePtr), + out: DoublePtr{I: nil, J: nil}, + }, + + // invalid UTF-8 is coerced to valid UTF-8. + { + in: "\"hello\xffworld\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + in: "\"hello\xc2\xc2world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\xc2\xffworld\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\\ud800world\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"", + ptr: new(string), + out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld", + }, + + // Used to be issue 8305, but time.Time implements encoding.TextUnmarshaler so this works now. + { + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[time.Time]string), + out: map[time.Time]string{time.Date(2009, 11, 10, 23, 0, 0, 0, time.UTC): "hello world"}, + }, + + // issue 8305 + { + in: `{"2009-11-10T23:00:00Z": "hello world"}`, + ptr: new(map[Point]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[Point]string{}), Offset: 1}, + }, + { + in: `{"asdf": "hello world"}`, + ptr: new(map[unmarshaler]string), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(map[unmarshaler]string{}), Offset: 1}, + }, + + // related to issue 13783. + // Go 1.7 changed marshaling a slice of typed byte to use the methods on the byte type, + // similar to marshaling a slice of typed int. + // These tests check that, assuming the byte type also has valid decoding methods, + // either the old base64 string encoding or the new per-element encoding can be + // successfully unmarshaled. The custom unmarshalers were accessible in earlier + // versions of Go, even though the custom marshaler was not. + { + in: `"AQID"`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalJSON), + out: []byteWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + in: `"AQID"`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithMarshalText), + out: []byteWithMarshalText{1, 2, 3}, + golden: true, + }, + { + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalJSON), + out: []byteWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + in: `"AQID"`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]byteWithPtrMarshalText), + out: []byteWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + // ints work with the marshaler but not the base64 []byte case + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalJSON), + out: []intWithMarshalJSON{1, 2, 3}, + golden: true, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithMarshalText), + out: []intWithMarshalText{1, 2, 3}, + golden: true, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalJSON), + out: []intWithPtrMarshalJSON{1, 2, 3}, + golden: true, + }, + { + in: `["Z01","Z02","Z03"]`, + ptr: new([]intWithPtrMarshalText), + out: []intWithPtrMarshalText{1, 2, 3}, + golden: true, + }, + + {in: `0.000001`, ptr: new(float64), out: 0.000001, golden: true}, + {in: `1e-7`, ptr: new(float64), out: 1e-7, golden: true}, + {in: `100000000000000000000`, ptr: new(float64), out: 100000000000000000000.0, golden: true}, + {in: `1e+21`, ptr: new(float64), out: 1e21, golden: true}, + {in: `-0.000001`, ptr: new(float64), out: -0.000001, golden: true}, + {in: `-1e-7`, ptr: new(float64), out: -1e-7, golden: true}, + {in: `-100000000000000000000`, ptr: new(float64), out: -100000000000000000000.0, golden: true}, + {in: `-1e+21`, ptr: new(float64), out: -1e21, golden: true}, + {in: `999999999999999900000`, ptr: new(float64), out: 999999999999999900000.0, golden: true}, + {in: `9007199254740992`, ptr: new(float64), out: 9007199254740992.0, golden: true}, + {in: `9007199254740993`, ptr: new(float64), out: 9007199254740992.0, golden: false}, + + { + in: `{"V": {"F2": "hello"}}`, + ptr: new(VOuter), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeOf(int32(0)), + Offset: 20, + }, + }, + { + in: `{"V": {"F4": {}, "F2": "hello"}}`, + ptr: new(VOuter), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "V", + Field: "V.F2", + Type: reflect.TypeOf(int32(0)), + Offset: 30, + }, + }, + + // issue 15146. + // invalid inputs in wrongStringTests below. + {in: `{"B":"true"}`, ptr: new(B), out: B{true}, golden: true}, + {in: `{"B":"false"}`, ptr: new(B), out: B{false}, golden: true}, + {in: `{"B": "maybe"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "maybe" into bool`)}, + {in: `{"B": "tru"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "tru" into bool`)}, + {in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)}, + {in: `{"B": "null"}`, ptr: new(B), out: B{false}}, + {in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)}, + {in: `{"B": [2, 3]}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal unquoted value into bool`)}, + + // additional tests for disallowUnknownFields + { + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12 + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18, + "extra": true + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + { + in: `{ + "Level0": 1, + "Level1b": 2, + "Level1c": 3, + "x": 4, + "Level1a": 5, + "LEVEL1B": 6, + "e": { + "Level1a": 8, + "Level1b": 9, + "Level1c": 10, + "Level1d": 11, + "x": 12, + "extra": null + }, + "Loop1": 13, + "Loop2": 14, + "X": 15, + "Y": 16, + "Z": 17, + "Q": 18 + }`, + ptr: new(Top), + err: fmt.Errorf("json: unknown field \"extra\""), + disallowUnknownFields: true, + }, + // issue 26444 + // UnmarshalTypeError without field & struct values + { + in: `{"data":{"test1": "bob", "test2": 123}}`, + ptr: new(mapStringToStringData), + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(""), Offset: 37, Struct: "mapStringToStringData", Field: "data"}, + }, + { + in: `{"data":{"test1": 123, "test2": "bob"}}`, + ptr: new(mapStringToStringData), + err: &UnmarshalTypeError{Value: "number", Type: reflect.TypeOf(""), Offset: 21, Struct: "mapStringToStringData", Field: "data"}, + }, + + // trying to decode JSON arrays or objects via TextUnmarshaler + { + in: `[1, 2, 3]`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "array", Type: reflect.TypeOf(&MustNotUnmarshalText{}), Offset: 1}, + }, + { + in: `{"foo": "bar"}`, + ptr: new(MustNotUnmarshalText), + err: &UnmarshalTypeError{Value: "object", Type: reflect.TypeOf(&MustNotUnmarshalText{}), Offset: 1}, + }, + // #22369 + { + in: `{"PP": {"T": {"Y": "bad-type"}}}`, + ptr: new(P), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "PP.T.Y", + Type: reflect.TypeOf(int(0)), + Offset: 29, + }, + }, + { + in: `{"Ts": [{"Y": 1}, {"Y": 2}, {"Y": "bad-type"}]}`, + ptr: new(PP), + err: &UnmarshalTypeError{ + Value: "string", + Struct: "T", + Field: "Ts.Y", + Type: reflect.TypeOf(int(0)), + Offset: 29, + }, + }, + // #14702 + { + in: `invalid`, + ptr: new(Number), + err: &SyntaxError{ + msg: "invalid character 'i' looking for beginning of value", + Offset: 1, + }, + }, + { + in: `"invalid"`, + ptr: new(Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(struct{ A Number }), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(struct { + A Number `json:",string"` + }), + err: fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into json.Number", `invalid`), + }, + { + in: `{"A":"invalid"}`, + ptr: new(map[string]Number), + err: fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", `"invalid"`), + }, +} + +func TestMarshal(t *testing.T) { + b, err := Marshal(allValue) + if err != nil { + t.Fatalf("Marshal allValue: %v", err) + } + if string(b) != allValueCompact { + t.Errorf("Marshal allValueCompact") + diff(t, b, []byte(allValueCompact)) + return + } + + b, err = Marshal(pallValue) + if err != nil { + t.Fatalf("Marshal pallValue: %v", err) + } + if string(b) != pallValueCompact { + t.Errorf("Marshal pallValueCompact") + diff(t, b, []byte(pallValueCompact)) + return + } +} + +var badUTF8 = []struct { + in, out string +}{ + {"hello\xffworld", `"hello\ufffdworld"`}, + {"", `""`}, + {"\xff", `"\ufffd"`}, + {"\xff\xff", `"\ufffd\ufffd"`}, + {"a\xffb", `"a\ufffdb"`}, + {"\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`}, +} + +func TestMarshalBadUTF8(t *testing.T) { + for _, tt := range badUTF8 { + b, err := Marshal(tt.in) + if string(b) != tt.out || err != nil { + t.Errorf("Marshal(%q) = %#q, %v, want %#q, nil", tt.in, b, err, tt.out) + } + } +} + +func TestMarshalNumberZeroVal(t *testing.T) { + var n Number + out, err := Marshal(n) + if err != nil { + t.Fatal(err) + } + outStr := string(out) + if outStr != "0" { + t.Fatalf("Invalid zero val for Number: %q", outStr) + } +} + +func TestMarshalEmbeds(t *testing.T) { + top := &Top{ + Level0: 1, + Embed0: Embed0{ + Level1b: 2, + Level1c: 3, + }, + Embed0a: &Embed0a{ + Level1a: 5, + Level1b: 6, + }, + Embed0b: &Embed0b{ + Level1a: 8, + Level1b: 9, + Level1c: 10, + Level1d: 11, + Level1e: 12, + }, + Loop: Loop{ + Loop1: 13, + Loop2: 14, + }, + Embed0p: Embed0p{ + Point: image.Point{X: 15, Y: 16}, + }, + Embed0q: Embed0q{ + Point: Point{Z: 17}, + }, + embed: embed{ + Q: 18, + }, + } + b, err := Marshal(top) + if err != nil { + t.Fatal(err) + } + want := "{\"Level0\":1,\"Level1b\":2,\"Level1c\":3,\"Level1a\":5,\"LEVEL1B\":6,\"e\":{\"Level1a\":8,\"Level1b\":9,\"Level1c\":10,\"Level1d\":11,\"x\":12},\"Loop1\":13,\"Loop2\":14,\"X\":15,\"Y\":16,\"Z\":17,\"Q\":18}" + if string(b) != want { + t.Errorf("Wrong marshal result.\n got: %q\nwant: %q", b, want) + } +} + +func equalError(a, b error) bool { + if a == nil { + return b == nil + } + if b == nil { + return a == nil + } + return a.Error() == b.Error() +} + +func TestUnmarshal(t *testing.T) { + for i, tt := range unmarshalTests { + var scan scanner + in := []byte(tt.in) + if err := checkValid(in, &scan); err != nil { + if !equalError(err, tt.err) { + t.Errorf("#%d: checkValid: %#v", i, err) + continue + } + } + if tt.ptr == nil { + continue + } + + typ := reflect.TypeOf(tt.ptr) + if typ.Kind() != reflect.Pointer { + t.Errorf("#%d: unmarshalTest.ptr %T is not a pointer type", i, tt.ptr) + continue + } + typ = typ.Elem() + + // v = new(right-type) + v := reflect.New(typ) + + if !reflect.DeepEqual(tt.ptr, v.Interface()) { + // There's no reason for ptr to point to non-zero data, + // as we decode into new(right-type), so the data is + // discarded. + // This can easily mean tests that silently don't test + // what they should. To test decoding into existing + // data, see TestPrefilled. + t.Errorf("#%d: unmarshalTest.ptr %#v is not a pointer to a zero value", i, tt.ptr) + continue + } + + dec := NewDecoder(bytes.NewReader(in)) + if tt.useNumber { + dec.UseNumber() + } + if tt.disallowUnknownFields { + dec.DisallowUnknownFields() + } + if err := dec.Decode(v.Interface()); !equalError(err, tt.err) { + t.Errorf("#%d: %v, want %v", i, err, tt.err) + continue + } else if err != nil { + continue + } + if !reflect.DeepEqual(v.Elem().Interface(), tt.out) { + t.Errorf("#%d: mismatch\nhave: %#+v\nwant: %#+v", i, v.Elem().Interface(), tt.out) + data, _ := Marshal(v.Elem().Interface()) + println(string(data)) + data, _ = Marshal(tt.out) + println(string(data)) + continue + } + + // Check round trip also decodes correctly. + if tt.err == nil { + enc, err := Marshal(v.Interface()) + if err != nil { + t.Errorf("#%d: error re-marshaling: %v", i, err) + continue + } + if tt.golden && !bytes.Equal(enc, in) { + t.Errorf("#%d: remarshal mismatch:\nhave: %s\nwant: %s", i, enc, in) + } + vv := reflect.New(reflect.TypeOf(tt.ptr).Elem()) + dec = NewDecoder(bytes.NewReader(enc)) + if tt.useNumber { + dec.UseNumber() + } + if err := dec.Decode(vv.Interface()); err != nil { + t.Errorf("#%d: error re-unmarshaling %#q: %v", i, enc, err) + continue + } + if !reflect.DeepEqual(v.Elem().Interface(), vv.Elem().Interface()) { + t.Errorf("#%d: mismatch\nhave: %#+v\nwant: %#+v", i, v.Elem().Interface(), vv.Elem().Interface()) + t.Errorf(" In: %q", strings.Map(noSpace, string(in))) + t.Errorf("Marshal: %q", strings.Map(noSpace, string(enc))) + continue + } + } + } +} + +func TestUnmarshalMarshal(t *testing.T) { + initBig() + var v any + if err := Unmarshal(jsonBig, &v); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + b, err := Marshal(v) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if !bytes.Equal(jsonBig, b) { + t.Errorf("Marshal jsonBig") + diff(t, b, jsonBig) + return + } +} + +var numberTests = []struct { + in string + i int64 + intErr string + f float64 + floatErr string +}{ + {in: "-1.23e1", intErr: "strconv.ParseInt: parsing \"-1.23e1\": invalid syntax", f: -1.23e1}, + {in: "-12", i: -12, f: -12.0}, + {in: "1e1000", intErr: "strconv.ParseInt: parsing \"1e1000\": invalid syntax", floatErr: "strconv.ParseFloat: parsing \"1e1000\": value out of range"}, +} + +// Independent of Decode, basic coverage of the accessors in Number +func TestNumberAccessors(t *testing.T) { + for _, tt := range numberTests { + n := Number(tt.in) + if s := n.String(); s != tt.in { + t.Errorf("Number(%q).String() is %q", tt.in, s) + } + if i, err := n.Int64(); err == nil && tt.intErr == "" && i != tt.i { + t.Errorf("Number(%q).Int64() is %d", tt.in, i) + } else if (err == nil && tt.intErr != "") || (err != nil && err.Error() != tt.intErr) { + t.Errorf("Number(%q).Int64() wanted error %q but got: %v", tt.in, tt.intErr, err) + } + if f, err := n.Float64(); err == nil && tt.floatErr == "" && f != tt.f { + t.Errorf("Number(%q).Float64() is %g", tt.in, f) + } else if (err == nil && tt.floatErr != "") || (err != nil && err.Error() != tt.floatErr) { + t.Errorf("Number(%q).Float64() wanted error %q but got: %v", tt.in, tt.floatErr, err) + } + } +} + +func TestLargeByteSlice(t *testing.T) { + s0 := make([]byte, 2000) + for i := range s0 { + s0[i] = byte(i) + } + b, err := Marshal(s0) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var s1 []byte + if err := Unmarshal(b, &s1); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if !bytes.Equal(s0, s1) { + t.Errorf("Marshal large byte slice") + diff(t, s0, s1) + } +} + +type Xint struct { + X int +} + +func TestUnmarshalInterface(t *testing.T) { + var xint Xint + var i any = &xint + if err := Unmarshal([]byte(`{"X":1}`), &i); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("Did not write to xint") + } +} + +func TestUnmarshalPtrPtr(t *testing.T) { + var xint Xint + pxint := &xint + if err := Unmarshal([]byte(`{"X":1}`), &pxint); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if xint.X != 1 { + t.Fatalf("Did not write to xint") + } +} + +func TestEscape(t *testing.T) { + const input = `"foobar"<html>` + " [\u2028 \u2029]" + const expected = `"\"foobar\"\u003chtml\u003e [\u2028 \u2029]"` + b, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal error: %v", err) + } + if s := string(b); s != expected { + t.Errorf("Encoding of [%s]:\n got [%s]\nwant [%s]", input, s, expected) + } +} + +// WrongString is a struct that's misusing the ,string modifier. +type WrongString struct { + Message string `json:"result,string"` +} + +type wrongStringTest struct { + in, err string +} + +var wrongStringTests = []wrongStringTest{ + {`{"result":"x"}`, `json: invalid use of ,string struct tag, trying to unmarshal "x" into string`}, + {`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, + {`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, + {`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, + {`{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`}, + {`{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`}, +} + +// If people misuse the ,string modifier, the error message should be +// helpful, telling the user that they're doing it wrong. +func TestErrorMessageFromMisusedString(t *testing.T) { + for n, tt := range wrongStringTests { + r := strings.NewReader(tt.in) + var s WrongString + err := NewDecoder(r).Decode(&s) + got := fmt.Sprintf("%v", err) + if got != tt.err { + t.Errorf("%d. got err = %q, want %q", n, got, tt.err) + } + } +} + +func noSpace(c rune) rune { + if isSpace(byte(c)) { //only used for ascii + return -1 + } + return c +} + +type All struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + + Foo string `json:"bar"` + Foo2 string `json:"bar2,dummyopt"` + + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + + PBool *bool + PInt *int + PInt8 *int8 + PInt16 *int16 + PInt32 *int32 + PInt64 *int64 + PUint *uint + PUint8 *uint8 + PUint16 *uint16 + PUint32 *uint32 + PUint64 *uint64 + PUintptr *uintptr + PFloat32 *float32 + PFloat64 *float64 + + String string + PString *string + + Map map[string]Small + MapP map[string]*Small + PMap *map[string]Small + PMapP *map[string]*Small + + EmptyMap map[string]Small + NilMap map[string]Small + + Slice []Small + SliceP []*Small + PSlice *[]Small + PSliceP *[]*Small + + EmptySlice []Small + NilSlice []Small + + StringSlice []string + ByteSlice []byte + + Small Small + PSmall *Small + PPSmall **Small + + Interface any + PInterface *any + + unexported int +} + +type Small struct { + Tag string +} + +var allValue = All{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Uintptr: 12, + Float32: 14.1, + Float64: 15.1, + Foo: "foo", + Foo2: "foo2", + IntStr: 42, + UintptrStr: 44, + String: "16", + Map: map[string]Small{ + "17": {Tag: "tag17"}, + "18": {Tag: "tag18"}, + }, + MapP: map[string]*Small{ + "19": {Tag: "tag19"}, + "20": nil, + }, + EmptyMap: map[string]Small{}, + Slice: []Small{{Tag: "tag20"}, {Tag: "tag21"}}, + SliceP: []*Small{{Tag: "tag22"}, nil, {Tag: "tag23"}}, + EmptySlice: []Small{}, + StringSlice: []string{"str24", "str25", "str26"}, + ByteSlice: []byte{27, 28, 29}, + Small: Small{Tag: "tag30"}, + PSmall: &Small{Tag: "tag31"}, + Interface: 5.2, +} + +var pallValue = All{ + PBool: &allValue.Bool, + PInt: &allValue.Int, + PInt8: &allValue.Int8, + PInt16: &allValue.Int16, + PInt32: &allValue.Int32, + PInt64: &allValue.Int64, + PUint: &allValue.Uint, + PUint8: &allValue.Uint8, + PUint16: &allValue.Uint16, + PUint32: &allValue.Uint32, + PUint64: &allValue.Uint64, + PUintptr: &allValue.Uintptr, + PFloat32: &allValue.Float32, + PFloat64: &allValue.Float64, + PString: &allValue.String, + PMap: &allValue.Map, + PMapP: &allValue.MapP, + PSlice: &allValue.Slice, + PSliceP: &allValue.SliceP, + PPSmall: &allValue.PSmall, + PInterface: &allValue.Interface, +} + +var allValueIndent = `{ + "Bool": true, + "Int": 2, + "Int8": 3, + "Int16": 4, + "Int32": 5, + "Int64": 6, + "Uint": 7, + "Uint8": 8, + "Uint16": 9, + "Uint32": 10, + "Uint64": 11, + "Uintptr": 12, + "Float32": 14.1, + "Float64": 15.1, + "bar": "foo", + "bar2": "foo2", + "IntStr": "42", + "UintptrStr": "44", + "PBool": null, + "PInt": null, + "PInt8": null, + "PInt16": null, + "PInt32": null, + "PInt64": null, + "PUint": null, + "PUint8": null, + "PUint16": null, + "PUint32": null, + "PUint64": null, + "PUintptr": null, + "PFloat32": null, + "PFloat64": null, + "String": "16", + "PString": null, + "Map": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "MapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "PMap": null, + "PMapP": null, + "EmptyMap": {}, + "NilMap": null, + "Slice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "SliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "PSlice": null, + "PSliceP": null, + "EmptySlice": [], + "NilSlice": null, + "StringSlice": [ + "str24", + "str25", + "str26" + ], + "ByteSlice": "Gxwd", + "Small": { + "Tag": "tag30" + }, + "PSmall": { + "Tag": "tag31" + }, + "PPSmall": null, + "Interface": 5.2, + "PInterface": null +}` + +var allValueCompact = strings.Map(noSpace, allValueIndent) + +var pallValueIndent = `{ + "Bool": false, + "Int": 0, + "Int8": 0, + "Int16": 0, + "Int32": 0, + "Int64": 0, + "Uint": 0, + "Uint8": 0, + "Uint16": 0, + "Uint32": 0, + "Uint64": 0, + "Uintptr": 0, + "Float32": 0, + "Float64": 0, + "bar": "", + "bar2": "", + "IntStr": "0", + "UintptrStr": "0", + "PBool": true, + "PInt": 2, + "PInt8": 3, + "PInt16": 4, + "PInt32": 5, + "PInt64": 6, + "PUint": 7, + "PUint8": 8, + "PUint16": 9, + "PUint32": 10, + "PUint64": 11, + "PUintptr": 12, + "PFloat32": 14.1, + "PFloat64": 15.1, + "String": "", + "PString": "16", + "Map": null, + "MapP": null, + "PMap": { + "17": { + "Tag": "tag17" + }, + "18": { + "Tag": "tag18" + } + }, + "PMapP": { + "19": { + "Tag": "tag19" + }, + "20": null + }, + "EmptyMap": null, + "NilMap": null, + "Slice": null, + "SliceP": null, + "PSlice": [ + { + "Tag": "tag20" + }, + { + "Tag": "tag21" + } + ], + "PSliceP": [ + { + "Tag": "tag22" + }, + null, + { + "Tag": "tag23" + } + ], + "EmptySlice": null, + "NilSlice": null, + "StringSlice": null, + "ByteSlice": null, + "Small": { + "Tag": "" + }, + "PSmall": null, + "PPSmall": { + "Tag": "tag31" + }, + "Interface": null, + "PInterface": 5.2 +}` + +var pallValueCompact = strings.Map(noSpace, pallValueIndent) + +func TestRefUnmarshal(t *testing.T) { + type S struct { + // Ref is defined in encode_test.go. + R0 Ref + R1 *Ref + R2 RefText + R3 *RefText + } + want := S{ + R0: 12, + R1: new(Ref), + R2: 13, + R3: new(RefText), + } + *want.R1 = 12 + *want.R3 = 13 + + var got S + if err := Unmarshal([]byte(`{"R0":"ref","R1":"ref","R2":"ref","R3":"ref"}`), &got); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("got %+v, want %+v", got, want) + } +} + +// Test that the empty string doesn't panic decoding when ,string is specified +// Issue 3450 +func TestEmptyString(t *testing.T) { + type T2 struct { + Number1 int `json:",string"` + Number2 int `json:",string"` + } + data := `{"Number1":"1", "Number2":""}` + dec := NewDecoder(strings.NewReader(data)) + var t2 T2 + err := dec.Decode(&t2) + if err == nil { + t.Fatal("Decode: did not return error") + } + if t2.Number1 != 1 { + t.Fatal("Decode: did not set Number1") + } +} + +// Test that a null for ,string is not replaced with the previous quoted string (issue 7046). +// It should also not be an error (issue 2540, issue 8587). +func TestNullString(t *testing.T) { + type T struct { + A int `json:",string"` + B int `json:",string"` + C *int `json:",string"` + } + data := []byte(`{"A": "1", "B": null, "C": null}`) + var s T + s.B = 1 + s.C = new(int) + *s.C = 2 + err := Unmarshal(data, &s) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if s.B != 1 || s.C != nil { + t.Fatalf("after Unmarshal, s.B=%d, s.C=%p, want 1, nil", s.B, s.C) + } +} + +func intp(x int) *int { + p := new(int) + *p = x + return p +} + +func intpp(x *int) **int { + pp := new(*int) + *pp = x + return pp +} + +var interfaceSetTests = []struct { + pre any + json string + post any +}{ + {"foo", `"bar"`, "bar"}, + {"foo", `2`, 2.0}, + {"foo", `true`, true}, + {"foo", `null`, nil}, + + {nil, `null`, nil}, + {new(int), `null`, nil}, + {(*int)(nil), `null`, nil}, + {new(*int), `null`, new(*int)}, + {(**int)(nil), `null`, nil}, + {intp(1), `null`, nil}, + {intpp(nil), `null`, intpp(nil)}, + {intpp(intp(1)), `null`, intpp(nil)}, +} + +func TestInterfaceSet(t *testing.T) { + for _, tt := range interfaceSetTests { + b := struct{ X any }{tt.pre} + blob := `{"X":` + tt.json + `}` + if err := Unmarshal([]byte(blob), &b); err != nil { + t.Errorf("Unmarshal %#q: %v", blob, err) + continue + } + if !reflect.DeepEqual(b.X, tt.post) { + t.Errorf("Unmarshal %#q into %#v: X=%#v, want %#v", blob, tt.pre, b.X, tt.post) + } + } +} + +type NullTest struct { + Bool bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint uint + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Float32 float32 + Float64 float64 + String string + PBool *bool + Map map[string]string + Slice []string + Interface any + + PRaw *RawMessage + PTime *time.Time + PBigInt *big.Int + PText *MustNotUnmarshalText + PBuffer *bytes.Buffer // has methods, just not relevant ones + PStruct *struct{} + + Raw RawMessage + Time time.Time + BigInt big.Int + Text MustNotUnmarshalText + Buffer bytes.Buffer + Struct struct{} +} + +// JSON null values should be ignored for primitives and string values instead of resulting in an error. +// Issue 2540 +func TestUnmarshalNulls(t *testing.T) { + // Unmarshal docs: + // The JSON null value unmarshals into an interface, map, pointer, or slice + // by setting that Go value to nil. Because null is often used in JSON to mean + // ``not present,'' unmarshaling a JSON null into any other Go type has no effect + // on the value and produces no error. + + jsonData := []byte(`{ + "Bool" : null, + "Int" : null, + "Int8" : null, + "Int16" : null, + "Int32" : null, + "Int64" : null, + "Uint" : null, + "Uint8" : null, + "Uint16" : null, + "Uint32" : null, + "Uint64" : null, + "Float32" : null, + "Float64" : null, + "String" : null, + "PBool": null, + "Map": null, + "Slice": null, + "Interface": null, + "PRaw": null, + "PTime": null, + "PBigInt": null, + "PText": null, + "PBuffer": null, + "PStruct": null, + "Raw": null, + "Time": null, + "BigInt": null, + "Text": null, + "Buffer": null, + "Struct": null + }`) + nulls := NullTest{ + Bool: true, + Int: 2, + Int8: 3, + Int16: 4, + Int32: 5, + Int64: 6, + Uint: 7, + Uint8: 8, + Uint16: 9, + Uint32: 10, + Uint64: 11, + Float32: 12.1, + Float64: 13.1, + String: "14", + PBool: new(bool), + Map: map[string]string{}, + Slice: []string{}, + Interface: new(MustNotUnmarshalJSON), + PRaw: new(RawMessage), + PTime: new(time.Time), + PBigInt: new(big.Int), + PText: new(MustNotUnmarshalText), + PStruct: new(struct{}), + PBuffer: new(bytes.Buffer), + Raw: RawMessage("123"), + Time: time.Unix(123456789, 0), + BigInt: *big.NewInt(123), + } + + before := nulls.Time.String() + + err := Unmarshal(jsonData, &nulls) + if err != nil { + t.Errorf("Unmarshal of null values failed: %v", err) + } + if !nulls.Bool || nulls.Int != 2 || nulls.Int8 != 3 || nulls.Int16 != 4 || nulls.Int32 != 5 || nulls.Int64 != 6 || + nulls.Uint != 7 || nulls.Uint8 != 8 || nulls.Uint16 != 9 || nulls.Uint32 != 10 || nulls.Uint64 != 11 || + nulls.Float32 != 12.1 || nulls.Float64 != 13.1 || nulls.String != "14" { + t.Errorf("Unmarshal of null values affected primitives") + } + + if nulls.PBool != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBool") + } + if nulls.Map != nil { + t.Errorf("Unmarshal of null did not clear nulls.Map") + } + if nulls.Slice != nil { + t.Errorf("Unmarshal of null did not clear nulls.Slice") + } + if nulls.Interface != nil { + t.Errorf("Unmarshal of null did not clear nulls.Interface") + } + if nulls.PRaw != nil { + t.Errorf("Unmarshal of null did not clear nulls.PRaw") + } + if nulls.PTime != nil { + t.Errorf("Unmarshal of null did not clear nulls.PTime") + } + if nulls.PBigInt != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBigInt") + } + if nulls.PText != nil { + t.Errorf("Unmarshal of null did not clear nulls.PText") + } + if nulls.PBuffer != nil { + t.Errorf("Unmarshal of null did not clear nulls.PBuffer") + } + if nulls.PStruct != nil { + t.Errorf("Unmarshal of null did not clear nulls.PStruct") + } + + if string(nulls.Raw) != "null" { + t.Errorf("Unmarshal of RawMessage null did not record null: %v", string(nulls.Raw)) + } + if nulls.Time.String() != before { + t.Errorf("Unmarshal of time.Time null set time to %v", nulls.Time.String()) + } + if nulls.BigInt.String() != "123" { + t.Errorf("Unmarshal of big.Int null set int to %v", nulls.BigInt.String()) + } +} + +type MustNotUnmarshalJSON struct{} + +func (x MustNotUnmarshalJSON) UnmarshalJSON(data []byte) error { + return errors.New("MustNotUnmarshalJSON was used") +} + +type MustNotUnmarshalText struct{} + +func (x MustNotUnmarshalText) UnmarshalText(text []byte) error { + return errors.New("MustNotUnmarshalText was used") +} + +func TestStringKind(t *testing.T) { + type stringKind string + + var m1, m2 map[stringKind]int + m1 = map[stringKind]int{ + "foo": 42, + } + + data, err := Marshal(m1) + if err != nil { + t.Errorf("Unexpected error marshaling: %v", err) + } + + err = Unmarshal(data, &m2) + if err != nil { + t.Errorf("Unexpected error unmarshaling: %v", err) + } + + if !reflect.DeepEqual(m1, m2) { + t.Error("Items should be equal after encoding and then decoding") + } +} + +// Custom types with []byte as underlying type could not be marshaled +// and then unmarshaled. +// Issue 8962. +func TestByteKind(t *testing.T) { + type byteKind []byte + + a := byteKind("hello") + + data, err := Marshal(a) + if err != nil { + t.Error(err) + } + var b byteKind + err = Unmarshal(data, &b) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, b) { + t.Errorf("expected %v == %v", a, b) + } +} + +// The fix for issue 8962 introduced a regression. +// Issue 12921. +func TestSliceOfCustomByte(t *testing.T) { + type Uint8 uint8 + + a := []Uint8("hello") + + data, err := Marshal(a) + if err != nil { + t.Fatal(err) + } + var b []Uint8 + err = Unmarshal(data, &b) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, b) { + t.Fatalf("expected %v == %v", a, b) + } +} + +var decodeTypeErrorTests = []struct { + dest any + src string +}{ + {new(string), `{"user": "name"}`}, // issue 4628. + {new(error), `{}`}, // issue 4222 + {new(error), `[]`}, + {new(error), `""`}, + {new(error), `123`}, + {new(error), `true`}, +} + +func TestUnmarshalTypeError(t *testing.T) { + for _, item := range decodeTypeErrorTests { + err := Unmarshal([]byte(item.src), item.dest) + if _, ok := err.(*UnmarshalTypeError); !ok { + t.Errorf("expected type error for Unmarshal(%q, type %T): got %T", + item.src, item.dest, err) + } + } +} + +var unmarshalSyntaxTests = []string{ + "tru", + "fals", + "nul", + "123e", + `"hello`, + `[1,2,3`, + `{"key":1`, + `{"key":1,`, +} + +func TestUnmarshalSyntax(t *testing.T) { + var x any + for _, src := range unmarshalSyntaxTests { + err := Unmarshal([]byte(src), &x) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("expected syntax error for Unmarshal(%q): got %T", src, err) + } + } +} + +// Test handling of unexported fields that should be ignored. +// Issue 4660 +type unexportedFields struct { + Name string + m map[string]any `json:"-"` + m2 map[string]any `json:"abcd"` + + s []int `json:"-"` +} + +func TestUnmarshalUnexported(t *testing.T) { + input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` + want := &unexportedFields{Name: "Bob"} + + out := &unexportedFields{} + err := Unmarshal([]byte(input), out) + if err != nil { + t.Errorf("got error %v, expected nil", err) + } + if !reflect.DeepEqual(out, want) { + t.Errorf("got %q, want %q", out, want) + } +} + +// Time3339 is a time.Time which encodes to and from JSON +// as an RFC 3339 time in UTC. +type Time3339 time.Time + +func (t *Time3339) UnmarshalJSON(b []byte) error { + if len(b) < 2 || b[0] != '"' || b[len(b)-1] != '"' { + return fmt.Errorf("types: failed to unmarshal non-string value %q as an RFC 3339 time", b) + } + tm, err := time.Parse(time.RFC3339, string(b[1:len(b)-1])) + if err != nil { + return err + } + *t = Time3339(tm) + return nil +} + +func TestUnmarshalJSONLiteralError(t *testing.T) { + var t3 Time3339 + err := Unmarshal([]byte(`"0000-00-00T00:00:00Z"`), &t3) + if err == nil { + t.Fatalf("expected error; got time %v", time.Time(t3)) + } + if !strings.Contains(err.Error(), "range") { + t.Errorf("got err = %v; want out of range error", err) + } +} + +// Test that extra object elements in an array do not result in a +// "data changing underfoot" error. +// Issue 3717 +func TestSkipArrayObjects(t *testing.T) { + json := `[{}]` + var dest [0]any + + err := Unmarshal([]byte(json), &dest) + if err != nil { + t.Errorf("got error %q, want nil", err) + } +} + +// Test semantics of pre-filled data, such as struct fields, map elements, +// slices, and arrays. +// Issues 4900 and 8837, among others. +func TestPrefilled(t *testing.T) { + // Values here change, cannot reuse table across runs. + var prefillTests = []struct { + in string + ptr any + out any + }{ + { + in: `{"X": 1, "Y": 2}`, + ptr: &XYZ{X: float32(3), Y: int16(4), Z: 1.5}, + out: &XYZ{X: float64(1), Y: float64(2), Z: 1.5}, + }, + { + in: `{"X": 1, "Y": 2}`, + ptr: &map[string]any{"X": float32(3), "Y": int16(4), "Z": 1.5}, + out: &map[string]any{"X": float64(1), "Y": float64(2), "Z": 1.5}, + }, + { + in: `[2]`, + ptr: &[]int{1}, + out: &[]int{2}, + }, + { + in: `[2, 3]`, + ptr: &[]int{1}, + out: &[]int{2, 3}, + }, + { + in: `[2, 3]`, + ptr: &[...]int{1}, + out: &[...]int{2}, + }, + { + in: `[3]`, + ptr: &[...]int{1, 2}, + out: &[...]int{3, 0}, + }, + } + + for _, tt := range prefillTests { + ptrstr := fmt.Sprintf("%v", tt.ptr) + err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here + if err != nil { + t.Errorf("Unmarshal: %v", err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("Unmarshal(%#q, %s): have %v, want %v", tt.in, ptrstr, tt.ptr, tt.out) + } + } +} + +var invalidUnmarshalTests = []struct { + v any + want string +}{ + {nil, "json: Unmarshal(nil)"}, + {struct{}{}, "json: Unmarshal(non-pointer struct {})"}, + {(*int)(nil), "json: Unmarshal(nil *int)"}, +} + +func TestInvalidUnmarshal(t *testing.T) { + buf := []byte(`{"a":"1"}`) + for _, tt := range invalidUnmarshalTests { + err := Unmarshal(buf, tt.v) + if err == nil { + t.Errorf("Unmarshal expecting error, got nil") + continue + } + if got := err.Error(); got != tt.want { + t.Errorf("Unmarshal = %q; want %q", got, tt.want) + } + } +} + +var invalidUnmarshalTextTests = []struct { + v any + want string +}{ + {nil, "json: Unmarshal(nil)"}, + {struct{}{}, "json: Unmarshal(non-pointer struct {})"}, + {(*int)(nil), "json: Unmarshal(nil *int)"}, + {new(net.IP), "json: cannot unmarshal number into Go value of type *net.IP"}, +} + +func TestInvalidUnmarshalText(t *testing.T) { + buf := []byte(`123`) + for _, tt := range invalidUnmarshalTextTests { + err := Unmarshal(buf, tt.v) + if err == nil { + t.Errorf("Unmarshal expecting error, got nil") + continue + } + if got := err.Error(); got != tt.want { + t.Errorf("Unmarshal = %q; want %q", got, tt.want) + } + } +} + +// Test that string option is ignored for invalid types. +// Issue 9812. +func TestInvalidStringOption(t *testing.T) { + num := 0 + item := struct { + T time.Time `json:",string"` + M map[string]string `json:",string"` + S []string `json:",string"` + A [1]string `json:",string"` + I any `json:",string"` + P *int `json:",string"` + }{M: make(map[string]string), S: make([]string, 0), I: num, P: &num} + + data, err := Marshal(item) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + + err = Unmarshal(data, &item) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } +} + +// Test unmarshal behavior with regards to embedded unexported structs. +// +// (Issue 21357) If the embedded struct is a pointer and is unallocated, +// this returns an error because unmarshal cannot set the field. +// +// (Issue 24152) If the embedded struct is given an explicit name, +// ensure that the normal unmarshal logic does not panic in reflect. +// +// (Issue 28145) If the embedded struct is given an explicit name and has +// exported methods, don't cause a panic trying to get its value. +func TestUnmarshalEmbeddedUnexported(t *testing.T) { + type ( + embed1 struct{ Q int } + embed2 struct{ Q int } + embed3 struct { + Q int64 `json:",string"` + } + S1 struct { + *embed1 + R int + } + S2 struct { + *embed1 + Q int + } + S3 struct { + embed1 + R int + } + S4 struct { + *embed1 + embed2 + } + S5 struct { + *embed3 + R int + } + S6 struct { + embed1 `json:"embed1"` + } + S7 struct { + embed1 `json:"embed1"` + embed2 + } + S8 struct { + embed1 `json:"embed1"` + embed2 `json:"embed2"` + Q int + } + S9 struct { + unexportedWithMethods `json:"embed"` + } + ) + + tests := []struct { + in string + ptr any + out any + err error + }{{ + // Error since we cannot set S1.embed1, but still able to set S1.R. + in: `{"R":2,"Q":1}`, + ptr: new(S1), + out: &S1{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"), + }, { + // The top level Q field takes precedence. + in: `{"Q":1}`, + ptr: new(S2), + out: &S2{Q: 1}, + }, { + // No issue with non-pointer variant. + in: `{"R":2,"Q":1}`, + ptr: new(S3), + out: &S3{embed1: embed1{Q: 1}, R: 2}, + }, { + // No error since both embedded structs have field R, which annihilate each other. + // Thus, no attempt is made at setting S4.embed1. + in: `{"R":2}`, + ptr: new(S4), + out: new(S4), + }, { + // Error since we cannot set S5.embed1, but still able to set S5.R. + in: `{"R":2,"Q":1}`, + ptr: new(S5), + out: &S5{R: 2}, + err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"), + }, { + // Issue 24152, ensure decodeState.indirect does not panic. + in: `{"embed1": {"Q": 1}}`, + ptr: new(S6), + out: &S6{embed1{1}}, + }, { + // Issue 24153, check that we can still set forwarded fields even in + // the presence of a name conflict. + // + // This relies on obscure behavior of reflect where it is possible + // to set a forwarded exported field on an unexported embedded struct + // even though there is a name conflict, even when it would have been + // impossible to do so according to Go visibility rules. + // Go forbids this because it is ambiguous whether S7.Q refers to + // S7.embed1.Q or S7.embed2.Q. Since embed1 and embed2 are unexported, + // it should be impossible for an external package to set either Q. + // + // It is probably okay for a future reflect change to break this. + in: `{"embed1": {"Q": 1}, "Q": 2}`, + ptr: new(S7), + out: &S7{embed1{1}, embed2{2}}, + }, { + // Issue 24153, similar to the S7 case. + in: `{"embed1": {"Q": 1}, "embed2": {"Q": 2}, "Q": 3}`, + ptr: new(S8), + out: &S8{embed1{1}, embed2{2}, 3}, + }, { + // Issue 228145, similar to the cases above. + in: `{"embed": {}}`, + ptr: new(S9), + out: &S9{}, + }} + + for i, tt := range tests { + err := Unmarshal([]byte(tt.in), tt.ptr) + if !equalError(err, tt.err) { + t.Errorf("#%d: %v, want %v", i, err, tt.err) + } + if !reflect.DeepEqual(tt.ptr, tt.out) { + t.Errorf("#%d: mismatch\ngot: %#+v\nwant: %#+v", i, tt.ptr, tt.out) + } + } +} + +func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { + tests := []struct { + in string + err error + }{{ + in: `1 false null :`, + err: &SyntaxError{"invalid character ':' looking for beginning of value", 14}, + }, { + in: `1 [] [,]`, + err: &SyntaxError{"invalid character ',' looking for beginning of value", 7}, + }, { + in: `1 [] [true:]`, + err: &SyntaxError{"invalid character ':' after array element", 11}, + }, { + in: `1 {} {"x"=}`, + err: &SyntaxError{"invalid character '=' after object key", 14}, + }, { + in: `falsetruenul#`, + err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13}, + }} + for i, tt := range tests { + dec := NewDecoder(strings.NewReader(tt.in)) + var err error + for { + var v any + if err = dec.Decode(&v); err != nil { + break + } + } + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: got %#v, want %#v", i, err, tt.err) + } + } +} + +type unmarshalPanic struct{} + +func (unmarshalPanic) UnmarshalJSON([]byte) error { panic(0xdead) } + +func TestUnmarshalPanic(t *testing.T) { + defer func() { + if got := recover(); !reflect.DeepEqual(got, 0xdead) { + t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) + } + }() + Unmarshal([]byte("{}"), &unmarshalPanic{}) + t.Fatalf("Unmarshal should have panicked") +} + +// The decoder used to hang if decoding into an interface pointing to its own address. +// See golang.org/issues/31740. +func TestUnmarshalRecursivePointer(t *testing.T) { + var v any + v = &v + data := []byte(`{"a": "b"}`) + + if err := Unmarshal(data, v); err != nil { + t.Fatal(err) + } +} + +type textUnmarshalerString string + +func (m *textUnmarshalerString) UnmarshalText(text []byte) error { + *m = textUnmarshalerString(strings.ToLower(string(text))) + return nil +} + +// Test unmarshal to a map, where the map key is a user defined type. +// See golang.org/issues/34437. +func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) { + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"FOO": "1"}`), &p); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + + if _, ok := p["foo"]; !ok { + t.Errorf(`Key "foo" does not exist in map: %v`, p) + } +} + +func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) { + // See golang.org/issues/38105. + var p map[textUnmarshalerString]string + if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + if _, ok := p["开源"]; !ok { + t.Errorf(`Key "开源" does not exist in map: %v`, p) + } + + // See golang.org/issues/38126. + type T struct { + F1 string `json:"F1,string"` + } + t1 := T{"aaa\tbbb"} + + b, err := Marshal(t1) + if err != nil { + t.Fatalf("Marshal unexpected error: %v", err) + } + var t2 T + if err := Unmarshal(b, &t2); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + if t1 != t2 { + t.Errorf("Marshal and Unmarshal roundtrip mismatch: want %q got %q", t1, t2) + } + + // See golang.org/issues/39555. + input := map[textUnmarshalerString]string{"FOO": "", `"`: ""} + + encoded, err := Marshal(input) + if err != nil { + t.Fatalf("Marshal unexpected error: %v", err) + } + var got map[textUnmarshalerString]string + if err := Unmarshal(encoded, &got); err != nil { + t.Fatalf("Unmarshal unexpected error: %v", err) + } + want := map[textUnmarshalerString]string{"foo": "", `"`: ""} + if !reflect.DeepEqual(want, got) { + t.Fatalf("Unexpected roundtrip result:\nwant: %q\ngot: %q", want, got) + } +} + +func TestUnmarshalMaxDepth(t *testing.T) { + testcases := []struct { + name string + data string + errMaxDepth bool + }{ + { + name: "ArrayUnderMaxNestingDepth", + data: `{"a":` + strings.Repeat(`[`, 10000-1) + strings.Repeat(`]`, 10000-1) + `}`, + errMaxDepth: false, + }, + { + name: "ArrayOverMaxNestingDepth", + data: `{"a":` + strings.Repeat(`[`, 10000) + strings.Repeat(`]`, 10000) + `}`, + errMaxDepth: true, + }, + { + name: "ArrayOverStackDepth", + data: `{"a":` + strings.Repeat(`[`, 3000000) + strings.Repeat(`]`, 3000000) + `}`, + errMaxDepth: true, + }, + { + name: "ObjectUnderMaxNestingDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 10000-1) + `0` + strings.Repeat(`}`, 10000-1) + `}`, + errMaxDepth: false, + }, + { + name: "ObjectOverMaxNestingDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 10000) + `0` + strings.Repeat(`}`, 10000) + `}`, + errMaxDepth: true, + }, + { + name: "ObjectOverStackDepth", + data: `{"a":` + strings.Repeat(`{"a":`, 3000000) + `0` + strings.Repeat(`}`, 3000000) + `}`, + errMaxDepth: true, + }, + } + + targets := []struct { + name string + newValue func() any + }{ + { + name: "unstructured", + newValue: func() any { + var v any + return &v + }, + }, + { + name: "typed named field", + newValue: func() any { + v := struct { + A any `json:"a"` + }{} + return &v + }, + }, + { + name: "typed missing field", + newValue: func() any { + v := struct { + B any `json:"b"` + }{} + return &v + }, + }, + { + name: "custom unmarshaler", + newValue: func() any { + v := unmarshaler{} + return &v + }, + }, + } + + for _, tc := range testcases { + for _, target := range targets { + t.Run(target.name+"-"+tc.name, func(t *testing.T) { + err := Unmarshal([]byte(tc.data), target.newValue()) + if !tc.errMaxDepth { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + } else { + if err == nil { + t.Errorf("expected error containing 'exceeded max depth', got none") + } else if !strings.Contains(err.Error(), "exceeded max depth") { + t.Errorf("expected error containing 'exceeded max depth', got: %v", err) + } + } + }) + } + } +} diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go new file mode 100644 index 0000000..6da0bd9 --- /dev/null +++ b/src/encoding/json/encode.go @@ -0,0 +1,1283 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package json implements encoding and decoding of JSON as defined in +// RFC 7159. The mapping between JSON and Go values is described +// in the documentation for the Marshal and Unmarshal functions. +// +// See "JSON and Go" for an introduction to this package: +// https://golang.org/doc/articles/json_and_go.html +package json + +import ( + "bytes" + "encoding" + "encoding/base64" + "fmt" + "math" + "reflect" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements the Marshaler interface +// and is not a nil pointer, Marshal calls its MarshalJSON method +// to produce JSON. If no MarshalJSON method is present but the +// value implements encoding.TextMarshaler instead, Marshal calls +// its MarshalText method and encodes the result as a JSON string. +// The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// UnmarshalJSON. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. +// +// Floating point, integer, and Number values encode as JSON numbers. +// NaN and +/-Inf values will return an [UnsupportedValueError]. +// +// String values encode as JSON strings coerced to valid UTF-8, +// replacing invalid bytes with the Unicode replacement rune. +// So that the JSON will be safe to embed inside HTML <script> tags, +// the string is encoded using HTMLEscape, +// which replaces "<", ">", "&", U+2028, and U+2029 are escaped +// to "\u003c","\u003e", "\u0026", "\u2028", and "\u2029". +// This replacement can be disabled when using an Encoder, +// by calling SetEscapeHTML(false). +// +// Array and slice values encode as JSON arrays, except that +// []byte encodes as a base64-encoded string, and a nil slice +// encodes as the null JSON value. +// +// Struct values encode as JSON objects. +// Each exported struct field becomes a member of the object, using the +// field name as the object key, unless the field is omitted for one of the +// reasons given below. +// +// The encoding of each struct field can be customized by the format string +// stored under the "json" key in the struct field's tag. +// The format string gives the name of the field, possibly followed by a +// comma-separated list of options. The name may be empty in order to +// specify options without overriding the default field name. +// +// The "omitempty" option specifies that the field should be omitted +// from the encoding if the field has an empty value, defined as +// false, 0, a nil pointer, a nil interface value, and any empty array, +// slice, map, or string. +// +// As a special case, if the field tag is "-", the field is always omitted. +// Note that a field with name "-" can still be generated using the tag "-,". +// +// Examples of struct field tags and their meanings: +// +// // Field appears in JSON as key "myName". +// Field int `json:"myName"` +// +// // Field appears in JSON as key "myName" and +// // the field is omitted from the object if its value is empty, +// // as defined above. +// Field int `json:"myName,omitempty"` +// +// // Field appears in JSON as key "Field" (the default), but +// // the field is skipped if empty. +// // Note the leading comma. +// Field int `json:",omitempty"` +// +// // Field is ignored by this package. +// Field int `json:"-"` +// +// // Field appears in JSON as key "-". +// Field int `json:"-,"` +// +// The "string" option signals that a field is stored as JSON inside a +// JSON-encoded string. It applies only to fields of string, floating point, +// integer, or boolean types. This extra level of encoding is sometimes used +// when communicating with JavaScript programs: +// +// Int64String int64 `json:",string"` +// +// The key name will be used if it's a non-empty string consisting of +// only Unicode letters, digits, and ASCII punctuation except quotation +// marks, backslash, and comma. +// +// Anonymous struct fields are usually marshaled as if their inner exported fields +// were fields in the outer struct, subject to the usual Go visibility rules amended +// as described in the next paragraph. +// An anonymous struct field with a name given in its JSON tag is treated as +// having that name, rather than being anonymous. +// An anonymous struct field of interface type is treated the same as having +// that type as its name, rather than being anonymous. +// +// The Go visibility rules for struct fields are amended for JSON when +// deciding which field to marshal or unmarshal. If there are +// multiple fields at the same level, and that level is the least +// nested (and would therefore be the nesting level selected by the +// usual Go rules), the following extra rules apply: +// +// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered, +// even if there are multiple untagged fields that would otherwise conflict. +// +// 2) If there is exactly one field (tagged or not according to the first rule), that is selected. +// +// 3) Otherwise there are multiple fields, and all are ignored; no error occurs. +// +// Handling of anonymous struct fields is new in Go 1.1. +// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of +// an anonymous struct field in both current and earlier versions, give the field +// a JSON tag of "-". +// +// Map values encode as JSON objects. The map's key type must either be a +// string, an integer type, or implement encoding.TextMarshaler. The map keys +// are sorted and used as JSON object keys by applying the following rules, +// subject to the UTF-8 coercion described for string values above: +// - keys of any string type are used directly +// - encoding.TextMarshalers are marshaled +// - integer keys are converted to strings +// +// Pointer values encode as the value pointed to. +// A nil pointer encodes as the null JSON value. +// +// Interface values encode as the value contained in the interface. +// A nil interface value encodes as the null JSON value. +// +// Channel, complex, and function values cannot be encoded in JSON. +// Attempting to encode such a value causes Marshal to return +// an UnsupportedTypeError. +// +// JSON cannot represent cyclic data structures and Marshal does not +// handle them. Passing cyclic structures to Marshal will result in +// an error. +func Marshal(v any) ([]byte, error) { + e := newEncodeState() + defer encodeStatePool.Put(e) + + err := e.marshal(v, encOpts{escapeHTML: true}) + if err != nil { + return nil, err + } + buf := append([]byte(nil), e.Bytes()...) + + return buf, nil +} + +// MarshalIndent is like Marshal but applies Indent to format the output. +// Each JSON element in the output will begin on a new line beginning with prefix +// followed by one or more copies of indent according to the indentation nesting. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + b, err := Marshal(v) + if err != nil { + return nil, err + } + b2 := make([]byte, 0, indentGrowthFactor*len(b)) + b2, err = appendIndent(b2, b, prefix, indent) + if err != nil { + return nil, err + } + return b2, nil +} + +// Marshaler is the interface implemented by types that +// can marshal themselves into valid JSON. +type Marshaler interface { + MarshalJSON() ([]byte, error) +} + +// An UnsupportedTypeError is returned by Marshal when attempting +// to encode an unsupported value type. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "json: unsupported type: " + e.Type.String() +} + +// An UnsupportedValueError is returned by Marshal when attempting +// to encode an unsupported value. +type UnsupportedValueError struct { + Value reflect.Value + Str string +} + +func (e *UnsupportedValueError) Error() string { + return "json: unsupported value: " + e.Str +} + +// Before Go 1.2, an InvalidUTF8Error was returned by Marshal when +// attempting to encode a string value with invalid UTF-8 sequences. +// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by +// replacing invalid bytes with the Unicode replacement rune U+FFFD. +// +// Deprecated: No longer used; kept for compatibility. +type InvalidUTF8Error struct { + S string // the whole string value that caused the error +} + +func (e *InvalidUTF8Error) Error() string { + return "json: invalid UTF-8 in string: " + strconv.Quote(e.S) +} + +// A MarshalerError represents an error from calling a MarshalJSON or MarshalText method. +type MarshalerError struct { + Type reflect.Type + Err error + sourceFunc string +} + +func (e *MarshalerError) Error() string { + srcFunc := e.sourceFunc + if srcFunc == "" { + srcFunc = "MarshalJSON" + } + return "json: error calling " + srcFunc + + " for type " + e.Type.String() + + ": " + e.Err.Error() +} + +// Unwrap returns the underlying error. +func (e *MarshalerError) Unwrap() error { return e.Err } + +var hex = "0123456789abcdef" + +// An encodeState encodes JSON into a bytes.Buffer. +type encodeState struct { + bytes.Buffer // accumulated output + + // Keep track of what pointers we've seen in the current recursive call + // path, to avoid cycles that could lead to a stack overflow. Only do + // the relatively expensive map operations if ptrLevel is larger than + // startDetectingCyclesAfter, so that we skip the work if we're within a + // reasonable amount of nested pointers deep. + ptrLevel uint + ptrSeen map[any]struct{} +} + +const startDetectingCyclesAfter = 1000 + +var encodeStatePool sync.Pool + +func newEncodeState() *encodeState { + if v := encodeStatePool.Get(); v != nil { + e := v.(*encodeState) + e.Reset() + if len(e.ptrSeen) > 0 { + panic("ptrEncoder.encode should have emptied ptrSeen via defers") + } + e.ptrLevel = 0 + return e + } + return &encodeState{ptrSeen: make(map[any]struct{})} +} + +// jsonError is an error wrapper type for internal use only. +// Panics with errors are wrapped in jsonError so that the top-level recover +// can distinguish intentional panics from this package. +type jsonError struct{ error } + +func (e *encodeState) marshal(v any, opts encOpts) (err error) { + defer func() { + if r := recover(); r != nil { + if je, ok := r.(jsonError); ok { + err = je.error + } else { + panic(r) + } + } + }() + e.reflectValue(reflect.ValueOf(v), opts) + return nil +} + +// error aborts the encoding by panicking with err wrapped in jsonError. +func (e *encodeState) error(err error) { + panic(jsonError{err}) +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool: + return v.Bool() == false + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Interface, reflect.Pointer: + return v.IsNil() + } + return false +} + +func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) { + valueEncoder(v)(e, v, opts) +} + +type encOpts struct { + // quoted causes primitive fields to be encoded inside JSON strings. + quoted bool + // escapeHTML causes '<', '>', and '&' to be escaped in JSON strings. + escapeHTML bool +} + +type encoderFunc func(e *encodeState, v reflect.Value, opts encOpts) + +var encoderCache sync.Map // map[reflect.Type]encoderFunc + +func valueEncoder(v reflect.Value) encoderFunc { + if !v.IsValid() { + return invalidValueEncoder + } + return typeEncoder(v.Type()) +} + +func typeEncoder(t reflect.Type) encoderFunc { + if fi, ok := encoderCache.Load(t); ok { + return fi.(encoderFunc) + } + + // To deal with recursive types, populate the map with an + // indirect func before we build it. This type waits on the + // real func (f) to be ready and then calls it. This indirect + // func is only used for recursive types. + var ( + wg sync.WaitGroup + f encoderFunc + ) + wg.Add(1) + fi, loaded := encoderCache.LoadOrStore(t, encoderFunc(func(e *encodeState, v reflect.Value, opts encOpts) { + wg.Wait() + f(e, v, opts) + })) + if loaded { + return fi.(encoderFunc) + } + + // Compute the real encoder and replace the indirect func with it. + f = newTypeEncoder(t, true) + wg.Done() + encoderCache.Store(t, f) + return f +} + +var ( + marshalerType = reflect.TypeOf((*Marshaler)(nil)).Elem() + textMarshalerType = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem() +) + +// newTypeEncoder constructs an encoderFunc for a type. +// The returned encoder only checks CanAddr when allowAddr is true. +func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { + // If we have a non-pointer value whose type implements + // Marshaler with a value receiver, then we're better off taking + // the address of the value - otherwise we end up with an + // allocation as we cast the value to an interface. + if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(marshalerType) { + return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) + } + if t.Implements(marshalerType) { + return marshalerEncoder + } + if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(textMarshalerType) { + return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) + } + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } + + switch t.Kind() { + case reflect.Bool: + return boolEncoder + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return intEncoder + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return uintEncoder + case reflect.Float32: + return float32Encoder + case reflect.Float64: + return float64Encoder + case reflect.String: + return stringEncoder + case reflect.Interface: + return interfaceEncoder + case reflect.Struct: + return newStructEncoder(t) + case reflect.Map: + return newMapEncoder(t) + case reflect.Slice: + return newSliceEncoder(t) + case reflect.Array: + return newArrayEncoder(t) + case reflect.Pointer: + return newPtrEncoder(t) + default: + return unsupportedTypeEncoder + } +} + +func invalidValueEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.WriteString("null") +} + +func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Pointer && v.IsNil() { + e.WriteString("null") + return + } + m, ok := v.Interface().(Marshaler) + if !ok { + e.WriteString("null") + return + } + b, err := m.MarshalJSON() + if err == nil { + e.Grow(len(b)) + out := e.AvailableBuffer() + out, err = appendCompact(out, b, opts.escapeHTML) + e.Buffer.Write(out) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) + } +} + +func addrMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(Marshaler) + b, err := m.MarshalJSON() + if err == nil { + e.Grow(len(b)) + out := e.AvailableBuffer() + out, err = appendCompact(out, b, opts.escapeHTML) + e.Buffer.Write(out) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) + } +} + +func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Pointer && v.IsNil() { + e.WriteString("null") + return + } + m, ok := v.Interface().(encoding.TextMarshaler) + if !ok { + e.WriteString("null") + return + } + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) + } + e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) +} + +func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(encoding.TextMarshaler) + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err, "MarshalText"}) + } + e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) +} + +func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendBool(b, v.Bool()) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +func intEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendInt(b, v.Int(), 10) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +func uintEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = strconv.AppendUint(b, v.Uint(), 10) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +type floatEncoder int // number of bits + +func (bits floatEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + f := v.Float() + if math.IsInf(f, 0) || math.IsNaN(f) { + e.error(&UnsupportedValueError{v, strconv.FormatFloat(f, 'g', -1, int(bits))}) + } + + // Convert as if by ES6 number to string conversion. + // This matches most other JSON generators. + // See golang.org/issue/6384 and golang.org/issue/14135. + // Like fmt %g, but the exponent cutoffs are different + // and exponents themselves are not padded to two digits. + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + abs := math.Abs(f) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { + fmt = 'e' + } + } + b = strconv.AppendFloat(b, f, fmt, -1, int(bits)) + if fmt == 'e' { + // clean up e-09 to e-9 + n := len(b) + if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' { + b[n-2] = b[n-1] + b = b[:n-1] + } + } + b = mayAppendQuote(b, opts.quoted) + e.Write(b) +} + +var ( + float32Encoder = (floatEncoder(32)).encode + float64Encoder = (floatEncoder(64)).encode +) + +func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Type() == numberType { + numStr := v.String() + // In Go1.5 the empty string encodes to "0", while this is not a valid number literal + // we keep compatibility so check validity after this. + if numStr == "" { + numStr = "0" // Number's zero-val + } + if !isValidNumber(numStr) { + e.error(fmt.Errorf("json: invalid number literal %q", numStr)) + } + b := e.AvailableBuffer() + b = mayAppendQuote(b, opts.quoted) + b = append(b, numStr...) + b = mayAppendQuote(b, opts.quoted) + e.Write(b) + return + } + if opts.quoted { + b := appendString(nil, v.String(), opts.escapeHTML) + e.Write(appendString(e.AvailableBuffer(), b, false)) // no need to escape again since it is already escaped + } else { + e.Write(appendString(e.AvailableBuffer(), v.String(), opts.escapeHTML)) + } +} + +// isValidNumber reports whether s is a valid JSON number literal. +func isValidNumber(s string) bool { + // This function implements the JSON numbers grammar. + // See https://tools.ietf.org/html/rfc7159#section-6 + // and https://www.json.org/img/number.png + + if s == "" { + return false + } + + // Optional - + if s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + + // Digits + switch { + default: + return false + + case s[0] == '0': + s = s[1:] + + case '1' <= s[0] && s[0] <= '9': + s = s[1:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // . followed by 1 or more digits. + if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { + s = s[2:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // e or E followed by an optional - or + and + // 1 or more digits. + if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { + s = s[1:] + if s[0] == '+' || s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // Make sure we are at the end. + return s == "" +} + +func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + e.reflectValue(v.Elem(), opts) +} + +func unsupportedTypeEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.error(&UnsupportedTypeError{v.Type()}) +} + +type structEncoder struct { + fields structFields +} + +type structFields struct { + list []field + byExactName map[string]*field + byFoldedName map[string]*field +} + +func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + next := byte('{') +FieldLoop: + for i := range se.fields.list { + f := &se.fields.list[i] + + // Find the nested struct field by following f.index. + fv := v + for _, i := range f.index { + if fv.Kind() == reflect.Pointer { + if fv.IsNil() { + continue FieldLoop + } + fv = fv.Elem() + } + fv = fv.Field(i) + } + + if f.omitEmpty && isEmptyValue(fv) { + continue + } + e.WriteByte(next) + next = ',' + if opts.escapeHTML { + e.WriteString(f.nameEscHTML) + } else { + e.WriteString(f.nameNonEsc) + } + opts.quoted = f.quoted + f.encoder(e, fv, opts) + } + if next == '{' { + e.WriteString("{}") + } else { + e.WriteByte('}') + } +} + +func newStructEncoder(t reflect.Type) encoderFunc { + se := structEncoder{fields: cachedTypeFields(t)} + return se.encode +} + +type mapEncoder struct { + elemEnc encoderFunc +} + +func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + ptr := v.UnsafePointer() + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + e.WriteByte('{') + + // Extract and sort the keys. + sv := make([]reflectWithString, v.Len()) + mi := v.MapRange() + for i := 0; mi.Next(); i++ { + sv[i].k = mi.Key() + sv[i].v = mi.Value() + if err := sv[i].resolve(); err != nil { + e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error())) + } + } + sort.Slice(sv, func(i, j int) bool { return sv[i].ks < sv[j].ks }) + + for i, kv := range sv { + if i > 0 { + e.WriteByte(',') + } + e.Write(appendString(e.AvailableBuffer(), kv.ks, opts.escapeHTML)) + e.WriteByte(':') + me.elemEnc(e, kv.v, opts) + } + e.WriteByte('}') + e.ptrLevel-- +} + +func newMapEncoder(t reflect.Type) encoderFunc { + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !t.Key().Implements(textMarshalerType) { + return unsupportedTypeEncoder + } + } + me := mapEncoder{typeEncoder(t.Elem())} + return me.encode +} + +func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + s := v.Bytes() + encodedLen := base64.StdEncoding.EncodedLen(len(s)) + e.Grow(len(`"`) + encodedLen + len(`"`)) + + // TODO(https://go.dev/issue/53693): Use base64.Encoding.AppendEncode. + b := e.AvailableBuffer() + b = append(b, '"') + base64.StdEncoding.Encode(b[len(b):][:encodedLen], s) + b = b[:len(b)+encodedLen] + b = append(b, '"') + e.Write(b) +} + +// sliceEncoder just wraps an arrayEncoder, checking to make sure the value isn't nil. +type sliceEncoder struct { + arrayEnc encoderFunc +} + +func (se sliceEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + // Here we use a struct to memorize the pointer to the first element of the slice + // and its length. + ptr := struct { + ptr interface{} // always an unsafe.Pointer, but avoids a dependency on package unsafe + len int + }{v.UnsafePointer(), v.Len()} + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + se.arrayEnc(e, v, opts) + e.ptrLevel-- +} + +func newSliceEncoder(t reflect.Type) encoderFunc { + // Byte slices get special treatment; arrays don't. + if t.Elem().Kind() == reflect.Uint8 { + p := reflect.PointerTo(t.Elem()) + if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) { + return encodeByteSlice + } + } + enc := sliceEncoder{newArrayEncoder(t)} + return enc.encode +} + +type arrayEncoder struct { + elemEnc encoderFunc +} + +func (ae arrayEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + e.WriteByte('[') + n := v.Len() + for i := 0; i < n; i++ { + if i > 0 { + e.WriteByte(',') + } + ae.elemEnc(e, v.Index(i), opts) + } + e.WriteByte(']') +} + +func newArrayEncoder(t reflect.Type) encoderFunc { + enc := arrayEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type ptrEncoder struct { + elemEnc encoderFunc +} + +func (pe ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { + // We're a large number of nested ptrEncoder.encode calls deep; + // start checking if we've run into a pointer cycle. + ptr := v.Interface() + if _, ok := e.ptrSeen[ptr]; ok { + e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) + } + e.ptrSeen[ptr] = struct{}{} + defer delete(e.ptrSeen, ptr) + } + pe.elemEnc(e, v.Elem(), opts) + e.ptrLevel-- +} + +func newPtrEncoder(t reflect.Type) encoderFunc { + enc := ptrEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type condAddrEncoder struct { + canAddrEnc, elseEnc encoderFunc +} + +func (ce condAddrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.CanAddr() { + ce.canAddrEnc(e, v, opts) + } else { + ce.elseEnc(e, v, opts) + } +} + +// newCondAddrEncoder returns an encoder that checks whether its value +// CanAddr and delegates to canAddrEnc if so, else to elseEnc. +func newCondAddrEncoder(canAddrEnc, elseEnc encoderFunc) encoderFunc { + enc := condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} + return enc.encode +} + +func isValidTag(s string) bool { + if s == "" { + return false + } + for _, c := range s { + switch { + case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c): + // Backslash and quote chars are reserved, but + // otherwise any punctuation chars are allowed + // in a tag name. + case !unicode.IsLetter(c) && !unicode.IsDigit(c): + return false + } + } + return true +} + +func typeByIndex(t reflect.Type, index []int) reflect.Type { + for _, i := range index { + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + t = t.Field(i).Type + } + return t +} + +type reflectWithString struct { + k reflect.Value + v reflect.Value + ks string +} + +func (w *reflectWithString) resolve() error { + if w.k.Kind() == reflect.String { + w.ks = w.k.String() + return nil + } + if tm, ok := w.k.Interface().(encoding.TextMarshaler); ok { + if w.k.Kind() == reflect.Pointer && w.k.IsNil() { + return nil + } + buf, err := tm.MarshalText() + w.ks = string(buf) + return err + } + switch w.k.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + w.ks = strconv.FormatInt(w.k.Int(), 10) + return nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + w.ks = strconv.FormatUint(w.k.Uint(), 10) + return nil + } + panic("unexpected map key type") +} + +func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) []byte { + dst = append(dst, '"') + start := 0 + for i := 0; i < len(src); { + if b := src[i]; b < utf8.RuneSelf { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { + i++ + continue + } + dst = append(dst, src[start:i]...) + switch b { + case '\\', '"': + dst = append(dst, '\\', b) + case '\n': + dst = append(dst, '\\', 'n') + case '\r': + dst = append(dst, '\\', 'r') + case '\t': + dst = append(dst, '\\', 't') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) + } + i++ + start = i + continue + } + // TODO(https://go.dev/issue/56948): Use generic utf8 functionality. + // For now, cast only a small portion of byte slices to a string + // so that it can be stack allocated. This slows down []byte slightly + // due to the extra copy, but keeps string performance roughly the same. + n := len(src) - i + if n > utf8.UTFMax { + n = utf8.UTFMax + } + c, size := utf8.DecodeRuneInString(string(src[i : i+n])) + if c == utf8.RuneError && size == 1 { + dst = append(dst, src[start:i]...) + dst = append(dst, `\ufffd`...) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. + if c == '\u2028' || c == '\u2029' { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '2', '0', '2', hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + dst = append(dst, src[start:]...) + dst = append(dst, '"') + return dst +} + +// A field represents a single field found in a struct. +type field struct { + name string + nameBytes []byte // []byte(name) + + nameNonEsc string // `"` + name + `":` + nameEscHTML string // `"` + HTMLEscape(name) + `":` + + tag bool + index []int + typ reflect.Type + omitEmpty bool + quoted bool + + encoder encoderFunc +} + +// byIndex sorts field by index sequence. +type byIndex []field + +func (x byIndex) Len() int { return len(x) } + +func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +func (x byIndex) Less(i, j int) bool { + for k, xik := range x[i].index { + if k >= len(x[j].index) { + return false + } + if xik != x[j].index[k] { + return xik < x[j].index[k] + } + } + return len(x[i].index) < len(x[j].index) +} + +// typeFields returns a list of fields that JSON should recognize for the given type. +// The algorithm is breadth-first search over the set of structs to include - the top struct +// and then any reachable anonymous structs. +func typeFields(t reflect.Type) structFields { + // Anonymous fields to explore at the current level and the next. + current := []field{} + next := []field{{typ: t}} + + // Count of queued names for current level and the next. + var count, nextCount map[reflect.Type]int + + // Types already visited at an earlier level. + visited := map[reflect.Type]bool{} + + // Fields found. + var fields []field + + // Buffer to run appendHTMLEscape on field names. + var nameEscBuf []byte + + for len(next) > 0 { + current, next = next, current[:0] + count, nextCount = nextCount, map[reflect.Type]int{} + + for _, f := range current { + if visited[f.typ] { + continue + } + visited[f.typ] = true + + // Scan f.typ for fields to include. + for i := 0; i < f.typ.NumField(); i++ { + sf := f.typ.Field(i) + if sf.Anonymous { + t := sf.Type + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if !sf.IsExported() && t.Kind() != reflect.Struct { + // Ignore embedded fields of unexported non-struct types. + continue + } + // Do not ignore embedded fields of unexported struct types + // since they may have exported fields. + } else if !sf.IsExported() { + // Ignore unexported non-embedded fields. + continue + } + tag := sf.Tag.Get("json") + if tag == "-" { + continue + } + name, opts := parseTag(tag) + if !isValidTag(name) { + name = "" + } + index := make([]int, len(f.index)+1) + copy(index, f.index) + index[len(f.index)] = i + + ft := sf.Type + if ft.Name() == "" && ft.Kind() == reflect.Pointer { + // Follow pointer. + ft = ft.Elem() + } + + // Only strings, floats, integers, and booleans can be quoted. + quoted := false + if opts.Contains("string") { + switch ft.Kind() { + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.String: + quoted = true + } + } + + // Record found field and index sequence. + if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { + tagged := name != "" + if name == "" { + name = sf.Name + } + field := field{ + name: name, + tag: tagged, + index: index, + typ: ft, + omitEmpty: opts.Contains("omitempty"), + quoted: quoted, + } + field.nameBytes = []byte(field.name) + + // Build nameEscHTML and nameNonEsc ahead of time. + nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes) + field.nameEscHTML = `"` + string(nameEscBuf) + `":` + field.nameNonEsc = `"` + field.name + `":` + + fields = append(fields, field) + if count[f.typ] > 1 { + // If there were multiple instances, add a second, + // so that the annihilation code will see a duplicate. + // It only cares about the distinction between 1 or 2, + // so don't bother generating any more copies. + fields = append(fields, fields[len(fields)-1]) + } + continue + } + + // Record new anonymous struct to explore in next round. + nextCount[ft]++ + if nextCount[ft] == 1 { + next = append(next, field{name: ft.Name(), index: index, typ: ft}) + } + } + } + } + + sort.Slice(fields, func(i, j int) bool { + x := fields + // sort field by name, breaking ties with depth, then + // breaking ties with "name came from json tag", then + // breaking ties with index sequence. + if x[i].name != x[j].name { + return x[i].name < x[j].name + } + if len(x[i].index) != len(x[j].index) { + return len(x[i].index) < len(x[j].index) + } + if x[i].tag != x[j].tag { + return x[i].tag + } + return byIndex(x).Less(i, j) + }) + + // Delete all fields that are hidden by the Go rules for embedded fields, + // except that fields with JSON tags are promoted. + + // The fields are sorted in primary order of name, secondary order + // of field index length. Loop over names; for each name, delete + // hidden fields by choosing the one dominant field that survives. + out := fields[:0] + for advance, i := 0, 0; i < len(fields); i += advance { + // One iteration per name. + // Find the sequence of fields with the name of this first field. + fi := fields[i] + name := fi.name + for advance = 1; i+advance < len(fields); advance++ { + fj := fields[i+advance] + if fj.name != name { + break + } + } + if advance == 1 { // Only one field with this name + out = append(out, fi) + continue + } + dominant, ok := dominantField(fields[i : i+advance]) + if ok { + out = append(out, dominant) + } + } + + fields = out + sort.Sort(byIndex(fields)) + + for i := range fields { + f := &fields[i] + f.encoder = typeEncoder(typeByIndex(t, f.index)) + } + exactNameIndex := make(map[string]*field, len(fields)) + foldedNameIndex := make(map[string]*field, len(fields)) + for i, field := range fields { + exactNameIndex[field.name] = &fields[i] + // For historical reasons, first folded match takes precedence. + if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok { + foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i] + } + } + return structFields{fields, exactNameIndex, foldedNameIndex} +} + +// dominantField looks through the fields, all of which are known to +// have the same name, to find the single field that dominates the +// others using Go's embedding rules, modified by the presence of +// JSON tags. If there are multiple top-level fields, the boolean +// will be false: This condition is an error in Go and we skip all +// the fields. +func dominantField(fields []field) (field, bool) { + // The fields are sorted in increasing index-length order, then by presence of tag. + // That means that the first field is the dominant one. We need only check + // for error cases: two fields at top level, either both tagged or neither tagged. + if len(fields) > 1 && len(fields[0].index) == len(fields[1].index) && fields[0].tag == fields[1].tag { + return field{}, false + } + return fields[0], true +} + +var fieldCache sync.Map // map[reflect.Type]structFields + +// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. +func cachedTypeFields(t reflect.Type) structFields { + if f, ok := fieldCache.Load(t); ok { + return f.(structFields) + } + f, _ := fieldCache.LoadOrStore(t, typeFields(t)) + return f.(structFields) +} + +func mayAppendQuote(b []byte, quoted bool) []byte { + if quoted { + b = append(b, '"') + } + return b +} diff --git a/src/encoding/json/encode_test.go b/src/encoding/json/encode_test.go new file mode 100644 index 0000000..d027972 --- /dev/null +++ b/src/encoding/json/encode_test.go @@ -0,0 +1,1190 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "encoding" + "fmt" + "log" + "math" + "reflect" + "regexp" + "runtime/debug" + "strconv" + "testing" +) + +type Optionals struct { + Sr string `json:"sr"` + So string `json:"so,omitempty"` + Sw string `json:"-"` + + Ir int `json:"omitempty"` // actually named omitempty, not an option + Io int `json:"io,omitempty"` + + Slr []string `json:"slr,random"` + Slo []string `json:"slo,omitempty"` + + Mr map[string]any `json:"mr"` + Mo map[string]any `json:",omitempty"` + + Fr float64 `json:"fr"` + Fo float64 `json:"fo,omitempty"` + + Br bool `json:"br"` + Bo bool `json:"bo,omitempty"` + + Ur uint `json:"ur"` + Uo uint `json:"uo,omitempty"` + + Str struct{} `json:"str"` + Sto struct{} `json:"sto,omitempty"` +} + +var optionalsExpected = `{ + "sr": "", + "omitempty": 0, + "slr": null, + "mr": {}, + "fr": 0, + "br": false, + "ur": 0, + "str": {}, + "sto": {} +}` + +func TestOmitEmpty(t *testing.T) { + var o Optionals + o.Sw = "something" + o.Mr = map[string]any{} + o.Mo = map[string]any{} + + got, err := MarshalIndent(&o, "", " ") + if err != nil { + t.Fatal(err) + } + if got := string(got); got != optionalsExpected { + t.Errorf(" got: %s\nwant: %s\n", got, optionalsExpected) + } +} + +type StringTag struct { + BoolStr bool `json:",string"` + IntStr int64 `json:",string"` + UintptrStr uintptr `json:",string"` + StrStr string `json:",string"` + NumberStr Number `json:",string"` +} + +func TestRoundtripStringTag(t *testing.T) { + tests := []struct { + name string + in StringTag + want string // empty to just test that we roundtrip + }{ + { + name: "AllTypes", + in: StringTag{ + BoolStr: true, + IntStr: 42, + UintptrStr: 44, + StrStr: "xzbit", + NumberStr: "46", + }, + want: `{ + "BoolStr": "true", + "IntStr": "42", + "UintptrStr": "44", + "StrStr": "\"xzbit\"", + "NumberStr": "46" + }`, + }, + { + // See golang.org/issues/38173. + name: "StringDoubleEscapes", + in: StringTag{ + StrStr: "\b\f\n\r\t\"\\", + NumberStr: "0", // just to satisfy the roundtrip + }, + want: `{ + "BoolStr": "false", + "IntStr": "0", + "UintptrStr": "0", + "StrStr": "\"\\u0008\\u000c\\n\\r\\t\\\"\\\\\"", + "NumberStr": "0" + }`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Indent with a tab prefix to make the multi-line string + // literals in the table nicer to read. + got, err := MarshalIndent(&test.in, "\t\t\t", "\t") + if err != nil { + t.Fatal(err) + } + if got := string(got); got != test.want { + t.Fatalf(" got: %s\nwant: %s\n", got, test.want) + } + + // Verify that it round-trips. + var s2 StringTag + if err := Unmarshal(got, &s2); err != nil { + t.Fatalf("Decode: %v", err) + } + if !reflect.DeepEqual(test.in, s2) { + t.Fatalf("decode didn't match.\nsource: %#v\nEncoded as:\n%s\ndecode: %#v", test.in, string(got), s2) + } + }) + } +} + +// byte slices are special even if they're renamed types. +type renamedByte byte +type renamedByteSlice []byte +type renamedRenamedByteSlice []renamedByte + +func TestEncodeRenamedByteSlice(t *testing.T) { + s := renamedByteSlice("abc") + result, err := Marshal(s) + if err != nil { + t.Fatal(err) + } + expect := `"YWJj"` + if string(result) != expect { + t.Errorf(" got %s want %s", result, expect) + } + r := renamedRenamedByteSlice("abc") + result, err = Marshal(r) + if err != nil { + t.Fatal(err) + } + if string(result) != expect { + t.Errorf(" got %s want %s", result, expect) + } +} + +type SamePointerNoCycle struct { + Ptr1, Ptr2 *SamePointerNoCycle +} + +var samePointerNoCycle = &SamePointerNoCycle{} + +type PointerCycle struct { + Ptr *PointerCycle +} + +var pointerCycle = &PointerCycle{} + +type PointerCycleIndirect struct { + Ptrs []any +} + +type RecursiveSlice []RecursiveSlice + +var ( + pointerCycleIndirect = &PointerCycleIndirect{} + mapCycle = make(map[string]any) + sliceCycle = []any{nil} + sliceNoCycle = []any{nil, nil} + recursiveSliceCycle = []RecursiveSlice{nil} +) + +func init() { + ptr := &SamePointerNoCycle{} + samePointerNoCycle.Ptr1 = ptr + samePointerNoCycle.Ptr2 = ptr + + pointerCycle.Ptr = pointerCycle + pointerCycleIndirect.Ptrs = []any{pointerCycleIndirect} + + mapCycle["x"] = mapCycle + sliceCycle[0] = sliceCycle + sliceNoCycle[1] = sliceNoCycle[:1] + for i := startDetectingCyclesAfter; i > 0; i-- { + sliceNoCycle = []any{sliceNoCycle} + } + recursiveSliceCycle[0] = recursiveSliceCycle +} + +func TestSamePointerNoCycle(t *testing.T) { + if _, err := Marshal(samePointerNoCycle); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestSliceNoCycle(t *testing.T) { + if _, err := Marshal(sliceNoCycle); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +var unsupportedValues = []any{ + math.NaN(), + math.Inf(-1), + math.Inf(1), + pointerCycle, + pointerCycleIndirect, + mapCycle, + sliceCycle, + recursiveSliceCycle, +} + +func TestUnsupportedValues(t *testing.T) { + for _, v := range unsupportedValues { + if _, err := Marshal(v); err != nil { + if _, ok := err.(*UnsupportedValueError); !ok { + t.Errorf("for %v, got %T want UnsupportedValueError", v, err) + } + } else { + t.Errorf("for %v, expected error", v) + } + } +} + +// Issue 43207 +func TestMarshalTextFloatMap(t *testing.T) { + m := map[textfloat]string{ + textfloat(math.NaN()): "1", + textfloat(math.NaN()): "1", + } + got, err := Marshal(m) + if err != nil { + t.Errorf("Marshal() error: %v", err) + } + want := `{"TF:NaN":"1","TF:NaN":"1"}` + if string(got) != want { + t.Errorf("Marshal() = %s, want %s", got, want) + } +} + +// Ref has Marshaler and Unmarshaler methods with pointer receiver. +type Ref int + +func (*Ref) MarshalJSON() ([]byte, error) { + return []byte(`"ref"`), nil +} + +func (r *Ref) UnmarshalJSON([]byte) error { + *r = 12 + return nil +} + +// Val has Marshaler methods with value receiver. +type Val int + +func (Val) MarshalJSON() ([]byte, error) { + return []byte(`"val"`), nil +} + +// RefText has Marshaler and Unmarshaler methods with pointer receiver. +type RefText int + +func (*RefText) MarshalText() ([]byte, error) { + return []byte(`"ref"`), nil +} + +func (r *RefText) UnmarshalText([]byte) error { + *r = 13 + return nil +} + +// ValText has Marshaler methods with value receiver. +type ValText int + +func (ValText) MarshalText() ([]byte, error) { + return []byte(`"val"`), nil +} + +func TestRefValMarshal(t *testing.T) { + var s = struct { + R0 Ref + R1 *Ref + R2 RefText + R3 *RefText + V0 Val + V1 *Val + V2 ValText + V3 *ValText + }{ + R0: 12, + R1: new(Ref), + R2: 14, + R3: new(RefText), + V0: 13, + V1: new(Val), + V2: 15, + V3: new(ValText), + } + const want = `{"R0":"ref","R1":"ref","R2":"\"ref\"","R3":"\"ref\"","V0":"val","V1":"val","V2":"\"val\"","V3":"\"val\""}` + b, err := Marshal(&s) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if got := string(b); got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +// C implements Marshaler and returns unescaped JSON. +type C int + +func (C) MarshalJSON() ([]byte, error) { + return []byte(`"<&>"`), nil +} + +// CText implements Marshaler and returns unescaped text. +type CText int + +func (CText) MarshalText() ([]byte, error) { + return []byte(`"<&>"`), nil +} + +func TestMarshalerEscaping(t *testing.T) { + var c C + want := `"\u003c\u0026\u003e"` + b, err := Marshal(c) + if err != nil { + t.Fatalf("Marshal(c): %v", err) + } + if got := string(b); got != want { + t.Errorf("Marshal(c) = %#q, want %#q", got, want) + } + + var ct CText + want = `"\"\u003c\u0026\u003e\""` + b, err = Marshal(ct) + if err != nil { + t.Fatalf("Marshal(ct): %v", err) + } + if got := string(b); got != want { + t.Errorf("Marshal(ct) = %#q, want %#q", got, want) + } +} + +func TestAnonymousFields(t *testing.T) { + tests := []struct { + label string // Test name + makeInput func() any // Function to create input value + want string // Expected JSON output + }{{ + // Both S1 and S2 have a field named X. From the perspective of S, + // it is ambiguous which one X refers to. + // This should not serialize either field. + label: "AmbiguousField", + makeInput: func() any { + type ( + S1 struct{ x, X int } + S2 struct{ x, X int } + S struct { + S1 + S2 + } + ) + return S{S1{1, 2}, S2{3, 4}} + }, + want: `{}`, + }, { + label: "DominantField", + // Both S1 and S2 have a field named X, but since S has an X field as + // well, it takes precedence over S1.X and S2.X. + makeInput: func() any { + type ( + S1 struct{ x, X int } + S2 struct{ x, X int } + S struct { + S1 + S2 + x, X int + } + ) + return S{S1{1, 2}, S2{3, 4}, 5, 6} + }, + want: `{"X":6}`, + }, { + // Unexported embedded field of non-struct type should not be serialized. + label: "UnexportedEmbeddedInt", + makeInput: func() any { + type ( + myInt int + S struct{ myInt } + ) + return S{5} + }, + want: `{}`, + }, { + // Exported embedded field of non-struct type should be serialized. + label: "ExportedEmbeddedInt", + makeInput: func() any { + type ( + MyInt int + S struct{ MyInt } + ) + return S{5} + }, + want: `{"MyInt":5}`, + }, { + // Unexported embedded field of pointer to non-struct type + // should not be serialized. + label: "UnexportedEmbeddedIntPointer", + makeInput: func() any { + type ( + myInt int + S struct{ *myInt } + ) + s := S{new(myInt)} + *s.myInt = 5 + return s + }, + want: `{}`, + }, { + // Exported embedded field of pointer to non-struct type + // should be serialized. + label: "ExportedEmbeddedIntPointer", + makeInput: func() any { + type ( + MyInt int + S struct{ *MyInt } + ) + s := S{new(MyInt)} + *s.MyInt = 5 + return s + }, + want: `{"MyInt":5}`, + }, { + // Exported fields of embedded structs should have their + // exported fields be serialized regardless of whether the struct types + // themselves are exported. + label: "EmbeddedStruct", + makeInput: func() any { + type ( + s1 struct{ x, X int } + S2 struct{ y, Y int } + S struct { + s1 + S2 + } + ) + return S{s1{1, 2}, S2{3, 4}} + }, + want: `{"X":2,"Y":4}`, + }, { + // Exported fields of pointers to embedded structs should have their + // exported fields be serialized regardless of whether the struct types + // themselves are exported. + label: "EmbeddedStructPointer", + makeInput: func() any { + type ( + s1 struct{ x, X int } + S2 struct{ y, Y int } + S struct { + *s1 + *S2 + } + ) + return S{&s1{1, 2}, &S2{3, 4}} + }, + want: `{"X":2,"Y":4}`, + }, { + // Exported fields on embedded unexported structs at multiple levels + // of nesting should still be serialized. + label: "NestedStructAndInts", + makeInput: func() any { + type ( + MyInt1 int + MyInt2 int + myInt int + s2 struct { + MyInt2 + myInt + } + s1 struct { + MyInt1 + myInt + s2 + } + S struct { + s1 + myInt + } + ) + return S{s1{1, 2, s2{3, 4}}, 6} + }, + want: `{"MyInt1":1,"MyInt2":3}`, + }, { + // If an anonymous struct pointer field is nil, we should ignore + // the embedded fields behind it. Not properly doing so may + // result in the wrong output or reflect panics. + label: "EmbeddedFieldBehindNilPointer", + makeInput: func() any { + type ( + S2 struct{ Field string } + S struct{ *S2 } + ) + return S{} + }, + want: `{}`, + }} + + for _, tt := range tests { + t.Run(tt.label, func(t *testing.T) { + b, err := Marshal(tt.makeInput()) + if err != nil { + t.Fatalf("Marshal() = %v, want nil error", err) + } + if string(b) != tt.want { + t.Fatalf("Marshal() = %q, want %q", b, tt.want) + } + }) + } +} + +type BugA struct { + S string +} + +type BugB struct { + BugA + S string +} + +type BugC struct { + S string +} + +// Legal Go: We never use the repeated embedded field (S). +type BugX struct { + A int + BugA + BugB +} + +// golang.org/issue/16042. +// Even if a nil interface value is passed in, as long as +// it implements Marshaler, it should be marshaled. +type nilJSONMarshaler string + +func (nm *nilJSONMarshaler) MarshalJSON() ([]byte, error) { + if nm == nil { + return Marshal("0zenil0") + } + return Marshal("zenil:" + string(*nm)) +} + +// golang.org/issue/34235. +// Even if a nil interface value is passed in, as long as +// it implements encoding.TextMarshaler, it should be marshaled. +type nilTextMarshaler string + +func (nm *nilTextMarshaler) MarshalText() ([]byte, error) { + if nm == nil { + return []byte("0zenil0"), nil + } + return []byte("zenil:" + string(*nm)), nil +} + +// See golang.org/issue/16042 and golang.org/issue/34235. +func TestNilMarshal(t *testing.T) { + testCases := []struct { + v any + want string + }{ + {v: nil, want: `null`}, + {v: new(float64), want: `0`}, + {v: []any(nil), want: `null`}, + {v: []string(nil), want: `null`}, + {v: map[string]string(nil), want: `null`}, + {v: []byte(nil), want: `null`}, + {v: struct{ M string }{"gopher"}, want: `{"M":"gopher"}`}, + {v: struct{ M Marshaler }{}, want: `{"M":null}`}, + {v: struct{ M Marshaler }{(*nilJSONMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, + {v: struct{ M any }{(*nilJSONMarshaler)(nil)}, want: `{"M":null}`}, + {v: struct{ M encoding.TextMarshaler }{}, want: `{"M":null}`}, + {v: struct{ M encoding.TextMarshaler }{(*nilTextMarshaler)(nil)}, want: `{"M":"0zenil0"}`}, + {v: struct{ M any }{(*nilTextMarshaler)(nil)}, want: `{"M":null}`}, + } + + for _, tt := range testCases { + out, err := Marshal(tt.v) + if err != nil || string(out) != tt.want { + t.Errorf("Marshal(%#v) = %#q, %#v, want %#q, nil", tt.v, out, err, tt.want) + continue + } + } +} + +// Issue 5245. +func TestEmbeddedBug(t *testing.T) { + v := BugB{ + BugA{"A"}, + "B", + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal:", err) + } + want := `{"S":"B"}` + got := string(b) + if got != want { + t.Fatalf("Marshal: got %s want %s", got, want) + } + // Now check that the duplicate field, S, does not appear. + x := BugX{ + A: 23, + } + b, err = Marshal(x) + if err != nil { + t.Fatal("Marshal:", err) + } + want = `{"A":23}` + got = string(b) + if got != want { + t.Fatalf("Marshal: got %s want %s", got, want) + } +} + +type BugD struct { // Same as BugA after tagging. + XXX string `json:"S"` +} + +// BugD's tagged S field should dominate BugA's. +type BugY struct { + BugA + BugD +} + +// Test that a field with a tag dominates untagged fields. +func TestTaggedFieldDominates(t *testing.T) { + v := BugY{ + BugA{"BugA"}, + BugD{"BugD"}, + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal:", err) + } + want := `{"S":"BugD"}` + got := string(b) + if got != want { + t.Fatalf("Marshal: got %s want %s", got, want) + } +} + +// There are no tags here, so S should not appear. +type BugZ struct { + BugA + BugC + BugY // Contains a tagged S field through BugD; should not dominate. +} + +func TestDuplicatedFieldDisappears(t *testing.T) { + v := BugZ{ + BugA{"BugA"}, + BugC{"BugC"}, + BugY{ + BugA{"nested BugA"}, + BugD{"nested BugD"}, + }, + } + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal:", err) + } + want := `{}` + got := string(b) + if got != want { + t.Fatalf("Marshal: got %s want %s", got, want) + } +} + +func TestIssue10281(t *testing.T) { + type Foo struct { + N Number + } + x := Foo{Number(`invalid`)} + + b, err := Marshal(&x) + if err == nil { + t.Errorf("Marshal(&x) = %#q; want error", b) + } +} + +func TestMarshalErrorAndReuseEncodeState(t *testing.T) { + // Disable the GC temporarily to prevent encodeState's in Pool being cleaned away during the test. + percent := debug.SetGCPercent(-1) + defer debug.SetGCPercent(percent) + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + if b, err := Marshal(dummy); err == nil { + t.Errorf("Marshal(dummy) = %#q; want error", b) + } + + type Data struct { + A string + I int + } + data := Data{A: "a", I: 1} + b, err := Marshal(data) + if err != nil { + t.Errorf("Marshal(%v) = %v", data, err) + } + + var data2 Data + if err := Unmarshal(b, &data2); err != nil { + t.Errorf("Unmarshal(%v) = %v", data2, err) + } + if data2 != data { + t.Errorf("expect: %v, but get: %v", data, data2) + } +} + +func TestHTMLEscape(t *testing.T) { + var b, want bytes.Buffer + m := `{"M":"<html>foo &` + "\xe2\x80\xa8 \xe2\x80\xa9" + `</html>"}` + want.Write([]byte(`{"M":"\u003chtml\u003efoo \u0026\u2028 \u2029\u003c/html\u003e"}`)) + HTMLEscape(&b, []byte(m)) + if !bytes.Equal(b.Bytes(), want.Bytes()) { + t.Errorf("HTMLEscape(&b, []byte(m)) = %s; want %s", b.Bytes(), want.Bytes()) + } +} + +// golang.org/issue/8582 +func TestEncodePointerString(t *testing.T) { + type stringPointer struct { + N *int64 `json:"n,string"` + } + var n int64 = 42 + b, err := Marshal(stringPointer{N: &n}) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if got, want := string(b), `{"n":"42"}`; got != want { + t.Errorf("Marshal = %s, want %s", got, want) + } + var back stringPointer + err = Unmarshal(b, &back) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if back.N == nil { + t.Fatalf("Unmarshaled nil N field") + } + if *back.N != 42 { + t.Fatalf("*N = %d; want 42", *back.N) + } +} + +var encodeStringTests = []struct { + in string + out string +}{ + {"\x00", `"\u0000"`}, + {"\x01", `"\u0001"`}, + {"\x02", `"\u0002"`}, + {"\x03", `"\u0003"`}, + {"\x04", `"\u0004"`}, + {"\x05", `"\u0005"`}, + {"\x06", `"\u0006"`}, + {"\x07", `"\u0007"`}, + {"\x08", `"\u0008"`}, + {"\x09", `"\t"`}, + {"\x0a", `"\n"`}, + {"\x0b", `"\u000b"`}, + {"\x0c", `"\u000c"`}, + {"\x0d", `"\r"`}, + {"\x0e", `"\u000e"`}, + {"\x0f", `"\u000f"`}, + {"\x10", `"\u0010"`}, + {"\x11", `"\u0011"`}, + {"\x12", `"\u0012"`}, + {"\x13", `"\u0013"`}, + {"\x14", `"\u0014"`}, + {"\x15", `"\u0015"`}, + {"\x16", `"\u0016"`}, + {"\x17", `"\u0017"`}, + {"\x18", `"\u0018"`}, + {"\x19", `"\u0019"`}, + {"\x1a", `"\u001a"`}, + {"\x1b", `"\u001b"`}, + {"\x1c", `"\u001c"`}, + {"\x1d", `"\u001d"`}, + {"\x1e", `"\u001e"`}, + {"\x1f", `"\u001f"`}, +} + +func TestEncodeString(t *testing.T) { + for _, tt := range encodeStringTests { + b, err := Marshal(tt.in) + if err != nil { + t.Errorf("Marshal(%q): %v", tt.in, err) + continue + } + out := string(b) + if out != tt.out { + t.Errorf("Marshal(%q) = %#q, want %#q", tt.in, out, tt.out) + } + } +} + +type jsonbyte byte + +func (b jsonbyte) MarshalJSON() ([]byte, error) { return tenc(`{"JB":%d}`, b) } + +type textbyte byte + +func (b textbyte) MarshalText() ([]byte, error) { return tenc(`TB:%d`, b) } + +type jsonint int + +func (i jsonint) MarshalJSON() ([]byte, error) { return tenc(`{"JI":%d}`, i) } + +type textint int + +func (i textint) MarshalText() ([]byte, error) { return tenc(`TI:%d`, i) } + +func tenc(format string, a ...any) ([]byte, error) { + var buf bytes.Buffer + fmt.Fprintf(&buf, format, a...) + return buf.Bytes(), nil +} + +type textfloat float64 + +func (f textfloat) MarshalText() ([]byte, error) { return tenc(`TF:%0.2f`, f) } + +// Issue 13783 +func TestEncodeBytekind(t *testing.T) { + testdata := []struct { + data any + want string + }{ + {byte(7), "7"}, + {jsonbyte(7), `{"JB":7}`}, + {textbyte(4), `"TB:4"`}, + {jsonint(5), `{"JI":5}`}, + {textint(1), `"TI:1"`}, + {[]byte{0, 1}, `"AAE="`}, + {[]jsonbyte{0, 1}, `[{"JB":0},{"JB":1}]`}, + {[][]jsonbyte{{0, 1}, {3}}, `[[{"JB":0},{"JB":1}],[{"JB":3}]]`}, + {[]textbyte{2, 3}, `["TB:2","TB:3"]`}, + {[]jsonint{5, 4}, `[{"JI":5},{"JI":4}]`}, + {[]textint{9, 3}, `["TI:9","TI:3"]`}, + {[]int{9, 3}, `[9,3]`}, + {[]textfloat{12, 3}, `["TF:12.00","TF:3.00"]`}, + } + for _, d := range testdata { + js, err := Marshal(d.data) + if err != nil { + t.Error(err) + continue + } + got, want := string(js), d.want + if got != want { + t.Errorf("got %s, want %s", got, want) + } + } +} + +func TestTextMarshalerMapKeysAreSorted(t *testing.T) { + b, err := Marshal(map[unmarshalerText]int{ + {"x", "y"}: 1, + {"y", "x"}: 2, + {"a", "z"}: 3, + {"z", "a"}: 4, + }) + if err != nil { + t.Fatalf("Failed to Marshal text.Marshaler: %v", err) + } + const want = `{"a:z":3,"x:y":1,"y:x":2,"z:a":4}` + if string(b) != want { + t.Errorf("Marshal map with text.Marshaler keys: got %#q, want %#q", b, want) + } +} + +// https://golang.org/issue/33675 +func TestNilMarshalerTextMapKey(t *testing.T) { + b, err := Marshal(map[*unmarshalerText]int{ + (*unmarshalerText)(nil): 1, + {"A", "B"}: 2, + }) + if err != nil { + t.Fatalf("Failed to Marshal *text.Marshaler: %v", err) + } + const want = `{"":1,"A:B":2}` + if string(b) != want { + t.Errorf("Marshal map with *text.Marshaler keys: got %#q, want %#q", b, want) + } +} + +var re = regexp.MustCompile + +// syntactic checks on form of marshaled floating point numbers. +var badFloatREs = []*regexp.Regexp{ + re(`p`), // no binary exponential notation + re(`^\+`), // no leading + sign + re(`^-?0[^.]`), // no unnecessary leading zeros + re(`^-?\.`), // leading zero required before decimal point + re(`\.(e|$)`), // no trailing decimal + re(`\.[0-9]+0(e|$)`), // no trailing zero in fraction + re(`^-?(0|[0-9]{2,})\..*e`), // exponential notation must have normalized mantissa + re(`e[0-9]`), // positive exponent must be signed + re(`e[+-]0`), // exponent must not have leading zeros + re(`e-[1-6]$`), // not tiny enough for exponential notation + re(`e+(.|1.|20)$`), // not big enough for exponential notation + re(`^-?0\.0000000`), // too tiny, should use exponential notation + re(`^-?[0-9]{22}`), // too big, should use exponential notation + re(`[1-9][0-9]{16}[1-9]`), // too many significant digits in integer + re(`[1-9][0-9.]{17}[1-9]`), // too many significant digits in decimal + // below here for float32 only + re(`[1-9][0-9]{8}[1-9]`), // too many significant digits in integer + re(`[1-9][0-9.]{9}[1-9]`), // too many significant digits in decimal +} + +func TestMarshalFloat(t *testing.T) { + t.Parallel() + nfail := 0 + test := func(f float64, bits int) { + vf := any(f) + if bits == 32 { + f = float64(float32(f)) // round + vf = float32(f) + } + bout, err := Marshal(vf) + if err != nil { + t.Errorf("Marshal(%T(%g)): %v", vf, vf, err) + nfail++ + return + } + out := string(bout) + + // result must convert back to the same float + g, err := strconv.ParseFloat(out, bits) + if err != nil { + t.Errorf("Marshal(%T(%g)) = %q, cannot parse back: %v", vf, vf, out, err) + nfail++ + return + } + if f != g || fmt.Sprint(f) != fmt.Sprint(g) { // fmt.Sprint handles ±0 + t.Errorf("Marshal(%T(%g)) = %q (is %g, not %g)", vf, vf, out, float32(g), vf) + nfail++ + return + } + + bad := badFloatREs + if bits == 64 { + bad = bad[:len(bad)-2] + } + for _, re := range bad { + if re.MatchString(out) { + t.Errorf("Marshal(%T(%g)) = %q, must not match /%s/", vf, vf, out, re) + nfail++ + return + } + } + } + + var ( + bigger = math.Inf(+1) + smaller = math.Inf(-1) + ) + + var digits = "1.2345678901234567890123" + for i := len(digits); i >= 2; i-- { + if testing.Short() && i < len(digits)-4 { + break + } + for exp := -30; exp <= 30; exp++ { + for _, sign := range "+-" { + for bits := 32; bits <= 64; bits += 32 { + s := fmt.Sprintf("%c%se%d", sign, digits[:i], exp) + f, err := strconv.ParseFloat(s, bits) + if err != nil { + log.Fatal(err) + } + next := math.Nextafter + if bits == 32 { + next = func(g, h float64) float64 { + return float64(math.Nextafter32(float32(g), float32(h))) + } + } + test(f, bits) + test(next(f, bigger), bits) + test(next(f, smaller), bits) + if nfail > 50 { + t.Fatalf("stopping test early") + } + } + } + } + } + test(0, 64) + test(math.Copysign(0, -1), 64) + test(0, 32) + test(math.Copysign(0, -1), 32) +} + +func TestMarshalRawMessageValue(t *testing.T) { + type ( + T1 struct { + M RawMessage `json:",omitempty"` + } + T2 struct { + M *RawMessage `json:",omitempty"` + } + ) + + var ( + rawNil = RawMessage(nil) + rawEmpty = RawMessage([]byte{}) + rawText = RawMessage([]byte(`"foo"`)) + ) + + tests := []struct { + in any + want string + ok bool + }{ + // Test with nil RawMessage. + {rawNil, "null", true}, + {&rawNil, "null", true}, + {[]any{rawNil}, "[null]", true}, + {&[]any{rawNil}, "[null]", true}, + {[]any{&rawNil}, "[null]", true}, + {&[]any{&rawNil}, "[null]", true}, + {struct{ M RawMessage }{rawNil}, `{"M":null}`, true}, + {&struct{ M RawMessage }{rawNil}, `{"M":null}`, true}, + {struct{ M *RawMessage }{&rawNil}, `{"M":null}`, true}, + {&struct{ M *RawMessage }{&rawNil}, `{"M":null}`, true}, + {map[string]any{"M": rawNil}, `{"M":null}`, true}, + {&map[string]any{"M": rawNil}, `{"M":null}`, true}, + {map[string]any{"M": &rawNil}, `{"M":null}`, true}, + {&map[string]any{"M": &rawNil}, `{"M":null}`, true}, + {T1{rawNil}, "{}", true}, + {T2{&rawNil}, `{"M":null}`, true}, + {&T1{rawNil}, "{}", true}, + {&T2{&rawNil}, `{"M":null}`, true}, + + // Test with empty, but non-nil, RawMessage. + {rawEmpty, "", false}, + {&rawEmpty, "", false}, + {[]any{rawEmpty}, "", false}, + {&[]any{rawEmpty}, "", false}, + {[]any{&rawEmpty}, "", false}, + {&[]any{&rawEmpty}, "", false}, + {struct{ X RawMessage }{rawEmpty}, "", false}, + {&struct{ X RawMessage }{rawEmpty}, "", false}, + {struct{ X *RawMessage }{&rawEmpty}, "", false}, + {&struct{ X *RawMessage }{&rawEmpty}, "", false}, + {map[string]any{"nil": rawEmpty}, "", false}, + {&map[string]any{"nil": rawEmpty}, "", false}, + {map[string]any{"nil": &rawEmpty}, "", false}, + {&map[string]any{"nil": &rawEmpty}, "", false}, + {T1{rawEmpty}, "{}", true}, + {T2{&rawEmpty}, "", false}, + {&T1{rawEmpty}, "{}", true}, + {&T2{&rawEmpty}, "", false}, + + // Test with RawMessage with some text. + // + // The tests below marked with Issue6458 used to generate "ImZvbyI=" instead "foo". + // This behavior was intentionally changed in Go 1.8. + // See https://golang.org/issues/14493#issuecomment-255857318 + {rawText, `"foo"`, true}, // Issue6458 + {&rawText, `"foo"`, true}, + {[]any{rawText}, `["foo"]`, true}, // Issue6458 + {&[]any{rawText}, `["foo"]`, true}, // Issue6458 + {[]any{&rawText}, `["foo"]`, true}, + {&[]any{&rawText}, `["foo"]`, true}, + {struct{ M RawMessage }{rawText}, `{"M":"foo"}`, true}, // Issue6458 + {&struct{ M RawMessage }{rawText}, `{"M":"foo"}`, true}, + {struct{ M *RawMessage }{&rawText}, `{"M":"foo"}`, true}, + {&struct{ M *RawMessage }{&rawText}, `{"M":"foo"}`, true}, + {map[string]any{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 + {&map[string]any{"M": rawText}, `{"M":"foo"}`, true}, // Issue6458 + {map[string]any{"M": &rawText}, `{"M":"foo"}`, true}, + {&map[string]any{"M": &rawText}, `{"M":"foo"}`, true}, + {T1{rawText}, `{"M":"foo"}`, true}, // Issue6458 + {T2{&rawText}, `{"M":"foo"}`, true}, + {&T1{rawText}, `{"M":"foo"}`, true}, + {&T2{&rawText}, `{"M":"foo"}`, true}, + } + + for i, tt := range tests { + b, err := Marshal(tt.in) + if ok := (err == nil); ok != tt.ok { + if err != nil { + t.Errorf("test %d, unexpected failure: %v", i, err) + } else { + t.Errorf("test %d, unexpected success", i) + } + } + if got := string(b); got != tt.want { + t.Errorf("test %d, Marshal(%#v) = %q, want %q", i, tt.in, got, tt.want) + } + } +} + +type marshalPanic struct{} + +func (marshalPanic) MarshalJSON() ([]byte, error) { panic(0xdead) } + +func TestMarshalPanic(t *testing.T) { + defer func() { + if got := recover(); !reflect.DeepEqual(got, 0xdead) { + t.Errorf("panic() = (%T)(%v), want 0xdead", got, got) + } + }() + Marshal(&marshalPanic{}) + t.Error("Marshal should have panicked") +} + +func TestMarshalUncommonFieldNames(t *testing.T) { + v := struct { + A0, À, Aβ int + }{} + b, err := Marshal(v) + if err != nil { + t.Fatal("Marshal:", err) + } + want := `{"A0":0,"À":0,"Aβ":0}` + got := string(b) + if got != want { + t.Fatalf("Marshal: got %s want %s", got, want) + } +} + +func TestMarshalerError(t *testing.T) { + s := "test variable" + st := reflect.TypeOf(s) + errText := "json: test error" + + tests := []struct { + err *MarshalerError + want string + }{ + { + &MarshalerError{st, fmt.Errorf(errText), ""}, + "json: error calling MarshalJSON for type " + st.String() + ": " + errText, + }, + { + &MarshalerError{st, fmt.Errorf(errText), "TestMarshalerError"}, + "json: error calling TestMarshalerError for type " + st.String() + ": " + errText, + }, + } + + for i, tt := range tests { + got := tt.err.Error() + if got != tt.want { + t.Errorf("MarshalerError test %d, got: %s, want: %s", i, got, tt.want) + } + } +} diff --git a/src/encoding/json/example_marshaling_test.go b/src/encoding/json/example_marshaling_test.go new file mode 100644 index 0000000..7f15c74 --- /dev/null +++ b/src/encoding/json/example_marshaling_test.go @@ -0,0 +1,73 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "encoding/json" + "fmt" + "log" + "strings" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalJSON() ([]byte, error) { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + + return json.Marshal(s) +} + +func Example_customMarshalJSON() { + blob := `["gopher","armadillo","zebra","unknown","gopher","bee","gopher","zebra"]` + var zoo []Animal + if err := json.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/src/encoding/json/example_test.go b/src/encoding/json/example_test.go new file mode 100644 index 0000000..2261c77 --- /dev/null +++ b/src/encoding/json/example_test.go @@ -0,0 +1,310 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strings" +) + +func ExampleMarshal() { + type ColorGroup struct { + ID int + Name string + Colors []string + } + group := ColorGroup{ + ID: 1, + Name: "Reds", + Colors: []string{"Crimson", "Red", "Ruby", "Maroon"}, + } + b, err := json.Marshal(group) + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + // Output: + // {"ID":1,"Name":"Reds","Colors":["Crimson","Red","Ruby","Maroon"]} +} + +func ExampleUnmarshal() { + var jsonBlob = []byte(`[ + {"Name": "Platypus", "Order": "Monotremata"}, + {"Name": "Quoll", "Order": "Dasyuromorphia"} +]`) + type Animal struct { + Name string + Order string + } + var animals []Animal + err := json.Unmarshal(jsonBlob, &animals) + if err != nil { + fmt.Println("error:", err) + } + fmt.Printf("%+v", animals) + // Output: + // [{Name:Platypus Order:Monotremata} {Name:Quoll Order:Dasyuromorphia}] +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder() { + const jsonStream = ` + {"Name": "Ed", "Text": "Knock knock."} + {"Name": "Sam", "Text": "Who's there?"} + {"Name": "Ed", "Text": "Go fmt."} + {"Name": "Sam", "Text": "Go fmt who?"} + {"Name": "Ed", "Text": "Go fmt yourself!"} +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + var m Message + if err := dec.Decode(&m); err == io.EOF { + break + } else if err != nil { + log.Fatal(err) + } + fmt.Printf("%s: %s\n", m.Name, m.Text) + } + // Output: + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! +} + +// This example uses a Decoder to decode a stream of distinct JSON values. +func ExampleDecoder_Token() { + const jsonStream = ` + {"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234} +` + dec := json.NewDecoder(strings.NewReader(jsonStream)) + for { + t, err := dec.Token() + if err == io.EOF { + break + } + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v", t, t) + if dec.More() { + fmt.Printf(" (more)") + } + fmt.Printf("\n") + } + // Output: + // json.Delim: { (more) + // string: Message (more) + // string: Hello (more) + // string: Array (more) + // json.Delim: [ (more) + // float64: 1 (more) + // float64: 2 (more) + // float64: 3 + // json.Delim: ] (more) + // string: Null (more) + // <nil>: <nil> (more) + // string: Number (more) + // float64: 1.234 + // json.Delim: } +} + +// This example uses a Decoder to decode a streaming array of JSON objects. +func ExampleDecoder_Decode_stream() { + const jsonStream = ` + [ + {"Name": "Ed", "Text": "Knock knock."}, + {"Name": "Sam", "Text": "Who's there?"}, + {"Name": "Ed", "Text": "Go fmt."}, + {"Name": "Sam", "Text": "Go fmt who?"}, + {"Name": "Ed", "Text": "Go fmt yourself!"} + ] +` + type Message struct { + Name, Text string + } + dec := json.NewDecoder(strings.NewReader(jsonStream)) + + // read open bracket + t, err := dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // while the array contains values + for dec.More() { + var m Message + // decode an array value (Message) + err := dec.Decode(&m) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("%v: %v\n", m.Name, m.Text) + } + + // read closing bracket + t, err = dec.Token() + if err != nil { + log.Fatal(err) + } + fmt.Printf("%T: %v\n", t, t) + + // Output: + // json.Delim: [ + // Ed: Knock knock. + // Sam: Who's there? + // Ed: Go fmt. + // Sam: Go fmt who? + // Ed: Go fmt yourself! + // json.Delim: ] +} + +// This example uses RawMessage to delay parsing part of a JSON message. +func ExampleRawMessage_unmarshal() { + type Color struct { + Space string + Point json.RawMessage // delay parsing until we know the color space + } + type RGB struct { + R uint8 + G uint8 + B uint8 + } + type YCbCr struct { + Y uint8 + Cb int8 + Cr int8 + } + + var j = []byte(`[ + {"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}}, + {"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}} +]`) + var colors []Color + err := json.Unmarshal(j, &colors) + if err != nil { + log.Fatalln("error:", err) + } + + for _, c := range colors { + var dst any + switch c.Space { + case "RGB": + dst = new(RGB) + case "YCbCr": + dst = new(YCbCr) + } + err := json.Unmarshal(c.Point, dst) + if err != nil { + log.Fatalln("error:", err) + } + fmt.Println(c.Space, dst) + } + // Output: + // YCbCr &{255 0 -10} + // RGB &{98 218 255} +} + +// This example uses RawMessage to use a precomputed JSON during marshal. +func ExampleRawMessage_marshal() { + h := json.RawMessage(`{"precomputed": true}`) + + c := struct { + Header *json.RawMessage `json:"header"` + Body string `json:"body"` + }{Header: &h, Body: "Hello Gophers!"} + + b, err := json.MarshalIndent(&c, "", "\t") + if err != nil { + fmt.Println("error:", err) + } + os.Stdout.Write(b) + + // Output: + // { + // "header": { + // "precomputed": true + // }, + // "body": "Hello Gophers!" + // } +} + +func ExampleIndent() { + type Road struct { + Name string + Number int + } + roads := []Road{ + {"Diamond Fork", 29}, + {"Sheep Creek", 51}, + } + + b, err := json.Marshal(roads) + if err != nil { + log.Fatal(err) + } + + var out bytes.Buffer + json.Indent(&out, b, "=", "\t") + out.WriteTo(os.Stdout) + // Output: + // [ + // = { + // = "Name": "Diamond Fork", + // = "Number": 29 + // = }, + // = { + // = "Name": "Sheep Creek", + // = "Number": 51 + // = } + // =] +} + +func ExampleMarshalIndent() { + data := map[string]int{ + "a": 1, + "b": 2, + } + + b, err := json.MarshalIndent(data, "<prefix>", "<indent>") + if err != nil { + log.Fatal(err) + } + + fmt.Println(string(b)) + // Output: + // { + // <prefix><indent>"a": 1, + // <prefix><indent>"b": 2 + // <prefix>} +} + +func ExampleValid() { + goodJSON := `{"example": 1}` + badJSON := `{"example":2:]}}` + + fmt.Println(json.Valid([]byte(goodJSON)), json.Valid([]byte(badJSON))) + // Output: + // true false +} + +func ExampleHTMLEscape() { + var out bytes.Buffer + json.HTMLEscape(&out, []byte(`{"Name":"<b>HTML content</b>"}`)) + out.WriteTo(os.Stdout) + // Output: + //{"Name":"\u003cb\u003eHTML content\u003c/b\u003e"} +} diff --git a/src/encoding/json/example_text_marshaling_test.go b/src/encoding/json/example_text_marshaling_test.go new file mode 100644 index 0000000..04c7813 --- /dev/null +++ b/src/encoding/json/example_text_marshaling_test.go @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json_test + +import ( + "encoding/json" + "fmt" + "log" + "strings" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalJSON() { + blob := `["small","regular","large","unrecognized","small","normal","small","large"]` + var inventory []Size + if err := json.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/src/encoding/json/fold.go b/src/encoding/json/fold.go new file mode 100644 index 0000000..c4c671b --- /dev/null +++ b/src/encoding/json/fold.go @@ -0,0 +1,48 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "unicode" + "unicode/utf8" +) + +// foldName returns a folded string such that foldName(x) == foldName(y) +// is identical to bytes.EqualFold(x, y). +func foldName(in []byte) []byte { + // This is inlinable to take advantage of "function outlining". + var arr [32]byte // large enough for most JSON names + return appendFoldedName(arr[:0], in) +} + +func appendFoldedName(out, in []byte) []byte { + for i := 0; i < len(in); { + // Handle single-byte ASCII. + if c := in[i]; c < utf8.RuneSelf { + if 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + out = append(out, c) + i++ + continue + } + // Handle multi-byte Unicode. + r, n := utf8.DecodeRune(in[i:]) + out = utf8.AppendRune(out, foldRune(r)) + i += n + } + return out +} + +// foldRune is returns the smallest rune for all runes in the same fold set. +func foldRune(r rune) rune { + for { + r2 := unicode.SimpleFold(r) + if r2 <= r { + return r2 + } + r = r2 + } +} diff --git a/src/encoding/json/fold_test.go b/src/encoding/json/fold_test.go new file mode 100644 index 0000000..9d6fd05 --- /dev/null +++ b/src/encoding/json/fold_test.go @@ -0,0 +1,50 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "testing" +) + +func FuzzEqualFold(f *testing.F) { + for _, ss := range [][2]string{ + {"", ""}, + {"123abc", "123ABC"}, + {"αβδ", "ΑΒΔ"}, + {"abc", "xyz"}, + {"abc", "XYZ"}, + {"1", "2"}, + {"hello, world!", "hello, world!"}, + {"hello, world!", "Hello, World!"}, + {"hello, world!", "HELLO, WORLD!"}, + {"hello, world!", "jello, world!"}, + {"γειά, κόσμε!", "γειά, κόσμε!"}, + {"γειά, κόσμε!", "Γειά, Κόσμε!"}, + {"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!"}, + {"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!"}, + {"AESKey", "aesKey"}, + {"AESKEY", "aes_key"}, + {"aes_key", "AES_KEY"}, + {"AES_KEY", "aes-key"}, + {"aes-key", "AES-KEY"}, + {"AES-KEY", "aesKey"}, + {"aesKey", "AesKey"}, + {"AesKey", "AESKey"}, + {"AESKey", "aeskey"}, + {"DESKey", "aeskey"}, + {"AES Key", "aeskey"}, + } { + f.Add([]byte(ss[0]), []byte(ss[1])) + } + equalFold := func(x, y []byte) bool { return string(foldName(x)) == string(foldName(y)) } + f.Fuzz(func(t *testing.T, x, y []byte) { + got := equalFold(x, y) + want := bytes.EqualFold(x, y) + if got != want { + t.Errorf("equalFold(%q, %q) = %v, want %v", x, y, got, want) + } + }) +} diff --git a/src/encoding/json/fuzz_test.go b/src/encoding/json/fuzz_test.go new file mode 100644 index 0000000..778664c --- /dev/null +++ b/src/encoding/json/fuzz_test.go @@ -0,0 +1,83 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "io" + "testing" +) + +func FuzzUnmarshalJSON(f *testing.F) { + f.Add([]byte(`{ +"object": { + "slice": [ + 1, + 2.0, + "3", + [4], + {5: {}} + ] +}, +"slice": [[]], +"string": ":)", +"int": 1e5, +"float": 3e-9" +}`)) + + f.Fuzz(func(t *testing.T, b []byte) { + for _, typ := range []func() interface{}{ + func() interface{} { return new(interface{}) }, + func() interface{} { return new(map[string]interface{}) }, + func() interface{} { return new([]interface{}) }, + } { + i := typ() + if err := Unmarshal(b, i); err != nil { + return + } + + encoded, err := Marshal(i) + if err != nil { + t.Fatalf("failed to marshal: %s", err) + } + + if err := Unmarshal(encoded, i); err != nil { + t.Fatalf("failed to roundtrip: %s", err) + } + } + }) +} + +func FuzzDecoderToken(f *testing.F) { + f.Add([]byte(`{ +"object": { + "slice": [ + 1, + 2.0, + "3", + [4], + {5: {}} + ] +}, +"slice": [[]], +"string": ":)", +"int": 1e5, +"float": 3e-9" +}`)) + + f.Fuzz(func(t *testing.T, b []byte) { + r := bytes.NewReader(b) + d := NewDecoder(r) + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + return + } + } + }) +} diff --git a/src/encoding/json/indent.go b/src/encoding/json/indent.go new file mode 100644 index 0000000..26bb5d2 --- /dev/null +++ b/src/encoding/json/indent.go @@ -0,0 +1,174 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "bytes" + +// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 +// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 +// so that the JSON will be safe to embed inside HTML <script> tags. +// For historical reasons, web browsers don't honor standard HTML +// escaping within <script> tags, so an alternative JSON encoding must be used. +func HTMLEscape(dst *bytes.Buffer, src []byte) { + dst.Grow(len(src)) + dst.Write(appendHTMLEscape(dst.AvailableBuffer(), src)) +} + +func appendHTMLEscape(dst, src []byte) []byte { + // The characters can only appear in string literals, + // so just scan the string one byte at a time. + start := 0 + for i, c := range src { + if c == '<' || c == '>' || c == '&' { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) + start = i + 1 + } + // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). + if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) + start = i + len("\u2029") + } + } + return append(dst, src[start:]...) +} + +// Compact appends to dst the JSON-encoded src with +// insignificant space characters elided. +func Compact(dst *bytes.Buffer, src []byte) error { + dst.Grow(len(src)) + b := dst.AvailableBuffer() + b, err := appendCompact(b, src, false) + dst.Write(b) + return err +} + +func appendCompact(dst, src []byte, escape bool) ([]byte, error) { + origLen := len(dst) + scan := newScanner() + defer freeScanner(scan) + start := 0 + for i, c := range src { + if escape && (c == '<' || c == '>' || c == '&') { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) + start = i + 1 + } + // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). + if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { + dst = append(dst, src[start:i]...) + dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) + start = i + len("\u2029") + } + v := scan.step(scan, c) + if v >= scanSkipSpace { + if v == scanError { + break + } + dst = append(dst, src[start:i]...) + start = i + 1 + } + } + if scan.eof() == scanError { + return dst[:origLen], scan.err + } + dst = append(dst, src[start:]...) + return dst, nil +} + +func appendNewline(dst []byte, prefix, indent string, depth int) []byte { + dst = append(dst, '\n') + dst = append(dst, prefix...) + for i := 0; i < depth; i++ { + dst = append(dst, indent...) + } + return dst +} + +// indentGrowthFactor specifies the growth factor of indenting JSON input. +// Empirically, the growth factor was measured to be between 1.4x to 1.8x +// for some set of compacted JSON with the indent being a single tab. +// Specify a growth factor slightly larger than what is observed +// to reduce probability of allocation in appendIndent. +// A factor no higher than 2 ensures that wasted space never exceeds 50%. +const indentGrowthFactor = 2 + +// Indent appends to dst an indented form of the JSON-encoded src. +// Each element in a JSON object or array begins on a new, +// indented line beginning with prefix followed by one or more +// copies of indent according to the indentation nesting. +// The data appended to dst does not begin with the prefix nor +// any indentation, to make it easier to embed inside other formatted JSON data. +// Although leading space characters (space, tab, carriage return, newline) +// at the beginning of src are dropped, trailing space characters +// at the end of src are preserved and copied to dst. +// For example, if src has no trailing spaces, neither will dst; +// if src ends in a trailing newline, so will dst. +func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { + dst.Grow(indentGrowthFactor * len(src)) + b := dst.AvailableBuffer() + b, err := appendIndent(b, src, prefix, indent) + dst.Write(b) + return err +} + +func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) { + origLen := len(dst) + scan := newScanner() + defer freeScanner(scan) + needIndent := false + depth := 0 + for _, c := range src { + scan.bytes++ + v := scan.step(scan, c) + if v == scanSkipSpace { + continue + } + if v == scanError { + break + } + if needIndent && v != scanEndObject && v != scanEndArray { + needIndent = false + depth++ + dst = appendNewline(dst, prefix, indent, depth) + } + + // Emit semantically uninteresting bytes + // (in particular, punctuation in strings) unmodified. + if v == scanContinue { + dst = append(dst, c) + continue + } + + // Add spacing around real punctuation. + switch c { + case '{', '[': + // delay indent so that empty object and array are formatted as {} and []. + needIndent = true + dst = append(dst, c) + case ',': + dst = append(dst, c) + dst = appendNewline(dst, prefix, indent, depth) + case ':': + dst = append(dst, c, ' ') + case '}', ']': + if needIndent { + // suppress indent in empty object/array + needIndent = false + } else { + depth-- + dst = appendNewline(dst, prefix, indent, depth) + } + dst = append(dst, c) + default: + dst = append(dst, c) + } + } + if scan.eof() == scanError { + return dst[:origLen], scan.err + } + return dst, nil +} diff --git a/src/encoding/json/number_test.go b/src/encoding/json/number_test.go new file mode 100644 index 0000000..c82e6de --- /dev/null +++ b/src/encoding/json/number_test.go @@ -0,0 +1,118 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "regexp" + "testing" +) + +func TestNumberIsValid(t *testing.T) { + // From: https://stackoverflow.com/a/13340826 + var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) + + validTests := []string{ + "0", + "-0", + "1", + "-1", + "0.1", + "-0.1", + "1234", + "-1234", + "12.34", + "-12.34", + "12E0", + "12E1", + "12e34", + "12E-0", + "12e+1", + "12e-34", + "-12E0", + "-12E1", + "-12e34", + "-12E-0", + "-12e+1", + "-12e-34", + "1.2E0", + "1.2E1", + "1.2e34", + "1.2E-0", + "1.2e+1", + "1.2e-34", + "-1.2E0", + "-1.2E1", + "-1.2e34", + "-1.2E-0", + "-1.2e+1", + "-1.2e-34", + "0E0", + "0E1", + "0e34", + "0E-0", + "0e+1", + "0e-34", + "-0E0", + "-0E1", + "-0e34", + "-0E-0", + "-0e+1", + "-0e-34", + } + + for _, test := range validTests { + if !isValidNumber(test) { + t.Errorf("%s should be valid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err != nil { + t.Errorf("%s should be valid but Unmarshal failed: %v", test, err) + } + + if !jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be valid but regexp does not match", test) + } + } + + invalidTests := []string{ + "", + "invalid", + "1.0.1", + "1..1", + "-1-2", + "012a42", + "01.2", + "012", + "12E12.12", + "1e2e3", + "1e+-2", + "1e--23", + "1e", + "e1", + "1e+", + "1ea", + "1a", + "1.a", + "1.", + "01", + "1.e1", + } + + for _, test := range invalidTests { + if isValidNumber(test) { + t.Errorf("%s should be invalid", test) + } + + var f float64 + if err := Unmarshal([]byte(test), &f); err == nil { + t.Errorf("%s should be invalid but unmarshal wrote %v", test, f) + } + + if jsonNumberRegexp.MatchString(test) { + t.Errorf("%s should be invalid but matches regexp", test) + } + } +} diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go new file mode 100644 index 0000000..4c43f5f --- /dev/null +++ b/src/encoding/json/scanner.go @@ -0,0 +1,610 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +// JSON value parser state machine. +// Just about at the limit of what is reasonable to write by hand. +// Some parts are a bit tedious, but overall it nicely factors out the +// otherwise common code from the multiple scanning functions +// in this package (Compact, Indent, checkValid, etc). +// +// This file starts with two simple examples using the scanner +// before diving into the scanner itself. + +import ( + "strconv" + "sync" +) + +// Valid reports whether data is a valid JSON encoding. +func Valid(data []byte) bool { + scan := newScanner() + defer freeScanner(scan) + return checkValid(data, scan) == nil +} + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +// checkValid returns nil or a SyntaxError. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// A SyntaxError is a description of a JSON syntax error. +// Unmarshal will return a SyntaxError if the JSON can't be parsed. +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. +// Callers call scan.reset and then pass bytes in one at a time +// by calling scan.step(&scan, c) for each byte. +// The return value, referred to as an opcode, tells the +// caller about significant parsing events like beginning +// and ending literals, objects, and arrays, so that the +// caller can follow along if it wishes. +// The return value scanEnd indicates that a single top-level +// JSON value has been completed, *before* the byte that +// just got passed in. (The indication must be delayed in order +// to recognize the end of numbers: is 123 a whole value or +// the beginning of 12345e+6?). +type scanner struct { + // The step is a func to be called to execute the next transition. + // Also tried using an integer constant and a single func + // with a switch, but using the func directly was 10% faster + // on a 64-bit Mac Mini, and it's nicer to read. + step func(*scanner, byte) int + + // Reached end of top-level value. + endTop bool + + // Stack of what we're in the middle of - array values, object keys, object values. + parseState []int + + // Error that happened, if any. + err error + + // total bytes consumed, updated by decoder.Decode (and deliberately + // not set to zero by scan.reset) + bytes int64 +} + +var scannerPool = sync.Pool{ + New: func() any { + return &scanner{} + }, +} + +func newScanner() *scanner { + scan := scannerPool.Get().(*scanner) + // scan.reset by design doesn't set bytes to zero + scan.bytes = 0 + scan.reset() + return scan +} + +func freeScanner(scan *scanner) { + // Avoid hanging on to too much memory in extreme cases. + if len(scan.parseState) > 1024 { + scan.parseState = nil + } + scannerPool.Put(scan) +} + +// These values are returned by the state transition functions +// assigned to scanner.state and the method scanner.eof. +// They give details about the current state of the scan that +// callers might be interested to know about. +// It is okay to ignore the return value of any particular +// call to scanner.state: if one call returns scanError, +// every subsequent call will return scanError too. +const ( + // Continue. + scanContinue = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + + // Stop. + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. +) + +// These values are stored in the parseState stack. +// They give the current state of a composite value +// being scanned. If the parser is inside a nested value +// the parseState describes the nested state, outermost at entry 0. +const ( + parseObjectKey = iota // parsing object key (before colon) + parseObjectValue // parsing object value (after colon) + parseArrayValue // parsing array value +) + +// This limits the max nesting depth to prevent stack overflow. +// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 +const maxNestingDepth = 10000 + +// reset prepares the scanner for use. +// It must be called before calling s.step. +func (s *scanner) reset() { + s.step = stateBeginValue + s.parseState = s.parseState[0:0] + s.err = nil + s.endTop = false +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() int { + if s.err != nil { + return scanError + } + if s.endTop { + return scanEnd + } + s.step(s, ' ') + if s.endTop { + return scanEnd + } + if s.err == nil { + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + } + return scanError +} + +// pushParseState pushes a new parse state p onto the parse stack. +// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. +func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { + s.parseState = append(s.parseState, newParseState) + if len(s.parseState) <= maxNestingDepth { + return successState + } + return s.error(c, "exceeded max depth") +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.parseState) - 1 + s.parseState = s.parseState[0:n] + if n == 0 { + s.step = stateEndTop + s.endTop = true + } else { + s.step = stateEndValue + } +} + +func isSpace(c byte) bool { + return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') +} + +// stateBeginValueOrEmpty is the state after reading `[`. +func stateBeginValueOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + return s.pushParseState(c, parseObjectKey, scanBeginObject) + case '[': + s.step = stateBeginValueOrEmpty + return s.pushParseState(c, parseArrayValue, scanBeginArray) + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '}' { + n := len(s.parseState) + s.parseState[n-1] = parseObjectValue + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c byte) int { + if isSpace(c) { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. +func stateEndValue(s *scanner, c byte) int { + n := len(s.parseState) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + s.endTop = true + return stateEndTop(s, c) + } + if isSpace(c) { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.parseState[n-1] + switch ps { + case parseObjectKey: + if c == ':' { + s.parseState[n-1] = parseObjectValue + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case parseObjectValue: + if c == ',' { + s.parseState[n-1] = parseObjectKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case parseArrayValue: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c byte) int { + if !isSpace(c) { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c byte) int { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. +func stateInStringEsc(s *scanner, c byte) int { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + case 'u': + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateNeg is the state after reading `-` during a number. +func stateNeg(s *scanner, c byte) int { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c byte) int { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c byte) int { + if c == '+' || c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. +func stateESign(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c byte) int { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c byte) int { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c byte) int { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c byte) int { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. +func stateFal(s *scanner, c byte) int { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. +func stateFals(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c byte) int { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c byte) int { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c byte) int { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c byte) int { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c byte, context string) int { + s.step = stateError + s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + return scanError +} + +// quoteChar formats c as a quoted character literal. +func quoteChar(c byte) string { + // special cases - different from quoted strings + if c == '\'' { + return `'\''` + } + if c == '"' { + return `'"'` + } + + // use quoted string with different quotation marks + s := strconv.Quote(string(c)) + return "'" + s[1:len(s)-1] + "'" +} diff --git a/src/encoding/json/scanner_test.go b/src/encoding/json/scanner_test.go new file mode 100644 index 0000000..3474b3e --- /dev/null +++ b/src/encoding/json/scanner_test.go @@ -0,0 +1,301 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "math" + "math/rand" + "reflect" + "testing" +) + +var validTests = []struct { + data string + ok bool +}{ + {`foo`, false}, + {`}{`, false}, + {`{]`, false}, + {`{}`, true}, + {`{"foo":"bar"}`, true}, + {`{"foo":"bar","bar":{"baz":["qux"]}}`, true}, +} + +func TestValid(t *testing.T) { + for _, tt := range validTests { + if ok := Valid([]byte(tt.data)); ok != tt.ok { + t.Errorf("Valid(%#q) = %v, want %v", tt.data, ok, tt.ok) + } + } +} + +// Tests of simple examples. + +type example struct { + compact string + indent string +} + +var examples = []example{ + {`1`, `1`}, + {`{}`, `{}`}, + {`[]`, `[]`}, + {`{"":2}`, "{\n\t\"\": 2\n}"}, + {`[3]`, "[\n\t3\n]"}, + {`[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"}, + {`{"x":1}`, "{\n\t\"x\": 1\n}"}, + {ex1, ex1i}, + {"{\"\":\"<>&\u2028\u2029\"}", "{\n\t\"\": \"<>&\u2028\u2029\"\n}"}, // See golang.org/issue/34070 +} + +var ex1 = `[true,false,null,"x",1,1.5,0,-5e+2]` + +var ex1i = `[ + true, + false, + null, + "x", + 1, + 1.5, + 0, + -5e+2 +]` + +func TestCompact(t *testing.T) { + var buf bytes.Buffer + for _, tt := range examples { + buf.Reset() + if err := Compact(&buf, []byte(tt.compact)); err != nil { + t.Errorf("Compact(%#q): %v", tt.compact, err) + } else if s := buf.String(); s != tt.compact { + t.Errorf("Compact(%#q) = %#q, want original", tt.compact, s) + } + + buf.Reset() + if err := Compact(&buf, []byte(tt.indent)); err != nil { + t.Errorf("Compact(%#q): %v", tt.indent, err) + continue + } else if s := buf.String(); s != tt.compact { + t.Errorf("Compact(%#q) = %#q, want %#q", tt.indent, s, tt.compact) + } + } +} + +func TestCompactSeparators(t *testing.T) { + // U+2028 and U+2029 should be escaped inside strings. + // They should not appear outside strings. + tests := []struct { + in, compact string + }{ + {"{\"\u2028\": 1}", "{\"\u2028\":1}"}, + {"{\"\u2029\" :2}", "{\"\u2029\":2}"}, + } + for _, tt := range tests { + var buf bytes.Buffer + if err := Compact(&buf, []byte(tt.in)); err != nil { + t.Errorf("Compact(%q): %v", tt.in, err) + } else if s := buf.String(); s != tt.compact { + t.Errorf("Compact(%q) = %q, want %q", tt.in, s, tt.compact) + } + } +} + +func TestIndent(t *testing.T) { + var buf bytes.Buffer + for _, tt := range examples { + buf.Reset() + if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil { + t.Errorf("Indent(%#q): %v", tt.indent, err) + } else if s := buf.String(); s != tt.indent { + t.Errorf("Indent(%#q) = %#q, want original", tt.indent, s) + } + + buf.Reset() + if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil { + t.Errorf("Indent(%#q): %v", tt.compact, err) + continue + } else if s := buf.String(); s != tt.indent { + t.Errorf("Indent(%#q) = %#q, want %#q", tt.compact, s, tt.indent) + } + } +} + +// Tests of a large random structure. + +func TestCompactBig(t *testing.T) { + initBig() + var buf bytes.Buffer + if err := Compact(&buf, jsonBig); err != nil { + t.Fatalf("Compact: %v", err) + } + b := buf.Bytes() + if !bytes.Equal(b, jsonBig) { + t.Error("Compact(jsonBig) != jsonBig") + diff(t, b, jsonBig) + return + } +} + +func TestIndentBig(t *testing.T) { + t.Parallel() + initBig() + var buf bytes.Buffer + if err := Indent(&buf, jsonBig, "", "\t"); err != nil { + t.Fatalf("Indent1: %v", err) + } + b := buf.Bytes() + if len(b) == len(jsonBig) { + // jsonBig is compact (no unnecessary spaces); + // indenting should make it bigger + t.Fatalf("Indent(jsonBig) did not get bigger") + } + + // should be idempotent + var buf1 bytes.Buffer + if err := Indent(&buf1, b, "", "\t"); err != nil { + t.Fatalf("Indent2: %v", err) + } + b1 := buf1.Bytes() + if !bytes.Equal(b1, b) { + t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)") + diff(t, b1, b) + return + } + + // should get back to original + buf1.Reset() + if err := Compact(&buf1, b); err != nil { + t.Fatalf("Compact: %v", err) + } + b1 = buf1.Bytes() + if !bytes.Equal(b1, jsonBig) { + t.Error("Compact(Indent(jsonBig)) != jsonBig") + diff(t, b1, jsonBig) + return + } +} + +type indentErrorTest struct { + in string + err error +} + +var indentErrorTests = []indentErrorTest{ + {`{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}}, + {`{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}}, +} + +func TestIndentErrors(t *testing.T) { + for i, tt := range indentErrorTests { + slice := make([]uint8, 0) + buf := bytes.NewBuffer(slice) + if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { + if !reflect.DeepEqual(err, tt.err) { + t.Errorf("#%d: Indent: %#v", i, err) + continue + } + } + } +} + +func diff(t *testing.T, a, b []byte) { + for i := 0; ; i++ { + if i >= len(a) || i >= len(b) || a[i] != b[i] { + j := i - 10 + if j < 0 { + j = 0 + } + t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:])) + return + } + } +} + +func trim(b []byte) []byte { + if len(b) > 20 { + return b[0:20] + } + return b +} + +// Generate a random JSON object. + +var jsonBig []byte + +func initBig() { + n := 10000 + if testing.Short() { + n = 100 + } + b, err := Marshal(genValue(n)) + if err != nil { + panic(err) + } + jsonBig = b +} + +func genValue(n int) any { + if n > 1 { + switch rand.Intn(2) { + case 0: + return genArray(n) + case 1: + return genMap(n) + } + } + switch rand.Intn(3) { + case 0: + return rand.Intn(2) == 0 + case 1: + return rand.NormFloat64() + case 2: + return genString(30) + } + panic("unreachable") +} + +func genString(stddev float64) string { + n := int(math.Abs(rand.NormFloat64()*stddev + stddev/2)) + c := make([]rune, n) + for i := range c { + f := math.Abs(rand.NormFloat64()*64 + 32) + if f > 0x10ffff { + f = 0x10ffff + } + c[i] = rune(f) + } + return string(c) +} + +func genArray(n int) []any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if f < 1 { + f = 1 + } + x := make([]any, f) + for i := range x { + x[i] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} + +func genMap(n int) map[string]any { + f := int(math.Abs(rand.NormFloat64()) * math.Min(10, float64(n/2))) + if f > n { + f = n + } + if n > 0 && f == 0 { + f = 1 + } + x := make(map[string]any) + for i := 0; i < f; i++ { + x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f) + } + return x +} diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go new file mode 100644 index 0000000..b4146a3 --- /dev/null +++ b/src/encoding/json/stream.go @@ -0,0 +1,511 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "errors" + "io" +) + +// A Decoder reads and decodes JSON values from an input stream. +type Decoder struct { + r io.Reader + buf []byte + d decodeState + scanp int // start of unread data in buf + scanned int64 // amount of data already scanned + scan scanner + err error + + tokenState int + tokenStack []int +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may +// read data from r beyond the JSON values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r: r} +} + +// UseNumber causes the Decoder to unmarshal a number into an interface{} as a +// Number instead of as a float64. +func (dec *Decoder) UseNumber() { dec.d.useNumber = true } + +// DisallowUnknownFields causes the Decoder to return an error when the destination +// is a struct and the input contains object keys which do not match any +// non-ignored, exported fields in the destination. +func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } + +// Decode reads the next JSON-encoded value from its +// input and stores it in the value pointed to by v. +// +// See the documentation for Unmarshal for details about +// the conversion of JSON into a Go value. +func (dec *Decoder) Decode(v any) error { + if dec.err != nil { + return dec.err + } + + if err := dec.tokenPrepareForDecode(); err != nil { + return err + } + + if !dec.tokenValueAllowed() { + return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} + } + + // Read whole value into buffer. + n, err := dec.readValue() + if err != nil { + return err + } + dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) + dec.scanp += n + + // Don't save err from unmarshal into dec.err: + // the connection is still usable since we read a complete JSON + // object from it before the error happened. + err = dec.d.unmarshal(v) + + // fixup token streaming state + dec.tokenValueEnd() + + return err +} + +// Buffered returns a reader of the data remaining in the Decoder's +// buffer. The reader is valid until the next call to Decode. +func (dec *Decoder) Buffered() io.Reader { + return bytes.NewReader(dec.buf[dec.scanp:]) +} + +// readValue reads a JSON value into dec.buf. +// It returns the length of the encoding. +func (dec *Decoder) readValue() (int, error) { + dec.scan.reset() + + scanp := dec.scanp + var err error +Input: + // help the compiler see that scanp is never negative, so it can remove + // some bounds checks below. + for scanp >= 0 { + + // Look in the buffer for a new value. + for ; scanp < len(dec.buf); scanp++ { + c := dec.buf[scanp] + dec.scan.bytes++ + switch dec.scan.step(&dec.scan, c) { + case scanEnd: + // scanEnd is delayed one byte so we decrement + // the scanner bytes count by 1 to ensure that + // this value is correct in the next call of Decode. + dec.scan.bytes-- + break Input + case scanEndObject, scanEndArray: + // scanEnd is delayed one byte. + // We might block trying to get that byte from src, + // so instead invent a space byte. + if stateEndValue(&dec.scan, ' ') == scanEnd { + scanp++ + break Input + } + case scanError: + dec.err = dec.scan.err + return 0, dec.scan.err + } + } + + // Did the last read have an error? + // Delayed until now to allow buffer scan. + if err != nil { + if err == io.EOF { + if dec.scan.step(&dec.scan, ' ') == scanEnd { + break Input + } + if nonSpace(dec.buf) { + err = io.ErrUnexpectedEOF + } + } + dec.err = err + return 0, err + } + + n := scanp - dec.scanp + err = dec.refill() + scanp = dec.scanp + n + } + return scanp - dec.scanp, nil +} + +func (dec *Decoder) refill() error { + // Make room to read more into the buffer. + // First slide down data already consumed. + if dec.scanp > 0 { + dec.scanned += int64(dec.scanp) + n := copy(dec.buf, dec.buf[dec.scanp:]) + dec.buf = dec.buf[:n] + dec.scanp = 0 + } + + // Grow buffer if not large enough. + const minRead = 512 + if cap(dec.buf)-len(dec.buf) < minRead { + newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) + copy(newBuf, dec.buf) + dec.buf = newBuf + } + + // Read. Delay error for next iteration (after scan). + n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) + dec.buf = dec.buf[0 : len(dec.buf)+n] + + return err +} + +func nonSpace(b []byte) bool { + for _, c := range b { + if !isSpace(c) { + return true + } + } + return false +} + +// An Encoder writes JSON values to an output stream. +type Encoder struct { + w io.Writer + err error + escapeHTML bool + + indentBuf []byte + indentPrefix string + indentValue string +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{w: w, escapeHTML: true} +} + +// Encode writes the JSON encoding of v to the stream, +// followed by a newline character. +// +// See the documentation for Marshal for details about the +// conversion of Go values to JSON. +func (enc *Encoder) Encode(v any) error { + if enc.err != nil { + return enc.err + } + + e := newEncodeState() + defer encodeStatePool.Put(e) + + err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) + if err != nil { + return err + } + + // Terminate each value with a newline. + // This makes the output look a little nicer + // when debugging, and some kind of space + // is required if the encoded value was a number, + // so that the reader knows there aren't more + // digits coming. + e.WriteByte('\n') + + b := e.Bytes() + if enc.indentPrefix != "" || enc.indentValue != "" { + enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) + if err != nil { + return err + } + b = enc.indentBuf + } + if _, err = enc.w.Write(b); err != nil { + enc.err = err + } + return err +} + +// SetIndent instructs the encoder to format each subsequent encoded +// value as if indented by the package-level function Indent(dst, src, prefix, indent). +// Calling SetIndent("", "") disables indentation. +func (enc *Encoder) SetIndent(prefix, indent string) { + enc.indentPrefix = prefix + enc.indentValue = indent +} + +// SetEscapeHTML specifies whether problematic HTML characters +// should be escaped inside JSON quoted strings. +// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e +// to avoid certain safety problems that can arise when embedding JSON in HTML. +// +// In non-HTML settings where the escaping interferes with the readability +// of the output, SetEscapeHTML(false) disables this behavior. +func (enc *Encoder) SetEscapeHTML(on bool) { + enc.escapeHTML = on +} + +// RawMessage is a raw encoded JSON value. +// It implements Marshaler and Unmarshaler and can +// be used to delay JSON decoding or precompute a JSON encoding. +type RawMessage []byte + +// MarshalJSON returns m as the JSON encoding of m. +func (m RawMessage) MarshalJSON() ([]byte, error) { + if m == nil { + return []byte("null"), nil + } + return m, nil +} + +// UnmarshalJSON sets *m to a copy of data. +func (m *RawMessage) UnmarshalJSON(data []byte) error { + if m == nil { + return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") + } + *m = append((*m)[0:0], data...) + return nil +} + +var _ Marshaler = (*RawMessage)(nil) +var _ Unmarshaler = (*RawMessage)(nil) + +// A Token holds a value of one of these types: +// +// Delim, for the four JSON delimiters [ ] { } +// bool, for JSON booleans +// float64, for JSON numbers +// Number, for JSON numbers +// string, for JSON string literals +// nil, for JSON null +type Token any + +const ( + tokenTopValue = iota + tokenArrayStart + tokenArrayValue + tokenArrayComma + tokenObjectStart + tokenObjectKey + tokenObjectColon + tokenObjectValue + tokenObjectComma +) + +// advance tokenstate from a separator state to a value state +func (dec *Decoder) tokenPrepareForDecode() error { + // Note: Not calling peek before switch, to avoid + // putting peek into the standard Decode path. + // peek is only called when using the Token API. + switch dec.tokenState { + case tokenArrayComma: + c, err := dec.peek() + if err != nil { + return err + } + if c != ',' { + return &SyntaxError{"expected comma after array element", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenArrayValue + case tokenObjectColon: + c, err := dec.peek() + if err != nil { + return err + } + if c != ':' { + return &SyntaxError{"expected colon after object key", dec.InputOffset()} + } + dec.scanp++ + dec.tokenState = tokenObjectValue + } + return nil +} + +func (dec *Decoder) tokenValueAllowed() bool { + switch dec.tokenState { + case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: + return true + } + return false +} + +func (dec *Decoder) tokenValueEnd() { + switch dec.tokenState { + case tokenArrayStart, tokenArrayValue: + dec.tokenState = tokenArrayComma + case tokenObjectValue: + dec.tokenState = tokenObjectComma + } +} + +// A Delim is a JSON array or object delimiter, one of [ ] { or }. +type Delim rune + +func (d Delim) String() string { + return string(d) +} + +// Token returns the next JSON token in the input stream. +// At the end of the input stream, Token returns nil, io.EOF. +// +// Token guarantees that the delimiters [ ] { } it returns are +// properly nested and matched: if Token encounters an unexpected +// delimiter in the input, it will return an error. +// +// The input stream consists of basic JSON values—bool, string, +// number, and null—along with delimiters [ ] { } of type Delim +// to mark the start and end of arrays and objects. +// Commas and colons are elided. +func (dec *Decoder) Token() (Token, error) { + for { + c, err := dec.peek() + if err != nil { + return nil, err + } + switch c { + case '[': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenArrayStart + return Delim('['), nil + + case ']': + if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim(']'), nil + + case '{': + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenStack = append(dec.tokenStack, dec.tokenState) + dec.tokenState = tokenObjectStart + return Delim('{'), nil + + case '}': + if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] + dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] + dec.tokenValueEnd() + return Delim('}'), nil + + case ':': + if dec.tokenState != tokenObjectColon { + return dec.tokenError(c) + } + dec.scanp++ + dec.tokenState = tokenObjectValue + continue + + case ',': + if dec.tokenState == tokenArrayComma { + dec.scanp++ + dec.tokenState = tokenArrayValue + continue + } + if dec.tokenState == tokenObjectComma { + dec.scanp++ + dec.tokenState = tokenObjectKey + continue + } + return dec.tokenError(c) + + case '"': + if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { + var x string + old := dec.tokenState + dec.tokenState = tokenTopValue + err := dec.Decode(&x) + dec.tokenState = old + if err != nil { + return nil, err + } + dec.tokenState = tokenObjectColon + return x, nil + } + fallthrough + + default: + if !dec.tokenValueAllowed() { + return dec.tokenError(c) + } + var x any + if err := dec.Decode(&x); err != nil { + return nil, err + } + return x, nil + } + } +} + +func (dec *Decoder) tokenError(c byte) (Token, error) { + var context string + switch dec.tokenState { + case tokenTopValue: + context = " looking for beginning of value" + case tokenArrayStart, tokenArrayValue, tokenObjectValue: + context = " looking for beginning of value" + case tokenArrayComma: + context = " after array element" + case tokenObjectKey: + context = " looking for beginning of object key string" + case tokenObjectColon: + context = " after object key" + case tokenObjectComma: + context = " after object key:value pair" + } + return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} +} + +// More reports whether there is another element in the +// current array or object being parsed. +func (dec *Decoder) More() bool { + c, err := dec.peek() + return err == nil && c != ']' && c != '}' +} + +func (dec *Decoder) peek() (byte, error) { + var err error + for { + for i := dec.scanp; i < len(dec.buf); i++ { + c := dec.buf[i] + if isSpace(c) { + continue + } + dec.scanp = i + return c, nil + } + // buffer has been scanned, now report any error + if err != nil { + return 0, err + } + err = dec.refill() + } +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (dec *Decoder) InputOffset() int64 { + return dec.scanned + int64(dec.scanp) +} diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go new file mode 100644 index 0000000..97f9fbd --- /dev/null +++ b/src/encoding/json/stream_test.go @@ -0,0 +1,497 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "io" + "log" + "net" + "net/http" + "net/http/httptest" + "reflect" + "runtime/debug" + "strings" + "testing" +) + +// Test values for the stream test. +// One of each JSON kind. +var streamTest = []any{ + 0.1, + "hello", + nil, + true, + false, + []any{"a", "b", "c"}, + map[string]any{"K": "Kelvin", "ß": "long s"}, + 3.14, // another value to make sure something can follow map +} + +var streamEncoded = `0.1 +"hello" +null +true +false +["a","b","c"] +{"ß":"long s","K":"Kelvin"} +3.14 +` + +func TestEncoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + var buf strings.Builder + enc := NewEncoder(&buf) + // Check that enc.SetIndent("", "") turns off indentation. + enc.SetIndent(">", ".") + enc.SetIndent("", "") + for j, v := range streamTest[0:i] { + if err := enc.Encode(v); err != nil { + t.Fatalf("encode #%d: %v", j, err) + } + } + if have, want := buf.String(), nlines(streamEncoded, i); have != want { + t.Errorf("encoding %d items: mismatch", i) + diff(t, []byte(have), []byte(want)) + break + } + } +} + +func TestEncoderErrorAndReuseEncodeState(t *testing.T) { + // Disable the GC temporarily to prevent encodeState's in Pool being cleaned away during the test. + percent := debug.SetGCPercent(-1) + defer debug.SetGCPercent(percent) + + // Trigger an error in Marshal with cyclic data. + type Dummy struct { + Name string + Next *Dummy + } + dummy := Dummy{Name: "Dummy"} + dummy.Next = &dummy + + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(dummy); err == nil { + t.Errorf("Encode(dummy) == nil; want error") + } + + type Data struct { + A string + I int + } + data := Data{A: "a", I: 1} + if err := enc.Encode(data); err != nil { + t.Errorf("Marshal(%v) = %v", data, err) + } + + var data2 Data + if err := Unmarshal(buf.Bytes(), &data2); err != nil { + t.Errorf("Unmarshal(%v) = %v", data2, err) + } + if data2 != data { + t.Errorf("expect: %v, but get: %v", data, data2) + } +} + +var streamEncodedIndent = `0.1 +"hello" +null +true +false +[ +>."a", +>."b", +>."c" +>] +{ +>."ß": "long s", +>."K": "Kelvin" +>} +3.14 +` + +func TestEncoderIndent(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + enc.SetIndent(">", ".") + for _, v := range streamTest { + enc.Encode(v) + } + if have, want := buf.String(), streamEncodedIndent; have != want { + t.Error("indented encoding mismatch") + diff(t, []byte(have), []byte(want)) + } +} + +type strMarshaler string + +func (s strMarshaler) MarshalJSON() ([]byte, error) { + return []byte(s), nil +} + +type strPtrMarshaler string + +func (s *strPtrMarshaler) MarshalJSON() ([]byte, error) { + return []byte(*s), nil +} + +func TestEncoderSetEscapeHTML(t *testing.T) { + var c C + var ct CText + var tagStruct struct { + Valid int `json:"<>&#! "` + Invalid int `json:"\\"` + } + + // This case is particularly interesting, as we force the encoder to + // take the address of the Ptr field to use its MarshalJSON method. This + // is why the '&' is important. + marshalerStruct := &struct { + NonPtr strMarshaler + Ptr strPtrMarshaler + }{`"<str>"`, `"<str>"`} + + // https://golang.org/issue/34154 + stringOption := struct { + Bar string `json:"bar,string"` + }{`<html>foobar</html>`} + + for _, tt := range []struct { + name string + v any + wantEscape string + want string + }{ + {"c", c, `"\u003c\u0026\u003e"`, `"<&>"`}, + {"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`}, + {`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`}, + { + "tagStruct", tagStruct, + `{"\u003c\u003e\u0026#! ":0,"Invalid":0}`, + `{"<>&#! ":0,"Invalid":0}`, + }, + { + `"<str>"`, marshalerStruct, + `{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`, + `{"NonPtr":"<str>","Ptr":"<str>"}`, + }, + { + "stringOption", stringOption, + `{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`, + `{"bar":"\"<html>foobar</html>\""}`, + }, + } { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.Encode(tt.v); err != nil { + t.Errorf("Encode(%s): %s", tt.name, err) + continue + } + if got := strings.TrimSpace(buf.String()); got != tt.wantEscape { + t.Errorf("Encode(%s) = %#q, want %#q", tt.name, got, tt.wantEscape) + } + buf.Reset() + enc.SetEscapeHTML(false) + if err := enc.Encode(tt.v); err != nil { + t.Errorf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err) + continue + } + if got := strings.TrimSpace(buf.String()); got != tt.want { + t.Errorf("SetEscapeHTML(false) Encode(%s) = %#q, want %#q", + tt.name, got, tt.want) + } + } +} + +func TestDecoder(t *testing.T) { + for i := 0; i <= len(streamTest); i++ { + // Use stream without newlines as input, + // just to stress the decoder even more. + // Our test input does not include back-to-back numbers. + // Otherwise stripping the newlines would + // merge two adjacent JSON values. + var buf bytes.Buffer + for _, c := range nlines(streamEncoded, i) { + if c != '\n' { + buf.WriteRune(c) + } + } + out := make([]any, i) + dec := NewDecoder(&buf) + for j := range out { + if err := dec.Decode(&out[j]); err != nil { + t.Fatalf("decode #%d/%d: %v", j, i, err) + } + } + if !reflect.DeepEqual(out, streamTest[0:i]) { + t.Errorf("decoding %d items: mismatch", i) + for j := range out { + if !reflect.DeepEqual(out[j], streamTest[j]) { + t.Errorf("#%d: have %v want %v", j, out[j], streamTest[j]) + } + } + break + } + } +} + +func TestDecoderBuffered(t *testing.T) { + r := strings.NewReader(`{"Name": "Gopher"} extra `) + var m struct { + Name string + } + d := NewDecoder(r) + err := d.Decode(&m) + if err != nil { + t.Fatal(err) + } + if m.Name != "Gopher" { + t.Errorf("Name = %q; want Gopher", m.Name) + } + rest, err := io.ReadAll(d.Buffered()) + if err != nil { + t.Fatal(err) + } + if g, w := string(rest), " extra "; g != w { + t.Errorf("Remaining = %q; want %q", g, w) + } +} + +func nlines(s string, n int) string { + if n <= 0 { + return "" + } + for i, c := range s { + if c == '\n' { + if n--; n == 0 { + return s[0 : i+1] + } + } + } + return s +} + +func TestRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + Y float32 + } + const raw = `["\u0056",null]` + const msg = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}` + err := Unmarshal([]byte(msg), &data) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if string([]byte(data.Id)) != raw { + t.Fatalf("Raw mismatch: have %#q want %#q", []byte(data.Id), raw) + } + b, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if string(b) != msg { + t.Fatalf("Marshal: have %#q want %#q", b, msg) + } +} + +func TestNullRawMessage(t *testing.T) { + var data struct { + X float64 + Id RawMessage + IdPtr *RawMessage + Y float32 + } + const msg = `{"X":0.1,"Id":null,"IdPtr":null,"Y":0.2}` + err := Unmarshal([]byte(msg), &data) + if err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if want, got := "null", string(data.Id); want != got { + t.Fatalf("Raw mismatch: have %q, want %q", got, want) + } + if data.IdPtr != nil { + t.Fatalf("Raw pointer mismatch: have non-nil, want nil") + } + b, err := Marshal(&data) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if string(b) != msg { + t.Fatalf("Marshal: have %#q want %#q", b, msg) + } +} + +var blockingTests = []string{ + `{"x": 1}`, + `[1, 2, 3]`, +} + +func TestBlocking(t *testing.T) { + for _, enc := range blockingTests { + r, w := net.Pipe() + go w.Write([]byte(enc)) + var val any + + // If Decode reads beyond what w.Write writes above, + // it will block, and the test will deadlock. + if err := NewDecoder(r).Decode(&val); err != nil { + t.Errorf("decoding %s: %v", enc, err) + } + r.Close() + w.Close() + } +} + +type tokenStreamCase struct { + json string + expTokens []any +} + +type decodeThis struct { + v any +} + +var tokenStreamCases = []tokenStreamCase{ + // streaming token cases + {json: `10`, expTokens: []any{float64(10)}}, + {json: ` [10] `, expTokens: []any{ + Delim('['), float64(10), Delim(']')}}, + {json: ` [false,10,"b"] `, expTokens: []any{ + Delim('['), false, float64(10), "b", Delim(']')}}, + {json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", float64(1), Delim('}')}}, + {json: `{"a": 1, "b":"3"}`, expTokens: []any{ + Delim('{'), "a", float64(1), "b", "3", Delim('}')}}, + {json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim('{'), "a", float64(2), Delim('}'), + Delim(']')}}, + {json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'), + Delim('}')}}, + {json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + Delim('{'), "a", float64(1), Delim('}'), + Delim(']'), Delim('}')}}, + + // streaming tokens with intermittent Decode() + {json: `{ "a": 1 }`, expTokens: []any{ + Delim('{'), "a", + decodeThis{float64(1)}, + Delim('}')}}, + {json: ` [ { "a" : 1 } ] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']')}}, + {json: ` [{"a": 1},{"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{map[string]any{"a": float64(2)}}, + Delim(']')}}, + {json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []any{ + Delim('{'), "obj", Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + Delim(']'), Delim('}')}}, + + {json: `{"obj": {"a": 1}}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{map[string]any{"a": float64(1)}}, + Delim('}')}}, + {json: `{"obj": [{"a": 1}]}`, expTokens: []any{ + Delim('{'), "obj", + decodeThis{[]any{ + map[string]any{"a": float64(1)}, + }}, + Delim('}')}}, + {json: ` [{"a": 1} {"a": 2}] `, expTokens: []any{ + Delim('['), + decodeThis{map[string]any{"a": float64(1)}}, + decodeThis{&SyntaxError{"expected comma after array element", 11}}, + }}, + {json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []any{ + Delim('{'), strings.Repeat("a", 513), + decodeThis{&SyntaxError{"expected colon after object key", 518}}, + }}, + {json: `{ "\a" }`, expTokens: []any{ + Delim('{'), + &SyntaxError{"invalid character 'a' in string escape code", 3}, + }}, + {json: ` \a`, expTokens: []any{ + &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1}, + }}, +} + +func TestDecodeInStream(t *testing.T) { + for ci, tcase := range tokenStreamCases { + + dec := NewDecoder(strings.NewReader(tcase.json)) + for i, etk := range tcase.expTokens { + + var tk any + var err error + + if dt, ok := etk.(decodeThis); ok { + etk = dt.v + err = dec.Decode(&tk) + } else { + tk, err = dec.Token() + } + if experr, ok := etk.(error); ok { + if err == nil || !reflect.DeepEqual(err, experr) { + t.Errorf("case %v: Expected error %#v in %q, but was %#v", ci, experr, tcase.json, err) + } + break + } else if err == io.EOF { + t.Errorf("case %v: Unexpected EOF in %q", ci, tcase.json) + break + } else if err != nil { + t.Errorf("case %v: Unexpected error '%#v' in %q", ci, err, tcase.json) + break + } + if !reflect.DeepEqual(tk, etk) { + t.Errorf(`case %v: %q @ %v expected %T(%v) was %T(%v)`, ci, tcase.json, i, etk, etk, tk, tk) + break + } + } + } +} + +// Test from golang.org/issue/11893 +func TestHTTPDecoding(t *testing.T) { + const raw = `{ "foo": "bar" }` + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(raw)) + })) + defer ts.Close() + res, err := http.Get(ts.URL) + if err != nil { + log.Fatalf("GET failed: %v", err) + } + defer res.Body.Close() + + foo := struct { + Foo string + }{} + + d := NewDecoder(res.Body) + err = d.Decode(&foo) + if err != nil { + t.Fatalf("Decode: %v", err) + } + if foo.Foo != "bar" { + t.Errorf("decoded %q; want \"bar\"", foo.Foo) + } + + // make sure we get the EOF the second time + err = d.Decode(&foo) + if err != io.EOF { + t.Errorf("err = %v; want io.EOF", err) + } +} diff --git a/src/encoding/json/tables.go b/src/encoding/json/tables.go new file mode 100644 index 0000000..10acdc1 --- /dev/null +++ b/src/encoding/json/tables.go @@ -0,0 +1,218 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +// htmlSafeSet holds the value true if the ASCII character with the given +// array position can be safely represented inside a JSON string, embedded +// inside of HTML <script> tags, without any additional escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), the backslash character ("\"), HTML opening and closing +// tags ("<" and ">"), and the ampersand ("&"). +var htmlSafeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': false, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': false, + '=': true, + '>': false, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} diff --git a/src/encoding/json/tagkey_test.go b/src/encoding/json/tagkey_test.go new file mode 100644 index 0000000..6330efd --- /dev/null +++ b/src/encoding/json/tagkey_test.go @@ -0,0 +1,120 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +type basicLatin2xTag struct { + V string `json:"$%-/"` +} + +type basicLatin3xTag struct { + V string `json:"0123456789"` +} + +type basicLatin4xTag struct { + V string `json:"ABCDEFGHIJKLMO"` +} + +type basicLatin5xTag struct { + V string `json:"PQRSTUVWXYZ_"` +} + +type basicLatin6xTag struct { + V string `json:"abcdefghijklmno"` +} + +type basicLatin7xTag struct { + V string `json:"pqrstuvwxyz"` +} + +type miscPlaneTag struct { + V string `json:"色は匂へど"` +} + +type percentSlashTag struct { + V string `json:"text/html%"` // https://golang.org/issue/2718 +} + +type punctuationTag struct { + V string `json:"!#$%&()*+-./:;<=>?@[]^_{|}~ "` // https://golang.org/issue/3546 +} + +type dashTag struct { + V string `json:"-,"` +} + +type emptyTag struct { + W string +} + +type misnamedTag struct { + X string `jsom:"Misnamed"` +} + +type badFormatTag struct { + Y string `:"BadFormat"` +} + +type badCodeTag struct { + Z string `json:" !\"#&'()*+,."` +} + +type spaceTag struct { + Q string `json:"With space"` +} + +type unicodeTag struct { + W string `json:"Ελλάδα"` +} + +var structTagObjectKeyTests = []struct { + raw any + value string + key string +}{ + {basicLatin2xTag{"2x"}, "2x", "$%-/"}, + {basicLatin3xTag{"3x"}, "3x", "0123456789"}, + {basicLatin4xTag{"4x"}, "4x", "ABCDEFGHIJKLMO"}, + {basicLatin5xTag{"5x"}, "5x", "PQRSTUVWXYZ_"}, + {basicLatin6xTag{"6x"}, "6x", "abcdefghijklmno"}, + {basicLatin7xTag{"7x"}, "7x", "pqrstuvwxyz"}, + {miscPlaneTag{"いろはにほへと"}, "いろはにほへと", "色は匂へど"}, + {dashTag{"foo"}, "foo", "-"}, + {emptyTag{"Pour Moi"}, "Pour Moi", "W"}, + {misnamedTag{"Animal Kingdom"}, "Animal Kingdom", "X"}, + {badFormatTag{"Orfevre"}, "Orfevre", "Y"}, + {badCodeTag{"Reliable Man"}, "Reliable Man", "Z"}, + {percentSlashTag{"brut"}, "brut", "text/html%"}, + {punctuationTag{"Union Rags"}, "Union Rags", "!#$%&()*+-./:;<=>?@[]^_{|}~ "}, + {spaceTag{"Perreddu"}, "Perreddu", "With space"}, + {unicodeTag{"Loukanikos"}, "Loukanikos", "Ελλάδα"}, +} + +func TestStructTagObjectKey(t *testing.T) { + for _, tt := range structTagObjectKeyTests { + b, err := Marshal(tt.raw) + if err != nil { + t.Fatalf("Marshal(%#q) failed: %v", tt.raw, err) + } + var f any + err = Unmarshal(b, &f) + if err != nil { + t.Fatalf("Unmarshal(%#q) failed: %v", b, err) + } + for i, v := range f.(map[string]any) { + switch i { + case tt.key: + if s, ok := v.(string); !ok || s != tt.value { + t.Fatalf("Unexpected value: %#q, want %v", s, tt.value) + } + default: + t.Fatalf("Unexpected key: %#q, from %#q", i, b) + } + } + } +} diff --git a/src/encoding/json/tags.go b/src/encoding/json/tags.go new file mode 100644 index 0000000..b490328 --- /dev/null +++ b/src/encoding/json/tags.go @@ -0,0 +1,38 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "strings" +) + +// tagOptions is the string following a comma in a struct field's "json" +// tag, or the empty string. It does not include the leading comma. +type tagOptions string + +// parseTag splits a struct field's json tag into its name and +// comma-separated options. +func parseTag(tag string) (string, tagOptions) { + tag, opt, _ := strings.Cut(tag, ",") + return tag, tagOptions(opt) +} + +// Contains reports whether a comma-separated list of options +// contains a particular substr flag. substr must be surrounded by a +// string boundary or commas. +func (o tagOptions) Contains(optionName string) bool { + if len(o) == 0 { + return false + } + s := string(o) + for s != "" { + var name string + name, s, _ = strings.Cut(s, ",") + if name == optionName { + return true + } + } + return false +} diff --git a/src/encoding/json/tags_test.go b/src/encoding/json/tags_test.go new file mode 100644 index 0000000..8ba8ddd --- /dev/null +++ b/src/encoding/json/tags_test.go @@ -0,0 +1,28 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "testing" +) + +func TestTagParsing(t *testing.T) { + name, opts := parseTag("field,foobar,foo") + if name != "field" { + t.Fatalf("name = %q, want field", name) + } + for _, tt := range []struct { + opt string + want bool + }{ + {"foobar", true}, + {"foo", true}, + {"bar", false}, + } { + if opts.Contains(tt.opt) != tt.want { + t.Errorf("Contains(%q) = %v", tt.opt, !tt.want) + } + } +} diff --git a/src/encoding/json/testdata/code.json.gz b/src/encoding/json/testdata/code.json.gz Binary files differnew file mode 100644 index 0000000..1572a92 --- /dev/null +++ b/src/encoding/json/testdata/code.json.gz |