diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
commit | f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch) | |
tree | 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/internal/fuzz/encoding.go | |
parent | Initial commit. (diff) | |
download | golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip |
Adding upstream version 1.22.1.upstream/1.22.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/internal/fuzz/encoding.go')
-rw-r--r-- | src/internal/fuzz/encoding.go | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/src/internal/fuzz/encoding.go b/src/internal/fuzz/encoding.go new file mode 100644 index 0000000..270ef7a --- /dev/null +++ b/src/internal/fuzz/encoding.go @@ -0,0 +1,361 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "fmt" + "go/ast" + "go/parser" + "go/token" + "math" + "strconv" + "strings" + "unicode/utf8" +) + +// encVersion1 will be the first line of a file with version 1 encoding. +var encVersion1 = "go test fuzz v1" + +// marshalCorpusFile encodes an arbitrary number of arguments into the file format for the +// corpus. +func marshalCorpusFile(vals ...any) []byte { + if len(vals) == 0 { + panic("must have at least one value to marshal") + } + b := bytes.NewBuffer([]byte(encVersion1 + "\n")) + // TODO(katiehockman): keep uint8 and int32 encoding where applicable, + // instead of changing to byte and rune respectively. + for _, val := range vals { + switch t := val.(type) { + case int, int8, int16, int64, uint, uint16, uint32, uint64, bool: + fmt.Fprintf(b, "%T(%v)\n", t, t) + case float32: + if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) { + // We encode unusual NaNs as hex values, because that is how users are + // likely to encounter them in literature about floating-point encoding. + // This allows us to reproduce fuzz failures that depend on the specific + // NaN representation (for float32 there are about 2^24 possibilities!), + // not just the fact that the value is *a* NaN. + // + // Note that the specific value of float32(math.NaN()) can vary based on + // whether the architecture represents signaling NaNs using a low bit + // (as is common) or a high bit (as commonly implemented on MIPS + // hardware before around 2012). We believe that the increase in clarity + // from identifying "NaN" with math.NaN() is worth the slight ambiguity + // from a platform-dependent value. + fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t)) + } else { + // We encode all other values — including the NaN value that is + // bitwise-identical to float32(math.Nan()) — using the default + // formatting, which is equivalent to strconv.FormatFloat with format + // 'g' and can be parsed by strconv.ParseFloat. + // + // For an ordinary floating-point number this format includes + // sufficiently many digits to reconstruct the exact value. For positive + // or negative infinity it is the string "+Inf" or "-Inf". For positive + // or negative zero it is "0" or "-0". For NaN, it is the string "NaN". + fmt.Fprintf(b, "%T(%v)\n", t, t) + } + case float64: + if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) { + fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t)) + } else { + fmt.Fprintf(b, "%T(%v)\n", t, t) + } + case string: + fmt.Fprintf(b, "string(%q)\n", t) + case rune: // int32 + // Although rune and int32 are represented by the same type, only a subset + // of valid int32 values can be expressed as rune literals. Notably, + // negative numbers, surrogate halves, and values above unicode.MaxRune + // have no quoted representation. + // + // fmt with "%q" (and the corresponding functions in the strconv package) + // would quote out-of-range values to the Unicode replacement character + // instead of the original value (see https://go.dev/issue/51526), so + // they must be treated as int32 instead. + // + // We arbitrarily draw the line at UTF-8 validity, which biases toward the + // "rune" interpretation. (However, we accept either format as input.) + if utf8.ValidRune(t) { + fmt.Fprintf(b, "rune(%q)\n", t) + } else { + fmt.Fprintf(b, "int32(%v)\n", t) + } + case byte: // uint8 + // For bytes, we arbitrarily prefer the character interpretation. + // (Every byte has a valid character encoding.) + fmt.Fprintf(b, "byte(%q)\n", t) + case []byte: // []uint8 + fmt.Fprintf(b, "[]byte(%q)\n", t) + default: + panic(fmt.Sprintf("unsupported type: %T", t)) + } + } + return b.Bytes() +} + +// unmarshalCorpusFile decodes corpus bytes into their respective values. +func unmarshalCorpusFile(b []byte) ([]any, error) { + if len(b) == 0 { + return nil, fmt.Errorf("cannot unmarshal empty string") + } + lines := bytes.Split(b, []byte("\n")) + if len(lines) < 2 { + return nil, fmt.Errorf("must include version and at least one value") + } + version := strings.TrimSuffix(string(lines[0]), "\r") + if version != encVersion1 { + return nil, fmt.Errorf("unknown encoding version: %s", version) + } + var vals []any + for _, line := range lines[1:] { + line = bytes.TrimSpace(line) + if len(line) == 0 { + continue + } + v, err := parseCorpusValue(line) + if err != nil { + return nil, fmt.Errorf("malformed line %q: %v", line, err) + } + vals = append(vals, v) + } + return vals, nil +} + +func parseCorpusValue(line []byte) (any, error) { + fs := token.NewFileSet() + expr, err := parser.ParseExprFrom(fs, "(test)", line, 0) + if err != nil { + return nil, err + } + call, ok := expr.(*ast.CallExpr) + if !ok { + return nil, fmt.Errorf("expected call expression") + } + if len(call.Args) != 1 { + return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args)) + } + arg := call.Args[0] + + if arrayType, ok := call.Fun.(*ast.ArrayType); ok { + if arrayType.Len != nil { + return nil, fmt.Errorf("expected []byte or primitive type") + } + elt, ok := arrayType.Elt.(*ast.Ident) + if !ok || elt.Name != "byte" { + return nil, fmt.Errorf("expected []byte") + } + lit, ok := arg.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return nil, fmt.Errorf("string literal required for type []byte") + } + s, err := strconv.Unquote(lit.Value) + if err != nil { + return nil, err + } + return []byte(s), nil + } + + var idType *ast.Ident + if selector, ok := call.Fun.(*ast.SelectorExpr); ok { + xIdent, ok := selector.X.(*ast.Ident) + if !ok || xIdent.Name != "math" { + return nil, fmt.Errorf("invalid selector type") + } + switch selector.Sel.Name { + case "Float64frombits": + idType = &ast.Ident{Name: "float64-bits"} + case "Float32frombits": + idType = &ast.Ident{Name: "float32-bits"} + default: + return nil, fmt.Errorf("invalid selector type") + } + } else { + idType, ok = call.Fun.(*ast.Ident) + if !ok { + return nil, fmt.Errorf("expected []byte or primitive type") + } + if idType.Name == "bool" { + id, ok := arg.(*ast.Ident) + if !ok { + return nil, fmt.Errorf("malformed bool") + } + if id.Name == "true" { + return true, nil + } else if id.Name == "false" { + return false, nil + } else { + return nil, fmt.Errorf("true or false required for type bool") + } + } + } + + var ( + val string + kind token.Token + ) + if op, ok := arg.(*ast.UnaryExpr); ok { + switch lit := op.X.(type) { + case *ast.BasicLit: + if op.Op != token.SUB { + return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op) + } + // Special case for negative numbers. + val = op.Op.String() + lit.Value // e.g. "-" + "124" + kind = lit.Kind + case *ast.Ident: + if lit.Name != "Inf" { + return nil, fmt.Errorf("expected operation on int or float type") + } + if op.Op == token.SUB { + val = "-Inf" + } else { + val = "+Inf" + } + kind = token.FLOAT + default: + return nil, fmt.Errorf("expected operation on int or float type") + } + } else { + switch lit := arg.(type) { + case *ast.BasicLit: + val, kind = lit.Value, lit.Kind + case *ast.Ident: + if lit.Name != "NaN" { + return nil, fmt.Errorf("literal value required for primitive type") + } + val, kind = "NaN", token.FLOAT + default: + return nil, fmt.Errorf("literal value required for primitive type") + } + } + + switch typ := idType.Name; typ { + case "string": + if kind != token.STRING { + return nil, fmt.Errorf("string literal value required for type string") + } + return strconv.Unquote(val) + case "byte", "rune": + if kind == token.INT { + switch typ { + case "rune": + return parseInt(val, typ) + case "byte": + return parseUint(val, typ) + } + } + if kind != token.CHAR { + return nil, fmt.Errorf("character literal required for byte/rune types") + } + n := len(val) + if n < 2 { + return nil, fmt.Errorf("malformed character literal, missing single quotes") + } + code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'') + if err != nil { + return nil, err + } + if typ == "rune" { + return code, nil + } + if code >= 256 { + return nil, fmt.Errorf("can only encode single byte to a byte type") + } + return byte(code), nil + case "int", "int8", "int16", "int32", "int64": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for int types") + } + return parseInt(val, typ) + case "uint", "uint8", "uint16", "uint32", "uint64": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for uint types") + } + return parseUint(val, typ) + case "float32": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("float or integer literal required for float32 type") + } + v, err := strconv.ParseFloat(val, 32) + return float32(v), err + case "float64": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("float or integer literal required for float64 type") + } + return strconv.ParseFloat(val, 64) + case "float32-bits": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for math.Float32frombits type") + } + bits, err := parseUint(val, "uint32") + if err != nil { + return nil, err + } + return math.Float32frombits(bits.(uint32)), nil + case "float64-bits": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("integer literal required for math.Float64frombits type") + } + bits, err := parseUint(val, "uint64") + if err != nil { + return nil, err + } + return math.Float64frombits(bits.(uint64)), nil + default: + return nil, fmt.Errorf("expected []byte or primitive type") + } +} + +// parseInt returns an integer of value val and type typ. +func parseInt(val, typ string) (any, error) { + switch typ { + case "int": + // The int type may be either 32 or 64 bits. If 32, the fuzz tests in the + // corpus may include 64-bit values produced by fuzzing runs on 64-bit + // architectures. When running those tests, we implicitly wrap the values to + // fit in a regular int. (The test case is still “interesting”, even if the + // specific values of its inputs are platform-dependent.) + i, err := strconv.ParseInt(val, 0, 64) + return int(i), err + case "int8": + i, err := strconv.ParseInt(val, 0, 8) + return int8(i), err + case "int16": + i, err := strconv.ParseInt(val, 0, 16) + return int16(i), err + case "int32", "rune": + i, err := strconv.ParseInt(val, 0, 32) + return int32(i), err + case "int64": + return strconv.ParseInt(val, 0, 64) + default: + panic("unreachable") + } +} + +// parseUint returns an unsigned integer of value val and type typ. +func parseUint(val, typ string) (any, error) { + switch typ { + case "uint": + i, err := strconv.ParseUint(val, 0, 64) + return uint(i), err + case "uint8", "byte": + i, err := strconv.ParseUint(val, 0, 8) + return uint8(i), err + case "uint16": + i, err := strconv.ParseUint(val, 0, 16) + return uint16(i), err + case "uint32": + i, err := strconv.ParseUint(val, 0, 32) + return uint32(i), err + case "uint64": + return strconv.ParseUint(val, 0, 64) + default: + panic("unreachable") + } +} |