diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:25:22 +0000 |
commit | f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch) | |
tree | 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/internal/fuzz | |
parent | Initial commit. (diff) | |
download | golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip |
Adding upstream version 1.22.1.upstream/1.22.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/internal/fuzz')
-rw-r--r-- | src/internal/fuzz/counters_supported.go | 21 | ||||
-rw-r--r-- | src/internal/fuzz/counters_unsupported.go | 24 | ||||
-rw-r--r-- | src/internal/fuzz/coverage.go | 107 | ||||
-rw-r--r-- | src/internal/fuzz/encoding.go | 361 | ||||
-rw-r--r-- | src/internal/fuzz/encoding_test.go | 409 | ||||
-rw-r--r-- | src/internal/fuzz/fuzz.go | 1102 | ||||
-rw-r--r-- | src/internal/fuzz/mem.go | 138 | ||||
-rw-r--r-- | src/internal/fuzz/minimize.go | 95 | ||||
-rw-r--r-- | src/internal/fuzz/minimize_test.go | 182 | ||||
-rw-r--r-- | src/internal/fuzz/mutator.go | 293 | ||||
-rw-r--r-- | src/internal/fuzz/mutator_test.go | 117 | ||||
-rw-r--r-- | src/internal/fuzz/mutators_byteslice.go | 313 | ||||
-rw-r--r-- | src/internal/fuzz/mutators_byteslice_test.go | 186 | ||||
-rw-r--r-- | src/internal/fuzz/pcg.go | 145 | ||||
-rw-r--r-- | src/internal/fuzz/queue.go | 71 | ||||
-rw-r--r-- | src/internal/fuzz/queue_test.go | 58 | ||||
-rw-r--r-- | src/internal/fuzz/sys_posix.go | 130 | ||||
-rw-r--r-- | src/internal/fuzz/sys_unimplemented.go | 44 | ||||
-rw-r--r-- | src/internal/fuzz/sys_windows.go | 144 | ||||
-rw-r--r-- | src/internal/fuzz/trace.go | 35 | ||||
-rw-r--r-- | src/internal/fuzz/worker.go | 1195 | ||||
-rw-r--r-- | src/internal/fuzz/worker_test.go | 206 |
22 files changed, 5376 insertions, 0 deletions
diff --git a/src/internal/fuzz/counters_supported.go b/src/internal/fuzz/counters_supported.go new file mode 100644 index 0000000..79e27d2 --- /dev/null +++ b/src/internal/fuzz/counters_supported.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build (darwin || linux || windows || freebsd) && (amd64 || arm64) + +package fuzz + +import ( + "unsafe" +) + +// coverage returns a []byte containing unique 8-bit counters for each edge of +// the instrumented source code. This coverage data will only be generated if +// `-d=libfuzzer` is set at build time. This can be used to understand the code +// coverage of a test execution. +func coverage() []byte { + addr := unsafe.Pointer(&_counters) + size := uintptr(unsafe.Pointer(&_ecounters)) - uintptr(addr) + return unsafe.Slice((*byte)(addr), int(size)) +} diff --git a/src/internal/fuzz/counters_unsupported.go b/src/internal/fuzz/counters_unsupported.go new file mode 100644 index 0000000..287bb4b --- /dev/null +++ b/src/internal/fuzz/counters_unsupported.go @@ -0,0 +1,24 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO: expand the set of supported platforms, with testing. Nothing about +// the instrumentation is OS specific, but only amd64 and arm64 are +// supported in the runtime. See src/runtime/libfuzzer*. +// +// If you update this constraint, also update internal/platform.FuzzInstrumented. +// +//go:build !((darwin || linux || windows || freebsd) && (amd64 || arm64)) + +package fuzz + +// TODO(#48504): re-enable on platforms where instrumentation works. +// In theory, we shouldn't need this file at all: if the binary was built +// without coverage, then _counters and _ecounters should have the same address. +// However, this caused an init failure on aix/ppc64, so it's disabled here. + +// coverage returns a []byte containing unique 8-bit counters for each edge of +// the instrumented source code. This coverage data will only be generated if +// `-d=libfuzzer` is set at build time. This can be used to understand the code +// coverage of a test execution. +func coverage() []byte { return nil } diff --git a/src/internal/fuzz/coverage.go b/src/internal/fuzz/coverage.go new file mode 100644 index 0000000..0c5e17e --- /dev/null +++ b/src/internal/fuzz/coverage.go @@ -0,0 +1,107 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "fmt" + "math/bits" +) + +// ResetCoverage sets all of the counters for each edge of the instrumented +// source code to 0. +func ResetCoverage() { + cov := coverage() + for i := range cov { + cov[i] = 0 + } +} + +// SnapshotCoverage copies the current counter values into coverageSnapshot, +// preserving them for later inspection. SnapshotCoverage also rounds each +// counter down to the nearest power of two. This lets the coordinator store +// multiple values for each counter by OR'ing them together. +func SnapshotCoverage() { + cov := coverage() + for i, b := range cov { + b |= b >> 1 + b |= b >> 2 + b |= b >> 4 + b -= b >> 1 + coverageSnapshot[i] = b + } +} + +// diffCoverage returns a set of bits set in snapshot but not in base. +// If there are no new bits set, diffCoverage returns nil. +func diffCoverage(base, snapshot []byte) []byte { + if len(base) != len(snapshot) { + panic(fmt.Sprintf("the number of coverage bits changed: before=%d, after=%d", len(base), len(snapshot))) + } + found := false + for i := range snapshot { + if snapshot[i]&^base[i] != 0 { + found = true + break + } + } + if !found { + return nil + } + diff := make([]byte, len(snapshot)) + for i := range diff { + diff[i] = snapshot[i] &^ base[i] + } + return diff +} + +// countNewCoverageBits returns the number of bits set in snapshot that are not +// set in base. +func countNewCoverageBits(base, snapshot []byte) int { + n := 0 + for i := range snapshot { + n += bits.OnesCount8(snapshot[i] &^ base[i]) + } + return n +} + +// isCoverageSubset returns true if all the base coverage bits are set in +// snapshot. +func isCoverageSubset(base, snapshot []byte) bool { + for i, v := range base { + if v&snapshot[i] != v { + return false + } + } + return true +} + +// hasCoverageBit returns true if snapshot has at least one bit set that is +// also set in base. +func hasCoverageBit(base, snapshot []byte) bool { + for i := range snapshot { + if snapshot[i]&base[i] != 0 { + return true + } + } + return false +} + +func countBits(cov []byte) int { + n := 0 + for _, c := range cov { + n += bits.OnesCount8(c) + } + return n +} + +var ( + coverageEnabled = len(coverage()) > 0 + coverageSnapshot = make([]byte, len(coverage())) + + // _counters and _ecounters mark the start and end, respectively, of where + // the 8-bit coverage counters reside in memory. They're known to cmd/link, + // which specially assigns their addresses for this purpose. + _counters, _ecounters [0]byte +) diff --git a/src/internal/fuzz/encoding.go b/src/internal/fuzz/encoding.go new file mode 100644 index 0000000..270ef7a --- /dev/null +++ b/src/internal/fuzz/encoding.go @@ -0,0 +1,361 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "fmt" + "go/ast" + "go/parser" + "go/token" + "math" + "strconv" + "strings" + "unicode/utf8" +) + +// encVersion1 will be the first line of a file with version 1 encoding. +var encVersion1 = "go test fuzz v1" + +// marshalCorpusFile encodes an arbitrary number of arguments into the file format for the +// corpus. +func marshalCorpusFile(vals ...any) []byte { + if len(vals) == 0 { + panic("must have at least one value to marshal") + } + b := bytes.NewBuffer([]byte(encVersion1 + "\n")) + // TODO(katiehockman): keep uint8 and int32 encoding where applicable, + // instead of changing to byte and rune respectively. + for _, val := range vals { + switch t := val.(type) { + case int, int8, int16, int64, uint, uint16, uint32, uint64, bool: + fmt.Fprintf(b, "%T(%v)\n", t, t) + case float32: + if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) { + // We encode unusual NaNs as hex values, because that is how users are + // likely to encounter them in literature about floating-point encoding. + // This allows us to reproduce fuzz failures that depend on the specific + // NaN representation (for float32 there are about 2^24 possibilities!), + // not just the fact that the value is *a* NaN. + // + // Note that the specific value of float32(math.NaN()) can vary based on + // whether the architecture represents signaling NaNs using a low bit + // (as is common) or a high bit (as commonly implemented on MIPS + // hardware before around 2012). We believe that the increase in clarity + // from identifying "NaN" with math.NaN() is worth the slight ambiguity + // from a platform-dependent value. + fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t)) + } else { + // We encode all other values — including the NaN value that is + // bitwise-identical to float32(math.Nan()) — using the default + // formatting, which is equivalent to strconv.FormatFloat with format + // 'g' and can be parsed by strconv.ParseFloat. + // + // For an ordinary floating-point number this format includes + // sufficiently many digits to reconstruct the exact value. For positive + // or negative infinity it is the string "+Inf" or "-Inf". For positive + // or negative zero it is "0" or "-0". For NaN, it is the string "NaN". + fmt.Fprintf(b, "%T(%v)\n", t, t) + } + case float64: + if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) { + fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t)) + } else { + fmt.Fprintf(b, "%T(%v)\n", t, t) + } + case string: + fmt.Fprintf(b, "string(%q)\n", t) + case rune: // int32 + // Although rune and int32 are represented by the same type, only a subset + // of valid int32 values can be expressed as rune literals. Notably, + // negative numbers, surrogate halves, and values above unicode.MaxRune + // have no quoted representation. + // + // fmt with "%q" (and the corresponding functions in the strconv package) + // would quote out-of-range values to the Unicode replacement character + // instead of the original value (see https://go.dev/issue/51526), so + // they must be treated as int32 instead. + // + // We arbitrarily draw the line at UTF-8 validity, which biases toward the + // "rune" interpretation. (However, we accept either format as input.) + if utf8.ValidRune(t) { + fmt.Fprintf(b, "rune(%q)\n", t) + } else { + fmt.Fprintf(b, "int32(%v)\n", t) + } + case byte: // uint8 + // For bytes, we arbitrarily prefer the character interpretation. + // (Every byte has a valid character encoding.) + fmt.Fprintf(b, "byte(%q)\n", t) + case []byte: // []uint8 + fmt.Fprintf(b, "[]byte(%q)\n", t) + default: + panic(fmt.Sprintf("unsupported type: %T", t)) + } + } + return b.Bytes() +} + +// unmarshalCorpusFile decodes corpus bytes into their respective values. +func unmarshalCorpusFile(b []byte) ([]any, error) { + if len(b) == 0 { + return nil, fmt.Errorf("cannot unmarshal empty string") + } + lines := bytes.Split(b, []byte("\n")) + if len(lines) < 2 { + return nil, fmt.Errorf("must include version and at least one value") + } + version := strings.TrimSuffix(string(lines[0]), "\r") + if version != encVersion1 { + return nil, fmt.Errorf("unknown encoding version: %s", version) + } + var vals []any + for _, line := range lines[1:] { + line = bytes.TrimSpace(line) + if len(line) == 0 { + continue + } + v, err := parseCorpusValue(line) + if err != nil { + return nil, fmt.Errorf("malformed line %q: %v", line, err) + } + vals = append(vals, v) + } + return vals, nil +} + +func parseCorpusValue(line []byte) (any, error) { + fs := token.NewFileSet() + expr, err := parser.ParseExprFrom(fs, "(test)", line, 0) + if err != nil { + return nil, err + } + call, ok := expr.(*ast.CallExpr) + if !ok { + return nil, fmt.Errorf("expected call expression") + } + if len(call.Args) != 1 { + return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args)) + } + arg := call.Args[0] + + if arrayType, ok := call.Fun.(*ast.ArrayType); ok { + if arrayType.Len != nil { + return nil, fmt.Errorf("expected []byte or primitive type") + } + elt, ok := arrayType.Elt.(*ast.Ident) + if !ok || elt.Name != "byte" { + return nil, fmt.Errorf("expected []byte") + } + lit, ok := arg.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return nil, fmt.Errorf("string literal required for type []byte") + } + s, err := strconv.Unquote(lit.Value) + if err != nil { + return nil, err + } + return []byte(s), nil + } + + var idType *ast.Ident + if selector, ok := call.Fun.(*ast.SelectorExpr); ok { + xIdent, ok := selector.X.(*ast.Ident) + if !ok || xIdent.Name != "math" { + return nil, fmt.Errorf("invalid selector type") + } + switch selector.Sel.Name { + case "Float64frombits": + idType = &ast.Ident{Name: "float64-bits"} + case "Float32frombits": + idType = &ast.Ident{Name: "float32-bits"} + default: + return nil, fmt.Errorf("invalid selector type") + } + } else { + idType, ok = call.Fun.(*ast.Ident) + if !ok { + return nil, fmt.Errorf("expected []byte or primitive type") + } + if idType.Name == "bool" { + id, ok := arg.(*ast.Ident) + if !ok { + return nil, fmt.Errorf("malformed bool") + } + if id.Name == "true" { + return true, nil + } else if id.Name == "false" { + return false, nil + } else { + return nil, fmt.Errorf("true or false required for type bool") + } + } + } + + var ( + val string + kind token.Token + ) + if op, ok := arg.(*ast.UnaryExpr); ok { + switch lit := op.X.(type) { + case *ast.BasicLit: + if op.Op != token.SUB { + return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op) + } + // Special case for negative numbers. + val = op.Op.String() + lit.Value // e.g. "-" + "124" + kind = lit.Kind + case *ast.Ident: + if lit.Name != "Inf" { + return nil, fmt.Errorf("expected operation on int or float type") + } + if op.Op == token.SUB { + val = "-Inf" + } else { + val = "+Inf" + } + kind = token.FLOAT + default: + return nil, fmt.Errorf("expected operation on int or float type") + } + } else { + switch lit := arg.(type) { + case *ast.BasicLit: + val, kind = lit.Value, lit.Kind + case *ast.Ident: + if lit.Name != "NaN" { + return nil, fmt.Errorf("literal value required for primitive type") + } + val, kind = "NaN", token.FLOAT + default: + return nil, fmt.Errorf("literal value required for primitive type") + } + } + + switch typ := idType.Name; typ { + case "string": + if kind != token.STRING { + return nil, fmt.Errorf("string literal value required for type string") + } + return strconv.Unquote(val) + case "byte", "rune": + if kind == token.INT { + switch typ { + case "rune": + return parseInt(val, typ) + case "byte": + return parseUint(val, typ) + } + } + if kind != token.CHAR { + return nil, fmt.Errorf("character literal required for byte/rune types") + } + n := len(val) + if n < 2 { + return nil, fmt.Errorf("malformed character literal, missing single quotes") + } + code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'') + if err != nil { + return nil, err + } + if typ == "rune" { + return code, nil + } + if code >= 256 { + return nil, fmt.Errorf("can only encode single byte to a byte type") + } + return byte(code), nil + case "int", "int8", "int16", "int32", "int64": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for int types") + } + return parseInt(val, typ) + case "uint", "uint8", "uint16", "uint32", "uint64": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for uint types") + } + return parseUint(val, typ) + case "float32": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("float or integer literal required for float32 type") + } + v, err := strconv.ParseFloat(val, 32) + return float32(v), err + case "float64": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("float or integer literal required for float64 type") + } + return strconv.ParseFloat(val, 64) + case "float32-bits": + if kind != token.INT { + return nil, fmt.Errorf("integer literal required for math.Float32frombits type") + } + bits, err := parseUint(val, "uint32") + if err != nil { + return nil, err + } + return math.Float32frombits(bits.(uint32)), nil + case "float64-bits": + if kind != token.FLOAT && kind != token.INT { + return nil, fmt.Errorf("integer literal required for math.Float64frombits type") + } + bits, err := parseUint(val, "uint64") + if err != nil { + return nil, err + } + return math.Float64frombits(bits.(uint64)), nil + default: + return nil, fmt.Errorf("expected []byte or primitive type") + } +} + +// parseInt returns an integer of value val and type typ. +func parseInt(val, typ string) (any, error) { + switch typ { + case "int": + // The int type may be either 32 or 64 bits. If 32, the fuzz tests in the + // corpus may include 64-bit values produced by fuzzing runs on 64-bit + // architectures. When running those tests, we implicitly wrap the values to + // fit in a regular int. (The test case is still “interesting”, even if the + // specific values of its inputs are platform-dependent.) + i, err := strconv.ParseInt(val, 0, 64) + return int(i), err + case "int8": + i, err := strconv.ParseInt(val, 0, 8) + return int8(i), err + case "int16": + i, err := strconv.ParseInt(val, 0, 16) + return int16(i), err + case "int32", "rune": + i, err := strconv.ParseInt(val, 0, 32) + return int32(i), err + case "int64": + return strconv.ParseInt(val, 0, 64) + default: + panic("unreachable") + } +} + +// parseUint returns an unsigned integer of value val and type typ. +func parseUint(val, typ string) (any, error) { + switch typ { + case "uint": + i, err := strconv.ParseUint(val, 0, 64) + return uint(i), err + case "uint8", "byte": + i, err := strconv.ParseUint(val, 0, 8) + return uint8(i), err + case "uint16": + i, err := strconv.ParseUint(val, 0, 16) + return uint16(i), err + case "uint32": + i, err := strconv.ParseUint(val, 0, 32) + return uint32(i), err + case "uint64": + return strconv.ParseUint(val, 0, 64) + default: + panic("unreachable") + } +} diff --git a/src/internal/fuzz/encoding_test.go b/src/internal/fuzz/encoding_test.go new file mode 100644 index 0000000..6f6173d --- /dev/null +++ b/src/internal/fuzz/encoding_test.go @@ -0,0 +1,409 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "math" + "strconv" + "testing" + "unicode" +) + +func TestUnmarshalMarshal(t *testing.T) { + var tests = []struct { + desc string + in string + reject bool + want string // if different from in + }{ + { + desc: "missing version", + in: "int(1234)", + reject: true, + }, + { + desc: "malformed string", + in: `go test fuzz v1 +string("a"bcad")`, + reject: true, + }, + { + desc: "empty value", + in: `go test fuzz v1 +int()`, + reject: true, + }, + { + desc: "negative uint", + in: `go test fuzz v1 +uint(-32)`, + reject: true, + }, + { + desc: "int8 too large", + in: `go test fuzz v1 +int8(1234456)`, + reject: true, + }, + { + desc: "multiplication in int value", + in: `go test fuzz v1 +int(20*5)`, + reject: true, + }, + { + desc: "double negation", + in: `go test fuzz v1 +int(--5)`, + reject: true, + }, + { + desc: "malformed bool", + in: `go test fuzz v1 +bool(0)`, + reject: true, + }, + { + desc: "malformed byte", + in: `go test fuzz v1 +byte('aa)`, + reject: true, + }, + { + desc: "byte out of range", + in: `go test fuzz v1 +byte('☃')`, + reject: true, + }, + { + desc: "extra newline", + in: `go test fuzz v1 +string("has extra newline") +`, + want: `go test fuzz v1 +string("has extra newline")`, + }, + { + desc: "trailing spaces", + in: `go test fuzz v1 +string("extra") +[]byte("spacing") + `, + want: `go test fuzz v1 +string("extra") +[]byte("spacing")`, + }, + { + desc: "float types", + in: `go test fuzz v1 +float64(0) +float32(0)`, + }, + { + desc: "various types", + in: `go test fuzz v1 +int(-23) +int8(-2) +int64(2342425) +uint(1) +uint16(234) +uint32(352342) +uint64(123) +rune('œ') +byte('K') +byte('ÿ') +[]byte("hello¿") +[]byte("a") +bool(true) +string("hello\\xbd\\xb2=\\xbc ⌘") +float64(-12.5) +float32(2.5)`, + }, + { + desc: "float edge cases", + // The two IEEE 754 bit patterns used for the math.Float{64,32}frombits + // encodings are non-math.NAN quiet-NaN values. Since they are not equal + // to math.NaN(), they should be re-encoded to their bit patterns. They + // are, respectively: + // * math.Float64bits(math.NaN())+1 + // * math.Float32bits(float32(math.NaN()))+1 + in: `go test fuzz v1 +float32(-0) +float64(-0) +float32(+Inf) +float32(-Inf) +float32(NaN) +float64(+Inf) +float64(-Inf) +float64(NaN) +math.Float64frombits(0x7ff8000000000002) +math.Float32frombits(0x7fc00001)`, + }, + { + desc: "int variations", + // Although we arbitrarily choose default integer bases (0 or 16), we may + // want to change those arbitrary choices in the future and should not + // break the parser. Verify that integers in the opposite bases still + // parse correctly. + in: `go test fuzz v1 +int(0x0) +int32(0x41) +int64(0xfffffffff) +uint32(0xcafef00d) +uint64(0xffffffffffffffff) +uint8(0b0000000) +byte(0x0) +byte('\000') +byte('\u0000') +byte('\'') +math.Float64frombits(9221120237041090562) +math.Float32frombits(2143289345)`, + want: `go test fuzz v1 +int(0) +rune('A') +int64(68719476735) +uint32(3405705229) +uint64(18446744073709551615) +byte('\x00') +byte('\x00') +byte('\x00') +byte('\x00') +byte('\'') +math.Float64frombits(0x7ff8000000000002) +math.Float32frombits(0x7fc00001)`, + }, + { + desc: "rune validation", + in: `go test fuzz v1 +rune(0) +rune(0x41) +rune(-1) +rune(0xfffd) +rune(0xd800) +rune(0x10ffff) +rune(0x110000) +`, + want: `go test fuzz v1 +rune('\x00') +rune('A') +int32(-1) +rune('�') +int32(55296) +rune('\U0010ffff') +int32(1114112)`, + }, + { + desc: "int overflow", + in: `go test fuzz v1 +int(0x7fffffffffffffff) +uint(0xffffffffffffffff)`, + want: func() string { + switch strconv.IntSize { + case 32: + return `go test fuzz v1 +int(-1) +uint(4294967295)` + case 64: + return `go test fuzz v1 +int(9223372036854775807) +uint(18446744073709551615)` + default: + panic("unreachable") + } + }(), + }, + { + desc: "windows new line", + in: "go test fuzz v1\r\nint(0)\r\n", + want: "go test fuzz v1\nint(0)", + }, + } + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + vals, err := unmarshalCorpusFile([]byte(test.in)) + if test.reject { + if err == nil { + t.Fatalf("unmarshal unexpected success") + } + return + } + if err != nil { + t.Fatalf("unmarshal unexpected error: %v", err) + } + newB := marshalCorpusFile(vals...) + if err != nil { + t.Fatalf("marshal unexpected error: %v", err) + } + if newB[len(newB)-1] != '\n' { + t.Error("didn't write final newline to corpus file") + } + + want := test.want + if want == "" { + want = test.in + } + want += "\n" + got := string(newB) + if got != want { + t.Errorf("unexpected marshaled value\ngot:\n%s\nwant:\n%s", got, want) + } + }) + } +} + +// BenchmarkMarshalCorpusFile measures the time it takes to serialize byte +// slices of various sizes to a corpus file. The slice contains a repeating +// sequence of bytes 0-255 to mix escaped and non-escaped characters. +func BenchmarkMarshalCorpusFile(b *testing.B) { + buf := make([]byte, 1024*1024) + for i := 0; i < len(buf); i++ { + buf[i] = byte(i) + } + + for sz := 1; sz <= len(buf); sz <<= 1 { + sz := sz + b.Run(strconv.Itoa(sz), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.SetBytes(int64(sz)) + marshalCorpusFile(buf[:sz]) + } + }) + } +} + +// BenchmarkUnmarshalCorpusfile measures the time it takes to deserialize +// files encoding byte slices of various sizes. The slice contains a repeating +// sequence of bytes 0-255 to mix escaped and non-escaped characters. +func BenchmarkUnmarshalCorpusFile(b *testing.B) { + buf := make([]byte, 1024*1024) + for i := 0; i < len(buf); i++ { + buf[i] = byte(i) + } + + for sz := 1; sz <= len(buf); sz <<= 1 { + sz := sz + data := marshalCorpusFile(buf[:sz]) + b.Run(strconv.Itoa(sz), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.SetBytes(int64(sz)) + unmarshalCorpusFile(data) + } + }) + } +} + +func TestByteRoundTrip(t *testing.T) { + for x := 0; x < 256; x++ { + b1 := byte(x) + buf := marshalCorpusFile(b1) + vs, err := unmarshalCorpusFile(buf) + if err != nil { + t.Fatal(err) + } + b2 := vs[0].(byte) + if b2 != b1 { + t.Fatalf("unmarshaled %v, want %v:\n%s", b2, b1, buf) + } + } +} + +func TestInt8RoundTrip(t *testing.T) { + for x := -128; x < 128; x++ { + i1 := int8(x) + buf := marshalCorpusFile(i1) + vs, err := unmarshalCorpusFile(buf) + if err != nil { + t.Fatal(err) + } + i2 := vs[0].(int8) + if i2 != i1 { + t.Fatalf("unmarshaled %v, want %v:\n%s", i2, i1, buf) + } + } +} + +func FuzzFloat64RoundTrip(f *testing.F) { + f.Add(math.Float64bits(0)) + f.Add(math.Float64bits(math.Copysign(0, -1))) + f.Add(math.Float64bits(math.MaxFloat64)) + f.Add(math.Float64bits(math.SmallestNonzeroFloat64)) + f.Add(math.Float64bits(math.NaN())) + f.Add(uint64(0x7FF0000000000001)) // signaling NaN + f.Add(math.Float64bits(math.Inf(1))) + f.Add(math.Float64bits(math.Inf(-1))) + + f.Fuzz(func(t *testing.T, u1 uint64) { + x1 := math.Float64frombits(u1) + + b := marshalCorpusFile(x1) + t.Logf("marshaled math.Float64frombits(0x%x):\n%s", u1, b) + + xs, err := unmarshalCorpusFile(b) + if err != nil { + t.Fatal(err) + } + if len(xs) != 1 { + t.Fatalf("unmarshaled %d values", len(xs)) + } + x2 := xs[0].(float64) + u2 := math.Float64bits(x2) + if u2 != u1 { + t.Errorf("unmarshaled %v (bits 0x%x)", x2, u2) + } + }) +} + +func FuzzRuneRoundTrip(f *testing.F) { + f.Add(rune(-1)) + f.Add(rune(0xd800)) + f.Add(rune(0xdfff)) + f.Add(rune(unicode.ReplacementChar)) + f.Add(rune(unicode.MaxASCII)) + f.Add(rune(unicode.MaxLatin1)) + f.Add(rune(unicode.MaxRune)) + f.Add(rune(unicode.MaxRune + 1)) + f.Add(rune(-0x80000000)) + f.Add(rune(0x7fffffff)) + + f.Fuzz(func(t *testing.T, r1 rune) { + b := marshalCorpusFile(r1) + t.Logf("marshaled rune(0x%x):\n%s", r1, b) + + rs, err := unmarshalCorpusFile(b) + if err != nil { + t.Fatal(err) + } + if len(rs) != 1 { + t.Fatalf("unmarshaled %d values", len(rs)) + } + r2 := rs[0].(rune) + if r2 != r1 { + t.Errorf("unmarshaled rune(0x%x)", r2) + } + }) +} + +func FuzzStringRoundTrip(f *testing.F) { + f.Add("") + f.Add("\x00") + f.Add(string([]rune{unicode.ReplacementChar})) + + f.Fuzz(func(t *testing.T, s1 string) { + b := marshalCorpusFile(s1) + t.Logf("marshaled %q:\n%s", s1, b) + + rs, err := unmarshalCorpusFile(b) + if err != nil { + t.Fatal(err) + } + if len(rs) != 1 { + t.Fatalf("unmarshaled %d values", len(rs)) + } + s2 := rs[0].(string) + if s2 != s1 { + t.Errorf("unmarshaled %q", s2) + } + }) +} diff --git a/src/internal/fuzz/fuzz.go b/src/internal/fuzz/fuzz.go new file mode 100644 index 0000000..fb4e1d3 --- /dev/null +++ b/src/internal/fuzz/fuzz.go @@ -0,0 +1,1102 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fuzz provides common fuzzing functionality for tests built with +// "go test" and for programs that use fuzzing functionality in the testing +// package. +package fuzz + +import ( + "bytes" + "context" + "crypto/sha256" + "errors" + "fmt" + "internal/godebug" + "io" + "math/bits" + "os" + "path/filepath" + "reflect" + "runtime" + "strings" + "time" +) + +// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing. +// The zero value is valid for each field unless specified otherwise. +type CoordinateFuzzingOpts struct { + // Log is a writer for logging progress messages and warnings. + // If nil, io.Discard will be used instead. + Log io.Writer + + // Timeout is the amount of wall clock time to spend fuzzing after the corpus + // has loaded. If zero, there will be no time limit. + Timeout time.Duration + + // Limit is the number of random values to generate and test. If zero, + // there will be no limit on the number of generated values. + Limit int64 + + // MinimizeTimeout is the amount of wall clock time to spend minimizing + // after discovering a crasher. If zero, there will be no time limit. If + // MinimizeTimeout and MinimizeLimit are both zero, then minimization will + // be disabled. + MinimizeTimeout time.Duration + + // MinimizeLimit is the maximum number of calls to the fuzz function to be + // made while minimizing after finding a crash. If zero, there will be no + // limit. Calls to the fuzz function made when minimizing also count toward + // Limit. If MinimizeTimeout and MinimizeLimit are both zero, then + // minimization will be disabled. + MinimizeLimit int64 + + // parallel is the number of worker processes to run in parallel. If zero, + // CoordinateFuzzing will run GOMAXPROCS workers. + Parallel int + + // Seed is a list of seed values added by the fuzz target with testing.F.Add + // and in testdata. + Seed []CorpusEntry + + // Types is the list of types which make up a corpus entry. + // Types must be set and must match values in Seed. + Types []reflect.Type + + // CorpusDir is a directory where files containing values that crash the + // code being tested may be written. CorpusDir must be set. + CorpusDir string + + // CacheDir is a directory containing additional "interesting" values. + // The fuzzer may derive new values from these, and may write new values here. + CacheDir string +} + +// CoordinateFuzzing creates several worker processes and communicates with +// them to test random inputs that could trigger crashes and expose bugs. +// The worker processes run the same binary in the same directory with the +// same environment variables as the coordinator process. Workers also run +// with the same arguments as the coordinator, except with the -test.fuzzworker +// flag prepended to the argument list. +// +// If a crash occurs, the function will return an error containing information +// about the crash, which can be reported to the user. +func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { + if err := ctx.Err(); err != nil { + return err + } + if opts.Log == nil { + opts.Log = io.Discard + } + if opts.Parallel == 0 { + opts.Parallel = runtime.GOMAXPROCS(0) + } + if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { + // Don't start more workers than we need. + opts.Parallel = int(opts.Limit) + } + + c, err := newCoordinator(opts) + if err != nil { + return err + } + + if opts.Timeout > 0 { + var cancel func() + ctx, cancel = context.WithTimeout(ctx, opts.Timeout) + defer cancel() + } + + // fuzzCtx is used to stop workers, for example, after finding a crasher. + fuzzCtx, cancelWorkers := context.WithCancel(ctx) + defer cancelWorkers() + doneC := ctx.Done() + + // stop is called when a worker encounters a fatal error. + var fuzzErr error + stopping := false + stop := func(err error) { + if shouldPrintDebugInfo() { + _, file, line, ok := runtime.Caller(1) + if ok { + c.debugLogf("stop called at %s:%d. stopping: %t", file, line, stopping) + } else { + c.debugLogf("stop called at unknown. stopping: %t", stopping) + } + } + + if err == fuzzCtx.Err() || isInterruptError(err) { + // Suppress cancellation errors and terminations due to SIGINT. + // The messages are not helpful since either the user triggered the error + // (with ^C) or another more helpful message will be printed (a crasher). + err = nil + } + if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { + fuzzErr = err + } + if stopping { + return + } + stopping = true + cancelWorkers() + doneC = nil + } + + // Ensure that any crash we find is written to the corpus, even if an error + // or interruption occurs while minimizing it. + crashWritten := false + defer func() { + if c.crashMinimizing == nil || crashWritten { + return + } + werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) + if werr != nil { + err = fmt.Errorf("%w\n%v", err, werr) + return + } + if err == nil { + err = &crashError{ + path: c.crashMinimizing.entry.Path, + err: errors.New(c.crashMinimizing.crasherMsg), + } + } + }() + + // Start workers. + // TODO(jayconrod): do we want to support fuzzing different binaries? + dir := "" // same as self + binPath := os.Args[0] + args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) + env := os.Environ() // same as self + + errC := make(chan error) + workers := make([]*worker, opts.Parallel) + for i := range workers { + var err error + workers[i], err = newWorker(c, dir, binPath, args, env) + if err != nil { + return err + } + } + for i := range workers { + w := workers[i] + go func() { + err := w.coordinate(fuzzCtx) + if fuzzCtx.Err() != nil || isInterruptError(err) { + err = nil + } + cleanErr := w.cleanup() + if err == nil { + err = cleanErr + } + errC <- err + }() + } + + // Main event loop. + // Do not return until all workers have terminated. We avoid a deadlock by + // receiving messages from workers even after ctx is cancelled. + activeWorkers := len(workers) + statTicker := time.NewTicker(3 * time.Second) + defer statTicker.Stop() + defer c.logStats() + + c.logStats() + for { + // If there is an execution limit, and we've reached it, stop. + if c.opts.Limit > 0 && c.count >= c.opts.Limit { + stop(nil) + } + + var inputC chan fuzzInput + input, ok := c.peekInput() + if ok && c.crashMinimizing == nil && !stopping { + inputC = c.inputC + } + + var minimizeC chan fuzzMinimizeInput + minimizeInput, ok := c.peekMinimizeInput() + if ok && !stopping { + minimizeC = c.minimizeC + } + + select { + case <-doneC: + // Interrupted, cancelled, or timed out. + // stop sets doneC to nil so we don't busy wait here. + stop(ctx.Err()) + + case err := <-errC: + // A worker terminated, possibly after encountering a fatal error. + stop(err) + activeWorkers-- + if activeWorkers == 0 { + return fuzzErr + } + + case result := <-c.resultC: + // Received response from worker. + if stopping { + break + } + c.updateStats(result) + + if result.crasherMsg != "" { + if c.warmupRun() && result.entry.IsSeed { + target := filepath.Base(c.opts.CorpusDir) + fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) + stop(errors.New(result.crasherMsg)) + break + } + if c.canMinimize() && result.canMinimize { + if c.crashMinimizing != nil { + // This crash is not minimized, and another crash is being minimized. + // Ignore this one and wait for the other one to finish. + if shouldPrintDebugInfo() { + c.debugLogf("found unminimized crasher, skipping in favor of minimizable crasher") + } + break + } + // Found a crasher but haven't yet attempted to minimize it. + // Send it back to a worker for minimization. Disable inputC so + // other workers don't continue fuzzing. + c.crashMinimizing = &result + fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) + c.queueForMinimization(result, nil) + } else if !crashWritten { + // Found a crasher that's either minimized or not minimizable. + // Write to corpus and stop. + err := writeToCorpus(&result.entry, opts.CorpusDir) + if err == nil { + crashWritten = true + err = &crashError{ + path: result.entry.Path, + err: errors.New(result.crasherMsg), + } + } + if shouldPrintDebugInfo() { + c.debugLogf( + "found crasher, id: %s, parent: %s, gen: %d, size: %d, exec time: %s", + result.entry.Path, + result.entry.Parent, + result.entry.Generation, + len(result.entry.Data), + result.entryDuration, + ) + } + stop(err) + } + } else if result.coverageData != nil { + if c.warmupRun() { + if shouldPrintDebugInfo() { + c.debugLogf( + "processed an initial input, id: %s, new bits: %d, size: %d, exec time: %s", + result.entry.Parent, + countBits(diffCoverage(c.coverageMask, result.coverageData)), + len(result.entry.Data), + result.entryDuration, + ) + } + c.updateCoverage(result.coverageData) + c.warmupInputLeft-- + if c.warmupInputLeft == 0 { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) + if shouldPrintDebugInfo() { + c.debugLogf( + "finished processing input corpus, entries: %d, initial coverage bits: %d", + len(c.corpus.entries), + countBits(c.coverageMask), + ) + } + } + } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { + // Found a value that expanded coverage. + // It's not a crasher, but we may want to add it to the on-disk + // corpus and prioritize it for future fuzzing. + // TODO(jayconrod, katiehockman): Prioritize fuzzing these + // values which expanded coverage, perhaps based on the + // number of new edges that this result expanded. + // TODO(jayconrod, katiehockman): Don't write a value that's already + // in the corpus. + if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { + // Send back to workers to find a smaller value that preserves + // at least one new coverage bit. + c.queueForMinimization(result, keepCoverage) + } else { + // Update the coordinator's coverage mask and save the value. + inputSize := len(result.entry.Data) + entryNew, err := c.addCorpusEntries(true, result.entry) + if err != nil { + stop(err) + break + } + if !entryNew { + if shouldPrintDebugInfo() { + c.debugLogf( + "ignoring duplicate input which increased coverage, id: %s", + result.entry.Path, + ) + } + break + } + c.updateCoverage(keepCoverage) + c.inputQueue.enqueue(result.entry) + c.interestingCount++ + if shouldPrintDebugInfo() { + c.debugLogf( + "new interesting input, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s", + result.entry.Path, + result.entry.Parent, + result.entry.Generation, + countBits(keepCoverage), + countBits(c.coverageMask), + inputSize, + result.entryDuration, + ) + } + } + } else { + if shouldPrintDebugInfo() { + c.debugLogf( + "worker reported interesting input that doesn't expand coverage, id: %s, parent: %s, canMinimize: %t", + result.entry.Path, + result.entry.Parent, + result.canMinimize, + ) + } + } + } else if c.warmupRun() { + // No error or coverage data was reported for this input during + // warmup, so continue processing results. + c.warmupInputLeft-- + if c.warmupInputLeft == 0 { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) + if shouldPrintDebugInfo() { + c.debugLogf( + "finished testing-only phase, entries: %d", + len(c.corpus.entries), + ) + } + } + } + + case inputC <- input: + // Sent the next input to a worker. + c.sentInput(input) + + case minimizeC <- minimizeInput: + // Sent the next input for minimization to a worker. + c.sentMinimizeInput(minimizeInput) + + case <-statTicker.C: + c.logStats() + } + } + + // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, + // write to the cache instead. +} + +// crashError wraps a crasher written to the seed corpus. It saves the name +// of the file where the input causing the crasher was saved. The testing +// framework uses this to report a command to re-run that specific input. +type crashError struct { + path string + err error +} + +func (e *crashError) Error() string { + return e.err.Error() +} + +func (e *crashError) Unwrap() error { + return e.err +} + +func (e *crashError) CrashPath() string { + return e.path +} + +type corpus struct { + entries []CorpusEntry + hashes map[[sha256.Size]byte]bool +} + +// addCorpusEntries adds entries to the corpus, and optionally writes the entries +// to the cache directory. If an entry is already in the corpus it is skipped. If +// all of the entries are unique, addCorpusEntries returns true and a nil error, +// if at least one of the entries was a duplicate, it returns false and a nil error. +func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { + noDupes := true + for _, e := range entries { + data, err := corpusEntryData(e) + if err != nil { + return false, err + } + h := sha256.Sum256(data) + if c.corpus.hashes[h] { + noDupes = false + continue + } + if addToCache { + if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { + return false, err + } + // For entries written to disk, we don't hold onto the bytes, + // since the corpus would consume a significant amount of + // memory. + e.Data = nil + } + c.corpus.hashes[h] = true + c.corpus.entries = append(c.corpus.entries, e) + } + return noDupes, nil +} + +// CorpusEntry represents an individual input for fuzzing. +// +// We must use an equivalent type in the testing and testing/internal/testdeps +// packages, but testing can't import this package directly, and we don't want +// to export this type from testing. Instead, we use the same struct type and +// use a type alias (not a defined type) for convenience. +type CorpusEntry = struct { + Parent string + + // Path is the path of the corpus file, if the entry was loaded from disk. + // For other entries, including seed values provided by f.Add, Path is the + // name of the test, e.g. seed#0 or its hash. + Path string + + // Data is the raw input data. Data should only be populated for seed + // values. For on-disk corpus files, Data will be nil, as it will be loaded + // from disk using Path. + Data []byte + + // Values is the unmarshaled values from a corpus file. + Values []any + + Generation int + + // IsSeed indicates whether this entry is part of the seed corpus. + IsSeed bool +} + +// corpusEntryData returns the raw input bytes, either from the data struct +// field, or from disk. +func corpusEntryData(ce CorpusEntry) ([]byte, error) { + if ce.Data != nil { + return ce.Data, nil + } + + return os.ReadFile(ce.Path) +} + +type fuzzInput struct { + // entry is the value to test initially. The worker will randomly mutate + // values from this starting point. + entry CorpusEntry + + // timeout is the time to spend fuzzing variations of this input, + // not including starting or cleaning up. + timeout time.Duration + + // limit is the maximum number of calls to the fuzz function the worker may + // make. The worker may make fewer calls, for example, if it finds an + // error early. If limit is zero, there is no limit on calls to the + // fuzz function. + limit int64 + + // warmup indicates whether this is a warmup input before fuzzing begins. If + // true, the input should not be fuzzed. + warmup bool + + // coverageData reflects the coordinator's current coverageMask. + coverageData []byte +} + +type fuzzResult struct { + // entry is an interesting value or a crasher. + entry CorpusEntry + + // crasherMsg is an error message from a crash. It's "" if no crash was found. + crasherMsg string + + // canMinimize is true if the worker should attempt to minimize this result. + // It may be false because an attempt has already been made. + canMinimize bool + + // coverageData is set if the worker found new coverage. + coverageData []byte + + // limit is the number of values the coordinator asked the worker + // to test. 0 if there was no limit. + limit int64 + + // count is the number of values the worker actually tested. + count int64 + + // totalDuration is the time the worker spent testing inputs. + totalDuration time.Duration + + // entryDuration is the time the worker spent execution an interesting result + entryDuration time.Duration +} + +type fuzzMinimizeInput struct { + // entry is an interesting value or crasher to minimize. + entry CorpusEntry + + // crasherMsg is an error message from a crash. It's "" if no crash was found. + // If set, the worker will attempt to find a smaller input that also produces + // an error, though not necessarily the same error. + crasherMsg string + + // limit is the maximum number of calls to the fuzz function the worker may + // make. The worker may make fewer calls, for example, if it can't reproduce + // an error. If limit is zero, there is no limit on calls to the fuzz function. + limit int64 + + // timeout is the time to spend minimizing this input. + // A zero timeout means no limit. + timeout time.Duration + + // keepCoverage is a set of coverage bits that entry found that were not in + // the coordinator's combined set. When minimizing, the worker should find an + // input that preserves at least one of these bits. keepCoverage is nil for + // crashing inputs. + keepCoverage []byte +} + +// coordinator holds channels that workers can use to communicate with +// the coordinator. +type coordinator struct { + opts CoordinateFuzzingOpts + + // startTime is the time we started the workers after loading the corpus. + // Used for logging. + startTime time.Time + + // inputC is sent values to fuzz by the coordinator. Any worker may receive + // values from this channel. Workers send results to resultC. + inputC chan fuzzInput + + // minimizeC is sent values to minimize by the coordinator. Any worker may + // receive values from this channel. Workers send results to resultC. + minimizeC chan fuzzMinimizeInput + + // resultC is sent results of fuzzing by workers. The coordinator + // receives these. Multiple types of messages are allowed. + resultC chan fuzzResult + + // count is the number of values fuzzed so far. + count int64 + + // countLastLog is the number of values fuzzed when the output was last + // logged. + countLastLog int64 + + // timeLastLog is the time at which the output was last logged. + timeLastLog time.Time + + // interestingCount is the number of unique interesting values which have + // been found this execution. + interestingCount int + + // warmupInputCount is the count of all entries in the corpus which will + // need to be received from workers to run once during warmup, but not fuzz. + // This could be for coverage data, or only for the purposes of verifying + // that the seed corpus doesn't have any crashers. See warmupRun. + warmupInputCount int + + // warmupInputLeft is the number of entries in the corpus which still need + // to be received from workers to run once during warmup, but not fuzz. + // See warmupInputLeft. + warmupInputLeft int + + // duration is the time spent fuzzing inside workers, not counting time + // starting up or tearing down. + duration time.Duration + + // countWaiting is the number of fuzzing executions the coordinator is + // waiting on workers to complete. + countWaiting int64 + + // corpus is a set of interesting values, including the seed corpus and + // generated values that workers reported as interesting. + corpus corpus + + // minimizationAllowed is true if one or more of the types of fuzz + // function's parameters can be minimized. + minimizationAllowed bool + + // inputQueue is a queue of inputs that workers should try fuzzing. This is + // initially populated from the seed corpus and cached inputs. More inputs + // may be added as new coverage is discovered. + inputQueue queue + + // minimizeQueue is a queue of inputs that caused errors or exposed new + // coverage. Workers should attempt to find smaller inputs that do the + // same thing. + minimizeQueue queue + + // crashMinimizing is the crash that is currently being minimized. + crashMinimizing *fuzzResult + + // coverageMask aggregates coverage that was found for all inputs in the + // corpus. Each byte represents a single basic execution block. Each set bit + // within the byte indicates that an input has triggered that block at least + // 1 << n times, where n is the position of the bit in the byte. For example, a + // value of 12 indicates that separate inputs have triggered this block + // between 4-7 times and 8-15 times. + coverageMask []byte +} + +func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { + // Make sure all of the seed corpus has marshalled data. + for i := range opts.Seed { + if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { + opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) + } + } + c := &coordinator{ + opts: opts, + startTime: time.Now(), + inputC: make(chan fuzzInput), + minimizeC: make(chan fuzzMinimizeInput), + resultC: make(chan fuzzResult), + timeLastLog: time.Now(), + corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, + } + if err := c.readCache(); err != nil { + return nil, err + } + if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { + for _, t := range opts.Types { + if isMinimizable(t) { + c.minimizationAllowed = true + break + } + } + } + + covSize := len(coverage()) + if covSize == 0 { + fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") + // Even though a coverage-only run won't occur, we should still run all + // of the seed corpus to make sure there are no existing failures before + // we start fuzzing. + c.warmupInputCount = len(c.opts.Seed) + for _, e := range c.opts.Seed { + c.inputQueue.enqueue(e) + } + } else { + c.warmupInputCount = len(c.corpus.entries) + for _, e := range c.corpus.entries { + c.inputQueue.enqueue(e) + } + // Set c.coverageMask to a clean []byte full of zeros. + c.coverageMask = make([]byte, covSize) + } + c.warmupInputLeft = c.warmupInputCount + + if len(c.corpus.entries) == 0 { + fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") + var vals []any + for _, t := range opts.Types { + vals = append(vals, zeroValue(t)) + } + data := marshalCorpusFile(vals...) + h := sha256.Sum256(data) + name := fmt.Sprintf("%x", h[:4]) + c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) + } + + return c, nil +} + +func (c *coordinator) updateStats(result fuzzResult) { + c.count += result.count + c.countWaiting -= result.limit + c.duration += result.totalDuration +} + +func (c *coordinator) logStats() { + now := time.Now() + if c.warmupRun() { + runSoFar := c.warmupInputCount - c.warmupInputLeft + if coverageEnabled { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) + } else { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) + } + } else if c.crashMinimizing != nil { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) + } else { + rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() + if coverageEnabled { + total := c.warmupInputCount + c.interestingCount + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total) + } else { + fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) + } + } + c.countLastLog = c.count + c.timeLastLog = now +} + +// peekInput returns the next value that should be sent to workers. +// If the number of executions is limited, the returned value includes +// a limit for one worker. If there are no executions left, peekInput returns +// a zero value and false. +// +// peekInput doesn't actually remove the input from the queue. The caller +// must call sentInput after sending the input. +// +// If the input queue is empty and the coverage/testing-only run has completed, +// queue refills it from the corpus. +func (c *coordinator) peekInput() (fuzzInput, bool) { + if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { + // Already making the maximum number of calls to the fuzz function. + // Don't send more inputs right now. + return fuzzInput{}, false + } + if c.inputQueue.len == 0 { + if c.warmupRun() { + // Wait for coverage/testing-only run to finish before sending more + // inputs. + return fuzzInput{}, false + } + c.refillInputQueue() + } + + entry, ok := c.inputQueue.peek() + if !ok { + panic("input queue empty after refill") + } + input := fuzzInput{ + entry: entry.(CorpusEntry), + timeout: workerFuzzDuration, + warmup: c.warmupRun(), + } + if c.coverageMask != nil { + input.coverageData = bytes.Clone(c.coverageMask) + } + if input.warmup { + // No fuzzing will occur, but it should count toward the limit set by + // -fuzztime. + input.limit = 1 + return input, true + } + + if c.opts.Limit > 0 { + input.limit = c.opts.Limit / int64(c.opts.Parallel) + if c.opts.Limit%int64(c.opts.Parallel) > 0 { + input.limit++ + } + remaining := c.opts.Limit - c.count - c.countWaiting + if input.limit > remaining { + input.limit = remaining + } + } + return input, true +} + +// sentInput updates internal counters after an input is sent to c.inputC. +func (c *coordinator) sentInput(input fuzzInput) { + c.inputQueue.dequeue() + c.countWaiting += input.limit +} + +// refillInputQueue refills the input queue from the corpus after it becomes +// empty. +func (c *coordinator) refillInputQueue() { + for _, e := range c.corpus.entries { + c.inputQueue.enqueue(e) + } +} + +// queueForMinimization creates a fuzzMinimizeInput from result and adds it +// to the minimization queue to be sent to workers. +func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { + if shouldPrintDebugInfo() { + c.debugLogf( + "queueing input for minimization, id: %s, parent: %s, keepCoverage: %t, crasher: %t", + result.entry.Path, + result.entry.Parent, + keepCoverage != nil, + result.crasherMsg != "", + ) + } + if result.crasherMsg != "" { + c.minimizeQueue.clear() + } + + input := fuzzMinimizeInput{ + entry: result.entry, + crasherMsg: result.crasherMsg, + keepCoverage: keepCoverage, + } + c.minimizeQueue.enqueue(input) +} + +// peekMinimizeInput returns the next input that should be sent to workers for +// minimization. +func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { + if !c.canMinimize() { + // Already making the maximum number of calls to the fuzz function. + // Don't send more inputs right now. + return fuzzMinimizeInput{}, false + } + v, ok := c.minimizeQueue.peek() + if !ok { + return fuzzMinimizeInput{}, false + } + input := v.(fuzzMinimizeInput) + + if c.opts.MinimizeTimeout > 0 { + input.timeout = c.opts.MinimizeTimeout + } + if c.opts.MinimizeLimit > 0 { + input.limit = c.opts.MinimizeLimit + } else if c.opts.Limit > 0 { + if input.crasherMsg != "" { + input.limit = c.opts.Limit + } else { + input.limit = c.opts.Limit / int64(c.opts.Parallel) + if c.opts.Limit%int64(c.opts.Parallel) > 0 { + input.limit++ + } + } + } + if c.opts.Limit > 0 { + remaining := c.opts.Limit - c.count - c.countWaiting + if input.limit > remaining { + input.limit = remaining + } + } + return input, true +} + +// sentMinimizeInput removes an input from the minimization queue after it's +// sent to minimizeC. +func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { + c.minimizeQueue.dequeue() + c.countWaiting += input.limit +} + +// warmupRun returns true while the coordinator is running inputs without +// mutating them as a warmup before fuzzing. This could be to gather baseline +// coverage data for entries in the corpus, or to test all of the seed corpus +// for errors before fuzzing begins. +// +// The coordinator doesn't store coverage data in the cache with each input +// because that data would be invalid when counter offsets in the test binary +// change. +// +// When gathering coverage, the coordinator sends each entry to a worker to +// gather coverage for that entry only, without fuzzing or minimizing. This +// phase ends when all workers have finished, and the coordinator has a combined +// coverage map. +func (c *coordinator) warmupRun() bool { + return c.warmupInputLeft > 0 +} + +// updateCoverage sets bits in c.coverageMask that are set in newCoverage. +// updateCoverage returns the number of newly set bits. See the comment on +// coverageMask for the format. +func (c *coordinator) updateCoverage(newCoverage []byte) int { + if len(newCoverage) != len(c.coverageMask) { + panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) + } + newBitCount := 0 + for i := range newCoverage { + diff := newCoverage[i] &^ c.coverageMask[i] + newBitCount += bits.OnesCount8(diff) + c.coverageMask[i] |= newCoverage[i] + } + return newBitCount +} + +// canMinimize returns whether the coordinator should attempt to find smaller +// inputs that reproduce a crash or new coverage. +func (c *coordinator) canMinimize() bool { + return c.minimizationAllowed && + (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) +} + +func (c *coordinator) elapsed() time.Duration { + return time.Since(c.startTime).Round(1 * time.Second) +} + +// readCache creates a combined corpus from seed values and values in the cache +// (in GOCACHE/fuzz). +// +// TODO(fuzzing): need a mechanism that can remove values that +// aren't useful anymore, for example, because they have the wrong type. +func (c *coordinator) readCache() error { + if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { + return err + } + entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) + if err != nil { + if _, ok := err.(*MalformedCorpusError); !ok { + // It's okay if some files in the cache directory are malformed and + // are not included in the corpus, but fail if it's an I/O error. + return err + } + // TODO(jayconrod,katiehockman): consider printing some kind of warning + // indicating the number of files which were skipped because they are + // malformed. + } + if _, err := c.addCorpusEntries(false, entries...); err != nil { + return err + } + return nil +} + +// MalformedCorpusError is an error found while reading the corpus from the +// filesystem. All of the errors are stored in the errs list. The testing +// framework uses this to report malformed files in testdata. +type MalformedCorpusError struct { + errs []error +} + +func (e *MalformedCorpusError) Error() string { + var msgs []string + for _, s := range e.errs { + msgs = append(msgs, s.Error()) + } + return strings.Join(msgs, "\n") +} + +// ReadCorpus reads the corpus from the provided dir. The returned corpus +// entries are guaranteed to match the given types. Any malformed files will +// be saved in a MalformedCorpusError and returned, along with the most recent +// error. +func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { + files, err := os.ReadDir(dir) + if os.IsNotExist(err) { + return nil, nil // No corpus to read + } else if err != nil { + return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) + } + var corpus []CorpusEntry + var errs []error + for _, file := range files { + // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input + // based on its name. We should only read files created by writeToCorpus. + // If we read ALL files, we won't be able to change the file format by + // changing the extension. We also won't be able to add files like + // README.txt explaining why the directory exists. + if file.IsDir() { + continue + } + filename := filepath.Join(dir, file.Name()) + data, err := os.ReadFile(filename) + if err != nil { + return nil, fmt.Errorf("failed to read corpus file: %v", err) + } + var vals []any + vals, err = readCorpusData(data, types) + if err != nil { + errs = append(errs, fmt.Errorf("%q: %v", filename, err)) + continue + } + corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) + } + if len(errs) > 0 { + return corpus, &MalformedCorpusError{errs: errs} + } + return corpus, nil +} + +func readCorpusData(data []byte, types []reflect.Type) ([]any, error) { + vals, err := unmarshalCorpusFile(data) + if err != nil { + return nil, fmt.Errorf("unmarshal: %v", err) + } + if err = CheckCorpus(vals, types); err != nil { + return nil, err + } + return vals, nil +} + +// CheckCorpus verifies that the types in vals match the expected types +// provided. +func CheckCorpus(vals []any, types []reflect.Type) error { + if len(vals) != len(types) { + return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types)) + } + valsT := make([]reflect.Type, len(vals)) + for valsI, v := range vals { + valsT[valsI] = reflect.TypeOf(v) + } + for i := range types { + if valsT[i] != types[i] { + return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types) + } + } + return nil +} + +// writeToCorpus atomically writes the given bytes to a new file in testdata. If +// the directory does not exist, it will create one. If the file already exists, +// writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new +// file that was just written or an error if it failed. +func writeToCorpus(entry *CorpusEntry, dir string) (err error) { + sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16] + entry.Path = filepath.Join(dir, sum) + if err := os.MkdirAll(dir, 0777); err != nil { + return err + } + if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil { + os.Remove(entry.Path) // remove partially written file + return err + } + return nil +} + +func testName(path string) string { + return filepath.Base(path) +} + +func zeroValue(t reflect.Type) any { + for _, v := range zeroVals { + if reflect.TypeOf(v) == t { + return v + } + } + panic(fmt.Sprintf("unsupported type: %v", t)) +} + +var zeroVals []any = []any{ + []byte(""), + string(""), + false, + byte(0), + rune(0), + float32(0), + float64(0), + int(0), + int8(0), + int16(0), + int32(0), + int64(0), + uint(0), + uint8(0), + uint16(0), + uint32(0), + uint64(0), +} + +var debugInfo = godebug.New("#fuzzdebug").Value() == "1" + +func shouldPrintDebugInfo() bool { + return debugInfo +} + +func (c *coordinator) debugLogf(format string, args ...any) { + t := time.Now().Format("2006-01-02 15:04:05.999999999") + fmt.Fprintf(c.opts.Log, t+" DEBUG "+format+"\n", args...) +} diff --git a/src/internal/fuzz/mem.go b/src/internal/fuzz/mem.go new file mode 100644 index 0000000..4155e4e --- /dev/null +++ b/src/internal/fuzz/mem.go @@ -0,0 +1,138 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "fmt" + "os" + "unsafe" +) + +// sharedMem manages access to a region of virtual memory mapped from a file, +// shared between multiple processes. The region includes space for a header and +// a value of variable length. +// +// When fuzzing, the coordinator creates a sharedMem from a temporary file for +// each worker. This buffer is used to pass values to fuzz between processes. +// Care must be taken to manage access to shared memory across processes; +// sharedMem provides no synchronization on its own. See workerComm for an +// explanation. +type sharedMem struct { + // f is the file mapped into memory. + f *os.File + + // region is the mapped region of virtual memory for f. The content of f may + // be read or written through this slice. + region []byte + + // removeOnClose is true if the file should be deleted by Close. + removeOnClose bool + + // sys contains OS-specific information. + sys sharedMemSys +} + +// sharedMemHeader stores metadata in shared memory. +type sharedMemHeader struct { + // count is the number of times the worker has called the fuzz function. + // May be reset by coordinator. + count int64 + + // valueLen is the number of bytes in region which should be read. + valueLen int + + // randState and randInc hold the state of a pseudo-random number generator. + randState, randInc uint64 + + // rawInMem is true if the region holds raw bytes, which occurs during + // minimization. If true after the worker fails during minimization, this + // indicates that an unrecoverable error occurred, and the region can be + // used to retrieve the raw bytes that caused the error. + rawInMem bool +} + +// sharedMemSize returns the size needed for a shared memory buffer that can +// contain values of the given size. +func sharedMemSize(valueSize int) int { + // TODO(jayconrod): set a reasonable maximum size per platform. + return int(unsafe.Sizeof(sharedMemHeader{})) + valueSize +} + +// sharedMemTempFile creates a new temporary file of the given size, then maps +// it into memory. The file will be removed when the Close method is called. +func sharedMemTempFile(size int) (m *sharedMem, err error) { + // Create a temporary file. + f, err := os.CreateTemp("", "fuzz-*") + if err != nil { + return nil, err + } + defer func() { + if err != nil { + f.Close() + os.Remove(f.Name()) + } + }() + + // Resize it to the correct size. + totalSize := sharedMemSize(size) + if err := f.Truncate(int64(totalSize)); err != nil { + return nil, err + } + + // Map the file into memory. + removeOnClose := true + return sharedMemMapFile(f, totalSize, removeOnClose) +} + +// header returns a pointer to metadata within the shared memory region. +func (m *sharedMem) header() *sharedMemHeader { + return (*sharedMemHeader)(unsafe.Pointer(&m.region[0])) +} + +// valueRef returns the value currently stored in shared memory. The returned +// slice points to shared memory; it is not a copy. +func (m *sharedMem) valueRef() []byte { + length := m.header().valueLen + valueOffset := int(unsafe.Sizeof(sharedMemHeader{})) + return m.region[valueOffset : valueOffset+length] +} + +// valueCopy returns a copy of the value stored in shared memory. +func (m *sharedMem) valueCopy() []byte { + ref := m.valueRef() + return bytes.Clone(ref) +} + +// setValue copies the data in b into the shared memory buffer and sets +// the length. len(b) must be less than or equal to the capacity of the buffer +// (as returned by cap(m.value())). +func (m *sharedMem) setValue(b []byte) { + v := m.valueRef() + if len(b) > cap(v) { + panic(fmt.Sprintf("value length %d larger than shared memory capacity %d", len(b), cap(v))) + } + m.header().valueLen = len(b) + copy(v[:cap(v)], b) +} + +// setValueLen sets the length of the shared memory buffer returned by valueRef +// to n, which may be at most the cap of that slice. +// +// Note that we can only store the length in the shared memory header. The full +// slice header contains a pointer, which is likely only valid for one process, +// since each process can map shared memory at a different virtual address. +func (m *sharedMem) setValueLen(n int) { + v := m.valueRef() + if n > cap(v) { + panic(fmt.Sprintf("length %d larger than shared memory capacity %d", n, cap(v))) + } + m.header().valueLen = n +} + +// TODO(jayconrod): add method to resize the buffer. We'll need that when the +// mutator can increase input length. Only the coordinator will be able to +// do it, since we'll need to send a message to the worker telling it to +// remap the file. diff --git a/src/internal/fuzz/minimize.go b/src/internal/fuzz/minimize.go new file mode 100644 index 0000000..0e410fb --- /dev/null +++ b/src/internal/fuzz/minimize.go @@ -0,0 +1,95 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "reflect" +) + +func isMinimizable(t reflect.Type) bool { + return t == reflect.TypeOf("") || t == reflect.TypeOf([]byte(nil)) +} + +func minimizeBytes(v []byte, try func([]byte) bool, shouldStop func() bool) { + tmp := make([]byte, len(v)) + // If minimization was successful at any point during minimizeBytes, + // then the vals slice in (*workerServer).minimizeInput will point to + // tmp. Since tmp is altered while making new candidates, we need to + // make sure that it is equal to the correct value, v, before exiting + // this function. + defer copy(tmp, v) + + // First, try to cut the tail. + for n := 1024; n != 0; n /= 2 { + for len(v) > n { + if shouldStop() { + return + } + candidate := v[:len(v)-n] + if !try(candidate) { + break + } + // Set v to the new value to continue iterating. + v = candidate + } + } + + // Then, try to remove each individual byte. + for i := 0; i < len(v)-1; i++ { + if shouldStop() { + return + } + candidate := tmp[:len(v)-1] + copy(candidate[:i], v[:i]) + copy(candidate[i:], v[i+1:]) + if !try(candidate) { + continue + } + // Update v to delete the value at index i. + copy(v[i:], v[i+1:]) + v = v[:len(candidate)] + // v[i] is now different, so decrement i to redo this iteration + // of the loop with the new value. + i-- + } + + // Then, try to remove each possible subset of bytes. + for i := 0; i < len(v)-1; i++ { + copy(tmp, v[:i]) + for j := len(v); j > i+1; j-- { + if shouldStop() { + return + } + candidate := tmp[:len(v)-j+i] + copy(candidate[i:], v[j:]) + if !try(candidate) { + continue + } + // Update v and reset the loop with the new length. + copy(v[i:], v[j:]) + v = v[:len(candidate)] + j = len(v) + } + } + + // Then, try to make it more simplified and human-readable by trying to replace each + // byte with a printable character. + printableChars := []byte("012789ABCXYZabcxyz !\"#$%&'()*+,.") + for i, b := range v { + if shouldStop() { + return + } + + for _, pc := range printableChars { + v[i] = pc + if try(v) { + // Successful. Move on to the next byte in v. + break + } + // Unsuccessful. Revert v[i] back to original value. + v[i] = b + } + } +} diff --git a/src/internal/fuzz/minimize_test.go b/src/internal/fuzz/minimize_test.go new file mode 100644 index 0000000..2db2633 --- /dev/null +++ b/src/internal/fuzz/minimize_test.go @@ -0,0 +1,182 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin || freebsd || linux || windows + +package fuzz + +import ( + "bytes" + "context" + "errors" + "fmt" + "reflect" + "testing" + "time" + "unicode" + "unicode/utf8" +) + +func TestMinimizeInput(t *testing.T) { + type testcase struct { + name string + fn func(CorpusEntry) error + input []any + expected []any + } + cases := []testcase{ + { + name: "ones_byte", + fn: func(e CorpusEntry) error { + b := e.Values[0].([]byte) + ones := 0 + for _, v := range b { + if v == 1 { + ones++ + } + } + if ones == 3 { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{[]byte{0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + expected: []any{[]byte{1, 1, 1}}, + }, + { + name: "single_bytes", + fn: func(e CorpusEntry) error { + b := e.Values[0].([]byte) + if len(b) < 2 { + return nil + } + if len(b) == 2 && b[0] == 1 && b[1] == 2 { + return nil + } + return fmt.Errorf("bad %v", e.Values[0]) + }, + input: []any{[]byte{1, 2, 3, 4, 5}}, + expected: []any{[]byte("00")}, + }, + { + name: "set_of_bytes", + fn: func(e CorpusEntry) error { + b := e.Values[0].([]byte) + if len(b) < 3 { + return nil + } + if bytes.Equal(b, []byte{0, 1, 2, 3, 4, 5}) || bytes.Equal(b, []byte{0, 4, 5}) { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{[]byte{0, 1, 2, 3, 4, 5}}, + expected: []any{[]byte{0, 4, 5}}, + }, + { + name: "non_ascii_bytes", + fn: func(e CorpusEntry) error { + b := e.Values[0].([]byte) + if len(b) == 3 { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{[]byte("ท")}, // ท is 3 bytes + expected: []any{[]byte("000")}, + }, + { + name: "ones_string", + fn: func(e CorpusEntry) error { + b := e.Values[0].(string) + ones := 0 + for _, v := range b { + if v == '1' { + ones++ + } + } + if ones == 3 { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{"001010001000000000000000000"}, + expected: []any{"111"}, + }, + { + name: "string_length", + fn: func(e CorpusEntry) error { + b := e.Values[0].(string) + if len(b) == 5 { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{"zzzzz"}, + expected: []any{"00000"}, + }, + { + name: "string_with_letter", + fn: func(e CorpusEntry) error { + b := e.Values[0].(string) + r, _ := utf8.DecodeRune([]byte(b)) + if unicode.IsLetter(r) { + return fmt.Errorf("bad %v", e.Values[0]) + } + return nil + }, + input: []any{"ZZZZZ"}, + expected: []any{"A"}, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ws := &workerServer{ + fuzzFn: func(e CorpusEntry) (time.Duration, error) { + return time.Second, tc.fn(e) + }, + } + mem := &sharedMem{region: make([]byte, 100)} // big enough to hold value and header + vals := tc.input + success, err := ws.minimizeInput(context.Background(), vals, mem, minimizeArgs{}) + if !success { + t.Errorf("minimizeInput did not succeed") + } + if err == nil { + t.Fatal("minimizeInput didn't provide an error") + } + if expected := fmt.Sprintf("bad %v", tc.expected[0]); err.Error() != expected { + t.Errorf("unexpected error: got %q, want %q", err, expected) + } + if !reflect.DeepEqual(vals, tc.expected) { + t.Errorf("unexpected results: got %v, want %v", vals, tc.expected) + } + }) + } +} + +// TestMinimizeFlaky checks that if we're minimizing an interesting +// input and a flaky failure occurs, that minimization was not indicated +// to be successful, and the error isn't returned (since it's flaky). +func TestMinimizeFlaky(t *testing.T) { + ws := &workerServer{fuzzFn: func(e CorpusEntry) (time.Duration, error) { + return time.Second, errors.New("ohno") + }} + mem := &sharedMem{region: make([]byte, 100)} // big enough to hold value and header + vals := []any{[]byte(nil)} + args := minimizeArgs{KeepCoverage: make([]byte, len(coverageSnapshot))} + success, err := ws.minimizeInput(context.Background(), vals, mem, args) + if success { + t.Error("unexpected success") + } + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if count := mem.header().count; count != 1 { + t.Errorf("count: got %d, want 1", count) + } +} diff --git a/src/internal/fuzz/mutator.go b/src/internal/fuzz/mutator.go new file mode 100644 index 0000000..9bba0d6 --- /dev/null +++ b/src/internal/fuzz/mutator.go @@ -0,0 +1,293 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "encoding/binary" + "fmt" + "math" + "unsafe" +) + +type mutator struct { + r mutatorRand + scratch []byte // scratch slice to avoid additional allocations +} + +func newMutator() *mutator { + return &mutator{r: newPcgRand()} +} + +func (m *mutator) rand(n int) int { + return m.r.intn(n) +} + +func (m *mutator) randByteOrder() binary.ByteOrder { + if m.r.bool() { + return binary.LittleEndian + } + return binary.BigEndian +} + +// chooseLen chooses length of range mutation in range [1,n]. It gives +// preference to shorter ranges. +func (m *mutator) chooseLen(n int) int { + switch x := m.rand(100); { + case x < 90: + return m.rand(min(8, n)) + 1 + case x < 99: + return m.rand(min(32, n)) + 1 + default: + return m.rand(n) + 1 + } +} + +// mutate performs several mutations on the provided values. +func (m *mutator) mutate(vals []any, maxBytes int) { + // TODO(katiehockman): pull some of these functions into helper methods and + // test that each case is working as expected. + // TODO(katiehockman): perform more types of mutations for []byte. + + // maxPerVal will represent the maximum number of bytes that each value be + // allowed after mutating, giving an equal amount of capacity to each line. + // Allow a little wiggle room for the encoding. + maxPerVal := maxBytes/len(vals) - 100 + + // Pick a random value to mutate. + // TODO: consider mutating more than one value at a time. + i := m.rand(len(vals)) + switch v := vals[i].(type) { + case int: + vals[i] = int(m.mutateInt(int64(v), maxInt)) + case int8: + vals[i] = int8(m.mutateInt(int64(v), math.MaxInt8)) + case int16: + vals[i] = int16(m.mutateInt(int64(v), math.MaxInt16)) + case int64: + vals[i] = m.mutateInt(v, maxInt) + case uint: + vals[i] = uint(m.mutateUInt(uint64(v), maxUint)) + case uint16: + vals[i] = uint16(m.mutateUInt(uint64(v), math.MaxUint16)) + case uint32: + vals[i] = uint32(m.mutateUInt(uint64(v), math.MaxUint32)) + case uint64: + vals[i] = m.mutateUInt(v, maxUint) + case float32: + vals[i] = float32(m.mutateFloat(float64(v), math.MaxFloat32)) + case float64: + vals[i] = m.mutateFloat(v, math.MaxFloat64) + case bool: + if m.rand(2) == 1 { + vals[i] = !v // 50% chance of flipping the bool + } + case rune: // int32 + vals[i] = rune(m.mutateInt(int64(v), math.MaxInt32)) + case byte: // uint8 + vals[i] = byte(m.mutateUInt(uint64(v), math.MaxUint8)) + case string: + if len(v) > maxPerVal { + panic(fmt.Sprintf("cannot mutate bytes of length %d", len(v))) + } + if cap(m.scratch) < maxPerVal { + m.scratch = append(make([]byte, 0, maxPerVal), v...) + } else { + m.scratch = m.scratch[:len(v)] + copy(m.scratch, v) + } + m.mutateBytes(&m.scratch) + vals[i] = string(m.scratch) + case []byte: + if len(v) > maxPerVal { + panic(fmt.Sprintf("cannot mutate bytes of length %d", len(v))) + } + if cap(m.scratch) < maxPerVal { + m.scratch = append(make([]byte, 0, maxPerVal), v...) + } else { + m.scratch = m.scratch[:len(v)] + copy(m.scratch, v) + } + m.mutateBytes(&m.scratch) + vals[i] = m.scratch + default: + panic(fmt.Sprintf("type not supported for mutating: %T", vals[i])) + } +} + +func (m *mutator) mutateInt(v, maxValue int64) int64 { + var max int64 + for { + max = 100 + switch m.rand(2) { + case 0: + // Add a random number + if v >= maxValue { + continue + } + if v > 0 && maxValue-v < max { + // Don't let v exceed maxValue + max = maxValue - v + } + v += int64(1 + m.rand(int(max))) + return v + case 1: + // Subtract a random number + if v <= -maxValue { + continue + } + if v < 0 && maxValue+v < max { + // Don't let v drop below -maxValue + max = maxValue + v + } + v -= int64(1 + m.rand(int(max))) + return v + } + } +} + +func (m *mutator) mutateUInt(v, maxValue uint64) uint64 { + var max uint64 + for { + max = 100 + switch m.rand(2) { + case 0: + // Add a random number + if v >= maxValue { + continue + } + if v > 0 && maxValue-v < max { + // Don't let v exceed maxValue + max = maxValue - v + } + + v += uint64(1 + m.rand(int(max))) + return v + case 1: + // Subtract a random number + if v <= 0 { + continue + } + if v < max { + // Don't let v drop below 0 + max = v + } + v -= uint64(1 + m.rand(int(max))) + return v + } + } +} + +func (m *mutator) mutateFloat(v, maxValue float64) float64 { + var max float64 + for { + switch m.rand(4) { + case 0: + // Add a random number + if v >= maxValue { + continue + } + max = 100 + if v > 0 && maxValue-v < max { + // Don't let v exceed maxValue + max = maxValue - v + } + v += float64(1 + m.rand(int(max))) + return v + case 1: + // Subtract a random number + if v <= -maxValue { + continue + } + max = 100 + if v < 0 && maxValue+v < max { + // Don't let v drop below -maxValue + max = maxValue + v + } + v -= float64(1 + m.rand(int(max))) + return v + case 2: + // Multiply by a random number + absV := math.Abs(v) + if v == 0 || absV >= maxValue { + continue + } + max = 10 + if maxValue/absV < max { + // Don't let v go beyond the minimum or maximum value + max = maxValue / absV + } + v *= float64(1 + m.rand(int(max))) + return v + case 3: + // Divide by a random number + if v == 0 { + continue + } + v /= float64(1 + m.rand(10)) + return v + } + } +} + +type byteSliceMutator func(*mutator, []byte) []byte + +var byteSliceMutators = []byteSliceMutator{ + byteSliceRemoveBytes, + byteSliceInsertRandomBytes, + byteSliceDuplicateBytes, + byteSliceOverwriteBytes, + byteSliceBitFlip, + byteSliceXORByte, + byteSliceSwapByte, + byteSliceArithmeticUint8, + byteSliceArithmeticUint16, + byteSliceArithmeticUint32, + byteSliceArithmeticUint64, + byteSliceOverwriteInterestingUint8, + byteSliceOverwriteInterestingUint16, + byteSliceOverwriteInterestingUint32, + byteSliceInsertConstantBytes, + byteSliceOverwriteConstantBytes, + byteSliceShuffleBytes, + byteSliceSwapBytes, +} + +func (m *mutator) mutateBytes(ptrB *[]byte) { + b := *ptrB + defer func() { + if unsafe.SliceData(*ptrB) != unsafe.SliceData(b) { + panic("data moved to new address") + } + *ptrB = b + }() + + for { + mut := byteSliceMutators[m.rand(len(byteSliceMutators))] + if mutated := mut(m, b); mutated != nil { + b = mutated + return + } + } +} + +var ( + interesting8 = []int8{-128, -1, 0, 1, 16, 32, 64, 100, 127} + interesting16 = []int16{-32768, -129, 128, 255, 256, 512, 1000, 1024, 4096, 32767} + interesting32 = []int32{-2147483648, -100663046, -32769, 32768, 65535, 65536, 100663045, 2147483647} +) + +const ( + maxUint = uint64(^uint(0)) + maxInt = int64(maxUint >> 1) +) + +func init() { + for _, v := range interesting8 { + interesting16 = append(interesting16, int16(v)) + } + for _, v := range interesting16 { + interesting32 = append(interesting32, int32(v)) + } +} diff --git a/src/internal/fuzz/mutator_test.go b/src/internal/fuzz/mutator_test.go new file mode 100644 index 0000000..cea7e2e --- /dev/null +++ b/src/internal/fuzz/mutator_test.go @@ -0,0 +1,117 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "fmt" + "os" + "strconv" + "testing" +) + +func BenchmarkMutatorBytes(b *testing.B) { + origEnv := os.Getenv("GODEBUG") + defer func() { os.Setenv("GODEBUG", origEnv) }() + os.Setenv("GODEBUG", fmt.Sprintf("%s,fuzzseed=123", origEnv)) + m := newMutator() + + for _, size := range []int{ + 1, + 10, + 100, + 1000, + 10000, + 100000, + } { + b.Run(strconv.Itoa(size), func(b *testing.B) { + buf := make([]byte, size) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + // resize buffer to the correct shape and reset the PCG + buf = buf[0:size] + m.r = newPcgRand() + m.mutate([]any{buf}, workerSharedMemSize) + } + }) + } +} + +func BenchmarkMutatorString(b *testing.B) { + origEnv := os.Getenv("GODEBUG") + defer func() { os.Setenv("GODEBUG", origEnv) }() + os.Setenv("GODEBUG", fmt.Sprintf("%s,fuzzseed=123", origEnv)) + m := newMutator() + + for _, size := range []int{ + 1, + 10, + 100, + 1000, + 10000, + 100000, + } { + b.Run(strconv.Itoa(size), func(b *testing.B) { + buf := make([]byte, size) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + // resize buffer to the correct shape and reset the PCG + buf = buf[0:size] + m.r = newPcgRand() + m.mutate([]any{string(buf)}, workerSharedMemSize) + } + }) + } +} + +func BenchmarkMutatorAllBasicTypes(b *testing.B) { + origEnv := os.Getenv("GODEBUG") + defer func() { os.Setenv("GODEBUG", origEnv) }() + os.Setenv("GODEBUG", fmt.Sprintf("%s,fuzzseed=123", origEnv)) + m := newMutator() + + types := []any{ + []byte(""), + string(""), + false, + float32(0), + float64(0), + int(0), + int8(0), + int16(0), + int32(0), + int64(0), + uint8(0), + uint16(0), + uint32(0), + uint64(0), + } + + for _, t := range types { + b.Run(fmt.Sprintf("%T", t), func(b *testing.B) { + for i := 0; i < b.N; i++ { + m.r = newPcgRand() + m.mutate([]any{t}, workerSharedMemSize) + } + }) + } +} + +func TestStringImmutability(t *testing.T) { + v := []any{"hello"} + m := newMutator() + m.mutate(v, 1024) + original := v[0].(string) + originalCopy := make([]byte, len(original)) + copy(originalCopy, []byte(original)) + for i := 0; i < 25; i++ { + m.mutate(v, 1024) + } + if !bytes.Equal([]byte(original), originalCopy) { + t.Fatalf("string was mutated: got %x, want %x", []byte(original), originalCopy) + } +} diff --git a/src/internal/fuzz/mutators_byteslice.go b/src/internal/fuzz/mutators_byteslice.go new file mode 100644 index 0000000..d9dab1d --- /dev/null +++ b/src/internal/fuzz/mutators_byteslice.go @@ -0,0 +1,313 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +// byteSliceRemoveBytes removes a random chunk of bytes from b. +func byteSliceRemoveBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + pos0 := m.rand(len(b)) + pos1 := pos0 + m.chooseLen(len(b)-pos0) + copy(b[pos0:], b[pos1:]) + b = b[:len(b)-(pos1-pos0)] + return b +} + +// byteSliceInsertRandomBytes inserts a chunk of random bytes into b at a random +// position. +func byteSliceInsertRandomBytes(m *mutator, b []byte) []byte { + pos := m.rand(len(b) + 1) + n := m.chooseLen(1024) + if len(b)+n >= cap(b) { + return nil + } + b = b[:len(b)+n] + copy(b[pos+n:], b[pos:]) + for i := 0; i < n; i++ { + b[pos+i] = byte(m.rand(256)) + } + return b +} + +// byteSliceDuplicateBytes duplicates a chunk of bytes in b and inserts it into +// a random position. +func byteSliceDuplicateBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + n := m.chooseLen(len(b) - src) + // Use the end of the slice as scratch space to avoid doing an + // allocation. If the slice is too small abort and try something + // else. + if len(b)+(n*2) >= cap(b) { + return nil + } + end := len(b) + // Increase the size of b to fit the duplicated block as well as + // some extra working space + b = b[:end+(n*2)] + // Copy the block of bytes we want to duplicate to the end of the + // slice + copy(b[end+n:], b[src:src+n]) + // Shift the bytes after the splice point n positions to the right + // to make room for the new block + copy(b[dst+n:end+n], b[dst:end]) + // Insert the duplicate block into the splice point + copy(b[dst:], b[end+n:]) + b = b[:end+n] + return b +} + +// byteSliceOverwriteBytes overwrites a chunk of b with another chunk of b. +func byteSliceOverwriteBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + n := m.chooseLen(len(b) - src - 1) + copy(b[dst:], b[src:src+n]) + return b +} + +// byteSliceBitFlip flips a random bit in a random byte in b. +func byteSliceBitFlip(m *mutator, b []byte) []byte { + if len(b) == 0 { + return nil + } + pos := m.rand(len(b)) + b[pos] ^= 1 << uint(m.rand(8)) + return b +} + +// byteSliceXORByte XORs a random byte in b with a random value. +func byteSliceXORByte(m *mutator, b []byte) []byte { + if len(b) == 0 { + return nil + } + pos := m.rand(len(b)) + // In order to avoid a no-op (where the random value matches + // the existing value), use XOR instead of just setting to + // the random value. + b[pos] ^= byte(1 + m.rand(255)) + return b +} + +// byteSliceSwapByte swaps two random bytes in b. +func byteSliceSwapByte(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + b[src], b[dst] = b[dst], b[src] + return b +} + +// byteSliceArithmeticUint8 adds/subtracts from a random byte in b. +func byteSliceArithmeticUint8(m *mutator, b []byte) []byte { + if len(b) == 0 { + return nil + } + pos := m.rand(len(b)) + v := byte(m.rand(35) + 1) + if m.r.bool() { + b[pos] += v + } else { + b[pos] -= v + } + return b +} + +// byteSliceArithmeticUint16 adds/subtracts from a random uint16 in b. +func byteSliceArithmeticUint16(m *mutator, b []byte) []byte { + if len(b) < 2 { + return nil + } + v := uint16(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 1) + enc := m.randByteOrder() + enc.PutUint16(b[pos:], enc.Uint16(b[pos:])+v) + return b +} + +// byteSliceArithmeticUint32 adds/subtracts from a random uint32 in b. +func byteSliceArithmeticUint32(m *mutator, b []byte) []byte { + if len(b) < 4 { + return nil + } + v := uint32(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 3) + enc := m.randByteOrder() + enc.PutUint32(b[pos:], enc.Uint32(b[pos:])+v) + return b +} + +// byteSliceArithmeticUint64 adds/subtracts from a random uint64 in b. +func byteSliceArithmeticUint64(m *mutator, b []byte) []byte { + if len(b) < 8 { + return nil + } + v := uint64(m.rand(35) + 1) + if m.r.bool() { + v = 0 - v + } + pos := m.rand(len(b) - 7) + enc := m.randByteOrder() + enc.PutUint64(b[pos:], enc.Uint64(b[pos:])+v) + return b +} + +// byteSliceOverwriteInterestingUint8 overwrites a random byte in b with an interesting +// value. +func byteSliceOverwriteInterestingUint8(m *mutator, b []byte) []byte { + if len(b) == 0 { + return nil + } + pos := m.rand(len(b)) + b[pos] = byte(interesting8[m.rand(len(interesting8))]) + return b +} + +// byteSliceOverwriteInterestingUint16 overwrites a random uint16 in b with an interesting +// value. +func byteSliceOverwriteInterestingUint16(m *mutator, b []byte) []byte { + if len(b) < 2 { + return nil + } + pos := m.rand(len(b) - 1) + v := uint16(interesting16[m.rand(len(interesting16))]) + m.randByteOrder().PutUint16(b[pos:], v) + return b +} + +// byteSliceOverwriteInterestingUint32 overwrites a random uint16 in b with an interesting +// value. +func byteSliceOverwriteInterestingUint32(m *mutator, b []byte) []byte { + if len(b) < 4 { + return nil + } + pos := m.rand(len(b) - 3) + v := uint32(interesting32[m.rand(len(interesting32))]) + m.randByteOrder().PutUint32(b[pos:], v) + return b +} + +// byteSliceInsertConstantBytes inserts a chunk of constant bytes into a random position in b. +func byteSliceInsertConstantBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + dst := m.rand(len(b)) + // TODO(rolandshoemaker,katiehockman): 4096 was mainly picked + // randomly. We may want to either pick a much larger value + // (AFL uses 32768, paired with a similar impl to chooseLen + // which biases towards smaller lengths that grow over time), + // or set the max based on characteristics of the corpus + // (libFuzzer sets a min/max based on the min/max size of + // entries in the corpus and then picks uniformly from + // that range). + n := m.chooseLen(4096) + if len(b)+n >= cap(b) { + return nil + } + b = b[:len(b)+n] + copy(b[dst+n:], b[dst:]) + rb := byte(m.rand(256)) + for i := dst; i < dst+n; i++ { + b[i] = rb + } + return b +} + +// byteSliceOverwriteConstantBytes overwrites a chunk of b with constant bytes. +func byteSliceOverwriteConstantBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + dst := m.rand(len(b)) + n := m.chooseLen(len(b) - dst) + rb := byte(m.rand(256)) + for i := dst; i < dst+n; i++ { + b[i] = rb + } + return b +} + +// byteSliceShuffleBytes shuffles a chunk of bytes in b. +func byteSliceShuffleBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + dst := m.rand(len(b)) + n := m.chooseLen(len(b) - dst) + if n <= 2 { + return nil + } + // Start at the end of the range, and iterate backwards + // to dst, swapping each element with another element in + // dst:dst+n (Fisher-Yates shuffle). + for i := n - 1; i > 0; i-- { + j := m.rand(i + 1) + b[dst+i], b[dst+j] = b[dst+j], b[dst+i] + } + return b +} + +// byteSliceSwapBytes swaps two chunks of bytes in b. +func byteSliceSwapBytes(m *mutator, b []byte) []byte { + if len(b) <= 1 { + return nil + } + src := m.rand(len(b)) + dst := m.rand(len(b)) + for dst == src { + dst = m.rand(len(b)) + } + // Choose the random length as len(b) - max(src, dst) + // so that we don't attempt to swap a chunk that extends + // beyond the end of the slice + max := dst + if src > max { + max = src + } + n := m.chooseLen(len(b) - max - 1) + // Check that neither chunk intersect, so that we don't end up + // duplicating parts of the input, rather than swapping them + if src > dst && dst+n >= src || dst > src && src+n >= dst { + return nil + } + // Use the end of the slice as scratch space to avoid doing an + // allocation. If the slice is too small abort and try something + // else. + if len(b)+n >= cap(b) { + return nil + } + end := len(b) + b = b[:end+n] + copy(b[end:], b[dst:dst+n]) + copy(b[dst:], b[src:src+n]) + copy(b[src:], b[end:]) + b = b[:end] + return b +} diff --git a/src/internal/fuzz/mutators_byteslice_test.go b/src/internal/fuzz/mutators_byteslice_test.go new file mode 100644 index 0000000..7886967 --- /dev/null +++ b/src/internal/fuzz/mutators_byteslice_test.go @@ -0,0 +1,186 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "testing" +) + +type mockRand struct { + values []int + counter int + b bool +} + +func (mr *mockRand) uint32() uint32 { + c := mr.values[mr.counter] + mr.counter++ + return uint32(c) +} + +func (mr *mockRand) intn(n int) int { + c := mr.values[mr.counter] + mr.counter++ + return c % n +} + +func (mr *mockRand) uint32n(n uint32) uint32 { + c := mr.values[mr.counter] + mr.counter++ + return uint32(c) % n +} + +func (mr *mockRand) exp2() int { + c := mr.values[mr.counter] + mr.counter++ + return c +} + +func (mr *mockRand) bool() bool { + b := mr.b + mr.b = !mr.b + return b +} + +func (mr *mockRand) save(*uint64, *uint64) { + panic("unimplemented") +} + +func (mr *mockRand) restore(uint64, uint64) { + panic("unimplemented") +} + +func TestByteSliceMutators(t *testing.T) { + for _, tc := range []struct { + name string + mutator func(*mutator, []byte) []byte + randVals []int + input []byte + expected []byte + }{ + { + name: "byteSliceRemoveBytes", + mutator: byteSliceRemoveBytes, + input: []byte{1, 2, 3, 4}, + expected: []byte{4}, + }, + { + name: "byteSliceInsertRandomBytes", + mutator: byteSliceInsertRandomBytes, + input: make([]byte, 4, 8), + expected: []byte{3, 4, 5, 0, 0, 0, 0}, + }, + { + name: "byteSliceDuplicateBytes", + mutator: byteSliceDuplicateBytes, + input: append(make([]byte, 0, 13), []byte{1, 2, 3, 4}...), + expected: []byte{1, 1, 2, 3, 4, 2, 3, 4}, + }, + { + name: "byteSliceOverwriteBytes", + mutator: byteSliceOverwriteBytes, + input: []byte{1, 2, 3, 4}, + expected: []byte{1, 1, 3, 4}, + }, + { + name: "byteSliceBitFlip", + mutator: byteSliceBitFlip, + input: []byte{1, 2, 3, 4}, + expected: []byte{3, 2, 3, 4}, + }, + { + name: "byteSliceXORByte", + mutator: byteSliceXORByte, + input: []byte{1, 2, 3, 4}, + expected: []byte{3, 2, 3, 4}, + }, + { + name: "byteSliceSwapByte", + mutator: byteSliceSwapByte, + input: []byte{1, 2, 3, 4}, + expected: []byte{2, 1, 3, 4}, + }, + { + name: "byteSliceArithmeticUint8", + mutator: byteSliceArithmeticUint8, + input: []byte{1, 2, 3, 4}, + expected: []byte{255, 2, 3, 4}, + }, + { + name: "byteSliceArithmeticUint16", + mutator: byteSliceArithmeticUint16, + input: []byte{1, 2, 3, 4}, + expected: []byte{1, 3, 3, 4}, + }, + { + name: "byteSliceArithmeticUint32", + mutator: byteSliceArithmeticUint32, + input: []byte{1, 2, 3, 4}, + expected: []byte{2, 2, 3, 4}, + }, + { + name: "byteSliceArithmeticUint64", + mutator: byteSliceArithmeticUint64, + input: []byte{1, 2, 3, 4, 5, 6, 7, 8}, + expected: []byte{2, 2, 3, 4, 5, 6, 7, 8}, + }, + { + name: "byteSliceOverwriteInterestingUint8", + mutator: byteSliceOverwriteInterestingUint8, + input: []byte{1, 2, 3, 4}, + expected: []byte{255, 2, 3, 4}, + }, + { + name: "byteSliceOverwriteInterestingUint16", + mutator: byteSliceOverwriteInterestingUint16, + input: []byte{1, 2, 3, 4}, + expected: []byte{255, 127, 3, 4}, + }, + { + name: "byteSliceOverwriteInterestingUint32", + mutator: byteSliceOverwriteInterestingUint32, + input: []byte{1, 2, 3, 4}, + expected: []byte{250, 0, 0, 250}, + }, + { + name: "byteSliceInsertConstantBytes", + mutator: byteSliceInsertConstantBytes, + input: append(make([]byte, 0, 8), []byte{1, 2, 3, 4}...), + expected: []byte{3, 3, 3, 1, 2, 3, 4}, + }, + { + name: "byteSliceOverwriteConstantBytes", + mutator: byteSliceOverwriteConstantBytes, + input: []byte{1, 2, 3, 4}, + expected: []byte{3, 3, 3, 4}, + }, + { + name: "byteSliceShuffleBytes", + mutator: byteSliceShuffleBytes, + input: []byte{1, 2, 3, 4}, + expected: []byte{2, 3, 1, 4}, + }, + { + name: "byteSliceSwapBytes", + mutator: byteSliceSwapBytes, + randVals: []int{0, 2, 0, 2}, + input: append(make([]byte, 0, 9), []byte{1, 2, 3, 4}...), + expected: []byte{3, 2, 1, 4}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := &mockRand{values: []int{0, 1, 2, 3, 4, 5}} + if tc.randVals != nil { + r.values = tc.randVals + } + m := &mutator{r: r} + b := tc.mutator(m, tc.input) + if !bytes.Equal(b, tc.expected) { + t.Errorf("got %x, want %x", b, tc.expected) + } + }) + } +} diff --git a/src/internal/fuzz/pcg.go b/src/internal/fuzz/pcg.go new file mode 100644 index 0000000..4fe8aeb --- /dev/null +++ b/src/internal/fuzz/pcg.go @@ -0,0 +1,145 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "math/bits" + "os" + "strconv" + "strings" + "sync/atomic" + "time" +) + +type mutatorRand interface { + uint32() uint32 + intn(int) int + uint32n(uint32) uint32 + exp2() int + bool() bool + + save(randState, randInc *uint64) + restore(randState, randInc uint64) +} + +// The functions in pcg implement a 32 bit PRNG with a 64 bit period: pcg xsh rr +// 64 32. See https://www.pcg-random.org/ for more information. This +// implementation is geared specifically towards the needs of fuzzing: Simple +// creation and use, no reproducibility, no concurrency safety, just the +// necessary methods, optimized for speed. + +var globalInc atomic.Uint64 // PCG stream + +const multiplier uint64 = 6364136223846793005 + +// pcgRand is a PRNG. It should not be copied or shared. No Rand methods are +// concurrency safe. +type pcgRand struct { + noCopy noCopy // help avoid mistakes: ask vet to ensure that we don't make a copy + state uint64 + inc uint64 +} + +func godebugSeed() *int { + debug := strings.Split(os.Getenv("GODEBUG"), ",") + for _, f := range debug { + if strings.HasPrefix(f, "fuzzseed=") { + seed, err := strconv.Atoi(strings.TrimPrefix(f, "fuzzseed=")) + if err != nil { + panic("malformed fuzzseed") + } + return &seed + } + } + return nil +} + +// newPcgRand generates a new, seeded Rand, ready for use. +func newPcgRand() *pcgRand { + r := new(pcgRand) + now := uint64(time.Now().UnixNano()) + if seed := godebugSeed(); seed != nil { + now = uint64(*seed) + } + inc := globalInc.Add(1) + r.state = now + r.inc = (inc << 1) | 1 + r.step() + r.state += now + r.step() + return r +} + +func (r *pcgRand) step() { + r.state *= multiplier + r.state += r.inc +} + +func (r *pcgRand) save(randState, randInc *uint64) { + *randState = r.state + *randInc = r.inc +} + +func (r *pcgRand) restore(randState, randInc uint64) { + r.state = randState + r.inc = randInc +} + +// uint32 returns a pseudo-random uint32. +func (r *pcgRand) uint32() uint32 { + x := r.state + r.step() + return bits.RotateLeft32(uint32(((x>>18)^x)>>27), -int(x>>59)) +} + +// intn returns a pseudo-random number in [0, n). +// n must fit in a uint32. +func (r *pcgRand) intn(n int) int { + if int(uint32(n)) != n { + panic("large Intn") + } + return int(r.uint32n(uint32(n))) +} + +// uint32n returns a pseudo-random number in [0, n). +// +// For implementation details, see: +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction +// https://lemire.me/blog/2016/06/30/fast-random-shuffling +func (r *pcgRand) uint32n(n uint32) uint32 { + v := r.uint32() + prod := uint64(v) * uint64(n) + low := uint32(prod) + if low < n { + thresh := uint32(-int32(n)) % n + for low < thresh { + v = r.uint32() + prod = uint64(v) * uint64(n) + low = uint32(prod) + } + } + return uint32(prod >> 32) +} + +// exp2 generates n with probability 1/2^(n+1). +func (r *pcgRand) exp2() int { + return bits.TrailingZeros32(r.uint32()) +} + +// bool generates a random bool. +func (r *pcgRand) bool() bool { + return r.uint32()&1 == 0 +} + +// noCopy may be embedded into structs which must not be copied +// after the first use. +// +// See https://golang.org/issues/8005#issuecomment-190753527 +// for details. +type noCopy struct{} + +// lock is a no-op used by -copylocks checker from `go vet`. +func (*noCopy) lock() {} +func (*noCopy) unlock() {} diff --git a/src/internal/fuzz/queue.go b/src/internal/fuzz/queue.go new file mode 100644 index 0000000..195d6eb --- /dev/null +++ b/src/internal/fuzz/queue.go @@ -0,0 +1,71 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +// queue holds a growable sequence of inputs for fuzzing and minimization. +// +// For now, this is a simple ring buffer +// (https://en.wikipedia.org/wiki/Circular_buffer). +// +// TODO(golang.org/issue/46224): use a prioritization algorithm based on input +// size, previous duration, coverage, and any other metrics that seem useful. +type queue struct { + // elems holds a ring buffer. + // The queue is empty when begin = end. + // The queue is full (until grow is called) when end = begin + N - 1 (mod N) + // where N = cap(elems). + elems []any + head, len int +} + +func (q *queue) cap() int { + return len(q.elems) +} + +func (q *queue) grow() { + oldCap := q.cap() + newCap := oldCap * 2 + if newCap == 0 { + newCap = 8 + } + newElems := make([]any, newCap) + oldLen := q.len + for i := 0; i < oldLen; i++ { + newElems[i] = q.elems[(q.head+i)%oldCap] + } + q.elems = newElems + q.head = 0 +} + +func (q *queue) enqueue(e any) { + if q.len+1 > q.cap() { + q.grow() + } + i := (q.head + q.len) % q.cap() + q.elems[i] = e + q.len++ +} + +func (q *queue) dequeue() (any, bool) { + if q.len == 0 { + return nil, false + } + e := q.elems[q.head] + q.elems[q.head] = nil + q.head = (q.head + 1) % q.cap() + q.len-- + return e, true +} + +func (q *queue) peek() (any, bool) { + if q.len == 0 { + return nil, false + } + return q.elems[q.head], true +} + +func (q *queue) clear() { + *q = queue{} +} diff --git a/src/internal/fuzz/queue_test.go b/src/internal/fuzz/queue_test.go new file mode 100644 index 0000000..3b179af --- /dev/null +++ b/src/internal/fuzz/queue_test.go @@ -0,0 +1,58 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import "testing" + +func TestQueue(t *testing.T) { + // Zero valued queue should have 0 length and capacity. + var q queue + if n := q.len; n != 0 { + t.Fatalf("empty queue has len %d; want 0", n) + } + if n := q.cap(); n != 0 { + t.Fatalf("empty queue has cap %d; want 0", n) + } + + // As we add elements, len should grow. + N := 32 + for i := 0; i < N; i++ { + q.enqueue(i) + if n := q.len; n != i+1 { + t.Fatalf("after adding %d elements, queue has len %d", i, n) + } + if v, ok := q.peek(); !ok { + t.Fatalf("couldn't peek after adding %d elements", i) + } else if v.(int) != 0 { + t.Fatalf("after adding %d elements, peek is %d; want 0", i, v) + } + } + + // As we remove and add elements, len should shrink and grow. + // We should also remove elements in the same order they were added. + want := 0 + for _, r := range []int{1, 2, 3, 5, 8, 13, 21} { + s := make([]int, 0, r) + for i := 0; i < r; i++ { + if got, ok := q.dequeue(); !ok { + t.Fatalf("after removing %d of %d elements, could not dequeue", i+1, r) + } else if got != want { + t.Fatalf("after removing %d of %d elements, got %d; want %d", i+1, r, got, want) + } else { + s = append(s, got.(int)) + } + want = (want + 1) % N + if n := q.len; n != N-i-1 { + t.Fatalf("after removing %d of %d elements, len is %d; want %d", i+1, r, n, N-i-1) + } + } + for i, v := range s { + q.enqueue(v) + if n := q.len; n != N-r+i+1 { + t.Fatalf("after adding back %d of %d elements, len is %d; want %d", i+1, r, n, n-r+i+1) + } + } + } +} diff --git a/src/internal/fuzz/sys_posix.go b/src/internal/fuzz/sys_posix.go new file mode 100644 index 0000000..fec6054 --- /dev/null +++ b/src/internal/fuzz/sys_posix.go @@ -0,0 +1,130 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin || freebsd || linux + +package fuzz + +import ( + "fmt" + "os" + "os/exec" + "syscall" +) + +type sharedMemSys struct{} + +func sharedMemMapFile(f *os.File, size int, removeOnClose bool) (*sharedMem, error) { + prot := syscall.PROT_READ | syscall.PROT_WRITE + flags := syscall.MAP_FILE | syscall.MAP_SHARED + region, err := syscall.Mmap(int(f.Fd()), 0, size, prot, flags) + if err != nil { + return nil, err + } + + return &sharedMem{f: f, region: region, removeOnClose: removeOnClose}, nil +} + +// Close unmaps the shared memory and closes the temporary file. If this +// sharedMem was created with sharedMemTempFile, Close also removes the file. +func (m *sharedMem) Close() error { + // Attempt all operations, even if we get an error for an earlier operation. + // os.File.Close may fail due to I/O errors, but we still want to delete + // the temporary file. + var errs []error + errs = append(errs, + syscall.Munmap(m.region), + m.f.Close()) + if m.removeOnClose { + errs = append(errs, os.Remove(m.f.Name())) + } + for _, err := range errs { + if err != nil { + return err + } + } + return nil +} + +// setWorkerComm configures communication channels on the cmd that will +// run a worker process. +func setWorkerComm(cmd *exec.Cmd, comm workerComm) { + mem := <-comm.memMu + memFile := mem.f + comm.memMu <- mem + cmd.ExtraFiles = []*os.File{comm.fuzzIn, comm.fuzzOut, memFile} +} + +// getWorkerComm returns communication channels in the worker process. +func getWorkerComm() (comm workerComm, err error) { + fuzzIn := os.NewFile(3, "fuzz_in") + fuzzOut := os.NewFile(4, "fuzz_out") + memFile := os.NewFile(5, "fuzz_mem") + fi, err := memFile.Stat() + if err != nil { + return workerComm{}, err + } + size := int(fi.Size()) + if int64(size) != fi.Size() { + return workerComm{}, fmt.Errorf("fuzz temp file exceeds maximum size") + } + removeOnClose := false + mem, err := sharedMemMapFile(memFile, size, removeOnClose) + if err != nil { + return workerComm{}, err + } + memMu := make(chan *sharedMem, 1) + memMu <- mem + return workerComm{fuzzIn: fuzzIn, fuzzOut: fuzzOut, memMu: memMu}, nil +} + +// isInterruptError returns whether an error was returned by a process that +// was terminated by an interrupt signal (SIGINT). +func isInterruptError(err error) bool { + exitErr, ok := err.(*exec.ExitError) + if !ok || exitErr.ExitCode() >= 0 { + return false + } + status := exitErr.Sys().(syscall.WaitStatus) + return status.Signal() == syscall.SIGINT +} + +// terminationSignal checks if err is an exec.ExitError with a signal status. +// If it is, terminationSignal returns the signal and true. +// If not, -1 and false. +func terminationSignal(err error) (os.Signal, bool) { + exitErr, ok := err.(*exec.ExitError) + if !ok || exitErr.ExitCode() >= 0 { + return syscall.Signal(-1), false + } + status := exitErr.Sys().(syscall.WaitStatus) + return status.Signal(), status.Signaled() +} + +// isCrashSignal returns whether a signal was likely to have been caused by an +// error in the program that received it, triggered by a fuzz input. For +// example, SIGSEGV would be received after a nil pointer dereference. +// Other signals like SIGKILL or SIGHUP are more likely to have been sent by +// another process, and we shouldn't record a crasher if the worker process +// receives one of these. +// +// Note that Go installs its own signal handlers on startup, so some of these +// signals may only be received if signal handlers are changed. For example, +// SIGSEGV is normally transformed into a panic that causes the process to exit +// with status 2 if not recovered, which we handle as a crash. +func isCrashSignal(signal os.Signal) bool { + switch signal { + case + syscall.SIGILL, // illegal instruction + syscall.SIGTRAP, // breakpoint + syscall.SIGABRT, // abort() called + syscall.SIGBUS, // invalid memory access (e.g., misaligned address) + syscall.SIGFPE, // math error, e.g., integer divide by zero + syscall.SIGSEGV, // invalid memory access (e.g., write to read-only) + syscall.SIGPIPE: // sent data to closed pipe or socket + return true + default: + return false + } +} diff --git a/src/internal/fuzz/sys_unimplemented.go b/src/internal/fuzz/sys_unimplemented.go new file mode 100644 index 0000000..8687c1f --- /dev/null +++ b/src/internal/fuzz/sys_unimplemented.go @@ -0,0 +1,44 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// If you update this constraint, also update internal/platform.FuzzSupported. +// +//go:build !darwin && !freebsd && !linux && !windows + +package fuzz + +import ( + "os" + "os/exec" +) + +type sharedMemSys struct{} + +func sharedMemMapFile(f *os.File, size int, removeOnClose bool) (*sharedMem, error) { + panic("not implemented") +} + +func (m *sharedMem) Close() error { + panic("not implemented") +} + +func setWorkerComm(cmd *exec.Cmd, comm workerComm) { + panic("not implemented") +} + +func getWorkerComm() (comm workerComm, err error) { + panic("not implemented") +} + +func isInterruptError(err error) bool { + panic("not implemented") +} + +func terminationSignal(err error) (os.Signal, bool) { + panic("not implemented") +} + +func isCrashSignal(signal os.Signal) bool { + panic("not implemented") +} diff --git a/src/internal/fuzz/sys_windows.go b/src/internal/fuzz/sys_windows.go new file mode 100644 index 0000000..82c9703 --- /dev/null +++ b/src/internal/fuzz/sys_windows.go @@ -0,0 +1,144 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "fmt" + "os" + "os/exec" + "syscall" + "unsafe" +) + +type sharedMemSys struct { + mapObj syscall.Handle +} + +func sharedMemMapFile(f *os.File, size int, removeOnClose bool) (mem *sharedMem, err error) { + defer func() { + if err != nil { + err = fmt.Errorf("mapping temporary file %s: %w", f.Name(), err) + } + }() + + // Create a file mapping object. The object itself is not shared. + mapObj, err := syscall.CreateFileMapping( + syscall.Handle(f.Fd()), // fhandle + nil, // sa + syscall.PAGE_READWRITE, // prot + 0, // maxSizeHigh + 0, // maxSizeLow + nil, // name + ) + if err != nil { + return nil, err + } + + // Create a view from the file mapping object. + access := uint32(syscall.FILE_MAP_READ | syscall.FILE_MAP_WRITE) + addr, err := syscall.MapViewOfFile( + mapObj, // handle + access, // access + 0, // offsetHigh + 0, // offsetLow + uintptr(size), // length + ) + if err != nil { + syscall.CloseHandle(mapObj) + return nil, err + } + + region := unsafe.Slice((*byte)(unsafe.Pointer(addr)), size) + return &sharedMem{ + f: f, + region: region, + removeOnClose: removeOnClose, + sys: sharedMemSys{mapObj: mapObj}, + }, nil +} + +// Close unmaps the shared memory and closes the temporary file. If this +// sharedMem was created with sharedMemTempFile, Close also removes the file. +func (m *sharedMem) Close() error { + // Attempt all operations, even if we get an error for an earlier operation. + // os.File.Close may fail due to I/O errors, but we still want to delete + // the temporary file. + var errs []error + errs = append(errs, + syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&m.region[0]))), + syscall.CloseHandle(m.sys.mapObj), + m.f.Close()) + if m.removeOnClose { + errs = append(errs, os.Remove(m.f.Name())) + } + for _, err := range errs { + if err != nil { + return err + } + } + return nil +} + +// setWorkerComm configures communication channels on the cmd that will +// run a worker process. +func setWorkerComm(cmd *exec.Cmd, comm workerComm) { + mem := <-comm.memMu + memFD := mem.f.Fd() + comm.memMu <- mem + syscall.SetHandleInformation(syscall.Handle(comm.fuzzIn.Fd()), syscall.HANDLE_FLAG_INHERIT, 1) + syscall.SetHandleInformation(syscall.Handle(comm.fuzzOut.Fd()), syscall.HANDLE_FLAG_INHERIT, 1) + syscall.SetHandleInformation(syscall.Handle(memFD), syscall.HANDLE_FLAG_INHERIT, 1) + cmd.Env = append(cmd.Env, fmt.Sprintf("GO_TEST_FUZZ_WORKER_HANDLES=%x,%x,%x", comm.fuzzIn.Fd(), comm.fuzzOut.Fd(), memFD)) + cmd.SysProcAttr = &syscall.SysProcAttr{AdditionalInheritedHandles: []syscall.Handle{syscall.Handle(comm.fuzzIn.Fd()), syscall.Handle(comm.fuzzOut.Fd()), syscall.Handle(memFD)}} +} + +// getWorkerComm returns communication channels in the worker process. +func getWorkerComm() (comm workerComm, err error) { + v := os.Getenv("GO_TEST_FUZZ_WORKER_HANDLES") + if v == "" { + return workerComm{}, fmt.Errorf("GO_TEST_FUZZ_WORKER_HANDLES not set") + } + var fuzzInFD, fuzzOutFD, memFileFD uintptr + if _, err := fmt.Sscanf(v, "%x,%x,%x", &fuzzInFD, &fuzzOutFD, &memFileFD); err != nil { + return workerComm{}, fmt.Errorf("parsing GO_TEST_FUZZ_WORKER_HANDLES=%s: %v", v, err) + } + + fuzzIn := os.NewFile(fuzzInFD, "fuzz_in") + fuzzOut := os.NewFile(fuzzOutFD, "fuzz_out") + memFile := os.NewFile(memFileFD, "fuzz_mem") + fi, err := memFile.Stat() + if err != nil { + return workerComm{}, fmt.Errorf("worker checking temp file size: %w", err) + } + size := int(fi.Size()) + if int64(size) != fi.Size() { + return workerComm{}, fmt.Errorf("fuzz temp file exceeds maximum size") + } + removeOnClose := false + mem, err := sharedMemMapFile(memFile, size, removeOnClose) + if err != nil { + return workerComm{}, err + } + memMu := make(chan *sharedMem, 1) + memMu <- mem + + return workerComm{fuzzIn: fuzzIn, fuzzOut: fuzzOut, memMu: memMu}, nil +} + +func isInterruptError(err error) bool { + // On Windows, we can't tell whether the process was interrupted by the error + // returned by Wait. It looks like an ExitError with status 1. + return false +} + +// terminationSignal returns -1 and false because Windows doesn't have signals. +func terminationSignal(err error) (os.Signal, bool) { + return syscall.Signal(-1), false +} + +// isCrashSignal is not implemented because Windows doesn't have signals. +func isCrashSignal(signal os.Signal) bool { + panic("not implemented: no signals on windows") +} diff --git a/src/internal/fuzz/trace.go b/src/internal/fuzz/trace.go new file mode 100644 index 0000000..a15c370 --- /dev/null +++ b/src/internal/fuzz/trace.go @@ -0,0 +1,35 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !libfuzzer + +package fuzz + +import _ "unsafe" // for go:linkname + +//go:linkname libfuzzerTraceCmp1 runtime.libfuzzerTraceCmp1 +//go:linkname libfuzzerTraceCmp2 runtime.libfuzzerTraceCmp2 +//go:linkname libfuzzerTraceCmp4 runtime.libfuzzerTraceCmp4 +//go:linkname libfuzzerTraceCmp8 runtime.libfuzzerTraceCmp8 + +//go:linkname libfuzzerTraceConstCmp1 runtime.libfuzzerTraceConstCmp1 +//go:linkname libfuzzerTraceConstCmp2 runtime.libfuzzerTraceConstCmp2 +//go:linkname libfuzzerTraceConstCmp4 runtime.libfuzzerTraceConstCmp4 +//go:linkname libfuzzerTraceConstCmp8 runtime.libfuzzerTraceConstCmp8 + +//go:linkname libfuzzerHookStrCmp runtime.libfuzzerHookStrCmp +//go:linkname libfuzzerHookEqualFold runtime.libfuzzerHookEqualFold + +func libfuzzerTraceCmp1(arg0, arg1 uint8, fakePC uint) {} +func libfuzzerTraceCmp2(arg0, arg1 uint16, fakePC uint) {} +func libfuzzerTraceCmp4(arg0, arg1 uint32, fakePC uint) {} +func libfuzzerTraceCmp8(arg0, arg1 uint64, fakePC uint) {} + +func libfuzzerTraceConstCmp1(arg0, arg1 uint8, fakePC uint) {} +func libfuzzerTraceConstCmp2(arg0, arg1 uint16, fakePC uint) {} +func libfuzzerTraceConstCmp4(arg0, arg1 uint32, fakePC uint) {} +func libfuzzerTraceConstCmp8(arg0, arg1 uint64, fakePC uint) {} + +func libfuzzerHookStrCmp(arg0, arg1 string, fakePC uint) {} +func libfuzzerHookEqualFold(arg0, arg1 string, fakePC uint) {} diff --git a/src/internal/fuzz/worker.go b/src/internal/fuzz/worker.go new file mode 100644 index 0000000..c952670 --- /dev/null +++ b/src/internal/fuzz/worker.go @@ -0,0 +1,1195 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "reflect" + "runtime" + "sync" + "time" +) + +const ( + // workerFuzzDuration is the amount of time a worker can spend testing random + // variations of an input given by the coordinator. + workerFuzzDuration = 100 * time.Millisecond + + // workerTimeoutDuration is the amount of time a worker can go without + // responding to the coordinator before being stopped. + workerTimeoutDuration = 1 * time.Second + + // workerExitCode is used as an exit code by fuzz worker processes after an internal error. + // This distinguishes internal errors from uncontrolled panics and other crashes. + // Keep in sync with internal/fuzz.workerExitCode. + workerExitCode = 70 + + // workerSharedMemSize is the maximum size of the shared memory file used to + // communicate with workers. This limits the size of fuzz inputs. + workerSharedMemSize = 100 << 20 // 100 MB +) + +// worker manages a worker process running a test binary. The worker object +// exists only in the coordinator (the process started by 'go test -fuzz'). +// workerClient is used by the coordinator to send RPCs to the worker process, +// which handles them with workerServer. +type worker struct { + dir string // working directory, same as package directory + binPath string // path to test executable + args []string // arguments for test executable + env []string // environment for test executable + + coordinator *coordinator + + memMu chan *sharedMem // mutex guarding shared memory with worker; persists across processes. + + cmd *exec.Cmd // current worker process + client *workerClient // used to communicate with worker process + waitErr error // last error returned by wait, set before termC is closed. + interrupted bool // true after stop interrupts a running worker. + termC chan struct{} // closed by wait when worker process terminates +} + +func newWorker(c *coordinator, dir, binPath string, args, env []string) (*worker, error) { + mem, err := sharedMemTempFile(workerSharedMemSize) + if err != nil { + return nil, err + } + memMu := make(chan *sharedMem, 1) + memMu <- mem + return &worker{ + dir: dir, + binPath: binPath, + args: args, + env: env[:len(env):len(env)], // copy on append to ensure workers don't overwrite each other. + coordinator: c, + memMu: memMu, + }, nil +} + +// cleanup releases persistent resources associated with the worker. +func (w *worker) cleanup() error { + mem := <-w.memMu + if mem == nil { + return nil + } + close(w.memMu) + return mem.Close() +} + +// coordinate runs the test binary to perform fuzzing. +// +// coordinate loops until ctx is cancelled or a fatal error is encountered. +// If a test process terminates unexpectedly while fuzzing, coordinate will +// attempt to restart and continue unless the termination can be attributed +// to an interruption (from a timer or the user). +// +// While looping, coordinate receives inputs from the coordinator, passes +// those inputs to the worker process, then passes the results back to +// the coordinator. +func (w *worker) coordinate(ctx context.Context) error { + // Main event loop. + for { + // Start or restart the worker if it's not running. + if !w.isRunning() { + if err := w.startAndPing(ctx); err != nil { + return err + } + } + + select { + case <-ctx.Done(): + // Worker was told to stop. + err := w.stop() + if err != nil && !w.interrupted && !isInterruptError(err) { + return err + } + return ctx.Err() + + case <-w.termC: + // Worker process terminated unexpectedly while waiting for input. + err := w.stop() + if w.interrupted { + panic("worker interrupted after unexpected termination") + } + if err == nil || isInterruptError(err) { + // Worker stopped, either by exiting with status 0 or after being + // interrupted with a signal that was not sent by the coordinator. + // + // When the user presses ^C, on POSIX platforms, SIGINT is delivered to + // all processes in the group concurrently, and the worker may see it + // before the coordinator. The worker should exit 0 gracefully (in + // theory). + // + // This condition is probably intended by the user, so suppress + // the error. + return nil + } + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == workerExitCode { + // Worker exited with a code indicating F.Fuzz was not called correctly, + // for example, F.Fail was called first. + return fmt.Errorf("fuzzing process exited unexpectedly due to an internal failure: %w", err) + } + // Worker exited non-zero or was terminated by a non-interrupt + // signal (for example, SIGSEGV) while fuzzing. + return fmt.Errorf("fuzzing process hung or terminated unexpectedly: %w", err) + // TODO(jayconrod,katiehockman): if -keepfuzzing, restart worker. + + case input := <-w.coordinator.inputC: + // Received input from coordinator. + args := fuzzArgs{ + Limit: input.limit, + Timeout: input.timeout, + Warmup: input.warmup, + CoverageData: input.coverageData, + } + entry, resp, isInternalError, err := w.client.fuzz(ctx, input.entry, args) + canMinimize := true + if err != nil { + // Error communicating with worker. + w.stop() + if ctx.Err() != nil { + // Timeout or interruption. + return ctx.Err() + } + if w.interrupted { + // Communication error before we stopped the worker. + // Report an error, but don't record a crasher. + return fmt.Errorf("communicating with fuzzing process: %v", err) + } + if sig, ok := terminationSignal(w.waitErr); ok && !isCrashSignal(sig) { + // Worker terminated by a signal that probably wasn't caused by a + // specific input to the fuzz function. For example, on Linux, + // the kernel (OOM killer) may send SIGKILL to a process using a lot + // of memory. Or the shell might send SIGHUP when the terminal + // is closed. Don't record a crasher. + return fmt.Errorf("fuzzing process terminated by unexpected signal; no crash will be recorded: %v", w.waitErr) + } + if isInternalError { + // An internal error occurred which shouldn't be considered + // a crash. + return err + } + // Unexpected termination. Set error message and fall through. + // We'll restart the worker on the next iteration. + // Don't attempt to minimize this since it crashed the worker. + resp.Err = fmt.Sprintf("fuzzing process hung or terminated unexpectedly: %v", w.waitErr) + canMinimize = false + } + result := fuzzResult{ + limit: input.limit, + count: resp.Count, + totalDuration: resp.TotalDuration, + entryDuration: resp.InterestingDuration, + entry: entry, + crasherMsg: resp.Err, + coverageData: resp.CoverageData, + canMinimize: canMinimize, + } + w.coordinator.resultC <- result + + case input := <-w.coordinator.minimizeC: + // Received input to minimize from coordinator. + result, err := w.minimize(ctx, input) + if err != nil { + // Error minimizing. Send back the original input. If it didn't cause + // an error before, report it as causing an error now. + // TODO: double-check this is handled correctly when + // implementing -keepfuzzing. + result = fuzzResult{ + entry: input.entry, + crasherMsg: input.crasherMsg, + canMinimize: false, + limit: input.limit, + } + if result.crasherMsg == "" { + result.crasherMsg = err.Error() + } + } + if shouldPrintDebugInfo() { + w.coordinator.debugLogf( + "input minimized, id: %s, original id: %s, crasher: %t, originally crasher: %t, minimizing took: %s", + result.entry.Path, + input.entry.Path, + result.crasherMsg != "", + input.crasherMsg != "", + result.totalDuration, + ) + } + w.coordinator.resultC <- result + } + } +} + +// minimize tells a worker process to attempt to find a smaller value that +// either causes an error (if we started minimizing because we found an input +// that causes an error) or preserves new coverage (if we started minimizing +// because we found an input that expands coverage). +func (w *worker) minimize(ctx context.Context, input fuzzMinimizeInput) (min fuzzResult, err error) { + if w.coordinator.opts.MinimizeTimeout != 0 { + var cancel func() + ctx, cancel = context.WithTimeout(ctx, w.coordinator.opts.MinimizeTimeout) + defer cancel() + } + + args := minimizeArgs{ + Limit: input.limit, + Timeout: input.timeout, + KeepCoverage: input.keepCoverage, + } + entry, resp, err := w.client.minimize(ctx, input.entry, args) + if err != nil { + // Error communicating with worker. + w.stop() + if ctx.Err() != nil || w.interrupted || isInterruptError(w.waitErr) { + // Worker was interrupted, possibly by the user pressing ^C. + // Normally, workers can handle interrupts and timeouts gracefully and + // will return without error. An error here indicates the worker + // may not have been in a good state, but the error won't be meaningful + // to the user. Just return the original crasher without logging anything. + return fuzzResult{ + entry: input.entry, + crasherMsg: input.crasherMsg, + coverageData: input.keepCoverage, + canMinimize: false, + limit: input.limit, + }, nil + } + return fuzzResult{ + entry: entry, + crasherMsg: fmt.Sprintf("fuzzing process hung or terminated unexpectedly while minimizing: %v", err), + canMinimize: false, + limit: input.limit, + count: resp.Count, + totalDuration: resp.Duration, + }, nil + } + + if input.crasherMsg != "" && resp.Err == "" { + return fuzzResult{}, fmt.Errorf("attempted to minimize a crash but could not reproduce") + } + + return fuzzResult{ + entry: entry, + crasherMsg: resp.Err, + coverageData: resp.CoverageData, + canMinimize: false, + limit: input.limit, + count: resp.Count, + totalDuration: resp.Duration, + }, nil +} + +func (w *worker) isRunning() bool { + return w.cmd != nil +} + +// startAndPing starts the worker process and sends it a message to make sure it +// can communicate. +// +// startAndPing returns an error if any part of this didn't work, including if +// the context is expired or the worker process was interrupted before it +// responded. Errors that happen after start but before the ping response +// likely indicate that the worker did not call F.Fuzz or called F.Fail first. +// We don't record crashers for these errors. +func (w *worker) startAndPing(ctx context.Context) error { + if ctx.Err() != nil { + return ctx.Err() + } + if err := w.start(); err != nil { + return err + } + if err := w.client.ping(ctx); err != nil { + w.stop() + if ctx.Err() != nil { + return ctx.Err() + } + if isInterruptError(err) { + // User may have pressed ^C before worker responded. + return err + } + // TODO: record and return stderr. + return fmt.Errorf("fuzzing process terminated without fuzzing: %w", err) + } + return nil +} + +// start runs a new worker process. +// +// If the process couldn't be started, start returns an error. Start won't +// return later termination errors from the process if they occur. +// +// If the process starts successfully, start returns nil. stop must be called +// once later to clean up, even if the process terminates on its own. +// +// When the process terminates, w.waitErr is set to the error (if any), and +// w.termC is closed. +func (w *worker) start() (err error) { + if w.isRunning() { + panic("worker already started") + } + w.waitErr = nil + w.interrupted = false + w.termC = nil + + cmd := exec.Command(w.binPath, w.args...) + cmd.Dir = w.dir + cmd.Env = w.env[:len(w.env):len(w.env)] // copy on append to ensure workers don't overwrite each other. + + // Create the "fuzz_in" and "fuzz_out" pipes so we can communicate with + // the worker. We don't use stdin and stdout, since the test binary may + // do something else with those. + // + // Each pipe has a reader and a writer. The coordinator writes to fuzzInW + // and reads from fuzzOutR. The worker inherits fuzzInR and fuzzOutW. + // The coordinator closes fuzzInR and fuzzOutW after starting the worker, + // since we have no further need of them. + fuzzInR, fuzzInW, err := os.Pipe() + if err != nil { + return err + } + defer fuzzInR.Close() + fuzzOutR, fuzzOutW, err := os.Pipe() + if err != nil { + fuzzInW.Close() + return err + } + defer fuzzOutW.Close() + setWorkerComm(cmd, workerComm{fuzzIn: fuzzInR, fuzzOut: fuzzOutW, memMu: w.memMu}) + + // Start the worker process. + if err := cmd.Start(); err != nil { + fuzzInW.Close() + fuzzOutR.Close() + return err + } + + // Worker started successfully. + // After this, w.client owns fuzzInW and fuzzOutR, so w.client.Close must be + // called later by stop. + w.cmd = cmd + w.termC = make(chan struct{}) + comm := workerComm{fuzzIn: fuzzInW, fuzzOut: fuzzOutR, memMu: w.memMu} + m := newMutator() + w.client = newWorkerClient(comm, m) + + go func() { + w.waitErr = w.cmd.Wait() + close(w.termC) + }() + + return nil +} + +// stop tells the worker process to exit by closing w.client, then blocks until +// it terminates. If the worker doesn't terminate after a short time, stop +// signals it with os.Interrupt (where supported), then os.Kill. +// +// stop returns the error the process terminated with, if any (same as +// w.waitErr). +// +// stop must be called at least once after start returns successfully, even if +// the worker process terminates unexpectedly. +func (w *worker) stop() error { + if w.termC == nil { + panic("worker was not started successfully") + } + select { + case <-w.termC: + // Worker already terminated. + if w.client == nil { + // stop already called. + return w.waitErr + } + // Possible unexpected termination. + w.client.Close() + w.cmd = nil + w.client = nil + return w.waitErr + default: + // Worker still running. + } + + // Tell the worker to stop by closing fuzz_in. It won't actually stop until it + // finishes with earlier calls. + closeC := make(chan struct{}) + go func() { + w.client.Close() + close(closeC) + }() + + sig := os.Interrupt + if runtime.GOOS == "windows" { + // Per https://golang.org/pkg/os/#Signal, “Interrupt is not implemented on + // Windows; using it with os.Process.Signal will return an error.” + // Fall back to Kill instead. + sig = os.Kill + } + + t := time.NewTimer(workerTimeoutDuration) + for { + select { + case <-w.termC: + // Worker terminated. + t.Stop() + <-closeC + w.cmd = nil + w.client = nil + return w.waitErr + + case <-t.C: + // Timer fired before worker terminated. + w.interrupted = true + switch sig { + case os.Interrupt: + // Try to stop the worker with SIGINT and wait a little longer. + w.cmd.Process.Signal(sig) + sig = os.Kill + t.Reset(workerTimeoutDuration) + + case os.Kill: + // Try to stop the worker with SIGKILL and keep waiting. + w.cmd.Process.Signal(sig) + sig = nil + t.Reset(workerTimeoutDuration) + + case nil: + // Still waiting. Print a message to let the user know why. + fmt.Fprintf(w.coordinator.opts.Log, "waiting for fuzzing process to terminate...\n") + } + } + } +} + +// RunFuzzWorker is called in a worker process to communicate with the +// coordinator process in order to fuzz random inputs. RunFuzzWorker loops +// until the coordinator tells it to stop. +// +// fn is a wrapper on the fuzz function. It may return an error to indicate +// a given input "crashed". The coordinator will also record a crasher if +// the function times out or terminates the process. +// +// RunFuzzWorker returns an error if it could not communicate with the +// coordinator process. +func RunFuzzWorker(ctx context.Context, fn func(CorpusEntry) error) error { + comm, err := getWorkerComm() + if err != nil { + return err + } + srv := &workerServer{ + workerComm: comm, + fuzzFn: func(e CorpusEntry) (time.Duration, error) { + timer := time.AfterFunc(10*time.Second, func() { + panic("deadlocked!") // this error message won't be printed + }) + defer timer.Stop() + start := time.Now() + err := fn(e) + return time.Since(start), err + }, + m: newMutator(), + } + return srv.serve(ctx) +} + +// call is serialized and sent from the coordinator on fuzz_in. It acts as +// a minimalist RPC mechanism. Exactly one of its fields must be set to indicate +// which method to call. +type call struct { + Ping *pingArgs + Fuzz *fuzzArgs + Minimize *minimizeArgs +} + +// minimizeArgs contains arguments to workerServer.minimize. The value to +// minimize is already in shared memory. +type minimizeArgs struct { + // Timeout is the time to spend minimizing. This may include time to start up, + // especially if the input causes the worker process to terminated, requiring + // repeated restarts. + Timeout time.Duration + + // Limit is the maximum number of values to test, without spending more time + // than Duration. 0 indicates no limit. + Limit int64 + + // KeepCoverage is a set of coverage counters the worker should attempt to + // keep in minimized values. When provided, the worker will reject inputs that + // don't cause at least one of these bits to be set. + KeepCoverage []byte + + // Index is the index of the fuzz target parameter to be minimized. + Index int +} + +// minimizeResponse contains results from workerServer.minimize. +type minimizeResponse struct { + // WroteToMem is true if the worker found a smaller input and wrote it to + // shared memory. If minimizeArgs.KeepCoverage was set, the minimized input + // preserved at least one coverage bit and did not cause an error. + // Otherwise, the minimized input caused some error, recorded in Err. + WroteToMem bool + + // Err is the error string caused by the value in shared memory, if any. + Err string + + // CoverageData is the set of coverage bits activated by the minimized value + // in shared memory. When set, it contains at least one bit from KeepCoverage. + // CoverageData will be nil if Err is set or if minimization failed. + CoverageData []byte + + // Duration is the time spent minimizing, not including starting or cleaning up. + Duration time.Duration + + // Count is the number of values tested. + Count int64 +} + +// fuzzArgs contains arguments to workerServer.fuzz. The value to fuzz is +// passed in shared memory. +type fuzzArgs struct { + // Timeout is the time to spend fuzzing, not including starting or + // cleaning up. + Timeout time.Duration + + // Limit is the maximum number of values to test, without spending more time + // than Duration. 0 indicates no limit. + Limit int64 + + // Warmup indicates whether this is part of a warmup run, meaning that + // fuzzing should not occur. If coverageEnabled is true, then coverage data + // should be reported. + Warmup bool + + // CoverageData is the coverage data. If set, the worker should update its + // local coverage data prior to fuzzing. + CoverageData []byte +} + +// fuzzResponse contains results from workerServer.fuzz. +type fuzzResponse struct { + // Duration is the time spent fuzzing, not including starting or cleaning up. + TotalDuration time.Duration + InterestingDuration time.Duration + + // Count is the number of values tested. + Count int64 + + // CoverageData is set if the value in shared memory expands coverage + // and therefore may be interesting to the coordinator. + CoverageData []byte + + // Err is the error string caused by the value in shared memory, which is + // non-empty if the value in shared memory caused a crash. + Err string + + // InternalErr is the error string caused by an internal error in the + // worker. This shouldn't be considered a crasher. + InternalErr string +} + +// pingArgs contains arguments to workerServer.ping. +type pingArgs struct{} + +// pingResponse contains results from workerServer.ping. +type pingResponse struct{} + +// workerComm holds pipes and shared memory used for communication +// between the coordinator process (client) and a worker process (server). +// These values are unique to each worker; they are shared only with the +// coordinator, not with other workers. +// +// Access to shared memory is synchronized implicitly over the RPC protocol +// implemented in workerServer and workerClient. During a call, the client +// (worker) has exclusive access to shared memory; at other times, the server +// (coordinator) has exclusive access. +type workerComm struct { + fuzzIn, fuzzOut *os.File + memMu chan *sharedMem // mutex guarding shared memory +} + +// workerServer is a minimalist RPC server, run by fuzz worker processes. +// It allows the coordinator process (using workerClient) to call methods in a +// worker process. This system allows the coordinator to run multiple worker +// processes in parallel and to collect inputs that caused crashes from shared +// memory after a worker process terminates unexpectedly. +type workerServer struct { + workerComm + m *mutator + + // coverageMask is the local coverage data for the worker. It is + // periodically updated to reflect the data in the coordinator when new + // coverage is found. + coverageMask []byte + + // fuzzFn runs the worker's fuzz target on the given input and returns an + // error if it finds a crasher (the process may also exit or crash), and the + // time it took to run the input. It sets a deadline of 10 seconds, at which + // point it will panic with the assumption that the process is hanging or + // deadlocked. + fuzzFn func(CorpusEntry) (time.Duration, error) +} + +// serve reads serialized RPC messages on fuzzIn. When serve receives a message, +// it calls the corresponding method, then sends the serialized result back +// on fuzzOut. +// +// serve handles RPC calls synchronously; it will not attempt to read a message +// until the previous call has finished. +// +// serve returns errors that occurred when communicating over pipes. serve +// does not return errors from method calls; those are passed through serialized +// responses. +func (ws *workerServer) serve(ctx context.Context) error { + enc := json.NewEncoder(ws.fuzzOut) + dec := json.NewDecoder(&contextReader{ctx: ctx, r: ws.fuzzIn}) + for { + var c call + if err := dec.Decode(&c); err != nil { + if err == io.EOF || err == ctx.Err() { + return nil + } else { + return err + } + } + + var resp any + switch { + case c.Fuzz != nil: + resp = ws.fuzz(ctx, *c.Fuzz) + case c.Minimize != nil: + resp = ws.minimize(ctx, *c.Minimize) + case c.Ping != nil: + resp = ws.ping(ctx, *c.Ping) + default: + return errors.New("no arguments provided for any call") + } + + if err := enc.Encode(resp); err != nil { + return err + } + } +} + +// chainedMutations is how many mutations are applied before the worker +// resets the input to it's original state. +// NOTE: this number was picked without much thought. It is low enough that +// it seems to create a significant diversity in mutated inputs. We may want +// to consider looking into this more closely once we have a proper performance +// testing framework. Another option is to randomly pick the number of chained +// mutations on each invocation of the workerServer.fuzz method (this appears to +// be what libFuzzer does, although there seems to be no documentation which +// explains why this choice was made.) +const chainedMutations = 5 + +// fuzz runs the test function on random variations of the input value in shared +// memory for a limited duration or number of iterations. +// +// fuzz returns early if it finds an input that crashes the fuzz function (with +// fuzzResponse.Err set) or an input that expands coverage (with +// fuzzResponse.InterestingDuration set). +// +// fuzz does not modify the input in shared memory. Instead, it saves the +// initial PRNG state in shared memory and increments a counter in shared +// memory before each call to the test function. The caller may reconstruct +// the crashing input with this information, since the PRNG is deterministic. +func (ws *workerServer) fuzz(ctx context.Context, args fuzzArgs) (resp fuzzResponse) { + if args.CoverageData != nil { + if ws.coverageMask != nil && len(args.CoverageData) != len(ws.coverageMask) { + resp.InternalErr = fmt.Sprintf("unexpected size for CoverageData: got %d, expected %d", len(args.CoverageData), len(ws.coverageMask)) + return resp + } + ws.coverageMask = args.CoverageData + } + start := time.Now() + defer func() { resp.TotalDuration = time.Since(start) }() + + if args.Timeout != 0 { + var cancel func() + ctx, cancel = context.WithTimeout(ctx, args.Timeout) + defer cancel() + } + mem := <-ws.memMu + ws.m.r.save(&mem.header().randState, &mem.header().randInc) + defer func() { + resp.Count = mem.header().count + ws.memMu <- mem + }() + if args.Limit > 0 && mem.header().count >= args.Limit { + resp.InternalErr = fmt.Sprintf("mem.header().count %d already exceeds args.Limit %d", mem.header().count, args.Limit) + return resp + } + + originalVals, err := unmarshalCorpusFile(mem.valueCopy()) + if err != nil { + resp.InternalErr = err.Error() + return resp + } + vals := make([]any, len(originalVals)) + copy(vals, originalVals) + + shouldStop := func() bool { + return args.Limit > 0 && mem.header().count >= args.Limit + } + fuzzOnce := func(entry CorpusEntry) (dur time.Duration, cov []byte, errMsg string) { + mem.header().count++ + var err error + dur, err = ws.fuzzFn(entry) + if err != nil { + errMsg = err.Error() + if errMsg == "" { + errMsg = "fuzz function failed with no input" + } + return dur, nil, errMsg + } + if ws.coverageMask != nil && countNewCoverageBits(ws.coverageMask, coverageSnapshot) > 0 { + return dur, coverageSnapshot, "" + } + return dur, nil, "" + } + + if args.Warmup { + dur, _, errMsg := fuzzOnce(CorpusEntry{Values: vals}) + if errMsg != "" { + resp.Err = errMsg + return resp + } + resp.InterestingDuration = dur + if coverageEnabled { + resp.CoverageData = coverageSnapshot + } + return resp + } + + for { + select { + case <-ctx.Done(): + return resp + default: + if mem.header().count%chainedMutations == 0 { + copy(vals, originalVals) + ws.m.r.save(&mem.header().randState, &mem.header().randInc) + } + ws.m.mutate(vals, cap(mem.valueRef())) + + entry := CorpusEntry{Values: vals} + dur, cov, errMsg := fuzzOnce(entry) + if errMsg != "" { + resp.Err = errMsg + return resp + } + if cov != nil { + resp.CoverageData = cov + resp.InterestingDuration = dur + return resp + } + if shouldStop() { + return resp + } + } + } +} + +func (ws *workerServer) minimize(ctx context.Context, args minimizeArgs) (resp minimizeResponse) { + start := time.Now() + defer func() { resp.Duration = time.Since(start) }() + mem := <-ws.memMu + defer func() { ws.memMu <- mem }() + vals, err := unmarshalCorpusFile(mem.valueCopy()) + if err != nil { + panic(err) + } + inpHash := sha256.Sum256(mem.valueCopy()) + if args.Timeout != 0 { + var cancel func() + ctx, cancel = context.WithTimeout(ctx, args.Timeout) + defer cancel() + } + + // Minimize the values in vals, then write to shared memory. We only write + // to shared memory after completing minimization. + success, err := ws.minimizeInput(ctx, vals, mem, args) + if success { + writeToMem(vals, mem) + outHash := sha256.Sum256(mem.valueCopy()) + mem.header().rawInMem = false + resp.WroteToMem = true + if err != nil { + resp.Err = err.Error() + } else { + // If the values didn't change during minimization then coverageSnapshot is likely + // a dirty snapshot which represents the very last step of minimization, not the + // coverage for the initial input. In that case just return the coverage we were + // given initially, since it more accurately represents the coverage map for the + // input we are returning. + if outHash != inpHash { + resp.CoverageData = coverageSnapshot + } else { + resp.CoverageData = args.KeepCoverage + } + } + } + return resp +} + +// minimizeInput applies a series of minimizing transformations on the provided +// vals, ensuring that each minimization still causes an error, or keeps +// coverage, in fuzzFn. It uses the context to determine how long to run, +// stopping once closed. It returns a bool indicating whether minimization was +// successful and an error if one was found. +func (ws *workerServer) minimizeInput(ctx context.Context, vals []any, mem *sharedMem, args minimizeArgs) (success bool, retErr error) { + keepCoverage := args.KeepCoverage + memBytes := mem.valueRef() + bPtr := &memBytes + count := &mem.header().count + shouldStop := func() bool { + return ctx.Err() != nil || + (args.Limit > 0 && *count >= args.Limit) + } + if shouldStop() { + return false, nil + } + + // Check that the original value preserves coverage or causes an error. + // If not, then whatever caused us to think the value was interesting may + // have been a flake, and we can't minimize it. + *count++ + _, retErr = ws.fuzzFn(CorpusEntry{Values: vals}) + if keepCoverage != nil { + if !hasCoverageBit(keepCoverage, coverageSnapshot) || retErr != nil { + return false, nil + } + } else if retErr == nil { + return false, nil + } + mem.header().rawInMem = true + + // tryMinimized runs the fuzz function with candidate replacing the value + // at index valI. tryMinimized returns whether the input with candidate is + // interesting for the same reason as the original input: it returns + // an error if one was expected, or it preserves coverage. + tryMinimized := func(candidate []byte) bool { + prev := vals[args.Index] + switch prev.(type) { + case []byte: + vals[args.Index] = candidate + case string: + vals[args.Index] = string(candidate) + default: + panic("impossible") + } + copy(*bPtr, candidate) + *bPtr = (*bPtr)[:len(candidate)] + mem.setValueLen(len(candidate)) + *count++ + _, err := ws.fuzzFn(CorpusEntry{Values: vals}) + if err != nil { + retErr = err + if keepCoverage != nil { + // Now that we've found a crash, that's more important than any + // minimization of interesting inputs that was being done. Clear out + // keepCoverage to only minimize the crash going forward. + keepCoverage = nil + } + return true + } + // Minimization should preserve coverage bits. + if keepCoverage != nil && isCoverageSubset(keepCoverage, coverageSnapshot) { + return true + } + vals[args.Index] = prev + return false + } + switch v := vals[args.Index].(type) { + case string: + minimizeBytes([]byte(v), tryMinimized, shouldStop) + case []byte: + minimizeBytes(v, tryMinimized, shouldStop) + default: + panic("impossible") + } + return true, retErr +} + +func writeToMem(vals []any, mem *sharedMem) { + b := marshalCorpusFile(vals...) + mem.setValue(b) +} + +// ping does nothing. The coordinator calls this method to ensure the worker +// has called F.Fuzz and can communicate. +func (ws *workerServer) ping(ctx context.Context, args pingArgs) pingResponse { + return pingResponse{} +} + +// workerClient is a minimalist RPC client. The coordinator process uses a +// workerClient to call methods in each worker process (handled by +// workerServer). +type workerClient struct { + workerComm + m *mutator + + // mu is the mutex protecting the workerComm.fuzzIn pipe. This must be + // locked before making calls to the workerServer. It prevents + // workerClient.Close from closing fuzzIn while workerClient methods are + // writing to it concurrently, and prevents multiple callers from writing to + // fuzzIn concurrently. + mu sync.Mutex +} + +func newWorkerClient(comm workerComm, m *mutator) *workerClient { + return &workerClient{workerComm: comm, m: m} +} + +// Close shuts down the connection to the RPC server (the worker process) by +// closing fuzz_in. Close drains fuzz_out (avoiding a SIGPIPE in the worker), +// and closes it after the worker process closes the other end. +func (wc *workerClient) Close() error { + wc.mu.Lock() + defer wc.mu.Unlock() + + // Close fuzzIn. This signals to the server that there are no more calls, + // and it should exit. + if err := wc.fuzzIn.Close(); err != nil { + wc.fuzzOut.Close() + return err + } + + // Drain fuzzOut and close it. When the server exits, the kernel will close + // its end of fuzzOut, and we'll get EOF. + if _, err := io.Copy(io.Discard, wc.fuzzOut); err != nil { + wc.fuzzOut.Close() + return err + } + return wc.fuzzOut.Close() +} + +// errSharedMemClosed is returned by workerClient methods that cannot access +// shared memory because it was closed and unmapped by another goroutine. That +// can happen when worker.cleanup is called in the worker goroutine while a +// workerClient.fuzz call runs concurrently. +// +// This error should not be reported. It indicates the operation was +// interrupted. +var errSharedMemClosed = errors.New("internal error: shared memory was closed and unmapped") + +// minimize tells the worker to call the minimize method. See +// workerServer.minimize. +func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args minimizeArgs) (entryOut CorpusEntry, resp minimizeResponse, retErr error) { + wc.mu.Lock() + defer wc.mu.Unlock() + + mem, ok := <-wc.memMu + if !ok { + return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed + } + defer func() { wc.memMu <- mem }() + mem.header().count = 0 + inp, err := corpusEntryData(entryIn) + if err != nil { + return CorpusEntry{}, minimizeResponse{}, err + } + mem.setValue(inp) + entryOut = entryIn + entryOut.Values, err = unmarshalCorpusFile(inp) + if err != nil { + return CorpusEntry{}, minimizeResponse{}, fmt.Errorf("workerClient.minimize unmarshaling provided value: %v", err) + } + for i, v := range entryOut.Values { + if !isMinimizable(reflect.TypeOf(v)) { + continue + } + + wc.memMu <- mem + args.Index = i + c := call{Minimize: &args} + callErr := wc.callLocked(ctx, c, &resp) + mem, ok = <-wc.memMu + if !ok { + return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed + } + + if callErr != nil { + retErr = callErr + if !mem.header().rawInMem { + // An unrecoverable error occurred before minimization began. + return entryIn, minimizeResponse{}, retErr + } + // An unrecoverable error occurred during minimization. mem now + // holds the raw, unmarshalled bytes of entryIn.Values[i] that + // caused the error. + switch entryOut.Values[i].(type) { + case string: + entryOut.Values[i] = string(mem.valueCopy()) + case []byte: + entryOut.Values[i] = mem.valueCopy() + default: + panic("impossible") + } + entryOut.Data = marshalCorpusFile(entryOut.Values...) + // Stop minimizing; another unrecoverable error is likely to occur. + break + } + + if resp.WroteToMem { + // Minimization succeeded, and mem holds the marshaled data. + entryOut.Data = mem.valueCopy() + entryOut.Values, err = unmarshalCorpusFile(entryOut.Data) + if err != nil { + return CorpusEntry{}, minimizeResponse{}, fmt.Errorf("workerClient.minimize unmarshaling minimized value: %v", err) + } + } + + // Prepare for next iteration of the loop. + if args.Timeout != 0 { + args.Timeout -= resp.Duration + if args.Timeout <= 0 { + break + } + } + if args.Limit != 0 { + args.Limit -= mem.header().count + if args.Limit <= 0 { + break + } + } + } + resp.Count = mem.header().count + h := sha256.Sum256(entryOut.Data) + entryOut.Path = fmt.Sprintf("%x", h[:4]) + return entryOut, resp, retErr +} + +// fuzz tells the worker to call the fuzz method. See workerServer.fuzz. +func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzzArgs) (entryOut CorpusEntry, resp fuzzResponse, isInternalError bool, err error) { + wc.mu.Lock() + defer wc.mu.Unlock() + + mem, ok := <-wc.memMu + if !ok { + return CorpusEntry{}, fuzzResponse{}, true, errSharedMemClosed + } + mem.header().count = 0 + inp, err := corpusEntryData(entryIn) + if err != nil { + wc.memMu <- mem + return CorpusEntry{}, fuzzResponse{}, true, err + } + mem.setValue(inp) + wc.memMu <- mem + + c := call{Fuzz: &args} + callErr := wc.callLocked(ctx, c, &resp) + if resp.InternalErr != "" { + return CorpusEntry{}, fuzzResponse{}, true, errors.New(resp.InternalErr) + } + mem, ok = <-wc.memMu + if !ok { + return CorpusEntry{}, fuzzResponse{}, true, errSharedMemClosed + } + defer func() { wc.memMu <- mem }() + resp.Count = mem.header().count + + if !bytes.Equal(inp, mem.valueRef()) { + return CorpusEntry{}, fuzzResponse{}, true, errors.New("workerServer.fuzz modified input") + } + needEntryOut := callErr != nil || resp.Err != "" || + (!args.Warmup && resp.CoverageData != nil) + if needEntryOut { + valuesOut, err := unmarshalCorpusFile(inp) + if err != nil { + return CorpusEntry{}, fuzzResponse{}, true, fmt.Errorf("unmarshaling fuzz input value after call: %v", err) + } + wc.m.r.restore(mem.header().randState, mem.header().randInc) + if !args.Warmup { + // Only mutate the valuesOut if fuzzing actually occurred. + numMutations := ((resp.Count - 1) % chainedMutations) + 1 + for i := int64(0); i < numMutations; i++ { + wc.m.mutate(valuesOut, cap(mem.valueRef())) + } + } + dataOut := marshalCorpusFile(valuesOut...) + + h := sha256.Sum256(dataOut) + name := fmt.Sprintf("%x", h[:4]) + entryOut = CorpusEntry{ + Parent: entryIn.Path, + Path: name, + Data: dataOut, + Generation: entryIn.Generation + 1, + } + if args.Warmup { + // The bytes weren't mutated, so if entryIn was a seed corpus value, + // then entryOut is too. + entryOut.IsSeed = entryIn.IsSeed + } + } + + return entryOut, resp, false, callErr +} + +// ping tells the worker to call the ping method. See workerServer.ping. +func (wc *workerClient) ping(ctx context.Context) error { + wc.mu.Lock() + defer wc.mu.Unlock() + c := call{Ping: &pingArgs{}} + var resp pingResponse + return wc.callLocked(ctx, c, &resp) +} + +// callLocked sends an RPC from the coordinator to the worker process and waits +// for the response. The callLocked may be cancelled with ctx. +func (wc *workerClient) callLocked(ctx context.Context, c call, resp any) (err error) { + enc := json.NewEncoder(wc.fuzzIn) + dec := json.NewDecoder(&contextReader{ctx: ctx, r: wc.fuzzOut}) + if err := enc.Encode(c); err != nil { + return err + } + return dec.Decode(resp) +} + +// contextReader wraps a Reader with a Context. If the context is cancelled +// while the underlying reader is blocked, Read returns immediately. +// +// This is useful for reading from a pipe. Closing a pipe file descriptor does +// not unblock pending Reads on that file descriptor. All copies of the pipe's +// other file descriptor (the write end) must be closed in all processes that +// inherit it. This is difficult to do correctly in the situation we care about +// (process group termination). +type contextReader struct { + ctx context.Context + r io.Reader +} + +func (cr *contextReader) Read(b []byte) (int, error) { + if ctxErr := cr.ctx.Err(); ctxErr != nil { + return 0, ctxErr + } + done := make(chan struct{}) + + // This goroutine may stay blocked after Read returns because the underlying + // read is blocked. + var n int + var err error + go func() { + n, err = cr.r.Read(b) + close(done) + }() + + select { + case <-cr.ctx.Done(): + return 0, cr.ctx.Err() + case <-done: + return n, err + } +} diff --git a/src/internal/fuzz/worker_test.go b/src/internal/fuzz/worker_test.go new file mode 100644 index 0000000..d0b21da --- /dev/null +++ b/src/internal/fuzz/worker_test.go @@ -0,0 +1,206 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fuzz + +import ( + "context" + "errors" + "flag" + "fmt" + "internal/race" + "io" + "os" + "os/signal" + "reflect" + "strconv" + "testing" + "time" +) + +var benchmarkWorkerFlag = flag.Bool("benchmarkworker", false, "") + +func TestMain(m *testing.M) { + flag.Parse() + if *benchmarkWorkerFlag { + runBenchmarkWorker() + return + } + os.Exit(m.Run()) +} + +func BenchmarkWorkerFuzzOverhead(b *testing.B) { + if race.Enabled { + b.Skip("TODO(48504): fix and re-enable") + } + origEnv := os.Getenv("GODEBUG") + defer func() { os.Setenv("GODEBUG", origEnv) }() + os.Setenv("GODEBUG", fmt.Sprintf("%s,fuzzseed=123", origEnv)) + + ws := &workerServer{ + fuzzFn: func(_ CorpusEntry) (time.Duration, error) { return time.Second, nil }, + workerComm: workerComm{memMu: make(chan *sharedMem, 1)}, + } + + mem, err := sharedMemTempFile(workerSharedMemSize) + if err != nil { + b.Fatalf("failed to create temporary shared memory file: %s", err) + } + defer func() { + if err := mem.Close(); err != nil { + b.Error(err) + } + }() + + initialVal := []any{make([]byte, 32)} + encodedVals := marshalCorpusFile(initialVal...) + mem.setValue(encodedVals) + + ws.memMu <- mem + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ws.m = newMutator() + mem.setValue(encodedVals) + mem.header().count = 0 + + ws.fuzz(context.Background(), fuzzArgs{Limit: 1}) + } +} + +// BenchmarkWorkerPing acts as the coordinator and measures the time it takes +// a worker to respond to N pings. This is a rough measure of our RPC latency. +func BenchmarkWorkerPing(b *testing.B) { + if race.Enabled { + b.Skip("TODO(48504): fix and re-enable") + } + b.SetParallelism(1) + w := newWorkerForTest(b) + for i := 0; i < b.N; i++ { + if err := w.client.ping(context.Background()); err != nil { + b.Fatal(err) + } + } +} + +// BenchmarkWorkerFuzz acts as the coordinator and measures the time it takes +// a worker to mutate a given input and call a trivial fuzz function N times. +func BenchmarkWorkerFuzz(b *testing.B) { + if race.Enabled { + b.Skip("TODO(48504): fix and re-enable") + } + b.SetParallelism(1) + w := newWorkerForTest(b) + entry := CorpusEntry{Values: []any{[]byte(nil)}} + entry.Data = marshalCorpusFile(entry.Values...) + for i := int64(0); i < int64(b.N); { + args := fuzzArgs{ + Limit: int64(b.N) - i, + Timeout: workerFuzzDuration, + } + _, resp, _, err := w.client.fuzz(context.Background(), entry, args) + if err != nil { + b.Fatal(err) + } + if resp.Err != "" { + b.Fatal(resp.Err) + } + if resp.Count == 0 { + b.Fatal("worker did not make progress") + } + i += resp.Count + } +} + +// newWorkerForTest creates and starts a worker process for testing or +// benchmarking. The worker process calls RunFuzzWorker, which responds to +// RPC messages until it's stopped. The process is stopped and cleaned up +// automatically when the test is done. +func newWorkerForTest(tb testing.TB) *worker { + tb.Helper() + c, err := newCoordinator(CoordinateFuzzingOpts{ + Types: []reflect.Type{reflect.TypeOf([]byte(nil))}, + Log: io.Discard, + }) + if err != nil { + tb.Fatal(err) + } + dir := "" // same as self + binPath := os.Args[0] // same as self + args := append(os.Args[1:], "-benchmarkworker") + env := os.Environ() // same as self + w, err := newWorker(c, dir, binPath, args, env) + if err != nil { + tb.Fatal(err) + } + tb.Cleanup(func() { + if err := w.cleanup(); err != nil { + tb.Error(err) + } + }) + if err := w.startAndPing(context.Background()); err != nil { + tb.Fatal(err) + } + tb.Cleanup(func() { + if err := w.stop(); err != nil { + tb.Error(err) + } + }) + return w +} + +func runBenchmarkWorker() { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + fn := func(CorpusEntry) error { return nil } + if err := RunFuzzWorker(ctx, fn); err != nil && err != ctx.Err() { + panic(err) + } +} + +func BenchmarkWorkerMinimize(b *testing.B) { + if race.Enabled { + b.Skip("TODO(48504): fix and re-enable") + } + + ws := &workerServer{ + workerComm: workerComm{memMu: make(chan *sharedMem, 1)}, + } + + mem, err := sharedMemTempFile(workerSharedMemSize) + if err != nil { + b.Fatalf("failed to create temporary shared memory file: %s", err) + } + defer func() { + if err := mem.Close(); err != nil { + b.Error(err) + } + }() + ws.memMu <- mem + + bytes := make([]byte, 1024) + ctx := context.Background() + for sz := 1; sz <= len(bytes); sz <<= 1 { + sz := sz + input := []any{bytes[:sz]} + encodedVals := marshalCorpusFile(input...) + mem = <-ws.memMu + mem.setValue(encodedVals) + ws.memMu <- mem + b.Run(strconv.Itoa(sz), func(b *testing.B) { + i := 0 + ws.fuzzFn = func(_ CorpusEntry) (time.Duration, error) { + if i == 0 { + i++ + return time.Second, errors.New("initial failure for deflake") + } + return time.Second, nil + } + for i := 0; i < b.N; i++ { + b.SetBytes(int64(sz)) + ws.minimize(ctx, minimizeArgs{}) + } + }) + } +} |