summaryrefslogtreecommitdiffstats
path: root/src/internal/fuzz/worker.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/internal/fuzz/worker.go')
-rw-r--r--src/internal/fuzz/worker.go1195
1 files changed, 1195 insertions, 0 deletions
diff --git a/src/internal/fuzz/worker.go b/src/internal/fuzz/worker.go
new file mode 100644
index 0000000..c952670
--- /dev/null
+++ b/src/internal/fuzz/worker.go
@@ -0,0 +1,1195 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fuzz
+
+import (
+ "bytes"
+ "context"
+ "crypto/sha256"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+ "reflect"
+ "runtime"
+ "sync"
+ "time"
+)
+
+const (
+ // workerFuzzDuration is the amount of time a worker can spend testing random
+ // variations of an input given by the coordinator.
+ workerFuzzDuration = 100 * time.Millisecond
+
+ // workerTimeoutDuration is the amount of time a worker can go without
+ // responding to the coordinator before being stopped.
+ workerTimeoutDuration = 1 * time.Second
+
+ // workerExitCode is used as an exit code by fuzz worker processes after an internal error.
+ // This distinguishes internal errors from uncontrolled panics and other crashes.
+ // Keep in sync with internal/fuzz.workerExitCode.
+ workerExitCode = 70
+
+ // workerSharedMemSize is the maximum size of the shared memory file used to
+ // communicate with workers. This limits the size of fuzz inputs.
+ workerSharedMemSize = 100 << 20 // 100 MB
+)
+
+// worker manages a worker process running a test binary. The worker object
+// exists only in the coordinator (the process started by 'go test -fuzz').
+// workerClient is used by the coordinator to send RPCs to the worker process,
+// which handles them with workerServer.
+type worker struct {
+ dir string // working directory, same as package directory
+ binPath string // path to test executable
+ args []string // arguments for test executable
+ env []string // environment for test executable
+
+ coordinator *coordinator
+
+ memMu chan *sharedMem // mutex guarding shared memory with worker; persists across processes.
+
+ cmd *exec.Cmd // current worker process
+ client *workerClient // used to communicate with worker process
+ waitErr error // last error returned by wait, set before termC is closed.
+ interrupted bool // true after stop interrupts a running worker.
+ termC chan struct{} // closed by wait when worker process terminates
+}
+
+func newWorker(c *coordinator, dir, binPath string, args, env []string) (*worker, error) {
+ mem, err := sharedMemTempFile(workerSharedMemSize)
+ if err != nil {
+ return nil, err
+ }
+ memMu := make(chan *sharedMem, 1)
+ memMu <- mem
+ return &worker{
+ dir: dir,
+ binPath: binPath,
+ args: args,
+ env: env[:len(env):len(env)], // copy on append to ensure workers don't overwrite each other.
+ coordinator: c,
+ memMu: memMu,
+ }, nil
+}
+
+// cleanup releases persistent resources associated with the worker.
+func (w *worker) cleanup() error {
+ mem := <-w.memMu
+ if mem == nil {
+ return nil
+ }
+ close(w.memMu)
+ return mem.Close()
+}
+
+// coordinate runs the test binary to perform fuzzing.
+//
+// coordinate loops until ctx is cancelled or a fatal error is encountered.
+// If a test process terminates unexpectedly while fuzzing, coordinate will
+// attempt to restart and continue unless the termination can be attributed
+// to an interruption (from a timer or the user).
+//
+// While looping, coordinate receives inputs from the coordinator, passes
+// those inputs to the worker process, then passes the results back to
+// the coordinator.
+func (w *worker) coordinate(ctx context.Context) error {
+ // Main event loop.
+ for {
+ // Start or restart the worker if it's not running.
+ if !w.isRunning() {
+ if err := w.startAndPing(ctx); err != nil {
+ return err
+ }
+ }
+
+ select {
+ case <-ctx.Done():
+ // Worker was told to stop.
+ err := w.stop()
+ if err != nil && !w.interrupted && !isInterruptError(err) {
+ return err
+ }
+ return ctx.Err()
+
+ case <-w.termC:
+ // Worker process terminated unexpectedly while waiting for input.
+ err := w.stop()
+ if w.interrupted {
+ panic("worker interrupted after unexpected termination")
+ }
+ if err == nil || isInterruptError(err) {
+ // Worker stopped, either by exiting with status 0 or after being
+ // interrupted with a signal that was not sent by the coordinator.
+ //
+ // When the user presses ^C, on POSIX platforms, SIGINT is delivered to
+ // all processes in the group concurrently, and the worker may see it
+ // before the coordinator. The worker should exit 0 gracefully (in
+ // theory).
+ //
+ // This condition is probably intended by the user, so suppress
+ // the error.
+ return nil
+ }
+ if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == workerExitCode {
+ // Worker exited with a code indicating F.Fuzz was not called correctly,
+ // for example, F.Fail was called first.
+ return fmt.Errorf("fuzzing process exited unexpectedly due to an internal failure: %w", err)
+ }
+ // Worker exited non-zero or was terminated by a non-interrupt
+ // signal (for example, SIGSEGV) while fuzzing.
+ return fmt.Errorf("fuzzing process hung or terminated unexpectedly: %w", err)
+ // TODO(jayconrod,katiehockman): if -keepfuzzing, restart worker.
+
+ case input := <-w.coordinator.inputC:
+ // Received input from coordinator.
+ args := fuzzArgs{
+ Limit: input.limit,
+ Timeout: input.timeout,
+ Warmup: input.warmup,
+ CoverageData: input.coverageData,
+ }
+ entry, resp, isInternalError, err := w.client.fuzz(ctx, input.entry, args)
+ canMinimize := true
+ if err != nil {
+ // Error communicating with worker.
+ w.stop()
+ if ctx.Err() != nil {
+ // Timeout or interruption.
+ return ctx.Err()
+ }
+ if w.interrupted {
+ // Communication error before we stopped the worker.
+ // Report an error, but don't record a crasher.
+ return fmt.Errorf("communicating with fuzzing process: %v", err)
+ }
+ if sig, ok := terminationSignal(w.waitErr); ok && !isCrashSignal(sig) {
+ // Worker terminated by a signal that probably wasn't caused by a
+ // specific input to the fuzz function. For example, on Linux,
+ // the kernel (OOM killer) may send SIGKILL to a process using a lot
+ // of memory. Or the shell might send SIGHUP when the terminal
+ // is closed. Don't record a crasher.
+ return fmt.Errorf("fuzzing process terminated by unexpected signal; no crash will be recorded: %v", w.waitErr)
+ }
+ if isInternalError {
+ // An internal error occurred which shouldn't be considered
+ // a crash.
+ return err
+ }
+ // Unexpected termination. Set error message and fall through.
+ // We'll restart the worker on the next iteration.
+ // Don't attempt to minimize this since it crashed the worker.
+ resp.Err = fmt.Sprintf("fuzzing process hung or terminated unexpectedly: %v", w.waitErr)
+ canMinimize = false
+ }
+ result := fuzzResult{
+ limit: input.limit,
+ count: resp.Count,
+ totalDuration: resp.TotalDuration,
+ entryDuration: resp.InterestingDuration,
+ entry: entry,
+ crasherMsg: resp.Err,
+ coverageData: resp.CoverageData,
+ canMinimize: canMinimize,
+ }
+ w.coordinator.resultC <- result
+
+ case input := <-w.coordinator.minimizeC:
+ // Received input to minimize from coordinator.
+ result, err := w.minimize(ctx, input)
+ if err != nil {
+ // Error minimizing. Send back the original input. If it didn't cause
+ // an error before, report it as causing an error now.
+ // TODO: double-check this is handled correctly when
+ // implementing -keepfuzzing.
+ result = fuzzResult{
+ entry: input.entry,
+ crasherMsg: input.crasherMsg,
+ canMinimize: false,
+ limit: input.limit,
+ }
+ if result.crasherMsg == "" {
+ result.crasherMsg = err.Error()
+ }
+ }
+ if shouldPrintDebugInfo() {
+ w.coordinator.debugLogf(
+ "input minimized, id: %s, original id: %s, crasher: %t, originally crasher: %t, minimizing took: %s",
+ result.entry.Path,
+ input.entry.Path,
+ result.crasherMsg != "",
+ input.crasherMsg != "",
+ result.totalDuration,
+ )
+ }
+ w.coordinator.resultC <- result
+ }
+ }
+}
+
+// minimize tells a worker process to attempt to find a smaller value that
+// either causes an error (if we started minimizing because we found an input
+// that causes an error) or preserves new coverage (if we started minimizing
+// because we found an input that expands coverage).
+func (w *worker) minimize(ctx context.Context, input fuzzMinimizeInput) (min fuzzResult, err error) {
+ if w.coordinator.opts.MinimizeTimeout != 0 {
+ var cancel func()
+ ctx, cancel = context.WithTimeout(ctx, w.coordinator.opts.MinimizeTimeout)
+ defer cancel()
+ }
+
+ args := minimizeArgs{
+ Limit: input.limit,
+ Timeout: input.timeout,
+ KeepCoverage: input.keepCoverage,
+ }
+ entry, resp, err := w.client.minimize(ctx, input.entry, args)
+ if err != nil {
+ // Error communicating with worker.
+ w.stop()
+ if ctx.Err() != nil || w.interrupted || isInterruptError(w.waitErr) {
+ // Worker was interrupted, possibly by the user pressing ^C.
+ // Normally, workers can handle interrupts and timeouts gracefully and
+ // will return without error. An error here indicates the worker
+ // may not have been in a good state, but the error won't be meaningful
+ // to the user. Just return the original crasher without logging anything.
+ return fuzzResult{
+ entry: input.entry,
+ crasherMsg: input.crasherMsg,
+ coverageData: input.keepCoverage,
+ canMinimize: false,
+ limit: input.limit,
+ }, nil
+ }
+ return fuzzResult{
+ entry: entry,
+ crasherMsg: fmt.Sprintf("fuzzing process hung or terminated unexpectedly while minimizing: %v", err),
+ canMinimize: false,
+ limit: input.limit,
+ count: resp.Count,
+ totalDuration: resp.Duration,
+ }, nil
+ }
+
+ if input.crasherMsg != "" && resp.Err == "" {
+ return fuzzResult{}, fmt.Errorf("attempted to minimize a crash but could not reproduce")
+ }
+
+ return fuzzResult{
+ entry: entry,
+ crasherMsg: resp.Err,
+ coverageData: resp.CoverageData,
+ canMinimize: false,
+ limit: input.limit,
+ count: resp.Count,
+ totalDuration: resp.Duration,
+ }, nil
+}
+
+func (w *worker) isRunning() bool {
+ return w.cmd != nil
+}
+
+// startAndPing starts the worker process and sends it a message to make sure it
+// can communicate.
+//
+// startAndPing returns an error if any part of this didn't work, including if
+// the context is expired or the worker process was interrupted before it
+// responded. Errors that happen after start but before the ping response
+// likely indicate that the worker did not call F.Fuzz or called F.Fail first.
+// We don't record crashers for these errors.
+func (w *worker) startAndPing(ctx context.Context) error {
+ if ctx.Err() != nil {
+ return ctx.Err()
+ }
+ if err := w.start(); err != nil {
+ return err
+ }
+ if err := w.client.ping(ctx); err != nil {
+ w.stop()
+ if ctx.Err() != nil {
+ return ctx.Err()
+ }
+ if isInterruptError(err) {
+ // User may have pressed ^C before worker responded.
+ return err
+ }
+ // TODO: record and return stderr.
+ return fmt.Errorf("fuzzing process terminated without fuzzing: %w", err)
+ }
+ return nil
+}
+
+// start runs a new worker process.
+//
+// If the process couldn't be started, start returns an error. Start won't
+// return later termination errors from the process if they occur.
+//
+// If the process starts successfully, start returns nil. stop must be called
+// once later to clean up, even if the process terminates on its own.
+//
+// When the process terminates, w.waitErr is set to the error (if any), and
+// w.termC is closed.
+func (w *worker) start() (err error) {
+ if w.isRunning() {
+ panic("worker already started")
+ }
+ w.waitErr = nil
+ w.interrupted = false
+ w.termC = nil
+
+ cmd := exec.Command(w.binPath, w.args...)
+ cmd.Dir = w.dir
+ cmd.Env = w.env[:len(w.env):len(w.env)] // copy on append to ensure workers don't overwrite each other.
+
+ // Create the "fuzz_in" and "fuzz_out" pipes so we can communicate with
+ // the worker. We don't use stdin and stdout, since the test binary may
+ // do something else with those.
+ //
+ // Each pipe has a reader and a writer. The coordinator writes to fuzzInW
+ // and reads from fuzzOutR. The worker inherits fuzzInR and fuzzOutW.
+ // The coordinator closes fuzzInR and fuzzOutW after starting the worker,
+ // since we have no further need of them.
+ fuzzInR, fuzzInW, err := os.Pipe()
+ if err != nil {
+ return err
+ }
+ defer fuzzInR.Close()
+ fuzzOutR, fuzzOutW, err := os.Pipe()
+ if err != nil {
+ fuzzInW.Close()
+ return err
+ }
+ defer fuzzOutW.Close()
+ setWorkerComm(cmd, workerComm{fuzzIn: fuzzInR, fuzzOut: fuzzOutW, memMu: w.memMu})
+
+ // Start the worker process.
+ if err := cmd.Start(); err != nil {
+ fuzzInW.Close()
+ fuzzOutR.Close()
+ return err
+ }
+
+ // Worker started successfully.
+ // After this, w.client owns fuzzInW and fuzzOutR, so w.client.Close must be
+ // called later by stop.
+ w.cmd = cmd
+ w.termC = make(chan struct{})
+ comm := workerComm{fuzzIn: fuzzInW, fuzzOut: fuzzOutR, memMu: w.memMu}
+ m := newMutator()
+ w.client = newWorkerClient(comm, m)
+
+ go func() {
+ w.waitErr = w.cmd.Wait()
+ close(w.termC)
+ }()
+
+ return nil
+}
+
+// stop tells the worker process to exit by closing w.client, then blocks until
+// it terminates. If the worker doesn't terminate after a short time, stop
+// signals it with os.Interrupt (where supported), then os.Kill.
+//
+// stop returns the error the process terminated with, if any (same as
+// w.waitErr).
+//
+// stop must be called at least once after start returns successfully, even if
+// the worker process terminates unexpectedly.
+func (w *worker) stop() error {
+ if w.termC == nil {
+ panic("worker was not started successfully")
+ }
+ select {
+ case <-w.termC:
+ // Worker already terminated.
+ if w.client == nil {
+ // stop already called.
+ return w.waitErr
+ }
+ // Possible unexpected termination.
+ w.client.Close()
+ w.cmd = nil
+ w.client = nil
+ return w.waitErr
+ default:
+ // Worker still running.
+ }
+
+ // Tell the worker to stop by closing fuzz_in. It won't actually stop until it
+ // finishes with earlier calls.
+ closeC := make(chan struct{})
+ go func() {
+ w.client.Close()
+ close(closeC)
+ }()
+
+ sig := os.Interrupt
+ if runtime.GOOS == "windows" {
+ // Per https://golang.org/pkg/os/#Signal, “Interrupt is not implemented on
+ // Windows; using it with os.Process.Signal will return an error.”
+ // Fall back to Kill instead.
+ sig = os.Kill
+ }
+
+ t := time.NewTimer(workerTimeoutDuration)
+ for {
+ select {
+ case <-w.termC:
+ // Worker terminated.
+ t.Stop()
+ <-closeC
+ w.cmd = nil
+ w.client = nil
+ return w.waitErr
+
+ case <-t.C:
+ // Timer fired before worker terminated.
+ w.interrupted = true
+ switch sig {
+ case os.Interrupt:
+ // Try to stop the worker with SIGINT and wait a little longer.
+ w.cmd.Process.Signal(sig)
+ sig = os.Kill
+ t.Reset(workerTimeoutDuration)
+
+ case os.Kill:
+ // Try to stop the worker with SIGKILL and keep waiting.
+ w.cmd.Process.Signal(sig)
+ sig = nil
+ t.Reset(workerTimeoutDuration)
+
+ case nil:
+ // Still waiting. Print a message to let the user know why.
+ fmt.Fprintf(w.coordinator.opts.Log, "waiting for fuzzing process to terminate...\n")
+ }
+ }
+ }
+}
+
+// RunFuzzWorker is called in a worker process to communicate with the
+// coordinator process in order to fuzz random inputs. RunFuzzWorker loops
+// until the coordinator tells it to stop.
+//
+// fn is a wrapper on the fuzz function. It may return an error to indicate
+// a given input "crashed". The coordinator will also record a crasher if
+// the function times out or terminates the process.
+//
+// RunFuzzWorker returns an error if it could not communicate with the
+// coordinator process.
+func RunFuzzWorker(ctx context.Context, fn func(CorpusEntry) error) error {
+ comm, err := getWorkerComm()
+ if err != nil {
+ return err
+ }
+ srv := &workerServer{
+ workerComm: comm,
+ fuzzFn: func(e CorpusEntry) (time.Duration, error) {
+ timer := time.AfterFunc(10*time.Second, func() {
+ panic("deadlocked!") // this error message won't be printed
+ })
+ defer timer.Stop()
+ start := time.Now()
+ err := fn(e)
+ return time.Since(start), err
+ },
+ m: newMutator(),
+ }
+ return srv.serve(ctx)
+}
+
+// call is serialized and sent from the coordinator on fuzz_in. It acts as
+// a minimalist RPC mechanism. Exactly one of its fields must be set to indicate
+// which method to call.
+type call struct {
+ Ping *pingArgs
+ Fuzz *fuzzArgs
+ Minimize *minimizeArgs
+}
+
+// minimizeArgs contains arguments to workerServer.minimize. The value to
+// minimize is already in shared memory.
+type minimizeArgs struct {
+ // Timeout is the time to spend minimizing. This may include time to start up,
+ // especially if the input causes the worker process to terminated, requiring
+ // repeated restarts.
+ Timeout time.Duration
+
+ // Limit is the maximum number of values to test, without spending more time
+ // than Duration. 0 indicates no limit.
+ Limit int64
+
+ // KeepCoverage is a set of coverage counters the worker should attempt to
+ // keep in minimized values. When provided, the worker will reject inputs that
+ // don't cause at least one of these bits to be set.
+ KeepCoverage []byte
+
+ // Index is the index of the fuzz target parameter to be minimized.
+ Index int
+}
+
+// minimizeResponse contains results from workerServer.minimize.
+type minimizeResponse struct {
+ // WroteToMem is true if the worker found a smaller input and wrote it to
+ // shared memory. If minimizeArgs.KeepCoverage was set, the minimized input
+ // preserved at least one coverage bit and did not cause an error.
+ // Otherwise, the minimized input caused some error, recorded in Err.
+ WroteToMem bool
+
+ // Err is the error string caused by the value in shared memory, if any.
+ Err string
+
+ // CoverageData is the set of coverage bits activated by the minimized value
+ // in shared memory. When set, it contains at least one bit from KeepCoverage.
+ // CoverageData will be nil if Err is set or if minimization failed.
+ CoverageData []byte
+
+ // Duration is the time spent minimizing, not including starting or cleaning up.
+ Duration time.Duration
+
+ // Count is the number of values tested.
+ Count int64
+}
+
+// fuzzArgs contains arguments to workerServer.fuzz. The value to fuzz is
+// passed in shared memory.
+type fuzzArgs struct {
+ // Timeout is the time to spend fuzzing, not including starting or
+ // cleaning up.
+ Timeout time.Duration
+
+ // Limit is the maximum number of values to test, without spending more time
+ // than Duration. 0 indicates no limit.
+ Limit int64
+
+ // Warmup indicates whether this is part of a warmup run, meaning that
+ // fuzzing should not occur. If coverageEnabled is true, then coverage data
+ // should be reported.
+ Warmup bool
+
+ // CoverageData is the coverage data. If set, the worker should update its
+ // local coverage data prior to fuzzing.
+ CoverageData []byte
+}
+
+// fuzzResponse contains results from workerServer.fuzz.
+type fuzzResponse struct {
+ // Duration is the time spent fuzzing, not including starting or cleaning up.
+ TotalDuration time.Duration
+ InterestingDuration time.Duration
+
+ // Count is the number of values tested.
+ Count int64
+
+ // CoverageData is set if the value in shared memory expands coverage
+ // and therefore may be interesting to the coordinator.
+ CoverageData []byte
+
+ // Err is the error string caused by the value in shared memory, which is
+ // non-empty if the value in shared memory caused a crash.
+ Err string
+
+ // InternalErr is the error string caused by an internal error in the
+ // worker. This shouldn't be considered a crasher.
+ InternalErr string
+}
+
+// pingArgs contains arguments to workerServer.ping.
+type pingArgs struct{}
+
+// pingResponse contains results from workerServer.ping.
+type pingResponse struct{}
+
+// workerComm holds pipes and shared memory used for communication
+// between the coordinator process (client) and a worker process (server).
+// These values are unique to each worker; they are shared only with the
+// coordinator, not with other workers.
+//
+// Access to shared memory is synchronized implicitly over the RPC protocol
+// implemented in workerServer and workerClient. During a call, the client
+// (worker) has exclusive access to shared memory; at other times, the server
+// (coordinator) has exclusive access.
+type workerComm struct {
+ fuzzIn, fuzzOut *os.File
+ memMu chan *sharedMem // mutex guarding shared memory
+}
+
+// workerServer is a minimalist RPC server, run by fuzz worker processes.
+// It allows the coordinator process (using workerClient) to call methods in a
+// worker process. This system allows the coordinator to run multiple worker
+// processes in parallel and to collect inputs that caused crashes from shared
+// memory after a worker process terminates unexpectedly.
+type workerServer struct {
+ workerComm
+ m *mutator
+
+ // coverageMask is the local coverage data for the worker. It is
+ // periodically updated to reflect the data in the coordinator when new
+ // coverage is found.
+ coverageMask []byte
+
+ // fuzzFn runs the worker's fuzz target on the given input and returns an
+ // error if it finds a crasher (the process may also exit or crash), and the
+ // time it took to run the input. It sets a deadline of 10 seconds, at which
+ // point it will panic with the assumption that the process is hanging or
+ // deadlocked.
+ fuzzFn func(CorpusEntry) (time.Duration, error)
+}
+
+// serve reads serialized RPC messages on fuzzIn. When serve receives a message,
+// it calls the corresponding method, then sends the serialized result back
+// on fuzzOut.
+//
+// serve handles RPC calls synchronously; it will not attempt to read a message
+// until the previous call has finished.
+//
+// serve returns errors that occurred when communicating over pipes. serve
+// does not return errors from method calls; those are passed through serialized
+// responses.
+func (ws *workerServer) serve(ctx context.Context) error {
+ enc := json.NewEncoder(ws.fuzzOut)
+ dec := json.NewDecoder(&contextReader{ctx: ctx, r: ws.fuzzIn})
+ for {
+ var c call
+ if err := dec.Decode(&c); err != nil {
+ if err == io.EOF || err == ctx.Err() {
+ return nil
+ } else {
+ return err
+ }
+ }
+
+ var resp any
+ switch {
+ case c.Fuzz != nil:
+ resp = ws.fuzz(ctx, *c.Fuzz)
+ case c.Minimize != nil:
+ resp = ws.minimize(ctx, *c.Minimize)
+ case c.Ping != nil:
+ resp = ws.ping(ctx, *c.Ping)
+ default:
+ return errors.New("no arguments provided for any call")
+ }
+
+ if err := enc.Encode(resp); err != nil {
+ return err
+ }
+ }
+}
+
+// chainedMutations is how many mutations are applied before the worker
+// resets the input to it's original state.
+// NOTE: this number was picked without much thought. It is low enough that
+// it seems to create a significant diversity in mutated inputs. We may want
+// to consider looking into this more closely once we have a proper performance
+// testing framework. Another option is to randomly pick the number of chained
+// mutations on each invocation of the workerServer.fuzz method (this appears to
+// be what libFuzzer does, although there seems to be no documentation which
+// explains why this choice was made.)
+const chainedMutations = 5
+
+// fuzz runs the test function on random variations of the input value in shared
+// memory for a limited duration or number of iterations.
+//
+// fuzz returns early if it finds an input that crashes the fuzz function (with
+// fuzzResponse.Err set) or an input that expands coverage (with
+// fuzzResponse.InterestingDuration set).
+//
+// fuzz does not modify the input in shared memory. Instead, it saves the
+// initial PRNG state in shared memory and increments a counter in shared
+// memory before each call to the test function. The caller may reconstruct
+// the crashing input with this information, since the PRNG is deterministic.
+func (ws *workerServer) fuzz(ctx context.Context, args fuzzArgs) (resp fuzzResponse) {
+ if args.CoverageData != nil {
+ if ws.coverageMask != nil && len(args.CoverageData) != len(ws.coverageMask) {
+ resp.InternalErr = fmt.Sprintf("unexpected size for CoverageData: got %d, expected %d", len(args.CoverageData), len(ws.coverageMask))
+ return resp
+ }
+ ws.coverageMask = args.CoverageData
+ }
+ start := time.Now()
+ defer func() { resp.TotalDuration = time.Since(start) }()
+
+ if args.Timeout != 0 {
+ var cancel func()
+ ctx, cancel = context.WithTimeout(ctx, args.Timeout)
+ defer cancel()
+ }
+ mem := <-ws.memMu
+ ws.m.r.save(&mem.header().randState, &mem.header().randInc)
+ defer func() {
+ resp.Count = mem.header().count
+ ws.memMu <- mem
+ }()
+ if args.Limit > 0 && mem.header().count >= args.Limit {
+ resp.InternalErr = fmt.Sprintf("mem.header().count %d already exceeds args.Limit %d", mem.header().count, args.Limit)
+ return resp
+ }
+
+ originalVals, err := unmarshalCorpusFile(mem.valueCopy())
+ if err != nil {
+ resp.InternalErr = err.Error()
+ return resp
+ }
+ vals := make([]any, len(originalVals))
+ copy(vals, originalVals)
+
+ shouldStop := func() bool {
+ return args.Limit > 0 && mem.header().count >= args.Limit
+ }
+ fuzzOnce := func(entry CorpusEntry) (dur time.Duration, cov []byte, errMsg string) {
+ mem.header().count++
+ var err error
+ dur, err = ws.fuzzFn(entry)
+ if err != nil {
+ errMsg = err.Error()
+ if errMsg == "" {
+ errMsg = "fuzz function failed with no input"
+ }
+ return dur, nil, errMsg
+ }
+ if ws.coverageMask != nil && countNewCoverageBits(ws.coverageMask, coverageSnapshot) > 0 {
+ return dur, coverageSnapshot, ""
+ }
+ return dur, nil, ""
+ }
+
+ if args.Warmup {
+ dur, _, errMsg := fuzzOnce(CorpusEntry{Values: vals})
+ if errMsg != "" {
+ resp.Err = errMsg
+ return resp
+ }
+ resp.InterestingDuration = dur
+ if coverageEnabled {
+ resp.CoverageData = coverageSnapshot
+ }
+ return resp
+ }
+
+ for {
+ select {
+ case <-ctx.Done():
+ return resp
+ default:
+ if mem.header().count%chainedMutations == 0 {
+ copy(vals, originalVals)
+ ws.m.r.save(&mem.header().randState, &mem.header().randInc)
+ }
+ ws.m.mutate(vals, cap(mem.valueRef()))
+
+ entry := CorpusEntry{Values: vals}
+ dur, cov, errMsg := fuzzOnce(entry)
+ if errMsg != "" {
+ resp.Err = errMsg
+ return resp
+ }
+ if cov != nil {
+ resp.CoverageData = cov
+ resp.InterestingDuration = dur
+ return resp
+ }
+ if shouldStop() {
+ return resp
+ }
+ }
+ }
+}
+
+func (ws *workerServer) minimize(ctx context.Context, args minimizeArgs) (resp minimizeResponse) {
+ start := time.Now()
+ defer func() { resp.Duration = time.Since(start) }()
+ mem := <-ws.memMu
+ defer func() { ws.memMu <- mem }()
+ vals, err := unmarshalCorpusFile(mem.valueCopy())
+ if err != nil {
+ panic(err)
+ }
+ inpHash := sha256.Sum256(mem.valueCopy())
+ if args.Timeout != 0 {
+ var cancel func()
+ ctx, cancel = context.WithTimeout(ctx, args.Timeout)
+ defer cancel()
+ }
+
+ // Minimize the values in vals, then write to shared memory. We only write
+ // to shared memory after completing minimization.
+ success, err := ws.minimizeInput(ctx, vals, mem, args)
+ if success {
+ writeToMem(vals, mem)
+ outHash := sha256.Sum256(mem.valueCopy())
+ mem.header().rawInMem = false
+ resp.WroteToMem = true
+ if err != nil {
+ resp.Err = err.Error()
+ } else {
+ // If the values didn't change during minimization then coverageSnapshot is likely
+ // a dirty snapshot which represents the very last step of minimization, not the
+ // coverage for the initial input. In that case just return the coverage we were
+ // given initially, since it more accurately represents the coverage map for the
+ // input we are returning.
+ if outHash != inpHash {
+ resp.CoverageData = coverageSnapshot
+ } else {
+ resp.CoverageData = args.KeepCoverage
+ }
+ }
+ }
+ return resp
+}
+
+// minimizeInput applies a series of minimizing transformations on the provided
+// vals, ensuring that each minimization still causes an error, or keeps
+// coverage, in fuzzFn. It uses the context to determine how long to run,
+// stopping once closed. It returns a bool indicating whether minimization was
+// successful and an error if one was found.
+func (ws *workerServer) minimizeInput(ctx context.Context, vals []any, mem *sharedMem, args minimizeArgs) (success bool, retErr error) {
+ keepCoverage := args.KeepCoverage
+ memBytes := mem.valueRef()
+ bPtr := &memBytes
+ count := &mem.header().count
+ shouldStop := func() bool {
+ return ctx.Err() != nil ||
+ (args.Limit > 0 && *count >= args.Limit)
+ }
+ if shouldStop() {
+ return false, nil
+ }
+
+ // Check that the original value preserves coverage or causes an error.
+ // If not, then whatever caused us to think the value was interesting may
+ // have been a flake, and we can't minimize it.
+ *count++
+ _, retErr = ws.fuzzFn(CorpusEntry{Values: vals})
+ if keepCoverage != nil {
+ if !hasCoverageBit(keepCoverage, coverageSnapshot) || retErr != nil {
+ return false, nil
+ }
+ } else if retErr == nil {
+ return false, nil
+ }
+ mem.header().rawInMem = true
+
+ // tryMinimized runs the fuzz function with candidate replacing the value
+ // at index valI. tryMinimized returns whether the input with candidate is
+ // interesting for the same reason as the original input: it returns
+ // an error if one was expected, or it preserves coverage.
+ tryMinimized := func(candidate []byte) bool {
+ prev := vals[args.Index]
+ switch prev.(type) {
+ case []byte:
+ vals[args.Index] = candidate
+ case string:
+ vals[args.Index] = string(candidate)
+ default:
+ panic("impossible")
+ }
+ copy(*bPtr, candidate)
+ *bPtr = (*bPtr)[:len(candidate)]
+ mem.setValueLen(len(candidate))
+ *count++
+ _, err := ws.fuzzFn(CorpusEntry{Values: vals})
+ if err != nil {
+ retErr = err
+ if keepCoverage != nil {
+ // Now that we've found a crash, that's more important than any
+ // minimization of interesting inputs that was being done. Clear out
+ // keepCoverage to only minimize the crash going forward.
+ keepCoverage = nil
+ }
+ return true
+ }
+ // Minimization should preserve coverage bits.
+ if keepCoverage != nil && isCoverageSubset(keepCoverage, coverageSnapshot) {
+ return true
+ }
+ vals[args.Index] = prev
+ return false
+ }
+ switch v := vals[args.Index].(type) {
+ case string:
+ minimizeBytes([]byte(v), tryMinimized, shouldStop)
+ case []byte:
+ minimizeBytes(v, tryMinimized, shouldStop)
+ default:
+ panic("impossible")
+ }
+ return true, retErr
+}
+
+func writeToMem(vals []any, mem *sharedMem) {
+ b := marshalCorpusFile(vals...)
+ mem.setValue(b)
+}
+
+// ping does nothing. The coordinator calls this method to ensure the worker
+// has called F.Fuzz and can communicate.
+func (ws *workerServer) ping(ctx context.Context, args pingArgs) pingResponse {
+ return pingResponse{}
+}
+
+// workerClient is a minimalist RPC client. The coordinator process uses a
+// workerClient to call methods in each worker process (handled by
+// workerServer).
+type workerClient struct {
+ workerComm
+ m *mutator
+
+ // mu is the mutex protecting the workerComm.fuzzIn pipe. This must be
+ // locked before making calls to the workerServer. It prevents
+ // workerClient.Close from closing fuzzIn while workerClient methods are
+ // writing to it concurrently, and prevents multiple callers from writing to
+ // fuzzIn concurrently.
+ mu sync.Mutex
+}
+
+func newWorkerClient(comm workerComm, m *mutator) *workerClient {
+ return &workerClient{workerComm: comm, m: m}
+}
+
+// Close shuts down the connection to the RPC server (the worker process) by
+// closing fuzz_in. Close drains fuzz_out (avoiding a SIGPIPE in the worker),
+// and closes it after the worker process closes the other end.
+func (wc *workerClient) Close() error {
+ wc.mu.Lock()
+ defer wc.mu.Unlock()
+
+ // Close fuzzIn. This signals to the server that there are no more calls,
+ // and it should exit.
+ if err := wc.fuzzIn.Close(); err != nil {
+ wc.fuzzOut.Close()
+ return err
+ }
+
+ // Drain fuzzOut and close it. When the server exits, the kernel will close
+ // its end of fuzzOut, and we'll get EOF.
+ if _, err := io.Copy(io.Discard, wc.fuzzOut); err != nil {
+ wc.fuzzOut.Close()
+ return err
+ }
+ return wc.fuzzOut.Close()
+}
+
+// errSharedMemClosed is returned by workerClient methods that cannot access
+// shared memory because it was closed and unmapped by another goroutine. That
+// can happen when worker.cleanup is called in the worker goroutine while a
+// workerClient.fuzz call runs concurrently.
+//
+// This error should not be reported. It indicates the operation was
+// interrupted.
+var errSharedMemClosed = errors.New("internal error: shared memory was closed and unmapped")
+
+// minimize tells the worker to call the minimize method. See
+// workerServer.minimize.
+func (wc *workerClient) minimize(ctx context.Context, entryIn CorpusEntry, args minimizeArgs) (entryOut CorpusEntry, resp minimizeResponse, retErr error) {
+ wc.mu.Lock()
+ defer wc.mu.Unlock()
+
+ mem, ok := <-wc.memMu
+ if !ok {
+ return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
+ }
+ defer func() { wc.memMu <- mem }()
+ mem.header().count = 0
+ inp, err := corpusEntryData(entryIn)
+ if err != nil {
+ return CorpusEntry{}, minimizeResponse{}, err
+ }
+ mem.setValue(inp)
+ entryOut = entryIn
+ entryOut.Values, err = unmarshalCorpusFile(inp)
+ if err != nil {
+ return CorpusEntry{}, minimizeResponse{}, fmt.Errorf("workerClient.minimize unmarshaling provided value: %v", err)
+ }
+ for i, v := range entryOut.Values {
+ if !isMinimizable(reflect.TypeOf(v)) {
+ continue
+ }
+
+ wc.memMu <- mem
+ args.Index = i
+ c := call{Minimize: &args}
+ callErr := wc.callLocked(ctx, c, &resp)
+ mem, ok = <-wc.memMu
+ if !ok {
+ return CorpusEntry{}, minimizeResponse{}, errSharedMemClosed
+ }
+
+ if callErr != nil {
+ retErr = callErr
+ if !mem.header().rawInMem {
+ // An unrecoverable error occurred before minimization began.
+ return entryIn, minimizeResponse{}, retErr
+ }
+ // An unrecoverable error occurred during minimization. mem now
+ // holds the raw, unmarshalled bytes of entryIn.Values[i] that
+ // caused the error.
+ switch entryOut.Values[i].(type) {
+ case string:
+ entryOut.Values[i] = string(mem.valueCopy())
+ case []byte:
+ entryOut.Values[i] = mem.valueCopy()
+ default:
+ panic("impossible")
+ }
+ entryOut.Data = marshalCorpusFile(entryOut.Values...)
+ // Stop minimizing; another unrecoverable error is likely to occur.
+ break
+ }
+
+ if resp.WroteToMem {
+ // Minimization succeeded, and mem holds the marshaled data.
+ entryOut.Data = mem.valueCopy()
+ entryOut.Values, err = unmarshalCorpusFile(entryOut.Data)
+ if err != nil {
+ return CorpusEntry{}, minimizeResponse{}, fmt.Errorf("workerClient.minimize unmarshaling minimized value: %v", err)
+ }
+ }
+
+ // Prepare for next iteration of the loop.
+ if args.Timeout != 0 {
+ args.Timeout -= resp.Duration
+ if args.Timeout <= 0 {
+ break
+ }
+ }
+ if args.Limit != 0 {
+ args.Limit -= mem.header().count
+ if args.Limit <= 0 {
+ break
+ }
+ }
+ }
+ resp.Count = mem.header().count
+ h := sha256.Sum256(entryOut.Data)
+ entryOut.Path = fmt.Sprintf("%x", h[:4])
+ return entryOut, resp, retErr
+}
+
+// fuzz tells the worker to call the fuzz method. See workerServer.fuzz.
+func (wc *workerClient) fuzz(ctx context.Context, entryIn CorpusEntry, args fuzzArgs) (entryOut CorpusEntry, resp fuzzResponse, isInternalError bool, err error) {
+ wc.mu.Lock()
+ defer wc.mu.Unlock()
+
+ mem, ok := <-wc.memMu
+ if !ok {
+ return CorpusEntry{}, fuzzResponse{}, true, errSharedMemClosed
+ }
+ mem.header().count = 0
+ inp, err := corpusEntryData(entryIn)
+ if err != nil {
+ wc.memMu <- mem
+ return CorpusEntry{}, fuzzResponse{}, true, err
+ }
+ mem.setValue(inp)
+ wc.memMu <- mem
+
+ c := call{Fuzz: &args}
+ callErr := wc.callLocked(ctx, c, &resp)
+ if resp.InternalErr != "" {
+ return CorpusEntry{}, fuzzResponse{}, true, errors.New(resp.InternalErr)
+ }
+ mem, ok = <-wc.memMu
+ if !ok {
+ return CorpusEntry{}, fuzzResponse{}, true, errSharedMemClosed
+ }
+ defer func() { wc.memMu <- mem }()
+ resp.Count = mem.header().count
+
+ if !bytes.Equal(inp, mem.valueRef()) {
+ return CorpusEntry{}, fuzzResponse{}, true, errors.New("workerServer.fuzz modified input")
+ }
+ needEntryOut := callErr != nil || resp.Err != "" ||
+ (!args.Warmup && resp.CoverageData != nil)
+ if needEntryOut {
+ valuesOut, err := unmarshalCorpusFile(inp)
+ if err != nil {
+ return CorpusEntry{}, fuzzResponse{}, true, fmt.Errorf("unmarshaling fuzz input value after call: %v", err)
+ }
+ wc.m.r.restore(mem.header().randState, mem.header().randInc)
+ if !args.Warmup {
+ // Only mutate the valuesOut if fuzzing actually occurred.
+ numMutations := ((resp.Count - 1) % chainedMutations) + 1
+ for i := int64(0); i < numMutations; i++ {
+ wc.m.mutate(valuesOut, cap(mem.valueRef()))
+ }
+ }
+ dataOut := marshalCorpusFile(valuesOut...)
+
+ h := sha256.Sum256(dataOut)
+ name := fmt.Sprintf("%x", h[:4])
+ entryOut = CorpusEntry{
+ Parent: entryIn.Path,
+ Path: name,
+ Data: dataOut,
+ Generation: entryIn.Generation + 1,
+ }
+ if args.Warmup {
+ // The bytes weren't mutated, so if entryIn was a seed corpus value,
+ // then entryOut is too.
+ entryOut.IsSeed = entryIn.IsSeed
+ }
+ }
+
+ return entryOut, resp, false, callErr
+}
+
+// ping tells the worker to call the ping method. See workerServer.ping.
+func (wc *workerClient) ping(ctx context.Context) error {
+ wc.mu.Lock()
+ defer wc.mu.Unlock()
+ c := call{Ping: &pingArgs{}}
+ var resp pingResponse
+ return wc.callLocked(ctx, c, &resp)
+}
+
+// callLocked sends an RPC from the coordinator to the worker process and waits
+// for the response. The callLocked may be cancelled with ctx.
+func (wc *workerClient) callLocked(ctx context.Context, c call, resp any) (err error) {
+ enc := json.NewEncoder(wc.fuzzIn)
+ dec := json.NewDecoder(&contextReader{ctx: ctx, r: wc.fuzzOut})
+ if err := enc.Encode(c); err != nil {
+ return err
+ }
+ return dec.Decode(resp)
+}
+
+// contextReader wraps a Reader with a Context. If the context is cancelled
+// while the underlying reader is blocked, Read returns immediately.
+//
+// This is useful for reading from a pipe. Closing a pipe file descriptor does
+// not unblock pending Reads on that file descriptor. All copies of the pipe's
+// other file descriptor (the write end) must be closed in all processes that
+// inherit it. This is difficult to do correctly in the situation we care about
+// (process group termination).
+type contextReader struct {
+ ctx context.Context
+ r io.Reader
+}
+
+func (cr *contextReader) Read(b []byte) (int, error) {
+ if ctxErr := cr.ctx.Err(); ctxErr != nil {
+ return 0, ctxErr
+ }
+ done := make(chan struct{})
+
+ // This goroutine may stay blocked after Read returns because the underlying
+ // read is blocked.
+ var n int
+ var err error
+ go func() {
+ n, err = cr.r.Read(b)
+ close(done)
+ }()
+
+ select {
+ case <-cr.ctx.Done():
+ return 0, cr.ctx.Err()
+ case <-done:
+ return n, err
+ }
+}