Adding upstream version 1.19.8.upstream/1.19.8 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 13:18:25 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 13:18:25 +0000
commit: 109be507377fe7f6e8819ac94041d3fdcdf6fd2f (patch)
tree: 2806a689f8fab4a2ec9fc949830ef270a91d667d /src/internal/fuzz/fuzz.go
parent: Initial commit. (diff)
download: golang-1.19-109be507377fe7f6e8819ac94041d3fdcdf6fd2f.tar.xz
golang-1.19-109be507377fe7f6e8819ac94041d3fdcdf6fd2f.zip
1 files changed, 1091 insertions, 0 deletions
diff --git a/src/internal/fuzz/fuzz.go b/src/internal/fuzz/fuzz.go
new file mode 100644
index 0000000..3ccf747
--- /dev/null
+++ b/src/internal/fuzz/fuzz.go
@@ -0,0 +1,1091 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fuzz provides common fuzzing functionality for tests built with
+// "go test" and for programs that use fuzzing functionality in the testing
+// package.
+package fuzz
+
+import (
+	"context"
+	"crypto/sha256"
+	"errors"
+	"fmt"
+	"internal/godebug"
+	"io"
+	"io/ioutil"
+	"math/bits"
+	"os"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"strings"
+	"sync"
+	"time"
+)
+
+// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
+// The zero value is valid for each field unless specified otherwise.
+type CoordinateFuzzingOpts struct {
+	// Log is a writer for logging progress messages and warnings.
+	// If nil, io.Discard will be used instead.
+	Log io.Writer
+
+	// Timeout is the amount of wall clock time to spend fuzzing after the corpus
+	// has loaded. If zero, there will be no time limit.
+	Timeout time.Duration
+
+	// Limit is the number of random values to generate and test. If zero,
+	// there will be no limit on the number of generated values.
+	Limit int64
+
+	// MinimizeTimeout is the amount of wall clock time to spend minimizing
+	// after discovering a crasher. If zero, there will be no time limit. If
+	// MinimizeTimeout and MinimizeLimit are both zero, then minimization will
+	// be disabled.
+	MinimizeTimeout time.Duration
+
+	// MinimizeLimit is the maximum number of calls to the fuzz function to be
+	// made while minimizing after finding a crash. If zero, there will be no
+	// limit. Calls to the fuzz function made when minimizing also count toward
+	// Limit. If MinimizeTimeout and MinimizeLimit are both zero, then
+	// minimization will be disabled.
+	MinimizeLimit int64
+
+	// Parallel is the number of worker processes to run in parallel. If zero,
+	// CoordinateFuzzing will run GOMAXPROCS workers. If Limit is positive and
+	// smaller than Parallel, CoordinateFuzzing starts only Limit workers.
+	Parallel int
+
+	// Seed is a list of seed values added by the fuzz target with testing.F.Add
+	// and in testdata.
+	Seed []CorpusEntry
+
+	// Types is the list of types which make up a corpus entry.
+	// Types must be set and must match values in Seed.
+	Types []reflect.Type
+
+	// CorpusDir is a directory where files containing values that crash the
+	// code being tested may be written. CorpusDir must be set.
+	CorpusDir string
+
+	// CacheDir is a directory containing additional "interesting" values.
+	// The fuzzer may derive new values from these, and may write new values here.
+	CacheDir string
+}
+
+// CoordinateFuzzing creates several worker processes and communicates with
+// them to test random inputs that could trigger crashes and expose bugs.
+// The worker processes run the same binary in the same directory with the
+// same environment variables as the coordinator process. Workers also run
+// with the same arguments as the coordinator, except with the -test.fuzzworker
+// flag prepended to the argument list.
+//
+// If a crash occurs, the function will return an error containing information
+// about the crash, which can be reported to the user.
+//
+// CoordinateFuzzing returns immediately with ctx.Err() if ctx is already done.
+// Otherwise it does not return until every worker it started has terminated.
+// Cancellation errors and interrupt errors are suppressed, so a nil result
+// does not by itself mean that the full time or execution budget was used.
+func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) {
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	if opts.Log == nil {
+		opts.Log = io.Discard
+	}
+	if opts.Parallel == 0 {
+		opts.Parallel = runtime.GOMAXPROCS(0)
+	}
+	if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit {
+		// Don't start more workers than we need.
+		opts.Parallel = int(opts.Limit)
+	}
+
+	c, err := newCoordinator(opts)
+	if err != nil {
+		return err
+	}
+
+	if opts.Timeout > 0 {
+		var cancel func()
+		ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
+		defer cancel()
+	}
+
+	// fuzzCtx is used to stop workers, for example, after finding a crasher.
+	fuzzCtx, cancelWorkers := context.WithCancel(ctx)
+	defer cancelWorkers()
+	doneC := ctx.Done()
+
+	// stop is called when a worker encounters a fatal error.
+	// It records the first meaningful error in fuzzErr, cancels the workers
+	// once, and disables doneC so the event loop does not spin on it.
+	var fuzzErr error
+	stopping := false
+	stop := func(err error) {
+		if err == fuzzCtx.Err() || isInterruptError(err) {
+			// Suppress cancellation errors and terminations due to SIGINT.
+			// The messages are not helpful since either the user triggered the error
+			// (with ^C) or another more helpful message will be printed (a crasher).
+			err = nil
+		}
+		if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) {
+			fuzzErr = err
+		}
+		if stopping {
+			return
+		}
+		stopping = true
+		cancelWorkers()
+		doneC = nil
+	}
+
+	// Ensure that any crash we find is written to the corpus, even if an error
+	// or interruption occurs while minimizing it.
+	crashWritten := false
+	defer func() {
+		if c.crashMinimizing == nil || crashWritten {
+			return
+		}
+		werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir)
+		if werr != nil {
+			// err may be nil here (for example, when fuzzing was interrupted
+			// while minimizing). Wrapping a nil error with %w would render as
+			// "%!w(<nil>)", so report the write error on its own in that case.
+			if err == nil {
+				err = werr
+			} else {
+				err = fmt.Errorf("%w\n%v", err, werr)
+			}
+			return
+		}
+		if err == nil {
+			err = &crashError{
+				path: c.crashMinimizing.entry.Path,
+				err:  errors.New(c.crashMinimizing.crasherMsg),
+			}
+		}
+	}()
+
+	// Start workers.
+	// TODO(jayconrod): do we want to support fuzzing different binaries?
+	dir := "" // same as self
+	binPath := os.Args[0]
+	args := append([]string{"-test.fuzzworker"}, os.Args[1:]...)
+	env := os.Environ() // same as self
+
+	errC := make(chan error)
+	workers := make([]*worker, opts.Parallel)
+	for i := range workers {
+		var err error
+		workers[i], err = newWorker(c, dir, binPath, args, env)
+		if err != nil {
+			return err
+		}
+	}
+	for i := range workers {
+		w := workers[i]
+		go func() {
+			err := w.coordinate(fuzzCtx)
+			if fuzzCtx.Err() != nil || isInterruptError(err) {
+				err = nil
+			}
+			cleanErr := w.cleanup()
+			if err == nil {
+				err = cleanErr
+			}
+			errC <- err
+		}()
+	}
+
+	// Main event loop.
+	// Do not return until all workers have terminated. We avoid a deadlock by
+	// receiving messages from workers even after ctx is cancelled.
+	activeWorkers := len(workers)
+	statTicker := time.NewTicker(3 * time.Second)
+	defer statTicker.Stop()
+	defer c.logStats()
+
+	c.logStats()
+	for {
+		// A nil channel blocks forever in select, so leaving inputC or
+		// minimizeC nil disables the corresponding send case below.
+		var inputC chan fuzzInput
+		input, ok := c.peekInput()
+		if ok && c.crashMinimizing == nil && !stopping {
+			inputC = c.inputC
+		}
+
+		var minimizeC chan fuzzMinimizeInput
+		minimizeInput, ok := c.peekMinimizeInput()
+		if ok && !stopping {
+			minimizeC = c.minimizeC
+		}
+
+		select {
+		case <-doneC:
+			// Interrupted, cancelled, or timed out.
+			// stop sets doneC to nil so we don't busy wait here.
+			stop(ctx.Err())
+
+		case err := <-errC:
+			// A worker terminated, possibly after encountering a fatal error.
+			stop(err)
+			activeWorkers--
+			if activeWorkers == 0 {
+				return fuzzErr
+			}
+
+		case result := <-c.resultC:
+			// Received response from worker.
+			if stopping {
+				break
+			}
+			c.updateStats(result)
+
+			if result.crasherMsg != "" {
+				if c.warmupRun() && result.entry.IsSeed {
+					target := filepath.Base(c.opts.CorpusDir)
+					fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent))
+					stop(errors.New(result.crasherMsg))
+					break
+				}
+				if c.canMinimize() && result.canMinimize {
+					if c.crashMinimizing != nil {
+						// This crash is not minimized, and another crash is being minimized.
+						// Ignore this one and wait for the other one to finish.
+						break
+					}
+					// Found a crasher but haven't yet attempted to minimize it.
+					// Send it back to a worker for minimization. Disable inputC so
+					// other workers don't continue fuzzing.
+					c.crashMinimizing = &result
+					fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data))
+					c.queueForMinimization(result, nil)
+				} else if !crashWritten {
+					// Found a crasher that's either minimized or not minimizable.
+					// Write to corpus and stop.
+					err := writeToCorpus(&result.entry, opts.CorpusDir)
+					if err == nil {
+						crashWritten = true
+						err = &crashError{
+							path: result.entry.Path,
+							err:  errors.New(result.crasherMsg),
+						}
+					}
+					if shouldPrintDebugInfo() {
+						fmt.Fprintf(
+							c.opts.Log,
+							"DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n",
+							c.elapsed(),
+							result.entry.Path,
+							result.entry.Parent,
+							result.entry.Generation,
+							len(result.entry.Data),
+							result.entryDuration,
+						)
+					}
+					stop(err)
+				}
+			} else if result.coverageData != nil {
+				if c.warmupRun() {
+					if shouldPrintDebugInfo() {
+						fmt.Fprintf(
+							c.opts.Log,
+							"DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n",
+							c.elapsed(),
+							result.entry.Parent,
+							countBits(diffCoverage(c.coverageMask, result.coverageData)),
+							len(result.entry.Data),
+							result.entryDuration,
+						)
+					}
+					c.updateCoverage(result.coverageData)
+					c.warmupInputLeft--
+					if c.warmupInputLeft == 0 {
+						fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
+						if shouldPrintDebugInfo() {
+							fmt.Fprintf(
+								c.opts.Log,
+								"DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n",
+								c.elapsed(),
+								len(c.corpus.entries),
+								countBits(c.coverageMask),
+							)
+						}
+					}
+				} else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil {
+					// Found a value that expanded coverage.
+					// It's not a crasher, but we may want to add it to the on-disk
+					// corpus and prioritize it for future fuzzing.
+					// TODO(jayconrod, katiehockman): Prioritize fuzzing these
+					// values which expanded coverage, perhaps based on the
+					// number of new edges that this result expanded.
+					// TODO(jayconrod, katiehockman): Don't write a value that's already
+					// in the corpus.
+					if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil {
+						// Send back to workers to find a smaller value that preserves
+						// at least one new coverage bit.
+						c.queueForMinimization(result, keepCoverage)
+					} else {
+						// Update the coordinator's coverage mask and save the value.
+						inputSize := len(result.entry.Data)
+						entryNew, err := c.addCorpusEntries(true, result.entry)
+						if err != nil {
+							stop(err)
+							break
+						}
+						if !entryNew {
+							continue
+						}
+						c.updateCoverage(keepCoverage)
+						c.inputQueue.enqueue(result.entry)
+						c.interestingCount++
+						if shouldPrintDebugInfo() {
+							fmt.Fprintf(
+								c.opts.Log,
+								"DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
+								c.elapsed(),
+								result.entry.Path,
+								result.entry.Parent,
+								result.entry.Generation,
+								countBits(keepCoverage),
+								countBits(c.coverageMask),
+								inputSize,
+								result.entryDuration,
+							)
+						}
+					}
+				} else {
+					if shouldPrintDebugInfo() {
+						fmt.Fprintf(
+							c.opts.Log,
+							"DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n",
+							c.elapsed(),
+							result.entry.Path,
+							result.entry.Parent,
+							result.canMinimize,
+						)
+					}
+				}
+			} else if c.warmupRun() {
+				// No error or coverage data was reported for this input during
+				// warmup, so continue processing results.
+				c.warmupInputLeft--
+				if c.warmupInputLeft == 0 {
+					fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
+					if shouldPrintDebugInfo() {
+						fmt.Fprintf(
+							c.opts.Log,
+							"DEBUG finished testing-only phase, elapsed: %s, entries: %d\n",
+							time.Since(c.startTime),
+							len(c.corpus.entries),
+						)
+					}
+				}
+			}
+
+			// Once the result has been processed, stop the worker if we
+			// have reached the fuzzing limit.
+			if c.opts.Limit > 0 && c.count >= c.opts.Limit {
+				stop(nil)
+			}
+
+		case inputC <- input:
+			// Sent the next input to a worker.
+			c.sentInput(input)
+
+		case minimizeC <- minimizeInput:
+			// Sent the next input for minimization to a worker.
+			c.sentMinimizeInput(minimizeInput)
+
+		case <-statTicker.C:
+			c.logStats()
+		}
+	}
+
+	// TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus,
+	// write to the cache instead.
+}
+
+// crashError pairs the error produced by a crashing input with the path of
+// the corpus file that input was saved to. The testing framework uses the
+// path to report a command that re-runs that specific input.
+type crashError struct {
+	path string // file the crashing input was written to
+	err  error  // underlying crash error
+}
+
+// Error returns the message of the wrapped crash error.
+func (e *crashError) Error() string {
+	wrapped := e.err
+	return wrapped.Error()
+}
+
+// Unwrap returns the wrapped crash error so that errors.Is and errors.As can
+// inspect it.
+func (e *crashError) Unwrap() error {
+	wrapped := e.err
+	return wrapped
+}
+
+// CrashPath returns the path of the file the crashing input was saved to.
+func (e *crashError) CrashPath() string {
+	saved := e.path
+	return saved
+}
+
+// corpus is the in-memory collection of corpus entries together with a set
+// of content hashes used to detect duplicates.
+type corpus struct {
+	// entries holds the corpus entries in the order they were added.
+	entries []CorpusEntry
+	// hashes records the SHA-256 digest of every entry's raw data that has
+	// been added, so that duplicates can be skipped.
+	hashes map[[sha256.Size]byte]bool
+}
+
+// addCorpusEntries appends each of entries to the in-memory corpus unless an
+// entry with the same SHA-256 content hash is already present, in which case
+// that entry is skipped. When addToCache is true, every newly added entry is
+// first written to the cache directory and its in-memory Data is dropped.
+//
+// The boolean result is true only if none of the entries was a duplicate.
+// If reading or writing an entry fails, addCorpusEntries stops and returns
+// false along with that error; entries processed before the failure remain
+// in the corpus.
+func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) {
+	allUnique := true
+	for i := range entries {
+		// Work on a copy so the caller's slice element is left untouched.
+		entry := entries[i]
+		raw, err := corpusEntryData(entry)
+		if err != nil {
+			return false, err
+		}
+		sum := sha256.Sum256(raw)
+		if seen := c.corpus.hashes[sum]; seen {
+			allUnique = false
+			continue
+		}
+		if addToCache {
+			if err := writeToCorpus(&entry, c.opts.CacheDir); err != nil {
+				return false, err
+			}
+			// The bytes now live on disk; keeping them in memory as well
+			// would make the corpus consume a significant amount of memory.
+			entry.Data = nil
+		}
+		c.corpus.hashes[sum] = true
+		c.corpus.entries = append(c.corpus.entries, entry)
+	}
+	return allUnique, nil
+}
+
+// CorpusEntry represents an individual input for fuzzing.
+//
+// We must use an equivalent type in the testing and testing/internal/testdeps
+// packages, but testing can't import this package directly, and we don't want
+// to export this type from testing. Instead, we use the same struct type and
+// use a type alias (not a defined type) for convenience.
+type CorpusEntry = struct {
+	// Parent is reported as "parent" in debug logging.
+	// NOTE(review): presumably the name of the entry this one was derived
+	// from; confirm against the worker code.
+	Parent string
+
+	// Path is the path of the corpus file, if the entry was loaded from disk.
+	// For other entries, including seed values provided by f.Add, Path is the
+	// name of the test, e.g. seed#0 or its hash.
+	Path string
+
+	// Data is the raw input data. Data should only be populated for seed
+	// values. For on-disk corpus files, Data will be nil, as it will be loaded
+	// from disk using Path.
+	Data []byte
+
+	// Values is the unmarshaled values from a corpus file.
+	Values []any
+
+	// Generation is reported as "gen" in debug logging.
+	// NOTE(review): presumably the number of mutation steps separating this
+	// entry from its original ancestor; confirm against the worker code.
+	Generation int
+
+	// IsSeed indicates whether this entry is part of the seed corpus.
+	IsSeed bool
+}
+
+// corpusEntryData returns the raw input bytes for ce. If ce.Data is nil the
+// bytes are read from the file at ce.Path; otherwise ce.Data is returned
+// as-is (not copied).
+func corpusEntryData(ce CorpusEntry) ([]byte, error) {
+	if ce.Data == nil {
+		// Nothing held in memory: fall back to the on-disk file.
+		return os.ReadFile(ce.Path)
+	}
+	return ce.Data, nil
+}
+
+// fuzzInput is the message the coordinator sends on inputC to hand a worker
+// a starting value to test, along with the budget for that work.
+type fuzzInput struct {
+	// entry is the value to test initially. The worker will randomly mutate
+	// values from this starting point.
+	entry CorpusEntry
+
+	// timeout is the time to spend fuzzing variations of this input,
+	// not including starting or cleaning up.
+	timeout time.Duration
+
+	// limit is the maximum number of calls to the fuzz function the worker may
+	// make. The worker may make fewer calls, for example, if it finds an
+	// error early. If limit is zero, there is no limit on calls to the
+	// fuzz function.
+	limit int64
+
+	// warmup indicates whether this is a warmup input before fuzzing begins. If
+	// true, the input should not be fuzzed.
+	warmup bool
+
+	// coverageData reflects the coordinator's current coverageMask.
+	coverageData []byte
+}
+
+// fuzzResult is the message a worker sends on resultC to report the outcome
+// of testing or minimizing an input.
+type fuzzResult struct {
+	// entry is an interesting value or a crasher.
+	entry CorpusEntry
+
+	// crasherMsg is an error message from a crash. It's "" if no crash was found.
+	crasherMsg string
+
+	// canMinimize is true if the worker should attempt to minimize this result.
+	// It may be false because an attempt has already been made.
+	canMinimize bool
+
+	// coverageData is set if the worker found new coverage.
+	coverageData []byte
+
+	// limit is the number of values the coordinator asked the worker
+	// to test. 0 if there was no limit.
+	limit int64
+
+	// count is the number of values the worker actually tested.
+	count int64
+
+	// totalDuration is the time the worker spent testing inputs.
+	totalDuration time.Duration
+
+	// entryDuration is the time the worker spent executing an interesting
+	// result.
+	entryDuration time.Duration
+}
+
+// fuzzMinimizeInput is the message the coordinator sends on minimizeC to ask
+// a worker to find a smaller input that still crashes or still preserves new
+// coverage.
+type fuzzMinimizeInput struct {
+	// entry is an interesting value or crasher to minimize.
+	entry CorpusEntry
+
+	// crasherMsg is an error message from a crash. It's "" if no crash was found.
+	// If set, the worker will attempt to find a smaller input that also produces
+	// an error, though not necessarily the same error.
+	crasherMsg string
+
+	// limit is the maximum number of calls to the fuzz function the worker may
+	// make. The worker may make fewer calls, for example, if it can't reproduce
+	// an error. If limit is zero, there is no limit on calls to the fuzz function.
+	limit int64
+
+	// timeout is the time to spend minimizing this input.
+	// A zero timeout means no limit.
+	timeout time.Duration
+
+	// keepCoverage is a set of coverage bits that entry found that were not in
+	// the coordinator's combined set. When minimizing, the worker should find an
+	// input that preserves at least one of these bits. keepCoverage is nil for
+	// crashing inputs.
+	keepCoverage []byte
+}
+
+// coordinator holds channels that workers can use to communicate with
+// the coordinator, along with the bookkeeping (counters, corpus, queues and
+// coverage mask) the coordinator needs while fuzzing.
+type coordinator struct {
+	// opts is the set of options the coordinator was created with.
+	opts CoordinateFuzzingOpts
+
+	// startTime is the time we started the workers after loading the corpus.
+	// Used for logging.
+	startTime time.Time
+
+	// inputC is sent values to fuzz by the coordinator. Any worker may receive
+	// values from this channel. Workers send results to resultC.
+	inputC chan fuzzInput
+
+	// minimizeC is sent values to minimize by the coordinator. Any worker may
+	// receive values from this channel. Workers send results to resultC.
+	minimizeC chan fuzzMinimizeInput
+
+	// resultC is sent results of fuzzing by workers. The coordinator
+	// receives these. Multiple types of messages are allowed.
+	resultC chan fuzzResult
+
+	// count is the number of values fuzzed so far.
+	count int64
+
+	// countLastLog is the number of values fuzzed when the output was last
+	// logged.
+	countLastLog int64
+
+	// timeLastLog is the time at which the output was last logged.
+	timeLastLog time.Time
+
+	// interestingCount is the number of unique interesting values which have
+	// been found this execution.
+	interestingCount int
+
+	// warmupInputCount is the count of all entries in the corpus which will
+	// need to be received from workers to run once during warmup, but not fuzz.
+	// This could be for coverage data, or only for the purposes of verifying
+	// that the seed corpus doesn't have any crashers. See warmupRun.
+	warmupInputCount int
+
+	// warmupInputLeft is the number of entries in the corpus which still need
+	// to be received from workers to run once during warmup, but not fuzz.
+	// See warmupRun.
+	warmupInputLeft int
+
+	// duration is the time spent fuzzing inside workers, not counting time
+	// starting up or tearing down.
+	duration time.Duration
+
+	// countWaiting is the number of fuzzing executions the coordinator is
+	// waiting on workers to complete.
+	countWaiting int64
+
+	// corpus is a set of interesting values, including the seed corpus and
+	// generated values that workers reported as interesting.
+	corpus corpus
+
+	// minimizationAllowed is true if one or more of the types of fuzz
+	// function's parameters can be minimized.
+	minimizationAllowed bool
+
+	// inputQueue is a queue of inputs that workers should try fuzzing. This is
+	// initially populated from the seed corpus and cached inputs. More inputs
+	// may be added as new coverage is discovered.
+	inputQueue queue
+
+	// minimizeQueue is a queue of inputs that caused errors or exposed new
+	// coverage. Workers should attempt to find smaller inputs that do the
+	// same thing.
+	minimizeQueue queue
+
+	// crashMinimizing is the crash that is currently being minimized.
+	// It is nil when no crash is being minimized.
+	crashMinimizing *fuzzResult
+
+	// coverageMask aggregates coverage that was found for all inputs in the
+	// corpus. Each byte represents a single basic execution block. Each set bit
+	// within the byte indicates that an input has triggered that block at least
+	// 1 << n times, where n is the position of the bit in the byte. For example, a
+	// value of 12 indicates that separate inputs have triggered this block
+	// between 4-7 times and 8-15 times.
+	coverageMask []byte
+}
+
+// newCoordinator builds a coordinator from opts. It marshals any seed entry
+// that has Values but no Data, loads the seed and cached corpus via readCache,
+// decides whether minimization is allowed, and fills the input queue for the
+// warmup run. If the resulting corpus is empty, an entry made of the zero
+// value of every type in opts.Types is added so that fuzzing has a starting
+// point.
+//
+// It returns an error if the corpus cannot be read or if an entry cannot be
+// added to it.
+func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
+	// Make sure all of the seed corpus has marshalled data.
+	for i := range opts.Seed {
+		if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil {
+			opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
+		}
+	}
+	c := &coordinator{
+		opts:        opts,
+		startTime:   time.Now(),
+		inputC:      make(chan fuzzInput),
+		minimizeC:   make(chan fuzzMinimizeInput),
+		resultC:     make(chan fuzzResult),
+		timeLastLog: time.Now(),
+		corpus:      corpus{hashes: make(map[[sha256.Size]byte]bool)},
+	}
+	if err := c.readCache(); err != nil {
+		return nil, err
+	}
+	// Minimization is only attempted when a minimization budget was given
+	// and at least one of the fuzz function's parameter types supports it.
+	if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 {
+		for _, t := range opts.Types {
+			if isMinimizable(t) {
+				c.minimizationAllowed = true
+				break
+			}
+		}
+	}
+
+	covSize := len(coverage())
+	if covSize == 0 {
+		fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n")
+		// Even though a coverage-only run won't occur, we should still run all
+		// of the seed corpus to make sure there are no existing failures before
+		// we start fuzzing.
+		c.warmupInputCount = len(c.opts.Seed)
+		for _, e := range c.opts.Seed {
+			c.inputQueue.enqueue(e)
+		}
+	} else {
+		c.warmupInputCount = len(c.corpus.entries)
+		for _, e := range c.corpus.entries {
+			c.inputQueue.enqueue(e)
+		}
+		// Set c.coverageMask to a clean []byte full of zeros.
+		c.coverageMask = make([]byte, covSize)
+	}
+	c.warmupInputLeft = c.warmupInputCount
+
+	if len(c.corpus.entries) == 0 {
+		fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n")
+		var vals []any
+		for _, t := range opts.Types {
+			vals = append(vals, zeroValue(t))
+		}
+		data := marshalCorpusFile(vals...)
+		h := sha256.Sum256(data)
+		name := fmt.Sprintf("%x", h[:4])
+		// Propagate a failure instead of silently starting with a corpus
+		// that is still empty.
+		if _, err := c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}); err != nil {
+			return nil, err
+		}
+	}
+
+	return c, nil
+}
+
+// updateStats folds a worker's result into the coordinator's running totals:
+// the executions performed are added to count, the time spent is added to
+// duration, and the executions that had been requested are removed from
+// countWaiting.
+func (c *coordinator) updateStats(result fuzzResult) {
+	c.duration += result.totalDuration
+	c.countWaiting -= result.limit
+	c.count += result.count
+}
+
+// logStats writes a one-line progress report to c.opts.Log. The line reports
+// warmup progress while the warmup run is active, a "minimizing" notice while
+// a crash is being minimized, and otherwise the execution count with the rate
+// since the previous report. It then records the current count and time as
+// the baseline for the next report.
+func (c *coordinator) logStats() {
+	now := time.Now()
+	switch {
+	case c.warmupRun():
+		completed := c.warmupInputCount - c.warmupInputLeft
+		if coverageEnabled {
+			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), completed, c.warmupInputCount)
+		} else {
+			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), completed, c.warmupInputCount)
+		}
+	case c.crashMinimizing != nil:
+		fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed())
+	default:
+		// Executions per second since the last report.
+		sinceLast := now.Sub(c.timeLastLog).Seconds()
+		perSec := float64(c.count-c.countLastLog) / sinceLast
+		if coverageEnabled {
+			totalInteresting := c.warmupInputCount + c.interestingCount
+			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, perSec, c.interestingCount, totalInteresting)
+		} else {
+			fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, perSec)
+		}
+	}
+	c.countLastLog = c.count
+	c.timeLastLog = now
+}
+
+// peekInput reports the next input that should be handed to a worker, without
+// removing it from the input queue; the caller must call sentInput once the
+// input has actually been sent.
+//
+// It returns a zero value and false when the execution limit is already
+// covered by completed and in-flight work, or when the queue is empty while
+// the warmup run is still in progress. If the queue is empty after warmup,
+// it is refilled from the corpus first.
+//
+// During warmup the returned input has a limit of 1. Otherwise, when the
+// number of executions is limited, the returned limit is one worker's share
+// of the total, capped by the executions still remaining.
+func (c *coordinator) peekInput() (fuzzInput, bool) {
+	limited := c.opts.Limit > 0
+	if limited && c.count+c.countWaiting >= c.opts.Limit {
+		// Completed plus in-flight executions already cover the limit,
+		// so nothing more should be sent right now.
+		return fuzzInput{}, false
+	}
+	if c.inputQueue.len == 0 {
+		if c.warmupRun() {
+			// The coverage/testing-only run must finish before more
+			// inputs are sent.
+			return fuzzInput{}, false
+		}
+		c.refillInputQueue()
+	}
+
+	head, ok := c.inputQueue.peek()
+	if !ok {
+		panic("input queue empty after refill")
+	}
+	warming := c.warmupRun()
+	next := fuzzInput{
+		entry:   head.(CorpusEntry),
+		timeout: workerFuzzDuration,
+		warmup:  warming,
+	}
+	if c.coverageMask != nil {
+		// Hand the worker its own snapshot of the coverage mask.
+		snapshot := make([]byte, len(c.coverageMask))
+		copy(snapshot, c.coverageMask)
+		next.coverageData = snapshot
+	}
+
+	switch {
+	case warming:
+		// No fuzzing will occur, but it should count toward the limit set by
+		// -fuzztime.
+		next.limit = 1
+	case limited:
+		// Split the limit across workers, rounding up.
+		workers := int64(c.opts.Parallel)
+		share := c.opts.Limit / workers
+		if c.opts.Limit%workers != 0 {
+			share++
+		}
+		if left := c.opts.Limit - c.count - c.countWaiting; share > left {
+			share = left
+		}
+		next.limit = share
+	}
+	return next, true
+}
+
+// sentInput updates internal counters after an input is sent to c.inputC:
+// it removes the head of the input queue (the value peekInput reported) and
+// adds the input's limit to the number of executions being waited on.
+func (c *coordinator) sentInput(input fuzzInput) {
+	c.inputQueue.dequeue()
+	c.countWaiting += input.limit
+}
+
+// refillInputQueue enqueues every entry of the in-memory corpus onto the
+// input queue, in corpus order. peekInput calls it when the queue has run
+// empty.
+func (c *coordinator) refillInputQueue() {
+	entries := c.corpus.entries
+	for i := range entries {
+		c.inputQueue.enqueue(entries[i])
+	}
+}
+
+// queueForMinimization builds a fuzzMinimizeInput from result and
+// keepCoverage and appends it to the minimization queue to be sent to
+// workers. When result is a crasher (its crasherMsg is non-empty), the queue
+// is cleared first, so the crasher is the only pending item.
+func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) {
+	isCrasher := result.crasherMsg != ""
+	if isCrasher {
+		c.minimizeQueue.clear()
+	}
+
+	var next fuzzMinimizeInput
+	next.entry = result.entry
+	next.crasherMsg = result.crasherMsg
+	next.keepCoverage = keepCoverage
+	c.minimizeQueue.enqueue(next)
+}
+
+// peekMinimizeInput reports the next input that should be sent to workers for
+// minimization, without removing it from the minimization queue. It returns
+// a zero value and false when canMinimize reports false or the queue is
+// empty.
+//
+// The returned input carries the configured minimization timeout and a call
+// limit: MinimizeLimit when set; otherwise, when Limit is set, the whole
+// Limit for a crasher or one worker's share for a coverage-only input. In
+// either case the limit is capped by the executions remaining under Limit.
+func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) {
+	if !c.canMinimize() {
+		// Either minimization is not allowed at all, or the execution limit
+		// is already covered by completed and in-flight work.
+		return fuzzMinimizeInput{}, false
+	}
+	head, ok := c.minimizeQueue.peek()
+	if !ok {
+		return fuzzMinimizeInput{}, false
+	}
+	next := head.(fuzzMinimizeInput)
+
+	if t := c.opts.MinimizeTimeout; t > 0 {
+		next.timeout = t
+	}
+
+	switch {
+	case c.opts.MinimizeLimit > 0:
+		next.limit = c.opts.MinimizeLimit
+	case c.opts.Limit > 0 && next.crasherMsg != "":
+		next.limit = c.opts.Limit
+	case c.opts.Limit > 0:
+		// Coverage-only input: one worker's share, rounded up.
+		workers := int64(c.opts.Parallel)
+		share := c.opts.Limit / workers
+		if c.opts.Limit%workers != 0 {
+			share++
+		}
+		next.limit = share
+	}
+
+	if c.opts.Limit > 0 {
+		if left := c.opts.Limit - c.count - c.countWaiting; next.limit > left {
+			next.limit = left
+		}
+	}
+	return next, true
+}
+
+// sentMinimizeInput removes an input from the minimization queue after it's
+// sent to minimizeC, and adds the input's limit to the number of executions
+// being waited on.
+func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) {
+	c.minimizeQueue.dequeue()
+	c.countWaiting += input.limit
+}
+
+// warmupRun reports whether the coordinator is still in its warmup phase,
+// i.e. whether results for some warmup inputs are still outstanding. During
+// warmup, inputs are run without being mutated, either to gather baseline
+// coverage data for entries in the corpus or to test all of the seed corpus
+// for errors before fuzzing begins.
+//
+// The coordinator doesn't store coverage data in the cache with each input
+// because that data would be invalid when counter offsets in the test binary
+// change.
+//
+// When gathering coverage, the coordinator sends each entry to a worker to
+// gather coverage for that entry only, without fuzzing or minimizing. This
+// phase ends when all workers have finished, and the coordinator has a combined
+// coverage map.
+func (c *coordinator) warmupRun() bool {
+	outstanding := c.warmupInputLeft
+	return outstanding > 0
+}
+
+// updateCoverage ORs newCoverage into c.coverageMask and returns how many
+// bits were newly set by doing so. See the comment on coverageMask for the
+// format. It panics if newCoverage and c.coverageMask differ in length.
+func (c *coordinator) updateCoverage(newCoverage []byte) int {
+	if want, got := len(c.coverageMask), len(newCoverage); got != want {
+		panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", got, want))
+	}
+	added := 0
+	for i, counter := range newCoverage {
+		// Count only the bits that were not already present in the mask.
+		added += bits.OnesCount8(counter &^ c.coverageMask[i])
+		c.coverageMask[i] |= counter
+	}
+	return added
+}
+
+// canMinimize reports whether the coordinator should attempt to find smaller
+// inputs that reproduce a crash or new coverage. That is the case when
+// minimization is allowed and either there is no execution limit or the
+// completed plus in-flight executions are still below it.
+func (c *coordinator) canMinimize() bool {
+	if !c.minimizationAllowed {
+		return false
+	}
+	if c.opts.Limit == 0 {
+		return true
+	}
+	return c.count+c.countWaiting < c.opts.Limit
+}
+
+// elapsed returns the time since c.startTime, rounded to the nearest second.
+func (c *coordinator) elapsed() time.Duration {
+	sinceStart := time.Since(c.startTime)
+	return sinceStart.Round(time.Second)
+}
+
+// readCache builds the combined corpus: it first adds the seed values from
+// c.opts.Seed, then the entries read from the cache directory
+// c.opts.CacheDir (in GOCACHE/fuzz). Nothing is written to the cache.
+//
+// A *MalformedCorpusError from reading the cache is tolerated, and whatever
+// entries were returned alongside it are still added; any other error is
+// returned.
+//
+// TODO(fuzzing): need a mechanism that can remove values that
+// aren't useful anymore, for example, because they have the wrong type.
+func (c *coordinator) readCache() error {
+	if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil {
+		return err
+	}
+	cached, err := ReadCorpus(c.opts.CacheDir, c.opts.Types)
+	if err != nil {
+		// It's okay if some files in the cache directory are malformed and
+		// are not included in the corpus, but fail if it's an I/O error.
+		_, malformed := err.(*MalformedCorpusError)
+		if !malformed {
+			return err
+		}
+		// TODO(jayconrod,katiehockman): consider printing some kind of warning
+		// indicating the number of files which were skipped because they are
+		// malformed.
+	}
+	_, err = c.addCorpusEntries(false, cached...)
+	return err
+}
+
+// MalformedCorpusError is an error found while reading the corpus from the
+// filesystem. All of the errors are stored in the errs list. The testing
+// framework uses this to report malformed files in testdata.
+type MalformedCorpusError struct {
+	errs []error // the individual errors, in the order they were recorded
+}
+
+// Error returns the messages of all stored errors joined with newlines. It
+// returns the empty string when no errors are stored.
+func (e *MalformedCorpusError) Error() string {
+	lines := make([]string, len(e.errs))
+	for i := range e.errs {
+		lines[i] = e.errs[i].Error()
+	}
+	return strings.Join(lines, "\n")
+}
+
+// ReadCorpus reads the corpus from the provided dir. Every non-directory entry
+// of dir is read and decoded, and the returned corpus entries are guaranteed
+// to match the given types.
+//
+// If dir does not exist, ReadCorpus returns (nil, nil). If dir cannot be listed
+// or a file cannot be read, it returns a nil corpus and that error. Files that
+// fail to decode or do not match types are left out of the corpus; when there
+// is at least one such file, the valid entries are returned together with a
+// *MalformedCorpusError holding one error per skipped file.
+func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
+	infos, err := ioutil.ReadDir(dir)
+	switch {
+	case os.IsNotExist(err):
+		return nil, nil // No corpus to read
+	case err != nil:
+		return nil, fmt.Errorf("reading seed corpus from testdata: %v", err)
+	}
+
+	var (
+		corpus    []CorpusEntry
+		malformed []error
+	)
+	for _, info := range infos {
+		// TODO(jayconrod,katiehockman): determine when a file is a fuzzing input
+		// based on its name. We should only read files created by writeToCorpus.
+		// If we read ALL files, we won't be able to change the file format by
+		// changing the extension. We also won't be able to add files like
+		// README.txt explaining why the directory exists.
+		if info.IsDir() {
+			continue
+		}
+		path := filepath.Join(dir, info.Name())
+		raw, err := ioutil.ReadFile(path)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read corpus file: %v", err)
+		}
+		vals, err := readCorpusData(raw, types)
+		if err != nil {
+			// Remember the problem and keep going with the remaining files.
+			malformed = append(malformed, fmt.Errorf("%q: %v", path, err))
+			continue
+		}
+		corpus = append(corpus, CorpusEntry{Path: path, Values: vals})
+	}
+	if len(malformed) == 0 {
+		return corpus, nil
+	}
+	return corpus, &MalformedCorpusError{errs: malformed}
+}
+
+// readCorpusData decodes data with unmarshalCorpusFile and then uses
+// CheckCorpus to verify that the decoded values match types. A decoding
+// failure is reported with an "unmarshal: " prefix; a CheckCorpus failure is
+// returned unchanged.
+func readCorpusData(data []byte, types []reflect.Type) ([]any, error) {
+	decoded, err := unmarshalCorpusFile(data)
+	if err != nil {
+		return nil, fmt.Errorf("unmarshal: %v", err)
+	}
+	if checkErr := CheckCorpus(decoded, types); checkErr != nil {
+		return nil, checkErr
+	}
+	return decoded, nil
+}
+
+// CheckCorpus verifies that the types in vals match the expected types
+// provided. It returns an error if vals and types differ in length, or if the
+// dynamic type of any value differs from the type at the same position in
+// types. It returns nil otherwise.
+func CheckCorpus(vals []any, types []reflect.Type) error {
+	if got, want := len(vals), len(types); got != want {
+		return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", got, want)
+	}
+	// Collect every dynamic type so that a mismatch message can list them all.
+	gotTypes := make([]reflect.Type, 0, len(vals))
+	mismatch := false
+	for i, v := range vals {
+		t := reflect.TypeOf(v)
+		gotTypes = append(gotTypes, t)
+		if t != types[i] {
+			mismatch = true
+		}
+	}
+	if mismatch {
+		return fmt.Errorf("mismatched types in corpus entry: %v, want %v", gotTypes, types)
+	}
+	return nil
+}
+
+// writeToCorpus writes entry.Data to a file in dir whose name is the
+// hex-encoded SHA-256 sum of the data. If dir does not exist, it is created
+// along with any missing parents. entry.Path is set to the file's path before
+// anything is written, so it is populated even when an error is returned.
+//
+// The data is written with ioutil.WriteFile, which truncates and rewrites a
+// file that already exists. If the write fails, removal of the file is
+// attempted so that a partially written entry is not left behind, and the
+// write error is returned.
+func writeToCorpus(entry *CorpusEntry, dir string) (err error) {
+	digest := sha256.Sum256(entry.Data)
+	entry.Path = filepath.Join(dir, fmt.Sprintf("%x", digest))
+	if mkErr := os.MkdirAll(dir, 0777); mkErr != nil {
+		return mkErr
+	}
+	writeErr := ioutil.WriteFile(entry.Path, entry.Data, 0666)
+	if writeErr == nil {
+		return nil
+	}
+	os.Remove(entry.Path) // remove partially written file
+	return writeErr
+}
+
+// testName returns the last element of path, as computed by filepath.Base.
+func testName(path string) string {
+	base := filepath.Base(path)
+	return base
+}
+
+// zeroValue returns the first entry of zeroVals whose dynamic type is t. It
+// panics if no entry of zeroVals has that type.
+func zeroValue(t reflect.Type) any {
+	for i := range zeroVals {
+		if candidate := zeroVals[i]; reflect.TypeOf(candidate) == t {
+			return candidate
+		}
+	}
+	panic(fmt.Sprintf("unsupported type: %v", t))
+}
+
+// zeroVals holds one zero value for each type that zeroValue can return.
+// byte is an alias for uint8 and rune is an alias for int32, so the byte and
+// rune entries have the same dynamic types as the later uint8 and int32
+// entries.
+var zeroVals = []any{
+	[]byte(""),
+	"",
+	false,
+	byte(0),
+	rune(0),
+	float32(0),
+	float64(0),
+	int(0),
+	int8(0),
+	int16(0),
+	int32(0),
+	int64(0),
+	uint(0),
+	uint8(0),
+	uint16(0),
+	uint32(0),
+	uint64(0),
+}
+
+var (
+	// debugInfo caches the result computed by shouldPrintDebugInfo.
+	debugInfo bool
+	// debugInfoOnce ensures that debugInfo is computed at most once.
+	debugInfoOnce sync.Once
+)
+
+// shouldPrintDebugInfo reports whether the "fuzzdebug" setting read through
+// godebug.Get equals "1". The setting is read on the first call only; later
+// calls return the cached result.
+func shouldPrintDebugInfo() bool {
+	debugInfoOnce.Do(func() {
+		setting := godebug.Get("fuzzdebug")
+		debugInfo = setting == "1"
+	})
+	return debugInfo
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 13:18:25 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 13:18:25 +0000
commit	109be507377fe7f6e8819ac94041d3fdcdf6fd2f (patch)
tree	2806a689f8fab4a2ec9fc949830ef270a91d667d /src/internal/fuzz/fuzz.go
parent	Initial commit. (diff)
download	golang-1.19-109be507377fe7f6e8819ac94041d3fdcdf6fd2f.tar.xz golang-1.19-109be507377fe7f6e8819ac94041d3fdcdf6fd2f.zip