Diffstat (limited to 'src/cmd/go/internal/cache')
-rw-r--r-- | src/cmd/go/internal/cache/cache.go | 627
-rw-r--r-- | src/cmd/go/internal/cache/cache_test.go | 285
-rw-r--r-- | src/cmd/go/internal/cache/default.go | 105
-rw-r--r-- | src/cmd/go/internal/cache/hash.go | 190
-rw-r--r-- | src/cmd/go/internal/cache/hash_test.go | 51
-rw-r--r-- | src/cmd/go/internal/cache/prog.go | 427
6 files changed, 1685 insertions(+), 0 deletions(-)
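The files below add a content-addressed build cache: an ActionID (the hash of a complete description of a computation) maps to an OutputID (the hash of that computation's output), with DiskCache storing index entries and output files under GOCACHE, and ProgCache optionally delegating to a GOCACHEPROG child process. Before the diffs, here is a minimal sketch, not part of the change, of how a caller inside cmd/go might drive this API; the directory path, input file name, and the use/doWork helpers are hypothetical, and since the package is internal to cmd/go the sketch is illustrative rather than runnable elsewhere.

// Hypothetical illustration of the cache API added in this change.
package main

import (
	"fmt"
	"log"

	"cmd/go/internal/cache" // internal package; usable only within cmd/go
)

func main() {
	// Open expects an existing directory; it creates the 00..ff subdirectories itself.
	c, err := cache.Open("/path/to/existing/cache/dir") // hypothetical path
	if err != nil {
		log.Fatal(err)
	}

	// Build an ActionID from a salted hash of everything that defines the computation.
	h := cache.NewHash("example-action")
	h.Write([]byte("tool version and command line go here"))
	fileSum, err := cache.FileHash("input.go") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	fmt.Fprintf(h, "file input.go %x\n", fileSum)
	id := cache.ActionID(h.Sum())

	// On a hit, reuse the cached output file; on a miss, do the work and record it.
	if file, _, err := cache.GetFile(c, id); err == nil {
		use(file)
	} else {
		out := doWork()
		if err := cache.PutBytes(c, id, out); err != nil {
			log.Fatal(err)
		}
	}
	c.Close() // DiskCache.Close runs Trim
}

// use and doWork stand in for whatever consumes and produces the artifact.
func use(string)     {}
func doWork() []byte { return []byte("artifact") }

Note that Put hashes the output itself to derive the OutputID, so identical outputs produced by different actions share a single "-d" output file on disk.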
diff --git a/src/cmd/go/internal/cache/cache.go b/src/cmd/go/internal/cache/cache.go new file mode 100644 index 0000000..14b2dec --- /dev/null +++ b/src/cmd/go/internal/cache/cache.go @@ -0,0 +1,627 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cache implements a build artifact cache. +package cache + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "internal/godebug" + "io" + "io/fs" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/mmap" +) + +// An ActionID is a cache action key, the hash of a complete description of a +// repeatable computation (command line, environment variables, +// input file contents, executable contents). +type ActionID [HashSize]byte + +// An OutputID is a cache output key, the hash of an output of a computation. +type OutputID [HashSize]byte + +// Cache is the interface as used by the cmd/go. +type Cache interface { + // Get returns the cache entry for the provided ActionID. + // On miss, the error type should be of type *entryNotFoundError. + // + // After a success call to Get, OutputFile(Entry.OutputID) must + // exist on disk for until Close is called (at the end of the process). + Get(ActionID) (Entry, error) + + // Put adds an item to the cache. + // + // The seeker is only used to seek to the beginning. After a call to Put, + // the seek position is not guaranteed to be in any particular state. + // + // As a special case, if the ReadSeeker is of type noVerifyReadSeeker, + // the verification from GODEBUG=goverifycache=1 is skipped. + // + // After a success call to Get, OutputFile(Entry.OutputID) must + // exist on disk for until Close is called (at the end of the process). + Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error) + + // Close is called at the end of the go process. Implementations can do + // cache cleanup work at this phase, or wait for and report any errors from + // background cleanup work started earlier. Any cache trimming should in one + // process should not violate cause the invariants of this interface to be + // violated in another process. Namely, a cache trim from one process should + // not delete an ObjectID from disk that was recently Get or Put from + // another process. As a rule of thumb, don't trim things used in the last + // day. + Close() error + + // OutputFile returns the path on disk where OutputID is stored. + // + // It's only called after a successful get or put call so it doesn't need + // to return an error; it's assumed that if the previous get or put succeeded, + // it's already on disk. + OutputFile(OutputID) string + + // FuzzDir returns where fuzz files are stored. + FuzzDir() string +} + +// A Cache is a package cache, backed by a file system directory tree. +type DiskCache struct { + dir string + now func() time.Time +} + +// Open opens and returns the cache in the given directory. +// +// It is safe for multiple processes on a single machine to use the +// same cache directory in a local file system simultaneously. +// They will coordinate using operating system file locks and may +// duplicate effort but will not corrupt the cache. +// +// However, it is NOT safe for multiple processes on different machines +// to share a cache directory (for example, if the directory were stored +// in a network file system). 
File locking is notoriously unreliable in +// network file systems and may not suffice to protect the cache. +func Open(dir string) (*DiskCache, error) { + info, err := os.Stat(dir) + if err != nil { + return nil, err + } + if !info.IsDir() { + return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} + } + for i := 0; i < 256; i++ { + name := filepath.Join(dir, fmt.Sprintf("%02x", i)) + if err := os.MkdirAll(name, 0777); err != nil { + return nil, err + } + } + c := &DiskCache{ + dir: dir, + now: time.Now, + } + return c, nil +} + +// fileName returns the name of the file corresponding to the given id. +func (c *DiskCache) fileName(id [HashSize]byte, key string) string { + return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) +} + +// An entryNotFoundError indicates that a cache entry was not found, with an +// optional underlying reason. +type entryNotFoundError struct { + Err error +} + +func (e *entryNotFoundError) Error() string { + if e.Err == nil { + return "cache entry not found" + } + return fmt.Sprintf("cache entry not found: %v", e.Err) +} + +func (e *entryNotFoundError) Unwrap() error { + return e.Err +} + +const ( + // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" + hexSize = HashSize * 2 + entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 +) + +// verify controls whether to run the cache in verify mode. +// In verify mode, the cache always returns errMissing from Get +// but then double-checks in Put that the data being written +// exactly matches any existing entry. This provides an easy +// way to detect program behavior that would have been different +// had the cache entry been returned from Get. +// +// verify is enabled by setting the environment variable +// GODEBUG=gocacheverify=1. +var verify = false + +var errVerifyMode = errors.New("gocacheverify=1") + +// DebugTest is set when GODEBUG=gocachetest=1 is in the environment. +var DebugTest = false + +func init() { initEnv() } + +var ( + goCacheVerify = godebug.New("gocacheverify") + goDebugHash = godebug.New("gocachehash") + goCacheTest = godebug.New("gocachetest") +) + +func initEnv() { + if goCacheVerify.Value() == "1" { + goCacheVerify.IncNonDefault() + verify = true + } + if goDebugHash.Value() == "1" { + goDebugHash.IncNonDefault() + debugHash = true + } + if goCacheTest.Value() == "1" { + goCacheTest.IncNonDefault() + DebugTest = true + } +} + +// Get looks up the action ID in the cache, +// returning the corresponding output ID and file size, if any. +// Note that finding an output ID does not guarantee that the +// saved file for that output ID is still available. +func (c *DiskCache) Get(id ActionID) (Entry, error) { + if verify { + return Entry{}, &entryNotFoundError{Err: errVerifyMode} + } + return c.get(id) +} + +type Entry struct { + OutputID OutputID + Size int64 + Time time.Time // when added to cache +} + +// get is Get but does not respect verify mode, so that Put can use it. 
+func (c *DiskCache) get(id ActionID) (Entry, error) { + missing := func(reason error) (Entry, error) { + return Entry{}, &entryNotFoundError{Err: reason} + } + f, err := os.Open(c.fileName(id, "a")) + if err != nil { + return missing(err) + } + defer f.Close() + entry := make([]byte, entrySize+1) // +1 to detect whether f is too long + if n, err := io.ReadFull(f, entry); n > entrySize { + return missing(errors.New("too long")) + } else if err != io.ErrUnexpectedEOF { + if err == io.EOF { + return missing(errors.New("file is empty")) + } + return missing(err) + } else if n < entrySize { + return missing(errors.New("entry file incomplete")) + } + if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { + return missing(errors.New("invalid header")) + } + eid, entry := entry[3:3+hexSize], entry[3+hexSize:] + eout, entry := entry[1:1+hexSize], entry[1+hexSize:] + esize, entry := entry[1:1+20], entry[1+20:] + etime, entry := entry[1:1+20], entry[1+20:] + var buf [HashSize]byte + if _, err := hex.Decode(buf[:], eid); err != nil { + return missing(fmt.Errorf("decoding ID: %v", err)) + } else if buf != id { + return missing(errors.New("mismatched ID")) + } + if _, err := hex.Decode(buf[:], eout); err != nil { + return missing(fmt.Errorf("decoding output ID: %v", err)) + } + i := 0 + for i < len(esize) && esize[i] == ' ' { + i++ + } + size, err := strconv.ParseInt(string(esize[i:]), 10, 64) + if err != nil { + return missing(fmt.Errorf("parsing size: %v", err)) + } else if size < 0 { + return missing(errors.New("negative size")) + } + i = 0 + for i < len(etime) && etime[i] == ' ' { + i++ + } + tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) + if err != nil { + return missing(fmt.Errorf("parsing timestamp: %v", err)) + } else if tm < 0 { + return missing(errors.New("negative timestamp")) + } + + c.used(c.fileName(id, "a")) + + return Entry{buf, size, time.Unix(0, tm)}, nil +} + +// GetFile looks up the action ID in the cache and returns +// the name of the corresponding data file. +func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) { + entry, err = c.Get(id) + if err != nil { + return "", Entry{}, err + } + file = c.OutputFile(entry.OutputID) + info, err := os.Stat(file) + if err != nil { + return "", Entry{}, &entryNotFoundError{Err: err} + } + if info.Size() != entry.Size { + return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} + } + return file, entry, nil +} + +// GetBytes looks up the action ID in the cache and returns +// the corresponding output bytes. +// GetBytes should only be used for data that can be expected to fit in memory. +func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) { + entry, err := c.Get(id) + if err != nil { + return nil, entry, err + } + data, _ := os.ReadFile(c.OutputFile(entry.OutputID)) + if sha256.Sum256(data) != entry.OutputID { + return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")} + } + return data, entry, nil +} + +// GetMmap looks up the action ID in the cache and returns +// the corresponding output bytes. +// GetMmap should only be used for data that can be expected to fit in memory. 
+func GetMmap(c Cache, id ActionID) ([]byte, Entry, error) { + entry, err := c.Get(id) + if err != nil { + return nil, entry, err + } + md, err := mmap.Mmap(c.OutputFile(entry.OutputID)) + if err != nil { + return nil, Entry{}, err + } + if int64(len(md.Data)) != entry.Size { + return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} + } + return md.Data, entry, nil +} + +// OutputFile returns the name of the cache file storing output with the given OutputID. +func (c *DiskCache) OutputFile(out OutputID) string { + file := c.fileName(out, "d") + c.used(file) + return file +} + +// Time constants for cache expiration. +// +// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), +// to avoid causing many unnecessary inode updates. The mtimes therefore +// roughly reflect "time of last use" but may in fact be older by at most an hour. +// +// We scan the cache for entries to delete at most once per trimInterval (1 day). +// +// When we do scan the cache, we delete entries that have not been used for +// at least trimLimit (5 days). Statistics gathered from a month of usage by +// Go developers found that essentially all reuse of cached entries happened +// within 5 days of the previous reuse. See golang.org/issue/22990. +const ( + mtimeInterval = 1 * time.Hour + trimInterval = 24 * time.Hour + trimLimit = 5 * 24 * time.Hour +) + +// used makes a best-effort attempt to update mtime on file, +// so that mtime reflects cache access time. +// +// Because the reflection only needs to be approximate, +// and to reduce the amount of disk activity caused by using +// cache entries, used only updates the mtime if the current +// mtime is more than an hour old. This heuristic eliminates +// nearly all of the mtime updates that would otherwise happen, +// while still keeping the mtimes useful for cache trimming. +func (c *DiskCache) used(file string) { + info, err := os.Stat(file) + if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { + return + } + os.Chtimes(file, c.now(), c.now()) +} + +func (c *DiskCache) Close() error { return c.Trim() } + +// Trim removes old cache entries that are likely not to be reused. +func (c *DiskCache) Trim() error { + now := c.now() + + // We maintain in dir/trim.txt the time of the last completed cache trim. + // If the cache has been trimmed recently enough, do nothing. + // This is the common case. + // If the trim file is corrupt, detected if the file can't be parsed, or the + // trim time is too far in the future, attempt the trim anyway. It's possible that + // the cache was full when the corruption happened. Attempting a trim on + // an empty cache is cheap, so there wouldn't be a big performance hit in that case. + if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil { + if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil { + lastTrim := time.Unix(t, 0) + if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval { + return nil + } + } + } + + // Trim each of the 256 subdirectories. + // We subtract an additional mtimeInterval + // to account for the imprecision of our "last used" mtimes. + cutoff := now.Add(-trimLimit - mtimeInterval) + for i := 0; i < 256; i++ { + subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) + c.trimSubdir(subdir, cutoff) + } + + // Ignore errors from here: if we don't write the complete timestamp, the + // cache will appear older than it is, and we'll trim it again next time. 
+ var b bytes.Buffer + fmt.Fprintf(&b, "%d", now.Unix()) + if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil { + return err + } + + return nil +} + +// trimSubdir trims a single cache subdirectory. +func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) { + // Read all directory entries from subdir before removing + // any files, in case removing files invalidates the file offset + // in the directory scan. Also, ignore error from f.Readdirnames, + // because we don't care about reporting the error and we still + // want to process any entries found before the error. + f, err := os.Open(subdir) + if err != nil { + return + } + names, _ := f.Readdirnames(-1) + f.Close() + + for _, name := range names { + // Remove only cache entries (xxxx-a and xxxx-d). + if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { + continue + } + entry := filepath.Join(subdir, name) + info, err := os.Stat(entry) + if err == nil && info.ModTime().Before(cutoff) { + os.Remove(entry) + } + } +} + +// putIndexEntry adds an entry to the cache recording that executing the action +// with the given id produces an output with the given output id (hash) and size. +func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { + // Note: We expect that for one reason or another it may happen + // that repeating an action produces a different output hash + // (for example, if the output contains a time stamp or temp dir name). + // While not ideal, this is also not a correctness problem, so we + // don't make a big deal about it. In particular, we leave the action + // cache entries writable specifically so that they can be overwritten. + // + // Setting GODEBUG=gocacheverify=1 does make a big deal: + // in verify mode we are double-checking that the cache entries + // are entirely reproducible. As just noted, this may be unrealistic + // in some cases but the check is also useful for shaking out real bugs. + entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) + if verify && allowVerify { + old, err := c.get(id) + if err == nil && (old.OutputID != out || old.Size != size) { + // panic to show stack trace, so we can see what code is generating this cache entry. + msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) + panic(msg) + } + } + file := c.fileName(id, "a") + + // Copy file to cache directory. + mode := os.O_WRONLY | os.O_CREATE + f, err := os.OpenFile(file, mode, 0666) + if err != nil { + return err + } + _, err = f.WriteString(entry) + if err == nil { + // Truncate the file only *after* writing it. + // (This should be a no-op, but truncate just in case of previous corruption.) + // + // This differs from os.WriteFile, which truncates to 0 *before* writing + // via os.O_TRUNC. Truncating only after writing ensures that a second write + // of the same content to the same file is idempotent, and does not — even + // temporarily! — undo the effect of the first write. + err = f.Truncate(int64(len(entry))) + } + if closeErr := f.Close(); err == nil { + err = closeErr + } + if err != nil { + // TODO(bcmills): This Remove potentially races with another go command writing to file. + // Can we eliminate it? 
+ os.Remove(file) + return err + } + os.Chtimes(file, c.now(), c.now()) // mainly for tests + + return nil +} + +// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type +// that says that Cache.Put should skip the verify check +// (from GODEBUG=goverifycache=1). +type noVerifyReadSeeker struct { + io.ReadSeeker +} + +// Put stores the given output in the cache as the output for the action ID. +// It may read file twice. The content of file must not change between the two passes. +func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { + wrapper, isNoVerify := file.(noVerifyReadSeeker) + if isNoVerify { + file = wrapper.ReadSeeker + } + return c.put(id, file, !isNoVerify) +} + +// PutNoVerify is like Put but disables the verify check +// when GODEBUG=goverifycache=1 is set. +// It is meant for data that is OK to cache but that we expect to vary slightly from run to run, +// like test output containing times and the like. +func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) { + return c.Put(id, noVerifyReadSeeker{file}) +} + +func (c *DiskCache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { + // Compute output ID. + h := sha256.New() + if _, err := file.Seek(0, 0); err != nil { + return OutputID{}, 0, err + } + size, err := io.Copy(h, file) + if err != nil { + return OutputID{}, 0, err + } + var out OutputID + h.Sum(out[:0]) + + // Copy to cached output file (if not already present). + if err := c.copyFile(file, out, size); err != nil { + return out, size, err + } + + // Add to cache index. + return out, size, c.putIndexEntry(id, out, size, allowVerify) +} + +// PutBytes stores the given bytes in the cache as the output for the action ID. +func PutBytes(c Cache, id ActionID, data []byte) error { + _, _, err := c.Put(id, bytes.NewReader(data)) + return err +} + +// copyFile copies file into the cache, expecting it to have the given +// output ID and size, if that file is not present already. +func (c *DiskCache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { + name := c.fileName(out, "d") + info, err := os.Stat(name) + if err == nil && info.Size() == size { + // Check hash. + if f, err := os.Open(name); err == nil { + h := sha256.New() + io.Copy(h, f) + f.Close() + var out2 OutputID + h.Sum(out2[:0]) + if out == out2 { + return nil + } + } + // Hash did not match. Fall through and rewrite file. + } + + // Copy file to cache directory. + mode := os.O_RDWR | os.O_CREATE + if err == nil && info.Size() > size { // shouldn't happen but fix in case + mode |= os.O_TRUNC + } + f, err := os.OpenFile(name, mode, 0666) + if err != nil { + return err + } + defer f.Close() + if size == 0 { + // File now exists with correct size. + // Only one possible zero-length file, so contents are OK too. + // Early return here makes sure there's a "last byte" for code below. + return nil + } + + // From here on, if any of the I/O writing the file fails, + // we make a best-effort attempt to truncate the file f + // before returning, to avoid leaving bad bytes in the file. + + // Copy file to f, but also into h to double-check hash. 
+ if _, err := file.Seek(0, 0); err != nil { + f.Truncate(0) + return err + } + h := sha256.New() + w := io.MultiWriter(f, h) + if _, err := io.CopyN(w, file, size-1); err != nil { + f.Truncate(0) + return err + } + // Check last byte before writing it; writing it will make the size match + // what other processes expect to find and might cause them to start + // using the file. + buf := make([]byte, 1) + if _, err := file.Read(buf); err != nil { + f.Truncate(0) + return err + } + h.Write(buf) + sum := h.Sum(nil) + if !bytes.Equal(sum, out[:]) { + f.Truncate(0) + return fmt.Errorf("file content changed underfoot") + } + + // Commit cache file entry. + if _, err := f.Write(buf); err != nil { + f.Truncate(0) + return err + } + if err := f.Close(); err != nil { + // Data might not have been written, + // but file may look like it is the right size. + // To be extra careful, remove cached file. + os.Remove(name) + return err + } + os.Chtimes(name, c.now(), c.now()) // mainly for tests + + return nil +} + +// FuzzDir returns a subdirectory within the cache for storing fuzzing data. +// The subdirectory may not exist. +// +// This directory is managed by the internal/fuzz package. Files in this +// directory aren't removed by the 'go clean -cache' command or by Trim. +// They may be removed with 'go clean -fuzzcache'. +// +// TODO(#48526): make Trim remove unused files from this directory. +func (c *DiskCache) FuzzDir() string { + return filepath.Join(c.dir, "fuzz") +} diff --git a/src/cmd/go/internal/cache/cache_test.go b/src/cmd/go/internal/cache/cache_test.go new file mode 100644 index 0000000..a12f1d2 --- /dev/null +++ b/src/cmd/go/internal/cache/cache_test.go @@ -0,0 +1,285 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package cache + +import ( + "bytes" + "encoding/binary" + "fmt" + "internal/testenv" + "os" + "path/filepath" + "testing" + "time" +) + +func init() { + verify = false // even if GODEBUG is set +} + +func TestBasic(t *testing.T) { + dir, err := os.MkdirTemp("", "cachetest-") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + _, err = Open(filepath.Join(dir, "notexist")) + if err == nil { + t.Fatal(`Open("tmp/notexist") succeeded, want failure`) + } + + cdir := filepath.Join(dir, "c1") + if err := os.Mkdir(cdir, 0777); err != nil { + t.Fatal(err) + } + + c1, err := Open(cdir) + if err != nil { + t.Fatalf("Open(c1) (create): %v", err) + } + if err := c1.putIndexEntry(dummyID(1), dummyID(12), 13, true); err != nil { + t.Fatalf("addIndexEntry: %v", err) + } + if err := c1.putIndexEntry(dummyID(1), dummyID(2), 3, true); err != nil { // overwrite entry + t.Fatalf("addIndexEntry: %v", err) + } + if entry, err := c1.Get(dummyID(1)); err != nil || entry.OutputID != dummyID(2) || entry.Size != 3 { + t.Fatalf("c1.Get(1) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(2), 3) + } + + c2, err := Open(cdir) + if err != nil { + t.Fatalf("Open(c2) (reuse): %v", err) + } + if entry, err := c2.Get(dummyID(1)); err != nil || entry.OutputID != dummyID(2) || entry.Size != 3 { + t.Fatalf("c2.Get(1) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(2), 3) + } + if err := c2.putIndexEntry(dummyID(2), dummyID(3), 4, true); err != nil { + t.Fatalf("addIndexEntry: %v", err) + } + if entry, err := c1.Get(dummyID(2)); err != nil || entry.OutputID != dummyID(3) || entry.Size != 4 { + t.Fatalf("c1.Get(2) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(3), 4) + } +} + +func TestGrowth(t *testing.T) { + dir, err := os.MkdirTemp("", "cachetest-") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + c, err := Open(dir) + if err != nil { + t.Fatalf("Open: %v", err) + } + + n := 10000 + if testing.Short() { + n = 10 + } + + for i := 0; i < n; i++ { + if err := c.putIndexEntry(dummyID(i), dummyID(i*99), int64(i)*101, true); err != nil { + t.Fatalf("addIndexEntry: %v", err) + } + id := ActionID(dummyID(i)) + entry, err := c.Get(id) + if err != nil { + t.Fatalf("Get(%x): %v", id, err) + } + if entry.OutputID != dummyID(i*99) || entry.Size != int64(i)*101 { + t.Errorf("Get(%x) = %x, %d, want %x, %d", id, entry.OutputID, entry.Size, dummyID(i*99), int64(i)*101) + } + } + for i := 0; i < n; i++ { + id := ActionID(dummyID(i)) + entry, err := c.Get(id) + if err != nil { + t.Fatalf("Get2(%x): %v", id, err) + } + if entry.OutputID != dummyID(i*99) || entry.Size != int64(i)*101 { + t.Errorf("Get2(%x) = %x, %d, want %x, %d", id, entry.OutputID, entry.Size, dummyID(i*99), int64(i)*101) + } + } +} + +func TestVerifyPanic(t *testing.T) { + os.Setenv("GODEBUG", "gocacheverify=1") + initEnv() + defer func() { + os.Unsetenv("GODEBUG") + verify = false + }() + + if !verify { + t.Fatal("initEnv did not set verify") + } + + dir, err := os.MkdirTemp("", "cachetest-") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + c, err := Open(dir) + if err != nil { + t.Fatalf("Open: %v", err) + } + + id := ActionID(dummyID(1)) + if err := PutBytes(c, id, []byte("abc")); err != nil { + t.Fatal(err) + } + + defer func() { + if err := recover(); err != nil { + t.Log(err) + return + } + }() + PutBytes(c, id, []byte("def")) + t.Fatal("mismatched Put did not panic in verify mode") +} + +func dummyID(x int) [HashSize]byte { + var out 
[HashSize]byte + binary.LittleEndian.PutUint64(out[:], uint64(x)) + return out +} + +func TestCacheTrim(t *testing.T) { + dir, err := os.MkdirTemp("", "cachetest-") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + c, err := Open(dir) + if err != nil { + t.Fatalf("Open: %v", err) + } + const start = 1000000000 + now := int64(start) + c.now = func() time.Time { return time.Unix(now, 0) } + + checkTime := func(name string, mtime int64) { + t.Helper() + file := filepath.Join(c.dir, name[:2], name) + info, err := os.Stat(file) + if err != nil { + t.Fatal(err) + } + if info.ModTime().Unix() != mtime { + t.Fatalf("%s mtime = %d, want %d", name, info.ModTime().Unix(), mtime) + } + } + + id := ActionID(dummyID(1)) + PutBytes(c, id, []byte("abc")) + entry, _ := c.Get(id) + PutBytes(c, ActionID(dummyID(2)), []byte("def")) + mtime := now + checkTime(fmt.Sprintf("%x-a", id), mtime) + checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime) + + // Get should not change recent mtimes. + now = start + 10 + c.Get(id) + checkTime(fmt.Sprintf("%x-a", id), mtime) + checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime) + + // Get should change distant mtimes. + now = start + 5000 + mtime2 := now + if _, err := c.Get(id); err != nil { + t.Fatal(err) + } + c.OutputFile(entry.OutputID) + checkTime(fmt.Sprintf("%x-a", id), mtime2) + checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime2) + + // Trim should leave everything alone: it's all too new. + if err := c.Trim(); err != nil { + if testenv.SyscallIsNotSupported(err) { + t.Skipf("skipping: Trim is unsupported (%v)", err) + } + t.Fatal(err) + } + if _, err := c.Get(id); err != nil { + t.Fatal(err) + } + c.OutputFile(entry.OutputID) + data, err := os.ReadFile(filepath.Join(dir, "trim.txt")) + if err != nil { + t.Fatal(err) + } + checkTime(fmt.Sprintf("%x-a", dummyID(2)), start) + + // Trim less than a day later should not do any work at all. + now = start + 80000 + if err := c.Trim(); err != nil { + t.Fatal(err) + } + if _, err := c.Get(id); err != nil { + t.Fatal(err) + } + c.OutputFile(entry.OutputID) + data2, err := os.ReadFile(filepath.Join(dir, "trim.txt")) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(data, data2) { + t.Fatalf("second trim did work: %q -> %q", data, data2) + } + + // Fast forward and do another trim just before the 5 day cutoff. + // Note that because of usedQuantum the cutoff is actually 5 days + 1 hour. + // We used c.Get(id) just now, so 5 days later it should still be kept. + // On the other hand almost a full day has gone by since we wrote dummyID(2) + // and we haven't looked at it since, so 5 days later it should be gone. + now += 5 * 86400 + checkTime(fmt.Sprintf("%x-a", dummyID(2)), start) + if err := c.Trim(); err != nil { + t.Fatal(err) + } + if _, err := c.Get(id); err != nil { + t.Fatal(err) + } + c.OutputFile(entry.OutputID) + mtime3 := now + if _, err := c.Get(dummyID(2)); err == nil { // haven't done a Get for this since original write above + t.Fatalf("Trim did not remove dummyID(2)") + } + + // The c.Get(id) refreshed id's mtime again. + // Check that another 5 days later it is still not gone, + // but check by using checkTime, which doesn't bring mtime forward. + now += 5 * 86400 + if err := c.Trim(); err != nil { + t.Fatal(err) + } + checkTime(fmt.Sprintf("%x-a", id), mtime3) + checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime3) + + // Half a day later Trim should still be a no-op, because there was a Trim recently. 
+ // Even though the entry for id is now old enough to be trimmed, + // it gets a reprieve until the time comes for a new Trim scan. + now += 86400 / 2 + if err := c.Trim(); err != nil { + t.Fatal(err) + } + checkTime(fmt.Sprintf("%x-a", id), mtime3) + checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime3) + + // Another half a day later, Trim should actually run, and it should remove id. + now += 86400/2 + 1 + if err := c.Trim(); err != nil { + t.Fatal(err) + } + if _, err := c.Get(dummyID(1)); err == nil { + t.Fatal("Trim did not remove dummyID(1)") + } +} diff --git a/src/cmd/go/internal/cache/default.go b/src/cmd/go/internal/cache/default.go new file mode 100644 index 0000000..b5650ea --- /dev/null +++ b/src/cmd/go/internal/cache/default.go @@ -0,0 +1,105 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "fmt" + "os" + "path/filepath" + "sync" + + "cmd/go/internal/base" + "cmd/go/internal/cfg" + "internal/goexperiment" +) + +// Default returns the default cache to use. +// It never returns nil. +func Default() Cache { + defaultOnce.Do(initDefaultCache) + return defaultCache +} + +var ( + defaultOnce sync.Once + defaultCache Cache +) + +// cacheREADME is a message stored in a README in the cache directory. +// Because the cache lives outside the normal Go trees, we leave the +// README as a courtesy to explain where it came from. +const cacheREADME = `This directory holds cached build artifacts from the Go build system. +Run "go clean -cache" if the directory is getting too large. +Run "go clean -fuzzcache" to delete the fuzz cache. +See golang.org to learn more about Go. +` + +// initDefaultCache does the work of finding the default cache +// the first time Default is called. +func initDefaultCache() { + dir := DefaultDir() + if dir == "off" { + if defaultDirErr != nil { + base.Fatalf("build cache is required, but could not be located: %v", defaultDirErr) + } + base.Fatalf("build cache is disabled by GOCACHE=off, but required as of Go 1.12") + } + if err := os.MkdirAll(dir, 0777); err != nil { + base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err) + } + if _, err := os.Stat(filepath.Join(dir, "README")); err != nil { + // Best effort. + os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666) + } + + diskCache, err := Open(dir) + if err != nil { + base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err) + } + + if v := cfg.Getenv("GOCACHEPROG"); v != "" && goexperiment.CacheProg { + defaultCache = startCacheProg(v, diskCache) + } else { + defaultCache = diskCache + } +} + +var ( + defaultDirOnce sync.Once + defaultDir string + defaultDirErr error +) + +// DefaultDir returns the effective GOCACHE setting. +// It returns "off" if the cache is disabled. +func DefaultDir() string { + // Save the result of the first call to DefaultDir for later use in + // initDefaultCache. cmd/go/main.go explicitly sets GOCACHE so that + // subprocesses will inherit it, but that means initDefaultCache can't + // otherwise distinguish between an explicit "off" and a UserCacheDir error. + + defaultDirOnce.Do(func() { + defaultDir = cfg.Getenv("GOCACHE") + if filepath.IsAbs(defaultDir) || defaultDir == "off" { + return + } + if defaultDir != "" { + defaultDir = "off" + defaultDirErr = fmt.Errorf("GOCACHE is not an absolute path") + return + } + + // Compute default location. 
+ dir, err := os.UserCacheDir() + if err != nil { + defaultDir = "off" + defaultDirErr = fmt.Errorf("GOCACHE is not defined and %v", err) + return + } + defaultDir = filepath.Join(dir, "go-build") + }) + + return defaultDir +} diff --git a/src/cmd/go/internal/cache/hash.go b/src/cmd/go/internal/cache/hash.go new file mode 100644 index 0000000..4f79c31 --- /dev/null +++ b/src/cmd/go/internal/cache/hash.go @@ -0,0 +1,190 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "bytes" + "crypto/sha256" + "fmt" + "hash" + "io" + "os" + "runtime" + "strings" + "sync" +) + +var debugHash = false // set when GODEBUG=gocachehash=1 + +// HashSize is the number of bytes in a hash. +const HashSize = 32 + +// A Hash provides access to the canonical hash function used to index the cache. +// The current implementation uses salted SHA256, but clients must not assume this. +type Hash struct { + h hash.Hash + name string // for debugging + buf *bytes.Buffer // for verify +} + +// hashSalt is a salt string added to the beginning of every hash +// created by NewHash. Using the Go version makes sure that different +// versions of the go command (or even different Git commits during +// work on the development branch) do not address the same cache +// entries, so that a bug in one version does not affect the execution +// of other versions. This salt will result in additional ActionID files +// in the cache, but not additional copies of the large output files, +// which are still addressed by unsalted SHA256. +// +// We strip any GOEXPERIMENTs the go tool was built with from this +// version string on the assumption that they shouldn't affect go tool +// execution. This allows bootstrapping to converge faster: dist builds +// go_bootstrap without any experiments, so by stripping experiments +// go_bootstrap and the final go binary will use the same salt. +var hashSalt = []byte(stripExperiment(runtime.Version())) + +// stripExperiment strips any GOEXPERIMENT configuration from the Go +// version string. +func stripExperiment(version string) string { + if i := strings.Index(version, " X:"); i >= 0 { + return version[:i] + } + return version +} + +// Subkey returns an action ID corresponding to mixing a parent +// action ID with a string description of the subkey. +func Subkey(parent ActionID, desc string) ActionID { + h := sha256.New() + h.Write([]byte("subkey:")) + h.Write(parent[:]) + h.Write([]byte(desc)) + var out ActionID + h.Sum(out[:0]) + if debugHash { + fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out) + } + if verify { + hashDebug.Lock() + hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc) + hashDebug.Unlock() + } + return out +} + +// NewHash returns a new Hash. +// The caller is expected to Write data to it and then call Sum. +func NewHash(name string) *Hash { + h := &Hash{h: sha256.New(), name: name} + if debugHash { + fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name) + } + h.Write(hashSalt) + if verify { + h.buf = new(bytes.Buffer) + } + return h +} + +// Write writes data to the running hash. +func (h *Hash) Write(b []byte) (int, error) { + if debugHash { + fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b) + } + if h.buf != nil { + h.buf.Write(b) + } + return h.h.Write(b) +} + +// Sum returns the hash of the data written previously. 
+func (h *Hash) Sum() [HashSize]byte { + var out [HashSize]byte + h.h.Sum(out[:0]) + if debugHash { + fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out) + } + if h.buf != nil { + hashDebug.Lock() + if hashDebug.m == nil { + hashDebug.m = make(map[[HashSize]byte]string) + } + hashDebug.m[out] = h.buf.String() + hashDebug.Unlock() + } + return out +} + +// In GODEBUG=gocacheverify=1 mode, +// hashDebug holds the input to every computed hash ID, +// so that we can work backward from the ID involved in a +// cache entry mismatch to a description of what should be there. +var hashDebug struct { + sync.Mutex + m map[[HashSize]byte]string +} + +// reverseHash returns the input used to compute the hash id. +func reverseHash(id [HashSize]byte) string { + hashDebug.Lock() + s := hashDebug.m[id] + hashDebug.Unlock() + return s +} + +var hashFileCache struct { + sync.Mutex + m map[string][HashSize]byte +} + +// FileHash returns the hash of the named file. +// It caches repeated lookups for a given file, +// and the cache entry for a file can be initialized +// using SetFileHash. +// The hash used by FileHash is not the same as +// the hash used by NewHash. +func FileHash(file string) ([HashSize]byte, error) { + hashFileCache.Lock() + out, ok := hashFileCache.m[file] + hashFileCache.Unlock() + + if ok { + return out, nil + } + + h := sha256.New() + f, err := os.Open(file) + if err != nil { + if debugHash { + fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err) + } + return [HashSize]byte{}, err + } + _, err = io.Copy(h, f) + f.Close() + if err != nil { + if debugHash { + fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err) + } + return [HashSize]byte{}, err + } + h.Sum(out[:0]) + if debugHash { + fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out) + } + + SetFileHash(file, out) + return out, nil +} + +// SetFileHash sets the hash returned by FileHash for file. +func SetFileHash(file string, sum [HashSize]byte) { + hashFileCache.Lock() + if hashFileCache.m == nil { + hashFileCache.m = make(map[string][HashSize]byte) + } + hashFileCache.m[file] = sum + hashFileCache.Unlock() +} diff --git a/src/cmd/go/internal/cache/hash_test.go b/src/cmd/go/internal/cache/hash_test.go new file mode 100644 index 0000000..a035677 --- /dev/null +++ b/src/cmd/go/internal/cache/hash_test.go @@ -0,0 +1,51 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package cache + +import ( + "fmt" + "os" + "testing" +) + +func TestHash(t *testing.T) { + oldSalt := hashSalt + hashSalt = nil + defer func() { + hashSalt = oldSalt + }() + + h := NewHash("alice") + h.Write([]byte("hello world")) + sum := fmt.Sprintf("%x", h.Sum()) + want := "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" + if sum != want { + t.Errorf("hash(hello world) = %v, want %v", sum, want) + } +} + +func TestHashFile(t *testing.T) { + f, err := os.CreateTemp("", "cmd-go-test-") + if err != nil { + t.Fatal(err) + } + name := f.Name() + fmt.Fprintf(f, "hello world") + defer os.Remove(name) + if err := f.Close(); err != nil { + t.Fatal(err) + } + + var h ActionID // make sure hash result is assignable to ActionID + h, err = FileHash(name) + if err != nil { + t.Fatal(err) + } + sum := fmt.Sprintf("%x", h) + want := "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" + if sum != want { + t.Errorf("hash(hello world) = %v, want %v", sum, want) + } +} diff --git a/src/cmd/go/internal/cache/prog.go b/src/cmd/go/internal/cache/prog.go new file mode 100644 index 0000000..8d826f0 --- /dev/null +++ b/src/cmd/go/internal/cache/prog.go @@ -0,0 +1,427 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "bufio" + "cmd/go/internal/base" + "cmd/internal/quoted" + "context" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "os" + "os/exec" + "sync" + "sync/atomic" + "time" +) + +// ProgCache implements Cache via JSON messages over stdin/stdout to a child +// helper process which can then implement whatever caching policy/mechanism it +// wants. +// +// See https://github.com/golang/go/issues/59719 +type ProgCache struct { + cmd *exec.Cmd + stdout io.ReadCloser // from the child process + stdin io.WriteCloser // to the child process + bw *bufio.Writer // to stdin + jenc *json.Encoder // to bw + + // can are the commands that the child process declared that it supports. + // This is effectively the versioning mechanism. + can map[ProgCmd]bool + + // fuzzDirCache is another Cache implementation to use for the FuzzDir + // method. In practice this is the default GOCACHE disk-based + // implementation. + // + // TODO(bradfitz): maybe this isn't ideal. But we'd need to extend the Cache + // interface and the fuzzing callers to be less disk-y to do more here. + fuzzDirCache Cache + + closing atomic.Bool + ctx context.Context // valid until Close via ctxClose + ctxCancel context.CancelFunc // called on Close + readLoopDone chan struct{} // closed when readLoop returns + + mu sync.Mutex // guards following fields + nextID int64 + inFlight map[int64]chan<- *ProgResponse + outputFile map[OutputID]string // object => abs path on disk + + // writeMu serializes writing to the child process. + // It must never be held at the same time as mu. + writeMu sync.Mutex +} + +// ProgCmd is a command that can be issued to a child process. +// +// If the interface needs to grow, we can add new commands or new versioned +// commands like "get2". +type ProgCmd string + +const ( + cmdGet = ProgCmd("get") + cmdPut = ProgCmd("put") + cmdClose = ProgCmd("close") +) + +// ProgRequest is the JSON-encoded message that's sent from cmd/go to +// the GOCACHEPROG child process over stdin. Each JSON object is on its +// own line. 
A ProgRequest of Type "put" with BodySize > 0 will be followed +// by a line containing a base64-encoded JSON string literal of the body. +type ProgRequest struct { + // ID is a unique number per process across all requests. + // It must be echoed in the ProgResponse from the child. + ID int64 + + // Command is the type of request. + // The cmd/go tool will only send commands that were declared + // as supported by the child. + Command ProgCmd + + // ActionID is non-nil for get and puts. + ActionID []byte `json:",omitempty"` // or nil if not used + + // ObjectID is set for Type "put" and "output-file". + ObjectID []byte `json:",omitempty"` // or nil if not used + + // Body is the body for "put" requests. It's sent after the JSON object + // as a base64-encoded JSON string when BodySize is non-zero. + // It's sent as a separate JSON value instead of being a struct field + // send in this JSON object so large values can be streamed in both directions. + // The base64 string body of a ProgRequest will always be written + // immediately after the JSON object and a newline. + Body io.Reader `json:"-"` + + // BodySize is the number of bytes of Body. If zero, the body isn't written. + BodySize int64 `json:",omitempty"` +} + +// ProgResponse is the JSON response from the child process to cmd/go. +// +// With the exception of the first protocol message that the child writes to its +// stdout with ID==0 and KnownCommands populated, these are only sent in +// response to a ProgRequest from cmd/go. +// +// ProgResponses can be sent in any order. The ID must match the request they're +// replying to. +type ProgResponse struct { + ID int64 // that corresponds to ProgRequest; they can be answered out of order + Err string `json:",omitempty"` // if non-empty, the error + + // KnownCommands is included in the first message that cache helper program + // writes to stdout on startup (with ID==0). It includes the + // ProgRequest.Command types that are supported by the program. + // + // This lets us extend the protocol gracefully over time (adding "get2", + // etc), or fail gracefully when needed. It also lets us verify the program + // wants to be a cache helper. + KnownCommands []ProgCmd `json:",omitempty"` + + // For Get requests. + + Miss bool `json:",omitempty"` // cache miss + OutputID []byte `json:",omitempty"` + Size int64 `json:",omitempty"` // in bytes + Time *time.Time `json:",omitempty"` // an Entry.Time; when the object was added to the docs + + // DiskPath is the absolute path on disk of the ObjectID corresponding + // a "get" request's ActionID (on cache hit) or a "put" request's + // provided ObjectID. + DiskPath string `json:",omitempty"` +} + +// startCacheProg starts the prog binary (with optional space-separated flags) +// and returns a Cache implementation that talks to it. +// +// It blocks a few seconds to wait for the child process to successfully start +// and advertise its capabilities. +func startCacheProg(progAndArgs string, fuzzDirCache Cache) Cache { + if fuzzDirCache == nil { + panic("missing fuzzDirCache") + } + args, err := quoted.Split(progAndArgs) + if err != nil { + base.Fatalf("GOCACHEPROG args: %v", err) + } + var prog string + if len(args) > 0 { + prog = args[0] + args = args[1:] + } + + ctx, ctxCancel := context.WithCancel(context.Background()) + + cmd := exec.CommandContext(ctx, prog, args...) 
+ out, err := cmd.StdoutPipe() + if err != nil { + base.Fatalf("StdoutPipe to GOCACHEPROG: %v", err) + } + in, err := cmd.StdinPipe() + if err != nil { + base.Fatalf("StdinPipe to GOCACHEPROG: %v", err) + } + cmd.Stderr = os.Stderr + cmd.Cancel = in.Close + + if err := cmd.Start(); err != nil { + base.Fatalf("error starting GOCACHEPROG program %q: %v", prog, err) + } + + pc := &ProgCache{ + ctx: ctx, + ctxCancel: ctxCancel, + fuzzDirCache: fuzzDirCache, + cmd: cmd, + stdout: out, + stdin: in, + bw: bufio.NewWriter(in), + inFlight: make(map[int64]chan<- *ProgResponse), + outputFile: make(map[OutputID]string), + readLoopDone: make(chan struct{}), + } + + // Register our interest in the initial protocol message from the child to + // us, saying what it can do. + capResc := make(chan *ProgResponse, 1) + pc.inFlight[0] = capResc + + pc.jenc = json.NewEncoder(pc.bw) + go pc.readLoop(pc.readLoopDone) + + // Give the child process a few seconds to report its capabilities. This + // should be instant and not require any slow work by the program. + timer := time.NewTicker(5 * time.Second) + defer timer.Stop() + for { + select { + case <-timer.C: + log.Printf("# still waiting for GOCACHEPROG %v ...", prog) + case capRes := <-capResc: + can := map[ProgCmd]bool{} + for _, cmd := range capRes.KnownCommands { + can[cmd] = true + } + if len(can) == 0 { + base.Fatalf("GOCACHEPROG %v declared no supported commands", prog) + } + pc.can = can + return pc + } + } +} + +func (c *ProgCache) readLoop(readLoopDone chan<- struct{}) { + defer close(readLoopDone) + jd := json.NewDecoder(c.stdout) + for { + res := new(ProgResponse) + if err := jd.Decode(res); err != nil { + if c.closing.Load() { + return // quietly + } + if err == io.EOF { + c.mu.Lock() + inFlight := len(c.inFlight) + c.mu.Unlock() + base.Fatalf("GOCACHEPROG exited pre-Close with %v pending requests", inFlight) + } + base.Fatalf("error reading JSON from GOCACHEPROG: %v", err) + } + c.mu.Lock() + ch, ok := c.inFlight[res.ID] + delete(c.inFlight, res.ID) + c.mu.Unlock() + if ok { + ch <- res + } else { + base.Fatalf("GOCACHEPROG sent response for unknown request ID %v", res.ID) + } + } +} + +func (c *ProgCache) send(ctx context.Context, req *ProgRequest) (*ProgResponse, error) { + resc := make(chan *ProgResponse, 1) + if err := c.writeToChild(req, resc); err != nil { + return nil, err + } + select { + case res := <-resc: + if res.Err != "" { + return nil, errors.New(res.Err) + } + return res, nil + case <-ctx.Done(): + return nil, ctx.Err() + } +} + +func (c *ProgCache) writeToChild(req *ProgRequest, resc chan<- *ProgResponse) (err error) { + c.mu.Lock() + c.nextID++ + req.ID = c.nextID + c.inFlight[req.ID] = resc + c.mu.Unlock() + + defer func() { + if err != nil { + c.mu.Lock() + delete(c.inFlight, req.ID) + c.mu.Unlock() + } + }() + + c.writeMu.Lock() + defer c.writeMu.Unlock() + + if err := c.jenc.Encode(req); err != nil { + return err + } + if err := c.bw.WriteByte('\n'); err != nil { + return err + } + if req.Body != nil && req.BodySize > 0 { + if err := c.bw.WriteByte('"'); err != nil { + return err + } + e := base64.NewEncoder(base64.StdEncoding, c.bw) + wrote, err := io.Copy(e, req.Body) + if err != nil { + return err + } + if err := e.Close(); err != nil { + return nil + } + if wrote != req.BodySize { + return fmt.Errorf("short write writing body to GOCACHEPROG for action %x, object %x: wrote %v; expected %v", + req.ActionID, req.ObjectID, wrote, req.BodySize) + } + if _, err := c.bw.WriteString("\"\n"); err != nil { + return err + } + } + if 
err := c.bw.Flush(); err != nil { + return err + } + return nil +} + +func (c *ProgCache) Get(a ActionID) (Entry, error) { + if !c.can[cmdGet] { + // They can't do a "get". Maybe they're a write-only cache. + // + // TODO(bradfitz,bcmills): figure out the proper error type here. Maybe + // errors.ErrUnsupported? Is entryNotFoundError even appropriate? There + // might be places where we rely on the fact that a recent Put can be + // read through a corresponding Get. Audit callers and check, and document + // error types on the Cache interface. + return Entry{}, &entryNotFoundError{} + } + res, err := c.send(c.ctx, &ProgRequest{ + Command: cmdGet, + ActionID: a[:], + }) + if err != nil { + return Entry{}, err // TODO(bradfitz): or entryNotFoundError? Audit callers. + } + if res.Miss { + return Entry{}, &entryNotFoundError{} + } + e := Entry{ + Size: res.Size, + } + if res.Time != nil { + e.Time = *res.Time + } else { + e.Time = time.Now() + } + if res.DiskPath == "" { + return Entry{}, &entryNotFoundError{errors.New("GOCACHEPROG didn't populate DiskPath on get hit")} + } + if copy(e.OutputID[:], res.OutputID) != len(res.OutputID) { + return Entry{}, &entryNotFoundError{errors.New("incomplete ProgResponse OutputID")} + } + c.noteOutputFile(e.OutputID, res.DiskPath) + return e, nil +} + +func (c *ProgCache) noteOutputFile(o OutputID, diskPath string) { + c.mu.Lock() + defer c.mu.Unlock() + c.outputFile[o] = diskPath +} + +func (c *ProgCache) OutputFile(o OutputID) string { + c.mu.Lock() + defer c.mu.Unlock() + return c.outputFile[o] +} + +func (c *ProgCache) Put(a ActionID, file io.ReadSeeker) (_ OutputID, size int64, _ error) { + // Compute output ID. + h := sha256.New() + if _, err := file.Seek(0, 0); err != nil { + return OutputID{}, 0, err + } + size, err := io.Copy(h, file) + if err != nil { + return OutputID{}, 0, err + } + var out OutputID + h.Sum(out[:0]) + + if _, err := file.Seek(0, 0); err != nil { + return OutputID{}, 0, err + } + + if !c.can[cmdPut] { + // Child is a read-only cache. Do nothing. + return out, size, nil + } + + res, err := c.send(c.ctx, &ProgRequest{ + Command: cmdPut, + ActionID: a[:], + ObjectID: out[:], + Body: file, + BodySize: size, + }) + if err != nil { + return OutputID{}, 0, err + } + if res.DiskPath == "" { + return OutputID{}, 0, errors.New("GOCACHEPROG didn't return DiskPath in put response") + } + c.noteOutputFile(out, res.DiskPath) + return out, size, err +} + +func (c *ProgCache) Close() error { + c.closing.Store(true) + var err error + + // First write a "close" message to the child so it can exit nicely + // and clean up if it wants. Only after that exchange do we cancel + // the context that kills the process. + if c.can[cmdClose] { + _, err = c.send(c.ctx, &ProgRequest{Command: cmdClose}) + } + c.ctxCancel() + <-c.readLoopDone + return err +} + +func (c *ProgCache) FuzzDir() string { + // TODO(bradfitz): figure out what to do here. For now just use the + // disk-based default. + return c.fuzzDirCache.FuzzDir() +} |
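For reference, here is a minimal sketch of a GOCACHEPROG helper speaking the protocol defined by ProgRequest and ProgResponse above: it advertises its commands with ID 0 on startup, answers get/put/close requests read from stdin as JSON objects, and reads a put body as the base64-encoded JSON string that cmd/go writes after the request. The in-memory index and temporary directory used for storage are illustrative assumptions, not part of this change.

// Hypothetical GOCACHEPROG helper; the storage strategy is illustrative only.
package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"time"
)

// Local mirrors of the wire fields used from ProgRequest/ProgResponse.
type request struct {
	ID       int64
	Command  string
	ActionID []byte
	ObjectID []byte
	BodySize int64
}

type response struct {
	ID            int64
	Err           string     `json:",omitempty"`
	KnownCommands []string   `json:",omitempty"`
	Miss          bool       `json:",omitempty"`
	OutputID      []byte     `json:",omitempty"`
	Size          int64      `json:",omitempty"`
	Time          *time.Time `json:",omitempty"`
	DiskPath      string     `json:",omitempty"`
}

type entry struct {
	outputID []byte
	size     int64
	path     string
}

func main() {
	dir, err := os.MkdirTemp("", "cacheprog") // illustrative storage location
	if err != nil {
		os.Exit(1)
	}
	dec := json.NewDecoder(os.Stdin)
	enc := json.NewEncoder(os.Stdout)

	// Startup: advertise supported commands with ID 0.
	enc.Encode(&response{ID: 0, KnownCommands: []string{"get", "put", "close"}})

	index := map[string]entry{} // ActionID -> stored object

	for {
		var req request
		if err := dec.Decode(&req); err != nil {
			return // cmd/go closed the pipe
		}
		switch req.Command {
		case "close":
			enc.Encode(&response{ID: req.ID})
			return
		case "put":
			var body []byte
			if req.BodySize > 0 {
				// The body follows the request as a base64-encoded JSON string.
				var b64 string
				if err := dec.Decode(&b64); err != nil {
					enc.Encode(&response{ID: req.ID, Err: err.Error()})
					continue
				}
				if body, err = base64.StdEncoding.DecodeString(b64); err != nil {
					enc.Encode(&response{ID: req.ID, Err: err.Error()})
					continue
				}
			}
			path := filepath.Join(dir, fmt.Sprintf("o-%x", req.ObjectID))
			if err := os.WriteFile(path, body, 0o644); err != nil {
				enc.Encode(&response{ID: req.ID, Err: err.Error()})
				continue
			}
			index[string(req.ActionID)] = entry{req.ObjectID, int64(len(body)), path}
			enc.Encode(&response{ID: req.ID, OutputID: req.ObjectID, Size: int64(len(body)), DiskPath: path})
		case "get":
			e, ok := index[string(req.ActionID)]
			if !ok {
				enc.Encode(&response{ID: req.ID, Miss: true})
				continue
			}
			now := time.Now()
			enc.Encode(&response{ID: req.ID, OutputID: e.outputID, Size: e.size, Time: &now, DiskPath: e.path})
		default:
			enc.Encode(&response{ID: req.ID, Err: "unsupported command"})
		}
	}
}

A helper along these lines would be selected via the GOCACHEPROG environment variable (with the cacheprog GOEXPERIMENT enabled), as checked in initDefaultCache above.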