summaryrefslogtreecommitdiffstats
path: root/src/cmd/go/internal/cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/go/internal/cache')
-rw-r--r--src/cmd/go/internal/cache/cache.go565
-rw-r--r--src/cmd/go/internal/cache/cache_test.go275
-rw-r--r--src/cmd/go/internal/cache/default.go98
-rw-r--r--src/cmd/go/internal/cache/hash.go190
-rw-r--r--src/cmd/go/internal/cache/hash_test.go51
5 files changed, 1179 insertions, 0 deletions
diff --git a/src/cmd/go/internal/cache/cache.go b/src/cmd/go/internal/cache/cache.go
new file mode 100644
index 0000000..c30d7c8
--- /dev/null
+++ b/src/cmd/go/internal/cache/cache.go
@@ -0,0 +1,565 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cache implements a build artifact cache.
+package cache
+
+import (
+ "bytes"
+ "crypto/sha256"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "cmd/go/internal/lockedfile"
+ "cmd/go/internal/mmap"
+)
+
+// An ActionID is a cache action key, the hash of a complete description of a
+// repeatable computation (command line, environment variables,
+// input file contents, executable contents).
+type ActionID [HashSize]byte
+
+// An OutputID is a cache output key, the hash of an output of a computation.
+type OutputID [HashSize]byte
+
+// A Cache is a package cache, backed by a file system directory tree.
+type Cache struct {
+ dir string
+ now func() time.Time
+}
+
+// Open opens and returns the cache in the given directory.
+//
+// It is safe for multiple processes on a single machine to use the
+// same cache directory in a local file system simultaneously.
+// They will coordinate using operating system file locks and may
+// duplicate effort but will not corrupt the cache.
+//
+// However, it is NOT safe for multiple processes on different machines
+// to share a cache directory (for example, if the directory were stored
+// in a network file system). File locking is notoriously unreliable in
+// network file systems and may not suffice to protect the cache.
+func Open(dir string) (*Cache, error) {
+ info, err := os.Stat(dir)
+ if err != nil {
+ return nil, err
+ }
+ if !info.IsDir() {
+ return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
+ }
+ for i := 0; i < 256; i++ {
+ name := filepath.Join(dir, fmt.Sprintf("%02x", i))
+ if err := os.MkdirAll(name, 0777); err != nil {
+ return nil, err
+ }
+ }
+ c := &Cache{
+ dir: dir,
+ now: time.Now,
+ }
+ return c, nil
+}
+
+// fileName returns the name of the file corresponding to the given id.
+func (c *Cache) fileName(id [HashSize]byte, key string) string {
+ return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
+}
+
+// An entryNotFoundError indicates that a cache entry was not found, with an
+// optional underlying reason.
+type entryNotFoundError struct {
+ Err error
+}
+
+func (e *entryNotFoundError) Error() string {
+ if e.Err == nil {
+ return "cache entry not found"
+ }
+ return fmt.Sprintf("cache entry not found: %v", e.Err)
+}
+
+func (e *entryNotFoundError) Unwrap() error {
+ return e.Err
+}
+
+const (
+ // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
+ hexSize = HashSize * 2
+ entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
+)
+
+// verify controls whether to run the cache in verify mode.
+// In verify mode, the cache always returns errMissing from Get
+// but then double-checks in Put that the data being written
+// exactly matches any existing entry. This provides an easy
+// way to detect program behavior that would have been different
+// had the cache entry been returned from Get.
+//
+// verify is enabled by setting the environment variable
+// GODEBUG=gocacheverify=1.
+var verify = false
+
+var errVerifyMode = errors.New("gocacheverify=1")
+
+// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
+var DebugTest = false
+
+func init() { initEnv() }
+
+func initEnv() {
+ verify = false
+ debugHash = false
+ debug := strings.Split(os.Getenv("GODEBUG"), ",")
+ for _, f := range debug {
+ if f == "gocacheverify=1" {
+ verify = true
+ }
+ if f == "gocachehash=1" {
+ debugHash = true
+ }
+ if f == "gocachetest=1" {
+ DebugTest = true
+ }
+ }
+}
+
+// Get looks up the action ID in the cache,
+// returning the corresponding output ID and file size, if any.
+// Note that finding an output ID does not guarantee that the
+// saved file for that output ID is still available.
+func (c *Cache) Get(id ActionID) (Entry, error) {
+ if verify {
+ return Entry{}, &entryNotFoundError{Err: errVerifyMode}
+ }
+ return c.get(id)
+}
+
+type Entry struct {
+ OutputID OutputID
+ Size int64
+ Time time.Time
+}
+
+// get is Get but does not respect verify mode, so that Put can use it.
+func (c *Cache) get(id ActionID) (Entry, error) {
+ missing := func(reason error) (Entry, error) {
+ return Entry{}, &entryNotFoundError{Err: reason}
+ }
+ f, err := os.Open(c.fileName(id, "a"))
+ if err != nil {
+ return missing(err)
+ }
+ defer f.Close()
+ entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
+ if n, err := io.ReadFull(f, entry); n > entrySize {
+ return missing(errors.New("too long"))
+ } else if err != io.ErrUnexpectedEOF {
+ if err == io.EOF {
+ return missing(errors.New("file is empty"))
+ }
+ return missing(err)
+ } else if n < entrySize {
+ return missing(errors.New("entry file incomplete"))
+ }
+ if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
+ return missing(errors.New("invalid header"))
+ }
+ eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
+ eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
+ esize, entry := entry[1:1+20], entry[1+20:]
+ etime, entry := entry[1:1+20], entry[1+20:]
+ var buf [HashSize]byte
+ if _, err := hex.Decode(buf[:], eid); err != nil {
+ return missing(fmt.Errorf("decoding ID: %v", err))
+ } else if buf != id {
+ return missing(errors.New("mismatched ID"))
+ }
+ if _, err := hex.Decode(buf[:], eout); err != nil {
+ return missing(fmt.Errorf("decoding output ID: %v", err))
+ }
+ i := 0
+ for i < len(esize) && esize[i] == ' ' {
+ i++
+ }
+ size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
+ if err != nil {
+ return missing(fmt.Errorf("parsing size: %v", err))
+ } else if size < 0 {
+ return missing(errors.New("negative size"))
+ }
+ i = 0
+ for i < len(etime) && etime[i] == ' ' {
+ i++
+ }
+ tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
+ if err != nil {
+ return missing(fmt.Errorf("parsing timestamp: %v", err))
+ } else if tm < 0 {
+ return missing(errors.New("negative timestamp"))
+ }
+
+ c.used(c.fileName(id, "a"))
+
+ return Entry{buf, size, time.Unix(0, tm)}, nil
+}
+
+// GetFile looks up the action ID in the cache and returns
+// the name of the corresponding data file.
+func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
+ entry, err = c.Get(id)
+ if err != nil {
+ return "", Entry{}, err
+ }
+ file = c.OutputFile(entry.OutputID)
+ info, err := os.Stat(file)
+ if err != nil {
+ return "", Entry{}, &entryNotFoundError{Err: err}
+ }
+ if info.Size() != entry.Size {
+ return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
+ }
+ return file, entry, nil
+}
+
+// GetBytes looks up the action ID in the cache and returns
+// the corresponding output bytes.
+// GetBytes should only be used for data that can be expected to fit in memory.
+func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
+ entry, err := c.Get(id)
+ if err != nil {
+ return nil, entry, err
+ }
+ data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
+ if sha256.Sum256(data) != entry.OutputID {
+ return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
+ }
+ return data, entry, nil
+}
+
+// GetMmap looks up the action ID in the cache and returns
+// the corresponding output bytes.
+// GetMmap should only be used for data that can be expected to fit in memory.
+func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
+ entry, err := c.Get(id)
+ if err != nil {
+ return nil, entry, err
+ }
+ md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
+ if err != nil {
+ return nil, Entry{}, err
+ }
+ if int64(len(md.Data)) != entry.Size {
+ return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
+ }
+ return md.Data, entry, nil
+}
+
+// OutputFile returns the name of the cache file storing output with the given OutputID.
+func (c *Cache) OutputFile(out OutputID) string {
+ file := c.fileName(out, "d")
+ c.used(file)
+ return file
+}
+
+// Time constants for cache expiration.
+//
+// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
+// to avoid causing many unnecessary inode updates. The mtimes therefore
+// roughly reflect "time of last use" but may in fact be older by at most an hour.
+//
+// We scan the cache for entries to delete at most once per trimInterval (1 day).
+//
+// When we do scan the cache, we delete entries that have not been used for
+// at least trimLimit (5 days). Statistics gathered from a month of usage by
+// Go developers found that essentially all reuse of cached entries happened
+// within 5 days of the previous reuse. See golang.org/issue/22990.
+const (
+ mtimeInterval = 1 * time.Hour
+ trimInterval = 24 * time.Hour
+ trimLimit = 5 * 24 * time.Hour
+)
+
+// used makes a best-effort attempt to update mtime on file,
+// so that mtime reflects cache access time.
+//
+// Because the reflection only needs to be approximate,
+// and to reduce the amount of disk activity caused by using
+// cache entries, used only updates the mtime if the current
+// mtime is more than an hour old. This heuristic eliminates
+// nearly all of the mtime updates that would otherwise happen,
+// while still keeping the mtimes useful for cache trimming.
+func (c *Cache) used(file string) {
+ info, err := os.Stat(file)
+ if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
+ return
+ }
+ os.Chtimes(file, c.now(), c.now())
+}
+
+// Trim removes old cache entries that are likely not to be reused.
+func (c *Cache) Trim() {
+ now := c.now()
+
+ // We maintain in dir/trim.txt the time of the last completed cache trim.
+ // If the cache has been trimmed recently enough, do nothing.
+ // This is the common case.
+ // If the trim file is corrupt, detected if the file can't be parsed, or the
+ // trim time is too far in the future, attempt the trim anyway. It's possible that
+ // the cache was full when the corruption happened. Attempting a trim on
+ // an empty cache is cheap, so there wouldn't be a big performance hit in that case.
+ if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
+ if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
+ lastTrim := time.Unix(t, 0)
+ if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
+ return
+ }
+ }
+ }
+
+ // Trim each of the 256 subdirectories.
+ // We subtract an additional mtimeInterval
+ // to account for the imprecision of our "last used" mtimes.
+ cutoff := now.Add(-trimLimit - mtimeInterval)
+ for i := 0; i < 256; i++ {
+ subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
+ c.trimSubdir(subdir, cutoff)
+ }
+
+ // Ignore errors from here: if we don't write the complete timestamp, the
+ // cache will appear older than it is, and we'll trim it again next time.
+ var b bytes.Buffer
+ fmt.Fprintf(&b, "%d", now.Unix())
+ if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
+ return
+ }
+}
+
+// trimSubdir trims a single cache subdirectory.
+func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
+ // Read all directory entries from subdir before removing
+ // any files, in case removing files invalidates the file offset
+ // in the directory scan. Also, ignore error from f.Readdirnames,
+ // because we don't care about reporting the error and we still
+ // want to process any entries found before the error.
+ f, err := os.Open(subdir)
+ if err != nil {
+ return
+ }
+ names, _ := f.Readdirnames(-1)
+ f.Close()
+
+ for _, name := range names {
+ // Remove only cache entries (xxxx-a and xxxx-d).
+ if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
+ continue
+ }
+ entry := filepath.Join(subdir, name)
+ info, err := os.Stat(entry)
+ if err == nil && info.ModTime().Before(cutoff) {
+ os.Remove(entry)
+ }
+ }
+}
+
+// putIndexEntry adds an entry to the cache recording that executing the action
+// with the given id produces an output with the given output id (hash) and size.
+func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
+ // Note: We expect that for one reason or another it may happen
+ // that repeating an action produces a different output hash
+ // (for example, if the output contains a time stamp or temp dir name).
+ // While not ideal, this is also not a correctness problem, so we
+ // don't make a big deal about it. In particular, we leave the action
+ // cache entries writable specifically so that they can be overwritten.
+ //
+ // Setting GODEBUG=gocacheverify=1 does make a big deal:
+ // in verify mode we are double-checking that the cache entries
+ // are entirely reproducible. As just noted, this may be unrealistic
+ // in some cases but the check is also useful for shaking out real bugs.
+ entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
+ if verify && allowVerify {
+ old, err := c.get(id)
+ if err == nil && (old.OutputID != out || old.Size != size) {
+ // panic to show stack trace, so we can see what code is generating this cache entry.
+ msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
+ panic(msg)
+ }
+ }
+ file := c.fileName(id, "a")
+
+ // Copy file to cache directory.
+ mode := os.O_WRONLY | os.O_CREATE
+ f, err := os.OpenFile(file, mode, 0666)
+ if err != nil {
+ return err
+ }
+ _, err = f.WriteString(entry)
+ if err == nil {
+ // Truncate the file only *after* writing it.
+ // (This should be a no-op, but truncate just in case of previous corruption.)
+ //
+ // This differs from os.WriteFile, which truncates to 0 *before* writing
+ // via os.O_TRUNC. Truncating only after writing ensures that a second write
+ // of the same content to the same file is idempotent, and does not — even
+ // temporarily! — undo the effect of the first write.
+ err = f.Truncate(int64(len(entry)))
+ }
+ if closeErr := f.Close(); err == nil {
+ err = closeErr
+ }
+ if err != nil {
+ // TODO(bcmills): This Remove potentially races with another go command writing to file.
+ // Can we eliminate it?
+ os.Remove(file)
+ return err
+ }
+ os.Chtimes(file, c.now(), c.now()) // mainly for tests
+
+ return nil
+}
+
+// Put stores the given output in the cache as the output for the action ID.
+// It may read file twice. The content of file must not change between the two passes.
+func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+ return c.put(id, file, true)
+}
+
+// PutNoVerify is like Put but disables the verify check
+// when GODEBUG=goverifycache=1 is set.
+// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
+// like test output containing times and the like.
+func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+ return c.put(id, file, false)
+}
+
+func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
+ // Compute output ID.
+ h := sha256.New()
+ if _, err := file.Seek(0, 0); err != nil {
+ return OutputID{}, 0, err
+ }
+ size, err := io.Copy(h, file)
+ if err != nil {
+ return OutputID{}, 0, err
+ }
+ var out OutputID
+ h.Sum(out[:0])
+
+ // Copy to cached output file (if not already present).
+ if err := c.copyFile(file, out, size); err != nil {
+ return out, size, err
+ }
+
+ // Add to cache index.
+ return out, size, c.putIndexEntry(id, out, size, allowVerify)
+}
+
+// PutBytes stores the given bytes in the cache as the output for the action ID.
+func (c *Cache) PutBytes(id ActionID, data []byte) error {
+ _, _, err := c.Put(id, bytes.NewReader(data))
+ return err
+}
+
+// copyFile copies file into the cache, expecting it to have the given
+// output ID and size, if that file is not present already.
+func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
+ name := c.fileName(out, "d")
+ info, err := os.Stat(name)
+ if err == nil && info.Size() == size {
+ // Check hash.
+ if f, err := os.Open(name); err == nil {
+ h := sha256.New()
+ io.Copy(h, f)
+ f.Close()
+ var out2 OutputID
+ h.Sum(out2[:0])
+ if out == out2 {
+ return nil
+ }
+ }
+ // Hash did not match. Fall through and rewrite file.
+ }
+
+ // Copy file to cache directory.
+ mode := os.O_RDWR | os.O_CREATE
+ if err == nil && info.Size() > size { // shouldn't happen but fix in case
+ mode |= os.O_TRUNC
+ }
+ f, err := os.OpenFile(name, mode, 0666)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if size == 0 {
+ // File now exists with correct size.
+ // Only one possible zero-length file, so contents are OK too.
+ // Early return here makes sure there's a "last byte" for code below.
+ return nil
+ }
+
+ // From here on, if any of the I/O writing the file fails,
+ // we make a best-effort attempt to truncate the file f
+ // before returning, to avoid leaving bad bytes in the file.
+
+ // Copy file to f, but also into h to double-check hash.
+ if _, err := file.Seek(0, 0); err != nil {
+ f.Truncate(0)
+ return err
+ }
+ h := sha256.New()
+ w := io.MultiWriter(f, h)
+ if _, err := io.CopyN(w, file, size-1); err != nil {
+ f.Truncate(0)
+ return err
+ }
+ // Check last byte before writing it; writing it will make the size match
+ // what other processes expect to find and might cause them to start
+ // using the file.
+ buf := make([]byte, 1)
+ if _, err := file.Read(buf); err != nil {
+ f.Truncate(0)
+ return err
+ }
+ h.Write(buf)
+ sum := h.Sum(nil)
+ if !bytes.Equal(sum, out[:]) {
+ f.Truncate(0)
+ return fmt.Errorf("file content changed underfoot")
+ }
+
+ // Commit cache file entry.
+ if _, err := f.Write(buf); err != nil {
+ f.Truncate(0)
+ return err
+ }
+ if err := f.Close(); err != nil {
+ // Data might not have been written,
+ // but file may look like it is the right size.
+ // To be extra careful, remove cached file.
+ os.Remove(name)
+ return err
+ }
+ os.Chtimes(name, c.now(), c.now()) // mainly for tests
+
+ return nil
+}
+
+// FuzzDir returns a subdirectory within the cache for storing fuzzing data.
+// The subdirectory may not exist.
+//
+// This directory is managed by the internal/fuzz package. Files in this
+// directory aren't removed by the 'go clean -cache' command or by Trim.
+// They may be removed with 'go clean -fuzzcache'.
+//
+// TODO(#48526): make Trim remove unused files from this directory.
+func (c *Cache) FuzzDir() string {
+ return filepath.Join(c.dir, "fuzz")
+}
diff --git a/src/cmd/go/internal/cache/cache_test.go b/src/cmd/go/internal/cache/cache_test.go
new file mode 100644
index 0000000..5527d44
--- /dev/null
+++ b/src/cmd/go/internal/cache/cache_test.go
@@ -0,0 +1,275 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cache
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "internal/testenv"
+ "os"
+ "path/filepath"
+ "runtime"
+ "testing"
+ "time"
+)
+
+func init() {
+ verify = false // even if GODEBUG is set
+}
+
+func TestBasic(t *testing.T) {
+ dir, err := os.MkdirTemp("", "cachetest-")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(dir)
+ _, err = Open(filepath.Join(dir, "notexist"))
+ if err == nil {
+ t.Fatal(`Open("tmp/notexist") succeeded, want failure`)
+ }
+
+ cdir := filepath.Join(dir, "c1")
+ if err := os.Mkdir(cdir, 0777); err != nil {
+ t.Fatal(err)
+ }
+
+ c1, err := Open(cdir)
+ if err != nil {
+ t.Fatalf("Open(c1) (create): %v", err)
+ }
+ if err := c1.putIndexEntry(dummyID(1), dummyID(12), 13, true); err != nil {
+ t.Fatalf("addIndexEntry: %v", err)
+ }
+ if err := c1.putIndexEntry(dummyID(1), dummyID(2), 3, true); err != nil { // overwrite entry
+ t.Fatalf("addIndexEntry: %v", err)
+ }
+ if entry, err := c1.Get(dummyID(1)); err != nil || entry.OutputID != dummyID(2) || entry.Size != 3 {
+ t.Fatalf("c1.Get(1) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(2), 3)
+ }
+
+ c2, err := Open(cdir)
+ if err != nil {
+ t.Fatalf("Open(c2) (reuse): %v", err)
+ }
+ if entry, err := c2.Get(dummyID(1)); err != nil || entry.OutputID != dummyID(2) || entry.Size != 3 {
+ t.Fatalf("c2.Get(1) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(2), 3)
+ }
+ if err := c2.putIndexEntry(dummyID(2), dummyID(3), 4, true); err != nil {
+ t.Fatalf("addIndexEntry: %v", err)
+ }
+ if entry, err := c1.Get(dummyID(2)); err != nil || entry.OutputID != dummyID(3) || entry.Size != 4 {
+ t.Fatalf("c1.Get(2) = %x, %v, %v, want %x, %v, nil", entry.OutputID, entry.Size, err, dummyID(3), 4)
+ }
+}
+
+func TestGrowth(t *testing.T) {
+ dir, err := os.MkdirTemp("", "cachetest-")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(dir)
+
+ c, err := Open(dir)
+ if err != nil {
+ t.Fatalf("Open: %v", err)
+ }
+
+ n := 10000
+ if testing.Short() {
+ n = 10
+ }
+
+ for i := 0; i < n; i++ {
+ if err := c.putIndexEntry(dummyID(i), dummyID(i*99), int64(i)*101, true); err != nil {
+ t.Fatalf("addIndexEntry: %v", err)
+ }
+ id := ActionID(dummyID(i))
+ entry, err := c.Get(id)
+ if err != nil {
+ t.Fatalf("Get(%x): %v", id, err)
+ }
+ if entry.OutputID != dummyID(i*99) || entry.Size != int64(i)*101 {
+ t.Errorf("Get(%x) = %x, %d, want %x, %d", id, entry.OutputID, entry.Size, dummyID(i*99), int64(i)*101)
+ }
+ }
+ for i := 0; i < n; i++ {
+ id := ActionID(dummyID(i))
+ entry, err := c.Get(id)
+ if err != nil {
+ t.Fatalf("Get2(%x): %v", id, err)
+ }
+ if entry.OutputID != dummyID(i*99) || entry.Size != int64(i)*101 {
+ t.Errorf("Get2(%x) = %x, %d, want %x, %d", id, entry.OutputID, entry.Size, dummyID(i*99), int64(i)*101)
+ }
+ }
+}
+
+func TestVerifyPanic(t *testing.T) {
+ os.Setenv("GODEBUG", "gocacheverify=1")
+ initEnv()
+ defer func() {
+ os.Unsetenv("GODEBUG")
+ verify = false
+ }()
+
+ if !verify {
+ t.Fatal("initEnv did not set verify")
+ }
+
+ dir, err := os.MkdirTemp("", "cachetest-")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(dir)
+
+ c, err := Open(dir)
+ if err != nil {
+ t.Fatalf("Open: %v", err)
+ }
+
+ id := ActionID(dummyID(1))
+ if err := c.PutBytes(id, []byte("abc")); err != nil {
+ t.Fatal(err)
+ }
+
+ defer func() {
+ if err := recover(); err != nil {
+ t.Log(err)
+ return
+ }
+ }()
+ c.PutBytes(id, []byte("def"))
+ t.Fatal("mismatched Put did not panic in verify mode")
+}
+
+func dummyID(x int) [HashSize]byte {
+ var out [HashSize]byte
+ binary.LittleEndian.PutUint64(out[:], uint64(x))
+ return out
+}
+
+func TestCacheTrim(t *testing.T) {
+ if runtime.GOOS == "js" {
+ testenv.SkipFlaky(t, 35220)
+ }
+
+ dir, err := os.MkdirTemp("", "cachetest-")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.RemoveAll(dir)
+
+ c, err := Open(dir)
+ if err != nil {
+ t.Fatalf("Open: %v", err)
+ }
+ const start = 1000000000
+ now := int64(start)
+ c.now = func() time.Time { return time.Unix(now, 0) }
+
+ checkTime := func(name string, mtime int64) {
+ t.Helper()
+ file := filepath.Join(c.dir, name[:2], name)
+ info, err := os.Stat(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if info.ModTime().Unix() != mtime {
+ t.Fatalf("%s mtime = %d, want %d", name, info.ModTime().Unix(), mtime)
+ }
+ }
+
+ id := ActionID(dummyID(1))
+ c.PutBytes(id, []byte("abc"))
+ entry, _ := c.Get(id)
+ c.PutBytes(ActionID(dummyID(2)), []byte("def"))
+ mtime := now
+ checkTime(fmt.Sprintf("%x-a", id), mtime)
+ checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime)
+
+ // Get should not change recent mtimes.
+ now = start + 10
+ c.Get(id)
+ checkTime(fmt.Sprintf("%x-a", id), mtime)
+ checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime)
+
+ // Get should change distant mtimes.
+ now = start + 5000
+ mtime2 := now
+ if _, err := c.Get(id); err != nil {
+ t.Fatal(err)
+ }
+ c.OutputFile(entry.OutputID)
+ checkTime(fmt.Sprintf("%x-a", id), mtime2)
+ checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime2)
+
+ // Trim should leave everything alone: it's all too new.
+ c.Trim()
+ if _, err := c.Get(id); err != nil {
+ t.Fatal(err)
+ }
+ c.OutputFile(entry.OutputID)
+ data, err := os.ReadFile(filepath.Join(dir, "trim.txt"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ checkTime(fmt.Sprintf("%x-a", dummyID(2)), start)
+
+ // Trim less than a day later should not do any work at all.
+ now = start + 80000
+ c.Trim()
+ if _, err := c.Get(id); err != nil {
+ t.Fatal(err)
+ }
+ c.OutputFile(entry.OutputID)
+ data2, err := os.ReadFile(filepath.Join(dir, "trim.txt"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !bytes.Equal(data, data2) {
+ t.Fatalf("second trim did work: %q -> %q", data, data2)
+ }
+
+ // Fast forward and do another trim just before the 5 day cutoff.
+ // Note that because of usedQuantum the cutoff is actually 5 days + 1 hour.
+ // We used c.Get(id) just now, so 5 days later it should still be kept.
+ // On the other hand almost a full day has gone by since we wrote dummyID(2)
+ // and we haven't looked at it since, so 5 days later it should be gone.
+ now += 5 * 86400
+ checkTime(fmt.Sprintf("%x-a", dummyID(2)), start)
+ c.Trim()
+ if _, err := c.Get(id); err != nil {
+ t.Fatal(err)
+ }
+ c.OutputFile(entry.OutputID)
+ mtime3 := now
+ if _, err := c.Get(dummyID(2)); err == nil { // haven't done a Get for this since original write above
+ t.Fatalf("Trim did not remove dummyID(2)")
+ }
+
+ // The c.Get(id) refreshed id's mtime again.
+ // Check that another 5 days later it is still not gone,
+ // but check by using checkTime, which doesn't bring mtime forward.
+ now += 5 * 86400
+ c.Trim()
+ checkTime(fmt.Sprintf("%x-a", id), mtime3)
+ checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime3)
+
+ // Half a day later Trim should still be a no-op, because there was a Trim recently.
+ // Even though the entry for id is now old enough to be trimmed,
+ // it gets a reprieve until the time comes for a new Trim scan.
+ now += 86400 / 2
+ c.Trim()
+ checkTime(fmt.Sprintf("%x-a", id), mtime3)
+ checkTime(fmt.Sprintf("%x-d", entry.OutputID), mtime3)
+
+ // Another half a day later, Trim should actually run, and it should remove id.
+ now += 86400/2 + 1
+ c.Trim()
+ if _, err := c.Get(dummyID(1)); err == nil {
+ t.Fatal("Trim did not remove dummyID(1)")
+ }
+}
diff --git a/src/cmd/go/internal/cache/default.go b/src/cmd/go/internal/cache/default.go
new file mode 100644
index 0000000..426dddf
--- /dev/null
+++ b/src/cmd/go/internal/cache/default.go
@@ -0,0 +1,98 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cache
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "cmd/go/internal/base"
+ "cmd/go/internal/cfg"
+)
+
+// Default returns the default cache to use, or nil if no cache should be used.
+func Default() *Cache {
+ defaultOnce.Do(initDefaultCache)
+ return defaultCache
+}
+
+var (
+ defaultOnce sync.Once
+ defaultCache *Cache
+)
+
+// cacheREADME is a message stored in a README in the cache directory.
+// Because the cache lives outside the normal Go trees, we leave the
+// README as a courtesy to explain where it came from.
+const cacheREADME = `This directory holds cached build artifacts from the Go build system.
+Run "go clean -cache" if the directory is getting too large.
+Run "go clean -fuzzcache" to delete the fuzz cache.
+See golang.org to learn more about Go.
+`
+
+// initDefaultCache does the work of finding the default cache
+// the first time Default is called.
+func initDefaultCache() {
+ dir := DefaultDir()
+ if dir == "off" {
+ if defaultDirErr != nil {
+ base.Fatalf("build cache is required, but could not be located: %v", defaultDirErr)
+ }
+ base.Fatalf("build cache is disabled by GOCACHE=off, but required as of Go 1.12")
+ }
+ if err := os.MkdirAll(dir, 0777); err != nil {
+ base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err)
+ }
+ if _, err := os.Stat(filepath.Join(dir, "README")); err != nil {
+ // Best effort.
+ os.WriteFile(filepath.Join(dir, "README"), []byte(cacheREADME), 0666)
+ }
+
+ c, err := Open(dir)
+ if err != nil {
+ base.Fatalf("failed to initialize build cache at %s: %s\n", dir, err)
+ }
+ defaultCache = c
+}
+
+var (
+ defaultDirOnce sync.Once
+ defaultDir string
+ defaultDirErr error
+)
+
+// DefaultDir returns the effective GOCACHE setting.
+// It returns "off" if the cache is disabled.
+func DefaultDir() string {
+ // Save the result of the first call to DefaultDir for later use in
+ // initDefaultCache. cmd/go/main.go explicitly sets GOCACHE so that
+ // subprocesses will inherit it, but that means initDefaultCache can't
+ // otherwise distinguish between an explicit "off" and a UserCacheDir error.
+
+ defaultDirOnce.Do(func() {
+ defaultDir = cfg.Getenv("GOCACHE")
+ if filepath.IsAbs(defaultDir) || defaultDir == "off" {
+ return
+ }
+ if defaultDir != "" {
+ defaultDir = "off"
+ defaultDirErr = fmt.Errorf("GOCACHE is not an absolute path")
+ return
+ }
+
+ // Compute default location.
+ dir, err := os.UserCacheDir()
+ if err != nil {
+ defaultDir = "off"
+ defaultDirErr = fmt.Errorf("GOCACHE is not defined and %v", err)
+ return
+ }
+ defaultDir = filepath.Join(dir, "go-build")
+ })
+
+ return defaultDir
+}
diff --git a/src/cmd/go/internal/cache/hash.go b/src/cmd/go/internal/cache/hash.go
new file mode 100644
index 0000000..4f79c31
--- /dev/null
+++ b/src/cmd/go/internal/cache/hash.go
@@ -0,0 +1,190 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cache
+
+import (
+ "bytes"
+ "crypto/sha256"
+ "fmt"
+ "hash"
+ "io"
+ "os"
+ "runtime"
+ "strings"
+ "sync"
+)
+
+var debugHash = false // set when GODEBUG=gocachehash=1
+
+// HashSize is the number of bytes in a hash.
+const HashSize = 32
+
+// A Hash provides access to the canonical hash function used to index the cache.
+// The current implementation uses salted SHA256, but clients must not assume this.
+type Hash struct {
+ h hash.Hash
+ name string // for debugging
+ buf *bytes.Buffer // for verify
+}
+
+// hashSalt is a salt string added to the beginning of every hash
+// created by NewHash. Using the Go version makes sure that different
+// versions of the go command (or even different Git commits during
+// work on the development branch) do not address the same cache
+// entries, so that a bug in one version does not affect the execution
+// of other versions. This salt will result in additional ActionID files
+// in the cache, but not additional copies of the large output files,
+// which are still addressed by unsalted SHA256.
+//
+// We strip any GOEXPERIMENTs the go tool was built with from this
+// version string on the assumption that they shouldn't affect go tool
+// execution. This allows bootstrapping to converge faster: dist builds
+// go_bootstrap without any experiments, so by stripping experiments
+// go_bootstrap and the final go binary will use the same salt.
+var hashSalt = []byte(stripExperiment(runtime.Version()))
+
+// stripExperiment strips any GOEXPERIMENT configuration from the Go
+// version string.
+func stripExperiment(version string) string {
+ if i := strings.Index(version, " X:"); i >= 0 {
+ return version[:i]
+ }
+ return version
+}
+
+// Subkey returns an action ID corresponding to mixing a parent
+// action ID with a string description of the subkey.
+func Subkey(parent ActionID, desc string) ActionID {
+ h := sha256.New()
+ h.Write([]byte("subkey:"))
+ h.Write(parent[:])
+ h.Write([]byte(desc))
+ var out ActionID
+ h.Sum(out[:0])
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out)
+ }
+ if verify {
+ hashDebug.Lock()
+ hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc)
+ hashDebug.Unlock()
+ }
+ return out
+}
+
+// NewHash returns a new Hash.
+// The caller is expected to Write data to it and then call Sum.
+func NewHash(name string) *Hash {
+ h := &Hash{h: sha256.New(), name: name}
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name)
+ }
+ h.Write(hashSalt)
+ if verify {
+ h.buf = new(bytes.Buffer)
+ }
+ return h
+}
+
+// Write writes data to the running hash.
+func (h *Hash) Write(b []byte) (int, error) {
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b)
+ }
+ if h.buf != nil {
+ h.buf.Write(b)
+ }
+ return h.h.Write(b)
+}
+
+// Sum returns the hash of the data written previously.
+func (h *Hash) Sum() [HashSize]byte {
+ var out [HashSize]byte
+ h.h.Sum(out[:0])
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out)
+ }
+ if h.buf != nil {
+ hashDebug.Lock()
+ if hashDebug.m == nil {
+ hashDebug.m = make(map[[HashSize]byte]string)
+ }
+ hashDebug.m[out] = h.buf.String()
+ hashDebug.Unlock()
+ }
+ return out
+}
+
+// In GODEBUG=gocacheverify=1 mode,
+// hashDebug holds the input to every computed hash ID,
+// so that we can work backward from the ID involved in a
+// cache entry mismatch to a description of what should be there.
+var hashDebug struct {
+ sync.Mutex
+ m map[[HashSize]byte]string
+}
+
+// reverseHash returns the input used to compute the hash id.
+func reverseHash(id [HashSize]byte) string {
+ hashDebug.Lock()
+ s := hashDebug.m[id]
+ hashDebug.Unlock()
+ return s
+}
+
+var hashFileCache struct {
+ sync.Mutex
+ m map[string][HashSize]byte
+}
+
+// FileHash returns the hash of the named file.
+// It caches repeated lookups for a given file,
+// and the cache entry for a file can be initialized
+// using SetFileHash.
+// The hash used by FileHash is not the same as
+// the hash used by NewHash.
+func FileHash(file string) ([HashSize]byte, error) {
+ hashFileCache.Lock()
+ out, ok := hashFileCache.m[file]
+ hashFileCache.Unlock()
+
+ if ok {
+ return out, nil
+ }
+
+ h := sha256.New()
+ f, err := os.Open(file)
+ if err != nil {
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
+ }
+ return [HashSize]byte{}, err
+ }
+ _, err = io.Copy(h, f)
+ f.Close()
+ if err != nil {
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
+ }
+ return [HashSize]byte{}, err
+ }
+ h.Sum(out[:0])
+ if debugHash {
+ fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out)
+ }
+
+ SetFileHash(file, out)
+ return out, nil
+}
+
+// SetFileHash sets the hash returned by FileHash for file.
+func SetFileHash(file string, sum [HashSize]byte) {
+ hashFileCache.Lock()
+ if hashFileCache.m == nil {
+ hashFileCache.m = make(map[string][HashSize]byte)
+ }
+ hashFileCache.m[file] = sum
+ hashFileCache.Unlock()
+}
diff --git a/src/cmd/go/internal/cache/hash_test.go b/src/cmd/go/internal/cache/hash_test.go
new file mode 100644
index 0000000..a035677
--- /dev/null
+++ b/src/cmd/go/internal/cache/hash_test.go
@@ -0,0 +1,51 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cache
+
+import (
+ "fmt"
+ "os"
+ "testing"
+)
+
+func TestHash(t *testing.T) {
+ oldSalt := hashSalt
+ hashSalt = nil
+ defer func() {
+ hashSalt = oldSalt
+ }()
+
+ h := NewHash("alice")
+ h.Write([]byte("hello world"))
+ sum := fmt.Sprintf("%x", h.Sum())
+ want := "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
+ if sum != want {
+ t.Errorf("hash(hello world) = %v, want %v", sum, want)
+ }
+}
+
+func TestHashFile(t *testing.T) {
+ f, err := os.CreateTemp("", "cmd-go-test-")
+ if err != nil {
+ t.Fatal(err)
+ }
+ name := f.Name()
+ fmt.Fprintf(f, "hello world")
+ defer os.Remove(name)
+ if err := f.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ var h ActionID // make sure hash result is assignable to ActionID
+ h, err = FileHash(name)
+ if err != nil {
+ t.Fatal(err)
+ }
+ sum := fmt.Sprintf("%x", h)
+ want := "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
+ if sum != want {
+ t.Errorf("hash(hello world) = %v, want %v", sum, want)
+ }
+}