1 files changed, 524 insertions, 0 deletions
diff --git a/src/cmd/go/internal/cache/cache.go b/src/cmd/go/internal/cache/cache.go
new file mode 100644
index 0000000..41f9216
--- /dev/null
+++ b/src/cmd/go/internal/cache/cache.go
@@ -0,0 +1,524 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cache implements a build artifact cache.
+package cache
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"cmd/go/internal/renameio"
+)
+
+// An ActionID is a cache action key, the hash of a complete description of a
+// repeatable computation (command line, environment variables,
+// input file contents, executable contents). It names the action's index file.
+type ActionID [HashSize]byte
+
+// An OutputID is a cache output key, the SHA-256 hash of an output of a computation.
+type OutputID [HashSize]byte
+
+// A Cache is a package cache, backed by a file system directory tree.
+type Cache struct {
+	dir string           // root directory; entries live in its 256 two-hex-digit subdirectories
+	now func() time.Time // clock used for mtimes and trimming; Open sets it to time.Now
+}
+
+// Open returns a Cache rooted at dir, which must already exist and be a
+// directory; otherwise Open returns the os.Stat error or an *fs.PathError.
+// Open creates any of the 256 two-hex-digit subdirectories ("00" through
+// "ff") that are missing.
+//
+// It is safe for multiple processes on a single machine to use the
+// same cache directory in a local file system simultaneously.
+// They will coordinate using operating system file locks and may
+// duplicate effort but will not corrupt the cache.
+//
+// However, it is NOT safe for multiple processes on different machines
+// to share a cache directory (for example, if the directory were stored
+// in a network file system). File locking is notoriously unreliable in
+// network file systems and may not suffice to protect the cache.
+func Open(dir string) (*Cache, error) {
+	fi, statErr := os.Stat(dir)
+	if statErr != nil {
+		return nil, statErr
+	}
+	if !fi.IsDir() {
+		notDir := fmt.Errorf("not a directory")
+		return nil, &fs.PathError{Op: "open", Path: dir, Err: notDir}
+	}
+	// Create every shard directory up front; the write paths open files in them directly.
+	for shard := 0; shard <= 0xff; shard++ {
+		sub := filepath.Join(dir, fmt.Sprintf("%02x", shard))
+		if mkErr := os.MkdirAll(sub, 0777); mkErr != nil {
+			return nil, mkErr
+		}
+	}
+	return &Cache{dir: dir, now: time.Now}, nil
+}
+
+// fileName returns the path dir/<first byte of id in hex>/<id in hex>-<key> for the given id.
+func (c *Cache) fileName(id [HashSize]byte, key string) string {
+	return filepath.Join(c.dir, hex.EncodeToString(id[:1]), hex.EncodeToString(id[:])+"-"+key)
+}
+
+// An entryNotFoundError indicates that a cache entry was not found, with an
+// optional underlying reason.
+type entryNotFoundError struct {
+	Err error // underlying reason for the miss; may be nil
+}
+
+func (e *entryNotFoundError) Error() string {
+	if e.Err != nil { // include the underlying reason when there is one
+		return fmt.Sprintf("cache entry not found: %v", e.Err)
+	}
+	return "cache entry not found"
+}
+
+func (e *entryNotFoundError) Unwrap() error {
+	return e.Err // may be nil; exposes the reason to errors.Is and errors.As
+}
+
+const (
+	// An action entry file is one line: "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n".
+	hexSize   = HashSize * 2
+	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 // "v1", then four fields each preceded by a space, then "\n"
+)
+
+// verify controls whether to run the cache in verify mode.
+// In verify mode, Get always reports a miss (an entryNotFoundError
+// wrapping errVerifyMode), but Put then double-checks that the data
+// being written matches any existing entry. This provides an easy
+// way to detect program behavior that would have been different
+// had the cache entry been returned from Get.
+//
+// verify is enabled by setting the environment variable
+// GODEBUG=gocacheverify=1.
+var verify = false
+
+var errVerifyMode = errors.New("gocacheverify=1") // reason attached to the miss that Get reports in verify mode
+
+// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
+var DebugTest = false // turned on by initEnv, which never clears it again
+
+func init() { initEnv() } // read the GODEBUG settings at package initialization
+
+// initEnv derives verify, debugHash and DebugTest from the comma-separated
+// settings in the GODEBUG environment variable.
+func initEnv() {
+	// verify and debugHash start cleared; DebugTest is only ever turned on.
+	verify, debugHash = false, false
+	for _, setting := range strings.Split(os.Getenv("GODEBUG"), ",") {
+		switch setting {
+		case "gocacheverify=1":
+			verify = true
+		case "gocachehash=1":
+			debugHash = true
+		case "gocachetest=1":
+			DebugTest = true
+		}
+	}
+}
+
+// Get returns the index entry (output ID, size and write time) recorded
+// for the action ID. A successful Get does not guarantee that the saved
+// file for that output ID is still available. In verify mode Get always
+// reports a miss.
+func (c *Cache) Get(id ActionID) (Entry, error) {
+	if !verify {
+		return c.get(id)
+	}
+	return Entry{}, &entryNotFoundError{Err: errVerifyMode}
+}
+
+type Entry struct { // Entry is the decoded content of one action index file.
+	OutputID OutputID  // hash naming the output data file
+	Size     int64     // output size in bytes, as recorded in the entry
+	Time     time.Time // time recorded when the entry was written
+}
+
+// get is Get but does not respect verify mode, so that Put can use it. Every failure is returned as an entryNotFoundError.
+func (c *Cache) get(id ActionID) (Entry, error) {
+	missing := func(reason error) (Entry, error) { // wraps reason as a cache miss
+		return Entry{}, &entryNotFoundError{Err: reason}
+	}
+	f, err := os.Open(c.fileName(id, "a"))
+	if err != nil {
+		return missing(err)
+	}
+	defer f.Close()
+	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
+	if n, err := io.ReadFull(f, entry); n > entrySize {
+		return missing(errors.New("too long"))
+	} else if err != io.ErrUnexpectedEOF { // a non-empty file of at most entrySize bytes cannot fill entry, so a short read is the expected outcome
+		if err == io.EOF {
+			return missing(errors.New("file is empty"))
+		}
+		return missing(err)
+	} else if n < entrySize {
+		return missing(errors.New("entry file incomplete"))
+	}
+	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { // fixed bytes: "v1", the four separating spaces, the final newline
+		return missing(errors.New("invalid header"))
+	}
+	eid, entry := entry[3:3+hexSize], entry[3+hexSize:] // each step peels one field off the front; the remainder starts at the next separating space
+	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
+	esize, entry := entry[1:1+20], entry[1+20:]
+	etime, entry := entry[1:1+20], entry[1+20:]
+	var buf [HashSize]byte
+	if _, err := hex.Decode(buf[:], eid); err != nil {
+		return missing(fmt.Errorf("decoding ID: %v", err))
+	} else if buf != id { // the file must describe the action that was asked for
+		return missing(errors.New("mismatched ID"))
+	}
+	if _, err := hex.Decode(buf[:], eout); err != nil { // buf is reused: from here on it holds the output ID
+		return missing(fmt.Errorf("decoding output ID: %v", err))
+	}
+	i := 0
+	for i < len(esize) && esize[i] == ' ' { // skip the left padding of the size field
+		i++
+	}
+	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
+	if err != nil {
+		return missing(fmt.Errorf("parsing size: %v", err))
+	} else if size < 0 {
+		return missing(errors.New("negative size"))
+	}
+	i = 0
+	for i < len(etime) && etime[i] == ' ' { // skip the left padding of the timestamp field
+		i++
+	}
+	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
+	if err != nil {
+		return missing(fmt.Errorf("parsing timestamp: %v", err))
+	} else if tm < 0 {
+		return missing(errors.New("negative timestamp"))
+	}
+
+	c.used(c.fileName(id, "a")) // refresh the index file's mtime, which trimSubdir compares against its cutoff
+
+	return Entry{buf, size, time.Unix(0, tm)}, nil // tm is in nanoseconds since the Unix epoch
+}
+
+// GetFile looks up the action ID in the cache and returns the name of the
+// corresponding data file, provided the file exists with the recorded size.
+func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
+	found, getErr := c.Get(id)
+	if getErr != nil {
+		return "", Entry{}, getErr
+	}
+	name := c.OutputFile(found.OutputID)
+	// Any problem with the data file is reported as a cache miss.
+	switch fi, statErr := os.Stat(name); {
+	case statErr != nil:
+		return "", Entry{}, &entryNotFoundError{Err: statErr}
+	case fi.Size() != found.Size:
+		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
+	}
+	return name, found, nil
+}
+
+// GetBytes looks up the action ID in the cache and returns the corresponding
+// output bytes. It should only be used for data expected to fit in memory.
+func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
+	entry, err := c.Get(id)
+	if err != nil {
+		return nil, entry, err
+	}
+	// The read error is dropped; unreadable content is normally caught by the checksum comparison instead.
+	content, _ := os.ReadFile(c.OutputFile(entry.OutputID))
+	if sum := sha256.Sum256(content); sum == entry.OutputID {
+		return content, entry, nil
+	}
+	return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
+}
+
+// OutputFile returns the name of the cache file storing output with the given OutputID, after marking that file as used.
+func (c *Cache) OutputFile(out OutputID) string {
+	name := c.fileName(out, "d")
+	c.used(name) // best-effort mtime refresh
+	return name
+}
+
+// Time constants for cache expiration.
+//
+// We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
+// to avoid causing many unnecessary inode updates. The mtimes therefore
+// roughly reflect "time of last use" but may in fact be older by at most an hour.
+//
+// We scan the cache for entries to delete at most once per trimInterval (1 day).
+//
+// When we do scan the cache, we delete entries that have not been used for
+// at least trimLimit (5 days). Statistics gathered from a month of usage by
+// Go developers found that essentially all reuse of cached entries happened
+// within 5 days of the previous reuse. See golang.org/issue/22990.
+const (
+	mtimeInterval = 1 * time.Hour
+	trimInterval  = 24 * time.Hour
+	trimLimit     = 5 * 24 * time.Hour
+)
+
+// used makes a best-effort attempt to update mtime on file,
+// so that mtime reflects cache access time.
+//
+// The mtime only needs to approximate the time of last use, so to
+// limit the disk activity caused by reading cache entries, used
+// leaves the mtime alone unless it is at least mtimeInterval old.
+// This heuristic eliminates nearly all of the mtime updates that
+// would otherwise happen, while still keeping the mtimes useful
+// for cache trimming.
+func (c *Cache) used(file string) {
+	fi, err := os.Stat(file)
+	if err != nil || c.now().Sub(fi.ModTime()) >= mtimeInterval {
+		// Errors are ignored: the mtime is only a hint for trimming.
+		os.Chtimes(file, c.now(), c.now())
+	}
+}
+
+// Trim removes old cache entries that are likely not to be reused.
+func (c *Cache) Trim() {
+	now := c.now()
+
+	// We maintain in dir/trim.txt the time of the last completed cache trim.
+	// If the cache has been trimmed recently enough, do nothing.
+	// This is the common case. A missing or unparsable trim.txt, or a trim
+	// time more than mtimeInterval in the future, does not count as recent.
+	data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
+	t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
+	if d := now.Sub(time.Unix(t, 0)); err == nil && d < trimInterval && d > -mtimeInterval {
+		return
+	}
+
+	// Trim each of the 256 subdirectories.
+	// We subtract an additional mtimeInterval
+	// to account for the imprecision of our "last used" mtimes.
+	cutoff := now.Add(-trimLimit - mtimeInterval)
+	for i := 0; i < 256; i++ {
+		c.trimSubdir(filepath.Join(c.dir, fmt.Sprintf("%02x", i)), cutoff)
+	}
+
+	// Ignore errors from here: if we don't write the complete timestamp, the
+	// cache will appear older than it is, and we'll trim it again next time.
+	renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
+}
+
+// trimSubdir removes from subdir every cache entry file whose mtime is
+// before cutoff. All errors are ignored.
+func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
+	dir, err := os.Open(subdir)
+	if err != nil {
+		return
+	}
+	// List the whole directory before removing anything, in case removing
+	// files invalidates the file offset in the directory scan. The error
+	// from Readdirnames is ignored on purpose: we don't care about reporting
+	// it, and entries found before the error are still worth processing.
+	list, _ := dir.Readdirnames(-1)
+	dir.Close()
+
+	for _, base := range list {
+		// Only cache entries (xxxx-a and xxxx-d) are candidates for removal.
+		isEntry := strings.HasSuffix(base, "-a") || strings.HasSuffix(base, "-d")
+		if !isEntry {
+			continue
+		}
+		target := filepath.Join(subdir, base)
+		if fi, err := os.Stat(target); err == nil && fi.ModTime().Before(cutoff) {
+			os.Remove(target)
+		}
+	}
+}
+
+// putIndexEntry adds an entry to the cache recording that executing the action
+// with the given id produces an output with the given output id (hash) and size.
+// With allowVerify set and verify mode on, it panics if an existing entry differs.
+func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
+	// Note: We expect that for one reason or another it may happen
+	// that repeating an action produces a different output hash
+	// (for example, if the output contains a time stamp or temp dir name).
+	// While not ideal, this is also not a correctness problem, so we
+	// don't make a big deal about it. In particular, we leave the action
+	// cache entries writable specifically so that they can be overwritten.
+	//
+	// Setting GODEBUG=gocacheverify=1 does make a big deal:
+	// in verify mode we are double-checking that the cache entries
+	// are entirely reproducible. As just noted, this may be unrealistic
+	// in some cases but the check is also useful for shaking out real bugs.
+	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
+	if verify && allowVerify {
+		old, err := c.get(id)
+		if err == nil && (old.OutputID != out || old.Size != size) {
+			// panic to show stack trace, so we can see what code is generating this cache entry.
+			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), old.OutputID, old.Size, out, size)
+			panic(msg)
+		}
+	}
+	file := c.fileName(id, "a")
+
+	// Write the index entry, creating the file if needed but not truncating it first.
+	f, err := os.OpenFile(file, os.O_WRONLY|os.O_CREATE, 0666)
+	if err != nil {
+		return err
+	}
+	_, err = f.WriteString(entry)
+	if err == nil {
+		// Truncate the file only *after* writing it.
+		// (This should be a no-op, but truncate just in case of previous corruption.)
+		//
+		// This differs from os.WriteFile, which truncates to 0 *before* writing
+		// via os.O_TRUNC. Truncating only after writing ensures that a second write
+		// of the same content to the same file is idempotent, and does not — even
+		// temporarily! — undo the effect of the first write.
+		err = f.Truncate(int64(len(entry)))
+	}
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		// TODO(bcmills): This Remove potentially races with another go command writing to file.
+		// Can we eliminate it?
+		os.Remove(file)
+		return err
+	}
+	os.Chtimes(file, c.now(), c.now()) // mainly for tests
+
+	return nil
+}
+
+// Put stores the given output in the cache as the output for the action ID and returns the output's ID and size.
+// It may read file twice. The content of file must not change between the two passes.
+func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+	return c.put(id, file, true) // true: take part in the gocacheverify=1 check
+}
+
+// PutNoVerify is like Put but disables the verify check
+// when GODEBUG=gocacheverify=1 is set.
+// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
+// like test output containing times and the like.
+func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
+	return c.put(id, file, false)
+}
+
+func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
+	// First pass over file: hash it from the start to get the output ID and size.
+	if _, err := file.Seek(0, io.SeekStart); err != nil {
+		return OutputID{}, 0, err
+	}
+	hasher := sha256.New()
+	n, err := io.Copy(hasher, file)
+	if err != nil {
+		return OutputID{}, 0, err
+	}
+	var sum OutputID
+	hasher.Sum(sum[:0]) // Sum appends to sum[:0], i.e. writes into sum (assumes HashSize is the SHA-256 size)
+
+	// Second pass: store the data under its output ID unless it is already there.
+	if err := c.copyFile(file, sum, n); err != nil {
+		return sum, n, err
+	}
+
+	// Finally record id -> (output ID, size) in the index.
+	return sum, n, c.putIndexEntry(id, sum, n, allowVerify)
+}
+
+// PutBytes stores the given bytes in the cache as the output for the action ID.
+func (c *Cache) PutBytes(id ActionID, data []byte) error {
+	_, _, err := c.Put(id, bytes.NewReader(data)) // the output ID and size that Put returns are not needed here
+	return err
+}
+
+// copyFile copies file into the cache as the data file for out, expecting it to hash to
+// out and be size bytes long. It does nothing if a file with that hash is already present.
+func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
+	name := c.fileName(out, "d")
+	info, err := os.Stat(name)
+	if err == nil && info.Size() == size {
+		// Check hash of the existing file before trusting it.
+		if f, err := os.Open(name); err == nil {
+			h := sha256.New()
+			io.Copy(h, f) // a read error leaves a partial hash, which simply fails the comparison below
+			f.Close()
+			var out2 OutputID
+			h.Sum(out2[:0])
+			if out == out2 {
+				return nil
+			}
+		}
+		// Hash did not match, or the file could not be opened. Fall through and rewrite file.
+	}
+
+	// Copy file to cache directory.
+	mode := os.O_RDWR | os.O_CREATE
+	if err == nil && info.Size() > size { // shouldn't happen but fix in case
+		mode |= os.O_TRUNC
+	}
+	f, err := os.OpenFile(name, mode, 0666)
+	if err != nil {
+		return err
+	}
+	defer f.Close() // covers the early returns; on the success path the explicit Close below runs first
+	if size == 0 {
+		// File now exists with correct size.
+		// Only one possible zero-length file, so contents are OK too.
+		// Early return here makes sure there's a "last byte" for code below.
+		return nil
+	}
+
+	// From here on, if any of the I/O writing the file fails,
+	// we make a best-effort attempt to truncate the file f
+	// before returning, to avoid leaving bad bytes in the file.
+
+	// Copy file to f, but also into h to double-check hash.
+	if _, err := file.Seek(0, 0); err != nil {
+		f.Truncate(0)
+		return err
+	}
+	h := sha256.New()
+	w := io.MultiWriter(f, h)
+	if _, err := io.CopyN(w, file, size-1); err != nil { // everything except the last byte
+		f.Truncate(0)
+		return err
+	}
+	// Check last byte before writing it; writing it will make the size match
+	// what other processes expect to find and might cause them to start
+	// using the file.
+	buf := make([]byte, 1)
+	if _, err := file.Read(buf); err != nil {
+		f.Truncate(0)
+		return err
+	}
+	h.Write(buf) // the last byte goes into the hash only; it is not in f yet
+	sum := h.Sum(nil)
+	if !bytes.Equal(sum, out[:]) {
+		f.Truncate(0)
+		return fmt.Errorf("file content changed underfoot")
+	}
+
+	// Commit cache file entry by writing the withheld last byte.
+	if _, err := f.Write(buf); err != nil {
+		f.Truncate(0)
+		return err
+	}
+	if err := f.Close(); err != nil {
+		// Data might not have been written,
+		// but file may look like it is the right size.
+		// To be extra careful, remove cached file.
+		os.Remove(name)
+		return err
+	}
+	os.Chtimes(name, c.now(), c.now()) // mainly for tests
+
+	return nil
+}