diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/cmd/go/internal/modfetch/cache.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-upstream.tar.xz golang-1.20-upstream.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/go/internal/modfetch/cache.go')
-rw-r--r-- | src/cmd/go/internal/modfetch/cache.go | 789 |
1 files changed, 789 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/cache.go b/src/cmd/go/internal/modfetch/cache.go new file mode 100644 index 0000000..928eb1f --- /dev/null +++ b/src/cmd/go/internal/modfetch/cache.go @@ -0,0 +1,789 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package modfetch + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "io/fs" + "math/rand" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "cmd/go/internal/base" + "cmd/go/internal/cfg" + "cmd/go/internal/lockedfile" + "cmd/go/internal/modfetch/codehost" + "cmd/go/internal/par" + "cmd/go/internal/robustio" + + "golang.org/x/mod/module" + "golang.org/x/mod/semver" +) + +func cacheDir(path string) (string, error) { + if err := checkCacheDir(); err != nil { + return "", err + } + enc, err := module.EscapePath(path) + if err != nil { + return "", err + } + return filepath.Join(cfg.GOMODCACHE, "cache/download", enc, "/@v"), nil +} + +func CachePath(m module.Version, suffix string) (string, error) { + dir, err := cacheDir(m.Path) + if err != nil { + return "", err + } + if !semver.IsValid(m.Version) { + return "", fmt.Errorf("non-semver module version %q", m.Version) + } + if module.CanonicalVersion(m.Version) != m.Version { + return "", fmt.Errorf("non-canonical module version %q", m.Version) + } + encVer, err := module.EscapeVersion(m.Version) + if err != nil { + return "", err + } + return filepath.Join(dir, encVer+"."+suffix), nil +} + +// DownloadDir returns the directory to which m should have been downloaded. +// An error will be returned if the module path or version cannot be escaped. +// An error satisfying errors.Is(err, fs.ErrNotExist) will be returned +// along with the directory if the directory does not exist or if the directory +// is not completely populated. +func DownloadDir(m module.Version) (string, error) { + if err := checkCacheDir(); err != nil { + return "", err + } + enc, err := module.EscapePath(m.Path) + if err != nil { + return "", err + } + if !semver.IsValid(m.Version) { + return "", fmt.Errorf("non-semver module version %q", m.Version) + } + if module.CanonicalVersion(m.Version) != m.Version { + return "", fmt.Errorf("non-canonical module version %q", m.Version) + } + encVer, err := module.EscapeVersion(m.Version) + if err != nil { + return "", err + } + + // Check whether the directory itself exists. + dir := filepath.Join(cfg.GOMODCACHE, enc+"@"+encVer) + if fi, err := os.Stat(dir); os.IsNotExist(err) { + return dir, err + } else if err != nil { + return dir, &DownloadDirPartialError{dir, err} + } else if !fi.IsDir() { + return dir, &DownloadDirPartialError{dir, errors.New("not a directory")} + } + + // Check if a .partial file exists. This is created at the beginning of + // a download and removed after the zip is extracted. + partialPath, err := CachePath(m, "partial") + if err != nil { + return dir, err + } + if _, err := os.Stat(partialPath); err == nil { + return dir, &DownloadDirPartialError{dir, errors.New("not completely extracted")} + } else if !os.IsNotExist(err) { + return dir, err + } + + // Check if a .ziphash file exists. It should be created before the + // zip is extracted, but if it was deleted (by another program?), we need + // to re-calculate it. Note that checkMod will repopulate the ziphash + // file if it doesn't exist, but if the module is excluded by checks + // through GONOSUMDB or GOPRIVATE, that check and repopulation won't happen. + ziphashPath, err := CachePath(m, "ziphash") + if err != nil { + return dir, err + } + if _, err := os.Stat(ziphashPath); os.IsNotExist(err) { + return dir, &DownloadDirPartialError{dir, errors.New("ziphash file is missing")} + } else if err != nil { + return dir, err + } + return dir, nil +} + +// DownloadDirPartialError is returned by DownloadDir if a module directory +// exists but was not completely populated. +// +// DownloadDirPartialError is equivalent to fs.ErrNotExist. +type DownloadDirPartialError struct { + Dir string + Err error +} + +func (e *DownloadDirPartialError) Error() string { return fmt.Sprintf("%s: %v", e.Dir, e.Err) } +func (e *DownloadDirPartialError) Is(err error) bool { return err == fs.ErrNotExist } + +// lockVersion locks a file within the module cache that guards the downloading +// and extraction of the zipfile for the given module version. +func lockVersion(mod module.Version) (unlock func(), err error) { + path, err := CachePath(mod, "lock") + if err != nil { + return nil, err + } + if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { + return nil, err + } + return lockedfile.MutexAt(path).Lock() +} + +// SideLock locks a file within the module cache that previously guarded +// edits to files outside the cache, such as go.sum and go.mod files in the +// user's working directory. +// If err is nil, the caller MUST eventually call the unlock function. +func SideLock() (unlock func(), err error) { + if err := checkCacheDir(); err != nil { + return nil, err + } + + path := filepath.Join(cfg.GOMODCACHE, "cache", "lock") + if err := os.MkdirAll(filepath.Dir(path), 0777); err != nil { + return nil, fmt.Errorf("failed to create cache directory: %w", err) + } + + return lockedfile.MutexAt(path).Lock() +} + +// A cachingRepo is a cache around an underlying Repo, +// avoiding redundant calls to ModulePath, Versions, Stat, Latest, and GoMod (but not CheckReuse or Zip). +// It is also safe for simultaneous use by multiple goroutines +// (so that it can be returned from Lookup multiple times). +// It serializes calls to the underlying Repo. +type cachingRepo struct { + path string + cache par.Cache // cache for all operations + + once sync.Once + initRepo func() (Repo, error) + r Repo +} + +func newCachingRepo(path string, initRepo func() (Repo, error)) *cachingRepo { + return &cachingRepo{ + path: path, + initRepo: initRepo, + } +} + +func (r *cachingRepo) repo() Repo { + r.once.Do(func() { + var err error + r.r, err = r.initRepo() + if err != nil { + r.r = errRepo{r.path, err} + } + }) + return r.r +} + +func (r *cachingRepo) CheckReuse(old *codehost.Origin) error { + return r.repo().CheckReuse(old) +} + +func (r *cachingRepo) ModulePath() string { + return r.path +} + +func (r *cachingRepo) Versions(prefix string) (*Versions, error) { + type cached struct { + v *Versions + err error + } + c := r.cache.Do("versions:"+prefix, func() any { + v, err := r.repo().Versions(prefix) + return cached{v, err} + }).(cached) + + if c.err != nil { + return nil, c.err + } + v := &Versions{ + Origin: c.v.Origin, + List: append([]string(nil), c.v.List...), + } + return v, nil +} + +type cachedInfo struct { + info *RevInfo + err error +} + +func (r *cachingRepo) Stat(rev string) (*RevInfo, error) { + c := r.cache.Do("stat:"+rev, func() any { + file, info, err := readDiskStat(r.path, rev) + if err == nil { + return cachedInfo{info, nil} + } + + info, err = r.repo().Stat(rev) + if err == nil { + // If we resolved, say, 1234abcde to v0.0.0-20180604122334-1234abcdef78, + // then save the information under the proper version, for future use. + if info.Version != rev { + file, _ = CachePath(module.Version{Path: r.path, Version: info.Version}, "info") + r.cache.Do("stat:"+info.Version, func() any { + return cachedInfo{info, err} + }) + } + + if err := writeDiskStat(file, info); err != nil { + fmt.Fprintf(os.Stderr, "go: writing stat cache: %v\n", err) + } + } + return cachedInfo{info, err} + }).(cachedInfo) + + info := c.info + if info != nil { + copy := *info + info = © + } + return info, c.err +} + +func (r *cachingRepo) Latest() (*RevInfo, error) { + c := r.cache.Do("latest:", func() any { + info, err := r.repo().Latest() + + // Save info for likely future Stat call. + if err == nil { + r.cache.Do("stat:"+info.Version, func() any { + return cachedInfo{info, err} + }) + if file, _, err := readDiskStat(r.path, info.Version); err != nil { + writeDiskStat(file, info) + } + } + + return cachedInfo{info, err} + }).(cachedInfo) + + info := c.info + if info != nil { + copy := *info + info = © + } + return info, c.err +} + +func (r *cachingRepo) GoMod(version string) ([]byte, error) { + type cached struct { + text []byte + err error + } + c := r.cache.Do("gomod:"+version, func() any { + file, text, err := readDiskGoMod(r.path, version) + if err == nil { + // Note: readDiskGoMod already called checkGoMod. + return cached{text, nil} + } + + text, err = r.repo().GoMod(version) + if err == nil { + if err := checkGoMod(r.path, version, text); err != nil { + return cached{text, err} + } + if err := writeDiskGoMod(file, text); err != nil { + fmt.Fprintf(os.Stderr, "go: writing go.mod cache: %v\n", err) + } + } + return cached{text, err} + }).(cached) + + if c.err != nil { + return nil, c.err + } + return append([]byte(nil), c.text...), nil +} + +func (r *cachingRepo) Zip(dst io.Writer, version string) error { + return r.repo().Zip(dst, version) +} + +// InfoFile is like Lookup(path).Stat(version) but also returns the name of the file +// containing the cached information. +func InfoFile(path, version string) (*RevInfo, string, error) { + if !semver.IsValid(version) { + return nil, "", fmt.Errorf("invalid version %q", version) + } + + if file, info, err := readDiskStat(path, version); err == nil { + return info, file, nil + } + + var info *RevInfo + var err2info map[error]*RevInfo + err := TryProxies(func(proxy string) error { + i, err := Lookup(proxy, path).Stat(version) + if err == nil { + info = i + } else { + if err2info == nil { + err2info = make(map[error]*RevInfo) + } + err2info[err] = info + } + return err + }) + if err != nil { + return err2info[err], "", err + } + + // Stat should have populated the disk cache for us. + file, err := CachePath(module.Version{Path: path, Version: version}, "info") + if err != nil { + return nil, "", err + } + return info, file, nil +} + +// GoMod is like Lookup(path).GoMod(rev) but avoids the +// repository path resolution in Lookup if the result is +// already cached on local disk. +func GoMod(path, rev string) ([]byte, error) { + // Convert commit hash to pseudo-version + // to increase cache hit rate. + if !semver.IsValid(rev) { + if _, info, err := readDiskStat(path, rev); err == nil { + rev = info.Version + } else { + if errors.Is(err, statCacheErr) { + return nil, err + } + err := TryProxies(func(proxy string) error { + info, err := Lookup(proxy, path).Stat(rev) + if err == nil { + rev = info.Version + } + return err + }) + if err != nil { + return nil, err + } + } + } + + _, data, err := readDiskGoMod(path, rev) + if err == nil { + return data, nil + } + + err = TryProxies(func(proxy string) (err error) { + data, err = Lookup(proxy, path).GoMod(rev) + return err + }) + return data, err +} + +// GoModFile is like GoMod but returns the name of the file containing +// the cached information. +func GoModFile(path, version string) (string, error) { + if !semver.IsValid(version) { + return "", fmt.Errorf("invalid version %q", version) + } + if _, err := GoMod(path, version); err != nil { + return "", err + } + // GoMod should have populated the disk cache for us. + file, err := CachePath(module.Version{Path: path, Version: version}, "mod") + if err != nil { + return "", err + } + return file, nil +} + +// GoModSum returns the go.sum entry for the module version's go.mod file. +// (That is, it returns the entry listed in go.sum as "path version/go.mod".) +func GoModSum(path, version string) (string, error) { + if !semver.IsValid(version) { + return "", fmt.Errorf("invalid version %q", version) + } + data, err := GoMod(path, version) + if err != nil { + return "", err + } + sum, err := goModSum(data) + if err != nil { + return "", err + } + return sum, nil +} + +var errNotCached = fmt.Errorf("not in cache") + +// readDiskStat reads a cached stat result from disk, +// returning the name of the cache file and the result. +// If the read fails, the caller can use +// writeDiskStat(file, info) to write a new cache entry. +func readDiskStat(path, rev string) (file string, info *RevInfo, err error) { + file, data, err := readDiskCache(path, rev, "info") + if err != nil { + // If the cache already contains a pseudo-version with the given hash, we + // would previously return that pseudo-version without checking upstream. + // However, that produced an unfortunate side-effect: if the author added a + // tag to the repository, 'go get' would not pick up the effect of that new + // tag on the existing commits, and 'go' commands that referred to those + // commits would use the previous name instead of the new one. + // + // That's especially problematic if the original pseudo-version starts with + // v0.0.0-, as was the case for all pseudo-versions during vgo development, + // since a v0.0.0- pseudo-version has lower precedence than pretty much any + // tagged version. + // + // In practice, we're only looking up by hash during initial conversion of a + // legacy config and during an explicit 'go get', and a little extra latency + // for those operations seems worth the benefit of picking up more accurate + // versions. + // + // Fall back to this resolution scheme only if the GOPROXY setting prohibits + // us from resolving upstream tags. + if cfg.GOPROXY == "off" { + if file, info, err := readDiskStatByHash(path, rev); err == nil { + return file, info, nil + } + } + return file, nil, err + } + info = new(RevInfo) + if err := json.Unmarshal(data, info); err != nil { + return file, nil, errNotCached + } + // The disk might have stale .info files that have Name and Short fields set. + // We want to canonicalize to .info files with those fields omitted. + // Remarshal and update the cache file if needed. + data2, err := json.Marshal(info) + if err == nil && !bytes.Equal(data2, data) { + writeDiskCache(file, data) + } + return file, info, nil +} + +// readDiskStatByHash is a fallback for readDiskStat for the case +// where rev is a commit hash instead of a proper semantic version. +// In that case, we look for a cached pseudo-version that matches +// the commit hash. If we find one, we use it. +// This matters most for converting legacy package management +// configs, when we are often looking up commits by full hash. +// Without this check we'd be doing network I/O to the remote repo +// just to find out about a commit we already know about +// (and have cached under its pseudo-version). +func readDiskStatByHash(path, rev string) (file string, info *RevInfo, err error) { + if cfg.GOMODCACHE == "" { + // Do not download to current directory. + return "", nil, errNotCached + } + + if !codehost.AllHex(rev) || len(rev) < 12 { + return "", nil, errNotCached + } + rev = rev[:12] + cdir, err := cacheDir(path) + if err != nil { + return "", nil, errNotCached + } + dir, err := os.Open(cdir) + if err != nil { + return "", nil, errNotCached + } + names, err := dir.Readdirnames(-1) + dir.Close() + if err != nil { + return "", nil, errNotCached + } + + // A given commit hash may map to more than one pseudo-version, + // depending on which tags are present on the repository. + // Take the highest such version. + var maxVersion string + suffix := "-" + rev + ".info" + err = errNotCached + for _, name := range names { + if strings.HasSuffix(name, suffix) { + v := strings.TrimSuffix(name, ".info") + if module.IsPseudoVersion(v) && semver.Compare(v, maxVersion) > 0 { + maxVersion = v + file, info, err = readDiskStat(path, strings.TrimSuffix(name, ".info")) + } + } + } + return file, info, err +} + +// oldVgoPrefix is the prefix in the old auto-generated cached go.mod files. +// We stopped trying to auto-generate the go.mod files. Now we use a trivial +// go.mod with only a module line, and we've dropped the version prefix +// entirely. If we see a version prefix, that means we're looking at an old copy +// and should ignore it. +var oldVgoPrefix = []byte("//vgo 0.0.") + +// readDiskGoMod reads a cached go.mod file from disk, +// returning the name of the cache file and the result. +// If the read fails, the caller can use +// writeDiskGoMod(file, data) to write a new cache entry. +func readDiskGoMod(path, rev string) (file string, data []byte, err error) { + file, data, err = readDiskCache(path, rev, "mod") + + // If the file has an old auto-conversion prefix, pretend it's not there. + if bytes.HasPrefix(data, oldVgoPrefix) { + err = errNotCached + data = nil + } + + if err == nil { + if err := checkGoMod(path, rev, data); err != nil { + return "", nil, err + } + } + + return file, data, err +} + +// readDiskCache is the generic "read from a cache file" implementation. +// It takes the revision and an identifying suffix for the kind of data being cached. +// It returns the name of the cache file and the content of the file. +// If the read fails, the caller can use +// writeDiskCache(file, data) to write a new cache entry. +func readDiskCache(path, rev, suffix string) (file string, data []byte, err error) { + file, err = CachePath(module.Version{Path: path, Version: rev}, suffix) + if err != nil { + return "", nil, errNotCached + } + data, err = robustio.ReadFile(file) + if err != nil { + return file, nil, errNotCached + } + return file, data, nil +} + +// writeDiskStat writes a stat result cache entry. +// The file name must have been returned by a previous call to readDiskStat. +func writeDiskStat(file string, info *RevInfo) error { + if file == "" { + return nil + } + + if info.Origin != nil { + // Clean the origin information, which might have too many + // validation criteria, for example if we are saving the result of + // m@master as m@pseudo-version. + clean := *info + info = &clean + o := *info.Origin + info.Origin = &o + + // Tags never matter if you are starting with a semver version, + // as we would be when finding this cache entry. + o.TagSum = "" + o.TagPrefix = "" + // Ref doesn't matter if you have a pseudoversion. + if module.IsPseudoVersion(info.Version) { + o.Ref = "" + } + } + + js, err := json.Marshal(info) + if err != nil { + return err + } + return writeDiskCache(file, js) +} + +// writeDiskGoMod writes a go.mod cache entry. +// The file name must have been returned by a previous call to readDiskGoMod. +func writeDiskGoMod(file string, text []byte) error { + return writeDiskCache(file, text) +} + +// writeDiskCache is the generic "write to a cache file" implementation. +// The file must have been returned by a previous call to readDiskCache. +func writeDiskCache(file string, data []byte) error { + if file == "" { + return nil + } + // Make sure directory for file exists. + if err := os.MkdirAll(filepath.Dir(file), 0777); err != nil { + return err + } + + // Write the file to a temporary location, and then rename it to its final + // path to reduce the likelihood of a corrupt file existing at that final path. + f, err := tempFile(filepath.Dir(file), filepath.Base(file), 0666) + if err != nil { + return err + } + defer func() { + // Only call os.Remove on f.Name() if we failed to rename it: otherwise, + // some other process may have created a new file with the same name after + // the rename completed. + if err != nil { + f.Close() + os.Remove(f.Name()) + } + }() + + if _, err := f.Write(data); err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + if err := robustio.Rename(f.Name(), file); err != nil { + return err + } + + if strings.HasSuffix(file, ".mod") { + rewriteVersionList(filepath.Dir(file)) + } + return nil +} + +// tempFile creates a new temporary file with given permission bits. +func tempFile(dir, prefix string, perm fs.FileMode) (f *os.File, err error) { + for i := 0; i < 10000; i++ { + name := filepath.Join(dir, prefix+strconv.Itoa(rand.Intn(1000000000))+".tmp") + f, err = os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, perm) + if os.IsExist(err) { + continue + } + break + } + return +} + +// rewriteVersionList rewrites the version list in dir +// after a new *.mod file has been written. +func rewriteVersionList(dir string) (err error) { + if filepath.Base(dir) != "@v" { + base.Fatalf("go: internal error: misuse of rewriteVersionList") + } + + listFile := filepath.Join(dir, "list") + + // Lock listfile when writing to it to try to avoid corruption to the file. + // Under rare circumstances, for instance, if the system loses power in the + // middle of a write it is possible for corrupt data to be written. This is + // not a problem for the go command itself, but may be an issue if the + // cache is being served by a GOPROXY HTTP server. This will be corrected + // the next time a new version of the module is fetched and the file is rewritten. + // TODO(matloob): golang.org/issue/43313 covers adding a go mod verify + // command that removes module versions that fail checksums. It should also + // remove list files that are detected to be corrupt. + f, err := lockedfile.Edit(listFile) + if err != nil { + return err + } + defer func() { + if cerr := f.Close(); cerr != nil && err == nil { + err = cerr + } + }() + infos, err := os.ReadDir(dir) + if err != nil { + return err + } + var list []string + for _, info := range infos { + // We look for *.mod files on the theory that if we can't supply + // the .mod file then there's no point in listing that version, + // since it's unusable. (We can have *.info without *.mod.) + // We don't require *.zip files on the theory that for code only + // involved in module graph construction, many *.zip files + // will never be requested. + name := info.Name() + if v, found := strings.CutSuffix(name, ".mod"); found { + if v != "" && module.CanonicalVersion(v) == v { + list = append(list, v) + } + } + } + semver.Sort(list) + + var buf bytes.Buffer + for _, v := range list { + buf.WriteString(v) + buf.WriteString("\n") + } + if fi, err := f.Stat(); err == nil && int(fi.Size()) == buf.Len() { + old := make([]byte, buf.Len()+1) + if n, err := f.ReadAt(old, 0); err == io.EOF && n == buf.Len() && bytes.Equal(buf.Bytes(), old) { + return nil // No edit needed. + } + } + // Remove existing contents, so that when we truncate to the actual size it will zero-fill, + // and we will be able to detect (some) incomplete writes as files containing trailing NUL bytes. + if err := f.Truncate(0); err != nil { + return err + } + // Reserve the final size and zero-fill. + if err := f.Truncate(int64(buf.Len())); err != nil { + return err + } + // Write the actual contents. If this fails partway through, + // the remainder of the file should remain as zeroes. + if _, err := f.Write(buf.Bytes()); err != nil { + f.Truncate(0) + return err + } + + return nil +} + +var ( + statCacheOnce sync.Once + statCacheErr error +) + +// checkCacheDir checks if the directory specified by GOMODCACHE exists. An +// error is returned if it does not. +func checkCacheDir() error { + if cfg.GOMODCACHE == "" { + // modload.Init exits if GOPATH[0] is empty, and cfg.GOMODCACHE + // is set to GOPATH[0]/pkg/mod if GOMODCACHE is empty, so this should never happen. + return fmt.Errorf("module cache not found: neither GOMODCACHE nor GOPATH is set") + } + if !filepath.IsAbs(cfg.GOMODCACHE) { + return fmt.Errorf("GOMODCACHE entry is relative; must be absolute path: %q.\n", cfg.GOMODCACHE) + } + + // os.Stat is slow on Windows, so we only call it once to prevent unnecessary + // I/O every time this function is called. + statCacheOnce.Do(func() { + fi, err := os.Stat(cfg.GOMODCACHE) + if err != nil { + if !os.IsNotExist(err) { + statCacheErr = fmt.Errorf("could not create module cache: %w", err) + return + } + if err := os.MkdirAll(cfg.GOMODCACHE, 0777); err != nil { + statCacheErr = fmt.Errorf("could not create module cache: %w", err) + return + } + return + } + if !fi.IsDir() { + statCacheErr = fmt.Errorf("could not create module cache: %q is not a directory", cfg.GOMODCACHE) + return + } + }) + return statCacheErr +} |