diff options
Diffstat (limited to 'src/cmd/go/internal/modfetch/codehost/codehost.go')
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/codehost.go | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/codehost/codehost.go b/src/cmd/go/internal/modfetch/codehost/codehost.go new file mode 100644 index 0000000..855b694 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/codehost.go @@ -0,0 +1,398 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package codehost defines the interface implemented by a code hosting source, +// along with support code for use by implementations. +package codehost + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" + "io/fs" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/lockedfile" + "cmd/go/internal/str" + + "golang.org/x/mod/module" + "golang.org/x/mod/semver" +) + +// Downloaded size limits. +const ( + MaxGoMod = 16 << 20 // maximum size of go.mod file + MaxLICENSE = 16 << 20 // maximum size of LICENSE file + MaxZipFile = 500 << 20 // maximum size of downloaded zip file +) + +// A Repo represents a code hosting source. +// Typical implementations include local version control repositories, +// remote version control servers, and code hosting sites. +// +// A Repo must be safe for simultaneous use by multiple goroutines, +// and callers must not modify returned values, which may be cached and shared. +type Repo interface { + // CheckReuse checks whether the old origin information + // remains up to date. If so, whatever cached object it was + // taken from can be reused. + // The subdir gives subdirectory name where the module root is expected to be found, + // "" for the root or "sub/dir" for a subdirectory (no trailing slash). + CheckReuse(old *Origin, subdir string) error + + // List lists all tags with the given prefix. + Tags(prefix string) (*Tags, error) + + // Stat returns information about the revision rev. + // A revision can be any identifier known to the underlying service: + // commit hash, branch, tag, and so on. + Stat(rev string) (*RevInfo, error) + + // Latest returns the latest revision on the default branch, + // whatever that means in the underlying implementation. + Latest() (*RevInfo, error) + + // ReadFile reads the given file in the file tree corresponding to revision rev. + // It should refuse to read more than maxSize bytes. + // + // If the requested file does not exist it should return an error for which + // os.IsNotExist(err) returns true. + ReadFile(rev, file string, maxSize int64) (data []byte, err error) + + // ReadZip downloads a zip file for the subdir subdirectory + // of the given revision to a new file in a given temporary directory. + // It should refuse to read more than maxSize bytes. + // It returns a ReadCloser for a streamed copy of the zip file. + // All files in the zip file are expected to be + // nested in a single top-level directory, whose name is not specified. + ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) + + // RecentTag returns the most recent tag on rev or one of its predecessors + // with the given prefix. allowed may be used to filter out unwanted versions. + RecentTag(rev, prefix string, allowed func(tag string) bool) (tag string, err error) + + // DescendsFrom reports whether rev or any of its ancestors has the given tag. + // + // DescendsFrom must return true for any tag returned by RecentTag for the + // same revision. + DescendsFrom(rev, tag string) (bool, error) +} + +// An Origin describes the provenance of a given repo method result. +// It can be passed to CheckReuse (usually in a different go command invocation) +// to see whether the result remains up-to-date. +type Origin struct { + VCS string `json:",omitempty"` // "git" etc + URL string `json:",omitempty"` // URL of repository + Subdir string `json:",omitempty"` // subdirectory in repo + + // If TagSum is non-empty, then the resolution of this module version + // depends on the set of tags present in the repo, specifically the tags + // of the form TagPrefix + a valid semver version. + // If the matching repo tags and their commit hashes still hash to TagSum, + // the Origin is still valid (at least as far as the tags are concerned). + // The exact checksum is up to the Repo implementation; see (*gitRepo).Tags. + TagPrefix string `json:",omitempty"` + TagSum string `json:",omitempty"` + + // If Ref is non-empty, then the resolution of this module version + // depends on Ref resolving to the revision identified by Hash. + // If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned). + // For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3", + // and the Hash is the Git object hash the ref maps to. + // Other VCS might choose differently, but the idea is that Ref is the name + // with a mutable meaning while Hash is a name with an immutable meaning. + Ref string `json:",omitempty"` + Hash string `json:",omitempty"` + + // If RepoSum is non-empty, then the resolution of this module version + // failed due to the repo being available but the version not being present. + // This depends on the entire state of the repo, which RepoSum summarizes. + // For Git, this is a hash of all the refs and their hashes. + RepoSum string `json:",omitempty"` +} + +// Checkable reports whether the Origin contains anything that can be checked. +// If not, the Origin is purely informational and should fail a CheckReuse call. +func (o *Origin) Checkable() bool { + return o.TagSum != "" || o.Ref != "" || o.Hash != "" || o.RepoSum != "" +} + +// ClearCheckable clears the Origin enough to make Checkable return false. +func (o *Origin) ClearCheckable() { + o.TagSum = "" + o.TagPrefix = "" + o.Ref = "" + o.Hash = "" + o.RepoSum = "" +} + +// A Tags describes the available tags in a code repository. +type Tags struct { + Origin *Origin + List []Tag +} + +// A Tag describes a single tag in a code repository. +type Tag struct { + Name string + Hash string // content hash identifying tag's content, if available +} + +// isOriginTag reports whether tag should be preserved +// in the Tags method's Origin calculation. +// We can safely ignore tags that are not look like pseudo-versions, +// because ../coderepo.go's (*codeRepo).Versions ignores them too. +// We can also ignore non-semver tags, but we have to include semver +// tags with extra suffixes, because the pseudo-version base finder uses them. +func isOriginTag(tag string) bool { + // modfetch.(*codeRepo).Versions uses Canonical == tag, + // but pseudo-version calculation has a weaker condition that + // the canonical is a prefix of the tag. + // Include those too, so that if any new one appears, we'll invalidate the cache entry. + // This will lead to spurious invalidation of version list results, + // but tags of this form being created should be fairly rare + // (and invalidate pseudo-version results anyway). + c := semver.Canonical(tag) + return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag) +} + +// A RevInfo describes a single revision in a source code repository. +type RevInfo struct { + Origin *Origin + Name string // complete ID in underlying repository + Short string // shortened ID, for use in pseudo-version + Version string // version used in lookup + Time time.Time // commit time + Tags []string // known tags for commit +} + +// UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a +// revision rather than a file. +type UnknownRevisionError struct { + Rev string +} + +func (e *UnknownRevisionError) Error() string { + return "unknown revision " + e.Rev +} +func (UnknownRevisionError) Is(err error) bool { + return err == fs.ErrNotExist +} + +// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given +// repository or module contains no commits. +var ErrNoCommits error = noCommitsError{} + +type noCommitsError struct{} + +func (noCommitsError) Error() string { + return "no commits" +} +func (noCommitsError) Is(err error) bool { + return err == fs.ErrNotExist +} + +// ErrUnsupported indicates that a requested operation cannot be performed, +// because it is unsupported. This error indicates that there is no alternative +// way to perform the operation. +// +// TODO(#41198): Remove this declaration and use errors.ErrUnsupported instead. +var ErrUnsupported = unsupportedOperationError{} + +type unsupportedOperationError struct{} + +func (unsupportedOperationError) Error() string { + return "unsupported operation" +} + +// AllHex reports whether the revision rev is entirely lower-case hexadecimal digits. +func AllHex(rev string) bool { + for i := 0; i < len(rev); i++ { + c := rev[i] + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' { + continue + } + return false + } + return true +} + +// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length +// used in pseudo-versions (12 hex digits). +func ShortenSHA1(rev string) string { + if AllHex(rev) && len(rev) == 40 { + return rev[:12] + } + return rev +} + +// WorkDir returns the name of the cached work directory to use for the +// given repository type and name. +func WorkDir(typ, name string) (dir, lockfile string, err error) { + if cfg.GOMODCACHE == "" { + return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set") + } + + // We name the work directory for the SHA256 hash of the type and name. + // We intentionally avoid the actual name both because of possible + // conflicts with valid file system paths and because we want to ensure + // that one checkout is never nested inside another. That nesting has + // led to security problems in the past. + if strings.Contains(typ, ":") { + return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") + } + key := typ + ":" + name + dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) + + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name) + } + if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil { + return "", "", err + } + + lockfile = dir + ".lock" + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "# lock %s\n", lockfile) + } + + unlock, err := lockedfile.MutexAt(lockfile).Lock() + if err != nil { + return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err) + } + defer unlock() + + data, err := os.ReadFile(dir + ".info") + info, err2 := os.Stat(dir) + if err == nil && err2 == nil && info.IsDir() { + // Info file and directory both already exist: reuse. + have := strings.TrimSuffix(string(data), "\n") + if have != key { + return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) + } + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name) + } + return dir, lockfile, nil + } + + // Info file or directory missing. Start from scratch. + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name) + } + os.RemoveAll(dir) + if err := os.MkdirAll(dir, 0777); err != nil { + return "", "", err + } + if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil { + os.RemoveAll(dir) + return "", "", err + } + return dir, lockfile, nil +} + +type RunError struct { + Cmd string + Err error + Stderr []byte + HelpText string +} + +func (e *RunError) Error() string { + text := e.Cmd + ": " + e.Err.Error() + stderr := bytes.TrimRight(e.Stderr, "\n") + if len(stderr) > 0 { + text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t") + } + if len(e.HelpText) > 0 { + text += "\n" + e.HelpText + } + return text +} + +var dirLock sync.Map + +// Run runs the command line in the given directory +// (an empty dir means the current directory). +// It returns the standard output and, for a non-zero exit, +// a *RunError indicating the command, exit status, and standard error. +// Standard error is unavailable for commands that exit successfully. +func Run(dir string, cmdline ...any) ([]byte, error) { + return RunWithStdin(dir, nil, cmdline...) +} + +// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. +// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. +var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) + +func RunWithStdin(dir string, stdin io.Reader, cmdline ...any) ([]byte, error) { + if dir != "" { + muIface, ok := dirLock.Load(dir) + if !ok { + muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) + } + mu := muIface.(*sync.Mutex) + mu.Lock() + defer mu.Unlock() + } + + cmd := str.StringList(cmdline...) + if os.Getenv("TESTGOVCS") == "panic" { + panic(fmt.Sprintf("use of vcs: %v", cmd)) + } + if cfg.BuildX { + text := new(strings.Builder) + if dir != "" { + text.WriteString("cd ") + text.WriteString(dir) + text.WriteString("; ") + } + for i, arg := range cmd { + if i > 0 { + text.WriteByte(' ') + } + switch { + case strings.ContainsAny(arg, "'"): + // Quote args that could be mistaken for quoted args. + text.WriteByte('"') + text.WriteString(bashQuoter.Replace(arg)) + text.WriteByte('"') + case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): + // Quote args that contain special characters, glob patterns, or spaces. + text.WriteByte('\'') + text.WriteString(arg) + text.WriteByte('\'') + default: + text.WriteString(arg) + } + } + fmt.Fprintf(os.Stderr, "%s\n", text) + start := time.Now() + defer func() { + fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text) + }() + } + // TODO: Impose limits on command output size. + // TODO: Set environment to get English error messages. + var stderr bytes.Buffer + var stdout bytes.Buffer + c := exec.Command(cmd[0], cmd[1:]...) + c.Dir = dir + c.Stdin = stdin + c.Stderr = &stderr + c.Stdout = &stdout + err := c.Run() + if err != nil { + err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} + } + return stdout.Bytes(), err +} |