diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/cmd/go/internal/modfetch/codehost | |
parent | Initial commit. (diff) | |
download | golang-1.20-upstream.tar.xz golang-1.20-upstream.zip |
Adding upstream version 1.20.14. [refs: upstream/1.20.14, upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cmd/go/internal/modfetch/codehost')
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/codehost.go | 398 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/git.go | 904 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/git_test.go | 734 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/shell.go | 141 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/svn.go | 154 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/vcs.go | 634 |
6 files changed, 2965 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/codehost/codehost.go b/src/cmd/go/internal/modfetch/codehost/codehost.go new file mode 100644 index 0000000..855b694 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/codehost.go @@ -0,0 +1,398 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package codehost defines the interface implemented by a code hosting source, +// along with support code for use by implementations. +package codehost + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" + "io/fs" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/lockedfile" + "cmd/go/internal/str" + + "golang.org/x/mod/module" + "golang.org/x/mod/semver" +) + +// Downloaded size limits. +const ( + MaxGoMod = 16 << 20 // maximum size of go.mod file + MaxLICENSE = 16 << 20 // maximum size of LICENSE file + MaxZipFile = 500 << 20 // maximum size of downloaded zip file +) + +// A Repo represents a code hosting source. +// Typical implementations include local version control repositories, +// remote version control servers, and code hosting sites. +// +// A Repo must be safe for simultaneous use by multiple goroutines, +// and callers must not modify returned values, which may be cached and shared. +type Repo interface { + // CheckReuse checks whether the old origin information + // remains up to date. If so, whatever cached object it was + // taken from can be reused. + // The subdir gives subdirectory name where the module root is expected to be found, + // "" for the root or "sub/dir" for a subdirectory (no trailing slash). + CheckReuse(old *Origin, subdir string) error + + // List lists all tags with the given prefix. + Tags(prefix string) (*Tags, error) + + // Stat returns information about the revision rev. 
+ // A revision can be any identifier known to the underlying service: + // commit hash, branch, tag, and so on. + Stat(rev string) (*RevInfo, error) + + // Latest returns the latest revision on the default branch, + // whatever that means in the underlying implementation. + Latest() (*RevInfo, error) + + // ReadFile reads the given file in the file tree corresponding to revision rev. + // It should refuse to read more than maxSize bytes. + // + // If the requested file does not exist it should return an error for which + // os.IsNotExist(err) returns true. + ReadFile(rev, file string, maxSize int64) (data []byte, err error) + + // ReadZip downloads a zip file for the subdir subdirectory + // of the given revision to a new file in a given temporary directory. + // It should refuse to read more than maxSize bytes. + // It returns a ReadCloser for a streamed copy of the zip file. + // All files in the zip file are expected to be + // nested in a single top-level directory, whose name is not specified. + ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) + + // RecentTag returns the most recent tag on rev or one of its predecessors + // with the given prefix. allowed may be used to filter out unwanted versions. + RecentTag(rev, prefix string, allowed func(tag string) bool) (tag string, err error) + + // DescendsFrom reports whether rev or any of its ancestors has the given tag. + // + // DescendsFrom must return true for any tag returned by RecentTag for the + // same revision. + DescendsFrom(rev, tag string) (bool, error) +} + +// An Origin describes the provenance of a given repo method result. +// It can be passed to CheckReuse (usually in a different go command invocation) +// to see whether the result remains up-to-date. 
type Origin struct {
	VCS    string `json:",omitempty"` // version control system, e.g. "git"
	URL    string `json:",omitempty"` // repository URL
	Subdir string `json:",omitempty"` // subdirectory within the repository

	// TagPrefix and TagSum go together. A non-empty TagSum means the
	// module version was resolved using the set of repo tags of the form
	// TagPrefix + valid semver version. The Origin stays valid (as far as
	// tags are concerned) while those tags and their commit hashes still
	// hash to TagSum. How the checksum is computed is left to the Repo
	// implementation; see (*gitRepo).Tags.
	TagPrefix string `json:",omitempty"`
	TagSum    string `json:",omitempty"`

	// Ref and Hash go together. A non-empty Ref means the module version
	// was resolved by Ref resolving to the revision named by Hash, and the
	// Origin stays valid (as far as Ref is concerned) while Ref still
	// resolves to Hash. For Git, Ref is a full ref such as
	// "refs/heads/main" or "refs/tags/v1.2.3" and Hash is the Git object
	// hash it maps to. Other VCS may choose differently; the idea is that
	// Ref is the name whose meaning can change and Hash is the name whose
	// meaning cannot.
	Ref  string `json:",omitempty"`
	Hash string `json:",omitempty"`

	// A non-empty RepoSum means resolution failed because the repo was
	// reachable but the requested version was absent. That outcome depends
	// on the whole repo state, which RepoSum summarizes. For Git it is a
	// hash of all the refs and their hashes.
	RepoSum string `json:",omitempty"`
}

// Checkable reports whether the Origin carries at least one field that a
// CheckReuse call can verify. An Origin that is not checkable is purely
// informational and should fail a CheckReuse call.
func (o *Origin) Checkable() bool {
	for _, field := range [...]string{o.TagSum, o.Ref, o.Hash, o.RepoSum} {
		if field != "" {
			return true
		}
	}
	return false
}

// ClearCheckable clears the Origin enough to make Checkable return false.
+func (o *Origin) ClearCheckable() { + o.TagSum = "" + o.TagPrefix = "" + o.Ref = "" + o.Hash = "" + o.RepoSum = "" +} + +// A Tags describes the available tags in a code repository. +type Tags struct { + Origin *Origin + List []Tag +} + +// A Tag describes a single tag in a code repository. +type Tag struct { + Name string + Hash string // content hash identifying tag's content, if available +} + +// isOriginTag reports whether tag should be preserved +// in the Tags method's Origin calculation. +// We can safely ignore tags that are not look like pseudo-versions, +// because ../coderepo.go's (*codeRepo).Versions ignores them too. +// We can also ignore non-semver tags, but we have to include semver +// tags with extra suffixes, because the pseudo-version base finder uses them. +func isOriginTag(tag string) bool { + // modfetch.(*codeRepo).Versions uses Canonical == tag, + // but pseudo-version calculation has a weaker condition that + // the canonical is a prefix of the tag. + // Include those too, so that if any new one appears, we'll invalidate the cache entry. + // This will lead to spurious invalidation of version list results, + // but tags of this form being created should be fairly rare + // (and invalidate pseudo-version results anyway). + c := semver.Canonical(tag) + return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag) +} + +// A RevInfo describes a single revision in a source code repository. +type RevInfo struct { + Origin *Origin + Name string // complete ID in underlying repository + Short string // shortened ID, for use in pseudo-version + Version string // version used in lookup + Time time.Time // commit time + Tags []string // known tags for commit +} + +// UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a +// revision rather than a file. 
+type UnknownRevisionError struct { + Rev string +} + +func (e *UnknownRevisionError) Error() string { + return "unknown revision " + e.Rev +} +func (UnknownRevisionError) Is(err error) bool { + return err == fs.ErrNotExist +} + +// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given +// repository or module contains no commits. +var ErrNoCommits error = noCommitsError{} + +type noCommitsError struct{} + +func (noCommitsError) Error() string { + return "no commits" +} +func (noCommitsError) Is(err error) bool { + return err == fs.ErrNotExist +} + +// ErrUnsupported indicates that a requested operation cannot be performed, +// because it is unsupported. This error indicates that there is no alternative +// way to perform the operation. +// +// TODO(#41198): Remove this declaration and use errors.ErrUnsupported instead. +var ErrUnsupported = unsupportedOperationError{} + +type unsupportedOperationError struct{} + +func (unsupportedOperationError) Error() string { + return "unsupported operation" +} + +// AllHex reports whether the revision rev is entirely lower-case hexadecimal digits. +func AllHex(rev string) bool { + for i := 0; i < len(rev); i++ { + c := rev[i] + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' { + continue + } + return false + } + return true +} + +// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length +// used in pseudo-versions (12 hex digits). +func ShortenSHA1(rev string) string { + if AllHex(rev) && len(rev) == 40 { + return rev[:12] + } + return rev +} + +// WorkDir returns the name of the cached work directory to use for the +// given repository type and name. +func WorkDir(typ, name string) (dir, lockfile string, err error) { + if cfg.GOMODCACHE == "" { + return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set") + } + + // We name the work directory for the SHA256 hash of the type and name. 
+ // We intentionally avoid the actual name both because of possible + // conflicts with valid file system paths and because we want to ensure + // that one checkout is never nested inside another. That nesting has + // led to security problems in the past. + if strings.Contains(typ, ":") { + return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") + } + key := typ + ":" + name + dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) + + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name) + } + if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil { + return "", "", err + } + + lockfile = dir + ".lock" + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "# lock %s\n", lockfile) + } + + unlock, err := lockedfile.MutexAt(lockfile).Lock() + if err != nil { + return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err) + } + defer unlock() + + data, err := os.ReadFile(dir + ".info") + info, err2 := os.Stat(dir) + if err == nil && err2 == nil && info.IsDir() { + // Info file and directory both already exist: reuse. + have := strings.TrimSuffix(string(data), "\n") + if have != key { + return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) + } + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name) + } + return dir, lockfile, nil + } + + // Info file or directory missing. Start from scratch. 
+ if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name) + } + os.RemoveAll(dir) + if err := os.MkdirAll(dir, 0777); err != nil { + return "", "", err + } + if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil { + os.RemoveAll(dir) + return "", "", err + } + return dir, lockfile, nil +} + +type RunError struct { + Cmd string + Err error + Stderr []byte + HelpText string +} + +func (e *RunError) Error() string { + text := e.Cmd + ": " + e.Err.Error() + stderr := bytes.TrimRight(e.Stderr, "\n") + if len(stderr) > 0 { + text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t") + } + if len(e.HelpText) > 0 { + text += "\n" + e.HelpText + } + return text +} + +var dirLock sync.Map + +// Run runs the command line in the given directory +// (an empty dir means the current directory). +// It returns the standard output and, for a non-zero exit, +// a *RunError indicating the command, exit status, and standard error. +// Standard error is unavailable for commands that exit successfully. +func Run(dir string, cmdline ...any) ([]byte, error) { + return RunWithStdin(dir, nil, cmdline...) +} + +// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. +// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. +var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) + +func RunWithStdin(dir string, stdin io.Reader, cmdline ...any) ([]byte, error) { + if dir != "" { + muIface, ok := dirLock.Load(dir) + if !ok { + muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) + } + mu := muIface.(*sync.Mutex) + mu.Lock() + defer mu.Unlock() + } + + cmd := str.StringList(cmdline...) 
+ if os.Getenv("TESTGOVCS") == "panic" { + panic(fmt.Sprintf("use of vcs: %v", cmd)) + } + if cfg.BuildX { + text := new(strings.Builder) + if dir != "" { + text.WriteString("cd ") + text.WriteString(dir) + text.WriteString("; ") + } + for i, arg := range cmd { + if i > 0 { + text.WriteByte(' ') + } + switch { + case strings.ContainsAny(arg, "'"): + // Quote args that could be mistaken for quoted args. + text.WriteByte('"') + text.WriteString(bashQuoter.Replace(arg)) + text.WriteByte('"') + case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): + // Quote args that contain special characters, glob patterns, or spaces. + text.WriteByte('\'') + text.WriteString(arg) + text.WriteByte('\'') + default: + text.WriteString(arg) + } + } + fmt.Fprintf(os.Stderr, "%s\n", text) + start := time.Now() + defer func() { + fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text) + }() + } + // TODO: Impose limits on command output size. + // TODO: Set environment to get English error messages. + var stderr bytes.Buffer + var stdout bytes.Buffer + c := exec.Command(cmd[0], cmd[1:]...) + c.Dir = dir + c.Stdin = stdin + c.Stderr = &stderr + c.Stdout = &stdout + err := c.Run() + if err != nil { + err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} + } + return stdout.Bytes(), err +} diff --git a/src/cmd/go/internal/modfetch/codehost/git.go b/src/cmd/go/internal/modfetch/codehost/git.go new file mode 100644 index 0000000..8bfbe7b --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/git.go @@ -0,0 +1,904 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codehost + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "errors" + "fmt" + "io" + "io/fs" + "net/url" + "os" + "os/exec" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/par" + "cmd/go/internal/web" + + "golang.org/x/mod/semver" +) + +// LocalGitRepo is like Repo but accepts both Git remote references +// and paths to repositories on the local file system. +func LocalGitRepo(remote string) (Repo, error) { + return newGitRepoCached(remote, true) +} + +// A notExistError wraps another error to retain its original text +// but makes it opaquely equivalent to fs.ErrNotExist. +type notExistError struct { + err error +} + +func (e notExistError) Error() string { return e.err.Error() } +func (notExistError) Is(err error) bool { return err == fs.ErrNotExist } + +const gitWorkDirType = "git3" + +var gitRepoCache par.Cache + +func newGitRepoCached(remote string, localOK bool) (Repo, error) { + type key struct { + remote string + localOK bool + } + type cached struct { + repo Repo + err error + } + + c := gitRepoCache.Do(key{remote, localOK}, func() any { + repo, err := newGitRepo(remote, localOK) + return cached{repo, err} + }).(cached) + + return c.repo, c.err +} + +func newGitRepo(remote string, localOK bool) (Repo, error) { + r := &gitRepo{remote: remote} + if strings.Contains(remote, "://") { + // This is a remote path. + var err error + r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote) + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil { + if _, err := Run(r.dir, "git", "init", "--bare"); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + // We could just say git fetch https://whatever later, + // but this lets us say git fetch origin instead, which + // is a little nicer. 
More importantly, using a named remote + // avoids a problem with Git LFS. See golang.org/issue/25605. + if _, err := Run(r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + if runtime.GOOS == "windows" { + // Git for Windows by default does not support paths longer than + // MAX_PATH (260 characters) because that may interfere with navigation + // in some Windows programs. However, cmd/go should be able to handle + // long paths just fine, and we expect people to use 'go clean' to + // manipulate the module cache, so it should be harmless to set here, + // and in some cases may be necessary in order to download modules with + // long branch names. + // + // See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path. + if _, err := Run(r.dir, "git", "config", "core.longpaths", "true"); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + } + } + r.remoteURL = r.remote + r.remote = "origin" + } else { + // Local path. + // Disallow colon (not in ://) because sometimes + // that's rcp-style host:path syntax and sometimes it's not (c:\work). + // The go command has always insisted on URL syntax for ssh. 
+ if strings.Contains(remote, ":") { + return nil, fmt.Errorf("git remote cannot use host:path syntax") + } + if !localOK { + return nil, fmt.Errorf("git remote must not be local directory") + } + r.local = true + info, err := os.Stat(remote) + if err != nil { + return nil, err + } + if !info.IsDir() { + return nil, fmt.Errorf("%s exists but is not a directory", remote) + } + r.dir = remote + r.mu.Path = r.dir + ".lock" + } + return r, nil +} + +type gitRepo struct { + remote, remoteURL string + local bool + dir string + + mu lockedfile.Mutex // protects fetchLevel and git repo state + + fetchLevel int + + statCache par.Cache + + refsOnce sync.Once + // refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master") + // to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6") + refs map[string]string + refsErr error + + localTagsOnce sync.Once + localTags map[string]bool +} + +const ( + // How much have we fetched into the git repo (in this process)? + fetchNone = iota // nothing yet + fetchSome // shallow fetches of individual hashes + fetchAll // "fetch -t origin": get all remote branches and tags +) + +// loadLocalTags loads tag references from the local git cache +// into the map r.localTags. +// Should only be called as r.localTagsOnce.Do(r.loadLocalTags). +func (r *gitRepo) loadLocalTags() { + // The git protocol sends all known refs and ls-remote filters them on the client side, + // so we might as well record both heads and tags in one shot. + // Most of the time we only care about tags but sometimes we care about heads too. 
+ out, err := Run(r.dir, "git", "tag", "-l") + if err != nil { + return + } + + r.localTags = make(map[string]bool) + for _, line := range strings.Split(string(out), "\n") { + if line != "" { + r.localTags[line] = true + } + } +} + +func (r *gitRepo) CheckReuse(old *Origin, subdir string) error { + if old == nil { + return fmt.Errorf("missing origin") + } + if old.VCS != "git" || old.URL != r.remoteURL { + return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL) + } + if old.Subdir != subdir { + return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir) + } + + // Note: Can have Hash with no Ref and no TagSum and no RepoSum, + // meaning the Hash simply has to remain in the repo. + // In that case we assume it does in the absence of any real way to check. + // But if neither Hash nor TagSum is present, we have nothing to check, + // which we take to mean we didn't record enough information to be sure. + if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" { + return fmt.Errorf("non-specific origin") + } + + r.loadRefs() + if r.refsErr != nil { + return r.refsErr + } + + if old.Ref != "" { + hash, ok := r.refs[old.Ref] + if !ok { + return fmt.Errorf("ref %q deleted", old.Ref) + } + if hash != old.Hash { + return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash) + } + } + if old.TagSum != "" { + tags, err := r.Tags(old.TagPrefix) + if err != nil { + return err + } + if tags.Origin.TagSum != old.TagSum { + return fmt.Errorf("tags changed") + } + } + if old.RepoSum != "" { + if r.repoSum(r.refs) != old.RepoSum { + return fmt.Errorf("refs changed") + } + } + return nil +} + +// loadRefs loads heads and tags references from the remote into the map r.refs. +// The result is cached in memory. 
func (r *gitRepo) loadRefs() (map[string]string, error) {
	// refsOnce ensures ls-remote runs at most once per gitRepo; later calls
	// return whatever r.refs and r.refsErr the first call recorded.
	r.refsOnce.Do(func() {
		// The git protocol sends all known refs and ls-remote filters them on the client side,
		// so we might as well record both heads and tags in one shot.
		// Most of the time we only care about tags but sometimes we care about heads too.
		out, gitErr := Run(r.dir, "git", "ls-remote", "-q", r.remote)
		if gitErr != nil {
			if rerr, ok := gitErr.(*RunError); ok {
				// A credentials prompt failure gets extra guidance appended
				// to the error text through RunError.HelpText.
				if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
					rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
				}
			}

			// If the remote URL doesn't exist at all, ideally we should treat the whole
			// repository as nonexistent by wrapping the error in a notExistError.
			// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
			// ourselves and see what code it serves.
			if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
				if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
					gitErr = notExistError{gitErr}
				}
			}

			r.refsErr = gitErr
			return
		}

		// Each useful output line has exactly two fields: hash, then ref name.
		// Only HEAD, refs/heads/* and refs/tags/* are recorded.
		refs := make(map[string]string)
		for _, line := range strings.Split(string(out), "\n") {
			f := strings.Fields(line)
			if len(f) != 2 {
				continue
			}
			if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
				refs[f[1]] = f[0]
			}
		}
		// An entry named "<ref>^{}" overrides the hash stored for "<ref>",
		// and the suffixed key itself is dropped from the map.
		for ref, hash := range refs {
			if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag
				refs[k] = hash
				delete(refs, ref)
			}
		}
		r.refs = refs
	})
	return r.refs, r.refsErr
}

// Tags returns the remote tags (refs under refs/tags/) whose names begin with
// prefix, sorted by name.
//
// The returned Origin records VCS, URL and TagPrefix, plus a TagSum: "t1:"
// followed by the base64-encoded SHA-256 of a `%q %s\n` line (name, hash)
// for each listed tag that isOriginTag accepts after the directory part of
// prefix (everything up to and including its last "/") is trimmed.
func (r *gitRepo) Tags(prefix string) (*Tags, error) {
	refs, err := r.loadRefs()
	if err != nil {
		return nil, err
	}

	tags := &Tags{
		Origin: &Origin{
			VCS:       "git",
			URL:       r.remoteURL,
			TagPrefix: prefix,
		},
		List: []Tag{},
	}
	for ref, hash := range refs {
		if !strings.HasPrefix(ref, "refs/tags/") {
			continue
		}
		tag := ref[len("refs/tags/"):]
		if !strings.HasPrefix(tag, prefix) {
			continue
		}
		tags.List = append(tags.List, Tag{tag, hash})
	}
	// Map iteration order is random; sort so that both the returned list and
	// the checksum below are deterministic.
	sort.Slice(tags.List, func(i, j int) bool {
		return tags.List[i].Name < tags.List[j].Name
	})

	// dir is prefix up to and including its last slash ("" if there is none);
	// it is stripped before asking isOriginTag about the remaining version part.
	dir := prefix[:strings.LastIndex(prefix, "/")+1]
	h := sha256.New()
	for _, tag := range tags.List {
		if isOriginTag(strings.TrimPrefix(tag.Name, dir)) {
			fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash)
		}
	}
	tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
	return tags, nil
}

// repoSum returns a checksum of the entire repo state,
// which can be checked (as Origin.RepoSum) to cache
// the absence of a specific module version.
// The caller must supply refs, the result of a successful r.loadRefs.
//
// The checksum is "r1:" followed by the base64-encoded SHA-256 of one
// `%q %s\n` line (ref name, hash) per ref, in sorted ref-name order.
func (r *gitRepo) repoSum(refs map[string]string) string {
	var list []string
	for ref := range refs {
		list = append(list, ref)
	}
	sort.Strings(list)
	h := sha256.New()
	for _, ref := range list {
		fmt.Fprintf(h, "%q %s\n", ref, refs[ref])
	}
	return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
}

// unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs,
// for use when returning an UnknownRevisionError.
// Only Origin.VCS, Origin.URL and Origin.RepoSum are populated.
func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo {
	return &RevInfo{
		Origin: &Origin{
			VCS:     "git",
			URL:     r.remoteURL,
			RepoSum: r.repoSum(refs),
		},
	}
}

// Latest returns the RevInfo for the commit that the remote's HEAD ref points
// to, with Origin.Ref set to "HEAD" and Origin.Hash set to that commit hash.
// It returns ErrNoCommits when the loaded refs contain no HEAD entry.
func (r *gitRepo) Latest() (*RevInfo, error) {
	refs, err := r.loadRefs()
	if err != nil {
		return nil, err
	}
	if refs["HEAD"] == "" {
		return nil, ErrNoCommits
	}
	statInfo, err := r.Stat(refs["HEAD"])
	if err != nil {
		return nil, err
	}

	// Stat may return cached info, so make a copy to modify here.
	// The Origin is copied as well, so the cached one is never written to.
	info := new(RevInfo)
	*info = *statInfo
	info.Origin = new(Origin)
	if statInfo.Origin != nil {
		*info.Origin = *statInfo.Origin
	}
	info.Origin.Ref = "HEAD"
	info.Origin.Hash = refs["HEAD"]

	return info, nil
}

// findRef finds some ref name for the given hash,
// for use when the server requires giving a ref instead of a hash.
// There may be multiple ref names for a given hash,
// in which case this returns some name - it doesn't matter which.
// It reports ok == false when the refs cannot be loaded or no ref has that hash.
func (r *gitRepo) findRef(hash string) (ref string, ok bool) {
	refs, err := r.loadRefs()
	if err != nil {
		return "", false
	}
	for ref, h := range refs {
		if h == hash {
			return ref, true
		}
	}
	return "", false
}

// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.)
const minHashDigits = 7

// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// For a local repo it only consults the local repository. Otherwise it tries,
// in order: a local lookup of rev as a hash, a local lookup of rev as a known
// local tag, resolution against the remote refs, a shallow fetch of the
// resolved ref, and finally a full fetch of all heads and tags.
func (r *gitRepo) stat(rev string) (info *RevInfo, err error) {
	if r.local {
		return r.statLocal(rev, rev)
	}

	// Fast path: maybe rev is a hash we already have locally.
	didStatLocal := false
	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		if info, err := r.statLocal(rev, rev); err == nil {
			return info, nil
		}
		didStatLocal = true
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(r.loadLocalTags)
	if r.localTags[rev] {
		return r.statLocal(rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	refs, err := r.loadRefs()
	if err != nil {
		return nil, err
	}
	// A loadRefs failure has already been returned above, so refs is the
	// successfully loaded map from here on. Resolve rev against it in
	// priority order: tag, branch, HEAD, then hash prefix.
	var ref, hash string
	if refs["refs/tags/"+rev] != "" {
		ref = "refs/tags/" + rev
		hash = refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	} else if refs["refs/heads/"+rev] != "" {
		ref = "refs/heads/" + rev
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	} else if rev == "HEAD" && refs["HEAD"] != "" {
		ref = "HEAD"
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					return nil, fmt.Errorf("ambiguous revision %s", rev)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	} else {
		// rev is neither a known remote name nor hash-shaped: report it as
		// unknown, with a RepoSum so the negative result can be cached.
		return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev}
	}

	// From here on, every return passes through this hook, which stamps the
	// resolved hash (and the ref, when one is known) into the Origin of the
	// RevInfo being returned.
	defer func() {
		if info != nil {
			info.Origin.Hash = info.Name
			// There's a ref = hash below; don't write that hash down as Origin.Ref.
			if ref != info.Origin.Hash {
				info.Origin.Ref = ref
			}
		}
	}()

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	// Perhaps r.localTags did not have the ref when we loaded local tags,
	// but we've since done fetches that pulled down the hash we need
	// (or already have the hash we need, just without its tag).
	// Either way, try a local stat before falling back to network I/O.
	if !didStatLocal {
		if info, err := r.statLocal(rev, hash); err == nil {
			if after, found := strings.CutPrefix(ref, "refs/tags/"); found {
				// Make sure tag exists, so it will be in localTags next time the go command is run.
				// The result of this command is deliberately not checked.
				Run(r.dir, "git", "tag", after, hash)
			}
			return info, nil
		}
	}

	// If we know a specific commit we need and its ref, fetch it.
	// We do NOT fetch arbitrary hashes (when we don't know the ref)
	// because we want to avoid ever importing a commit that isn't
	// reachable from refs/tags/* or refs/heads/* or HEAD.
	// Both Gerrit and GitHub expose every CL/PR as a named ref,
	// and we don't want those commits masquerading as being real
	// pseudo-versions in the main repo.
	// (r.local is always false here: local repos returned at the top.)
	if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local {
		r.fetchLevel = fetchSome
		var refspec string
		if ref != "" && ref != "HEAD" {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		} else {
			// Fetch the hash but give it a local name (refs/dummy),
			// because that triggers the fetch behavior of creating any
			// other known remote tags for the hash. We never use
			// refs/dummy (it's not refs/tags/dummy) and it will be
			// overwritten in the next command, and that's fine.
			ref = hash
			refspec = hash + ":refs/dummy"
		}
		_, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec)
		if err == nil {
			return r.statLocal(rev, ref)
		}
		// Don't try to be smart about parsing the error.
		// It's too complex and varies too much by git version.
		// No matter what went wrong, fall back to a complete fetch.
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if err := r.fetchRefsLocked(); err != nil {
		return nil, err
	}

	return r.statLocal(rev, rev)
}

// fetchRefsLocked fetches all heads and tags from the origin, along with the
// ancestors of those commits.
//
// We only fetch heads and tags, not arbitrary other commits: we don't want to
// pull in off-branch commits (such as rejected GitHub pull requests) that the
// server may be willing to provide. (See the comments within the stat method
// for more detail.)
//
// fetchRefsLocked requires that r.mu remain locked for the duration of the call.
// It is a no-op once r.fetchLevel has reached fetchAll.
func (r *gitRepo) fetchRefsLocked() error {
	if r.fetchLevel < fetchAll {
		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
		// (2019-08-16), we must first expand the set of local refs, and only then
		// unshallow the repository as a separate fetch operation. (See
		// golang.org/issue/34266 and
		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)

		if _, err := Run(r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
			return err
		}

		// The unshallow fetch runs only when a "shallow" file exists in the
		// repo directory.
		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
			if _, err := Run(r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
				return err
			}
		}

		r.fetchLevel = fetchAll
	}
	return nil
}

// statLocal returns a new RevInfo describing rev in the local git repository.
// It uses version as info.Version.
+func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) { + out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--") + if err != nil { + // Return info with Origin.RepoSum if possible to allow caching of negative lookup. + var info *RevInfo + if refs, err := r.loadRefs(); err == nil { + info = r.unknownRevisionInfo(refs) + } + return info, &UnknownRevisionError{Rev: rev} + } + f := strings.Fields(string(out)) + if len(f) < 2 { + return nil, fmt.Errorf("unexpected response from git log: %q", out) + } + hash := f[0] + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + t, err := strconv.ParseInt(f[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid time from git log: %q", out) + } + + info := &RevInfo{ + Origin: &Origin{ + VCS: "git", + URL: r.remoteURL, + Hash: hash, + }, + Name: hash, + Short: ShortenSHA1(hash), + Time: time.Unix(t, 0).UTC(), + Version: hash, + } + if !strings.HasPrefix(hash, rev) { + info.Origin.Ref = rev + } + + // Add tags. Output looks like: + // ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD + for i := 2; i < len(f); i++ { + if f[i] == "tag:" { + i++ + if i < len(f) { + info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ",")) + } + } + } + sort.Strings(info.Tags) + + // Used hash as info.Version above. + // Use caller's suggested version if it appears in the tag list + // (filters out branch names, HEAD). 
+ for _, tag := range info.Tags { + if version == tag { + info.Version = version + } + } + + return info, nil +} + +func (r *gitRepo) Stat(rev string) (*RevInfo, error) { + if rev == "latest" { + return r.Latest() + } + type cached struct { + info *RevInfo + err error + } + c := r.statCache.Do(rev, func() any { + info, err := r.stat(rev) + return cached{info, err} + }).(cached) + return c.info, c.err +} + +func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) { + // TODO: Could use git cat-file --batch. + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file) + if err != nil { + return nil, fs.ErrNotExist + } + return out, nil +} + +func (r *gitRepo) RecentTag(rev, prefix string, allowed func(tag string) bool) (tag string, err error) { + info, err := r.Stat(rev) + if err != nil { + return "", err + } + rev = info.Name // expand hash prefixes + + // describe sets tag and err using 'git for-each-ref' and reports whether the + // result is definitive. + describe := func() (definitive bool) { + var out []byte + out, err = Run(r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev) + if err != nil { + return true + } + + // prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix + var highest string + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + // git do support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here + // instead gives support for git v2.7.0. 
+ if !strings.HasPrefix(line, "refs/tags/") { + continue + } + line = line[len("refs/tags/"):] + + if !strings.HasPrefix(line, prefix) { + continue + } + if !allowed(line) { + continue + } + + semtag := line[len(prefix):] + if semver.Compare(semtag, highest) > 0 { + highest = semtag + } + } + + if highest != "" { + tag = prefix + highest + } + + return tag != "" && !AllHex(tag) + } + + if describe() { + return tag, err + } + + // Git didn't find a version tag preceding the requested rev. + // See whether any plausible tag exists. + tags, err := r.Tags(prefix + "v") + if err != nil { + return "", err + } + if len(tags.List) == 0 { + return "", nil + } + + // There are plausible tags, but we don't know if rev is a descendent of any of them. + // Fetch the history to find out. + + unlock, err := r.mu.Lock() + if err != nil { + return "", err + } + defer unlock() + + if err := r.fetchRefsLocked(); err != nil { + return "", err + } + + // If we've reached this point, we have all of the commits that are reachable + // from all heads and tags. + // + // The only refs we should be missing are those that are no longer reachable + // (or never were reachable) from any branch or tag, including the master + // branch, and we don't want to resolve them anyway (they're probably + // unreachable for a reason). + // + // Try one last time in case some other goroutine fetched rev while we were + // waiting on the lock. + describe() + return tag, err +} + +func (r *gitRepo) DescendsFrom(rev, tag string) (bool, error) { + // The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so + // this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go + // already doesn't work with Git 1.7.1, so at least it's not a regression. + // + // git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or + // 1 if not. 
+ _, err := Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + + // Git reports "is an ancestor" with exit code 0 and "not an ancestor" with + // exit code 1. + // Unfortunately, if we've already fetched rev with a shallow history, git + // merge-base has been observed to report a false-negative, so don't stop yet + // even if the exit code is 1! + if err == nil { + return true, nil + } + + // See whether the tag and rev even exist. + tags, err := r.Tags(tag) + if err != nil { + return false, err + } + if len(tags.List) == 0 { + return false, nil + } + + // NOTE: r.stat is very careful not to fetch commits that we shouldn't know + // about, like rejected GitHub pull requests, so don't try to short-circuit + // that here. + if _, err = r.stat(rev); err != nil { + return false, err + } + + // Now fetch history so that git can search for a path. + unlock, err := r.mu.Lock() + if err != nil { + return false, err + } + defer unlock() + + if r.fetchLevel < fetchAll { + // Fetch the complete history for all refs and heads. It would be more + // efficient to only fetch the history from rev to tag, but that's much more + // complicated, and any kind of shallow fetch is fairly likely to trigger + // bugs in JGit servers and/or the go command anyway. + if err := r.fetchRefsLocked(); err != nil { + return false, err + } + } + + _, err = Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + if err == nil { + return true, nil + } + if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 { + return false, nil + } + return false, err +} + +func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { + // TODO: Use maxSize or drop it. 
+ args := []string{} + if subdir != "" { + args = append(args, "--", subdir) + } + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if err := ensureGitAttributes(r.dir); err != nil { + return nil, err + } + + // Incredibly, git produces different archives depending on whether + // it is running on a Windows system or not, in an attempt to normalize + // text file line endings. Setting -c core.autocrlf=input means only + // translate files on the way into the repo, not on the way out (archive). + // The -c core.eol=lf should be unnecessary but set it anyway. + archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args) + if err != nil { + if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) { + return nil, fs.ErrNotExist + } + return nil, err + } + + return io.NopCloser(bytes.NewReader(archive)), nil +} + +// ensureGitAttributes makes sure export-subst and export-ignore features are +// disabled for this repo. This is intended to be run prior to running git +// archive so that zip files are generated that produce consistent ziphashes +// for a given revision, independent of variables such as git version and the +// size of the repo. 
// ensureGitAttributes makes sure export-subst and export-ignore features are
// disabled for the repository stored in repoDir. This is intended to be run
// prior to running git archive so that zip files are generated that produce
// consistent ziphashes for a given revision, independent of variables such as
// git version and the size of the repo.
//
// It creates repoDir/info and repoDir/info/attributes when they are missing,
// and appends the overriding rule to the attributes file unless the file
// already ends with it, so repeated calls leave the file unchanged.
//
// The returned error is the first failure encountered. A failure to close the
// file is reported only when nothing else went wrong.
//
// See: https://github.com/golang/go/issues/27153
func ensureGitAttributes(repoDir string) (err error) {
	const attr = "\n* -export-subst -export-ignore\n"

	infoDir := filepath.Join(repoDir, "info")
	attrFile := filepath.Join(infoDir, "attributes")

	if err := os.MkdirAll(infoDir, 0755); err != nil {
		return err
	}

	f, err := os.OpenFile(attrFile, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a Close failure, but never let it mask an earlier
		// read or write error.
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()

	b, err := io.ReadAll(f)
	if err != nil {
		return err
	}
	if bytes.HasSuffix(b, []byte(attr)) {
		return nil // the rule is already the last thing in the file
	}

	// The file was opened with O_APPEND, so the rule lands after any
	// existing content.
	_, err = f.WriteString(attr)
	return err
}
+var localGitRepo, localGitURL string + +func testMain(m *testing.M) (err error) { + cfg.BuildX = true + + srv, err := vcstest.NewServer() + if err != nil { + return err + } + defer func() { + if closeErr := srv.Close(); err == nil { + err = closeErr + } + }() + + gitrepo1 = srv.HTTP.URL + "/git/gitrepo1" + hgrepo1 = srv.HTTP.URL + "/hg/hgrepo1" + + dir, err := os.MkdirTemp("", "gitrepo-test-") + if err != nil { + return err + } + defer func() { + if rmErr := os.RemoveAll(dir); err == nil { + err = rmErr + } + }() + + // Redirect the module cache to a fresh directory to avoid crosstalk, and make + // it read/write so that the test can still clean it up easily when done. + cfg.GOMODCACHE = filepath.Join(dir, "modcache") + cfg.ModCacheRW = true + + if !testing.Short() && testenv.HasExec() { + if _, err := exec.LookPath("git"); err == nil { + // Clone gitrepo1 into a local directory. + // If we use a file:// URL to access the local directory, + // then git starts up all the usual protocol machinery, + // which will let us test remote git archive invocations. + localGitRepo = filepath.Join(dir, "gitrepo2") + if _, err := Run("", "git", "clone", "--mirror", gitrepo1, localGitRepo); err != nil { + return err + } + if _, err := Run(localGitRepo, "git", "config", "daemon.uploadarch", "true"); err != nil { + return err + } + + // Convert absolute path to file URL. LocalGitRepo will not accept + // Windows absolute paths because they look like a host:path remote. + // TODO(golang.org/issue/32456): use url.FromFilePath when implemented. 
+ if strings.HasPrefix(localGitRepo, "/") { + localGitURL = "file://" + localGitRepo + } else { + localGitURL = "file:///" + filepath.ToSlash(localGitRepo) + } + } + } + + m.Run() + return nil +} + +func testRepo(t *testing.T, remote string) (Repo, error) { + if remote == "localGitRepo" { + testenv.MustHaveExecPath(t, "git") + return LocalGitRepo(localGitURL) + } + vcsName := "git" + for _, k := range []string{"hg"} { + if strings.Contains(remote, "/"+k+"/") { + vcsName = k + } + } + testenv.MustHaveExecPath(t, vcsName) + return NewRepo(vcsName, remote) +} + +func TestTags(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + t.Parallel() + + type tagsTest struct { + repo string + prefix string + tags []Tag + } + + runTest := func(tt tagsTest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + r, err := testRepo(t, tt.repo) + if err != nil { + t.Fatal(err) + } + tags, err := r.Tags(tt.prefix) + if err != nil { + t.Fatal(err) + } + if tags == nil || !reflect.DeepEqual(tags.List, tt.tags) { + t.Errorf("Tags(%q): incorrect tags\nhave %v\nwant %v", tt.prefix, tags, tt.tags) + } + } + } + + for _, tt := range []tagsTest{ + {gitrepo1, "xxx", []Tag{}}, + {gitrepo1, "", []Tag{ + {"v1.2.3", "ede458df7cd0fdca520df19a33158086a8a68e81"}, + {"v1.2.4-annotated", "ede458df7cd0fdca520df19a33158086a8a68e81"}, + {"v2.0.1", "76a00fb249b7f93091bc2c89a789dab1fc1bc26f"}, + {"v2.0.2", "9d02800338b8a55be062c838d1f02e0c5780b9eb"}, + {"v2.3", "76a00fb249b7f93091bc2c89a789dab1fc1bc26f"}, + }}, + {gitrepo1, "v", []Tag{ + {"v1.2.3", "ede458df7cd0fdca520df19a33158086a8a68e81"}, + {"v1.2.4-annotated", "ede458df7cd0fdca520df19a33158086a8a68e81"}, + {"v2.0.1", "76a00fb249b7f93091bc2c89a789dab1fc1bc26f"}, + {"v2.0.2", "9d02800338b8a55be062c838d1f02e0c5780b9eb"}, + {"v2.3", "76a00fb249b7f93091bc2c89a789dab1fc1bc26f"}, + }}, + {gitrepo1, "v1", []Tag{ + {"v1.2.3", "ede458df7cd0fdca520df19a33158086a8a68e81"}, + {"v1.2.4-annotated", 
"ede458df7cd0fdca520df19a33158086a8a68e81"}, + }}, + {gitrepo1, "2", []Tag{}}, + } { + t.Run(path.Base(tt.repo)+"/"+tt.prefix, runTest(tt)) + if tt.repo == gitrepo1 { + // Clear hashes. + clearTags := []Tag{} + for _, tag := range tt.tags { + clearTags = append(clearTags, Tag{tag.Name, ""}) + } + tags := tt.tags + for _, tt.repo = range altRepos() { + if strings.Contains(tt.repo, "Git") { + tt.tags = tags + } else { + tt.tags = clearTags + } + t.Run(path.Base(tt.repo)+"/"+tt.prefix, runTest(tt)) + } + } + } +} + +func TestLatest(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + t.Parallel() + + type latestTest struct { + repo string + info *RevInfo + } + runTest := func(tt latestTest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + r, err := testRepo(t, tt.repo) + if err != nil { + t.Fatal(err) + } + info, err := r.Latest() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(info, tt.info) { + t.Errorf("Latest: incorrect info\nhave %+v (origin %+v)\nwant %+v (origin %+v)", info, info.Origin, tt.info, tt.info.Origin) + } + } + } + + for _, tt := range []latestTest{ + { + gitrepo1, + &RevInfo{ + Origin: &Origin{ + VCS: "git", + URL: gitrepo1, + Ref: "HEAD", + Hash: "ede458df7cd0fdca520df19a33158086a8a68e81", + }, + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + hgrepo1, + &RevInfo{ + Origin: &Origin{ + VCS: "hg", + URL: hgrepo1, + Hash: "18518c07eb8ed5c80221e997e518cccaa8c0c287", + }, + Name: "18518c07eb8ed5c80221e997e518cccaa8c0c287", + Short: "18518c07eb8e", + Version: "18518c07eb8ed5c80221e997e518cccaa8c0c287", + Time: time.Date(2018, 6, 27, 16, 16, 30, 0, time.UTC), + }, + }, + } { + t.Run(path.Base(tt.repo), runTest(tt)) + if tt.repo == gitrepo1 { + tt.repo = "localGitRepo" + info := *tt.info + tt.info 
= &info + o := *info.Origin + info.Origin = &o + o.URL = localGitURL + t.Run(path.Base(tt.repo), runTest(tt)) + } + } +} + +func TestReadFile(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + t.Parallel() + + type readFileTest struct { + repo string + rev string + file string + err string + data string + } + runTest := func(tt readFileTest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + r, err := testRepo(t, tt.repo) + if err != nil { + t.Fatal(err) + } + data, err := r.ReadFile(tt.rev, tt.file, 100) + if err != nil { + if tt.err == "" { + t.Fatalf("ReadFile: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("ReadFile: wrong error %q, want %q", err, tt.err) + } + if len(data) != 0 { + t.Errorf("ReadFile: non-empty data %q with error %v", data, err) + } + return + } + if tt.err != "" { + t.Fatalf("ReadFile: no error, wanted %v", tt.err) + } + if string(data) != tt.data { + t.Errorf("ReadFile: incorrect data\nhave %q\nwant %q", data, tt.data) + } + } + } + + for _, tt := range []readFileTest{ + { + repo: gitrepo1, + rev: "latest", + file: "README", + data: "", + }, + { + repo: gitrepo1, + rev: "v2", + file: "another.txt", + data: "another\n", + }, + { + repo: gitrepo1, + rev: "v2.3.4", + file: "another.txt", + err: fs.ErrNotExist.Error(), + }, + } { + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.file, runTest(tt)) + if tt.repo == gitrepo1 { + for _, tt.repo = range altRepos() { + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.file, runTest(tt)) + } + } + } +} + +type zipFile struct { + name string + size int64 +} + +func TestReadZip(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + t.Parallel() + + type readZipTest struct { + repo string + rev string + subdir string + err string + files map[string]uint64 + } + runTest := func(tt readZipTest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + r, err := testRepo(t, tt.repo) + if err != nil 
{ + t.Fatal(err) + } + rc, err := r.ReadZip(tt.rev, tt.subdir, 100000) + if err != nil { + if tt.err == "" { + t.Fatalf("ReadZip: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("ReadZip: wrong error %q, want %q", err, tt.err) + } + if rc != nil { + t.Errorf("ReadZip: non-nil io.ReadCloser with error %v", err) + } + return + } + defer rc.Close() + if tt.err != "" { + t.Fatalf("ReadZip: no error, wanted %v", tt.err) + } + zipdata, err := io.ReadAll(rc) + if err != nil { + t.Fatal(err) + } + z, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata))) + if err != nil { + t.Fatalf("ReadZip: cannot read zip file: %v", err) + } + have := make(map[string]bool) + for _, f := range z.File { + size, ok := tt.files[f.Name] + if !ok { + t.Errorf("ReadZip: unexpected file %s", f.Name) + continue + } + have[f.Name] = true + if size != ^uint64(0) && f.UncompressedSize64 != size { + t.Errorf("ReadZip: file %s has unexpected size %d != %d", f.Name, f.UncompressedSize64, size) + } + } + for name := range tt.files { + if !have[name] { + t.Errorf("ReadZip: missing file %s", name) + } + } + } + } + + for _, tt := range []readZipTest{ + { + repo: gitrepo1, + rev: "v2.3.4", + subdir: "", + files: map[string]uint64{ + "prefix/": 0, + "prefix/README": 0, + "prefix/v2": 3, + }, + }, + { + repo: hgrepo1, + rev: "v2.3.4", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/README": 0, + "prefix/v2": 3, + }, + }, + + { + repo: gitrepo1, + rev: "v2", + subdir: "", + files: map[string]uint64{ + "prefix/": 0, + "prefix/README": 0, + "prefix/v2": 3, + "prefix/another.txt": 8, + "prefix/foo.txt": 13, + }, + }, + { + repo: hgrepo1, + rev: "v2", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/README": 0, + "prefix/v2": 3, + "prefix/another.txt": 8, + "prefix/foo.txt": 13, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "", + files: map[string]uint64{ + 
"prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + "prefix/README": 0, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/.hgtags": 405, + "prefix/v3/sub/dir/file.txt": 16, + "prefix/README": 0, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "v3/sub/dir", + files: map[string]uint64{ + "prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "v3/sub/dir", + files: map[string]uint64{ + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "v3/sub", + files: map[string]uint64{ + "prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "v3/sub", + files: map[string]uint64{ + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + + { + repo: gitrepo1, + rev: "aaaaaaaaab", + subdir: "", + err: "unknown revision", + }, + { + repo: hgrepo1, + rev: "aaaaaaaaab", + subdir: "", + err: "unknown revision", + }, + + { + repo: "https://github.com/rsc/vgotest1", + rev: "submod/v1.0.4", + subdir: "submod", + files: map[string]uint64{ + "prefix/": 0, + "prefix/submod/": 0, + "prefix/submod/go.mod": 53, + "prefix/submod/pkg/": 0, + "prefix/submod/pkg/p.go": 31, + }, + }, + } { + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, runTest(tt)) + if tt.repo == gitrepo1 { + tt.repo = "localGitRepo" + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, runTest(tt)) + } + } +} + +var hgmap = map[string]string{ + "HEAD": "41964ddce1180313bdc01d0a39a2813344d6261d", // not tip due to bad hgrepo1 conversion + "9d02800338b8a55be062c838d1f02e0c5780b9eb": "8f49ee7a6ddcdec6f0112d9dca48d4a2e4c3c09e", + "76a00fb249b7f93091bc2c89a789dab1fc1bc26f": 
"88fde824ec8b41a76baa16b7e84212cee9f3edd0", + "ede458df7cd0fdca520df19a33158086a8a68e81": "41964ddce1180313bdc01d0a39a2813344d6261d", + "97f6aa59c81c623494825b43d39e445566e429a4": "c0cbbfb24c7c3c50c35c7b88e7db777da4ff625d", +} + +func TestStat(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + t.Parallel() + + type statTest struct { + repo string + rev string + err string + info *RevInfo + } + runTest := func(tt statTest) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + r, err := testRepo(t, tt.repo) + if err != nil { + t.Fatal(err) + } + info, err := r.Stat(tt.rev) + if err != nil { + if tt.err == "" { + t.Fatalf("Stat: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("Stat: wrong error %q, want %q", err, tt.err) + } + if info != nil && info.Origin == nil { + t.Errorf("Stat: non-nil info with nil Origin with error %q", err) + } + return + } + info.Origin = nil // TestLatest and ../../../testdata/script/reuse_git.txt test Origin well enough + if !reflect.DeepEqual(info, tt.info) { + t.Errorf("Stat: incorrect info\nhave %+v\nwant %+v", *info, *tt.info) + } + } + } + + for _, tt := range []statTest{ + { + repo: gitrepo1, + rev: "HEAD", + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "v2", // branch + info: &RevInfo{ + Name: "9d02800338b8a55be062c838d1f02e0c5780b9eb", + Short: "9d02800338b8", + Version: "9d02800338b8a55be062c838d1f02e0c5780b9eb", + Time: time.Date(2018, 4, 17, 20, 00, 32, 0, time.UTC), + Tags: []string{"v2.0.2"}, + }, + }, + { + repo: gitrepo1, + rev: "v2.3.4", // badly-named branch (semver should be a tag) + info: &RevInfo{ + Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Short: "76a00fb249b7", + Version: 
"76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC), + Tags: []string{"v2.0.1", "v2.3"}, + }, + }, + { + repo: gitrepo1, + rev: "v2.3", // badly-named tag (we only respect full semver v2.3.0) + info: &RevInfo{ + Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Short: "76a00fb249b7", + Version: "v2.3", + Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC), + Tags: []string{"v2.0.1", "v2.3"}, + }, + }, + { + repo: gitrepo1, + rev: "v1.2.3", // tag + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "v1.2.3", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "ede458df", // hash prefix in refs + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "97f6aa59", // hash prefix not in refs + info: &RevInfo{ + Name: "97f6aa59c81c623494825b43d39e445566e429a4", + Short: "97f6aa59c81c", + Version: "97f6aa59c81c623494825b43d39e445566e429a4", + Time: time.Date(2018, 4, 17, 20, 0, 19, 0, time.UTC), + }, + }, + { + repo: gitrepo1, + rev: "v1.2.4-annotated", // annotated tag uses unwrapped commit hash + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "v1.2.4-annotated", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "aaaaaaaaab", + err: "unknown revision", + }, + } { + t.Run(path.Base(tt.repo)+"/"+tt.rev, runTest(tt)) + if tt.repo == gitrepo1 { + for _, tt.repo = range altRepos() { + old := tt + var m map[string]string + if tt.repo == hgrepo1 { + m = hgmap + } + if tt.info != nil { + info := *tt.info 
+ tt.info = &info + tt.info.Name = remap(tt.info.Name, m) + tt.info.Version = remap(tt.info.Version, m) + tt.info.Short = remap(tt.info.Short, m) + } + tt.rev = remap(tt.rev, m) + t.Run(path.Base(tt.repo)+"/"+tt.rev, runTest(tt)) + tt = old + } + } + } +} + +func remap(name string, m map[string]string) string { + if m[name] != "" { + return m[name] + } + if AllHex(name) { + for k, v := range m { + if strings.HasPrefix(k, name) { + return v[:len(name)] + } + } + } + return name +} diff --git a/src/cmd/go/internal/modfetch/codehost/shell.go b/src/cmd/go/internal/modfetch/codehost/shell.go new file mode 100644 index 0000000..eaa0195 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/shell.go @@ -0,0 +1,141 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// Interactive debugging shell for codehost.Repo implementations. + +package main + +import ( + "archive/zip" + "bufio" + "bytes" + "flag" + "fmt" + "io" + "log" + "os" + "strings" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/modfetch/codehost" +) + +func usage() { + fmt.Fprintf(os.Stderr, "usage: go run shell.go vcs remote\n") + os.Exit(2) +} + +func main() { + cfg.GOMODCACHE = "/tmp/vcswork" + log.SetFlags(0) + log.SetPrefix("shell: ") + flag.Usage = usage + flag.Parse() + if flag.NArg() != 2 { + usage() + } + + repo, err := codehost.NewRepo(flag.Arg(0), flag.Arg(1)) + if err != nil { + log.Fatal(err) + } + + b := bufio.NewReader(os.Stdin) + for { + fmt.Fprintf(os.Stderr, ">>> ") + line, err := b.ReadString('\n') + if err != nil { + log.Fatal(err) + } + f := strings.Fields(line) + if len(f) == 0 { + continue + } + switch f[0] { + default: + fmt.Fprintf(os.Stderr, "?unknown command\n") + continue + case "tags": + prefix := "" + if len(f) == 2 { + prefix = f[1] + } + if len(f) > 2 { + fmt.Fprintf(os.Stderr, "?usage: tags [prefix]\n") + continue + } + tags, err := 
repo.Tags(prefix) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + for _, tag := range tags { + fmt.Printf("%s\n", tag) + } + + case "stat": + if len(f) != 2 { + fmt.Fprintf(os.Stderr, "?usage: stat rev\n") + continue + } + info, err := repo.Stat(f[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + fmt.Printf("name=%s short=%s version=%s time=%s\n", info.Name, info.Short, info.Version, info.Time.UTC().Format(time.RFC3339)) + + case "read": + if len(f) != 3 { + fmt.Fprintf(os.Stderr, "?usage: read rev file\n") + continue + } + data, err := repo.ReadFile(f[1], f[2], 10<<20) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + os.Stdout.Write(data) + + case "zip": + if len(f) != 4 { + fmt.Fprintf(os.Stderr, "?usage: zip rev subdir output\n") + continue + } + subdir := f[2] + if subdir == "-" { + subdir = "" + } + rc, err := repo.ReadZip(f[1], subdir, 10<<20) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + + if f[3] != "-" { + if err := os.WriteFile(f[3], data, 0666); err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + } + z, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + for _, f := range z.File { + fmt.Printf("%s %d\n", f.Name, f.UncompressedSize64) + } + } + } +} diff --git a/src/cmd/go/internal/modfetch/codehost/svn.go b/src/cmd/go/internal/modfetch/codehost/svn.go new file mode 100644 index 0000000..6ec9e59 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/svn.go @@ -0,0 +1,154 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codehost + +import ( + "archive/zip" + "encoding/xml" + "fmt" + "io" + "os" + "path" + "path/filepath" + "time" +) + +func svnParseStat(rev, out string) (*RevInfo, error) { + var log struct { + Logentry struct { + Revision int64 `xml:"revision,attr"` + Date string `xml:"date"` + } `xml:"logentry"` + } + if err := xml.Unmarshal([]byte(out), &log); err != nil { + return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out) + } + + t, err := time.Parse(time.RFC3339, log.Logentry.Date) + if err != nil { + return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out) + } + + info := &RevInfo{ + Name: fmt.Sprintf("%d", log.Logentry.Revision), + Short: fmt.Sprintf("%012d", log.Logentry.Revision), + Time: t.UTC(), + Version: rev, + } + return info, nil +} + +func svnReadZip(dst io.Writer, workDir, rev, subdir, remote string) (err error) { + // The subversion CLI doesn't provide a command to write the repository + // directly to an archive, so we need to export it to the local filesystem + // instead. Unfortunately, the local filesystem might apply arbitrary + // normalization to the filenames, so we need to obtain those directly. + // + // 'svn export' prints the filenames as they are written, but from reading the + // svn source code (as of revision 1868933), those filenames are encoded using + // the system locale rather than preserved byte-for-byte from the origin. For + // our purposes, that won't do, but we don't want to go mucking around with + // the user's locale settings either — that could impact error messages, and + // we don't know what locales the user has available or what LC_* variables + // their platform supports. + // + // Instead, we'll do a two-pass export: first we'll run 'svn list' to get the + // canonical filenames, then we'll 'svn export' and look for those filenames + // in the local filesystem. 
(If there is an encoding problem at that point, we + // would probably reject the resulting module anyway.) + + remotePath := remote + if subdir != "" { + remotePath += "/" + subdir + } + + out, err := Run(workDir, []string{ + "svn", "list", + "--non-interactive", + "--xml", + "--incremental", + "--recursive", + "--revision", rev, + "--", remotePath, + }) + if err != nil { + return err + } + + type listEntry struct { + Kind string `xml:"kind,attr"` + Name string `xml:"name"` + Size int64 `xml:"size"` + } + var list struct { + Entries []listEntry `xml:"entry"` + } + if err := xml.Unmarshal(out, &list); err != nil { + return vcsErrorf("unexpected response from svn list --xml: %v\n%s", err, out) + } + + exportDir := filepath.Join(workDir, "export") + // Remove any existing contents from a previous (failed) run. + if err := os.RemoveAll(exportDir); err != nil { + return err + } + defer os.RemoveAll(exportDir) // best-effort + + _, err = Run(workDir, []string{ + "svn", "export", + "--non-interactive", + "--quiet", + + // Suppress any platform- or host-dependent transformations. + "--native-eol", "LF", + "--ignore-externals", + "--ignore-keywords", + + "--revision", rev, + "--", remotePath, + exportDir, + }) + if err != nil { + return err + } + + // Scrape the exported files out of the filesystem and encode them in the zipfile. + + // “All files in the zip file are expected to be + // nested in a single top-level directory, whose name is not specified.” + // We'll (arbitrarily) choose the base of the remote path. 
+ basePath := path.Join(path.Base(remote), subdir) + + zw := zip.NewWriter(dst) + for _, e := range list.Entries { + if e.Kind != "file" { + continue + } + + zf, err := zw.Create(path.Join(basePath, e.Name)) + if err != nil { + return err + } + + f, err := os.Open(filepath.Join(exportDir, e.Name)) + if err != nil { + if os.IsNotExist(err) { + return vcsErrorf("file reported by 'svn list', but not written by 'svn export': %s", e.Name) + } + return fmt.Errorf("error opening file created by 'svn export': %v", err) + } + + n, err := io.Copy(zf, f) + f.Close() + if err != nil { + return err + } + if n != e.Size { + return vcsErrorf("file size differs between 'svn list' and 'svn export': file %s listed as %v bytes, but exported as %v bytes", e.Name, e.Size, n) + } + } + + return zw.Close() +} diff --git a/src/cmd/go/internal/modfetch/codehost/vcs.go b/src/cmd/go/internal/modfetch/codehost/vcs.go new file mode 100644 index 0000000..300a23c --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/vcs.go @@ -0,0 +1,634 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codehost + +import ( + "errors" + "fmt" + "internal/lazyregexp" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/par" + "cmd/go/internal/str" +) + +// A VCSError indicates an error using a version control system. +// The implication of a VCSError is that we know definitively where +// to get the code, but we can't access it due to the error. +// The caller should report this error instead of continuing to probe +// other possible module paths. +// +// TODO(golang.org/issue/31730): See if we can invert this. (Return a +// distinguished error for “repo not found” and treat everything else +// as terminal.) 
+type VCSError struct { + Err error +} + +func (e *VCSError) Error() string { return e.Err.Error() } + +func (e *VCSError) Unwrap() error { return e.Err } + +func vcsErrorf(format string, a ...any) error { + return &VCSError{Err: fmt.Errorf(format, a...)} +} + +func NewRepo(vcs, remote string) (Repo, error) { + type key struct { + vcs string + remote string + } + type cached struct { + repo Repo + err error + } + c := vcsRepoCache.Do(key{vcs, remote}, func() any { + repo, err := newVCSRepo(vcs, remote) + if err != nil { + err = &VCSError{err} + } + return cached{repo, err} + }).(cached) + + return c.repo, c.err +} + +var vcsRepoCache par.Cache + +type vcsRepo struct { + mu lockedfile.Mutex // protects all commands, so we don't have to decide which are safe on a per-VCS basis + + remote string + cmd *vcsCmd + dir string + + tagsOnce sync.Once + tags map[string]bool + + branchesOnce sync.Once + branches map[string]bool + + fetchOnce sync.Once + fetchErr error +} + +func newVCSRepo(vcs, remote string) (Repo, error) { + if vcs == "git" { + return newGitRepo(remote, false) + } + cmd := vcsCmds[vcs] + if cmd == nil { + return nil, fmt.Errorf("unknown vcs: %s %s", vcs, remote) + } + if !strings.Contains(remote, "://") { + return nil, fmt.Errorf("invalid vcs remote: %s %s", vcs, remote) + } + + r := &vcsRepo{remote: remote, cmd: cmd} + var err error + r.dir, r.mu.Path, err = WorkDir(vcsWorkDirType+vcs, r.remote) + if err != nil { + return nil, err + } + + if cmd.init == nil { + return r, nil + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if _, err := os.Stat(filepath.Join(r.dir, "."+vcs)); err != nil { + if _, err := Run(r.dir, cmd.init(r.remote)); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + } + return r, nil +} + +const vcsWorkDirType = "vcs1." 
+ +type vcsCmd struct { + vcs string // vcs name "hg" + init func(remote string) []string // cmd to init repo to track remote + tags func(remote string) []string // cmd to list local tags + tagRE *lazyregexp.Regexp // regexp to extract tag names from output of tags cmd + branches func(remote string) []string // cmd to list local branches + branchRE *lazyregexp.Regexp // regexp to extract branch names from output of tags cmd + badLocalRevRE *lazyregexp.Regexp // regexp of names that must not be served out of local cache without doing fetch first + statLocal func(rev, remote string) []string // cmd to stat local rev + parseStat func(rev, out string) (*RevInfo, error) // cmd to parse output of statLocal + fetch []string // cmd to fetch everything from remote + latest string // name of latest commit on remote (tip, HEAD, etc) + readFile func(rev, file, remote string) []string // cmd to read rev's file + readZip func(rev, subdir, remote, target string) []string // cmd to read rev's subdir as zip file + doReadZip func(dst io.Writer, workDir, rev, subdir, remote string) error // arbitrary function to read rev's subdir as zip file +} + +var re = lazyregexp.New + +var vcsCmds = map[string]*vcsCmd{ + "hg": { + vcs: "hg", + init: func(remote string) []string { + return []string{"hg", "clone", "-U", "--", remote, "."} + }, + tags: func(remote string) []string { + return []string{"hg", "tags", "-q"} + }, + tagRE: re(`(?m)^[^\n]+$`), + branches: func(remote string) []string { + return []string{"hg", "branches", "-c", "-q"} + }, + branchRE: re(`(?m)^[^\n]+$`), + badLocalRevRE: re(`(?m)^(tip)$`), + statLocal: func(rev, remote string) []string { + return []string{"hg", "log", "-l1", "-r", rev, "--template", "{node} {date|hgdate} {tags}"} + }, + parseStat: hgParseStat, + fetch: []string{"hg", "pull", "-f"}, + latest: "tip", + readFile: func(rev, file, remote string) []string { + return []string{"hg", "cat", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) 
[]string { + pattern := []string{} + if subdir != "" { + pattern = []string{"-I", subdir + "/**"} + } + return str.StringList("hg", "archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/", pattern, "--", target) + }, + }, + + "svn": { + vcs: "svn", + init: nil, // no local checkout + tags: func(remote string) []string { + return []string{"svn", "list", "--", strings.TrimSuffix(remote, "/trunk") + "/tags"} + }, + tagRE: re(`(?m)^(.*?)/?$`), + statLocal: func(rev, remote string) []string { + suffix := "@" + rev + if rev == "latest" { + suffix = "" + } + return []string{"svn", "log", "-l1", "--xml", "--", remote + suffix} + }, + parseStat: svnParseStat, + latest: "latest", + readFile: func(rev, file, remote string) []string { + return []string{"svn", "cat", "--", remote + "/" + file + "@" + rev} + }, + doReadZip: svnReadZip, + }, + + "bzr": { + vcs: "bzr", + init: func(remote string) []string { + return []string{"bzr", "branch", "--use-existing-dir", "--", remote, "."} + }, + fetch: []string{ + "bzr", "pull", "--overwrite-tags", + }, + tags: func(remote string) []string { + return []string{"bzr", "tags"} + }, + tagRE: re(`(?m)^\S+`), + badLocalRevRE: re(`^revno:-`), + statLocal: func(rev, remote string) []string { + return []string{"bzr", "log", "-l1", "--long", "--show-ids", "-r", rev} + }, + parseStat: bzrParseStat, + latest: "revno:-1", + readFile: func(rev, file, remote string) []string { + return []string{"bzr", "cat", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) []string { + extra := []string{} + if subdir != "" { + extra = []string{"./" + subdir} + } + return str.StringList("bzr", "export", "--format=zip", "-r", rev, "--root=prefix/", "--", target, extra) + }, + }, + + "fossil": { + vcs: "fossil", + init: func(remote string) []string { + return []string{"fossil", "clone", "--", remote, ".fossil"} + }, + fetch: []string{"fossil", "pull", "-R", ".fossil"}, + tags: func(remote string) []string { + return []string{"fossil", 
"tag", "-R", ".fossil", "list"} + }, + tagRE: re(`XXXTODO`), + statLocal: func(rev, remote string) []string { + return []string{"fossil", "info", "-R", ".fossil", rev} + }, + parseStat: fossilParseStat, + latest: "trunk", + readFile: func(rev, file, remote string) []string { + return []string{"fossil", "cat", "-R", ".fossil", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) []string { + extra := []string{} + if subdir != "" && !strings.ContainsAny(subdir, "*?[],") { + extra = []string{"--include", subdir} + } + // Note that vcsRepo.ReadZip below rewrites this command + // to run in a different directory, to work around a fossil bug. + return str.StringList("fossil", "zip", "-R", ".fossil", "--name", "prefix", extra, "--", rev, target) + }, + }, +} + +func (r *vcsRepo) loadTags() { + out, err := Run(r.dir, r.cmd.tags(r.remote)) + if err != nil { + return + } + + // Run tag-listing command and extract tags. + r.tags = make(map[string]bool) + for _, tag := range r.cmd.tagRE.FindAllString(string(out), -1) { + if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(tag) { + continue + } + r.tags[tag] = true + } +} + +func (r *vcsRepo) loadBranches() { + if r.cmd.branches == nil { + return + } + + out, err := Run(r.dir, r.cmd.branches(r.remote)) + if err != nil { + return + } + + r.branches = make(map[string]bool) + for _, branch := range r.cmd.branchRE.FindAllString(string(out), -1) { + if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(branch) { + continue + } + r.branches[branch] = true + } +} + +func (r *vcsRepo) CheckReuse(old *Origin, subdir string) error { + return fmt.Errorf("vcs %s: CheckReuse: %w", r.cmd.vcs, ErrUnsupported) +} + +func (r *vcsRepo) Tags(prefix string) (*Tags, error) { + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + r.tagsOnce.Do(r.loadTags) + tags := &Tags{ + // None of the other VCS provide a reasonable way to compute TagSum + // without downloading the 
whole repo, so we only include VCS and URL + // in the Origin. + Origin: &Origin{ + VCS: r.cmd.vcs, + URL: r.remote, + }, + List: []Tag{}, + } + for tag := range r.tags { + if strings.HasPrefix(tag, prefix) { + tags.List = append(tags.List, Tag{tag, ""}) + } + } + sort.Slice(tags.List, func(i, j int) bool { + return tags.List[i].Name < tags.List[j].Name + }) + return tags, nil +} + +func (r *vcsRepo) Stat(rev string) (*RevInfo, error) { + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if rev == "latest" { + rev = r.cmd.latest + } + r.branchesOnce.Do(r.loadBranches) + revOK := (r.cmd.badLocalRevRE == nil || !r.cmd.badLocalRevRE.MatchString(rev)) && !r.branches[rev] + if revOK { + if info, err := r.statLocal(rev); err == nil { + return info, nil + } + } + + r.fetchOnce.Do(r.fetch) + if r.fetchErr != nil { + return nil, r.fetchErr + } + info, err := r.statLocal(rev) + if err != nil { + return nil, err + } + if !revOK { + info.Version = info.Name + } + return info, nil +} + +func (r *vcsRepo) fetch() { + if len(r.cmd.fetch) > 0 { + _, r.fetchErr = Run(r.dir, r.cmd.fetch) + } +} + +func (r *vcsRepo) statLocal(rev string) (*RevInfo, error) { + out, err := Run(r.dir, r.cmd.statLocal(rev, r.remote)) + if err != nil { + return nil, &UnknownRevisionError{Rev: rev} + } + info, err := r.cmd.parseStat(rev, string(out)) + if err != nil { + return nil, err + } + if info.Origin == nil { + info.Origin = new(Origin) + } + info.Origin.VCS = r.cmd.vcs + info.Origin.URL = r.remote + return info, nil +} + +func (r *vcsRepo) Latest() (*RevInfo, error) { + return r.Stat("latest") +} + +func (r *vcsRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) { + if rev == "latest" { + rev = r.cmd.latest + } + _, err := r.Stat(rev) // download rev into local repo + if err != nil { + return nil, err + } + + // r.Stat acquires r.mu, so lock after that. 
+ unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + out, err := Run(r.dir, r.cmd.readFile(rev, file, r.remote)) + if err != nil { + return nil, fs.ErrNotExist + } + return out, nil +} + +func (r *vcsRepo) RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) { + // We don't technically need to lock here since we're returning an error + // uncondititonally, but doing so anyway will help to avoid baking in + // lock-inversion bugs. + unlock, err := r.mu.Lock() + if err != nil { + return "", err + } + defer unlock() + + return "", vcsErrorf("vcs %s: RecentTag: %w", r.cmd.vcs, ErrUnsupported) +} + +func (r *vcsRepo) DescendsFrom(rev, tag string) (bool, error) { + unlock, err := r.mu.Lock() + if err != nil { + return false, err + } + defer unlock() + + return false, vcsErrorf("vcs %s: DescendsFrom: %w", r.cmd.vcs, ErrUnsupported) +} + +func (r *vcsRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { + if r.cmd.readZip == nil && r.cmd.doReadZip == nil { + return nil, vcsErrorf("vcs %s: ReadZip: %w", r.cmd.vcs, ErrUnsupported) + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if rev == "latest" { + rev = r.cmd.latest + } + f, err := os.CreateTemp("", "go-readzip-*.zip") + if err != nil { + return nil, err + } + if r.cmd.doReadZip != nil { + lw := &limitedWriter{ + W: f, + N: maxSize, + ErrLimitReached: errors.New("ReadZip: encoded file exceeds allowed size"), + } + err = r.cmd.doReadZip(lw, r.dir, rev, subdir, r.remote) + if err == nil { + _, err = f.Seek(0, io.SeekStart) + } + } else if r.cmd.vcs == "fossil" { + // If you run + // fossil zip -R .fossil --name prefix trunk /tmp/x.zip + // fossil fails with "unable to create directory /tmp" [sic]. + // Change the command to run in /tmp instead, + // replacing the -R argument with an absolute path. 
+ args := r.cmd.readZip(rev, subdir, r.remote, filepath.Base(f.Name())) + for i := range args { + if args[i] == ".fossil" { + args[i] = filepath.Join(r.dir, ".fossil") + } + } + _, err = Run(filepath.Dir(f.Name()), args) + } else { + _, err = Run(r.dir, r.cmd.readZip(rev, subdir, r.remote, f.Name())) + } + if err != nil { + f.Close() + os.Remove(f.Name()) + return nil, err + } + return &deleteCloser{f}, nil +} + +// deleteCloser is a file that gets deleted on Close. +type deleteCloser struct { + *os.File +} + +func (d *deleteCloser) Close() error { + defer os.Remove(d.File.Name()) + return d.File.Close() +} + +func hgParseStat(rev, out string) (*RevInfo, error) { + f := strings.Fields(string(out)) + if len(f) < 3 { + return nil, vcsErrorf("unexpected response from hg log: %q", out) + } + hash := f[0] + version := rev + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + t, err := strconv.ParseInt(f[1], 10, 64) + if err != nil { + return nil, vcsErrorf("invalid time from hg log: %q", out) + } + + var tags []string + for _, tag := range f[3:] { + if tag != "tip" { + tags = append(tags, tag) + } + } + sort.Strings(tags) + + info := &RevInfo{ + Origin: &Origin{ + Hash: hash, + }, + Name: hash, + Short: ShortenSHA1(hash), + Time: time.Unix(t, 0).UTC(), + Version: version, + Tags: tags, + } + return info, nil +} + +func bzrParseStat(rev, out string) (*RevInfo, error) { + var revno int64 + var tm time.Time + for _, line := range strings.Split(out, "\n") { + if line == "" || line[0] == ' ' || line[0] == '\t' { + // End of header, start of commit message. + break + } + if line[0] == '-' { + continue + } + before, after, found := strings.Cut(line, ":") + if !found { + // End of header, start of commit message. 
+ break + } + key, val := before, strings.TrimSpace(after) + switch key { + case "revno": + if j := strings.Index(val, " "); j >= 0 { + val = val[:j] + } + i, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return nil, vcsErrorf("unexpected revno from bzr log: %q", line) + } + revno = i + case "timestamp": + j := strings.Index(val, " ") + if j < 0 { + return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line) + } + t, err := time.Parse("2006-01-02 15:04:05 -0700", val[j+1:]) + if err != nil { + return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line) + } + tm = t.UTC() + } + } + if revno == 0 || tm.IsZero() { + return nil, vcsErrorf("unexpected response from bzr log: %q", out) + } + + info := &RevInfo{ + Name: fmt.Sprintf("%d", revno), + Short: fmt.Sprintf("%012d", revno), + Time: tm, + Version: rev, + } + return info, nil +} + +func fossilParseStat(rev, out string) (*RevInfo, error) { + for _, line := range strings.Split(out, "\n") { + if strings.HasPrefix(line, "uuid:") || strings.HasPrefix(line, "hash:") { + f := strings.Fields(line) + if len(f) != 5 || len(f[1]) != 40 || f[4] != "UTC" { + return nil, vcsErrorf("unexpected response from fossil info: %q", line) + } + t, err := time.Parse(time.DateTime, f[2]+" "+f[3]) + if err != nil { + return nil, vcsErrorf("unexpected response from fossil info: %q", line) + } + hash := f[1] + version := rev + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + info := &RevInfo{ + Origin: &Origin{ + Hash: hash, + }, + Name: hash, + Short: ShortenSHA1(hash), + Time: t, + Version: version, + } + return info, nil + } + } + return nil, vcsErrorf("unexpected response from fossil info: %q", out) +} + +type limitedWriter struct { + W io.Writer + N int64 + ErrLimitReached error +} + +func (l *limitedWriter) Write(p []byte) (n int, err error) { + if l.N > 0 { + max := len(p) + if l.N < int64(max) { + max = int(l.N) + } + n, err = l.W.Write(p[:max]) + l.N -= int64(n) + if err 
!= nil || n >= len(p) { + return n, err + } + } + + return n, l.ErrLimitReached +} |