summaryrefslogtreecommitdiffstats
path: root/src/cmd/go/internal/modfetch/codehost
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/go/internal/modfetch/codehost')
-rw-r--r--src/cmd/go/internal/modfetch/codehost/codehost.go315
-rw-r--r--src/cmd/go/internal/modfetch/codehost/git.go875
-rw-r--r--src/cmd/go/internal/modfetch/codehost/git_test.go640
-rw-r--r--src/cmd/go/internal/modfetch/codehost/shell.go141
-rw-r--r--src/cmd/go/internal/modfetch/codehost/svn.go154
-rw-r--r--src/cmd/go/internal/modfetch/codehost/vcs.go616
6 files changed, 2741 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/codehost/codehost.go b/src/cmd/go/internal/modfetch/codehost/codehost.go
new file mode 100644
index 0000000..378fbae
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/codehost.go
@@ -0,0 +1,315 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package codehost defines the interface implemented by a code hosting source,
+// along with support code for use by implementations.
+package codehost
+
+import (
+ "bytes"
+ "crypto/sha256"
+ "fmt"
+ exec "internal/execabs"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+
+ "cmd/go/internal/cfg"
+ "cmd/go/internal/lockedfile"
+ "cmd/go/internal/str"
+)
+
+// Downloaded size limits.
+const (
+ MaxGoMod = 16 << 20 // maximum size of go.mod file
+ MaxLICENSE = 16 << 20 // maximum size of LICENSE file
+ MaxZipFile = 500 << 20 // maximum size of downloaded zip file
+)
+
+// A Repo represents a code hosting source.
+// Typical implementations include local version control repositories,
+// remote version control servers, and code hosting sites.
+// A Repo must be safe for simultaneous use by multiple goroutines.
+type Repo interface {
+ // Tags lists all tags with the given prefix.
+ Tags(prefix string) (tags []string, err error)
+
+ // Stat returns information about the revision rev.
+ // A revision can be any identifier known to the underlying service:
+ // commit hash, branch, tag, and so on.
+ Stat(rev string) (*RevInfo, error)
+
+ // Latest returns the latest revision on the default branch,
+ // whatever that means in the underlying implementation.
+ Latest() (*RevInfo, error)
+
+ // ReadFile reads the given file in the file tree corresponding to revision rev.
+ // It should refuse to read more than maxSize bytes.
+ //
+ // If the requested file does not exist it should return an error for which
+ // os.IsNotExist(err) returns true.
+ ReadFile(rev, file string, maxSize int64) (data []byte, err error)
+
+ // ReadFileRevs reads a single file at multiple versions.
+ // It should refuse to read more than maxSize bytes.
+ // The result is a map from each requested rev string
+ // to the associated FileRev. The map must have a non-nil
+ // entry for every requested rev (unless ReadFileRevs returned an error).
+ // A file simply being missing or even corrupted in revs[i]
+ // should be reported only in files[revs[i]].Err, not in the error result
+ // from ReadFileRevs.
+ // The overall call should return an error (and no map) only
+ // in the case of a problem with obtaining the data, such as
+ // a network failure.
+ // Implementations may assume that revs only contain tags,
+ // not direct commit hashes.
+ ReadFileRevs(revs []string, file string, maxSize int64) (files map[string]*FileRev, err error)
+
+ // ReadZip downloads a zip file for the subdir subdirectory
+ // of the given revision to a new file in a given temporary directory.
+ // It should refuse to read more than maxSize bytes.
+ // It returns a ReadCloser for a streamed copy of the zip file.
+ // All files in the zip file are expected to be
+ // nested in a single top-level directory, whose name is not specified.
+ ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error)
+
+ // RecentTag returns the most recent tag on rev or one of its predecessors
+ // with the given prefix. allowed may be used to filter out unwanted versions.
+ RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error)
+
+ // DescendsFrom reports whether rev or any of its ancestors has the given tag.
+ //
+ // DescendsFrom must return true for any tag returned by RecentTag for the
+ // same revision.
+ DescendsFrom(rev, tag string) (bool, error)
+}
+
+// A RevInfo describes a single revision in a source code repository.
+type RevInfo struct {
+ Name string // complete ID in underlying repository
+ Short string // shortened ID, for use in pseudo-version
+ Version string // version used in lookup
+ Time time.Time // commit time
+ Tags []string // known tags for commit
+}
+
+// A FileRev describes the result of reading a file at a given revision.
+// Repo.ReadFileRevs returns one FileRev for each requested revision.
+type FileRev struct {
+ Rev string // requested revision
+ Data []byte // file data
+ Err error // error if any; os.IsNotExist(Err)==true if rev exists but file does not exist in that rev
+}
+
+// UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a
+// revision rather than a file.
+type UnknownRevisionError struct {
+ Rev string
+}
+
+// Error returns the text "unknown revision " followed by e.Rev.
+func (e *UnknownRevisionError) Error() string {
+ return "unknown revision " + e.Rev
+}
+// Is reports whether err is fs.ErrNotExist, so that
+// errors.Is(e, fs.ErrNotExist) reports true for an UnknownRevisionError.
+func (UnknownRevisionError) Is(err error) bool {
+ return err == fs.ErrNotExist
+}
+
+// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given
+// repository or module contains no commits.
+var ErrNoCommits error = noCommitsError{}
+
+// noCommitsError is the concrete type of ErrNoCommits.
+type noCommitsError struct{}
+
+// Error returns the fixed text "no commits".
+func (noCommitsError) Error() string {
+ return "no commits"
+}
+// Is reports whether err is fs.ErrNotExist, so that
+// errors.Is(ErrNoCommits, fs.ErrNotExist) reports true.
+func (noCommitsError) Is(err error) bool {
+ return err == fs.ErrNotExist
+}
+
+// AllHex reports whether rev is made up solely of the characters 0-9 and a-f.
+// Upper-case hexadecimal digits are rejected; the empty string is accepted.
+func AllHex(rev string) bool {
+	for _, b := range []byte(rev) {
+		switch {
+		case b >= '0' && b <= '9':
+			// decimal digit
+		case b >= 'a' && b <= 'f':
+			// lower-case hex letter
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length
+// used in pseudo-versions (12 hex digits).
+func ShortenSHA1(rev string) string {
+ if AllHex(rev) && len(rev) == 40 {
+ return rev[:12]
+ }
+ return rev
+}
+
+// WorkDir returns the name of the cached work directory to use for the
+// given repository type and name, creating it if necessary.
+//
+// It returns the directory and the path of the associated lock file
+// (dir + ".lock"). The lock file is locked only while WorkDir itself is
+// inspecting or creating the directory; it is unlocked again before WorkDir
+// returns, so callers that need exclusive access must lock lockfile themselves.
+//
+// WorkDir fails if cfg.GOMODCACHE is empty, if typ contains a colon, if the
+// directory or lock file cannot be created, or if an existing dir+".info"
+// file records a different type:name key than the one requested.
+//
+// When cfg.BuildX is set, the steps taken are traced to standard error.
+func WorkDir(typ, name string) (dir, lockfile string, err error) {
+	if cfg.GOMODCACHE == "" {
+		return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set")
+	}
+
+	// We name the work directory for the SHA256 hash of the type and name.
+	// We intentionally avoid the actual name both because of possible
+	// conflicts with valid file system paths and because we want to ensure
+	// that one checkout is never nested inside another. That nesting has
+	// led to security problems in the past.
+	if strings.Contains(typ, ":") {
+		// The key below is typ + ":" + name; a colon in typ would make it ambiguous.
+		return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon")
+	}
+	key := typ + ":" + name
+	dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key))))
+
+	if cfg.BuildX {
+		fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name)
+	}
+	if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil {
+		return "", "", err
+	}
+
+	lockfile = dir + ".lock"
+	if cfg.BuildX {
+		// Terminate the trace line so the next -x line starts on its own line.
+		fmt.Fprintf(os.Stderr, "# lock %s\n", lockfile)
+	}
+
+	unlock, err := lockedfile.MutexAt(lockfile).Lock()
+	if err != nil {
+		return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err)
+	}
+	defer unlock()
+
+	data, err := os.ReadFile(dir + ".info")
+	info, err2 := os.Stat(dir)
+	if err == nil && err2 == nil && info.IsDir() {
+		// Info file and directory both already exist: reuse.
+		have := strings.TrimSuffix(string(data), "\n")
+		if have != key {
+			return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key)
+		}
+		if cfg.BuildX {
+			fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name)
+		}
+		return dir, lockfile, nil
+	}
+
+	// Info file or directory missing. Start from scratch.
+	if cfg.BuildX {
+		fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name)
+	}
+	// Best effort: a failure to remove stale content surfaces through the
+	// MkdirAll/WriteFile calls below.
+	os.RemoveAll(dir)
+	if err := os.MkdirAll(dir, 0777); err != nil {
+		return "", "", err
+	}
+	if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil {
+		// Don't leave a directory behind without its info file.
+		os.RemoveAll(dir)
+		return "", "", err
+	}
+	return dir, lockfile, nil
+}
+
+// A RunError describes a command that did not run successfully.
+// Cmd is a printable form of the command, Err is the underlying error,
+// Stderr is the captured standard error output, and HelpText, if
+// non-empty, is extra guidance appended to the message.
+type RunError struct {
+	Cmd      string
+	Err      error
+	Stderr   []byte
+	HelpText string
+}
+
+// Error returns "Cmd: Err", followed by the standard error output
+// (trailing newlines removed, each line indented by a tab) when there
+// is any, and then HelpText on its own line when it is set.
+func (e *RunError) Error() string {
+	var msg strings.Builder
+	msg.WriteString(e.Cmd)
+	msg.WriteString(": ")
+	msg.WriteString(e.Err.Error())
+	if trimmed := bytes.TrimRight(e.Stderr, "\n"); len(trimmed) != 0 {
+		msg.WriteString(":\n\t")
+		msg.WriteString(strings.ReplaceAll(string(trimmed), "\n", "\n\t"))
+	}
+	if e.HelpText != "" {
+		msg.WriteString("\n")
+		msg.WriteString(e.HelpText)
+	}
+	return msg.String()
+}
+
+// dirLock maps a directory name to the *sync.Mutex that RunWithStdin holds
+// while it runs a command in that directory.
+var dirLock sync.Map
+
+// Run runs the command line in the given directory
+// (an empty dir means the current directory).
+// It returns the standard output and, for a non-zero exit,
+// a *RunError indicating the command, exit status, and standard error.
+// Standard error is unavailable for commands that exit successfully.
+// Run is equivalent to RunWithStdin with a nil standard input.
+func Run(dir string, cmdline ...interface{}) ([]byte, error) {
+ return RunWithStdin(dir, nil, cmdline...)
+}
+
+// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell.
+// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html.
+var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`)
+
+// RunWithStdin is like Run but uses stdin as the command's standard input.
+//
+// Commands run with the same non-empty dir are serialized within this
+// process through a per-directory mutex kept in dirLock.
+// If the TESTGOVCS environment variable is "panic", RunWithStdin panics
+// instead of running the command.
+// When cfg.BuildX is set, the command line is printed to standard error
+// before the command runs and again, with the elapsed time, when
+// RunWithStdin returns.
+func RunWithStdin(dir string, stdin io.Reader, cmdline ...interface{}) ([]byte, error) {
+ if dir != "" {
+ muIface, ok := dirLock.Load(dir)
+ if !ok {
+ muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex))
+ }
+ mu := muIface.(*sync.Mutex)
+ mu.Lock()
+ defer mu.Unlock()
+ }
+
+ cmd := str.StringList(cmdline...)
+ if os.Getenv("TESTGOVCS") == "panic" {
+ panic(fmt.Sprintf("use of vcs: %v", cmd))
+ }
+ if cfg.BuildX {
+ // Build a shell-like rendering of the command for the -x trace.
+ text := new(strings.Builder)
+ if dir != "" {
+ text.WriteString("cd ")
+ text.WriteString(dir)
+ text.WriteString("; ")
+ }
+ for i, arg := range cmd {
+ if i > 0 {
+ text.WriteByte(' ')
+ }
+ switch {
+ case strings.ContainsAny(arg, "'"):
+ // Quote args that could be mistaken for quoted args.
+ text.WriteByte('"')
+ text.WriteString(bashQuoter.Replace(arg))
+ text.WriteByte('"')
+ case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"):
+ // Quote args that contain special characters, glob patterns, or spaces.
+ text.WriteByte('\'')
+ text.WriteString(arg)
+ text.WriteByte('\'')
+ default:
+ text.WriteString(arg)
+ }
+ }
+ fmt.Fprintf(os.Stderr, "%s\n", text)
+ start := time.Now()
+ defer func() {
+ fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text)
+ }()
+ }
+ // TODO: Impose limits on command output size.
+ // TODO: Set environment to get English error messages.
+ var stderr bytes.Buffer
+ var stdout bytes.Buffer
+ c := exec.Command(cmd[0], cmd[1:]...)
+ c.Dir = dir
+ c.Stdin = stdin
+ c.Stderr = &stderr
+ c.Stdout = &stdout
+ err := c.Run()
+ if err != nil {
+ // Wrap any failure in a *RunError carrying the command line, directory, and captured stderr.
+ err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err}
+ }
+ // The captured standard output is returned even when err is non-nil.
+ return stdout.Bytes(), err
+}
diff --git a/src/cmd/go/internal/modfetch/codehost/git.go b/src/cmd/go/internal/modfetch/codehost/git.go
new file mode 100644
index 0000000..72005e2
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/git.go
@@ -0,0 +1,875 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codehost
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ exec "internal/execabs"
+ "io"
+ "io/fs"
+ "net/url"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "cmd/go/internal/lockedfile"
+ "cmd/go/internal/par"
+ "cmd/go/internal/web"
+
+ "golang.org/x/mod/semver"
+)
+
+// LocalGitRepo returns a Repo for the Git repository named by remote.
+// It accepts both Git remote references
+// and paths to repositories on the local file system.
+func LocalGitRepo(remote string) (Repo, error) {
+ return newGitRepoCached(remote, true)
+}
+
+// A notExistError wraps another error to retain its original text
+// but makes it opaquely equivalent to fs.ErrNotExist.
+// It has no Unwrap method, so the wrapped error itself is not exposed
+// to errors.Is or errors.As.
+type notExistError struct {
+ err error
+}
+
+func (e notExistError) Error() string { return e.err.Error() } // text of the wrapped error
+func (notExistError) Is(err error) bool { return err == fs.ErrNotExist } // matches only fs.ErrNotExist
+
+const gitWorkDirType = "git3"
+
+var gitRepoCache par.Cache
+
+// newGitRepoCached returns the result of newGitRepo(remote, localOK),
+// obtained through gitRepoCache using the pair (remote, localOK) as the key.
+// Both the Repo and the error are stored in the cached value.
+func newGitRepoCached(remote string, localOK bool) (Repo, error) {
+ type key struct {
+ remote string
+ localOK bool
+ }
+ type cached struct {
+ repo Repo
+ err error
+ }
+
+ c := gitRepoCache.Do(key{remote, localOK}, func() interface{} {
+ repo, err := newGitRepo(remote, localOK)
+ return cached{repo, err}
+ }).(cached)
+
+ return c.repo, c.err
+}
+
+// newGitRepo returns a gitRepo for remote.
+//
+// If remote contains "://" it is treated as a remote URL: the repository is
+// kept in the work directory returned by WorkDir, which is initialized as a
+// bare repository with the URL registered as the remote named "origin" if it
+// does not already contain an "objects" entry.
+//
+// Otherwise remote is treated as a path to a local directory, which is
+// accepted only if localOK is true and the path contains no colon.
+func newGitRepo(remote string, localOK bool) (Repo, error) {
+ r := &gitRepo{remote: remote}
+ if strings.Contains(remote, "://") {
+ // This is a remote path.
+ var err error
+ r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote)
+ if err != nil {
+ return nil, err
+ }
+
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ // A missing "objects" entry means the work directory has not been initialized yet.
+ if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
+ if _, err := Run(r.dir, "git", "init", "--bare"); err != nil {
+ os.RemoveAll(r.dir)
+ return nil, err
+ }
+ // We could just say git fetch https://whatever later,
+ // but this lets us say git fetch origin instead, which
+ // is a little nicer. More importantly, using a named remote
+ // avoids a problem with Git LFS. See golang.org/issue/25605.
+ if _, err := Run(r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil {
+ os.RemoveAll(r.dir)
+ return nil, err
+ }
+ }
+ // From here on r.remote is the remote's name; the URL is kept in r.remoteURL.
+ r.remoteURL = r.remote
+ r.remote = "origin"
+ } else {
+ // Local path.
+ // Disallow colon (not in ://) because sometimes
+ // that's rcp-style host:path syntax and sometimes it's not (c:\work).
+ // The go command has always insisted on URL syntax for ssh.
+ if strings.Contains(remote, ":") {
+ return nil, fmt.Errorf("git remote cannot use host:path syntax")
+ }
+ if !localOK {
+ return nil, fmt.Errorf("git remote must not be local directory")
+ }
+ r.local = true
+ info, err := os.Stat(remote)
+ if err != nil {
+ return nil, err
+ }
+ if !info.IsDir() {
+ return nil, fmt.Errorf("%s exists but is not a directory", remote)
+ }
+ r.dir = remote
+ r.mu.Path = r.dir + ".lock"
+ }
+ return r, nil
+}
+
+// A gitRepo is the Repo implementation returned by newGitRepo. It runs the
+// git command in dir.
+type gitRepo struct {
+ // For a repository given by URL, remote is "origin" and remoteURL is the URL.
+ // For a local repository, remote is the directory path and remoteURL is empty.
+ remote, remoteURL string
+ local bool // whether the repository is a local directory
+ dir string // directory in which git commands are run
+
+ mu lockedfile.Mutex // protects fetchLevel and git repo state
+
+ fetchLevel int // one of fetchNone, fetchSome, fetchAll
+
+ statCache par.Cache // caches the results of stat, keyed by rev (see Stat)
+
+ refsOnce sync.Once
+ // refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
+ // to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
+ refs map[string]string
+ refsErr error // error from loadRefs, if any
+
+ localTagsOnce sync.Once
+ localTags map[string]bool // set of tag names reported by "git tag -l"; see loadLocalTags
+}
+
+const (
+ // How much have we fetched into the git repo (in this process)?
+ fetchNone = iota // nothing yet
+ fetchSome // shallow fetches of individual hashes
+ fetchAll // "fetch -t origin": get all remote branches and tags
+)
+
+// loadLocalTags loads tag references from the local git cache
+// into the map r.localTags.
+// Should only be called as r.localTagsOnce.Do(r.loadLocalTags).
+func (r *gitRepo) loadLocalTags() {
+ // List the tags already present in the local repository.
+ // This runs "git tag -l" in r.dir and does not name a remote.
+ // If the command fails, r.localTags is left nil, so every lookup
+ // in it reports false.
+ out, err := Run(r.dir, "git", "tag", "-l")
+ if err != nil {
+ return
+ }
+
+ r.localTags = make(map[string]bool)
+ for _, line := range strings.Split(string(out), "\n") {
+ if line != "" {
+ r.localTags[line] = true
+ }
+ }
+}
+
+// loadRefs loads heads and tags references from the remote into the map r.refs.
+// On failure it sets r.refsErr and leaves r.refs nil.
+// Should only be called as r.refsOnce.Do(r.loadRefs).
+func (r *gitRepo) loadRefs() {
+ // The git protocol sends all known refs and ls-remote filters them on the client side,
+ // so we might as well record both heads and tags in one shot.
+ // Most of the time we only care about tags but sometimes we care about heads too.
+ out, gitErr := Run(r.dir, "git", "ls-remote", "-q", r.remote)
+ if gitErr != nil {
+ if rerr, ok := gitErr.(*RunError); ok {
+ // Attach a hint when git failed because it could not prompt for credentials.
+ if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
+ rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
+ }
+ }
+
+ // If the remote URL doesn't exist at all, ideally we should treat the whole
+ // repository as nonexistent by wrapping the error in a notExistError.
+ // For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
+ // ourselves and see what code it serves.
+ if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
+ if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
+ gitErr = notExistError{gitErr}
+ }
+ }
+
+ r.refsErr = gitErr
+ return
+ }
+
+ r.refs = make(map[string]string)
+ for _, line := range strings.Split(string(out), "\n") {
+ // Each useful line has exactly two fields: the hash, then the ref name.
+ f := strings.Fields(line)
+ if len(f) != 2 {
+ continue
+ }
+ if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
+ r.refs[f[1]] = f[0]
+ }
+ }
+ for ref, hash := range r.refs {
+ if strings.HasSuffix(ref, "^{}") { // record unwrapped annotated tag as value of tag
+ r.refs[strings.TrimSuffix(ref, "^{}")] = hash
+ delete(r.refs, ref)
+ }
+ }
+}
+
+func (r *gitRepo) Tags(prefix string) ([]string, error) {
+ r.refsOnce.Do(r.loadRefs)
+ if r.refsErr != nil {
+ return nil, r.refsErr
+ }
+
+ tags := []string{}
+ for ref := range r.refs {
+ if !strings.HasPrefix(ref, "refs/tags/") {
+ continue
+ }
+ tag := ref[len("refs/tags/"):]
+ if !strings.HasPrefix(tag, prefix) {
+ continue
+ }
+ tags = append(tags, tag)
+ }
+ sort.Strings(tags)
+ return tags, nil
+}
+
+// Latest stats the commit that the remote's HEAD ref points to.
+// It returns the error from loading the remote refs, if any, and
+// ErrNoCommits when the remote reports no HEAD.
+func (r *gitRepo) Latest() (*RevInfo, error) {
+	r.refsOnce.Do(r.loadRefs)
+	if err := r.refsErr; err != nil {
+		return nil, err
+	}
+	head := r.refs["HEAD"]
+	if head == "" {
+		return nil, ErrNoCommits
+	}
+	return r.Stat(head)
+}
+
+// findRef finds some ref name for the given hash,
+// for use when the server requires giving a ref instead of a hash.
+// There may be multiple ref names for a given hash,
+// in which case this returns some name - it doesn't matter which.
+// It reports ok == false when no known ref has that hash.
+func (r *gitRepo) findRef(hash string) (ref string, ok bool) {
+ r.refsOnce.Do(r.loadRefs)
+ // If loadRefs failed, r.refs is nil and the loop body never runs.
+ for ref, h := range r.refs {
+ if h == hash {
+ return ref, true
+ }
+ }
+ return "", false
+}
+
+// minHashDigits is the minimum number of digits to require
+// before accepting a hex digit sequence as potentially identifying
+// a specific commit in a git repo. (Of course, users can always
+// specify more digits, and many will paste in all 40 digits,
+// but many of git's commands default to printing short hashes
+// as 7 digits.)
+const minHashDigits = 7
+
+// stat stats the given rev in the local repository,
+// or else it fetches more info from the remote repository and tries again.
+//
+// For a local repository it only consults the repository itself.
+// Otherwise it tries, in order: a local lookup of rev as a hash, a local
+// lookup of rev as a tag, a shallow fetch of the single ref that rev
+// resolves to, and finally a fetch of all heads and tags.
+// It returns an *UnknownRevisionError if rev is neither a known ref name
+// nor a hex string of between minHashDigits and 40 digits.
+func (r *gitRepo) stat(rev string) (*RevInfo, error) {
+ if r.local {
+ return r.statLocal(rev, rev)
+ }
+
+ // Fast path: maybe rev is a hash we already have locally.
+ didStatLocal := false
+ if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
+ if info, err := r.statLocal(rev, rev); err == nil {
+ return info, nil
+ }
+ didStatLocal = true
+ }
+
+ // Maybe rev is a tag we already have locally.
+ // (Note that we're excluding branches, which can be stale.)
+ r.localTagsOnce.Do(r.loadLocalTags)
+ if r.localTags[rev] {
+ return r.statLocal(rev, "refs/tags/"+rev)
+ }
+
+ // Maybe rev is the name of a tag or branch on the remote server.
+ // Or maybe it's the prefix of a hash of a named ref.
+ // Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
+ r.refsOnce.Do(r.loadRefs)
+ var ref, hash string
+ if r.refs["refs/tags/"+rev] != "" {
+ ref = "refs/tags/" + rev
+ hash = r.refs[ref]
+ // Keep rev as is: tags are assumed not to change meaning.
+ } else if r.refs["refs/heads/"+rev] != "" {
+ ref = "refs/heads/" + rev
+ hash = r.refs[ref]
+ rev = hash // Replace rev, because meaning of refs/heads/foo can change.
+ } else if rev == "HEAD" && r.refs["HEAD"] != "" {
+ ref = "HEAD"
+ hash = r.refs[ref]
+ rev = hash // Replace rev, because meaning of HEAD can change.
+ } else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
+ // At the least, we have a hash prefix we can look up after the fetch below.
+ // Maybe we can map it to a full hash using the known refs.
+ prefix := rev
+ // Check whether rev is prefix of known ref hash.
+ for k, h := range r.refs {
+ if strings.HasPrefix(h, prefix) {
+ if hash != "" && hash != h {
+ // Hash is an ambiguous hash prefix.
+ // More information will not change that.
+ return nil, fmt.Errorf("ambiguous revision %s", rev)
+ }
+ if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
+ ref = k
+ }
+ rev = h
+ hash = h
+ }
+ }
+ if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
+ hash = rev
+ }
+ } else {
+ // rev is neither a known ref name nor a plausible hash or hash prefix.
+ return nil, &UnknownRevisionError{Rev: rev}
+ }
+
+ // Protect r.fetchLevel and the "fetch more and more" sequence.
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ // Perhaps r.localTags did not have the ref when we loaded local tags,
+ // but we've since done fetches that pulled down the hash we need
+ // (or already have the hash we need, just without its tag).
+ // Either way, try a local stat before falling back to network I/O.
+ if !didStatLocal {
+ if info, err := r.statLocal(rev, hash); err == nil {
+ if strings.HasPrefix(ref, "refs/tags/") {
+ // Make sure tag exists, so it will be in localTags next time the go command is run.
+ // The result of this command is deliberately ignored.
+ Run(r.dir, "git", "tag", strings.TrimPrefix(ref, "refs/tags/"), hash)
+ }
+ return info, nil
+ }
+ }
+
+ // If we know a specific commit we need and its ref, fetch it.
+ // We do NOT fetch arbitrary hashes (when we don't know the ref)
+ // because we want to avoid ever importing a commit that isn't
+ // reachable from refs/tags/* or refs/heads/* or HEAD.
+ // Both Gerrit and GitHub expose every CL/PR as a named ref,
+ // and we don't want those commits masquerading as being real
+ // pseudo-versions in the main repo.
+ if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local {
+ r.fetchLevel = fetchSome
+ var refspec string
+ if ref != "" && ref != "HEAD" {
+ // If we do know the ref name, save the mapping locally
+ // so that (if it is a tag) it can show up in localTags
+ // on a future call. Also, some servers refuse to allow
+ // full hashes in ref specs, so prefer a ref name if known.
+ refspec = ref + ":" + ref
+ } else {
+ // Fetch the hash but give it a local name (refs/dummy),
+ // because that triggers the fetch behavior of creating any
+ // other known remote tags for the hash. We never use
+ // refs/dummy (it's not refs/tags/dummy) and it will be
+ // overwritten in the next command, and that's fine.
+ ref = hash
+ refspec = hash + ":refs/dummy"
+ }
+ _, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec)
+ if err == nil {
+ return r.statLocal(rev, ref)
+ }
+ // Don't try to be smart about parsing the error.
+ // It's too complex and varies too much by git version.
+ // No matter what went wrong, fall back to a complete fetch.
+ }
+
+ // Last resort.
+ // Fetch all heads and tags and hope the hash we want is in the history.
+ if err := r.fetchRefsLocked(); err != nil {
+ return nil, err
+ }
+
+ return r.statLocal(rev, rev)
+}
+
+// fetchRefsLocked fetches all heads and tags from the origin, along with the
+// ancestors of those commits.
+//
+// We only fetch heads and tags, not arbitrary other commits: we don't want to
+// pull in off-branch commits (such as rejected GitHub pull requests) that the
+// server may be willing to provide. (See the comments within the stat method
+// for more detail.)
+//
+// fetchRefsLocked requires that r.mu remain locked for the duration of the call.
+// It does nothing if r.fetchLevel is already fetchAll, and sets r.fetchLevel
+// to fetchAll only after every fetch has succeeded.
+func (r *gitRepo) fetchRefsLocked() error {
+ if r.fetchLevel < fetchAll {
+ // NOTE: To work around a bug affecting Git clients up to at least 2.23.0
+ // (2019-08-16), we must first expand the set of local refs, and only then
+ // unshallow the repository as a separate fetch operation. (See
+ // golang.org/issue/34266 and
+ // https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
+
+ if _, err := Run(r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
+ return err
+ }
+
+ // Unshallow only when a "shallow" file exists in the repository directory.
+ if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
+ if _, err := Run(r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
+ return err
+ }
+ }
+
+ r.fetchLevel = fetchAll
+ }
+ return nil
+}
+
+// statLocal returns a RevInfo describing rev in the local git repository.
+// It uses version as info.Version only if version is one of the tags that
+// git log reports for the commit; otherwise info.Version is the full commit
+// hash. Any failure of git log is reported as an *UnknownRevisionError.
+func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) {
+ // The output is parsed below as: full hash, commit time in Unix seconds, ref decorations.
+ out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "-n1", "--format=format:%H %ct %D", rev, "--")
+ if err != nil {
+ return nil, &UnknownRevisionError{Rev: rev}
+ }
+ f := strings.Fields(string(out))
+ if len(f) < 2 {
+ return nil, fmt.Errorf("unexpected response from git log: %q", out)
+ }
+ hash := f[0]
+ if strings.HasPrefix(hash, version) {
+ version = hash // extend to full hash
+ }
+ t, err := strconv.ParseInt(f[1], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("invalid time from git log: %q", out)
+ }
+
+ info := &RevInfo{
+ Name: hash,
+ Short: ShortenSHA1(hash),
+ Time: time.Unix(t, 0).UTC(),
+ Version: hash,
+ }
+
+ // Add tags. Output looks like:
+ // ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
+ for i := 2; i < len(f); i++ {
+ if f[i] == "tag:" {
+ // The tag name is the field after "tag:", minus any trailing comma.
+ i++
+ if i < len(f) {
+ info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
+ }
+ }
+ }
+ sort.Strings(info.Tags)
+
+ // Used hash as info.Version above.
+ // Use caller's suggested version if it appears in the tag list
+ // (filters out branch names, HEAD).
+ for _, tag := range info.Tags {
+ if version == tag {
+ info.Version = version
+ }
+ }
+
+ return info, nil
+}
+
+// Stat returns information about the revision rev.
+// The special name "latest" is handled by Latest; every other rev is
+// resolved by stat through r.statCache, keyed by rev, with both the
+// RevInfo and the error stored in the cached value.
+func (r *gitRepo) Stat(rev string) (*RevInfo, error) {
+ if rev == "latest" {
+ return r.Latest()
+ }
+ type cached struct {
+ info *RevInfo
+ err error
+ }
+ c := r.statCache.Do(rev, func() interface{} {
+ info, err := r.stat(rev)
+ return cached{info, err}
+ }).(cached)
+ return c.info, c.err
+}
+
+// ReadFile returns the contents of file at revision rev, read with
+// "git cat-file blob".
+//
+// NOTE(review): maxSize is not used by this implementation, so no size
+// limit is enforced here.
+func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
+ // TODO: Could use git cat-file --batch.
+ info, err := r.Stat(rev) // download rev into local git repo
+ if err != nil {
+ return nil, err
+ }
+ out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file)
+ if err != nil {
+ // Any failure of git cat-file is reported as the file not existing.
+ return nil, fs.ErrNotExist
+ }
+ return out, nil
+}
+
+// ReadFileRevs reads file at each of the tags named in revs.
+// It first reads whatever the local repository already has. If some tags
+// are missing locally but known to the remote, it fetches all heads and
+// tags and reads those tags again. Tags that neither the local repository
+// nor the remote knows keep the "no such rev" error set by readFileRevs.
+//
+// NOTE(review): maxSize is not used by this implementation, so no size
+// limit is enforced here.
+func (r *gitRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) {
+ // Create space to hold results.
+ files := make(map[string]*FileRev)
+ for _, rev := range revs {
+ f := &FileRev{Rev: rev}
+ files[rev] = f
+ }
+
+ // Collect locally-known revs.
+ need, err := r.readFileRevs(revs, file, files)
+ if err != nil {
+ return nil, err
+ }
+ if len(need) == 0 {
+ return files, nil
+ }
+
+ // Build list of known remote refs that might help.
+ var redo []string
+ r.refsOnce.Do(r.loadRefs)
+ if r.refsErr != nil {
+ return nil, r.refsErr
+ }
+ for _, tag := range need {
+ if r.refs["refs/tags/"+tag] != "" {
+ redo = append(redo, tag)
+ }
+ }
+ if len(redo) == 0 {
+ return files, nil
+ }
+
+ // Protect r.fetchLevel and the "fetch more and more" sequence.
+ // See stat method above.
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ if err := r.fetchRefsLocked(); err != nil {
+ return nil, err
+ }
+
+ // Re-read only the tags that the remote knows about.
+ if _, err := r.readFileRevs(redo, file, files); err != nil {
+ return nil, err
+ }
+
+ return files, nil
+}
+
+// readFileRevs reads file at each of the given tags from the local
+// repository using a single "git cat-file --batch" invocation.
+//
+// For every tag it asks git for two objects, refs/tags/<tag> and
+// refs/tags/<tag>:<file>, and records the outcome in fileMap[tag], which
+// must already hold a non-nil entry for each tag. It returns the tags
+// that do not exist in the local repository.
+//
+// The error result is non-nil only if git could not be run or its output
+// could not be parsed; per-tag problems are reported in the FileRev's Err.
+func (r *gitRepo) readFileRevs(tags []string, file string, fileMap map[string]*FileRev) (missing []string, err error) {
+	var stdin bytes.Buffer
+	for _, tag := range tags {
+		fmt.Fprintf(&stdin, "refs/tags/%s\n", tag)
+		fmt.Fprintf(&stdin, "refs/tags/%s:%s\n", tag, file)
+	}
+
+	data, err := RunWithStdin(r.dir, &stdin, "git", "cat-file", "--batch")
+	if err != nil {
+		return nil, err
+	}
+
+	// next consumes one reply from data: either a "... missing" line, or a
+	// three-field header line whose last field is the body size, followed
+	// by that many bytes of body and an optional line terminator.
+	// It reports ok == false if the reply is malformed.
+	next := func() (typ string, body []byte, ok bool) {
+		var line string
+		i := bytes.IndexByte(data, '\n')
+		if i < 0 {
+			return "", nil, false
+		}
+		line, data = string(bytes.TrimSpace(data[:i])), data[i+1:]
+		if strings.HasSuffix(line, " missing") {
+			return "missing", nil, true
+		}
+		f := strings.Fields(line)
+		if len(f) != 3 {
+			return "", nil, false
+		}
+		n, err := strconv.Atoi(f[2])
+		// Reject a negative size as malformed rather than letting it
+		// panic in the slice expression below.
+		if err != nil || n < 0 || n > len(data) {
+			return "", nil, false
+		}
+		body, data = data[:n], data[n:]
+		if len(data) > 0 && data[0] == '\r' {
+			data = data[1:]
+		}
+		if len(data) > 0 && data[0] == '\n' {
+			data = data[1:]
+		}
+		return f[1], body, true
+	}
+
+	badGit := func() ([]string, error) {
+		return nil, fmt.Errorf("malformed output from git cat-file --batch")
+	}
+
+	for _, tag := range tags {
+		// Replies arrive in request order: first the tag itself, then the file.
+		commitType, _, ok := next()
+		if !ok {
+			return badGit()
+		}
+		fileType, fileData, ok := next()
+		if !ok {
+			return badGit()
+		}
+		f := fileMap[tag]
+		f.Data = nil
+		f.Err = nil
+		switch commitType {
+		default:
+			f.Err = fmt.Errorf("unexpected non-commit type %q for rev %s", commitType, tag)
+
+		case "missing":
+			// Note: f.Err must not satisfy os.IsNotExist. That's reserved for the file not existing in a valid commit.
+			f.Err = fmt.Errorf("no such rev %s", tag)
+			missing = append(missing, tag)
+
+		case "tag", "commit":
+			switch fileType {
+			default:
+				f.Err = &fs.PathError{Path: tag + ":" + file, Op: "read", Err: fmt.Errorf("unexpected non-blob type %q", fileType)}
+			case "missing":
+				f.Err = &fs.PathError{Path: tag + ":" + file, Op: "read", Err: fs.ErrNotExist}
+			case "blob":
+				f.Data = fileData
+			}
+		}
+	}
+	// Anything other than whitespace left over means the output did not
+	// match the requests.
+	if len(bytes.TrimSpace(data)) != 0 {
+		return badGit()
+	}
+
+	return missing, nil
+}
+
+// RecentTag returns the highest tag, by semantic version order, that has
+// the given prefix, is merged into rev, and is accepted by allowed.
+// Only tags whose remainder after prefix is a complete, valid semantic
+// version are considered. If no such tag is found locally but the remote
+// has tags beginning with prefix+"v", RecentTag fetches all heads and
+// tags and looks again. It returns an empty tag if none qualifies.
+func (r *gitRepo) RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) {
+ info, err := r.Stat(rev)
+ if err != nil {
+ return "", err
+ }
+ rev = info.Name // expand hash prefixes
+
+ // describe sets tag and err using 'git for-each-ref' and reports whether the
+ // result is definitive.
+ describe := func() (definitive bool) {
+ var out []byte
+ out, err = Run(r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
+ if err != nil {
+ return true
+ }
+
+ // prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
+ var highest string
+ for _, line := range strings.Split(string(out), "\n") {
+ line = strings.TrimSpace(line)
+ // git does support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here
+ // instead gives support for git v2.7.0.
+ if !strings.HasPrefix(line, "refs/tags/") {
+ continue
+ }
+ line = line[len("refs/tags/"):]
+
+ if !strings.HasPrefix(line, prefix) {
+ continue
+ }
+
+ semtag := line[len(prefix):]
+ // Consider only tags that are valid and complete (not just major.minor prefixes).
+ // NOTE: Do not replace the call to semver.Compare with semver.Max.
+ // We want to return the actual tag, not a canonicalized version of it,
+ // and semver.Max currently canonicalizes (see golang.org/issue/32700).
+ if c := semver.Canonical(semtag); c == "" || !strings.HasPrefix(semtag, c) || !allowed(semtag) {
+ continue
+ }
+ if semver.Compare(semtag, highest) > 0 {
+ highest = semtag
+ }
+ }
+
+ if highest != "" {
+ tag = prefix + highest
+ }
+
+ return tag != "" && !AllHex(tag)
+ }
+
+ if describe() {
+ return tag, err
+ }
+
+ // Git didn't find a version tag preceding the requested rev.
+ // See whether any plausible tag exists.
+ tags, err := r.Tags(prefix + "v")
+ if err != nil {
+ return "", err
+ }
+ if len(tags) == 0 {
+ return "", nil
+ }
+
+ // There are plausible tags, but we don't know if rev is a descendant of any of them.
+ // Fetch the history to find out.
+
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return "", err
+ }
+ defer unlock()
+
+ if err := r.fetchRefsLocked(); err != nil {
+ return "", err
+ }
+
+ // If we've reached this point, we have all of the commits that are reachable
+ // from all heads and tags.
+ //
+ // The only refs we should be missing are those that are no longer reachable
+ // (or never were reachable) from any branch or tag, including the master
+ // branch, and we don't want to resolve them anyway (they're probably
+ // unreachable for a reason).
+ //
+ // Try one last time in case some other goroutine fetched rev while we were
+ // waiting on the lock.
+ describe()
+ return tag, err
+}
+
+// DescendsFrom reports whether tag is an ancestor of rev, as determined by
+// 'git merge-base --is-ancestor'. If the first attempt does not succeed, it
+// verifies that both tag and rev exist, fetches the full history if that has
+// not been done yet, and asks git again.
+//
+// It returns (false, nil) when no tag with the given name exists or when git
+// reports "not an ancestor" after the full fetch, and a non-nil error for any
+// other failure.
+func (r *gitRepo) DescendsFrom(rev, tag string) (bool, error) {
+	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
+	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
+	// already doesn't work with Git 1.7.1, so at least it's not a regression.
+	//
+	// git merge-base --is-ancestor exits with status 0 if tag is an ancestor of
+	// rev, or 1 if not.
+	_, err := Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
+
+	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
+	// exit code 1.
+	// Unfortunately, if we've already fetched rev with a shallow history, git
+	// merge-base has been observed to report a false-negative, so don't stop yet
+	// even if the exit code is 1!
+	if err == nil {
+		return true, nil
+	}
+
+	// See whether the tag and rev even exist.
+	tags, err := r.Tags(tag)
+	if err != nil {
+		return false, err
+	}
+	if len(tags) == 0 {
+		return false, nil
+	}
+
+	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
+	// about, like rejected GitHub pull requests, so don't try to short-circuit
+	// that here.
+	if _, err = r.stat(rev); err != nil {
+		return false, err
+	}
+
+	// Now fetch history so that git can search for a path.
+	unlock, err := r.mu.Lock()
+	if err != nil {
+		return false, err
+	}
+	defer unlock()
+
+	if r.fetchLevel < fetchAll {
+		// Fetch the complete history for all refs and heads. It would be more
+		// efficient to only fetch the history from rev to tag, but that's much more
+		// complicated, and any kind of shallow fetch is fairly likely to trigger
+		// bugs in JGit servers and/or the go command anyway.
+		if err := r.fetchRefsLocked(); err != nil {
+			return false, err
+		}
+	}
+
+	_, err = Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
+	if err == nil {
+		return true, nil
+	}
+	// Use checked type assertions so that an error of an unexpected dynamic
+	// type is returned to the caller instead of causing a panic.
+	if runErr, ok := err.(*RunError); ok {
+		if ee, ok := runErr.Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
+			return false, nil
+		}
+	}
+	return false, err
+}
+
+// ReadZip returns a reader for a zip archive of revision rev produced by
+// 'git archive', with every entry placed under the "prefix/" directory.
+// If subdir is non-empty, only that subdirectory is archived.
+//
+// It returns fs.ErrNotExist if git reports that the path did not match any
+// files. maxSize is currently unused (see TODO below).
+func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
+	// TODO: Use maxSize or drop it.
+	args := []string{}
+	if subdir != "" {
+		args = append(args, "--", subdir)
+	}
+	info, err := r.Stat(rev) // download rev into local git repo
+	if err != nil {
+		return nil, err
+	}
+
+	unlock, err := r.mu.Lock()
+	if err != nil {
+		return nil, err
+	}
+	defer unlock()
+
+	if err := ensureGitAttributes(r.dir); err != nil {
+		return nil, err
+	}
+
+	// Incredibly, git produces different archives depending on whether
+	// it is running on a Windows system or not, in an attempt to normalize
+	// text file line endings. Setting -c core.autocrlf=input means only
+	// translate files on the way into the repo, not on the way out (archive).
+	// The -c core.eol=lf should be unnecessary but set it anyway.
+	archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
+	if err != nil {
+		// Use a checked type assertion so that an error of an unexpected
+		// dynamic type is returned as-is instead of causing a panic.
+		if runErr, ok := err.(*RunError); ok && bytes.Contains(runErr.Stderr, []byte("did not match any files")) {
+			return nil, fs.ErrNotExist
+		}
+		return nil, err
+	}
+
+	return io.NopCloser(bytes.NewReader(archive)), nil
+}
+
+// ensureGitAttributes makes sure export-subst and export-ignore features are
+// disabled for this repo. This is intended to be run prior to running git
+// archive so that zip files are generated that produce consistent ziphashes
+// for a given revision, independent of variables such as git version and the
+// size of the repo.
+//
+// It creates repoDir/info/attributes if needed and appends the disabling
+// attribute line unless the file already ends with it, so repeated calls do
+// not grow the file.
+//
+// See: https://github.com/golang/go/issues/27153
+func ensureGitAttributes(repoDir string) (err error) {
+	const attr = "\n* -export-subst -export-ignore\n"
+
+	d := repoDir + "/info"
+	p := d + "/attributes"
+
+	if err := os.MkdirAll(d, 0755); err != nil {
+		return err
+	}
+
+	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// Report a Close failure, but never let it mask an earlier
+		// (more relevant) read or write error.
+		if closeErr := f.Close(); closeErr != nil && err == nil {
+			err = closeErr
+		}
+	}()
+
+	b, err := io.ReadAll(f)
+	if err != nil {
+		return err
+	}
+	if !bytes.HasSuffix(b, []byte(attr)) {
+		_, err := f.WriteString(attr)
+		return err
+	}
+
+	return nil
+}
diff --git a/src/cmd/go/internal/modfetch/codehost/git_test.go b/src/cmd/go/internal/modfetch/codehost/git_test.go
new file mode 100644
index 0000000..89a73ba
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/git_test.go
@@ -0,0 +1,640 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codehost
+
+import (
+ "archive/zip"
+ "bytes"
+ "flag"
+ "fmt"
+ "internal/testenv"
+ "io"
+ "io/fs"
+ "log"
+ "os"
+ "os/exec"
+ "path"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "testing"
+ "time"
+)
+
+// TestMain parses the command-line flags and then exits with the status
+// reported by testMain.
+func TestMain(m *testing.M) {
+	// Flags must be parsed first: they initialize the test environment
+	// variables consulted by testing.Short and HasExternalNetwork.
+	flag.Parse()
+	code := testMain(m)
+	os.Exit(code)
+}
+
+// Remote test repositories used by the tests in this file.
+const (
+ gitrepo1 = "https://vcs-test.golang.org/git/gitrepo1"
+ hgrepo1 = "https://vcs-test.golang.org/hg/hgrepo1"
+)
+
+// altRepos lists the alternate repositories against which tests written for
+// gitrepo1 are re-run. The string "localGitRepo" is a sentinel that testRepo
+// resolves to the local mirror stored in the localGitRepo variable.
+var altRepos = []string{
+ "localGitRepo",
+ hgrepo1,
+}
+
+// TODO: Convert gitrepo1 to svn, bzr, fossil and add tests.
+// For now, at least the hgrepo1 tests check the general vcs.go logic.
+
+// localGitRepo is like gitrepo1 but allows archive access.
+// It holds the path of the local mirror clone and is set by testMain.
+var localGitRepo string
+
+// testMain prepares the shared test fixtures, runs the tests, and returns the
+// process exit status. It is separate from TestMain so that the deferred
+// removal of the temporary directory runs before TestMain calls os.Exit.
+//
+// If no git binary is found it reports success without running any test.
+func testMain(m *testing.M) int {
+	if _, err := exec.LookPath("git"); err != nil {
+		fmt.Fprintln(os.Stderr, "skipping because git binary not found")
+		fmt.Println("PASS")
+		return 0
+	}
+
+	dir, err := os.MkdirTemp("", "gitrepo-test-")
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+
+	if testenv.HasExternalNetwork() && testenv.HasExec() {
+		// Clone gitrepo1 into a local directory.
+		// If we use a file:// URL to access the local directory,
+		// then git starts up all the usual protocol machinery,
+		// which will let us test remote git archive invocations.
+		localGitRepo = filepath.Join(dir, "gitrepo2")
+		if _, err := Run("", "git", "clone", "--mirror", gitrepo1, localGitRepo); err != nil {
+			// Log and return a failing status rather than calling log.Fatal,
+			// which would exit without removing the temporary directory.
+			log.Print(err)
+			return 1
+		}
+		if _, err := Run(localGitRepo, "git", "config", "daemon.uploadarch", "true"); err != nil {
+			log.Print(err)
+			return 1
+		}
+	}
+
+	return m.Run()
+}
+
+func testRepo(remote string) (Repo, error) {
+ if remote == "localGitRepo" {
+ // Convert absolute path to file URL. LocalGitRepo will not accept
+ // Windows absolute paths because they look like a host:path remote.
+ // TODO(golang.org/issue/32456): use url.FromFilePath when implemented.
+ var url string
+ if strings.HasPrefix(localGitRepo, "/") {
+ url = "file://" + localGitRepo
+ } else {
+ url = "file:///" + filepath.ToSlash(localGitRepo)
+ }
+ return LocalGitRepo(url)
+ }
+ kind := "git"
+ for _, k := range []string{"hg"} {
+ if strings.Contains(remote, "/"+k+"/") {
+ kind = k
+ }
+ }
+ return NewRepo(kind, remote)
+}
+
+// tagsTests lists, for a repository and a tag prefix, the tags that
+// Repo.Tags is expected to return (compared with reflect.DeepEqual in TestTags).
+var tagsTests = []struct {
+ repo string
+ prefix string
+ tags []string
+}{
+ {gitrepo1, "xxx", []string{}},
+ {gitrepo1, "", []string{"v1.2.3", "v1.2.4-annotated", "v2.0.1", "v2.0.2", "v2.3"}},
+ {gitrepo1, "v", []string{"v1.2.3", "v1.2.4-annotated", "v2.0.1", "v2.0.2", "v2.3"}},
+ {gitrepo1, "v1", []string{"v1.2.3", "v1.2.4-annotated"}},
+ {gitrepo1, "2", []string{}},
+}
+
+// TestTags checks Repo.Tags against tagsTests. Cases written for gitrepo1
+// are repeated for every repository in altRepos.
+func TestTags(t *testing.T) {
+	testenv.MustHaveExternalNetwork(t)
+	testenv.MustHaveExec(t)
+
+	for _, tt := range tagsTests {
+		// check reads tt when it runs, so changing tt.repo below
+		// redirects it to another repository.
+		check := func(t *testing.T) {
+			repo, err := testRepo(tt.repo)
+			if err != nil {
+				t.Fatal(err)
+			}
+			got, err := repo.Tags(tt.prefix)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !reflect.DeepEqual(got, tt.tags) {
+				t.Errorf("Tags: incorrect tags\nhave %v\nwant %v", got, tt.tags)
+			}
+		}
+
+		t.Run(path.Base(tt.repo)+"/"+tt.prefix, check)
+		if tt.repo != gitrepo1 {
+			continue
+		}
+		for _, alt := range altRepos {
+			tt.repo = alt
+			t.Run(path.Base(alt)+"/"+tt.prefix, check)
+		}
+	}
+}
+
+// latestTests lists, per repository, the RevInfo that Repo.Latest is
+// expected to return (compared with reflect.DeepEqual in TestLatest).
+var latestTests = []struct {
+ repo string
+ info *RevInfo
+}{
+ {
+ gitrepo1,
+ &RevInfo{
+ Name: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Short: "ede458df7cd0",
+ Version: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC),
+ Tags: []string{"v1.2.3", "v1.2.4-annotated"},
+ },
+ },
+ {
+ hgrepo1,
+ &RevInfo{
+ Name: "18518c07eb8ed5c80221e997e518cccaa8c0c287",
+ Short: "18518c07eb8e",
+ Version: "18518c07eb8ed5c80221e997e518cccaa8c0c287",
+ Time: time.Date(2018, 6, 27, 16, 16, 30, 0, time.UTC),
+ },
+ },
+}
+
+// TestLatest checks Repo.Latest against latestTests. The gitrepo1 case is
+// repeated against the local mirror ("localGitRepo").
+func TestLatest(t *testing.T) {
+	testenv.MustHaveExternalNetwork(t)
+	testenv.MustHaveExec(t)
+
+	for _, tt := range latestTests {
+		// check reads tt when it runs, so changing tt.repo below
+		// redirects it to another repository.
+		check := func(t *testing.T) {
+			repo, err := testRepo(tt.repo)
+			if err != nil {
+				t.Fatal(err)
+			}
+			got, err := repo.Latest()
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !reflect.DeepEqual(got, tt.info) {
+				t.Errorf("Latest: incorrect info\nhave %+v\nwant %+v", *got, *tt.info)
+			}
+		}
+
+		t.Run(path.Base(tt.repo), check)
+		if tt.repo != gitrepo1 {
+			continue
+		}
+		tt.repo = "localGitRepo"
+		t.Run(path.Base(tt.repo), check)
+	}
+}
+
+// readFileTests lists ReadFile cases: for a repository, revision and file,
+// either the expected file contents (data) or a substring that the returned
+// error must contain (err).
+var readFileTests = []struct {
+ repo string
+ rev string
+ file string
+ err string
+ data string
+}{
+ {
+ repo: gitrepo1,
+ rev: "latest",
+ file: "README",
+ data: "",
+ },
+ {
+ repo: gitrepo1,
+ rev: "v2",
+ file: "another.txt",
+ data: "another\n",
+ },
+ {
+ repo: gitrepo1,
+ rev: "v2.3.4",
+ file: "another.txt",
+ err: fs.ErrNotExist.Error(),
+ },
+}
+
+// TestReadFile checks Repo.ReadFile against readFileTests. Cases written for
+// gitrepo1 are repeated for every repository in altRepos.
+func TestReadFile(t *testing.T) {
+	testenv.MustHaveExternalNetwork(t)
+	testenv.MustHaveExec(t)
+
+	for _, tt := range readFileTests {
+		// check reads tt when it runs, so changing tt.repo below
+		// redirects it to another repository.
+		check := func(t *testing.T) {
+			repo, err := testRepo(tt.repo)
+			if err != nil {
+				t.Fatal(err)
+			}
+			got, err := repo.ReadFile(tt.rev, tt.file, 100)
+			switch {
+			case err != nil && tt.err == "":
+				t.Fatalf("ReadFile: unexpected error %v", err)
+			case err != nil:
+				if !strings.Contains(err.Error(), tt.err) {
+					t.Fatalf("ReadFile: wrong error %q, want %q", err, tt.err)
+				}
+				if len(got) != 0 {
+					t.Errorf("ReadFile: non-empty data %q with error %v", got, err)
+				}
+			case tt.err != "":
+				t.Fatalf("ReadFile: no error, wanted %v", tt.err)
+			case string(got) != tt.data:
+				t.Errorf("ReadFile: incorrect data\nhave %q\nwant %q", got, tt.data)
+			}
+		}
+
+		t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.file, check)
+		if tt.repo != gitrepo1 {
+			continue
+		}
+		for _, alt := range altRepos {
+			tt.repo = alt
+			t.Run(path.Base(alt)+"/"+tt.rev+"/"+tt.file, check)
+		}
+	}
+}
+
+// readZipTests lists ReadZip cases: for a repository, revision and
+// subdirectory, either a substring that the returned error must contain (err)
+// or the exact set of zip entries expected (files), mapping each entry name to
+// its uncompressed size. A size of ^uint64(0) means the size is not checked.
+var readZipTests = []struct {
+ repo string
+ rev string
+ subdir string
+ err string
+ files map[string]uint64
+}{
+ {
+ repo: gitrepo1,
+ rev: "v2.3.4",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/README": 0,
+ "prefix/v2": 3,
+ },
+ },
+ {
+ repo: hgrepo1,
+ rev: "v2.3.4",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/.hg_archival.txt": ^uint64(0),
+ "prefix/README": 0,
+ "prefix/v2": 3,
+ },
+ },
+
+ {
+ repo: gitrepo1,
+ rev: "v2",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/README": 0,
+ "prefix/v2": 3,
+ "prefix/another.txt": 8,
+ "prefix/foo.txt": 13,
+ },
+ },
+ {
+ repo: hgrepo1,
+ rev: "v2",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/.hg_archival.txt": ^uint64(0),
+ "prefix/README": 0,
+ "prefix/v2": 3,
+ "prefix/another.txt": 8,
+ "prefix/foo.txt": 13,
+ },
+ },
+
+ {
+ repo: gitrepo1,
+ rev: "v3",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/v3/": 0,
+ "prefix/v3/sub/": 0,
+ "prefix/v3/sub/dir/": 0,
+ "prefix/v3/sub/dir/file.txt": 16,
+ "prefix/README": 0,
+ },
+ },
+ {
+ repo: hgrepo1,
+ rev: "v3",
+ subdir: "",
+ files: map[string]uint64{
+ "prefix/.hg_archival.txt": ^uint64(0),
+ "prefix/.hgtags": 405,
+ "prefix/v3/sub/dir/file.txt": 16,
+ "prefix/README": 0,
+ },
+ },
+
+ {
+ repo: gitrepo1,
+ rev: "v3",
+ subdir: "v3/sub/dir",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/v3/": 0,
+ "prefix/v3/sub/": 0,
+ "prefix/v3/sub/dir/": 0,
+ "prefix/v3/sub/dir/file.txt": 16,
+ },
+ },
+ {
+ repo: hgrepo1,
+ rev: "v3",
+ subdir: "v3/sub/dir",
+ files: map[string]uint64{
+ "prefix/v3/sub/dir/file.txt": 16,
+ },
+ },
+
+ {
+ repo: gitrepo1,
+ rev: "v3",
+ subdir: "v3/sub",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/v3/": 0,
+ "prefix/v3/sub/": 0,
+ "prefix/v3/sub/dir/": 0,
+ "prefix/v3/sub/dir/file.txt": 16,
+ },
+ },
+ {
+ repo: hgrepo1,
+ rev: "v3",
+ subdir: "v3/sub",
+ files: map[string]uint64{
+ "prefix/v3/sub/dir/file.txt": 16,
+ },
+ },
+
+ {
+ repo: gitrepo1,
+ rev: "aaaaaaaaab",
+ subdir: "",
+ err: "unknown revision",
+ },
+ {
+ repo: hgrepo1,
+ rev: "aaaaaaaaab",
+ subdir: "",
+ err: "unknown revision",
+ },
+
+ {
+ repo: "https://github.com/rsc/vgotest1",
+ rev: "submod/v1.0.4",
+ subdir: "submod",
+ files: map[string]uint64{
+ "prefix/": 0,
+ "prefix/submod/": 0,
+ "prefix/submod/go.mod": 53,
+ "prefix/submod/pkg/": 0,
+ "prefix/submod/pkg/p.go": 31,
+ },
+ },
+}
+
+// zipFile pairs a zip entry name with a size.
+// NOTE(review): no code in this file refers to this type; confirm whether
+// another file in the package still uses it.
+type zipFile struct {
+ name string
+ size int64
+}
+
+// TestReadZip checks Repo.ReadZip against readZipTests: the archive must
+// contain exactly the expected entries with the expected uncompressed sizes.
+// Cases written for gitrepo1 are repeated against the local mirror
+// ("localGitRepo").
+func TestReadZip(t *testing.T) {
+	testenv.MustHaveExternalNetwork(t)
+	testenv.MustHaveExec(t)
+
+	for _, tt := range readZipTests {
+		// check reads tt when it runs, so changing tt.repo below
+		// redirects it to another repository.
+		check := func(t *testing.T) {
+			repo, err := testRepo(tt.repo)
+			if err != nil {
+				t.Fatal(err)
+			}
+			rc, err := repo.ReadZip(tt.rev, tt.subdir, 100000)
+			if err != nil {
+				switch {
+				case tt.err == "":
+					t.Fatalf("ReadZip: unexpected error %v", err)
+				case !strings.Contains(err.Error(), tt.err):
+					t.Fatalf("ReadZip: wrong error %q, want %q", err, tt.err)
+				case rc != nil:
+					t.Errorf("ReadZip: non-nil io.ReadCloser with error %v", err)
+				}
+				return
+			}
+			defer rc.Close()
+			if tt.err != "" {
+				t.Fatalf("ReadZip: no error, wanted %v", tt.err)
+			}
+
+			raw, err := io.ReadAll(rc)
+			if err != nil {
+				t.Fatal(err)
+			}
+			zr, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
+			if err != nil {
+				t.Fatalf("ReadZip: cannot read zip file: %v", err)
+			}
+
+			seen := make(map[string]bool)
+			for _, entry := range zr.File {
+				want, ok := tt.files[entry.Name]
+				if !ok {
+					t.Errorf("ReadZip: unexpected file %s", entry.Name)
+					continue
+				}
+				seen[entry.Name] = true
+				if want == ^uint64(0) {
+					// Size is not checked for this entry.
+					continue
+				}
+				if entry.UncompressedSize64 != want {
+					t.Errorf("ReadZip: file %s has unexpected size %d != %d", entry.Name, entry.UncompressedSize64, want)
+				}
+			}
+			for name := range tt.files {
+				if !seen[name] {
+					t.Errorf("ReadZip: missing file %s", name)
+				}
+			}
+		}
+
+		t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, check)
+		if tt.repo != gitrepo1 {
+			continue
+		}
+		tt.repo = "localGitRepo"
+		t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, check)
+	}
+}
+
+// hgmap maps revision identifiers used in the gitrepo1 test cases to the
+// identifiers to use instead when TestStat re-runs those cases against hgrepo1.
+var hgmap = map[string]string{
+ "HEAD": "41964ddce1180313bdc01d0a39a2813344d6261d", // not tip due to bad hgrepo1 conversion
+ "9d02800338b8a55be062c838d1f02e0c5780b9eb": "8f49ee7a6ddcdec6f0112d9dca48d4a2e4c3c09e",
+ "76a00fb249b7f93091bc2c89a789dab1fc1bc26f": "88fde824ec8b41a76baa16b7e84212cee9f3edd0",
+ "ede458df7cd0fdca520df19a33158086a8a68e81": "41964ddce1180313bdc01d0a39a2813344d6261d",
+ "97f6aa59c81c623494825b43d39e445566e429a4": "c0cbbfb24c7c3c50c35c7b88e7db777da4ff625d",
+}
+
+// statTests lists Stat cases: for a repository and revision, either the
+// expected RevInfo (info) or a substring that the returned error must
+// contain (err).
+var statTests = []struct {
+ repo string
+ rev string
+ err string
+ info *RevInfo
+}{
+ {
+ repo: gitrepo1,
+ rev: "HEAD",
+ info: &RevInfo{
+ Name: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Short: "ede458df7cd0",
+ Version: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC),
+ Tags: []string{"v1.2.3", "v1.2.4-annotated"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "v2", // branch
+ info: &RevInfo{
+ Name: "9d02800338b8a55be062c838d1f02e0c5780b9eb",
+ Short: "9d02800338b8",
+ Version: "9d02800338b8a55be062c838d1f02e0c5780b9eb",
+ Time: time.Date(2018, 4, 17, 20, 00, 32, 0, time.UTC),
+ Tags: []string{"v2.0.2"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "v2.3.4", // badly-named branch (semver should be a tag)
+ info: &RevInfo{
+ Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f",
+ Short: "76a00fb249b7",
+ Version: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f",
+ Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC),
+ Tags: []string{"v2.0.1", "v2.3"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "v2.3", // badly-named tag (we only respect full semver v2.3.0)
+ info: &RevInfo{
+ Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f",
+ Short: "76a00fb249b7",
+ Version: "v2.3",
+ Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC),
+ Tags: []string{"v2.0.1", "v2.3"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "v1.2.3", // tag
+ info: &RevInfo{
+ Name: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Short: "ede458df7cd0",
+ Version: "v1.2.3",
+ Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC),
+ Tags: []string{"v1.2.3", "v1.2.4-annotated"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "ede458df", // hash prefix in refs
+ info: &RevInfo{
+ Name: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Short: "ede458df7cd0",
+ Version: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC),
+ Tags: []string{"v1.2.3", "v1.2.4-annotated"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "97f6aa59", // hash prefix not in refs
+ info: &RevInfo{
+ Name: "97f6aa59c81c623494825b43d39e445566e429a4",
+ Short: "97f6aa59c81c",
+ Version: "97f6aa59c81c623494825b43d39e445566e429a4",
+ Time: time.Date(2018, 4, 17, 20, 0, 19, 0, time.UTC),
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "v1.2.4-annotated", // annotated tag uses unwrapped commit hash
+ info: &RevInfo{
+ Name: "ede458df7cd0fdca520df19a33158086a8a68e81",
+ Short: "ede458df7cd0",
+ Version: "v1.2.4-annotated",
+ Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC),
+ Tags: []string{"v1.2.3", "v1.2.4-annotated"},
+ },
+ },
+ {
+ repo: gitrepo1,
+ rev: "aaaaaaaaab",
+ err: "unknown revision",
+ },
+}
+
+// TestStat checks Repo.Stat against statTests. Cases written for gitrepo1 are
+// repeated for every repository in altRepos; for hgrepo1 the revision and the
+// expected Name, Version and Short are first translated through hgmap.
+func TestStat(t *testing.T) {
+ testenv.MustHaveExternalNetwork(t)
+ testenv.MustHaveExec(t)
+
+ for _, tt := range statTests {
+ // f reads tt when it runs, so the modifications made to tt below take
+ // effect in the subsequent t.Run calls.
+ f := func(t *testing.T) {
+ r, err := testRepo(tt.repo)
+ if err != nil {
+ t.Fatal(err)
+ }
+ info, err := r.Stat(tt.rev)
+ if err != nil {
+ if tt.err == "" {
+ t.Fatalf("Stat: unexpected error %v", err)
+ }
+ if !strings.Contains(err.Error(), tt.err) {
+ t.Fatalf("Stat: wrong error %q, want %q", err, tt.err)
+ }
+ if info != nil {
+ t.Errorf("Stat: non-nil info with error %q", err)
+ }
+ return
+ }
+ if !reflect.DeepEqual(info, tt.info) {
+ t.Errorf("Stat: incorrect info\nhave %+v\nwant %+v", *info, *tt.info)
+ }
+ }
+ t.Run(path.Base(tt.repo)+"/"+tt.rev, f)
+ if tt.repo == gitrepo1 {
+ for _, tt.repo = range altRepos {
+ // Save tt (with the alternate repo already assigned) so that the
+ // remapped fields can be undone after the subtest.
+ old := tt
+ // m stays nil for repositories other than hgrepo1, in which case
+ // remap returns its input unchanged.
+ var m map[string]string
+ if tt.repo == hgrepo1 {
+ m = hgmap
+ }
+ if tt.info != nil {
+ // Remap a copy so that the shared statTests entry is not modified.
+ info := *tt.info
+ tt.info = &info
+ tt.info.Name = remap(tt.info.Name, m)
+ tt.info.Version = remap(tt.info.Version, m)
+ tt.info.Short = remap(tt.info.Short, m)
+ }
+ tt.rev = remap(tt.rev, m)
+ t.Run(path.Base(tt.repo)+"/"+tt.rev, f)
+ tt = old
+ }
+ }
+ }
+}
+
+// remap translates name through m. An exact key match with a non-empty value
+// wins. Otherwise, if AllHex(name) is true and name is a prefix of some key,
+// the result is that key's value truncated to len(name). If neither applies,
+// name is returned unchanged.
+func remap(name string, m map[string]string) string {
+	if mapped := m[name]; mapped != "" {
+		return mapped
+	}
+	if !AllHex(name) {
+		return name
+	}
+	for full, mapped := range m {
+		if strings.HasPrefix(full, name) {
+			return mapped[:len(name)]
+		}
+	}
+	return name
+}
diff --git a/src/cmd/go/internal/modfetch/codehost/shell.go b/src/cmd/go/internal/modfetch/codehost/shell.go
new file mode 100644
index 0000000..ce8b501
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/shell.go
@@ -0,0 +1,141 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Interactive debugging shell for codehost.Repo implementations.
+
+package main
+
+import (
+ "archive/zip"
+ "bufio"
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strings"
+ "time"
+
+ "cmd/go/internal/cfg"
+ "cmd/go/internal/modfetch/codehost"
+)
+
+func usage() {
+ fmt.Fprintf(os.Stderr, "usage: go run shell.go vcs remote\n")
+ os.Exit(2)
+}
+
+// main opens the repository named on the command line (vcs and remote) and
+// runs an interactive command loop over it, reading one command per line
+// from standard input:
+//
+//	tags [prefix]          list the tags with the given prefix
+//	stat rev               print metadata for revision rev
+//	read rev file          print the contents of file at rev
+//	zip rev subdir output  fetch a zip of rev and list its entries; a subdir
+//	                       of "-" means the repository root, and an output
+//	                       of "-" means the zip is not written to disk
+//
+// Command errors are printed with a leading "?" and the loop continues.
+// The loop ends when standard input reaches EOF.
+func main() {
+	cfg.GOMODCACHE = "/tmp/vcswork"
+	log.SetFlags(0)
+	log.SetPrefix("shell: ")
+	flag.Usage = usage
+	flag.Parse()
+	if flag.NArg() != 2 {
+		usage()
+	}
+
+	repo, err := codehost.NewRepo(flag.Arg(0), flag.Arg(1))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	b := bufio.NewReader(os.Stdin)
+	for {
+		fmt.Fprintf(os.Stderr, ">>> ")
+		line, err := b.ReadString('\n')
+		if err != nil && err != io.EOF {
+			log.Fatal(err)
+		}
+		// At EOF, still process a final line that lacks a trailing newline;
+		// the next read then returns an empty line and ends the loop.
+		atEOF := err == io.EOF
+		f := strings.Fields(line)
+		if len(f) == 0 {
+			if atEOF {
+				return
+			}
+			continue
+		}
+		switch f[0] {
+		default:
+			fmt.Fprintf(os.Stderr, "?unknown command\n")
+			continue
+		case "tags":
+			prefix := ""
+			if len(f) == 2 {
+				prefix = f[1]
+			}
+			if len(f) > 2 {
+				fmt.Fprintf(os.Stderr, "?usage: tags [prefix]\n")
+				continue
+			}
+			tags, err := repo.Tags(prefix)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+			for _, tag := range tags {
+				fmt.Printf("%s\n", tag)
+			}
+
+		case "stat":
+			if len(f) != 2 {
+				fmt.Fprintf(os.Stderr, "?usage: stat rev\n")
+				continue
+			}
+			info, err := repo.Stat(f[1])
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+			fmt.Printf("name=%s short=%s version=%s time=%s\n", info.Name, info.Short, info.Version, info.Time.UTC().Format(time.RFC3339))
+
+		case "read":
+			if len(f) != 3 {
+				fmt.Fprintf(os.Stderr, "?usage: read rev file\n")
+				continue
+			}
+			data, err := repo.ReadFile(f[1], f[2], 10<<20)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+			os.Stdout.Write(data)
+
+		case "zip":
+			if len(f) != 4 {
+				fmt.Fprintf(os.Stderr, "?usage: zip rev subdir output\n")
+				continue
+			}
+			subdir := f[2]
+			if subdir == "-" {
+				subdir = ""
+			}
+			rc, err := repo.ReadZip(f[1], subdir, 10<<20)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+			data, err := io.ReadAll(rc)
+			rc.Close()
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+
+			if f[3] != "-" {
+				if err := os.WriteFile(f[3], data, 0666); err != nil {
+					fmt.Fprintf(os.Stderr, "?%s\n", err)
+					continue
+				}
+			}
+			z, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "?%s\n", err)
+				continue
+			}
+			for _, zf := range z.File {
+				fmt.Printf("%s %d\n", zf.Name, zf.UncompressedSize64)
+			}
+		}
+	}
+}
diff --git a/src/cmd/go/internal/modfetch/codehost/svn.go b/src/cmd/go/internal/modfetch/codehost/svn.go
new file mode 100644
index 0000000..6ec9e59
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/svn.go
@@ -0,0 +1,154 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codehost
+
+import (
+ "archive/zip"
+ "encoding/xml"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "path/filepath"
+ "time"
+)
+
+// svnParseStat builds a RevInfo from out, the XML output of 'svn log --xml'.
+// Name is the decimal revision number, Short is the same number zero-padded
+// to 12 digits, Time is the log entry's date in UTC, and Version is rev.
+// It returns a VCSError if out cannot be decoded or its date is not RFC 3339.
+func svnParseStat(rev, out string) (*RevInfo, error) {
+	var parsed struct {
+		Logentry struct {
+			Revision int64  `xml:"revision,attr"`
+			Date     string `xml:"date"`
+		} `xml:"logentry"`
+	}
+	if err := xml.Unmarshal([]byte(out), &parsed); err != nil {
+		return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out)
+	}
+	entry := parsed.Logentry
+
+	when, err := time.Parse(time.RFC3339, entry.Date)
+	if err != nil {
+		return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out)
+	}
+
+	return &RevInfo{
+		Name:    fmt.Sprintf("%d", entry.Revision),
+		Short:   fmt.Sprintf("%012d", entry.Revision),
+		Time:    when.UTC(),
+		Version: rev,
+	}, nil
+}
+
+// svnReadZip writes to dst a zip archive of the files found at revision rev
+// of remote (restricted to subdir if it is non-empty). It lists the files with
+// 'svn list', exports them with 'svn export' into workDir/export, and copies
+// each listed file into the archive under the directory
+// path.Join(path.Base(remote), subdir). The export directory is removed
+// before returning (best-effort).
+//
+// It returns a VCSError if the svn list output cannot be decoded, or if a
+// listed file is missing from the export or differs in size.
+func svnReadZip(dst io.Writer, workDir, rev, subdir, remote string) (err error) {
+ // The subversion CLI doesn't provide a command to write the repository
+ // directly to an archive, so we need to export it to the local filesystem
+ // instead. Unfortunately, the local filesystem might apply arbitrary
+ // normalization to the filenames, so we need to obtain those directly.
+ //
+ // 'svn export' prints the filenames as they are written, but from reading the
+ // svn source code (as of revision 1868933), those filenames are encoded using
+ // the system locale rather than preserved byte-for-byte from the origin. For
+ // our purposes, that won't do, but we don't want to go mucking around with
+ // the user's locale settings either — that could impact error messages, and
+ // we don't know what locales the user has available or what LC_* variables
+ // their platform supports.
+ //
+ // Instead, we'll do a two-pass export: first we'll run 'svn list' to get the
+ // canonical filenames, then we'll 'svn export' and look for those filenames
+ // in the local filesystem. (If there is an encoding problem at that point, we
+ // would probably reject the resulting module anyway.)
+
+ remotePath := remote
+ if subdir != "" {
+ remotePath += "/" + subdir
+ }
+
+ out, err := Run(workDir, []string{
+ "svn", "list",
+ "--non-interactive",
+ "--xml",
+ "--incremental",
+ "--recursive",
+ "--revision", rev,
+ "--", remotePath,
+ })
+ if err != nil {
+ return err
+ }
+
+ // listEntry is one <entry> element of the 'svn list --xml' output.
+ type listEntry struct {
+ Kind string `xml:"kind,attr"`
+ Name string `xml:"name"`
+ Size int64 `xml:"size"`
+ }
+ var list struct {
+ Entries []listEntry `xml:"entry"`
+ }
+ if err := xml.Unmarshal(out, &list); err != nil {
+ return vcsErrorf("unexpected response from svn list --xml: %v\n%s", err, out)
+ }
+
+ exportDir := filepath.Join(workDir, "export")
+ // Remove any existing contents from a previous (failed) run.
+ if err := os.RemoveAll(exportDir); err != nil {
+ return err
+ }
+ defer os.RemoveAll(exportDir) // best-effort
+
+ _, err = Run(workDir, []string{
+ "svn", "export",
+ "--non-interactive",
+ "--quiet",
+
+ // Suppress any platform- or host-dependent transformations.
+ "--native-eol", "LF",
+ "--ignore-externals",
+ "--ignore-keywords",
+
+ "--revision", rev,
+ "--", remotePath,
+ exportDir,
+ })
+ if err != nil {
+ return err
+ }
+
+ // Scrape the exported files out of the filesystem and encode them in the zipfile.
+
+ // “All files in the zip file are expected to be
+ // nested in a single top-level directory, whose name is not specified.”
+ // We'll (arbitrarily) choose the base of the remote path.
+ basePath := path.Join(path.Base(remote), subdir)
+
+ zw := zip.NewWriter(dst)
+ for _, e := range list.Entries {
+ // Only regular files are archived; other entry kinds are skipped.
+ if e.Kind != "file" {
+ continue
+ }
+
+ zf, err := zw.Create(path.Join(basePath, e.Name))
+ if err != nil {
+ return err
+ }
+
+ f, err := os.Open(filepath.Join(exportDir, e.Name))
+ if err != nil {
+ if os.IsNotExist(err) {
+ return vcsErrorf("file reported by 'svn list', but not written by 'svn export': %s", e.Name)
+ }
+ return fmt.Errorf("error opening file created by 'svn export': %v", err)
+ }
+
+ n, err := io.Copy(zf, f)
+ f.Close()
+ if err != nil {
+ return err
+ }
+ // Cross-check the exported size against the size reported by 'svn list'.
+ if n != e.Size {
+ return vcsErrorf("file size differs between 'svn list' and 'svn export': file %s listed as %v bytes, but exported as %v bytes", e.Name, e.Size, n)
+ }
+ }
+
+ return zw.Close()
+}
diff --git a/src/cmd/go/internal/modfetch/codehost/vcs.go b/src/cmd/go/internal/modfetch/codehost/vcs.go
new file mode 100644
index 0000000..c2cca08
--- /dev/null
+++ b/src/cmd/go/internal/modfetch/codehost/vcs.go
@@ -0,0 +1,616 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codehost
+
+import (
+ "errors"
+ "fmt"
+ "internal/lazyregexp"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "cmd/go/internal/lockedfile"
+ "cmd/go/internal/par"
+ "cmd/go/internal/str"
+)
+
// A VCSError indicates an error using a version control system.
// The implication of a VCSError is that we know definitively where
// to get the code, but we can't access it due to the error.
// The caller should report this error instead of continuing to probe
// other possible module paths.
//
// TODO(golang.org/issue/31730): See if we can invert this. (Return a
// distinguished error for “repo not found” and treat everything else
// as terminal.)
type VCSError struct {
	// Err is the underlying error being marked as a VCS failure.
	Err error
}

// Error returns the message of the wrapped error, unchanged.
func (e *VCSError) Error() string { return e.Err.Error() }

// Unwrap returns the wrapped error so that errors.Is and errors.As
// can inspect the underlying cause through the VCSError marker.
func (e *VCSError) Unwrap() error { return e.Err }
+
+func vcsErrorf(format string, a ...interface{}) error {
+ return &VCSError{Err: fmt.Errorf(format, a...)}
+}
+
+func NewRepo(vcs, remote string) (Repo, error) {
+ type key struct {
+ vcs string
+ remote string
+ }
+ type cached struct {
+ repo Repo
+ err error
+ }
+ c := vcsRepoCache.Do(key{vcs, remote}, func() interface{} {
+ repo, err := newVCSRepo(vcs, remote)
+ if err != nil {
+ err = &VCSError{err}
+ }
+ return cached{repo, err}
+ }).(cached)
+
+ return c.repo, c.err
+}
+
+var vcsRepoCache par.Cache
+
+type vcsRepo struct {
+ mu lockedfile.Mutex // protects all commands, so we don't have to decide which are safe on a per-VCS basis
+
+ remote string
+ cmd *vcsCmd
+ dir string
+
+ tagsOnce sync.Once
+ tags map[string]bool
+
+ branchesOnce sync.Once
+ branches map[string]bool
+
+ fetchOnce sync.Once
+ fetchErr error
+}
+
+func newVCSRepo(vcs, remote string) (Repo, error) {
+ if vcs == "git" {
+ return newGitRepo(remote, false)
+ }
+ cmd := vcsCmds[vcs]
+ if cmd == nil {
+ return nil, fmt.Errorf("unknown vcs: %s %s", vcs, remote)
+ }
+ if !strings.Contains(remote, "://") {
+ return nil, fmt.Errorf("invalid vcs remote: %s %s", vcs, remote)
+ }
+
+ r := &vcsRepo{remote: remote, cmd: cmd}
+ var err error
+ r.dir, r.mu.Path, err = WorkDir(vcsWorkDirType+vcs, r.remote)
+ if err != nil {
+ return nil, err
+ }
+
+ if cmd.init == nil {
+ return r, nil
+ }
+
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ if _, err := os.Stat(filepath.Join(r.dir, "."+vcs)); err != nil {
+ if _, err := Run(r.dir, cmd.init(r.remote)); err != nil {
+ os.RemoveAll(r.dir)
+ return nil, err
+ }
+ }
+ return r, nil
+}
+
+const vcsWorkDirType = "vcs1."
+
+type vcsCmd struct {
+ vcs string // vcs name "hg"
+ init func(remote string) []string // cmd to init repo to track remote
+ tags func(remote string) []string // cmd to list local tags
+ tagRE *lazyregexp.Regexp // regexp to extract tag names from output of tags cmd
+ branches func(remote string) []string // cmd to list local branches
+ branchRE *lazyregexp.Regexp // regexp to extract branch names from output of tags cmd
+ badLocalRevRE *lazyregexp.Regexp // regexp of names that must not be served out of local cache without doing fetch first
+ statLocal func(rev, remote string) []string // cmd to stat local rev
+ parseStat func(rev, out string) (*RevInfo, error) // cmd to parse output of statLocal
+ fetch []string // cmd to fetch everything from remote
+ latest string // name of latest commit on remote (tip, HEAD, etc)
+ readFile func(rev, file, remote string) []string // cmd to read rev's file
+ readZip func(rev, subdir, remote, target string) []string // cmd to read rev's subdir as zip file
+ doReadZip func(dst io.Writer, workDir, rev, subdir, remote string) error // arbitrary function to read rev's subdir as zip file
+}
+
+var re = lazyregexp.New
+
+var vcsCmds = map[string]*vcsCmd{
+ "hg": {
+ vcs: "hg",
+ init: func(remote string) []string {
+ return []string{"hg", "clone", "-U", "--", remote, "."}
+ },
+ tags: func(remote string) []string {
+ return []string{"hg", "tags", "-q"}
+ },
+ tagRE: re(`(?m)^[^\n]+$`),
+ branches: func(remote string) []string {
+ return []string{"hg", "branches", "-c", "-q"}
+ },
+ branchRE: re(`(?m)^[^\n]+$`),
+ badLocalRevRE: re(`(?m)^(tip)$`),
+ statLocal: func(rev, remote string) []string {
+ return []string{"hg", "log", "-l1", "-r", rev, "--template", "{node} {date|hgdate} {tags}"}
+ },
+ parseStat: hgParseStat,
+ fetch: []string{"hg", "pull", "-f"},
+ latest: "tip",
+ readFile: func(rev, file, remote string) []string {
+ return []string{"hg", "cat", "-r", rev, file}
+ },
+ readZip: func(rev, subdir, remote, target string) []string {
+ pattern := []string{}
+ if subdir != "" {
+ pattern = []string{"-I", subdir + "/**"}
+ }
+ return str.StringList("hg", "archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/", pattern, "--", target)
+ },
+ },
+
+ "svn": {
+ vcs: "svn",
+ init: nil, // no local checkout
+ tags: func(remote string) []string {
+ return []string{"svn", "list", "--", strings.TrimSuffix(remote, "/trunk") + "/tags"}
+ },
+ tagRE: re(`(?m)^(.*?)/?$`),
+ statLocal: func(rev, remote string) []string {
+ suffix := "@" + rev
+ if rev == "latest" {
+ suffix = ""
+ }
+ return []string{"svn", "log", "-l1", "--xml", "--", remote + suffix}
+ },
+ parseStat: svnParseStat,
+ latest: "latest",
+ readFile: func(rev, file, remote string) []string {
+ return []string{"svn", "cat", "--", remote + "/" + file + "@" + rev}
+ },
+ doReadZip: svnReadZip,
+ },
+
+ "bzr": {
+ vcs: "bzr",
+ init: func(remote string) []string {
+ return []string{"bzr", "branch", "--use-existing-dir", "--", remote, "."}
+ },
+ fetch: []string{
+ "bzr", "pull", "--overwrite-tags",
+ },
+ tags: func(remote string) []string {
+ return []string{"bzr", "tags"}
+ },
+ tagRE: re(`(?m)^\S+`),
+ badLocalRevRE: re(`^revno:-`),
+ statLocal: func(rev, remote string) []string {
+ return []string{"bzr", "log", "-l1", "--long", "--show-ids", "-r", rev}
+ },
+ parseStat: bzrParseStat,
+ latest: "revno:-1",
+ readFile: func(rev, file, remote string) []string {
+ return []string{"bzr", "cat", "-r", rev, file}
+ },
+ readZip: func(rev, subdir, remote, target string) []string {
+ extra := []string{}
+ if subdir != "" {
+ extra = []string{"./" + subdir}
+ }
+ return str.StringList("bzr", "export", "--format=zip", "-r", rev, "--root=prefix/", "--", target, extra)
+ },
+ },
+
+ "fossil": {
+ vcs: "fossil",
+ init: func(remote string) []string {
+ return []string{"fossil", "clone", "--", remote, ".fossil"}
+ },
+ fetch: []string{"fossil", "pull", "-R", ".fossil"},
+ tags: func(remote string) []string {
+ return []string{"fossil", "tag", "-R", ".fossil", "list"}
+ },
+ tagRE: re(`XXXTODO`),
+ statLocal: func(rev, remote string) []string {
+ return []string{"fossil", "info", "-R", ".fossil", rev}
+ },
+ parseStat: fossilParseStat,
+ latest: "trunk",
+ readFile: func(rev, file, remote string) []string {
+ return []string{"fossil", "cat", "-R", ".fossil", "-r", rev, file}
+ },
+ readZip: func(rev, subdir, remote, target string) []string {
+ extra := []string{}
+ if subdir != "" && !strings.ContainsAny(subdir, "*?[],") {
+ extra = []string{"--include", subdir}
+ }
+ // Note that vcsRepo.ReadZip below rewrites this command
+ // to run in a different directory, to work around a fossil bug.
+ return str.StringList("fossil", "zip", "-R", ".fossil", "--name", "prefix", extra, "--", rev, target)
+ },
+ },
+}
+
+func (r *vcsRepo) loadTags() {
+ out, err := Run(r.dir, r.cmd.tags(r.remote))
+ if err != nil {
+ return
+ }
+
+ // Run tag-listing command and extract tags.
+ r.tags = make(map[string]bool)
+ for _, tag := range r.cmd.tagRE.FindAllString(string(out), -1) {
+ if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(tag) {
+ continue
+ }
+ r.tags[tag] = true
+ }
+}
+
+func (r *vcsRepo) loadBranches() {
+ if r.cmd.branches == nil {
+ return
+ }
+
+ out, err := Run(r.dir, r.cmd.branches(r.remote))
+ if err != nil {
+ return
+ }
+
+ r.branches = make(map[string]bool)
+ for _, branch := range r.cmd.branchRE.FindAllString(string(out), -1) {
+ if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(branch) {
+ continue
+ }
+ r.branches[branch] = true
+ }
+}
+
+func (r *vcsRepo) Tags(prefix string) ([]string, error) {
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ r.tagsOnce.Do(r.loadTags)
+
+ tags := []string{}
+ for tag := range r.tags {
+ if strings.HasPrefix(tag, prefix) {
+ tags = append(tags, tag)
+ }
+ }
+ sort.Strings(tags)
+ return tags, nil
+}
+
+func (r *vcsRepo) Stat(rev string) (*RevInfo, error) {
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ if rev == "latest" {
+ rev = r.cmd.latest
+ }
+ r.branchesOnce.Do(r.loadBranches)
+ revOK := (r.cmd.badLocalRevRE == nil || !r.cmd.badLocalRevRE.MatchString(rev)) && !r.branches[rev]
+ if revOK {
+ if info, err := r.statLocal(rev); err == nil {
+ return info, nil
+ }
+ }
+
+ r.fetchOnce.Do(r.fetch)
+ if r.fetchErr != nil {
+ return nil, r.fetchErr
+ }
+ info, err := r.statLocal(rev)
+ if err != nil {
+ return nil, err
+ }
+ if !revOK {
+ info.Version = info.Name
+ }
+ return info, nil
+}
+
+func (r *vcsRepo) fetch() {
+ if len(r.cmd.fetch) > 0 {
+ _, r.fetchErr = Run(r.dir, r.cmd.fetch)
+ }
+}
+
+func (r *vcsRepo) statLocal(rev string) (*RevInfo, error) {
+ out, err := Run(r.dir, r.cmd.statLocal(rev, r.remote))
+ if err != nil {
+ return nil, &UnknownRevisionError{Rev: rev}
+ }
+ return r.cmd.parseStat(rev, string(out))
+}
+
+func (r *vcsRepo) Latest() (*RevInfo, error) {
+ return r.Stat("latest")
+}
+
+func (r *vcsRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
+ if rev == "latest" {
+ rev = r.cmd.latest
+ }
+ _, err := r.Stat(rev) // download rev into local repo
+ if err != nil {
+ return nil, err
+ }
+
+ // r.Stat acquires r.mu, so lock after that.
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ out, err := Run(r.dir, r.cmd.readFile(rev, file, r.remote))
+ if err != nil {
+ return nil, fs.ErrNotExist
+ }
+ return out, nil
+}
+
+func (r *vcsRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) {
+ // We don't technically need to lock here since we're returning an error
+ // uncondititonally, but doing so anyway will help to avoid baking in
+ // lock-inversion bugs.
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ return nil, vcsErrorf("ReadFileRevs not implemented")
+}
+
+func (r *vcsRepo) RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) {
+ // We don't technically need to lock here since we're returning an error
+ // uncondititonally, but doing so anyway will help to avoid baking in
+ // lock-inversion bugs.
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return "", err
+ }
+ defer unlock()
+
+ return "", vcsErrorf("RecentTag not implemented")
+}
+
+func (r *vcsRepo) DescendsFrom(rev, tag string) (bool, error) {
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return false, err
+ }
+ defer unlock()
+
+ return false, vcsErrorf("DescendsFrom not implemented")
+}
+
+func (r *vcsRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
+ if r.cmd.readZip == nil && r.cmd.doReadZip == nil {
+ return nil, vcsErrorf("ReadZip not implemented for %s", r.cmd.vcs)
+ }
+
+ unlock, err := r.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ defer unlock()
+
+ if rev == "latest" {
+ rev = r.cmd.latest
+ }
+ f, err := os.CreateTemp("", "go-readzip-*.zip")
+ if err != nil {
+ return nil, err
+ }
+ if r.cmd.doReadZip != nil {
+ lw := &limitedWriter{
+ W: f,
+ N: maxSize,
+ ErrLimitReached: errors.New("ReadZip: encoded file exceeds allowed size"),
+ }
+ err = r.cmd.doReadZip(lw, r.dir, rev, subdir, r.remote)
+ if err == nil {
+ _, err = f.Seek(0, io.SeekStart)
+ }
+ } else if r.cmd.vcs == "fossil" {
+ // If you run
+ // fossil zip -R .fossil --name prefix trunk /tmp/x.zip
+ // fossil fails with "unable to create directory /tmp" [sic].
+ // Change the command to run in /tmp instead,
+ // replacing the -R argument with an absolute path.
+ args := r.cmd.readZip(rev, subdir, r.remote, filepath.Base(f.Name()))
+ for i := range args {
+ if args[i] == ".fossil" {
+ args[i] = filepath.Join(r.dir, ".fossil")
+ }
+ }
+ _, err = Run(filepath.Dir(f.Name()), args)
+ } else {
+ _, err = Run(r.dir, r.cmd.readZip(rev, subdir, r.remote, f.Name()))
+ }
+ if err != nil {
+ f.Close()
+ os.Remove(f.Name())
+ return nil, err
+ }
+ return &deleteCloser{f}, nil
+}
+
// deleteCloser wraps an *os.File so that closing it also removes the
// file from disk.
type deleteCloser struct {
	*os.File
}

// Close closes the underlying file and then removes it by name. The
// result of the close is returned; the result of the removal is ignored.
func (d *deleteCloser) Close() error {
	name := d.File.Name()
	defer os.Remove(name)
	return d.File.Close()
}
+
+func hgParseStat(rev, out string) (*RevInfo, error) {
+ f := strings.Fields(string(out))
+ if len(f) < 3 {
+ return nil, vcsErrorf("unexpected response from hg log: %q", out)
+ }
+ hash := f[0]
+ version := rev
+ if strings.HasPrefix(hash, version) {
+ version = hash // extend to full hash
+ }
+ t, err := strconv.ParseInt(f[1], 10, 64)
+ if err != nil {
+ return nil, vcsErrorf("invalid time from hg log: %q", out)
+ }
+
+ var tags []string
+ for _, tag := range f[3:] {
+ if tag != "tip" {
+ tags = append(tags, tag)
+ }
+ }
+ sort.Strings(tags)
+
+ info := &RevInfo{
+ Name: hash,
+ Short: ShortenSHA1(hash),
+ Time: time.Unix(t, 0).UTC(),
+ Version: version,
+ Tags: tags,
+ }
+ return info, nil
+}
+
+func bzrParseStat(rev, out string) (*RevInfo, error) {
+ var revno int64
+ var tm time.Time
+ for _, line := range strings.Split(out, "\n") {
+ if line == "" || line[0] == ' ' || line[0] == '\t' {
+ // End of header, start of commit message.
+ break
+ }
+ if line[0] == '-' {
+ continue
+ }
+ i := strings.Index(line, ":")
+ if i < 0 {
+ // End of header, start of commit message.
+ break
+ }
+ key, val := line[:i], strings.TrimSpace(line[i+1:])
+ switch key {
+ case "revno":
+ if j := strings.Index(val, " "); j >= 0 {
+ val = val[:j]
+ }
+ i, err := strconv.ParseInt(val, 10, 64)
+ if err != nil {
+ return nil, vcsErrorf("unexpected revno from bzr log: %q", line)
+ }
+ revno = i
+ case "timestamp":
+ j := strings.Index(val, " ")
+ if j < 0 {
+ return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
+ }
+ t, err := time.Parse("2006-01-02 15:04:05 -0700", val[j+1:])
+ if err != nil {
+ return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
+ }
+ tm = t.UTC()
+ }
+ }
+ if revno == 0 || tm.IsZero() {
+ return nil, vcsErrorf("unexpected response from bzr log: %q", out)
+ }
+
+ info := &RevInfo{
+ Name: fmt.Sprintf("%d", revno),
+ Short: fmt.Sprintf("%012d", revno),
+ Time: tm,
+ Version: rev,
+ }
+ return info, nil
+}
+
+func fossilParseStat(rev, out string) (*RevInfo, error) {
+ for _, line := range strings.Split(out, "\n") {
+ if strings.HasPrefix(line, "uuid:") || strings.HasPrefix(line, "hash:") {
+ f := strings.Fields(line)
+ if len(f) != 5 || len(f[1]) != 40 || f[4] != "UTC" {
+ return nil, vcsErrorf("unexpected response from fossil info: %q", line)
+ }
+ t, err := time.Parse("2006-01-02 15:04:05", f[2]+" "+f[3])
+ if err != nil {
+ return nil, vcsErrorf("unexpected response from fossil info: %q", line)
+ }
+ hash := f[1]
+ version := rev
+ if strings.HasPrefix(hash, version) {
+ version = hash // extend to full hash
+ }
+ info := &RevInfo{
+ Name: hash,
+ Short: ShortenSHA1(hash),
+ Time: t,
+ Version: version,
+ }
+ return info, nil
+ }
+ }
+ return nil, vcsErrorf("unexpected response from fossil info: %q", out)
+}
+
// limitedWriter is an io.Writer that passes at most N bytes through to
// W. Once the budget is used up, a Write that still has bytes to
// deliver fails with ErrLimitReached.
type limitedWriter struct {
	// W is the destination writer.
	W io.Writer
	// N is the number of bytes that may still be written.
	N int64
	// ErrLimitReached is the error returned when a Write is cut short
	// by the limit.
	ErrLimitReached error
}

// Write writes as much of p to l.W as the remaining budget allows and
// reduces the budget by the number of bytes written. It returns
// l.ErrLimitReached when some of p had to be dropped because of the
// limit, and the underlying writer's error when that write fails.
// A zero-length write never reports ErrLimitReached, since no byte was
// dropped.
func (l *limitedWriter) Write(p []byte) (n int, err error) {
	if l.N > 0 {
		limit := len(p)
		if l.N < int64(limit) {
			limit = int(l.N)
		}
		n, err = l.W.Write(p[:limit])
		l.N -= int64(n)
		if err != nil || n >= len(p) {
			return n, err
		}
	}

	// Only reachable with an empty p when the budget is already spent:
	// nothing was requested, so nothing exceeded the limit.
	if len(p) == 0 {
		return 0, nil
	}
	return n, l.ErrLimitReached
}