diff options
Diffstat (limited to 'src/cmd/go/internal/modfetch/codehost')
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/codehost.go | 315 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/git.go | 875 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/git_test.go | 640 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/shell.go | 141 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/svn.go | 154 | ||||
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/vcs.go | 616 |
6 files changed, 2741 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/codehost/codehost.go b/src/cmd/go/internal/modfetch/codehost/codehost.go new file mode 100644 index 0000000..378fbae --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/codehost.go @@ -0,0 +1,315 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package codehost defines the interface implemented by a code hosting source, +// along with support code for use by implementations. +package codehost + +import ( + "bytes" + "crypto/sha256" + "fmt" + exec "internal/execabs" + "io" + "io/fs" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/lockedfile" + "cmd/go/internal/str" +) + +// Downloaded size limits. +const ( + MaxGoMod = 16 << 20 // maximum size of go.mod file + MaxLICENSE = 16 << 20 // maximum size of LICENSE file + MaxZipFile = 500 << 20 // maximum size of downloaded zip file +) + +// A Repo represents a code hosting source. +// Typical implementations include local version control repositories, +// remote version control servers, and code hosting sites. +// A Repo must be safe for simultaneous use by multiple goroutines. +type Repo interface { + // Tags lists all tags with the given prefix. + Tags(prefix string) (tags []string, err error) + + // Stat returns information about the revision rev. + // A revision can be any identifier known to the underlying service: + // commit hash, branch, tag, and so on. + Stat(rev string) (*RevInfo, error) + + // Latest returns the latest revision on the default branch, + // whatever that means in the underlying implementation. + Latest() (*RevInfo, error) + + // ReadFile reads the given file in the file tree corresponding to revision rev. + // It should refuse to read more than maxSize bytes. + // + // If the requested file does not exist it should return an error for which + // os.IsNotExist(err) returns true. 
+ ReadFile(rev, file string, maxSize int64) (data []byte, err error) + + // ReadFileRevs reads a single file at multiple versions. + // It should refuse to read more than maxSize bytes. + // The result is a map from each requested rev string + // to the associated FileRev. The map must have a non-nil + // entry for every requested rev (unless ReadFileRevs returned an error). + // A file simply being missing or even corrupted in revs[i] + // should be reported only in files[revs[i]].Err, not in the error result + // from ReadFileRevs. + // The overall call should return an error (and no map) only + // in the case of a problem with obtaining the data, such as + // a network failure. + // Implementations may assume that revs only contain tags, + // not direct commit hashes. + ReadFileRevs(revs []string, file string, maxSize int64) (files map[string]*FileRev, err error) + + // ReadZip downloads a zip file for the subdir subdirectory + // of the given revision to a new file in a given temporary directory. + // It should refuse to read more than maxSize bytes. + // It returns a ReadCloser for a streamed copy of the zip file. + // All files in the zip file are expected to be + // nested in a single top-level directory, whose name is not specified. + ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) + + // RecentTag returns the most recent tag on rev or one of its predecessors + // with the given prefix. allowed may be used to filter out unwanted versions. + RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) + + // DescendsFrom reports whether rev or any of its ancestors has the given tag. + // + // DescendsFrom must return true for any tag returned by RecentTag for the + // same revision. + DescendsFrom(rev, tag string) (bool, error) +} + +// A RevInfo describes a single revision in a source code repository. 
type RevInfo struct {
	Name    string    // complete ID in underlying repository
	Short   string    // shortened ID, for use in pseudo-version
	Version string    // version used in lookup
	Time    time.Time // commit time
	Tags    []string  // known tags for commit
}

// A FileRev describes the result of reading a file at a given revision.
type FileRev struct {
	Rev  string // requested revision
	Data []byte // file data
	Err  error  // error if any; os.IsNotExist(Err)==true if rev exists but file does not exist in that rev
}

// UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a
// revision rather than a file.
type UnknownRevisionError struct {
	Rev string // revision named in the error message
}

// Error returns "unknown revision " followed by the revision name.
func (e *UnknownRevisionError) Error() string {
	return "unknown revision " + e.Rev
}

// Is reports whether err is fs.ErrNotExist, so that
// errors.Is(e, fs.ErrNotExist) holds for every *UnknownRevisionError.
//
// It uses a pointer receiver, like Error, so the type has a single receiver
// kind and calling Is through a nil *UnknownRevisionError does not have to
// copy the pointed-to value (which would panic).
func (e *UnknownRevisionError) Is(err error) bool {
	return err == fs.ErrNotExist
}

// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given
// repository or module contains no commits.
var ErrNoCommits error = noCommitsError{}

// noCommitsError is the concrete type of ErrNoCommits.
type noCommitsError struct{}

// Error returns the fixed text "no commits".
func (noCommitsError) Error() string {
	return "no commits"
}

// Is reports whether err is fs.ErrNotExist.
func (noCommitsError) Is(err error) bool {
	return err == fs.ErrNotExist
}

// AllHex reports whether the revision rev is entirely lower-case hexadecimal digits.
// The empty string contains no other characters, so AllHex("") is true.
func AllHex(rev string) bool {
	for i := 0; i < len(rev); i++ {
		c := rev[i]
		if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' {
			continue
		}
		return false
	}
	return true
}

// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length
// used in pseudo-versions (12 hex digits).
// Any other string is returned unchanged.
func ShortenSHA1(rev string) string {
	// Check the length first: it is constant-time and rejects most inputs
	// without scanning them.
	if len(rev) == 40 && AllHex(rev) {
		return rev[:12]
	}
	return rev
}

// WorkDir returns the name of the cached work directory to use for the
// given repository type and name.
+func WorkDir(typ, name string) (dir, lockfile string, err error) { + if cfg.GOMODCACHE == "" { + return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set") + } + + // We name the work directory for the SHA256 hash of the type and name. + // We intentionally avoid the actual name both because of possible + // conflicts with valid file system paths and because we want to ensure + // that one checkout is never nested inside another. That nesting has + // led to security problems in the past. + if strings.Contains(typ, ":") { + return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") + } + key := typ + ":" + name + dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) + + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name) + } + if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil { + return "", "", err + } + + lockfile = dir + ".lock" + if cfg.BuildX { + // Terminate the trace line so the next -x line starts on its own line. + fmt.Fprintf(os.Stderr, "# lock %s\n", lockfile) + } + + unlock, err := lockedfile.MutexAt(lockfile).Lock() + if err != nil { + return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err) + } + defer unlock() + + data, err := os.ReadFile(dir + ".info") + info, err2 := os.Stat(dir) + if err == nil && err2 == nil && info.IsDir() { + // Info file and directory both already exist: reuse. + have := strings.TrimSuffix(string(data), "\n") + if have != key { + return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) + } + if cfg.BuildX { + fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name) + } + return dir, lockfile, nil + } + + // Info file or directory missing. Start from scratch. 
+ if cfg.BuildX { + fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name) + } + os.RemoveAll(dir) + if err := os.MkdirAll(dir, 0777); err != nil { + return "", "", err + } + if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil { + os.RemoveAll(dir) + return "", "", err + } + return dir, lockfile, nil +} + +// A RunError describes a command that could not be run or that failed: +// the command line and directory (Cmd), the underlying error (Err), +// the captured standard error (Stderr), and optional advice (HelpText). +type RunError struct { + Cmd string + Err error + Stderr []byte + HelpText string +} + +// Error formats the command and underlying error, followed by the +// tab-indented standard error and the help text when they are non-empty. +func (e *RunError) Error() string { + text := e.Cmd + ": " + e.Err.Error() + stderr := bytes.TrimRight(e.Stderr, "\n") + if len(stderr) > 0 { + text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t") + } + if len(e.HelpText) > 0 { + text += "\n" + e.HelpText + } + return text +} + +// dirLock maps a directory name to the *sync.Mutex that RunWithStdin holds +// while a command runs in that directory. +var dirLock sync.Map + +// Run runs the command line in the given directory +// (an empty dir means the current directory). +// It returns the standard output and, for a non-zero exit, +// a *RunError indicating the command, exit status, and standard error. +// Standard error is unavailable for commands that exit successfully. +func Run(dir string, cmdline ...interface{}) ([]byte, error) { + return RunWithStdin(dir, nil, cmdline...) +} + +// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. +// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. 
+var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) + +// RunWithStdin is like Run but connects stdin to the command's standard input. +// Commands started with the same non-empty dir are serialized within this +// process by a per-directory mutex. Any failure is returned as a *RunError. +func RunWithStdin(dir string, stdin io.Reader, cmdline ...interface{}) ([]byte, error) { + if dir != "" { + muIface, ok := dirLock.Load(dir) + if !ok { + muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) + } + mu := muIface.(*sync.Mutex) + mu.Lock() + defer mu.Unlock() + } + + cmd := str.StringList(cmdline...) 
+ if os.Getenv("TESTGOVCS") == "panic" { + panic(fmt.Sprintf("use of vcs: %v", cmd)) + } + if cfg.BuildX { + text := new(strings.Builder) + if dir != "" { + text.WriteString("cd ") + text.WriteString(dir) + text.WriteString("; ") + } + for i, arg := range cmd { + if i > 0 { + text.WriteByte(' ') + } + switch { + case strings.ContainsAny(arg, "'"): + // Quote args that could be mistaken for quoted args. + text.WriteByte('"') + text.WriteString(bashQuoter.Replace(arg)) + text.WriteByte('"') + case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): + // Quote args that contain special characters, glob patterns, or spaces. + text.WriteByte('\'') + text.WriteString(arg) + text.WriteByte('\'') + default: + text.WriteString(arg) + } + } + fmt.Fprintf(os.Stderr, "%s\n", text) + start := time.Now() + defer func() { + fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text) + }() + } + // TODO: Impose limits on command output size. + // TODO: Set environment to get English error messages. + var stderr bytes.Buffer + var stdout bytes.Buffer + c := exec.Command(cmd[0], cmd[1:]...) + c.Dir = dir + c.Stdin = stdin + c.Stderr = &stderr + c.Stdout = &stdout + err := c.Run() + if err != nil { + err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} + } + return stdout.Bytes(), err +} diff --git a/src/cmd/go/internal/modfetch/codehost/git.go b/src/cmd/go/internal/modfetch/codehost/git.go new file mode 100644 index 0000000..72005e2 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/git.go @@ -0,0 +1,875 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codehost + +import ( + "bytes" + "errors" + "fmt" + exec "internal/execabs" + "io" + "io/fs" + "net/url" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/par" + "cmd/go/internal/web" + + "golang.org/x/mod/semver" +) + +// LocalGitRepo is like Repo but accepts both Git remote references +// and paths to repositories on the local file system. +func LocalGitRepo(remote string) (Repo, error) { + return newGitRepoCached(remote, true) +} + +// A notExistError wraps another error to retain its original text +// but makes it opaquely equivalent to fs.ErrNotExist. +type notExistError struct { + err error +} + +func (e notExistError) Error() string { return e.err.Error() } +func (notExistError) Is(err error) bool { return err == fs.ErrNotExist } + +// gitWorkDirType is the repository type passed to WorkDir for git checkouts. +const gitWorkDirType = "git3" + +// gitRepoCache memoizes the results of newGitRepo for newGitRepoCached. +var gitRepoCache par.Cache + +// newGitRepoCached returns the Repo for remote, memoizing both the repo and +// any construction error in gitRepoCache, keyed by (remote, localOK). +func newGitRepoCached(remote string, localOK bool) (Repo, error) { + type key struct { + remote string + localOK bool + } + type cached struct { + repo Repo + err error + } + + c := gitRepoCache.Do(key{remote, localOK}, func() interface{} { + repo, err := newGitRepo(remote, localOK) + return cached{repo, err} + }).(cached) + + return c.repo, c.err +} + +// newGitRepo builds a gitRepo for remote. A remote containing "://" is +// treated as a URL and backed by a bare repository in a cached work +// directory; anything else is treated as a local directory, which is +// accepted only when localOK is true. +func newGitRepo(remote string, localOK bool) (Repo, error) { + r := &gitRepo{remote: remote} + if strings.Contains(remote, "://") { + // This is a remote path. + var err error + r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote) + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil { + if _, err := Run(r.dir, "git", "init", "--bare"); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + // We could just say git fetch https://whatever later, + // but this lets us say git fetch origin instead, which + // is a little nicer. 
More importantly, using a named remote + // avoids a problem with Git LFS. See golang.org/issue/25605. + if _, err := Run(r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + } + r.remoteURL = r.remote + r.remote = "origin" + } else { + // Local path. + // Disallow colon (not in ://) because sometimes + // that's rcp-style host:path syntax and sometimes it's not (c:\work). + // The go command has always insisted on URL syntax for ssh. + if strings.Contains(remote, ":") { + return nil, fmt.Errorf("git remote cannot use host:path syntax") + } + if !localOK { + return nil, fmt.Errorf("git remote must not be local directory") + } + r.local = true + info, err := os.Stat(remote) + if err != nil { + return nil, err + } + if !info.IsDir() { + return nil, fmt.Errorf("%s exists but is not a directory", remote) + } + r.dir = remote + r.mu.Path = r.dir + ".lock" + } + return r, nil +} + +// A gitRepo is the Repo implementation backed by the git repository in dir. +type gitRepo struct { + remote, remoteURL string + local bool + dir string + + mu lockedfile.Mutex // protects fetchLevel and git repo state + + fetchLevel int + + statCache par.Cache + + refsOnce sync.Once + // refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master") + // to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6") + refs map[string]string + refsErr error + + localTagsOnce sync.Once + localTags map[string]bool +} + +const ( + // How much have we fetched into the git repo (in this process)? + fetchNone = iota // nothing yet + fetchSome // shallow fetches of individual hashes + fetchAll // "fetch -t origin": get all remote branches and tags +) + +// loadLocalTags loads tag references from the local git cache +// into the map r.localTags. +// Should only be called as r.localTagsOnce.Do(r.loadLocalTags). +func (r *gitRepo) loadLocalTags() { + // Unlike loadRefs, this does not query the remote: it only lists the tags + // that already exist in the repository in r.dir. 
+ // If git fails, r.localTags stays nil and every lookup in it reports false. + out, err := Run(r.dir, "git", "tag", "-l") + if err != nil { + return + } + + r.localTags = make(map[string]bool) + for _, line := range strings.Split(string(out), "\n") { + if line != "" { + r.localTags[line] = true + } + } +} + +// loadRefs loads heads and tags references from the remote into the map r.refs. +// Should only be called as r.refsOnce.Do(r.loadRefs). +func (r *gitRepo) loadRefs() { + // The git protocol sends all known refs and ls-remote filters them on the client side, + // so we might as well record both heads and tags in one shot. + // Most of the time we only care about tags but sometimes we care about heads too. + out, gitErr := Run(r.dir, "git", "ls-remote", "-q", r.remote) + if gitErr != nil { + if rerr, ok := gitErr.(*RunError); ok { + if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) { + rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information." + } + } + + // If the remote URL doesn't exist at all, ideally we should treat the whole + // repository as nonexistent by wrapping the error in a notExistError. + // For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL + // ourselves and see what code it serves. 
+ if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") { + if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) { + gitErr = notExistError{gitErr} + } + } + + r.refsErr = gitErr + return + } + + r.refs = make(map[string]string) + for _, line := range strings.Split(string(out), "\n") { + f := strings.Fields(line) + if len(f) != 2 { + continue + } + if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") { + r.refs[f[1]] = f[0] + } + } + for ref, hash := range r.refs { + if strings.HasSuffix(ref, "^{}") { // record unwrapped annotated tag as value of tag + r.refs[strings.TrimSuffix(ref, "^{}")] = hash + delete(r.refs, ref) + } + } +} + +// Tags returns the sorted names of the remote tags that begin with prefix. +// The result is non-nil even when no tag matches. +func (r *gitRepo) Tags(prefix string) ([]string, error) { + r.refsOnce.Do(r.loadRefs) + if r.refsErr != nil { + return nil, r.refsErr + } + + tags := []string{} + for ref := range r.refs { + if !strings.HasPrefix(ref, "refs/tags/") { + continue + } + tag := ref[len("refs/tags/"):] + if !strings.HasPrefix(tag, prefix) { + continue + } + tags = append(tags, tag) + } + sort.Strings(tags) + return tags, nil +} + +// Latest returns the RevInfo for the commit that the remote's HEAD points to, +// or ErrNoCommits if the remote did not report a HEAD. +func (r *gitRepo) Latest() (*RevInfo, error) { + r.refsOnce.Do(r.loadRefs) + if r.refsErr != nil { + return nil, r.refsErr + } + if r.refs["HEAD"] == "" { + return nil, ErrNoCommits + } + return r.Stat(r.refs["HEAD"]) +} + +// findRef finds some ref name for the given hash, +// for use when the server requires giving a ref instead of a hash. +// There may be multiple ref names for a given hash, +// in which case this returns some name - it doesn't matter which. +func (r *gitRepo) findRef(hash string) (ref string, ok bool) { + r.refsOnce.Do(r.loadRefs) + for ref, h := range r.refs { + if h == hash { + return ref, true + } + } + return "", false +} + +// minHashDigits is the minimum number of digits to require +// before accepting a hex digit sequence as potentially identifying +// a specific commit in a git repo. 
(Of course, users can always +// specify more digits, and many will paste in all 40 digits, +// but many of git's commands default to printing short hashes +// as 7 digits.) +const minHashDigits = 7 + +// stat stats the given rev in the local repository, +// or else it fetches more info from the remote repository and tries again. +func (r *gitRepo) stat(rev string) (*RevInfo, error) { + if r.local { + return r.statLocal(rev, rev) + } + + // Fast path: maybe rev is a hash we already have locally. + didStatLocal := false + if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { + if info, err := r.statLocal(rev, rev); err == nil { + return info, nil + } + didStatLocal = true + } + + // Maybe rev is a tag we already have locally. + // (Note that we're excluding branches, which can be stale.) + r.localTagsOnce.Do(r.loadLocalTags) + if r.localTags[rev] { + return r.statLocal(rev, "refs/tags/"+rev) + } + + // Maybe rev is the name of a tag or branch on the remote server. + // Or maybe it's the prefix of a hash of a named ref. + // Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash. + r.refsOnce.Do(r.loadRefs) + var ref, hash string + if r.refs["refs/tags/"+rev] != "" { + ref = "refs/tags/" + rev + hash = r.refs[ref] + // Keep rev as is: tags are assumed not to change meaning. + } else if r.refs["refs/heads/"+rev] != "" { + ref = "refs/heads/" + rev + hash = r.refs[ref] + rev = hash // Replace rev, because meaning of refs/heads/foo can change. + } else if rev == "HEAD" && r.refs["HEAD"] != "" { + ref = "HEAD" + hash = r.refs[ref] + rev = hash // Replace rev, because meaning of HEAD can change. + } else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { + // At the least, we have a hash prefix we can look up after the fetch below. + // Maybe we can map it to a full hash using the known refs. + prefix := rev + // Check whether rev is prefix of known ref hash. 
+ for k, h := range r.refs { + if strings.HasPrefix(h, prefix) { + if hash != "" && hash != h { + // Hash is an ambiguous hash prefix. + // More information will not change that. + return nil, fmt.Errorf("ambiguous revision %s", rev) + } + if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash. + ref = k + } + rev = h + hash = h + } + } + if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash. + hash = rev + } + } else { + return nil, &UnknownRevisionError{Rev: rev} + } + + // Protect r.fetchLevel and the "fetch more and more" sequence. + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + // Perhaps r.localTags did not have the ref when we loaded local tags, + // but we've since done fetches that pulled down the hash we need + // (or already have the hash we need, just without its tag). + // Either way, try a local stat before falling back to network I/O. + if !didStatLocal { + if info, err := r.statLocal(rev, hash); err == nil { + if strings.HasPrefix(ref, "refs/tags/") { + // Make sure tag exists, so it will be in localTags next time the go command is run. + Run(r.dir, "git", "tag", strings.TrimPrefix(ref, "refs/tags/"), hash) + } + return info, nil + } + } + + // If we know a specific commit we need and its ref, fetch it. + // We do NOT fetch arbitrary hashes (when we don't know the ref) + // because we want to avoid ever importing a commit that isn't + // reachable from refs/tags/* or refs/heads/* or HEAD. + // Both Gerrit and GitHub expose every CL/PR as a named ref, + // and we don't want those commits masquerading as being real + // pseudo-versions in the main repo. + if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local { + r.fetchLevel = fetchSome + var refspec string + if ref != "" && ref != "HEAD" { + // If we do know the ref name, save the mapping locally + // so that (if it is a tag) it can show up in localTags + // on a future call. 
Also, some servers refuse to allow + // full hashes in ref specs, so prefer a ref name if known. + refspec = ref + ":" + ref + } else { + // Fetch the hash but give it a local name (refs/dummy), + // because that triggers the fetch behavior of creating any + // other known remote tags for the hash. We never use + // refs/dummy (it's not refs/tags/dummy) and it will be + // overwritten in the next command, and that's fine. + ref = hash + refspec = hash + ":refs/dummy" + } + _, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec) + if err == nil { + return r.statLocal(rev, ref) + } + // Don't try to be smart about parsing the error. + // It's too complex and varies too much by git version. + // No matter what went wrong, fall back to a complete fetch. + } + + // Last resort. + // Fetch all heads and tags and hope the hash we want is in the history. + if err := r.fetchRefsLocked(); err != nil { + return nil, err + } + + return r.statLocal(rev, rev) +} + +// fetchRefsLocked fetches all heads and tags from the origin, along with the +// ancestors of those commits. +// +// We only fetch heads and tags, not arbitrary other commits: we don't want to +// pull in off-branch commits (such as rejected GitHub pull requests) that the +// server may be willing to provide. (See the comments within the stat method +// for more detail.) +// +// fetchRefsLocked requires that r.mu remain locked for the duration of the call. +func (r *gitRepo) fetchRefsLocked() error { + if r.fetchLevel < fetchAll { + // NOTE: To work around a bug affecting Git clients up to at least 2.23.0 + // (2019-08-16), we must first expand the set of local refs, and only then + // unshallow the repository as a separate fetch operation. (See + // golang.org/issue/34266 and + // https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.) 
+ + if _, err := Run(r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil { + return err + } + + if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil { + if _, err := Run(r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil { + return err + } + } + + r.fetchLevel = fetchAll + } + return nil +} + +// statLocal returns a RevInfo describing rev in the local git repository. +// It uses version as info.Version. +func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) { + out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "-n1", "--format=format:%H %ct %D", rev, "--") + if err != nil { + return nil, &UnknownRevisionError{Rev: rev} + } + f := strings.Fields(string(out)) + if len(f) < 2 { + return nil, fmt.Errorf("unexpected response from git log: %q", out) + } + hash := f[0] + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + t, err := strconv.ParseInt(f[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid time from git log: %q", out) + } + + info := &RevInfo{ + Name: hash, + Short: ShortenSHA1(hash), + Time: time.Unix(t, 0).UTC(), + Version: hash, + } + + // Add tags. Output looks like: + // ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD + for i := 2; i < len(f); i++ { + if f[i] == "tag:" { + i++ + if i < len(f) { + info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ",")) + } + } + } + sort.Strings(info.Tags) + + // Used hash as info.Version above. + // Use caller's suggested version if it appears in the tag list + // (filters out branch names, HEAD). 
+ for _, tag := range info.Tags { + if version == tag { + info.Version = version + } + } + + return info, nil +} + +// Stat returns information about rev, caching the result (including any +// error) per rev string. The special rev "latest" is resolved by Latest. +func (r *gitRepo) Stat(rev string) (*RevInfo, error) { + if rev == "latest" { + return r.Latest() + } + type cached struct { + info *RevInfo + err error + } + c := r.statCache.Do(rev, func() interface{} { + info, err := r.stat(rev) + return cached{info, err} + }).(cached) + return c.info, c.err +} + +// ReadFile returns the contents of file at revision rev. +// NOTE(review): maxSize is not enforced here, and any failure of +// git cat-file is reported as fs.ErrNotExist. +func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) { + // TODO: Could use git cat-file --batch. + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file) + if err != nil { + return nil, fs.ErrNotExist + } + return out, nil +} + +// ReadFileRevs reads file at each of the tags in revs, first from the local +// repository and then, for missing tags that the remote knows about, again +// after fetching all heads and tags. +func (r *gitRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) { + // Create space to hold results. + files := make(map[string]*FileRev) + for _, rev := range revs { + f := &FileRev{Rev: rev} + files[rev] = f + } + + // Collect locally-known revs. + need, err := r.readFileRevs(revs, file, files) + if err != nil { + return nil, err + } + if len(need) == 0 { + return files, nil + } + + // Build list of known remote refs that might help. + var redo []string + r.refsOnce.Do(r.loadRefs) + if r.refsErr != nil { + return nil, r.refsErr + } + for _, tag := range need { + if r.refs["refs/tags/"+tag] != "" { + redo = append(redo, tag) + } + } + if len(redo) == 0 { + return files, nil + } + + // Protect r.fetchLevel and the "fetch more and more" sequence. + // See stat method above. 
+ unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if err := r.fetchRefsLocked(); err != nil { + return nil, err + } + + if _, err := r.readFileRevs(redo, file, files); err != nil { + return nil, err + } + + return files, nil +} + +// readFileRevs reads file at each tag using a single 'git cat-file --batch' +// run, stores the outcome in fileMap[tag], and returns the tags that the +// local repository does not have. +func (r *gitRepo) readFileRevs(tags []string, file string, fileMap map[string]*FileRev) (missing []string, err error) { + var stdin bytes.Buffer + for _, tag := range tags { + fmt.Fprintf(&stdin, "refs/tags/%s\n", tag) + fmt.Fprintf(&stdin, "refs/tags/%s:%s\n", tag, file) + } + + data, err := RunWithStdin(r.dir, &stdin, "git", "cat-file", "--batch") + if err != nil { + return nil, err + } + + next := func() (typ string, body []byte, ok bool) { + var line string + i := bytes.IndexByte(data, '\n') + if i < 0 { + return "", nil, false + } + line, data = string(bytes.TrimSpace(data[:i])), data[i+1:] + if strings.HasSuffix(line, " missing") { + return "missing", nil, true + } + f := strings.Fields(line) + if len(f) != 3 { + return "", nil, false + } + n, err := strconv.Atoi(f[2]) + if err != nil || n > len(data) { + return "", nil, false + } + body, data = data[:n], data[n:] + if len(data) > 0 && data[0] == '\r' { + data = data[1:] + } + if len(data) > 0 && data[0] == '\n' { + data = data[1:] + } + return f[1], body, true + } + + badGit := func() ([]string, error) { + return nil, fmt.Errorf("malformed output from git cat-file --batch") + } + + for _, tag := range tags { + commitType, _, ok := next() + if !ok { + return badGit() + } + fileType, fileData, ok := next() + if !ok { + return badGit() + } + f := fileMap[tag] + f.Data = nil + f.Err = nil + switch commitType { + default: + f.Err = fmt.Errorf("unexpected non-commit type %q for rev %s", commitType, tag) + + case "missing": + // Note: f.Err must not satisfy os.IsNotExist. That's reserved for the file not existing in a valid commit. 
+ f.Err = fmt.Errorf("no such rev %s", tag) + missing = append(missing, tag) + + case "tag", "commit": + switch fileType { + default: + f.Err = &fs.PathError{Path: tag + ":" + file, Op: "read", Err: fmt.Errorf("unexpected non-blob type %q", fileType)} + case "missing": + f.Err = &fs.PathError{Path: tag + ":" + file, Op: "read", Err: fs.ErrNotExist} + case "blob": + f.Data = fileData + } + } + } + if len(bytes.TrimSpace(data)) != 0 { + return badGit() + } + + return missing, nil +} + +// RecentTag returns the highest semver tag with the given prefix that is +// merged into rev and accepted by allowed, or "" if there is none. +// allowed is called with the tag after the prefix has been removed. +func (r *gitRepo) RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) { + info, err := r.Stat(rev) + if err != nil { + return "", err + } + rev = info.Name // expand hash prefixes + + // describe sets tag and err using 'git for-each-ref' and reports whether the + // result is definitive. + describe := func() (definitive bool) { + var out []byte + out, err = Run(r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev) + if err != nil { + return true + } + + // prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix + var highest string + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + // git does support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here + // instead gives support for git v2.7.0. + if !strings.HasPrefix(line, "refs/tags/") { + continue + } + line = line[len("refs/tags/"):] + + if !strings.HasPrefix(line, prefix) { + continue + } + + semtag := line[len(prefix):] + // Consider only tags that are valid and complete (not just major.minor prefixes). + // NOTE: Do not replace the call to semver.Compare with semver.Max. + // We want to return the actual tag, not a canonicalized version of it, + // and semver.Max currently canonicalizes (see golang.org/issue/32700). 
+ if c := semver.Canonical(semtag); c == "" || !strings.HasPrefix(semtag, c) || !allowed(semtag) { + continue + } + if semver.Compare(semtag, highest) > 0 { + highest = semtag + } + } + + if highest != "" { + tag = prefix + highest + } + + return tag != "" && !AllHex(tag) + } + + if describe() { + return tag, err + } + + // Git didn't find a version tag preceding the requested rev. + // See whether any plausible tag exists. + tags, err := r.Tags(prefix + "v") + if err != nil { + return "", err + } + if len(tags) == 0 { + return "", nil + } + + // There are plausible tags, but we don't know if rev is a descendent of any of them. + // Fetch the history to find out. + + unlock, err := r.mu.Lock() + if err != nil { + return "", err + } + defer unlock() + + if err := r.fetchRefsLocked(); err != nil { + return "", err + } + + // If we've reached this point, we have all of the commits that are reachable + // from all heads and tags. + // + // The only refs we should be missing are those that are no longer reachable + // (or never were reachable) from any branch or tag, including the master + // branch, and we don't want to resolve them anyway (they're probably + // unreachable for a reason). + // + // Try one last time in case some other goroutine fetched rev while we were + // waiting on the lock. + describe() + return tag, err +} + +// DescendsFrom reports whether tag is an ancestor of rev, fetching the full +// history of all heads and tags if a first local check does not confirm it. +func (r *gitRepo) DescendsFrom(rev, tag string) (bool, error) { + // The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so + // this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go + // already doesn't work with Git 1.7.1, so at least it's not a regression. + // + // git merge-base --is-ancestor exits with status 0 if tag is an ancestor of + // rev, or 1 if not. + _, err := Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + + // Git reports "is an ancestor" with exit code 0 and "not an ancestor" with + // exit code 1. 
+ // Unfortunately, if we've already fetched rev with a shallow history, git + // merge-base has been observed to report a false-negative, so don't stop yet + // even if the exit code is 1! + if err == nil { + return true, nil + } + + // See whether the tag and rev even exist. + tags, err := r.Tags(tag) + if err != nil { + return false, err + } + if len(tags) == 0 { + return false, nil + } + + // NOTE: r.stat is very careful not to fetch commits that we shouldn't know + // about, like rejected GitHub pull requests, so don't try to short-circuit + // that here. + if _, err = r.stat(rev); err != nil { + return false, err + } + + // Now fetch history so that git can search for a path. + unlock, err := r.mu.Lock() + if err != nil { + return false, err + } + defer unlock() + + if r.fetchLevel < fetchAll { + // Fetch the complete history for all refs and heads. It would be more + // efficient to only fetch the history from rev to tag, but that's much more + // complicated, and any kind of shallow fetch is fairly likely to trigger + // bugs in JGit servers and/or the go command anyway. + if err := r.fetchRefsLocked(); err != nil { + return false, err + } + } + + _, err = Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + if err == nil { + return true, nil + } + if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 { + return false, nil + } + return false, err +} + +// ReadZip returns a zip archive of rev produced by 'git archive', limited to +// subdir when it is non-empty, with every entry placed under "prefix/". +// NOTE(review): maxSize is not enforced and the whole archive is held in memory. +func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { + // TODO: Use maxSize or drop it. 
+ args := []string{} + if subdir != "" { + args = append(args, "--", subdir) + } + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if err := ensureGitAttributes(r.dir); err != nil { + return nil, err + } + + // Incredibly, git produces different archives depending on whether + // it is running on a Windows system or not, in an attempt to normalize + // text file line endings. Setting -c core.autocrlf=input means only + // translate files on the way into the repo, not on the way out (archive). + // The -c core.eol=lf should be unnecessary but set it anyway. + archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args) + if err != nil { + if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) { + return nil, fs.ErrNotExist + } + return nil, err + } + + return io.NopCloser(bytes.NewReader(archive)), nil +} + +// ensureGitAttributes makes sure export-subst and export-ignore features are +// disabled for this repo. This is intended to be run prior to running git +// archive so that zip files are generated that produce consistent ziphashes +// for a given revision, independent of variables such as git version and the +// size of the repo. 
+// +// See: https://github.com/golang/go/issues/27153 +func ensureGitAttributes(repoDir string) (err error) { + const attr = "\n* -export-subst -export-ignore\n" + + d := repoDir + "/info" + p := d + "/attributes" + + if err := os.MkdirAll(d, 0755); err != nil { + return err + } + + f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) + if err != nil { + return err + } + defer func() { + closeErr := f.Close() + if closeErr != nil { + err = closeErr + } + }() + + b, err := io.ReadAll(f) + if err != nil { + return err + } + if !bytes.HasSuffix(b, []byte(attr)) { + _, err := f.WriteString(attr) + return err + } + + return nil +} diff --git a/src/cmd/go/internal/modfetch/codehost/git_test.go b/src/cmd/go/internal/modfetch/codehost/git_test.go new file mode 100644 index 0000000..89a73ba --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/git_test.go @@ -0,0 +1,640 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codehost + +import ( + "archive/zip" + "bytes" + "flag" + "fmt" + "internal/testenv" + "io" + "io/fs" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "reflect" + "strings" + "testing" + "time" +) + +func TestMain(m *testing.M) { + // needed for initializing the test environment variables as testing.Short + // and HasExternalNetwork + flag.Parse() + os.Exit(testMain(m)) +} + +const ( + gitrepo1 = "https://vcs-test.golang.org/git/gitrepo1" + hgrepo1 = "https://vcs-test.golang.org/hg/hgrepo1" +) + +var altRepos = []string{ + "localGitRepo", + hgrepo1, +} + +// TODO: Convert gitrepo1 to svn, bzr, fossil and add tests. +// For now, at least the hgrepo1 tests check the general vcs.go logic. + +// localGitRepo is like gitrepo1 but allows archive access. 
+var localGitRepo string + +func testMain(m *testing.M) int { + if _, err := exec.LookPath("git"); err != nil { + fmt.Fprintln(os.Stderr, "skipping because git binary not found") + fmt.Println("PASS") + return 0 + } + + dir, err := os.MkdirTemp("", "gitrepo-test-") + if err != nil { + log.Fatal(err) + } + defer os.RemoveAll(dir) + + if testenv.HasExternalNetwork() && testenv.HasExec() { + // Clone gitrepo1 into a local directory. + // If we use a file:// URL to access the local directory, + // then git starts up all the usual protocol machinery, + // which will let us test remote git archive invocations. + localGitRepo = filepath.Join(dir, "gitrepo2") + if _, err := Run("", "git", "clone", "--mirror", gitrepo1, localGitRepo); err != nil { + log.Fatal(err) + } + if _, err := Run(localGitRepo, "git", "config", "daemon.uploadarch", "true"); err != nil { + log.Fatal(err) + } + } + + return m.Run() +} + +func testRepo(remote string) (Repo, error) { + if remote == "localGitRepo" { + // Convert absolute path to file URL. LocalGitRepo will not accept + // Windows absolute paths because they look like a host:path remote. + // TODO(golang.org/issue/32456): use url.FromFilePath when implemented. 
+ var url string + if strings.HasPrefix(localGitRepo, "/") { + url = "file://" + localGitRepo + } else { + url = "file:///" + filepath.ToSlash(localGitRepo) + } + return LocalGitRepo(url) + } + kind := "git" + for _, k := range []string{"hg"} { + if strings.Contains(remote, "/"+k+"/") { + kind = k + } + } + return NewRepo(kind, remote) +} + +var tagsTests = []struct { + repo string + prefix string + tags []string +}{ + {gitrepo1, "xxx", []string{}}, + {gitrepo1, "", []string{"v1.2.3", "v1.2.4-annotated", "v2.0.1", "v2.0.2", "v2.3"}}, + {gitrepo1, "v", []string{"v1.2.3", "v1.2.4-annotated", "v2.0.1", "v2.0.2", "v2.3"}}, + {gitrepo1, "v1", []string{"v1.2.3", "v1.2.4-annotated"}}, + {gitrepo1, "2", []string{}}, +} + +func TestTags(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + + for _, tt := range tagsTests { + f := func(t *testing.T) { + r, err := testRepo(tt.repo) + if err != nil { + t.Fatal(err) + } + tags, err := r.Tags(tt.prefix) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(tags, tt.tags) { + t.Errorf("Tags: incorrect tags\nhave %v\nwant %v", tags, tt.tags) + } + } + t.Run(path.Base(tt.repo)+"/"+tt.prefix, f) + if tt.repo == gitrepo1 { + for _, tt.repo = range altRepos { + t.Run(path.Base(tt.repo)+"/"+tt.prefix, f) + } + } + } +} + +var latestTests = []struct { + repo string + info *RevInfo +}{ + { + gitrepo1, + &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + hgrepo1, + &RevInfo{ + Name: "18518c07eb8ed5c80221e997e518cccaa8c0c287", + Short: "18518c07eb8e", + Version: "18518c07eb8ed5c80221e997e518cccaa8c0c287", + Time: time.Date(2018, 6, 27, 16, 16, 30, 0, time.UTC), + }, + }, +} + +func TestLatest(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + + for _, tt := range 
latestTests { + f := func(t *testing.T) { + r, err := testRepo(tt.repo) + if err != nil { + t.Fatal(err) + } + info, err := r.Latest() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(info, tt.info) { + t.Errorf("Latest: incorrect info\nhave %+v\nwant %+v", *info, *tt.info) + } + } + t.Run(path.Base(tt.repo), f) + if tt.repo == gitrepo1 { + tt.repo = "localGitRepo" + t.Run(path.Base(tt.repo), f) + } + } +} + +var readFileTests = []struct { + repo string + rev string + file string + err string + data string +}{ + { + repo: gitrepo1, + rev: "latest", + file: "README", + data: "", + }, + { + repo: gitrepo1, + rev: "v2", + file: "another.txt", + data: "another\n", + }, + { + repo: gitrepo1, + rev: "v2.3.4", + file: "another.txt", + err: fs.ErrNotExist.Error(), + }, +} + +func TestReadFile(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + + for _, tt := range readFileTests { + f := func(t *testing.T) { + r, err := testRepo(tt.repo) + if err != nil { + t.Fatal(err) + } + data, err := r.ReadFile(tt.rev, tt.file, 100) + if err != nil { + if tt.err == "" { + t.Fatalf("ReadFile: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("ReadFile: wrong error %q, want %q", err, tt.err) + } + if len(data) != 0 { + t.Errorf("ReadFile: non-empty data %q with error %v", data, err) + } + return + } + if tt.err != "" { + t.Fatalf("ReadFile: no error, wanted %v", tt.err) + } + if string(data) != tt.data { + t.Errorf("ReadFile: incorrect data\nhave %q\nwant %q", data, tt.data) + } + } + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.file, f) + if tt.repo == gitrepo1 { + for _, tt.repo = range altRepos { + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.file, f) + } + } + } +} + +var readZipTests = []struct { + repo string + rev string + subdir string + err string + files map[string]uint64 +}{ + { + repo: gitrepo1, + rev: "v2.3.4", + subdir: "", + files: map[string]uint64{ + "prefix/": 0, + "prefix/README": 0, + 
"prefix/v2": 3, + }, + }, + { + repo: hgrepo1, + rev: "v2.3.4", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/README": 0, + "prefix/v2": 3, + }, + }, + + { + repo: gitrepo1, + rev: "v2", + subdir: "", + files: map[string]uint64{ + "prefix/": 0, + "prefix/README": 0, + "prefix/v2": 3, + "prefix/another.txt": 8, + "prefix/foo.txt": 13, + }, + }, + { + repo: hgrepo1, + rev: "v2", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/README": 0, + "prefix/v2": 3, + "prefix/another.txt": 8, + "prefix/foo.txt": 13, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "", + files: map[string]uint64{ + "prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + "prefix/README": 0, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "", + files: map[string]uint64{ + "prefix/.hg_archival.txt": ^uint64(0), + "prefix/.hgtags": 405, + "prefix/v3/sub/dir/file.txt": 16, + "prefix/README": 0, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "v3/sub/dir", + files: map[string]uint64{ + "prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "v3/sub/dir", + files: map[string]uint64{ + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + + { + repo: gitrepo1, + rev: "v3", + subdir: "v3/sub", + files: map[string]uint64{ + "prefix/": 0, + "prefix/v3/": 0, + "prefix/v3/sub/": 0, + "prefix/v3/sub/dir/": 0, + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + { + repo: hgrepo1, + rev: "v3", + subdir: "v3/sub", + files: map[string]uint64{ + "prefix/v3/sub/dir/file.txt": 16, + }, + }, + + { + repo: gitrepo1, + rev: "aaaaaaaaab", + subdir: "", + err: "unknown revision", + }, + { + repo: hgrepo1, + rev: "aaaaaaaaab", + subdir: "", + err: "unknown revision", + }, + + { + repo: "https://github.com/rsc/vgotest1", + rev: 
"submod/v1.0.4", + subdir: "submod", + files: map[string]uint64{ + "prefix/": 0, + "prefix/submod/": 0, + "prefix/submod/go.mod": 53, + "prefix/submod/pkg/": 0, + "prefix/submod/pkg/p.go": 31, + }, + }, +} + +type zipFile struct { + name string + size int64 +} + +func TestReadZip(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + + for _, tt := range readZipTests { + f := func(t *testing.T) { + r, err := testRepo(tt.repo) + if err != nil { + t.Fatal(err) + } + rc, err := r.ReadZip(tt.rev, tt.subdir, 100000) + if err != nil { + if tt.err == "" { + t.Fatalf("ReadZip: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("ReadZip: wrong error %q, want %q", err, tt.err) + } + if rc != nil { + t.Errorf("ReadZip: non-nil io.ReadCloser with error %v", err) + } + return + } + defer rc.Close() + if tt.err != "" { + t.Fatalf("ReadZip: no error, wanted %v", tt.err) + } + zipdata, err := io.ReadAll(rc) + if err != nil { + t.Fatal(err) + } + z, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata))) + if err != nil { + t.Fatalf("ReadZip: cannot read zip file: %v", err) + } + have := make(map[string]bool) + for _, f := range z.File { + size, ok := tt.files[f.Name] + if !ok { + t.Errorf("ReadZip: unexpected file %s", f.Name) + continue + } + have[f.Name] = true + if size != ^uint64(0) && f.UncompressedSize64 != size { + t.Errorf("ReadZip: file %s has unexpected size %d != %d", f.Name, f.UncompressedSize64, size) + } + } + for name := range tt.files { + if !have[name] { + t.Errorf("ReadZip: missing file %s", name) + } + } + } + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, f) + if tt.repo == gitrepo1 { + tt.repo = "localGitRepo" + t.Run(path.Base(tt.repo)+"/"+tt.rev+"/"+tt.subdir, f) + } + } +} + +var hgmap = map[string]string{ + "HEAD": "41964ddce1180313bdc01d0a39a2813344d6261d", // not tip due to bad hgrepo1 conversion + "9d02800338b8a55be062c838d1f02e0c5780b9eb": 
"8f49ee7a6ddcdec6f0112d9dca48d4a2e4c3c09e", + "76a00fb249b7f93091bc2c89a789dab1fc1bc26f": "88fde824ec8b41a76baa16b7e84212cee9f3edd0", + "ede458df7cd0fdca520df19a33158086a8a68e81": "41964ddce1180313bdc01d0a39a2813344d6261d", + "97f6aa59c81c623494825b43d39e445566e429a4": "c0cbbfb24c7c3c50c35c7b88e7db777da4ff625d", +} + +var statTests = []struct { + repo string + rev string + err string + info *RevInfo +}{ + { + repo: gitrepo1, + rev: "HEAD", + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "v2", // branch + info: &RevInfo{ + Name: "9d02800338b8a55be062c838d1f02e0c5780b9eb", + Short: "9d02800338b8", + Version: "9d02800338b8a55be062c838d1f02e0c5780b9eb", + Time: time.Date(2018, 4, 17, 20, 00, 32, 0, time.UTC), + Tags: []string{"v2.0.2"}, + }, + }, + { + repo: gitrepo1, + rev: "v2.3.4", // badly-named branch (semver should be a tag) + info: &RevInfo{ + Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Short: "76a00fb249b7", + Version: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC), + Tags: []string{"v2.0.1", "v2.3"}, + }, + }, + { + repo: gitrepo1, + rev: "v2.3", // badly-named tag (we only respect full semver v2.3.0) + info: &RevInfo{ + Name: "76a00fb249b7f93091bc2c89a789dab1fc1bc26f", + Short: "76a00fb249b7", + Version: "v2.3", + Time: time.Date(2018, 4, 17, 19, 45, 48, 0, time.UTC), + Tags: []string{"v2.0.1", "v2.3"}, + }, + }, + { + repo: gitrepo1, + rev: "v1.2.3", // tag + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "v1.2.3", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "ede458df", // hash prefix in refs + info: 
&RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "ede458df7cd0fdca520df19a33158086a8a68e81", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "97f6aa59", // hash prefix not in refs + info: &RevInfo{ + Name: "97f6aa59c81c623494825b43d39e445566e429a4", + Short: "97f6aa59c81c", + Version: "97f6aa59c81c623494825b43d39e445566e429a4", + Time: time.Date(2018, 4, 17, 20, 0, 19, 0, time.UTC), + }, + }, + { + repo: gitrepo1, + rev: "v1.2.4-annotated", // annotated tag uses unwrapped commit hash + info: &RevInfo{ + Name: "ede458df7cd0fdca520df19a33158086a8a68e81", + Short: "ede458df7cd0", + Version: "v1.2.4-annotated", + Time: time.Date(2018, 4, 17, 19, 43, 22, 0, time.UTC), + Tags: []string{"v1.2.3", "v1.2.4-annotated"}, + }, + }, + { + repo: gitrepo1, + rev: "aaaaaaaaab", + err: "unknown revision", + }, +} + +func TestStat(t *testing.T) { + testenv.MustHaveExternalNetwork(t) + testenv.MustHaveExec(t) + + for _, tt := range statTests { + f := func(t *testing.T) { + r, err := testRepo(tt.repo) + if err != nil { + t.Fatal(err) + } + info, err := r.Stat(tt.rev) + if err != nil { + if tt.err == "" { + t.Fatalf("Stat: unexpected error %v", err) + } + if !strings.Contains(err.Error(), tt.err) { + t.Fatalf("Stat: wrong error %q, want %q", err, tt.err) + } + if info != nil { + t.Errorf("Stat: non-nil info with error %q", err) + } + return + } + if !reflect.DeepEqual(info, tt.info) { + t.Errorf("Stat: incorrect info\nhave %+v\nwant %+v", *info, *tt.info) + } + } + t.Run(path.Base(tt.repo)+"/"+tt.rev, f) + if tt.repo == gitrepo1 { + for _, tt.repo = range altRepos { + old := tt + var m map[string]string + if tt.repo == hgrepo1 { + m = hgmap + } + if tt.info != nil { + info := *tt.info + tt.info = &info + tt.info.Name = remap(tt.info.Name, m) + tt.info.Version = remap(tt.info.Version, m) + tt.info.Short = remap(tt.info.Short, m) + } + tt.rev = 
remap(tt.rev, m) + t.Run(path.Base(tt.repo)+"/"+tt.rev, f) + tt = old + } + } + } +} + +func remap(name string, m map[string]string) string { + if m[name] != "" { + return m[name] + } + if AllHex(name) { + for k, v := range m { + if strings.HasPrefix(k, name) { + return v[:len(name)] + } + } + } + return name +} diff --git a/src/cmd/go/internal/modfetch/codehost/shell.go b/src/cmd/go/internal/modfetch/codehost/shell.go new file mode 100644 index 0000000..ce8b501 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/shell.go @@ -0,0 +1,141 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Interactive debugging shell for codehost.Repo implementations. + +package main + +import ( + "archive/zip" + "bufio" + "bytes" + "flag" + "fmt" + "io" + "log" + "os" + "strings" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/modfetch/codehost" +) + +func usage() { + fmt.Fprintf(os.Stderr, "usage: go run shell.go vcs remote\n") + os.Exit(2) +} + +func main() { + cfg.GOMODCACHE = "/tmp/vcswork" + log.SetFlags(0) + log.SetPrefix("shell: ") + flag.Usage = usage + flag.Parse() + if flag.NArg() != 2 { + usage() + } + + repo, err := codehost.NewRepo(flag.Arg(0), flag.Arg(1)) + if err != nil { + log.Fatal(err) + } + + b := bufio.NewReader(os.Stdin) + for { + fmt.Fprintf(os.Stderr, ">>> ") + line, err := b.ReadString('\n') + if err != nil { + log.Fatal(err) + } + f := strings.Fields(line) + if len(f) == 0 { + continue + } + switch f[0] { + default: + fmt.Fprintf(os.Stderr, "?unknown command\n") + continue + case "tags": + prefix := "" + if len(f) == 2 { + prefix = f[1] + } + if len(f) > 2 { + fmt.Fprintf(os.Stderr, "?usage: tags [prefix]\n") + continue + } + tags, err := repo.Tags(prefix) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + for _, tag := range tags { + fmt.Printf("%s\n", tag) + } + + case "stat": + if 
len(f) != 2 { + fmt.Fprintf(os.Stderr, "?usage: stat rev\n") + continue + } + info, err := repo.Stat(f[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + fmt.Printf("name=%s short=%s version=%s time=%s\n", info.Name, info.Short, info.Version, info.Time.UTC().Format(time.RFC3339)) + + case "read": + if len(f) != 3 { + fmt.Fprintf(os.Stderr, "?usage: read rev file\n") + continue + } + data, err := repo.ReadFile(f[1], f[2], 10<<20) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + os.Stdout.Write(data) + + case "zip": + if len(f) != 4 { + fmt.Fprintf(os.Stderr, "?usage: zip rev subdir output\n") + continue + } + subdir := f[2] + if subdir == "-" { + subdir = "" + } + rc, err := repo.ReadZip(f[1], subdir, 10<<20) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + + if f[3] != "-" { + if err := os.WriteFile(f[3], data, 0666); err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + } + z, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + fmt.Fprintf(os.Stderr, "?%s\n", err) + continue + } + for _, f := range z.File { + fmt.Printf("%s %d\n", f.Name, f.UncompressedSize64) + } + } + } +} diff --git a/src/cmd/go/internal/modfetch/codehost/svn.go b/src/cmd/go/internal/modfetch/codehost/svn.go new file mode 100644 index 0000000..6ec9e59 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/svn.go @@ -0,0 +1,154 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codehost + +import ( + "archive/zip" + "encoding/xml" + "fmt" + "io" + "os" + "path" + "path/filepath" + "time" +) + +func svnParseStat(rev, out string) (*RevInfo, error) { + var log struct { + Logentry struct { + Revision int64 `xml:"revision,attr"` + Date string `xml:"date"` + } `xml:"logentry"` + } + if err := xml.Unmarshal([]byte(out), &log); err != nil { + return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out) + } + + t, err := time.Parse(time.RFC3339, log.Logentry.Date) + if err != nil { + return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out) + } + + info := &RevInfo{ + Name: fmt.Sprintf("%d", log.Logentry.Revision), + Short: fmt.Sprintf("%012d", log.Logentry.Revision), + Time: t.UTC(), + Version: rev, + } + return info, nil +} + +func svnReadZip(dst io.Writer, workDir, rev, subdir, remote string) (err error) { + // The subversion CLI doesn't provide a command to write the repository + // directly to an archive, so we need to export it to the local filesystem + // instead. Unfortunately, the local filesystem might apply arbitrary + // normalization to the filenames, so we need to obtain those directly. + // + // 'svn export' prints the filenames as they are written, but from reading the + // svn source code (as of revision 1868933), those filenames are encoded using + // the system locale rather than preserved byte-for-byte from the origin. For + // our purposes, that won't do, but we don't want to go mucking around with + // the user's locale settings either — that could impact error messages, and + // we don't know what locales the user has available or what LC_* variables + // their platform supports. + // + // Instead, we'll do a two-pass export: first we'll run 'svn list' to get the + // canonical filenames, then we'll 'svn export' and look for those filenames + // in the local filesystem. 
(If there is an encoding problem at that point, we + // would probably reject the resulting module anyway.) + + remotePath := remote + if subdir != "" { + remotePath += "/" + subdir + } + + out, err := Run(workDir, []string{ + "svn", "list", + "--non-interactive", + "--xml", + "--incremental", + "--recursive", + "--revision", rev, + "--", remotePath, + }) + if err != nil { + return err + } + + type listEntry struct { + Kind string `xml:"kind,attr"` + Name string `xml:"name"` + Size int64 `xml:"size"` + } + var list struct { + Entries []listEntry `xml:"entry"` + } + if err := xml.Unmarshal(out, &list); err != nil { + return vcsErrorf("unexpected response from svn list --xml: %v\n%s", err, out) + } + + exportDir := filepath.Join(workDir, "export") + // Remove any existing contents from a previous (failed) run. + if err := os.RemoveAll(exportDir); err != nil { + return err + } + defer os.RemoveAll(exportDir) // best-effort + + _, err = Run(workDir, []string{ + "svn", "export", + "--non-interactive", + "--quiet", + + // Suppress any platform- or host-dependent transformations. + "--native-eol", "LF", + "--ignore-externals", + "--ignore-keywords", + + "--revision", rev, + "--", remotePath, + exportDir, + }) + if err != nil { + return err + } + + // Scrape the exported files out of the filesystem and encode them in the zipfile. + + // “All files in the zip file are expected to be + // nested in a single top-level directory, whose name is not specified.” + // We'll (arbitrarily) choose the base of the remote path. 
+ basePath := path.Join(path.Base(remote), subdir) + + zw := zip.NewWriter(dst) + for _, e := range list.Entries { + if e.Kind != "file" { + continue + } + + zf, err := zw.Create(path.Join(basePath, e.Name)) + if err != nil { + return err + } + + f, err := os.Open(filepath.Join(exportDir, e.Name)) + if err != nil { + if os.IsNotExist(err) { + return vcsErrorf("file reported by 'svn list', but not written by 'svn export': %s", e.Name) + } + return fmt.Errorf("error opening file created by 'svn export': %v", err) + } + + n, err := io.Copy(zf, f) + f.Close() + if err != nil { + return err + } + if n != e.Size { + return vcsErrorf("file size differs between 'svn list' and 'svn export': file %s listed as %v bytes, but exported as %v bytes", e.Name, e.Size, n) + } + } + + return zw.Close() +} diff --git a/src/cmd/go/internal/modfetch/codehost/vcs.go b/src/cmd/go/internal/modfetch/codehost/vcs.go new file mode 100644 index 0000000..c2cca08 --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/vcs.go @@ -0,0 +1,616 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codehost + +import ( + "errors" + "fmt" + "internal/lazyregexp" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/par" + "cmd/go/internal/str" +) + +// A VCSError indicates an error using a version control system. +// The implication of a VCSError is that we know definitively where +// to get the code, but we can't access it due to the error. +// The caller should report this error instead of continuing to probe +// other possible module paths. +// +// TODO(golang.org/issue/31730): See if we can invert this. (Return a +// distinguished error for “repo not found” and treat everything else +// as terminal.) 
+type VCSError struct { + Err error +} + +func (e *VCSError) Error() string { return e.Err.Error() } + +func vcsErrorf(format string, a ...interface{}) error { + return &VCSError{Err: fmt.Errorf(format, a...)} +} + +func NewRepo(vcs, remote string) (Repo, error) { + type key struct { + vcs string + remote string + } + type cached struct { + repo Repo + err error + } + c := vcsRepoCache.Do(key{vcs, remote}, func() interface{} { + repo, err := newVCSRepo(vcs, remote) + if err != nil { + err = &VCSError{err} + } + return cached{repo, err} + }).(cached) + + return c.repo, c.err +} + +var vcsRepoCache par.Cache + +type vcsRepo struct { + mu lockedfile.Mutex // protects all commands, so we don't have to decide which are safe on a per-VCS basis + + remote string + cmd *vcsCmd + dir string + + tagsOnce sync.Once + tags map[string]bool + + branchesOnce sync.Once + branches map[string]bool + + fetchOnce sync.Once + fetchErr error +} + +func newVCSRepo(vcs, remote string) (Repo, error) { + if vcs == "git" { + return newGitRepo(remote, false) + } + cmd := vcsCmds[vcs] + if cmd == nil { + return nil, fmt.Errorf("unknown vcs: %s %s", vcs, remote) + } + if !strings.Contains(remote, "://") { + return nil, fmt.Errorf("invalid vcs remote: %s %s", vcs, remote) + } + + r := &vcsRepo{remote: remote, cmd: cmd} + var err error + r.dir, r.mu.Path, err = WorkDir(vcsWorkDirType+vcs, r.remote) + if err != nil { + return nil, err + } + + if cmd.init == nil { + return r, nil + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if _, err := os.Stat(filepath.Join(r.dir, "."+vcs)); err != nil { + if _, err := Run(r.dir, cmd.init(r.remote)); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + } + return r, nil +} + +const vcsWorkDirType = "vcs1." 
+ +type vcsCmd struct { + vcs string // vcs name "hg" + init func(remote string) []string // cmd to init repo to track remote + tags func(remote string) []string // cmd to list local tags + tagRE *lazyregexp.Regexp // regexp to extract tag names from output of tags cmd + branches func(remote string) []string // cmd to list local branches + branchRE *lazyregexp.Regexp // regexp to extract branch names from output of tags cmd + badLocalRevRE *lazyregexp.Regexp // regexp of names that must not be served out of local cache without doing fetch first + statLocal func(rev, remote string) []string // cmd to stat local rev + parseStat func(rev, out string) (*RevInfo, error) // cmd to parse output of statLocal + fetch []string // cmd to fetch everything from remote + latest string // name of latest commit on remote (tip, HEAD, etc) + readFile func(rev, file, remote string) []string // cmd to read rev's file + readZip func(rev, subdir, remote, target string) []string // cmd to read rev's subdir as zip file + doReadZip func(dst io.Writer, workDir, rev, subdir, remote string) error // arbitrary function to read rev's subdir as zip file +} + +var re = lazyregexp.New + +var vcsCmds = map[string]*vcsCmd{ + "hg": { + vcs: "hg", + init: func(remote string) []string { + return []string{"hg", "clone", "-U", "--", remote, "."} + }, + tags: func(remote string) []string { + return []string{"hg", "tags", "-q"} + }, + tagRE: re(`(?m)^[^\n]+$`), + branches: func(remote string) []string { + return []string{"hg", "branches", "-c", "-q"} + }, + branchRE: re(`(?m)^[^\n]+$`), + badLocalRevRE: re(`(?m)^(tip)$`), + statLocal: func(rev, remote string) []string { + return []string{"hg", "log", "-l1", "-r", rev, "--template", "{node} {date|hgdate} {tags}"} + }, + parseStat: hgParseStat, + fetch: []string{"hg", "pull", "-f"}, + latest: "tip", + readFile: func(rev, file, remote string) []string { + return []string{"hg", "cat", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) 
[]string { + pattern := []string{} + if subdir != "" { + pattern = []string{"-I", subdir + "/**"} + } + return str.StringList("hg", "archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/", pattern, "--", target) + }, + }, + + "svn": { + vcs: "svn", + init: nil, // no local checkout + tags: func(remote string) []string { + return []string{"svn", "list", "--", strings.TrimSuffix(remote, "/trunk") + "/tags"} + }, + tagRE: re(`(?m)^(.*?)/?$`), + statLocal: func(rev, remote string) []string { + suffix := "@" + rev + if rev == "latest" { + suffix = "" + } + return []string{"svn", "log", "-l1", "--xml", "--", remote + suffix} + }, + parseStat: svnParseStat, + latest: "latest", + readFile: func(rev, file, remote string) []string { + return []string{"svn", "cat", "--", remote + "/" + file + "@" + rev} + }, + doReadZip: svnReadZip, + }, + + "bzr": { + vcs: "bzr", + init: func(remote string) []string { + return []string{"bzr", "branch", "--use-existing-dir", "--", remote, "."} + }, + fetch: []string{ + "bzr", "pull", "--overwrite-tags", + }, + tags: func(remote string) []string { + return []string{"bzr", "tags"} + }, + tagRE: re(`(?m)^\S+`), + badLocalRevRE: re(`^revno:-`), + statLocal: func(rev, remote string) []string { + return []string{"bzr", "log", "-l1", "--long", "--show-ids", "-r", rev} + }, + parseStat: bzrParseStat, + latest: "revno:-1", + readFile: func(rev, file, remote string) []string { + return []string{"bzr", "cat", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) []string { + extra := []string{} + if subdir != "" { + extra = []string{"./" + subdir} + } + return str.StringList("bzr", "export", "--format=zip", "-r", rev, "--root=prefix/", "--", target, extra) + }, + }, + + "fossil": { + vcs: "fossil", + init: func(remote string) []string { + return []string{"fossil", "clone", "--", remote, ".fossil"} + }, + fetch: []string{"fossil", "pull", "-R", ".fossil"}, + tags: func(remote string) []string { + return []string{"fossil", 
"tag", "-R", ".fossil", "list"} + }, + tagRE: re(`XXXTODO`), + statLocal: func(rev, remote string) []string { + return []string{"fossil", "info", "-R", ".fossil", rev} + }, + parseStat: fossilParseStat, + latest: "trunk", + readFile: func(rev, file, remote string) []string { + return []string{"fossil", "cat", "-R", ".fossil", "-r", rev, file} + }, + readZip: func(rev, subdir, remote, target string) []string { + extra := []string{} + if subdir != "" && !strings.ContainsAny(subdir, "*?[],") { + extra = []string{"--include", subdir} + } + // Note that vcsRepo.ReadZip below rewrites this command + // to run in a different directory, to work around a fossil bug. + return str.StringList("fossil", "zip", "-R", ".fossil", "--name", "prefix", extra, "--", rev, target) + }, + }, +} + +func (r *vcsRepo) loadTags() { + out, err := Run(r.dir, r.cmd.tags(r.remote)) + if err != nil { + return + } + + // Run tag-listing command and extract tags. + r.tags = make(map[string]bool) + for _, tag := range r.cmd.tagRE.FindAllString(string(out), -1) { + if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(tag) { + continue + } + r.tags[tag] = true + } +} + +func (r *vcsRepo) loadBranches() { + if r.cmd.branches == nil { + return + } + + out, err := Run(r.dir, r.cmd.branches(r.remote)) + if err != nil { + return + } + + r.branches = make(map[string]bool) + for _, branch := range r.cmd.branchRE.FindAllString(string(out), -1) { + if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(branch) { + continue + } + r.branches[branch] = true + } +} + +func (r *vcsRepo) Tags(prefix string) ([]string, error) { + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + r.tagsOnce.Do(r.loadTags) + + tags := []string{} + for tag := range r.tags { + if strings.HasPrefix(tag, prefix) { + tags = append(tags, tag) + } + } + sort.Strings(tags) + return tags, nil +} + +func (r *vcsRepo) Stat(rev string) (*RevInfo, error) { + unlock, err := r.mu.Lock() + if 
err != nil { + return nil, err + } + defer unlock() + + if rev == "latest" { + rev = r.cmd.latest + } + r.branchesOnce.Do(r.loadBranches) + revOK := (r.cmd.badLocalRevRE == nil || !r.cmd.badLocalRevRE.MatchString(rev)) && !r.branches[rev] + if revOK { + if info, err := r.statLocal(rev); err == nil { + return info, nil + } + } + + r.fetchOnce.Do(r.fetch) + if r.fetchErr != nil { + return nil, r.fetchErr + } + info, err := r.statLocal(rev) + if err != nil { + return nil, err + } + if !revOK { + info.Version = info.Name + } + return info, nil +} + +func (r *vcsRepo) fetch() { + if len(r.cmd.fetch) > 0 { + _, r.fetchErr = Run(r.dir, r.cmd.fetch) + } +} + +func (r *vcsRepo) statLocal(rev string) (*RevInfo, error) { + out, err := Run(r.dir, r.cmd.statLocal(rev, r.remote)) + if err != nil { + return nil, &UnknownRevisionError{Rev: rev} + } + return r.cmd.parseStat(rev, string(out)) +} + +func (r *vcsRepo) Latest() (*RevInfo, error) { + return r.Stat("latest") +} + +func (r *vcsRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) { + if rev == "latest" { + rev = r.cmd.latest + } + _, err := r.Stat(rev) // download rev into local repo + if err != nil { + return nil, err + } + + // r.Stat acquires r.mu, so lock after that. + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + out, err := Run(r.dir, r.cmd.readFile(rev, file, r.remote)) + if err != nil { + return nil, fs.ErrNotExist + } + return out, nil +} + +func (r *vcsRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) { + // We don't technically need to lock here since we're returning an error + // uncondititonally, but doing so anyway will help to avoid baking in + // lock-inversion bugs. 
+ unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + return nil, vcsErrorf("ReadFileRevs not implemented") +} + +func (r *vcsRepo) RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error) { + // We don't technically need to lock here since we're returning an error + // uncondititonally, but doing so anyway will help to avoid baking in + // lock-inversion bugs. + unlock, err := r.mu.Lock() + if err != nil { + return "", err + } + defer unlock() + + return "", vcsErrorf("RecentTag not implemented") +} + +func (r *vcsRepo) DescendsFrom(rev, tag string) (bool, error) { + unlock, err := r.mu.Lock() + if err != nil { + return false, err + } + defer unlock() + + return false, vcsErrorf("DescendsFrom not implemented") +} + +func (r *vcsRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { + if r.cmd.readZip == nil && r.cmd.doReadZip == nil { + return nil, vcsErrorf("ReadZip not implemented for %s", r.cmd.vcs) + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if rev == "latest" { + rev = r.cmd.latest + } + f, err := os.CreateTemp("", "go-readzip-*.zip") + if err != nil { + return nil, err + } + if r.cmd.doReadZip != nil { + lw := &limitedWriter{ + W: f, + N: maxSize, + ErrLimitReached: errors.New("ReadZip: encoded file exceeds allowed size"), + } + err = r.cmd.doReadZip(lw, r.dir, rev, subdir, r.remote) + if err == nil { + _, err = f.Seek(0, io.SeekStart) + } + } else if r.cmd.vcs == "fossil" { + // If you run + // fossil zip -R .fossil --name prefix trunk /tmp/x.zip + // fossil fails with "unable to create directory /tmp" [sic]. + // Change the command to run in /tmp instead, + // replacing the -R argument with an absolute path. 
+ args := r.cmd.readZip(rev, subdir, r.remote, filepath.Base(f.Name())) + for i := range args { + if args[i] == ".fossil" { + args[i] = filepath.Join(r.dir, ".fossil") + } + } + _, err = Run(filepath.Dir(f.Name()), args) + } else { + _, err = Run(r.dir, r.cmd.readZip(rev, subdir, r.remote, f.Name())) + } + if err != nil { + f.Close() + os.Remove(f.Name()) + return nil, err + } + return &deleteCloser{f}, nil +} + +// deleteCloser is a file that gets deleted on Close. +type deleteCloser struct { + *os.File +} + +func (d *deleteCloser) Close() error { + defer os.Remove(d.File.Name()) + return d.File.Close() +} + +func hgParseStat(rev, out string) (*RevInfo, error) { + f := strings.Fields(string(out)) + if len(f) < 3 { + return nil, vcsErrorf("unexpected response from hg log: %q", out) + } + hash := f[0] + version := rev + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + t, err := strconv.ParseInt(f[1], 10, 64) + if err != nil { + return nil, vcsErrorf("invalid time from hg log: %q", out) + } + + var tags []string + for _, tag := range f[3:] { + if tag != "tip" { + tags = append(tags, tag) + } + } + sort.Strings(tags) + + info := &RevInfo{ + Name: hash, + Short: ShortenSHA1(hash), + Time: time.Unix(t, 0).UTC(), + Version: version, + Tags: tags, + } + return info, nil +} + +func bzrParseStat(rev, out string) (*RevInfo, error) { + var revno int64 + var tm time.Time + for _, line := range strings.Split(out, "\n") { + if line == "" || line[0] == ' ' || line[0] == '\t' { + // End of header, start of commit message. + break + } + if line[0] == '-' { + continue + } + i := strings.Index(line, ":") + if i < 0 { + // End of header, start of commit message. 
+ break + } + key, val := line[:i], strings.TrimSpace(line[i+1:]) + switch key { + case "revno": + if j := strings.Index(val, " "); j >= 0 { + val = val[:j] + } + i, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return nil, vcsErrorf("unexpected revno from bzr log: %q", line) + } + revno = i + case "timestamp": + j := strings.Index(val, " ") + if j < 0 { + return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line) + } + t, err := time.Parse("2006-01-02 15:04:05 -0700", val[j+1:]) + if err != nil { + return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line) + } + tm = t.UTC() + } + } + if revno == 0 || tm.IsZero() { + return nil, vcsErrorf("unexpected response from bzr log: %q", out) + } + + info := &RevInfo{ + Name: fmt.Sprintf("%d", revno), + Short: fmt.Sprintf("%012d", revno), + Time: tm, + Version: rev, + } + return info, nil +} + +func fossilParseStat(rev, out string) (*RevInfo, error) { + for _, line := range strings.Split(out, "\n") { + if strings.HasPrefix(line, "uuid:") || strings.HasPrefix(line, "hash:") { + f := strings.Fields(line) + if len(f) != 5 || len(f[1]) != 40 || f[4] != "UTC" { + return nil, vcsErrorf("unexpected response from fossil info: %q", line) + } + t, err := time.Parse("2006-01-02 15:04:05", f[2]+" "+f[3]) + if err != nil { + return nil, vcsErrorf("unexpected response from fossil info: %q", line) + } + hash := f[1] + version := rev + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + info := &RevInfo{ + Name: hash, + Short: ShortenSHA1(hash), + Time: t, + Version: version, + } + return info, nil + } + } + return nil, vcsErrorf("unexpected response from fossil info: %q", out) +} + +type limitedWriter struct { + W io.Writer + N int64 + ErrLimitReached error +} + +func (l *limitedWriter) Write(p []byte) (n int, err error) { + if l.N > 0 { + max := len(p) + if l.N < int64(max) { + max = int(l.N) + } + n, err = l.W.Write(p[:max]) + l.N -= int64(n) + if err != nil || n >= len(p) { 
// limitedWriter forwards writes to W until a budget of N bytes has been
// used up. N is decremented by the number of bytes W accepts.
type limitedWriter struct {
	W               io.Writer
	N               int64
	ErrLimitReached error
}

// Write passes at most l.N bytes of p to l.W and charges the bytes
// written against the remaining budget.
//
// It returns ErrLimitReached when the budget is already exhausted, or
// when p had to be truncated and the truncated write itself succeeded.
// An error from l.W is returned as is.
func (l *limitedWriter) Write(p []byte) (n int, err error) {
	if l.N <= 0 {
		return 0, l.ErrLimitReached
	}

	chunk := p
	if int64(len(chunk)) > l.N {
		chunk = chunk[:l.N]
	}

	n, err = l.W.Write(chunk)
	l.N -= int64(n)
	if err != nil || n >= len(p) {
		return n, err
	}
	// Only part of p fit within the budget.
	return n, l.ErrLimitReached
}