diff options
Diffstat (limited to 'src/cmd/go/internal/modfetch/codehost/git.go')
-rw-r--r-- | src/cmd/go/internal/modfetch/codehost/git.go | 904 |
1 files changed, 904 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/codehost/git.go b/src/cmd/go/internal/modfetch/codehost/git.go new file mode 100644 index 0000000..8bfbe7b --- /dev/null +++ b/src/cmd/go/internal/modfetch/codehost/git.go @@ -0,0 +1,904 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codehost + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "errors" + "fmt" + "io" + "io/fs" + "net/url" + "os" + "os/exec" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "time" + + "cmd/go/internal/lockedfile" + "cmd/go/internal/par" + "cmd/go/internal/web" + + "golang.org/x/mod/semver" +) + +// LocalGitRepo is like Repo but accepts both Git remote references +// and paths to repositories on the local file system. +func LocalGitRepo(remote string) (Repo, error) { + return newGitRepoCached(remote, true) +} + +// A notExistError wraps another error to retain its original text +// but makes it opaquely equivalent to fs.ErrNotExist. +type notExistError struct { + err error +} + +func (e notExistError) Error() string { return e.err.Error() } +func (notExistError) Is(err error) bool { return err == fs.ErrNotExist } + +const gitWorkDirType = "git3" + +var gitRepoCache par.Cache + +func newGitRepoCached(remote string, localOK bool) (Repo, error) { + type key struct { + remote string + localOK bool + } + type cached struct { + repo Repo + err error + } + + c := gitRepoCache.Do(key{remote, localOK}, func() any { + repo, err := newGitRepo(remote, localOK) + return cached{repo, err} + }).(cached) + + return c.repo, c.err +} + +func newGitRepo(remote string, localOK bool) (Repo, error) { + r := &gitRepo{remote: remote} + if strings.Contains(remote, "://") { + // This is a remote path. + var err error + r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote) + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil { + if _, err := Run(r.dir, "git", "init", "--bare"); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + // We could just say git fetch https://whatever later, + // but this lets us say git fetch origin instead, which + // is a little nicer. More importantly, using a named remote + // avoids a problem with Git LFS. See golang.org/issue/25605. + if _, err := Run(r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + if runtime.GOOS == "windows" { + // Git for Windows by default does not support paths longer than + // MAX_PATH (260 characters) because that may interfere with navigation + // in some Windows programs. However, cmd/go should be able to handle + // long paths just fine, and we expect people to use 'go clean' to + // manipulate the module cache, so it should be harmless to set here, + // and in some cases may be necessary in order to download modules with + // long branch names. + // + // See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path. + if _, err := Run(r.dir, "git", "config", "core.longpaths", "true"); err != nil { + os.RemoveAll(r.dir) + return nil, err + } + } + } + r.remoteURL = r.remote + r.remote = "origin" + } else { + // Local path. + // Disallow colon (not in ://) because sometimes + // that's rcp-style host:path syntax and sometimes it's not (c:\work). + // The go command has always insisted on URL syntax for ssh. + if strings.Contains(remote, ":") { + return nil, fmt.Errorf("git remote cannot use host:path syntax") + } + if !localOK { + return nil, fmt.Errorf("git remote must not be local directory") + } + r.local = true + info, err := os.Stat(remote) + if err != nil { + return nil, err + } + if !info.IsDir() { + return nil, fmt.Errorf("%s exists but is not a directory", remote) + } + r.dir = remote + r.mu.Path = r.dir + ".lock" + } + return r, nil +} + +type gitRepo struct { + remote, remoteURL string + local bool + dir string + + mu lockedfile.Mutex // protects fetchLevel and git repo state + + fetchLevel int + + statCache par.Cache + + refsOnce sync.Once + // refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master") + // to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6") + refs map[string]string + refsErr error + + localTagsOnce sync.Once + localTags map[string]bool +} + +const ( + // How much have we fetched into the git repo (in this process)? + fetchNone = iota // nothing yet + fetchSome // shallow fetches of individual hashes + fetchAll // "fetch -t origin": get all remote branches and tags +) + +// loadLocalTags loads tag references from the local git cache +// into the map r.localTags. +// Should only be called as r.localTagsOnce.Do(r.loadLocalTags). +func (r *gitRepo) loadLocalTags() { + // The git protocol sends all known refs and ls-remote filters them on the client side, + // so we might as well record both heads and tags in one shot. + // Most of the time we only care about tags but sometimes we care about heads too. + out, err := Run(r.dir, "git", "tag", "-l") + if err != nil { + return + } + + r.localTags = make(map[string]bool) + for _, line := range strings.Split(string(out), "\n") { + if line != "" { + r.localTags[line] = true + } + } +} + +func (r *gitRepo) CheckReuse(old *Origin, subdir string) error { + if old == nil { + return fmt.Errorf("missing origin") + } + if old.VCS != "git" || old.URL != r.remoteURL { + return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL) + } + if old.Subdir != subdir { + return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir) + } + + // Note: Can have Hash with no Ref and no TagSum and no RepoSum, + // meaning the Hash simply has to remain in the repo. + // In that case we assume it does in the absence of any real way to check. + // But if neither Hash nor TagSum is present, we have nothing to check, + // which we take to mean we didn't record enough information to be sure. + if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" { + return fmt.Errorf("non-specific origin") + } + + r.loadRefs() + if r.refsErr != nil { + return r.refsErr + } + + if old.Ref != "" { + hash, ok := r.refs[old.Ref] + if !ok { + return fmt.Errorf("ref %q deleted", old.Ref) + } + if hash != old.Hash { + return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash) + } + } + if old.TagSum != "" { + tags, err := r.Tags(old.TagPrefix) + if err != nil { + return err + } + if tags.Origin.TagSum != old.TagSum { + return fmt.Errorf("tags changed") + } + } + if old.RepoSum != "" { + if r.repoSum(r.refs) != old.RepoSum { + return fmt.Errorf("refs changed") + } + } + return nil +} + +// loadRefs loads heads and tags references from the remote into the map r.refs. +// The result is cached in memory. +func (r *gitRepo) loadRefs() (map[string]string, error) { + r.refsOnce.Do(func() { + // The git protocol sends all known refs and ls-remote filters them on the client side, + // so we might as well record both heads and tags in one shot. + // Most of the time we only care about tags but sometimes we care about heads too. + out, gitErr := Run(r.dir, "git", "ls-remote", "-q", r.remote) + if gitErr != nil { + if rerr, ok := gitErr.(*RunError); ok { + if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) { + rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information." + } + } + + // If the remote URL doesn't exist at all, ideally we should treat the whole + // repository as nonexistent by wrapping the error in a notExistError. + // For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL + // ourselves and see what code it serves. + if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") { + if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) { + gitErr = notExistError{gitErr} + } + } + + r.refsErr = gitErr + return + } + + refs := make(map[string]string) + for _, line := range strings.Split(string(out), "\n") { + f := strings.Fields(line) + if len(f) != 2 { + continue + } + if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") { + refs[f[1]] = f[0] + } + } + for ref, hash := range refs { + if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag + refs[k] = hash + delete(refs, ref) + } + } + r.refs = refs + }) + return r.refs, r.refsErr +} + +func (r *gitRepo) Tags(prefix string) (*Tags, error) { + refs, err := r.loadRefs() + if err != nil { + return nil, err + } + + tags := &Tags{ + Origin: &Origin{ + VCS: "git", + URL: r.remoteURL, + TagPrefix: prefix, + }, + List: []Tag{}, + } + for ref, hash := range refs { + if !strings.HasPrefix(ref, "refs/tags/") { + continue + } + tag := ref[len("refs/tags/"):] + if !strings.HasPrefix(tag, prefix) { + continue + } + tags.List = append(tags.List, Tag{tag, hash}) + } + sort.Slice(tags.List, func(i, j int) bool { + return tags.List[i].Name < tags.List[j].Name + }) + + dir := prefix[:strings.LastIndex(prefix, "/")+1] + h := sha256.New() + for _, tag := range tags.List { + if isOriginTag(strings.TrimPrefix(tag.Name, dir)) { + fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash) + } + } + tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)) + return tags, nil +} + +// repoSum returns a checksum of the entire repo state, +// which can be checked (as Origin.RepoSum) to cache +// the absence of a specific module version. +// The caller must supply refs, the result of a successful r.loadRefs. +func (r *gitRepo) repoSum(refs map[string]string) string { + var list []string + for ref := range refs { + list = append(list, ref) + } + sort.Strings(list) + h := sha256.New() + for _, ref := range list { + fmt.Fprintf(h, "%q %s\n", ref, refs[ref]) + } + return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)) +} + +// unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs, +// for use when returning an UnknownRevisionError. +func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo { + return &RevInfo{ + Origin: &Origin{ + VCS: "git", + URL: r.remoteURL, + RepoSum: r.repoSum(refs), + }, + } +} + +func (r *gitRepo) Latest() (*RevInfo, error) { + refs, err := r.loadRefs() + if err != nil { + return nil, err + } + if refs["HEAD"] == "" { + return nil, ErrNoCommits + } + statInfo, err := r.Stat(refs["HEAD"]) + if err != nil { + return nil, err + } + + // Stat may return cached info, so make a copy to modify here. + info := new(RevInfo) + *info = *statInfo + info.Origin = new(Origin) + if statInfo.Origin != nil { + *info.Origin = *statInfo.Origin + } + info.Origin.Ref = "HEAD" + info.Origin.Hash = refs["HEAD"] + + return info, nil +} + +// findRef finds some ref name for the given hash, +// for use when the server requires giving a ref instead of a hash. +// There may be multiple ref names for a given hash, +// in which case this returns some name - it doesn't matter which. +func (r *gitRepo) findRef(hash string) (ref string, ok bool) { + refs, err := r.loadRefs() + if err != nil { + return "", false + } + for ref, h := range refs { + if h == hash { + return ref, true + } + } + return "", false +} + +// minHashDigits is the minimum number of digits to require +// before accepting a hex digit sequence as potentially identifying +// a specific commit in a git repo. (Of course, users can always +// specify more digits, and many will paste in all 40 digits, +// but many of git's commands default to printing short hashes +// as 7 digits.) +const minHashDigits = 7 + +// stat stats the given rev in the local repository, +// or else it fetches more info from the remote repository and tries again. +func (r *gitRepo) stat(rev string) (info *RevInfo, err error) { + if r.local { + return r.statLocal(rev, rev) + } + + // Fast path: maybe rev is a hash we already have locally. + didStatLocal := false + if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { + if info, err := r.statLocal(rev, rev); err == nil { + return info, nil + } + didStatLocal = true + } + + // Maybe rev is a tag we already have locally. + // (Note that we're excluding branches, which can be stale.) + r.localTagsOnce.Do(r.loadLocalTags) + if r.localTags[rev] { + return r.statLocal(rev, "refs/tags/"+rev) + } + + // Maybe rev is the name of a tag or branch on the remote server. + // Or maybe it's the prefix of a hash of a named ref. + // Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash. + refs, err := r.loadRefs() + if err != nil { + return nil, err + } + // loadRefs may return an error if git fails, for example segfaults, or + // could not load a private repo, but defer checking to the else block + // below, in case we already have the rev in question in the local cache. + var ref, hash string + if refs["refs/tags/"+rev] != "" { + ref = "refs/tags/" + rev + hash = refs[ref] + // Keep rev as is: tags are assumed not to change meaning. + } else if refs["refs/heads/"+rev] != "" { + ref = "refs/heads/" + rev + hash = refs[ref] + rev = hash // Replace rev, because meaning of refs/heads/foo can change. + } else if rev == "HEAD" && refs["HEAD"] != "" { + ref = "HEAD" + hash = refs[ref] + rev = hash // Replace rev, because meaning of HEAD can change. + } else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { + // At the least, we have a hash prefix we can look up after the fetch below. + // Maybe we can map it to a full hash using the known refs. + prefix := rev + // Check whether rev is prefix of known ref hash. + for k, h := range refs { + if strings.HasPrefix(h, prefix) { + if hash != "" && hash != h { + // Hash is an ambiguous hash prefix. + // More information will not change that. + return nil, fmt.Errorf("ambiguous revision %s", rev) + } + if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash. + ref = k + } + rev = h + hash = h + } + } + if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash. + hash = rev + } + } else { + return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev} + } + + defer func() { + if info != nil { + info.Origin.Hash = info.Name + // There's a ref = hash below; don't write that hash down as Origin.Ref. + if ref != info.Origin.Hash { + info.Origin.Ref = ref + } + } + }() + + // Protect r.fetchLevel and the "fetch more and more" sequence. + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + // Perhaps r.localTags did not have the ref when we loaded local tags, + // but we've since done fetches that pulled down the hash we need + // (or already have the hash we need, just without its tag). + // Either way, try a local stat before falling back to network I/O. + if !didStatLocal { + if info, err := r.statLocal(rev, hash); err == nil { + if after, found := strings.CutPrefix(ref, "refs/tags/"); found { + // Make sure tag exists, so it will be in localTags next time the go command is run. + Run(r.dir, "git", "tag", after, hash) + } + return info, nil + } + } + + // If we know a specific commit we need and its ref, fetch it. + // We do NOT fetch arbitrary hashes (when we don't know the ref) + // because we want to avoid ever importing a commit that isn't + // reachable from refs/tags/* or refs/heads/* or HEAD. + // Both Gerrit and GitHub expose every CL/PR as a named ref, + // and we don't want those commits masquerading as being real + // pseudo-versions in the main repo. + if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local { + r.fetchLevel = fetchSome + var refspec string + if ref != "" && ref != "HEAD" { + // If we do know the ref name, save the mapping locally + // so that (if it is a tag) it can show up in localTags + // on a future call. Also, some servers refuse to allow + // full hashes in ref specs, so prefer a ref name if known. + refspec = ref + ":" + ref + } else { + // Fetch the hash but give it a local name (refs/dummy), + // because that triggers the fetch behavior of creating any + // other known remote tags for the hash. We never use + // refs/dummy (it's not refs/tags/dummy) and it will be + // overwritten in the next command, and that's fine. + ref = hash + refspec = hash + ":refs/dummy" + } + _, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec) + if err == nil { + return r.statLocal(rev, ref) + } + // Don't try to be smart about parsing the error. + // It's too complex and varies too much by git version. + // No matter what went wrong, fall back to a complete fetch. + } + + // Last resort. + // Fetch all heads and tags and hope the hash we want is in the history. + if err := r.fetchRefsLocked(); err != nil { + return nil, err + } + + return r.statLocal(rev, rev) +} + +// fetchRefsLocked fetches all heads and tags from the origin, along with the +// ancestors of those commits. +// +// We only fetch heads and tags, not arbitrary other commits: we don't want to +// pull in off-branch commits (such as rejected GitHub pull requests) that the +// server may be willing to provide. (See the comments within the stat method +// for more detail.) +// +// fetchRefsLocked requires that r.mu remain locked for the duration of the call. +func (r *gitRepo) fetchRefsLocked() error { + if r.fetchLevel < fetchAll { + // NOTE: To work around a bug affecting Git clients up to at least 2.23.0 + // (2019-08-16), we must first expand the set of local refs, and only then + // unshallow the repository as a separate fetch operation. (See + // golang.org/issue/34266 and + // https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.) + + if _, err := Run(r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil { + return err + } + + if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil { + if _, err := Run(r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil { + return err + } + } + + r.fetchLevel = fetchAll + } + return nil +} + +// statLocal returns a new RevInfo describing rev in the local git repository. +// It uses version as info.Version. +func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) { + out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--") + if err != nil { + // Return info with Origin.RepoSum if possible to allow caching of negative lookup. + var info *RevInfo + if refs, err := r.loadRefs(); err == nil { + info = r.unknownRevisionInfo(refs) + } + return info, &UnknownRevisionError{Rev: rev} + } + f := strings.Fields(string(out)) + if len(f) < 2 { + return nil, fmt.Errorf("unexpected response from git log: %q", out) + } + hash := f[0] + if strings.HasPrefix(hash, version) { + version = hash // extend to full hash + } + t, err := strconv.ParseInt(f[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("invalid time from git log: %q", out) + } + + info := &RevInfo{ + Origin: &Origin{ + VCS: "git", + URL: r.remoteURL, + Hash: hash, + }, + Name: hash, + Short: ShortenSHA1(hash), + Time: time.Unix(t, 0).UTC(), + Version: hash, + } + if !strings.HasPrefix(hash, rev) { + info.Origin.Ref = rev + } + + // Add tags. Output looks like: + // ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD + for i := 2; i < len(f); i++ { + if f[i] == "tag:" { + i++ + if i < len(f) { + info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ",")) + } + } + } + sort.Strings(info.Tags) + + // Used hash as info.Version above. + // Use caller's suggested version if it appears in the tag list + // (filters out branch names, HEAD). + for _, tag := range info.Tags { + if version == tag { + info.Version = version + } + } + + return info, nil +} + +func (r *gitRepo) Stat(rev string) (*RevInfo, error) { + if rev == "latest" { + return r.Latest() + } + type cached struct { + info *RevInfo + err error + } + c := r.statCache.Do(rev, func() any { + info, err := r.stat(rev) + return cached{info, err} + }).(cached) + return c.info, c.err +} + +func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) { + // TODO: Could use git cat-file --batch. + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file) + if err != nil { + return nil, fs.ErrNotExist + } + return out, nil +} + +func (r *gitRepo) RecentTag(rev, prefix string, allowed func(tag string) bool) (tag string, err error) { + info, err := r.Stat(rev) + if err != nil { + return "", err + } + rev = info.Name // expand hash prefixes + + // describe sets tag and err using 'git for-each-ref' and reports whether the + // result is definitive. + describe := func() (definitive bool) { + var out []byte + out, err = Run(r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev) + if err != nil { + return true + } + + // prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix + var highest string + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + // git do support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here + // instead gives support for git v2.7.0. + if !strings.HasPrefix(line, "refs/tags/") { + continue + } + line = line[len("refs/tags/"):] + + if !strings.HasPrefix(line, prefix) { + continue + } + if !allowed(line) { + continue + } + + semtag := line[len(prefix):] + if semver.Compare(semtag, highest) > 0 { + highest = semtag + } + } + + if highest != "" { + tag = prefix + highest + } + + return tag != "" && !AllHex(tag) + } + + if describe() { + return tag, err + } + + // Git didn't find a version tag preceding the requested rev. + // See whether any plausible tag exists. + tags, err := r.Tags(prefix + "v") + if err != nil { + return "", err + } + if len(tags.List) == 0 { + return "", nil + } + + // There are plausible tags, but we don't know if rev is a descendent of any of them. + // Fetch the history to find out. + + unlock, err := r.mu.Lock() + if err != nil { + return "", err + } + defer unlock() + + if err := r.fetchRefsLocked(); err != nil { + return "", err + } + + // If we've reached this point, we have all of the commits that are reachable + // from all heads and tags. + // + // The only refs we should be missing are those that are no longer reachable + // (or never were reachable) from any branch or tag, including the master + // branch, and we don't want to resolve them anyway (they're probably + // unreachable for a reason). + // + // Try one last time in case some other goroutine fetched rev while we were + // waiting on the lock. + describe() + return tag, err +} + +func (r *gitRepo) DescendsFrom(rev, tag string) (bool, error) { + // The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so + // this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go + // already doesn't work with Git 1.7.1, so at least it's not a regression. + // + // git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or + // 1 if not. + _, err := Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + + // Git reports "is an ancestor" with exit code 0 and "not an ancestor" with + // exit code 1. + // Unfortunately, if we've already fetched rev with a shallow history, git + // merge-base has been observed to report a false-negative, so don't stop yet + // even if the exit code is 1! + if err == nil { + return true, nil + } + + // See whether the tag and rev even exist. + tags, err := r.Tags(tag) + if err != nil { + return false, err + } + if len(tags.List) == 0 { + return false, nil + } + + // NOTE: r.stat is very careful not to fetch commits that we shouldn't know + // about, like rejected GitHub pull requests, so don't try to short-circuit + // that here. + if _, err = r.stat(rev); err != nil { + return false, err + } + + // Now fetch history so that git can search for a path. + unlock, err := r.mu.Lock() + if err != nil { + return false, err + } + defer unlock() + + if r.fetchLevel < fetchAll { + // Fetch the complete history for all refs and heads. It would be more + // efficient to only fetch the history from rev to tag, but that's much more + // complicated, and any kind of shallow fetch is fairly likely to trigger + // bugs in JGit servers and/or the go command anyway. + if err := r.fetchRefsLocked(); err != nil { + return false, err + } + } + + _, err = Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) + if err == nil { + return true, nil + } + if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 { + return false, nil + } + return false, err +} + +func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { + // TODO: Use maxSize or drop it. + args := []string{} + if subdir != "" { + args = append(args, "--", subdir) + } + info, err := r.Stat(rev) // download rev into local git repo + if err != nil { + return nil, err + } + + unlock, err := r.mu.Lock() + if err != nil { + return nil, err + } + defer unlock() + + if err := ensureGitAttributes(r.dir); err != nil { + return nil, err + } + + // Incredibly, git produces different archives depending on whether + // it is running on a Windows system or not, in an attempt to normalize + // text file line endings. Setting -c core.autocrlf=input means only + // translate files on the way into the repo, not on the way out (archive). + // The -c core.eol=lf should be unnecessary but set it anyway. + archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args) + if err != nil { + if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) { + return nil, fs.ErrNotExist + } + return nil, err + } + + return io.NopCloser(bytes.NewReader(archive)), nil +} + +// ensureGitAttributes makes sure export-subst and export-ignore features are +// disabled for this repo. This is intended to be run prior to running git +// archive so that zip files are generated that produce consistent ziphashes +// for a given revision, independent of variables such as git version and the +// size of the repo. +// +// See: https://github.com/golang/go/issues/27153 +func ensureGitAttributes(repoDir string) (err error) { + const attr = "\n* -export-subst -export-ignore\n" + + d := repoDir + "/info" + p := d + "/attributes" + + if err := os.MkdirAll(d, 0755); err != nil { + return err + } + + f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) + if err != nil { + return err + } + defer func() { + closeErr := f.Close() + if closeErr != nil { + err = closeErr + } + }() + + b, err := io.ReadAll(f) + if err != nil { + return err + } + if !bytes.HasSuffix(b, []byte(attr)) { + _, err := f.WriteString(attr) + return err + } + + return nil +} |