diff options
Diffstat (limited to 'src/cmd/go/internal/modfetch/repo.go')
-rw-r--r-- | src/cmd/go/internal/modfetch/repo.go | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/src/cmd/go/internal/modfetch/repo.go b/src/cmd/go/internal/modfetch/repo.go new file mode 100644 index 0000000..d4c57bb --- /dev/null +++ b/src/cmd/go/internal/modfetch/repo.go @@ -0,0 +1,411 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package modfetch + +import ( + "fmt" + "io" + "io/fs" + "os" + "strconv" + "time" + + "cmd/go/internal/cfg" + "cmd/go/internal/modfetch/codehost" + "cmd/go/internal/par" + "cmd/go/internal/vcs" + web "cmd/go/internal/web" + + "golang.org/x/mod/module" +) + +const traceRepo = false // trace all repo actions, for debugging + +// A Repo represents a repository storing all versions of a single module. +// It must be safe for simultaneous use by multiple goroutines. +type Repo interface { + // ModulePath returns the module path. + ModulePath() string + + // CheckReuse checks whether the validation criteria in the origin + // are still satisfied on the server corresponding to this module. + // If so, the caller can reuse any cached Versions or RevInfo containing + // this origin rather than redownloading those from the server. + CheckReuse(old *codehost.Origin) error + + // Versions lists all known versions with the given prefix. + // Pseudo-versions are not included. + // + // Versions should be returned sorted in semver order + // (implementations can use semver.Sort). + // + // Versions returns a non-nil error only if there was a problem + // fetching the list of versions: it may return an empty list + // along with a nil error if the list of matching versions + // is known to be empty. + // + // If the underlying repository does not exist, + // Versions returns an error matching errors.Is(_, fs.ErrNotExist). + Versions(prefix string) (*Versions, error) + + // Stat returns information about the revision rev. 
+ // A revision can be any identifier known to the underlying service: + // commit hash, branch, tag, and so on. + Stat(rev string) (*RevInfo, error) + + // Latest returns the latest revision on the default branch, + // whatever that means in the underlying source code repository. + // It is only used when there are no tagged versions. + Latest() (*RevInfo, error) + + // GoMod returns the go.mod file for the given version. + GoMod(version string) (data []byte, err error) + + // Zip writes a zip file for the given version to dst. + Zip(dst io.Writer, version string) error +} + +// A Versions describes the available versions in a module repository. +type Versions struct { + Origin *codehost.Origin `json:",omitempty"` // origin information for reuse + + List []string // semver versions +} + +// A RevInfo describes a single revision in a module repository. +type RevInfo struct { + Version string // suggested version string for this revision + Time time.Time // commit time + + // These fields are used for Stat of arbitrary rev, + // but they are not recorded when talking about module versions. + Name string `json:"-"` // complete ID in underlying repository + Short string `json:"-"` // shortened ID, for use in pseudo-version + + Origin *codehost.Origin `json:",omitempty"` // provenance for reuse +} + +// Re: module paths, import paths, repository roots, and lookups +// +// A module is a collection of Go packages stored in a file tree +// with a go.mod file at the root of the tree. +// The go.mod defines the module path, which is the import path +// corresponding to the root of the file tree. +// The import path of a directory within that file tree is the module path +// joined with the name of the subdirectory relative to the root. +// +// For example, the module with path rsc.io/qr corresponds to the +// file tree in the repository https://github.com/rsc/qr. +// That file tree has a go.mod that says "module rsc.io/qr". 
+// The package in the root directory has import path "rsc.io/qr". +// The package in the gf256 subdirectory has import path "rsc.io/qr/gf256". +// In this example, "rsc.io/qr" is both a module path and an import path. +// But "rsc.io/qr/gf256" is only an import path, not a module path: +// it names an importable package, but not a module. +// +// As a special case to incorporate code written before modules were +// introduced, if a path p resolves using the pre-module "go get" lookup +// to the root of a source code repository without a go.mod file, +// that repository is treated as if it had a go.mod in its root directory +// declaring module path p. (The go.mod is further considered to +// contain requirements corresponding to any legacy version +// tracking format such as Gopkg.lock, vendor/vendor.conf, and so on.) +// +// The presentation so far ignores the fact that a source code repository +// has many different versions of a file tree, and those versions may +// differ in whether a particular go.mod exists and what it contains. +// In fact there is a well-defined mapping only from a module path, version +// pair - often written path@version - to a particular file tree. +// For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of +// repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod. +// Because the "go get" import paths rsc.io/qr and github.com/rsc/qr +// both redirect to the Git repository https://github.com/rsc/qr, +// github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0 +// but a different module (a different name). In contrast, since v0.2.0 +// of that repository has an explicit go.mod that declares path rsc.io/qr, +// github.com/rsc/qr@v0.2.0 is an invalid module path, version pair. +// Before modules, import comments would have had the same effect. 
+// +// The set of import paths associated with a given module path is +// clearly not fixed: at the least, new directories with new import paths +// can always be added. But another potential operation is to split a +// subtree out of a module into its own module. If done carefully, +// this operation can be done while preserving compatibility for clients. +// For example, suppose that we want to split rsc.io/qr/gf256 into its +// own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256. +// Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory) +// and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod +// cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires +// rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be +// using an older rsc.io/qr module that includes the gf256 package, but if +// it adds a requirement on either the newer rsc.io/qr or the newer +// rsc.io/qr/gf256 module, it will automatically add the requirement +// on the complementary half, ensuring both that rsc.io/qr/gf256 is +// available for importing by the build and also that it is only defined +// by a single module. The gf256 package could move back into the +// original by another simultaneous release of rsc.io/qr v0.4.0 including +// the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code +// in its root directory, along with a new requirement cycle. +// The ability to shift module boundaries in this way is expected to be +// important in large-scale program refactorings, similar to the ones +// described in https://talks.golang.org/2016/refactor.article. +// +// The possibility of shifting module boundaries reemphasizes +// that you must know both the module path and its version +// to determine the set of packages provided directly by that module. 
+// +// On top of all this, it is possible for a single code repository +// to contain multiple modules, either in branches or subdirectories, +// as a limited kind of monorepo. For example rsc.io/qr/v2, +// the v2.x.x continuation of rsc.io/qr, is expected to be found +// in v2-tagged commits in https://github.com/rsc/qr, either +// in the root or in a v2 subdirectory, disambiguated by go.mod. +// Again the precise file tree corresponding to a module +// depends on which version we are considering. +// +// It is also possible for the underlying repository to change over time, +// without changing the module path. If I copy the github repo over +// to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1, +// then clients of all versions should start fetching from bitbucket +// instead of github. That is, in contrast to the exact file tree, +// the location of the source code repository associated with a module path +// does not depend on the module version. (This is by design, as the whole +// point of these redirects is to allow package authors to establish a stable +// name that can be updated as code moves from one service to another.) +// +// All of this is important background for the lookup APIs defined in this +// file. +// +// The Lookup function takes a module path and returns a Repo representing +// that module path. Lookup can do only a little with the path alone. +// It can check that the path is well-formed (see module.CheckPath) +// and it can check that the path can be resolved to a target repository. +// To avoid version control access except when absolutely necessary, +// Lookup does not attempt to connect to the repository itself. + +// lookupCache holds the Repo built for each lookupCacheKey, +// so that a repeated Lookup with the same arguments reuses it. +var lookupCache par.Cache + +// A lookupCacheKey is the key into lookupCache: +// the proxy and module path arguments of a Lookup call. +type lookupCacheKey struct { + proxy, path string +} + +// Lookup returns the module with the given module path, +// fetched through the given proxy. 
+// +// The distinguished proxy "direct" indicates that the path should be fetched +// from its origin, and "noproxy" indicates that the patch should be fetched +// directly only if GONOPROXY matches the given path. +// +// For the distinguished proxy "off", Lookup always returns a Repo that returns +// a non-nil error for every method call. +// +// A successful return does not guarantee that the module +// has any defined versions. +func Lookup(proxy, path string) Repo { + if traceRepo { + defer logCall("Lookup(%q, %q)", proxy, path)() + } + + type cached struct { + r Repo + } + c := lookupCache.Do(lookupCacheKey{proxy, path}, func() any { + r := newCachingRepo(path, func() (Repo, error) { + r, err := lookup(proxy, path) + if err == nil && traceRepo { + r = newLoggingRepo(r) + } + return r, err + }) + return cached{r} + }).(cached) + + return c.r +} + +// lookup returns the module with the given module path. +func lookup(proxy, path string) (r Repo, err error) { + if cfg.BuildMod == "vendor" { + return nil, errLookupDisabled + } + + if module.MatchPrefixPatterns(cfg.GONOPROXY, path) { + switch proxy { + case "noproxy", "direct": + return lookupDirect(path) + default: + return nil, errNoproxy + } + } + + switch proxy { + case "off": + return errRepo{path, errProxyOff}, nil + case "direct": + return lookupDirect(path) + case "noproxy": + return nil, errUseProxy + default: + return newProxyRepo(proxy, path) + } +} + +type lookupDisabledError struct{} + +func (lookupDisabledError) Error() string { + if cfg.BuildModReason == "" { + return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod) + } + return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason) +} + +var errLookupDisabled error = lookupDisabledError{} + +var ( + errProxyOff = notExistErrorf("module lookup disabled by GOPROXY=off") + errNoproxy error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY") + errUseProxy error = notExistErrorf("path does not match 
GOPRIVATE/GONOPROXY") +) + +func lookupDirect(path string) (Repo, error) { + security := web.SecureOnly + + if module.MatchPrefixPatterns(cfg.GOINSECURE, path) { + security = web.Insecure + } + rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security) + if err != nil { + // We don't know where to find code for a module with this path. + return nil, notExistError{err: err} + } + + if rr.VCS.Name == "mod" { + // Fetch module from proxy with base URL rr.Repo. + return newProxyRepo(rr.Repo, path) + } + + code, err := lookupCodeRepo(rr) + if err != nil { + return nil, err + } + return newCodeRepo(code, rr.Root, path) +} + +func lookupCodeRepo(rr *vcs.RepoRoot) (codehost.Repo, error) { + code, err := codehost.NewRepo(rr.VCS.Cmd, rr.Repo) + if err != nil { + if _, ok := err.(*codehost.VCSError); ok { + return nil, err + } + return nil, fmt.Errorf("lookup %s: %v", rr.Root, err) + } + return code, nil +} + +// A loggingRepo is a wrapper around an underlying Repo +// that prints a log message at the start and end of each call. +// It can be inserted when debugging. +type loggingRepo struct { + r Repo +} + +func newLoggingRepo(r Repo) *loggingRepo { + return &loggingRepo{r} +} + +// logCall prints a log message using format and args and then +// also returns a function that will print the same message again, +// along with the elapsed time. +// Typical usage is: +// +// defer logCall("hello %s", arg)() +// +// Note the final (). 
+func logCall(format string, args ...any) func() { + start := time.Now() + fmt.Fprintf(os.Stderr, "+++ %s\n", fmt.Sprintf(format, args...)) + return func() { + fmt.Fprintf(os.Stderr, "%.3fs %s\n", time.Since(start).Seconds(), fmt.Sprintf(format, args...)) + } +} + +func (l *loggingRepo) ModulePath() string { + return l.r.ModulePath() +} + +func (l *loggingRepo) CheckReuse(old *codehost.Origin) (err error) { + defer func() { + logCall("CheckReuse[%s]: %v", l.r.ModulePath(), err) + }() + return l.r.CheckReuse(old) +} + +func (l *loggingRepo) Versions(prefix string) (*Versions, error) { + defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)() + return l.r.Versions(prefix) +} + +func (l *loggingRepo) Stat(rev string) (*RevInfo, error) { + defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)() + return l.r.Stat(rev) +} + +func (l *loggingRepo) Latest() (*RevInfo, error) { + defer logCall("Repo[%s]: Latest()", l.r.ModulePath())() + return l.r.Latest() +} + +func (l *loggingRepo) GoMod(version string) ([]byte, error) { + defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)() + return l.r.GoMod(version) +} + +func (l *loggingRepo) Zip(dst io.Writer, version string) error { + dstName := "_" + if dst, ok := dst.(interface{ Name() string }); ok { + dstName = strconv.Quote(dst.Name()) + } + defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)() + return l.r.Zip(dst, version) +} + +// errRepo is a Repo that returns the same error for all operations. +// +// It is useful in conjunction with caching, since cache hits will not attempt +// the prohibited operations. 
+type errRepo struct { + modulePath string + err error +} + +func (r errRepo) ModulePath() string { return r.modulePath } + +func (r errRepo) CheckReuse(old *codehost.Origin) error { return r.err } +func (r errRepo) Versions(prefix string) (*Versions, error) { return nil, r.err } +func (r errRepo) Stat(rev string) (*RevInfo, error) { return nil, r.err } +func (r errRepo) Latest() (*RevInfo, error) { return nil, r.err } +func (r errRepo) GoMod(version string) ([]byte, error) { return nil, r.err } +func (r errRepo) Zip(dst io.Writer, version string) error { return r.err } + +// A notExistError is like fs.ErrNotExist, but with a custom message +type notExistError struct { + err error +} + +func notExistErrorf(format string, args ...any) error { + return notExistError{fmt.Errorf(format, args...)} +} + +func (e notExistError) Error() string { + return e.err.Error() +} + +func (notExistError) Is(target error) bool { + return target == fs.ErrNotExist +} + +func (e notExistError) Unwrap() error { + return e.err +} |