summaryrefslogtreecommitdiffstats
path: root/src/cmd/go/internal/modfetch/repo.go
blob: d4c57bb300afbaf37aaa808df3859d3f61cd443a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package modfetch

import (
	"fmt"
	"io"
	"io/fs"
	"os"
	"strconv"
	"time"

	"cmd/go/internal/cfg"
	"cmd/go/internal/modfetch/codehost"
	"cmd/go/internal/par"
	"cmd/go/internal/vcs"
	web "cmd/go/internal/web"

	"golang.org/x/mod/module"
)

const traceRepo = false // trace all repo actions, for debugging

// A Repo represents a repository storing all versions of a single module.
// It must be safe for simultaneous use by multiple goroutines.
type Repo interface {
	// ModulePath returns the module path.
	ModulePath() string

	// CheckReuse checks whether the validation criteria in the origin
	// are still satisfied on the server corresponding to this module.
	// If so, the caller can reuse any cached Versions or RevInfo containing
	// this origin rather than redownloading those from the server.
	CheckReuse(old *codehost.Origin) error

	// Versions lists all known versions with the given prefix.
	// Pseudo-versions are not included.
	//
	// Versions should be returned sorted in semver order
	// (implementations can use semver.Sort).
	//
	// Versions returns a non-nil error only if there was a problem
	// fetching the list of versions: it may return an empty list
	// along with a nil error if the list of matching versions
	// is known to be empty.
	//
	// If the underlying repository does not exist,
	// Versions returns an error matching errors.Is(_, os.NotExist).
	Versions(prefix string) (*Versions, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying source code repository.
	// It is only used when there are no tagged versions.
	Latest() (*RevInfo, error)

	// GoMod returns the go.mod file for the given version.
	GoMod(version string) (data []byte, err error)

	// Zip writes a zip file for the given version to dst.
	Zip(dst io.Writer, version string) error
}

// A Versions describes the available versions in a module repository.
type Versions struct {
	Origin *codehost.Origin `json:",omitempty"` // origin information for reuse

	List []string // semver versions
}

// A RevInfo describes a single revision in a module repository.
type RevInfo struct {
	Version string    // suggested version string for this revision
	Time    time.Time // commit time

	// These fields are used for Stat of arbitrary rev,
	// but they are not recorded when talking about module versions.
	Name  string `json:"-"` // complete ID in underlying repository
	Short string `json:"-"` // shortened ID, for use in pseudo-version

	Origin *codehost.Origin `json:",omitempty"` // provenance for reuse
}

// Re: module paths, import paths, repository roots, and lookups
//
// A module is a collection of Go packages stored in a file tree
// with a go.mod file at the root of the tree.
// The go.mod defines the module path, which is the import path
// corresponding to the root of the file tree.
// The import path of a directory within that file tree is the module path
// joined with the name of the subdirectory relative to the root.
//
// For example, the module with path rsc.io/qr corresponds to the
// file tree in the repository https://github.com/rsc/qr.
// That file tree has a go.mod that says "module rsc.io/qr".
// The package in the root directory has import path "rsc.io/qr".
// The package in the gf256 subdirectory has import path "rsc.io/qr/gf256".
// In this example, "rsc.io/qr" is both a module path and an import path.
// But "rsc.io/qr/gf256" is only an import path, not a module path:
// it names an importable package, but not a module.
//
// As a special case to incorporate code written before modules were
// introduced, if a path p resolves using the pre-module "go get" lookup
// to the root of a source code repository without a go.mod file,
// that repository is treated as if it had a go.mod in its root directory
// declaring module path p. (The go.mod is further considered to
// contain requirements corresponding to any legacy version
// tracking format such as Gopkg.lock, vendor/vendor.conf, and so on.)
//
// The presentation so far ignores the fact that a source code repository
// has many different versions of a file tree, and those versions may
// differ in whether a particular go.mod exists and what it contains.
// In fact there is a well-defined mapping only from a module path, version
// pair - often written path@version - to a particular file tree.
// For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of
// repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod.
// Because the "go get" import paths rsc.io/qr and github.com/rsc/qr
// both redirect to the Git repository https://github.com/rsc/qr,
// github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0
// but a different module (a different name). In contrast, since v0.2.0
// of that repository has an explicit go.mod that declares path rsc.io/qr,
// github.com/rsc/qr@v0.2.0 is an invalid module path, version pair.
// Before modules, import comments would have had the same effect.
//
// The set of import paths associated with a given module path is
// clearly not fixed: at the least, new directories with new import paths
// can always be added. But another potential operation is to split a
// subtree out of a module into its own module. If done carefully,
// this operation can be done while preserving compatibility for clients.
// For example, suppose that we want to split rsc.io/qr/gf256 into its
// own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256.
// Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory)
// and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod
// cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires
// rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be
// using an older rsc.io/qr module that includes the gf256 package, but if
// it adds a requirement on either the newer rsc.io/qr or the newer
// rsc.io/qr/gf256 module, it will automatically add the requirement
// on the complementary half, ensuring both that rsc.io/qr/gf256 is
// available for importing by the build and also that it is only defined
// by a single module. The gf256 package could move back into the
// original by another simultaneous release of rsc.io/qr v0.4.0 including
// the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code
// in its root directory, along with a new requirement cycle.
// The ability to shift module boundaries in this way is expected to be
// important in large-scale program refactorings, similar to the ones
// described in https://talks.golang.org/2016/refactor.article.
//
// The possibility of shifting module boundaries reemphasizes
// that you must know both the module path and its version
// to determine the set of packages provided directly by that module.
//
// On top of all this, it is possible for a single code repository
// to contain multiple modules, either in branches or subdirectories,
// as a limited kind of monorepo. For example rsc.io/qr/v2,
// the v2.x.x continuation of rsc.io/qr, is expected to be found
// in v2-tagged commits in https://github.com/rsc/qr, either
// in the root or in a v2 subdirectory, disambiguated by go.mod.
// Again the precise file tree corresponding to a module
// depends on which version we are considering.
//
// It is also possible for the underlying repository to change over time,
// without changing the module path. If I copy the github repo over
// to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1,
// then clients of all versions should start fetching from bitbucket
// instead of github. That is, in contrast to the exact file tree,
// the location of the source code repository associated with a module path
// does not depend on the module version. (This is by design, as the whole
// point of these redirects is to allow package authors to establish a stable
// name that can be updated as code moves from one service to another.)
//
// All of this is important background for the lookup APIs defined in this
// file.
//
// The Lookup function takes a module path and returns a Repo representing
// that module path. Lookup can do only a little with the path alone.
// It can check that the path is well-formed (see semver.CheckPath)
// and it can check that the path can be resolved to a target repository.
// To avoid version control access except when absolutely necessary,
// Lookup does not attempt to connect to the repository itself.

var lookupCache par.Cache

type lookupCacheKey struct {
	proxy, path string
}

// Lookup returns the module with the given module path,
// fetched through the given proxy.
//
// The distinguished proxy "direct" indicates that the path should be fetched
// from its origin, and "noproxy" indicates that the patch should be fetched
// directly only if GONOPROXY matches the given path.
//
// For the distinguished proxy "off", Lookup always returns a Repo that returns
// a non-nil error for every method call.
//
// A successful return does not guarantee that the module
// has any defined versions.
func Lookup(proxy, path string) Repo {
	if traceRepo {
		defer logCall("Lookup(%q, %q)", proxy, path)()
	}

	type cached struct {
		r Repo
	}
	c := lookupCache.Do(lookupCacheKey{proxy, path}, func() any {
		r := newCachingRepo(path, func() (Repo, error) {
			r, err := lookup(proxy, path)
			if err == nil && traceRepo {
				r = newLoggingRepo(r)
			}
			return r, err
		})
		return cached{r}
	}).(cached)

	return c.r
}

// lookup returns the module with the given module path.
func lookup(proxy, path string) (r Repo, err error) {
	if cfg.BuildMod == "vendor" {
		return nil, errLookupDisabled
	}

	if module.MatchPrefixPatterns(cfg.GONOPROXY, path) {
		switch proxy {
		case "noproxy", "direct":
			return lookupDirect(path)
		default:
			return nil, errNoproxy
		}
	}

	switch proxy {
	case "off":
		return errRepo{path, errProxyOff}, nil
	case "direct":
		return lookupDirect(path)
	case "noproxy":
		return nil, errUseProxy
	default:
		return newProxyRepo(proxy, path)
	}
}

type lookupDisabledError struct{}

func (lookupDisabledError) Error() string {
	if cfg.BuildModReason == "" {
		return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod)
	}
	return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason)
}

var errLookupDisabled error = lookupDisabledError{}

var (
	errProxyOff       = notExistErrorf("module lookup disabled by GOPROXY=off")
	errNoproxy  error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY")
	errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY")
)

func lookupDirect(path string) (Repo, error) {
	security := web.SecureOnly

	if module.MatchPrefixPatterns(cfg.GOINSECURE, path) {
		security = web.Insecure
	}
	rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security)
	if err != nil {
		// We don't know where to find code for a module with this path.
		return nil, notExistError{err: err}
	}

	if rr.VCS.Name == "mod" {
		// Fetch module from proxy with base URL rr.Repo.
		return newProxyRepo(rr.Repo, path)
	}

	code, err := lookupCodeRepo(rr)
	if err != nil {
		return nil, err
	}
	return newCodeRepo(code, rr.Root, path)
}

func lookupCodeRepo(rr *vcs.RepoRoot) (codehost.Repo, error) {
	code, err := codehost.NewRepo(rr.VCS.Cmd, rr.Repo)
	if err != nil {
		if _, ok := err.(*codehost.VCSError); ok {
			return nil, err
		}
		return nil, fmt.Errorf("lookup %s: %v", rr.Root, err)
	}
	return code, nil
}

// A loggingRepo is a wrapper around an underlying Repo
// that prints a log message at the start and end of each call.
// It can be inserted when debugging.
type loggingRepo struct {
	r Repo
}

func newLoggingRepo(r Repo) *loggingRepo {
	return &loggingRepo{r}
}

// logCall prints a log message using format and args and then
// also returns a function that will print the same message again,
// along with the elapsed time.
// Typical usage is:
//
//	defer logCall("hello %s", arg)()
//
// Note the final ().
func logCall(format string, args ...any) func() {
	start := time.Now()
	fmt.Fprintf(os.Stderr, "+++ %s\n", fmt.Sprintf(format, args...))
	return func() {
		fmt.Fprintf(os.Stderr, "%.3fs %s\n", time.Since(start).Seconds(), fmt.Sprintf(format, args...))
	}
}

func (l *loggingRepo) ModulePath() string {
	return l.r.ModulePath()
}

func (l *loggingRepo) CheckReuse(old *codehost.Origin) (err error) {
	defer func() {
		logCall("CheckReuse[%s]: %v", l.r.ModulePath(), err)
	}()
	return l.r.CheckReuse(old)
}

func (l *loggingRepo) Versions(prefix string) (*Versions, error) {
	defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)()
	return l.r.Versions(prefix)
}

func (l *loggingRepo) Stat(rev string) (*RevInfo, error) {
	defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)()
	return l.r.Stat(rev)
}

func (l *loggingRepo) Latest() (*RevInfo, error) {
	defer logCall("Repo[%s]: Latest()", l.r.ModulePath())()
	return l.r.Latest()
}

func (l *loggingRepo) GoMod(version string) ([]byte, error) {
	defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)()
	return l.r.GoMod(version)
}

func (l *loggingRepo) Zip(dst io.Writer, version string) error {
	dstName := "_"
	if dst, ok := dst.(interface{ Name() string }); ok {
		dstName = strconv.Quote(dst.Name())
	}
	defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)()
	return l.r.Zip(dst, version)
}

// errRepo is a Repo that returns the same error for all operations.
//
// It is useful in conjunction with caching, since cache hits will not attempt
// the prohibited operations.
type errRepo struct {
	modulePath string
	err        error
}

func (r errRepo) ModulePath() string { return r.modulePath }

func (r errRepo) CheckReuse(old *codehost.Origin) error     { return r.err }
func (r errRepo) Versions(prefix string) (*Versions, error) { return nil, r.err }
func (r errRepo) Stat(rev string) (*RevInfo, error)         { return nil, r.err }
func (r errRepo) Latest() (*RevInfo, error)                 { return nil, r.err }
func (r errRepo) GoMod(version string) ([]byte, error)      { return nil, r.err }
func (r errRepo) Zip(dst io.Writer, version string) error   { return r.err }

// A notExistError is like fs.ErrNotExist, but with a custom message
type notExistError struct {
	err error
}

func notExistErrorf(format string, args ...any) error {
	return notExistError{fmt.Errorf(format, args...)}
}

func (e notExistError) Error() string {
	return e.err.Error()
}

func (notExistError) Is(target error) bool {
	return target == fs.ErrNotExist
}

func (e notExistError) Unwrap() error {
	return e.err
}