summary | refs | log | tree | commit | diff | stats
path: root/modules/git/pipeline
diff options
context:
space:
mode:
Diffstat (limited to 'modules/git/pipeline')
-rw-r--r-- modules/git/pipeline/catfile.go 108
-rw-r--r-- modules/git/pipeline/lfs_common.go 32
-rw-r--r-- modules/git/pipeline/lfs_gogit.go 146
-rw-r--r-- modules/git/pipeline/lfs_nogogit.go 230
-rw-r--r-- modules/git/pipeline/namerev.go 33
-rw-r--r-- modules/git/pipeline/revlist.go 86
6 files changed, 635 insertions, 0 deletions
diff --git a/modules/git/pipeline/catfile.go b/modules/git/pipeline/catfile.go
new file mode 100644
index 00000000..46772181
--- /dev/null
+++ b/modules/git/pipeline/catfile.go
@@ -0,0 +1,108 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package pipeline
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/log"
+)
+
+// CatFileBatchCheck runs `git cat-file --batch-check` in tmpBasePath.
+//
+// Object IDs are read from shasToCheckReader (the command's stdin) and the
+// command's stdout is streamed to catFileCheckWriter. Both pipe ends are closed
+// and wg.Done is called when the function returns. If the command fails,
+// catFileCheckWriter is closed with an error that includes git's stderr output.
+func CatFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
+	defer wg.Done()
+	defer shasToCheckReader.Close()
+	defer catFileCheckWriter.Close()
+
+	// Capture git's stderr so it can be attached to the error reported downstream.
+	stderr := new(bytes.Buffer)
+	cmd := git.NewCommand(ctx, "cat-file", "--batch-check")
+	if err := cmd.Run(&git.RunOpts{
+		Dir:    tmpBasePath,
+		Stdin:  shasToCheckReader,
+		Stdout: catFileCheckWriter,
+		Stderr: stderr,
+	}); err != nil {
+		// The deferred plain Close above does not overwrite this error.
+		_ = catFileCheckWriter.CloseWithError(fmt.Errorf("git cat-file --batch-check [%s]: %w - %s", tmpBasePath, err, strings.TrimSpace(stderr.String())))
+	}
+}
+
+// CatFileBatchCheckAllObjects runs `git cat-file --batch-check --batch-all-objects`
+// in tmpBasePath and streams the command's stdout to catFileCheckWriter.
+//
+// catFileCheckWriter is closed and wg.Done is called when the function returns.
+// If the command fails, the error (including git's stderr output) is logged,
+// used to close catFileCheckWriter, and sent on errChan.
+func CatFileBatchCheckAllObjects(ctx context.Context, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string, errChan chan<- error) {
+	defer wg.Done()
+	defer catFileCheckWriter.Close()
+
+	// Capture git's stderr so it can be attached to the reported error.
+	stderr := new(bytes.Buffer)
+	cmd := git.NewCommand(ctx, "cat-file", "--batch-check", "--batch-all-objects")
+	if err := cmd.Run(&git.RunOpts{
+		Dir:    tmpBasePath,
+		Stdout: catFileCheckWriter,
+		Stderr: stderr,
+	}); err != nil {
+		err = fmt.Errorf("git cat-file --batch-check --batch-all-objects [%s]: %w - %s", tmpBasePath, err, strings.TrimSpace(stderr.String()))
+		log.Error("%v", err)
+		// The deferred plain Close above does not overwrite this error.
+		_ = catFileCheckWriter.CloseWithError(err)
+		errChan <- err
+	}
+}
+
+// CatFileBatch runs `git cat-file --batch` in tmpBasePath.
+//
+// Object IDs are read from shasToBatchReader (the command's stdin) and the
+// command's stdout is streamed to catFileBatchWriter. Both pipe ends are closed
+// and wg.Done is called when the function returns. If the command fails, both
+// pipe ends are closed with an error that includes git's stderr output, so the
+// producer feeding stdin and the consumer reading stdout both observe it.
+func CatFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
+	defer wg.Done()
+	defer shasToBatchReader.Close()
+	defer catFileBatchWriter.Close()
+
+	// Capture git's stderr so it can be attached to the reported error.
+	stderr := new(bytes.Buffer)
+	if err := git.NewCommand(ctx, "cat-file", "--batch").Run(&git.RunOpts{
+		Dir:    tmpBasePath,
+		Stdout: catFileBatchWriter,
+		Stdin:  shasToBatchReader,
+		Stderr: stderr,
+	}); err != nil {
+		err = fmt.Errorf("git cat-file --batch [%s]: %w - %s", tmpBasePath, err, strings.TrimSpace(stderr.String()))
+		// The deferred plain Close calls above do not overwrite these errors.
+		_ = catFileBatchWriter.CloseWithError(err)
+		_ = shasToBatchReader.CloseWithError(err)
+	}
+}
+
+// BlobsLessThan1024FromCatFileBatchCheck reads space-separated
+// "<id> <type> <size>" lines from catFileCheckReader and forwards the IDs of
+// blobs whose size does not exceed 1024 bytes to shasToBatchWriter, one per line.
+//
+// Lines that are empty, have fewer than three fields, are not of type "blob",
+// or carry a size that is not a valid integer are skipped. When the input ends,
+// shasToBatchWriter is closed with the scanner's error (nil on a clean EOF).
+// If a write fails, catFileCheckReader is closed with that error and the
+// function returns immediately. wg.Done is called on return.
+func BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
+	defer wg.Done()
+	defer catFileCheckReader.Close()
+	scanner := bufio.NewScanner(catFileCheckReader)
+	defer func() {
+		_ = shasToBatchWriter.CloseWithError(scanner.Err())
+	}()
+	for scanner.Scan() {
+		line := scanner.Text()
+		if len(line) == 0 {
+			continue
+		}
+		fields := strings.Split(line, " ")
+		if len(fields) < 3 || fields[1] != "blob" {
+			continue
+		}
+		// A size that does not parse must not be mistaken for a zero-byte blob.
+		size, err := strconv.Atoi(fields[2])
+		if err != nil || size > 1024 {
+			continue
+		}
+		if _, err := io.WriteString(shasToBatchWriter, fields[0]+"\n"); err != nil {
+			// The consumer is gone; tell the producer and stop instead of
+			// retrying the write for every remaining buffered line.
+			_ = catFileCheckReader.CloseWithError(err)
+			return
+		}
+	}
+}
diff --git a/modules/git/pipeline/lfs_common.go b/modules/git/pipeline/lfs_common.go
new file mode 100644
index 00000000..188e7d4d
--- /dev/null
+++ b/modules/git/pipeline/lfs_common.go
@@ -0,0 +1,32 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package pipeline
+
+import (
+ "fmt"
+ "time"
+
+ "code.gitea.io/gitea/modules/git"
+)
+
+// LFSResult represents commits found using a provided pointer file hash.
+// FindLFSFile produces one value per commit/path pair in which the object was found.
+type LFSResult struct {
+ // Name is the path of the matching entry inside the commit's tree.
+ Name string
+ // SHA is the ID of the commit, as a string.
+ SHA string
+ // Summary is the first line of the whitespace-trimmed commit message.
+ Summary string
+ // When is the commit's author timestamp; results are sorted by it.
+ When time.Time
+ // ParentHashes holds the IDs of the commit's parents.
+ ParentHashes []git.ObjectID
+ // BranchName is the part of FullCommitName before the first "~".
+ BranchName string
+ // FullCommitName is the line that `git name-rev` printed for SHA.
+ FullCommitName string
+}
+
+// lfsResultSlice provides Len, Swap and Less so that results can be sorted
+// from the oldest to the newest When timestamp.
+type lfsResultSlice []*LFSResult
+
+// Len reports the number of results.
+func (a lfsResultSlice) Len() int {
+	return len(a)
+}
+
+// Swap exchanges the results at positions i and j.
+func (a lfsResultSlice) Swap(i, j int) {
+	a[j], a[i] = a[i], a[j]
+}
+
+// Less reports whether result i is strictly older than result j.
+func (a lfsResultSlice) Less(i, j int) bool {
+	return a[i].When.Before(a[j].When)
+}
+
+// lfsError wraps err in an error carrying the common "LFS error occurred"
+// prefix followed by msg; the returned error unwraps to err.
+func lfsError(msg string, err error) error {
+	const format = "LFS error occurred, %s: err: %w"
+	wrapped := fmt.Errorf(format, msg, err)
+	return wrapped
+}
diff --git a/modules/git/pipeline/lfs_gogit.go b/modules/git/pipeline/lfs_gogit.go
new file mode 100644
index 00000000..adcf8ed0
--- /dev/null
+++ b/modules/git/pipeline/lfs_gogit.go
@@ -0,0 +1,146 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+//go:build gogit
+
+package pipeline
+
+import (
+ "bufio"
+ "io"
+ "sort"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/git"
+
+ gogit "github.com/go-git/go-git/v5"
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/object"
+)
+
+// FindLFSFile finds commits that contain a provided pointer file hash.
+//
+// Every commit returned by the go-git log (all refs, committer-time order) has
+// its tree walked recursively; each entry whose hash equals objectID yields an
+// LFSResult keyed by "<commit>:<path>". A result is dropped when one of the
+// commit's parents has a result for the same path, so only the commits that
+// introduced the object at a path remain. The survivors are sorted by When and
+// then annotated with the output of `git name-rev` (FullCommitName/BranchName).
+//
+// It returns an error if the log cannot be opened, if walking a commit fails,
+// or if the commit IDs cannot be written to name-rev.
+func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) {
+	resultsMap := map[string]*LFSResult{}
+	results := make([]*LFSResult, 0)
+
+	basePath := repo.Path
+	gogitRepo := repo.GoGitRepo()
+
+	commitsIter, err := gogitRepo.Log(&gogit.LogOptions{
+		Order: gogit.LogOrderCommitterTime,
+		All:   true,
+	})
+	if err != nil {
+		return nil, lfsError("failed to get GoGit CommitsIter", err)
+	}
+
+	err = commitsIter.ForEach(func(gitCommit *object.Commit) error {
+		tree, err := gitCommit.Tree()
+		if err != nil {
+			return err
+		}
+		treeWalker := object.NewTreeWalker(tree, true, nil)
+		defer treeWalker.Close()
+		for {
+			name, entry, err := treeWalker.Next()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				// Ignoring a non-EOF error would leave the loop without a
+				// way to terminate and entry without a usable value.
+				return err
+			}
+			if entry.Hash == plumbing.Hash(objectID.RawValue()) {
+				parents := make([]git.ObjectID, len(gitCommit.ParentHashes))
+				for i, parentCommitID := range gitCommit.ParentHashes {
+					parents[i] = git.ParseGogitHash(parentCommitID)
+				}
+
+				result := LFSResult{
+					Name:         name,
+					SHA:          gitCommit.Hash.String(),
+					Summary:      strings.Split(strings.TrimSpace(gitCommit.Message), "\n")[0],
+					When:         gitCommit.Author.When,
+					ParentHashes: parents,
+				}
+				resultsMap[gitCommit.Hash.String()+":"+name] = &result
+			}
+		}
+		return nil
+	})
+	if err != nil && err != io.EOF {
+		return nil, lfsError("failure in CommitIter.ForEach", err)
+	}
+
+	// Keep only results whose parents do not already contain the object at the same path.
+	for _, result := range resultsMap {
+		hasParent := false
+		for _, parentHash := range result.ParentHashes {
+			if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent {
+				break
+			}
+		}
+		if !hasParent {
+			results = append(results, result)
+		}
+	}
+
+	sort.Sort(lfsResultSlice(results))
+
+	// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
+	shasToNameReader, shasToNameWriter := io.Pipe()
+	nameRevStdinReader, nameRevStdinWriter := io.Pipe()
+	// Buffered so the single possible send below never blocks.
+	errChan := make(chan error, 1)
+	wg := sync.WaitGroup{}
+	wg.Add(3)
+
+	// Consume name-rev's output: the n-th non-empty line names the n-th result.
+	go func() {
+		defer wg.Done()
+		// Release the read side on every exit path so name-rev cannot block on its stdout.
+		defer nameRevStdinReader.Close()
+		scanner := bufio.NewScanner(nameRevStdinReader)
+		i := 0
+		for scanner.Scan() {
+			line := scanner.Text()
+			if len(line) == 0 {
+				continue
+			}
+			if i >= len(results) {
+				// More names than SHAs were sent; stop rather than index out of range.
+				break
+			}
+			result := results[i]
+			result.FullCommitName = line
+			result.BranchName = strings.Split(line, "~")[0]
+			i++
+		}
+	}()
+	go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
+	// Feed the commit IDs to name-rev, one per line, in result order.
+	go func() {
+		defer wg.Done()
+		defer shasToNameWriter.Close()
+		for _, result := range results {
+			if _, err := io.WriteString(shasToNameWriter, result.SHA+"\n"); err != nil {
+				// Report once and stop: errChan holds a single error and a
+				// second send would block wg.Wait forever.
+				errChan <- err
+				return
+			}
+		}
+	}()
+
+	wg.Wait()
+
+	select {
+	case err, has := <-errChan:
+		if has {
+			return nil, lfsError("unable to obtain name for LFS files", err)
+		}
+	default:
+	}
+
+	return results, nil
+}
diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go
new file mode 100644
index 00000000..349cfbd9
--- /dev/null
+++ b/modules/git/pipeline/lfs_nogogit.go
@@ -0,0 +1,230 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+//go:build !gogit
+
+package pipeline
+
+import (
+ "bufio"
+ "bytes"
+ "io"
+ "sort"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/git"
+)
+
+// FindLFSFile finds commits that contain a provided pointer file hash.
+//
+// Commit IDs come from `git rev-list --all`. For each commit, the commit object
+// and then its tree and sub-trees are requested from a cat-file batch process;
+// every tree entry whose ID equals objectID yields an LFSResult keyed by
+// "<commit>:<path>". Results whose parent commit also has a result for the same
+// path are dropped, the rest are sorted by When and annotated with the output
+// of `git name-rev`. Errors from the batch reader/writer and the rev-list
+// scanner are returned as-is; a failure to feed name-rev is wrapped by lfsError.
+func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) {
+ resultsMap := map[string]*LFSResult{}
+ results := make([]*LFSResult, 0)
+
+ basePath := repo.Path
+
+ // Use rev-list to provide us with all commits in order
+ revListReader, revListWriter := io.Pipe()
+ // Closing both ends on return also unblocks the rev-list goroutine below
+ // if this function exits before consuming all of its output.
+ defer func() {
+ _ = revListWriter.Close()
+ _ = revListReader.Close()
+ }()
+
+ go func() {
+ stderr := strings.Builder{}
+ err := git.NewCommand(repo.Ctx, "rev-list", "--all").Run(&git.RunOpts{
+ Dir: repo.Path,
+ Stdout: revListWriter,
+ Stderr: &stderr,
+ })
+ if err != nil {
+ // Surface the failure (with stderr) to the scanner reading revListReader.
+ _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String()))
+ } else {
+ _ = revListWriter.Close()
+ }
+ }()
+
+ // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
+ // so let's create a batch stdin and stdout
+ batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx)
+ defer cancel()
+
+ // We'll use a scanner for the revList because it's simpler than a bufio.Reader
+ scan := bufio.NewScanner(revListReader)
+ // trees and paths are parallel stacks: the hex IDs of sub-trees still to be
+ // read and the path prefix that applies to each of them.
+ trees := [][]byte{}
+ paths := []string{}
+
+ // Scratch buffers handed to git.ParseTreeLine for every tree entry.
+ fnameBuf := make([]byte, 4096)
+ modeBuf := make([]byte, 40)
+ workingShaBuf := make([]byte, objectID.Type().FullLength()/2)
+
+ for scan.Scan() {
+ // Get the next commit ID
+ // NOTE(review): scan.Bytes() is only valid until the next Scan call; it is
+ // only used within this iteration.
+ commitID := scan.Bytes()
+
+ // push the commit to the cat-file --batch process
+ _, err := batchStdinWriter.Write(commitID)
+ if err != nil {
+ return nil, err
+ }
+ _, err = batchStdinWriter.Write([]byte{'\n'})
+ if err != nil {
+ return nil, err
+ }
+
+ var curCommit *git.Commit
+ curPath := ""
+
+ // Read batch responses until the commit and all of its (sub-)trees have been processed.
+ commitReadingLoop:
+ for {
+ _, typ, size, err := git.ReadBatchLine(batchReader)
+ if err != nil {
+ return nil, err
+ }
+
+ switch typ {
+ case "tag":
+ // This shouldn't happen but if it does well just get the commit and try again
+ id, err := git.ReadTagObjectID(batchReader, size)
+ if err != nil {
+ return nil, err
+ }
+ // Request the object the tag refers to and read the next response.
+ _, err = batchStdinWriter.Write([]byte(id + "\n"))
+ if err != nil {
+ return nil, err
+ }
+ continue
+ case "commit":
+ // Read in the commit to get its tree and in case this is one of the last used commits
+ curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size))
+ if err != nil {
+ return nil, err
+ }
+ // Skip one byte after the object body — presumably the trailing newline
+ // emitted by cat-file --batch; confirm against git's documentation.
+ if _, err := batchReader.Discard(1); err != nil {
+ return nil, err
+ }
+
+ // Request the commit's root tree; its entries have no path prefix.
+ if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil {
+ return nil, err
+ }
+ curPath = ""
+ case "tree":
+ // n counts the bytes of this tree object consumed so far.
+ var n int64
+ for n < size {
+ mode, fname, binObjectID, count, err := git.ParseTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf)
+ if err != nil {
+ return nil, err
+ }
+ n += int64(count)
+ if bytes.Equal(binObjectID, objectID.RawValue()) {
+ // The entry is the object being searched for: record commit and path.
+ result := LFSResult{
+ Name: curPath + string(fname),
+ SHA: curCommit.ID.String(),
+ Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0],
+ When: curCommit.Author.When,
+ ParentHashes: curCommit.Parents,
+ }
+ resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
+ } else if string(mode) == git.EntryModeTree.String() {
+ // Sub-tree: remember its hex ID and path so it is read later.
+ hexObjectID := make([]byte, objectID.Type().FullLength())
+ git.BinToHex(objectID.Type(), binObjectID, hexObjectID)
+ trees = append(trees, hexObjectID)
+ paths = append(paths, curPath+string(fname)+"/")
+ }
+ }
+ // Skip the single byte that follows the tree object body.
+ if _, err := batchReader.Discard(1); err != nil {
+ return nil, err
+ }
+ if len(trees) > 0 {
+ // Pop the most recently pushed sub-tree and request it next.
+ _, err := batchStdinWriter.Write(trees[len(trees)-1])
+ if err != nil {
+ return nil, err
+ }
+ _, err = batchStdinWriter.Write([]byte("\n"))
+ if err != nil {
+ return nil, err
+ }
+ curPath = paths[len(paths)-1]
+ trees = trees[:len(trees)-1]
+ paths = paths[:len(paths)-1]
+ } else {
+ // No sub-trees left for this commit: move on to the next commit ID.
+ break commitReadingLoop
+ }
+ default:
+ // Any other object type: skip its body plus the byte that follows it,
+ // then keep reading responses.
+ if err := git.DiscardFull(batchReader, size+1); err != nil {
+ return nil, err
+ }
+ }
+ }
+ }
+
+ // Reports a rev-list failure passed through revListWriter.CloseWithError.
+ if err := scan.Err(); err != nil {
+ return nil, err
+ }
+
+ // Keep only results whose parents do not already contain the object at the same path.
+ for _, result := range resultsMap {
+ hasParent := false
+ for _, parentID := range result.ParentHashes {
+ if _, hasParent = resultsMap[parentID.String()+":"+result.Name]; hasParent {
+ break
+ }
+ }
+ if !hasParent {
+ results = append(results, result)
+ }
+ }
+
+ sort.Sort(lfsResultSlice(results))
+
+ // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
+ shasToNameReader, shasToNameWriter := io.Pipe()
+ nameRevStdinReader, nameRevStdinWriter := io.Pipe()
+ // Buffered with one slot: the writer goroutine below sends at most one error.
+ errChan := make(chan error, 1)
+ wg := sync.WaitGroup{}
+ wg.Add(3)
+
+ // Consume name-rev's output: the n-th non-empty line names the n-th result.
+ go func() {
+ defer wg.Done()
+ scanner := bufio.NewScanner(nameRevStdinReader)
+ i := 0
+ for scanner.Scan() {
+ line := scanner.Text()
+ if len(line) == 0 {
+ continue
+ }
+ // NOTE(review): assumes name-rev prints no more non-empty lines than SHAs
+ // were written; results[i] is not bounds-checked.
+ result := results[i]
+ result.FullCommitName = line
+ result.BranchName = strings.Split(line, "~")[0]
+ i++
+ }
+ }()
+ go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
+ // Feed the commit IDs to name-rev, one per line, in result order.
+ go func() {
+ defer wg.Done()
+ defer shasToNameWriter.Close()
+ for _, result := range results {
+ _, err := shasToNameWriter.Write([]byte(result.SHA))
+ if err != nil {
+ errChan <- err
+ break
+ }
+ _, err = shasToNameWriter.Write([]byte{'\n'})
+ if err != nil {
+ errChan <- err
+ break
+ }
+ }
+ }()
+
+ wg.Wait()
+
+ // Non-blocking check: all goroutines have finished, so any error is already queued.
+ select {
+ case err, has := <-errChan:
+ if has {
+ return nil, lfsError("unable to obtain name for LFS files", err)
+ }
+ default:
+ }
+
+ return results, nil
+}
diff --git a/modules/git/pipeline/namerev.go b/modules/git/pipeline/namerev.go
new file mode 100644
index 00000000..ad583a74
--- /dev/null
+++ b/modules/git/pipeline/namerev.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package pipeline
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/git"
+)
+
+// NameRevStdin runs `git name-rev --stdin --name-only --always` in tmpBasePath.
+//
+// Commit IDs are read from shasToNameReader (the command's stdin) and the
+// command's stdout is streamed to nameRevStdinWriter. Both pipe ends are closed
+// and wg.Done is called when the function returns. If the command fails, both
+// pipe ends are closed with an error that includes git's stderr output, so the
+// producer feeding stdin and the consumer reading stdout both observe it.
+func NameRevStdin(ctx context.Context, shasToNameReader *io.PipeReader, nameRevStdinWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
+	defer wg.Done()
+	defer shasToNameReader.Close()
+	defer nameRevStdinWriter.Close()
+
+	// Capture git's stderr so it can be attached to the reported error.
+	stderr := new(bytes.Buffer)
+	if err := git.NewCommand(ctx, "name-rev", "--stdin", "--name-only", "--always").Run(&git.RunOpts{
+		Dir:    tmpBasePath,
+		Stdout: nameRevStdinWriter,
+		Stdin:  shasToNameReader,
+		Stderr: stderr,
+	}); err != nil {
+		err = fmt.Errorf("git name-rev [%s]: %w - %s", tmpBasePath, err, strings.TrimSpace(stderr.String()))
+		// The deferred plain Close calls above do not overwrite these errors.
+		_ = nameRevStdinWriter.CloseWithError(err)
+		_ = shasToNameReader.CloseWithError(err)
+	}
+}
diff --git a/modules/git/pipeline/revlist.go b/modules/git/pipeline/revlist.go
new file mode 100644
index 00000000..d88ebe78
--- /dev/null
+++ b/modules/git/pipeline/revlist.go
@@ -0,0 +1,86 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package pipeline
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/log"
+)
+
+// RevListAllObjects runs `git rev-list --objects --all` in basePath and streams
+// the command's stdout to revListWriter.
+//
+// revListWriter is closed and wg.Done is called when the function returns. If
+// the command fails, the error (including git's stderr output) is logged, used
+// to close revListWriter, and sent on errChan.
+func RevListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {
+	defer wg.Done()
+	defer revListWriter.Close()
+
+	// Capture git's stderr so it can be attached to the reported error.
+	stderr := new(bytes.Buffer)
+	cmd := git.NewCommand(ctx, "rev-list", "--objects", "--all")
+	if err := cmd.Run(&git.RunOpts{
+		Dir:    basePath,
+		Stdout: revListWriter,
+		Stderr: stderr,
+	}); err != nil {
+		err = fmt.Errorf("git rev-list --objects --all [%s]: %w - %s", basePath, err, strings.TrimSpace(stderr.String()))
+		log.Error("%v", err)
+		// The deferred plain Close above does not overwrite this error.
+		_ = revListWriter.CloseWithError(err)
+		errChan <- err
+	}
+}
+
+// RevListObjects runs `git rev-list --objects <headSHA>` in tmpBasePath, adding
+// `--not <baseSHA>` when baseSHA is non-empty, and streams the command's stdout
+// to revListWriter.
+//
+// revListWriter is closed and wg.Done is called when the function returns. If
+// the command fails, the error (including git's stderr output) is logged, used
+// to close revListWriter so the consumer does not mistake truncated output for
+// a clean end of stream, and sent on errChan.
+func RevListObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath, headSHA, baseSHA string, errChan chan<- error) {
+	defer wg.Done()
+	defer revListWriter.Close()
+
+	// Capture git's stderr so it can be attached to the reported error.
+	stderr := new(bytes.Buffer)
+	cmd := git.NewCommand(ctx, "rev-list", "--objects").AddDynamicArguments(headSHA)
+	if baseSHA != "" {
+		cmd = cmd.AddArguments("--not").AddDynamicArguments(baseSHA)
+	}
+	if err := cmd.Run(&git.RunOpts{
+		Dir:    tmpBasePath,
+		Stdout: revListWriter,
+		Stderr: stderr,
+	}); err != nil {
+		err = fmt.Errorf("git rev-list [%s]: %w - %s", tmpBasePath, err, strings.TrimSpace(stderr.String()))
+		log.Error("%v", err)
+		// The deferred plain Close above does not overwrite this error.
+		_ = revListWriter.CloseWithError(err)
+		errChan <- err
+	}
+}
+
+// BlobsFromRevListObjects reads space-separated "<id> <path>" lines from
+// revListReader and forwards the ID of every line that carries a non-empty
+// second field to shasToCheckWriter, one per line.
+//
+// Lines that are empty or have no (or an empty) second field are skipped.
+// NOTE(review): the filter tests only for a path, not for the object type, so
+// any object listed with a path passes, not just blobs — confirm the consumer
+// filters by type.
+//
+// When the input ends, shasToCheckWriter is closed with the scanner's error
+// (nil on a clean EOF). If a write fails, revListReader is closed with that
+// error and the function returns immediately. wg.Done is called on return.
+func BlobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
+	defer wg.Done()
+	defer revListReader.Close()
+	scanner := bufio.NewScanner(revListReader)
+	defer func() {
+		_ = shasToCheckWriter.CloseWithError(scanner.Err())
+	}()
+	for scanner.Scan() {
+		line := scanner.Text()
+		if len(line) == 0 {
+			continue
+		}
+		fields := strings.Split(line, " ")
+		if len(fields) < 2 || len(fields[1]) == 0 {
+			continue
+		}
+		if _, err := io.WriteString(shasToCheckWriter, fields[0]+"\n"); err != nil {
+			// The consumer is gone; tell the producer and stop instead of
+			// retrying the write for every remaining buffered line.
+			_ = revListReader.CloseWithError(err)
+			return
+		}
+	}
+}