diff options
Diffstat (limited to 'modules/git/pipeline')
-rw-r--r-- | modules/git/pipeline/catfile.go | 108 | ||||
-rw-r--r-- | modules/git/pipeline/lfs_common.go | 32 | ||||
-rw-r--r-- | modules/git/pipeline/lfs_gogit.go | 146 | ||||
-rw-r--r-- | modules/git/pipeline/lfs_nogogit.go | 230 | ||||
-rw-r--r-- | modules/git/pipeline/namerev.go | 33 | ||||
-rw-r--r-- | modules/git/pipeline/revlist.go | 86 |
6 files changed, 635 insertions, 0 deletions
diff --git a/modules/git/pipeline/catfile.go b/modules/git/pipeline/catfile.go new file mode 100644 index 00000000..46772181 --- /dev/null +++ b/modules/git/pipeline/catfile.go @@ -0,0 +1,108 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "strconv" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" +) + +// CatFileBatchCheck runs cat-file with --batch-check +func CatFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) { + defer wg.Done() + defer shasToCheckReader.Close() + defer catFileCheckWriter.Close() + + stderr := new(bytes.Buffer) + var errbuf strings.Builder + cmd := git.NewCommand(ctx, "cat-file", "--batch-check") + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdin: shasToCheckReader, + Stdout: catFileCheckWriter, + Stderr: stderr, + }); err != nil { + _ = catFileCheckWriter.CloseWithError(fmt.Errorf("git cat-file --batch-check [%s]: %w - %s", tmpBasePath, err, errbuf.String())) + } +} + +// CatFileBatchCheckAllObjects runs cat-file with --batch-check --batch-all +func CatFileBatchCheckAllObjects(ctx context.Context, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string, errChan chan<- error) { + defer wg.Done() + defer catFileCheckWriter.Close() + + stderr := new(bytes.Buffer) + var errbuf strings.Builder + cmd := git.NewCommand(ctx, "cat-file", "--batch-check", "--batch-all-objects") + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: catFileCheckWriter, + Stderr: stderr, + }); err != nil { + log.Error("git cat-file --batch-check --batch-all-object [%s]: %v - %s", tmpBasePath, err, errbuf.String()) + err = fmt.Errorf("git cat-file --batch-check --batch-all-object [%s]: %w - %s", tmpBasePath, err, errbuf.String()) + _ = catFileCheckWriter.CloseWithError(err) + errChan <- err + } +} + +// CatFileBatch runs cat-file --batch +func CatFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) { + defer wg.Done() + defer shasToBatchReader.Close() + defer catFileBatchWriter.Close() + + stderr := new(bytes.Buffer) + var errbuf strings.Builder + if err := git.NewCommand(ctx, "cat-file", "--batch").Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: catFileBatchWriter, + Stdin: shasToBatchReader, + Stderr: stderr, + }); err != nil { + _ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", tmpBasePath, err, errbuf.String())) + } +} + +// BlobsLessThan1024FromCatFileBatchCheck reads a pipeline from cat-file --batch-check and returns the blobs <1024 in size +func BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) { + defer wg.Done() + defer catFileCheckReader.Close() + scanner := bufio.NewScanner(catFileCheckReader) + defer func() { + _ = shasToBatchWriter.CloseWithError(scanner.Err()) + }() + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + fields := strings.Split(line, " ") + if len(fields) < 3 || fields[1] != "blob" { + continue + } + size, _ := strconv.Atoi(fields[2]) + if size > 1024 { + continue + } + toWrite := []byte(fields[0] + "\n") + for len(toWrite) > 0 { + n, err := shasToBatchWriter.Write(toWrite) + if err != nil { + _ = catFileCheckReader.CloseWithError(err) + break + } + toWrite = toWrite[n:] + } + } +} diff --git a/modules/git/pipeline/lfs_common.go b/modules/git/pipeline/lfs_common.go new file mode 100644 index 00000000..188e7d4d --- /dev/null +++ b/modules/git/pipeline/lfs_common.go @@ -0,0 +1,32 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "fmt" + "time" + + "code.gitea.io/gitea/modules/git" +) + +// LFSResult represents commits found using a provided pointer file hash +type LFSResult struct { + Name string + SHA string + Summary string + When time.Time + ParentHashes []git.ObjectID + BranchName string + FullCommitName string +} + +type lfsResultSlice []*LFSResult + +func (a lfsResultSlice) Len() int { return len(a) } +func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) } + +func lfsError(msg string, err error) error { + return fmt.Errorf("LFS error occurred, %s: err: %w", msg, err) +} diff --git a/modules/git/pipeline/lfs_gogit.go b/modules/git/pipeline/lfs_gogit.go new file mode 100644 index 00000000..adcf8ed0 --- /dev/null +++ b/modules/git/pipeline/lfs_gogit.go @@ -0,0 +1,146 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +//go:build gogit + +package pipeline + +import ( + "bufio" + "io" + "sort" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" +) + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + gogitRepo := repo.GoGitRepo() + + commitsIter, err := gogitRepo.Log(&gogit.LogOptions{ + Order: gogit.LogOrderCommitterTime, + All: true, + }) + if err != nil { + return nil, lfsError("failed to get GoGit CommitsIter", err) + } + + err = commitsIter.ForEach(func(gitCommit *object.Commit) error { + tree, err := gitCommit.Tree() + if err != nil { + return err + } + treeWalker := object.NewTreeWalker(tree, true, nil) + defer treeWalker.Close() + for { + name, entry, err := treeWalker.Next() + if err == io.EOF { + break + } + if entry.Hash == plumbing.Hash(objectID.RawValue()) { + parents := make([]git.ObjectID, len(gitCommit.ParentHashes)) + for i, parentCommitID := range gitCommit.ParentHashes { + parents[i] = git.ParseGogitHash(parentCommitID) + } + + result := LFSResult{ + Name: name, + SHA: gitCommit.Hash.String(), + Summary: strings.Split(strings.TrimSpace(gitCommit.Message), "\n")[0], + When: gitCommit.Author.When, + ParentHashes: parents, + } + resultsMap[gitCommit.Hash.String()+":"+name] = &result + } + } + return nil + }) + if err != nil && err != io.EOF { + return nil, lfsError("failure in CommitIter.ForEach", err) + } + + for _, result := range resultsMap { + hasParent := false + for _, parentHash := range result.ParentHashes { + if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + i := 0 + if i < len(result.SHA) { + n, err := shasToNameWriter.Write([]byte(result.SHA)[i:]) + if err != nil { + errChan <- err + break + } + i += n + } + n := 0 + for n < 1 { + n, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + + } + + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, lfsError("unable to obtain name for LFS files", err) + } + default: + } + + return results, nil +} diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go new file mode 100644 index 00000000..349cfbd9 --- /dev/null +++ b/modules/git/pipeline/lfs_nogogit.go @@ -0,0 +1,230 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +//go:build !gogit + +package pipeline + +import ( + "bufio" + "bytes" + "io" + "sort" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" +) + +// FindLFSFile finds commits that contain a provided pointer file hash +func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) { + resultsMap := map[string]*LFSResult{} + results := make([]*LFSResult, 0) + + basePath := repo.Path + + // Use rev-list to provide us with all commits in order + revListReader, revListWriter := io.Pipe() + defer func() { + _ = revListWriter.Close() + _ = revListReader.Close() + }() + + go func() { + stderr := strings.Builder{} + err := git.NewCommand(repo.Ctx, "rev-list", "--all").Run(&git.RunOpts{ + Dir: repo.Path, + Stdout: revListWriter, + Stderr: &stderr, + }) + if err != nil { + _ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String())) + } else { + _ = revListWriter.Close() + } + }() + + // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. + // so let's create a batch stdin and stdout + batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx) + defer cancel() + + // We'll use a scanner for the revList because it's simpler than a bufio.Reader + scan := bufio.NewScanner(revListReader) + trees := [][]byte{} + paths := []string{} + + fnameBuf := make([]byte, 4096) + modeBuf := make([]byte, 40) + workingShaBuf := make([]byte, objectID.Type().FullLength()/2) + + for scan.Scan() { + // Get the next commit ID + commitID := scan.Bytes() + + // push the commit to the cat-file --batch process + _, err := batchStdinWriter.Write(commitID) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte{'\n'}) + if err != nil { + return nil, err + } + + var curCommit *git.Commit + curPath := "" + + commitReadingLoop: + for { + _, typ, size, err := git.ReadBatchLine(batchReader) + if err != nil { + return nil, err + } + + switch typ { + case "tag": + // This shouldn't happen but if it does well just get the commit and try again + id, err := git.ReadTagObjectID(batchReader, size) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte(id + "\n")) + if err != nil { + return nil, err + } + continue + case "commit": + // Read in the commit to get its tree and in case this is one of the last used commits + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + if err != nil { + return nil, err + } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } + + if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + return nil, err + } + curPath = "" + case "tree": + var n int64 + for n < size { + mode, fname, binObjectID, count, err := git.ParseTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) + if err != nil { + return nil, err + } + n += int64(count) + if bytes.Equal(binObjectID, objectID.RawValue()) { + result := LFSResult{ + Name: curPath + string(fname), + SHA: curCommit.ID.String(), + Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0], + When: curCommit.Author.When, + ParentHashes: curCommit.Parents, + } + resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result + } else if string(mode) == git.EntryModeTree.String() { + hexObjectID := make([]byte, objectID.Type().FullLength()) + git.BinToHex(objectID.Type(), binObjectID, hexObjectID) + trees = append(trees, hexObjectID) + paths = append(paths, curPath+string(fname)+"/") + } + } + if _, err := batchReader.Discard(1); err != nil { + return nil, err + } + if len(trees) > 0 { + _, err := batchStdinWriter.Write(trees[len(trees)-1]) + if err != nil { + return nil, err + } + _, err = batchStdinWriter.Write([]byte("\n")) + if err != nil { + return nil, err + } + curPath = paths[len(paths)-1] + trees = trees[:len(trees)-1] + paths = paths[:len(paths)-1] + } else { + break commitReadingLoop + } + default: + if err := git.DiscardFull(batchReader, size+1); err != nil { + return nil, err + } + } + } + } + + if err := scan.Err(); err != nil { + return nil, err + } + + for _, result := range resultsMap { + hasParent := false + for _, parentID := range result.ParentHashes { + if _, hasParent = resultsMap[parentID.String()+":"+result.Name]; hasParent { + break + } + } + if !hasParent { + results = append(results, result) + } + } + + sort.Sort(lfsResultSlice(results)) + + // Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple + shasToNameReader, shasToNameWriter := io.Pipe() + nameRevStdinReader, nameRevStdinWriter := io.Pipe() + errChan := make(chan error, 1) + wg := sync.WaitGroup{} + wg.Add(3) + + go func() { + defer wg.Done() + scanner := bufio.NewScanner(nameRevStdinReader) + i := 0 + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + result := results[i] + result.FullCommitName = line + result.BranchName = strings.Split(line, "~")[0] + i++ + } + }() + go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath) + go func() { + defer wg.Done() + defer shasToNameWriter.Close() + for _, result := range results { + _, err := shasToNameWriter.Write([]byte(result.SHA)) + if err != nil { + errChan <- err + break + } + _, err = shasToNameWriter.Write([]byte{'\n'}) + if err != nil { + errChan <- err + break + } + } + }() + + wg.Wait() + + select { + case err, has := <-errChan: + if has { + return nil, lfsError("unable to obtain name for LFS files", err) + } + default: + } + + return results, nil +} diff --git a/modules/git/pipeline/namerev.go b/modules/git/pipeline/namerev.go new file mode 100644 index 00000000..ad583a74 --- /dev/null +++ b/modules/git/pipeline/namerev.go @@ -0,0 +1,33 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "bytes" + "context" + "fmt" + "io" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" +) + +// NameRevStdin runs name-rev --stdin +func NameRevStdin(ctx context.Context, shasToNameReader *io.PipeReader, nameRevStdinWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) { + defer wg.Done() + defer shasToNameReader.Close() + defer nameRevStdinWriter.Close() + + stderr := new(bytes.Buffer) + var errbuf strings.Builder + if err := git.NewCommand(ctx, "name-rev", "--stdin", "--name-only", "--always").Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: nameRevStdinWriter, + Stdin: shasToNameReader, + Stderr: stderr, + }); err != nil { + _ = shasToNameReader.CloseWithError(fmt.Errorf("git name-rev [%s]: %w - %s", tmpBasePath, err, errbuf.String())) + } +} diff --git a/modules/git/pipeline/revlist.go b/modules/git/pipeline/revlist.go new file mode 100644 index 00000000..d88ebe78 --- /dev/null +++ b/modules/git/pipeline/revlist.go @@ -0,0 +1,86 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" +) + +// RevListAllObjects runs rev-list --objects --all and writes to a pipewriter +func RevListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) { + defer wg.Done() + defer revListWriter.Close() + + stderr := new(bytes.Buffer) + var errbuf strings.Builder + cmd := git.NewCommand(ctx, "rev-list", "--objects", "--all") + if err := cmd.Run(&git.RunOpts{ + Dir: basePath, + Stdout: revListWriter, + Stderr: stderr, + }); err != nil { + log.Error("git rev-list --objects --all [%s]: %v - %s", basePath, err, errbuf.String()) + err = fmt.Errorf("git rev-list --objects --all [%s]: %w - %s", basePath, err, errbuf.String()) + _ = revListWriter.CloseWithError(err) + errChan <- err + } +} + +// RevListObjects run rev-list --objects from headSHA to baseSHA +func RevListObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath, headSHA, baseSHA string, errChan chan<- error) { + defer wg.Done() + defer revListWriter.Close() + stderr := new(bytes.Buffer) + var errbuf strings.Builder + cmd := git.NewCommand(ctx, "rev-list", "--objects").AddDynamicArguments(headSHA) + if baseSHA != "" { + cmd = cmd.AddArguments("--not").AddDynamicArguments(baseSHA) + } + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: revListWriter, + Stderr: stderr, + }); err != nil { + log.Error("git rev-list [%s]: %v - %s", tmpBasePath, err, errbuf.String()) + errChan <- fmt.Errorf("git rev-list [%s]: %w - %s", tmpBasePath, err, errbuf.String()) + } +} + +// BlobsFromRevListObjects reads a RevListAllObjects and only selects blobs +func BlobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) { + defer wg.Done() + defer revListReader.Close() + scanner := bufio.NewScanner(revListReader) + defer func() { + _ = shasToCheckWriter.CloseWithError(scanner.Err()) + }() + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 { + continue + } + fields := strings.Split(line, " ") + if len(fields) < 2 || len(fields[1]) == 0 { + continue + } + toWrite := []byte(fields[0] + "\n") + for len(toWrite) > 0 { + n, err := shasToCheckWriter.Write(toWrite) + if err != nil { + _ = revListReader.CloseWithError(err) + break + } + toWrite = toWrite[n:] + } + } +} |