summaryrefslogtreecommitdiffstats
path: root/modules/git/foreachref
diff options
context:
space:
mode:
Diffstat (limited to 'modules/git/foreachref')
-rw-r--r--modules/git/foreachref/format.go83
-rw-r--r--modules/git/foreachref/format_test.go66
-rw-r--r--modules/git/foreachref/parser.go128
-rw-r--r--modules/git/foreachref/parser_test.go227
4 files changed, 504 insertions, 0 deletions
diff --git a/modules/git/foreachref/format.go b/modules/git/foreachref/format.go
new file mode 100644
index 00000000..97e8ee47
--- /dev/null
+++ b/modules/git/foreachref/format.go
@@ -0,0 +1,83 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package foreachref
+
+import (
+ "encoding/hex"
+ "fmt"
+ "io"
+ "strings"
+)
+
+var (
+ // nullChar is the single null byte that NewFormat installs as the field
+ // delimiter.
+ nullChar = []byte("\x00")
+ // dualNullChar is the pair of null bytes that NewFormat installs as the
+ // reference delimiter.
+ dualNullChar = []byte("\x00\x00")
+)
+
+// Format supports specifying and parsing an output format for 'git
+// for-each-ref'. See git-for-each-ref(1) for available fields.
+type Format struct {
+ // fieldNames hold %(fieldname)s to be passed to the '--format' flag of
+ // for-each-ref. See git-for-each-ref(1) for available fields.
+ fieldNames []string
+
+ // fieldDelim is the character sequence that is used to separate fields
+ // for each reference. fieldDelim and refDelim should be selected to not
+ // interfere with each other and to not be present in field values.
+ fieldDelim []byte
+ // fieldDelimStr is a string representation of fieldDelim. Used to save
+ // us from repetitive reallocation whenever we need the delimiter as a
+ // string.
+ fieldDelimStr string
+ // refDelim is the character sequence used to separate references from
+ // each other in the output. fieldDelim and refDelim should be selected
+ // to not interfere with each other and to not be present in field
+ // values.
+ refDelim []byte
+}
+
+// NewFormat creates a forEachRefFormat using the specified fieldNames. See
+// git-for-each-ref(1) for available fields.
+func NewFormat(fieldNames ...string) Format {
+ return Format{
+ fieldNames: fieldNames,
+ fieldDelim: nullChar,
+ fieldDelimStr: string(nullChar),
+ refDelim: dualNullChar,
+ }
+}
+
+// Flag returns the value to pass to the '--format' flag of 'git for-each-ref'
+// so that its output carries every field in fieldNames.
+//
+// Each field is emitted as "<name> %(<name>)": the field name, a space and the
+// placeholder that git expands to the field's value. Consecutive fields are
+// separated by the hex-escaped field delimiter and the whole string ends with
+// the hex-escaped reference delimiter. Without any fields, only the reference
+// delimiter is returned.
+func (f Format) Flag() string {
+	// note: escape delimiters to allow control characters as delimiters. For
+	// example, '%00' for null character or '%0a' for newline. The field
+	// delimiter is the same for every field, so escape it only once.
+	fieldDelim := f.hexEscaped(f.fieldDelim)
+
+	var formatFlag strings.Builder
+	for i, field := range f.fieldNames {
+		if i > 0 {
+			formatFlag.WriteString(fieldDelim)
+		}
+		// field key and field value, written straight into the builder
+		// rather than through an intermediate string.
+		fmt.Fprintf(&formatFlag, "%s %%(%s)", field, field)
+	}
+	formatFlag.WriteString(f.hexEscaped(f.refDelim))
+	return formatFlag.String()
+}
+
+// Parser returns a Parser capable of parsing 'git for-each-ref' output produced
+// with this Format. The output is read from r; the Parser is obtained by
+// handing r and this Format to NewParser.
+func (f Format) Parser(r io.Reader) *Parser {
+ return NewParser(r, f)
+}
+
+// hexEscaped produces hex-escpaed characters from a string. For example, "\n\0"
+// would turn into "%0a%00".
+func (f Format) hexEscaped(delim []byte) string {
+ escaped := ""
+ for i := 0; i < len(delim); i++ {
+ escaped += "%" + hex.EncodeToString([]byte{delim[i]})
+ }
+ return escaped
+}
diff --git a/modules/git/foreachref/format_test.go b/modules/git/foreachref/format_test.go
new file mode 100644
index 00000000..8ff23932
--- /dev/null
+++ b/modules/git/foreachref/format_test.go
@@ -0,0 +1,66 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package foreachref_test
+
+import (
+ "testing"
+
+ "code.gitea.io/gitea/modules/git/foreachref"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestFormat_Flag checks the '--format' flag value that a Format produces for
+// different sets of requested fields.
+func TestFormat_Flag(t *testing.T) {
+	cases := []struct {
+		name        string
+		givenFormat foreachref.Format
+		wantFlag    string
+	}{
+		// no reference fields requested: --format holds nothing but the
+		// reference delimiter
+		{
+			name:        "references are delimited by dual null chars",
+			givenFormat: foreachref.NewFormat(),
+			wantFlag:    "%00%00",
+		},
+		// one field: its key-value pair is followed directly by the
+		// reference delimiter, no field delimiter is needed
+		{
+			name:        "a field is a space-separated key-value pair",
+			givenFormat: foreachref.NewFormat("refname:short"),
+			wantFlag:    "refname:short %(refname:short)%00%00",
+		},
+		// two fields: a single field delimiter sits between them
+		{
+			name:        "fields are separated by a null char field-delimiter",
+			givenFormat: foreachref.NewFormat("refname:short", "author"),
+			wantFlag:    "refname:short %(refname:short)%00author %(author)%00%00",
+		},
+		// three fields, including one with a ':' modifier and an '=' argument
+		{
+			name:        "multiple fields",
+			givenFormat: foreachref.NewFormat("refname:lstrip=2", "objecttype", "objectname"),
+			wantFlag:    "refname:lstrip=2 %(refname:lstrip=2)%00objecttype %(objecttype)%00objectname %(objectname)%00%00",
+		},
+	}
+
+	for i := range cases {
+		tc := cases[i] // every subtest closure works on its own copy
+		t.Run(tc.name, func(t *testing.T) {
+			gotFlag := tc.givenFormat.Flag()
+
+			require.Equal(t, tc.wantFlag, gotFlag, "unexpected for-each-ref --format string. wanted: '%s', got: '%s'", tc.wantFlag, gotFlag)
+		})
+	}
+}
diff --git a/modules/git/foreachref/parser.go b/modules/git/foreachref/parser.go
new file mode 100644
index 00000000..de69eaa2
--- /dev/null
+++ b/modules/git/foreachref/parser.go
@@ -0,0 +1,128 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package foreachref
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "strings"
+)
+
+// Parser parses 'git for-each-ref' output according to a given output Format.
+type Parser struct {
+ // scanner tokenizes 'git for-each-ref' output into "reference
+ // paragraphs", one per reference.
+ scanner *bufio.Scanner
+
+ // format represents the '--format' string that describes the expected
+ // 'git for-each-ref' output structure.
+ format Format
+
+ // err holds the last encountered error during parsing. It is exposed
+ // through the Err method.
+ err error
+}
+
+// NewParser creates a 'git for-each-ref' output parser that will parse all
+// references in the provided Reader. The references in the output are assumed
+// to follow the specified Format.
+//
+// The returned Parser reads r lazily: input is consumed one reference at a
+// time as Next is called.
+func NewParser(r io.Reader, format Format) *Parser {
+	// in addition to the reference delimiter we specified in the --format,
+	// `git for-each-ref` will always add a newline after every reference.
+	refDelim := make([]byte, 0, len(format.refDelim)+1)
+	refDelim = append(refDelim, format.refDelim...)
+	refDelim = append(refDelim, '\n')
+
+	scanner := bufio.NewScanner(r)
+
+	// Split input into delimiter-separated "reference blocks".
+	scanner.Split(func(data []byte, atEOF bool) (int, []byte, error) {
+		// Nothing left to tokenize. Signal the end of input the same way
+		// bufio's stock split functions do, rather than emitting an empty
+		// token: the scanner panics after too many consecutive empty
+		// tokens that make no progress.
+		if atEOF && len(data) == 0 {
+			return 0, nil, nil
+		}
+		// Scan until delimiter, marking end of reference.
+		if delimIdx := bytes.Index(data, refDelim); delimIdx >= 0 {
+			return delimIdx + len(refDelim), data[:delimIdx], nil
+		}
+		// If we're at EOF, we have a final, non-terminated reference. Return it.
+		if atEOF {
+			return len(data), data, nil
+		}
+		// Not yet a full reference. Request more data.
+		return 0, nil, nil
+	})
+
+	return &Parser{
+		scanner: scanner,
+		format:  format,
+	}
+}
+
+// Next returns the next reference as a collection of key-value pairs. nil
+// denotes EOF but is also returned on errors, both when the underlying input
+// cannot be scanned (for example a failing reader or a reference that exceeds
+// the scanner's buffer) and when a reference does not match the Format. The
+// Err method should always be consulted after Next returning nil.
+//
+// It could, for example return something like:
+//
+//	{ "objecttype": "tag", "refname:short": "v1.16.4", "object": "f460b7543ed500e49c133c2cd85c8c55ee9dbe27" }
+func (p *Parser) Next() map[string]string {
+	if !p.scanner.Scan() {
+		// Scan returns false at EOF as well as on failure. Only a failure
+		// leaves a non-nil scanner error; record it so that callers do not
+		// mistake truncated output for a complete reference list.
+		if err := p.scanner.Err(); err != nil {
+			p.err = err
+		}
+		return nil
+	}
+	fields, err := p.parseRef(p.scanner.Text())
+	if err != nil {
+		p.err = err
+		return nil
+	}
+	return fields
+}
+
+// Err returns the latest encountered parsing error, or nil if no error has
+// been recorded. Consult it whenever Next returns nil to tell a failure apart
+// from the end of the input.
+func (p *Parser) Err() error {
+ return p.err
+}
+
+// parseRef parses out all key-value pairs from a single reference block, such as
+//
+//	"objecttype tag\0refname:short v1.16.4\0object f460b7543ed500e49c133c2cd85c8c55ee9dbe27"
+//
+// The block is split on the Format's field delimiter. Every resulting field
+// must be "<name> <value>" (or just "<name>" when the value is empty) and the
+// names must appear in exactly the order of the Format's fieldNames. Values
+// are returned with surrounding whitespace trimmed.
+//
+// An empty refBlock yields (nil, nil). An error is returned when the number of
+// fields differs from the number of requested fields or when a field name is
+// not the one expected at its position.
+func (p *Parser) parseRef(refBlock string) (map[string]string, error) {
+	if refBlock == "" {
+		// an empty block carries no reference; Next hands the nil map on
+		return nil, nil
+	}
+
+	fields := strings.Split(refBlock, p.format.fieldDelimStr)
+	if len(fields) != len(p.format.fieldNames) {
+		// "wanted" is what the format asked for, "was" is what the input held
+		return nil, fmt.Errorf("unexpected number of reference fields: wanted %d, was %d",
+			len(p.format.fieldNames), len(fields))
+	}
+
+	fieldValues := make(map[string]string, len(fields))
+	for i, field := range fields {
+		// The key runs up to the first space. A field whose value is empty
+		// has no space left after trimming and becomes a key with "" value.
+		fieldKey, fieldVal, _ := strings.Cut(strings.TrimSpace(field), " ")
+
+		// enforce the format order of fields
+		if wantKey := p.format.fieldNames[i]; wantKey != fieldKey {
+			return nil, fmt.Errorf("unexpected field name at position %d: wanted: '%s', was: '%s'",
+				i, wantKey, fieldKey)
+		}
+
+		fieldValues[fieldKey] = fieldVal
+	}
+
+	return fieldValues, nil
+}
diff --git a/modules/git/foreachref/parser_test.go b/modules/git/foreachref/parser_test.go
new file mode 100644
index 00000000..7a37ced3
--- /dev/null
+++ b/modules/git/foreachref/parser_test.go
@@ -0,0 +1,227 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package foreachref_test
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+	"testing"
+
+	"code.gitea.io/gitea/modules/git/foreachref"
+	"code.gitea.io/gitea/modules/json"
+
+	"github.com/stretchr/testify/require"
+)
+
+// refSlice is shorthand for a list of parsed references, each a map from
+// field name to field value.
+type refSlice = []map[string]string
+
+// TestParser feeds canned 'git for-each-ref' output to a Parser and checks
+// either the parsed references or the reported error.
+func TestParser(t *testing.T) {
+	tests := []struct {
+		name string
+
+		givenFormat foreachref.Format
+		givenInput  io.Reader
+
+		wantRefs    refSlice
+		wantErr     bool
+		expectedErr error
+	}{
+		// this would, for example, be the result when running `git
+		// for-each-ref refs/tags` on a repo without tags.
+		{
+			name: "no references on empty input",
+
+			givenFormat: foreachref.NewFormat("refname:short"),
+			givenInput:  strings.NewReader(``),
+
+			wantRefs: []map[string]string{},
+		},
+
+		// note: `git for-each-ref` will add a newline between every
+		// reference (in addition to the ref-delimiter we've chosen)
+		{
+			name: "single field requested, single reference in output",
+
+			givenFormat: foreachref.NewFormat("refname:short"),
+			givenInput:  strings.NewReader("refname:short v0.0.1\x00\x00" + "\n"),
+
+			wantRefs: []map[string]string{
+				{"refname:short": "v0.0.1"},
+			},
+		},
+		{
+			name: "single field requested, multiple references in output",
+
+			givenFormat: foreachref.NewFormat("refname:short"),
+			givenInput: strings.NewReader(
+				"refname:short v0.0.1\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00\x00" + "\n"),
+
+			wantRefs: []map[string]string{
+				{"refname:short": "v0.0.1"},
+				{"refname:short": "v0.0.2"},
+				{"refname:short": "v0.0.3"},
+			},
+		},
+
+		{
+			name: "multiple fields requested for each reference",
+
+			givenFormat: foreachref.NewFormat("refname:short", "objecttype", "objectname"),
+			givenInput: strings.NewReader(
+
+				"refname:short v0.0.1\x00objecttype commit\x00objectname 7b2c5ac9fc04fc5efafb60700713d4fa609b777b\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00objecttype commit\x00objectname a1f051bc3eba734da4772d60e2d677f47cf93ef4\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00objecttype commit\x00objectname ef82de70bb3f60c65fb8eebacbb2d122ef517385\x00\x00" + "\n",
+			),
+
+			wantRefs: []map[string]string{
+				{
+					"refname:short": "v0.0.1",
+					"objecttype":    "commit",
+					"objectname":    "7b2c5ac9fc04fc5efafb60700713d4fa609b777b",
+				},
+				{
+					"refname:short": "v0.0.2",
+					"objecttype":    "commit",
+					"objectname":    "a1f051bc3eba734da4772d60e2d677f47cf93ef4",
+				},
+				{
+					"refname:short": "v0.0.3",
+					"objecttype":    "commit",
+					"objectname":    "ef82de70bb3f60c65fb8eebacbb2d122ef517385",
+				},
+			},
+		},
+
+		{
+			name: "must handle multi-line fields such as 'content'",
+
+			givenFormat: foreachref.NewFormat("refname:short", "contents", "author"),
+			givenInput: strings.NewReader(
+				"refname:short v0.0.1\x00contents Create new buffer if not present yet (#549)\n\nFixes a nil dereference when ProcessFoo is used\nwith multiple commands.\x00author Foo Bar <foo@bar.com> 1507832733 +0200\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00contents Update CI config (#651)\n\n\x00author John Doe <john.doe@foo.com> 1521643174 +0000\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00contents Fixed code sample for bash completion (#687)\n\n\x00author Foo Baz <foo@baz.com> 1524836750 +0200\x00\x00" + "\n",
+			),
+
+			wantRefs: []map[string]string{
+				{
+					"refname:short": "v0.0.1",
+					"contents":      "Create new buffer if not present yet (#549)\n\nFixes a nil dereference when ProcessFoo is used\nwith multiple commands.",
+					"author":        "Foo Bar <foo@bar.com> 1507832733 +0200",
+				},
+				{
+					"refname:short": "v0.0.2",
+					"contents":      "Update CI config (#651)",
+					"author":        "John Doe <john.doe@foo.com> 1521643174 +0000",
+				},
+				{
+					"refname:short": "v0.0.3",
+					"contents":      "Fixed code sample for bash completion (#687)",
+					"author":        "Foo Baz <foo@baz.com> 1524836750 +0200",
+				},
+			},
+		},
+
+		{
+			name: "must handle fields without values",
+
+			givenFormat: foreachref.NewFormat("refname:short", "object", "objecttype"),
+			givenInput: strings.NewReader(
+				"refname:short v0.0.1\x00object \x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00object \x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00object \x00objecttype commit\x00\x00" + "\n",
+			),
+
+			wantRefs: []map[string]string{
+				{
+					"refname:short": "v0.0.1",
+					"object":        "",
+					"objecttype":    "commit",
+				},
+				{
+					"refname:short": "v0.0.2",
+					"object":        "",
+					"objecttype":    "commit",
+				},
+				{
+					"refname:short": "v0.0.3",
+					"object":        "",
+					"objecttype":    "commit",
+				},
+			},
+		},
+
+		// the format asks for 3 fields, but every reference in the input
+		// only carries 2 of them.
+		{
+			name: "must fail when the number of fields in the input doesn't match expected format",
+
+			givenFormat: foreachref.NewFormat("refname:short", "objecttype", "objectname"),
+			givenInput: strings.NewReader(
+				"refname:short v0.0.1\x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00objecttype commit\x00\x00" + "\n",
+			),
+
+			wantErr:     true,
+			expectedErr: errors.New("unexpected number of reference fields: wanted 3, was 2"),
+		},
+
+		{
+			name: "must fail input fields don't match expected format",
+
+			givenFormat: foreachref.NewFormat("refname:short", "objectname"),
+			givenInput: strings.NewReader(
+				"refname:short v0.0.1\x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.2\x00objecttype commit\x00\x00" + "\n" +
+					"refname:short v0.0.3\x00objecttype commit\x00\x00" + "\n",
+			),
+
+			wantErr:     true,
+			expectedErr: errors.New("unexpected field name at position 1: wanted: 'objectname', was: 'objecttype'"),
+		},
+	}
+
+	for _, test := range tests {
+		tc := test // don't close over loop variable
+		t.Run(tc.name, func(t *testing.T) {
+			parser := tc.givenFormat.Parser(tc.givenInput)
+
+			//
+			// parse references from input
+			//
+			gotRefs := make([]map[string]string, 0)
+			for {
+				ref := parser.Next()
+				if ref == nil {
+					break
+				}
+				gotRefs = append(gotRefs, ref)
+			}
+			err := parser.Err()
+
+			//
+			// verify expectations
+			//
+			if tc.wantErr {
+				require.Error(t, err)
+				require.EqualError(t, err, tc.expectedErr.Error())
+			} else {
+				require.NoError(t, err, "for-each-ref parser unexpectedly failed with: %v", err)
+				require.Equal(t, tc.wantRefs, gotRefs, "for-each-ref parser produced unexpected reference set. wanted: %v, got: %v", pretty(tc.wantRefs), pretty(gotRefs))
+			}
+		})
+	}
+}
+
+// pretty renders v as indented JSON for use in test failure messages. It
+// panics if v cannot be marshalled.
+func pretty(v any) string {
+	encoded, err := json.MarshalIndent(v, "", " ")
+	if err == nil {
+		return string(encoded)
+	}
+	// shouldn't happen
+	panic(fmt.Sprintf("json-marshalling failed: %v", err))
+}