summaryrefslogtreecommitdiffstats
path: root/src/go/pkg/matcher/glob.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/go/pkg/matcher/glob.go')
-rw-r--r--src/go/pkg/matcher/glob.go265
1 files changed, 265 insertions, 0 deletions
diff --git a/src/go/pkg/matcher/glob.go b/src/go/pkg/matcher/glob.go
new file mode 100644
index 000000000..75d977ff6
--- /dev/null
+++ b/src/go/pkg/matcher/glob.go
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package matcher
+
+import (
+ "errors"
+ "path/filepath"
+ "regexp"
+ "unicode/utf8"
+)
+
+// globMatcher implements Matcher, it uses filepath.MatchString to match.
+type globMatcher string
+
+var (
+ errBadGlobPattern = errors.New("bad glob pattern")
+ erGlobPattern = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+]|\\.|[^*?\\\[])*$`)
+)
+
+// NewGlobMatcher create a new matcher with glob format
+func NewGlobMatcher(expr string) (Matcher, error) {
+ switch expr {
+ case "":
+ return stringFullMatcher(""), nil
+ case "*":
+ return TRUE(), nil
+ }
+
+ // any strings pass this regexp check are valid pattern
+ if !erGlobPattern.MatchString(expr) {
+ return nil, errBadGlobPattern
+ }
+
+ size := len(expr)
+ chars := []rune(expr)
+ startWith := true
+ endWith := true
+ startIdx := 0
+ endIdx := size - 1
+ if chars[startIdx] == '*' {
+ startWith = false
+ startIdx = 1
+ }
+ if chars[endIdx] == '*' {
+ endWith = false
+ endIdx--
+ }
+
+ unescapedExpr := make([]rune, 0, endIdx-startIdx+1)
+ for i := startIdx; i <= endIdx; i++ {
+ ch := chars[i]
+ if ch == '\\' {
+ nextCh := chars[i+1]
+ unescapedExpr = append(unescapedExpr, nextCh)
+ i++
+ } else if isGlobMeta(ch) {
+ return globMatcher(expr), nil
+ } else {
+ unescapedExpr = append(unescapedExpr, ch)
+ }
+ }
+
+ return NewStringMatcher(string(unescapedExpr), startWith, endWith)
+}
+
+func isGlobMeta(ch rune) bool {
+ switch ch {
+ case '*', '?', '[':
+ return true
+ default:
+ return false
+ }
+}
+
+// Match matches.
+func (m globMatcher) Match(b []byte) bool {
+ return m.MatchString(string(b))
+}
+
+// MatchString matches.
+func (m globMatcher) MatchString(line string) bool {
+ rs, _ := m.globMatch(line)
+ return rs
+}
+
+func (m globMatcher) globMatch(name string) (matched bool, err error) {
+ pattern := string(m)
+Pattern:
+ for len(pattern) > 0 {
+ var star bool
+ var chunk string
+ star, chunk, pattern = scanChunk(pattern)
+ if star && chunk == "" {
+ // Trailing * matches rest of string unless it has a /.
+ // return !strings.Contains(name, string(Separator)), nil
+
+ return true, nil
+ }
+ // Look for match at current position.
+ t, ok, err := matchChunk(chunk, name)
+ // if we're the last chunk, make sure we've exhausted the name
+ // otherwise we'll give a false result even if we could still match
+ // using the star
+ if ok && (len(t) == 0 || len(pattern) > 0) {
+ name = t
+ continue
+ }
+ if err != nil {
+ return false, err
+ }
+ if star {
+ // Look for match skipping i+1 bytes.
+ // Cannot skip /.
+ for i := 0; i < len(name); i++ {
+ //for i := 0; i < len(name) && name[i] != Separator; i++ {
+ t, ok, err := matchChunk(chunk, name[i+1:])
+ if ok {
+ // if we're the last chunk, make sure we exhausted the name
+ if len(pattern) == 0 && len(t) > 0 {
+ continue
+ }
+ name = t
+ continue Pattern
+ }
+ if err != nil {
+ return false, err
+ }
+ }
+ }
+ return false, nil
+ }
+ return len(name) == 0, nil
+}
+
+// scanChunk gets the next segment of pattern, which is a non-star string
+// possibly preceded by a star.
+func scanChunk(pattern string) (star bool, chunk, rest string) {
+ for len(pattern) > 0 && pattern[0] == '*' {
+ pattern = pattern[1:]
+ star = true
+ }
+ inrange := false
+ var i int
+Scan:
+ for i = 0; i < len(pattern); i++ {
+ switch pattern[i] {
+ case '\\':
+ if i+1 < len(pattern) {
+ i++
+ }
+ case '[':
+ inrange = true
+ case ']':
+ inrange = false
+ case '*':
+ if !inrange {
+ break Scan
+ }
+ }
+ }
+ return star, pattern[0:i], pattern[i:]
+}
+
+// matchChunk checks whether chunk matches the beginning of s.
+// If so, it returns the remainder of s (after the match).
+// Chunk is all single-character operators: literals, char classes, and ?.
+func matchChunk(chunk, s string) (rest string, ok bool, err error) {
+ for len(chunk) > 0 {
+ if len(s) == 0 {
+ return
+ }
+ switch chunk[0] {
+ case '[':
+ // character class
+ r, n := utf8.DecodeRuneInString(s)
+ s = s[n:]
+ chunk = chunk[1:]
+ // We can't end right after '[', we're expecting at least
+ // a closing bracket and possibly a caret.
+ if len(chunk) == 0 {
+ err = filepath.ErrBadPattern
+ return
+ }
+ // possibly negated
+ negated := chunk[0] == '^'
+ if negated {
+ chunk = chunk[1:]
+ }
+ // parse all ranges
+ match := false
+ nrange := 0
+ for {
+ if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
+ chunk = chunk[1:]
+ break
+ }
+ var lo, hi rune
+ if lo, chunk, err = getEsc(chunk); err != nil {
+ return
+ }
+ hi = lo
+ if chunk[0] == '-' {
+ if hi, chunk, err = getEsc(chunk[1:]); err != nil {
+ return
+ }
+ }
+ if lo <= r && r <= hi {
+ match = true
+ }
+ nrange++
+ }
+ if match == negated {
+ return
+ }
+
+ case '?':
+ //if s[0] == Separator {
+ // return
+ //}
+ _, n := utf8.DecodeRuneInString(s)
+ s = s[n:]
+ chunk = chunk[1:]
+
+ case '\\':
+ chunk = chunk[1:]
+ if len(chunk) == 0 {
+ err = filepath.ErrBadPattern
+ return
+ }
+ fallthrough
+
+ default:
+ if chunk[0] != s[0] {
+ return
+ }
+ s = s[1:]
+ chunk = chunk[1:]
+ }
+ }
+ return s, true, nil
+}
+
+// getEsc gets a possibly-escaped character from chunk, for a character class.
+func getEsc(chunk string) (r rune, nchunk string, err error) {
+ if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
+ err = filepath.ErrBadPattern
+ return
+ }
+ if chunk[0] == '\\' {
+ chunk = chunk[1:]
+ if len(chunk) == 0 {
+ err = filepath.ErrBadPattern
+ return
+ }
+ }
+ r, n := utf8.DecodeRuneInString(chunk)
+ if r == utf8.RuneError && n == 1 {
+ err = filepath.ErrBadPattern
+ }
+ nchunk = chunk[n:]
+ if len(nchunk) == 0 {
+ err = filepath.ErrBadPattern
+ }
+ return
+}