diff options
Diffstat (limited to 'src/go/pkg/matcher/glob.go')
-rw-r--r-- | src/go/pkg/matcher/glob.go | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/src/go/pkg/matcher/glob.go b/src/go/pkg/matcher/glob.go new file mode 100644 index 000000000..75d977ff6 --- /dev/null +++ b/src/go/pkg/matcher/glob.go @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "errors" + "path/filepath" + "regexp" + "unicode/utf8" +) + +// globMatcher implements Matcher, it uses filepath.MatchString to match. +type globMatcher string + +var ( + errBadGlobPattern = errors.New("bad glob pattern") + erGlobPattern = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+]|\\.|[^*?\\\[])*$`) +) + +// NewGlobMatcher create a new matcher with glob format +func NewGlobMatcher(expr string) (Matcher, error) { + switch expr { + case "": + return stringFullMatcher(""), nil + case "*": + return TRUE(), nil + } + + // any strings pass this regexp check are valid pattern + if !erGlobPattern.MatchString(expr) { + return nil, errBadGlobPattern + } + + size := len(expr) + chars := []rune(expr) + startWith := true + endWith := true + startIdx := 0 + endIdx := size - 1 + if chars[startIdx] == '*' { + startWith = false + startIdx = 1 + } + if chars[endIdx] == '*' { + endWith = false + endIdx-- + } + + unescapedExpr := make([]rune, 0, endIdx-startIdx+1) + for i := startIdx; i <= endIdx; i++ { + ch := chars[i] + if ch == '\\' { + nextCh := chars[i+1] + unescapedExpr = append(unescapedExpr, nextCh) + i++ + } else if isGlobMeta(ch) { + return globMatcher(expr), nil + } else { + unescapedExpr = append(unescapedExpr, ch) + } + } + + return NewStringMatcher(string(unescapedExpr), startWith, endWith) +} + +func isGlobMeta(ch rune) bool { + switch ch { + case '*', '?', '[': + return true + default: + return false + } +} + +// Match matches. +func (m globMatcher) Match(b []byte) bool { + return m.MatchString(string(b)) +} + +// MatchString matches. +func (m globMatcher) MatchString(line string) bool { + rs, _ := m.globMatch(line) + return rs +} + +func (m globMatcher) globMatch(name string) (matched bool, err error) { + pattern := string(m) +Pattern: + for len(pattern) > 0 { + var star bool + var chunk string + star, chunk, pattern = scanChunk(pattern) + if star && chunk == "" { + // Trailing * matches rest of string unless it has a /. + // return !strings.Contains(name, string(Separator)), nil + + return true, nil + } + // Look for match at current position. + t, ok, err := matchChunk(chunk, name) + // if we're the last chunk, make sure we've exhausted the name + // otherwise we'll give a false result even if we could still match + // using the star + if ok && (len(t) == 0 || len(pattern) > 0) { + name = t + continue + } + if err != nil { + return false, err + } + if star { + // Look for match skipping i+1 bytes. + // Cannot skip /. + for i := 0; i < len(name); i++ { + //for i := 0; i < len(name) && name[i] != Separator; i++ { + t, ok, err := matchChunk(chunk, name[i+1:]) + if ok { + // if we're the last chunk, make sure we exhausted the name + if len(pattern) == 0 && len(t) > 0 { + continue + } + name = t + continue Pattern + } + if err != nil { + return false, err + } + } + } + return false, nil + } + return len(name) == 0, nil +} + +// scanChunk gets the next segment of pattern, which is a non-star string +// possibly preceded by a star. +func scanChunk(pattern string) (star bool, chunk, rest string) { + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + star = true + } + inrange := false + var i int +Scan: + for i = 0; i < len(pattern); i++ { + switch pattern[i] { + case '\\': + if i+1 < len(pattern) { + i++ + } + case '[': + inrange = true + case ']': + inrange = false + case '*': + if !inrange { + break Scan + } + } + } + return star, pattern[0:i], pattern[i:] +} + +// matchChunk checks whether chunk matches the beginning of s. +// If so, it returns the remainder of s (after the match). +// Chunk is all single-character operators: literals, char classes, and ?. +func matchChunk(chunk, s string) (rest string, ok bool, err error) { + for len(chunk) > 0 { + if len(s) == 0 { + return + } + switch chunk[0] { + case '[': + // character class + r, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + // We can't end right after '[', we're expecting at least + // a closing bracket and possibly a caret. + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + // possibly negated + negated := chunk[0] == '^' + if negated { + chunk = chunk[1:] + } + // parse all ranges + match := false + nrange := 0 + for { + if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { + chunk = chunk[1:] + break + } + var lo, hi rune + if lo, chunk, err = getEsc(chunk); err != nil { + return + } + hi = lo + if chunk[0] == '-' { + if hi, chunk, err = getEsc(chunk[1:]); err != nil { + return + } + } + if lo <= r && r <= hi { + match = true + } + nrange++ + } + if match == negated { + return + } + + case '?': + //if s[0] == Separator { + // return + //} + _, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + + case '\\': + chunk = chunk[1:] + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + fallthrough + + default: + if chunk[0] != s[0] { + return + } + s = s[1:] + chunk = chunk[1:] + } + } + return s, true, nil +} + +// getEsc gets a possibly-escaped character from chunk, for a character class. +func getEsc(chunk string) (r rune, nchunk string, err error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = filepath.ErrBadPattern + return + } + if chunk[0] == '\\' { + chunk = chunk[1:] + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + } + r, n := utf8.DecodeRuneInString(chunk) + if r == utf8.RuneError && n == 1 { + err = filepath.ErrBadPattern + } + nchunk = chunk[n:] + if len(nchunk) == 0 { + err = filepath.ErrBadPattern + } + return +} |