diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-11-25 14:45:37 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-11-25 14:48:03 +0000 |
commit | e55403ed71282d7bfd8b56df219de3c28a8af064 (patch) | |
tree | 524889e5becb81643bf8741e3082955dca076f09 /src/go/pkg/matcher | |
parent | Releasing debian version 1.47.5-1. (diff) | |
download | netdata-e55403ed71282d7bfd8b56df219de3c28a8af064.tar.xz netdata-e55403ed71282d7bfd8b56df219de3c28a8af064.zip |
Merging upstream version 2.0.3+dfsg:
- does not include dygraphs anymore (Closes: #923993)
- does not include pako anymore (Closes: #1042533)
- does not include dashboard binaries anymore (Closes: #1045145)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/go/pkg/matcher')
-rw-r--r-- | src/go/pkg/matcher/README.md | 134 | ||||
-rw-r--r-- | src/go/pkg/matcher/cache.go | 56 | ||||
-rw-r--r-- | src/go/pkg/matcher/cache_test.go | 53 | ||||
-rw-r--r-- | src/go/pkg/matcher/doc.go | 40 | ||||
-rw-r--r-- | src/go/pkg/matcher/doc_test.go | 51 | ||||
-rw-r--r-- | src/go/pkg/matcher/expr.go | 62 | ||||
-rw-r--r-- | src/go/pkg/matcher/expr_test.go | 100 | ||||
-rw-r--r-- | src/go/pkg/matcher/glob.go | 265 | ||||
-rw-r--r-- | src/go/pkg/matcher/glob_test.go | 97 | ||||
-rw-r--r-- | src/go/pkg/matcher/logical.go | 101 | ||||
-rw-r--r-- | src/go/pkg/matcher/logical_test.go | 97 | ||||
-rw-r--r-- | src/go/pkg/matcher/matcher.go | 149 | ||||
-rw-r--r-- | src/go/pkg/matcher/matcher_test.go | 122 | ||||
-rw-r--r-- | src/go/pkg/matcher/regexp.go | 60 | ||||
-rw-r--r-- | src/go/pkg/matcher/regexp_test.go | 66 | ||||
-rw-r--r-- | src/go/pkg/matcher/simple_patterns.go | 65 | ||||
-rw-r--r-- | src/go/pkg/matcher/simple_patterns_test.go | 88 | ||||
-rw-r--r-- | src/go/pkg/matcher/string.go | 48 | ||||
-rw-r--r-- | src/go/pkg/matcher/string_test.go | 62 |
19 files changed, 1716 insertions, 0 deletions
diff --git a/src/go/pkg/matcher/README.md b/src/go/pkg/matcher/README.md new file mode 100644 index 000000000..a4428b581 --- /dev/null +++ b/src/go/pkg/matcher/README.md @@ -0,0 +1,134 @@ +# matcher +## Supported Format + +* string +* glob +* regexp +* simple patterns + +Depending on the symbol at the start of the string, the `matcher` will use one of the supported formats. + +| matcher | short format | long format | +|-----------------|--------------|-------------------| +| string | ` =` | `string` | +| glob | `*` | `glob` | +| regexp | `~` | `regexp` | +| simple patterns | | `simple_patterns` | + +Example: + +- `* pattern`: It will use the `glob` matcher to find the `pattern` in the string. + +### Syntax + +**Tip**: Read `::=` as `is defined as`. + +``` +Short Syntax + [ <not> ] <format> <space> <expr> + + <not> ::= '!' + negative expression + <format> ::= [ '=', '~', '*' ] + '=' means string match + '~' means regexp match + '*' means glob match + <space> ::= { ' ' | '\t' | '\n' | '\n' | '\r' } + <expr> ::= any string + + Long Syntax + [ <not> ] <format> <separator> <expr> + + <format> ::= [ 'string' | 'glob' | 'regexp' | 'simple_patterns' ] + <not> ::= '!' + negative expression + <separator> ::= ':' + <expr> ::= any string +``` + +When using the short syntax, you can enable the glob format by starting the string with a `*`, while in the long syntax +you need to define it more explicitly. The following examples are identical. `simple_patterns` can be used **only** with +the long syntax. + +Examples: + +- Short Syntax: `'* * '` +- Long Syntax: `'glob:*'` + +### String matcher + +The string matcher reports whether the given value equals to the string. + +Examples: + +- `'= foo'` matches only if the string is `foo`. +- `'!= bar'` matches any string that is not `bar`. + +String matcher means **exact match** of the `string`. There are other string match related cases: + +- string has prefix `something` +- string has suffix `something` +- string contains `something` + +This is achievable using the `glob` matcher: + +- `* PREFIX*`, means that it matches with any string that *starts* with `PREFIX`, e.g `PREFIXnetdata` +- `* *SUFFIX`, means that it matches with any string that *ends* with `SUFFIX`, e.g `netdataSUFFIX` +- `* *SUBSTRING*`, means that it matches with any string that *contains* `SUBSTRING`, e.g `netdataSUBSTRINGnetdata` + +### Glob matcher + +The glob matcher reports whether the given value matches the wildcard pattern. It uses the standard `golang` +library `path`. You can read more about the library in the [golang documentation](https://golang.org/pkg/path/#Match), +where you can also practice with the library in order to learn the syntax and use it in your Netdata configuration. + +The pattern syntax is: + +``` + pattern: + { term } + term: + '*' matches any sequence of characters + '?' matches any single character + '[' [ '^' ] { character-range } ']' + character class (must be non-empty) + c matches character c (c != '*', '?', '\\', '[') + '\\' c matches character c + + character-range: + c matches character c (c != '\\', '-', ']') + '\\' c matches character c + lo '-' hi matches character c for lo <= c <= hi +``` + +Examples: + +- `* ?` matches any string that is a single character. +- `'?a'` matches any 2 character string that starts with any character and the second character is `a`, like `ba` but + not `bb` or `bba`. +- `'[^abc]'` matches any character that is NOT a,b,c. `'[abc]'` matches only a, b, c. +- `'*[a-d]'` matches any string (`*`) that ends with a character that is between `a` and `d` (i.e `a,b,c,d`). + +### Regexp matcher + +The regexp matcher reports whether the given value matches the RegExp pattern ( use regexp.Match ). + +The RegExp syntax is described at https://golang.org/pkg/regexp/syntax/. + +Learn more about regular expressions at [RegexOne](https://regexone.com/). + +### Simple patterns matcher + +The simple patterns matcher reports whether the given value matches the simple patterns. + +Simple patterns are a space separated list of words. Each word may use any number of wildcards `*`. Simple patterns +allow negative matches by prefixing a word with `!`. + +Examples: + +- `!*bad* *` matches anything, except all those that contain the word bad. +- `*foobar* !foo* !*bar *` matches everything containing foobar, except strings that start with foo or end with bar. + + + + diff --git a/src/go/pkg/matcher/cache.go b/src/go/pkg/matcher/cache.go new file mode 100644 index 000000000..4594fa06f --- /dev/null +++ b/src/go/pkg/matcher/cache.go @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import "sync" + +type ( + cachedMatcher struct { + matcher Matcher + + mux sync.RWMutex + cache map[string]bool + } +) + +// WithCache adds cache to the matcher. +func WithCache(m Matcher) Matcher { + switch m { + case TRUE(), FALSE(): + return m + default: + return &cachedMatcher{matcher: m, cache: make(map[string]bool)} + } +} + +func (m *cachedMatcher) Match(b []byte) bool { + s := string(b) + if result, ok := m.fetch(s); ok { + return result + } + result := m.matcher.Match(b) + m.put(s, result) + return result +} + +func (m *cachedMatcher) MatchString(s string) bool { + if result, ok := m.fetch(s); ok { + return result + } + result := m.matcher.MatchString(s) + m.put(s, result) + return result +} + +func (m *cachedMatcher) fetch(key string) (result bool, ok bool) { + m.mux.RLock() + result, ok = m.cache[key] + m.mux.RUnlock() + return +} + +func (m *cachedMatcher) put(key string, result bool) { + m.mux.Lock() + m.cache[key] = result + m.mux.Unlock() +} diff --git a/src/go/pkg/matcher/cache_test.go b/src/go/pkg/matcher/cache_test.go new file mode 100644 index 000000000..a545777b3 --- /dev/null +++ b/src/go/pkg/matcher/cache_test.go @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWithCache(t *testing.T) { + regMatcher, _ := NewRegExpMatcher("[0-9]+") + cached := WithCache(regMatcher) + + assert.True(t, cached.MatchString("1")) + assert.True(t, cached.MatchString("1")) + assert.True(t, cached.Match([]byte("2"))) + assert.True(t, cached.Match([]byte("2"))) +} + +func TestWithCache_specialCase(t *testing.T) { + assert.Equal(t, TRUE(), WithCache(TRUE())) + assert.Equal(t, FALSE(), WithCache(FALSE())) +} +func BenchmarkCachedMatcher_MatchString_cache_hit(b *testing.B) { + benchmarks := []struct { + name string + expr string + target string + }{ + {"stringFullMatcher", "= abc123", "abc123"}, + {"stringPrefixMatcher", "~ ^abc123", "abc123456"}, + {"stringSuffixMatcher", "~ abc123$", "hello abc123"}, + {"stringSuffixMatcher", "~ abc123", "hello abc123 world"}, + {"globMatcher", "* abc*def", "abc12345678def"}, + {"regexp", "~ [0-9]+", "1234567890"}, + } + for _, bm := range benchmarks { + m := Must(Parse(bm.expr)) + b.Run(bm.name+"_raw", func(b *testing.B) { + for i := 0; i < b.N; i++ { + m.MatchString(bm.target) + } + }) + b.Run(bm.name+"_cache", func(b *testing.B) { + cached := WithCache(m) + b.ResetTimer() + for i := 0; i < b.N; i++ { + cached.MatchString(bm.target) + } + }) + } +} diff --git a/src/go/pkg/matcher/doc.go b/src/go/pkg/matcher/doc.go new file mode 100644 index 000000000..33b06988d --- /dev/null +++ b/src/go/pkg/matcher/doc.go @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +/* +Package matcher implements vary formats of string matcher. + +Supported Format + + string + glob + regexp + simple patterns + +The string matcher reports whether the given value equals to the string ( use == ). + +The glob matcher reports whether the given value matches the wildcard pattern. +The pattern syntax is: + + pattern: + { term } + term: + '*' matches any sequence of characters + '?' matches any single character + '[' [ '^' ] { character-range } ']' + character class (must be non-empty) + c matches character c (c != '*', '?', '\\', '[') + '\\' c matches character c + + character-range: + c matches character c (c != '\\', '-', ']') + '\\' c matches character c + lo '-' hi matches character c for lo <= c <= hi + +The regexp matcher reports whether the given value matches the RegExp pattern ( use regexp.Match ). +The RegExp syntax is described at https://golang.org/pkg/regexp/syntax/. + +The simple patterns matcher reports whether the given value matches the simple patterns. +The simple patterns is a custom format used in netdata, +it's syntax is described at https://docs.netdata.cloud/libnetdata/simple_pattern/. +*/ +package matcher diff --git a/src/go/pkg/matcher/doc_test.go b/src/go/pkg/matcher/doc_test.go new file mode 100644 index 000000000..46c7467ac --- /dev/null +++ b/src/go/pkg/matcher/doc_test.go @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher_test + +import ( + "github.com/netdata/netdata/go/plugins/pkg/matcher" +) + +func ExampleNew_string_format() { + // create a string matcher, which perform full text match + m, err := matcher.New(matcher.FmtString, "hello") + if err != nil { + panic(err) + } + m.MatchString("hello") // => true + m.MatchString("hello world") // => false +} + +func ExampleNew_glob_format() { + // create a glob matcher, which perform wildcard match + m, err := matcher.New(matcher.FmtString, "hello*") + if err != nil { + panic(err) + } + m.MatchString("hello") // => true + m.MatchString("hello world") // => true + m.MatchString("Hello world") // => false +} + +func ExampleNew_simple_patterns_format() { + // create a simple patterns matcher, which perform wildcard match + m, err := matcher.New(matcher.FmtSimplePattern, "hello* !*world *") + if err != nil { + panic(err) + } + m.MatchString("hello") // => true + m.MatchString("hello world") // => true + m.MatchString("Hello world") // => false + m.MatchString("Hello world!") // => false +} + +func ExampleNew_regexp_format() { + // create a regexp matcher, which perform wildcard match + m, err := matcher.New(matcher.FmtRegExp, "[0-9]+") + if err != nil { + panic(err) + } + m.MatchString("1") // => true + m.MatchString("1a") // => true + m.MatchString("a") // => false +} diff --git a/src/go/pkg/matcher/expr.go b/src/go/pkg/matcher/expr.go new file mode 100644 index 000000000..e5ea0cb2e --- /dev/null +++ b/src/go/pkg/matcher/expr.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "errors" + "fmt" +) + +type ( + Expr interface { + Parse() (Matcher, error) + } + + // SimpleExpr is a simple expression to describe the condition: + // (includes[0].Match(v) || includes[1].Match(v) || ...) && !(excludes[0].Match(v) || excludes[1].Match(v) || ...) + SimpleExpr struct { + Includes []string `yaml:"includes,omitempty" json:"includes"` + Excludes []string `yaml:"excludes,omitempty" json:"excludes"` + } +) + +var ( + ErrEmptyExpr = errors.New("empty expression") +) + +// Empty returns true if both Includes and Excludes are empty. You can't +func (s *SimpleExpr) Empty() bool { + return len(s.Includes) == 0 && len(s.Excludes) == 0 +} + +// Parse parses the given matchers in Includes and Excludes +func (s *SimpleExpr) Parse() (Matcher, error) { + if len(s.Includes) == 0 && len(s.Excludes) == 0 { + return nil, ErrEmptyExpr + } + var ( + includes = FALSE() + excludes = FALSE() + ) + if len(s.Includes) > 0 { + for _, item := range s.Includes { + m, err := Parse(item) + if err != nil { + return nil, fmt.Errorf("parse matcher %q error: %v", item, err) + } + includes = Or(includes, m) + } + } else { + includes = TRUE() + } + + for _, item := range s.Excludes { + m, err := Parse(item) + if err != nil { + return nil, fmt.Errorf("parse matcher %q error: %v", item, err) + } + excludes = Or(excludes, m) + } + + return And(includes, Not(excludes)), nil +} diff --git a/src/go/pkg/matcher/expr_test.go b/src/go/pkg/matcher/expr_test.go new file mode 100644 index 000000000..93a183226 --- /dev/null +++ b/src/go/pkg/matcher/expr_test.go @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSimpleExpr_none(t *testing.T) { + expr := &SimpleExpr{} + + m, err := expr.Parse() + assert.EqualError(t, err, ErrEmptyExpr.Error()) + assert.Nil(t, m) +} + +func TestSimpleExpr_include(t *testing.T) { + expr := &SimpleExpr{ + Includes: []string{ + "~ /api/", + "~ .php$", + }, + } + + m, err := expr.Parse() + assert.NoError(t, err) + + assert.True(t, m.MatchString("/api/a.php")) + assert.True(t, m.MatchString("/api/a.php2")) + assert.True(t, m.MatchString("/api2/a.php")) + assert.True(t, m.MatchString("/api/img.php")) + assert.False(t, m.MatchString("/api2/img.php2")) +} + +func TestSimpleExpr_exclude(t *testing.T) { + expr := &SimpleExpr{ + Excludes: []string{ + "~ /api/img", + }, + } + + m, err := expr.Parse() + assert.NoError(t, err) + + assert.True(t, m.MatchString("/api/a.php")) + assert.True(t, m.MatchString("/api/a.php2")) + assert.True(t, m.MatchString("/api2/a.php")) + assert.False(t, m.MatchString("/api/img.php")) + assert.True(t, m.MatchString("/api2/img.php2")) +} + +func TestSimpleExpr_both(t *testing.T) { + expr := &SimpleExpr{ + Includes: []string{ + "~ /api/", + "~ .php$", + }, + Excludes: []string{ + "~ /api/img", + }, + } + + m, err := expr.Parse() + assert.NoError(t, err) + + assert.True(t, m.MatchString("/api/a.php")) + assert.True(t, m.MatchString("/api/a.php2")) + assert.True(t, m.MatchString("/api2/a.php")) + assert.False(t, m.MatchString("/api/img.php")) + assert.False(t, m.MatchString("/api2/img.php2")) +} + +func TestSimpleExpr_Parse_NG(t *testing.T) { + { + expr := &SimpleExpr{ + Includes: []string{ + "~ (ab", + "~ .php$", + }, + } + + m, err := expr.Parse() + assert.Error(t, err) + assert.Nil(t, m) + } + { + expr := &SimpleExpr{ + Excludes: []string{ + "~ (ab", + "~ .php$", + }, + } + + m, err := expr.Parse() + assert.Error(t, err) + assert.Nil(t, m) + } +} diff --git a/src/go/pkg/matcher/glob.go b/src/go/pkg/matcher/glob.go new file mode 100644 index 000000000..75d977ff6 --- /dev/null +++ b/src/go/pkg/matcher/glob.go @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "errors" + "path/filepath" + "regexp" + "unicode/utf8" +) + +// globMatcher implements Matcher, it uses filepath.MatchString to match. +type globMatcher string + +var ( + errBadGlobPattern = errors.New("bad glob pattern") + erGlobPattern = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+]|\\.|[^*?\\\[])*$`) +) + +// NewGlobMatcher create a new matcher with glob format +func NewGlobMatcher(expr string) (Matcher, error) { + switch expr { + case "": + return stringFullMatcher(""), nil + case "*": + return TRUE(), nil + } + + // any strings pass this regexp check are valid pattern + if !erGlobPattern.MatchString(expr) { + return nil, errBadGlobPattern + } + + size := len(expr) + chars := []rune(expr) + startWith := true + endWith := true + startIdx := 0 + endIdx := size - 1 + if chars[startIdx] == '*' { + startWith = false + startIdx = 1 + } + if chars[endIdx] == '*' { + endWith = false + endIdx-- + } + + unescapedExpr := make([]rune, 0, endIdx-startIdx+1) + for i := startIdx; i <= endIdx; i++ { + ch := chars[i] + if ch == '\\' { + nextCh := chars[i+1] + unescapedExpr = append(unescapedExpr, nextCh) + i++ + } else if isGlobMeta(ch) { + return globMatcher(expr), nil + } else { + unescapedExpr = append(unescapedExpr, ch) + } + } + + return NewStringMatcher(string(unescapedExpr), startWith, endWith) +} + +func isGlobMeta(ch rune) bool { + switch ch { + case '*', '?', '[': + return true + default: + return false + } +} + +// Match matches. +func (m globMatcher) Match(b []byte) bool { + return m.MatchString(string(b)) +} + +// MatchString matches. +func (m globMatcher) MatchString(line string) bool { + rs, _ := m.globMatch(line) + return rs +} + +func (m globMatcher) globMatch(name string) (matched bool, err error) { + pattern := string(m) +Pattern: + for len(pattern) > 0 { + var star bool + var chunk string + star, chunk, pattern = scanChunk(pattern) + if star && chunk == "" { + // Trailing * matches rest of string unless it has a /. + // return !strings.Contains(name, string(Separator)), nil + + return true, nil + } + // Look for match at current position. + t, ok, err := matchChunk(chunk, name) + // if we're the last chunk, make sure we've exhausted the name + // otherwise we'll give a false result even if we could still match + // using the star + if ok && (len(t) == 0 || len(pattern) > 0) { + name = t + continue + } + if err != nil { + return false, err + } + if star { + // Look for match skipping i+1 bytes. + // Cannot skip /. + for i := 0; i < len(name); i++ { + //for i := 0; i < len(name) && name[i] != Separator; i++ { + t, ok, err := matchChunk(chunk, name[i+1:]) + if ok { + // if we're the last chunk, make sure we exhausted the name + if len(pattern) == 0 && len(t) > 0 { + continue + } + name = t + continue Pattern + } + if err != nil { + return false, err + } + } + } + return false, nil + } + return len(name) == 0, nil +} + +// scanChunk gets the next segment of pattern, which is a non-star string +// possibly preceded by a star. +func scanChunk(pattern string) (star bool, chunk, rest string) { + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + star = true + } + inrange := false + var i int +Scan: + for i = 0; i < len(pattern); i++ { + switch pattern[i] { + case '\\': + if i+1 < len(pattern) { + i++ + } + case '[': + inrange = true + case ']': + inrange = false + case '*': + if !inrange { + break Scan + } + } + } + return star, pattern[0:i], pattern[i:] +} + +// matchChunk checks whether chunk matches the beginning of s. +// If so, it returns the remainder of s (after the match). +// Chunk is all single-character operators: literals, char classes, and ?. +func matchChunk(chunk, s string) (rest string, ok bool, err error) { + for len(chunk) > 0 { + if len(s) == 0 { + return + } + switch chunk[0] { + case '[': + // character class + r, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + // We can't end right after '[', we're expecting at least + // a closing bracket and possibly a caret. + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + // possibly negated + negated := chunk[0] == '^' + if negated { + chunk = chunk[1:] + } + // parse all ranges + match := false + nrange := 0 + for { + if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { + chunk = chunk[1:] + break + } + var lo, hi rune + if lo, chunk, err = getEsc(chunk); err != nil { + return + } + hi = lo + if chunk[0] == '-' { + if hi, chunk, err = getEsc(chunk[1:]); err != nil { + return + } + } + if lo <= r && r <= hi { + match = true + } + nrange++ + } + if match == negated { + return + } + + case '?': + //if s[0] == Separator { + // return + //} + _, n := utf8.DecodeRuneInString(s) + s = s[n:] + chunk = chunk[1:] + + case '\\': + chunk = chunk[1:] + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + fallthrough + + default: + if chunk[0] != s[0] { + return + } + s = s[1:] + chunk = chunk[1:] + } + } + return s, true, nil +} + +// getEsc gets a possibly-escaped character from chunk, for a character class. +func getEsc(chunk string) (r rune, nchunk string, err error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = filepath.ErrBadPattern + return + } + if chunk[0] == '\\' { + chunk = chunk[1:] + if len(chunk) == 0 { + err = filepath.ErrBadPattern + return + } + } + r, n := utf8.DecodeRuneInString(chunk) + if r == utf8.RuneError && n == 1 { + err = filepath.ErrBadPattern + } + nchunk = chunk[n:] + if len(nchunk) == 0 { + err = filepath.ErrBadPattern + } + return +} diff --git a/src/go/pkg/matcher/glob_test.go b/src/go/pkg/matcher/glob_test.go new file mode 100644 index 000000000..09d456105 --- /dev/null +++ b/src/go/pkg/matcher/glob_test.go @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewGlobMatcher(t *testing.T) { + cases := []struct { + expr string + matcher Matcher + }{ + {"", stringFullMatcher("")}, + {"a", stringFullMatcher("a")}, + {"a*b", globMatcher("a*b")}, + {`a*\b`, globMatcher(`a*\b`)}, + {`a\[`, stringFullMatcher(`a[`)}, + {`ab\`, nil}, + {`ab[`, nil}, + {`ab]`, stringFullMatcher("ab]")}, + } + for _, c := range cases { + t.Run(c.expr, func(t *testing.T) { + m, err := NewGlobMatcher(c.expr) + if c.matcher != nil { + assert.NoError(t, err) + assert.Equal(t, c.matcher, m) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestGlobMatcher_MatchString(t *testing.T) { + + cases := []struct { + expected bool + expr string + line string + }{ + {true, "/a/*/d", "/a/b/c/d"}, + {true, "foo*", "foo123"}, + {true, "*foo*", "123foo123"}, + {true, "*foo", "123foo"}, + {true, "foo*bar", "foobar"}, + {true, "foo*bar", "foo baz bar"}, + {true, "a[bc]d", "abd"}, + {true, "a[^bc]d", "add"}, + {true, "a??d", "abcd"}, + {true, `a\??d`, "a?cd"}, + {true, "a[b-z]d", "abd"}, + {false, "/a/*/d", "a/b/c/d"}, + {false, "/a/*/d", "This will fail!"}, + } + + for _, c := range cases { + t.Run(c.line, func(t *testing.T) { + m := globMatcher(c.expr) + assert.Equal(t, c.expected, m.Match([]byte(c.line))) + assert.Equal(t, c.expected, m.MatchString(c.line)) + }) + } +} + +func BenchmarkGlob_MatchString(b *testing.B) { + benchmarks := []struct { + expr string + test string + }{ + {"", ""}, + {"abc", "abcd"}, + {"*abc", "abcd"}, + {"abc*", "abcd"}, + {"*abc*", "abcd"}, + {"[a-z]", "abcd"}, + } + for _, bm := range benchmarks { + b.Run(bm.expr+"_raw", func(b *testing.B) { + m := globMatcher(bm.expr) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.MatchString(bm.test) + } + }) + b.Run(bm.expr+"_optimized", func(b *testing.B) { + m, _ := NewGlobMatcher(bm.expr) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.MatchString(bm.test) + } + }) + } +} diff --git a/src/go/pkg/matcher/logical.go b/src/go/pkg/matcher/logical.go new file mode 100644 index 000000000..af07be8f4 --- /dev/null +++ b/src/go/pkg/matcher/logical.go @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +type ( + trueMatcher struct{} + falseMatcher struct{} + andMatcher struct{ lhs, rhs Matcher } + orMatcher struct{ lhs, rhs Matcher } + negMatcher struct{ Matcher } +) + +var ( + matcherT trueMatcher + matcherF falseMatcher +) + +// TRUE returns a matcher which always returns true +func TRUE() Matcher { + return matcherT +} + +// FALSE returns a matcher which always returns false +func FALSE() Matcher { + return matcherF +} + +// Not returns a matcher which positive the sub-matcher's result +func Not(m Matcher) Matcher { + switch m { + case TRUE(): + return FALSE() + case FALSE(): + return TRUE() + default: + return negMatcher{m} + } +} + +// And returns a matcher which returns true only if all of it's sub-matcher return true +func And(lhs, rhs Matcher, others ...Matcher) Matcher { + var matcher Matcher + switch lhs { + case TRUE(): + matcher = rhs + case FALSE(): + matcher = FALSE() + default: + switch rhs { + case TRUE(): + matcher = lhs + case FALSE(): + matcher = FALSE() + default: + matcher = andMatcher{lhs, rhs} + } + } + if len(others) > 0 { + return And(matcher, others[0], others[1:]...) + } + return matcher +} + +// Or returns a matcher which returns true if any of it's sub-matcher return true +func Or(lhs, rhs Matcher, others ...Matcher) Matcher { + var matcher Matcher + switch lhs { + case TRUE(): + matcher = TRUE() + case FALSE(): + matcher = rhs + default: + switch rhs { + case TRUE(): + matcher = TRUE() + case FALSE(): + matcher = lhs + default: + matcher = orMatcher{lhs, rhs} + } + } + if len(others) > 0 { + return Or(matcher, others[0], others[1:]...) + } + return matcher +} + +func (trueMatcher) Match(_ []byte) bool { return true } +func (trueMatcher) MatchString(_ string) bool { return true } + +func (falseMatcher) Match(_ []byte) bool { return false } +func (falseMatcher) MatchString(_ string) bool { return false } + +func (m andMatcher) Match(b []byte) bool { return m.lhs.Match(b) && m.rhs.Match(b) } +func (m andMatcher) MatchString(s string) bool { return m.lhs.MatchString(s) && m.rhs.MatchString(s) } + +func (m orMatcher) Match(b []byte) bool { return m.lhs.Match(b) || m.rhs.Match(b) } +func (m orMatcher) MatchString(s string) bool { return m.lhs.MatchString(s) || m.rhs.MatchString(s) } + +func (m negMatcher) Match(b []byte) bool { return !m.Matcher.Match(b) } +func (m negMatcher) MatchString(s string) bool { return !m.Matcher.MatchString(s) } diff --git a/src/go/pkg/matcher/logical_test.go b/src/go/pkg/matcher/logical_test.go new file mode 100644 index 000000000..64491f1ad --- /dev/null +++ b/src/go/pkg/matcher/logical_test.go @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTRUE(t *testing.T) { + assert.True(t, TRUE().Match(nil)) + assert.True(t, TRUE().MatchString("")) +} + +func TestFALSE(t *testing.T) { + assert.False(t, FALSE().Match(nil)) + assert.False(t, FALSE().MatchString("")) +} + +func TestAnd(t *testing.T) { + assert.Equal(t, + matcherF, + And(FALSE(), stringFullMatcher(""))) + assert.Equal(t, + matcherF, + And(stringFullMatcher(""), FALSE())) + + assert.Equal(t, + stringFullMatcher(""), + And(TRUE(), stringFullMatcher(""))) + assert.Equal(t, + stringFullMatcher(""), + And(stringFullMatcher(""), TRUE())) + + assert.Equal(t, + andMatcher{stringPartialMatcher("a"), stringPartialMatcher("b")}, + And(stringPartialMatcher("a"), stringPartialMatcher("b"))) + + assert.Equal(t, + andMatcher{ + andMatcher{stringPartialMatcher("a"), stringPartialMatcher("b")}, + stringPartialMatcher("c"), + }, + And(stringPartialMatcher("a"), stringPartialMatcher("b"), stringPartialMatcher("c"))) +} + +func TestOr(t *testing.T) { + assert.Equal(t, + stringFullMatcher(""), + Or(FALSE(), stringFullMatcher(""))) + assert.Equal(t, + stringFullMatcher(""), + Or(stringFullMatcher(""), FALSE())) + + assert.Equal(t, + TRUE(), + Or(TRUE(), stringFullMatcher(""))) + assert.Equal(t, + TRUE(), + Or(stringFullMatcher(""), TRUE())) + + assert.Equal(t, + orMatcher{stringPartialMatcher("a"), stringPartialMatcher("b")}, + Or(stringPartialMatcher("a"), stringPartialMatcher("b"))) + + assert.Equal(t, + orMatcher{ + orMatcher{stringPartialMatcher("a"), stringPartialMatcher("b")}, + stringPartialMatcher("c"), + }, + Or(stringPartialMatcher("a"), stringPartialMatcher("b"), stringPartialMatcher("c"))) +} + +func TestAndMatcher_Match(t *testing.T) { + and := andMatcher{ + stringPrefixMatcher("a"), + stringSuffixMatcher("c"), + } + assert.True(t, and.Match([]byte("abc"))) + assert.True(t, and.MatchString("abc")) +} + +func TestOrMatcher_Match(t *testing.T) { + or := orMatcher{ + stringPrefixMatcher("a"), + stringPrefixMatcher("c"), + } + assert.True(t, or.Match([]byte("aaa"))) + assert.True(t, or.MatchString("ccc")) +} + +func TestNegMatcher_Match(t *testing.T) { + neg := negMatcher{stringPrefixMatcher("a")} + assert.False(t, neg.Match([]byte("aaa"))) + assert.True(t, neg.MatchString("ccc")) +} diff --git a/src/go/pkg/matcher/matcher.go b/src/go/pkg/matcher/matcher.go new file mode 100644 index 000000000..76d903325 --- /dev/null +++ b/src/go/pkg/matcher/matcher.go @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "errors" + "fmt" + "regexp" +) + +type ( + // Matcher is an interface that wraps MatchString method. + Matcher interface { + // Match performs match against given []byte + Match(b []byte) bool + // MatchString performs match against given string + MatchString(string) bool + } + + // Format matcher format + Format string +) + +const ( + // FmtString is a string match format. + FmtString Format = "string" + // FmtGlob is a glob match format. + FmtGlob Format = "glob" + // FmtRegExp is a regex match format. + FmtRegExp Format = "regexp" + // FmtSimplePattern is a simple pattern match format + // https://docs.netdata.cloud/libnetdata/simple_pattern/ + FmtSimplePattern Format = "simple_patterns" + + // Separator is a separator between match format and expression. + Separator = ":" +) + +const ( + symString = "=" + symGlob = "*" + symRegExp = "~" +) + +var ( + reShortSyntax = regexp.MustCompile(`(?s)^(!)?(.)\s*(.*)$`) + reLongSyntax = regexp.MustCompile(`(?s)^(!)?([^:]+):(.*)$`) + + errNotShortSyntax = errors.New("not short syntax") +) + +// Must is a helper that wraps a call to a function returning (Matcher, error) and panics if the error is non-nil. +// It is intended for use in variable initializations such as +// +// var m = matcher.Must(matcher.New(matcher.FmtString, "hello world")) +func Must(m Matcher, err error) Matcher { + if err != nil { + panic(err) + } + return m +} + +// New create a matcher +func New(format Format, expr string) (Matcher, error) { + switch format { + case FmtString: + return NewStringMatcher(expr, true, true) + case FmtGlob: + return NewGlobMatcher(expr) + case FmtRegExp: + return NewRegExpMatcher(expr) + case FmtSimplePattern: + return NewSimplePatternsMatcher(expr) + default: + return nil, fmt.Errorf("unsupported matcher format: '%s'", format) + } +} + +// Parse parses line and returns appropriate matcher based on matched format. +// +// Short Syntax +// +// <line> ::= [ <not> ] <format> <space> <expr> +// <not> ::= '!' +// negative expression +// <format> ::= [ '=', '~', '*' ] +// '=' means string match +// '~' means regexp match +// '*' means glob match +// <space> ::= { ' ' | '\t' | '\n' | '\n' | '\r' } +// <expr> ::= any string +// +// Long Syntax +// +// <line> ::= [ <not> ] <format> <separator> <expr> +// <format> ::= [ 'string' | 'glob' | 'regexp' | 'simple_patterns' ] +// <not> ::= '!' +// negative expression +// <separator> ::= ':' +// <expr> ::= any string +func Parse(line string) (Matcher, error) { + matcher, err := parseShortFormat(line) + if err == nil { + return matcher, nil + } + return parseLongSyntax(line) +} + +func parseShortFormat(line string) (Matcher, error) { + m := reShortSyntax.FindStringSubmatch(line) + if m == nil { + return nil, errNotShortSyntax + } + var format Format + switch m[2] { + case symString: + format = FmtString + case symGlob: + format = FmtGlob + case symRegExp: + format = FmtRegExp + default: + return nil, fmt.Errorf("invalid short syntax: unknown symbol '%s'", m[2]) + } + expr := m[3] + matcher, err := New(format, expr) + if err != nil { + return nil, err + } + if m[1] != "" { + matcher = Not(matcher) + } + return matcher, nil +} + +func parseLongSyntax(line string) (Matcher, error) { + m := reLongSyntax.FindStringSubmatch(line) + if m == nil { + return nil, fmt.Errorf("invalid syntax") + } + matcher, err := New(Format(m[2]), m[3]) + if err != nil { + return nil, err + } + if m[1] != "" { + matcher = Not(matcher) + } + return matcher, nil +} diff --git a/src/go/pkg/matcher/matcher_test.go b/src/go/pkg/matcher/matcher_test.go new file mode 100644 index 000000000..f304d983d --- /dev/null +++ b/src/go/pkg/matcher/matcher_test.go @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "log" + "reflect" + "regexp" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stretchr/testify/assert" +) + +func TestParse(t *testing.T) { + tests := []struct { + valid bool + line string + matcher Matcher + }{ + {false, "", nil}, + {false, "abc", nil}, + {false, `~ abc\`, nil}, + {false, `invalid_fmt:abc`, nil}, + + {true, "=", stringFullMatcher("")}, + {true, "= ", stringFullMatcher("")}, + {true, "=full", stringFullMatcher("full")}, + {true, "= full", stringFullMatcher("full")}, + {true, "= \t\ffull", stringFullMatcher("full")}, + + {true, "string:", stringFullMatcher("")}, + {true, "string:full", stringFullMatcher("full")}, + + {true, "!=", Not(stringFullMatcher(""))}, + {true, "!=full", Not(stringFullMatcher("full"))}, + {true, "!= full", Not(stringFullMatcher("full"))}, + {true, "!= \t\ffull", Not(stringFullMatcher("full"))}, + + {true, "!string:", Not(stringFullMatcher(""))}, + {true, "!string:full", Not(stringFullMatcher("full"))}, + + {true, "~", TRUE()}, + {true, "~ ", TRUE()}, + {true, `~ ^$`, stringFullMatcher("")}, + {true, "~ partial", stringPartialMatcher("partial")}, + {true, `~ part\.ial`, stringPartialMatcher("part.ial")}, + {true, "~ ^prefix", stringPrefixMatcher("prefix")}, + {true, "~ suffix$", stringSuffixMatcher("suffix")}, + {true, "~ ^full$", stringFullMatcher("full")}, + {true, "~ [0-9]+", regexp.MustCompile(`[0-9]+`)}, + {true, `~ part\s1`, regexp.MustCompile(`part\s1`)}, + + {true, "!~", FALSE()}, + {true, "!~ ", FALSE()}, + {true, "!~ partial", Not(stringPartialMatcher("partial"))}, + {true, `!~ part\.ial`, Not(stringPartialMatcher("part.ial"))}, + {true, "!~ ^prefix", Not(stringPrefixMatcher("prefix"))}, + {true, "!~ suffix$", Not(stringSuffixMatcher("suffix"))}, + {true, "!~ ^full$", Not(stringFullMatcher("full"))}, + {true, "!~ [0-9]+", Not(regexp.MustCompile(`[0-9]+`))}, + + {true, `regexp:partial`, stringPartialMatcher("partial")}, + {true, `!regexp:partial`, Not(stringPartialMatcher("partial"))}, + + {true, `*`, stringFullMatcher("")}, + {true, `* foo`, stringFullMatcher("foo")}, + {true, `* foo*`, stringPrefixMatcher("foo")}, + {true, `* *foo`, stringSuffixMatcher("foo")}, + {true, `* *foo*`, stringPartialMatcher("foo")}, + {true, `* foo*bar`, globMatcher("foo*bar")}, + {true, `* *foo*bar`, globMatcher("*foo*bar")}, + {true, `* foo?bar`, globMatcher("foo?bar")}, + + {true, `!*`, Not(stringFullMatcher(""))}, + {true, `!* foo`, Not(stringFullMatcher("foo"))}, + {true, `!* foo*`, Not(stringPrefixMatcher("foo"))}, + {true, `!* *foo`, Not(stringSuffixMatcher("foo"))}, + {true, `!* *foo*`, Not(stringPartialMatcher("foo"))}, + {true, `!* foo*bar`, Not(globMatcher("foo*bar"))}, + {true, `!* *foo*bar`, Not(globMatcher("*foo*bar"))}, + {true, `!* foo?bar`, Not(globMatcher("foo?bar"))}, + + {true, "glob:foo*bar", globMatcher("foo*bar")}, + {true, "!glob:foo*bar", Not(globMatcher("foo*bar"))}, + + {true, `simple_patterns:`, FALSE()}, + {true, `simple_patterns: `, FALSE()}, + {true, `simple_patterns: foo`, simplePatternsMatcher{ + {stringFullMatcher("foo"), true}, + }}, + {true, `simple_patterns: !foo`, simplePatternsMatcher{ + {stringFullMatcher("foo"), false}, + }}, + } + for _, test := range tests { + t.Run(test.line, func(t *testing.T) { + m, err := Parse(test.line) + if test.valid { + require.NoError(t, err) + if test.matcher != nil { + log.Printf("%s %#v", reflect.TypeOf(m).Name(), m) + assert.Equal(t, test.matcher, m) + } + } else { + assert.Error(t, err) + } + }) + } +} + +func TestMust(t *testing.T) { + assert.NotPanics(t, func() { + m := Must(New(FmtRegExp, `[0-9]+`)) + assert.NotNil(t, m) + }) + + assert.Panics(t, func() { + Must(New(FmtRegExp, `[0-9]+\`)) + }) +} diff --git a/src/go/pkg/matcher/regexp.go b/src/go/pkg/matcher/regexp.go new file mode 100644 index 000000000..3a297f3b3 --- /dev/null +++ b/src/go/pkg/matcher/regexp.go @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import "regexp" + +// NewRegExpMatcher create new matcher with RegExp format +func NewRegExpMatcher(expr string) (Matcher, error) { + switch expr { + case "", "^", "$": + return TRUE(), nil + case "^$", "$^": + return NewStringMatcher("", true, true) + } + size := len(expr) + chars := []rune(expr) + var startWith, endWith bool + startIdx := 0 + endIdx := size - 1 + if chars[startIdx] == '^' { + startWith = true + startIdx = 1 + } + if chars[endIdx] == '$' { + endWith = true + endIdx-- + } + + unescapedExpr := make([]rune, 0, endIdx-startIdx+1) + for i := startIdx; i <= endIdx; i++ { + ch := chars[i] + if ch == '\\' { + if i == endIdx { // end with '\' => invalid format + return regexp.Compile(expr) + } + nextCh := chars[i+1] + if !isRegExpMeta(nextCh) { // '\' + mon-meta char => special meaning + return regexp.Compile(expr) + } + unescapedExpr = append(unescapedExpr, nextCh) + i++ + } else if isRegExpMeta(ch) { + return regexp.Compile(expr) + } else { + unescapedExpr = append(unescapedExpr, ch) + } + } + + return NewStringMatcher(string(unescapedExpr), startWith, endWith) +} + +// isRegExpMeta reports whether byte b needs to be escaped by QuoteMeta. +func isRegExpMeta(b rune) bool { + switch b { + case '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$': + return true + default: + return false + } +} diff --git a/src/go/pkg/matcher/regexp_test.go b/src/go/pkg/matcher/regexp_test.go new file mode 100644 index 000000000..fe644747b --- /dev/null +++ b/src/go/pkg/matcher/regexp_test.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "regexp" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRegExpMatch_Match(t *testing.T) { + m := regexp.MustCompile("[0-9]+") + + cases := []struct { + expected bool + line string + }{ + { + expected: true, + line: "2019", + }, + { + expected: true, + line: "It's over 9000!", + }, + { + expected: false, + line: "This will never fail!", + }, + } + + for _, c := range cases { + assert.Equal(t, c.expected, m.MatchString(c.line)) + } +} + +func BenchmarkRegExp_MatchString(b *testing.B) { + benchmarks := []struct { + expr string + test string + }{ + {"", ""}, + {"abc", "abcd"}, + {"^abc", "abcd"}, + {"abc$", "abcd"}, + {"^abc$", "abcd"}, + {"[a-z]+", "abcd"}, + } + for _, bm := range benchmarks { + b.Run(bm.expr+"_raw", func(b *testing.B) { + m := regexp.MustCompile(bm.expr) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.MatchString(bm.test) + } + }) + b.Run(bm.expr+"_optimized", func(b *testing.B) { + m, _ := NewRegExpMatcher(bm.expr) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.MatchString(bm.test) + } + }) + } +} diff --git a/src/go/pkg/matcher/simple_patterns.go b/src/go/pkg/matcher/simple_patterns.go new file mode 100644 index 000000000..91a0a3bbd --- /dev/null +++ b/src/go/pkg/matcher/simple_patterns.go @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "strings" +) + +type ( + simplePatternTerm struct { + matcher Matcher + positive bool + } + + // simplePatternsMatcher patterns. + simplePatternsMatcher []simplePatternTerm +) + +// NewSimplePatternsMatcher creates new simple patterns. It returns error in case one of patterns has bad syntax. +func NewSimplePatternsMatcher(expr string) (Matcher, error) { + ps := simplePatternsMatcher{} + + for _, pattern := range strings.Fields(expr) { + if err := ps.add(pattern); err != nil { + return nil, err + } + } + if len(ps) == 0 { + return FALSE(), nil + } + return ps, nil +} + +func (m *simplePatternsMatcher) add(term string) error { + p := simplePatternTerm{} + if term[0] == '!' { + p.positive = false + term = term[1:] + } else { + p.positive = true + } + matcher, err := NewGlobMatcher(term) + if err != nil { + return err + } + + p.matcher = matcher + *m = append(*m, p) + + return nil +} + +func (m simplePatternsMatcher) Match(b []byte) bool { + return m.MatchString(string(b)) +} + +// MatchString matches. +func (m simplePatternsMatcher) MatchString(line string) bool { + for _, p := range m { + if p.matcher.MatchString(line) { + return p.positive + } + } + return false +} diff --git a/src/go/pkg/matcher/simple_patterns_test.go b/src/go/pkg/matcher/simple_patterns_test.go new file mode 100644 index 000000000..016096d57 --- /dev/null +++ b/src/go/pkg/matcher/simple_patterns_test.go @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewSimplePatternsMatcher(t *testing.T) { + tests := []struct { + expr string + expected Matcher + }{ + {"", FALSE()}, + {" ", FALSE()}, + {"foo", simplePatternsMatcher{ + {stringFullMatcher("foo"), true}, + }}, + {"!foo", simplePatternsMatcher{ + {stringFullMatcher("foo"), false}, + }}, + {"foo bar", simplePatternsMatcher{ + {stringFullMatcher("foo"), true}, + {stringFullMatcher("bar"), true}, + }}, + {"*foobar* !foo* !*bar *", simplePatternsMatcher{ + {stringPartialMatcher("foobar"), true}, + {stringPrefixMatcher("foo"), false}, + {stringSuffixMatcher("bar"), false}, + {TRUE(), true}, + }}, + {`ab\`, nil}, + } + for _, test := range tests { + t.Run(test.expr, func(t *testing.T) { + matcher, err := NewSimplePatternsMatcher(test.expr) + if test.expected == nil { + assert.Error(t, err) + } else { + assert.Equal(t, test.expected, matcher) + } + }) + } +} + +func TestSimplePatterns_Match(t *testing.T) { + m, err := NewSimplePatternsMatcher("*foobar* !foo* !*bar *") + + require.NoError(t, err) + + cases := []struct { + expected bool + line string + }{ + { + expected: true, + line: "hello world", + }, + { + expected: false, + line: "hello world bar", + }, + { + expected: true, + line: "hello world foobar", + }, + } + + for _, c := range cases { + t.Run(c.line, func(t *testing.T) { + assert.Equal(t, c.expected, m.MatchString(c.line)) + assert.Equal(t, c.expected, m.Match([]byte(c.line))) + }) + } +} + +func TestSimplePatterns_Match2(t *testing.T) { + m, err := NewSimplePatternsMatcher("*foobar") + + require.NoError(t, err) + + assert.True(t, m.MatchString("foobar")) + assert.True(t, m.MatchString("foo foobar")) + assert.False(t, m.MatchString("foobar baz")) +} diff --git a/src/go/pkg/matcher/string.go b/src/go/pkg/matcher/string.go new file mode 100644 index 000000000..43ba43eb3 --- /dev/null +++ b/src/go/pkg/matcher/string.go @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "bytes" + "strings" +) + +type ( + // stringFullMatcher implements Matcher, it uses "==" to match. + stringFullMatcher string + + // stringPartialMatcher implements Matcher, it uses strings.Contains to match. + stringPartialMatcher string + + // stringPrefixMatcher implements Matcher, it uses strings.HasPrefix to match. + stringPrefixMatcher string + + // stringSuffixMatcher implements Matcher, it uses strings.HasSuffix to match. + stringSuffixMatcher string +) + +// NewStringMatcher create a new matcher with string format +func NewStringMatcher(s string, startWith, endWith bool) (Matcher, error) { + if startWith { + if endWith { + return stringFullMatcher(s), nil + } + return stringPrefixMatcher(s), nil + } + if endWith { + return stringSuffixMatcher(s), nil + } + return stringPartialMatcher(s), nil +} + +func (m stringFullMatcher) Match(b []byte) bool { return string(m) == string(b) } +func (m stringFullMatcher) MatchString(line string) bool { return string(m) == line } + +func (m stringPartialMatcher) Match(b []byte) bool { return bytes.Contains(b, []byte(m)) } +func (m stringPartialMatcher) MatchString(line string) bool { return strings.Contains(line, string(m)) } + +func (m stringPrefixMatcher) Match(b []byte) bool { return bytes.HasPrefix(b, []byte(m)) } +func (m stringPrefixMatcher) MatchString(line string) bool { return strings.HasPrefix(line, string(m)) } + +func (m stringSuffixMatcher) Match(b []byte) bool { return bytes.HasSuffix(b, []byte(m)) } +func (m stringSuffixMatcher) MatchString(line string) bool { return strings.HasSuffix(line, string(m)) } diff --git a/src/go/pkg/matcher/string_test.go b/src/go/pkg/matcher/string_test.go new file mode 100644 index 000000000..1694efbd0 --- /dev/null +++ b/src/go/pkg/matcher/string_test.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package matcher + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +var stringMatcherTestCases = []struct { + line string + expr string + full, prefix, suffix, partial bool +}{ + {"", "", true, true, true, true}, + {"abc", "", false, true, true, true}, + {"power", "pow", false, true, false, true}, + {"netdata", "data", false, false, true, true}, + {"abc", "def", false, false, false, false}, + {"soon", "o", false, false, false, true}, +} + +func TestStringFullMatcher_MatchString(t *testing.T) { + for _, c := range stringMatcherTestCases { + t.Run(c.line, func(t *testing.T) { + m := stringFullMatcher(c.expr) + assert.Equal(t, c.full, m.Match([]byte(c.line))) + assert.Equal(t, c.full, m.MatchString(c.line)) + }) + } +} + +func TestStringPrefixMatcher_MatchString(t *testing.T) { + for _, c := range stringMatcherTestCases { + t.Run(c.line, func(t *testing.T) { + m := stringPrefixMatcher(c.expr) + assert.Equal(t, c.prefix, m.Match([]byte(c.line))) + assert.Equal(t, c.prefix, m.MatchString(c.line)) + }) + } +} + +func TestStringSuffixMatcher_MatchString(t *testing.T) { + for _, c := range stringMatcherTestCases { + t.Run(c.line, func(t *testing.T) { + m := stringSuffixMatcher(c.expr) + assert.Equal(t, c.suffix, m.Match([]byte(c.line))) + assert.Equal(t, c.suffix, m.MatchString(c.line)) + }) + } +} + +func TestStringPartialMatcher_MatchString(t *testing.T) { + for _, c := range stringMatcherTestCases { + t.Run(c.line, func(t *testing.T) { + m := stringPartialMatcher(c.expr) + assert.Equal(t, c.partial, m.Match([]byte(c.line))) + assert.Equal(t, c.partial, m.MatchString(c.line)) + }) + } +} |