4 files changed, 342 insertions, 0 deletions
diff --git a/libnetdata/simple_pattern/Makefile.am b/libnetdata/simple_pattern/Makefile.am
new file mode 100644
index 0000000..1cb69ed
--- /dev/null
+++ b/libnetdata/simple_pattern/Makefile.am
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+
+dist_noinst_DATA = \
+	README.md \
+	$(NULL)
diff --git a/libnetdata/simple_pattern/README.md b/libnetdata/simple_pattern/README.md
new file mode 100644
index 0000000..79a7131
--- /dev/null
+++ b/libnetdata/simple_pattern/README.md
@@ -0,0 +1,38 @@
+## netdata simple patterns
+
+Unix prefers regular expressions. But they are just too hard, too cryptic
+to use, write and understand.
+
+So, netdata supports **simple patterns**.
+
+Simple patterns are a space separated list of words, that can have `*`
+as a wildcard. Each word may use any number of `*`. Simple patterns
+allow **negative** matches by prefixing a word with `!`.
+
+So, `pattern = !*bad* *` will match anything, except all those that
+contain the word `bad`. 
+
+Simple patterns are quite powerful: `pattern = *foobar* !foo* !*bar *`
+matches everything containing `foobar` and, of all the other strings,
+those that neither start with `foo` nor end with `bar`.
+
+You can use the netdata command line to check simple patterns,
+like this:
+
+```sh
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world'
+RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world'
+
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world bar'
+RESULT: NOT MATCHED - pattern '*foobar* !foo* !*bar *' does not match 'hello world bar'
+
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world foobar'
+RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world foobar'
+```
+
+netdata stops processing at the first positive or negative match
+(left to right). A string that is matched by neither a positive nor
+a negative pattern is denied at the end.
+
+
+[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Flibnetdata%2Fsimple_pattern%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]()
diff --git a/libnetdata/simple_pattern/simple_pattern.c b/libnetdata/simple_pattern/simple_pattern.c
new file mode 100644
index 0000000..57b0aec
--- /dev/null
+++ b/libnetdata/simple_pattern/simple_pattern.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+struct simple_pattern {             // one '*'-delimited piece of a word
+    const char *match;              // literal to look for; left unset for an empty piece
+    size_t len;                     // strlen(match); left 0 when there is no literal
+
+    SIMPLE_PREFIX_MODE mode;        // how 'match' is compared against the input
+    char negative;                  // 1 on the first piece of a word prefixed with '!'
+
+    struct simple_pattern *child;   // piece after the next '*' of the same word, if any
+
+    struct simple_pattern *next;    // first piece of the next word of the list
+};
+
+// Parse one word of a pattern list into a chain of pieces linked by ->child,
+// one piece per asterisk-delimited literal. 'str' is cut in place, so it must
+// be writable; literals are duplicated with strdupz(), so it can be reused.
+// A run of asterisks counts as one wildcard; a word without any asterisk
+// gets 'default_mode'. The returned chain is owned by the caller.
+static inline struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE default_mode) {
+    SIMPLE_PREFIX_MODE mode;
+    struct simple_pattern *child = NULL;
+    char *c = str;
+
+    // skip asterisks in front, but keep the last one: the others must not end up in the literal
+    while(*c == '*') c++;
+    char *s = (c > str) ? c - 1 : str;
+
+    // find the next asterisk
+    while(*c && *c != '*') c++;
+
+    if(*c == '*') {
+        // whatever follows this run of asterisks is a child, parsed from the run's last asterisk
+        char *e = c;
+        while(*e == '*') e++;
+        if(*e) child = parse_pattern(e - 1, default_mode);
+        c[1] = '\0';   // this piece ends right after its first trailing asterisk
+    }
+
+    // check what this one matches
+    size_t len = strlen(s);
+    if(len >= 2 && *s == '*' && s[len - 1] == '*') {
+        s[len - 1] = '\0';
+        s++;
+        mode = SIMPLE_PATTERN_SUBSTRING;
+    }
+    else if(len >= 1 && *s == '*') {
+        s++;
+        mode = SIMPLE_PATTERN_SUFFIX;
+    }
+    else if(len >= 1 && s[len - 1] == '*') {
+        s[len - 1] = '\0';
+        mode = SIMPLE_PATTERN_PREFIX;
+    }
+    else
+        mode = default_mode;
+
+    // a word of only asterisks has no literal: SUBSTRING with len 0, which match_pattern() accepts
+    struct simple_pattern *m = callocz(1, sizeof(struct simple_pattern));
+    m->mode = SIMPLE_PATTERN_SUBSTRING;
+    if(*s) {
+        m->match = strdupz(s);
+        m->len = strlen(m->match);
+        m->mode = mode;
+    }
+
+    m->child = child;
+    return m;
+}
+
+// Build a pattern list out of the words of 'list'. Words are split at any
+// character of 'separators' (the whitespace below, if that is NULL or empty).
+// A leading '!' makes a word negative; a backslash makes the character after
+// it part of the word, even if it is a separator or another backslash.
+// Every word is parsed by parse_pattern(), which receives 'default_mode'.
+// Returns NULL when 'list' is NULL, empty or has no words in it.
+SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode) {
+    if(unlikely(!list || !*list)) return NULL;
+
+    // lookup table indexed by character: non-zero for separators
+    int isseparator[256] = {
+            [' '] = 1       // space
+            , ['\t'] = 1    // tab
+            , ['\r'] = 1    // carriage return
+            , ['\n'] = 1    // new line
+            , ['\f'] = 1    // form feed
+            , ['\v'] = 1    // vertical tab
+    };
+
+    // separators given by the caller replace the defaults
+    if(unlikely(separators && *separators)) {
+        memset(isseparator, 0, sizeof(isseparator));
+        const char *sep;
+        for(sep = separators; *sep ; sep++)
+            isseparator[(unsigned char)*sep] = 1;
+    }
+
+    struct simple_pattern *head = NULL, *tail = NULL;
+    char *word = mallocz(strlen(list) + 1);
+    const char *in = list;
+
+    while(*in) {
+        // skip the separators in front of the word
+        while(isseparator[(unsigned char)*in])
+            in++;
+
+        char negative = 0;
+        if(*in == '!') {
+            negative = 1;
+            in++;
+        }
+
+        // nothing left to parse
+        if(unlikely(!*in))
+            break;
+
+        // copy the word, up to the next separator that is not escaped
+        char *out = word;
+        char escaped = 0;
+        for( ; *in ; in++) {
+            if(!escaped) {
+                if(*in == '\\') {
+                    escaped = 1;
+                    continue;
+                }
+                if(isseparator[(unsigned char)*in]) {
+                    in++;   // consume the separator
+                    break;
+                }
+            }
+
+            *out++ = *in;
+            escaped = 0;
+        }
+        *out = '\0';
+
+        // nothing was copied, e.g. for a '!' followed by a separator
+        if(unlikely(!*word))
+            continue;
+
+        struct simple_pattern *m = parse_pattern(word, default_mode);
+        m->negative = negative;
+
+        // link it at the end
+        if(tail) tail->next = m;
+        else head = m;
+        tail = m;
+    }
+
+    freez(word);
+    return (SIMPLE_PATTERN *)head;
+}
+
+// Append up to 'matched_size' bytes of 'matched' (text an asterisk stood for)
+// to 'wildcarded', a buffer with '*wildcarded_size' bytes left. One byte is
+// always held back, so longer text gets truncated. Returns where the next
+// append should go and lowers '*wildcarded_size' by the bytes appended;
+// with nothing to append, or no room for it, 'wildcarded' comes back as is.
+static inline char *add_wildcarded(const char *matched, size_t matched_size, char *wildcarded, size_t *wildcarded_size) {
+    if(likely(!wildcarded || !*wildcarded_size || !matched || !*matched || !matched_size))
+        return wildcarded;
+
+    size_t room = *wildcarded_size - 1;
+    size_t n = matched_size;
+    if(n >= room) n = room;
+
+    if(unlikely(!n))
+        return wildcarded;
+
+    strncpyz(wildcarded, matched, n);
+    *wildcarded_size -= n;
+    return &wildcarded[n];
+}
+
+// Match the chain of pieces starting at 'm' against the 'len' bytes at 'str'.
+// Text covered by asterisks is appended to 'wildcarded' with add_wildcarded().
+// Returns 1 when 'm' and all the pieces chained to it match, 0 otherwise.
+static inline int match_pattern(struct simple_pattern *m, const char *str, size_t len, char *wildcarded, size_t *wildcarded_size) {
+    if(m->len > len) return 0;      // the literal does not fit in the input
+
+    const char *rest;
+
+    switch(m->mode) {
+        case SIMPLE_PATTERN_SUBSTRING: {
+            // a piece without a literal accepts anything
+            if(!m->len) return 1;
+            const char *hit = strstr(str, m->match);
+            if(!hit) return 0;
+
+            // the text in front of the literal was covered by an asterisk
+            wildcarded = add_wildcarded(str, hit - str, wildcarded, wildcarded_size);
+            rest = &hit[m->len];
+            if(m->child)
+                return match_pattern(m->child, rest, len - (hit - str) - m->len, wildcarded, wildcarded_size);
+
+            add_wildcarded(rest, len - (rest - str), wildcarded, wildcarded_size);
+            return 1;
+        }
+
+        case SIMPLE_PATTERN_PREFIX:
+            if(likely(strncmp(str, m->match, m->len) != 0)) return 0;
+
+            rest = &str[m->len];
+            if(m->child)
+                return match_pattern(m->child, rest, len - m->len, wildcarded, wildcarded_size);
+
+            add_wildcarded(rest, len - m->len, wildcarded, wildcarded_size);
+            return 1;
+
+        case SIMPLE_PATTERN_SUFFIX:
+            if(likely(strcmp(&str[len - m->len], m->match) != 0)) return 0;
+
+            add_wildcarded(str, len - m->len, wildcarded, wildcarded_size);
+            return m->child ? 0 : 1;
+
+        case SIMPLE_PATTERN_EXACT:
+        default:
+            if(likely(strcmp(str, m->match) != 0)) return 0;
+            return m->child ? 0 : 1;
+    }
+}
+
+// Test 'str' against the words of 'list', left to right; the first word that
+// matches decides: 1 for a positive word, 0 for a negative ('!') one.
+// Returns 0 when no word matches, or when 'list' or 'str' is NULL or empty.
+// 'wildcarded', if not NULL, receives the text the asterisks of the deciding
+// word stood for, within 'wildcarded_size' bytes; ignore it when 0 is returned.
+int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size) {
+    struct simple_pattern *m, *root = (struct simple_pattern *)list;
+
+    if(unlikely(!root || !str || !*str)) return 0;
+
+    size_t len = strlen(str);
+    for(m = root; m ; m = m->next) {
+        // restart with an empty buffer; a zero sized one has no room for the terminator
+        char *ws = wildcarded;
+        size_t wss = wildcarded_size;
+        if(unlikely(ws && wss)) *ws = '\0';
+
+        if(match_pattern(m, str, len, ws, &wss))
+            return m->negative ? 0 : 1;
+    }
+
+    return 0;
+}
+
+// Free a list of words; loops over ->next so that stack depth does not grow with the list.
+static inline void free_pattern(struct simple_pattern *m) {
+    while(m) {
+        struct simple_pattern *next = m->next;
+        free_pattern(m->child);     // recursion is limited to the pieces of one word
+        freez((void *)m->match);
+        freez(m);
+        m = next;
+    }
+}
+
+void simple_pattern_free(SIMPLE_PATTERN *list) {    // a NULL list is accepted and does nothing
+    free_pattern((struct simple_pattern *)list);
+}
diff --git a/libnetdata/simple_pattern/simple_pattern.h b/libnetdata/simple_pattern/simple_pattern.h
new file mode 100644
index 0000000..b96a018
--- /dev/null
+++ b/libnetdata/simple_pattern/simple_pattern.h
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SIMPLE_PATTERN_H
+#define NETDATA_SIMPLE_PATTERN_H
+
+#include "../libnetdata.h"
+
+
+typedef enum {                  // how the literal of a pattern is compared to the string
+    SIMPLE_PATTERN_EXACT,       // the whole string must be the literal
+    SIMPLE_PATTERN_PREFIX,      // the string must start with the literal
+    SIMPLE_PATTERN_SUFFIX,      // the string must end with the literal
+    SIMPLE_PATTERN_SUBSTRING    // the string must contain the literal
+} SIMPLE_PREFIX_MODE;
+
+typedef void SIMPLE_PATTERN;    // opaque handle; the real structure is private to simple_pattern.c
+
+// create a simple_pattern from 'list'; words are split at any character of
+// 'separators' (NULL or "" means whitespace). 'default_mode' is how words
+// without an asterisk are matched. Returns NULL when 'list' has no words.
+extern SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode);
+
+// returns 1 if string str is matched by the pattern, 0 if not; 'wildcarded', if not NULL, gets the parts matched by '*'
+extern int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size);
+
+// test if string str is matched by the pattern, without extracting the parts matched by '*'
+#define simple_pattern_matches(list, str) simple_pattern_matches_extract(list, str, NULL, 0)
+
+// free a simple_pattern that was created with simple_pattern_create()
+// list can be NULL, in which case, this does nothing.
+extern void simple_pattern_free(SIMPLE_PATTERN *list);
+
+#endif //NETDATA_SIMPLE_PATTERN_H