summaryrefslogtreecommitdiffstats
path: root/libnetdata/simple_pattern
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
commitbe1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch)
tree9754ff1ca740f6346cf8483ec915d4054bc5da2d /libnetdata/simple_pattern
parentInitial commit. (diff)
downloadnetdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz
netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip
Adding upstream version 1.44.3.upstream/1.44.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'libnetdata/simple_pattern')
-rw-r--r--libnetdata/simple_pattern/Makefile.am8
-rw-r--r--libnetdata/simple_pattern/README.md47
-rw-r--r--libnetdata/simple_pattern/simple_pattern.c427
-rw-r--r--libnetdata/simple_pattern/simple_pattern.h60
4 files changed, 542 insertions, 0 deletions
diff --git a/libnetdata/simple_pattern/Makefile.am b/libnetdata/simple_pattern/Makefile.am
new file mode 100644
index 00000000..161784b8
--- /dev/null
+++ b/libnetdata/simple_pattern/Makefile.am
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+dist_noinst_DATA = \
+ README.md \
+ $(NULL)
diff --git a/libnetdata/simple_pattern/README.md b/libnetdata/simple_pattern/README.md
new file mode 100644
index 00000000..5f56a3af
--- /dev/null
+++ b/libnetdata/simple_pattern/README.md
@@ -0,0 +1,47 @@
+<!--
+title: "Simple patterns"
+description: "Netdata supports simple patterns, which are less cryptic versions of regular expressions. Use familiar notation for powerful results."
+custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/simple_pattern/README.md
+sidebar_label: "Simple patterns"
+learn_status: "Published"
+learn_topic_type: "Tasks"
+learn_rel_path: "Developers/libnetdata"
+-->
+
+# Simple patterns
+
+Unix prefers regular expressions. But they are just too hard, too cryptic
+to use, write and understand.
+
+So, Netdata supports **simple patterns**.
+
+Simple patterns are a space separated list of words, that can have `*`
+as a wildcard. Each word may use any number of `*`. Simple patterns
+allow **negative** matches by prefixing a word with `!`.
+
+So, `pattern = !*bad* *` will match anything, except all those that
+contain the word `bad`.
+
+Simple patterns are quite powerful: `pattern = *foobar* !foo* !*bar *`
+matches everything containing `foobar`, except strings that start
+with `foo` or end with `bar`.
+
+You can use the Netdata command line to check simple patterns,
+like this:
+
+```sh
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world'
+RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world'
+
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world bar'
+RESULT: NOT MATCHED - pattern '*foobar* !foo* !*bar *' does not match 'hello world bar'
+
+# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world foobar'
+RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world foobar'
+```
+
+Netdata stops processing to the first positive or negative match
+(left to right). If it is not matched by either positive or negative
+patterns, it is denied at the end.
+
+
diff --git a/libnetdata/simple_pattern/simple_pattern.c b/libnetdata/simple_pattern/simple_pattern.c
new file mode 100644
index 00000000..a0051e8f
--- /dev/null
+++ b/libnetdata/simple_pattern/simple_pattern.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+struct simple_pattern {
+ const char *match;
+ uint32_t len;
+
+ SIMPLE_PREFIX_MODE mode;
+ bool negative;
+ bool case_sensitive;
+
+ struct simple_pattern *child;
+ struct simple_pattern *next;
+};
+
+static struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE default_mode, size_t count) {
+ if(unlikely(count >= 1000))
+ return NULL;
+
+ // fprintf(stderr, "PARSING PATTERN: '%s'\n", str);
+
+ SIMPLE_PREFIX_MODE mode;
+ struct simple_pattern *child = NULL;
+
+ char *s = str, *c = str;
+
+ // skip asterisks in front
+ while(*c == '*') c++;
+
+ // find the next asterisk
+ while(*c && *c != '*') c++;
+
+ // do we have an asterisk in the middle?
+ if(*c == '*' && c[1] != '\0') {
+ // yes, we have
+ child = parse_pattern(c, default_mode, count + 1);
+ c[1] = '\0';
+ }
+
+ // check what this one matches
+
+ size_t len = strlen(s);
+ if(len >= 2 && *s == '*' && s[len - 1] == '*') {
+ s[len - 1] = '\0';
+ s++;
+ mode = SIMPLE_PATTERN_SUBSTRING;
+ }
+ else if(len >= 1 && *s == '*') {
+ s++;
+ mode = SIMPLE_PATTERN_SUFFIX;
+ }
+ else if(len >= 1 && s[len - 1] == '*') {
+ s[len - 1] = '\0';
+ mode = SIMPLE_PATTERN_PREFIX;
+ }
+ else
+ mode = default_mode;
+
+ // allocate the structure
+ struct simple_pattern *m = callocz(1, sizeof(struct simple_pattern));
+ if(*s) {
+ m->match = strdupz(s);
+ m->len = strlen(m->match);
+ m->mode = mode;
+ }
+ else {
+ m->mode = SIMPLE_PATTERN_SUBSTRING;
+ }
+
+ m->child = child;
+
+ return m;
+}
+
+SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode, bool case_sensitive) {
+ struct simple_pattern *root = NULL, *last = NULL;
+
+ if(unlikely(!list || !*list)) return root;
+
+ char isseparator[256] = {
+ [' '] = 1 // space
+ , ['\t'] = 1 // tab
+ , ['\r'] = 1 // carriage return
+ , ['\n'] = 1 // new line
+ , ['\f'] = 1 // form feed
+ , ['\v'] = 1 // vertical tab
+ };
+
+ if (unlikely(separators && *separators)) {
+ memset(&isseparator[0], 0, sizeof(isseparator));
+ while(*separators) isseparator[(unsigned char)*separators++] = 1;
+ }
+
+ char *buf = mallocz(strlen(list) + 1);
+ const char *s = list;
+
+ while(s && *s) {
+ buf[0] = '\0';
+ char *c = buf;
+
+ bool negative = false;
+
+ // skip all spaces
+ while(isseparator[(unsigned char)*s])
+ s++;
+
+ if(*s == '!') {
+ negative = true;
+ s++;
+ }
+
+ // empty string
+ if(unlikely(!*s))
+ break;
+
+ // find the next space
+ char escape = 0;
+ while(*s) {
+ if(*s == '\\' && !escape) {
+ escape = 1;
+ s++;
+ }
+ else {
+ if (isseparator[(unsigned char)*s] && !escape) {
+ s++;
+ break;
+ }
+
+ *c++ = *s++;
+ escape = 0;
+ }
+ }
+
+ // terminate our string
+ *c = '\0';
+
+ // if we matched the empty string, skip it
+ if(unlikely(!*buf))
+ continue;
+
+ // fprintf(stderr, "FOUND PATTERN: '%s'\n", buf);
+ struct simple_pattern *m = parse_pattern(buf, default_mode, 0);
+ m->negative = negative;
+ m->case_sensitive = case_sensitive;
+
+ if(default_mode == SIMPLE_PATTERN_SUBSTRING) {
+ m->mode = SIMPLE_PATTERN_SUBSTRING;
+
+ struct simple_pattern *tm = m;
+ for(tm = m; tm->child ; tm = tm->child) ;
+ tm->mode = SIMPLE_PATTERN_SUBSTRING;
+ }
+
+ // link it at the end
+ if(unlikely(!root))
+ root = last = m;
+ else {
+ last->next = m;
+ last = m;
+ }
+ }
+
+ freez(buf);
+ return (SIMPLE_PATTERN *)root;
+}
+
+static inline char *add_wildcarded(const char *matched, size_t matched_size, char *wildcarded, size_t *wildcarded_size) {
+ //if(matched_size) {
+ // char buf[matched_size + 1];
+ // strncpyz(buf, matched, matched_size);
+ // fprintf(stderr, "ADD WILDCARDED '%s' of length %zu\n", buf, matched_size);
+ //}
+
+ if(unlikely(wildcarded && *wildcarded_size && matched && *matched && matched_size)) {
+ size_t wss = *wildcarded_size - 1;
+ size_t len = (matched_size < wss)?matched_size:wss;
+ if(likely(len)) {
+ strncpyz(wildcarded, matched, len);
+
+ *wildcarded_size -= len;
+ return &wildcarded[len];
+ }
+ }
+
+ return wildcarded;
+}
+
+static inline int sp_strcmp(const char *s1, const char *s2, bool case_sensitive) {
+ if(case_sensitive)
+ return strcmp(s1, s2);
+
+ return strcasecmp(s1, s2);
+}
+
+static inline int sp_strncmp(const char *s1, const char *s2, size_t n, bool case_sensitive) {
+ if(case_sensitive)
+ return strncmp(s1, s2, n);
+
+ return strncasecmp(s1, s2, n);
+}
+
+static inline char *sp_strstr(const char *haystack, const char *needle, bool case_sensitive) {
+ if(case_sensitive)
+ return strstr(haystack, needle);
+
+ return strcasestr(haystack, needle);
+}
+
+static inline bool match_pattern(struct simple_pattern *m, const char *str, size_t len, char *wildcarded, size_t *wildcarded_size) {
+ char *s;
+
+ bool loop = true;
+ while(loop && m->len <= len) {
+ loop = false;
+
+ switch(m->mode) {
+ default:
+ case SIMPLE_PATTERN_EXACT:
+ if(unlikely(sp_strcmp(str, m->match, m->case_sensitive) == 0)) {
+ if(!m->child) return true;
+ return false;
+ }
+ break;
+
+ case SIMPLE_PATTERN_SUBSTRING:
+ if(!m->len) return true;
+ if((s = sp_strstr(str, m->match, m->case_sensitive))) {
+ wildcarded = add_wildcarded(str, s - str, wildcarded, wildcarded_size);
+ if(!m->child) {
+ add_wildcarded(&s[m->len], len - (&s[m->len] - str), wildcarded, wildcarded_size);
+ return true;
+ }
+
+ // instead of recursion
+ {
+ len = len - (s - str) - m->len;
+ str = &s[m->len];
+ m = m->child;
+ loop = true;
+ // return match_pattern(m->child, &s[m->len], len - (s - str) - m->len, wildcarded, wildcarded_size);
+ }
+ }
+ break;
+
+ case SIMPLE_PATTERN_PREFIX:
+ if(unlikely(sp_strncmp(str, m->match, m->len, m->case_sensitive) == 0)) {
+ if(!m->child) {
+ add_wildcarded(&str[m->len], len - m->len, wildcarded, wildcarded_size);
+ return true;
+ }
+ // instead of recursion
+ {
+ len = len - m->len;
+ str = &str[m->len];
+ m = m->child;
+ loop = true;
+ // return match_pattern(m->child, &str[m->len], len - m->len, wildcarded, wildcarded_size);
+ }
+ }
+ break;
+
+ case SIMPLE_PATTERN_SUFFIX:
+ if(unlikely(sp_strcmp(&str[len - m->len], m->match, m->case_sensitive) == 0)) {
+ add_wildcarded(str, len - m->len, wildcarded, wildcarded_size);
+ if(!m->child) return true;
+ return false;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+static inline SIMPLE_PATTERN_RESULT simple_pattern_matches_extract_with_length(SIMPLE_PATTERN *list, const char *str, size_t len, char *wildcarded, size_t wildcarded_size) {
+ struct simple_pattern *m, *root = (struct simple_pattern *)list;
+
+ for(m = root; m ; m = m->next) {
+ char *ws = wildcarded;
+ size_t wss = wildcarded_size;
+ if(unlikely(ws)) *ws = '\0';
+
+ if (match_pattern(m, str, len, ws, &wss)) {
+ if (m->negative) return SP_MATCHED_NEGATIVE;
+ return SP_MATCHED_POSITIVE;
+ }
+ }
+
+ return SP_NOT_MATCHED;
+}
+
+SIMPLE_PATTERN_RESULT simple_pattern_matches_buffer_extract(SIMPLE_PATTERN *list, BUFFER *str, char *wildcarded, size_t wildcarded_size) {
+ if(!list || !str || buffer_strlen(str)) return SP_NOT_MATCHED;
+ return simple_pattern_matches_extract_with_length(list, buffer_tostring(str), buffer_strlen(str), wildcarded, wildcarded_size);
+}
+
+SIMPLE_PATTERN_RESULT simple_pattern_matches_string_extract(SIMPLE_PATTERN *list, STRING *str, char *wildcarded, size_t wildcarded_size) {
+ if(!list || !str) return SP_NOT_MATCHED;
+ return simple_pattern_matches_extract_with_length(list, string2str(str), string_strlen(str), wildcarded, wildcarded_size);
+}
+
+SIMPLE_PATTERN_RESULT simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size) {
+ if(!list || !str || !*str) return SP_NOT_MATCHED;
+ return simple_pattern_matches_extract_with_length(list, str, strlen(str), wildcarded, wildcarded_size);
+}
+
+SIMPLE_PATTERN_RESULT simple_pattern_matches_length_extract(SIMPLE_PATTERN *list, const char *str, size_t len, char *wildcarded, size_t wildcarded_size) {
+ if(!list || !str || !*str || !len) return SP_NOT_MATCHED;
+ return simple_pattern_matches_extract_with_length(list, str, len, wildcarded, wildcarded_size);
+}
+
+static inline void free_pattern(struct simple_pattern *m) {
+ if(!m) return;
+
+ free_pattern(m->child);
+ free_pattern(m->next);
+ freez((void *)m->match);
+ freez(m);
+}
+
+void simple_pattern_free(SIMPLE_PATTERN *list) {
+ if(!list) return;
+
+ free_pattern(((struct simple_pattern *)list));
+}
+
+/* Debugging patterns
+
+ This code should be dead - it is useful for debugging but should not be called by production code.
+ Feel free to comment it out, but please leave it in the file.
+*/
+extern void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p)
+{
+ struct simple_pattern *root = (struct simple_pattern *)p;
+ if(root==NULL) {
+ netdata_log_debug(debug_type,"dump_pattern(NULL)");
+ return;
+ }
+ netdata_log_debug(debug_type,"dump_pattern(%p) child=%p next=%p mode=%u match=%s", root, root->child, root->next, root->mode,
+ root->match);
+ if(root->child!=NULL)
+ simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->child);
+ if(root->next!=NULL)
+ simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->next);
+}
+
+/* Heuristic: decide if the pattern could match a DNS name.
+
+ Although this functionality is used directly by socket.c:connection_allowed() it must be in this file
+ because of the SIMPLE_PATTERN/simple_pattern structure hiding.
+ Based on RFC952 / RFC1123. We need to decide if the pattern may match a DNS name, or not. For the negative
+ cases we need to be sure that it can only match an ipv4 or ipv6 address:
+ * IPv6 addresses contain ':', which are illegal characters in DNS.
+ * IPv4 addresses cannot contain alpha- characters.
+ * DNS TLDs must be alphanumeric to distinguish from IPv4.
+ Some patterns (e.g. "*a*" ) could match multiple cases (i.e. DNS or IPv6).
+ Some patterns will be awkward (e.g. "192.168.*") as they look like they are intended to match IPv4-only
+ but could match DNS (i.e. "192.168.com" is a valid name).
+*/
+static void scan_is_potential_name(struct simple_pattern *p, int *alpha, int *colon, int *wildcards)
+{
+ while (p) {
+ if (p->match) {
+ if(p->mode == SIMPLE_PATTERN_EXACT && !strcmp("localhost", p->match)) {
+ p = p->child;
+ continue;
+ }
+ char const *scan = p->match;
+ while (*scan != 0) {
+ if ((*scan >= 'a' && *scan <= 'z') || (*scan >= 'A' && *scan <= 'Z'))
+ *alpha = 1;
+ if (*scan == ':')
+ *colon = 1;
+ scan++;
+ }
+ if (p->mode != SIMPLE_PATTERN_EXACT)
+ *wildcards = 1;
+ p = p->child;
+ }
+ }
+}
+
+extern int simple_pattern_is_potential_name(SIMPLE_PATTERN *p)
+{
+ int alpha=0, colon=0, wildcards=0;
+ struct simple_pattern *root = (struct simple_pattern*)p;
+ while (root != NULL) {
+ if (root->match != NULL) {
+ scan_is_potential_name(root, &alpha, &colon, &wildcards);
+ }
+ if (root->mode != SIMPLE_PATTERN_EXACT)
+ wildcards = 1;
+ root = root->next;
+ }
+ return (alpha || wildcards) && !colon;
+}
+
+char *simple_pattern_trim_around_equal(char *src) {
+ char *store = mallocz(strlen(src) + 1);
+
+ char *dst = store;
+ while (*src) {
+ if (*src == '=') {
+ if (*(dst -1) == ' ')
+ dst--;
+
+ *dst++ = *src++;
+ if (*src == ' ')
+ src++;
+ }
+
+ *dst++ = *src++;
+ }
+ *dst = 0x00;
+
+ return store;
+}
+
+char *simple_pattern_iterate(SIMPLE_PATTERN **p)
+{
+ struct simple_pattern *root = (struct simple_pattern *) *p;
+ struct simple_pattern **Proot = (struct simple_pattern **)p;
+
+ (*Proot) = (*Proot)->next;
+ return (char *) root->match;
+}
diff --git a/libnetdata/simple_pattern/simple_pattern.h b/libnetdata/simple_pattern/simple_pattern.h
new file mode 100644
index 00000000..1a8d8f7d
--- /dev/null
+++ b/libnetdata/simple_pattern/simple_pattern.h
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SIMPLE_PATTERN_H
+#define NETDATA_SIMPLE_PATTERN_H
+
+#include "../libnetdata.h"
+
+
+typedef enum __attribute__ ((__packed__)) {
+ SIMPLE_PATTERN_EXACT,
+ SIMPLE_PATTERN_PREFIX,
+ SIMPLE_PATTERN_SUFFIX,
+ SIMPLE_PATTERN_SUBSTRING
+} SIMPLE_PREFIX_MODE;
+
+typedef enum __attribute__ ((__packed__)) {
+ SP_NOT_MATCHED,
+ SP_MATCHED_NEGATIVE,
+ SP_MATCHED_POSITIVE,
+} SIMPLE_PATTERN_RESULT;
+
+typedef void SIMPLE_PATTERN;
+
+// create a simple_pattern from the string given
+// default_mode is used in cases where EXACT matches, without an asterisk,
+// should be considered PREFIX matches.
+SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode, bool case_sensitive);
+
+struct netdata_string;
+
+// test if string str is matched from the pattern and fill 'wildcarded' with the parts matched by '*'
+SIMPLE_PATTERN_RESULT simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size);
+SIMPLE_PATTERN_RESULT simple_pattern_matches_string_extract(SIMPLE_PATTERN *list, struct netdata_string *str, char *wildcarded, size_t wildcarded_size);
+SIMPLE_PATTERN_RESULT simple_pattern_matches_buffer_extract(SIMPLE_PATTERN *list, BUFFER *str, char *wildcarded, size_t wildcarded_size);
+SIMPLE_PATTERN_RESULT simple_pattern_matches_length_extract(SIMPLE_PATTERN *list, const char *str, size_t len, char *wildcarded, size_t wildcarded_size);
+
+// test if string str is matched from the pattern
+#define simple_pattern_matches(list, str) (simple_pattern_matches_extract(list, str, NULL, 0) == SP_MATCHED_POSITIVE)
+#define simple_pattern_matches_string(list, str) (simple_pattern_matches_string_extract(list, str, NULL, 0) == SP_MATCHED_POSITIVE)
+#define simple_pattern_matches_buffer(list, str) (simple_pattern_matches_buffer_extract(list, str, NULL, 0) == SP_MATCHED_POSITIVE)
+
+// free a simple_pattern that was created with simple_pattern_create()
+// list can be NULL, in which case, this does nothing.
+void simple_pattern_free(SIMPLE_PATTERN *list);
+
+void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p) ;
+int simple_pattern_is_potential_name(SIMPLE_PATTERN *p) ;
+char *simple_pattern_iterate(SIMPLE_PATTERN **p);
+
+// Auxiliary function to create a pattern
+char *simple_pattern_trim_around_equal(char *src);
+
+#define SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS ",|\t\r\n\f\v"
+
+#define is_valid_sp(x) ((x) && *(x) && !((x)[0] == '*' && (x)[1] == '\0'))
+
+#define string_to_simple_pattern(str) (is_valid_sp(str) ? simple_pattern_create(str, SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS, SIMPLE_PATTERN_EXACT, true) : NULL)
+#define string_to_simple_pattern_nocase(str) (is_valid_sp(str) ? simple_pattern_create(str, SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS, SIMPLE_PATTERN_EXACT, false) : NULL)
+
+#endif //NETDATA_SIMPLE_PATTERN_H