summaryrefslogtreecommitdiffstats
path: root/src/libnetdata/sanitizers
diff options
context:
space:
mode:
Diffstat (limited to 'src/libnetdata/sanitizers')
-rw-r--r--src/libnetdata/sanitizers/chart_id_and_name.c145
-rw-r--r--src/libnetdata/sanitizers/chart_id_and_name.h22
-rw-r--r--src/libnetdata/sanitizers/sanitizers-functions.c68
-rw-r--r--src/libnetdata/sanitizers/sanitizers-functions.h10
-rw-r--r--src/libnetdata/sanitizers/sanitizers-labels.c157
-rw-r--r--src/libnetdata/sanitizers/sanitizers-labels.h13
-rw-r--r--src/libnetdata/sanitizers/sanitizers-pluginsd.c79
-rw-r--r--src/libnetdata/sanitizers/sanitizers-pluginsd.h10
-rw-r--r--src/libnetdata/sanitizers/sanitizers.h12
-rw-r--r--src/libnetdata/sanitizers/utf8-sanitizer.c116
-rw-r--r--src/libnetdata/sanitizers/utf8-sanitizer.h10
11 files changed, 642 insertions, 0 deletions
diff --git a/src/libnetdata/sanitizers/chart_id_and_name.c b/src/libnetdata/sanitizers/chart_id_and_name.c
new file mode 100644
index 000000000..5af8aa686
--- /dev/null
+++ b/src/libnetdata/sanitizers/chart_id_and_name.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+/*
+ * control characters become space, which are deduplicated.
+ *
+ * Character Name Sym To Why
+ * ---------------- --- --- -------------------------------------------------------------------------------------
+ * space [ ] -> [_]
+ * exclamation mark [!] -> [_] (only when it is the first character) simple patterns negation
+ * double quotes ["] -> [_] needs escaping when parsing
+ * dollar [$] -> [_] health variables and security in alarm-notify.sh, cgroup-name.sh, etc.
+ * percent [%] -> [_] http GET encoded characters
+ * ampersand [&] -> [_] http GET fields separator
+ * single quote ['] -> [_] needs escaping when parsing
+ * asterisk [*] -> [_] simple pattern wildcard
+ * plus [+] -> [_] http GET space
+ * comma [,] -> [.] list separator (probably not used today)
+ * equal [=] -> [_] plugins.d protocol separator
+ * question mark [?] -> [_] http GET query string separator
+ * at [@] -> [_] hostname separator (on the UI)
+ * apostrophe [`] -> [_] bash expansion (security in alarm-notify.sh and other shell scripts)
+ * pipe [|] -> [_] list separator (simple patterns and http GET)
+ * backslash [\] -> [/] to avoid interfering with escaping logic
+ */
+
+unsigned char chart_names_allowed_chars[256] = {
+ [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ',
+
+ // control characters to be treated as spaces
+ ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ',
+
+ [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ',
+ [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ',
+ [30] = ' ', [31] = ' ',
+
+ // symbols
+ [' '] = ' ', ['!'] = '!', ['"'] = '_', ['#'] = '#', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
+ ['('] = '(', [')'] = ')', ['*'] = '_', ['+'] = '_', [','] = '.', ['-'] = '-', ['.'] = '.', ['/'] = '/',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '_', ['>'] = '>', ['?'] = '_', ['@'] = '_',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '_', ['_'] = '_', ['`'] = '_',
+
+ // lower
+ ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h',
+ ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p',
+ ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x',
+ ['y'] = 'y', ['z'] = 'z',
+
+ // symbols
+ ['{'] = '{', ['|'] = '_', ['}'] = '}', ['~'] = '~',
+
+ // rest
+ [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ',
+ [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ',
+ [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ',
+ [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ',
+ [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ',
+ [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ',
+ [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ',
+ [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ',
+ [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ',
+ [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ',
+ [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ',
+ [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ',
+ [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ',
+ [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ',
+ [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ',
+ [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ',
+ [255] = ' '
+};
+
+static inline void sanitize_chart_name(char *dst, const char *src, size_t dst_size) {
+ // text_sanitize deduplicates spaces
+ text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size,
+ chart_names_allowed_chars, true, "", NULL);
+
+ char *d = dst;
+
+ // do not accept ! as the first character
+ if(*d == '!') *d = '_';
+
+ // convert remaining spaces to underscores
+ while(*d) {
+ if(*d == ' ') *d = '_';
+ d++;
+ }
+}
+
+// make sure the supplied string
+// is good for a netdata chart/dimension ID/NAME
+void netdata_fix_chart_name(char *s) {
+ sanitize_chart_name(s, s, strlen(s) + 1);
+}
+
+void netdata_fix_chart_id(char *s) {
+ sanitize_chart_name(s, s, strlen(s) + 1);
+// size_t len = strlen(s);
+// char buf[len + 1];
+//
+// text_sanitize((unsigned char *)buf, (const unsigned char *)s, sizeof(buf),
+// chart_names_allowed_chars, true, "", NULL);
+//
+// if(memcmp(s, buf, sizeof(buf)) == 0)
+// // they are the same
+// return;
+//
+// // they differ
+// XXH128_hash_t hash = XXH3_128bits(s, len);
+// ND_UUID *uuid = (ND_UUID *)&hash;
+// internal_fatal(sizeof(hash) != sizeof(ND_UUID), "XXH128 and ND_UUID do not have the same size");
+// buf[0] = 'x';
+// buf[1] = 'x';
+// buf[2] = 'h';
+// buf[3] = '_';
+// uuid_unparse_lower_compact(uuid->uuid, &buf[4]);
+}
+
+char *rrdset_strncpyz_name(char *dst, const char *src, size_t dst_size_minus_1) {
+ // src starts with "type."
+ sanitize_chart_name(dst, src, dst_size_minus_1 + 1);
+ return dst;
+}
+
+bool rrdvar_fix_name(char *variable) {
+ size_t len = strlen(variable);
+ char buf[len + 1];
+ memcpy(buf, variable, sizeof(buf));
+ sanitize_chart_name(variable, variable, len + 1);
+ return memcmp(buf, variable, sizeof(buf)) != 0;
+}
diff --git a/src/libnetdata/sanitizers/chart_id_and_name.h b/src/libnetdata/sanitizers/chart_id_and_name.h
new file mode 100644
index 000000000..eda6e3f30
--- /dev/null
+++ b/src/libnetdata/sanitizers/chart_id_and_name.h
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_CHART_ID_AND_NAME_H
+#define NETDATA_CHART_ID_AND_NAME_H
+
+#include "../libnetdata.h"
+
+void netdata_fix_chart_id(char *s);
+void netdata_fix_chart_name(char *s);
+char *rrdset_strncpyz_name(char *dst, const char *src, size_t dst_size_minus_1);
+bool rrdvar_fix_name(char *variable);
+
+extern unsigned char chart_names_allowed_chars[256];
+static inline bool is_netdata_api_valid_character(char c) {
+ if(IS_UTF8_BYTE(c)) return true;
+ unsigned char t = chart_names_allowed_chars[(unsigned char)c];
+ // the translation converts space to space
+ // so we have to check explicitly
+ return t == (unsigned char)c && t != ' ' && t != '!';
+}
+
+#endif //NETDATA_CHART_ID_AND_NAME_H
diff --git a/src/libnetdata/sanitizers/sanitizers-functions.c b/src/libnetdata/sanitizers/sanitizers-functions.c
new file mode 100644
index 000000000..5e1d87c35
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-functions.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "sanitizers-functions.h"
+
+static unsigned char functions_allowed_chars[256] = {
+ [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ',
+
+ // control characters to be treated as spaces
+ ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ',
+
+ [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ',
+ [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ',
+ [30] = ' ', [31] = ' ',
+
+ // symbols
+ [' '] = ' ', ['!'] = '!', ['"'] = '\'', ['#'] = '#', ['$'] = '$', ['%'] = '%', ['&'] = '&', ['\''] = '\'',
+ ['('] = '(', [')'] = ')', ['*'] = '*', ['+'] = '+', [','] = ',', ['-'] = '-', ['.'] = '.', ['/'] = '/',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '=', ['>'] = '>', ['?'] = '?', ['@'] = '@',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ ['['] = '[', ['\\'] = '\\', [']'] = ']', ['^'] = '^', ['_'] = '_', ['`'] = '`',
+
+ // lower
+ ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h',
+ ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p',
+ ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x',
+ ['y'] = 'y', ['z'] = 'z',
+
+ // symbols
+ ['{'] = '{', ['|'] = '|', ['}'] = '}', ['~'] = '~',
+
+ // rest
+ [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ',
+ [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ',
+ [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ',
+ [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ',
+ [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ',
+ [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ',
+ [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ',
+ [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ',
+ [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ',
+ [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ',
+ [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ',
+ [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ',
+ [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ',
+ [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ',
+ [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ',
+ [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ',
+ [255] = ' '
+};
+
+size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len) {
+ return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len,
+ functions_allowed_chars, true, "", NULL);
+}
+
diff --git a/src/libnetdata/sanitizers/sanitizers-functions.h b/src/libnetdata/sanitizers/sanitizers-functions.h
new file mode 100644
index 000000000..f4c934040
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-functions.h
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SANITIZERS_FUNCTIONS_H
+#define NETDATA_SANITIZERS_FUNCTIONS_H
+
+#include "../libnetdata.h"
+
+size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len);
+
+#endif //NETDATA_SANITIZERS_FUNCTIONS_H
diff --git a/src/libnetdata/sanitizers/sanitizers-labels.c b/src/libnetdata/sanitizers/sanitizers-labels.c
new file mode 100644
index 000000000..714897a88
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-labels.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "sanitizers-labels.h"
+
+/*
+ * All labels follow these rules:
+ *
+ * Character Symbol Names Values
+ * UTF-8 characters UTF-8 -> _ yes
+ * Lower case letter [a-z] yes yes
+ * Upper case letter [A-Z] yes yes
+ * Digit [0-9] yes yes
+ * Underscore _ yes yes
+ * Minus - yes yes
+ * Plus + -> _ yes
+ * Colon : -> _ yes
+ * Semicolon ; -> _ -> :
+ * Equal = -> _ -> :
+ * Period . yes yes
+ * Comma , -> . -> .
+ * Slash / yes yes
+ * Backslash \ -> / -> /
+ * At @ -> _ yes
+ * Space -> _ yes
+ * Opening parenthesis ( -> _ yes
+ * Closing parenthesis ) -> _ yes
+ * anything else -> _ -> space
+*
+ * The above rules should allow users to set in tags (indicative):
+ *
+ * 1. hostnames and domain names as-is
+ * 2. email addresses as-is
+ * 3. floating point numbers, converted to always use a dot as the decimal point
+ *
+ * Leading and trailing spaces and control characters are removed from both label
+ * names and values.
+ *
+ * Multiple spaces inside the label name or the value are removed (only 1 is retained).
+ * In names spaces are also converted to underscores.
+ *
+ * Names that are only underscores are rejected (they do not enter the dictionary).
+ *
+ * The above rules do not require any conversion to be included in JSON strings.
+ *
+ * Label names and values are truncated to LABELS_MAX_LENGTH (200) characters.
+ *
+ * When parsing, label key and value are separated by the first colon (:) found.
+ * So label:value1:value2 is parsed as key = "label", value = "value1:value2"
+ *
+ * This means a label key cannot contain a colon (:) - it is converted to
+ * underscore if it does.
+ *
+ */
+
+static unsigned char prometheus_label_names_char_map[256];
+static unsigned char label_names_char_map[256];
+static unsigned char label_values_char_map[256] = {
+ [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ',
+
+ // control characters to be treated as spaces
+ ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ',
+
+ [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ',
+ [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ',
+ [30] = ' ', [31] = ' ',
+
+ // symbols
+ [' '] = ' ', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
+ ['('] = '(', [')'] = ')', ['*'] = '_', ['+'] = '+', [','] = '.', ['-'] = '-', ['.'] = '.', ['/'] = '/',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ [':'] = ':', [';'] = ':', ['<'] = '_', ['='] = ':', ['>'] = '_', ['?'] = '_', ['@'] = '@',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '_', ['_'] = '_', ['`'] = '_',
+
+ // lower
+ ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h',
+ ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p',
+ ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x',
+ ['y'] = 'y', ['z'] = 'z',
+
+ // symbols
+ ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_',
+
+ // rest
+ [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ',
+ [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ',
+ [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ',
+ [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ',
+ [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ',
+ [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ',
+ [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ',
+ [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ',
+ [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ',
+ [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ',
+ [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ',
+ [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ',
+ [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ',
+ [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ',
+ [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ',
+ [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ',
+ [255] = ' '
+};
+
+__attribute__((constructor)) void initialize_labels_keys_char_map(void) {
+ // copy the values char map to the names char map
+ size_t i;
+ for(i = 0; i < 256 ;i++)
+ label_names_char_map[i] = label_values_char_map[i];
+
+ // apply overrides to the label names map
+ label_names_char_map['='] = '_';
+ label_names_char_map[':'] = '_';
+ label_names_char_map['+'] = '_';
+ label_names_char_map[';'] = '_';
+ label_names_char_map['@'] = '_';
+ label_names_char_map['('] = '_';
+ label_names_char_map[')'] = '_';
+ label_names_char_map['\\'] = '/';
+
+ // prometheus label names
+ for(i = 0; i < 256 ;i++) prometheus_label_names_char_map[i] = '_';
+ for(int s = 'A' ; s <= 'Z' ; s++) prometheus_label_names_char_map[s] = s;
+ for(int s = 'a' ; s <= 'z' ; s++) prometheus_label_names_char_map[s] = s;
+ for(int s = '0' ; s <= '9' ; s++) prometheus_label_names_char_map[s] = s;
+ prometheus_label_names_char_map[0] = '\0';
+ prometheus_label_names_char_map[':'] = ':';
+ prometheus_label_names_char_map['_'] = '_';
+}
+
+size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) {
+ size_t rc = text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, "", NULL);
+
+ for(size_t i = 0; i < rc ; i++)
+ if(dst[i] == ' ') dst[i] = '_';
+
+ return rc;
+}
+
+size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) {
+ return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]", NULL);
+}
+
+size_t prometheus_rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) {
+ return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, prometheus_label_names_char_map, 0, "", NULL);
+}
diff --git a/src/libnetdata/sanitizers/sanitizers-labels.h b/src/libnetdata/sanitizers/sanitizers-labels.h
new file mode 100644
index 000000000..39fd6a67a
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-labels.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SANITIZERS_LABELS_H
+#define NETDATA_SANITIZERS_LABELS_H
+
+#include "../libnetdata.h"
+
+size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size);
+size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size);
+
+size_t prometheus_rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size);
+
+#endif //NETDATA_SANITIZERS_LABELS_H
diff --git a/src/libnetdata/sanitizers/sanitizers-pluginsd.c b/src/libnetdata/sanitizers/sanitizers-pluginsd.c
new file mode 100644
index 000000000..2659cffee
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-pluginsd.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "sanitizers-pluginsd.h"
+
+/*
+ * Undefined and control characters become underscores
+ * ! -> _
+ * " -> _
+ * ' -> _
+ * ` -> _
+ * \ -> /
+ * = -> _
+ * | -> _
+ */
+
+static unsigned char external_plugins_map[256] = {
+ [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ',
+
+ // control characters to be treated as spaces
+ ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ',
+
+ [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ',
+ [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ',
+ [30] = ' ', [31] = ' ',
+
+ // symbols
+ [' '] = ' ', ['!'] = '_', ['"'] = '_', ['#'] = '#', ['$'] = '$', ['%'] = '%', ['&'] = '&', ['\''] = '_',
+ ['('] = '(', [')'] = ')', ['*'] = '*', ['+'] = '+', [','] = ',', ['-'] = '-', ['.'] = '.', ['/'] = '/',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '_', ['>'] = '>', ['?'] = '?', ['@'] = '@',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '^', ['_'] = '_', ['`'] = '_',
+
+ // lower
+ ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h',
+ ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p',
+ ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x',
+ ['y'] = 'y', ['z'] = 'z',
+
+ // symbols
+ ['{'] = '{', ['|'] = '_', ['}'] = '}', ['~'] = '~',
+
+ // rest
+ [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ',
+ [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ',
+ [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ',
+ [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ',
+ [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ',
+ [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ',
+ [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ',
+ [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ',
+ [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ',
+ [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ',
+ [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ',
+ [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ',
+ [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ',
+ [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ',
+ [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ',
+ [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ',
+ [255] = ' '
+};
+
+size_t external_plugins_sanitize(char *dst, const char *src, size_t dst_len) {
+ return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len,
+ external_plugins_map, true, "", NULL);
+}
+
diff --git a/src/libnetdata/sanitizers/sanitizers-pluginsd.h b/src/libnetdata/sanitizers/sanitizers-pluginsd.h
new file mode 100644
index 000000000..1779a1451
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-pluginsd.h
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SANITIZERS_PLUGINSD_H
+#define NETDATA_SANITIZERS_PLUGINSD_H
+
+#include "../libnetdata.h"
+
+size_t external_plugins_sanitize(char *dst, const char *src, size_t dst_len);
+
+#endif //NETDATA_SANITIZERS_PLUGINSD_H
diff --git a/src/libnetdata/sanitizers/sanitizers.h b/src/libnetdata/sanitizers/sanitizers.h
new file mode 100644
index 000000000..d76b18f7d
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SANITIZERS_H
+#define NETDATA_SANITIZERS_H
+
+#include "utf8-sanitizer.h"
+#include "sanitizers-labels.h"
+#include "sanitizers-functions.h"
+#include "sanitizers-pluginsd.h"
+#include "chart_id_and_name.h"
+
+#endif //NETDATA_SANITIZERS_H
diff --git a/src/libnetdata/sanitizers/utf8-sanitizer.c b/src/libnetdata/sanitizers/utf8-sanitizer.c
new file mode 100644
index 000000000..e10d88f41
--- /dev/null
+++ b/src/libnetdata/sanitizers/utf8-sanitizer.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) {
+ if(unlikely(!dst || !dst_size)) return 0;
+
+ // skip leading spaces and invalid characters
+ while(src && *src && !IS_UTF8_BYTE(*src) && (isspace(*src) || iscntrl(*src) || !isprint(*src)))
+ src++;
+
+ if(unlikely(!src || !*src)) {
+ strncpyz((char *)dst, empty, dst_size);
+ dst[dst_size - 1] = '\0';
+ size_t len = strlen((char *)dst);
+ if(multibyte_length) *multibyte_length = len;
+ return len;
+ }
+
+ unsigned char *d = dst;
+
+ // make room for the final string termination
+ unsigned char *end = &dst[dst_size - 1];
+
+ // copy while converting, but keep only one space
+ // we start wil last_is_space = 1 to skip leading spaces
+ int last_is_space = 1;
+
+ size_t mblen = 0;
+
+ while(*src && d < end) {
+ unsigned char c = *src;
+
+ if(IS_UTF8_STARTBYTE(c) && IS_UTF8_BYTE(src[1]) && d + 2 <= end) {
+ // UTF-8 multi-byte encoded character
+
+ // find how big this character is (2-4 bytes)
+ size_t utf_character_size = 2;
+ while(utf_character_size < 4 &&
+ d + utf_character_size <= end &&
+ IS_UTF8_BYTE(src[utf_character_size]) &&
+ !IS_UTF8_STARTBYTE(src[utf_character_size]))
+ utf_character_size++;
+
+ if(utf) {
+ while(utf_character_size) {
+ utf_character_size--;
+ *d++ = *src++;
+ }
+ }
+ else {
+ // UTF-8 characters are not allowed.
+ // Assume it is an underscore
+ // and skip all except the first byte
+ *d++ = '_';
+ src += (utf_character_size - 1);
+ }
+
+ last_is_space = 0;
+ mblen++;
+ continue;
+ }
+
+ c = char_map[c];
+ if(c == ' ') {
+ // a space character
+
+ if(!last_is_space) {
+ // add one space
+ *d++ = c;
+ mblen++;
+ }
+
+ last_is_space++;
+ }
+ else {
+ *d++ = c;
+ last_is_space = 0;
+ mblen++;
+ }
+
+ src++;
+ }
+
+ // remove trailing spaces
+ while(d > dst && !IS_UTF8_BYTE(*(d - 1)) && *(d - 1) == ' ') {
+ d--;
+ mblen--;
+ }
+
+ // put a termination at the end of what we copied
+ *d = '\0';
+
+ // check if dst is all underscores and empty it if it is
+ if(*dst == '_') {
+ unsigned char *t = dst;
+ while (*t == '_') t++;
+ if (unlikely(*t == '\0')) {
+ *dst = '\0';
+ mblen = 0;
+ }
+ }
+
+ // check if it is empty
+ if(unlikely(*dst == '\0')) {
+ strncpyz((char *)dst, empty, dst_size);
+ dst[dst_size - 1] = '\0';
+ mblen = strlen((char *)dst);
+ if(multibyte_length) *multibyte_length = mblen;
+ return mblen;
+ }
+
+ if(multibyte_length) *multibyte_length = mblen;
+
+ return d - dst;
+}
diff --git a/src/libnetdata/sanitizers/utf8-sanitizer.h b/src/libnetdata/sanitizers/utf8-sanitizer.h
new file mode 100644
index 000000000..8b5f73a7f
--- /dev/null
+++ b/src/libnetdata/sanitizers/utf8-sanitizer.h
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_UTF8_SANITIZER_H
+#define NETDATA_UTF8_SANITIZER_H
+
+#include "../libnetdata.h"
+
+size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length);
+
+#endif //NETDATA_UTF8_SANITIZER_H