diff options
Diffstat (limited to 'src/libnetdata/sanitizers')
-rw-r--r-- | src/libnetdata/sanitizers/chart_id_and_name.c | 145 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/chart_id_and_name.h | 22 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-functions.c | 68 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-functions.h | 10 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-labels.c | 157 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-labels.h | 13 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-pluginsd.c | 79 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers-pluginsd.h | 10 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/sanitizers.h | 12 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/utf8-sanitizer.c | 116 | ||||
-rw-r--r-- | src/libnetdata/sanitizers/utf8-sanitizer.h | 10 |
11 files changed, 642 insertions, 0 deletions
diff --git a/src/libnetdata/sanitizers/chart_id_and_name.c b/src/libnetdata/sanitizers/chart_id_and_name.c new file mode 100644 index 000000000..5af8aa686 --- /dev/null +++ b/src/libnetdata/sanitizers/chart_id_and_name.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +/* + * control characters become space, which are deduplicated. + * + * Character Name Sym To Why + * ---------------- --- --- ------------------------------------------------------------------------------------- + * space [ ] -> [_] + * exclamation mark [!] -> [_] (only when it is the first character) simple patterns negation + * double quotes ["] -> [_] needs escaping when parsing + * dollar [$] -> [_] health variables and security in alarm-notify.sh, cgroup-name.sh, etc. + * percent [%] -> [_] http GET encoded characters + * ampersand [&] -> [_] http GET fields separator + * single quote ['] -> [_] needs escaping when parsing + * asterisk [*] -> [_] simple pattern wildcard + * plus [+] -> [_] http GET space + * comma [,] -> [.] list separator (probably not used today) + * equal [=] -> [_] plugins.d protocol separator + * question mark [?] 
-> [_] http GET query string separator + * at [@] -> [_] hostname separator (on the UI) + * backtick [`] -> [_] bash expansion (security in alarm-notify.sh and other shell scripts) + * pipe [|] -> [_] list separator (simple patterns and http GET) + * backslash [\] -> [/] to avoid interfering with escaping logic + */ + +unsigned char chart_names_allowed_chars[256] = { + [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ', + + // control characters to be treated as spaces + ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ', + + [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ', + [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ', + [30] = ' ', [31] = ' ', + + // symbols + [' '] = ' ', ['!'] = '!', ['"'] = '_', ['#'] = '#', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_', + ['('] = '(', [')'] = ')', ['*'] = '_', ['+'] = '_', [','] = '.', ['-'] = '-', ['.'] = '.', ['/'] = '/', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '_', ['>'] = '>', ['?'] = '_', ['@'] = '_', + + // capitals + ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '_', ['_'] = '_', ['`'] = '_', + + // lower + ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', + ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p', + ['q'] = 
'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', + ['y'] = 'y', ['z'] = 'z', + + // symbols + ['{'] = '{', ['|'] = '_', ['}'] = '}', ['~'] = '~', + + // rest + [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ', + [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ', + [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ', + [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ', + [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ', + [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ', + [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ', + [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ', + [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ', + [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ', + [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ', + [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ', + [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ', + [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ', + [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ', + [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ', + [255] = ' ' +}; + +static inline void sanitize_chart_name(char *dst, const char *src, size_t 
dst_size) { + // text_sanitize deduplicates spaces + text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, + chart_names_allowed_chars, true, "", NULL); + + char *d = dst; + + // do not accept ! as the first character + if(*d == '!') *d = '_'; + + // convert remaining spaces to underscores + while(*d) { + if(*d == ' ') *d = '_'; + d++; + } +} + +// make sure the supplied string +// is good for a netdata chart/dimension ID/NAME +void netdata_fix_chart_name(char *s) { + sanitize_chart_name(s, s, strlen(s) + 1); +} + +void netdata_fix_chart_id(char *s) { + sanitize_chart_name(s, s, strlen(s) + 1); +// size_t len = strlen(s); +// char buf[len + 1]; +// +// text_sanitize((unsigned char *)buf, (const unsigned char *)s, sizeof(buf), +// chart_names_allowed_chars, true, "", NULL); +// +// if(memcmp(s, buf, sizeof(buf)) == 0) +// // they are the same +// return; +// +// // they differ +// XXH128_hash_t hash = XXH3_128bits(s, len); +// ND_UUID *uuid = (ND_UUID *)&hash; +// internal_fatal(sizeof(hash) != sizeof(ND_UUID), "XXH128 and ND_UUID do not have the same size"); +// buf[0] = 'x'; +// buf[1] = 'x'; +// buf[2] = 'h'; +// buf[3] = '_'; +// uuid_unparse_lower_compact(uuid->uuid, &buf[4]); +} + +char *rrdset_strncpyz_name(char *dst, const char *src, size_t dst_size_minus_1) { + // src starts with "type." 
+ sanitize_chart_name(dst, src, dst_size_minus_1 + 1); + return dst; +} + +bool rrdvar_fix_name(char *variable) { + size_t len = strlen(variable); + char buf[len + 1]; + memcpy(buf, variable, sizeof(buf)); + sanitize_chart_name(variable, variable, len + 1); + return memcmp(buf, variable, sizeof(buf)) != 0; +} diff --git a/src/libnetdata/sanitizers/chart_id_and_name.h b/src/libnetdata/sanitizers/chart_id_and_name.h new file mode 100644 index 000000000..eda6e3f30 --- /dev/null +++ b/src/libnetdata/sanitizers/chart_id_and_name.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CHART_ID_AND_NAME_H +#define NETDATA_CHART_ID_AND_NAME_H + +#include "../libnetdata.h" + +void netdata_fix_chart_id(char *s); +void netdata_fix_chart_name(char *s); +char *rrdset_strncpyz_name(char *dst, const char *src, size_t dst_size_minus_1); +bool rrdvar_fix_name(char *variable); + +extern unsigned char chart_names_allowed_chars[256]; +static inline bool is_netdata_api_valid_character(char c) { + if(IS_UTF8_BYTE(c)) return true; + unsigned char t = chart_names_allowed_chars[(unsigned char)c]; + // the translation converts space to space + // so we have to check explicitly + return t == (unsigned char)c && t != ' ' && t != '!'; +} + +#endif //NETDATA_CHART_ID_AND_NAME_H diff --git a/src/libnetdata/sanitizers/sanitizers-functions.c b/src/libnetdata/sanitizers/sanitizers-functions.c new file mode 100644 index 000000000..5e1d87c35 --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-functions.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sanitizers-functions.h" + +static unsigned char functions_allowed_chars[256] = { + [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ', + + // control characters to be treated as spaces + ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ', + + [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', 
[21] = ' ', + [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ', + [30] = ' ', [31] = ' ', + + // symbols + [' '] = ' ', ['!'] = '!', ['"'] = '\'', ['#'] = '#', ['$'] = '$', ['%'] = '%', ['&'] = '&', ['\''] = '\'', + ['('] = '(', [')'] = ')', ['*'] = '*', ['+'] = '+', [','] = ',', ['-'] = '-', ['.'] = '.', ['/'] = '/', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '=', ['>'] = '>', ['?'] = '?', ['@'] = '@', + + // capitals + ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = '[', ['\\'] = '\\', [']'] = ']', ['^'] = '^', ['_'] = '_', ['`'] = '`', + + // lower + ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', + ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p', + ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', + ['y'] = 'y', ['z'] = 'z', + + // symbols + ['{'] = '{', ['|'] = '|', ['}'] = '}', ['~'] = '~', + + // rest + [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ', + [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ', + [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ', + [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ', + [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', 
[163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ', + [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ', + [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ', + [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ', + [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ', + [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ', + [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ', + [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ', + [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ', + [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ', + [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ', + [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ', + [255] = ' ' +}; + +size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len, + functions_allowed_chars, true, "", NULL); +} + diff --git a/src/libnetdata/sanitizers/sanitizers-functions.h b/src/libnetdata/sanitizers/sanitizers-functions.h new file mode 100644 index 000000000..f4c934040 --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-functions.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SANITIZERS_FUNCTIONS_H +#define NETDATA_SANITIZERS_FUNCTIONS_H + +#include "../libnetdata.h" + +size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len); + +#endif //NETDATA_SANITIZERS_FUNCTIONS_H diff --git 
a/src/libnetdata/sanitizers/sanitizers-labels.c b/src/libnetdata/sanitizers/sanitizers-labels.c new file mode 100644 index 000000000..714897a88 --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-labels.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sanitizers-labels.h" + +/* + * All labels follow these rules: + * + * Character Symbol Names Values + * UTF-8 characters UTF-8 -> _ yes + * Lower case letter [a-z] yes yes + * Upper case letter [A-Z] yes yes + * Digit [0-9] yes yes + * Underscore _ yes yes + * Minus - yes yes + * Plus + -> _ yes + * Colon : -> _ yes + * Semicolon ; -> _ -> : + * Equal = -> _ -> : + * Period . yes yes + * Comma , -> . -> . + * Slash / yes yes + * Backslash \ -> / -> / + * At @ -> _ yes + * Space -> _ yes + * Opening parenthesis ( -> _ yes + * Closing parenthesis ) -> _ yes + * anything else -> _ -> space +* + * The above rules should allow users to set in tags (indicative): + * + * 1. hostnames and domain names as-is + * 2. email addresses as-is + * 3. floating point numbers, converted to always use a dot as the decimal point + * + * Leading and trailing spaces and control characters are removed from both label + * names and values. + * + * Multiple spaces inside the label name or the value are removed (only 1 is retained). + * In names spaces are also converted to underscores. + * + * Names that are only underscores are rejected (they do not enter the dictionary). + * + * The above rules do not require any conversion to be included in JSON strings. + * + * Label names and values are truncated to LABELS_MAX_LENGTH (200) characters. + * + * When parsing, label key and value are separated by the first colon (:) found. + * So label:value1:value2 is parsed as key = "label", value = "value1:value2" + * + * This means a label key cannot contain a colon (:) - it is converted to + * underscore if it does. 
+ * + */ + +static unsigned char prometheus_label_names_char_map[256]; +static unsigned char label_names_char_map[256]; +static unsigned char label_values_char_map[256] = { + [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ', + + // control characters to be treated as spaces + ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ', + + [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ', + [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ', + [30] = ' ', [31] = ' ', + + // symbols + [' '] = ' ', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_', + ['('] = '(', [')'] = ')', ['*'] = '_', ['+'] = '+', [','] = '.', ['-'] = '-', ['.'] = '.', ['/'] = '/', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = ':', [';'] = ':', ['<'] = '_', ['='] = ':', ['>'] = '_', ['?'] = '_', ['@'] = '@', + + // capitals + ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '_', ['_'] = '_', ['`'] = '_', + + // lower + ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', + ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p', + ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', + ['y'] = 'y', ['z'] = 'z', + + // symbols + ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_', + + // 
rest + [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ', + [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ', + [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ', + [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ', + [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ', + [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ', + [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ', + [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ', + [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ', + [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ', + [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ', + [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ', + [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ', + [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ', + [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ', + [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ', + [255] = ' ' +}; + +__attribute__((constructor)) void initialize_labels_keys_char_map(void) { + // copy the values char map to the names char map + size_t i; + for(i = 0; i < 256 ;i++) + label_names_char_map[i] = label_values_char_map[i]; + + // apply overrides to the label names map + 
label_names_char_map['='] = '_'; + label_names_char_map[':'] = '_'; + label_names_char_map['+'] = '_'; + label_names_char_map[';'] = '_'; + label_names_char_map['@'] = '_'; + label_names_char_map['('] = '_'; + label_names_char_map[')'] = '_'; + label_names_char_map['\\'] = '/'; + + // prometheus label names + for(i = 0; i < 256 ;i++) prometheus_label_names_char_map[i] = '_'; + for(int s = 'A' ; s <= 'Z' ; s++) prometheus_label_names_char_map[s] = s; + for(int s = 'a' ; s <= 'z' ; s++) prometheus_label_names_char_map[s] = s; + for(int s = '0' ; s <= '9' ; s++) prometheus_label_names_char_map[s] = s; + prometheus_label_names_char_map[0] = '\0'; + prometheus_label_names_char_map[':'] = ':'; + prometheus_label_names_char_map['_'] = '_'; +} + +size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) { + size_t rc = text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, "", NULL); + + for(size_t i = 0; i < rc ; i++) + if(dst[i] == ' ') dst[i] = '_'; + + return rc; +} + +size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]", NULL); +} + +size_t prometheus_rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, prometheus_label_names_char_map, 0, "", NULL); +} diff --git a/src/libnetdata/sanitizers/sanitizers-labels.h b/src/libnetdata/sanitizers/sanitizers-labels.h new file mode 100644 index 000000000..39fd6a67a --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-labels.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SANITIZERS_LABELS_H +#define NETDATA_SANITIZERS_LABELS_H + +#include "../libnetdata.h" + +size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size); +size_t rrdlabels_sanitize_value(char *dst, const char 
*src, size_t dst_size); + +size_t prometheus_rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size); + +#endif //NETDATA_SANITIZERS_LABELS_H diff --git a/src/libnetdata/sanitizers/sanitizers-pluginsd.c b/src/libnetdata/sanitizers/sanitizers-pluginsd.c new file mode 100644 index 000000000..2659cffee --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-pluginsd.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sanitizers-pluginsd.h" + +/* + * Control characters and non-UTF-8 bytes above 126 become spaces (runs of spaces are collapsed to one) + * ! -> _ + * " -> _ + * ' -> _ + * ` -> _ + * \ -> / + * = -> _ + * | -> _ + */ + +static unsigned char external_plugins_map[256] = { + [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ', + + // control characters to be treated as spaces + ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ', + + [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ', + [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ', + [30] = ' ', [31] = ' ', + + // symbols + [' '] = ' ', ['!'] = '_', ['"'] = '_', ['#'] = '#', ['$'] = '$', ['%'] = '%', ['&'] = '&', ['\''] = '_', + ['('] = '(', [')'] = ')', ['*'] = '*', ['+'] = '+', [','] = ',', ['-'] = '-', ['.'] = '.', ['/'] = '/', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '_', ['>'] = '>', ['?'] = '?', ['@'] = '@', + + // capitals + ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = 
'[', ['\\'] = '/', [']'] = ']', ['^'] = '^', ['_'] = '_', ['`'] = '_', + + // lower + ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', + ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p', + ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', + ['y'] = 'y', ['z'] = 'z', + + // symbols + ['{'] = '{', ['|'] = '_', ['}'] = '}', ['~'] = '~', + + // rest + [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ', + [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ', + [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ', + [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ', + [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ', + [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ', + [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ', + [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ', + [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ', + [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ', + [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ', + [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ', + [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ', + [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ', + 
[239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ', + [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ', + [255] = ' ' +}; + +size_t external_plugins_sanitize(char *dst, const char *src, size_t dst_len) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len, + external_plugins_map, true, "", NULL); +} + diff --git a/src/libnetdata/sanitizers/sanitizers-pluginsd.h b/src/libnetdata/sanitizers/sanitizers-pluginsd.h new file mode 100644 index 000000000..1779a1451 --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers-pluginsd.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SANITIZERS_PLUGINSD_H +#define NETDATA_SANITIZERS_PLUGINSD_H + +#include "../libnetdata.h" + +size_t external_plugins_sanitize(char *dst, const char *src, size_t dst_len); + +#endif //NETDATA_SANITIZERS_PLUGINSD_H diff --git a/src/libnetdata/sanitizers/sanitizers.h b/src/libnetdata/sanitizers/sanitizers.h new file mode 100644 index 000000000..d76b18f7d --- /dev/null +++ b/src/libnetdata/sanitizers/sanitizers.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SANITIZERS_H +#define NETDATA_SANITIZERS_H + +#include "utf8-sanitizer.h" +#include "sanitizers-labels.h" +#include "sanitizers-functions.h" +#include "sanitizers-pluginsd.h" +#include "chart_id_and_name.h" + +#endif //NETDATA_SANITIZERS_H diff --git a/src/libnetdata/sanitizers/utf8-sanitizer.c b/src/libnetdata/sanitizers/utf8-sanitizer.c new file mode 100644 index 000000000..e10d88f41 --- /dev/null +++ b/src/libnetdata/sanitizers/utf8-sanitizer.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { + if(unlikely(!dst || !dst_size)) 
return 0; + + // skip leading spaces and invalid characters + while(src && *src && !IS_UTF8_BYTE(*src) && (isspace(*src) || iscntrl(*src) || !isprint(*src))) + src++; + + if(unlikely(!src || !*src)) { + strncpyz((char *)dst, empty, dst_size); + dst[dst_size - 1] = '\0'; + size_t len = strlen((char *)dst); + if(multibyte_length) *multibyte_length = len; + return len; + } + + unsigned char *d = dst; + + // make room for the final string termination + unsigned char *end = &dst[dst_size - 1]; + + // copy while converting, but keep only one space + // we start with last_is_space = 1 to skip leading spaces + int last_is_space = 1; + + size_t mblen = 0; + + while(*src && d < end) { + unsigned char c = *src; + + if(IS_UTF8_STARTBYTE(c) && IS_UTF8_BYTE(src[1]) && d + 2 <= end) { + // UTF-8 multi-byte encoded character + + // find how big this character is (2-4 bytes) + size_t utf_character_size = 2; + while(utf_character_size < 4 && + d + utf_character_size <= end && + IS_UTF8_BYTE(src[utf_character_size]) && + !IS_UTF8_STARTBYTE(src[utf_character_size])) + utf_character_size++; + + if(utf) { + while(utf_character_size) { + utf_character_size--; + *d++ = *src++; + } + } + else { + // UTF-8 characters are not allowed. 
+ // Assume it is an underscore + // and skip all except the first byte + *d++ = '_'; + src += (utf_character_size - 1); + } + + last_is_space = 0; + mblen++; + continue; + } + + c = char_map[c]; + if(c == ' ') { + // a space character + + if(!last_is_space) { + // add one space + *d++ = c; + mblen++; + } + + last_is_space++; + } + else { + *d++ = c; + last_is_space = 0; + mblen++; + } + + src++; + } + + // remove trailing spaces + while(d > dst && !IS_UTF8_BYTE(*(d - 1)) && *(d - 1) == ' ') { + d--; + mblen--; + } + + // put a termination at the end of what we copied + *d = '\0'; + + // check if dst is all underscores and empty it if it is + if(*dst == '_') { + unsigned char *t = dst; + while (*t == '_') t++; + if (unlikely(*t == '\0')) { + *dst = '\0'; + mblen = 0; + } + } + + // check if it is empty + if(unlikely(*dst == '\0')) { + strncpyz((char *)dst, empty, dst_size); + dst[dst_size - 1] = '\0'; + mblen = strlen((char *)dst); + if(multibyte_length) *multibyte_length = mblen; + return mblen; + } + + if(multibyte_length) *multibyte_length = mblen; + + return d - dst; +} diff --git a/src/libnetdata/sanitizers/utf8-sanitizer.h b/src/libnetdata/sanitizers/utf8-sanitizer.h new file mode 100644 index 000000000..8b5f73a7f --- /dev/null +++ b/src/libnetdata/sanitizers/utf8-sanitizer.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_UTF8_SANITIZER_H +#define NETDATA_UTF8_SANITIZER_H + +#include "../libnetdata.h" + +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length); + +#endif //NETDATA_UTF8_SANITIZER_H |