summaryrefslogtreecommitdiffstats
path: root/src/libnetdata/sanitizers/sanitizers-labels.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-11-25 17:33:56 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-11-25 17:34:10 +0000
commit83ba6762cc43d9db581b979bb5e3445669e46cc2 (patch)
tree2e69833b43f791ed253a7a20318b767ebe56cdb8 /src/libnetdata/sanitizers/sanitizers-labels.c
parentReleasing debian version 1.47.5-1. (diff)
downloadnetdata-83ba6762cc43d9db581b979bb5e3445669e46cc2.tar.xz
netdata-83ba6762cc43d9db581b979bb5e3445669e46cc2.zip
Merging upstream version 2.0.3+dfsg (Closes: #923993, #1042533, #1045145).
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libnetdata/sanitizers/sanitizers-labels.c')
-rw-r--r--src/libnetdata/sanitizers/sanitizers-labels.c157
1 files changed, 157 insertions, 0 deletions
diff --git a/src/libnetdata/sanitizers/sanitizers-labels.c b/src/libnetdata/sanitizers/sanitizers-labels.c
new file mode 100644
index 000000000..714897a88
--- /dev/null
+++ b/src/libnetdata/sanitizers/sanitizers-labels.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "sanitizers-labels.h"
+
+/*
+ * All labels follow these rules:
+ *
+ * Character Symbol Names Values
+ * UTF-8 characters UTF-8 -> _ yes
+ * Lower case letter [a-z] yes yes
+ * Upper case letter [A-Z] yes yes
+ * Digit [0-9] yes yes
+ * Underscore _ yes yes
+ * Minus - yes yes
+ * Plus + -> _ yes
+ * Colon : -> _ yes
+ * Semicolon ; -> _ -> :
+ * Equal = -> _ -> :
+ * Period . yes yes
+ * Comma , -> . -> .
+ * Slash / yes yes
+ * Backslash \ -> / -> /
+ * At @ -> _ yes
+ * Space -> _ yes
+ * Opening parenthesis ( -> _ yes
+ * Closing parenthesis ) -> _ yes
+ * Square brackets [ ] yes yes
+ * Control characters, DEL -> _ -> space
+ * anything else (other ASCII symbols) -> _ -> _
+ *
+ * The above rules should allow users to set in tags (indicative):
+ *
+ * 1. hostnames and domain names as-is
+ * 2. email addresses as-is
+ * 3. floating point numbers, converted to always use a dot as the decimal point
+ *
+ * Leading and trailing spaces and control characters are removed from both label
+ * names and values.
+ *
+ * Multiple spaces inside the label name or the value are removed (only 1 is retained).
+ * In names spaces are also converted to underscores.
+ *
+ * Names that are only underscores are rejected (they do not enter the dictionary).
+ *
+ * The above rules do not require any conversion to be included in JSON strings.
+ *
+ * Label names and values are truncated to LABELS_MAX_LENGTH (200) characters.
+ *
+ * When parsing, label key and value are separated by the first colon (:) found.
+ * So label:value1:value2 is parsed as key = "label", value = "value1:value2"
+ *
+ * This means a label key cannot contain a colon (:) - it is converted to
+ * underscore if it does.
+ *
+ */
+
+// Per-byte translation tables: each one is indexed by the unsigned value of an
+// input byte and holds the byte that replaces it. They are passed to
+// text_sanitize() by the sanitize functions in this file.
+//
+// The first two tables have no initializer here; they are filled in by
+// initialize_labels_keys_char_map(), which is marked as a constructor.
+static unsigned char prometheus_label_names_char_map[256];
+static unsigned char label_names_char_map[256];
+
+// Translation table for label VALUES. Every one of the 256 entries is listed
+// explicitly. It also serves as the template that
+// initialize_labels_keys_char_map() copies to build label_names_char_map.
+static unsigned char label_values_char_map[256] = {
+ // NUL stays NUL; the remaining low control codes become spaces
+ [0] = '\0', [1] = ' ', [2] = ' ', [3] = ' ', [4] = ' ', [5] = ' ', [6] = ' ', [7] = ' ', [8] = ' ',
+
+ // control characters to be treated as spaces
+ ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', ['\f'] = ' ', ['\r'] = ' ',
+
+ [14] = ' ', [15] = ' ', [16] = ' ', [17] = ' ', [18] = ' ', [19] = ' ', [20] = ' ', [21] = ' ',
+ [22] = ' ', [23] = ' ', [24] = ' ', [25] = ' ', [26] = ' ', [27] = ' ', [28] = ' ', [29] = ' ',
+ [30] = ' ', [31] = ' ',
+
+ // symbols
+ // kept as-is: space ( ) + - . /    comma becomes '.'    the others become '_'
+ [' '] = ' ', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
+ ['('] = '(', [')'] = ')', ['*'] = '_', ['+'] = '+', [','] = '.', ['-'] = '-', ['.'] = '.', ['/'] = '/',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ // ':' and '@' are kept; ';' and '=' are folded into ':'; the others become '_'
+ [':'] = ':', [';'] = ':', ['<'] = '_', ['='] = ':', ['>'] = '_', ['?'] = '_', ['@'] = '@',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ // '[' ']' and '_' are kept; backslash becomes '/'; the others become '_'
+ ['['] = '[', ['\\'] = '/', [']'] = ']', ['^'] = '_', ['_'] = '_', ['`'] = '_',
+
+ // lower
+ ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h',
+ ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p',
+ ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x',
+ ['y'] = 'y', ['z'] = 'z',
+
+ // symbols
+ ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_',
+
+ // rest
+ // DEL (127) and every byte with the high bit set map to space in this table.
+ // NOTE(review): rrdlabels_sanitize_value() passes 1 as the 5th argument of
+ // text_sanitize(); presumably that enables UTF-8 handling which takes
+ // precedence over these entries for valid multi-byte sequences - confirm.
+ [127] = ' ', [128] = ' ', [129] = ' ', [130] = ' ', [131] = ' ', [132] = ' ', [133] = ' ', [134] = ' ',
+ [135] = ' ', [136] = ' ', [137] = ' ', [138] = ' ', [139] = ' ', [140] = ' ', [141] = ' ', [142] = ' ',
+ [143] = ' ', [144] = ' ', [145] = ' ', [146] = ' ', [147] = ' ', [148] = ' ', [149] = ' ', [150] = ' ',
+ [151] = ' ', [152] = ' ', [153] = ' ', [154] = ' ', [155] = ' ', [156] = ' ', [157] = ' ', [158] = ' ',
+ [159] = ' ', [160] = ' ', [161] = ' ', [162] = ' ', [163] = ' ', [164] = ' ', [165] = ' ', [166] = ' ',
+ [167] = ' ', [168] = ' ', [169] = ' ', [170] = ' ', [171] = ' ', [172] = ' ', [173] = ' ', [174] = ' ',
+ [175] = ' ', [176] = ' ', [177] = ' ', [178] = ' ', [179] = ' ', [180] = ' ', [181] = ' ', [182] = ' ',
+ [183] = ' ', [184] = ' ', [185] = ' ', [186] = ' ', [187] = ' ', [188] = ' ', [189] = ' ', [190] = ' ',
+ [191] = ' ', [192] = ' ', [193] = ' ', [194] = ' ', [195] = ' ', [196] = ' ', [197] = ' ', [198] = ' ',
+ [199] = ' ', [200] = ' ', [201] = ' ', [202] = ' ', [203] = ' ', [204] = ' ', [205] = ' ', [206] = ' ',
+ [207] = ' ', [208] = ' ', [209] = ' ', [210] = ' ', [211] = ' ', [212] = ' ', [213] = ' ', [214] = ' ',
+ [215] = ' ', [216] = ' ', [217] = ' ', [218] = ' ', [219] = ' ', [220] = ' ', [221] = ' ', [222] = ' ',
+ [223] = ' ', [224] = ' ', [225] = ' ', [226] = ' ', [227] = ' ', [228] = ' ', [229] = ' ', [230] = ' ',
+ [231] = ' ', [232] = ' ', [233] = ' ', [234] = ' ', [235] = ' ', [236] = ' ', [237] = ' ', [238] = ' ',
+ [239] = ' ', [240] = ' ', [241] = ' ', [242] = ' ', [243] = ' ', [244] = ' ', [245] = ' ', [246] = ' ',
+ [247] = ' ', [248] = ' ', [249] = ' ', [250] = ' ', [251] = ' ', [252] = ' ', [253] = ' ', [254] = ' ',
+ [255] = ' '
+};
+
+// Fills in the two derived lookup tables: label_names_char_map and
+// prometheus_label_names_char_map. The function carries the constructor
+// attribute, so it is run automatically before main(); it takes no input
+// and returns nothing.
+__attribute__((constructor)) void initialize_labels_keys_char_map(void) {
+    // characters that are accepted in label values but collapse to '_' in label names
+    static const unsigned char names_to_underscore[] = { '=', ':', '+', ';', '@', '(', ')' };
+
+    // label names: begin with an exact copy of the values table ...
+    for(size_t c = 0; c < 256; c++)
+        label_names_char_map[c] = label_values_char_map[c];
+
+    // ... then apply the entries that differ for names
+    for(size_t k = 0; k < sizeof(names_to_underscore); k++)
+        label_names_char_map[names_to_underscore[k]] = '_';
+
+    label_names_char_map['\\'] = '/';
+
+    // prometheus label names: ASCII letters and digits map to themselves,
+    // every other byte (which includes '_' itself) maps to '_' ...
+    for(int c = 0; c < 256; c++) {
+        int is_alnum = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
+        prometheus_label_names_char_map[c] = is_alnum ? (unsigned char)c : '_';
+    }
+
+    // ... with two exceptions: NUL stays NUL and ':' is kept as-is.
+    // NOTE(review): the label-name pattern documented by Prometheus,
+    // [a-zA-Z_][a-zA-Z0-9_]*, does not include ':' - confirm that keeping it
+    // here is intentional.
+    prometheus_label_names_char_map[0] = '\0';
+    prometheus_label_names_char_map[':'] = ':';
+}
+
+// Sanitizes a label name from src into dst (a buffer of dst_size bytes) using
+// label_names_char_map, then turns every space left in the result into '_'.
+// Returns the value reported by text_sanitize(), which is used here as the
+// number of bytes written to dst.
+size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) {
+    size_t written = text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size,
+                                   label_names_char_map, 0, "", NULL);
+
+    // names may not contain spaces: walk the sanitized bytes and replace them
+    char *cursor = dst;
+    size_t remaining = written;
+    while(remaining--) {
+        if(*cursor == ' ')
+            *cursor = '_';
+
+        cursor++;
+    }
+
+    return written;
+}
+
+// Sanitizes a label value from src into dst (a buffer of dst_size bytes) using
+// label_values_char_map, and returns whatever text_sanitize() returns.
+// NOTE(review): the literal 1 and "[none]" are presumably text_sanitize()'s
+// "allow UTF-8" flag and its replacement for an empty result - confirm against
+// text_sanitize().
+size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) {
+    unsigned char *out = (unsigned char *)dst;
+    const unsigned char *in = (const unsigned char *)src;
+
+    return text_sanitize(out, in, dst_size, label_values_char_map, 1, "[none]", NULL);
+}
+
+// Sanitizes a label name for Prometheus from src into dst (a buffer of
+// dst_size bytes) using prometheus_label_names_char_map, and returns whatever
+// text_sanitize() returns.
+// NOTE(review): the literal 0 and "" are presumably text_sanitize()'s
+// "allow UTF-8" flag (off) and its replacement for an empty result - confirm
+// against text_sanitize().
+size_t prometheus_rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) {
+    unsigned char *out = (unsigned char *)dst;
+    const unsigned char *in = (const unsigned char *)src;
+
+    return text_sanitize(out, in, dst_size, prometheus_label_names_char_map, 0, "", NULL);
+}