diff options
Diffstat (limited to 'src/database/rrdlabels.c')
-rw-r--r-- | src/database/rrdlabels.c | 470 |
1 files changed, 21 insertions, 449 deletions
diff --git a/src/database/rrdlabels.c b/src/database/rrdlabels.c index 65e2dc9e4..585b98264 100644 --- a/src/database/rrdlabels.c +++ b/src/database/rrdlabels.c @@ -88,464 +88,15 @@ static inline void STATS_MINUS_MEMORY(struct dictionary_stats *stats, size_t key __atomic_fetch_sub(&stats->memory.values, (long)value_size, __ATOMIC_RELAXED); } -// ---------------------------------------------------------------------------- -// labels sanitization - -/* - * All labels follow these rules: - * - * Character Symbol Values Names - * UTF-8 characters UTF-8 yes -> _ - * Lower case letter [a-z] yes yes - * Upper case letter [A-Z] yes -> [a-z] - * Digit [0-9] yes yes - * Underscore _ yes yes - * Minus - yes yes - * Plus + yes -> _ - * Colon : yes -> _ - * Semicolon ; -> : -> _ - * Equal = -> : -> _ - * Period . yes yes - * Comma , -> . -> . - * Slash / yes yes - * Backslash \ -> / -> / - * At @ yes -> _ - * Space yes -> _ - * Opening parenthesis ( yes -> _ - * Closing parenthesis ) yes -> _ - * anything else -> _ -> _ -* - * The above rules should allow users to set in tags (indicative): - * - * 1. hostnames and domain names as-is - * 2. email addresses as-is - * 3. floating point numbers, converted to always use a dot as the decimal point - * - * Leading and trailing spaces and control characters are removed from both label - * names and values. - * - * Multiple spaces inside the label name or the value are removed (only 1 is retained). - * In names spaces are also converted to underscores. - * - * Names that are only underscores are rejected (they do not enter the dictionary). - * - * The above rules do not require any conversion to be included in JSON strings. - * - * Label names and values are truncated to LABELS_MAX_LENGTH (200) characters. - * - * When parsing, label key and value are separated by the first colon (:) found. - * So label:value1:value2 is parsed as key = "label", value = "value1:value2" - * - * This means a label key cannot contain a colon (:) - it is converted to - * underscore if it does. - * - */ - #define RRDLABELS_MAX_NAME_LENGTH 200 #define RRDLABELS_MAX_VALUE_LENGTH 800 // 800 in bytes, up to 200 UTF-8 characters -static unsigned char label_spaces_char_map[256]; -static unsigned char label_names_char_map[256]; -static unsigned char label_values_char_map[256] = { - [0] = '\0', // - [1] = '_', // - [2] = '_', // - [3] = '_', // - [4] = '_', // - [5] = '_', // - [6] = '_', // - [7] = '_', // - [8] = '_', // - [9] = '_', // - [10] = '_', // - [11] = '_', // - [12] = '_', // - [13] = '_', // - [14] = '_', // - [15] = '_', // - [16] = '_', // - [17] = '_', // - [18] = '_', // - [19] = '_', // - [20] = '_', // - [21] = '_', // - [22] = '_', // - [23] = '_', // - [24] = '_', // - [25] = '_', // - [26] = '_', // - [27] = '_', // - [28] = '_', // - [29] = '_', // - [30] = '_', // - [31] = '_', // - [32] = ' ', // SPACE keep - [33] = '_', // ! - [34] = '_', // " - [35] = '_', // # - [36] = '_', // $ - [37] = '_', // % - [38] = '_', // & - [39] = '_', // ' - [40] = '(', // ( keep - [41] = ')', // ) keep - [42] = '_', // * - [43] = '+', // + keep - [44] = '.', // , convert , to . - [45] = '-', // - keep - [46] = '.', // . keep - [47] = '/', // / keep - [48] = '0', // 0 keep - [49] = '1', // 1 keep - [50] = '2', // 2 keep - [51] = '3', // 3 keep - [52] = '4', // 4 keep - [53] = '5', // 5 keep - [54] = '6', // 6 keep - [55] = '7', // 7 keep - [56] = '8', // 8 keep - [57] = '9', // 9 keep - [58] = ':', // : keep - [59] = ':', // ; convert ; to : - [60] = '_', // < - [61] = ':', // = convert = to : - [62] = '_', // > - [63] = '_', // ? - [64] = '@', // @ - [65] = 'A', // A keep - [66] = 'B', // B keep - [67] = 'C', // C keep - [68] = 'D', // D keep - [69] = 'E', // E keep - [70] = 'F', // F keep - [71] = 'G', // G keep - [72] = 'H', // H keep - [73] = 'I', // I keep - [74] = 'J', // J keep - [75] = 'K', // K keep - [76] = 'L', // L keep - [77] = 'M', // M keep - [78] = 'N', // N keep - [79] = 'O', // O keep - [80] = 'P', // P keep - [81] = 'Q', // Q keep - [82] = 'R', // R keep - [83] = 'S', // S keep - [84] = 'T', // T keep - [85] = 'U', // U keep - [86] = 'V', // V keep - [87] = 'W', // W keep - [88] = 'X', // X keep - [89] = 'Y', // Y keep - [90] = 'Z', // Z keep - [91] = '[', // [ keep - [92] = '/', // backslash convert \ to / - [93] = ']', // ] keep - [94] = '_', // ^ - [95] = '_', // _ keep - [96] = '_', // ` - [97] = 'a', // a keep - [98] = 'b', // b keep - [99] = 'c', // c keep - [100] = 'd', // d keep - [101] = 'e', // e keep - [102] = 'f', // f keep - [103] = 'g', // g keep - [104] = 'h', // h keep - [105] = 'i', // i keep - [106] = 'j', // j keep - [107] = 'k', // k keep - [108] = 'l', // l keep - [109] = 'm', // m keep - [110] = 'n', // n keep - [111] = 'o', // o keep - [112] = 'p', // p keep - [113] = 'q', // q keep - [114] = 'r', // r keep - [115] = 's', // s keep - [116] = 't', // t keep - [117] = 'u', // u keep - [118] = 'v', // v keep - [119] = 'w', // w keep - [120] = 'x', // x keep - [121] = 'y', // y keep - [122] = 'z', // z keep - [123] = '_', // { - [124] = '_', // | - [125] = '_', // } - [126] = '_', // ~ - [127] = '_', // - [128] = '_', // - [129] = '_', // - [130] = '_', // - [131] = '_', // - [132] = '_', // - [133] = '_', // - [134] = '_', // - [135] = '_', // - [136] = '_', // - [137] = '_', // - [138] = '_', // - [139] = '_', // - [140] = '_', // - [141] = '_', // - [142] = '_', // - [143] = '_', // - [144] = '_', // - [145] = '_', // - [146] = '_', // - [147] = '_', // - [148] = '_', // - [149] = '_', // - [150] = '_', // - [151] = '_', // - [152] = '_', // - [153] = '_', // - [154] = '_', // - [155] = '_', // - [156] = '_', // - [157] = '_', // - [158] = '_', // - [159] = '_', // - [160] = '_', // - [161] = '_', // - [162] = '_', // - [163] = '_', // - [164] = '_', // - [165] = '_', // - [166] = '_', // - [167] = '_', // - [168] = '_', // - [169] = '_', // - [170] = '_', // - [171] = '_', // - [172] = '_', // - [173] = '_', // - [174] = '_', // - [175] = '_', // - [176] = '_', // - [177] = '_', // - [178] = '_', // - [179] = '_', // - [180] = '_', // - [181] = '_', // - [182] = '_', // - [183] = '_', // - [184] = '_', // - [185] = '_', // - [186] = '_', // - [187] = '_', // - [188] = '_', // - [189] = '_', // - [190] = '_', // - [191] = '_', // - [192] = '_', // - [193] = '_', // - [194] = '_', // - [195] = '_', // - [196] = '_', // - [197] = '_', // - [198] = '_', // - [199] = '_', // - [200] = '_', // - [201] = '_', // - [202] = '_', // - [203] = '_', // - [204] = '_', // - [205] = '_', // - [206] = '_', // - [207] = '_', // - [208] = '_', // - [209] = '_', // - [210] = '_', // - [211] = '_', // - [212] = '_', // - [213] = '_', // - [214] = '_', // - [215] = '_', // - [216] = '_', // - [217] = '_', // - [218] = '_', // - [219] = '_', // - [220] = '_', // - [221] = '_', // - [222] = '_', // - [223] = '_', // - [224] = '_', // - [225] = '_', // - [226] = '_', // - [227] = '_', // - [228] = '_', // - [229] = '_', // - [230] = '_', // - [231] = '_', // - [232] = '_', // - [233] = '_', // - [234] = '_', // - [235] = '_', // - [236] = '_', // - [237] = '_', // - [238] = '_', // - [239] = '_', // - [240] = '_', // - [241] = '_', // - [242] = '_', // - [243] = '_', // - [244] = '_', // - [245] = '_', // - [246] = '_', // - [247] = '_', // - [248] = '_', // - [249] = '_', // - [250] = '_', // - [251] = '_', // - [252] = '_', // - [253] = '_', // - [254] = '_', // - [255] = '_' // -}; - -__attribute__((constructor)) void initialize_labels_keys_char_map(void) { - // copy the values char map to the names char map - size_t i; - for(i = 0; i < 256 ;i++) - label_names_char_map[i] = label_values_char_map[i]; - - // apply overrides to the label names map - label_names_char_map['='] = '_'; - label_names_char_map[':'] = '_'; - label_names_char_map['+'] = '_'; - label_names_char_map[';'] = '_'; - label_names_char_map['@'] = '_'; - label_names_char_map['('] = '_'; - label_names_char_map[')'] = '_'; - label_names_char_map[' '] = '_'; - label_names_char_map['\\'] = '/'; - - // create the space map - for(i = 0; i < 256 ;i++) - label_spaces_char_map[i] = (isspace(i) || iscntrl(i) || !isprint(i))?1:0; - -} - __attribute__((constructor)) void initialize_label_stats(void) { dictionary_stats_category_rrdlabels.memory.dict = 0; dictionary_stats_category_rrdlabels.memory.index = 0; dictionary_stats_category_rrdlabels.memory.values = 0; } -size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { - if(unlikely(!src || !dst_size)) return 0; - - if(unlikely(!src || !*src)) { - strncpyz((char *)dst, empty, dst_size); - dst[dst_size - 1] = '\0'; - size_t len = strlen((char *)dst); - if(multibyte_length) *multibyte_length = len; - return len; - } - - unsigned char *d = dst; - - // make room for the final string termination - unsigned char *end = &d[dst_size - 1]; - - // copy while converting, but keep only one space - // we start wil last_is_space = 1 to skip leading spaces - int last_is_space = 1; - - size_t mblen = 0; - - while(*src && d < end) { - unsigned char c = *src; - - if(IS_UTF8_STARTBYTE(c) && IS_UTF8_BYTE(src[1]) && d + 2 < end) { - // UTF-8 multi-byte encoded character - - // find how big this character is (2-4 bytes) - size_t utf_character_size = 2; - while(utf_character_size < 4 && src[utf_character_size] && IS_UTF8_BYTE(src[utf_character_size]) && !IS_UTF8_STARTBYTE(src[utf_character_size])) - utf_character_size++; - - if(utf) { - while(utf_character_size) { - utf_character_size--; - *d++ = *src++; - } - } - else { - // UTF-8 characters are not allowed. - // Assume it is an underscore - // and skip all except the first byte - *d++ = '_'; - src += (utf_character_size - 1); - } - - last_is_space = 0; - mblen++; - continue; - } - - if(label_spaces_char_map[c]) { - // a space character - - if(!last_is_space) { - // add one space - *d++ = char_map[c]; - mblen++; - } - - last_is_space++; - } - else { - *d++ = char_map[c]; - last_is_space = 0; - mblen++; - } - - src++; - } - - // remove the last trailing space - if(last_is_space && d > dst) { - d--; - mblen--; - } - - // put a termination at the end of what we copied - *d = '\0'; - - // check if dst is all underscores and empty it if it is - if(*dst == '_') { - unsigned char *t = dst; - while (*t == '_') t++; - if (unlikely(*t == '\0')) { - *dst = '\0'; - mblen = 0; - } - } - - if(unlikely(*dst == '\0')) { - strncpyz((char *)dst, empty, dst_size); - dst[dst_size - 1] = '\0'; - mblen = strlen((char *)dst); - if(multibyte_length) *multibyte_length = mblen; - return mblen; - } - - if(multibyte_length) *multibyte_length = mblen; - - return d - dst; -} - -static inline size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) { - return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, "", NULL); -} - -static inline size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) { - return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]", NULL); -} - // ---------------------------------------------------------------------------- // rrdlabels_create() @@ -886,6 +437,7 @@ void rrdlabels_get_value_to_buffer_or_unset(RRDLABELS *labels, BUFFER *wb, const RRDLABEL *lb; RRDLABEL_SRC ls; + bool set = false; lfe_start_read(labels, lb, ls) { if (lb->index.key == this_key) { @@ -893,10 +445,15 @@ void rrdlabels_get_value_to_buffer_or_unset(RRDLABELS *labels, BUFFER *wb, const buffer_strcat(wb, string2str(lb->index.value)); else buffer_strcat(wb, unset); + set = true; break; } } lfe_done(labels); + + if(!set) + buffer_strcat(wb, unset); + string_freez(this_key); } @@ -1601,6 +1158,9 @@ static int rrdlabels_unittest_add_pairs() { // test newlines errors += rrdlabels_unittest_add_a_pair(" tag = \t value \r\n", "tag", "value"); + // test spaces in names + errors += rrdlabels_unittest_add_a_pair(" t a g = value", "t_a_g", "value"); + // test : in values errors += rrdlabels_unittest_add_a_pair("tag=:value", "tag", ":value"); errors += rrdlabels_unittest_add_a_pair("tag::value", "tag", ":value"); @@ -1991,6 +1551,18 @@ int rrdlabels_unittest_sanitization() { // mixed multi-byte errors += rrdlabels_unittest_sanitize_value("Ű‱𩸽‱Ű", "Ű‱𩸽‱Ű"); + // invalid UTF8 No 1 + const unsigned char invalid1[] = { 0xC3, 0x28, 'A', 'B', 0x0 }; + errors += rrdlabels_unittest_sanitize_value((const char *)invalid1, "(AB"); + + // invalid UTF8 No 2 + const unsigned char invalid2[] = { 'A', 'B', 0xC3, 0x28, 'C', 'D', 0x0 }; + errors += rrdlabels_unittest_sanitize_value((const char *)invalid2, "AB (CD"); + + // invalid UTF8 No 3 + const unsigned char invalid3[] = { 'A', 'B', 0xC3, 0x28, 0x0 }; + errors += rrdlabels_unittest_sanitize_value((const char *)invalid3, "AB ("); + return errors; } |