diff options
Diffstat (limited to '')
67 files changed, 10667 insertions, 10262 deletions
diff --git a/database/engine/Makefile.am b/database/engine/Makefile.am index 43405001d..59250a997 100644 --- a/database/engine/Makefile.am +++ b/database/engine/Makefile.am @@ -4,7 +4,6 @@ AUTOMAKE_OPTIONS = subdir-objects MAINTAINERCLEANFILES = $(srcdir)/Makefile.in SUBDIRS = \ - metadata_log \ $(NULL) dist_noinst_DATA = \ diff --git a/database/engine/datafile.c b/database/engine/datafile.c index 2ed98ef88..9c70068d9 100644 --- a/database/engine/datafile.c +++ b/database/engine/datafile.c @@ -174,7 +174,7 @@ int create_data_file(struct rrdengine_datafile *datafile) rrd_stat_atomic_add(&global_io_errors, 1); } uv_fs_req_cleanup(&req); - free(superblock); + posix_memfree(superblock); if (ret < 0) { destroy_data_file(datafile); return ret; @@ -218,7 +218,7 @@ static int check_data_file_superblock(uv_file file) ret = 0; } error: - free(superblock); + posix_memfree(superblock); return ret; } @@ -444,44 +444,17 @@ void finalize_data_files(struct rrdengine_instance *ctx) struct rrdengine_journalfile *journalfile; struct extent_info *extent, *next_extent; - size_t extents_number = 0; - size_t extents_bytes = 0; - size_t page_compressed_sizes = 0; - - size_t files_number = 0; - size_t files_bytes = 0; - for (datafile = ctx->datafiles.first ; datafile != NULL ; datafile = next_datafile) { journalfile = datafile->journalfile; next_datafile = datafile->next; for (extent = datafile->extents.first ; extent != NULL ; extent = next_extent) { - extents_number++; - extents_bytes += sizeof(*extent) + sizeof(struct rrdeng_page_descr *) * extent->number_of_pages; - page_compressed_sizes += extent->size; - next_extent = extent->next; freez(extent); } close_journal_file(journalfile, datafile); close_data_file(datafile); - - files_number++; - files_bytes += sizeof(*journalfile) + sizeof(*datafile); - freez(journalfile); freez(datafile); } - - if(!files_number) files_number = 1; - if(!extents_number) extents_number = 1; - - info("DBENGINE STATISTICS ON DATAFILES:" - " Files %zu, structures %zu bytes, %0.2f bytes per file." - " Extents %zu, structures %zu bytes, %0.2f bytes per extent." - " Compressed size of all pages: %zu bytes." - , files_number, files_bytes, (double)files_bytes/files_number - , extents_number, extents_bytes, (double)extents_bytes/extents_number - , page_compressed_sizes - ); } diff --git a/database/engine/datafile.h b/database/engine/datafile.h index ae94bfdd0..1cf256aff 100644 --- a/database/engine/datafile.h +++ b/database/engine/datafile.h @@ -52,16 +52,16 @@ struct rrdengine_datafile_list { struct rrdengine_datafile *last; /* newest */ }; -extern void df_extent_insert(struct extent_info *extent); -extern void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); -extern void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); -extern void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); -extern int close_data_file(struct rrdengine_datafile *datafile); -extern int unlink_data_file(struct rrdengine_datafile *datafile); -extern int destroy_data_file(struct rrdengine_datafile *datafile); -extern int create_data_file(struct rrdengine_datafile *datafile); -extern int create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno); -extern int init_data_files(struct rrdengine_instance *ctx); -extern void finalize_data_files(struct rrdengine_instance *ctx); +void df_extent_insert(struct extent_info *extent); +void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); +void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile); +void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); +int close_data_file(struct rrdengine_datafile *datafile); +int unlink_data_file(struct rrdengine_datafile *datafile); +int destroy_data_file(struct rrdengine_datafile *datafile); +int create_data_file(struct rrdengine_datafile *datafile); +int create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno); +int init_data_files(struct rrdengine_instance *ctx); +void finalize_data_files(struct rrdengine_instance *ctx); #endif /* NETDATA_DATAFILE_H */
\ No newline at end of file diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c index dc61f569d..500dd7880 100644 --- a/database/engine/journalfile.c +++ b/database/engine/journalfile.c @@ -17,7 +17,7 @@ static void flush_transaction_buffer_cb(uv_fs_t* req) } uv_fs_req_cleanup(req); - free(io_descr->buf); + posix_memfree(io_descr->buf); freez(io_descr); } @@ -225,7 +225,7 @@ int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdeng rrd_stat_atomic_add(&global_io_errors, 1); } uv_fs_req_cleanup(&req); - free(superblock); + posix_memfree(superblock); if (ret < 0) { destroy_journal_file(journalfile, datafile); return ret; @@ -268,7 +268,7 @@ static int check_journal_file_superblock(uv_file file) ret = 0; } error: - free(superblock); + posix_memfree(superblock); return ret; } @@ -311,20 +311,46 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden } continue; } - uint64_t start_time = jf_metric_data->descr[i].start_time; - uint64_t end_time = jf_metric_data->descr[i].end_time; + uint64_t start_time_ut = jf_metric_data->descr[i].start_time_ut; + uint64_t end_time_ut = jf_metric_data->descr[i].end_time_ut; + size_t entries = jf_metric_data->descr[i].page_length / page_type_size[page_type]; + time_t update_every_s = (entries > 1) ? ((end_time_ut - start_time_ut) / USEC_PER_SEC / (entries - 1)) : 0; + + if (unlikely(start_time_ut > end_time_ut)) { + ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut = end_time_ut; + continue; + } - if (unlikely(start_time > end_time)) { - error("Invalid page encountered, start time %lu > end time %lu", start_time , end_time ); + if (unlikely(start_time_ut == end_time_ut && entries != 1)) { + ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut = end_time_ut; continue; } - if (unlikely(start_time == end_time)) { - size_t entries = jf_metric_data->descr[i].page_length / page_type_size[page_type]; - if (unlikely(entries > 1)) { - error("Invalid page encountered, start time %lu = end time but %zu entries were found", start_time, entries); - continue; - } + if (unlikely(!entries)) { + ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut = end_time_ut; + continue; + } + + if(entries > 1 && update_every_s == 0) { + ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut = end_time_ut; + continue; + } + + if(start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1) != end_time_ut) { + ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter++; + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut < end_time_ut) + ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut = end_time_ut; + + // let this be + // end_time_ut = start_time_ut + update_every_s * USEC_PER_SEC * (entries - 1); } temp_id = (uuid_t *)jf_metric_data->descr[i].uuid; @@ -340,7 +366,7 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden uv_rwlock_wrlock(&pg_cache->metrics_index.lock); PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t), PJE0); fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */ - *PValue = page_index = create_page_index(temp_id); + *PValue = page_index = create_page_index(temp_id, ctx); page_index->prev = pg_cache->metrics_index.last_page_index; pg_cache->metrics_index.last_page_index = page_index; uv_rwlock_wrunlock(&pg_cache->metrics_index.lock); @@ -348,21 +374,32 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden descr = pg_cache_create_descr(); descr->page_length = jf_metric_data->descr[i].page_length; - descr->start_time = start_time; - descr->end_time = end_time; + descr->start_time_ut = start_time_ut; + descr->end_time_ut = end_time_ut; + descr->update_every_s = (update_every_s > 0) ? (uint32_t)update_every_s : (page_index->latest_update_every_s); descr->id = &page_index->id; descr->extent = extent; descr->type = page_type; extent->pages[valid_pages++] = descr; pg_cache_insert(ctx, page_index, descr); + + if(page_index->latest_time_ut == descr->end_time_ut) + page_index->latest_update_every_s = descr->update_every_s; + + if(descr->update_every_s == 0) + fatal( + "DBENGINE: page descriptor update every is zero, end_time_ut = %llu, start_time_ut = %llu, entries = %zu", + (unsigned long long)end_time_ut, (unsigned long long)start_time_ut, entries); } extent->number_of_pages = valid_pages; if (likely(valid_pages)) df_extent_insert(extent); - else + else { freez(extent); + ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter++; + } } /* @@ -442,27 +479,30 @@ static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrde //data_file_size = journalfile->datafile->pos; TODO: utilize this? max_id = 1; - ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); - if (unlikely(ret)) { - fatal("posix_memalign:%s", strerror(ret)); + bool journal_is_mmapped = (journalfile->data != NULL); + if (unlikely(!journal_is_mmapped)) { + ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); + if (unlikely(ret)) + fatal("posix_memalign:%s", strerror(ret)); } - + else + buf = journalfile->data + sizeof(struct rrdeng_jf_sb); for (pos = sizeof(struct rrdeng_jf_sb) ; pos < file_size ; pos += READAHEAD_BYTES) { size_bytes = MIN(READAHEAD_BYTES, file_size - pos); - iov = uv_buf_init(buf, size_bytes); - ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL); - if (ret < 0) { - error("uv_fs_read: pos=%"PRIu64", %s", pos, uv_strerror(ret)); + if (unlikely(!journal_is_mmapped)) { + iov = uv_buf_init(buf, size_bytes); + ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL); + if (ret < 0) { + error("uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret)); + uv_fs_req_cleanup(&req); + goto skip_file; + } + fatal_assert(req.result >= 0); uv_fs_req_cleanup(&req); - goto skip_file; + ++ctx->stats.io_read_requests; + ctx->stats.io_read_bytes += size_bytes; } - fatal_assert(req.result >= 0); - uv_fs_req_cleanup(&req); - ctx->stats.io_read_bytes += size_bytes; - ++ctx->stats.io_read_requests; - //pos_i = pos; - //while (pos_i < pos + size_bytes) { for (pos_i = 0 ; pos_i < size_bytes ; ) { unsigned max_size; @@ -475,9 +515,12 @@ static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrde pos_i += ret; max_id = MAX(max_id, id); } + if (likely(journal_is_mmapped)) + buf += size_bytes; } skip_file: - free(buf); + if (unlikely(!journal_is_mmapped)) + posix_memfree(buf); return max_id; } @@ -512,12 +555,16 @@ int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfi journalfile->file = file; journalfile->pos = file_size; + journalfile->data = netdata_mmap(path, file_size, MAP_SHARED, 0); + info("Loading journal file \"%s\" using %s.", path, journalfile->data?"MMAP":"uv_fs_read"); max_id = iterate_transactions(ctx, journalfile); ctx->commit_log.transaction_id = MAX(ctx->commit_log.transaction_id, max_id + 1); info("Journal file \"%s\" loaded (size:%"PRIu64").", path, file_size); + if (likely(journalfile->data)) + netdata_munmap(journalfile->data, file_size); return 0; error: diff --git a/database/engine/journalfile.h b/database/engine/journalfile.h index f6c43cd16..011c5065f 100644 --- a/database/engine/journalfile.h +++ b/database/engine/journalfile.h @@ -19,7 +19,7 @@ struct rrdengine_journalfile; struct rrdengine_journalfile { uv_file file; uint64_t pos; - + void *data; struct rrdengine_datafile *datafile; }; @@ -33,17 +33,17 @@ struct transaction_commit_log { unsigned buf_size; }; -extern void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); -extern void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); -extern void *wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size); -extern void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc); -extern int close_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); -extern int unlink_journal_file(struct rrdengine_journalfile *journalfile); -extern int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); -extern int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); -extern int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, +void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen); +void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +void *wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size); +void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc); +int close_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int unlink_journal_file(struct rrdengine_journalfile *journalfile); +int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); +int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile); -extern void init_commit_log(struct rrdengine_instance *ctx); +void init_commit_log(struct rrdengine_instance *ctx); #endif /* NETDATA_JOURNALFILE_H */
\ No newline at end of file diff --git a/database/engine/metadata_log/Makefile.am b/database/engine/metadata_log/Makefile.am deleted file mode 100644 index 161784b8f..000000000 --- a/database/engine/metadata_log/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in - -dist_noinst_DATA = \ - README.md \ - $(NULL) diff --git a/database/engine/metadata_log/compaction.c b/database/engine/metadata_log/compaction.c deleted file mode 100644 index ba19e1edf..000000000 --- a/database/engine/metadata_log/compaction.c +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_RRD_INTERNALS - -#include "metadatalog.h" - -/* Return 0 on success. */ -int compaction_failure_recovery(struct metalog_instance *ctx, struct metadata_logfile **metalogfiles, - unsigned *matched_files) -{ - int ret; - unsigned starting_fileno, fileno, i, j, recovered_files; - struct metadata_logfile *metalogfile = NULL, *compactionfile = NULL, **tmp_metalogfiles; - char *dbfiles_path = ctx->rrdeng_ctx->dbfiles_path; - - for (i = 0 ; i < *matched_files ; ++i) { - metalogfile = metalogfiles[i]; - if (0 == metalogfile->starting_fileno) - continue; /* skip standard metadata log files */ - break; /* this is a compaction temporary file */ - } - if (i == *matched_files) /* no recovery needed */ - return 0; - info("Starting metadata log file failure recovery procedure in \"%s\".", dbfiles_path); - - if (*matched_files - i > 1) { /* Can't have more than 1 temporary compaction files */ - error("Metadata log files are in an invalid state. Cannot proceed."); - return 1; - } - compactionfile = metalogfile; - starting_fileno = compactionfile->starting_fileno; - fileno = compactionfile->fileno; - /* scratchpad space to move file pointers around */ - tmp_metalogfiles = callocz(*matched_files, sizeof(*tmp_metalogfiles)); - - for (j = 0, recovered_files = 0 ; j < i ; ++j) { - metalogfile = metalogfiles[j]; - fatal_assert(0 == metalogfile->starting_fileno); - if (metalogfile->fileno < starting_fileno) { - tmp_metalogfiles[recovered_files++] = metalogfile; - continue; - } - break; /* reached compaction file serial number */ - } - - if ((j == i) /* Shouldn't be possible, invalid compaction temporary file */ || - (metalogfile->fileno == starting_fileno && metalogfile->fileno == fileno)) { - error("Deleting invalid compaction temporary file \"%s/"METALOG_PREFIX METALOG_FILE_NUMBER_PRINT_TMPL - METALOG_EXTENSION"\"", dbfiles_path, starting_fileno, fileno); - unlink_metadata_logfile(compactionfile); - freez(compactionfile); - freez(tmp_metalogfiles); - --*matched_files; /* delete the last one */ - - info("Finished metadata log file failure recovery procedure in \"%s\".", dbfiles_path); - return 0; - } - - for ( ; j < i ; ++j) { /* continue iterating through normal metadata log files */ - metalogfile = metalogfiles[j]; - fatal_assert(0 == metalogfile->starting_fileno); - if (metalogfile->fileno < fileno) { /* It has already been compacted */ - error("Deleting invalid metadata log file \"%s/"METALOG_PREFIX METALOG_FILE_NUMBER_PRINT_TMPL - METALOG_EXTENSION"\"", dbfiles_path, 0U, metalogfile->fileno); - unlink_metadata_logfile(metalogfile); - freez(metalogfile); - continue; - } - tmp_metalogfiles[recovered_files++] = metalogfile; - } - - /* compaction temporary file is valid */ - tmp_metalogfiles[recovered_files++] = compactionfile; - ret = rename_metadata_logfile(compactionfile, 0, starting_fileno); - if (ret < 0) { - error("Cannot rename temporary compaction files. Cannot proceed."); - freez(tmp_metalogfiles); - return 1; - } - - memcpy(metalogfiles, tmp_metalogfiles, recovered_files * sizeof(*metalogfiles)); - *matched_files = recovered_files; - freez(tmp_metalogfiles); - - info("Finished metadata log file failure recovery procedure in \"%s\".", dbfiles_path); - return 0; -} diff --git a/database/engine/metadata_log/compaction.h b/database/engine/metadata_log/compaction.h deleted file mode 100644 index d04613440..000000000 --- a/database/engine/metadata_log/compaction.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_COMPACTION_H -#define NETDATA_COMPACTION_H - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include "../rrdengine.h" - -extern int compaction_failure_recovery(struct metalog_instance *ctx, struct metadata_logfile **metalogfiles, - unsigned *matched_files); - -#endif /* NETDATA_COMPACTION_H */ diff --git a/database/engine/metadata_log/logfile.c b/database/engine/metadata_log/logfile.c deleted file mode 100644 index 07eb9b6fe..000000000 --- a/database/engine/metadata_log/logfile.c +++ /dev/null @@ -1,447 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later -#include <database/sqlite/sqlite_functions.h> -#include "metadatalog.h" -#include "metalogpluginsd.h" - - -void generate_metadata_logfile_path(struct metadata_logfile *metalogfile, char *str, size_t maxlen) -{ - (void) snprintfz(str, maxlen, "%s/" METALOG_PREFIX METALOG_FILE_NUMBER_PRINT_TMPL METALOG_EXTENSION, - metalogfile->ctx->rrdeng_ctx->dbfiles_path, metalogfile->starting_fileno, metalogfile->fileno); -} - -void metadata_logfile_init(struct metadata_logfile *metalogfile, struct metalog_instance *ctx, unsigned starting_fileno, - unsigned fileno) -{ - metalogfile->starting_fileno = starting_fileno; - metalogfile->fileno = fileno; - metalogfile->file = (uv_file)0; - metalogfile->pos = 0; - metalogfile->next = NULL; - metalogfile->ctx = ctx; -} - -int rename_metadata_logfile(struct metadata_logfile *metalogfile, unsigned new_starting_fileno, unsigned new_fileno) -{ - //struct metalog_instance *ctx = metalogfile->ctx; - uv_fs_t req; - int ret; - char oldpath[RRDENG_PATH_MAX], newpath[RRDENG_PATH_MAX]; - unsigned backup_starting_fileno, backup_fileno; - - backup_starting_fileno = metalogfile->starting_fileno; - backup_fileno = metalogfile->fileno; - generate_metadata_logfile_path(metalogfile, oldpath, sizeof(oldpath)); - metalogfile->starting_fileno = new_starting_fileno; - metalogfile->fileno = new_fileno; - generate_metadata_logfile_path(metalogfile, newpath, sizeof(newpath)); - - info("Renaming metadata log file \"%s\" to \"%s\".", oldpath, newpath); - ret = uv_fs_rename(NULL, &req, oldpath, newpath, NULL); - if (ret < 0) { - error("uv_fs_rename(%s): %s", oldpath, uv_strerror(ret)); - //++ctx->stats.fs_errors; /* this is racy, may miss some errors */ - rrd_stat_atomic_add(&global_fs_errors, 1); - /* restore previous values */ - metalogfile->starting_fileno = backup_starting_fileno; - metalogfile->fileno = backup_fileno; - } - uv_fs_req_cleanup(&req); - - return ret; -} - -int unlink_metadata_logfile(struct metadata_logfile *metalogfile) -{ - //struct metalog_instance *ctx = metalogfile->ctx; - uv_fs_t req; - int ret; - char path[RRDENG_PATH_MAX]; - - generate_metadata_logfile_path(metalogfile, path, sizeof(path)); - - ret = uv_fs_unlink(NULL, &req, path, NULL); - if (ret < 0) { - error("uv_fs_fsunlink(%s): %s", path, uv_strerror(ret)); -// ++ctx->stats.fs_errors; - rrd_stat_atomic_add(&global_fs_errors, 1); - } - uv_fs_req_cleanup(&req); - - return ret; -} - -static int check_metadata_logfile_superblock(uv_file file) -{ - int ret; - struct rrdeng_metalog_sb *superblock; - uv_buf_t iov; - uv_fs_t req; - - ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); - if (unlikely(ret)) { - fatal("posix_memalign:%s", strerror(ret)); - } - iov = uv_buf_init((void *)superblock, sizeof(*superblock)); - - ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL); - if (ret < 0) { - error("uv_fs_read: %s", uv_strerror(ret)); - uv_fs_req_cleanup(&req); - goto error; - } - fatal_assert(req.result >= 0); - uv_fs_req_cleanup(&req); - - if (strncmp(superblock->magic_number, RRDENG_METALOG_MAGIC, RRDENG_MAGIC_SZ)) { - error("File has invalid superblock."); - ret = UV_EINVAL; - } else { - ret = 0; - } - if (superblock->version > RRDENG_METALOG_VER) { - error("File has unknown version %"PRIu16". Compatibility is not guaranteed.", superblock->version); - } -error: - free(superblock); - return ret; -} - -void replay_record(struct metadata_logfile *metalogfile, struct rrdeng_metalog_record_header *header, void *payload) -{ - struct metalog_instance *ctx = metalogfile->ctx; - char *line, *nextline, *record_end; - int ret; - - debug(D_METADATALOG, "RECORD contents: %.*s", (int)header->payload_length, (char *)payload); - record_end = (char *)payload + header->payload_length - 1; - *record_end = '\0'; - - for (line = payload ; line ; line = nextline) { - nextline = strchr(line, '\n'); - if (nextline) { - *nextline++ = '\0'; - } - ret = parser_action(ctx->metalog_parser_object->parser, line); - debug(D_METADATALOG, "parser_action ret:%d", ret); - if (ret) - return; /* skip record due to error */ - }; -} - -/* This function only works with buffered I/O */ -static inline int metalogfile_read(struct metadata_logfile *metalogfile, void *buf, size_t len, uint64_t offset) -{ -// struct metalog_instance *ctx; - uv_file file; - uv_buf_t iov; - uv_fs_t req; - int ret; - -// ctx = metalogfile->ctx; - file = metalogfile->file; - iov = uv_buf_init(buf, len); - ret = uv_fs_read(NULL, &req, file, &iov, 1, offset, NULL); - if (unlikely(ret < 0 && ret != req.result)) { - fatal("uv_fs_read: %s", uv_strerror(ret)); - } - if (req.result < 0) { -// ++ctx->stats.io_errors; - rrd_stat_atomic_add(&global_io_errors, 1); - error("%s: uv_fs_read - %s - record at offset %"PRIu64"(%u) in metadata logfile %u-%u.", __func__, - uv_strerror((int)req.result), offset, (unsigned)len, metalogfile->starting_fileno, metalogfile->fileno); - } - uv_fs_req_cleanup(&req); -// ctx->stats.io_read_bytes += len; -// ++ctx->stats.io_read_requests; - - return ret; -} - -/* Return 0 on success */ -static int metadata_record_integrity_check(void *record) -{ - int ret; - uint32_t data_size; - struct rrdeng_metalog_record_header *header; - struct rrdeng_metalog_record_trailer *trailer; - uLong crc; - - header = record; - data_size = header->header_length + header->payload_length; - trailer = record + data_size; - - crc = crc32(0L, Z_NULL, 0); - crc = crc32(crc, record, data_size); - ret = crc32cmp(trailer->checksum, crc); - - return ret; -} - -#define MAX_READ_BYTES (RRDENG_BLOCK_SIZE * 32) /* no record should be over 128KiB in this version */ - -/* - * Iterates metadata log file records and creates database objects (host/chart/dimension) - */ -static void iterate_records(struct metadata_logfile *metalogfile) -{ - uint32_t file_size, pos, bytes_remaining, record_size; - void *buf; - struct rrdeng_metalog_record_header *header; - struct metalog_instance *ctx = metalogfile->ctx; - struct metalog_pluginsd_state *state = ctx->metalog_parser_object->private; - const size_t min_header_size = offsetof(struct rrdeng_metalog_record_header, header_length) + - sizeof(header->header_length); - - file_size = metalogfile->pos; - state->metalogfile = metalogfile; - - buf = mallocz(MAX_READ_BYTES); - - for (pos = sizeof(struct rrdeng_metalog_sb) ; pos < file_size ; pos += record_size) { - bytes_remaining = file_size - pos; - if (bytes_remaining < min_header_size) { - error("%s: unexpected end of file in metadata logfile %u-%u.", __func__, metalogfile->starting_fileno, - metalogfile->fileno); - break; - } - if (metalogfile_read(metalogfile, buf, min_header_size, pos) < 0) - break; - header = (struct rrdeng_metalog_record_header *)buf; - if (METALOG_STORE_PADDING == header->type) { - info("%s: Skipping padding in metadata logfile %u-%u.", __func__, metalogfile->starting_fileno, - metalogfile->fileno); - record_size = ALIGN_BYTES_FLOOR(pos + RRDENG_BLOCK_SIZE) - pos; - continue; - } - if (metalogfile_read(metalogfile, buf + min_header_size, sizeof(*header) - min_header_size, - pos + min_header_size) < 0) - break; - record_size = header->header_length + header->payload_length + sizeof(struct rrdeng_metalog_record_trailer); - if (header->header_length < min_header_size || record_size > bytes_remaining) { - error("%s: Corrupted record in metadata logfile %u-%u.", __func__, metalogfile->starting_fileno, - metalogfile->fileno); - break; - } - if (record_size > MAX_READ_BYTES) { - error("%s: Record is too long (%u bytes) in metadata logfile %u-%u.", __func__, record_size, - metalogfile->starting_fileno, metalogfile->fileno); - continue; - } - if (metalogfile_read(metalogfile, buf + sizeof(*header), record_size - sizeof(*header), - pos + sizeof(*header)) < 0) - break; - if (metadata_record_integrity_check(buf)) { - error("%s: Record at offset %"PRIu32" was read from disk. CRC32 check: FAILED", __func__, pos); - continue; - } - debug(D_METADATALOG, "%s: Record at offset %"PRIu32" was read from disk. CRC32 check: SUCCEEDED", __func__, - pos); - - replay_record(metalogfile, header, buf + header->header_length); - } - - freez(buf); -} - -int load_metadata_logfile(struct metalog_instance *ctx, struct metadata_logfile *metalogfile) -{ - UNUSED(ctx); - uv_fs_t req; - uv_file file; - int ret, fd, error; - uint64_t file_size; - char path[RRDENG_PATH_MAX]; - - generate_metadata_logfile_path(metalogfile, path, sizeof(path)); - if (file_is_migrated(path)) - return 0; - - fd = open_file_buffered_io(path, O_RDWR, &file); - if (fd < 0) { -// ++ctx->stats.fs_errors; - rrd_stat_atomic_add(&global_fs_errors, 1); - return fd; - } - info("Loading metadata log \"%s\".", path); - - ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_metalog_sb)); - if (ret) - goto error; - - ret = check_metadata_logfile_superblock(file); - if (ret) - goto error; -// ctx->stats.io_read_bytes += sizeof(struct rrdeng_jf_sb); -// ++ctx->stats.io_read_requests; - - metalogfile->file = file; - metalogfile->pos = file_size; - - iterate_records(metalogfile); - - info("Metadata log \"%s\" migrated to the database (size:%"PRIu64").", path, file_size); - add_migrated_file(path, file_size); - return 0; - -error: - error = ret; - ret = uv_fs_close(NULL, &req, file, NULL); - if (ret < 0) { - error("uv_fs_close(%s): %s", path, uv_strerror(ret)); -// ++ctx->stats.fs_errors; - rrd_stat_atomic_add(&global_fs_errors, 1); - } - uv_fs_req_cleanup(&req); - return error; -} - -static int scan_metalog_files_cmp(const void *a, const void *b) -{ - struct metadata_logfile *file1, *file2; - char path1[RRDENG_PATH_MAX], path2[RRDENG_PATH_MAX]; - - file1 = *(struct metadata_logfile **)a; - file2 = *(struct metadata_logfile **)b; - generate_metadata_logfile_path(file1, path1, sizeof(path1)); - generate_metadata_logfile_path(file2, path2, sizeof(path2)); - return strcmp(path1, path2); -} - -/* Returns number of metadata logfiles that were loaded or < 0 on error */ -static int scan_metalog_files(struct metalog_instance *ctx) -{ - int ret; - unsigned starting_no, no, matched_files, i, failed_to_load; - static uv_fs_t req; - uv_dirent_t dent; - struct metadata_logfile **metalogfiles, *metalogfile; - char *dbfiles_path = ctx->rrdeng_ctx->dbfiles_path; - - ret = uv_fs_scandir(NULL, &req, dbfiles_path, 0, NULL); - if (ret < 0) { - fatal_assert(req.result < 0); - uv_fs_req_cleanup(&req); - error("uv_fs_scandir(%s): %s", dbfiles_path, uv_strerror(ret)); -// ++ctx->stats.fs_errors; - rrd_stat_atomic_add(&global_fs_errors, 1); - return ret; - } - info("Found %d files in path %s", ret, dbfiles_path); - - metalogfiles = callocz(MIN(ret, MAX_DATAFILES), sizeof(*metalogfiles)); - for (matched_files = 0 ; UV_EOF != uv_fs_scandir_next(&req, &dent) && matched_files < MAX_DATAFILES ; ) { - info("Scanning file \"%s/%s\"", dbfiles_path, dent.name); - ret = sscanf(dent.name, METALOG_PREFIX METALOG_FILE_NUMBER_SCAN_TMPL METALOG_EXTENSION, &starting_no, &no); - if (2 == ret) { - info("Matched file \"%s/%s\"", dbfiles_path, dent.name); - metalogfile = mallocz(sizeof(*metalogfile)); - metadata_logfile_init(metalogfile, ctx, starting_no, no); - metalogfiles[matched_files++] = metalogfile; - } - } - uv_fs_req_cleanup(&req); - - if (0 == matched_files) { - freez(metalogfiles); - return 0; - } - if (matched_files == MAX_DATAFILES) { - error("Warning: hit maximum database engine file limit of %d files", MAX_DATAFILES); - } - qsort(metalogfiles, matched_files, sizeof(*metalogfiles), scan_metalog_files_cmp); - ret = compaction_failure_recovery(ctx, metalogfiles, &matched_files); - if (ret) { /* If the files are corrupted fail */ - for (i = 0 ; i < matched_files ; ++i) { - freez(metalogfiles[i]); - } - freez(metalogfiles); - return UV_EINVAL; - } - //ctx->last_fileno = metalogfiles[matched_files - 1]->fileno; - - struct plugind cd = { - .enabled = 1, - .update_every = 0, - .pid = 0, - .serial_failures = 0, - .successful_collections = 0, - .obsolete = 0, - .started_t = INVALID_TIME, - .next = NULL, - .version = 0, - }; - - struct metalog_pluginsd_state metalog_parser_state; - metalog_pluginsd_state_init(&metalog_parser_state, ctx); - - PARSER_USER_OBJECT metalog_parser_object = { - .enabled = cd.enabled, - .host = ctx->rrdeng_ctx->host, - .cd = &cd, - .trust_durations = 0, - .private = &metalog_parser_state - }; - - PARSER *parser = parser_init(metalog_parser_object.host, &metalog_parser_object, NULL, PARSER_INPUT_SPLIT); - parser_add_keyword(parser, PLUGINSD_KEYWORD_HOST, metalog_pluginsd_host); - parser_add_keyword(parser, PLUGINSD_KEYWORD_GUID, pluginsd_guid); - parser_add_keyword(parser, PLUGINSD_KEYWORD_CONTEXT, pluginsd_context); - parser_add_keyword(parser, PLUGINSD_KEYWORD_TOMBSTONE, pluginsd_tombstone); - parser->plugins_action->dimension_action = &metalog_pluginsd_dimension_action; - parser->plugins_action->chart_action = &metalog_pluginsd_chart_action; - parser->plugins_action->guid_action = &metalog_pluginsd_guid_action; - parser->plugins_action->context_action = &metalog_pluginsd_context_action; - parser->plugins_action->tombstone_action = &metalog_pluginsd_tombstone_action; - parser->plugins_action->host_action = &metalog_pluginsd_host_action; - - - metalog_parser_object.parser = parser; - ctx->metalog_parser_object = &metalog_parser_object; - - for (failed_to_load = 0, i = 0 ; i < matched_files ; ++i) { - metalogfile = metalogfiles[i]; - db_lock(); - db_execute("BEGIN TRANSACTION;"); - ret = load_metadata_logfile(ctx, metalogfile); - if (0 != ret) { - error("Deleting invalid metadata log file \"%s/"METALOG_PREFIX METALOG_FILE_NUMBER_PRINT_TMPL - METALOG_EXTENSION"\"", dbfiles_path, metalogfile->starting_fileno, metalogfile->fileno); - unlink_metadata_logfile(metalogfile); - ++failed_to_load; - db_execute("ROLLBACK TRANSACTION;"); - } - else - db_execute("COMMIT TRANSACTION;"); - db_unlock(); - freez(metalogfile); - } - matched_files -= failed_to_load; - debug(D_METADATALOG, "PARSER ended"); - - parser_destroy(parser); - - size_t count __maybe_unused = metalog_parser_object.count; - - debug(D_METADATALOG, "Parsing count=%u", (unsigned)count); - - freez(metalogfiles); - return matched_files; -} - -/* Return 0 on success. */ -int init_metalog_files(struct metalog_instance *ctx) -{ - int ret; - char *dbfiles_path = ctx->rrdeng_ctx->dbfiles_path; - - ret = scan_metalog_files(ctx); - if (ret < 0) { - error("Failed to scan path \"%s\".", dbfiles_path); - return ret; - }/* else if (0 == ret) { - ctx->last_fileno = 1; - }*/ - - return 0; -} diff --git a/database/engine/metadata_log/logfile.h b/database/engine/metadata_log/logfile.h deleted file mode 100644 index df12ac714..000000000 --- a/database/engine/metadata_log/logfile.h +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_LOGFILE_H -#define NETDATA_LOGFILE_H - -#include "metadatalogprotocol.h" -#include "../rrdengine.h" - -/* Forward declarations */ -struct metadata_logfile; -struct metalog_worker_config; - -#define METALOG_PREFIX "metadatalog-" -#define METALOG_EXTENSION ".mlf" - -/* only one event loop is supported for now */ -struct metadata_logfile { - unsigned fileno; /* Starts at 1 */ - unsigned starting_fileno; /* 0 for normal files, staring number during compaction */ - uv_file file; - uint64_t pos; - struct metalog_instance *ctx; - struct metadata_logfile *next; -}; - -struct metadata_logfile_list { - struct metadata_logfile *first; /* oldest */ - struct metadata_logfile *last; /* newest */ -}; - -extern void generate_metadata_logfile_path(struct metadata_logfile *metadatalog, char *str, size_t maxlen); -extern int rename_metadata_logfile(struct metadata_logfile *metalogfile, unsigned new_starting_fileno, - unsigned new_fileno); -extern int unlink_metadata_logfile(struct metadata_logfile *metalogfile); -extern int load_metadata_logfile(struct metalog_instance *ctx, struct metadata_logfile *logfile); -extern int init_metalog_files(struct metalog_instance *ctx); - - -#endif /* NETDATA_LOGFILE_H */ diff --git a/database/engine/metadata_log/metadatalog.h b/database/engine/metadata_log/metadatalog.h deleted file mode 100644 index 483036a91..000000000 --- a/database/engine/metadata_log/metadatalog.h +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_METADATALOG_H -#define NETDATA_METADATALOG_H - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include "../rrdengine.h" -#include "metadatalogprotocol.h" -#include "logfile.h" -#include "metadatalogapi.h" -#include "compaction.h" - -/* Forward declarations */ -struct metalog_instance; -struct parser_user_object; - -#define METALOG_FILE_NUMBER_SCAN_TMPL "%5u-%5u" -#define METALOG_FILE_NUMBER_PRINT_TMPL "%5.5u-%5.5u" - -struct metalog_instance { - struct rrdengine_instance *rrdeng_ctx; - struct parser_user_object *metalog_parser_object; - uint8_t initialized; /* set to 1 to mark context initialized */ -}; - -#endif /* NETDATA_METADATALOG_H */ diff --git a/database/engine/metadata_log/metadatalogapi.c b/database/engine/metadata_log/metadatalogapi.c deleted file mode 100755 index b206cca05..000000000 --- a/database/engine/metadata_log/metadatalogapi.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_RRD_INTERNALS - -#include "metadatalog.h" - -/* - * Returns 0 on success, negative on error - */ -int metalog_init(struct rrdengine_instance *rrdeng_parent_ctx) -{ - struct metalog_instance *ctx; - int error; - - ctx = callocz(1, sizeof(*ctx)); - ctx->initialized = 0; - rrdeng_parent_ctx->metalog_ctx = ctx; - - ctx->rrdeng_ctx = rrdeng_parent_ctx; - error = init_metalog_files(ctx); - if (error) { - goto error_after_init_rrd_files; - } - ctx->initialized = 1; /* notify dbengine that the metadata log has finished initializing */ - return 0; - -error_after_init_rrd_files: - freez(ctx); - return UV_EIO; -} - -/* This function is called by dbengine rotation logic when the metric has no writers */ -void metalog_delete_dimension_by_uuid(struct metalog_instance *ctx, uuid_t *metric_uuid) -{ - uuid_t multihost_uuid; - - delete_dimension_uuid(metric_uuid); - rrdeng_convert_legacy_uuid_to_multihost(ctx->rrdeng_ctx->machine_guid, metric_uuid, &multihost_uuid); - delete_dimension_uuid(&multihost_uuid); -} diff --git a/database/engine/metadata_log/metadatalogapi.h b/database/engine/metadata_log/metadatalogapi.h deleted file mode 100644 index d558b9317..000000000 --- a/database/engine/metadata_log/metadatalogapi.h +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_METADATALOGAPI_H -#define NETDATA_METADATALOGAPI_H - -extern void metalog_commit_delete_chart(RRDSET *st); -extern void metalog_delete_dimension_by_uuid(struct metalog_instance *ctx, uuid_t *metric_uuid); - -/* must call once before using anything */ -extern int metalog_init(struct rrdengine_instance *rrdeng_parent_ctx); - -#endif /* NETDATA_METADATALOGAPI_H */ diff --git a/database/engine/metadata_log/metadatalogprotocol.h b/database/engine/metadata_log/metadatalogprotocol.h deleted file mode 100644 index 1017213ae..000000000 --- a/database/engine/metadata_log/metadatalogprotocol.h +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_METADATALOGPROTOCOL_H -#define NETDATA_METADATALOGPROTOCOL_H - -#include "../rrddiskprotocol.h" - -#define RRDENG_METALOG_MAGIC "netdata-metadata-log" - -#define RRDENG_METALOG_VER (1) - -#define RRDENG_METALOG_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + sizeof(uint16_t))) -/* - * Metadata log persistent super-block - */ -struct rrdeng_metalog_sb { - char magic_number[RRDENG_MAGIC_SZ]; - uint16_t version; - uint8_t padding[RRDENG_METALOG_SB_PADDING_SZ]; -} __attribute__ ((packed)); - -/* - * Metadata log record types - */ -#define METALOG_STORE_PADDING (0) -#define METALOG_CREATE_OBJECT (1) -#define METALOG_DELETE_OBJECT (2) -#define METALOG_OTHER (3) /* reserved */ - -/* - * Metadata log record header - */ -struct rrdeng_metalog_record_header { - /* when set to METALOG_STORE_PADDING jump to start of next block */ - uint8_t type; - - uint16_t header_length; - uint32_t payload_length; - /****************************************************** - * No fields above this point can ever change. * - ****************************************************** - * All fields below this point are subject to change. * - ******************************************************/ -} __attribute__ ((packed)); - -/* - * Metadata log record trailer - */ -struct rrdeng_metalog_record_trailer { - uint8_t checksum[CHECKSUM_SZ]; /* CRC32 */ -} __attribute__ ((packed)); - -#endif /* NETDATA_METADATALOGPROTOCOL_H */ diff --git a/database/engine/metadata_log/metalogpluginsd.c b/database/engine/metadata_log/metalogpluginsd.c deleted file mode 100755 index a5301bc10..000000000 --- a/database/engine/metadata_log/metalogpluginsd.c +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_RRD_INTERNALS - -#include "metadatalog.h" -#include "metalogpluginsd.h" - -extern struct config stream_config; - -PARSER_RC metalog_pluginsd_host_action( - void *user, char *machine_guid, char *hostname, char *registry_hostname, int update_every, char *os, char *timezone, - char *tags) -{ - struct metalog_pluginsd_state *state = ((PARSER_USER_OBJECT *)user)->private; - - RRDHOST *host = rrdhost_find_by_guid(machine_guid, 0); - if (host) { - if (unlikely(host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) { - error("Archived host '%s' has memory mode '%s', but the archived one is '%s'. Ignoring archived state.", - host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), - rrd_memory_mode_name(RRD_MEMORY_MODE_DBENGINE)); - ((PARSER_USER_OBJECT *) user)->host = NULL; /* Ignore objects if memory mode is not dbengine */ - } - ((PARSER_USER_OBJECT *) user)->host = host; - return PARSER_RC_OK; - } - - if (strcmp(machine_guid, registry_get_this_machine_guid()) == 0) { - ((PARSER_USER_OBJECT *) user)->host = host; - return PARSER_RC_OK; - } - - if (likely(!uuid_parse(machine_guid, state->host_uuid))) { - int rc = sql_store_host(&state->host_uuid, hostname, registry_hostname, update_every, os, timezone, tags, 1); - if (unlikely(rc)) { - errno = 0; - error("Failed to store host %s with UUID %s in the database", hostname, machine_guid); - } - } - else { - errno = 0; - error("Host machine GUID %s is not valid", machine_guid); - } - - return PARSER_RC_OK; -} - -PARSER_RC metalog_pluginsd_chart_action(void *user, char *type, char *id, char *name, char *family, char *context, - char *title, char *units, char *plugin, char *module, int priority, - int update_every, RRDSET_TYPE chart_type, char *options) -{ - UNUSED(options); - - struct metalog_pluginsd_state *state = ((PARSER_USER_OBJECT *)user)->private; - RRDHOST *host = ((PARSER_USER_OBJECT *) user)->host; - - if (unlikely(uuid_is_null(state->host_uuid))) { - debug(D_METADATALOG, "Ignoring chart belonging to missing or ignored host."); - return PARSER_RC_OK; - } - uuid_copy(state->chart_uuid, state->uuid); - uuid_clear(state->uuid); /* Consume UUID */ - (void) sql_store_chart(&state->chart_uuid, &state->host_uuid, - type, id, name, family, context, title, units, - plugin, module, priority, update_every, - chart_type, RRD_MEMORY_MODE_DBENGINE, host ? host->rrd_history_entries : 1); - ((PARSER_USER_OBJECT *)user)->st_exists = 1; - - return PARSER_RC_OK; -} - -PARSER_RC metalog_pluginsd_dimension_action(void *user, RRDSET *st, char *id, char *name, char *algorithm, - long multiplier, long divisor, char *options, RRD_ALGORITHM algorithm_type) -{ - struct metalog_pluginsd_state *state = ((PARSER_USER_OBJECT *)user)->private; - UNUSED(user); - UNUSED(options); - UNUSED(algorithm); - UNUSED(st); - - if (unlikely(uuid_is_null(state->chart_uuid))) { - debug(D_METADATALOG, "Ignoring dimension belonging to missing or ignored chart."); - info("Ignoring dimension belonging to missing or ignored chart."); - return PARSER_RC_OK; - } - - if (unlikely(uuid_is_null(state->uuid))) { - debug(D_METADATALOG, "Ignoring dimension without unknown UUID"); - info("Ignoring dimension without unknown UUID"); - return PARSER_RC_OK; - } - - (void) sql_store_dimension(&state->uuid, &state->chart_uuid, id, name, multiplier, divisor, algorithm_type); - uuid_clear(state->uuid); /* Consume UUID */ - - return PARSER_RC_OK; -} - -PARSER_RC metalog_pluginsd_guid_action(void *user, uuid_t *uuid) -{ - struct metalog_pluginsd_state *state = ((PARSER_USER_OBJECT *)user)->private; - - uuid_copy(state->uuid, *uuid); - - return PARSER_RC_OK; -} - -PARSER_RC metalog_pluginsd_context_action(void *user, uuid_t *uuid) -{ - struct metalog_pluginsd_state *state = ((PARSER_USER_OBJECT *)user)->private; - - int rc = find_uuid_type(uuid); - - if (rc == 1) { - uuid_copy(state->host_uuid, *uuid); - ((PARSER_USER_OBJECT *)user)->st_exists = 0; - ((PARSER_USER_OBJECT *)user)->host_exists = 1; - } else if (rc == 2) { - uuid_copy(state->chart_uuid, *uuid); - ((PARSER_USER_OBJECT *)user)->st_exists = 1; - } else - uuid_copy(state->uuid, *uuid); - - return PARSER_RC_OK; -} - -PARSER_RC metalog_pluginsd_tombstone_action(void *user, uuid_t *uuid) -{ - UNUSED(user); - UNUSED(uuid); - - return PARSER_RC_OK; -} - -void metalog_pluginsd_state_init(struct metalog_pluginsd_state *state, struct metalog_instance *ctx) -{ - state->ctx = ctx; - state->skip_record = 0; - uuid_clear(state->uuid); - state->metalogfile = NULL; -} diff --git a/database/engine/metadata_log/metalogpluginsd.h b/database/engine/metadata_log/metalogpluginsd.h deleted file mode 100644 index 4fd8c3900..000000000 --- a/database/engine/metadata_log/metalogpluginsd.h +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_METALOGPLUGINSD_H -#define NETDATA_METALOGPLUGINSD_H - -#include "collectors/plugins.d/pluginsd_parser.h" -#include "collectors/plugins.d/plugins_d.h" -#include "parser/parser.h" - -struct metalog_pluginsd_state { - struct metalog_instance *ctx; - uuid_t uuid; - uuid_t host_uuid; - uuid_t chart_uuid; - uint8_t skip_record; /* skip this record due to errors in parsing */ - struct metadata_logfile *metalogfile; /* current metadata log file being replayed */ -}; - -extern void metalog_pluginsd_state_init(struct metalog_pluginsd_state *state, struct metalog_instance *ctx); - -extern PARSER_RC metalog_pluginsd_chart_action(void *user, char *type, char *id, char *name, char *family, - char *context, char *title, char *units, char *plugin, char *module, - int priority, int update_every, RRDSET_TYPE chart_type, char *options); -extern PARSER_RC metalog_pluginsd_dimension_action(void *user, RRDSET *st, char *id, char *name, char *algorithm, - long multiplier, long divisor, char *options, - RRD_ALGORITHM algorithm_type); -extern PARSER_RC metalog_pluginsd_guid_action(void *user, uuid_t *uuid); -extern PARSER_RC metalog_pluginsd_context_action(void *user, uuid_t *uuid); -extern PARSER_RC metalog_pluginsd_tombstone_action(void *user, uuid_t *uuid); -extern PARSER_RC metalog_pluginsd_host(char **words, void *user, PLUGINSD_ACTION *plugins_action); -extern PARSER_RC metalog_pluginsd_host_action(void *user, char *machine_guid, char *hostname, char *registry_hostname, int update_every, char *os, char *timezone, char *tags); - -#endif /* NETDATA_METALOGPLUGINSD_H */ diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index 39f7642d0..d65cb35a5 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -4,8 +4,8 @@ #include "rrdengine.h" ARAL page_descr_aral = { - .element_size = sizeof(struct rrdeng_page_descr), - .elements = 20000, + .requested_element_size = sizeof(struct rrdeng_page_descr), + .initial_elements = 20000, .filename = "page_descriptors", .cache_dir = &netdata_configured_cache_dir, .use_mmap = false, @@ -127,12 +127,13 @@ struct rrdeng_page_descr *pg_cache_create_descr(void) descr = rrdeng_page_descr_mallocz(); descr->page_length = 0; - descr->start_time = INVALID_TIME; - descr->end_time = INVALID_TIME; + descr->start_time_ut = INVALID_TIME; + descr->end_time_ut = INVALID_TIME; descr->id = NULL; descr->extent = NULL; descr->pg_cache_descr_state = 0; descr->pg_cache_descr = NULL; + descr->update_every_s = 0; return descr; } @@ -476,7 +477,7 @@ uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_d uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); uv_rwlock_wrlock(&page_index->lock); - ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0); + ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time_ut / USEC_PER_SEC), PJE0); if (unlikely(0 == ret)) { uv_rwlock_wrunlock(&page_index->lock); if (unlikely(debug_flags & D_RRDENGINE)) { @@ -506,7 +507,7 @@ uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_d while (!pg_cache_try_get_unsafe(descr, 1)) { debug(D_RRDENGINE, "%s: Waiting for locked page:", __func__); if (unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); pg_cache_wait_event_unsafe(descr); } } @@ -517,7 +518,7 @@ uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_d while (unlikely(pg_cache_descr->flags & RRD_PAGE_DIRTY)) { debug(D_RRDENGINE, "%s: Found dirty page, waiting for it to be flushed:", __func__); if (unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); pg_cache_wait_event_unsafe(descr); } } @@ -548,8 +549,8 @@ static inline int is_page_in_time_range(struct rrdeng_page_descr *descr, usec_t { usec_t pg_start, pg_end; - pg_start = descr->start_time; - pg_end = descr->end_time; + pg_start = descr->start_time_ut; + pg_end = descr->end_time_ut; return (pg_start < start_time && pg_end >= start_time) || (pg_start >= start_time && pg_start <= end_time); @@ -557,7 +558,7 @@ static inline int is_page_in_time_range(struct rrdeng_page_descr *descr, usec_t static inline int is_point_in_time_in_page(struct rrdeng_page_descr *descr, usec_t point_in_time) { - return (point_in_time >= descr->start_time && point_in_time <= descr->end_time); + return (point_in_time >= descr->start_time_ut && point_in_time <= descr->end_time_ut); } /* The caller must hold the page index lock */ @@ -592,14 +593,14 @@ static inline struct rrdeng_page_descr * /* Update metric oldest and latest timestamps efficiently when adding new values */ void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr) { - usec_t oldest_time = page_index->oldest_time; - usec_t latest_time = page_index->latest_time; + usec_t oldest_time = page_index->oldest_time_ut; + usec_t latest_time = page_index->latest_time_ut; - if (unlikely(oldest_time == INVALID_TIME || descr->start_time < oldest_time)) { - page_index->oldest_time = descr->start_time; + if (unlikely(oldest_time == INVALID_TIME || descr->start_time_ut < oldest_time)) { + page_index->oldest_time_ut = descr->start_time_ut; } - if (likely(descr->end_time > latest_time || latest_time == INVALID_TIME)) { - page_index->latest_time = descr->end_time; + if (likely(descr->end_time_ut > latest_time || latest_time == INVALID_TIME)) { + page_index->latest_time_ut = descr->end_time_ut; } } @@ -618,23 +619,23 @@ void pg_cache_update_metric_times(struct pg_cache_page_index *page_index) firstPValue = JudyLFirst(page_index->JudyL_array, &firstIndex, PJE0); if (likely(NULL != firstPValue)) { descr = *firstPValue; - oldest_time = descr->start_time; + oldest_time = descr->start_time_ut; } lastIndex = (Word_t)-1; lastPValue = JudyLLast(page_index->JudyL_array, &lastIndex, PJE0); if (likely(NULL != lastPValue)) { descr = *lastPValue; - latest_time = descr->end_time; + latest_time = descr->end_time_ut; } uv_rwlock_rdunlock(&page_index->lock); if (unlikely(NULL == firstPValue)) { fatal_assert(NULL == lastPValue); - page_index->oldest_time = page_index->latest_time = INVALID_TIME; + page_index->oldest_time_ut = page_index->latest_time_ut = INVALID_TIME; return; } - page_index->oldest_time = oldest_time; - page_index->latest_time = latest_time; + page_index->oldest_time_ut = oldest_time; + page_index->latest_time_ut = latest_time; } /* If index is NULL lookup by UUID (descr->id) */ @@ -669,7 +670,7 @@ void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index } uv_rwlock_wrlock(&page_index->lock); - PValue = JudyLIns(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0); + PValue = JudyLIns(&page_index->JudyL_array, (Word_t)(descr->start_time_ut / USEC_PER_SEC), PJE0); *PValue = descr; ++page_index->page_count; pg_cache_add_new_metric_time(page_index, descr); @@ -681,7 +682,7 @@ void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); } -usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time) +usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut) { struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = NULL; @@ -699,25 +700,25 @@ usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, } uv_rwlock_rdlock(&page_index->lock); - descr = find_first_page_in_time_range(page_index, start_time, end_time); + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); if (NULL == descr) { uv_rwlock_rdunlock(&page_index->lock); return INVALID_TIME; } uv_rwlock_rdunlock(&page_index->lock); - return descr->start_time; + return descr->start_time_ut; } /** * Return page information for the first page before point_in_time that satisfies the filter. * @param ctx DB context * @param page_index page index of a metric - * @param point_in_time the pages that are searched must be older than this timestamp + * @param point_in_time_ut the pages that are searched must be older than this timestamp * @param filter decides if the page satisfies the caller's criteria * @param page_info the result of the search is set in this pointer */ void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, - usec_t point_in_time, pg_cache_page_info_filter_t *filter, + usec_t point_in_time_ut, pg_cache_page_info_filter_t *filter, struct rrdeng_page_info *page_info) { struct page_cache *pg_cache = &ctx->pg_cache; @@ -728,7 +729,7 @@ void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_c (void)pg_cache; fatal_assert(NULL != page_index); - Index = (Word_t)(point_in_time / USEC_PER_SEC); + Index = (Word_t)(point_in_time_ut / USEC_PER_SEC); uv_rwlock_rdlock(&page_index->lock); do { PValue = JudyLPrev(page_index->JudyL_array, &Index, PJE0); @@ -736,12 +737,12 @@ void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_c } while (descr != NULL && !filter(descr)); if (unlikely(NULL == descr)) { page_info->page_length = 0; - page_info->start_time = INVALID_TIME; - page_info->end_time = INVALID_TIME; + page_info->start_time_ut = INVALID_TIME; + page_info->end_time_ut = INVALID_TIME; } else { page_info->page_length = descr->page_length; - page_info->start_time = descr->start_time; - page_info->end_time = descr->end_time; + page_info->start_time_ut = descr->start_time_ut; + page_info->end_time_ut = descr->end_time_ut; } uv_rwlock_rdunlock(&page_index->lock); } @@ -750,7 +751,7 @@ void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_c * Searches for an unallocated page without triggering disk I/O. Attempts to reserve the page and get a reference. * @param ctx DB context * @param id lookup by UUID - * @param start_time exact starting time in usec + * @param start_time_ut exact starting time in usec * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID. * @return the page descriptor or NULL on failure. It can fail if: * 1. The page is already allocated to the page cache. @@ -758,7 +759,7 @@ void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_c * 3. It did not succeed to reserve a spot in the page cache. */ struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id, - usec_t start_time) + usec_t start_time_ut) { struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = NULL; @@ -781,7 +782,7 @@ struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_ } uv_rwlock_rdlock(&page_index->lock); - Index = (Word_t)(start_time / USEC_PER_SEC); + Index = (Word_t)(start_time_ut / USEC_PER_SEC); PValue = JudyLGet(page_index->JudyL_array, Index, PJE0); if (likely(NULL != PValue)) { descr = *PValue; @@ -818,15 +819,15 @@ struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_ * Does not get a reference. * @param ctx DB context * @param id UUID - * @param start_time inclusive starting time in usec - * @param end_time inclusive ending time in usec + * @param start_time_ut inclusive starting time in usec + * @param end_time_ut inclusive ending time in usec * @param page_info_arrayp It allocates (*page_arrayp) and populates it with information of pages that overlap * with the time range [start_time,end_time]. The caller must free (*page_info_arrayp) with freez(). * If page_info_arrayp is set to NULL nothing was allocated. * @param ret_page_indexp Sets the page index pointer (*ret_page_indexp) for the given UUID. * @return the number of pages that overlap with the time range [start_time,end_time]. */ -unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time, +unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut, struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp) { struct page_cache *pg_cache = &ctx->pg_cache; @@ -854,14 +855,14 @@ unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t sta } uv_rwlock_rdlock(&page_index->lock); - descr = find_first_page_in_time_range(page_index, start_time, end_time); + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); if (NULL == descr) { uv_rwlock_rdunlock(&page_index->lock); debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__); *ret_page_indexp = NULL; return 0; } else { - Index = (Word_t)(descr->start_time / USEC_PER_SEC); + Index = (Word_t)(descr->start_time_ut / USEC_PER_SEC); } if (page_info_arrayp) { page_info_array_max_size = PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); @@ -869,7 +870,7 @@ unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t sta } for (count = 0, preload_count = 0 ; - descr != NULL && is_page_in_time_range(descr, start_time, end_time) ; + descr != NULL && is_page_in_time_range(descr, start_time_ut, end_time_ut) ; PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0), descr = unlikely(NULL == PValue) ? NULL : *PValue) { /* Iterate all pages in range */ @@ -881,8 +882,8 @@ unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t sta page_info_array_max_size += PAGE_CACHE_MAX_PRELOAD_PAGES * sizeof(struct rrdeng_page_info); *page_info_arrayp = reallocz(*page_info_arrayp, page_info_array_max_size); } - (*page_info_arrayp)[count].start_time = descr->start_time; - (*page_info_arrayp)[count].end_time = descr->end_time; + (*page_info_arrayp)[count].start_time_ut = descr->start_time_ut; + (*page_info_arrayp)[count].end_time_ut = descr->end_time_ut; (*page_info_arrayp)[count].page_length = descr->page_length; } ++count; @@ -974,7 +975,7 @@ unsigned pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t sta */ struct rrdeng_page_descr * pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, - usec_t point_in_time) + usec_t point_in_time_ut) { struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = NULL; @@ -1003,15 +1004,15 @@ struct rrdeng_page_descr * page_not_in_cache = 0; uv_rwlock_rdlock(&page_index->lock); while (1) { - Index = (Word_t)(point_in_time / USEC_PER_SEC); + Index = (Word_t)(point_in_time_ut / USEC_PER_SEC); PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0); if (likely(NULL != PValue)) { descr = *PValue; } if (NULL == PValue || 0 == descr->page_length || - (INVALID_TIME != point_in_time && - !is_point_in_time_in_page(descr, point_in_time))) { + (INVALID_TIME != point_in_time_ut && + !is_point_in_time_in_page(descr, point_in_time_ut))) { /* non-empty page not found */ uv_rwlock_rdunlock(&page_index->lock); @@ -1038,7 +1039,7 @@ struct rrdeng_page_descr * debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__); if(unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) { pg_cache_wait_event_unsafe(descr); } @@ -1053,7 +1054,7 @@ struct rrdeng_page_descr * uv_rwlock_rdunlock(&page_index->lock); debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__); if(unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); if (!(flags & RRD_PAGE_POPULATED)) page_not_in_cache = 1; pg_cache_wait_event_unsafe(descr); @@ -1081,7 +1082,7 @@ struct rrdeng_page_descr * */ struct rrdeng_page_descr * pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, - usec_t start_time, usec_t end_time) + usec_t start_time_ut, usec_t end_time_ut) { struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = NULL; @@ -1110,7 +1111,7 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index uv_rwlock_rdlock(&page_index->lock); int retry_count = 0; while (1) { - descr = find_first_page_in_time_range(page_index, start_time, end_time); + descr = find_first_page_in_time_range(page_index, start_time_ut, end_time_ut); if (NULL == descr || 0 == descr->page_length || retry_count == default_rrdeng_page_fetch_retries) { /* non-empty page not found */ if (retry_count == default_rrdeng_page_fetch_retries) @@ -1140,7 +1141,7 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__); if(unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); while (!(pg_cache_descr->flags & RRD_PAGE_POPULATED)) { pg_cache_wait_event_unsafe(descr); } @@ -1155,7 +1156,7 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index uv_rwlock_rdunlock(&page_index->lock); debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__); if(unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); if (!(flags & RRD_PAGE_POPULATED)) page_not_in_cache = 1; @@ -1180,7 +1181,7 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index return descr; } -struct pg_cache_page_index *create_page_index(uuid_t *id) +struct pg_cache_page_index *create_page_index(uuid_t *id, struct rrdengine_instance *ctx) { struct pg_cache_page_index *page_index; @@ -1188,11 +1189,15 @@ struct pg_cache_page_index *create_page_index(uuid_t *id) page_index->JudyL_array = (Pvoid_t) NULL; uuid_copy(page_index->id, *id); fatal_assert(0 == uv_rwlock_init(&page_index->lock)); - page_index->oldest_time = INVALID_TIME; - page_index->latest_time = INVALID_TIME; + page_index->oldest_time_ut = INVALID_TIME; + page_index->latest_time_ut = INVALID_TIME; page_index->prev = NULL; page_index->page_count = 0; + page_index->refcount = 0; page_index->writers = 0; + page_index->ctx = ctx; + page_index->alignment = NULL; + page_index->latest_update_every_s = default_rrd_update_every; return page_index; } @@ -1238,24 +1243,6 @@ void init_page_cache(struct rrdengine_instance *ctx) init_committed_page_index(ctx); } - - -/* - * METRIC # number - * 1. INDEX: JudyHS # bytes - * 2. DATA: page_index # bytes - * - * PAGE (1 page of 1 metric) # number - * 1. INDEX AT METRIC: page_index->JudyL_array # bytes - * 2. DATA: descr # bytes - * - * PAGE CACHE (1 page of 1 metric at the cache) # number - * 1. pg_cache_descr (if PG_CACHE_DESCR_ALLOCATED) # bytes - * 2. data (if RRD_PAGE_POPULATED) # bytes - * - */ - - void free_page_cache(struct rrdengine_instance *ctx) { struct page_cache *pg_cache = &ctx->pg_cache; @@ -1265,30 +1252,15 @@ void free_page_cache(struct rrdengine_instance *ctx) struct rrdeng_page_descr *descr; struct page_cache_descr *pg_cache_descr; - Word_t metrics_number = 0, - metrics_bytes = 0, - metrics_index_bytes = 0, - metrics_duration = 0; - - Word_t pages_number = 0, - pages_bytes = 0, - pages_index_bytes = 0; - - Word_t pages_size_per_type[256] = { 0 }, - pages_count_per_type[256] = { 0 }; - - Word_t cache_pages_number = 0, - cache_pages_bytes = 0, - cache_pages_data_bytes = 0; - - size_t points_in_db = 0, - uncompressed_points_size = 0, - seconds_in_db = 0, - single_point_pages = 0; - - Word_t pages_dirty_index_bytes = 0; - - usec_t oldest_time_ut = LONG_MAX, latest_time_ut = 0; + // if we are exiting, the OS will recover all memory so do not slow down the shutdown process + // Do the cleanup if we are compiling with NETDATA_INTERNAL_CHECKS + // This affects the reporting of dbengine statistics which are available in real time + // via the /api/v1/dbengine_stats endpoint +#ifndef NETDATA_DBENGINE_FREE + if (netdata_exit) + return; +#endif + Word_t metrics_index_bytes = 0, pages_index_bytes = 0, pages_dirty_index_bytes = 0; /* Free committed page index */ pages_dirty_index_bytes = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0); @@ -1305,116 +1277,30 @@ void free_page_cache(struct rrdengine_instance *ctx) PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0); descr = unlikely(NULL == PValue) ? NULL : *PValue; - size_t metric_duration = 0; - size_t metric_update_every = 0; - size_t metric_single_point_pages = 0; - while (descr != NULL) { /* Iterate all page descriptors of this metric */ if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) { - cache_pages_number++; - /* Check rrdenglocking.c */ pg_cache_descr = descr->pg_cache_descr; if (pg_cache_descr->flags & RRD_PAGE_POPULATED) { dbengine_page_free(pg_cache_descr->page); - cache_pages_data_bytes += RRDENG_BLOCK_SIZE; } rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); - cache_pages_bytes += sizeof(*pg_cache_descr); } - - if(descr->start_time < oldest_time_ut) - oldest_time_ut = descr->start_time; - - if(descr->end_time > latest_time_ut) - latest_time_ut = descr->end_time; - - pages_size_per_type[descr->type] += descr->page_length; - pages_count_per_type[descr->type]++; - - size_t points_in_page = (descr->page_length / PAGE_POINT_SIZE_BYTES(descr)); - size_t page_duration = ((descr->end_time - descr->start_time) / USEC_PER_SEC); - size_t update_every = (page_duration == 0) ? 1 : page_duration / (points_in_page - 1); - - if (!page_duration && metric_update_every) { - page_duration = metric_update_every; - update_every = metric_update_every; - } - else if(page_duration) - metric_update_every = update_every; - - uncompressed_points_size += descr->page_length; - - if(page_duration > 0) { - page_duration = update_every * points_in_page; - metric_duration += page_duration; - seconds_in_db += page_duration; - points_in_db += descr->page_length / PAGE_POINT_SIZE_BYTES(descr); - } - else - metric_single_point_pages++; - rrdeng_page_descr_freez(descr); - pages_bytes += sizeof(*descr); - pages_number++; PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0); descr = unlikely(NULL == PValue) ? NULL : *PValue; } - if(metric_single_point_pages && metric_update_every) { - points_in_db += metric_single_point_pages; - seconds_in_db += metric_update_every * metric_single_point_pages; - metric_duration += metric_update_every * metric_single_point_pages; - } - else - single_point_pages += metric_single_point_pages; - /* Free page index */ pages_index_bytes += JudyLFreeArray(&page_index->JudyL_array, PJE0); fatal_assert(NULL == page_index->JudyL_array); freez(page_index); - - metrics_number++; - metrics_bytes += sizeof(*page_index); - metrics_duration += metric_duration; } /* Free metrics index */ metrics_index_bytes = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0); fatal_assert(NULL == pg_cache->metrics_index.JudyHS_array); - - if(!metrics_number) metrics_number = 1; - if(!pages_number) pages_number = 1; - if(!cache_pages_number) cache_pages_number = 1; - if(!points_in_db) points_in_db = 1; - if(latest_time_ut == oldest_time_ut) oldest_time_ut -= USEC_PER_SEC; - - if(single_point_pages) { - long double avg_duration = (long double)seconds_in_db / points_in_db; - points_in_db += single_point_pages; - seconds_in_db += (size_t)(avg_duration * single_point_pages); - } - - info("DBENGINE STATISTICS ON METRICS:" - " Metrics: %lu (structures %lu bytes - per metric %0.2f, index (HS) %lu bytes - per metric %0.2f bytes - duration %zu secs) |" - " Page descriptors: %lu (structures %lu bytes - per page %0.2f bytes, index (L) %lu bytes - per page %0.2f, dirty index %lu bytes). |" - " Page cache: %lu pages (structures %lu bytes - per page %0.2f bytes, data %lu bytes). |" - " Points in db %zu, uncompressed size of points database %zu bytes. |" - " Duration of all points %zu seconds, average point duration %0.2f seconds." - " Duration of the database %llu seconds, average metric duration %0.2f seconds, average metric lifetime %0.2f%%." - , metrics_number, metrics_bytes, (double)metrics_bytes/metrics_number, metrics_index_bytes, (double)metrics_index_bytes/metrics_number, metrics_duration - , pages_number, pages_bytes, (double)pages_bytes/pages_number, pages_index_bytes, (double)pages_index_bytes/pages_number, pages_dirty_index_bytes - , cache_pages_number, cache_pages_bytes, (double)cache_pages_bytes/cache_pages_number, cache_pages_data_bytes - , points_in_db, uncompressed_points_size - , seconds_in_db, (double)seconds_in_db/points_in_db - , (latest_time_ut - oldest_time_ut) / USEC_PER_SEC, (double)metrics_duration/metrics_number - , (double)metrics_duration/metrics_number * 100.0 / ((latest_time_ut - oldest_time_ut) / USEC_PER_SEC) - ); - - for(int i = 0; i < 256 ;i++) { - if(pages_count_per_type[i]) - info("DBENGINE STATISTICS ON PAGE TYPES: page type %d total pages %lu, average page size %0.2f bytes", i, pages_count_per_type[i], (double)pages_size_per_type[i]/pages_count_per_type[i]); - } + info("Freed %lu bytes of memory from page cache.", pages_dirty_index_bytes + pages_index_bytes + metrics_index_bytes); } diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h index b938b9e05..2f4d6b332 100644 --- a/database/engine/pagecache.h +++ b/database/engine/pagecache.h @@ -60,18 +60,19 @@ struct rrdeng_page_descr { volatile unsigned long pg_cache_descr_state; /* page information */ - usec_t start_time; - usec_t end_time; - uint32_t page_length; + usec_t start_time_ut; + usec_t end_time_ut; + uint32_t update_every_s:24; uint8_t type; + uint32_t page_length; }; #define PAGE_INFO_SCRATCH_SZ (8) struct rrdeng_page_info { uint8_t scratch[PAGE_INFO_SCRATCH_SZ]; /* scratch area to be used by page-cache users */ - usec_t start_time; - usec_t end_time; + usec_t start_time_ut; + usec_t end_time_ut; uint32_t page_length; }; @@ -80,6 +81,11 @@ typedef int pg_cache_page_info_filter_t(struct rrdeng_page_descr *); #define PAGE_CACHE_MAX_PRELOAD_PAGES (256) +struct pg_alignment { + uint32_t page_length; + uint32_t refcount; +}; + /* maps time ranges to pages */ struct pg_cache_page_index { uuid_t id; @@ -89,6 +95,7 @@ struct pg_cache_page_index { */ Pvoid_t JudyL_array; Word_t page_count; + unsigned short refcount; unsigned short writers; uv_rwlock_t lock; @@ -96,13 +103,17 @@ struct pg_cache_page_index { * Only one effective writer, data deletion workqueue. * It's also written during the DB loading phase. */ - usec_t oldest_time; + usec_t oldest_time_ut; /* * Only one effective writer, data collection thread. * It's also written by the data deletion workqueue when data collection is disabled for this metric. */ - usec_t latest_time; + usec_t latest_time_ut; + + struct rrdengine_instance *ctx; + struct pg_alignment *alignment; + uint32_t latest_update_every_s; struct pg_cache_page_index *prev; }; @@ -152,93 +163,93 @@ struct page_cache { /* TODO: add statistics */ unsigned populated_pages; }; -extern void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr); -extern void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr); -extern unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx, +void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr); +void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr); +unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx, +void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx, +void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern struct rrdeng_page_descr *pg_cache_create_descr(void); -extern int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access); -extern void pg_cache_put_unsafe(struct rrdeng_page_descr *descr); -extern void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, +struct rrdeng_page_descr *pg_cache_create_descr(void); +int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access); +void pg_cache_put_unsafe(struct rrdeng_page_descr *descr); +void pg_cache_put(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, struct rrdeng_page_descr *descr); -extern uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, +uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, uint8_t remove_dirty, uint8_t is_exclusive_holder, uuid_t *metric_id); -extern usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, - usec_t start_time, usec_t end_time); -extern void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, - usec_t point_in_time, pg_cache_page_info_filter_t *filter, +usec_t pg_cache_oldest_time_in_range(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time_ut, usec_t end_time_ut); +void pg_cache_get_filtered_info_prev(struct rrdengine_instance *ctx, struct pg_cache_page_index *page_index, + usec_t point_in_time_ut, pg_cache_page_info_filter_t *filter, struct rrdeng_page_info *page_info); -extern struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id, - usec_t start_time); -extern unsigned - pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time, +struct rrdeng_page_descr *pg_cache_lookup_unpopulated_and_lock(struct rrdengine_instance *ctx, uuid_t *id, + usec_t start_time_ut); +unsigned + pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time_ut, usec_t end_time_ut, struct rrdeng_page_info **page_info_arrayp, struct pg_cache_page_index **ret_page_indexp); -extern struct rrdeng_page_descr * +struct rrdeng_page_descr * pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, - usec_t point_in_time); -extern struct rrdeng_page_descr * + usec_t point_in_time_ut); +struct rrdeng_page_descr * pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id, - usec_t start_time, usec_t end_time); -extern struct pg_cache_page_index *create_page_index(uuid_t *id); -extern void init_page_cache(struct rrdengine_instance *ctx); -extern void free_page_cache(struct rrdengine_instance *ctx); -extern void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr); -extern void pg_cache_update_metric_times(struct pg_cache_page_index *page_index); -extern unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx); -extern unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx); -extern unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx); - -extern void rrdeng_page_descr_aral_go_singlethreaded(void); -extern void rrdeng_page_descr_aral_go_multithreaded(void); -extern void rrdeng_page_descr_use_malloc(void); -extern void rrdeng_page_descr_use_mmap(void); -extern bool rrdeng_page_descr_is_mmap(void); -extern struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void); -extern void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr); + usec_t start_time_ut, usec_t end_time_ut); +struct pg_cache_page_index *create_page_index(uuid_t *id, struct rrdengine_instance *ctx); +void init_page_cache(struct rrdengine_instance *ctx); +void free_page_cache(struct rrdengine_instance *ctx); +void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_descr *descr); +void pg_cache_update_metric_times(struct pg_cache_page_index *page_index); +unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx); +unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx); +unsigned long pg_cache_committed_hard_limit(struct rrdengine_instance *ctx); + +void rrdeng_page_descr_aral_go_singlethreaded(void); +void rrdeng_page_descr_aral_go_multithreaded(void); +void rrdeng_page_descr_use_malloc(void); +void rrdeng_page_descr_use_mmap(void); +bool rrdeng_page_descr_is_mmap(void); +struct rrdeng_page_descr *rrdeng_page_descr_mallocz(void); +void rrdeng_page_descr_freez(struct rrdeng_page_descr *descr); static inline void - pg_cache_atomic_get_pg_info(struct rrdeng_page_descr *descr, usec_t *end_timep, uint32_t *page_lengthp) + pg_cache_atomic_get_pg_info(struct rrdeng_page_descr *descr, usec_t *end_time_ut_p, uint32_t *page_lengthp) { - usec_t end_time, old_end_time; + usec_t end_time_ut, old_end_time_ut; uint32_t page_length; if (NULL == descr->extent) { /* this page is currently being modified, get consistent info locklessly */ do { - end_time = descr->end_time; + end_time_ut = descr->end_time_ut; __sync_synchronize(); - old_end_time = end_time; + old_end_time_ut = end_time_ut; page_length = descr->page_length; __sync_synchronize(); - end_time = descr->end_time; + end_time_ut = descr->end_time_ut; __sync_synchronize(); - } while ((end_time != old_end_time || (end_time & 1) != 0)); + } while ((end_time_ut != old_end_time_ut || (end_time_ut & 1) != 0)); - *end_timep = end_time; + *end_time_ut_p = end_time_ut; *page_lengthp = page_length; } else { - *end_timep = descr->end_time; + *end_time_ut_p = descr->end_time_ut; *page_lengthp = descr->page_length; } } /* The caller must hold a reference to the page and must have already set the new data */ -static inline void pg_cache_atomic_set_pg_info(struct rrdeng_page_descr *descr, usec_t end_time, uint32_t page_length) +static inline void pg_cache_atomic_set_pg_info(struct rrdeng_page_descr *descr, usec_t end_time_ut, uint32_t page_length) { - fatal_assert(!(end_time & 1)); + fatal_assert(!(end_time_ut & 1)); __sync_synchronize(); - descr->end_time |= 1; /* mark start of uncertainty period by adding 1 microsecond */ + descr->end_time_ut |= 1; /* mark start of uncertainty period by adding 1 microsecond */ __sync_synchronize(); descr->page_length = page_length; __sync_synchronize(); - descr->end_time = end_time; /* mark end of uncertainty period */ + descr->end_time_ut = end_time_ut; /* mark end of uncertainty period */ } #endif /* NETDATA_PAGECACHE_H */ diff --git a/database/engine/rrddiskprotocol.h b/database/engine/rrddiskprotocol.h index cb57385a4..5b4be9498 100644 --- a/database/engine/rrddiskprotocol.h +++ b/database/engine/rrddiskprotocol.h @@ -46,8 +46,8 @@ struct rrdeng_extent_page_descr { uint8_t uuid[UUID_SZ]; uint32_t page_length; - uint64_t start_time; - uint64_t end_time; + uint64_t start_time_ut; + uint64_t end_time_ut; } __attribute__ ((packed)); /* diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c index 8b35051d8..e4cd37e98 100644 --- a/database/engine/rrdengine.c +++ b/database/engine/rrdengine.c @@ -30,7 +30,7 @@ void dbengine_page_free(void *page) { if (unlikely(db_engine_use_malloc)) freez(page); else - munmap(page, RRDENG_BLOCK_SIZE); + netdata_munmap(page, RRDENG_BLOCK_SIZE); } static void sanity_check(void) @@ -206,8 +206,8 @@ void read_cached_extent_cb(struct rrdengine_worker_config* wc, unsigned idx, str /* care, we don't hold the descriptor mutex */ if (!uuid_compare(*extent->pages[j]->id, *descr->id) && extent->pages[j]->page_length == descr->page_length && - extent->pages[j]->start_time == descr->start_time && - extent->pages[j]->end_time == descr->end_time) { + extent->pages[j]->start_time_ut == descr->start_time_ut && + extent->pages[j]->end_time_ut == descr->end_time_ut) { break; } page_offset += extent->pages[j]->page_length; @@ -272,11 +272,9 @@ static void fill_page_with_nulls(void *page, uint32_t page_length, uint8_t type) } } -void read_extent_cb(uv_fs_t* req) +static void do_extent_processing (struct rrdengine_worker_config *wc, struct extent_io_descriptor *xt_io_descr, bool read_failed) { - struct rrdengine_worker_config* wc = req->loop->data; struct rrdengine_instance *ctx = wc->ctx; - struct extent_io_descriptor *xt_io_descr; struct rrdeng_page_descr *descr; struct page_cache_descr *pg_cache_descr; int ret; @@ -289,21 +287,20 @@ void read_extent_cb(uv_fs_t* req) struct rrdeng_df_extent_trailer *trailer; uLong crc; - xt_io_descr = req->data; header = xt_io_descr->buf; payload_length = header->payload_length; count = header->number_of_pages; payload_offset = sizeof(*header) + sizeof(header->descr[0]) * count; trailer = xt_io_descr->buf + xt_io_descr->bytes - sizeof(*trailer); - if (req->result < 0) { + if (unlikely(read_failed)) { struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile; ++ctx->stats.io_errors; rrd_stat_atomic_add(&global_io_errors, 1); have_read_error = 1; - error("%s: uv_fs_read - %s - extent at offset %"PRIu64"(%u) in datafile %u-%u.", __func__, - uv_strerror((int)req->result), xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno); + error("%s: uv_fs_read - extent at offset %"PRIu64"(%u) in datafile %u-%u.", __func__, xt_io_descr->pos, + xt_io_descr->bytes, datafile->tier, datafile->fileno); goto after_crc_check; } crc = crc32(0L, Z_NULL, 0); @@ -378,8 +375,8 @@ after_crc_check: /* care, we don't hold the descriptor mutex */ if (!uuid_compare(*(uuid_t *) header->descr[i].uuid, *descrj->id) && header->descr[i].page_length == descrj->page_length && - header->descr[i].start_time == descrj->start_time && - header->descr[i].end_time == descrj->end_time) { + header->descr[i].start_time_ut == descrj->start_time_ut && + header->descr[i].end_time_ut == descrj->end_time_ut) { descr = descrj; break; } @@ -387,7 +384,7 @@ after_crc_check: is_prefetched_page = 0; if (!descr) { /* This extent page has not been requested. Try populating it for locality (best effort). */ descr = pg_cache_lookup_unpopulated_and_lock(ctx, (uuid_t *)header->descr[i].uuid, - header->descr[i].start_time); + header->descr[i].start_time_ut); if (!descr) continue; /* Failed to reserve a suitable page */ is_prefetched_page = 1; @@ -421,11 +418,67 @@ after_crc_check: } if (xt_io_descr->completion) completion_mark_complete(xt_io_descr->completion); +} + +static void read_extent_cb(uv_fs_t *req) +{ + struct rrdengine_worker_config *wc = req->loop->data; + struct extent_io_descriptor *xt_io_descr; + + xt_io_descr = req->data; + do_extent_processing(wc, xt_io_descr, req->result < 0); uv_fs_req_cleanup(req); - free(xt_io_descr->buf); + posix_memfree(xt_io_descr->buf); freez(xt_io_descr); } +static void read_mmap_extent_cb(uv_work_t *req, int status __maybe_unused) +{ + struct rrdengine_worker_config *wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + struct extent_io_descriptor *xt_io_descr; + xt_io_descr = req->data; + + if (likely(xt_io_descr->map_base)) { + do_extent_processing(wc, xt_io_descr, false); + munmap(xt_io_descr->map_base, xt_io_descr->map_length); + freez(xt_io_descr); + return; + } + + // MMAP failed, so do uv_fs_read + int ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(xt_io_descr->bytes)); + if (unlikely(ret)) { + fatal("posix_memalign:%s", strerror(ret)); + } + unsigned real_io_size = ALIGN_BYTES_CEILING( xt_io_descr->bytes); + xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size); + xt_io_descr->req.data = xt_io_descr; + ret = uv_fs_read(req->loop, &xt_io_descr->req, xt_io_descr->file, &xt_io_descr->iov, 1, (unsigned) xt_io_descr->pos, read_extent_cb); + fatal_assert(-1 != ret); + ctx->stats.io_read_bytes += real_io_size; + ctx->stats.io_read_extent_bytes += real_io_size; +} + +static void do_mmap_read_extent(uv_work_t *req) +{ + struct extent_io_descriptor *xt_io_descr = (struct extent_io_descriptor * )req->data; + struct rrdengine_worker_config *wc = req->loop->data; + struct rrdengine_instance *ctx = wc->ctx; + + off_t map_start = ALIGN_BYTES_FLOOR(xt_io_descr->pos); + size_t length = ALIGN_BYTES_CEILING(xt_io_descr->pos + xt_io_descr->bytes) - map_start; + unsigned real_io_size = xt_io_descr->bytes; + + void *data = mmap(NULL, length, PROT_READ, MAP_SHARED, xt_io_descr->file, map_start); + if (likely(data != MAP_FAILED)) { + xt_io_descr->map_base = data; + xt_io_descr->map_length = length; + xt_io_descr->buf = data + (xt_io_descr->pos - map_start); + ctx->stats.io_read_bytes += real_io_size; + ctx->stats.io_read_extent_bytes += real_io_size; + } +} static void do_read_extent(struct rrdengine_worker_config* wc, struct rrdeng_page_descr **descr, @@ -435,8 +488,7 @@ static void do_read_extent(struct rrdengine_worker_config* wc, struct rrdengine_instance *ctx = wc->ctx; struct page_cache_descr *pg_cache_descr; int ret; - unsigned i, size_bytes, pos, real_io_size; -// uint32_t payload_length; + unsigned i, size_bytes, pos; struct extent_io_descriptor *xt_io_descr; struct rrdengine_datafile *datafile; struct extent_info *extent = descr[0]->extent; @@ -452,18 +504,17 @@ static void do_read_extent(struct rrdengine_worker_config* wc, rrdeng_page_descr_mutex_lock(ctx, descr[i]); pg_cache_descr = descr[i]->pg_cache_descr; pg_cache_descr->flags |= RRD_PAGE_READ_PENDING; -// payload_length = descr[i]->page_length; rrdeng_page_descr_mutex_unlock(ctx, descr[i]); - xt_io_descr->descr_array[i] = descr[i]; } xt_io_descr->descr_count = count; + xt_io_descr->file = datafile->file; xt_io_descr->bytes = size_bytes; xt_io_descr->pos = pos; - xt_io_descr->req.data = xt_io_descr; + xt_io_descr->req_worker.data = xt_io_descr; xt_io_descr->completion = NULL; - /* xt_io_descr->descr_commit_idx_array[0] */ xt_io_descr->release_descr = release_descr; + xt_io_descr->buf = NULL; xt_is_cached = !lookup_in_xt_cache(wc, extent, &xt_idx); if (xt_is_cached) { @@ -483,19 +534,10 @@ static void do_read_extent(struct rrdengine_worker_config* wc, } } - ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes)); - if (unlikely(ret)) { - fatal("posix_memalign:%s", strerror(ret)); - /* freez(xt_io_descr); - return;*/ - } - real_io_size = ALIGN_BYTES_CEILING(size_bytes); - xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size); - ret = uv_fs_read(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, pos, read_extent_cb); + ret = uv_queue_work(wc->loop, &xt_io_descr->req_worker, do_mmap_read_extent, read_mmap_extent_cb); fatal_assert(-1 != ret); - ctx->stats.io_read_bytes += real_io_size; + ++ctx->stats.io_read_requests; - ctx->stats.io_read_extent_bytes += real_io_size; ++ctx->stats.io_read_extents; ctx->stats.pg_cache_backfills += count; } @@ -696,7 +738,7 @@ void flush_pages_cb(uv_fs_t* req) if (xt_io_descr->completion) completion_mark_complete(xt_io_descr->completion); uv_fs_req_cleanup(req); - free(xt_io_descr->buf); + posix_memfree(xt_io_descr->buf); freez(xt_io_descr); uv_rwlock_wrlock(&pg_cache->committed_page_index.lock); @@ -820,8 +862,8 @@ static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct header->descr[i].type = descr->type; uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id); header->descr[i].page_length = descr->page_length; - header->descr[i].start_time = descr->start_time; - header->descr[i].end_time = descr->end_time; + header->descr[i].start_time_ut = descr->start_time_ut; + header->descr[i].end_time_ut = descr->end_time_ut; pos += sizeof(header->descr[i]); } for (i = 0 ; i < count ; ++i) { @@ -922,7 +964,6 @@ static void after_delete_old_data(struct rrdengine_worker_config* wc) wc->now_deleting_files = NULL; wc->cleanup_thread_deleting_files = 0; - aclk_data_rotated(); rrdcontext_db_rotation(); /* interrupt event loop */ @@ -948,12 +989,12 @@ static void delete_old_data(void *arg) for (i = 0 ; i < count ; ++i) { descr = extent->pages[i]; can_delete_metric = pg_cache_punch_hole(ctx, descr, 0, 0, &metric_id); - if (unlikely(can_delete_metric && ctx->metalog_ctx->initialized)) { + if (unlikely(can_delete_metric)) { /* * If the metric is empty, has no active writers and if the metadata log has been initialized then * attempt to delete the corresponding netdata dimension. */ - metalog_delete_dimension_by_uuid(ctx->metalog_ctx, &metric_id); + metaqueue_delete_dimension_uuid(&metric_id); } } next = extent->next; @@ -1044,7 +1085,70 @@ static void rrdeng_cleanup_finished_threads(struct rrdengine_worker_config* wc) /* return 0 on success */ int init_rrd_files(struct rrdengine_instance *ctx) { - return init_data_files(ctx); + int ret = init_data_files(ctx); + + BUFFER *wb = buffer_create(1000); + size_t all_errors = 0; + usec_t now = now_realtime_usec(); + + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had start time > end time (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_FLIPPED_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had start time = end time with more than 1 entries (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_EQUAL_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter) { + buffer_sprintf(wb, "%s%zu pages had zero points (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_ZERO_ENTRIES].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter) { + buffer_sprintf(wb, "%s%zu pages had update every == 0 with entries > 1 (latest: %llu secs ago)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_UPDATE_ZERO].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter) { + buffer_sprintf(wb, "%s%zu pages had a different number of points compared to their timestamps (latest: %llu secs ago; these page have been loaded)" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter + , (now - ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].latest_end_time_ut) / USEC_PER_SEC + ); + all_errors += ctx->load_errors[LOAD_ERRORS_PAGE_FLEXY_TIME].counter; + } + + if(ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter) { + buffer_sprintf(wb, "%s%zu extents have been dropped because they didn't have any valid pages" + , (all_errors)?", ":"" + , ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter + ); + all_errors += ctx->load_errors[LOAD_ERRORS_DROPPED_EXTENT].counter; + } + + if(all_errors) + info("DBENGINE: tier %d: %s", ctx->tier, buffer_tostring(wb)); + + buffer_free(wb); + return ret; } void finalize_rrd_files(struct rrdengine_instance *ctx) @@ -1139,10 +1243,6 @@ void timer_cb(uv_timer_t* handle) uv_stop(handle->loop); uv_update_time(handle->loop); - if (unlikely(!ctx->metalog_ctx->initialized)) { - worker_is_idle(); - return; /* Wait for the metadata log to initialize */ - } rrdeng_test_quota(wc); debug(D_RRDENGINE, "%s: timeout reached.", __func__); if (likely(!wc->now_deleting_files && !wc->now_invalidating_dirty_pages)) { @@ -1329,7 +1429,7 @@ void rrdeng_worker(void* arg) } /* cleanup operations of the event loop */ - info("Shutting down RRD engine event loop."); + info("Shutting down RRD engine event loop for tier %d", ctx->tier); /* * uv_async_send after uv_close does not seem to crash in linux at the moment, @@ -1344,7 +1444,7 @@ void rrdeng_worker(void* arg) wal_flush_transaction_buffer(wc); uv_run(loop, UV_RUN_DEFAULT); - info("Shutting down RRD engine event loop complete."); + info("Shutting down RRD engine event loop for tier %d complete", ctx->tier); /* TODO: don't let the API block by waiting to enqueue commands */ uv_cond_destroy(&wc->cmd_cond); /* uv_mutex_destroy(&wc->cmd_mutex); */ diff --git a/database/engine/rrdengine.h b/database/engine/rrdengine.h index 4b383b622..fedadbe86 100644 --- a/database/engine/rrdengine.h +++ b/database/engine/rrdengine.h @@ -17,7 +17,6 @@ #include "rrdenginelib.h" #include "datafile.h" #include "journalfile.h" -#include "metadata_log/metadatalog.h" #include "rrdengineapi.h" #include "pagecache.h" #include "rrdenglocking.h" @@ -37,29 +36,25 @@ struct rrdengine_instance; #define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u" struct rrdeng_collect_handle { - struct rrdeng_metric_handle *metric_handle; + struct pg_cache_page_index *page_index; struct rrdeng_page_descr *descr; unsigned long page_correlation_id; - struct rrdengine_instance *ctx; // set to 1 when this dimension is not page aligned with the other dimensions in the chart uint8_t unaligned_page; }; struct rrdeng_query_handle { - struct rrdeng_metric_handle *metric_handle; struct rrdeng_page_descr *descr; struct rrdengine_instance *ctx; struct pg_cache_page_index *page_index; - time_t next_page_time; - time_t now; + time_t wanted_start_time_s; + time_t now_s; unsigned position; unsigned entries; - TIER_QUERY_FETCH tier_query_fetch_type; storage_number *page; - usec_t page_end_time; + usec_t page_end_time_ut; uint32_t page_length; - usec_t dt; - time_t dt_sec; + time_t dt_s; }; typedef enum { @@ -110,8 +105,12 @@ struct rrdeng_cmdqueue { struct extent_io_descriptor { uv_fs_t req; + uv_work_t req_worker; uv_buf_t iov; + uv_file file; void *buf; + void *map_base; + size_t map_length; uint64_t pos; unsigned bytes; struct completion *completion; @@ -230,8 +229,16 @@ extern rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of dele #define SET_QUIESCE (1) /* set it before shutting down the instance, quiesce long running operations */ #define QUIESCED (2) /* is set after all threads have finished running */ +typedef enum { + LOAD_ERRORS_PAGE_FLIPPED_TIME = 0, + LOAD_ERRORS_PAGE_EQUAL_TIME = 1, + LOAD_ERRORS_PAGE_ZERO_ENTRIES = 2, + LOAD_ERRORS_PAGE_UPDATE_ZERO = 3, + LOAD_ERRORS_PAGE_FLEXY_TIME = 4, + LOAD_ERRORS_DROPPED_EXTENT = 5, +} INVALID_PAGE_ID; + struct rrdengine_instance { - struct metalog_instance *metalog_ctx; struct rrdengine_worker_config worker_config; struct completion rrdengine_completion; struct page_cache pg_cache; @@ -254,16 +261,21 @@ struct rrdengine_instance { uint8_t page_type; /* Default page type for this context */ struct rrdengine_statistics stats; + + struct { + size_t counter; + usec_t latest_end_time_ut; + } load_errors[6]; }; -extern void *dbengine_page_alloc(void); -extern void dbengine_page_free(void *page); +void *dbengine_page_alloc(void); +void dbengine_page_free(void *page); -extern int init_rrd_files(struct rrdengine_instance *ctx); -extern void finalize_rrd_files(struct rrdengine_instance *ctx); -extern void rrdeng_test_quota(struct rrdengine_worker_config* wc); -extern void rrdeng_worker(void* arg); -extern void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd); -extern struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc); +int init_rrd_files(struct rrdengine_instance *ctx); +void finalize_rrd_files(struct rrdengine_instance *ctx); +void rrdeng_test_quota(struct rrdengine_worker_config* wc); +void rrdeng_worker(void* arg); +void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd); +struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc); #endif /* NETDATA_RRDENGINE_H */ diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c index f4da29407..27503baee 100755 --- a/database/engine/rrdengineapi.c +++ b/database/engine/rrdengineapi.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "rrdengine.h" +#include "../storage_engine.h" /* Default global database instance */ struct rrdengine_instance multidb_ctx_storage_tier0; @@ -35,14 +36,31 @@ int default_multidb_disk_quota_mb = 256; /* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */ uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1; -static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host, int tier) { - if(tier < 0 || tier >= RRD_STORAGE_TIERS) tier = 0; - if(!host->storage_instance[tier]) tier = 0; - return (struct rrdengine_instance *)host->storage_instance[tier]; +// ---------------------------------------------------------------------------- +// metrics groups + +STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) { + return callocz(1, sizeof(struct pg_alignment)); +} + +void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg) { + if(!smg) return; + + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct pg_alignment *pa = (struct pg_alignment *)smg; + struct page_cache *pg_cache = &ctx->pg_cache; + + uv_rwlock_rdlock(&pg_cache->metrics_index.lock); + if(pa->refcount == 0) + freez(pa); + uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); } +// ---------------------------------------------------------------------------- +// metric handle for legacy dbs + /* This UUID is not unique across hosts */ -void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid) +void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid) { EVP_MD_CTX *evpctx; unsigned char hash_value[EVP_MAX_MD_SIZE]; @@ -75,98 +93,136 @@ void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uu memcpy(ret_uuid, hash_value, sizeof(uuid_t)); } -struct rrdeng_metric_handle { - RRDDIM *rd; - struct rrdengine_instance *ctx; - uuid_t *rrdeng_uuid; // database engine metric UUID - struct pg_cache_page_index *page_index; -}; +STORAGE_METRIC_HANDLE *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id, STORAGE_METRICS_GROUP *smg) { + uuid_t legacy_uuid; + rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid); + return rrdeng_metric_get(db_instance, &legacy_uuid, smg); +} -void rrdeng_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle) { - freez(db_metric_handle); +// ---------------------------------------------------------------------------- +// metric handle + +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + + unsigned short refcount = __atomic_sub_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); + if(refcount == 0 && page_index->alignment) { + __atomic_sub_fetch(&page_index->alignment->refcount, 1, __ATOMIC_SEQ_CST); + page_index->alignment = NULL; + } } -STORAGE_METRIC_HANDLE *rrdeng_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance) { +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + __atomic_add_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); + return db_metric_handle; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg) { struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; - struct page_cache *pg_cache; - uuid_t legacy_uuid; - uuid_t multihost_legacy_uuid; - Pvoid_t *PValue; + struct pg_alignment *pa = (struct pg_alignment *)smg; + struct page_cache *pg_cache = &ctx->pg_cache; struct pg_cache_page_index *page_index = NULL; - int is_multihost_child = 0; - RRDHOST *host = rd->rrdset->rrdhost; - - pg_cache = &ctx->pg_cache; - - rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid); - if (host != localhost && is_storage_engine_shared((STORAGE_INSTANCE *)ctx)) - is_multihost_child = 1; uv_rwlock_rdlock(&pg_cache->metrics_index.lock); - PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &legacy_uuid, sizeof(uuid_t)); - if (likely(NULL != PValue)) { + Pvoid_t *PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) page_index = *PValue; - } uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); - if (is_multihost_child || NULL == PValue) { - /* First time we see the legacy UUID or metric belongs to child host in multi-host DB. - * Drop legacy support, normal path */ - - uv_rwlock_rdlock(&pg_cache->metrics_index.lock); - PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t)); - if (likely(NULL != PValue)) { - page_index = *PValue; - } - uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); - if (NULL == PValue) { - uv_rwlock_wrlock(&pg_cache->metrics_index.lock); - PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); - fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */ - *PValue = page_index = create_page_index(&rd->metric_uuid); - page_index->prev = pg_cache->metrics_index.last_page_index; - pg_cache->metrics_index.last_page_index = page_index; - uv_rwlock_wrunlock(&pg_cache->metrics_index.lock); + + if (likely(page_index)) { + __atomic_add_fetch(&page_index->refcount, 1, __ATOMIC_SEQ_CST); + + if(pa) { + if(page_index->alignment && page_index->alignment != pa && page_index->writers > 0) + fatal("DBENGINE: page_index has a different alignment (page_index refcount is %u, writers is %u).", + page_index->refcount, page_index->writers); + + page_index->alignment = pa; + __atomic_add_fetch(&pa->refcount, 1, __ATOMIC_SEQ_CST); } - } else { - /* There are legacy UUIDs in the database, implement backward compatibility */ + } - rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid, - &multihost_legacy_uuid); + return (STORAGE_METRIC_HANDLE *)page_index; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg) { + internal_fatal(!db_instance, "DBENGINE: db_instance is NULL"); - int need_to_store = uuid_compare(rd->metric_uuid, multihost_legacy_uuid); + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct pg_alignment *pa = (struct pg_alignment *)smg; + struct pg_cache_page_index *page_index; + struct page_cache *pg_cache = &ctx->pg_cache; - uuid_copy(rd->metric_uuid, multihost_legacy_uuid); + uv_rwlock_wrlock(&pg_cache->metrics_index.lock); + Pvoid_t *PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, uuid, sizeof(uuid_t), PJE0); + fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */ + *PValue = page_index = create_page_index(uuid, ctx); + page_index->prev = pg_cache->metrics_index.last_page_index; + pg_cache->metrics_index.last_page_index = page_index; + page_index->alignment = pa; + page_index->refcount = 1; + if(pa) + pa->refcount++; + uv_rwlock_wrunlock(&pg_cache->metrics_index.lock); + + return (STORAGE_METRIC_HANDLE *)page_index; +} + +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg) { + STORAGE_METRIC_HANDLE *db_metric_handle; + + db_metric_handle = rrdeng_metric_get(db_instance, &rd->metric_uuid, smg); + if(!db_metric_handle) { + db_metric_handle = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset), smg); + if(db_metric_handle) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + uuid_copy(rd->metric_uuid, page_index->id); + } + } + if(!db_metric_handle) + db_metric_handle = rrdeng_metric_create(db_instance, &rd->metric_uuid, smg); - if (unlikely(need_to_store && !ctx->tier)) - (void)sql_store_dimension(&rd->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor, - rd->algorithm); +#ifdef NETDATA_INTERNAL_CHECKS + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + if(uuid_compare(rd->metric_uuid, page_index->id) != 0) { + char uuid1[UUID_STR_LEN + 1]; + char uuid2[UUID_STR_LEN + 1]; + + uuid_unparse(rd->metric_uuid, uuid1); + uuid_unparse(page_index->id, uuid2); + fatal("DBENGINE: uuids do not match, asked for metric '%s', but got page_index of metric '%s'", uuid1, uuid2); } - struct rrdeng_metric_handle *mh = mallocz(sizeof(struct rrdeng_metric_handle)); - mh->rd = rd; - mh->ctx = ctx; - mh->rrdeng_uuid = &page_index->id; - mh->page_index = page_index; - return (STORAGE_METRIC_HANDLE *)mh; + struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + if(page_index->ctx != ctx) + fatal("DBENGINE: mixed up rrdengine instances, asked for metric from %p, got from %p", ctx, page_index->ctx); +#endif + + return db_metric_handle; } + +// ---------------------------------------------------------------------------- +// collect ops + /* * Gets a handle for storing metrics to the database. * The handle must be released with rrdeng_store_metric_final(). */ -STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle) { - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle; - +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every) { + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; struct rrdeng_collect_handle *handle; - struct pg_cache_page_index *page_index; + + if(!page_index->alignment) + fatal("DBENGINE: metric group is required for collect operations"); handle = callocz(1, sizeof(struct rrdeng_collect_handle)); - handle->metric_handle = metric_handle; - handle->ctx = metric_handle->ctx; + handle->page_index = page_index; handle->descr = NULL; handle->unaligned_page = 0; + page_index->latest_update_every_s = update_every; - page_index = metric_handle->page_index; uv_rwlock_wrlock(&page_index->lock); ++page_index->writers; uv_rwlock_wrunlock(&page_index->lock); @@ -214,7 +270,7 @@ static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr) void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) { struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; // struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle; - struct rrdengine_instance *ctx = handle->ctx; + struct rrdengine_instance *ctx = handle->page_index->ctx; struct rrdeng_page_descr *descr = handle->descr; if (unlikely(!ctx)) return; @@ -227,9 +283,7 @@ void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_h page_is_empty = page_has_only_empty_metrics(descr); if (page_is_empty) { - debug(D_RRDENGINE, "Page has empty metrics only, deleting:"); - if (unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "Page has empty metrics only, deleting", true); pg_cache_put(ctx, descr); pg_cache_punch_hole(ctx, descr, 1, 0, NULL); } else @@ -242,8 +296,8 @@ void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_h handle->descr = NULL; } -void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, - usec_t point_in_time, +static void rrdeng_store_metric_next_internal(STORAGE_COLLECT_HANDLE *collection_handle, + usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, @@ -252,11 +306,10 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, SN_FLAGS flags) { struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle; - struct rrdengine_instance *ctx = handle->ctx; + struct pg_cache_page_index *page_index = handle->page_index; + struct rrdengine_instance *ctx = handle->page_index->ctx; struct page_cache *pg_cache = &ctx->pg_cache; struct rrdeng_page_descr *descr = handle->descr; - RRDDIM *rd = metric_handle->rd; void *page; uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0; @@ -264,21 +317,33 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, if (descr) { /* Make alignment decisions */ - if (descr->page_length == rd->rrdset->rrddim_page_alignment) { +#ifdef NETDATA_INTERNAL_CHECKS + if(descr->end_time_ut + page_index->latest_update_every_s * USEC_PER_SEC != point_in_time_ut) { + char buffer[200 + 1]; + snprintfz(buffer, 200, + "metrics collected are %s, end_time_ut = %llu, point_in_time_ut = %llu, update_every = %u, delta = %llu", + (point_in_time_ut / USEC_PER_SEC - descr->end_time_ut / USEC_PER_SEC > page_index->latest_update_every_s)?"far apart":"not aligned", + descr->end_time_ut / USEC_PER_SEC, + point_in_time_ut / USEC_PER_SEC, + page_index->latest_update_every_s, + point_in_time_ut / USEC_PER_SEC - descr->end_time_ut / USEC_PER_SEC); + print_page_cache_descr(descr, buffer, false); + } +#endif + + if (descr->page_length == page_index->alignment->page_length) { /* this is the leading dimension that defines chart alignment */ perfect_page_alignment = 1; } /* is the metric far enough out of alignment with the others? */ - if (unlikely(descr->page_length + PAGE_POINT_SIZE_BYTES(descr) < rd->rrdset->rrddim_page_alignment)) { + if (unlikely(descr->page_length + PAGE_POINT_SIZE_BYTES(descr) < page_index->alignment->page_length)) { handle->unaligned_page = 1; - debug(D_RRDENGINE, "Metric page is not aligned with chart:"); - if (unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "Metric page is not aligned with chart", true); } if (unlikely(handle->unaligned_page && /* did the other metrics change page? */ - rd->rrdset->rrddim_page_alignment <= PAGE_POINT_SIZE_BYTES(descr))) { - debug(D_RRDENGINE, "Flushing unaligned metric page."); + page_index->alignment->page_length <= PAGE_POINT_SIZE_BYTES(descr))) { + print_page_cache_descr(descr, "must_flush_unaligned_page = 1", true); must_flush_unaligned_page = 1; handle->unaligned_page = 0; } @@ -286,16 +351,21 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, if (unlikely(NULL == descr || descr->page_length + PAGE_POINT_SIZE_BYTES(descr) > RRDENG_BLOCK_SIZE || must_flush_unaligned_page)) { - rrdeng_store_metric_flush_current_page(collection_handle); - page = rrdeng_create_page(ctx, &metric_handle->page_index->id, &descr); + if(descr) { + print_page_cache_descr(descr, "flushing metric", true); + rrdeng_store_metric_flush_current_page(collection_handle); + } + + page = rrdeng_create_page(ctx, &page_index->id, &descr); fatal_assert(page); + descr->update_every_s = page_index->latest_update_every_s; handle->descr = descr; handle->page_correlation_id = rrd_atomic_fetch_add(&pg_cache->committed_page_index.latest_corr_id, 1); - if (0 == rd->rrdset->rrddim_page_alignment) { + if (0 == page_index->alignment->page_length) { /* this is the leading dimension that defines chart alignment */ perfect_page_alignment = 1; } @@ -330,13 +400,13 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, break; } - pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + PAGE_POINT_SIZE_BYTES(descr)); + pg_cache_atomic_set_pg_info(descr, point_in_time_ut, descr->page_length + PAGE_POINT_SIZE_BYTES(descr)); if (perfect_page_alignment) - rd->rrdset->rrddim_page_alignment = descr->page_length; - if (unlikely(INVALID_TIME == descr->start_time)) { + page_index->alignment->page_length = descr->page_length; + if (unlikely(INVALID_TIME == descr->start_time_ut)) { unsigned long new_metric_API_producers, old_metric_API_max_producers, ret_metric_API_max_producers; - descr->start_time = point_in_time; + descr->start_time_ut = point_in_time_ut; new_metric_API_producers = rrd_atomic_add_fetch(&ctx->stats.metric_API_producers, 1); while (unlikely(new_metric_API_producers > (old_metric_API_max_producers = ctx->metric_API_max_producers))) { @@ -350,20 +420,111 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, } } - pg_cache_insert(ctx, metric_handle->page_index, descr); + pg_cache_insert(ctx, page_index, descr); } else { - pg_cache_add_new_metric_time(metric_handle->page_index, descr); + pg_cache_add_new_metric_time(page_index, descr); } + +// { +// unsigned char u[16] = { 0x0C, 0x0A, 0x40, 0xD6, 0x2A, 0x43, 0x4A, 0x7C, 0x95, 0xF7, 0xD1, 0x1E, 0x0C, 0x9E, 0x8A, 0xE7 }; +// if(uuid_compare(u, page_index->id) == 0) { +// char buffer[100]; +// snprintfz(buffer, 100, "store system.cpu, collect:%u, page_index first:%u, last:%u", +// (uint32_t)(point_in_time / USEC_PER_SEC), +// (uint32_t)(page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// } +// } } +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, + usec_t point_in_time_ut, + NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags) +{ + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + struct rrdeng_page_descr *descr = handle->descr; + + if(likely(descr)) { + usec_t last_point_in_time_ut = descr->end_time_ut; + usec_t update_every_ut = page_index->latest_update_every_s * USEC_PER_SEC; + size_t points_gap = (point_in_time_ut <= last_point_in_time_ut) ? + (size_t)0 : + (size_t)((point_in_time_ut - last_point_in_time_ut) / update_every_ut); + + if(unlikely(points_gap != 1)) { + if (unlikely(points_gap <= 0)) { + time_t now = now_realtime_sec(); + static __thread size_t counter = 0; + static __thread time_t last_time_logged = 0; + counter++; + + if(now - last_time_logged > 600) { + error("DBENGINE: collected point is in the past (repeated %zu times in the last %zu secs). Ignoring these data collection points.", + counter, (size_t)(last_time_logged?(now - last_time_logged):0)); + + last_time_logged = now; + counter = 0; + } + return; + } + + size_t point_size = PAGE_POINT_SIZE_BYTES(descr); + size_t page_size_in_points = RRDENG_BLOCK_SIZE / point_size; + size_t used_points = descr->page_length / point_size; + size_t remaining_points_in_page = page_size_in_points - used_points; + + bool new_point_is_aligned = true; + if(unlikely((point_in_time_ut - last_point_in_time_ut) / points_gap != update_every_ut)) + new_point_is_aligned = false; + + if(unlikely(points_gap > remaining_points_in_page || !new_point_is_aligned)) { +// char buffer[200]; +// snprintfz(buffer, 200, "data collection skipped %zu points, last stored point %llu, new point %llu, update every %d. Cutting page.", +// points_gap, last_point_in_time_ut / USEC_PER_SEC, point_in_time_ut / USEC_PER_SEC, page_index->latest_update_every_s); +// print_page_cache_descr(descr, buffer, false); + + rrdeng_store_metric_flush_current_page(collection_handle); + } + else { +// char buffer[200]; +// snprintfz(buffer, 200, "data collection skipped %zu points, last stored point %llu, new point %llu, update every %d. Filling the gap.", +// points_gap, last_point_in_time_ut / USEC_PER_SEC, point_in_time_ut / USEC_PER_SEC, page_index->latest_update_every_s); +// print_page_cache_descr(descr, buffer, false); + + // loop to fill the gap + usec_t step_ut = page_index->latest_update_every_s * USEC_PER_SEC; + usec_t last_point_filled_ut = last_point_in_time_ut + step_ut; + + while (last_point_filled_ut < point_in_time_ut) { + rrdeng_store_metric_next_internal( + collection_handle, last_point_filled_ut, NAN, NAN, NAN, + 1, 0, SN_EMPTY_SLOT); + + last_point_filled_ut += step_ut; + } + } + } + } + + rrdeng_store_metric_next_internal(collection_handle, point_in_time_ut, n, min_value, max_value, count, anomaly_count, flags); +} + + /* * Releases the database reference from the handle for storing metrics. * Returns 1 if it's safe to delete the dimension. */ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle; - struct pg_cache_page_index *page_index = metric_handle->page_index; + struct pg_cache_page_index *page_index = handle->page_index; uint8_t can_delete_metric = 0; @@ -378,6 +539,18 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { return can_delete_metric; } +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct pg_cache_page_index *page_index = handle->page_index; + rrdeng_store_metric_flush_current_page(collection_handle); + uv_rwlock_rdlock(&page_index->lock); + page_index->latest_update_every_s = update_every; + uv_rwlock_rdunlock(&page_index->lock); +} + +// ---------------------------------------------------------------------------- +// query ops + //static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info) //{ // return (uint32_t *)&page_info->scratch[0]; @@ -392,49 +565,45 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { * Gets a handle for loading metrics from the database. * The handle must be released with rrdeng_load_metric_final(). */ -void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type) +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrdimm_handle, time_t start_time_s, time_t end_time_s) { - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle; - struct rrdengine_instance *ctx = metric_handle->ctx; - RRDDIM *rd = metric_handle->rd; + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + struct rrdengine_instance *ctx = page_index->ctx; // fprintf(stderr, "%s: %s/%s start time %ld, end time %ld\n", __FUNCTION__ , rd->rrdset->name, rd->name, start_time, end_time); struct rrdeng_query_handle *handle; unsigned pages_nr; - rrdimm_handle->start_time = start_time; - rrdimm_handle->end_time = end_time; + if(!page_index->latest_update_every_s) + page_index->latest_update_every_s = default_rrd_update_every; + + rrdimm_handle->start_time_s = start_time_s; + rrdimm_handle->end_time_s = end_time_s; handle = callocz(1, sizeof(struct rrdeng_query_handle)); - handle->next_page_time = start_time; - handle->now = start_time; - handle->tier_query_fetch_type = tier_query_fetch_type; - // TODO we should store the dt of each page in each page - // this will produce wrong values for dt in case the user changes - // the update every of the charts or the tier grouping iterations - handle->dt_sec = get_tier_grouping(ctx->tier) * (time_t)rd->update_every; - handle->dt = handle->dt_sec * USEC_PER_SEC; + handle->wanted_start_time_s = start_time_s; + handle->now_s = start_time_s; handle->position = 0; handle->ctx = ctx; - handle->metric_handle = metric_handle; handle->descr = NULL; + handle->dt_s = page_index->latest_update_every_s; rrdimm_handle->handle = (STORAGE_QUERY_HANDLE *)handle; - pages_nr = pg_cache_preload(ctx, metric_handle->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC, + pages_nr = pg_cache_preload(ctx, &page_index->id, start_time_s * USEC_PER_SEC, end_time_s * USEC_PER_SEC, NULL, &handle->page_index); if (unlikely(NULL == handle->page_index || 0 == pages_nr)) // there are no metrics to load - handle->next_page_time = INVALID_TIME; + handle->wanted_start_time_s = INVALID_TIME; } -static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) { +static int rrdeng_load_page_next(struct storage_engine_query_handle *rrdimm_handle, bool debug_this __maybe_unused) { struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; struct rrdengine_instance *ctx = handle->ctx; struct rrdeng_page_descr *descr = handle->descr; uint32_t page_length; - usec_t page_end_time; + usec_t page_end_time_ut; unsigned position; if (likely(descr)) { @@ -446,14 +615,15 @@ static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) { pg_cache_put(ctx, descr); handle->descr = NULL; - handle->next_page_time = (handle->page_end_time / USEC_PER_SEC) + 1; + handle->wanted_start_time_s = (time_t)((handle->page_end_time_ut / USEC_PER_SEC) + handle->dt_s); - if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) + if (unlikely(handle->wanted_start_time_s > rrdimm_handle->end_time_s)) return 1; } - usec_t next_page_time = handle->next_page_time * USEC_PER_SEC; - descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, next_page_time, rrdimm_handle->end_time * USEC_PER_SEC); + usec_t wanted_start_time_ut = handle->wanted_start_time_s * USEC_PER_SEC; + descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, + wanted_start_time_ut, rrdimm_handle->end_time_s * USEC_PER_SEC); if (NULL == descr) return 1; @@ -462,77 +632,116 @@ static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) { #endif handle->descr = descr; - pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length); - if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == page_end_time)) + pg_cache_atomic_get_pg_info(descr, &page_end_time_ut, &page_length); + if (unlikely(INVALID_TIME == descr->start_time_ut || INVALID_TIME == page_end_time_ut || 0 == descr->update_every_s)) { + error("DBENGINE: discarding invalid page descriptor (start_time = %llu, end_time = %llu, update_every_s = %d)", + descr->start_time_ut, page_end_time_ut, descr->update_every_s); return 1; + } - if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) { + if (unlikely(descr->start_time_ut != page_end_time_ut && wanted_start_time_ut > descr->start_time_ut)) { // we're in the middle of the page somewhere unsigned entries = page_length / PAGE_POINT_SIZE_BYTES(descr); - position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) / - (page_end_time - descr->start_time); + position = ((uint64_t)(wanted_start_time_ut - descr->start_time_ut)) * (entries - 1) / + (page_end_time_ut - descr->start_time_ut); } else position = 0; - handle->page_end_time = page_end_time; + handle->page_end_time_ut = page_end_time_ut; handle->page_length = page_length; + handle->entries = page_length / PAGE_POINT_SIZE_BYTES(descr); handle->page = descr->pg_cache_descr->page; - usec_t entries = handle->entries = page_length / PAGE_POINT_SIZE_BYTES(descr); - if (likely(entries > 1)) - handle->dt = (page_end_time - descr->start_time) / (entries - 1); - else { - // TODO we should store the dt of each page in each page - // now we keep the dt of whatever was before - ; - } - - handle->dt_sec = (time_t)(handle->dt / USEC_PER_SEC); + handle->dt_s = descr->update_every_s; handle->position = position; +// if(debug_this) +// info("DBENGINE: rrdeng_load_page_next(), " +// "position:%d, " +// "start_time_ut:%llu, " +// "page_end_time_ut:%llu, " +// "next_page_time_ut:%llu, " +// "in_out:%s" +// , position +// , descr->start_time_ut +// , page_end_time_ut +// , +// wanted_start_time_ut, in_out?"true":"false" +// ); + return 0; } // Returns the metric and sets its timestamp into current_time // IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) // IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES -STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; // struct rrdeng_metric_handle *metric_handle = handle->metric_handle; - STORAGE_POINT sp; struct rrdeng_page_descr *descr = handle->descr; + time_t now = handle->now_s + handle->dt_s; + +// bool debug_this = false; +// { +// unsigned char u[16] = { 0x0C, 0x0A, 0x40, 0xD6, 0x2A, 0x43, 0x4A, 0x7C, 0x95, 0xF7, 0xD1, 0x1E, 0x0C, 0x9E, 0x8A, 0xE7 }; +// if(uuid_compare(u, handle->page_index->id) == 0) { +// char buffer[100]; +// snprintfz(buffer, 100, "load system.cpu, now:%u, dt:%u, position:%u page_index first:%u, last:%u", +// (uint32_t)(now), +// (uint32_t)(handle->dt_s), +// (uint32_t)(handle->position), +// (uint32_t)(handle->page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(handle->page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// debug_this = true; +// } +// } + + STORAGE_POINT sp; unsigned position = handle->position + 1; - time_t now = handle->now + handle->dt_sec; storage_number_tier1_t tier1_value; - if (unlikely(INVALID_TIME == handle->next_page_time)) { - handle->next_page_time = INVALID_TIME; - handle->now = now; - storage_point_empty(sp, now - handle->dt_sec, now); + if (unlikely(INVALID_TIME == handle->wanted_start_time_s)) { + handle->wanted_start_time_s = INVALID_TIME; + handle->now_s = now; + storage_point_empty(sp, now - handle->dt_s, now); return sp; } if (unlikely(!descr || position >= handle->entries)) { // We need to get a new page - if(rrdeng_load_page_next(rrdimm_handle)) { + if(rrdeng_load_page_next(rrddim_handle, false)) { // next calls will not load any more metrics - handle->next_page_time = INVALID_TIME; - handle->now = now; - storage_point_empty(sp, now - handle->dt_sec, now); + handle->wanted_start_time_s = INVALID_TIME; + handle->now_s = now; + storage_point_empty(sp, now - handle->dt_s, now); return sp; } descr = handle->descr; position = handle->position; - now = (time_t)((descr->start_time + position * handle->dt) / USEC_PER_SEC); + now = (time_t)((descr->start_time_ut / USEC_PER_SEC) + position * descr->update_every_s); + +// if(debug_this) { +// char buffer[100]; +// snprintfz(buffer, 100, "NEW PAGE system.cpu, now:%u, dt:%u, position:%u page_index first:%u, last:%u", +// (uint32_t)(now), +// (uint32_t)(handle->dt_s), +// (uint32_t)(handle->position), +// (uint32_t)(handle->page_index->oldest_time / USEC_PER_SEC), +// (uint32_t)(handle->page_index->latest_time / USEC_PER_SEC)); +// +// print_page_cache_descr(descr, buffer, false); +// } } - sp.start_time = now - handle->dt_sec; + sp.start_time = now - handle->dt_s; sp.end_time = now; handle->position = position; - handle->now = now; + handle->now_s = now; switch(descr->type) { case PAGE_METRICS: { @@ -567,24 +776,32 @@ STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle) break; } - if (unlikely(now >= rrdimm_handle->end_time)) { + if (unlikely(now >= rrddim_handle->end_time_s)) { // next calls will not load any more metrics - handle->next_page_time = INVALID_TIME; + handle->wanted_start_time_s = INVALID_TIME; } +// if(debug_this) +// info("DBENGINE: returning point: " +// "time from %ld to %ld // query from %ld to %ld // wanted_start_time_s %ld" +// , sp.start_time, sp.end_time +// , rrddim_handle->start_time_s, rrddim_handle->end_time_s +// , handle->wanted_start_time_s +// ); + return sp; } -int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle) +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrdimm_handle) { struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; - return (INVALID_TIME == handle->next_page_time); + return (INVALID_TIME == handle->wanted_start_time_s); } /* * Releases the database reference from the handle for loading metrics. */ -void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle) +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrdimm_handle) { struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle; struct rrdengine_instance *ctx = handle->ctx; @@ -603,46 +820,12 @@ void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle) } time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle; - - struct pg_cache_page_index *page_index = metric_handle->page_index; - return page_index->latest_time / USEC_PER_SEC; + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + return (time_t)(page_index->latest_time_ut / USEC_PER_SEC); } time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { - struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle; - - struct pg_cache_page_index *page_index = metric_handle->page_index; - return page_index->oldest_time / USEC_PER_SEC; -} - -int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t, int tier) -{ - struct page_cache *pg_cache; - struct rrdengine_instance *ctx; - Pvoid_t *PValue; - struct pg_cache_page_index *page_index = NULL; - - ctx = get_rrdeng_ctx_from_host(localhost, tier); - if (unlikely(!ctx)) { - error("Failed to fetch multidb context"); - return 1; - } - pg_cache = &ctx->pg_cache; - - uv_rwlock_rdlock(&pg_cache->metrics_index.lock); - PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, dim_uuid, sizeof(uuid_t)); - if (likely(NULL != PValue)) { - page_index = *PValue; - } - uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); - - if (likely(page_index)) { - *first_entry_t = page_index->oldest_time / USEC_PER_SEC; - *last_entry_t = page_index->latest_time / USEC_PER_SEC; - return 0; - } - - return 1; + struct pg_cache_page_index *page_index = (struct pg_cache_page_index *)db_metric_handle; + return (time_t)(page_index->oldest_time_ut / USEC_PER_SEC); } int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t) @@ -667,8 +850,8 @@ int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time uv_rwlock_rdunlock(&pg_cache->metrics_index.lock); if (likely(page_index)) { - *first_entry_t = page_index->oldest_time / USEC_PER_SEC; - *last_entry_t = page_index->latest_time / USEC_PER_SEC; + *first_entry_t = page_index->oldest_time_ut / USEC_PER_SEC; + *last_entry_t = page_index->latest_time_ut / USEC_PER_SEC; return 0; } @@ -695,7 +878,7 @@ void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrde debug(D_RRDENGINE, "Created new page:"); if (unlikely(debug_flags & D_RRDENGINE)) - print_page_cache_descr(descr); + print_page_cache_descr(descr, "", true); rrdeng_page_descr_mutex_unlock(ctx, descr); *ret_descr = descr; return page; @@ -767,13 +950,13 @@ void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void ** } /* Gets a reference for the page */ -void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle) +void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time_ut, void **handle) { struct rrdeng_page_descr *descr; struct page_cache_descr *pg_cache_descr; debug(D_RRDENGINE, "Reading existing page:"); - descr = pg_cache_lookup(ctx, NULL, id, point_in_time); + descr = pg_cache_lookup(ctx, NULL, id, point_in_time_ut); if (NULL == descr) { *handle = NULL; @@ -849,7 +1032,7 @@ void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle) * Returns 0 on success, negative on error */ int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, - unsigned disk_space_mb, int tier) { + unsigned disk_space_mb, size_t tier) { struct rrdengine_instance *ctx; int error; uint32_t max_open_files; @@ -897,7 +1080,6 @@ int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_p ctx->drop_metrics_under_page_cache_pressure = rrdeng_drop_metrics_under_page_cache_pressure; ctx->metric_API_max_producers = 0; ctx->quiesce = NO_QUIESCE; - ctx->metalog_ctx = NULL; /* only set this after the metadata log has finished initializing */ ctx->host = host; memset(&ctx->worker_config, 0, sizeof(ctx->worker_config)); @@ -918,11 +1100,11 @@ int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_p if (ctx->worker_config.error) { goto error_after_rrdeng_worker; } - error = metalog_init(ctx); - if (error) { - error("Failed to initialize metadata log file event loop."); - goto error_after_rrdeng_worker; - } +// error = metalog_init(ctx); +// if (error) { +// error("Failed to initialize metadata log file event loop."); +// goto error_after_rrdeng_worker; +// } return 0; @@ -1010,13 +1192,13 @@ RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) { size_t points = descr->page_length / PAGE_POINT_SIZE_BYTES(descr); if(likely(points > 1)) - update_every_usec = (descr->end_time - descr->start_time) / (points - 1); + update_every_usec = (descr->end_time_ut - descr->start_time_ut) / (points - 1); else { update_every_usec = default_rrd_update_every * get_tier_grouping(ctx->tier) * USEC_PER_SEC; stats.single_point_pages++; } - time_t duration_secs = (time_t)((descr->end_time - descr->start_time + update_every_usec)/USEC_PER_SEC); + time_t duration_secs = (time_t)((descr->end_time_ut - descr->start_time_ut + update_every_usec)/USEC_PER_SEC); stats.extents_pages++; stats.pages_uncompressed_bytes += descr->page_length; @@ -1028,11 +1210,11 @@ RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) { stats.page_types[descr->type].pages_duration_secs += duration_secs; stats.page_types[descr->type].points += points; - if(!stats.first_t || (descr->start_time - update_every_usec) < stats.first_t) - stats.first_t = (descr->start_time - update_every_usec) / USEC_PER_SEC; + if(!stats.first_t || (descr->start_time_ut - update_every_usec) < stats.first_t) + stats.first_t = (descr->start_time_ut - update_every_usec) / USEC_PER_SEC; - if(!stats.last_t || descr->end_time > stats.last_t) - stats.last_t = descr->end_time / USEC_PER_SEC; + if(!stats.last_t || descr->end_time_ut > stats.last_t) + stats.last_t = descr->end_time_ut / USEC_PER_SEC; } } } @@ -1072,7 +1254,7 @@ RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx) { } } - stats.sizeof_metric = struct_natural_alignment(sizeof(struct pg_cache_page_index)); + stats.sizeof_metric = struct_natural_alignment(sizeof(struct pg_cache_page_index) + sizeof(struct pg_alignment)); stats.sizeof_page = struct_natural_alignment(sizeof(struct rrdeng_page_descr)); stats.sizeof_datafile = struct_natural_alignment(sizeof(struct rrdengine_datafile)) + struct_natural_alignment(sizeof(struct rrdengine_journalfile)); stats.sizeof_page_in_cache = struct_natural_alignment(sizeof(struct page_cache_descr)); diff --git a/database/engine/rrdengineapi.h b/database/engine/rrdengineapi.h index 509aa48ca..85375044f 100644 --- a/database/engine/rrdengineapi.h +++ b/database/engine/rrdengineapi.h @@ -25,58 +25,63 @@ extern size_t page_type_size[]; #define PAGE_POINT_SIZE_BYTES(x) page_type_size[(x)->type] struct rrdeng_region_info { - time_t start_time; + time_t start_time_s; int update_every; unsigned points; }; -extern void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr); -extern void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, +void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrdeng_page_descr **ret_descr); +void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr, Word_t page_correlation_id); -extern void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle); -extern void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle); -extern void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle); +void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle); +void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time_ut, void **handle); +void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle); -extern void rrdeng_generate_legacy_uuid(const char *dim_id, char *chart_id, uuid_t *ret_uuid); -extern void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, +void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid); +void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uuid_t *legacy_uuid, uuid_t *ret_uuid); -extern STORAGE_METRIC_HANDLE *rrdeng_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance); -extern void rrdeng_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle); +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg); +STORAGE_METRIC_HANDLE *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg); +STORAGE_METRIC_HANDLE *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id, STORAGE_METRICS_GROUP *smg); +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle); +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle); -extern STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle); -extern void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle); -extern void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE n, +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every); +void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle); +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); -extern int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle); +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle); -extern unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time, - struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp, struct context_param *context_param_list); +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrdimm_handle, + time_t start_time_s, time_t end_time_s); +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle); -extern void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *rrdimm_handle, - time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type); -extern STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle); -extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle); -extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle); -extern time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -extern time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrdimm_handle); +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrdimm_handle); +time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -extern void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array); +void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array); /* must call once before using anything */ -extern int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, - unsigned disk_space_mb, int tier); +int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, + unsigned disk_space_mb, size_t tier); -extern int rrdeng_exit(struct rrdengine_instance *ctx); -extern void rrdeng_prepare_exit(struct rrdengine_instance *ctx); -extern int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t, int tier); -extern int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t); +int rrdeng_exit(struct rrdengine_instance *ctx); +void rrdeng_prepare_exit(struct rrdengine_instance *ctx); +int rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t); + +extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); typedef struct rrdengine_size_statistics { size_t default_granularity_secs; @@ -134,6 +139,6 @@ typedef struct rrdengine_size_statistics { double average_page_size_bytes; } RRDENG_SIZE_STATS; -extern RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx); +RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx); #endif /* NETDATA_RRDENGINEAPI_H */ diff --git a/database/engine/rrdenginelib.c b/database/engine/rrdenginelib.c index 287b86be8..58bd9c437 100644 --- a/database/engine/rrdenginelib.c +++ b/database/engine/rrdenginelib.c @@ -4,28 +4,45 @@ #define BUFSIZE (512) /* Caller must hold descriptor lock */ -void print_page_cache_descr(struct rrdeng_page_descr *descr) +void print_page_cache_descr(struct rrdeng_page_descr *descr, const char *msg, bool log_debug) { - struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; - char uuid_str[UUID_STR_LEN]; - char str[BUFSIZE + 1]; - int pos = 0; + if(log_debug && !(debug_flags & D_RRDENGINE)) + return; - uuid_unparse_lower(*descr->id, uuid_str); - pos += snprintfz(str, BUFSIZE - pos, "page(%p) id=%s\n" - "--->len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:", - pg_cache_descr->page, uuid_str, - descr->page_length, - (uint64_t)descr->start_time, - (uint64_t)descr->end_time); - if (!descr->extent) { - pos += snprintfz(str + pos, BUFSIZE - pos, "N/A"); - } else { - pos += snprintfz(str + pos, BUFSIZE - pos, "%"PRIu64, descr->extent->offset); + BUFFER *wb = buffer_create(512); + + if(!descr) { + buffer_sprintf(wb, "DBENGINE: %s : descr is NULL", msg); } + else { + struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; + char uuid_str[UUID_STR_LEN]; + + uuid_unparse_lower(*descr->id, uuid_str); + buffer_sprintf(wb, "DBENGINE: %s : page(%p) metric:%s, len:%"PRIu32", time:%"PRIu64"->%"PRIu64", update_every:%u, type:%u, xt_offset:", + msg, + pg_cache_descr->page, uuid_str, + descr->page_length, + (uint64_t)descr->start_time_ut, + (uint64_t)descr->end_time_ut, + (uint32_t)descr->update_every_s, + (uint32_t)descr->type + ); + if (!descr->extent) { + buffer_strcat(wb, "N/A"); + } else { + buffer_sprintf(wb, "%"PRIu64, descr->extent->offset); + } + + buffer_sprintf(wb, ", flags:0x%2.2lX refcnt:%u", pg_cache_descr->flags, pg_cache_descr->refcnt); + } + + if(log_debug) + debug(D_RRDENGINE, "%s", buffer_tostring(wb)); + else + internal_error(true, "%s", buffer_tostring(wb)); - snprintfz(str + pos, BUFSIZE - pos, " flags:0x%2.2lX refcnt:%u\n\n", pg_cache_descr->flags, pg_cache_descr->refcnt); - debug(D_RRDENGINE, "%s", str); + buffer_free(wb); } void print_page_descr(struct rrdeng_page_descr *descr) @@ -39,8 +56,8 @@ void print_page_descr(struct rrdeng_page_descr *descr) "--->len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:", uuid_str, descr->page_length, - (uint64_t)descr->start_time, - (uint64_t)descr->end_time); + (uint64_t)descr->start_time_ut, + (uint64_t)descr->end_time_ut); if (!descr->extent) { pos += snprintfz(str + pos, BUFSIZE - pos, "N/A"); } else { diff --git a/database/engine/rrdenginelib.h b/database/engine/rrdenginelib.h index 32eebf103..6b1a15fb1 100644 --- a/database/engine/rrdenginelib.h +++ b/database/engine/rrdenginelib.h @@ -83,10 +83,10 @@ static inline void crc32set(void *crcp, uLong crc) *(uint32_t *)crcp = crc; } -extern void print_page_cache_descr(struct rrdeng_page_descr *page_cache_descr); -extern void print_page_descr(struct rrdeng_page_descr *descr); -extern int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size); -extern int open_file_for_io(char *path, int flags, uv_file *file, int direct); +void print_page_cache_descr(struct rrdeng_page_descr *descr, const char *msg, bool log_debug); +void print_page_descr(struct rrdeng_page_descr *descr); +int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size); +int open_file_for_io(char *path, int flags, uv_file *file, int direct); static inline int open_file_direct_io(char *path, int flags, uv_file *file) { return open_file_for_io(path, flags, file, 1); @@ -95,8 +95,8 @@ static inline int open_file_buffered_io(char *path, int flags, uv_file *file) { return open_file_for_io(path, flags, file, 0); } -extern char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size); -extern int compute_multidb_diskspace(); -extern int is_legacy_child(const char *machine_guid); +char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size); +int compute_multidb_diskspace(); +int is_legacy_child(const char *machine_guid); #endif /* NETDATA_RRDENGINELIB_H */ diff --git a/database/engine/rrdenglocking.h b/database/engine/rrdenglocking.h index 127ddc90c..078eab38b 100644 --- a/database/engine/rrdenglocking.h +++ b/database/engine/rrdenglocking.h @@ -8,10 +8,10 @@ /* Forward declarations */ struct page_cache_descr; -extern struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx); -extern void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr); -extern void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); -extern void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance *ctx); +void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr); +void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); +void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); #endif /* NETDATA_RRDENGLOCKING_H */
\ No newline at end of file diff --git a/database/ram/rrddim_mem.c b/database/ram/rrddim_mem.c index 3226d3c0d..43f32350b 100644 --- a/database/ram/rrddim_mem.c +++ b/database/ram/rrddim_mem.c @@ -1,22 +1,76 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "rrddim_mem.h" +#include "Judy.h" + +static Pvoid_t rrddim_JudyHS_array = NULL; +static netdata_rwlock_t rrddim_JudyHS_rwlock = NETDATA_RWLOCK_INITIALIZER; + +// ---------------------------------------------------------------------------- +// metrics groups + +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) { + return NULL; +} + +void rrddim_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + // if(!smg) return; // smg may be NULL + ; +} // ---------------------------------------------------------------------------- // RRDDIM legacy data collection functions -STORAGE_METRIC_HANDLE *rrddim_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance __maybe_unused) { +STORAGE_METRIC_HANDLE * +rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + STORAGE_METRIC_HANDLE *t = rrddim_metric_get(db_instance, &rd->metric_uuid, smg); + if(!t) { + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSIns(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + fatal_assert(NULL == *PValue); + *PValue = rd; + t = (STORAGE_METRIC_HANDLE *)rd; + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + } + + if((RRDDIM *)t != rd) + fatal("RRDDIM_MEM: incorrect pointer returned from index."); + return (STORAGE_METRIC_HANDLE *)rd; } -void rrddim_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle __maybe_unused) { - ; +STORAGE_METRIC_HANDLE * +rrddim_metric_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid, STORAGE_METRICS_GROUP *smg __maybe_unused) { + RRDDIM *rd = NULL; + netdata_rwlock_rdlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSGet(rrddim_JudyHS_array, uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) + rd = *PValue; + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + + return (STORAGE_METRIC_HANDLE *)rd; +} + +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) { + return db_metric_handle; +} + +void rrddim_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle __maybe_unused) { + RRDDIM *rd = (RRDDIM *)db_metric_handle; + + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + JudyHSDel(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); +} + +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every __maybe_unused) { + rrddim_store_metric_flush(collection_handle); } -STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle) { +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every __maybe_unused) { RRDDIM *rd = (RRDDIM *)db_metric_handle; rd->db[rd->rrdset->current_entry] = pack_storage_number(NAN, SN_FLAG_NONE); - struct mem_collect_handle *ch = calloc(1, sizeof(struct mem_collect_handle)); + struct mem_collect_handle *ch = callocz(1, sizeof(struct mem_collect_handle)); ch->rd = rd; return (STORAGE_COLLECT_HANDLE *)ch; } @@ -41,12 +95,15 @@ void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle) { struct mem_collect_handle *ch = (struct mem_collect_handle *)collection_handle; + RRDDIM *rd = ch->rd; - memset(rd->db, 0, rd->entries * sizeof(storage_number)); + for(int i = 0; i < rd->rrdset->entries ;i++) + rd->db[i] = SN_EMPTY_SLOT; + } int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { - free(collection_handle); + freez(collection_handle); return 0; } @@ -91,7 +148,7 @@ static inline size_t rrddim_time2slot(RRDDIM *rd, time_t t) { } if(unlikely(ret >= entries)) { - error("INTERNAL ERROR: rrddim_time2slot() on %s returns values outside entries", rd->name); + error("INTERNAL ERROR: rrddim_time2slot() on %s returns values outside entries", rrddim_name(rd)); ret = entries - 1; } @@ -119,12 +176,12 @@ static inline time_t rrddim_slot2time(RRDDIM *rd, size_t slot) { ret = last_entry_t - (time_t)(update_every * (last_slot - slot)); if(unlikely(ret < first_entry_t)) { - error("INTERNAL ERROR: rrddim_slot2time() on %s returns time too far in the past", rd->name); + error("INTERNAL ERROR: rrddim_slot2time() on %s returns time too far in the past", rrddim_name(rd)); ret = first_entry_t; } if(unlikely(ret > last_entry_t)) { - error("INTERNAL ERROR: rrddim_slot2time() on %s returns time into the future", rd->name); + error("INTERNAL ERROR: rrddim_slot2time() on %s returns time into the future", rrddim_name(rd)); ret = last_entry_t; } @@ -134,15 +191,13 @@ static inline time_t rrddim_slot2time(RRDDIM *rd, size_t slot) { // ---------------------------------------------------------------------------- // RRDDIM legacy database query functions -void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type) { - UNUSED(tier_query_fetch_type); - +void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time) { RRDDIM *rd = (RRDDIM *)db_metric_handle; handle->rd = rd; - handle->start_time = start_time; - handle->end_time = end_time; - struct mem_query_handle* h = calloc(1, sizeof(struct mem_query_handle)); + handle->start_time_s = start_time; + handle->end_time_s = end_time; + struct mem_query_handle* h = mallocz(sizeof(struct mem_query_handle)); h->slot = rrddim_time2slot(rd, start_time); h->last_slot = rrddim_time2slot(rd, end_time); h->dt = rd->rrdset->update_every; @@ -159,7 +214,7 @@ void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_qu // Returns the metric and sets its timestamp into current_time // IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) // IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES -STORAGE_POINT rrddim_query_next_metric(struct rrddim_query_handle *handle) { +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *handle) { RRDDIM *rd = handle->rd; struct mem_query_handle* h = (struct mem_query_handle*)handle->handle; size_t entries = rd->rrdset->entries; @@ -198,15 +253,15 @@ STORAGE_POINT rrddim_query_next_metric(struct rrddim_query_handle *handle) { return sp; } -int rrddim_query_is_finished(struct rrddim_query_handle *handle) { +int rrddim_query_is_finished(struct storage_engine_query_handle *handle) { struct mem_query_handle* h = (struct mem_query_handle*)handle->handle; - return (h->next_timestamp > handle->end_time); + return (h->next_timestamp > handle->end_time_s); } -void rrddim_query_finalize(struct rrddim_query_handle *handle) { +void rrddim_query_finalize(struct storage_engine_query_handle *handle) { #ifdef NETDATA_INTERNAL_CHECKS if(!rrddim_query_is_finished(handle)) - error("QUERY: query for chart '%s' dimension '%s' has been stopped unfinished", handle->rd->rrdset->id, handle->rd->name); + error("QUERY: query for chart '%s' dimension '%s' has been stopped unfinished", rrdset_id(handle->rd->rrdset), rrddim_name(handle->rd)); #endif freez(handle->handle); } diff --git a/database/ram/rrddim_mem.h b/database/ram/rrddim_mem.h index 400bdd0c2..297388f51 100644 --- a/database/ram/rrddim_mem.h +++ b/database/ram/rrddim_mem.h @@ -20,24 +20,30 @@ struct mem_query_handle { size_t last_slot; }; -extern STORAGE_METRIC_HANDLE *rrddim_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance); -extern void rrddim_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle); +STORAGE_METRIC_HANDLE *rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); +STORAGE_METRIC_HANDLE *rrddim_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg); +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle); +void rrddim_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle); -extern STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle); -extern void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); +void rrddim_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); + +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every); +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); +void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); -extern void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle); -extern int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle); - -extern void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type); -extern STORAGE_POINT rrddim_query_next_metric(struct rrddim_query_handle *handle); -extern int rrddim_query_is_finished(struct rrddim_query_handle *handle); -extern void rrddim_query_finalize(struct rrddim_query_handle *handle); -extern time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -extern time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle); +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle); + +void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time); +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *handle); +int rrddim_query_is_finished(struct storage_engine_query_handle *handle); +void rrddim_query_finalize(struct storage_engine_query_handle *handle); +time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); +time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); #endif diff --git a/database/rrd.c b/database/rrd.c index f91039ea5..df364419e 100644 --- a/database/rrd.c +++ b/database/rrd.c @@ -154,3 +154,15 @@ char *rrdset_cache_dir(RRDHOST *host, const char *id) { return ret; } +// ---------------------------------------------------------------------------- +// RRD - string management + +STRING *rrd_string_strdupz(const char *s) { + if(unlikely(!s || !*s)) return string_strdupz(s); + + char *tmp = strdupz(s); + json_fix_string(tmp); + STRING *ret = string_strdupz(tmp); + freez(tmp); + return ret; +} diff --git a/database/rrd.h b/database/rrd.h index 605ff50bc..f071ee254 100644 --- a/database/rrd.h +++ b/database/rrd.h @@ -11,26 +11,40 @@ extern "C" { // to enable type checking at compile time typedef struct storage_instance STORAGE_INSTANCE; typedef struct storage_metric_handle STORAGE_METRIC_HANDLE; +typedef struct storage_alignment STORAGE_METRICS_GROUP; // forward typedefs typedef struct rrdhost RRDHOST; typedef struct rrddim RRDDIM; typedef struct rrdset RRDSET; -typedef struct rrdvar RRDVAR; -typedef struct rrdsetvar RRDSETVAR; -typedef struct rrddimvar RRDDIMVAR; typedef struct rrdcalc RRDCALC; typedef struct rrdcalctemplate RRDCALCTEMPLATE; typedef struct alarm_entry ALARM_ENTRY; -typedef struct context_param CONTEXT_PARAM; + +typedef struct rrdfamily_acquired RRDFAMILY_ACQUIRED; +typedef struct rrdvar_acquired RRDVAR_ACQUIRED; +typedef struct rrdsetvar_acquired RRDSETVAR_ACQUIRED; +typedef struct rrdcalc_acquired RRDCALC_ACQUIRED; + +typedef struct rrdhost_acquired RRDHOST_ACQUIRED; +typedef struct rrdset_acquired RRDSET_ACQUIRED; +typedef struct rrddim_acquired RRDDIM_ACQUIRED; typedef void *ml_host_t; typedef void *ml_dimension_t; +typedef enum { + QUERY_SOURCE_UNKNOWN, + QUERY_SOURCE_API_DATA, + QUERY_SOURCE_API_BADGE, + QUERY_SOURCE_API_WEIGHTS, + QUERY_SOURCE_HEALTH, + QUERY_SOURCE_ML, + QUERY_SOURCE_UNITTEST, +} QUERY_SOURCE; + // forward declarations struct rrddim_tier; -struct rrdset_volatile; -struct context_param; #ifdef ENABLE_DBENGINE struct rrdeng_page_descr; @@ -51,8 +65,10 @@ struct pg_cache_page_index; #include "sqlite/sqlite_health.h" #include "rrdcontext.h" -extern int storage_tiers; -extern int storage_tiers_grouping_iterations[RRD_STORAGE_TIERS]; +extern bool unittest_running; +extern bool dbengine_enabled; +extern size_t storage_tiers; +extern size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS]; typedef enum { RRD_BACKFILL_NONE, @@ -75,11 +91,6 @@ struct context_param { uint8_t flags; }; -#define META_CHART_UPDATED 1 -#define META_PLUGIN_UPDATED 2 -#define META_MODULE_UPDATED 4 -#define META_CHART_ACTIVATED 8 - #define UPDATE_EVERY 1 #define UPDATE_EVERY_MAX 3600 @@ -122,7 +133,9 @@ typedef enum rrd_memory_mode { RRD_MEMORY_MODE_MAP = 2, RRD_MEMORY_MODE_SAVE = 3, RRD_MEMORY_MODE_ALLOC = 4, - RRD_MEMORY_MODE_DBENGINE = 5 + RRD_MEMORY_MODE_DBENGINE = 5, + + // this is 8-bit } RRD_MEMORY_MODE; #define RRD_MEMORY_MODE_NONE_NAME "none" @@ -134,8 +147,8 @@ typedef enum rrd_memory_mode { extern RRD_MEMORY_MODE default_rrd_memory_mode; -extern const char *rrd_memory_mode_name(RRD_MEMORY_MODE id); -extern RRD_MEMORY_MODE rrd_memory_mode_id(const char *name); +const char *rrd_memory_mode_name(RRD_MEMORY_MODE id); +RRD_MEMORY_MODE rrd_memory_mode_id(const char *name); // ---------------------------------------------------------------------------- @@ -145,7 +158,9 @@ typedef enum rrd_algorithm { RRD_ALGORITHM_ABSOLUTE = 0, RRD_ALGORITHM_INCREMENTAL = 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL = 2, - RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL = 3 + RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL = 3, + + // this is 8-bit } RRD_ALGORITHM; #define RRD_ALGORITHM_ABSOLUTE_NAME "absolute" @@ -153,43 +168,50 @@ typedef enum rrd_algorithm { #define RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME "percentage-of-incremental-row" #define RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME "percentage-of-absolute-row" -extern RRD_ALGORITHM rrd_algorithm_id(const char *name); -extern const char *rrd_algorithm_name(RRD_ALGORITHM algorithm); +RRD_ALGORITHM rrd_algorithm_id(const char *name); +const char *rrd_algorithm_name(RRD_ALGORITHM algorithm); // ---------------------------------------------------------------------------- // RRD FAMILY -struct rrdfamily { - avl_t avl; +const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id); +void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa); +void rrdfamily_index_init(RRDHOST *host); +void rrdfamily_index_destroy(RRDHOST *host); +DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rf); - const char *family; - uint32_t hash_family; - size_t use_count; +// ---------------------------------------------------------------------------- +// flags & options - avl_tree_lock rrdvar_root_index; -}; -typedef struct rrdfamily RRDFAMILY; +// options are permanent configuration options (no atomics to alter/access them) +typedef enum rrddim_options { + RRDDIM_OPTION_NONE = 0, + RRDDIM_OPTION_HIDDEN = (1 << 0), // this dimension will not be offered to callers + RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS = (1 << 1), // do not offer RESET or OVERFLOW info to callers + RRDDIM_OPTION_BACKFILLED_HIGH_TIERS = (1 << 2), // when set, we have backfilled higher tiers + // this is 8-bit +} RRDDIM_OPTIONS; -// ---------------------------------------------------------------------------- -// flags -// use this for configuration flags, not for state control -// flags are set/unset in a manner that is not thread safe -// and may lead to missing information. +#define rrddim_option_check(rd, option) ((rd)->options & (option)) +#define rrddim_option_set(rd, option) (rd)->options |= (option) +#define rrddim_option_clear(rd, option) (rd)->options &= ~(option) +// flags are runtime changing status flags (atomics are required to alter/access them) typedef enum rrddim_flags { RRDDIM_FLAG_NONE = 0, - RRDDIM_FLAG_HIDDEN = (1 << 0), // this dimension will not be offered to callers - RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS = (1 << 1), // do not offer RESET or OVERFLOW info to callers + RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 0), + RRDDIM_FLAG_OBSOLETE = (1 << 2), // this is marked by the collector/module as obsolete - // No new values have been collected for this dimension since agent start or it was marked RRDDIM_FLAG_OBSOLETE at + // No new values have been collected for this dimension since agent start, or it was marked RRDDIM_FLAG_OBSOLETE at // least rrdset_free_obsolete_time seconds ago. RRDDIM_FLAG_ARCHIVED = (1 << 3), - RRDDIM_FLAG_ACLK = (1 << 4), + RRDDIM_FLAG_METADATA_UPDATE = (1 << 4), // Metadata needs to go to the database - RRDDIM_FLAG_PENDING_FOREACH_ALARM = (1 << 5), // set when foreach alarm has not been initialized yet RRDDIM_FLAG_META_HIDDEN = (1 << 6), // Status of hidden option in the metadata database + + // this is 8 bit } RRDDIM_FLAGS; #define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & (flag)) @@ -211,62 +233,55 @@ typedef enum rrdlabel_source { #define RRDLABEL_FLAG_INTERNAL (RRDLABEL_FLAG_OLD | RRDLABEL_FLAG_NEW | RRDLABEL_FLAG_PERMANENT) -extern DICTIONARY *rrdlabels_create(void); -extern void rrdlabels_destroy(DICTIONARY *labels_dict); -extern void rrdlabels_add(DICTIONARY *dict, const char *name, const char *value, RRDLABEL_SRC ls); -extern void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls); -extern void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null); +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length); -extern void rrdlabels_unmark_all(DICTIONARY *labels); -extern void rrdlabels_remove_all_unmarked(DICTIONARY *labels); +DICTIONARY *rrdlabels_create(void); +void rrdlabels_destroy(DICTIONARY *labels_dict); +void rrdlabels_add(DICTIONARY *dict, const char *name, const char *value, RRDLABEL_SRC ls); +void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls); +void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null); +void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key); +void rrdlabels_flush(DICTIONARY *labels_dict); -extern int rrdlabels_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); -extern int rrdlabels_sorted_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); +void rrdlabels_unmark_all(DICTIONARY *labels); +void rrdlabels_remove_all_unmarked(DICTIONARY *labels); -extern void rrdlabels_log_to_buffer(DICTIONARY *labels, BUFFER *wb); -extern bool rrdlabels_match_simple_pattern(DICTIONARY *labels, const char *simple_pattern_txt); -extern bool rrdlabels_match_simple_pattern_parsed(DICTIONARY *labels, SIMPLE_PATTERN *pattern, char equal); -extern int rrdlabels_to_buffer(DICTIONARY *labels, BUFFER *wb, const char *before_each, const char *equal, const char *quote, const char *between_them, bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *filter_data, void (*name_sanitizer)(char *dst, const char *src, size_t dst_size), void (*value_sanitizer)(char *dst, const char *src, size_t dst_size)); +int rrdlabels_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); +int rrdlabels_sorted_walkthrough_read(DICTIONARY *labels, int (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data); -extern void rrdlabels_migrate_to_these(DICTIONARY *dst, DICTIONARY *src); -extern void rrdlabels_copy(DICTIONARY *dst, DICTIONARY *src); +void rrdlabels_log_to_buffer(DICTIONARY *labels, BUFFER *wb); +bool rrdlabels_match_simple_pattern(DICTIONARY *labels, const char *simple_pattern_txt); +bool rrdlabels_match_simple_pattern_parsed(DICTIONARY *labels, SIMPLE_PATTERN *pattern, char equal); +int rrdlabels_to_buffer(DICTIONARY *labels, BUFFER *wb, const char *before_each, const char *equal, const char *quote, const char *between_them, bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *filter_data, void (*name_sanitizer)(char *dst, const char *src, size_t dst_size), void (*value_sanitizer)(char *dst, const char *src, size_t dst_size)); -void reload_host_labels(void); -extern void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels); +void rrdlabels_migrate_to_these(DICTIONARY *dst, DICTIONARY *src); +void rrdlabels_copy(DICTIONARY *dst, DICTIONARY *src); -extern int rrdlabels_unittest(void); +void reload_host_labels(void); +void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels); +void rrdset_save_rrdlabels_to_sql(RRDSET *st); +void rrdhost_set_is_parent_label(int count); +int rrdlabels_unittest(void); // unfortunately this break when defined in exporting_engine.h -extern bool exporting_labels_filter_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data); +bool exporting_labels_filter_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data); // ---------------------------------------------------------------------------- // RRD DIMENSION - this is a metric struct rrddim { - // ------------------------------------------------------------------------ - // binary indexing structures - - avl_t avl; // the binary index - this has to be first member! - uuid_t metric_uuid; // global UUID for this metric (unique_across hosts) // ------------------------------------------------------------------------ - // the dimension definition - - const char *id; // the id of this dimension (for internal identification) - const char *name; // the name of this dimension (as presented to user) - // this is a pointer to the config structure - // since the config always has a higher priority - // (the user overwrites the name of the charts) - uint32_t hash; // a simple hash of the id, to speed up searching / indexing - // instead of strcmp() every item in the binary index - // we first compare the hashes - uint32_t hash_name; // a simple hash of the name + // dimension definition + STRING *id; // the id of this dimension (for internal identification) + STRING *name; // the name of this dimension (as presented to user) - RRD_ALGORITHM algorithm; // the algorithm that is applied to add new collected values - RRD_MEMORY_MODE rrd_memory_mode; // the memory mode for this dimension - RRDDIM_FLAGS flags; // configuration flags for the dimension + RRD_ALGORITHM algorithm:8; // the algorithm that is applied to add new collected values + RRDDIM_OPTIONS options:8; // permanent configuration options + RRD_MEMORY_MODE rrd_memory_mode:8; // the memory mode for this dimension + /*RRDDIM_FLAGS*/ uint8_t flags; // run time changing status flags bool updated; // 1 when the dimension has been updated since the last processing bool exposed; // 1 when set what have sent this dimension to the central netdata @@ -274,20 +289,31 @@ struct rrddim { collected_number multiplier; // the multiplier of the collected values collected_number divisor; // the divider of the collected values + int update_every; // every how many seconds is this updated + // TODO - remove update_every from rrddim + // it is always the same in rrdset + // ------------------------------------------------------------------------ - // members for temporary data we need for calculations + // operational state members - struct timeval last_collected_time; // when was this dimension last updated - // this is actual date time we updated the last_collected_value - // THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET + ml_dimension_t ml_dimension; // machine learning data about this dimension -#ifdef ENABLE_ACLK - int aclk_live_status; -#endif - ml_dimension_t ml_dimension; + // ------------------------------------------------------------------------ + // linking to siblings and parents + + struct rrdset *rrdset; + + RRDMETRIC_ACQUIRED *rrdmetric; // the rrdmetric of this dimension + + // ------------------------------------------------------------------------ + // data collection members struct rrddim_tier *tiers[RRD_STORAGE_TIERS]; // our tiers of databases + struct timeval last_collected_time; // when was this dimension last updated + // this is actual date time we updated the last_collected_value + // THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET + size_t collections_counter; // the number of times we added values to this rrddim collected_number collected_value_max; // the absolute maximum of the collected value @@ -298,68 +324,42 @@ struct rrddim { collected_number collected_value; // the current value, as collected - resets to 0 after being used collected_number last_collected_value; // the last value that was collected, after being processed - // the *_volume members are used to calculate the accuracy of the rounding done by the - // storage number - they are printed to debug.log when debug is enabled for a set. - NETDATA_DOUBLE collected_volume; // the sum of all collected values so far - NETDATA_DOUBLE stored_volume; // the sum of all stored values so far - - struct rrddim *next; // linking of dimensions within the same data set - struct rrdset *rrdset; - RRDMETRIC_ACQUIRED *rrdmetric; // the rrdmetric of this dimension +#ifdef NETDATA_LOG_COLLECTION_ERRORS + usec_t rrddim_store_metric_last_ut; // the timestamp we last called rrddim_store_metric() + size_t rrddim_store_metric_count; // the rrddim_store_metric() counter + const char *rrddim_store_metric_last_caller; // the name of the function that last called rrddim_store_metric() +#endif // ------------------------------------------------------------------------ - // members for checking the data when loading from disk - - long entries; // how many entries this dimension has in ram - // this is the same to the entries of the data set - // we set it here, to check the data when we load it from disk. - - int update_every; // every how many seconds is this updated + // db mode RAM, SAVE, MAP, ALLOC, NONE specifics + // TODO - they should be managed by storage engine + // (RRDDIM_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) size_t memsize; // the memory allocated for this dimension (without RRDDIM) - - struct rrddimvar *variables; - - // ------------------------------------------------------------------------ - // the values stored in this dimension, using our floating point numbers - void *rd_on_file; // pointer to the header written on disk storage_number *db; // the array of values }; +#define rrddim_id(rd) string2str((rd)->id) +#define rrddim_name(rd) string2str((rd) ->name) + // returns the RRDDIM cache filename, or NULL if it does not exist -extern const char *rrddim_cache_filename(RRDDIM *rd); +const char *rrddim_cache_filename(RRDDIM *rd); // updated the header with the latest RRDDIM value, for memory mode MAP and SAVE -extern void rrddim_memory_file_update(RRDDIM *rd); +void rrddim_memory_file_update(RRDDIM *rd); // free the memory file structures for memory mode MAP and SAVE -extern void rrddim_memory_file_free(RRDDIM *rd); +void rrddim_memory_file_free(RRDDIM *rd); -extern bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MODE memory_mode); +bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MODE memory_mode); // return the v019 header size of RRDDIM files -extern size_t rrddim_memory_file_header_size(void); - -extern void rrddim_memory_file_save(RRDDIM *rd); +size_t rrddim_memory_file_header_size(void); -// ---------------------------------------------------------------------------- -// engine-specific iterator state for dimension data collection -typedef struct storage_collect_handle STORAGE_COLLECT_HANDLE; - -// ---------------------------------------------------------------------------- -// engine-specific iterator state for dimension data queries -typedef struct storage_query_handle STORAGE_QUERY_HANDLE; +void rrddim_memory_file_save(RRDDIM *rd); // ---------------------------------------------------------------------------- -// iterator state for RRD dimension data queries -struct rrddim_query_handle { - RRDDIM *rd; - time_t start_time; - time_t end_time; - TIER_QUERY_FETCH tier_query_fetch_type; - STORAGE_QUERY_HANDLE* handle; -}; typedef struct storage_point { NETDATA_DOUBLE min; // when count > 1, this is the minimum among them @@ -397,11 +397,19 @@ typedef struct storage_point { #define storage_point_is_unset(x) (!(x).count) #define storage_point_is_empty(x) (!netdata_double_isnumber((x).sum)) +// ---------------------------------------------------------------------------- +// engine-specific iterator state for dimension data collection +typedef struct storage_collect_handle STORAGE_COLLECT_HANDLE; + +// ---------------------------------------------------------------------------- +// engine-specific iterator state for dimension data queries +typedef struct storage_query_handle STORAGE_QUERY_HANDLE; + // ------------------------------------------------------------------------ // function pointers that handle data collection -struct rrddim_collect_ops { +struct storage_engine_collect_ops { // an initialization function to run before starting collection - STORAGE_COLLECT_HANDLE *(*init)(STORAGE_METRIC_HANDLE *db_metric_handle); + STORAGE_COLLECT_HANDLE *(*init)(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every); // run this to store each metric into the database void (*store_metric)(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, NETDATA_DOUBLE min_value, @@ -410,24 +418,38 @@ struct rrddim_collect_ops { // run this to flush / reset the current data collection sequence void (*flush)(STORAGE_COLLECT_HANDLE *collection_handle); - // an finalization function to run after collection is over + // a finalization function to run after collection is over // returns 1 if it's safe to delete the dimension int (*finalize)(STORAGE_COLLECT_HANDLE *collection_handle); + + void (*change_collection_frequency)(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); + + STORAGE_METRICS_GROUP *(*metrics_group_get)(STORAGE_INSTANCE *db_instance, uuid_t *uuid); + void (*metrics_group_release)(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *sa); +}; + +// ---------------------------------------------------------------------------- +// iterator state for RRD dimension data queries +struct storage_engine_query_handle { + RRDDIM *rd; + time_t start_time_s; + time_t end_time_s; + STORAGE_QUERY_HANDLE* handle; }; // function pointers that handle database queries -struct rrddim_query_ops { +struct storage_engine_query_ops { // run this before starting a series of next_metric() database queries - void (*init)(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type); + void (*init)(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, time_t start_time, time_t end_time); // run this to load each metric number from the database - STORAGE_POINT (*next_metric)(struct rrddim_query_handle *handle); + STORAGE_POINT (*next_metric)(struct storage_engine_query_handle *handle); // run this to test if the series of next_metric() database queries is finished - int (*is_finished)(struct rrddim_query_handle *handle); + int (*is_finished)(struct storage_engine_query_handle *handle); // run this after finishing a series of load_metric() database queries - void (*finalize)(struct rrddim_query_handle *handle); + void (*finalize)(struct storage_engine_query_handle *handle); // get the timestamp of the last entry of this metric time_t (*latest_time)(STORAGE_METRIC_HANDLE *db_metric_handle); @@ -436,45 +458,60 @@ struct rrddim_query_ops { time_t (*oldest_time)(STORAGE_METRIC_HANDLE *db_metric_handle); }; +typedef struct storage_engine STORAGE_ENGINE; + +// ------------------------------------------------------------------------ +// function pointers for all APIs provided by a storage engine +typedef struct storage_engine_api { + // metric management + STORAGE_METRIC_HANDLE *(*metric_get)(STORAGE_INSTANCE *instance, uuid_t *uuid, STORAGE_METRICS_GROUP *smg); + STORAGE_METRIC_HANDLE *(*metric_get_or_create)(RRDDIM *rd, STORAGE_INSTANCE *instance, STORAGE_METRICS_GROUP *smg); + void (*metric_release)(STORAGE_METRIC_HANDLE *); + STORAGE_METRIC_HANDLE *(*metric_dup)(STORAGE_METRIC_HANDLE *); + + // operations + struct storage_engine_collect_ops collect_ops; + struct storage_engine_query_ops query_ops; +} STORAGE_ENGINE_API; + +struct storage_engine { + RRD_MEMORY_MODE id; + const char* name; + STORAGE_ENGINE_API api; +}; + +STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode); +STORAGE_ENGINE* storage_engine_find(const char* name); // ---------------------------------------------------------------------------- // Storage tier data for every dimension struct rrddim_tier { - int tier_grouping; - RRD_MEMORY_MODE mode; // the memory mode of this tier - RRD_BACKFILL backfill; // backfilling configuration + size_t tier_grouping; STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle STORAGE_POINT virtual_point; time_t next_point_time; - usec_t last_collected_ut; - struct rrddim_collect_ops collect_ops; - struct rrddim_query_ops query_ops; + struct storage_engine_collect_ops *collect_ops; + struct storage_engine_query_ops *query_ops; }; -extern void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, int tier, time_t now); - -// ---------------------------------------------------------------------------- -// volatile state per chart -struct rrdset_volatile { - char *old_title; - char *old_units; - char *old_context; - uuid_t hash_id; - DICTIONARY *chart_labels; - bool is_ar_chart; -}; +void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now); // ---------------------------------------------------------------------------- // these loop macros make sure the linked list is accessed with the right lock #define rrddim_foreach_read(rd, st) \ - for((rd) = (st)->dimensions, rrdset_check_rdlock(st); (rd) ; (rd) = (rd)->next) + dfe_start_read((st)->rrddim_root_index, rd) #define rrddim_foreach_write(rd, st) \ - for((rd) = (st)->dimensions, rrdset_check_wrlock(st); (rd) ; (rd) = (rd)->next) + dfe_start_write((st)->rrddim_root_index, rd) +#define rrddim_foreach_reentrant(rd, st) \ + dfe_start_reentrant((st)->rrddim_root_index, rd) + +#define rrddim_foreach_done(rd) \ + dfe_done(rd) // ---------------------------------------------------------------------------- // RRDSET - this is a chart @@ -484,166 +521,213 @@ struct rrdset_volatile { // and may lead to missing information. typedef enum rrdset_flags { - RRDSET_FLAG_DETAIL = 1 << 1, // if set, the data set should be considered as a detail of another - // (the master data set should be the one that has the same family and is not detail) - RRDSET_FLAG_DEBUG = 1 << 2, // enables or disables debugging for a chart - RRDSET_FLAG_OBSOLETE = 1 << 3, // this is marked by the collector/module as obsolete - RRDSET_FLAG_EXPORTING_SEND = 1 << 4, // if set, this chart should be sent to Prometheus web API and external databases - RRDSET_FLAG_EXPORTING_IGNORE = 1 << 5, // if set, this chart should not be sent to Prometheus web API and external databases - RRDSET_FLAG_UPSTREAM_SEND = 1 << 6, // if set, this chart should be sent upstream (streaming) - RRDSET_FLAG_UPSTREAM_IGNORE = 1 << 7, // if set, this chart should not be sent upstream (streaming) - RRDSET_FLAG_UPSTREAM_EXPOSED = 1 << 8, // if set, we have sent this chart definition to netdata parent (streaming) - RRDSET_FLAG_STORE_FIRST = 1 << 9, // if set, do not eliminate the first collection during interpolation - RRDSET_FLAG_HETEROGENEOUS = 1 << 10, // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) - RRDSET_FLAG_HOMOGENEOUS_CHECK = 1 << 11, // if set, the chart should be checked to determine if the dimensions are homogeneous - RRDSET_FLAG_HIDDEN = 1 << 12, // if set, do not show this chart on the dashboard, but use it for exporting - RRDSET_FLAG_SYNC_CLOCK = 1 << 13, // if set, microseconds on next data collection will be ignored (the chart will be synced to now) - RRDSET_FLAG_OBSOLETE_DIMENSIONS = 1 << 14, // this is marked by the collector/module when a chart has obsolete dimensions - // No new values have been collected for this chart since agent start or it was marked RRDSET_FLAG_OBSOLETE at - // least rrdset_free_obsolete_time seconds ago. - RRDSET_FLAG_ARCHIVED = 1 << 15, - RRDSET_FLAG_ACLK = 1 << 16, - RRDSET_FLAG_PENDING_FOREACH_ALARMS = 1 << 17, // contains dims with uninitialized foreach alarms - RRDSET_FLAG_ANOMALY_DETECTION = 1 << 18 // flag to identify anomaly detection charts. + RRDSET_FLAG_DETAIL = (1 << 1), // if set, the data set should be considered as a detail of another + // (the master data set should be the one that has the same family and is not detail) + RRDSET_FLAG_DEBUG = (1 << 2), // enables or disables debugging for a chart + RRDSET_FLAG_OBSOLETE = (1 << 3), // this is marked by the collector/module as obsolete + RRDSET_FLAG_EXPORTING_SEND = (1 << 4), // if set, this chart should be sent to Prometheus web API and external databases + RRDSET_FLAG_EXPORTING_IGNORE = (1 << 5), // if set, this chart should not be sent to Prometheus web API and external databases + + RRDSET_FLAG_UPSTREAM_SEND = (1 << 6), // if set, this chart should be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_IGNORE = (1 << 7), // if set, this chart should not be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_EXPOSED = (1 << 8), // if set, we have sent this chart definition to netdata parent (streaming) + + RRDSET_FLAG_STORE_FIRST = (1 << 9), // if set, do not eliminate the first collection during interpolation + RRDSET_FLAG_HETEROGENEOUS = (1 << 10), // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) + RRDSET_FLAG_HOMOGENEOUS_CHECK = (1 << 11), // if set, the chart should be checked to determine if the dimensions are homogeneous + RRDSET_FLAG_HIDDEN = (1 << 12), // if set, do not show this chart on the dashboard, but use it for exporting + RRDSET_FLAG_SYNC_CLOCK = (1 << 13), // if set, microseconds on next data collection will be ignored (the chart will be synced to now) + RRDSET_FLAG_OBSOLETE_DIMENSIONS = (1 << 14), // this is marked by the collector/module when a chart has obsolete dimensions + // No new values have been collected for this chart since agent start, or it was marked RRDSET_FLAG_OBSOLETE at + // least rrdset_free_obsolete_time seconds ago. + RRDSET_FLAG_ARCHIVED = (1 << 15), + RRDSET_FLAG_METADATA_UPDATE = (1 << 16), // Mark that metadata needs to be stored + RRDSET_FLAG_ANOMALY_DETECTION = (1 << 18), // flag to identify anomaly detection charts. + RRDSET_FLAG_INDEXED_ID = (1 << 19), // the rrdset is indexed by its id + RRDSET_FLAG_INDEXED_NAME = (1 << 20), // the rrdset is indexed by its name + + RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 21), + + RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS = (1 << 22), // the sending side has replication in progress + RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 23), // the sending side has completed replication + RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 24), // the receiving side has replication in progress + RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 25), // the receiving side has completed replication + + RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 26), // a custom variable has been updated and needs to be exposed to parent } RRDSET_FLAGS; #define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag)) #define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_SEQ_CST) -#define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~flag, __ATOMIC_SEQ_CST) +#define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~(flag), __ATOMIC_SEQ_CST) -struct rrdset { - // ------------------------------------------------------------------------ - // binary indexing structures +#define rrdset_is_replicating(st) (rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS|RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS) \ + && !rrdset_flag_check(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED|RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED)) - avl_t avl; // the index, with key the id - this has to be first! - avl_t avlname; // the index, with key the name +struct rrdset { + uuid_t chart_uuid; // the global UUID for this chart // ------------------------------------------------------------------------ - // the set configuration - - char id[RRD_ID_LENGTH_MAX + 1]; // id of the data set - - const char *name; // the name of this dimension (as presented to user) - // this is a pointer to the config structure - // since the config always has a higher priority - // (the user overwrites the name of the charts) + // chart configuration + + struct { + STRING *type; // the type of {type}.{id} + STRING *id; // the id of {type}.{id} + STRING *name; // the name of {type}.{name} + } parts; + + STRING *id; // the unique ID of the rrdset as {type}.{id} + STRING *name; // the unique name of the rrdset as {type}.{name} + STRING *family; // grouping sets under the same family + STRING *title; // title shown to user + STRING *units; // units of measurement + STRING *context; // the template of this data set + STRING *plugin_name; // the name of the plugin that generated this + STRING *module_name; // the name of the plugin module that generated this - char *type; // the type of graph RRD_TYPE_* (a category, for determining graphing options) - char *family; // grouping sets under the same family - char *title; // title shown to user - char *units; // units of measurement - - char *context; // the template of this data set - uint32_t hash_context; // the hash of the chart's context + RRDSET_TYPE chart_type; // line, area, stacked - RRDINSTANCE_ACQUIRED *rrdinstance; // the rrdinstance of this chart - RRDCONTEXT_ACQUIRED *rrdcontext; // the rrdcontext this chart belongs to + long priority; // the sorting priority of this chart - RRDSET_TYPE chart_type; // line, area, stacked + int update_every; // data collection frequency - int update_every; // every how many seconds is this updated? + DICTIONARY *rrdlabels; // chart labels + DICTIONARY *rrdsetvar_root_index; // chart variables + DICTIONARY *rrddimvar_root_index; // dimension variables + // we use this dictionary to manage their allocation - long entries; // total number of entries in the data set + // ------------------------------------------------------------------------ + // operational state members - long current_entry; // the entry that is currently being updated - // it goes around in a round-robin fashion + RRDSET_FLAGS flags; // flags + RRD_MEMORY_MODE rrd_memory_mode; // the db mode of this rrdset - RRDSET_FLAGS flags; // configuration flags - RRDSET_FLAGS *exporting_flags; // array of flags for exporting connector instances + DICTIONARY *rrddim_root_index; // dimensions index int gap_when_lost_iterations_above; // after how many lost iterations a gap should be stored // netdata will interpolate values for gaps lower than this + // TODO - use the global - all charts have the same value - long priority; // the sorting priority of this chart - + STORAGE_METRICS_GROUP *storage_metrics_groups[RRD_STORAGE_TIERS]; // ------------------------------------------------------------------------ - // members for temporary data we need for calculations + // linking to siblings and parents - RRD_MEMORY_MODE rrd_memory_mode; // if set to 1, this is memory mapped + RRDHOST *rrdhost; // pointer to RRDHOST this chart belongs to - char *cache_dir; // the directory to store dimensions + RRDINSTANCE_ACQUIRED *rrdinstance; // the rrdinstance of this chart + RRDCONTEXT_ACQUIRED *rrdcontext; // the rrdcontext this chart belongs to - netdata_rwlock_t rrdset_rwlock; // protects dimensions linked list + // ------------------------------------------------------------------------ + // data collection members size_t counter; // the number of times we added values to this database size_t counter_done; // the number of times rrdset_done() has been called - union { - time_t last_accessed_time; // the last time this RRDSET has been accessed - time_t last_entry_t; // the last_entry_t computed for transient RRDSET - }; - time_t upstream_resync_time; // the timestamp up to which we should resync clock upstream - - char *plugin_name; // the name of the plugin that generated this - char *module_name; // the name of the plugin module that generated this - uuid_t *chart_uuid; // Store the global GUID for this chart - // this object. - struct rrdset_volatile *state; // volatile state that is not persistently stored - - size_t rrddim_page_alignment; // keeps metric pages in alignment when using dbengine - - uint32_t hash; // a simple hash on the id, to speed up searching - // we first compare hashes, and only if the hashes are equal we do string comparisons - - uint32_t hash_name; // a simple hash on the name + time_t last_accessed_time; // the last time this RRDSET has been accessed usec_t usec_since_last_update; // the time in microseconds since the last collection of data struct timeval last_updated; // when this data set was last updated (updated every time the rrd_stats_done() function) struct timeval last_collected_time; // when did this data set last collected values - total_number collected_total; // used internally to calculate percentages - total_number last_collected_total; // used internally to calculate percentages - - RRDFAMILY *rrdfamily; // pointer to RRDFAMILY this chart belongs to - RRDHOST *rrdhost; // pointer to RRDHOST this chart belongs to + size_t rrdlabels_last_saved_version; - struct rrdset *next; // linking of rrdsets + DICTIONARY *functions_view; // collector functions this rrdset supports, can be NULL // ------------------------------------------------------------------------ - // local variables + // data collection - streaming to parents, temp variables - NETDATA_DOUBLE green; // green threshold for this chart - NETDATA_DOUBLE red; // red threshold for this chart - - avl_tree_lock rrdvar_root_index; // RRDVAR index for this chart - RRDSETVAR *variables; // RRDSETVAR linked list for this chart (one RRDSETVAR, many RRDVARs) - RRDCALC *alarms; // RRDCALC linked list for this chart + time_t upstream_resync_time; // the timestamp up to which we should resync clock upstream // ------------------------------------------------------------------------ - // members for checking the data when loading from disk + // db mode SAVE, MAP specifics + // TODO - they should be managed by storage engine + // (RRDSET_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) + char *cache_dir; // the directory to store dimensions unsigned long memsize; // how much mem we have allocated for this (without dimensions) void *st_on_file; // compatibility with V019 RRDSET files // ------------------------------------------------------------------------ - // the dimensions + // db mode RAM, SAVE, MAP, ALLOC, NONE specifics + // TODO - they should be managed by storage engine + // (RRDSET_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) - avl_tree_lock dimensions_index; // the root of the dimensions index - RRDDIM *dimensions; // the actual data for every dimension + long entries; // total number of entries in the data set + + long current_entry; // the entry that is currently being updated + // it goes around in a round-robin fashion + + // ------------------------------------------------------------------------ + // exporting to 3rd party time-series members + // TODO - they should be managed by exporting engine + // (RRDSET_EXPORTING_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + RRDSET_FLAGS *exporting_flags; // array of flags for exporting connector instances + + // ------------------------------------------------------------------------ + // health monitoring members + // TODO - they should be managed by health + // (RRDSET_HEALTH_STATE ptr to an undefined structure, and a call to clean this up during destruction) + + NETDATA_DOUBLE green; // green threshold for this chart + NETDATA_DOUBLE red; // red threshold for this chart + + DICTIONARY *rrdvars; // RRDVAR index for this chart + const RRDFAMILY_ACQUIRED *rrdfamily; // pointer to RRDFAMILY dictionary item, this chart belongs to + + struct { + netdata_rwlock_t rwlock; // protection for RRDCALC *base + RRDCALC *base; // double linked list of RRDCALC related to this RRDSET + } alerts; + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + struct { + bool log_next_data_collection; + bool start_streaming; + time_t after; + time_t before; + } replay; +#endif // NETDATA_LOG_REPLICATION_REQUESTS }; -#define rrdset_rdlock(st) netdata_rwlock_rdlock(&((st)->rrdset_rwlock)) -#define rrdset_wrlock(st) netdata_rwlock_wrlock(&((st)->rrdset_rwlock)) -#define rrdset_unlock(st) netdata_rwlock_unlock(&((st)->rrdset_rwlock)) +#define rrdset_plugin_name(st) string2str((st)->plugin_name) +#define rrdset_module_name(st) string2str((st)->module_name) +#define rrdset_units(st) string2str((st)->units) +#define rrdset_parts_type(st) string2str((st)->parts.type) +#define rrdset_family(st) string2str((st)->family) +#define rrdset_title(st) string2str((st)->title) +#define rrdset_context(st) string2str((st)->context) +#define rrdset_name(st) string2str((st)->name) +#define rrdset_id(st) string2str((st)->id) +STRING *rrd_string_strdupz(const char *s); // ---------------------------------------------------------------------------- // these loop macros make sure the linked list is accessed with the right lock #define rrdset_foreach_read(st, host) \ - for((st) = (host)->rrdset_root, rrdhost_check_rdlock(host); st ; (st) = (st)->next) + dfe_start_read((host)->rrdset_root_index, st) #define rrdset_foreach_write(st, host) \ - for((st) = (host)->rrdset_root, rrdhost_check_wrlock(host); st ; (st) = (st)->next) + dfe_start_write((host)->rrdset_root_index, st) + +#define rrdset_foreach_reentrant(st, host) \ + dfe_start_reentrant((host)->rrdset_root_index, st) + +#define rrdset_foreach_done(st) \ + dfe_done(st) +#define rrdset_number_of_dimensions(st) \ + dictionary_entries((st)->rrddim_root_index) -extern void rrdset_memory_file_save(RRDSET *st); -extern void rrdset_memory_file_free(RRDSET *st); -extern void rrdset_memory_file_update(RRDSET *st); -extern const char *rrdset_cache_filename(RRDSET *st); -extern bool rrdset_memory_load_or_create_map_save(RRDSET *st_on_file, RRD_MEMORY_MODE memory_mode); +void rrdset_memory_file_save(RRDSET *st); +void rrdset_memory_file_free(RRDSET *st); +void rrdset_memory_file_update(RRDSET *st); +const char *rrdset_cache_filename(RRDSET *st); +bool rrdset_memory_load_or_create_map_save(RRDSET *st_on_file, RRD_MEMORY_MODE memory_mode); + +#include "rrdfunctions.h" // ---------------------------------------------------------------------------- // RRDHOST flags @@ -652,30 +736,70 @@ extern bool rrdset_memory_load_or_create_map_save(RRDSET *st_on_file, RRD_MEMORY // and may lead to missing information. typedef enum rrdhost_flags { - RRDHOST_FLAG_ORPHAN = (1 << 0), // this host is orphan (not receiving data) - RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS = (1 << 1), // delete files of obsolete charts - RRDHOST_FLAG_DELETE_ORPHAN_HOST = (1 << 2), // delete the entire host when orphan - RRDHOST_FLAG_EXPORTING_SEND = (1 << 3), // send it to external databases - RRDHOST_FLAG_EXPORTING_DONT_SEND = (1 << 4), // don't send it to external databases - RRDHOST_FLAG_ARCHIVED = (1 << 5), // The host is archived, no collected charts yet - RRDHOST_FLAG_MULTIHOST = (1 << 6), // Host belongs to localhost/megadb - RRDHOST_FLAG_PENDING_FOREACH_ALARMS = (1 << 7), // contains dims with uninitialized foreach alarms - RRDHOST_FLAG_STREAM_LABELS_UPDATE = (1 << 8), - RRDHOST_FLAG_STREAM_LABELS_STOP = (1 << 9), - RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 10), // when set, we should send ACLK stream context updates + // Orphan, Archived and Obsolete flags + RRDHOST_FLAG_ORPHAN = (1 << 10), // this host is orphan (not receiving data) + RRDHOST_FLAG_ARCHIVED = (1 << 11), // The host is archived, no collected charts yet + RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS = (1 << 12), // the host has pending chart obsoletions + RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS = (1 << 13), // the host has pending dimension obsoletions + + // Streaming sender + RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED = (1 << 14), // the host has initialized rrdpush structures + RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN = (1 << 15), // When set, the sender thread is running + RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED = (1 << 16), // When set, the host is connected to a parent + RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS = (1 << 17), // when set, rrdset_done() should push metrics to parent + RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS = (1 << 18), // when set, we have logged the status of metrics streaming + RRDHOST_FLAG_RRDPUSH_SENDER_JOIN = (1 << 19), // When set, we want to join the sender thread + + // Health + RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 20), // contains charts and dims with uninitialized variables + RRDHOST_FLAG_INITIALIZED_HEALTH = (1 << 21), // the host has initialized health structures + + // Exporting + RRDHOST_FLAG_EXPORTING_SEND = (1 << 22), // send it to external databases + RRDHOST_FLAG_EXPORTING_DONT_SEND = (1 << 23), // don't send it to external databases + + // ACLK + RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 24), // when set, we should send ACLK stream context updates + // Metadata + RRDHOST_FLAG_METADATA_UPDATE = (1 << 25), // metadata needs to be stored in the database + + RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED = ( 1 << 26), // set when the receiver part is disconnected } RRDHOST_FLAGS; #define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag)) #define rrdhost_flag_set(host, flag) __atomic_or_fetch(&((host)->flags), flag, __ATOMIC_SEQ_CST) -#define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~flag, __ATOMIC_SEQ_CST) +#define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~(flag), __ATOMIC_SEQ_CST) #ifdef NETDATA_INTERNAL_CHECKS #define rrdset_debug(st, fmt, args...) do { if(unlikely(debug_flags & D_RRD_STATS && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) \ - debug_int(__FILE__, __FUNCTION__, __LINE__, "%s: " fmt, st->name, ##args); } while(0) + debug_int(__FILE__, __FUNCTION__, __LINE__, "%s: " fmt, rrdset_name(st), ##args); } while(0) #else #define rrdset_debug(st, fmt, args...) debug_dummy() #endif +typedef enum { + // Indexing + RRDHOST_OPTION_INDEXED_MACHINE_GUID = (1 << 0), // when set, we have indexed its machine guid + RRDHOST_OPTION_INDEXED_HOSTNAME = (1 << 1), // when set, we have indexed its hostname + + // Streaming configuration + RRDHOST_OPTION_SENDER_ENABLED = (1 << 2), // set when the host is configured to send metrics to a parent + + // Configuration options + RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS = (1 << 3), // delete files of obsolete charts + RRDHOST_OPTION_DELETE_ORPHAN_HOST = (1 << 4), // delete the entire host when orphan + + RRDHOST_OPTION_REPLICATION = (1 << 5), // when set, we support replication for this host +} RRDHOST_OPTIONS; + +#define rrdhost_option_check(host, flag) ((host)->options & (flag)) +#define rrdhost_option_set(host, flag) (host)->options |= flag +#define rrdhost_option_clear(host, flag) (host)->options &= ~(flag) + +#define rrdhost_has_rrdpush_sender_enabled(host) (rrdhost_option_check(host, RRDHOST_OPTION_SENDER_ENABLED) && (host)->sender) + +#define rrdhost_can_send_definitions_to_parent(host) (rrdhost_has_rrdpush_sender_enabled(host) && rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED)) + // ---------------------------------------------------------------------------- // Health data @@ -689,34 +813,30 @@ struct alarm_entry { time_t duration; time_t non_clear_duration; - char *name; - uint32_t hash_name; - - char *chart; - uint32_t hash_chart; - char *chart_context; + STRING *name; + STRING *chart; + STRING *chart_context; + STRING *family; - char *family; + STRING *classification; + STRING *component; + STRING *type; - char *classification; - char *component; - char *type; - - char *exec; - char *recipient; + STRING *exec; + STRING *recipient; time_t exec_run_timestamp; int exec_code; uint64_t exec_spawn_serial; - char *source; - char *units; - char *info; + STRING *source; + STRING *units; + STRING *info; NETDATA_DOUBLE old_value; NETDATA_DOUBLE new_value; - char *old_value_string; - char *new_value_string; + STRING *old_value_string; + STRING *new_value_string; RRDCALC_STATUS old_status; RRDCALC_STATUS new_status; @@ -736,6 +856,20 @@ struct alarm_entry { struct alarm_entry *prev_in_progress; }; +#define ae_name(ae) string2str((ae)->name) +#define ae_chart_name(ae) string2str((ae)->chart) +#define ae_chart_context(ae) string2str((ae)->chart_context) +#define ae_family(ae) string2str((ae)->family) +#define ae_classification(ae) string2str((ae)->classification) +#define ae_component(ae) string2str((ae)->component) +#define ae_type(ae) string2str((ae)->type) +#define ae_exec(ae) string2str((ae)->exec) +#define ae_recipient(ae) string2str((ae)->recipient) +#define ae_source(ae) string2str((ae)->source) +#define ae_units(ae) string2str((ae)->units) +#define ae_info(ae) string2str((ae)->info) +#define ae_old_value_string(ae) string2str((ae)->old_value_string) +#define ae_new_value_string(ae) string2str((ae)->new_value_string) typedef struct alarm_log { uint32_t next_log_id; @@ -789,79 +923,71 @@ struct rrdhost_system_info { }; struct rrdhost { - avl_t avl; // the index of hosts + char machine_guid[GUID_LEN + 1]; // the unique ID of this host // ------------------------------------------------------------------------ // host information - char *hostname; // the hostname of this host - uint32_t hash_hostname; // the hostname hash + STRING *hostname; // the hostname of this host + STRING *registry_hostname; // the registry hostname for this host + STRING *os; // the O/S type of the host + STRING *tags; // tags for this host + STRING *timezone; // the timezone of the host + STRING *abbrev_timezone; // the abbriviated timezone of the host + STRING *program_name; // the program name that collects metrics for this host + STRING *program_version; // the program version that collects metrics for this host - char *registry_hostname; // the registry hostname for this host - - char machine_guid[GUID_LEN + 1]; // the unique ID of this host - uint32_t hash_machine_guid; // the hash of the unique ID - - const char *os; // the O/S type of the host - const char *tags; // tags for this host - const char *timezone; // the timezone of the host - - const char *abbrev_timezone; // the abbriviated timezone of the host int32_t utc_offset; // the offset in seconds from utc - RRDHOST_FLAGS flags; // flags about this RRDHOST + RRDHOST_OPTIONS options; // configuration option for this RRDHOST (no atomics on this) + RRDHOST_FLAGS flags; // runtime flags about this RRDHOST (atomics on this) RRDHOST_FLAGS *exporting_flags; // array of flags for exporting connector instances int rrd_update_every; // the update frequency of the host long rrd_history_entries; // the number of history entries for the host's charts - RRD_MEMORY_MODE rrd_memory_mode; // the memory more for the charts of this host + + RRD_MEMORY_MODE rrd_memory_mode; // the configured memory more for the charts of this host + // the actual per tier is at .db[tier].mode char *cache_dir; // the directory to save RRD cache files char *varlib_dir; // the directory to save health log - char *program_name; // the program name that collects metrics for this host - char *program_version; // the program version that collects metrics for this host + struct { + RRD_MEMORY_MODE mode; // the db mode for this tier + STORAGE_ENGINE *eng; // the storage engine API for this tier + STORAGE_INSTANCE *instance; // the db instance for this tier + size_t tier_grouping; // tier 0 iterations aggregated on this tier + } db[RRD_STORAGE_TIERS]; struct rrdhost_system_info *system_info; // information collected from the host environment // ------------------------------------------------------------------------ - // streaming of data to remote hosts - rrdpush + // streaming of data to remote hosts - rrdpush sender - unsigned int rrdpush_send_enabled; // 1 when this host sends metrics to another netdata char *rrdpush_send_destination; // where to send metrics to char *rrdpush_send_api_key; // the api key at the receiving netdata struct rrdpush_destinations *destinations; // a linked list of possible destinations struct rrdpush_destinations *destination; // the current destination from the above list + SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent + + time_t rrdpush_seconds_to_replicate; // max time we want to replicate from the child + time_t rrdpush_replication_step; // seconds per replication step + size_t rrdpush_receiver_replicating_charts; // the number of charts currently being replicated from a child // the following are state information for the threading // streaming metrics from this netdata to an upstream netdata struct sender_state *sender; - volatile unsigned int rrdpush_sender_spawn; // 1 when the sender thread has been spawn netdata_thread_t rrdpush_sender_thread; // the sender thread + size_t rrdpush_sender_replicating_charts; // the number of charts currently being replicated to a parent void *dbsync_worker; - bool rrdpush_sender_connected; // 1 when the sender is ready to push metrics - int rrdpush_sender_socket; // the fd of the socket to the remote host, or -1 - - volatile unsigned int rrdpush_sender_error_shown; // 1 when we have logged a communication error - volatile unsigned int rrdpush_sender_join; // 1 when we have to join the sending thread - - SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent - - int rrdpush_sender_pipe[2]; // collector to sender thread signaling - //BUFFER *rrdpush_sender_buffer; // collector fills it, sender sends it - - //uint32_t stream_version; //Set the current version of the stream. - // ------------------------------------------------------------------------ - // streaming of data from remote hosts - rrdpush - - volatile size_t connected_senders; // when remote hosts are streaming to this - // host, this is the counter of connected clients + // streaming of data from remote hosts - rrdpush receiver time_t senders_connect_time; // the time the last sender was connected time_t senders_last_chart_command; // the time of the last CHART streaming command time_t senders_disconnected_time; // the time the last sender was disconnected + int senders_count; // number of senders currently streaming struct receiver_state *receiver; netdata_mutex_t receiver_lock; @@ -871,45 +997,30 @@ struct rrdhost { // ------------------------------------------------------------------------ // health monitoring options - unsigned int health_enabled; // 1 when this host has health enabled - time_t health_delay_up_to; // a timestamp to delay alarms processing up to - char *health_default_exec; // the full path of the alarms notifications program - char *health_default_recipient; // the default recipient for all alarms - char *health_log_filename; // the alarms event log filename - size_t health_log_entries_written; // the number of alarm events written to the alarms event log - FILE *health_log_fp; // the FILE pointer to the open alarms event log file - uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications - uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications - + unsigned int health_enabled; // 1 when this host has health enabled + bool health_spawn; // true when health thread is running + netdata_thread_t health_thread; // the health thread + unsigned int aclk_alert_reloaded; // 1 on thread start and health reload, 0 after removed are sent + time_t health_delay_up_to; // a timestamp to delay alarms processing up to + STRING *health_default_exec; // the full path of the alarms notifications program + STRING *health_default_recipient; // the default recipient for all alarms + char *health_log_filename; // the alarms event log filename + size_t health_log_entries_written; // the number of alarm events written to the alarms event log + FILE *health_log_fp; // the FILE pointer to the open alarms event log file + uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications + uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications // all RRDCALCs are primarily allocated and linked here - // RRDCALCs may be linked to charts at any point - // (charts may or may not exist when these are loaded) - RRDCALC *alarms; - RRDCALC *alarms_with_foreach; - avl_tree_lock alarms_idx_health_log; - avl_tree_lock alarms_idx_name; + DICTIONARY *rrdcalc_root_index; + + // templates of alarms + DICTIONARY *rrdcalctemplate_root_index; ALARM_LOG health_log; // alarms historical events (event log) uint32_t health_last_processed_id; // the last processed health id from the log uint32_t health_max_unique_id; // the max alarm log unique id given for the host uint32_t health_max_alarm_id; // the max alarm id given for the host - // templates of alarms - // these are used to create alarms when charts - // are created or renamed, that match them - RRDCALCTEMPLATE *templates; - RRDCALCTEMPLATE *alarms_template_with_foreach; - - - // ------------------------------------------------------------------------ - // the charts of the host - - RRDSET *rrdset_root; // the host charts - - unsigned int obsolete_charts_count; - - // ------------------------------------------------------------------------ // locks @@ -921,37 +1032,46 @@ struct rrdhost { // ------------------------------------------------------------------------ // Support for host-level labels - DICTIONARY *host_labels; + DICTIONARY *rrdlabels; // ------------------------------------------------------------------------ - // indexes + // Support for functions + DICTIONARY *functions; // collector functions this rrdset supports, can be NULL - avl_tree_lock rrdset_root_index; // the host's charts index (by id) - avl_tree_lock rrdset_root_index_name; // the host's charts index (by name) + // ------------------------------------------------------------------------ + // indexes - avl_tree_lock rrdfamily_root_index; // the host's chart families index - avl_tree_lock rrdvar_root_index; // the host's chart variables index + DICTIONARY *rrdset_root_index; // the host's charts index (by id) + DICTIONARY *rrdset_root_index_name; // the host's charts index (by name) - STORAGE_INSTANCE *storage_instance[RRD_STORAGE_TIERS]; // the database instances of the storage tiers + DICTIONARY *rrdfamily_root_index; // the host's chart families index + DICTIONARY *rrdvars; // the host's chart variables index + // this includes custom host variables - RRDCONTEXTS *rrdctx_queue; + RRDCONTEXTS *rrdctx_hub_queue; + RRDCONTEXTS *rrdctx_post_processing_queue; RRDCONTEXTS *rrdctx; uuid_t host_uuid; // Global GUID for this host uuid_t *node_id; // Cloud node_id -#ifdef ENABLE_HTTPS - struct netdata_ssl ssl; //Structure used to encrypt the connection - struct netdata_ssl stream_ssl; //Structure used to encrypt the stream -#endif - netdata_mutex_t aclk_state_lock; aclk_rrdhost_state aclk_state; struct rrdhost *next; + struct rrdhost *prev; }; extern RRDHOST *localhost; +#define rrdhost_hostname(host) string2str((host)->hostname) +#define rrdhost_registry_hostname(host) string2str((host)->registry_hostname) +#define rrdhost_os(host) string2str((host)->os) +#define rrdhost_tags(host) string2str((host)->tags) +#define rrdhost_timezone(host) string2str((host)->timezone) +#define rrdhost_abbrev_timezone(host) string2str((host)->abbrev_timezone) +#define rrdhost_program_name(host) string2str((host)->program_name) +#define rrdhost_program_version(host) string2str((host)->program_version) + #define rrdhost_rdlock(host) netdata_rwlock_rdlock(&((host)->rrdhost_rwlock)) #define rrdhost_wrlock(host) netdata_rwlock_wrlock(&((host)->rrdhost_rwlock)) #define rrdhost_unlock(host) netdata_rwlock_unlock(&((host)->rrdhost_rwlock)) @@ -959,6 +1079,19 @@ extern RRDHOST *localhost; #define rrdhost_aclk_state_lock(host) netdata_mutex_lock(&((host)->aclk_state_lock)) #define rrdhost_aclk_state_unlock(host) netdata_mutex_unlock(&((host)->aclk_state_lock)) +#define rrdhost_receiver_replicating_charts(host) (__atomic_load_n(&((host)->rrdpush_receiver_replicating_charts), __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_plus_one(host) (__atomic_add_fetch(&((host)->rrdpush_receiver_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_minus_one(host) (__atomic_sub_fetch(&((host)->rrdpush_receiver_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_receiver_replicating_charts_zero(host) (__atomic_store_n(&((host)->rrdpush_receiver_replicating_charts), 0, __ATOMIC_RELAXED)) + +#define rrdhost_sender_replicating_charts(host) (__atomic_load_n(&((host)->rrdpush_sender_replicating_charts), __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_plus_one(host) (__atomic_add_fetch(&((host)->rrdpush_sender_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_minus_one(host) (__atomic_sub_fetch(&((host)->rrdpush_sender_replicating_charts), 1, __ATOMIC_RELAXED)) +#define rrdhost_sender_replicating_charts_zero(host) (__atomic_store_n(&((host)->rrdpush_sender_replicating_charts), 0, __ATOMIC_RELAXED)) + +extern DICTIONARY *rrdhost_root_index; +long rrdhost_hosts_available(void); + // ---------------------------------------------------------------------------- // these loop macros make sure the linked list is accessed with the right lock @@ -980,19 +1113,24 @@ extern netdata_rwlock_t rrd_rwlock; // ---------------------------------------------------------------------------- -extern bool is_storage_engine_shared(STORAGE_INSTANCE *engine); +bool is_storage_engine_shared(STORAGE_INSTANCE *engine); +void rrdset_index_init(RRDHOST *host); +void rrdset_index_destroy(RRDHOST *host); + +void rrddim_index_init(RRDSET *st); +void rrddim_index_destroy(RRDSET *st); // ---------------------------------------------------------------------------- extern size_t rrd_hosts_available; extern time_t rrdhost_free_orphan_time; -extern int rrd_init(char *hostname, struct rrdhost_system_info *system_info); +int rrd_init(char *hostname, struct rrdhost_system_info *system_info); -extern RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash); -extern RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash); +RRDHOST *rrdhost_find_by_hostname(const char *hostname); +RRDHOST *rrdhost_find_by_guid(const char *guid); -extern RRDHOST *rrdhost_find_or_create( +RRDHOST *rrdhost_find_or_create( const char *hostname , const char *registry_hostname , const char *guid @@ -1011,11 +1149,14 @@ extern RRDHOST *rrdhost_find_or_create( , char *rrdpush_destination , char *rrdpush_api_key , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step , struct rrdhost_system_info *system_info , bool is_archived ); -extern void rrdhost_update(RRDHOST *host +void rrdhost_update(RRDHOST *host , const char *hostname , const char *registry_hostname , const char *guid @@ -1034,18 +1175,21 @@ extern void rrdhost_update(RRDHOST *host , char *rrdpush_destination , char *rrdpush_api_key , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step , struct rrdhost_system_info *system_info ); -extern int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value); +int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value); #if defined(NETDATA_INTERNAL_CHECKS) && defined(NETDATA_VERIFY_LOCKS) -extern void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); -extern void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); -extern void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line); -extern void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line); -extern void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line); -extern void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line); +void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line); +void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line); #define rrdhost_check_rdlock(host) __rrdhost_check_rdlock(host, __FILE__, __FUNCTION__, __LINE__) #define rrdhost_check_wrlock(host) __rrdhost_check_wrlock(host, __FILE__, __FUNCTION__, __LINE__) @@ -1066,9 +1210,9 @@ extern void __rrd_check_wrlock(const char *file, const char *function, const uns // ---------------------------------------------------------------------------- // RRDSET functions -extern int rrdset_set_name(RRDSET *st, const char *name); +int rrdset_reset_name(RRDSET *st, const char *name); -extern RRDSET *rrdset_create_custom(RRDHOST *host +RRDSET *rrdset_create_custom(RRDHOST *host , const char *type , const char *id , const char *name @@ -1090,22 +1234,22 @@ extern RRDSET *rrdset_create_custom(RRDHOST *host #define rrdset_create_localhost(type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) \ rrdset_create(localhost, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) -extern void rrdhost_free_all(void); -extern void rrdhost_save_all(void); -extern void rrdhost_cleanup_all(void); +void rrdhost_free_all(void); +void rrdhost_save_all(void); +void rrdhost_cleanup_all(void); -extern void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected_host); -extern void rrdhost_system_info_free(struct rrdhost_system_info *system_info); -extern void rrdhost_free(RRDHOST *host, bool force); -extern void rrdhost_save_charts(RRDHOST *host); -extern void rrdhost_delete_charts(RRDHOST *host); -extern void rrd_cleanup_obsolete_charts(); +void rrdhost_system_info_free(struct rrdhost_system_info *system_info); +void rrdhost_free(RRDHOST *host, bool force); +void rrdhost_save_charts(RRDHOST *host); +void rrdhost_delete_charts(RRDHOST *host); -extern int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now); +int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now); -extern void rrdset_update_heterogeneous_flag(RRDSET *st); +void rrdset_update_heterogeneous_flag(RRDSET *st); -extern RRDSET *rrdset_find(RRDHOST *host, const char *id); +time_t rrdset_set_update_every(RRDSET *st, time_t update_every); + +RRDSET *rrdset_find(RRDHOST *host, const char *id); #define rrdset_find_localhost(id) rrdset_find(localhost, id) /* This will not return charts that are archived */ static inline RRDSET *rrdset_find_active_localhost(const char *id) @@ -1116,7 +1260,7 @@ static inline RRDSET *rrdset_find_active_localhost(const char *id) return st; } -extern RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id); +RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id); #define rrdset_find_bytype_localhost(type, id) rrdset_find_bytype(localhost, type, id) /* This will not return charts that are archived */ static inline RRDSET *rrdset_find_active_bytype_localhost(const char *type, const char *id) @@ -1127,7 +1271,7 @@ static inline RRDSET *rrdset_find_active_bytype_localhost(const char *type, cons return st; } -extern RRDSET *rrdset_find_byname(RRDHOST *host, const char *name); +RRDSET *rrdset_find_byname(RRDHOST *host, const char *name); #define rrdset_find_byname_localhost(name) rrdset_find_byname(localhost, name) /* This will not return charts that are archived */ static inline RRDSET *rrdset_find_active_byname_localhost(const char *name) @@ -1138,190 +1282,115 @@ static inline RRDSET *rrdset_find_active_byname_localhost(const char *name) return st; } -extern void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds); -extern void rrdset_next_usec(RRDSET *st, usec_t microseconds); +void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds); +void rrdset_next_usec(RRDSET *st, usec_t microseconds); +void rrdset_timed_next(RRDSET *st, struct timeval now, usec_t microseconds); #define rrdset_next(st) rrdset_next_usec(st, 0ULL) -extern void rrdset_done(RRDSET *st); +void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next); +void rrdset_done(RRDSET *st); -extern void rrdset_is_obsolete(RRDSET *st); -extern void rrdset_isnot_obsolete(RRDSET *st); +void rrdset_is_obsolete(RRDSET *st); +void rrdset_isnot_obsolete(RRDSET *st); // checks if the RRDSET should be offered to viewers -#define rrdset_is_available_for_viewers(st) (!rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) -#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions) -#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions) - -// get the timestamp of the last entry in the round robin database -static inline time_t rrddim_last_entry_t(RRDDIM *rd) { - time_t latest = rd->tiers[0]->query_ops.latest_time(rd->tiers[0]->db_metric_handle); - - for(int tier = 1; tier < storage_tiers ;tier++) { - if(unlikely(!rd->tiers[tier])) continue; - - time_t t = rd->tiers[tier]->query_ops.latest_time(rd->tiers[tier]->db_metric_handle); - if(t > latest) - latest = t; - } - - return latest; -} - -static inline time_t rrddim_first_entry_t(RRDDIM *rd) { - time_t oldest = 0; - - for(int tier = 0; tier < storage_tiers ;tier++) { - if(unlikely(!rd->tiers[tier])) continue; - - time_t t = rd->tiers[tier]->query_ops.oldest_time(rd->tiers[tier]->db_metric_handle); - if(t != 0 && (oldest == 0 || t < oldest)) - oldest = t; - } - - return oldest; -} - -// get the timestamp of the last entry in the round robin database -static inline time_t rrdset_last_entry_t_nolock(RRDSET *st) { - RRDDIM *rd; - time_t last_entry_t = 0; - - rrddim_foreach_read(rd, st) { - time_t t = rrddim_last_entry_t(rd); - if(t > last_entry_t) last_entry_t = t; - } - - return last_entry_t; -} - -static inline time_t rrdset_last_entry_t(RRDSET *st) { - time_t last_entry_t; - - netdata_rwlock_rdlock(&st->rrdset_rwlock); - last_entry_t = rrdset_last_entry_t_nolock(st); - netdata_rwlock_unlock(&st->rrdset_rwlock); - - return last_entry_t; -} - -// get the timestamp of first entry in the round robin database -static inline time_t rrdset_first_entry_t_nolock(RRDSET *st) { - RRDDIM *rd; - time_t first_entry_t = LONG_MAX; - - rrddim_foreach_read(rd, st) { - time_t t = rrddim_first_entry_t(rd); - if(t < first_entry_t) - first_entry_t = t; - } - - if (unlikely(LONG_MAX == first_entry_t)) return 0; - return first_entry_t; -} - -static inline time_t rrdset_first_entry_t(RRDSET *st) -{ - time_t first_entry_t; - - netdata_rwlock_rdlock(&st->rrdset_rwlock); - first_entry_t = rrdset_first_entry_t_nolock(st); - netdata_rwlock_unlock(&st->rrdset_rwlock); - - return first_entry_t; -} - +#define rrdset_is_available_for_viewers(st) (!rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st) && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) +#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st)) +#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && rrdset_number_of_dimensions(st)) + +time_t rrddim_first_entry_t(RRDDIM *rd); +time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier); +time_t rrddim_last_entry_t(RRDDIM *rd); +time_t rrdset_last_entry_t(RRDSET *st); +time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier); +time_t rrdset_first_entry_t(RRDSET *st); time_t rrdhost_last_entry_t(RRDHOST *h); // ---------------------------------------------------------------------------- // RRD DIMENSION functions -extern void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host); -extern RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, - collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode);//, - //int is_archived, uuid_t *dim_uuid); -#define rrddim_add(st, id, name, multiplier, divisor, algorithm) rrddim_add_custom(st, id, name, multiplier, divisor, \ - algorithm, (st)->rrd_memory_mode)//, 0, NULL) - -extern int rrddim_set_name(RRDSET *st, RRDDIM *rd, const char *name); -extern int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm); -extern int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier); -extern int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor); - -extern RRDDIM *rrddim_find(RRDSET *st, const char *id); -/* This will not return dimensions that are archived */ -static inline RRDDIM *rrddim_find_active(RRDSET *st, const char *id) -{ - RRDDIM *rd = rrddim_find(st, id); - if (unlikely(rd && rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) - return NULL; - return rd; -} +RRDDIM *rrddim_add_custom(RRDSET *st + , const char *id + , const char *name + , collected_number multiplier + , collected_number divisor + , RRD_ALGORITHM algorithm + , RRD_MEMORY_MODE memory_mode + ); + +#define rrddim_add(st, id, name, multiplier, divisor, algorithm) \ + rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode) +int rrddim_reset_name(RRDSET *st, RRDDIM *rd, const char *name); +int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm); +int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier); +int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor); -extern int rrddim_hide(RRDSET *st, const char *id); -extern int rrddim_unhide(RRDSET *st, const char *id); +RRDDIM *rrddim_find(RRDSET *st, const char *id); +RRDDIM_ACQUIRED *rrddim_find_and_acquire(RRDSET *st, const char *id); +RRDDIM *rrddim_acquired_to_rrddim(RRDDIM_ACQUIRED *rda); +void rrddim_acquired_release(RRDDIM_ACQUIRED *rda); +RRDDIM *rrddim_find_active(RRDSET *st, const char *id); -extern void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd); -extern void rrddim_isnot_obsolete(RRDSET *st, RRDDIM *rd); +int rrddim_hide(RRDSET *st, const char *id); +int rrddim_unhide(RRDSET *st, const char *id); + +void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd); +void rrddim_isnot_obsolete(RRDSET *st, RRDDIM *rd); + +collected_number rrddim_timed_set_by_pointer(RRDSET *st, RRDDIM *rd, struct timeval collected_time, collected_number value); +collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value); +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value); -extern collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value); -extern collected_number rrddim_set(RRDSET *st, const char *id, collected_number value); #ifdef ENABLE_ACLK -extern time_t calc_dimension_liveness(RRDDIM *rd, time_t now); +time_t calc_dimension_liveness(RRDDIM *rd, time_t now); +#endif +long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries); + +#ifdef NETDATA_LOG_COLLECTION_ERRORS +#define rrddim_store_metric(rd, point_end_time_ut, n, flags) rrddim_store_metric_with_trace(rd, point_end_time_ut, n, flags, __FUNCTION__) +void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags, const char *function); +#else +void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags); #endif -extern long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries); // ---------------------------------------------------------------------------- // Miscellaneous functions -extern int alarm_compare_id(void *a, void *b); -extern int alarm_compare_name(void *a, void *b); +char *rrdset_strncpyz_name(char *to, const char *from, size_t length); // ---------------------------------------------------------------------------- // RRD internal functions -#ifdef NETDATA_RRD_INTERNALS - -extern avl_tree_lock rrdhost_root_index; +void rrdset_delete_files(RRDSET *st); +void rrdset_save(RRDSET *st); +void rrdset_free(RRDSET *st); -extern char *rrdset_strncpyz_name(char *to, const char *from, size_t length); -extern char *rrdset_cache_dir(RRDHOST *host, const char *id); - -extern void rrddim_free(RRDSET *st, RRDDIM *rd); - -extern int rrddim_compare(void* a, void* b); -extern int rrdset_compare(void* a, void* b); -extern int rrdset_compare_name(void* a, void* b); -extern int rrdfamily_compare(void *a, void *b); +#ifdef NETDATA_RRD_INTERNALS -extern RRDFAMILY *rrdfamily_create(RRDHOST *host, const char *id); -extern void rrdfamily_free(RRDHOST *host, RRDFAMILY *rc); +char *rrdset_cache_dir(RRDHOST *host, const char *id); -#define rrdset_index_add(host, st) (RRDSET *)avl_insert_lock(&((host)->rrdset_root_index), (avl_t *)(st)) -#define rrdset_index_del(host, st) (RRDSET *)avl_remove_lock(&((host)->rrdset_root_index), (avl_t *)(st)) -extern RRDSET *rrdset_index_del_name(RRDHOST *host, RRDSET *st); +void rrddim_free(RRDSET *st, RRDDIM *rd); -extern void rrdset_free(RRDSET *st); -extern void rrdset_reset(RRDSET *st); -extern void rrdset_save(RRDSET *st); -extern void rrdset_delete_files(RRDSET *st); -extern void rrdset_delete_obsolete_dimensions(RRDSET *st); +void rrdset_reset(RRDSET *st); +void rrdset_delete_obsolete_dimensions(RRDSET *st); -extern RRDHOST *rrdhost_create( +RRDHOST *rrdhost_create( const char *hostname, const char *registry_hostname, const char *guid, const char *os, const char *timezone, const char *abbrev_timezone, int32_t utc_offset,const char *tags, const char *program_name, const char *program_version, int update_every, long entries, RRD_MEMORY_MODE memory_mode, unsigned int health_enabled, unsigned int rrdpush_enabled, - char *rrdpush_destination, char *rrdpush_api_key, char *rrdpush_send_charts_matching, struct rrdhost_system_info *system_info, - int is_localhost, bool is_archived); + char *rrdpush_destination, char *rrdpush_api_key, char *rrdpush_send_charts_matching, + bool rrdpush_enable_replication, time_t rrdpush_seconds_to_replicate, time_t rrdpush_replication_step, + struct rrdhost_system_info *system_info, int is_localhost, bool is_archived); #endif /* NETDATA_RRD_INTERNALS */ -extern void set_host_properties( - RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *hostname, const char *registry_hostname, - const char *guid, const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, +void set_host_properties( + RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *registry_hostname, + const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name, const char *program_version); -extern int get_tier_grouping(int tier); +size_t get_tier_grouping(size_t tier); // ---------------------------------------------------------------------------- // RRD DB engine declarations @@ -1331,8 +1400,8 @@ extern int get_tier_grouping(int tier); #endif #include "sqlite/sqlite_functions.h" #include "sqlite/sqlite_context.h" +#include "sqlite/sqlite_metadata.h" #include "sqlite/sqlite_aclk.h" -#include "sqlite/sqlite_aclk_chart.h" #include "sqlite/sqlite_aclk_alert.h" #include "sqlite/sqlite_aclk_node.h" #include "sqlite/sqlite_health.h" diff --git a/database/rrdcalc.c b/database/rrdcalc.c index cab60468b..aad945a90 100644 --- a/database/rrdcalc.c +++ b/database/rrdcalc.c @@ -1,10 +1,9 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_HEALTH_INTERNALS #include "rrd.h" // ---------------------------------------------------------------------------- -// RRDCALC management +// RRDCALC helpers inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { switch(status) { @@ -35,55 +34,219 @@ inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { } } -static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) { + netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); + + // re-use old IDs, by looking them up in the alarm log + ALARM_ENTRY *ae = NULL; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if(unlikely(name == ae->name && chart == ae->chart)) { + if(next_event_id) *next_event_id = ae->alarm_event_id + 1; + break; + } + } + + uint32_t alarm_id; + + if(ae) + alarm_id = ae->alarm_id; + + else { + if (unlikely(!host->health_log.next_alarm_id)) + host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); + + alarm_id = host->health_log.next_alarm_id++; + } + + netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); + return alarm_id; +} + +// ---------------------------------------------------------------------------- +// RRDCALC replacing info text variables with RRDSET labels + +static STRING *rrdcalc_replace_variables_with_rrdset_labels(const char *line, RRDCALC *rc) { + if (!line || !*line) + return NULL; + + size_t pos = 0; + char *temp = strdupz(line); + char var[RRDCALC_VAR_MAX]; + char *m, *lbl_value = NULL; + + while ((m = strchr(temp + pos, '$'))) { + int i = 0; + char *e = m; + while (*e) { + + if (*e == ' ' || i == RRDCALC_VAR_MAX - 1) + break; + else + var[i] = *e; + + e++; + i++; + } + + var[i] = '\0'; + pos = m - temp + 1; + + if (!strcmp(var, RRDCALC_VAR_FAMILY)) { + char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "", m); + freez(temp); + temp = buf; + } + else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) { + if(likely(rc->rrdset && rc->rrdset->rrdlabels)) { + rrdlabels_get_value_to_char_or_null(rc->rrdset->rrdlabels, &lbl_value, var+RRDCALC_VAR_LABEL_LEN); + if (lbl_value) { + char *buf = find_and_replace(temp, var, lbl_value, m); + freez(temp); + temp = buf; + freez(lbl_value); + } + } + } + } + + STRING *ret = string_strdupz(temp); + freez(temp); + + return ret; +} + +void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc) { + if(!rc->rrdset || !rc->original_info || !rc->rrdset->rrdlabels) return; + + size_t labels_version = dictionary_version(rc->rrdset->rrdlabels); + if(rc->labels_version != labels_version) { + + STRING *old = rc->info; + rc->info = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_info(rc), rc); + string_freez(old); + + rc->labels_version = labels_version; + } +} + +// ---------------------------------------------------------------------------- +// RRDCALC index management for RRDSET + +// the dictionary requires a unique key for every item +// we use {chart id}.{alert name} for both the RRDHOST and RRDSET alert indexes. + +#define RRDCALC_MAX_KEY_SIZE 1024 +static size_t rrdcalc_key(char *dst, size_t dst_len, const char *chart, const char *alert) { + return snprintfz(dst, dst_len, "%s/%s", chart, alert); +} + +const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name) { + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name); + + const RRDCALC_ACQUIRED *rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1)); + + if(!rca) { + key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_name(st), alert_name); + rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)(key_len + 1)); + } + + return rca; +} + +void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca) { + if(!rca) return; + + dictionary_acquired_item_release(st->rrdhost->rrdcalc_root_index, (const DICTIONARY_ITEM *)rca); +} + +RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca) { + if(rca) + return dictionary_acquired_item_value((const DICTIONARY_ITEM *)rca); + + return NULL; +} + +// ---------------------------------------------------------------------------- +// RRDCALC managing the linking with RRDSET + +static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) { RRDHOST *host = st->rrdhost; - debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); rc->last_status_change = now_realtime_sec(); rc->rrdset = st; - rc->rrdset_next = st->alarms; - rc->rrdset_prev = NULL; - - if(rc->rrdset_next) - rc->rrdset_next->rrdset_prev = rc; - - st->alarms = rc; + netdata_rwlock_wrlock(&st->alerts.rwlock); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(st->alerts.base, rc, prev, next); + netdata_rwlock_unlock(&st->alerts.rwlock); if(rc->update_every < rc->rrdset->update_every) { - error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); + error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); rc->update_every = rc->rrdset->update_every; } if(!isnan(rc->green) && isnan(st->green)) { debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " NETDATA_DOUBLE_FORMAT_AUTO - " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); + " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->green, rc->green); st->green = rc->green; } if(!isnan(rc->red) && isnan(st->red)) { debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO - ".", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); + ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->red, rc->red); st->red = rc->red; } - rc->local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_LOCAL_VAR, &rc->value); - rc->family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_FAMILY_VAR, &rc->value); - - char fullname[RRDVAR_MAX_LENGTH + 1]; - snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name); - rc->hostid = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR, &rc->value); + char buf[RRDVAR_MAX_LENGTH + 1]; + snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), rrdcalc_name(rc)); + STRING *rrdset_name_rrdcalc_name = string_strdupz(buf); + snprintfz(buf, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), rrdcalc_name(rc)); + STRING *rrdset_id_rrdcalc_name = string_strdupz(buf); - snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name); - rc->hostname = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR, &rc->value); - - if(rc->hostid && !rc->hostname) - rc->hostid->options |= RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR; + rc->rrdvar_local = rrdvar_add_and_acquire( + "local", + st->rrdvars, + rc->name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_LOCAL_VAR, + &rc->value); - if(!rc->units) rc->units = strdupz(st->units); + rc->rrdvar_family = rrdvar_add_and_acquire( + "family", + rrdfamily_rrdvars_dict(st->rrdfamily), + rc->name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_FAMILY_VAR, + &rc->value); + + rc->rrdvar_host_chart_name = rrdvar_add_and_acquire( + "host", + host->rrdvars, + rrdset_name_rrdcalc_name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR, + &rc->value); + + rc->rrdvar_host_chart_id = rrdvar_add_and_acquire( + "host", + host->rrdvars, + rrdset_id_rrdcalc_name, + RRDVAR_TYPE_CALCULATED, + RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR | ((rc->rrdvar_host_chart_name) ? 0 : RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR), + &rc->value); + + string_freez(rrdset_id_rrdcalc_name); + string_freez(rrdset_name_rrdcalc_name); + + if(!rc->units) + rc->units = string_dup(st->units); + + rrdcalc_update_info_using_rrdset_labels(rc); time_t now = now_realtime_sec(); + ALARM_ENTRY *ae = health_create_alarm_entry( host, rc->id, @@ -108,216 +271,224 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { rc->units, rc->info, 0, - 0); - health_alarm_log(host, ae); -} - -static int rrdcalc_is_matching_rrdset(RRDCALC *rc, RRDSET *st) { - if((rc->hash_chart != st->hash || strcmp(rc->chart, st->id) != 0) && - (rc->hash_chart != st->hash_name || strcmp(rc->chart, st->name) != 0)) - return 0; - - if (rc->module_pattern && !simple_pattern_matches(rc->module_pattern, st->module_name)) - return 0; - - if (rc->plugin_pattern && !simple_pattern_matches(rc->plugin_pattern, st->plugin_name)) - return 0; + rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0); - if (st->rrdhost->host_labels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(st->rrdhost->host_labels, rc->host_labels_pattern, '=')) - return 0; - - return 1; -} - -// this has to be called while the RRDHOST is locked -inline void rrdsetcalc_link_matching(RRDSET *st) { - RRDHOST *host = st->rrdhost; - // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); - - RRDCALC *rc; - for(rc = host->alarms; rc ; rc = rc->next) { - if(unlikely(rc->rrdset)) - continue; - - if(unlikely(rrdcalc_is_matching_rrdset(rc, st))) - rrdsetcalc_link(st, rc); - } + health_alarm_log_add_entry(host, ae); } -// this has to be called while the RRDHOST is locked -inline void rrdsetcalc_unlink(RRDCALC *rc) { +static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) { RRDSET *st = rc->rrdset; if(!st) { - debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); - error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); + debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); return; } RRDHOST *host = st->rrdhost; time_t now = now_realtime_sec(); - ALARM_ENTRY *ae = health_create_alarm_entry( - host, - rc->id, - rc->next_event_id++, - rc->config_hash_id, - now, - rc->name, - rc->rrdset->id, - rc->rrdset->context, - rc->rrdset->family, - rc->classification, - rc->component, - rc->type, - rc->exec, - rc->recipient, - now - rc->last_status_change, - rc->old_value, - rc->value, - rc->status, - RRDCALC_STATUS_REMOVED, - rc->source, - rc->units, - rc->info, - 0, - 0); - health_alarm_log(host, ae); - debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + if (likely(rc->status != RRDCALC_STATUS_REMOVED)) { + ALARM_ENTRY *ae = health_create_alarm_entry( + host, + rc->id, + rc->next_event_id++, + rc->config_hash_id, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->context, + rc->rrdset->family, + rc->classification, + rc->component, + rc->type, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_REMOVED, + rc->source, + rc->units, + rc->info, + 0, + 0); + + health_alarm_log_add_entry(host, ae); + } + + debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); // unlink it - if(rc->rrdset_prev) - rc->rrdset_prev->rrdset_next = rc->rrdset_next; - if(rc->rrdset_next) - rc->rrdset_next->rrdset_prev = rc->rrdset_prev; + if(!having_ll_wrlock) + netdata_rwlock_wrlock(&st->alerts.rwlock); - if(st->alarms == rc) - st->alarms = rc->rrdset_next; + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(st->alerts.base, rc, prev, next); - rc->rrdset_prev = rc->rrdset_next = NULL; + if(!having_ll_wrlock) + netdata_rwlock_unlock(&st->alerts.rwlock); - rrdvar_free(host, &st->rrdvar_root_index, rc->local); - rc->local = NULL; + rc->rrdset = NULL; - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rc->family); - rc->family = NULL; + rrdvar_release_and_del(st->rrdvars, rc->rrdvar_local); + rc->rrdvar_local = NULL; - rrdvar_free(host, &host->rrdvar_root_index, rc->hostid); - rc->hostid = NULL; + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rc->rrdvar_family); + rc->rrdvar_family = NULL; - rrdvar_free(host, &host->rrdvar_root_index, rc->hostname); - rc->hostname = NULL; + rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_id); + rc->rrdvar_host_chart_id = NULL; - rc->rrdset = NULL; + rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_name); + rc->rrdvar_host_chart_name = NULL; // RRDCALC will remain in RRDHOST // so that if the matching chart is found in the future // it will be applied automatically } -RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { - RRDCALC *rc; - uint32_t hash = simple_hash(name); +static inline bool rrdcalc_check_if_it_matches_rrdset(RRDCALC *rc, RRDSET *st) { + if ( (rc->chart != st->id) + && (rc->chart != st->name)) + return false; - for( rc = st->alarms; rc ; rc = rc->rrdset_next ) { - if(unlikely(rc->hash == hash && !strcmp(rc->name, name))) - return rc; - } + if (rc->module_pattern && !simple_pattern_matches(rc->module_pattern, rrdset_module_name(st))) + return false; - return NULL; + if (rc->plugin_pattern && !simple_pattern_matches(rc->plugin_pattern, rrdset_plugin_name(st))) + return false; + + if (st->rrdhost->rrdlabels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(st->rrdhost->rrdlabels, rc->host_labels_pattern, '=')) + return false; + + return true; } -inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) { +void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st) { + RRDHOST *host = st->rrdhost; + // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(host, rc) { + if(rc->rrdset) + continue; - if(unlikely(!chart)) { - error("attempt to find RRDCALC '%s' without giving a chart name", name); - return 1; + if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st))) + rrdcalc_link_to_rrdset(st, rc); } + foreach_rrdcalc_in_rrdhost_done(rc); +} - if(unlikely(!hash_chart)) hash_chart = simple_hash(chart); - if(unlikely(!hash_name)) hash_name = simple_hash(name); +static inline int rrdcalc_check_and_link_rrdset_callback(RRDSET *st, void *rrdcalc) { + RRDCALC *rc = rrdcalc; - // make sure it does not already exist - for(rc = host->alarms; rc ; rc = rc->next) { - if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) { - debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); - info("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); - return 1; - } + if(unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st))) { + rrdcalc_link_to_rrdset(st, rc); + return -1; } return 0; } -inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) { - if(chart && name) { - uint32_t hash_chart = simple_hash(chart); - uint32_t hash_name = simple_hash(name); - - // re-use old IDs, by looking them up in the alarm log - ALARM_ENTRY *ae; - for(ae = host->health_log.alarms; ae ;ae = ae->next) { - if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) { - if(next_event_id) *next_event_id = ae->alarm_event_id + 1; - return ae->alarm_id; - } +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - constructor + +struct rrdcalc_constructor { + RRDHOST *rrdhost; // the host we operate upon + RRDCALC *from_config; // points to the original RRDCALC, as loaded from the config + RRDCALCTEMPLATE *from_rrdcalctemplate; // the template this alert is generated from + RRDSET *rrdset; // when this comes from rrdcalctemplate, we have a matching rrdset + const char *overwrite_alert_name; // when we have a dimension foreach, the alert is renamed + const char *overwrite_dimensions; // when we have a dimension foreach, the dimensions filter is renamed + + enum { + RRDCALC_REACT_NONE, + RRDCALC_REACT_NEW, + } react_action; + + bool existing_from_template; +}; + +static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + RRDHOST *host = ctr->rrdhost; + + rc->key = string_strdupz(dictionary_acquired_item_name(item)); + + if(ctr->from_rrdcalctemplate) { + rc->run_flags |= RRDCALC_FLAG_FROM_TEMPLATE; + + RRDCALCTEMPLATE *rt = ctr->from_rrdcalctemplate; + RRDSET *st = ctr->rrdset; + + rc->next_event_id = 1; + rc->name = (ctr->overwrite_alert_name) ? string_strdupz(ctr->overwrite_alert_name) : string_dup(rt->name); + rc->chart = string_dup(st->id); + uuid_copy(rc->config_hash_id, rt->config_hash_id); + + rc->dimensions = (ctr->overwrite_dimensions) ? string_strdupz(ctr->overwrite_dimensions) : string_dup(rt->dimensions); + rc->foreach_dimension = NULL; + rc->foreach_dimension_pattern = NULL; + + rc->green = rt->green; + rc->red = rt->red; + rc->value = NAN; + rc->old_value = NAN; + + rc->delay_up_duration = rt->delay_up_duration; + rc->delay_down_duration = rt->delay_down_duration; + rc->delay_max_duration = rt->delay_max_duration; + rc->delay_multiplier = rt->delay_multiplier; + + rc->last_repeat = 0; + rc->times_repeat = 0; + rc->warn_repeat_every = rt->warn_repeat_every; + rc->crit_repeat_every = rt->crit_repeat_every; + + rc->group = rt->group; + rc->after = rt->after; + rc->before = rt->before; + rc->update_every = rt->update_every; + rc->options = rt->options; + + rc->exec = string_dup(rt->exec); + rc->recipient = string_dup(rt->recipient); + rc->source = string_dup(rt->source); + rc->units = string_dup(rt->units); + rc->info = string_dup(rt->info); + rc->original_info = string_dup(rt->info); + + rc->classification = string_dup(rt->classification); + rc->component = string_dup(rt->component); + rc->type = string_dup(rt->type); + + if(rt->calculation) { + rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); + if(!rc->calculation) + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->calculation->source); + } + if(rt->warning) { + rc->warning = expression_parse(rt->warning->source, NULL, NULL); + if(!rc->warning) + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->warning->source); + } + if(rt->critical) { + rc->critical = expression_parse(rt->critical->source, NULL, NULL); + if(!rc->critical) + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->critical->source); } } - - if (unlikely(!host->health_log.next_alarm_id)) - host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); - - return host->health_log.next_alarm_id++; -} - -/** - * Alarm name with dimension - * - * Change the name of the current alarm appending a new diagram. - * - * @param name the alarm name - * @param namelen is the length of the previous vector. - * @param dim the dimension of the chart. - * @param dimlen is the length of the previous vector. - * - * @return It returns the new name on success and the old otherwise - */ -char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen) { - char *newname,*move; - - newname = mallocz(namelen + dimlen + 2); - move = newname; - memcpy(move, name, namelen); - move += namelen; - - *move++ = '_'; - memcpy(move, dim, dimlen); - move += dimlen; - *move = '\0'; - - return newname; -} - -/** - * Remove pipe comma - * - * Remove the pipes and commas converting to space. - * - * @param str the string to change. - */ -void dimension_remove_pipe_comma(char *str) { - while(*str) { - if(*str == '|' || *str == ',') *str = ' '; - - str++; + else if(ctr->from_config) { + // dictionary has already copied all the members values and pointers + // no need for additional work in this case + ; } -} -inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { - rrdhost_check_rdlock(host); + rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); if(rc->calculation) { rc->calculation->status = &rc->status; @@ -343,351 +514,241 @@ inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { rc->critical->rrdcalc = rc; } - if(!rc->foreachdim) { - // link it to the host alarms list - if(likely(host->alarms)) { - // append it - RRDCALC *t; - for(t = host->alarms; t && t->next ; t = t->next) ; - t->next = rc; - } - else { - host->alarms = rc; - } - - // link it to its chart - RRDSET *st; - rrdset_foreach_read(st, host) { - if(rrdcalc_is_matching_rrdset(rc, st)) { - rrdsetcalc_link(st, rc); - break; - } - } - } else { - //link it case there is a foreach - if(likely(host->alarms_with_foreach)) { - // append it - RRDCALC *t; - for(t = host->alarms_with_foreach; t && t->next ; t = t->next) ; - t->next = rc; - } - else { - host->alarms_with_foreach = rc; - } + debug(D_HEALTH, "Health added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO + ", red " NETDATA_DOUBLE_FORMAT_AUTO + ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", + rrdcalc_chart_name(rc), + rrdcalc_name(rc), + (rc->exec)?rrdcalc_exec(rc):"DEFAULT", + (rc->recipient)?rrdcalc_recipient(rc):"DEFAULT", + rc->green, + rc->red, + (int)rc->group, + rc->after, + rc->before, + rc->options, + (rc->dimensions)?rrdcalc_dimensions(rc):"NONE", + (rc->foreach_dimension)?rrdcalc_foreachdim(rc):"NONE", + rc->update_every, + (rc->calculation)?rc->calculation->parsed_as:"NONE", + (rc->warning)?rc->warning->parsed_as:"NONE", + (rc->critical)?rc->critical->parsed_as:"NONE", + rrdcalc_source(rc), + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier, + rc->warn_repeat_every, + rc->crit_repeat_every + ); - //I am not linking this alarm direct to the host here, this will be done when the children is created - } + ctr->react_action = RRDCALC_REACT_NEW; } -inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { - debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); - - if(rrdcalc_exists(host, chart, rt->name, 0, 0)) - return NULL; - - RRDCALC *rc = callocz(1, sizeof(RRDCALC)); - rc->next_event_id = 1; - rc->name = strdupz(rt->name); - rc->hash = simple_hash(rc->name); - rc->chart = strdupz(chart); - rc->hash_chart = simple_hash(rc->chart); - uuid_copy(rc->config_hash_id, rt->config_hash_id); - - rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); - - if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); - if(rt->foreachdim) { - rc->foreachdim = strdupz(rt->foreachdim); - rc->spdim = health_pattern_from_foreach(rc->foreachdim); - } - rc->foreachcounter = rt->foreachcounter; - - rc->green = rt->green; - rc->red = rt->red; - rc->value = NAN; - rc->old_value = NAN; - - rc->delay_up_duration = rt->delay_up_duration; - rc->delay_down_duration = rt->delay_down_duration; - rc->delay_max_duration = rt->delay_max_duration; - rc->delay_multiplier = rt->delay_multiplier; - - rc->last_repeat = 0; - rc->times_repeat = 0; - rc->warn_repeat_every = rt->warn_repeat_every; - rc->crit_repeat_every = rt->crit_repeat_every; - - rc->group = rt->group; - rc->after = rt->after; - rc->before = rt->before; - rc->update_every = rt->update_every; - rc->options = rt->options; - - if(rt->exec) rc->exec = strdupz(rt->exec); - if(rt->recipient) rc->recipient = strdupz(rt->recipient); - if(rt->source) rc->source = strdupz(rt->source); - if(rt->units) rc->units = strdupz(rt->units); - if(rt->info) rc->info = strdupz(rt->info); - - if (rt->classification) rc->classification = strdupz(rt->classification); - if (rt->component) rc->component = strdupz(rt->component); - if (rt->type) rc->type = strdupz(rt->type); - - if(rt->calculation) { - rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); - if(!rc->calculation) - error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source); - } - if(rt->warning) { - rc->warning = expression_parse(rt->warning->source, NULL, NULL); - if(!rc->warning) - error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source); - } - if(rt->critical) { - rc->critical = expression_parse(rt->critical->source, NULL, NULL); - if(!rc->critical) - error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); - } +static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; - debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO - ", red " NETDATA_DOUBLE_FORMAT_AUTO - ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", - (rc->chart)?rc->chart:"NOCHART", - rc->name, - (rc->exec)?rc->exec:"DEFAULT", - (rc->recipient)?rc->recipient:"DEFAULT", - rc->green, - rc->red, - (int)rc->group, - rc->after, - rc->before, - rc->options, - (rc->dimensions)?rc->dimensions:"NONE", - (rc->foreachdim)?rc->foreachdim:"NONE", - rc->update_every, - (rc->calculation)?rc->calculation->parsed_as:"NONE", - (rc->warning)?rc->warning->parsed_as:"NONE", - (rc->critical)?rc->critical->parsed_as:"NONE", - rc->source, - rc->delay_up_duration, - rc->delay_down_duration, - rc->delay_max_duration, - rc->delay_multiplier, - rc->warn_repeat_every, - rc->crit_repeat_every - ); + if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) + ctr->existing_from_template = true; + else + ctr->existing_from_template = false; - rrdcalc_add_to_host(host, rc); - if(!rt->foreachdim) { - RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl_t *)rc); - if (rdcmp != rc) { - error("Cannot insert the alarm index ID %s",rc->name); - } - } + ctr->react_action = RRDCALC_REACT_NONE; - return rc; + return false; } -/** - * Create from RRDCALC - * - * Create a new alarm using another alarm as template. - * - * @param rc is the alarm that will be used as source - * @param host is the host structure. - * @param name is the newest chart name. - * @param dimension is the current dimension - * @param foreachdim the whole list of dimension - * - * @return it returns the new alarm changed. - */ -inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension) { - RRDCALC *newrc = callocz(1, sizeof(RRDCALC)); - - newrc->next_event_id = 1; - newrc->id = rrdcalc_get_unique_id(host, rc->chart, name, &rc->next_event_id); - newrc->name = (char *)name; - newrc->hash = simple_hash(newrc->name); - newrc->chart = strdupz(rc->chart); - newrc->hash_chart = simple_hash(rc->chart); - uuid_copy(newrc->config_hash_id, *((uuid_t *) &rc->config_hash_id)); - - newrc->dimensions = strdupz(dimension); - newrc->foreachdim = NULL; - rc->foreachcounter++; - newrc->foreachcounter = rc->foreachcounter; - - newrc->green = rc->green; - newrc->red = rc->red; - newrc->value = NAN; - newrc->old_value = NAN; - - newrc->delay_up_duration = rc->delay_up_duration; - newrc->delay_down_duration = rc->delay_down_duration; - newrc->delay_max_duration = rc->delay_max_duration; - newrc->delay_multiplier = rc->delay_multiplier; - - newrc->last_repeat = 0; - newrc->times_repeat = 0; - newrc->warn_repeat_every = rc->warn_repeat_every; - newrc->crit_repeat_every = rc->crit_repeat_every; - - newrc->group = rc->group; - newrc->after = rc->after; - newrc->before = rc->before; - newrc->update_every = rc->update_every; - newrc->options = rc->options; - - if(rc->exec) newrc->exec = strdupz(rc->exec); - if(rc->recipient) newrc->recipient = strdupz(rc->recipient); - if(rc->source) newrc->source = strdupz(rc->source); - if(rc->units) newrc->units = strdupz(rc->units); - if(rc->info) newrc->info = strdupz(rc->info); - - if (rc->classification) newrc->classification = strdupz(rc->classification); - if (rc->component) newrc->component = strdupz(rc->component); - if (rc->type) newrc->type = strdupz(rc->type); +static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + RRDHOST *host = ctr->rrdhost; - if(rc->calculation) { - newrc->calculation = expression_parse(rc->calculation->source, NULL, NULL); - if(!newrc->calculation) - error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rc->chart, rc->name, rc->calculation->source); - } + if(ctr->react_action == RRDCALC_REACT_NEW) { + if(ctr->rrdset) + rrdcalc_link_to_rrdset(ctr->rrdset, rc); - if(rc->warning) { - newrc->warning = expression_parse(rc->warning->source, NULL, NULL); - if(!newrc->warning) - error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rc->chart, rc->name, rc->warning->source); - } - - if(rc->critical) { - newrc->critical = expression_parse(rc->critical->source, NULL, NULL); - if(!newrc->critical) - error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rc->chart, rc->name, rc->critical->source); + else if (ctr->from_rrdcalctemplate) + rrdcontext_foreach_instance_with_rrdset_in_context(host, string2str(ctr->from_rrdcalctemplate->context), rrdcalc_check_and_link_rrdset_callback, rc); } - - return newrc; } -void rrdcalc_free(RRDCALC *rc) { +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - destructor + +static void rrdcalc_free_internals(RRDCALC *rc) { if(unlikely(!rc)) return; expression_free(rc->calculation); expression_free(rc->warning); expression_free(rc->critical); - freez(rc->name); - freez(rc->chart); - freez(rc->family); - freez(rc->dimensions); - freez(rc->foreachdim); - freez(rc->exec); - freez(rc->recipient); - freez(rc->source); - freez(rc->units); - freez(rc->info); - freez(rc->classification); - freez(rc->component); - freez(rc->type); - simple_pattern_free(rc->spdim); - freez(rc->host_labels); + string_freez(rc->key); + string_freez(rc->name); + string_freez(rc->chart); + string_freez(rc->dimensions); + string_freez(rc->foreach_dimension); + string_freez(rc->exec); + string_freez(rc->recipient); + string_freez(rc->source); + string_freez(rc->units); + string_freez(rc->info); + string_freez(rc->original_info); + string_freez(rc->classification); + string_freez(rc->component); + string_freez(rc->type); + string_freez(rc->host_labels); + string_freez(rc->module_match); + string_freez(rc->plugin_match); + + simple_pattern_free(rc->foreach_dimension_pattern); simple_pattern_free(rc->host_labels_pattern); - freez(rc->module_match); simple_pattern_free(rc->module_pattern); - freez(rc->plugin_match); simple_pattern_free(rc->plugin_pattern); - freez(rc); } -void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { - if(unlikely(!rc)) return; +static void rrdcalc_rrdhost_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdhost __maybe_unused) { + RRDCALC *rc = rrdcalc; + //RRDHOST *host = rrdhost; - debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + if(unlikely(rc->rrdset)) + rrdcalc_unlink_from_rrdset(rc, false); - // unlink it from RRDSET - if(rc->rrdset) rrdsetcalc_unlink(rc); + // any destruction actions that require other locks + // have to be placed in rrdcalc_del(), because the object is actually locked for deletion - // unlink it from RRDHOST - if(unlikely(rc == host->alarms)) - host->alarms = rc->next; - else { - RRDCALC *t; - for(t = host->alarms; t && t->next != rc; t = t->next) ; - if(t) { - t->next = rc->next; - rc->next = NULL; - } - else - error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); - } + rrdcalc_free_internals(rc); +} - RRDCALC *rdcmp = (RRDCALC *) avl_search_lock(&(host)->alarms_idx_health_log, (avl_t *)rc); - if (rdcmp) { - rdcmp = (RRDCALC *) avl_remove_lock(&(host)->alarms_idx_health_log, (avl_t *)rc); - if (!rdcmp) { - error("Cannot remove the health alarm index from health_log"); - } - } +// ---------------------------------------------------------------------------- +// RRDCALC rrdhost index management - index API - rdcmp = (RRDCALC *) avl_search_lock(&(host)->alarms_idx_name, (avl_t *)rc); - if (rdcmp) { - rdcmp = (RRDCALC *) avl_remove_lock(&(host)->alarms_idx_name, (avl_t *)rc); - if (!rdcmp) { - error("Cannot remove the health alarm index from idx_name"); - } +void rrdcalc_rrdhost_index_init(RRDHOST *host) { + if(!host->rrdcalc_root_index) { + host->rrdcalc_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL); + dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL); + dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL); + dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host); } +} - rrdcalc_free(rc); +void rrdcalc_rrdhost_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdcalc_root_index); + host->rrdcalc_root_index = NULL; } -void rrdcalc_foreach_unlink_and_free(RRDHOST *host, RRDCALC *rc) { +void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions) { + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), + overwrite_alert_name?overwrite_alert_name:string2str(rt->name)); + + struct rrdcalc_constructor tmp = { + .rrdhost = host, + .from_config = NULL, + .from_rrdcalctemplate = rt, + .rrdset = st, + .overwrite_alert_name = overwrite_alert_name, + .overwrite_dimensions = overwrite_dimensions, + .react_action = RRDCALC_REACT_NONE, + .existing_from_template = false, + }; + + dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDCALC), &tmp); + if(tmp.react_action != RRDCALC_REACT_NEW && tmp.existing_from_template == false) + error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.", + string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host)); +} - if(unlikely(rc == host->alarms_with_foreach)) - host->alarms_with_foreach = rc->next; - else { - RRDCALC *t; - for(t = host->alarms_with_foreach; t && t->next != rc; t = t->next) ; - if(t) { - t->next = rc->next; - rc->next = NULL; +int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc) { + if(!rc->chart) { + error("Health configuration for alarm '%s' does not have a chart", rrdcalc_name(rc)); + return 0; + } + + if(!rc->update_every) { + error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + return 0; + } + + if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->calculation && !rc->warning && !rc->critical) { + error("Health configuration for alarm '%s.%s' is useless (no db lookup, no calculation, no warning and no critical expressions)", rrdcalc_chart_name(rc), rrdcalc_name(rc)); + return 0; + } + + char key[RRDCALC_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, string2str(rc->chart), string2str(rc->name)); + + struct rrdcalc_constructor tmp = { + .rrdhost = host, + .from_config = rc, + .from_rrdcalctemplate = NULL, + .rrdset = NULL, + .react_action = RRDCALC_REACT_NONE, + }; + + int ret = 1; + RRDCALC *t = dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), rc, sizeof(RRDCALC), &tmp); + if(tmp.react_action == RRDCALC_REACT_NEW) { + // we copied rc into the dictionary, so we have to free the container here + freez(rc); + rc = t; + + // since we loaded this config from configuration, we need to check if we can link it to alarms + RRDSET *st; + rrdset_foreach_read(st, host) { + if (unlikely(rrdcalc_check_and_link_rrdset_callback(st, rc) == -1)) + break; } - else - error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + rrdset_foreach_done(st); } + else { + error( + "RRDCALC: from config '%s' on chart '%s' failed to be added to host '%s'. It already exists.", + string2str(rc->name), + string2str(rc->chart), + rrdhost_hostname(host)); + + ret = 0; + + // free all of it, internals and the container + rrdcalc_free_unused_rrdcalc_loaded_from_config(rc); + } + + return ret; +} - rrdcalc_free(rc); +static void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock) { + if(rc->rrdset) + rrdcalc_unlink_from_rrdset(rc, having_ll_wrlock); + + dictionary_del_advanced(host->rrdcalc_root_index, string2str(rc->key), (ssize_t)string_strlen(rc->key) + 1); } -static void rrdcalc_labels_unlink_alarm_loop(RRDHOST *host, RRDCALC *alarms) { - for(RRDCALC *rc = alarms ; rc ; ) { - RRDCALC *rc_next = rc->next; - if (!rc->host_labels) { - rc = rc_next; +// ---------------------------------------------------------------------------- +// RRDCALC cleanup API functions + +void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host) { + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_reentrant(host, rc) { + if (!rc->host_labels) continue; - } - if(!rrdlabels_match_simple_pattern_parsed(host->host_labels, rc->host_labels_pattern, '=')) { + if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rc->host_labels_pattern, '=')) { info("Health configuration for alarm '%s' cannot be applied, because the host %s does not have the label(s) '%s'", - rc->name, - host->hostname, - rc->host_labels); + rrdcalc_name(rc), + rrdhost_hostname(host), + rrdcalc_host_labels(rc)); - if(host->alarms == alarms) - rrdcalc_unlink_and_free(host, rc); - else - rrdcalc_foreach_unlink_and_free(host, rc); + rrdcalc_unlink_and_delete(host, rc, false); } - rc = rc_next; } + foreach_rrdcalc_in_rrdhost_done(rc); } -void rrdcalc_labels_unlink_alarm_from_host(RRDHOST *host) { - rrdcalc_labels_unlink_alarm_loop(host, host->alarms); - rrdcalc_labels_unlink_alarm_loop(host, host->alarms_with_foreach); -} - -void rrdcalc_labels_unlink() { +void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts() { rrd_rdlock(); RRDHOST *host; @@ -695,70 +756,45 @@ void rrdcalc_labels_unlink() { if (unlikely(!host->health_enabled)) continue; - if (host->host_labels) { - rrdhost_wrlock(host); - - rrdcalc_labels_unlink_alarm_from_host(host); - - rrdhost_unlock(host); - } + if (host->rrdlabels) + rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(host); } rrd_unlock(); } -// ---------------------------------------------------------------------------- -// Alarm - - -/** - * Alarm is repeating - * - * Is this alarm repeating ? - * - * @param host The structure that has the binary tree - * @param alarm_id the id of the alarm to search - * - * @return It returns 1 case it is repeating and 0 otherwise - */ -int alarm_isrepeating(RRDHOST *host, uint32_t alarm_id) { - RRDCALC findme; - findme.id = alarm_id; - RRDCALC *rc = (RRDCALC *)avl_search_lock(&host->alarms_idx_health_log, (avl_t *)&findme); - if (!rc) { - return 0; +void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st) { + RRDCALC *rc, *last = NULL; + netdata_rwlock_wrlock(&st->alerts.rwlock); + while((rc = st->alerts.base)) { + if(last == rc) { + error("RRDCALC: malformed list of alerts linked to chart - cannot cleanup - giving up."); + break; + } + last = rc; + + if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) { + // if the alert comes from a template we can just delete it + rrdcalc_unlink_and_delete(st->rrdhost, rc, true); + } + else { + // this is a configuration for a specific chart + // it should stay in the list + rrdcalc_unlink_from_rrdset(rc, true); + } + } - return rrdcalc_isrepeating(rc); + netdata_rwlock_unlock(&st->alerts.rwlock); } -/** - * Entry is repeating - * - * Check whether the id of alarm entry is yet present in the host structure - * - * @param host The structure that has the binary tree - * @param ae the alarm entry - * - * @return It returns 1 case it is repeating and 0 otherwise - */ -int alarm_entry_isrepeating(RRDHOST *host, ALARM_ENTRY *ae) { - return alarm_isrepeating(host, ae->alarm_id); +void rrdcalc_delete_all(RRDHOST *host) { + dictionary_flush(host->rrdcalc_root_index); } -/** - * Max last repeat - * - * Check the maximum last_repeat for the alarms associated a host - * - * @param host The structure that has the binary tree - * - * @return It returns 1 case it is repeating and 0 otherwise - */ -RRDCALC *alarm_max_last_repeat(RRDHOST *host, char *alarm_name,uint32_t hash) { - RRDCALC findme; - findme.name = alarm_name; - findme.hash = hash; - RRDCALC *rc = (RRDCALC *)avl_search_lock(&host->alarms_idx_name, (avl_t *)&findme); - - return rc; +void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc) { + if(rc->rrdset) + rrdcalc_unlink_from_rrdset(rc, false); + + rrdcalc_free_internals(rc); + freez(rc); } diff --git a/database/rrdcalc.h b/database/rrdcalc.h index 0dcd7ce69..a25c05cc6 100644 --- a/database/rrdcalc.h +++ b/database/rrdcalc.h @@ -10,55 +10,60 @@ // (defined in their update_every member below) // They increase the overhead of netdata. // -// These calculations are allocated and linked (->next) -// under RRDHOST. -// Then are also linked to RRDSET (of course only when the -// chart is found, via ->rrdset_next and ->rrdset_prev). -// This double-linked list is maintained sorted at all times -// having as RRDSET.calculations the RRDCALC to be processed -// next. - -#define RRDCALC_FLAG_DB_ERROR 0x00000001 -#define RRDCALC_FLAG_DB_NAN 0x00000002 -/* #define RRDCALC_FLAG_DB_STALE 0x00000004 */ -#define RRDCALC_FLAG_CALC_ERROR 0x00000008 -#define RRDCALC_FLAG_WARN_ERROR 0x00000010 -#define RRDCALC_FLAG_CRIT_ERROR 0x00000020 -#define RRDCALC_FLAG_RUNNABLE 0x00000040 -#define RRDCALC_FLAG_DISABLED 0x00000080 -#define RRDCALC_FLAG_SILENCED 0x00000100 -#define RRDCALC_FLAG_RUN_ONCE 0x00000200 -#define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 - +// These calculations are stored under RRDHOST. +// Then are also linked to RRDSET (of course only when a +// matching chart is found). + +typedef enum { + RRDCALC_FLAG_DB_ERROR = (1 << 0), + RRDCALC_FLAG_DB_NAN = (1 << 1), + // RRDCALC_FLAG_DB_STALE = (1 << 2), + RRDCALC_FLAG_CALC_ERROR = (1 << 3), + RRDCALC_FLAG_WARN_ERROR = (1 << 4), + RRDCALC_FLAG_CRIT_ERROR = (1 << 5), + RRDCALC_FLAG_RUNNABLE = (1 << 6), + RRDCALC_FLAG_DISABLED = (1 << 7), + RRDCALC_FLAG_SILENCED = (1 << 8), + RRDCALC_FLAG_RUN_ONCE = (1 << 9), + RRDCALC_FLAG_FROM_TEMPLATE = (1 << 10), // the rrdcalc has been created from a template +} RRDCALC_FLAGS; + +typedef enum { + // This list uses several other options from RRDR_OPTIONS for db lookups. + // To add an item here, you need to reserve a bit in RRDR_OPTIONS. + RRDCALC_OPTION_NO_CLEAR_NOTIFICATION = 0x80000000, +} RRDCALC_OPTIONS; + +#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION) struct rrdcalc { - avl_t avl; // the index, with key the id - this has to be first! + STRING *key; // the unique key in the host's rrdcalc_root_index + uint32_t id; // the unique id of this alarm uint32_t next_event_id; // the next event id that will be used for this alarm - char *name; // the name of this alarm - uint32_t hash; // the hash of the alarm name uuid_t config_hash_id; // a predictable hash_id based on specific alert configuration - char *exec; // the command to execute when this alarm switches state - char *recipient; // the recipient of the alarm (the first parameter to exec) + STRING *name; // the name of this alarm + STRING *chart; // the chart id this should be linked to - char *classification; // the class that this alarm belongs - char *component; // the component that this alarm refers to - char *type; // type of the alarm + STRING *exec; // the command to execute when this alarm switches state + STRING *recipient; // the recipient of the alarm (the first parameter to exec) - char *chart; // the chart id this should be linked to - uint32_t hash_chart; + STRING *classification; // the class that this alarm belongs + STRING *component; // the component that this alarm refers to + STRING *type; // type of the alarm - char *plugin_match; //the plugin name that should be linked to + STRING *plugin_match; // the plugin name that should be linked to SIMPLE_PATTERN *plugin_pattern; - char *module_match; //the module name that should be linked to + STRING *module_match; // the module name that should be linked to SIMPLE_PATTERN *module_pattern; - char *source; // the source of this alarm - char *units; // the units of the alarm - char *info; // a short description of the alarm + STRING *source; // the source of this alarm + STRING *units; // the units of the alarm + STRING *original_info; // the original info field before any variable replacement + STRING *info; // a short description of the alarm int update_every; // update frequency for the alarm @@ -69,15 +74,13 @@ struct rrdcalc { // ------------------------------------------------------------------------ // database lookup settings - char *dimensions; // the chart dimensions - char *foreachdim; // the group of dimensions that the `foreach` will be applied. - SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart. - int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the - // children + STRING *dimensions; // the chart dimensions + STRING *foreach_dimension; // the group of dimensions that the `foreach` will be applied. + SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart. RRDR_GROUPING group; // grouping method: average, max, etc. int before; // ending point in time-series int after; // starting point in time-series - uint32_t options; // calculation options + RRDCALC_OPTIONS options; // configuration options // ------------------------------------------------------------------------ // expressions related to the alarm @@ -98,29 +101,29 @@ struct rrdcalc { // ------------------------------------------------------------------------ // notification repeat settings - uint32_t warn_repeat_every; // interval between repeating warning notifications - uint32_t crit_repeat_every; // interval between repeating critical notifications + uint32_t warn_repeat_every; // interval between repeating warning notifications + uint32_t crit_repeat_every; // interval between repeating critical notifications // ------------------------------------------------------------------------ // Labels settings - char *host_labels; // the label read from an alarm file + STRING *host_labels; // the label read from an alarm file SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels // ------------------------------------------------------------------------ // runtime information - RRDCALC_STATUS old_status; // the old status of the alarm + RRDCALC_STATUS old_status; // the old status of the alarm RRDCALC_STATUS status; // the current status of the alarm - NETDATA_DOUBLE value; // the current value of the alarm - NETDATA_DOUBLE old_value; // the previous value of the alarm + NETDATA_DOUBLE value; // the current value of the alarm + NETDATA_DOUBLE old_value; // the previous value of the alarm - uint32_t rrdcalc_flags; // check RRDCALC_FLAG_* + RRDCALC_FLAGS run_flags; // check RRDCALC_FLAG_* time_t last_updated; // the last update timestamp of the alarm time_t next_update; // the next update timestamp of the alarm time_t last_status_change; // the timestamp of the last time this alarm changed status - time_t last_repeat; // the last time the alarm got repeated + time_t last_repeat; // the last time the alarm got repeated uint32_t times_repeat; // number of times the alarm got repeated time_t db_after; // the first timestamp evaluated by the db lookup @@ -134,85 +137,105 @@ struct rrdcalc { // ------------------------------------------------------------------------ // variables this alarm exposes to the rest of the alarms - RRDVAR *local; - RRDVAR *family; - RRDVAR *hostid; - RRDVAR *hostname; + const RRDVAR_ACQUIRED *rrdvar_local; + const RRDVAR_ACQUIRED *rrdvar_family; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name; // ------------------------------------------------------------------------ // the chart this alarm it is linked to + size_t labels_version; struct rrdset *rrdset; - // linking of this alarm on its chart - struct rrdcalc *rrdset_next; - struct rrdcalc *rrdset_prev; - struct rrdcalc *next; + struct rrdcalc *prev; }; +#define rrdcalc_name(rc) string2str((rc)->name) +#define rrdcalc_chart_name(rc) string2str((rc)->chart) +#define rrdcalc_exec(rc) string2str((rc)->exec) +#define rrdcalc_recipient(rc) string2str((rc)->recipient) +#define rrdcalc_classification(rc) string2str((rc)->classification) +#define rrdcalc_component(rc) string2str((rc)->component) +#define rrdcalc_type(rc) string2str((rc)->type) +#define rrdcalc_plugin_match(rc) string2str((rc)->plugin_match) +#define rrdcalc_module_match(rc) string2str((rc)->module_match) +#define rrdcalc_source(rc) string2str((rc)->source) +#define rrdcalc_units(rc) string2str((rc)->units) +#define rrdcalc_original_info(rc) string2str((rc)->original_info) +#define rrdcalc_info(rc) string2str((rc)->info) +#define rrdcalc_dimensions(rc) string2str((rc)->dimensions) +#define rrdcalc_foreachdim(rc) string2str((rc)->foreach_dimension) +#define rrdcalc_host_labels(rc) string2str((rc)->host_labels) + +#define foreach_rrdcalc_in_rrdhost_read(host, rc) \ + dfe_start_read((host)->rrdcalc_root_index, rc) \ + +#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \ + dfe_start_reentrant((host)->rrdcalc_root_index, rc) + +#define foreach_rrdcalc_in_rrdhost_done(rc) \ + dfe_done(rc) + struct alert_config { - char *alarm; - char *template_key; - char *os; - char *host; - char *on; - char *families; - char *plugin; - char *module; - char *charts; - char *lookup; - char *calc; - char *warn; - char *crit; - char *every; - char *green; - char *red; - char *exec; - char *to; - char *units; - char *info; - char *classification; - char *component; - char *type; - char *delay; - char *options; - char *repeat; - char *host_labels; - - char *p_db_lookup_dimensions; - char *p_db_lookup_method; + STRING *alarm; + STRING *template_key; + STRING *os; + STRING *host; + STRING *on; + STRING *families; + STRING *plugin; + STRING *module; + STRING *charts; + STRING *lookup; + STRING *calc; + STRING *warn; + STRING *crit; + STRING *every; + STRING *green; + STRING *red; + STRING *exec; + STRING *to; + STRING *units; + STRING *info; + STRING *classification; + STRING *component; + STRING *type; + STRING *delay; + STRING *options; + STRING *repeat; + STRING *host_labels; + + STRING *p_db_lookup_dimensions; + STRING *p_db_lookup_method; + uint32_t p_db_lookup_options; int32_t p_db_lookup_after; int32_t p_db_lookup_before; int32_t p_update_every; }; -extern int alarm_isrepeating(RRDHOST *host, uint32_t alarm_id); -extern int alarm_entry_isrepeating(RRDHOST *host, ALARM_ENTRY *ae); -extern RRDCALC *alarm_max_last_repeat(RRDHOST *host, char *alarm_name, uint32_t hash); - #define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after) -extern void rrdsetcalc_link_matching(RRDSET *st); -extern void rrdsetcalc_unlink(RRDCALC *rc); -extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name); +void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc); -extern const char *rrdcalc_status2string(RRDCALC_STATUS status); +void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st); -extern void rrdcalc_free(RRDCALC *rc); -extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc); +const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name); +void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca); +RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca); -extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); -extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); -extern RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); -extern RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension); -extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc); -extern void dimension_remove_pipe_comma(char *str); -extern char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen); +const char *rrdcalc_status2string(RRDCALC_STATUS status); -extern void rrdcalc_labels_unlink(); -extern void rrdcalc_labels_unlink_alarm_from_host(RRDHOST *host); +void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc); + +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id); +void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions); +int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc); + +void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts(); +void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host); static inline int rrdcalc_isrepeating(RRDCALC *rc) { if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) { @@ -221,4 +244,15 @@ static inline int rrdcalc_isrepeating(RRDCALC *rc) { return 0; } +void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st); +void rrdcalc_delete_all(RRDHOST *host); + +void rrdcalc_rrdhost_index_init(RRDHOST *host); +void rrdcalc_rrdhost_index_destroy(RRDHOST *host); + +#define RRDCALC_VAR_MAX 100 +#define RRDCALC_VAR_FAMILY "$family" +#define RRDCALC_VAR_LABEL "$label:" +#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL)-1) + #endif //NETDATA_RRDCALC_H diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c index 3f9804b93..87e085c93 100644 --- a/database/rrdcalctemplate.c +++ b/database/rrdcalctemplate.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_HEALTH_INTERNALS #include "rrd.h" // ---------------------------------------------------------------------------- @@ -11,100 +10,232 @@ * @param rt is the template used to create the chart. * @param st is the chart where the alarm will be attached. */ -void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) { - if(rt->hash_context != st->hash_context || strcmp(rt->context, st->context) != 0) - return; - if (rt->charts_pattern && !simple_pattern_matches(rt->charts_pattern, st->name)) - return; +static char *rrdcalc_alert_name_with_dimension(const char *name, size_t namelen, const char *dim, size_t dimlen) { + char *newname,*move; - if (rt->family_pattern && !simple_pattern_matches(rt->family_pattern, st->family)) - return; + newname = mallocz(namelen + dimlen + 2); + move = newname; + memcpy(move, name, namelen); + move += namelen; - if (rt->module_pattern && !simple_pattern_matches(rt->module_pattern, st->module_name)) - return; + *move++ = '_'; + memcpy(move, dim, dimlen); + move += dimlen; + *move = '\0'; + + return newname; +} + +bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) { + if(rt->context != st->context) + return false; + + if(rt->foreach_dimension_pattern && !rrdset_number_of_dimensions(st)) + return false; + + if (rt->charts_pattern && !simple_pattern_matches(rt->charts_pattern, rrdset_name(st)) && !simple_pattern_matches(rt->charts_pattern, rrdset_id(st))) + return false; + + if (rt->family_pattern && !simple_pattern_matches(rt->family_pattern, rrdset_family(st))) + return false; + + if (rt->module_pattern && !simple_pattern_matches(rt->module_pattern, rrdset_module_name(st))) + return false; + + if (rt->plugin_pattern && !simple_pattern_matches(rt->plugin_pattern, rrdset_plugin_name(st))) + return false; + + if(host->rrdlabels && rt->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rt->host_labels_pattern, '=')) + return false; - if (rt->plugin_pattern && !simple_pattern_matches(rt->plugin_pattern, st->plugin_name)) + return true; +} + +void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host) { + if (simple_pattern_matches(rt->foreach_dimension_pattern, rrddim_id(rd)) || simple_pattern_matches(rt->foreach_dimension_pattern, rrddim_name(rd))) { + char *overwrite_alert_name = rrdcalc_alert_name_with_dimension( + rrdcalctemplate_name(rt), string_strlen(rt->name), rrddim_name(rd), string_strlen(rd->name)); + rrdcalc_add_from_rrdcalctemplate(host, rt, st, overwrite_alert_name, rrddim_name(rd)); + freez(overwrite_alert_name); + } +} + +void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) { + if(!rrdcalctemplate_check_rrdset_conditions(rt, st, host)) return; - if(host->host_labels && rt->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(host->host_labels, rt->host_labels_pattern, '=')) + if(!rt->foreach_dimension_pattern) { + rrdcalc_add_from_rrdcalctemplate(host, rt, st, NULL, NULL); return; + } - RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id); - if (unlikely(!rc)) - info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); -#ifdef NETDATA_INTERNAL_CHECKS - else if (rc->rrdset != st && !rc->foreachdim) //When we have a template with foreadhdim, the child will be added to the index late - error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart ? rc->chart : "NOCHART", rc->name, st->id); -#endif + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + rrdcalctemplate_check_rrddim_conditions_and_link(rt, st, rd, host); + } + rrddim_foreach_done(rd); } -void rrdcalctemplate_link_matching(RRDSET *st) { +void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st) { RRDHOST *host = st->rrdhost; - RRDCALCTEMPLATE *rt; - - for(rt = host->templates; rt ; rt = rt->next) - rrdcalctemplate_check_conditions_and_link(rt, st, host); - for(rt = host->alarms_template_with_foreach; rt ; rt = rt->next) + RRDCALCTEMPLATE *rt; + foreach_rrdcalctemplate_read(host, rt) { rrdcalctemplate_check_conditions_and_link(rt, st, host); + } + foreach_rrdcalctemplate_done(rt); } -inline void rrdcalctemplate_free(RRDCALCTEMPLATE *rt) { - if(unlikely(!rt)) return; - +static void rrdcalctemplate_free_internals(RRDCALCTEMPLATE *rt) { expression_free(rt->calculation); expression_free(rt->warning); expression_free(rt->critical); - freez(rt->family_match); + string_freez(rt->family_match); simple_pattern_free(rt->family_pattern); - freez(rt->plugin_match); + string_freez(rt->plugin_match); simple_pattern_free(rt->plugin_pattern); - freez(rt->module_match); + string_freez(rt->module_match); simple_pattern_free(rt->module_pattern); - freez(rt->charts_match); + string_freez(rt->charts_match); simple_pattern_free(rt->charts_pattern); - freez(rt->name); - freez(rt->exec); - freez(rt->recipient); - freez(rt->classification); - freez(rt->component); - freez(rt->type); - freez(rt->context); - freez(rt->source); - freez(rt->units); - freez(rt->info); - freez(rt->dimensions); - freez(rt->foreachdim); - freez(rt->host_labels); - simple_pattern_free(rt->spdim); + string_freez(rt->name); + string_freez(rt->exec); + string_freez(rt->recipient); + string_freez(rt->classification); + string_freez(rt->component); + string_freez(rt->type); + string_freez(rt->context); + string_freez(rt->source); + string_freez(rt->units); + string_freez(rt->info); + string_freez(rt->dimensions); + string_freez(rt->foreach_dimension); + string_freez(rt->host_labels); + simple_pattern_free(rt->foreach_dimension_pattern); simple_pattern_free(rt->host_labels_pattern); - freez(rt); } -inline void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt) { +void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt) { if(unlikely(!rt)) return; - debug(D_HEALTH, "Health removing template '%s' of host '%s'", rt->name, host->hostname); + rrdcalctemplate_free_internals(rt); + freez(rt); +} +static void rrdcalctemplate_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *added_bool) { + RRDCALCTEMPLATE *rt = rrdcalctemplate; (void)rt; + + bool *added = added_bool; + *added = true; + + debug(D_HEALTH, "Health configuration adding template '%s'" + ": context '%s'" + ", exec '%s'" + ", recipient '%s'" + ", green " NETDATA_DOUBLE_FORMAT_AUTO + ", red " NETDATA_DOUBLE_FORMAT_AUTO + ", lookup: group %d" + ", after %d" + ", before %d" + ", options %u" + ", dimensions '%s'" + ", for each dimension '%s'" + ", update every %d" + ", calculation '%s'" + ", warning '%s'" + ", critical '%s'" + ", source '%s'" + ", delay up %d" + ", delay down %d" + ", delay max %d" + ", delay_multiplier %f" + ", warn_repeat_every %u" + ", crit_repeat_every %u", + rrdcalctemplate_name(rt), + (rt->context)?string2str(rt->context):"NONE", + (rt->exec)?rrdcalctemplate_exec(rt):"DEFAULT", + (rt->recipient)?rrdcalctemplate_recipient(rt):"DEFAULT", + rt->green, + rt->red, + (int)rt->group, + rt->after, + rt->before, + rt->options, + (rt->dimensions)?rrdcalctemplate_dimensions(rt):"NONE", + (rt->foreach_dimension)?rrdcalctemplate_foreachdim(rt):"NONE", + rt->update_every, + (rt->calculation)?rt->calculation->parsed_as:"NONE", + (rt->warning)?rt->warning->parsed_as:"NONE", + (rt->critical)?rt->critical->parsed_as:"NONE", + rrdcalctemplate_source(rt), + rt->delay_up_duration, + rt->delay_down_duration, + rt->delay_max_duration, + rt->delay_multiplier, + rt->warn_repeat_every, + rt->crit_repeat_every + ); +} + +static void rrdcalctemplate_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *rrdhost __maybe_unused) { + RRDCALCTEMPLATE *rt = rrdcalctemplate; + rrdcalctemplate_free_internals(rt); +} + +void rrdcalctemplate_index_init(RRDHOST *host) { + if(!host->rrdcalctemplate_root_index) { + host->rrdcalctemplate_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); - if(host->templates == rt) { - host->templates = rt->next; + dictionary_register_insert_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_delete_callback, host); } - else { - RRDCALCTEMPLATE *t; - for (t = host->templates; t && t->next != rt; t = t->next ) ; - if(t) { - t->next = rt->next; - rt->next = NULL; - } - else - error("Cannot find RRDCALCTEMPLATE '%s' linked in host '%s'", rt->name, host->hostname); +} + +void rrdcalctemplate_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdcalctemplate_root_index); + host->rrdcalctemplate_root_index = NULL; +} + +inline void rrdcalctemplate_delete_all(RRDHOST *host) { + dictionary_flush(host->rrdcalctemplate_root_index); +} + +#define RRDCALCTEMPLATE_MAX_KEY_SIZE 1024 +static size_t rrdcalctemplate_key(char *dst, size_t dst_len, const char *name, const char *family_match) { + return snprintfz(dst, dst_len, "%s/%s", name, (family_match && *family_match)?family_match:"*"); +} + +void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) { + if(unlikely(!rt->context)) { + error("Health configuration for template '%s' does not have a context", rrdcalctemplate_name(rt)); + return; + } + + if(unlikely(!rt->update_every)) { + error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalctemplate_name(rt)); + return; } - rrdcalctemplate_free(rt); + if(unlikely(!RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) && !rt->calculation && !rt->warning && !rt->critical)) { + error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rrdcalctemplate_name(rt)); + return; + } + + char key[RRDCALCTEMPLATE_MAX_KEY_SIZE + 1]; + size_t key_len = rrdcalctemplate_key(key, RRDCALCTEMPLATE_MAX_KEY_SIZE, rrdcalctemplate_name(rt), rrdcalctemplate_family_match(rt)); + + bool added = false; + dictionary_set_advanced(host->rrdcalctemplate_root_index, key, (ssize_t)(key_len + 1), rt, sizeof(*rt), &added); + + if(added) + freez(rt); + else { + info("Health configuration template '%s' already exists for host '%s'.", rrdcalctemplate_name(rt), rrdhost_hostname(host)); + rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(rt); + } } diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h index 51aa33054..6212a42da 100644 --- a/database/rrdcalctemplate.h +++ b/database/rrdcalctemplate.h @@ -9,35 +9,34 @@ // these are to be applied to charts found dynamically // based on their context. struct rrdcalctemplate { - char *name; - uint32_t hash_name; uuid_t config_hash_id; - char *exec; - char *recipient; + STRING *name; - char *classification; - char *component; - char *type; + STRING *exec; + STRING *recipient; - char *context; - uint32_t hash_context; + STRING *classification; + STRING *component; + STRING *type; - char *family_match; + STRING *context; + + STRING *family_match; SIMPLE_PATTERN *family_pattern; - char *plugin_match; + STRING *plugin_match; SIMPLE_PATTERN *plugin_pattern; - char *module_match; + STRING *module_match; SIMPLE_PATTERN *module_pattern; - char *charts_match; + STRING *charts_match; SIMPLE_PATTERN *charts_pattern; - char *source; // the source of this alarm - char *units; // the units of the alarm - char *info; // a short description of the alarm + STRING *source; // the source of this alarm + STRING *units; // the units of the alarm + STRING *info; // a short description of the alarm int update_every; // update frequency for the alarm @@ -48,15 +47,13 @@ struct rrdcalctemplate { // ------------------------------------------------------------------------ // database lookup settings - char *dimensions; // the chart dimensions - char *foreachdim; // the group of dimensions that the lookup will be applied. - SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart. - int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the - // children + STRING *dimensions; // the chart dimensions + STRING *foreach_dimension; // the group of dimensions that the lookup will be applied. + SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart. RRDR_GROUPING group; // grouping method: average, max, etc. int before; // ending point in time-series int after; // starting point in time-series - uint32_t options; // calculation options + RRDCALC_OPTIONS options; // configuration options // ------------------------------------------------------------------------ // notification delay settings @@ -74,7 +71,7 @@ struct rrdcalctemplate { // ------------------------------------------------------------------------ // Labels settings - char *host_labels; // the label read from an alarm file + STRING *host_labels; // the label read from an alarm file SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels // ------------------------------------------------------------------------ @@ -85,13 +82,47 @@ struct rrdcalctemplate { EVAL_EXPRESSION *critical; struct rrdcalctemplate *next; + struct rrdcalctemplate *prev; }; +#define foreach_rrdcalctemplate_read(host, rt) \ + dfe_start_read((host)->rrdcalctemplate_root_index, rt) + +#define foreach_rrdcalctemplate_done(rt) \ + dfe_done(rt) + +#define rrdcalctemplate_name(rt) string2str((rt)->name) +#define rrdcalctemplate_exec(rt) string2str((rt)->exec) +#define rrdcalctemplate_recipient(rt) string2str((rt)->recipient) +#define rrdcalctemplate_classification(rt) string2str((rt)->classification) +#define rrdcalctemplate_component(rt) string2str((rt)->component) +#define rrdcalctemplate_type(rt) string2str((rt)->type) +#define rrdcalctemplate_family_match(rt) string2str((rt)->family_match) +#define rrdcalctemplate_plugin_match(rt) string2str((rt)->plugin_match) +#define rrdcalctemplate_module_match(rt) string2str((rt)->module_match) +#define rrdcalctemplate_charts_match(rt) string2str((rt)->charts_match) +#define rrdcalctemplate_units(rt) string2str((rt)->units) +#define rrdcalctemplate_info(rt) string2str((rt)->info) +#define rrdcalctemplate_source(rt) string2str((rt)->source) +#define rrdcalctemplate_dimensions(rt) string2str((rt)->dimensions) +#define rrdcalctemplate_foreachdim(rt) string2str((rt)->foreach_dimension) +#define rrdcalctemplate_host_labels(rt) string2str((rt)->host_labels) + #define RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) ((rt)->after) -extern void rrdcalctemplate_link_matching(RRDSET *st); +void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st); + +void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt); +void rrdcalctemplate_delete_all(RRDHOST *host); +void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt); + +void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host); + +bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host); +void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host); + + +void rrdcalctemplate_index_init(RRDHOST *host); +void rrdcalctemplate_index_destroy(RRDHOST *host); -extern void rrdcalctemplate_free(RRDCALCTEMPLATE *rt); -extern void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt); -extern void rrdcalctemplate_create_alarms(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st); #endif //NETDATA_RRDCALCTEMPLATE_H diff --git a/database/rrdcontext.c b/database/rrdcontext.c index 24884dbc0..cfa8af3e0 100644 --- a/database/rrdcontext.c +++ b/database/rrdcontext.c @@ -4,17 +4,30 @@ #include "sqlite/sqlite_context.h" #include "aclk/schema-wrappers/context.h" #include "aclk/aclk_contexts_api.h" -#include "aclk/aclk_api.h" - -int rrdcontext_enabled = CONFIG_BOOLEAN_YES; +#include "aclk/aclk.h" +#include "storage_engine.h" #define MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST 5000 #define FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS 120 -#define RRDCONTEXT_WORKER_THREAD_HEARTBEAT_SECS 1 +#define RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC (1000 * USEC_PER_MS) #define RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY 10 -// #define LOG_TRANSITIONS 1 -// #define LOG_RRDINSTANCES 1 +#define LOG_TRANSITIONS false + +#define WORKER_JOB_HOSTS 1 +#define WORKER_JOB_CHECK 2 +#define WORKER_JOB_SEND 3 +#define WORKER_JOB_DEQUEUE 4 +#define WORKER_JOB_RETENTION 5 +#define WORKER_JOB_QUEUED 6 +#define WORKER_JOB_CLEANUP 7 +#define WORKER_JOB_CLEANUP_DELETE 8 +#define WORKER_JOB_PP_METRIC 9 // post-processing metrics +#define WORKER_JOB_PP_INSTANCE 10 // post-processing instances +#define WORKER_JOB_PP_CONTEXT 11 // post-processing contexts +#define WORKER_JOB_HUB_QUEUE_SIZE 12 +#define WORKER_JOB_PP_QUEUE_SIZE 13 + typedef enum { RRD_FLAG_NONE = 0, @@ -24,46 +37,36 @@ typedef enum { RRD_FLAG_ARCHIVED = (1 << 3), // this object is not currently being collected RRD_FLAG_OWN_LABELS = (1 << 4), // this instance has its own labels - not linked to an RRDSET RRD_FLAG_LIVE_RETENTION = (1 << 5), // we have got live retention from the database - RRD_FLAG_QUEUED = (1 << 6), // this context is currently queued to be dispatched to hub - RRD_FLAG_DONT_PROCESS = (1 << 7), // don't process updates for this object + RRD_FLAG_QUEUED_FOR_HUB = (1 << 6), // this context is currently queued to be dispatched to hub + RRD_FLAG_QUEUED_FOR_PP = (1 << 7), // this context is currently queued to be post-processed RRD_FLAG_HIDDEN = (1 << 8), // don't expose this to the hub or the API + RRD_FLAG_UPDATE_REASON_TRIGGERED = (1 << 9), // the update was triggered by the child object RRD_FLAG_UPDATE_REASON_LOAD_SQL = (1 << 10), // this object has just been loaded from SQL RRD_FLAG_UPDATE_REASON_NEW_OBJECT = (1 << 11), // this object has just been created RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT = (1 << 12), // we received an update on this object RRD_FLAG_UPDATE_REASON_CHANGED_LINKING = (1 << 13), // an instance or a metric switched RRDSET or RRDDIM - RRD_FLAG_UPDATE_REASON_CHANGED_UUID = (1 << 14), // an instance or a metric changed UUID - RRD_FLAG_UPDATE_REASON_CHANGED_NAME = (1 << 15), // an instance or a metric changed name - RRD_FLAG_UPDATE_REASON_CHANGED_UNITS = (1 << 16), // this context or instance changed units - RRD_FLAG_UPDATE_REASON_CHANGED_TITLE = (1 << 17), // this context or instance changed title - RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY = (1 << 18), // the context or the instance changed family - RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE = (1 << 19), // this context or instance changed chart type - RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY = (1 << 20), // this context or instance changed its priority - RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY = (1 << 21), // the instance or the metric changed update frequency - RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 22), // this object has not retention - RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 23), // this object changed its oldest time in the db - RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 24), // this object change its latest time in the db - RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 25), // this object has stopped being collected - RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 26), // this object has started being collected - RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 27), // this context belongs to a host that just disconnected - RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 28), // this context changed because of a db rotation - RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 29), // this context is not used anymore - RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS = (1 << 30), // this context is not used anymore + RRD_FLAG_UPDATE_REASON_CHANGED_METADATA = (1 << 14), // this context or instance changed uuid, name, units, title, family, chart type, priority, update every, rrd changed flags + RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 15), // this object has no retention + RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 16), // this object changed its oldest time in the db + RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 17), // this object change its latest time in the db + RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 18), // this object has stopped being collected + RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 19), // this object has started being collected + RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 20), // this context belongs to a host that just disconnected + RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 21), // this context is not used anymore + RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 22), // this context changed because of a db rotation + + // action to perform on an object + RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION = (1 << 30), // this object has to update its retention from the db } RRD_FLAGS; #define RRD_FLAG_ALL_UPDATE_REASONS ( \ - RRD_FLAG_UPDATE_REASON_LOAD_SQL \ + RRD_FLAG_UPDATE_REASON_TRIGGERED \ + |RRD_FLAG_UPDATE_REASON_LOAD_SQL \ |RRD_FLAG_UPDATE_REASON_NEW_OBJECT \ |RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \ |RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UUID \ - |RRD_FLAG_UPDATE_REASON_CHANGED_NAME \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UNITS \ - |RRD_FLAG_UPDATE_REASON_CHANGED_TITLE \ - |RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY \ - |RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE \ - |RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY \ + |RRD_FLAG_UPDATE_REASON_CHANGED_METADATA \ |RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ |RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T \ |RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T \ @@ -72,60 +75,113 @@ typedef enum { |RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ |RRD_FLAG_UPDATE_REASON_DB_ROTATION \ |RRD_FLAG_UPDATE_REASON_UNUSED \ - |RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS \ ) #define RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS ( \ RRD_FLAG_ARCHIVED \ - |RRD_FLAG_DONT_PROCESS \ |RRD_FLAG_HIDDEN \ |RRD_FLAG_ALL_UPDATE_REASONS \ ) +#define RRD_FLAGS_REQUIRED_FOR_DELETIONS ( \ + RRD_FLAG_DELETED \ + |RRD_FLAG_LIVE_RETENTION \ +) + #define RRD_FLAGS_PREVENTING_DELETIONS ( \ - RRD_FLAG_QUEUED \ + RRD_FLAG_QUEUED_FOR_HUB \ |RRD_FLAG_COLLECTED \ - |RRD_FLAG_UPDATE_REASON_LOAD_SQL \ - |RRD_FLAG_UPDATE_REASON_NEW_OBJECT \ - |RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \ - |RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \ + |RRD_FLAG_QUEUED_FOR_PP \ ) -#define rrd_flag_set_updated(obj, reason) (obj)->flags |= (RRD_FLAG_UPDATED | (reason)) -#define rrd_flag_unset_updated(obj) (obj)->flags &= ~(RRD_FLAG_UPDATED | RRD_FLAG_ALL_UPDATE_REASONS) - -#define rrd_flag_set_collected(obj) do { \ - if(likely( !((obj)->flags & RRD_FLAG_COLLECTED))) \ - (obj)->flags |= (RRD_FLAG_COLLECTED | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED | RRD_FLAG_UPDATED); \ - if(likely( ((obj)->flags & (RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED)))) \ - (obj)->flags &= ~(RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED); \ - if(unlikely(((obj)->flags & (RRD_FLAG_DELETED | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION)))) \ - (obj)->flags &= ~(RRD_FLAG_DELETED | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); \ - if(unlikely(((obj)->flags & RRD_FLAG_DONT_PROCESS))) \ - (obj)->flags &= ~RRD_FLAG_DONT_PROCESS; \ -} while(0) - -#define rrd_flag_set_archived(obj) do { \ - if(likely( !((obj)->flags & RRD_FLAG_ARCHIVED))) \ - (obj)->flags |= (RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED | RRD_FLAG_UPDATED); \ - if(likely( ((obj)->flags & (RRD_FLAG_COLLECTED | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED)))) \ - (obj)->flags &= ~(RRD_FLAG_COLLECTED | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED); \ - if(unlikely(((obj)->flags & (RRD_FLAG_DELETED | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION)))) \ - (obj)->flags &= ~(RRD_FLAG_DELETED | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); \ -} while(0) - -#define rrd_flag_set_deleted(obj, reason) do { \ - if(likely( !((obj)->flags & RRD_FLAG_DELETED))) \ - (obj)->flags |= (RRD_FLAG_DELETED | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION | RRD_FLAG_UPDATED | (reason)); \ - if(unlikely(((obj)->flags & RRD_FLAG_ARCHIVED))) \ - (obj)->flags &= ~RRD_FLAG_ARCHIVED; \ - if(likely( ((obj)->flags & RRD_FLAG_COLLECTED))) \ - (obj)->flags &= ~RRD_FLAG_COLLECTED; \ -} while(0) - - -#define rrd_flag_is_collected(obj) ((obj)->flags & RRD_FLAG_COLLECTED) -#define rrd_flag_is_archived(obj) ((obj)->flags & RRD_FLAG_ARCHIVED) +// get all the flags of an object +#define rrd_flags_get(obj) __atomic_load_n(&((obj)->flags), __ATOMIC_SEQ_CST) + +// check if ANY of the given flags (bits) is set +#define rrd_flag_check(obj, flag) (rrd_flags_get(obj) & (flag)) + +// check if ALL the given flags (bits) are set +#define rrd_flag_check_all(obj, flag) (rrd_flag_check(obj, flag) == (flag)) + +// set one or more flags (bits) +#define rrd_flag_set(obj, flag) __atomic_or_fetch(&((obj)->flags), flag, __ATOMIC_SEQ_CST) + +// clear one or more flags (bits) +#define rrd_flag_clear(obj, flag) __atomic_and_fetch(&((obj)->flags), ~(flag), __ATOMIC_SEQ_CST) + +// replace the flags of an object, with the supplied ones +#define rrd_flags_replace(obj, all_flags) __atomic_store_n(&((obj)->flags), all_flags, __ATOMIC_SEQ_CST) + +static inline void +rrd_flag_add_remove_atomic(RRD_FLAGS *flags, RRD_FLAGS check, RRD_FLAGS conditionally_add, RRD_FLAGS always_remove) { + RRD_FLAGS expected, desired; + do { + expected = *flags; + + desired = expected; + desired &= ~(always_remove); + + if(!(expected & check)) + desired |= (check | conditionally_add); + + } while(!__atomic_compare_exchange_n(flags, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); +} + +#define rrd_flag_set_collected(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_COLLECTED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + | RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ + ) + +#define rrd_flag_set_archived(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_ARCHIVED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_COLLECTED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + ) + +#define rrd_flag_set_deleted(obj, reason) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_DELETED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_ZERO_RETENTION | RRD_FLAG_UPDATED | (reason) \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_COLLECTED \ + ) + +#define rrd_flag_is_collected(obj) rrd_flag_check(obj, RRD_FLAG_COLLECTED) +#define rrd_flag_is_archived(obj) rrd_flag_check(obj, RRD_FLAG_ARCHIVED) +#define rrd_flag_is_deleted(obj) rrd_flag_check(obj, RRD_FLAG_DELETED) +#define rrd_flag_is_updated(obj) rrd_flag_check(obj, RRD_FLAG_UPDATED) + +// mark an object as updated, providing reasons (additional bits) +#define rrd_flag_set_updated(obj, reason) rrd_flag_set(obj, RRD_FLAG_UPDATED | (reason)) + +// clear an object as being updated, clearing also all the reasons +#define rrd_flag_unset_updated(obj) rrd_flag_clear(obj, RRD_FLAG_UPDATED | RRD_FLAG_ALL_UPDATE_REASONS) + static struct rrdcontext_reason { RRD_FLAGS flag; @@ -133,32 +189,26 @@ static struct rrdcontext_reason { usec_t delay_ut; } rrdcontext_reasons[] = { // context related - { RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_TITLE, "changed title", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_UNITS, "changed units", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY, "changed family", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY, "changed priority", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE, "changed chart type", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 0 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 0 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_METADATA, "changed metadata", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC }, // not context related - { RRD_FLAG_UPDATE_REASON_CHANGED_UUID, "changed uuid", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY, "changed updated every",60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_NAME, "changed name", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 30 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 60 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS, "changed flags", 60 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION, "updated retention", 65 * USEC_PER_SEC }, // terminator - { 0, NULL, 0 }, + {0, NULL, 0 }, }; @@ -175,8 +225,6 @@ typedef struct rrdmetric { RRD_FLAGS flags; struct rrdinstance *ri; - - usec_t created_ut; // the time this object was created } RRDMETRIC; typedef struct rrdinstance { @@ -197,10 +245,16 @@ typedef struct rrdinstance { int update_every; // data collection frequency RRDSET *rrdset; // pointer to RRDSET when collected, or NULL - DICTIONARY *rrdlabels; // linked to RRDSET->state->chart_labels or own version + DICTIONARY *rrdlabels; // linked to RRDSET->chart_labels or own version struct rrdcontext *rc; DICTIONARY *rrdmetrics; + + struct { + uint32_t collected_metrics_count; // a temporary variable to detect BEGIN/END without SET + // don't use it for other purposes + // it goes up and then resets to zero, on every iteration + } internal; } RRDINSTANCE; typedef struct rrdcontext { @@ -223,11 +277,19 @@ typedef struct rrdcontext { RRDHOST *rrdhost; struct { + RRD_FLAGS queued_flags; // the last flags that triggered the post-processing + usec_t queued_ut; // the last time this was queued + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t executions; // how many times this context has been processed + } pp; + + struct { RRD_FLAGS queued_flags; // the last flags that triggered the queueing usec_t queued_ut; // the last time this was queued usec_t delay_calc_ut; // the last time we calculated the scheduled_dispatched_ut usec_t scheduled_dispatch_ut; // the time it was/is scheduled to be sent - usec_t dequeued_ut; // the last time we sent (or deduped) this context + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t dispatches; // the number of times this has been dispatched to hub } queue; netdata_mutex_t mutex; @@ -236,28 +298,51 @@ typedef struct rrdcontext { // ---------------------------------------------------------------------------- // helper one-liners for RRDMETRIC +static bool rrdmetric_update_retention(RRDMETRIC *rm); + static inline RRDMETRIC *rrdmetric_acquired_value(RRDMETRIC_ACQUIRED *rma) { return dictionary_acquired_item_value((DICTIONARY_ITEM *)rma); } +static inline RRDMETRIC_ACQUIRED *rrdmetric_acquired_dup(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return (RRDMETRIC_ACQUIRED *)dictionary_acquired_item_dup(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); +} + static inline void rrdmetric_release(RRDMETRIC_ACQUIRED *rma) { RRDMETRIC *rm = rrdmetric_acquired_value(rma); dictionary_acquired_item_release(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); } -// ---------------------------------------------------------------------------- -// helper one-liners for RRDINSTANCE +const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->id); +} + +const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->name); +} -static inline RRDINSTANCE_ACQUIRED *rrdinstance_dup(RRDINSTANCE_ACQUIRED *ria) { - return (RRDINSTANCE_ACQUIRED *)dictionary_acquired_item_dup((DICTIONARY_ITEM *)ria); +NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + if(rm->rrddim) + return rm->rrddim->last_stored_value; + + return NAN; } +// ---------------------------------------------------------------------------- +// helper one-liners for RRDINSTANCE + static inline RRDINSTANCE *rrdinstance_acquired_value(RRDINSTANCE_ACQUIRED *ria) { return dictionary_acquired_item_value((DICTIONARY_ITEM *)ria); } -static inline const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria) { - return dictionary_acquired_item_name((DICTIONARY_ITEM *)ria); +static inline RRDINSTANCE_ACQUIRED *rrdinstance_acquired_dup(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return (RRDINSTANCE_ACQUIRED *)dictionary_acquired_item_dup(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); } static inline void rrdinstance_release(RRDINSTANCE_ACQUIRED *ria) { @@ -265,23 +350,42 @@ static inline void rrdinstance_release(RRDINSTANCE_ACQUIRED *ria) { dictionary_acquired_item_release(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); } -// ---------------------------------------------------------------------------- -// helper one-liners for RRDCONTEXT +const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->id); +} + +const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->name); +} -static inline RRDCONTEXT_ACQUIRED *rrdcontext_dup(RRDCONTEXT_ACQUIRED *rca) { - return (RRDCONTEXT_ACQUIRED *)dictionary_acquired_item_dup((DICTIONARY_ITEM *)rca); +DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->rrdlabels; } -static inline const char *rrdcontext_acquired_name(RRDCONTEXT_ACQUIRED *rca) { - return dictionary_acquired_item_name((DICTIONARY_ITEM *)rca); +DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(!ri->rrdset) return NULL; + return ri->rrdset->functions_view; } +// ---------------------------------------------------------------------------- +// helper one-liners for RRDCONTEXT + static inline RRDCONTEXT *rrdcontext_acquired_value(RRDCONTEXT_ACQUIRED *rca) { return dictionary_acquired_item_value((DICTIONARY_ITEM *)rca); } -static inline RRDCONTEXT_ACQUIRED *rrdcontext_acquire(RRDHOST *host, const char *name) { - return (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)host->rrdctx, name); +const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return string2str(rc->id); +} + +static inline RRDCONTEXT_ACQUIRED *rrdcontext_acquired_dup(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return (RRDCONTEXT_ACQUIRED *)dictionary_acquired_item_dup((DICTIONARY *)rc->rrdhost->rrdctx, (DICTIONARY_ITEM *)rca); } static inline void rrdcontext_release(RRDCONTEXT_ACQUIRED *rca) { @@ -289,29 +393,40 @@ static inline void rrdcontext_release(RRDCONTEXT_ACQUIRED *rca) { dictionary_acquired_item_release((DICTIONARY *)rc->rrdhost->rrdctx, (DICTIONARY_ITEM *)rca); } -static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, int job_id); -static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, int job_id); +static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs); +static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs); #define rrdcontext_version_hash(host) rrdcontext_version_hash_with_callback(host, NULL, false, NULL) static uint64_t rrdcontext_version_hash_with_callback(RRDHOST *host, void (*callback)(RRDCONTEXT *, bool, void *), bool snapshot, void *bundle); -void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc); +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs); +static void rrdcontext_garbage_collect_for_all_hosts(void); #define rrdcontext_lock(rc) netdata_mutex_lock(&((rc)->mutex)) #define rrdcontext_unlock(rc) netdata_mutex_unlock(&((rc)->mutex)) // ---------------------------------------------------------------------------- -// Updates triggers +// Forward definitions + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc); +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused); +static void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused); -static void rrdmetric_trigger_updates(RRDMETRIC *rm, bool force, bool escalate); -static void rrdinstance_trigger_updates(RRDINSTANCE *ri, bool force, bool escalate); -static void rrdcontext_trigger_updates(RRDCONTEXT *rc, bool force); +static void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc); + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc); +static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function, RRD_FLAGS flags); +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs); + +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function); +static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function); +static void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function); // ---------------------------------------------------------------------------- // visualizing flags static void rrd_flags_to_buffer(RRD_FLAGS flags, BUFFER *wb) { - if(flags & RRD_FLAG_QUEUED) + if(flags & RRD_FLAG_QUEUED_FOR_HUB) buffer_strcat(wb, "QUEUED "); if(flags & RRD_FLAG_DELETED) @@ -332,11 +447,11 @@ static void rrd_flags_to_buffer(RRD_FLAGS flags, BUFFER *wb) { if(flags & RRD_FLAG_LIVE_RETENTION) buffer_strcat(wb, "LIVE_RETENTION "); - if(flags & RRD_FLAG_DONT_PROCESS) - buffer_strcat(wb, "DONT_PROCESS "); - if(flags & RRD_FLAG_HIDDEN) buffer_strcat(wb, "HIDDEN "); + + if(flags & RRD_FLAG_QUEUED_FOR_PP) + buffer_strcat(wb, "PENDING_UPDATES "); } static void rrd_reasons_to_buffer(RRD_FLAGS flags, BUFFER *wb) { @@ -351,117 +466,10 @@ static void rrd_reasons_to_buffer(RRD_FLAGS flags, BUFFER *wb) { } // ---------------------------------------------------------------------------- -// logging of all data collected - -#ifdef LOG_TRANSITIONS -static void log_transition(STRING *metric, STRING *instance, STRING *context, RRD_FLAGS flags, const char *msg) { - BUFFER *wb = buffer_create(1000); - - buffer_sprintf(wb, "RRD TRANSITION: context '%s'", string2str(context)); - - if(instance) - buffer_sprintf(wb, ", instance '%s'", string2str(instance)); - - if(metric) - buffer_sprintf(wb, ", metric '%s'", string2str(metric)); - - buffer_sprintf(wb, ", triggered by %s: ", msg); - - rrd_flags_to_buffer(flags, wb); - - buffer_strcat(wb, ", reasons: "); - - rrd_reasons_to_buffer(flags, wb); - - internal_error(true, "%s", buffer_tostring(wb)); - buffer_free(wb); -} -#else -#define log_transition(metric, instance, context, flags, msg) debug_dummy() -#endif - -#ifdef LOG_RRDINSTANCES -static void rrdinstance_log(RRDINSTANCE *ri, const char *msg) { - char uuid[UUID_STR_LEN]; - - uuid_unparse(ri->uuid, uuid); - - BUFFER *wb = buffer_create(1000); - - buffer_sprintf(wb, - "RRDINSTANCE: %s id '%s' (host '%s'), uuid '%s', name '%s', context '%s', title '%s', units '%s', family '%s', priority %zu, chart type '%s', update every %d, rrdset '%s', flags %s%s%s%s%s%s%s%s, first_time_t %ld, last_time_t %ld", - msg, - string2str(ri->id), - ri->rc->rrdhost->hostname, - uuid, - string2str(ri->name), - string2str(ri->rc->id), - string2str(ri->title), - string2str(ri->units), - string2str(ri->family), - ri->priority, - rrdset_type_name(ri->chart_type), - ri->update_every, - ri->rrdset?ri->rrdset->id:"NONE", - ri->flags & RRD_FLAG_DELETED ?"DELETED ":"", - ri->flags & RRD_FLAG_UPDATED ?"UPDATED ":"", - rrd_flag_is_collected(ri) ?"COLLECTED ":"", - rrd_flag_is_archived(ri) ?"ARCHIVED ":"", - ri->flags & RRD_FLAG_OWNLABELS ?"OWNLABELS ":"", - ri->flags & RRD_FLAG_LIVE_RETENTION ?"LIVE ":"", - ri->flags & RRD_FLAG_QUEUED ?"QUEUED ":"", - ri->flags & RRD_FLAG_DONT_TRIGGER ?"BLOCKED ":"", - ri->first_time_t, - ri->last_time_t - ); - - buffer_strcat(wb, ", update reasons: { "); - for(int i = 0, added = 0; rrdcontext_reasons[i].name ;i++) - if(ri->flags & rrdcontext_reasons[i].flag) { - if(added) buffer_strcat(wb, ", "); - buffer_strcat(wb, rrdcontext_reasons[i].name); - added++; - } - buffer_strcat(wb, " }"); - - buffer_strcat(wb, ", labels: { "); - if(ri->rrdlabels) { - if(!rrdlabels_to_buffer(ri->rrdlabels, wb, "", "=", "'", ", ", NULL, NULL, NULL, NULL)) - buffer_strcat(wb, "EMPTY }"); - else - buffer_strcat(wb, " }"); - } - else - buffer_strcat(wb, "NONE }"); - - buffer_strcat(wb, ", metrics: { "); - if(ri->rrdmetrics) { - RRDMETRIC *v; - int i = 0; - dfe_start_read((DICTIONARY *)ri->rrdmetrics, v) { - buffer_sprintf(wb, "%s%s", i?",":"", v_name); - i++; - } - dfe_done(v); - - if(!i) - buffer_strcat(wb, "EMPTY }"); - else - buffer_strcat(wb, " }"); - } - else - buffer_strcat(wb, "NONE }"); - - internal_error(true, "%s", buffer_tostring(wb)); - buffer_free(wb); -} -#else -#define rrdinstance_log(ir, msg) debug_dummy() -#endif - -// ---------------------------------------------------------------------------- // RRDMETRIC +// free the contents of RRDMETRIC. +// RRDMETRIC itself is managed by DICTIONARY - no need to free it here. static void rrdmetric_free(RRDMETRIC *rm) { string_freez(rm->id); string_freez(rm->name); @@ -471,77 +479,24 @@ static void rrdmetric_free(RRDMETRIC *rm) { rm->ri = NULL; } -static void rrdmetric_update_retention(RRDMETRIC *rm) { - time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; - - if(rm->rrddim) { - min_first_time_t = rrddim_first_entry_t(rm->rrddim); - max_last_time_t = rrddim_last_entry_t(rm->rrddim); - } -#ifdef ENABLE_DBENGINE - else { - RRDHOST *rrdhost = rm->ri->rc->rrdhost; - for (int tier = 0; tier < storage_tiers; tier++) { - if(!rrdhost->storage_instance[tier]) continue; - - time_t first_time_t, last_time_t; - if (rrdeng_metric_retention_by_uuid(rrdhost->storage_instance[tier], &rm->uuid, &first_time_t, &last_time_t) == 0) { - if (first_time_t < min_first_time_t) - min_first_time_t = first_time_t; - - if (last_time_t > max_last_time_t) - max_last_time_t = last_time_t; - } - } - } -#endif - - if(min_first_time_t == LONG_MAX) - min_first_time_t = 0; - - if(min_first_time_t > max_last_time_t) { - internal_error(true, "RRDMETRIC: retention of '%s' is flipped", string2str(rm->id)); - time_t tmp = min_first_time_t; - min_first_time_t = max_last_time_t; - max_last_time_t = tmp; - } - - // check if retention changed - - if (min_first_time_t != rm->first_time_t) { - rm->first_time_t = min_first_time_t; - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } - - if (max_last_time_t != rm->last_time_t) { - rm->last_time_t = max_last_time_t; - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } - - if(unlikely(!rm->first_time_t && !rm->last_time_t)) - rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); - - rm->flags |= RRD_FLAG_LIVE_RETENTION; -} - // called when this rrdmetric is inserted to the rrdmetrics dictionary of a rrdinstance -static void rrdmetric_insert_callback(const char *id __maybe_unused, void *value, void *data) { +// the constructor of the rrdmetric object +static void rrdmetric_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance) { RRDMETRIC *rm = value; // link it to its parent - rm->ri = data; + rm->ri = rrdinstance; // remove flags that we need to figure out at runtime - rm->flags = rm->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; - - rm->created_ut = now_realtime_usec(); + rm->flags = rm->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics // signal the react callback to do the job rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); } // called when this rrdmetric is deleted from the rrdmetrics dictionary of a rrdinstance -static void rrdmetric_delete_callback(const char *id __maybe_unused, void *value, void *data __maybe_unused) { +// the destructor of the rrdmetric object +static void rrdmetric_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { RRDMETRIC *rm = value; internal_error(rm->rrddim, "RRDMETRIC: '%s' is freed but there is a RRDDIM linked to it.", string2str(rm->id)); @@ -551,21 +506,49 @@ static void rrdmetric_delete_callback(const char *id __maybe_unused, void *value } // called when the same rrdmetric is inserted again to the rrdmetrics dictionary of a rrdinstance -static void rrdmetric_conflict_callback(const char *id __maybe_unused, void *oldv, void *newv, void *data __maybe_unused) { - RRDMETRIC *rm = oldv; - RRDMETRIC *rm_new = newv; +// while this is called, the dictionary is write locked, but there may be other users of the object +static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = old_value; + RRDMETRIC *rm_new = new_value; internal_error(rm->id != rm_new->id, "RRDMETRIC: '%s' cannot change id to '%s'", string2str(rm->id), string2str(rm_new->id)); if(uuid_compare(rm->uuid, rm_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; uuid_unparse(rm->uuid, uuid1); uuid_unparse(rm_new->uuid, uuid2); - internal_error(true, "RRDMETRIC: '%s' of instance '%s' changed uuid from '%s' to '%s'", string2str(rm->id), string2str(rm->ri->id), uuid1, uuid2); + + time_t old_first_time_t = 0; + time_t old_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + old_first_time_t = rm->first_time_t; + old_last_time_t = rm->last_time_t; + } + + uuid_copy(rm->uuid, rm_new->uuid); + + time_t new_first_time_t = 0; + time_t new_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + new_first_time_t = rm->first_time_t; + new_last_time_t = rm->last_time_t; + } + + internal_error(true, + "RRDMETRIC: '%s' of instance '%s' of host '%s' changed UUID from '%s' (retention %ld to %ld, %ld secs) to '%s' (retention %ld to %ld, %ld secs)" + , string2str(rm->id) + , string2str(rm->ri->id) + , rrdhost_hostname(rm->ri->rc->rrdhost) + , uuid1, old_first_time_t, old_last_time_t, old_last_time_t - old_first_time_t + , uuid2, new_first_time_t, new_last_time_t, new_last_time_t - new_first_time_t + ); +#else uuid_copy(rm->uuid, rm_new->uuid); - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_UUID); +#endif + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rm->rrddim && rm_new->rrddim && rm->rrddim != rm_new->rrddim) { @@ -573,12 +556,14 @@ static void rrdmetric_conflict_callback(const char *id __maybe_unused, void *old rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); } +#ifdef NETDATA_INTERNAL_CHECKS if(rm->rrddim && uuid_compare(rm->uuid, rm->rrddim->metric_uuid) != 0) { char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; uuid_unparse(rm->uuid, uuid1); uuid_unparse(rm_new->uuid, uuid2); - internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rm->rrddim->id, uuid1, uuid2); + internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rrddim_id(rm->rrddim), uuid1, uuid2); } +#endif if(rm->rrddim != rm_new->rrddim) rm->rrddim = rm_new->rrddim; @@ -587,7 +572,7 @@ static void rrdmetric_conflict_callback(const char *id __maybe_unused, void *old STRING *old = rm->name; rm->name = string_dup(rm_new->name); string_freez(old); - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(!rm->first_time_t || (rm_new->first_time_t && rm_new->first_time_t < rm->first_time_t)) { @@ -600,98 +585,74 @@ static void rrdmetric_conflict_callback(const char *id __maybe_unused, void *old rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); } - rm->flags |= (rm_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); + rrd_flag_set(rm, rm_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no needs for atomics on rm_new if(rrd_flag_is_collected(rm) && rrd_flag_is_archived(rm)) rrd_flag_set_collected(rm); - if(rm->flags & RRD_FLAG_UPDATED) - rm->flags |= RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT; + if(rrd_flag_check(rm, RRD_FLAG_UPDATED)) + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); rrdmetric_free(rm_new); // the react callback will continue from here + return rrd_flag_is_updated(rm); } -static void rrdmetric_react_callback(const char *id __maybe_unused, void *value, void *data __maybe_unused) { +// this is called after the insert or the conflict callbacks, +// but the dictionary is now unlocked +static void rrdmetric_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { RRDMETRIC *rm = value; - - rrdmetric_trigger_updates(rm, false, true); + rrdmetric_trigger_updates(rm, __FUNCTION__ ); } -static void rrdmetrics_create(RRDINSTANCE *ri) { +static void rrdmetrics_create_in_rrdinstance(RRDINSTANCE *ri) { if(unlikely(!ri)) return; if(likely(ri->rrdmetrics)) return; - ri->rrdmetrics = dictionary_create(DICTIONARY_FLAG_DONT_OVERWRITE_VALUE); - dictionary_register_insert_callback(ri->rrdmetrics, rrdmetric_insert_callback, (void *)ri); - dictionary_register_delete_callback(ri->rrdmetrics, rrdmetric_delete_callback, (void *)ri); - dictionary_register_conflict_callback(ri->rrdmetrics, rrdmetric_conflict_callback, (void *)ri); - dictionary_register_react_callback(ri->rrdmetrics, rrdmetric_react_callback, (void *)ri); + ri->rrdmetrics = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(ri->rrdmetrics, rrdmetric_insert_callback, ri); + dictionary_register_delete_callback(ri->rrdmetrics, rrdmetric_delete_callback, ri); + dictionary_register_conflict_callback(ri->rrdmetrics, rrdmetric_conflict_callback, ri); + dictionary_register_react_callback(ri->rrdmetrics, rrdmetric_react_callback, ri); } -static void rrdmetrics_destroy(RRDINSTANCE *ri) { +static void rrdmetrics_destroy_from_rrdinstance(RRDINSTANCE *ri) { if(unlikely(!ri || !ri->rrdmetrics)) return; dictionary_destroy(ri->rrdmetrics); ri->rrdmetrics = NULL; } -static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) { - if(likely(!(rm->flags & RRD_FLAG_DELETED))) - return false; - - if(likely(!(rm->flags & RRD_FLAG_LIVE_RETENTION))) - return false; - - if(unlikely(rm->flags & RRD_FLAGS_PREVENTING_DELETIONS)) - return false; - - if(likely(rm->rrddim)) - return false; - - if((now_realtime_usec() - rm->created_ut) < 600 * USEC_PER_SEC) - return false; - - rrdmetric_update_retention(rm); - if(rm->first_time_t || rm->last_time_t) - return false; - - return true; -} - -static void rrdmetric_trigger_updates(RRDMETRIC *rm, bool force, bool escalate) { - if(likely(!force && !(rm->flags & RRD_FLAG_UPDATED))) return; - - if(unlikely(rrd_flag_is_collected(rm) && !rm->rrddim)) - rrd_flag_set_archived(rm); - - if(unlikely((rm->flags & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) && rrd_flag_is_collected(rm))) - rrd_flag_set_archived(rm); - - rrdmetric_update_retention(rm); +// trigger post-processing of the rrdmetric, escalating changes to the rrdinstance it belongs +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function) { + if(unlikely(rrd_flag_is_collected(rm)) && (!rm->rrddim || rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD))) + rrd_flag_set_archived(rm); - if(unlikely(escalate && rm->flags & RRD_FLAG_UPDATED && !(rm->ri->flags & RRD_FLAG_DONT_PROCESS))) { - log_transition(rm->id, rm->ri->id, rm->ri->rc->id, rm->flags, "RRDMETRIC"); - rrdinstance_trigger_updates(rm->ri, true, true); + if(rrd_flag_is_updated(rm) || !rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(rm->ri, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(rm->ri->rc, function, rm->flags); } } +// ---------------------------------------------------------------------------- +// RRDMETRIC HOOKS ON RRDDIM + static inline void rrdmetric_from_rrddim(RRDDIM *rd) { if(unlikely(!rd->rrdset)) - fatal("RRDMETRIC: rrddim '%s' does not have a rrdset.", rd->id); + fatal("RRDMETRIC: rrddim '%s' does not have a rrdset.", rrddim_id(rd)); if(unlikely(!rd->rrdset->rrdhost)) - fatal("RRDMETRIC: rrdset '%s' does not have a rrdhost", rd->rrdset->id); + fatal("RRDMETRIC: rrdset '%s' does not have a rrdhost", rrdset_id(rd->rrdset)); if(unlikely(!rd->rrdset->rrdinstance)) - fatal("RRDMETRIC: rrdset '%s' does not have a rrdinstance", rd->rrdset->id); + fatal("RRDMETRIC: rrdset '%s' does not have a rrdinstance", rrdset_id(rd->rrdset)); RRDINSTANCE *ri = rrdinstance_acquired_value(rd->rrdset->rrdinstance); RRDMETRIC trm = { - .id = string_strdupz(rd->id), - .name = string_strdupz(rd->name), - .flags = RRD_FLAG_NONE, + .id = string_dup(rd->id), + .name = string_dup(rd->name), + .flags = RRD_FLAG_NONE, // no need for atomics .rrddim = rd, }; uuid_copy(trm.uuid, rd->metric_uuid); @@ -707,14 +668,18 @@ static inline void rrdmetric_from_rrddim(RRDDIM *rd) { #define rrddim_get_rrdmetric(rd) rrddim_get_rrdmetric_with_trace(rd, __FUNCTION__) static inline RRDMETRIC *rrddim_get_rrdmetric_with_trace(RRDDIM *rd, const char *function) { if(unlikely(!rd->rrdmetric)) { - error("RRDMETRIC: RRDDIM '%s' is not linked to an RRDMETRIC at %s()", rd->id, function); + error("RRDMETRIC: RRDDIM '%s' is not linked to an RRDMETRIC at %s()", rrddim_id(rd), function); return NULL; } RRDMETRIC *rm = rrdmetric_acquired_value(rd->rrdmetric); + if(unlikely(!rm)) { + error("RRDMETRIC: RRDDIM '%s' lost the link to its RRDMETRIC at %s()", rrddim_id(rd), function); + return NULL; + } if(unlikely(rm->rrddim != rd)) - fatal("RRDMETRIC: '%s' is not linked to RRDDIM '%s' at %s()", string2str(rm->id), rd->id, function); + fatal("RRDMETRIC: '%s' is not linked to RRDDIM '%s' at %s()", string2str(rm->id), rrddim_id(rd), function); return rm; } @@ -727,7 +692,7 @@ static inline void rrdmetric_rrddim_is_freed(RRDDIM *rd) { rrd_flag_set_archived(rm); rm->rrddim = NULL; - rrdmetric_trigger_updates(rm, false, true); + rrdmetric_trigger_updates(rm, __FUNCTION__ ); rrdmetric_release(rd->rrdmetric); rd->rrdmetric = NULL; } @@ -736,12 +701,12 @@ static inline void rrdmetric_updated_rrddim_flags(RRDDIM *rd) { RRDMETRIC *rm = rrddim_get_rrdmetric(rd); if(unlikely(!rm)) return; - if(unlikely(rd->flags & (RRDDIM_FLAG_ARCHIVED | RRDDIM_FLAG_OBSOLETE))) { + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED|RRDDIM_FLAG_OBSOLETE))) { if(unlikely(rrd_flag_is_collected(rm))) rrd_flag_set_archived(rm); } - rrdmetric_trigger_updates(rm, false, true); + rrdmetric_trigger_updates(rm, __FUNCTION__ ); } static inline void rrdmetric_collected_rrddim(RRDDIM *rd) { @@ -751,7 +716,10 @@ static inline void rrdmetric_collected_rrddim(RRDDIM *rd) { if(unlikely(!rrd_flag_is_collected(rm))) rrd_flag_set_collected(rm); - rrdmetric_trigger_updates(rm, false, true); + // we use this variable to detect BEGIN/END without SET + rm->ri->internal.collected_metrics_count++; + + rrdmetric_trigger_updates(rm, __FUNCTION__ ); } // ---------------------------------------------------------------------------- @@ -759,10 +727,10 @@ static inline void rrdmetric_collected_rrddim(RRDDIM *rd) { static void rrdinstance_free(RRDINSTANCE *ri) { - if(ri->flags & RRD_FLAG_OWN_LABELS) + if(rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) dictionary_destroy(ri->rrdlabels); - rrdmetrics_destroy(ri); + rrdmetrics_destroy_from_rrdinstance(ri); string_freez(ri->id); string_freez(ri->name); string_freez(ri->title); @@ -780,7 +748,7 @@ static void rrdinstance_free(RRDINSTANCE *ri) { ri->rrdset = NULL; } -static void rrdinstance_insert_callback(const char *id __maybe_unused, void *value, void *data) { +static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext) { static STRING *ml_anomaly_rates_id = NULL; if(unlikely(!ml_anomaly_rates_id)) @@ -789,64 +757,66 @@ static void rrdinstance_insert_callback(const char *id __maybe_unused, void *val RRDINSTANCE *ri = value; // link it to its parent - ri->rc = data; + ri->rc = rrdcontext; - ri->flags = ri->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; + ri->flags = ri->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics if(!ri->name) ri->name = string_dup(ri->id); - if(ri->rrdset && ri->rrdset->state) { - ri->rrdlabels = ri->rrdset->state->chart_labels; - if(ri->flags & RRD_FLAG_OWN_LABELS) - ri->flags &= ~RRD_FLAG_OWN_LABELS; + if(ri->rrdset) { + ri->rrdlabels = ri->rrdset->rrdlabels; + ri->flags &= ~RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor } else { ri->rrdlabels = rrdlabels_create(); - ri->flags |= RRD_FLAG_OWN_LABELS; + ri->flags |= RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor } if(ri->rrdset) { - if(unlikely((ri->rrdset->flags & RRDSET_FLAG_HIDDEN) || (ri->rrdset->state && ri->rrdset->state->is_ar_chart))) - ri->flags |= RRD_FLAG_HIDDEN; + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor else - ri->flags &= ~RRD_FLAG_HIDDEN; + ri->flags &= ~RRD_FLAG_HIDDEN; // no need of atomics at the constructor } // we need this when loading from SQL if(unlikely(ri->id == ml_anomaly_rates_id)) - ri->flags |= RRD_FLAG_HIDDEN; + ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor - rrdmetrics_create(ri); - rrdinstance_log(ri, "INSERT"); + rrdmetrics_create_in_rrdinstance(ri); // signal the react callback to do the job rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); } -static void rrdinstance_delete_callback(const char *id, void *value, void *data) { - (void)id; - RRDCONTEXT *rc = data; (void)rc; +static void rrdinstance_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { RRDINSTANCE *ri = (RRDINSTANCE *)value; - rrdinstance_log(ri, "DELETE"); - internal_error(ri->rrdset, "RRDINSTANCE: '%s' is freed but there is a RRDSET linked to it.", string2str(ri->id)); rrdinstance_free(ri); } -static void rrdinstance_conflict_callback(const char *id __maybe_unused, void *oldv, void *newv, void *data __maybe_unused) { - RRDINSTANCE *ri = (RRDINSTANCE *)oldv; - RRDINSTANCE *ri_new = (RRDINSTANCE *)newv; +static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = (RRDINSTANCE *)old_value; + RRDINSTANCE *ri_new = (RRDINSTANCE *)new_value; internal_error(ri->id != ri_new->id, "RRDINSTANCE: '%s' cannot change id to '%s'", string2str(ri->id), string2str(ri_new->id)); if(uuid_compare(ri->uuid, ri_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri_new->uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' of host '%s' changed UUID from '%s' to '%s'", + string2str(ri->id), rrdhost_hostname(ri->rc->rrdhost), uuid1, uuid2); +#endif + uuid_copy(ri->uuid, ri_new->uuid); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UUID); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->rrdset && ri_new->rrdset && ri->rrdset != ri_new->rrdset) { @@ -854,270 +824,156 @@ static void rrdinstance_conflict_callback(const char *id __maybe_unused, void *o rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); } - if(ri->rrdset && ri->rrdset->chart_uuid && uuid_compare(ri->uuid, *ri->rrdset->chart_uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS + if(ri->rrdset && uuid_compare(ri->uuid, ri->rrdset->chart_uuid) != 0) { char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; uuid_unparse(ri->uuid, uuid1); - uuid_unparse(*ri->rrdset->chart_uuid, uuid2); - internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), ri->rrdset->id, uuid1, uuid2); + uuid_unparse(ri->rrdset->chart_uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), rrdset_id(ri->rrdset), uuid1, uuid2); } +#endif if(ri->name != ri_new->name) { STRING *old = ri->name; ri->name = string_dup(ri_new->name); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->title != ri_new->title) { STRING *old = ri->title; ri->title = string_dup(ri_new->title); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->units != ri_new->units) { STRING *old = ri->units; ri->units = string_dup(ri_new->units); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->family != ri_new->family) { STRING *old = ri->family; ri->family = string_dup(ri_new->family); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->chart_type != ri_new->chart_type) { ri->chart_type = ri_new->chart_type; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->priority != ri_new->priority) { ri->priority = ri_new->priority; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->update_every != ri_new->update_every) { ri->update_every = ri_new->update_every; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->rrdset != ri_new->rrdset) { ri->rrdset = ri_new->rrdset; - if(ri->rrdset && (ri->flags & RRD_FLAG_OWN_LABELS)) { + if(ri->rrdset && rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { DICTIONARY *old = ri->rrdlabels; - ri->rrdlabels = ri->rrdset->state->chart_labels; - ri->flags &= ~RRD_FLAG_OWN_LABELS; + ri->rrdlabels = ri->rrdset->rrdlabels; + rrd_flag_clear(ri, RRD_FLAG_OWN_LABELS); rrdlabels_destroy(old); } - else if(!ri->rrdset && !(ri->flags & RRD_FLAG_OWN_LABELS)) { + else if(!ri->rrdset && !rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { ri->rrdlabels = rrdlabels_create(); - ri->flags |= RRD_FLAG_OWN_LABELS; + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); } } if(ri->rrdset) { - if(unlikely((ri->rrdset->flags & RRDSET_FLAG_HIDDEN) || (ri->rrdset->state && ri->rrdset->state->is_ar_chart))) - ri->flags |= RRD_FLAG_HIDDEN; + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + rrd_flag_set(ri, RRD_FLAG_HIDDEN); else - ri->flags &= ~RRD_FLAG_HIDDEN; + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); } - ri->flags |= (ri_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); + rrd_flag_set(ri, ri_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on ri_new if(rrd_flag_is_collected(ri) && rrd_flag_is_archived(ri)) rrd_flag_set_collected(ri); - if(ri->flags & RRD_FLAG_UPDATED) - ri->flags |= RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT; - - rrdinstance_log(ri, "CONFLICT"); + if(rrd_flag_is_updated(ri)) + rrd_flag_set(ri, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); // free the new one rrdinstance_free(ri_new); // the react callback will continue from here + return rrd_flag_is_updated(ri); } -static void rrdinstance_react_callback(const char *id __maybe_unused, void *value, void *data __maybe_unused) { +static void rrdinstance_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { RRDINSTANCE *ri = value; - rrdinstance_trigger_updates(ri, false, true); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); } -void rrdinstances_create(RRDCONTEXT *rc) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - +void rrdinstances_create_in_rrdcontext(RRDCONTEXT *rc) { if(unlikely(!rc || rc->rrdinstances)) return; - rc->rrdinstances = dictionary_create(DICTIONARY_FLAG_DONT_OVERWRITE_VALUE); - dictionary_register_insert_callback(rc->rrdinstances, rrdinstance_insert_callback, (void *)rc); - dictionary_register_delete_callback(rc->rrdinstances, rrdinstance_delete_callback, (void *)rc); - dictionary_register_conflict_callback(rc->rrdinstances, rrdinstance_conflict_callback, (void *)rc); - dictionary_register_react_callback(rc->rrdinstances, rrdinstance_react_callback, (void *)rc); + rc->rrdinstances = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(rc->rrdinstances, rrdinstance_insert_callback, rc); + dictionary_register_delete_callback(rc->rrdinstances, rrdinstance_delete_callback, rc); + dictionary_register_conflict_callback(rc->rrdinstances, rrdinstance_conflict_callback, rc); + dictionary_register_react_callback(rc->rrdinstances, rrdinstance_react_callback, rc); } -void rrdinstances_destroy(RRDCONTEXT *rc) { +void rrdinstances_destroy_from_rrdcontext(RRDCONTEXT *rc) { if(unlikely(!rc || !rc->rrdinstances)) return; dictionary_destroy(rc->rrdinstances); rc->rrdinstances = NULL; } -static inline bool rrdinstance_should_be_deleted(RRDINSTANCE *ri) { - if(likely(!(ri->flags & RRD_FLAG_DELETED))) - return false; - - if(likely(!(ri->flags & RRD_FLAG_LIVE_RETENTION))) - return false; - - if(unlikely(ri->flags & RRD_FLAGS_PREVENTING_DELETIONS)) - return false; - - if(likely(ri->rrdset)) - return false; - - if(unlikely(dictionary_stats_referenced_items(ri->rrdmetrics) != 0)) - return false; - - if(unlikely(dictionary_stats_entries(ri->rrdmetrics) != 0)) - return false; - - if(ri->first_time_t || ri->last_time_t) - return false; - - return true; -} - -static void rrdinstance_trigger_updates(RRDINSTANCE *ri, bool force, bool escalate) { - if(unlikely(ri->flags & RRD_FLAG_DONT_PROCESS)) return; - if(unlikely(!force && !(ri->flags & RRD_FLAG_UPDATED))) return; +static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function) { + RRDSET *st = ri->rrdset; - if(likely(ri->rrdset)) { - if(unlikely(ri->rrdset->priority != ri->priority)) { - ri->priority = ri->rrdset->priority; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + if(likely(st)) { + if(unlikely((unsigned int) st->priority != ri->priority)) { + ri->priority = st->priority; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } - if(unlikely(ri->rrdset->update_every != ri->update_every)) { - ri->update_every = ri->rrdset->update_every; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY); + if(unlikely(st->update_every != ri->update_every)) { + ri->update_every = st->update_every; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } } else if(unlikely(rrd_flag_is_collected(ri))) { + // there is no rrdset, but we have it as collected! + rrd_flag_set_archived(ri); rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); } - time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; - size_t metrics_active = 0, metrics_deleted = 0; - bool live_retention = true, currently_collected = false; - { - RRDMETRIC *rm; - dfe_start_read((DICTIONARY *)ri->rrdmetrics, rm) { - if(!(rm->flags & RRD_FLAG_LIVE_RETENTION)) - live_retention = false; - - if (unlikely((rrdmetric_should_be_deleted(rm)))) { - metrics_deleted++; - rrd_flag_unset_updated(rm); - continue; - } - - if(rm->flags & RRD_FLAG_COLLECTED) - currently_collected = true; - - metrics_active++; - - if (rm->first_time_t && rm->first_time_t < min_first_time_t) - min_first_time_t = rm->first_time_t; - - if (rm->last_time_t && rm->last_time_t > max_last_time_t) - max_last_time_t = rm->last_time_t; - - rrd_flag_unset_updated(rm); - } - dfe_done(rm); - } - - if(live_retention && !(ri->flags & RRD_FLAG_LIVE_RETENTION)) - ri->flags |= RRD_FLAG_LIVE_RETENTION; - else if(!live_retention && (ri->flags & RRD_FLAG_LIVE_RETENTION)) - ri->flags &= ~RRD_FLAG_LIVE_RETENTION; - - if(unlikely(!metrics_active)) { - // no metrics available - - if(ri->first_time_t) { - ri->first_time_t = 0; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } - - if(ri->last_time_t) { - ri->last_time_t = 0; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } - - rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); - } - else { - // we have active metrics... - - if (unlikely(min_first_time_t == LONG_MAX)) - min_first_time_t = 0; - - if (unlikely(min_first_time_t == 0 || max_last_time_t == 0)) { - if(ri->first_time_t) { - ri->first_time_t = 0; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } - - if(ri->last_time_t) { - ri->last_time_t = 0; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } - - if(unlikely(live_retention)) - rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); - } - else { - ri->flags &= ~RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; - - if (unlikely(ri->first_time_t != min_first_time_t)) { - ri->first_time_t = min_first_time_t; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } - - if (unlikely(ri->last_time_t != max_last_time_t)) { - ri->last_time_t = max_last_time_t; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } - - if(likely(currently_collected)) - rrd_flag_set_collected(ri); - else - rrd_flag_set_archived(ri); - } - } - - if(unlikely(escalate && ri->flags & RRD_FLAG_UPDATED && !(ri->rc->flags & RRD_FLAG_DONT_PROCESS))) { - log_transition(NULL, ri->id, ri->rc->id, ri->flags, "RRDINSTANCE"); - rrdcontext_trigger_updates(ri->rc, true); + if(rrd_flag_is_updated(ri) || !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(ri->rc, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(ri->rc, function, ri->flags); } } +// ---------------------------------------------------------------------------- +// RRDINSTANCE HOOKS ON RRDSET + static inline void rrdinstance_from_rrdset(RRDSET *st) { RRDCONTEXT trc = { - .id = string_strdupz(st->context), - .title = string_strdupz(st->title), - .units = string_strdupz(st->units), - .family = string_strdupz(st->family), + .id = string_dup(st->context), + .title = string_dup(st->title), + .units = string_dup(st->units), + .family = string_dup(st->family), .priority = st->priority, .chart_type = st->chart_type, - .flags = RRD_FLAG_NONE, + .flags = RRD_FLAG_NONE, // no need for atomics .rrdhost = st->rrdhost, }; @@ -1125,18 +981,18 @@ static inline void rrdinstance_from_rrdset(RRDSET *st) { RRDCONTEXT *rc = rrdcontext_acquired_value(rca); RRDINSTANCE tri = { - .id = string_strdupz(st->id), - .name = string_strdupz(st->name), - .units = string_strdupz(st->units), - .family = string_strdupz(st->family), - .title = string_strdupz(st->title), + .id = string_dup(st->id), + .name = string_dup(st->name), + .units = string_dup(st->units), + .family = string_dup(st->family), + .title = string_dup(st->title), .chart_type = st->chart_type, .priority = st->priority, .update_every = st->update_every, - .flags = RRD_FLAG_DONT_PROCESS, + .flags = RRD_FLAG_NONE, // no need for atomics .rrdset = st, }; - uuid_copy(tri.uuid, *st->chart_uuid); + uuid_copy(tri.uuid, st->chart_uuid); RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, string2str(tri.id), &tri, sizeof(tri)); @@ -1157,18 +1013,18 @@ static inline void rrdinstance_from_rrdset(RRDSET *st) { } if(rca_old && ria_old) { - // the chart changed context - RRDCONTEXT *rc_old = rrdcontext_acquired_value(rca_old); + // Oops! The chart changed context! + + // RRDCONTEXT *rc_old = rrdcontext_acquired_value(rca_old); RRDINSTANCE *ri_old = rrdinstance_acquired_value(ria_old); // migrate all dimensions to the new metrics - rrdset_rdlock(st); RRDDIM *rd; rrddim_foreach_read(rd, st) { if (!rd->rrdmetric) continue; RRDMETRIC *rm_old = rrdmetric_acquired_value(rd->rrdmetric); - rm_old->flags = RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; + rrd_flags_replace(rm_old, RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); rm_old->rrddim = NULL; rm_old->first_time_t = 0; rm_old->last_time_t = 0; @@ -1178,37 +1034,32 @@ static inline void rrdinstance_from_rrdset(RRDSET *st) { rrdmetric_from_rrddim(rd); } - rrdset_unlock(st); + rrddim_foreach_done(rd); // mark the old instance, ready to be deleted - if(!(ri_old->flags & RRD_FLAG_OWN_LABELS)) + if(!rrd_flag_check(ri_old, RRD_FLAG_OWN_LABELS)) ri_old->rrdlabels = rrdlabels_create(); - ri_old->flags = RRD_FLAG_OWN_LABELS|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; + rrd_flags_replace(ri_old, RRD_FLAG_OWN_LABELS|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); ri_old->rrdset = NULL; ri_old->first_time_t = 0; ri_old->last_time_t = 0; - ri_old->flags &= ~RRD_FLAG_DONT_PROCESS; - rc_old->flags &= ~RRD_FLAG_DONT_PROCESS; - - rrdinstance_trigger_updates(ri_old, true, true); - - ri_old->flags |= RRD_FLAG_DONT_PROCESS; + rrdinstance_trigger_updates(ri_old, __FUNCTION__ ); rrdinstance_release(ria_old); /* // trigger updates on the old context - if(!dictionary_stats_entries(rc_old->rrdinstances) && !dictionary_stats_referenced_items(rc_old->rrdinstances)) { + if(!dictionary_entries(rc_old->rrdinstances) && !dictionary_stats_referenced_items(rc_old->rrdinstances)) { rrdcontext_lock(rc_old); rc_old->flags = ((rc_old->flags & RRD_FLAG_QUEUED)?RRD_FLAG_QUEUED:RRD_FLAG_NONE)|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; rc_old->first_time_t = 0; rc_old->last_time_t = 0; rrdcontext_unlock(rc_old); - rrdcontext_trigger_updates(rc_old, true); + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); } else - rrdcontext_trigger_updates(rc_old, true); + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); */ rrdcontext_release(rca_old); @@ -1223,14 +1074,18 @@ static inline void rrdinstance_from_rrdset(RRDSET *st) { #define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__); static inline RRDINSTANCE *rrdset_get_rrdinstance_with_trace(RRDSET *st, const char *function) { if(unlikely(!st->rrdinstance)) { - error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", st->id, function); + error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", rrdset_id(st), function); return NULL; } RRDINSTANCE *ri = rrdinstance_acquired_value(st->rrdinstance); + if(unlikely(!ri)) { + error("RRDINSTANCE: RRDSET '%s' lost its link to an RRDINSTANCE at %s()", rrdset_id(st), function); + return NULL; + } if(unlikely(ri->rrdset != st)) - fatal("RRDINSTANCE: '%s' is not linked to RRDSET '%s' at %s()", string2str(ri->id), st->id, function); + fatal("RRDINSTANCE: '%s' is not linked to RRDSET '%s' at %s()", string2str(ri->id), rrdset_id(st), function); return ri; } @@ -1241,17 +1096,15 @@ static inline void rrdinstance_rrdset_is_freed(RRDSET *st) { rrd_flag_set_archived(ri); - if(!(ri->flags & RRD_FLAG_OWN_LABELS)) { - ri->flags |= RRD_FLAG_OWN_LABELS; + if(!rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { ri->rrdlabels = rrdlabels_create(); - rrdlabels_copy(ri->rrdlabels, st->state->chart_labels); + rrdlabels_copy(ri->rrdlabels, st->rrdlabels); + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); } ri->rrdset = NULL; - ri->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdinstance_trigger_updates(ri, false, true); - ri->flags |= RRD_FLAG_DONT_PROCESS; + rrdinstance_trigger_updates(ri, __FUNCTION__ ); rrdinstance_release(st->rrdinstance); st->rrdinstance = NULL; @@ -1260,6 +1113,14 @@ static inline void rrdinstance_rrdset_is_freed(RRDSET *st) { st->rrdcontext = NULL; } +static inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + static inline void rrdinstance_updated_rrdset_name(RRDSET *st) { // the chart may not be initialized when this is called if(unlikely(!st->rrdinstance)) return; @@ -1267,28 +1128,32 @@ static inline void rrdinstance_updated_rrdset_name(RRDSET *st) { RRDINSTANCE *ri = rrdset_get_rrdinstance(st); if(unlikely(!ri)) return; - STRING *old = ri->name; - ri->name = string_strdupz(st->name); - - if(ri->name != old) - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); - - string_freez(old); + if(st->name != ri->name) { + STRING *old = ri->name; + ri->name = string_dup(st->name); + string_freez(old); - rrdinstance_trigger_updates(ri, false, true); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + } } static inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, RRDSET *st) { - if(unlikely(st->flags & (RRDSET_FLAG_ARCHIVED | RRDSET_FLAG_OBSOLETE))) - rrd_flag_set_archived(ri); + if(unlikely(ri->rrdset != st)) + fatal("RRDCONTEXT: instance '%s' is not linked to chart '%s' on host '%s'", + string2str(ri->id), rrdset_id(st), rrdhost_hostname(st->rrdhost)); - if(unlikely((st->flags & RRDSET_FLAG_HIDDEN) && !(ri->flags & RRD_FLAG_HIDDEN))) { - ri->flags |= RRD_FLAG_HIDDEN; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS); - } - else if(unlikely(!(st->flags & RRDSET_FLAG_HIDDEN) && (ri->flags & RRD_FLAG_HIDDEN))) { - ri->flags &= ~RRD_FLAG_HIDDEN; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS); + bool st_is_hidden = rrdset_flag_check(st, RRDSET_FLAG_HIDDEN); + bool ri_is_hidden = rrd_flag_check(ri, RRD_FLAG_HIDDEN); + + if(unlikely(st_is_hidden != ri_is_hidden)) { + if (unlikely(st_is_hidden && !ri_is_hidden)) + rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + + else if (unlikely(!st_is_hidden && ri_is_hidden)) { + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } } } @@ -1296,11 +1161,12 @@ static inline void rrdinstance_updated_rrdset_flags(RRDSET *st) { RRDINSTANCE *ri = rrdset_get_rrdinstance(st); if(unlikely(!ri)) return; + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED|RRDSET_FLAG_OBSOLETE))) + rrd_flag_set_archived(ri); + rrdinstance_updated_rrdset_flags_no_action(ri, st); - ri->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdinstance_trigger_updates(ri, false, true); - ri->flags |= RRD_FLAG_DONT_PROCESS; + rrdinstance_trigger_updates(ri, __FUNCTION__ ); } static inline void rrdinstance_collected_rrdset(RRDSET *st) { @@ -1309,13 +1175,13 @@ static inline void rrdinstance_collected_rrdset(RRDSET *st) { rrdinstance_updated_rrdset_flags_no_action(ri, st); - if(unlikely(!rrd_flag_is_collected(ri))) + if(unlikely(ri->internal.collected_metrics_count && !rrd_flag_is_collected(ri))) rrd_flag_set_collected(ri); - if(unlikely(ri->flags & RRD_FLAG_DONT_PROCESS)) - ri->flags &= ~RRD_FLAG_DONT_PROCESS; + // we use this variable to detect BEGIN/END without SET + ri->internal.collected_metrics_count = 0; - rrdinstance_trigger_updates(ri, false, true); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); } // ---------------------------------------------------------------------------- @@ -1328,131 +1194,12 @@ static void rrdcontext_freez(RRDCONTEXT *rc) { string_freez(rc->family); } -static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc) { - time_t now = now_realtime_sec(); - uint64_t version = MAX(rc->version, rc->hub.version); - version = MAX((uint64_t)now, version); - version++; - return version; -} - -static void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused) { - - // save it, so that we know the last version we sent to hub - rc->version = rc->hub.version = rrdcontext_get_next_version(rc); - rc->hub.id = string2str(rc->id); - rc->hub.title = string2str(rc->title); - rc->hub.units = string2str(rc->units); - rc->hub.family = string2str(rc->family); - rc->hub.chart_type = rrdset_type_name(rc->chart_type); - rc->hub.priority = rc->priority; - rc->hub.first_time_t = rc->first_time_t; - rc->hub.last_time_t = rrd_flag_is_collected(rc) ? 0 : rc->last_time_t; - rc->hub.deleted = (rc->flags & RRD_FLAG_DELETED) ? true : false; - -#ifdef ENABLE_ACLK - struct context_updated message = { - .id = rc->hub.id, - .version = rc->hub.version, - .title = rc->hub.title, - .units = rc->hub.units, - .family = rc->hub.family, - .chart_type = rc->hub.chart_type, - .priority = rc->hub.priority, - .first_entry = rc->hub.first_time_t, - .last_entry = rc->hub.last_time_t, - .deleted = rc->hub.deleted, - }; - - if(likely(!(rc->flags & RRD_FLAG_HIDDEN))) { - if (snapshot) { - if (!rc->hub.deleted) - contexts_snapshot_add_ctx_update(bundle, &message); - } - else - contexts_updated_add_ctx_update(bundle, &message); - } -#endif - - // store it to SQL - - if(rc->flags & RRD_FLAG_DELETED) { - rrdcontext_delete_from_sql_unsafe(rc); - } - else { - if (ctx_store_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) - error("RRDCONTEXT: failed to save context '%s' version %"PRIu64" to SQL.", rc->hub.id, rc->hub.version); - } -} - -static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused) { - bool id_changed = false, - title_changed = false, - units_changed = false, - family_changed = false, - chart_type_changed = false, - priority_changed = false, - first_time_changed = false, - last_time_changed = false, - deleted_changed = false; - - if(unlikely(string2str(rc->id) != rc->hub.id)) - id_changed = true; - - if(unlikely(string2str(rc->title) != rc->hub.title)) - title_changed = true; - - if(unlikely(string2str(rc->units) != rc->hub.units)) - units_changed = true; - - if(unlikely(string2str(rc->family) != rc->hub.family)) - family_changed = true; - - if(unlikely(rrdset_type_name(rc->chart_type) != rc->hub.chart_type)) - chart_type_changed = true; - - if(unlikely(rc->priority != rc->hub.priority)) - priority_changed = true; - - if(unlikely((uint64_t)rc->first_time_t != rc->hub.first_time_t)) - first_time_changed = true; - - if(unlikely((uint64_t)(rrd_flag_is_collected(rc) ? 0 : rc->last_time_t) != rc->hub.last_time_t)) - last_time_changed = true; - - if(unlikely(((rc->flags & RRD_FLAG_DELETED) ? true : false) != rc->hub.deleted)) - deleted_changed = true; - - if(unlikely(id_changed || title_changed || units_changed || family_changed || chart_type_changed || priority_changed || first_time_changed || last_time_changed || deleted_changed)) { - - internal_error(true, "RRDCONTEXT: %s NEW VERSION '%s'%s, version %"PRIu64", title '%s'%s, units '%s'%s, family '%s'%s, chart type '%s'%s, priority %u%s, first_time_t %ld%s, last_time_t %ld%s, deleted '%s'%s, (queued for %llu ms, expected %llu ms)", - sending?"SENDING":"QUEUE", - string2str(rc->id), id_changed ? " (CHANGED)" : "", - rc->version, - string2str(rc->title), title_changed ? " (CHANGED)" : "", - string2str(rc->units), units_changed ? " (CHANGED)" : "", - string2str(rc->family), family_changed ? " (CHANGED)" : "", - rrdset_type_name(rc->chart_type), chart_type_changed ? " (CHANGED)" : "", - rc->priority, priority_changed ? " (CHANGED)" : "", - rc->first_time_t, first_time_changed ? " (CHANGED)" : "", - rrd_flag_is_collected(rc) ? 0 : rc->last_time_t, last_time_changed ? " (CHANGED)" : "", - (rc->flags & RRD_FLAG_DELETED) ? "true" : "false", deleted_changed ? " (CHANGED)" : "", - sending ? (now_realtime_usec() - rc->queue.queued_ut) / USEC_PER_MS : 0, - sending ? (rc->queue.scheduled_dispatch_ut - rc->queue.queued_ut) / USEC_PER_SEC : 0 - ); - return true; - } - - return false; -} - -static void rrdcontext_insert_callback(const char *id, void *value, void *data) { - (void)id; - RRDHOST *host = (RRDHOST *)data; +static void rrdcontext_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost) { + RRDHOST *host = (RRDHOST *)rrdhost; RRDCONTEXT *rc = (RRDCONTEXT *)value; rc->rrdhost = host; - rc->flags = rc->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; + rc->flags = rc->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics at constructor if(rc->hub.version) { // we are loading data from the SQL database @@ -1485,11 +1232,11 @@ static void rrdcontext_insert_callback(const char *id, void *value, void *data) rc->version = rc->hub.version; rc->priority = rc->hub.priority; - rc->first_time_t = rc->hub.first_time_t; - rc->last_time_t = rc->hub.last_time_t; + rc->first_time_t = (time_t)rc->hub.first_time_t; + rc->last_time_t = (time_t)rc->hub.last_time_t; if(rc->hub.deleted || !rc->hub.first_time_t) - rrd_flag_set_deleted(rc, 0); + rrd_flag_set_deleted(rc, RRD_FLAG_NONE); else { if (rc->last_time_t == 0) rrd_flag_set_collected(rc); @@ -1497,80 +1244,85 @@ static void rrdcontext_insert_callback(const char *id, void *value, void *data) rrd_flag_set_archived(rc); } - rc->flags |= RRD_FLAG_UPDATE_REASON_LOAD_SQL; + rc->flags |= RRD_FLAG_UPDATE_REASON_LOAD_SQL; // no need for atomics at constructor } else { // we are adding this context now for the first time rc->version = now_realtime_sec(); } - rrdinstances_create(rc); + rrdinstances_create_in_rrdcontext(rc); netdata_mutex_init(&rc->mutex); // signal the react callback to do the job rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); } -static void rrdcontext_delete_callback(const char *id, void *value, void *data) { - (void)id; - RRDHOST *host = (RRDHOST *)data; - (void)host; +static void rrdcontext_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { RRDCONTEXT *rc = (RRDCONTEXT *)value; - rrdinstances_destroy(rc); + rrdinstances_destroy_from_rrdcontext(rc); netdata_mutex_destroy(&rc->mutex); rrdcontext_freez(rc); } -static void rrdcontext_conflict_callback(const char *id, void *oldv, void *newv, void *data) { - (void)id; - RRDHOST *host = (RRDHOST *)data; - (void)host; +static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdhost __maybe_unused) { + RRDCONTEXT *rc = (RRDCONTEXT *)old_value; + RRDCONTEXT *rc_new = (RRDCONTEXT *)new_value; - RRDCONTEXT *rc = (RRDCONTEXT *)oldv; - RRDCONTEXT *rc_new = (RRDCONTEXT *)newv; + //current rc is not archived, new_rc is archived, don't merge + if (!rrd_flag_is_archived(rc) && rrd_flag_is_archived(rc_new)) { + rrdcontext_freez(rc_new); + return false; + } rrdcontext_lock(rc); if(rc->title != rc_new->title) { STRING *old_title = rc->title; - rc->title = string_2way_merge(rc->title, rc_new->title); + if (rrd_flag_is_archived(rc) && !rrd_flag_is_archived(rc_new)) + rc->title = string_dup(rc_new->title); + else + rc->title = string_2way_merge(rc->title, rc_new->title); string_freez(old_title); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->units != rc_new->units) { STRING *old_units = rc->units; rc->units = string_dup(rc_new->units); string_freez(old_units); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->family != rc_new->family) { STRING *old_family = rc->family; - rc->family = string_2way_merge(rc->family, rc_new->family); + if (rrd_flag_is_archived(rc) && !rrd_flag_is_archived(rc_new)) + rc->family = string_dup(rc_new->family); + else + rc->family = string_2way_merge(rc->family, rc_new->family); string_freez(old_family); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->chart_type != rc_new->chart_type) { rc->chart_type = rc_new->chart_type; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->priority != rc_new->priority) { rc->priority = rc_new->priority; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } - rc->flags |= (rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); + rrd_flag_set(rc, rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on rc_new if(rrd_flag_is_collected(rc) && rrd_flag_is_archived(rc)) rrd_flag_set_collected(rc); - if(rc->flags & RRD_FLAG_UPDATED) - rc->flags |= RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT; + if(rrd_flag_is_updated(rc)) + rrd_flag_set(rc, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); rrdcontext_unlock(rc); @@ -1578,310 +1330,284 @@ static void rrdcontext_conflict_callback(const char *id, void *oldv, void *newv, rrdcontext_freez(rc_new); // the react callback will continue from here + return rrd_flag_is_updated(rc); } -static void rrdcontext_react_callback(const char *id __maybe_unused, void *value, void *data __maybe_unused) { +static void rrdcontext_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { RRDCONTEXT *rc = (RRDCONTEXT *)value; - - rrdcontext_trigger_updates(rc, false); + rrdcontext_trigger_updates(rc, __FUNCTION__ ); } -void rrdhost_create_rrdcontexts(RRDHOST *host) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - - if(unlikely(!host)) return; - if(likely(host->rrdctx)) return; - - host->rrdctx = (RRDCONTEXTS *)dictionary_create(DICTIONARY_FLAG_DONT_OVERWRITE_VALUE); - dictionary_register_insert_callback((DICTIONARY *)host->rrdctx, rrdcontext_insert_callback, (void *)host); - dictionary_register_delete_callback((DICTIONARY *)host->rrdctx, rrdcontext_delete_callback, (void *)host); - dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx, rrdcontext_conflict_callback, (void *)host); - dictionary_register_react_callback((DICTIONARY *)host->rrdctx, rrdcontext_react_callback, (void *)host); - - host->rrdctx_queue = (RRDCONTEXTS *)dictionary_create(DICTIONARY_FLAG_DONT_OVERWRITE_VALUE | DICTIONARY_FLAG_VALUE_LINK_DONT_CLONE); +static void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function) { + if(rrd_flag_is_updated(rc) || !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrdcontext_queue_for_post_processing(rc, function, rc->flags); } -void rrdhost_destroy_rrdcontexts(RRDHOST *host) { - if(unlikely(!host)) return; - if(unlikely(!host->rrdctx)) return; - - if(host->rrdctx_queue) { - dictionary_destroy((DICTIONARY *)host->rrdctx_queue); - host->rrdctx_queue = NULL; - } - - dictionary_destroy((DICTIONARY *)host->rrdctx); - host->rrdctx = NULL; +static void rrdcontext_hub_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags = rrd_flags_get(rc); } -static inline bool rrdcontext_should_be_deleted(RRDCONTEXT *rc) { - if(likely(!(rc->flags & RRD_FLAG_DELETED))) - return false; - - if(likely(!(rc->flags & RRD_FLAG_LIVE_RETENTION))) - return false; - - if(unlikely(rc->flags & RRD_FLAGS_PREVENTING_DELETIONS)) - return false; - - if(unlikely(dictionary_stats_referenced_items(rc->rrdinstances) != 0)) - return false; - - if(unlikely(dictionary_stats_entries(rc->rrdinstances) != 0)) - return false; +static void rrdcontext_hub_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_HUB); +} - if(unlikely(rc->first_time_t || rc->last_time_t)) - return false; +static bool rrdcontext_hub_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + // context and new_context are the same + // we just need to update the timings + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags |= rrd_flags_get(rc); return true; } -static void rrdcontext_trigger_updates(RRDCONTEXT *rc, bool force) { - if(unlikely(rc->flags & RRD_FLAG_DONT_PROCESS)) return; - if(unlikely(!force && !(rc->flags & RRD_FLAG_UPDATED))) return; - - rrdcontext_lock(rc); - - size_t min_priority = LONG_MAX; - time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; - size_t instances_active = 0, instances_deleted = 0; - bool live_retention = true, currently_collected = false, hidden = true; - { - RRDINSTANCE *ri; - dfe_start_read(rc->rrdinstances, ri) { - if(likely(!(ri->flags & RRD_FLAG_HIDDEN))) - hidden = false; - - if(!(ri->flags & RRD_FLAG_LIVE_RETENTION)) - live_retention = false; - - if (unlikely(rrdinstance_should_be_deleted(ri))) { - instances_deleted++; - rrd_flag_unset_updated(ri); - continue; - } - - if(ri->flags & RRD_FLAG_COLLECTED) - currently_collected = true; - - internal_error(rc->units != ri->units, - "RRDCONTEXT: '%s' rrdinstance '%s' has different units, context '%s', instance '%s'", - string2str(rc->id), string2str(ri->id), - string2str(rc->units), string2str(ri->units)); - - instances_active++; - - if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY && ri->priority < min_priority) - min_priority = ri->priority; +static void rrdcontext_post_processing_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.queued_flags = rc->flags; + rc->pp.queued_ut = now_realtime_usec(); +} - if (ri->first_time_t && ri->first_time_t < min_first_time_t) - min_first_time_t = ri->first_time_t; +static void rrdcontext_post_processing_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.dequeued_ut = now_realtime_usec(); +} - if (ri->last_time_t && ri->last_time_t > max_last_time_t) - max_last_time_t = ri->last_time_t; +static bool rrdcontext_post_processing_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + bool changed = false; - rrd_flag_unset_updated(ri); - } - dfe_done(ri); + if(!(rc->flags & RRD_FLAG_QUEUED_FOR_PP)) { + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + changed = true; } - if(hidden && !(rc->flags & RRD_FLAG_HIDDEN)) - rc->flags |= RRD_FLAG_HIDDEN; - else if(!hidden && (rc->flags & RRD_FLAG_HIDDEN)) - rc->flags &= ~RRD_FLAG_HIDDEN; - - if(live_retention && !(rc->flags & RRD_FLAG_LIVE_RETENTION)) - rc->flags |= RRD_FLAG_LIVE_RETENTION; - else if(!live_retention && (rc->flags & RRD_FLAG_LIVE_RETENTION)) - rc->flags &= ~RRD_FLAG_LIVE_RETENTION; - - if(unlikely(!instances_active)) { - // we had some instances, but they are gone now... - - if(rc->first_time_t) { - rc->first_time_t = 0; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } - - if(rc->last_time_t) { - rc->last_time_t = 0; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } - - rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + if(rc->pp.queued_flags != rc->flags) { + rc->pp.queued_flags |= rc->flags; + changed = true; } - else { - // we have some active instances... - if (unlikely(min_first_time_t == LONG_MAX)) - min_first_time_t = 0; + return changed; +} - if (unlikely(min_first_time_t == 0 && max_last_time_t == 0)) { - if(rc->first_time_t) { - rc->first_time_t = 0; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } +void rrdhost_create_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(likely(host->rrdctx)) return; - if(rc->last_time_t) { - rc->last_time_t = 0; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } + host->rrdctx = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx, rrdcontext_insert_callback, host); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx, rrdcontext_delete_callback, host); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx, rrdcontext_conflict_callback, host); + dictionary_register_react_callback((DICTIONARY *)host->rrdctx, rrdcontext_react_callback, host); - rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); - } - else { - rc->flags &= ~RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; + host->rrdctx_hub_queue = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_insert_callback, NULL); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_delete_callback, NULL); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx_hub_queue, rrdcontext_hub_queue_conflict_callback, NULL); - if (unlikely(rc->first_time_t != min_first_time_t)) { - rc->first_time_t = min_first_time_t; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); - } + host->rrdctx_post_processing_queue = (RRDCONTEXTS *)dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_register_insert_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_insert_callback, NULL); + dictionary_register_delete_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_delete_callback, NULL); + dictionary_register_conflict_callback((DICTIONARY *)host->rrdctx_post_processing_queue, rrdcontext_post_processing_queue_conflict_callback, NULL); +} - if (rc->last_time_t != max_last_time_t) { - rc->last_time_t = max_last_time_t; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); - } +void rrdhost_destroy_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(unlikely(!host->rrdctx)) return; - if(likely(currently_collected)) - rrd_flag_set_collected(rc); - else - rrd_flag_set_archived(rc); - } + DICTIONARY *old; - if (min_priority != LONG_MAX && rc->priority != min_priority) { - rc->priority = min_priority; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + if(host->rrdctx_hub_queue) { + old = (DICTIONARY *)host->rrdctx_hub_queue; + host->rrdctx_hub_queue = NULL; + + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); } + dfe_done(rc); + dictionary_destroy(old); } - if(unlikely(rc->flags & RRD_FLAG_UPDATED)) { - log_transition(NULL, NULL, rc->id, rc->flags, "RRDCONTEXT"); - - if(check_if_cloud_version_changed_unsafe(rc, false)) { - rc->version = rrdcontext_get_next_version(rc); - - if(rc->flags & RRD_FLAG_QUEUED) { - rc->queue.queued_ut = now_realtime_usec(); - rc->queue.queued_flags |= rc->flags; - } - else { - rc->queue.queued_ut = now_realtime_usec(); - rc->queue.queued_flags = rc->flags; + if(host->rrdctx_post_processing_queue) { + old = (DICTIONARY *)host->rrdctx_post_processing_queue; + host->rrdctx_post_processing_queue = NULL; - rc->flags |= RRD_FLAG_QUEUED; - dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_queue, string2str(rc->id), rc, sizeof(*rc)); - } + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); } - - rrd_flag_unset_updated(rc); + dfe_done(rc); + dictionary_destroy(old); } - rrdcontext_unlock(rc); + old = (DICTIONARY *)host->rrdctx; + host->rrdctx = NULL; + dictionary_destroy(old); } // ---------------------------------------------------------------------------- // public API void rrdcontext_updated_rrddim(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_from_rrddim(rd); } void rrdcontext_removed_rrddim(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_rrddim_is_freed(rd); } void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_updated_rrddim_flags(rd); } void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_updated_rrddim_flags(rd); } void rrdcontext_updated_rrddim_divisor(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_updated_rrddim_flags(rd); } void rrdcontext_updated_rrddim_flags(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_updated_rrddim_flags(rd); } void rrdcontext_collected_rrddim(RRDDIM *rd) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdmetric_collected_rrddim(rd); } void rrdcontext_updated_rrdset(RRDSET *st) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdinstance_from_rrdset(st); } void rrdcontext_removed_rrdset(RRDSET *st) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdinstance_rrdset_is_freed(st); } -void rrdcontext_updated_rrdset_name(RRDSET *st) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; +void rrdcontext_updated_retention_rrdset(RRDSET *st) { + rrdinstance_rrdset_has_updated_retention(st); +} +void rrdcontext_updated_rrdset_name(RRDSET *st) { rrdinstance_updated_rrdset_name(st); } void rrdcontext_updated_rrdset_flags(RRDSET *st) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdinstance_updated_rrdset_flags(st); } void rrdcontext_collected_rrdset(RRDSET *st) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - rrdinstance_collected_rrdset(st); } void rrdcontext_host_child_connected(RRDHOST *host) { (void)host; - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - // no need to do anything here ; } +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)st->rrdhost->rrdctx, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_get_and_acquire_item(ri->rrdmetrics, id); + if(!rma) { + rrdinstance_release(ria); + rrdcontext_release(rca); + return 5; + } + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + uuid_copy(*store_uuid, rm->uuid); + + rrdmetric_release(rma); + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + +int rrdcontext_find_chart_uuid(RRDSET *st, uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)st->rrdhost->rrdctx, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + uuid_copy(*store_uuid, ri->uuid); + + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + void rrdcontext_host_child_disconnected(RRDHOST *host) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, false); +} + +static usec_t rrdcontext_next_db_rotation_ut = 0; +void rrdcontext_db_rotation(void) { + // called when the db rotates its database + rrdcontext_next_db_rotation_ut = now_realtime_usec() + FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS * USEC_PER_SEC; +} + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data) { + if(unlikely(!host || !context || !*context || !callback)) + return -1; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)host->rrdctx, context); + if(unlikely(!rca)) return -1; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(unlikely(!rc)) return -1; - rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, -1); + int ret = 0; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(ri->rrdset) { + int r = callback(ri->rrdset, data); + if(r >= 0) ret += r; + else { + ret = r; + break; + } + } + } + dfe_done(ri); + + rrdcontext_release(rca); + + return ret; } // ---------------------------------------------------------------------------- @@ -1933,7 +1659,7 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { if(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { info("RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', while node '%s' has an active context streaming.", - cmd->claim_id, cmd->node_id, host->hostname); + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); // disable it temporarily, so that our worker will not attempt to send messages in parallel rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); @@ -1943,7 +1669,7 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { if(cmd->version_hash != our_version_hash) { error("RRDCONTEXT: received version hash %"PRIu64" for host '%s', does not match our version hash %"PRIu64". Sending snapshot of all contexts.", - cmd->version_hash, host->hostname, our_version_hash); + cmd->version_hash, rrdhost_hostname(host), our_version_hash); #ifdef ENABLE_ACLK // prepare the snapshot @@ -1952,7 +1678,7 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { contexts_snapshot_t bundle = contexts_snapshot_new(cmd->claim_id, uuid, our_version_hash); // do a deep scan on every metric of the host to make sure all our data are updated - rrdcontext_recalculate_host_retention(host, RRD_FLAG_NONE, -1); + rrdcontext_recalculate_host_retention(host, RRD_FLAG_NONE, false); // calculate version hash and pack all the messages together in one go our_version_hash = rrdcontext_version_hash_with_callback(host, rrdcontext_message_send_unsafe, true, bundle); @@ -1965,11 +1691,11 @@ void rrdcontext_hub_checkpoint_command(void *ptr) { #endif } - internal_error(true, "RRDCONTEXT: host '%s' enabling streaming of contexts", host->hostname); + internal_error(true, "RRDCONTEXT: host '%s' enabling streaming of contexts", rrdhost_hostname(host)); rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); char node_str[UUID_STR_LEN]; uuid_unparse_lower(*host->node_id, node_str); - log_access("ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", node_str, host->hostname); + log_access("ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", node_str, rrdhost_hostname(host)); } void rrdcontext_hub_stop_streaming_command(void *ptr) { @@ -1994,12 +1720,12 @@ void rrdcontext_hub_stop_streaming_command(void *ptr) { if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { error("RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', but node '%s' does not have active context streaming. Ignoring command.", - cmd->claim_id, cmd->node_id, host->hostname); + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); return; } - internal_error(true, "RRDCONTEXT: host '%s' disabling streaming of contexts", host->hostname); + internal_error(true, "RRDCONTEXT: host '%s' disabling streaming of contexts", rrdhost_hostname(host)); rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); } @@ -2021,7 +1747,8 @@ struct rrdcontext_to_json { RRD_FLAGS combined_flags; }; -static inline int rrdmetric_to_json_callback(const char *id, void *value, void *data) { +static inline int rrdmetric_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); struct rrdcontext_to_json * t = data; RRDMETRIC *rm = value; BUFFER *wb = t->wb; @@ -2029,7 +1756,7 @@ static inline int rrdmetric_to_json_callback(const char *id, void *value, void * time_t after = t->after; time_t before = t->before; - if((rm->flags & RRD_FLAG_DELETED) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED)) + if(unlikely(rrd_flag_is_deleted(rm) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) return 0; if(after && (!rm->last_time_t || after > rm->last_time_t)) @@ -2047,13 +1774,13 @@ static inline int rrdmetric_to_json_callback(const char *id, void *value, void * buffer_strcat(wb, ",\n"); t->combined_first_time_t = MIN(t->combined_first_time_t, rm->first_time_t); t->combined_last_time_t = MAX(t->combined_last_time_t, rm->last_time_t); - t->combined_flags |= rm->flags; + t->combined_flags |= rrd_flags_get(rm); } else { buffer_strcat(wb, "\n"); t->combined_first_time_t = rm->first_time_t; t->combined_last_time_t = rm->last_time_t; - t->combined_flags = rm->flags; + t->combined_flags = rrd_flags_get(rm); } buffer_sprintf(wb, "\t\t\t\t\t\t\"%s\": {", id); @@ -2066,25 +1793,25 @@ static inline int rrdmetric_to_json_callback(const char *id, void *value, void * buffer_sprintf(wb, "\n\t\t\t\t\t\t\t\"name\":\"%s\"" - ",\n\t\t\t\t\t\t\t\"first_time_t\":%ld" - ",\n\t\t\t\t\t\t\t\"last_time_t\":%ld" + ",\n\t\t\t\t\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\t\t\t\t\"last_time_t\":%lld" ",\n\t\t\t\t\t\t\t\"collected\":%s" , string2str(rm->name) - , rm->first_time_t - , rrd_flag_is_collected(rm) ? t->now : rm->last_time_t - , rm->flags & RRD_FLAG_COLLECTED ? "true" : "false" + , (long long)rm->first_time_t + , rrd_flag_is_collected(rm) ? (long long)t->now : (long long)rm->last_time_t + , rrd_flag_is_collected(rm) ? "true" : "false" ); if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { buffer_sprintf(wb, ",\n\t\t\t\t\t\t\t\"deleted\":%s" - , rm->flags & RRD_FLAG_DELETED ? "true" : "false" + , rrd_flag_is_deleted(rm) ? "true" : "false" ); } if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { buffer_strcat(wb, ",\n\t\t\t\t\t\t\t\"flags\":\""); - rrd_flags_to_buffer(rm->flags, wb); + rrd_flags_to_buffer(rrd_flags_get(rm), wb); buffer_strcat(wb, "\""); } @@ -2093,7 +1820,9 @@ static inline int rrdmetric_to_json_callback(const char *id, void *value, void * return 1; } -static inline int rrdinstance_to_json_callback(const char *id, void *value, void *data) { +static inline int rrdinstance_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + struct rrdcontext_to_json *t_parent = data; RRDINSTANCE *ri = value; BUFFER *wb = t_parent->wb; @@ -2102,7 +1831,7 @@ static inline int rrdinstance_to_json_callback(const char *id, void *value, void time_t before = t_parent->before; bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; - if((ri->flags & RRD_FLAG_DELETED) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED)) + if(unlikely(rrd_flag_is_deleted(ri) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) return 0; if(after && (!ri->last_time_t || after > ri->last_time_t)) @@ -2119,7 +1848,7 @@ static inline int rrdinstance_to_json_callback(const char *id, void *value, void time_t first_time_t = ri->first_time_t; time_t last_time_t = ri->last_time_t; - RRD_FLAGS flags = ri->flags; + RRD_FLAGS flags = rrd_flags_get(ri); BUFFER *wb_metrics = NULL; if(options & RRDCONTEXT_OPTION_SHOW_METRICS || t_parent->chart_dimensions) { @@ -2179,8 +1908,8 @@ static inline int rrdinstance_to_json_callback(const char *id, void *value, void ",\n\t\t\t\t\t\"chart_type\":\"%s\"" ",\n\t\t\t\t\t\"priority\":%u" ",\n\t\t\t\t\t\"update_every\":%d" - ",\n\t\t\t\t\t\"first_time_t\":%ld" - ",\n\t\t\t\t\t\"last_time_t\":%ld" + ",\n\t\t\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\t\t\"last_time_t\":%lld" ",\n\t\t\t\t\t\"collected\":%s" , string2str(ri->name) , string2str(ri->rc->id) @@ -2190,25 +1919,25 @@ static inline int rrdinstance_to_json_callback(const char *id, void *value, void , rrdset_type_name(ri->chart_type) , ri->priority , ri->update_every - , first_time_t - , (flags & RRD_FLAG_COLLECTED) ? t_parent->now : last_time_t + , (long long)first_time_t + , (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_t , (flags & RRD_FLAG_COLLECTED) ? "true" : "false" ); if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { buffer_sprintf(wb, ",\n\t\t\t\t\t\"deleted\":%s" - , (ri->flags & RRD_FLAG_DELETED) ? "true" : "false" + , rrd_flag_is_deleted(ri) ? "true" : "false" ); } if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { buffer_strcat(wb, ",\n\t\t\t\t\t\"flags\":\""); - rrd_flags_to_buffer(ri->flags, wb); + rrd_flags_to_buffer(rrd_flags_get(ri), wb); buffer_strcat(wb, "\""); } - if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && dictionary_stats_entries(ri->rrdlabels)) { + if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && dictionary_entries(ri->rrdlabels)) { buffer_sprintf(wb, ",\n\t\t\t\t\t\"labels\": {\n"); rrdlabels_to_buffer(ri->rrdlabels, wb, "\t\t\t\t\t\t", ":", "\"", ",\n", NULL, NULL, NULL, NULL); buffer_strcat(wb, "\n\t\t\t\t\t}"); @@ -2227,7 +1956,8 @@ static inline int rrdinstance_to_json_callback(const char *id, void *value, void return 1; } -static inline int rrdcontext_to_json_callback(const char *id, void *value, void *data) { +static inline int rrdcontext_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); struct rrdcontext_to_json *t_parent = data; RRDCONTEXT *rc = value; BUFFER *wb = t_parent->wb; @@ -2236,14 +1966,14 @@ static inline int rrdcontext_to_json_callback(const char *id, void *value, void time_t before = t_parent->before; bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; - if(unlikely((rc->flags & RRD_FLAG_HIDDEN) && !(options & RRDCONTEXT_OPTION_SHOW_HIDDEN))) + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN) && !(options & RRDCONTEXT_OPTION_SHOW_HIDDEN))) return 0; - if((rc->flags & RRD_FLAG_DELETED) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED)) + if(unlikely(rrd_flag_is_deleted(rc) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) return 0; if(options & RRDCONTEXT_OPTION_DEEPSCAN) - rrdcontext_recalculate_context_retention(rc, RRD_FLAG_NONE, -1); + rrdcontext_recalculate_context_retention(rc, RRD_FLAG_NONE, false); if(after && (!rc->last_time_t || after > rc->last_time_t)) return 0; @@ -2253,7 +1983,7 @@ static inline int rrdcontext_to_json_callback(const char *id, void *value, void time_t first_time_t = rc->first_time_t; time_t last_time_t = rc->last_time_t; - RRD_FLAGS flags = rc->flags; + RRD_FLAGS flags = rrd_flags_get(rc); BUFFER *wb_instances = NULL; if((options & (RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_METRICS)) @@ -2304,29 +2034,29 @@ static inline int rrdcontext_to_json_callback(const char *id, void *value, void ",\n\t\t\t\"family\":\"%s\"" ",\n\t\t\t\"chart_type\":\"%s\"" ",\n\t\t\t\"priority\":%u" - ",\n\t\t\t\"first_time_t\":%ld" - ",\n\t\t\t\"last_time_t\":%ld" + ",\n\t\t\t\"first_time_t\":%lld" + ",\n\t\t\t\"last_time_t\":%lld" ",\n\t\t\t\"collected\":%s" , string2str(rc->title) , string2str(rc->units) , string2str(rc->family) , rrdset_type_name(rc->chart_type) , rc->priority - , first_time_t - , (flags & RRD_FLAG_COLLECTED) ? t_parent->now : last_time_t + , (long long)first_time_t + , (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_t , (flags & RRD_FLAG_COLLECTED) ? "true" : "false" ); if(options & RRDCONTEXT_OPTION_SHOW_DELETED) { buffer_sprintf(wb, ",\n\t\t\t\"deleted\":%s" - , (rc->flags & RRD_FLAG_DELETED) ? "true" : "false" + , rrd_flag_is_deleted(rc) ? "true" : "false" ); } if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { buffer_strcat(wb, ",\n\t\t\t\"flags\":\""); - rrd_flags_to_buffer(rc->flags, wb); + rrd_flags_to_buffer(rrd_flags_get(rc), wb); buffer_strcat(wb, "\""); } @@ -2339,14 +2069,29 @@ static inline int rrdcontext_to_json_callback(const char *id, void *value, void ",\n\t\t\t\"last_queued\":%llu" ",\n\t\t\t\"scheduled_dispatch\":%llu" ",\n\t\t\t\"last_dequeued\":%llu" + ",\n\t\t\t\"dispatches\":%zu" ",\n\t\t\t\"hub_version\":%"PRIu64"" ",\n\t\t\t\"version\":%"PRIu64"" , rc->queue.queued_ut / USEC_PER_SEC , rc->queue.scheduled_dispatch_ut / USEC_PER_SEC , rc->queue.dequeued_ut / USEC_PER_SEC + , rc->queue.dispatches , rc->hub.version , rc->version ); + + buffer_strcat(wb, ",\n\t\t\t\"pp_reasons\":\""); + rrd_reasons_to_buffer(rc->pp.queued_flags, wb); + buffer_strcat(wb, "\""); + + buffer_sprintf(wb, + ",\n\t\t\t\"pp_last_queued\":%llu" + ",\n\t\t\t\"pp_last_dequeued\":%llu" + ",\n\t\t\t\"pp_executed\":%zu" + , rc->pp.queued_ut / USEC_PER_SEC + , rc->pp.dequeued_ut / USEC_PER_SEC + , rc->pp.executions + ); } rrdcontext_unlock(rc); @@ -2365,18 +2110,18 @@ static inline int rrdcontext_to_json_callback(const char *id, void *value, void } int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx) { + error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)host->rrdctx, context); if(!rca) return HTTP_RESP_NOT_FOUND; RRDCONTEXT *rc = rrdcontext_acquired_value(rca); - if(after != 0 && before != 0) { - long long after_wanted = after; - long long before_wanted = before; - rrdr_relative_window_to_absolute(&after_wanted, &before_wanted); - after = after_wanted; - before = before_wanted; - } + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute(&after, &before); struct rrdcontext_to_json t_contexts = { .wb = wb, @@ -2389,7 +2134,7 @@ int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, R .written = 0, .now = now_realtime_sec(), }; - rrdcontext_to_json_callback(context, rc, &t_contexts); + rrdcontext_to_json_callback((DICTIONARY_ITEM *)rca, rc, &t_contexts); rrdcontext_release(rca); @@ -2400,25 +2145,25 @@ int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, R } int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx) { + error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + char node_uuid[UUID_STR_LEN] = ""; if(host->node_id) uuid_unparse(*host->node_id, node_uuid); - if(after != 0 && before != 0) { - long long after_wanted = after; - long long before_wanted = before; - rrdr_relative_window_to_absolute(&after_wanted, &before_wanted); - after = after_wanted; - before = before_wanted; - } + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute(&after, &before); buffer_sprintf(wb, "{\n" "\t\"hostname\": \"%s\"" ",\n\t\"machine_guid\": \"%s\"" ",\n\t\"node_id\": \"%s\"" ",\n\t\"claim_id\": \"%s\"" - , host->hostname + , rrdhost_hostname(host) , host->machine_guid , node_uuid , host->aclk_state.claimed_id ? host->aclk_state.claimed_id : "" @@ -2426,7 +2171,7 @@ int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, if(options & RRDCONTEXT_OPTION_SHOW_LABELS) { buffer_sprintf(wb, ",\n\t\"host_labels\": {\n"); - rrdlabels_to_buffer(host->host_labels, wb, "\t\t", ":", "\"", ",\n", NULL, NULL, NULL, NULL); + rrdlabels_to_buffer(host->rrdlabels, wb, "\t\t", ":", "\"", ",\n", NULL, NULL, NULL, NULL); buffer_strcat(wb, "\n\t}"); } @@ -2451,6 +2196,684 @@ int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, } // ---------------------------------------------------------------------------- +// weights API + +static void metric_entry_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct metric_entry *t = value; + t->rca = rrdcontext_acquired_dup(t->rca); + t->ria = rrdinstance_acquired_dup(t->ria); + t->rma = rrdmetric_acquired_dup(t->rma); +} +static void metric_entry_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct metric_entry *t = value; + rrdcontext_release(t->rca); + rrdinstance_release(t->ria); + rrdmetric_release(t->rma); +} +static bool metric_entry_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value __maybe_unused, void *new_value __maybe_unused, void *data __maybe_unused) { + fatal("RRDCONTEXT: %s() detected a conflict on a metric pointer!", __FUNCTION__); + return false; +} + +DICTIONARY *rrdcontext_all_metrics_to_dict(RRDHOST *host, SIMPLE_PATTERN *contexts) { + if(!host || !host->rrdctx) + return NULL; + + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(dict, metric_entry_insert_callback, NULL); + dictionary_register_delete_callback(dict, metric_entry_delete_callback, NULL); + dictionary_register_conflict_callback(dict, metric_entry_conflict_callback, NULL); + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx, rc) { + if(rrd_flag_is_deleted(rc)) + continue; + + if(contexts && !simple_pattern_matches(contexts, string2str(rc->id))) + continue; + + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(rrd_flag_is_deleted(ri)) + continue; + + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + if(rrd_flag_is_deleted(rm)) + continue; + + struct metric_entry tmp = { + .rca = (RRDCONTEXT_ACQUIRED *)rc_dfe.item, + .ria = (RRDINSTANCE_ACQUIRED *)ri_dfe.item, + .rma = (RRDMETRIC_ACQUIRED *)rm_dfe.item, + }; + + char buffer[20 + 1]; + ssize_t len = snprintfz(buffer, 20, "%p", rm); + dictionary_set_advanced(dict, buffer, len + 1, &tmp, sizeof(struct metric_entry), NULL); + } + dfe_done(rm); + } + dfe_done(ri); + } + dfe_done(rc); + + return dict; +} + +// ---------------------------------------------------------------------------- +// query API + +typedef struct query_target_locals { + time_t start_s; + + QUERY_TARGET *qt; + + RRDSET *st; + + const char *hosts; + const char *contexts; + const char *charts; + const char *dimensions; + const char *chart_label_key; + const char *charts_labels_filter; + + long long after; + long long before; + bool match_ids; + bool match_names; + + RRDHOST *host; + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + + size_t metrics_skipped_due_to_not_matching_timeframe; +} QUERY_TARGET_LOCALS; + +static __thread QUERY_TARGET thread_query_target = {}; +void query_target_release(QUERY_TARGET *qt) { + if(unlikely(!qt)) return; + if(unlikely(!qt->used)) return; + + simple_pattern_free(qt->hosts.pattern); + qt->hosts.pattern = NULL; + + simple_pattern_free(qt->contexts.pattern); + qt->contexts.pattern = NULL; + + simple_pattern_free(qt->instances.pattern); + qt->instances.pattern = NULL; + + simple_pattern_free(qt->instances.chart_label_key_pattern); + qt->instances.chart_label_key_pattern = NULL; + + simple_pattern_free(qt->instances.charts_labels_filter_pattern); + qt->instances.charts_labels_filter_pattern = NULL; + + simple_pattern_free(qt->query.pattern); + qt->query.pattern = NULL; + + // release the query + for(size_t i = 0, used = qt->query.used; i < used ;i++) { + string_freez(qt->query.array[i].dimension.id); + qt->query.array[i].dimension.id = NULL; + + string_freez(qt->query.array[i].dimension.name); + qt->query.array[i].dimension.name = NULL; + + string_freez(qt->query.array[i].chart.id); + qt->query.array[i].chart.id = NULL; + + string_freez(qt->query.array[i].chart.name); + qt->query.array[i].chart.name = NULL; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(qt->query.array[i].tiers[tier].db_metric_handle) { + STORAGE_ENGINE *eng = qt->query.array[i].tiers[tier].eng; + eng->api.metric_release(qt->query.array[i].tiers[tier].db_metric_handle); + qt->query.array[i].tiers[tier].db_metric_handle = NULL; + } + } + } + + // release the metrics + for(size_t i = 0, used = qt->metrics.used; i < used ;i++) { + rrdmetric_release(qt->metrics.array[i]); + qt->metrics.array[i] = NULL; + } + + // release the instances + for(size_t i = 0, used = qt->instances.used; i < used ;i++) { + rrdinstance_release(qt->instances.array[i]); + qt->instances.array[i] = NULL; + } + + // release the contexts + for(size_t i = 0, used = qt->contexts.used; i < used ;i++) { + rrdcontext_release(qt->contexts.array[i]); + qt->contexts.array[i] = NULL; + } + + // release the hosts + for(size_t i = 0, used = qt->hosts.used; i < used ;i++) { + qt->hosts.array[i] = NULL; + } + + qt->query.used = 0; + qt->metrics.used = 0; + qt->instances.used = 0; + qt->contexts.used = 0; + qt->hosts.used = 0; + + qt->db.minimum_latest_update_every = 0; + qt->db.first_time_t = 0; + qt->db.last_time_t = 0; + + qt->id[0] = '\0'; + + qt->used = false; +} +void query_target_free(void) { + if(thread_query_target.used) + query_target_release(&thread_query_target); + + freez(thread_query_target.query.array); + thread_query_target.query.array = NULL; + thread_query_target.query.size = 0; + + freez(thread_query_target.metrics.array); + thread_query_target.metrics.array = NULL; + thread_query_target.metrics.size = 0; + + freez(thread_query_target.instances.array); + thread_query_target.instances.array = NULL; + thread_query_target.instances.size = 0; + + freez(thread_query_target.contexts.array); + thread_query_target.contexts.array = NULL; + thread_query_target.contexts.size = 0; + + freez(thread_query_target.hosts.array); + thread_query_target.hosts.array = NULL; + thread_query_target.hosts.size = 0; +} + +static void query_target_add_metric(QUERY_TARGET_LOCALS *qtl, RRDMETRIC_ACQUIRED *rma, RRDINSTANCE *ri, + bool queryable_instance) { + QUERY_TARGET *qt = qtl->qt; + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + if(rrd_flag_is_deleted(rm)) + return; + + if(qt->metrics.used == qt->metrics.size) { + qt->metrics.size = (qt->metrics.size) ? qt->metrics.size * 2 : 1; + qt->metrics.array = reallocz(qt->metrics.array, qt->metrics.size * sizeof(RRDMETRIC_ACQUIRED *)); + } + qt->metrics.array[qt->metrics.used++] = rrdmetric_acquired_dup(rma); + + if(!queryable_instance) + return; + + time_t common_first_time_t = 0; + time_t common_last_time_t = 0; + time_t common_update_every = 0; + size_t tiers_added = 0; + struct { + STORAGE_ENGINE *eng; + STORAGE_METRIC_HANDLE *db_metric_handle; + time_t db_first_time_t; + time_t db_last_time_t; + time_t db_update_every; + } tier_retention[storage_tiers]; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + STORAGE_ENGINE *eng = qtl->host->db[tier].eng; + tier_retention[tier].eng = eng; + tier_retention[tier].db_update_every = (time_t) (qtl->host->db[tier].tier_grouping * ri->update_every); + + if(rm->rrddim && rm->rrddim->tiers[tier] && rm->rrddim->tiers[tier]->db_metric_handle) + tier_retention[tier].db_metric_handle = eng->api.metric_dup(rm->rrddim->tiers[tier]->db_metric_handle); + else + tier_retention[tier].db_metric_handle = eng->api.metric_get(qtl->host->db[tier].instance, &rm->uuid, NULL); + + if(tier_retention[tier].db_metric_handle) { + tier_retention[tier].db_first_time_t = tier_retention[tier].eng->api.query_ops.oldest_time(tier_retention[tier].db_metric_handle); + tier_retention[tier].db_last_time_t = tier_retention[tier].eng->api.query_ops.latest_time(tier_retention[tier].db_metric_handle); + + if(!common_first_time_t) + common_first_time_t = tier_retention[tier].db_first_time_t; + else if(tier_retention[tier].db_first_time_t) + common_first_time_t = MIN(common_first_time_t, tier_retention[tier].db_first_time_t); + + if(!common_last_time_t) + common_last_time_t = tier_retention[tier].db_last_time_t; + else + common_last_time_t = MAX(common_last_time_t, tier_retention[tier].db_last_time_t); + + if(!common_update_every) + common_update_every = tier_retention[tier].db_update_every; + else if(tier_retention[tier].db_update_every) + common_update_every = MIN(common_update_every, tier_retention[tier].db_update_every); + + tiers_added++; + } + else { + tier_retention[tier].db_first_time_t = 0; + tier_retention[tier].db_last_time_t = 0; + tier_retention[tier].db_update_every = 0; + } + } + + bool release_retention = true; + bool timeframe_matches = + (tiers_added + && (common_first_time_t - common_update_every * 2) <= qt->window.before + && (common_last_time_t + common_update_every * 2) >= qt->window.after + ) ? true : false; + + if(timeframe_matches) { + RRDR_DIMENSION_FLAGS options = RRDR_DIMENSION_DEFAULT; + + if (rrd_flag_check(rm, RRD_FLAG_HIDDEN) + || (rm->rrddim && rrddim_option_check(rm->rrddim, RRDDIM_OPTION_HIDDEN))) { + options |= RRDR_DIMENSION_HIDDEN; + options &= ~RRDR_DIMENSION_SELECTED; + } + + if (qt->query.pattern) { + // we have a dimensions pattern + // lets see if this dimension is selected + + if ((qtl->match_ids && simple_pattern_matches(qt->query.pattern, string2str(rm->id))) + || (qtl->match_names && simple_pattern_matches(qt->query.pattern, string2str(rm->name))) + ) { + // it matches the pattern + options |= (RRDR_DIMENSION_SELECTED | RRDR_DIMENSION_NONZERO); + options &= ~RRDR_DIMENSION_HIDDEN; + } + else { + // it does not match the pattern + options |= RRDR_DIMENSION_HIDDEN; + options &= ~RRDR_DIMENSION_SELECTED; + } + } + else { + // we don't have a dimensions pattern + // so this is a selected dimension + // if it is not hidden + if(!(options & RRDR_DIMENSION_HIDDEN)) + options |= RRDR_DIMENSION_SELECTED; + } + + if((options & RRDR_DIMENSION_HIDDEN) && (options & RRDR_DIMENSION_SELECTED)) + options &= ~RRDR_DIMENSION_HIDDEN; + + if(!(options & RRDR_DIMENSION_HIDDEN) || (qt->request.options & RRDR_OPTION_PERCENTAGE)) { + // we have a non-hidden dimension + // let's add it to the query metrics + + if(ri->rrdset) + ri->rrdset->last_accessed_time = qtl->start_s; + + if (qt->query.used == qt->query.size) { + qt->query.size = (qt->query.size) ? qt->query.size * 2 : 1; + qt->query.array = reallocz(qt->query.array, qt->query.size * sizeof(QUERY_METRIC)); + } + QUERY_METRIC *qm = &qt->query.array[qt->query.used++]; + + qm->dimension.options = options; + + qm->link.host = qtl->host; + qm->link.rca = qtl->rca; + qm->link.ria = qtl->ria; + qm->link.rma = rma; + + qm->chart.id = string_dup(ri->id); + qm->chart.name = string_dup(ri->name); + + qm->dimension.id = string_dup(rm->id); + qm->dimension.name = string_dup(rm->name); + + if (!qt->db.first_time_t || common_first_time_t < qt->db.first_time_t) + qt->db.first_time_t = common_first_time_t; + + if (!qt->db.last_time_t || common_last_time_t > qt->db.last_time_t) + qt->db.last_time_t = common_last_time_t; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + qm->tiers[tier].eng = tier_retention[tier].eng; + qm->tiers[tier].db_metric_handle = tier_retention[tier].db_metric_handle; + qm->tiers[tier].db_first_time_t = tier_retention[tier].db_first_time_t; + qm->tiers[tier].db_last_time_t = tier_retention[tier].db_last_time_t; + qm->tiers[tier].db_update_every = tier_retention[tier].db_update_every; + } + release_retention = false; + } + } + else + qtl->metrics_skipped_due_to_not_matching_timeframe++; + + if(release_retention) { + // cleanup anything we allocated to the retention we will not use + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (tier_retention[tier].db_metric_handle) + tier_retention[tier].eng->api.metric_release(tier_retention[tier].db_metric_handle); + } + } +} + +static void query_target_add_instance(QUERY_TARGET_LOCALS *qtl, RRDINSTANCE_ACQUIRED *ria, bool queryable_instance) { + QUERY_TARGET *qt = qtl->qt; + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(rrd_flag_is_deleted(ri)) + return; + + if(qt->instances.used == qt->instances.size) { + qt->instances.size = (qt->instances.size) ? qt->instances.size * 2 : 1; + qt->instances.array = reallocz(qt->instances.array, qt->instances.size * sizeof(RRDINSTANCE_ACQUIRED *)); + } + + qtl->ria = qt->instances.array[qt->instances.used++] = rrdinstance_acquired_dup(ria); + + if(qt->db.minimum_latest_update_every == 0 || ri->update_every < qt->db.minimum_latest_update_every) + qt->db.minimum_latest_update_every = ri->update_every; + + if(queryable_instance) { + if ((qt->instances.chart_label_key_pattern && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, qt->instances.chart_label_key_pattern, ':')) || + (qt->instances.charts_labels_filter_pattern && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, qt->instances.charts_labels_filter_pattern, ':'))) + queryable_instance = false; + } + + size_t added = 0; + + if(unlikely(qt->request.rma)) { + query_target_add_metric(qtl, qt->request.rma, ri, queryable_instance); + added++; + } + else { + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + query_target_add_metric(qtl, (RRDMETRIC_ACQUIRED *) rm_dfe.item, ri, queryable_instance); + added++; + } + dfe_done(rm); + } + + if(!added) { + qt->instances.used--; + rrdinstance_release(ria); + } +} + +static void query_target_add_context(QUERY_TARGET_LOCALS *qtl, RRDCONTEXT_ACQUIRED *rca) { + QUERY_TARGET *qt = qtl->qt; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(rrd_flag_is_deleted(rc)) + return; + + if(qt->contexts.used == qt->contexts.size) { + qt->contexts.size = (qt->contexts.size) ? qt->contexts.size * 2 : 1; + qt->contexts.array = reallocz(qt->contexts.array, qt->contexts.size * sizeof(RRDCONTEXT_ACQUIRED *)); + } + qtl->rca = qt->contexts.array[qt->contexts.used++] = rrdcontext_acquired_dup(rca); + + size_t added = 0; + if(unlikely(qt->request.ria)) { + query_target_add_instance(qtl, qt->request.ria, true); + added++; + } + else if(unlikely(qtl->st && qtl->st->rrdcontext == rca && qtl->st->rrdinstance)) { + query_target_add_instance(qtl, qtl->st->rrdinstance, true); + added++; + } + else { + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + bool queryable_instance = false; + if(!qt->instances.pattern + || (qtl->match_ids && simple_pattern_matches(qt->instances.pattern, string2str(ri->id))) + || (qtl->match_names && simple_pattern_matches(qt->instances.pattern, string2str(ri->name))) + ) + queryable_instance = true; + + query_target_add_instance(qtl, (RRDINSTANCE_ACQUIRED *)ri_dfe.item, queryable_instance); + added++; + } + dfe_done(ri); + } + + if(!added) { + qt->contexts.used--; + rrdcontext_release(rca); + } +} + +static void query_target_add_host(QUERY_TARGET_LOCALS *qtl, RRDHOST *host) { + QUERY_TARGET *qt = qtl->qt; + + if(qt->hosts.used == qt->hosts.size) { + qt->hosts.size = (qt->hosts.size) ? qt->hosts.size * 2 : 1; + qt->hosts.array = reallocz(qt->hosts.array, qt->hosts.size * sizeof(RRDHOST *)); + } + qtl->host = qt->hosts.array[qt->hosts.used++] = host; + + // is the chart given valid? + if(unlikely(qtl->st && (!qtl->st->rrdinstance || !qtl->st->rrdcontext))) { + error("QUERY TARGET: RRDSET '%s' given, because it is not linked to rrdcontext structures. Switching to context query.", rrdset_name(qtl->st)); + + if(!is_valid_sp(qtl->charts)) + qtl->charts = rrdset_name(qtl->st); + + qtl->st = NULL; + } + + size_t added = 0; + if(unlikely(qt->request.rca)) { + query_target_add_context(qtl, qt->request.rca); + added++; + } + else if(unlikely(qtl->st)) { + // single chart data queries + query_target_add_context(qtl, qtl->st->rrdcontext); + added++; + } + else { + // context pattern queries + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item((DICTIONARY *)qtl->host->rrdctx, qtl->contexts); + if(likely(rca)) { + // we found it! + query_target_add_context(qtl, rca); + rrdcontext_release(rca); + added++; + } + else { + // Probably it is a pattern, we need to search for it... + RRDCONTEXT *rc; + dfe_start_read((DICTIONARY *)qtl->host->rrdctx, rc) { + if(qt->contexts.pattern && !simple_pattern_matches(qt->contexts.pattern, string2str(rc->id))) + continue; + + query_target_add_context(qtl, (RRDCONTEXT_ACQUIRED *)rc_dfe.item); + added++; + } + dfe_done(rc); + } + } + + if(!added) { + qt->hosts.used--; + } +} + +void query_target_generate_name(QUERY_TARGET *qt) { + char options_buffer[100 + 1]; + web_client_api_request_v1_data_options_to_string(options_buffer, 100, qt->request.options); + + char resampling_buffer[20 + 1] = ""; + if(qt->request.resampling_time > 1) + snprintfz(resampling_buffer, 20, "/resampling:%lld", (long long)qt->request.resampling_time); + + char tier_buffer[20 + 1] = ""; + if(qt->request.options & RRDR_OPTION_SELECTED_TIER) + snprintfz(tier_buffer, 20, "/tier:%zu", qt->request.tier); + + if(qt->request.st) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "chart://host:%s/instance:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.st->rrdhost) + , rrdset_name(qt->request.st) + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else if(qt->request.host && qt->request.rca && qt->request.ria && qt->request.rma) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "metric://host:%s/context:%s/instance:%s/dimension:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.host) + , rrdcontext_acquired_id(qt->request.rca) + , rrdinstance_acquired_id(qt->request.ria) + , rrdmetric_acquired_id(qt->request.rma) + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "context://host:%s/contexts:%s/instances:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , (qt->request.host) ? rrdhost_hostname(qt->request.host) : ((qt->request.hosts) ? qt->request.hosts : "*") + , (qt->request.contexts) ? qt->request.contexts : "*" + , (qt->request.charts) ? qt->request.charts : "*" + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , web_client_api_request_v1_data_group_to_string(qt->request.group_method) + , qt->request.group_options?qt->request.group_options:"" + , options_buffer + , resampling_buffer + , tier_buffer + ); + + json_fix_string(qt->id); +} + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) { + QUERY_TARGET *qt = &thread_query_target; + + if(qt->used) + fatal("QUERY TARGET: this query target is already used (%zu queries made with this QUERY_TARGET so far).", qt->queries); + + qt->used = true; + qt->queries++; + + // copy the request into query_thread_target + qt->request = *qtr; + + query_target_generate_name(qt); + qt->window.after = qt->request.after; + qt->window.before = qt->request.before; + rrdr_relative_window_to_absolute(&qt->window.after, &qt->window.before); + + // prepare our local variables - we need these across all these functions + QUERY_TARGET_LOCALS qtl = { + .qt = qt, + .start_s = now_realtime_sec(), + .host = qt->request.host, + .st = qt->request.st, + .hosts = qt->request.hosts, + .contexts = qt->request.contexts, + .charts = qt->request.charts, + .dimensions = qt->request.dimensions, + .chart_label_key = qt->request.chart_label_key, + .charts_labels_filter = qt->request.charts_labels_filter, + }; + + qt->db.minimum_latest_update_every = 0; // it will be updated by query_target_add_query() + + // prepare all the patterns + qt->hosts.pattern = is_valid_sp(qtl.hosts) ? simple_pattern_create(qtl.hosts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->contexts.pattern = is_valid_sp(qtl.contexts) ? simple_pattern_create(qtl.contexts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.pattern = is_valid_sp(qtl.charts) ? simple_pattern_create(qtl.charts, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->query.pattern = is_valid_sp(qtl.dimensions) ? simple_pattern_create(qtl.dimensions, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.chart_label_key_pattern = is_valid_sp(qtl.chart_label_key) ? simple_pattern_create(qtl.chart_label_key, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + qt->instances.charts_labels_filter_pattern = is_valid_sp(qtl.charts_labels_filter) ? simple_pattern_create(qtl.charts_labels_filter, ",|\t\r\n\f\v", SIMPLE_PATTERN_EXACT) : NULL; + + qtl.match_ids = qt->request.options & RRDR_OPTION_MATCH_IDS; + qtl.match_names = qt->request.options & RRDR_OPTION_MATCH_NAMES; + if(likely(!qtl.match_ids && !qtl.match_names)) + qtl.match_ids = qtl.match_names = true; + + // verify that the chart belongs to the host we are interested + if(qtl.st) { + if (!qtl.host) { + // It is NULL, set it ourselves. + qtl.host = qtl.st->rrdhost; + } + else if (unlikely(qtl.host != qtl.st->rrdhost)) { + // Oops! A different host! + error("QUERY TARGET: RRDSET '%s' given does not belong to host '%s'. Switching query host to '%s'", + rrdset_name(qtl.st), rrdhost_hostname(qtl.host), rrdhost_hostname(qtl.st->rrdhost)); + qtl.host = qtl.st->rrdhost; + } + } + + if(qtl.host) { + // single host query + query_target_add_host(&qtl, qtl.host); + qtl.hosts = rrdhost_hostname(qtl.host); + } + else { + // multi host query + rrd_rdlock(); + rrdhost_foreach_read(qtl.host) { + if(!qt->hosts.pattern || simple_pattern_matches(qt->hosts.pattern, rrdhost_hostname(qtl.host))) + query_target_add_host(&qtl, qtl.host); + } + rrd_unlock(); + } + + // make sure everything is good + if(!qt->query.used || !qt->metrics.used || !qt->instances.used || !qt->contexts.used || !qt->hosts.used) { + internal_error( + true + , "QUERY TARGET: query '%s' does not have all the data required. " + "Matched %u hosts, %u contexts, %u instances, %u dimensions, %u metrics to query, " + "%zu metrics skipped because they don't have data in the desired time-frame. " + "Aborting it." + , qt->id + , qt->hosts.used + , qt->contexts.used + , qt->instances.used + , qt->metrics.used + , qt->query.used + , qtl.metrics_skipped_due_to_not_matching_timeframe + ); + + query_target_release(qt); + return NULL; + } + + if(!query_target_calculate_window(qt)) { + query_target_release(qt); + return NULL; + } + + return qt; +} + + +// ---------------------------------------------------------------------------- // load from SQL static void rrdinstance_load_clabel(SQL_CLABEL_DATA *sld, void *data) { @@ -2464,8 +2887,10 @@ static void rrdinstance_load_dimension(SQL_DIMENSION_DATA *sd, void *data) { RRDMETRIC trm = { .id = string_strdupz(sd->id), .name = string_strdupz(sd->name), - .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomic }; + if(sd->hidden) trm.flags |= RRD_FLAG_HIDDEN; + uuid_copy(trm.uuid, sd->dim_id); dictionary_set(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm)); @@ -2481,7 +2906,7 @@ static void rrdinstance_load_chart_callback(SQL_CHART_DATA *sc, void *data) { .family = string_strdupz(sc->family), .priority = sc->priority, .chart_type = sc->chart_type, - .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_DONT_PROCESS | RRD_FLAG_UPDATE_REASON_LOAD_SQL, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics .rrdhost = host, }; @@ -2497,7 +2922,7 @@ static void rrdinstance_load_chart_callback(SQL_CHART_DATA *sc, void *data) { .chart_type = sc->chart_type, .priority = sc->priority, .update_every = sc->update_every, - .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_DONT_PROCESS | RRD_FLAG_UPDATE_REASON_LOAD_SQL, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics }; uuid_copy(tri.uuid, sc->chart_id); @@ -2506,13 +2931,7 @@ static void rrdinstance_load_chart_callback(SQL_CHART_DATA *sc, void *data) { ctx_get_dimension_list(&ri->uuid, rrdinstance_load_dimension, ri); ctx_get_label_list(&ri->uuid, rrdinstance_load_clabel, ri); - ri->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdinstance_trigger_updates(ri, true, true); - - // let the instance be in "don't process" mode - // so that we process it once, when it is collected - ri->flags |= RRD_FLAG_DONT_PROCESS; - + rrdinstance_trigger_updates(ri, __FUNCTION__ ); rrdinstance_release(ria); rrdcontext_release(rca); } @@ -2523,7 +2942,7 @@ static void rrdcontext_load_context_callback(VERSIONED_CONTEXT_DATA *ctx_data, v RRDCONTEXT trc = { .id = string_strdupz(ctx_data->id), - .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_DONT_PROCESS | RRD_FLAG_UPDATE_REASON_LOAD_SQL, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics // no need to set more data here // we only need the hub data @@ -2534,9 +2953,6 @@ static void rrdcontext_load_context_callback(VERSIONED_CONTEXT_DATA *ctx_data, v } void rrdhost_load_rrdcontext_data(RRDHOST *host) { - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - return; - if(host->rrdctx) return; rrdhost_create_rrdcontexts(host); @@ -2545,56 +2961,15 @@ void rrdhost_load_rrdcontext_data(RRDHOST *host) { RRDCONTEXT *rc; dfe_start_read((DICTIONARY *)host->rrdctx, rc) { - rc->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdcontext_trigger_updates(rc, true); + rrdcontext_trigger_updates(rc, __FUNCTION__ ); } dfe_done(rc); -} - -// ---------------------------------------------------------------------------- -// the worker thread - -static inline usec_t rrdcontext_calculate_queued_dispatch_time_ut(RRDCONTEXT *rc, usec_t now_ut) { - - if(likely(rc->queue.delay_calc_ut >= rc->queue.queued_ut)) - return rc->queue.scheduled_dispatch_ut; - - RRD_FLAGS flags = rc->queue.queued_flags; - - usec_t delay = LONG_MAX; - int i; - struct rrdcontext_reason *reason; - for(i = 0, reason = &rrdcontext_reasons[i]; reason->name ; reason = &rrdcontext_reasons[++i]) { - if(unlikely(flags & reason->flag)) { - if(reason->delay_ut < delay) - delay = reason->delay_ut; - } - } - if(unlikely(delay == LONG_MAX)) { - internal_error(true, "RRDCONTEXT: '%s', cannot find minimum delay of flags %x", string2str(rc->id), (unsigned int)flags); - delay = 60 * USEC_PER_SEC; - } - - rc->queue.delay_calc_ut = now_ut; - usec_t dispatch_ut = rc->queue.scheduled_dispatch_ut = rc->queue.queued_ut + delay; - return dispatch_ut; + rrdcontext_garbage_collect_single_host(host, false); } -#define WORKER_JOB_HOSTS 1 -#define WORKER_JOB_CHECK 2 -#define WORKER_JOB_SEND 3 -#define WORKER_JOB_DEQUEUE 4 -#define WORKER_JOB_RETENTION 5 -#define WORKER_JOB_QUEUED 6 -#define WORKER_JOB_CLEANUP 7 -#define WORKER_JOB_CLEANUP_DELETE 8 - -static usec_t rrdcontext_next_db_rotation_ut = 0; -void rrdcontext_db_rotation(void) { - // called when the db rotates its database - rrdcontext_next_db_rotation_ut = now_realtime_usec() + FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS * USEC_PER_SEC; -} +// ---------------------------------------------------------------------------- +// version hash calculation static uint64_t rrdcontext_version_hash_with_callback( RRDHOST *host, @@ -2612,7 +2987,7 @@ static uint64_t rrdcontext_version_hash_with_callback( rrdcontext_lock(rc); - if(unlikely(rc->flags & RRD_FLAG_HIDDEN)) { + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { rrdcontext_unlock(rc); continue; } @@ -2621,7 +2996,7 @@ static uint64_t rrdcontext_version_hash_with_callback( callback(rc, snapshot, bundle); // skip any deleted contexts - if(unlikely(rc->flags & RRD_FLAG_DELETED)) { + if(unlikely(rrd_flag_is_deleted(rc))) { rrdcontext_unlock(rc); continue; } @@ -2646,52 +3021,154 @@ static uint64_t rrdcontext_version_hash_with_callback( return hash; } -static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, int job_id) { - RRDINSTANCE *ri; - dfe_start_read(rc->rrdinstances, ri) { - RRDMETRIC *rm; - dfe_start_read(ri->rrdmetrics, rm) { - - if(job_id >= 0) - worker_is_busy(job_id); - - rrd_flag_set_updated(rm, reason); - - rm->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdmetric_trigger_updates(rm, true, false); - } - dfe_done(rm); - - ri->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdinstance_trigger_updates(ri, true, false); - ri->flags |= RRD_FLAG_DONT_PROCESS; - } - dfe_done(ri); +// ---------------------------------------------------------------------------- +// retention recalculation - rc->flags &= ~RRD_FLAG_DONT_PROCESS; - rrdcontext_trigger_updates(rc, true); +static void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs) { + rrdcontext_post_process_updates(rc, true, reason, worker_jobs); } -static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, int job_id) { +static void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs) { if(unlikely(!host || !host->rrdctx)) return; RRDCONTEXT *rc; dfe_start_read((DICTIONARY *)host->rrdctx, rc) { - rrdcontext_recalculate_context_retention(rc, reason, job_id); + rrdcontext_recalculate_context_retention(rc, reason, worker_jobs); } dfe_done(rc); } -static void rrdcontext_recalculate_retention(int job_id) { +static void rrdcontext_recalculate_retention_all_hosts(void) { rrdcontext_next_db_rotation_ut = 0; rrd_rdlock(); RRDHOST *host; rrdhost_foreach_read(host) { - rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DB_ROTATION, job_id); + worker_is_busy(WORKER_JOB_RETENTION); + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DB_ROTATION, true); } rrd_unlock(); } +// ---------------------------------------------------------------------------- +// garbage collector + +static bool rrdmetric_update_retention(RRDMETRIC *rm) { + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + + if(rm->rrddim) { + min_first_time_t = rrddim_first_entry_t(rm->rrddim); + max_last_time_t = rrddim_last_entry_t(rm->rrddim); + } +#ifdef ENABLE_DBENGINE + else if (dbengine_enabled) { + RRDHOST *rrdhost = rm->ri->rc->rrdhost; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if(!rrdhost->db[tier].instance) continue; + + time_t first_time_t, last_time_t; + if (rrdeng_metric_retention_by_uuid(rrdhost->db[tier].instance, &rm->uuid, &first_time_t, &last_time_t) == 0) { + if (first_time_t < min_first_time_t) + min_first_time_t = first_time_t; + + if (last_time_t > max_last_time_t) + max_last_time_t = last_time_t; + } + } + } + else { + // cannot get retention + return false; + } +#endif + + if(min_first_time_t == LONG_MAX) + min_first_time_t = 0; + + if(min_first_time_t > max_last_time_t) { + internal_error(true, "RRDMETRIC: retention of '%s' is flipped", string2str(rm->id)); + time_t tmp = min_first_time_t; + min_first_time_t = max_last_time_t; + max_last_time_t = tmp; + } + + // check if retention changed + + if (min_first_time_t != rm->first_time_t) { + rm->first_time_t = min_first_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (max_last_time_t != rm->last_time_t) { + rm->last_time_t = max_last_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(unlikely(!rm->first_time_t && !rm->last_time_t)) + rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + rrd_flag_set(rm, RRD_FLAG_LIVE_RETENTION); + + return true; +} + +static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) { + if(likely(!rrd_flag_check(rm, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rm, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(rm->rrddim)) + return false; + + rrdmetric_update_retention(rm); + if(rm->first_time_t || rm->last_time_t) + return false; + + return true; +} + +static inline bool rrdinstance_should_be_deleted(RRDINSTANCE *ri) { + if(likely(!rrd_flag_check(ri, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(ri, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(ri->rrdset)) + return false; + + if(unlikely(dictionary_referenced_items(ri->rrdmetrics) != 0)) + return false; + + if(unlikely(dictionary_entries(ri->rrdmetrics) != 0)) + return false; + + if(ri->first_time_t || ri->last_time_t) + return false; + + return true; +} + +static inline bool rrdcontext_should_be_deleted(RRDCONTEXT *rc) { + if(likely(!rrd_flag_check(rc, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rc, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(unlikely(dictionary_referenced_items(rc->rrdinstances) != 0)) + return false; + + if(unlikely(dictionary_entries(rc->rrdinstances) != 0)) + return false; + + if(unlikely(rc->first_time_t || rc->last_time_t)) + return false; + + return true; +} + void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc) { // we need to refresh the string pointers in rc->hub // in case the context changed values @@ -2705,80 +3182,715 @@ void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc) { error("RRDCONTEXT: failed to delete context '%s' version %"PRIu64" from SQL.", rc->hub.id, rc->hub.version); } -static void rrdcontext_garbage_collect(void) { +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs) { + + internal_error(true, "RRDCONTEXT: garbage collecting context structures of host '%s'", rrdhost_hostname(host)); + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx, rc) { + if(unlikely(netdata_exit)) break; + + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP); + + rrdcontext_lock(rc); + + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(netdata_exit)) break; + + RRDMETRIC *rm; + dfe_start_write(ri->rrdmetrics, rm) { + if(rrdmetric_should_be_deleted(rm)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(ri->rrdmetrics, string2str(rm->id))) + error("RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(rm); + + if(rrdinstance_should_be_deleted(ri)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(rc->rrdinstances, string2str(ri->id))) + error("RRDCONTEXT: instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(ri); + + if(unlikely(rrdcontext_should_be_deleted(rc))) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_delete_from_sql_unsafe(rc); + + if(!dictionary_del((DICTIONARY *)host->rrdctx, string2str(rc->id))) + error("RRDCONTEXT: context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + + fprintf(stderr, "RRDCONTEXT: deleted context '%s'", string2str(rc->id)); + } + + // the item is referenced in the dictionary + // so, it is still here to unlock, even if we have deleted it + rrdcontext_unlock(rc); + } + dfe_done(rc); +} + +static void rrdcontext_garbage_collect_for_all_hosts(void) { rrd_rdlock(); RRDHOST *host; rrdhost_foreach_read(host) { - RRDCONTEXT *rc; - dfe_start_write((DICTIONARY *)host->rrdctx, rc) { - worker_is_busy(WORKER_JOB_CLEANUP); + rrdcontext_garbage_collect_single_host(host, true); + } + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// post processing + +static void rrdmetric_process_updates(RRDMETRIC *rm, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rm, reason); + + if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_METRIC); + + if(reason & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) { + rrd_flag_set_archived(rm); + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD); + } + if(rrd_flag_is_deleted(rm) && (reason & RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + rrd_flag_set_archived(rm); + + rrdmetric_update_retention(rm); + + rrd_flag_unset_updated(rm); +} + +static void rrdinstance_post_process_updates(RRDINSTANCE *ri, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(ri, reason); + + if(!force && !rrd_flag_is_updated(ri) && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_INSTANCE); + + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t metrics_active = 0, metrics_deleted = 0; + bool live_retention = true, currently_collected = false; + if(dictionary_entries(ri->rrdmetrics) > 0) { + RRDMETRIC *rm; + dfe_start_read((DICTIONARY *)ri->rrdmetrics, rm) { + if(unlikely(netdata_exit)) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdmetric_process_updates(rm, force, reason_to_pass, worker_jobs); + + if(unlikely(!rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely((rrdmetric_should_be_deleted(rm)))) { + metrics_deleted++; + continue; + } + + if(!currently_collected && rrd_flag_check(rm, RRD_FLAG_COLLECTED) && rm->first_time_t) + currently_collected = true; + + metrics_active++; + + if (rm->first_time_t && rm->first_time_t < min_first_time_t) + min_first_time_t = rm->first_time_t; + + if (rm->last_time_t && rm->last_time_t > max_last_time_t) + max_last_time_t = rm->last_time_t; + } + dfe_done(rm); + } + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_set(ri, RRD_FLAG_LIVE_RETENTION); + else if(unlikely(!live_retention && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_clear(ri, RRD_FLAG_LIVE_RETENTION); + + if(unlikely(!metrics_active)) { + // no metrics available + + if(ri->first_time_t) { + ri->first_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_t) { + ri->last_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have active metrics... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 || max_last_time_t == 0)) { + if(ri->first_time_t) { + ri->first_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_t) { + ri->last_time_t = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(live_retention)) + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(ri->first_time_t != min_first_time_t)) { + ri->first_time_t = min_first_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (unlikely(ri->last_time_t != max_last_time_t)) { + ri->last_time_t = max_last_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(ri); + else + rrd_flag_set_archived(ri); + } + } + + rrd_flag_unset_updated(ri); +} + +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rc, reason); + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_CONTEXT); + + size_t min_priority = LONG_MAX; + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t instances_active = 0, instances_deleted = 0; + bool live_retention = true, currently_collected = false, hidden = true; + if(dictionary_entries(rc->rrdinstances) > 0) { + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(netdata_exit)) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdinstance_post_process_updates(ri, force, reason_to_pass, worker_jobs); + + if(unlikely(hidden && !rrd_flag_check(ri, RRD_FLAG_HIDDEN))) + hidden = false; + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely(rrdinstance_should_be_deleted(ri))) { + instances_deleted++; + continue; + } + + if(unlikely(!currently_collected && rrd_flag_is_collected(ri) && ri->first_time_t)) + currently_collected = true; + + internal_error(rc->units != ri->units, + "RRDCONTEXT: '%s' rrdinstance '%s' has different units, context '%s', instance '%s'", + string2str(rc->id), string2str(ri->id), + string2str(rc->units), string2str(ri->units)); + + instances_active++; + + if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY && ri->priority < min_priority) + min_priority = ri->priority; + + if (ri->first_time_t && ri->first_time_t < min_first_time_t) + min_first_time_t = ri->first_time_t; + + if (ri->last_time_t && ri->last_time_t > max_last_time_t) + max_last_time_t = ri->last_time_t; + } + dfe_done(ri); + } + + { + bool previous_hidden = rrd_flag_check(rc, RRD_FLAG_HIDDEN); + if (hidden != previous_hidden) { + if (hidden && !rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_set(rc, RRD_FLAG_HIDDEN); + else if (!hidden && rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_clear(rc, RRD_FLAG_HIDDEN); + } + + bool previous_live_retention = rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION); + if (live_retention != previous_live_retention) { + if (live_retention && !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_set(rc, RRD_FLAG_LIVE_RETENTION); + else if (!live_retention && rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_clear(rc, RRD_FLAG_LIVE_RETENTION); + } + } + + rrdcontext_lock(rc); + rc->pp.executions++; + + if(unlikely(!instances_active)) { + // we had some instances, but they are gone now... + + if(rc->first_time_t) { + rc->first_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_t) { + rc->last_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have some active instances... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 && max_last_time_t == 0)) { + if(rc->first_time_t) { + rc->first_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_t) { + rc->last_time_t = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(rc->first_time_t != min_first_time_t)) { + rc->first_time_t = min_first_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (rc->last_time_t != max_last_time_t) { + rc->last_time_t = max_last_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(rc); + else + rrd_flag_set_archived(rc); + } + + if (min_priority != LONG_MAX && rc->priority != min_priority) { + rc->priority = min_priority; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } + + if(unlikely(rrd_flag_is_updated(rc) && rc->rrdhost->rrdctx_hub_queue)) { + if(check_if_cloud_version_changed_unsafe(rc, false)) { + rc->version = rrdcontext_get_next_version(rc); + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_hub_queue, + string2str(rc->id), rc, sizeof(*rc)); + } + } + + rrd_flag_unset_updated(rc); + rrdcontext_unlock(rc); +} + +static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function __maybe_unused, RRD_FLAGS flags __maybe_unused) { + if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return; + + if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_PP)) { + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue, + string2str(rc->id), + rc, + sizeof(*rc)); + +#if(defined(NETDATA_INTERNAL_CHECKS) && defined(LOG_POST_PROCESSING_QUEUE_INSERTIONS)) + { + BUFFER *wb_flags = buffer_create(1000); + rrd_flags_to_buffer(flags, wb_flags); + + BUFFER *wb_reasons = buffer_create(1000); + rrd_reasons_to_buffer(flags, wb_reasons); + + internal_error(true, "RRDCONTEXT: '%s' update triggered by function %s(), due to flags: %s, reasons: %s", + string2str(rc->id), function, + buffer_tostring(wb_flags), + buffer_tostring(wb_reasons)); + + buffer_free(wb_reasons); + buffer_free(wb_flags); + } +#endif + } +} + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc) { + if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return; + dictionary_del((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue, string2str(rc->id)); +} + +static void rrdcontext_post_process_queued_contexts(RRDHOST *host) { + if(unlikely(!host->rrdctx_post_processing_queue)) return; + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx_post_processing_queue, rc) { + if(unlikely(netdata_exit)) break; + + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_post_process_updates(rc, false, RRD_FLAG_NONE, true); + } + dfe_done(rc); +} + +// ---------------------------------------------------------------------------- +// dispatching contexts to cloud + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc) { + time_t now = now_realtime_sec(); + uint64_t version = MAX(rc->version, rc->hub.version); + version = MAX((uint64_t)now, version); + version++; + return version; +} + +static void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused) { + + // save it, so that we know the last version we sent to hub + rc->version = rc->hub.version = rrdcontext_get_next_version(rc); + rc->hub.id = string2str(rc->id); + rc->hub.title = string2str(rc->title); + rc->hub.units = string2str(rc->units); + rc->hub.family = string2str(rc->family); + rc->hub.chart_type = rrdset_type_name(rc->chart_type); + rc->hub.priority = rc->priority; + rc->hub.first_time_t = rc->first_time_t; + rc->hub.last_time_t = rrd_flag_is_collected(rc) ? 0 : rc->last_time_t; + rc->hub.deleted = rrd_flag_is_deleted(rc) ? true : false; + +#ifdef ENABLE_ACLK + struct context_updated message = { + .id = rc->hub.id, + .version = rc->hub.version, + .title = rc->hub.title, + .units = rc->hub.units, + .family = rc->hub.family, + .chart_type = rc->hub.chart_type, + .priority = rc->hub.priority, + .first_entry = rc->hub.first_time_t, + .last_entry = rc->hub.last_time_t, + .deleted = rc->hub.deleted, + }; + + if(likely(!rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { + if (snapshot) { + if (!rc->hub.deleted) + contexts_snapshot_add_ctx_update(bundle, &message); + } + else + contexts_updated_add_ctx_update(bundle, &message); + } +#endif + + // store it to SQL + + if(rrd_flag_is_deleted(rc)) + rrdcontext_delete_from_sql_unsafe(rc); + + else if (ctx_store_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + error("RRDCONTEXT: failed to save context '%s' version %"PRIu64" to SQL.", rc->hub.id, rc->hub.version); +} + +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused) { + bool id_changed = false, + title_changed = false, + units_changed = false, + family_changed = false, + chart_type_changed = false, + priority_changed = false, + first_time_changed = false, + last_time_changed = false, + deleted_changed = false; + + RRD_FLAGS flags = rrd_flags_get(rc); + + if(unlikely(string2str(rc->id) != rc->hub.id)) + id_changed = true; + + if(unlikely(string2str(rc->title) != rc->hub.title)) + title_changed = true; + + if(unlikely(string2str(rc->units) != rc->hub.units)) + units_changed = true; + + if(unlikely(string2str(rc->family) != rc->hub.family)) + family_changed = true; + + if(unlikely(rrdset_type_name(rc->chart_type) != rc->hub.chart_type)) + chart_type_changed = true; + + if(unlikely(rc->priority != rc->hub.priority)) + priority_changed = true; + + if(unlikely((uint64_t)rc->first_time_t != rc->hub.first_time_t)) + first_time_changed = true; + + if(unlikely((uint64_t)((flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_t) != rc->hub.last_time_t)) + last_time_changed = true; + + if(unlikely(((flags & RRD_FLAG_DELETED) ? true : false) != rc->hub.deleted)) + deleted_changed = true; + + if(unlikely(id_changed || title_changed || units_changed || family_changed || chart_type_changed || priority_changed || first_time_changed || last_time_changed || deleted_changed)) { + + internal_error(LOG_TRANSITIONS, + "RRDCONTEXT: %s NEW VERSION '%s'%s of host '%s', version %"PRIu64", title '%s'%s, units '%s'%s, family '%s'%s, chart type '%s'%s, priority %u%s, first_time_t %ld%s, last_time_t %ld%s, deleted '%s'%s, (queued for %llu ms, expected %llu ms)", + sending?"SENDING":"QUEUE", + string2str(rc->id), id_changed ? " (CHANGED)" : "", + rrdhost_hostname(rc->rrdhost), + rc->version, + string2str(rc->title), title_changed ? " (CHANGED)" : "", + string2str(rc->units), units_changed ? " (CHANGED)" : "", + string2str(rc->family), family_changed ? " (CHANGED)" : "", + rrdset_type_name(rc->chart_type), chart_type_changed ? " (CHANGED)" : "", + rc->priority, priority_changed ? " (CHANGED)" : "", + rc->first_time_t, first_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_t, last_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_DELETED) ? "true" : "false", deleted_changed ? " (CHANGED)" : "", + sending ? (now_realtime_usec() - rc->queue.queued_ut) / USEC_PER_MS : 0, + sending ? (rc->queue.scheduled_dispatch_ut - rc->queue.queued_ut) / USEC_PER_MS : 0 + ); + + return true; + } + + return false; +} + +static inline usec_t rrdcontext_calculate_queued_dispatch_time_ut(RRDCONTEXT *rc, usec_t now_ut) { + + if(likely(rc->queue.delay_calc_ut >= rc->queue.queued_ut)) + return rc->queue.scheduled_dispatch_ut; + + RRD_FLAGS flags = rc->queue.queued_flags; + + usec_t delay = LONG_MAX; + int i; + struct rrdcontext_reason *reason; + for(i = 0, reason = &rrdcontext_reasons[i]; reason->name ; reason = &rrdcontext_reasons[++i]) { + if(unlikely(flags & reason->flag)) { + if(reason->delay_ut < delay) + delay = reason->delay_ut; + } + } + + if(unlikely(delay == LONG_MAX)) { + internal_error(true, "RRDCONTEXT: '%s', cannot find minimum delay of flags %x", string2str(rc->id), (unsigned int)flags); + delay = 60 * USEC_PER_SEC; + } + + rc->queue.delay_calc_ut = now_ut; + usec_t dispatch_ut = rc->queue.scheduled_dispatch_ut = rc->queue.queued_ut + delay; + return dispatch_ut; +} + +static void rrdcontext_dequeue_from_hub_queue(RRDCONTEXT *rc) { + dictionary_del((DICTIONARY *)rc->rrdhost->rrdctx_hub_queue, string2str(rc->id)); +} + +static void rrdcontext_dispatch_queued_contexts_to_hub(RRDHOST *host, usec_t now_ut) { + + // check if we have received a streaming command for this host + if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS) || !aclk_connected || !host->rrdctx_hub_queue) + return; + + // check if there are queued items to send + if(!dictionary_entries((DICTIONARY *)host->rrdctx_hub_queue)) + return; + + if(!host->node_id) + return; + + size_t messages_added = 0; + contexts_updated_t bundle = NULL; + + RRDCONTEXT *rc; + dfe_start_reentrant((DICTIONARY *)host->rrdctx_hub_queue, rc) { + if(unlikely(netdata_exit)) break; + + if(unlikely(messages_added >= MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST)) + break; + + worker_is_busy(WORKER_JOB_QUEUED); + usec_t dispatch_ut = rrdcontext_calculate_queued_dispatch_time_ut(rc, now_ut); + char *claim_id = get_agent_claimid(); + + if(unlikely(now_ut >= dispatch_ut) && claim_id) { + worker_is_busy(WORKER_JOB_CHECK); rrdcontext_lock(rc); + if(check_if_cloud_version_changed_unsafe(rc, true)) { + worker_is_busy(WORKER_JOB_SEND); + +#ifdef ENABLE_ACLK + if(!bundle) { + // prepare the bundle to send the messages + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, uuid); + + bundle = contexts_updated_new(claim_id, uuid, 0, now_ut); + } +#endif + // update the hub data of the context, give a new version, pack the message + // and save an update to SQL + rrdcontext_message_send_unsafe(rc, false, bundle); + messages_added++; + + rc->queue.dispatches++; + rc->queue.dequeued_ut = now_ut; + } + else + rc->version = rc->hub.version; + + // remove it from the queue + worker_is_busy(WORKER_JOB_DEQUEUE); + rrdcontext_dequeue_from_hub_queue(rc); + if(unlikely(rrdcontext_should_be_deleted(rc))) { + // this is a deleted context - delete it forever... + worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + + rrdcontext_dequeue_from_post_processing(rc); rrdcontext_delete_from_sql_unsafe(rc); - if(dictionary_del_having_write_lock((DICTIONARY *)host->rrdctx, string2str(rc->id)) != 0) + STRING *id = string_dup(rc->id); + rrdcontext_unlock(rc); + + // delete it from the master dictionary + if(!dictionary_del((DICTIONARY *)host->rrdctx, string2str(rc->id))) error("RRDCONTEXT: '%s' of host '%s' failed to be deleted from rrdcontext dictionary.", - string2str(rc->id), host->hostname); - } - else { - RRDINSTANCE *ri; - dfe_start_write(rc->rrdinstances, ri) { - if(rrdinstance_should_be_deleted(ri)) { - worker_is_busy(WORKER_JOB_CLEANUP_DELETE); - dictionary_del_having_write_lock(rc->rrdinstances, string2str(ri->id)); - } - else { - RRDMETRIC *rm; - dfe_start_write(ri->rrdmetrics, rm) { - if(rrdmetric_should_be_deleted(rm)) { - worker_is_busy(WORKER_JOB_CLEANUP_DELETE); - dictionary_del_having_write_lock(ri->rrdmetrics, string2str(rm->id)); - } - } - dfe_done(rm); - } - } - dfe_done(ri); - } + string2str(id), rrdhost_hostname(host)); - // the item is referenced in the dictionary - // so, it is still here to unlock, even if we have deleted it - rrdcontext_unlock(rc); + string_freez(id); + } + else + rrdcontext_unlock(rc); } - dfe_done(rc); + freez(claim_id); } - rrd_unlock(); + dfe_done(rc); + +#ifdef ENABLE_ACLK + if(!netdata_exit && bundle) { + // we have a bundle to send messages + + // update the version hash + contexts_updated_update_version_hash(bundle, rrdcontext_version_hash(host)); + + // send it + aclk_send_contexts_updated(bundle); + } + else if(bundle) + contexts_updated_delete(bundle); +#endif + } +// ---------------------------------------------------------------------------- +// worker thread + static void rrdcontext_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + // custom code + worker_unregister(); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } void *rrdcontext_main(void *ptr) { netdata_thread_cleanup_push(rrdcontext_main_cleanup, ptr); - if(unlikely(rrdcontext_enabled == CONFIG_BOOLEAN_NO)) - goto exit; - worker_register("RRDCONTEXT"); worker_register_job_name(WORKER_JOB_HOSTS, "hosts"); worker_register_job_name(WORKER_JOB_CHECK, "dedup checks"); worker_register_job_name(WORKER_JOB_SEND, "sent contexts"); - worker_register_job_name(WORKER_JOB_DEQUEUE, "deduped contexts"); + worker_register_job_name(WORKER_JOB_DEQUEUE, "deduplicated contexts"); worker_register_job_name(WORKER_JOB_RETENTION, "metrics retention"); worker_register_job_name(WORKER_JOB_QUEUED, "queued contexts"); worker_register_job_name(WORKER_JOB_CLEANUP, "cleanups"); worker_register_job_name(WORKER_JOB_CLEANUP_DELETE, "deletes"); + worker_register_job_name(WORKER_JOB_PP_METRIC, "check metrics"); + worker_register_job_name(WORKER_JOB_PP_INSTANCE, "check instances"); + worker_register_job_name(WORKER_JOB_PP_CONTEXT, "check contexts"); + + worker_register_job_custom_metric(WORKER_JOB_HUB_QUEUE_SIZE, "hub queue size", "contexts", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_JOB_PP_QUEUE_SIZE, "post processing queue size", "contexts", WORKER_METRIC_ABSOLUTE); heartbeat_t hb; heartbeat_init(&hb); - usec_t step = USEC_PER_SEC * RRDCONTEXT_WORKER_THREAD_HEARTBEAT_SECS; + usec_t step = RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC; while (!netdata_exit) { worker_is_idle(); @@ -2786,16 +3898,17 @@ void *rrdcontext_main(void *ptr) { if(unlikely(netdata_exit)) break; - if(!aclk_connected) continue; - usec_t now_ut = now_realtime_usec(); if(rrdcontext_next_db_rotation_ut && now_ut > rrdcontext_next_db_rotation_ut) { - rrdcontext_recalculate_retention(WORKER_JOB_RETENTION); - rrdcontext_garbage_collect(); + rrdcontext_recalculate_retention_all_hosts(); + rrdcontext_garbage_collect_for_all_hosts(); rrdcontext_next_db_rotation_ut = 0; } + size_t hub_queued_contexts_for_all_hosts = 0; + size_t pp_queued_contexts_for_all_hosts = 0; + rrd_rdlock(); RRDHOST *host; rrdhost_foreach_read(host) { @@ -2803,106 +3916,23 @@ void *rrdcontext_main(void *ptr) { worker_is_busy(WORKER_JOB_HOSTS); - // check if we have received a streaming command for this host - if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) - continue; - - // check if there are queued items to send - if(!dictionary_stats_entries((DICTIONARY *)host->rrdctx_queue)) - continue; - - if(!host->node_id) - continue; - - size_t messages_added = 0; - contexts_updated_t bundle = NULL; - - RRDCONTEXT *rc; - dfe_start_write((DICTIONARY *)host->rrdctx_queue, rc) { - if(unlikely(netdata_exit)) break; - - if(unlikely(messages_added >= MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST)) - break; - - worker_is_busy(WORKER_JOB_QUEUED); - usec_t dispatch_ut = rrdcontext_calculate_queued_dispatch_time_ut(rc, now_ut); - char *claim_id = get_agent_claimid(); - if(unlikely(now_ut >= dispatch_ut) && claim_id) { - worker_is_busy(WORKER_JOB_CHECK); - - rrdcontext_lock(rc); - - if(check_if_cloud_version_changed_unsafe(rc, true)) { - worker_is_busy(WORKER_JOB_SEND); - -#ifdef ENABLE_ACLK - if(!bundle) { - // prepare the bundle to send the messages - char uuid[UUID_STR_LEN]; - uuid_unparse_lower(*host->node_id, uuid); - - bundle = contexts_updated_new(claim_id, uuid, 0, now_ut); - } -#endif - // update the hub data of the context, give a new version, pack the message - // and save an update to SQL - rrdcontext_message_send_unsafe(rc, false, bundle); - messages_added++; - - rc->queue.dequeued_ut = now_ut; - } - else - rc->version = rc->hub.version; - - // remove the queued flag, so that it can be queued again - rc->flags &= ~RRD_FLAG_QUEUED; - - // remove it from the queue - worker_is_busy(WORKER_JOB_DEQUEUE); - dictionary_del_having_write_lock((DICTIONARY *)host->rrdctx_queue, string2str(rc->id)); - - if(unlikely(rrdcontext_should_be_deleted(rc))) { - // this is a deleted context - delete it forever... - - worker_is_busy(WORKER_JOB_CLEANUP_DELETE); - rrdcontext_delete_from_sql_unsafe(rc); - - STRING *id = string_dup(rc->id); - rrdcontext_unlock(rc); - - // delete it from the master dictionary - if(dictionary_del((DICTIONARY *)host->rrdctx, string2str(rc->id)) != 0) - error("RRDCONTEXT: '%s' of host '%s' failed to be deleted from rrdcontext dictionary.", - string2str(id), host->hostname); - - string_freez(id); - } - else - rrdcontext_unlock(rc); - } - freez(claim_id); + if(host->rrdctx_post_processing_queue) { + pp_queued_contexts_for_all_hosts += + dictionary_entries((DICTIONARY *)host->rrdctx_post_processing_queue); + rrdcontext_post_process_queued_contexts(host); } - dfe_done(rc); - -#ifdef ENABLE_ACLK - if(!netdata_exit && bundle) { - // we have a bundle to send messages - // update the version hash - contexts_updated_update_version_hash(bundle, rrdcontext_version_hash(host)); - - // send it - aclk_send_contexts_updated(bundle); + if(host->rrdctx_hub_queue) { + hub_queued_contexts_for_all_hosts += dictionary_entries((DICTIONARY *)host->rrdctx_hub_queue); + rrdcontext_dispatch_queued_contexts_to_hub(host, now_ut); } - else if(bundle) - contexts_updated_delete(bundle); -#endif } rrd_unlock(); + worker_set_metric(WORKER_JOB_HUB_QUEUE_SIZE, (NETDATA_DOUBLE)hub_queued_contexts_for_all_hosts); + worker_set_metric(WORKER_JOB_PP_QUEUE_SIZE, (NETDATA_DOUBLE)pp_queued_contexts_for_all_hosts); } -exit: netdata_thread_cleanup_pop(1); return NULL; } diff --git a/database/rrdcontext.h b/database/rrdcontext.h index a9e0bd2e3..67e6cf394 100644 --- a/database/rrdcontext.h +++ b/database/rrdcontext.h @@ -8,7 +8,6 @@ typedef struct rrdmetric_acquired RRDMETRIC_ACQUIRED; - // ---------------------------------------------------------------------------- // RRDINSTANCE @@ -24,15 +23,26 @@ typedef struct rrdcontext_acquired RRDCONTEXT_ACQUIRED; #include "rrd.h" +const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma); +const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma); +NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma); + +const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria); +const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria); +DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria); +DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria); + // ---------------------------------------------------------------------------- // public API for rrdhost -extern void rrdhost_load_rrdcontext_data(RRDHOST *host); -extern void rrdhost_create_rrdcontexts(RRDHOST *host); -extern void rrdhost_destroy_rrdcontexts(RRDHOST *host); +void rrdhost_load_rrdcontext_data(RRDHOST *host); +void rrdhost_create_rrdcontexts(RRDHOST *host); +void rrdhost_destroy_rrdcontexts(RRDHOST *host); -extern void rrdcontext_host_child_connected(RRDHOST *host); -extern void rrdcontext_host_child_disconnected(RRDHOST *host); +void rrdcontext_host_child_connected(RRDHOST *host); +void rrdcontext_host_child_disconnected(RRDHOST *host); + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data); typedef enum { RRDCONTEXT_OPTION_NONE = 0, @@ -50,43 +60,191 @@ typedef enum { #define RRDCONTEXT_OPTIONS_ALL (RRDCONTEXT_OPTION_SHOW_METRICS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_QUEUED|RRDCONTEXT_OPTION_SHOW_FLAGS|RRDCONTEXT_OPTION_SHOW_DELETED|RRDCONTEXT_OPTION_SHOW_UUIDS|RRDCONTEXT_OPTION_SHOW_HIDDEN) -extern int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); -extern int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); +int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); +int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); + +// ---------------------------------------------------------------------------- +// public API for rrdcontexts + +const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca); // ---------------------------------------------------------------------------- // public API for rrddims -extern void rrdcontext_updated_rrddim(RRDDIM *rd); -extern void rrdcontext_removed_rrddim(RRDDIM *rd); -extern void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd); -extern void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd); -extern void rrdcontext_updated_rrddim_divisor(RRDDIM *rd); -extern void rrdcontext_updated_rrddim_flags(RRDDIM *rd); -extern void rrdcontext_collected_rrddim(RRDDIM *rd); +void rrdcontext_updated_rrddim(RRDDIM *rd); +void rrdcontext_removed_rrddim(RRDDIM *rd); +void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd); +void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd); +void rrdcontext_updated_rrddim_divisor(RRDDIM *rd); +void rrdcontext_updated_rrddim_flags(RRDDIM *rd); +void rrdcontext_collected_rrddim(RRDDIM *rd); +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, uuid_t *store_uuid); // ---------------------------------------------------------------------------- // public API for rrdsets -extern void rrdcontext_updated_rrdset(RRDSET *st); -extern void rrdcontext_removed_rrdset(RRDSET *st); -extern void rrdcontext_updated_rrdset_name(RRDSET *st); -extern void rrdcontext_updated_rrdset_flags(RRDSET *st); -extern void rrdcontext_collected_rrdset(RRDSET *st); +void rrdcontext_updated_rrdset(RRDSET *st); +void rrdcontext_removed_rrdset(RRDSET *st); +void rrdcontext_updated_rrdset_name(RRDSET *st); +void rrdcontext_updated_rrdset_flags(RRDSET *st); +void rrdcontext_updated_retention_rrdset(RRDSET *st); +void rrdcontext_collected_rrdset(RRDSET *st); +int rrdcontext_find_chart_uuid(RRDSET *st, uuid_t *store_uuid); // ---------------------------------------------------------------------------- // public API for ACLK -extern void rrdcontext_hub_checkpoint_command(void *cmd); -extern void rrdcontext_hub_stop_streaming_command(void *cmd); +void rrdcontext_hub_checkpoint_command(void *cmd); +void rrdcontext_hub_stop_streaming_command(void *cmd); // ---------------------------------------------------------------------------- // public API for threads -extern int rrdcontext_enabled; +void rrdcontext_db_rotation(void); +void *rrdcontext_main(void *); + +// ---------------------------------------------------------------------------- +// public API for weights + +struct metric_entry { + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + RRDMETRIC_ACQUIRED *rma; +}; + +DICTIONARY *rrdcontext_all_metrics_to_dict(RRDHOST *host, SIMPLE_PATTERN *contexts); -extern void rrdcontext_db_rotation(void); -extern void *rrdcontext_main(void *); +// ---------------------------------------------------------------------------- +// public API for queries + +typedef struct query_metric { + struct query_metric_tier { + struct storage_engine *eng; + STORAGE_METRIC_HANDLE *db_metric_handle; + time_t db_first_time_t; // the oldest timestamp available for this tier + time_t db_last_time_t; // the latest timestamp available for this tier + time_t db_update_every; // latest update every for this tier + } tiers[RRD_STORAGE_TIERS]; + + struct { + RRDHOST *host; + RRDCONTEXT_ACQUIRED *rca; + RRDINSTANCE_ACQUIRED *ria; + RRDMETRIC_ACQUIRED *rma; + } link; + + struct { + STRING *id; + STRING *name; + RRDR_DIMENSION_FLAGS options; + } dimension; + + struct { + STRING *id; + STRING *name; + } chart; + +} QUERY_METRIC; + +#define MAX_QUERY_TARGET_ID_LENGTH 255 + +typedef struct query_target_request { + RRDHOST *host; // the host to be queried (can be NULL, hosts will be used) + RRDCONTEXT_ACQUIRED *rca; // the context to be queried (can be NULL) + RRDINSTANCE_ACQUIRED *ria; // the instance to be queried (can be NULL) + RRDMETRIC_ACQUIRED *rma; // the metric to be queried (can be NULL) + RRDSET *st; // the chart to be queried (NULL, for context queries) + const char *hosts; // hosts simple pattern + const char *contexts; // contexts simple pattern (context queries) + const char *charts; // charts simple pattern (for context queries) + const char *dimensions; // dimensions simple pattern + const char *chart_label_key; // select only the chart having this label key + const char *charts_labels_filter; // select only the charts having this combo of label key:value + time_t after; // the requested timeframe + time_t before; // the requested timeframe + size_t points; // the requested number of points + time_t timeout; // the timeout of the query in seconds + uint32_t format; // DATASOURCE_FORMAT + RRDR_OPTIONS options; + RRDR_GROUPING group_method; + const char *group_options; + time_t resampling_time; + size_t tier; + QUERY_SOURCE query_source; +} QUERY_TARGET_REQUEST; + +typedef struct query_target { + char id[MAX_QUERY_TARGET_ID_LENGTH + 1]; // query identifier (for logging) + QUERY_TARGET_REQUEST request; + + bool used; // when true, this query is currently being used + size_t queries; // how many query we have done so far + + struct { + bool relative; // true when the request made with relative timestamps, true if it was absolute + bool aligned; + time_t after; // the absolute timestamp this query is about + time_t before; // the absolute timestamp this query is about + time_t query_granularity; + size_t points; // the number of points the query will return (maybe different from the request) + size_t group; + RRDR_GROUPING group_method; + const char *group_options; + size_t resampling_group; + NETDATA_DOUBLE resampling_divisor; + RRDR_OPTIONS options; + size_t tier; + } window; + + struct { + time_t first_time_t; // the combined first_time_t of all metrics in the query, across all tiers + time_t last_time_t; // the combined last_time_T of all metrics in the query, across all tiers + time_t minimum_latest_update_every; // the min update every of the metrics in the query + } db; + + struct { + QUERY_METRIC *array; // the metrics to be queried (all of them should be queried, no exceptions) + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } query; + + struct { + RRDMETRIC_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + } metrics; + + struct { + RRDINSTANCE_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + SIMPLE_PATTERN *chart_label_key_pattern; + SIMPLE_PATTERN *charts_labels_filter_pattern; + } instances; + + struct { + RRDCONTEXT_ACQUIRED **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } contexts; + + struct { + RRDHOST **array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } hosts; + +} QUERY_TARGET; + +void query_target_free(void); +void query_target_release(QUERY_TARGET *qt); + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr); #endif // NETDATA_RRDCONTEXT_H diff --git a/database/rrddim.c b/database/rrddim.c index 90165a253..1b3d9952c 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -2,281 +2,117 @@ #define NETDATA_RRD_INTERNALS #include "rrd.h" -#ifdef ENABLE_DBENGINE -#include "database/engine/rrdengineapi.h" -#endif #include "storage_engine.h" // ---------------------------------------------------------------------------- // RRDDIM index -int rrddim_compare(void* a, void* b) { - if(((RRDDIM *)a)->hash < ((RRDDIM *)b)->hash) return -1; - else if(((RRDDIM *)a)->hash > ((RRDDIM *)b)->hash) return 1; - else return strcmp(((RRDDIM *)a)->id, ((RRDDIM *)b)->id); -} - -#define rrddim_index_add(st, rd) (RRDDIM *)avl_insert_lock(&((st)->dimensions_index), (avl_t *)(rd)) -#define rrddim_index_del(st,rd ) (RRDDIM *)avl_remove_lock(&((st)->dimensions_index), (avl_t *)(rd)) - -static inline RRDDIM *rrddim_index_find(RRDSET *st, const char *id, uint32_t hash) { - RRDDIM tmp = { - .id = id, - .hash = (hash)?hash:simple_hash(id) - }; - return (RRDDIM *)avl_search_lock(&(st->dimensions_index), (avl_t *) &tmp); -} - - -// ---------------------------------------------------------------------------- -// RRDDIM - find a dimension - -inline RRDDIM *rrddim_find(RRDSET *st, const char *id) { - debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", st->name, id); - - return rrddim_index_find(st, id, 0); -} - - -// ---------------------------------------------------------------------------- -// RRDDIM rename a dimension - -inline int rrddim_set_name(RRDSET *st, RRDDIM *rd, const char *name) { - if(unlikely(!name || !*name || (rd->name && !strcmp(rd->name, name)))) - return 0; - - debug(D_RRD_CALLS, "rrddim_set_name() from %s.%s to %s.%s", st->name, rd->name, st->name, name); - - if (rd->name) - freez((void *) rd->name); - - rd->name = strdupz(name); - rd->hash_name = simple_hash(rd->name); - - if (!st->state->is_ar_chart) - rrddimvar_rename_all(rd); - - rd->exposed = 0; - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - - ml_dimension_update_name(st, rd, name); - - return 1; -} - -inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) { - if(unlikely(rd->algorithm == algorithm)) - return 0; - - debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", st->id, rd->name, rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); - rd->algorithm = algorithm; - rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - rrdcontext_updated_rrddim_algorithm(rd); - return 1; -} - -inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier) { - if(unlikely(rd->multiplier == multiplier)) - return 0; +struct rrddim_constructor { + RRDSET *st; + const char *id; + const char *name; + collected_number multiplier; + collected_number divisor; + RRD_ALGORITHM algorithm; + RRD_MEMORY_MODE memory_mode; + + enum { + RRDDIM_REACT_NONE = 0, + RRDDIM_REACT_NEW = (1 << 0), + RRDDIM_REACT_UPDATED = (1 << 2), + } react_action; - debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->multiplier, multiplier); - rd->multiplier = multiplier; - rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - rrdcontext_updated_rrddim_multiplier(rd); - return 1; -} - -inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) { - if(unlikely(rd->divisor == divisor)) - return 0; - - debug(D_RRD_CALLS, "Updating divisor of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->divisor, divisor); - rd->divisor = divisor; - rd->exposed = 0; - rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - rrdcontext_updated_rrddim_divisor(rd); - return 1; -} - -// ---------------------------------------------------------------------------- -// RRDDIM create a dimension - -void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) { - RRDCALC *rrdc; - - for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) { - if (simple_pattern_matches(rrdc->spdim, rd->id) || simple_pattern_matches(rrdc->spdim, rd->name)) { - if (rrdc->hash_chart == st->hash_name || !strcmp(rrdc->chart, st->name) || !strcmp(rrdc->chart, st->id)) { - char *name = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name)); - if(rrdcalc_exists(host, st->name, name, 0, 0)) { - freez(name); - continue; - } - - netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock); - RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, name, rd->name); - netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); - - if (child) { - rrdcalc_add_to_host(host, child); - RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl_t *)child); - if (rdcmp != child) { - error("Cannot insert the alarm index ID %s",child->name); - } - } - else { - error("Cannot allocate a new alarm."); - rrdc->foreachcounter--; - } - } - } - } -} - -// Return either -// 0 : Dimension is live -// last collected time : Dimension is not live +}; -#ifdef ENABLE_ACLK -time_t calc_dimension_liveness(RRDDIM *rd, time_t now) -{ - time_t last_updated = rd->last_collected_time.tv_sec; - int live; - if (rd->aclk_live_status == 1) - live = - ((now - last_updated) < - MIN(rrdset_free_obsolete_time, RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER * rd->update_every)); - else - live = ((now - last_updated) < RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every); - return live ? 0 : last_updated; +// isolated call to appear +// separate in statistics +static void *rrddim_alloc_db(size_t entries) { + return callocz(entries, sizeof(storage_number)); } -#endif -RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, - collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) -{ +static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; RRDHOST *host = st->rrdhost; - rrdset_wrlock(st); - RRDDIM *rd = rrddim_find(st, id); - if(unlikely(rd)) { - debug(D_RRD_CALLS, "Cannot create rrd dimension '%s/%s', it already exists.", st->id, name?name:"<NONAME>"); - - int rc = rrddim_set_name(st, rd, name); - rc += rrddim_set_algorithm(st, rd, algorithm); - rc += rrddim_set_multiplier(st, rd, multiplier); - rc += rrddim_set_divisor(st, rd, divisor); - - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { - store_active_dimension(&rd->metric_uuid); - - for(int tier = 0; tier < storage_tiers ;tier++) { - if (rd->tiers[tier]) - rd->tiers[tier]->db_collection_handle = - rd->tiers[tier]->collect_ops.init(rd->tiers[tier]->db_metric_handle); - } - - rrddim_flag_clear(rd, RRDDIM_FLAG_ARCHIVED); - rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, RRDVAR_OPTION_DEFAULT); - rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, RRDVAR_OPTION_DEFAULT); - rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); + rd->flags = RRDDIM_FLAG_NONE; - rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM); - rrdset_flag_set(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS); - rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS); - } + rd->id = string_strdupz(ctr->id); + rd->name = (ctr->name && *ctr->name)?rrd_string_strdupz(ctr->name):string_dup(rd->id); - if (unlikely(rc)) { - debug(D_METADATALOG, "DIMENSION [%s] metadata updated", rd->id); - (void)sql_store_dimension(&rd->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor, - rd->algorithm); -#ifdef ENABLE_ACLK - queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, now_realtime_sec())); -#endif - rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - } - rrdset_unlock(st); - rrdcontext_updated_rrddim(rd); - return rd; - } - - rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - - rd = callocz(1, sizeof(RRDDIM)); - rd->id = strdupz(id); - rd->hash = simple_hash(rd->id); - - rd->name = (name && *name)?strdupz(name):strdupz(rd->id); - rd->hash_name = simple_hash(rd->name); - - rd->algorithm = algorithm; - rd->multiplier = multiplier; - rd->divisor = divisor; + rd->algorithm = ctr->algorithm; + rd->multiplier = ctr->multiplier; + rd->divisor = ctr->divisor; if(!rd->divisor) rd->divisor = 1; - rd->entries = st->entries; rd->update_every = st->update_every; + rd->rrdset = st; + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) rd->collections_counter = 1; - rd->rrdset = st; - - if(memory_mode == RRD_MEMORY_MODE_MAP || memory_mode == RRD_MEMORY_MODE_SAVE) { - if(!rrddim_memory_load_or_create_map_save(st, rd, memory_mode)) { - info("Failed to use memory mode %s for chart '%s', dimension '%s', falling back to ram", (memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", st->name, rd->name); - memory_mode = RRD_MEMORY_MODE_RAM; + if(ctr->memory_mode == RRD_MEMORY_MODE_MAP || ctr->memory_mode == RRD_MEMORY_MODE_SAVE) { + if(!rrddim_memory_load_or_create_map_save(st, rd, ctr->memory_mode)) { + info("Failed to use memory mode %s for chart '%s', dimension '%s', falling back to ram", (ctr->memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", rrdset_name(st), rrddim_name(rd)); + ctr->memory_mode = RRD_MEMORY_MODE_RAM; } } - if(memory_mode == RRD_MEMORY_MODE_RAM) { + if(ctr->memory_mode == RRD_MEMORY_MODE_RAM) { size_t entries = st->entries; if(!entries) entries = 5; rd->db = netdata_mmap(NULL, entries * sizeof(storage_number), MAP_PRIVATE, 1); if(!rd->db) { - info("Failed to use memory mode ram for chart '%s', dimension '%s', falling back to alloc", st->name, rd->name); - memory_mode = RRD_MEMORY_MODE_ALLOC; + info("Failed to use memory mode ram for chart '%s', dimension '%s', falling back to alloc", rrdset_name(st), rrddim_name(rd)); + ctr->memory_mode = RRD_MEMORY_MODE_ALLOC; } else rd->memsize = entries * sizeof(storage_number); } - if(memory_mode == RRD_MEMORY_MODE_ALLOC || memory_mode == RRD_MEMORY_MODE_NONE) { + if(ctr->memory_mode == RRD_MEMORY_MODE_ALLOC || ctr->memory_mode == RRD_MEMORY_MODE_NONE) { size_t entries = st->entries; if(entries < 5) entries = 5; - rd->db = callocz(entries, sizeof(storage_number)); + rd->db = rrddim_alloc_db(entries); rd->memsize = entries * sizeof(storage_number); } - rd->rrd_memory_mode = memory_mode; - -#ifdef ENABLE_ACLK - rd->aclk_live_status = -1; + rd->rrd_memory_mode = ctr->memory_mode; + + if (unlikely(rrdcontext_find_dimension_uuid(st, rrddim_id(rd), &(rd->metric_uuid)))) { + uuid_generate(rd->metric_uuid); + bool found_in_sql = false; (void)found_in_sql; + +// bool found_in_sql = true; +// if(unlikely(sql_find_dimension_uuid(st, rd, &rd->metric_uuid))) { +// found_in_sql = false; +// uuid_generate(rd->metric_uuid); +// } + +#ifdef NETDATA_INTERNAL_CHECKS + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(rd->metric_uuid, uuid_str); + error_report("Dimension UUID for host %s chart [%s] dimension [%s] not found in context. It is now set to %s (%s)", + string2str(host->hostname), + string2str(st->name), + string2str(rd->name), + uuid_str, found_in_sql ? "found in sqlite" : "newly generated"); #endif - (void) find_dimension_uuid(st, rd, &(rd->metric_uuid)); + } // initialize the db tiers { size_t initialized = 0; - RRD_MEMORY_MODE wanted_mode = memory_mode; - for(int tier = 0; tier < storage_tiers ; tier++, wanted_mode = RRD_MEMORY_MODE_DBENGINE) { - STORAGE_ENGINE *eng = storage_engine_get(wanted_mode); - if(!eng) continue; - + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = host->db[tier].eng; rd->tiers[tier] = callocz(1, sizeof(struct rrddim_tier)); - rd->tiers[tier]->tier_grouping = get_tier_grouping(tier); - rd->tiers[tier]->mode = eng->id; - rd->tiers[tier]->collect_ops = eng->api.collect_ops; - rd->tiers[tier]->query_ops = eng->api.query_ops; - rd->tiers[tier]->db_metric_handle = eng->api.init(rd, host->storage_instance[tier]); + rd->tiers[tier]->tier_grouping = host->db[tier].tier_grouping; + rd->tiers[tier]->collect_ops = &eng->api.collect_ops; + rd->tiers[tier]->query_ops = &eng->api.query_ops; + rd->tiers[tier]->db_metric_handle = eng->api.metric_get_or_create(rd, host->db[tier].instance, rd->rrdset->storage_metrics_groups[tier]); storage_point_unset(rd->tiers[tier]->virtual_point); initialized++; @@ -284,138 +120,114 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte } if(!initialized) - error("Failed to initialize all db tiers for chart '%s', dimension '%s", st->name, rd->name); + error("Failed to initialize all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); if(!rd->tiers[0]) - error("Failed to initialize the first db tier for chart '%s', dimension '%s", st->name, rd->name); + error("Failed to initialize the first db tier for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); } - store_active_dimension(&rd->metric_uuid); - // initialize data collection for all tiers { size_t initialized = 0; - for (int tier = 0; tier < storage_tiers; tier++) { + for (size_t tier = 0; tier < storage_tiers; tier++) { if (rd->tiers[tier]) { - rd->tiers[tier]->db_collection_handle = rd->tiers[tier]->collect_ops.init(rd->tiers[tier]->db_metric_handle); + rd->tiers[tier]->db_collection_handle = rd->tiers[tier]->collect_ops->init(rd->tiers[tier]->db_metric_handle, st->rrdhost->db[tier].tier_grouping * st->update_every); initialized++; } } if(!initialized) - error("Failed to initialize data collection for all db tiers for chart '%s', dimension '%s", st->name, rd->name); + error("Failed to initialize data collection for all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); } - // append this dimension - if(!st->dimensions) - st->dimensions = rd; - else { - RRDDIM *td = st->dimensions; + if(rrdset_number_of_dimensions(st) != 0) { + RRDDIM *td; + dfe_start_write(st->rrddim_root_index, td) { + if(!td) break; + } + dfe_done(td); - if(td->algorithm != rd->algorithm || ABS(td->multiplier) != ABS(rd->multiplier) || ABS(td->divisor) != ABS(rd->divisor)) { + if(td && (td->algorithm != rd->algorithm || ABS(td->multiplier) != ABS(rd->multiplier) || ABS(td->divisor) != ABS(rd->divisor))) { if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { - #ifdef NETDATA_INTERNAL_CHECKS +#ifdef NETDATA_INTERNAL_CHECKS info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", - rd->name, - st->name, - host->hostname, - rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), - rd->multiplier, td->multiplier, - rd->divisor, td->divisor + rrddim_name(rd), + rrdset_name(st), + rrdhost_hostname(host), + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), + rd->multiplier, td->multiplier, + rd->divisor, td->divisor ); - #endif +#endif rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); } } - - for(; td->next; td = td->next) ; - td->next = rd; - } - - if(host->health_enabled && !st->state->is_ar_chart) { - rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, RRDVAR_OPTION_DEFAULT); - rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, RRDVAR_OPTION_DEFAULT); - rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); } - if(unlikely(rrddim_index_add(st, rd) != rd)) - error("RRDDIM: INTERNAL ERROR: attempt to index duplicate dimension '%s' on chart '%s'", rd->id, st->id); + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); - rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM); - rrdset_flag_set(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS); - rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS); + // let the chart resync + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); ml_new_dimension(rd); - rrdset_unlock(st); - rrdcontext_updated_rrddim(rd); - return(rd); + ctr->react_action = RRDDIM_REACT_NEW; + + internal_error(false, "RRDDIM: inserted dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(st->rrdhost)); + } -// ---------------------------------------------------------------------------- -// RRDDIM remove / free a dimension +static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *rrdset) { + RRDDIM *rd = rrddim; + RRDSET *st = rrdset; + RRDHOST *host = st->rrdhost; + + internal_error(false, "RRDDIM: deleting dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(host)); -void rrddim_free(RRDSET *st, RRDDIM *rd) -{ rrdcontext_removed_rrddim(rd); - ml_delete_dimension(rd); - - debug(D_RRD_CALLS, "rrddim_free() %s.%s", st->name, rd->name); - if (!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + ml_delete_dimension(rd); - size_t tiers_available = 0, tiers_said_yes = 0; - for(int tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier]) { - tiers_available++; + debug(D_RRD_CALLS, "rrddim_free() %s.%s", rrdset_name(st), rrddim_name(rd)); - if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle)) - tiers_said_yes++; + size_t tiers_available = 0, tiers_said_yes = 0; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(rd->tiers[tier] && rd->tiers[tier]->db_collection_handle) { + tiers_available++; - rd->tiers[tier]->db_collection_handle = NULL; - } - } + if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) + tiers_said_yes++; - if (tiers_available == tiers_said_yes && tiers_said_yes && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - /* This metric has no data and no references */ - delete_dimension_uuid(&rd->metric_uuid); + rd->tiers[tier]->db_collection_handle = NULL; } } - if(rd == st->dimensions) - st->dimensions = rd->next; - else { - RRDDIM *i; - for (i = st->dimensions; i && i->next != rd; i = i->next) ; - - if (i && i->next == rd) - i->next = rd->next; - else - error("Request to free dimension '%s.%s' but it is not linked.", st->id, rd->name); + if (tiers_available == tiers_said_yes && tiers_said_yes && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); } - rd->next = NULL; - while(rd->variables) - rrddimvar_free(rd->variables); - - if(unlikely(rrddim_index_del(st, rd) != rd)) - error("RRDDIM: INTERNAL ERROR: attempt to remove from index dimension '%s' on chart '%s', removed a different dimension.", rd->id, st->id); + rrddimvar_delete_all(rd); // free(rd->annotations); -//#ifdef ENABLE_ACLK -// if (!netdata_exit) -// aclk_send_dimension_update(rd); -//#endif + //#ifdef ENABLE_ACLK + // if (!netdata_exit) + // aclk_send_dimension_update(rd); + //#endif // this will free MEMORY_MODE_SAVE and MEMORY_MODE_MAP structures rrddim_memory_file_free(rd); - for(int tier = 0; tier < storage_tiers ;tier++) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { if(!rd->tiers[tier]) continue; - STORAGE_ENGINE* eng = storage_engine_get(rd->tiers[tier]->mode); - if(eng) - eng->api.free(rd->tiers[tier]->db_metric_handle); + STORAGE_ENGINE* eng = host->db[tier].eng; + eng->api.metric_release(rd->tiers[tier]->db_metric_handle); freez(rd->tiers[tier]); rd->tiers[tier] = NULL; @@ -423,14 +235,252 @@ void rrddim_free(RRDSET *st, RRDDIM *rd) if(rd->db) { if(rd->rrd_memory_mode == RRD_MEMORY_MODE_RAM) - munmap(rd->db, rd->memsize); + netdata_munmap(rd->db, rd->memsize); else freez(rd->db); } - freez((void *)rd->id); - freez((void *)rd->name); - freez(rd); + string_freez(rd->id); + string_freez(rd->name); +} + +static bool rrddim_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *new_rrddim, void *constructor_data) { + (void)new_rrddim; // it is NULL + + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + ctr->react_action = RRDDIM_REACT_NONE; + + int rc = rrddim_reset_name(st, rd, ctr->name); + rc += rrddim_set_algorithm(st, rd, ctr->algorithm); + rc += rrddim_set_multiplier(st, rd, ctr->multiplier); + rc += rrddim_set_divisor(st, rd, ctr->divisor); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (rd->tiers[tier] && !rd->tiers[tier]->db_collection_handle) + rd->tiers[tier]->db_collection_handle = + rd->tiers[tier]->collect_ops->init(rd->tiers[tier]->db_metric_handle, st->rrdhost->db[tier].tier_grouping * st->update_every); + } + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_ARCHIVED); + + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + } + + if(unlikely(rc)) + ctr->react_action = RRDDIM_REACT_UPDATED; + + return ctr->react_action == RRDDIM_REACT_UPDATED; +} + +static void rrddim_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + if(ctr->react_action & (RRDDIM_REACT_UPDATED | RRDDIM_REACT_NEW)) { + rrddim_flag_set(rd, RRDDIM_FLAG_METADATA_UPDATE); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_METADATA_UPDATE); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + if(ctr->react_action == RRDDIM_REACT_UPDATED) { + // the chart needs to be updated to the parent + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + } + + rrdcontext_updated_rrddim(rd); +} + +void rrddim_index_init(RRDSET *st) { + if(!st->rrddim_root_index) { + st->rrddim_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(st->rrddim_root_index, rrddim_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrddim_root_index, rrddim_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrddim_root_index, rrddim_delete_callback, st); + dictionary_register_react_callback(st->rrddim_root_index, rrddim_react_callback, st); + } +} + +void rrddim_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrddim_root_index); + st->rrddim_root_index = NULL; +} + +static inline RRDDIM *rrddim_index_find(RRDSET *st, const char *id) { + return dictionary_get(st->rrddim_root_index, id); +} + +// ---------------------------------------------------------------------------- +// RRDDIM - find a dimension + +inline RRDDIM *rrddim_find(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", rrdset_name(st), id); + + return rrddim_index_find(st, id); +} + +inline RRDDIM_ACQUIRED *rrddim_find_and_acquire(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", rrdset_name(st), id); + + return (RRDDIM_ACQUIRED *)dictionary_get_and_acquire_item(st->rrddim_root_index, id); +} + +RRDDIM *rrddim_acquired_to_rrddim(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return NULL; + + return (RRDDIM *) dictionary_acquired_item_value((const DICTIONARY_ITEM *)rda); +} + +void rrddim_acquired_release(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return; + + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + dictionary_acquired_item_release(rd->rrdset->rrddim_root_index, (const DICTIONARY_ITEM *)rda); +} + +// This will not return dimensions that are archived +RRDDIM *rrddim_find_active(RRDSET *st, const char *id) { + RRDDIM *rd = rrddim_find(st, id); + + if (unlikely(rd && rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) + return NULL; + + return rd; +} + +// ---------------------------------------------------------------------------- +// RRDDIM rename a dimension + +inline int rrddim_reset_name(RRDSET *st, RRDDIM *rd, const char *name) { + if(unlikely(!name || !*name || !strcmp(rrddim_name(rd), name))) + return 0; + + debug(D_RRD_CALLS, "rrddim_reset_name() from %s.%s to %s.%s", rrdset_name(st), rrddim_name(rd), rrdset_name(st), name); + + STRING *old = rd->name; + rd->name = rrd_string_strdupz(name); + string_freez(old); + + rrddimvar_rename_all(rd); + + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + return 1; +} + +inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) { + if(unlikely(rd->algorithm == algorithm)) + return 0; + + debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", rrdset_id(st), rrddim_name(rd), rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); + rd->algorithm = algorithm; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_algorithm(rd); + return 1; +} + +inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier) { + if(unlikely(rd->multiplier == multiplier)) + return 0; + + debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, rrdset_id(st), rrddim_name(rd), rd->multiplier, multiplier); + rd->multiplier = multiplier; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_multiplier(rd); + return 1; +} + +inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) { + if(unlikely(rd->divisor == divisor)) + return 0; + + debug(D_RRD_CALLS, "Updating divisor of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, rrdset_id(st), rrddim_name(rd), rd->divisor, divisor); + rd->divisor = divisor; + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_divisor(rd); + return 1; +} + +// ---------------------------------------------------------------------------- + +// get the timestamp of the last entry in the round-robin database +time_t rrddim_last_entry_t(RRDDIM *rd) { + time_t latest = rd->tiers[0]->query_ops->latest_time(rd->tiers[0]->db_metric_handle); + + for(size_t tier = 1; tier < storage_tiers ;tier++) { + if(unlikely(!rd->tiers[tier])) continue; + + time_t t = rd->tiers[tier]->query_ops->latest_time(rd->tiers[tier]->db_metric_handle); + if(t > latest) + latest = t; + } + + return latest; +} + +time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier) { + if(unlikely(tier > storage_tiers || !rd->tiers[tier])) + return 0; + + return rd->tiers[tier]->query_ops->oldest_time(rd->tiers[tier]->db_metric_handle); +} + +time_t rrddim_first_entry_t(RRDDIM *rd) { + time_t oldest = 0; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + time_t t = rrddim_first_entry_t_of_tier(rd, tier); + if(t != 0 && (oldest == 0 || t < oldest)) + oldest = t; + } + + return oldest; +} + +RRDDIM *rrddim_add_custom(RRDSET *st + , const char *id + , const char *name + , collected_number multiplier + , collected_number divisor + , RRD_ALGORITHM algorithm + , RRD_MEMORY_MODE memory_mode + ) { + struct rrddim_constructor tmp = { + .st = st, + .id = id, + .name = name, + .multiplier = multiplier, + .divisor = divisor, + .algorithm = algorithm, + .memory_mode = memory_mode, + }; + + RRDDIM *rd = dictionary_set_advanced(st->rrddim_root_index, tmp.id, -1, NULL, sizeof(RRDDIM), &tmp); + return(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM remove / free a dimension + +void rrddim_free(RRDSET *st, RRDDIM *rd) { + dictionary_del(st->rrddim_root_index, string2str(rd->id)); } @@ -438,56 +488,59 @@ void rrddim_free(RRDSET *st, RRDDIM *rd) // RRDDIM - set dimension options int rrddim_hide(RRDSET *st, const char *id) { - debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", st->name, id); + debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", rrdset_name(st), id); RRDHOST *host = st->rrdhost; RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); return 1; } - if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) - (void)sql_set_dimension_option(&rd->metric_uuid, "hidden"); + if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } - rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN); - rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); + rrddim_option_set(rd, RRDDIM_OPTION_HIDDEN); rrdcontext_updated_rrddim_flags(rd); return 0; } int rrddim_unhide(RRDSET *st, const char *id) { - debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", st->name, id); + debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", rrdset_name(st), id); RRDHOST *host = st->rrdhost; RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); return 1; } - if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) - (void)sql_set_dimension_option(&rd->metric_uuid, NULL); + if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + metaqueue_dimension_update_flags(rd); + } - rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN); - rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + rrddim_option_clear(rd, RRDDIM_OPTION_HIDDEN); rrdcontext_updated_rrddim_flags(rd); return 0; } inline void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd) { - debug(D_RRD_CALLS, "rrddim_is_obsolete() for chart %s, dimension %s", st->name, rd->name); + debug(D_RRD_CALLS, "rrddim_is_obsolete() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) { - info("Cannot obsolete already archived dimension %s from chart %s", rd->name, st->name); + info("Cannot obsolete already archived dimension %s from chart %s", rrddim_name(rd), rrdset_name(st)); return; } rrddim_flag_set(rd, RRDDIM_FLAG_OBSOLETE); rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); rrdcontext_updated_rrddim_flags(rd); } inline void rrddim_isnot_obsolete(RRDSET *st __maybe_unused, RRDDIM *rd) { - debug(D_RRD_CALLS, "rrddim_isnot_obsolete() for chart %s, dimension %s", st->name, rd->name); + debug(D_RRD_CALLS, "rrddim_isnot_obsolete() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); rrdcontext_updated_rrddim_flags(rd); @@ -496,30 +549,34 @@ inline void rrddim_isnot_obsolete(RRDSET *st __maybe_unused, RRDDIM *rd) { // ---------------------------------------------------------------------------- // RRDDIM - collect values for a dimension -inline collected_number rrddim_set_by_pointer(RRDSET *st __maybe_unused, RRDDIM *rd, collected_number value) { - debug(D_RRD_CALLS, "rrddim_set_by_pointer() for chart %s, dimension %s, value " COLLECTED_NUMBER_FORMAT, st->name, rd->name, value); +inline collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) { + struct timeval now; + now_realtime_timeval(&now); + + return rrddim_timed_set_by_pointer(st, rd, now, value); +} - rrdcontext_collected_rrddim(rd); +collected_number rrddim_timed_set_by_pointer(RRDSET *st __maybe_unused, RRDDIM *rd, struct timeval collected_time, collected_number value) { + debug(D_RRD_CALLS, "rrddim_set_by_pointer() for chart %s, dimension %s, value " COLLECTED_NUMBER_FORMAT, rrdset_name(st), rrddim_name(rd), value); - now_realtime_timeval(&rd->last_collected_time); + rd->last_collected_time = collected_time; rd->collected_value = value; rd->updated = 1; - rd->collections_counter++; collected_number v = (value >= 0) ? value : -value; - if(unlikely(v > rd->collected_value_max)) rd->collected_value_max = v; - - // fprintf(stderr, "%s.%s %llu " COLLECTED_NUMBER_FORMAT " dt %0.6f" " rate " NETDATA_DOUBLE_FORMAT "\n", st->name, rd->name, st->usec_since_last_update, value, (float)((double)st->usec_since_last_update / (double)1000000), (NETDATA_DOUBLE)((value - rd->last_collected_value) * (NETDATA_DOUBLE)rd->multiplier / (NETDATA_DOUBLE)rd->divisor * 1000000.0 / (NETDATA_DOUBLE)st->usec_since_last_update)); + if (unlikely(v > rd->collected_value_max)) + rd->collected_value_max = v; return rd->last_collected_value; } + collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) { RRDHOST *host = st->rrdhost; RRDDIM *rd = rrddim_find(st, id); if(unlikely(!rd)) { - error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); return 0; } @@ -560,7 +617,7 @@ struct rrddim_map_save_v019 { long double last_calculated_value; // ignored long double last_stored_value; // ignored long long collected_value; // ignored - long long last_collected_value; // ignored + long long last_collected_value; // load and save long double collected_volume; // ignored long double stored_volume; // ignored void *next; // ignored @@ -578,22 +635,23 @@ size_t rrddim_memory_file_header_size(void) { } void rrddim_memory_file_update(RRDDIM *rd) { - if(!rd->rd_on_file) return; + if(!rd || !rd->rd_on_file) return; struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; rd_on_file->last_collected_time.tv_sec = rd->last_collected_time.tv_sec; rd_on_file->last_collected_time.tv_usec = rd->last_collected_time.tv_usec; + rd_on_file->last_collected_value = rd->last_collected_value; } void rrddim_memory_file_free(RRDDIM *rd) { - if(!rd->rd_on_file) return; + if(!rd || !rd->rd_on_file) return; // needed for memory mode map, to save the latest state rrddim_memory_file_update(rd); struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; freez(rd_on_file->cache_filename); - munmap(rd_on_file, rd_on_file->memsize); + netdata_munmap(rd_on_file, rd_on_file->memsize); // remove the pointers from the RRDDIM rd->rd_on_file = NULL; @@ -601,13 +659,13 @@ void rrddim_memory_file_free(RRDDIM *rd) { } const char *rrddim_cache_filename(RRDDIM *rd) { - if(!rd->rd_on_file) return NULL; + if(!rd || !rd->rd_on_file) return NULL; struct rrddim_map_save_v019 *rd_on_file = rd->rd_on_file; return rd_on_file->cache_filename; } void rrddim_memory_file_save(RRDDIM *rd) { - if(!rd->rd_on_file) return; + if(!rd || !rd->rd_on_file) return; rrddim_memory_file_update(rd); @@ -627,7 +685,7 @@ bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MO char filename[FILENAME_MAX + 1]; char fullfilename[FILENAME_MAX + 1]; - rrdset_strncpyz_name(filename, rd->id, FILENAME_MAX); + rrdset_strncpyz_name(filename, rrddim_id(rd), FILENAME_MAX); snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename); rd_on_file = (struct rrddim_map_save_v019 *)netdata_mmap(fullfilename, size, @@ -646,7 +704,7 @@ bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MO reset = 1; } else if(rd_on_file->memsize != size) { - error("File %s does not have the desired size, expected %lu but found %lu. Clearing it.", fullfilename, size, rd_on_file->memsize); + error("File %s does not have the desired size, expected %lu but found %lu. Clearing it.", fullfilename, size, (unsigned long int) rd_on_file->memsize); memset(rd_on_file, 0, size); reset = 1; } @@ -662,6 +720,8 @@ bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MO } if(!reset) { + rd->last_collected_value = rd_on_file->last_collected_value; + if(rd_on_file->algorithm != rd->algorithm) info("File %s does not have the expected algorithm (expected %u '%s', found %u '%s'). Previous values may be wrong.", fullfilename, rd->algorithm, rrd_algorithm_name(rd->algorithm), rd_on_file->algorithm, rrd_algorithm_name(rd_on_file->algorithm)); diff --git a/database/rrddimvar.c b/database/rrddimvar.c index 3c2ed75e5..449ceeb93 100644 --- a/database/rrddimvar.c +++ b/database/rrddimvar.c @@ -1,84 +1,87 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_HEALTH_INTERNALS #include "rrd.h" +typedef struct rrddimvar { + struct rrddim *rrddim; + + STRING *prefix; + STRING *suffix; + void *value; + + const RRDVAR_ACQUIRED *rrdvar_local_dim_id; + const RRDVAR_ACQUIRED *rrdvar_local_dim_name; + + const RRDVAR_ACQUIRED *rrdvar_family_id; + const RRDVAR_ACQUIRED *rrdvar_family_name; + const RRDVAR_ACQUIRED *rrdvar_family_context_dim_id; + const RRDVAR_ACQUIRED *rrdvar_family_context_dim_name; + + const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_name; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_name; + + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDDIMVAR; + // ---------------------------------------------------------------------------- // RRDDIMVAR management // DIMENSION VARIABLES #define RRDDIMVAR_ID_MAX 1024 -static inline void rrddimvar_free_variables(RRDDIMVAR *rs) { +static inline void rrddimvar_free_variables_unsafe(RRDDIMVAR *rs) { RRDDIM *rd = rs->rrddim; RRDSET *st = rd->rrdset; RRDHOST *host = st->rrdhost; // CHART VARIABLES FOR THIS DIMENSION - rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_id); - rs->var_local_id = NULL; + if(st->rrdvars) { + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_id); + rs->rrdvar_local_dim_id = NULL; - rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_name); - rs->var_local_name = NULL; + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_name); + rs->rrdvar_local_dim_name = NULL; + } // FAMILY VARIABLES FOR THIS DIMENSION - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_id); - rs->var_family_id = NULL; + if(st->rrdfamily) { + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_id); + rs->rrdvar_family_id = NULL; - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); - rs->var_family_name = NULL; + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_name); + rs->rrdvar_family_name = NULL; - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextid); - rs->var_family_contextid = NULL; + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_id); + rs->rrdvar_family_context_dim_id = NULL; - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextname); - rs->var_family_contextname = NULL; + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_name); + rs->rrdvar_family_context_dim_name = NULL; + } // HOST VARIABLES FOR THIS DIMENSION - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidid); - rs->var_host_chartidid = NULL; - - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidname); - rs->var_host_chartidname = NULL; - - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnameid); - rs->var_host_chartnameid = NULL; - - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnamename); - rs->var_host_chartnamename = NULL; + if(host->rrdvars && host->health_enabled) { + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_id); + rs->rrdvar_host_chart_id_dim_id = NULL; - // KEYS - - freez(rs->key_id); - rs->key_id = NULL; - - freez(rs->key_name); - rs->key_name = NULL; - - freez(rs->key_fullidid); - rs->key_fullidid = NULL; - - freez(rs->key_fullidname); - rs->key_fullidname = NULL; - - freez(rs->key_contextid); - rs->key_contextid = NULL; + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_name); + rs->rrdvar_host_chart_id_dim_name = NULL; - freez(rs->key_contextname); - rs->key_contextname = NULL; + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_id); + rs->rrdvar_host_chart_name_dim_id = NULL; - freez(rs->key_fullnameid); - rs->key_fullnameid = NULL; - - freez(rs->key_fullnamename); - rs->key_fullnamename = NULL; + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_name); + rs->rrdvar_host_chart_name_dim_name = NULL; + } } -static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { - rrddimvar_free_variables(rs); +static inline void rrddimvar_update_variables_unsafe(RRDDIMVAR *rs) { + rrddimvar_free_variables_unsafe(rs); RRDDIM *rd = rs->rrddim; RRDSET *st = rd->rrdset; @@ -88,29 +91,29 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { // KEYS - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", rs->prefix, rd->id, rs->suffix); - rs->key_id = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_id(rd), string2str(rs->suffix)); + STRING *key_dim_id = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", rs->prefix, rd->name, rs->suffix); - rs->key_name = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_name(rd), string2str(rs->suffix)); + STRING *key_dim_name = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->id, rs->key_id); - rs->key_fullidid = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(key_dim_id)); + STRING *key_chart_id_dim_id = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->id, rs->key_name); - rs->key_fullidname = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(key_dim_name)); + STRING *key_chart_id_dim_name = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->context, rs->key_id); - rs->key_contextid = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(key_dim_id)); + STRING *key_context_dim_id = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->context, rs->key_name); - rs->key_contextname = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(key_dim_name)); + STRING *key_context_dim_name = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->name, rs->key_id); - rs->key_fullnameid = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(key_dim_id)); + STRING *key_chart_name_dim_id = string_strdupz(buffer); - snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->name, rs->key_name); - rs->key_fullnamename = strdupz(buffer); + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(key_dim_name)); + STRING *key_chart_name_dim_name = string_strdupz(buffer); // CHART VARIABLES FOR THIS DIMENSION // ----------------------------------- @@ -119,8 +122,10 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { // - $id // - $name - rs->var_local_id = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->key_id, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_local_name = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->key_name, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + if(st->rrdvars) { + rs->rrdvar_local_dim_id = rrdvar_add_and_acquire("local", st->rrdvars, key_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_local_dim_name = rrdvar_add_and_acquire("local", st->rrdvars, key_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } // FAMILY VARIABLES FOR THIS DIMENSION // ----------------------------------- @@ -131,10 +136,12 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { // - $chart-context.id // - $chart-context.name - rs->var_family_id = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_id, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_family_name = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_name, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_family_contextid = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_contextid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_family_contextname = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_contextname, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + if(st->rrdfamily) { + rs->rrdvar_family_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_context_dim_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_context_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_family_context_dim_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_context_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } // HOST VARIABLES FOR THIS DIMENSION // ----------------------------------- @@ -145,73 +152,121 @@ static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { // - $chart-name.id // - $chart-name.name - rs->var_host_chartidid = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullidid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_host_chartidname = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullidname, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_host_chartnameid = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnameid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); - rs->var_host_chartnamename = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnamename, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + if(host->rrdvars && host->health_enabled) { + rs->rrdvar_host_chart_id_dim_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_id_dim_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_name_dim_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name_dim_id, rs->type, RRDVAR_FLAG_NONE, rs->value); + rs->rrdvar_host_chart_name_dim_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name_dim_name, rs->type, RRDVAR_FLAG_NONE, rs->value); + } + + // free the keys + + string_freez(key_dim_id); + string_freez(key_dim_name); + string_freez(key_chart_id_dim_id); + string_freez(key_chart_id_dim_name); + string_freez(key_context_dim_id); + string_freez(key_context_dim_name); + string_freez(key_chart_name_dim_id); + string_freez(key_chart_name_dim_name); } -RRDDIMVAR *rrddimvar_create(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_OPTIONS options) { - RRDSET *st = rd->rrdset; - (void)st; +struct rrddimvar_constructor { + RRDDIM *rrddim; + const char *prefix; + const char *suffix; + void *value; + RRDVAR_FLAGS flags :16; + RRDVAR_TYPE type:8; +}; - debug(D_VARIABLES, "RRDDIMSET create for chart id '%s' name '%s', dimension id '%s', name '%s%s%s'", st->id, st->name, rd->id, (prefix)?prefix:"", rd->name, (suffix)?suffix:""); +static void rrddimvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *constructor_data) { + RRDDIMVAR *rs = rrddimvar; + struct rrddimvar_constructor *ctr = constructor_data; - if(!prefix) prefix = ""; - if(!suffix) suffix = ""; + if(!ctr->prefix) ctr->prefix = ""; + if(!ctr->suffix) ctr->suffix = ""; + + rs->prefix = string_strdupz(ctr->prefix); + rs->suffix = string_strdupz(ctr->suffix); + + rs->type = ctr->type; + rs->value = ctr->value; + rs->flags = ctr->flags; + rs->rrddim = ctr->rrddim; + + rrddimvar_update_variables_unsafe(rs); +} + +static bool rrddimvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *new_rrddimvar __maybe_unused, void *constructor_data __maybe_unused) { + RRDDIMVAR *rs = rrddimvar; + rrddimvar_update_variables_unsafe(rs); + + return true; +} - RRDDIMVAR *rs = (RRDDIMVAR *)callocz(1, sizeof(RRDDIMVAR)); +static void rrddimvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *rrdset __maybe_unused) { + RRDDIMVAR *rs = rrddimvar; + rrddimvar_free_variables_unsafe(rs); + string_freez(rs->prefix); + string_freez(rs->suffix); +} - rs->prefix = strdupz(prefix); - rs->suffix = strdupz(suffix); +void rrddimvar_index_init(RRDSET *st) { + if(!st->rrddimvar_root_index) { + st->rrddimvar_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); - rs->type = type; - rs->value = value; - rs->options = options; - rs->rrddim = rd; + dictionary_register_insert_callback(st->rrddimvar_root_index, rrddimvar_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrddimvar_root_index, rrddimvar_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrddimvar_root_index, rrddimvar_delete_callback, st); + } +} - rs->next = rd->variables; - rd->variables = rs; +void rrddimvar_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrddimvar_root_index); + st->rrddimvar_root_index = NULL; +} - rrddimvar_create_variables(rs); +void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags) { + if(!prefix) prefix = ""; + if(!suffix) suffix = ""; - return rs; + char key[RRDDIMVAR_ID_MAX + 1]; + size_t key_len = snprintfz(key, RRDDIMVAR_ID_MAX, "%s_%s_%s", prefix, rrddim_id(rd), suffix); + + struct rrddimvar_constructor tmp = { + .suffix = suffix, + .prefix = prefix, + .type = type, + .flags = flags, + .value = value, + .rrddim = rd + }; + dictionary_set_advanced(rd->rrdset->rrddimvar_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDDIMVAR), &tmp); } void rrddimvar_rename_all(RRDDIM *rd) { RRDSET *st = rd->rrdset; - (void)st; - debug(D_VARIABLES, "RRDDIMSET rename for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); + debug(D_VARIABLES, "RRDDIMVAR rename for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd)); - RRDDIMVAR *rs, *next = rd->variables; - while((rs = next)) { - next = rs->next; - rrddimvar_create_variables(rs); + RRDDIMVAR *rs; + dfe_start_write(st->rrddimvar_root_index, rs) { + if(unlikely(rs->rrddim == rd)) + rrddimvar_update_variables_unsafe(rs); } + dfe_done(rs); } -void rrddimvar_free(RRDDIMVAR *rs) { - RRDDIM *rd = rs->rrddim; +void rrddimvar_delete_all(RRDDIM *rd) { RRDSET *st = rd->rrdset; - debug(D_VARIABLES, "RRDDIMSET free for chart id '%s' name '%s', dimension id '%s', name '%s', prefix='%s', suffix='%s'", st->id, st->name, rd->id, rd->name, rs->prefix, rs->suffix); - rrddimvar_free_variables(rs); + debug(D_VARIABLES, "RRDDIMVAR delete for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd)); - if(rd->variables == rs) { - debug(D_VARIABLES, "RRDDIMSET removing first entry for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); - rd->variables = rs->next; - } - else { - debug(D_VARIABLES, "RRDDIMSET removing non-first entry for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); - RRDDIMVAR *t; - for (t = rd->variables; t && t->next != rs; t = t->next) ; - if(!t) error("RRDDIMVAR '%s' not found in dimension '%s/%s' variables linked list", rs->key_name, st->id, rd->id); - else t->next = rs->next; + RRDDIMVAR *rs; + dfe_start_write(st->rrddimvar_root_index, rs) { + if(unlikely(rs->rrddim == rd)) + dictionary_del(st->rrddimvar_root_index, rs_dfe.name); } - - freez(rs->prefix); - freez(rs->suffix); - freez(rs); + dfe_done(rs); } - diff --git a/database/rrddimvar.h b/database/rrddimvar.h index 3494824be..a803ea753 100644 --- a/database/rrddimvar.h +++ b/database/rrddimvar.h @@ -10,47 +10,12 @@ // calculated / processed by the normal data collection process // This means, there will be no speed penalty for using // these variables -struct rrddimvar { - char *prefix; - char *suffix; - - char *key_id; // dimension id - char *key_name; // dimension name - char *key_contextid; // context + dimension id - char *key_contextname; // context + dimension name - char *key_fullidid; // chart type.chart id + dimension id - char *key_fullidname; // chart type.chart id + dimension name - char *key_fullnameid; // chart type.chart name + dimension id - char *key_fullnamename; // chart type.chart name + dimension name - - RRDVAR_TYPE type; - void *value; - - RRDVAR_OPTIONS options; - - RRDVAR *var_local_id; - RRDVAR *var_local_name; - - RRDVAR *var_family_id; - RRDVAR *var_family_name; - RRDVAR *var_family_contextid; - RRDVAR *var_family_contextname; - - RRDVAR *var_host_chartidid; - RRDVAR *var_host_chartidname; - RRDVAR *var_host_chartnameid; - RRDVAR *var_host_chartnamename; - - struct rrddim *rrddim; - - struct rrddimvar *next; -}; - - -extern void rrddimvar_rename_all(RRDDIM *rd); -extern RRDDIMVAR *rrddimvar_create(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_OPTIONS options); -extern void rrddimvar_free(RRDDIMVAR *rs); +void rrddimvar_rename_all(RRDDIM *rd); +void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags); +void rrddimvar_delete_all(RRDDIM *rd); +void rrddimvar_index_init(RRDSET *st); +void rrddimvar_index_destroy(RRDSET *st); #endif //NETDATA_RRDDIMVAR_H diff --git a/database/rrdfamily.c b/database/rrdfamily.c index 3d91c3788..e7d1536c8 100644 --- a/database/rrdfamily.c +++ b/database/rrdfamily.c @@ -3,59 +3,66 @@ #define NETDATA_RRD_INTERNALS #include "rrd.h" +typedef struct rrdfamily { + STRING *family; + DICTIONARY *rrdvars; +} RRDFAMILY; + // ---------------------------------------------------------------------------- // RRDFAMILY index -int rrdfamily_compare(void *a, void *b) { - if(((RRDFAMILY *)a)->hash_family < ((RRDFAMILY *)b)->hash_family) return -1; - else if(((RRDFAMILY *)a)->hash_family > ((RRDFAMILY *)b)->hash_family) return 1; - else return strcmp(((RRDFAMILY *)a)->family, ((RRDFAMILY *)b)->family); -} +struct rrdfamily_constructor { + const char *family; +}; -#define rrdfamily_index_add(host, rc) (RRDFAMILY *)avl_insert_lock(&((host)->rrdfamily_root_index), (avl_t *)(rc)) -#define rrdfamily_index_del(host, rc) (RRDFAMILY *)avl_remove_lock(&((host)->rrdfamily_root_index), (avl_t *)(rc)) +static void rrdfamily_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *constructor_data) { + RRDFAMILY *rf = rrdfamily; + struct rrdfamily_constructor *ctr = constructor_data; -static RRDFAMILY *rrdfamily_index_find(RRDHOST *host, const char *id, uint32_t hash) { - RRDFAMILY tmp; - tmp.family = id; - tmp.hash_family = (hash)?hash:simple_hash(tmp.family); + rf->family = string_strdupz(ctr->family); + rf->rrdvars = rrdvariables_create(); +} - return (RRDFAMILY *)avl_search_lock(&(host->rrdfamily_root_index), (avl_t *) &tmp); +static void rrdfamily_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *rrdhost __maybe_unused) { + RRDFAMILY *rf = rrdfamily; + string_freez(rf->family); + rrdvariables_destroy(rf->rrdvars); + rf->family = NULL; + rf->rrdvars = NULL; } -RRDFAMILY *rrdfamily_create(RRDHOST *host, const char *id) { - RRDFAMILY *rc = rrdfamily_index_find(host, id, 0); - if(!rc) { - rc = callocz(1, sizeof(RRDFAMILY)); +void rrdfamily_index_init(RRDHOST *host) { + if(!host->rrdfamily_root_index) { + host->rrdfamily_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); - rc->family = strdupz(id); - rc->hash_family = simple_hash(rc->family); + dictionary_register_insert_callback(host->rrdfamily_root_index, rrdfamily_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdfamily_root_index, rrdfamily_delete_callback, host); + } +} - // initialize the variables index - avl_init_lock(&rc->rrdvar_root_index, rrdvar_compare); +void rrdfamily_index_destroy(RRDHOST *host) { + dictionary_destroy(host->rrdfamily_root_index); + host->rrdfamily_root_index = NULL; +} - RRDFAMILY *ret = rrdfamily_index_add(host, rc); - if(ret != rc) - error("RRDFAMILY: INTERNAL ERROR: Expected to INSERT RRDFAMILY '%s' into index, but inserted '%s'.", rc->family, (ret)?ret->family:"NONE"); - } - rc->use_count++; - return rc; +// ---------------------------------------------------------------------------- +// RRDFAMILY management + +const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id) { + struct rrdfamily_constructor tmp = { + .family = id, + }; + return (const RRDFAMILY_ACQUIRED *)dictionary_set_and_acquire_item_advanced(host->rrdfamily_root_index, id, -1, NULL, sizeof(RRDFAMILY), &tmp); } -void rrdfamily_free(RRDHOST *host, RRDFAMILY *rc) { - rc->use_count--; - if(!rc->use_count) { - RRDFAMILY *ret = rrdfamily_index_del(host, rc); - if(ret != rc) - error("RRDFAMILY: INTERNAL ERROR: Expected to DELETE RRDFAMILY '%s' from index, but deleted '%s'.", rc->family, (ret)?ret->family:"NONE"); - else { - debug(D_RRD_CALLS, "RRDFAMILY: Cleaning up remaining family variables for host '%s', family '%s'", host->hostname, rc->family); - rrdvar_free_remaining_variables(host, &rc->rrdvar_root_index); - - freez((void *) rc->family); - freez(rc); - } - } +void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa) { + if(unlikely(!rfa)) return; + dictionary_acquired_item_release(host->rrdfamily_root_index, (const DICTIONARY_ITEM *)rfa); } +DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rfa) { + if(unlikely(!rfa)) return NULL; + RRDFAMILY *rf = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rfa); + return(rf->rrdvars); +} diff --git a/database/rrdfunctions.c b/database/rrdfunctions.c new file mode 100644 index 000000000..fb847a356 --- /dev/null +++ b/database/rrdfunctions.c @@ -0,0 +1,758 @@ +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +#define MAX_FUNCTION_LENGTH (PLUGINSD_LINE_MAX - 512) // we need some space for the rest of the line + +static unsigned char functions_allowed_chars[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = ' ', // Horizontal Tab + [10] = ' ', // Line Feed + [11] = ' ', // Vertical Tab + [12] = ' ', // Form Feed + [13] = ' ', // Carriage Return + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = ' ', // SPACE keep + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = ',', // , keep + [45] = '-', // - keep + [46] = '.', // . keep + [47] = '/', // / keep + [48] = '0', // 0 keep + [49] = '1', // 1 keep + [50] = '2', // 2 keep + [51] = '3', // 3 keep + [52] = '4', // 4 keep + [53] = '5', // 5 keep + [54] = '6', // 6 keep + [55] = '7', // 7 keep + [56] = '8', // 8 keep + [57] = '9', // 9 keep + [58] = ':', // : keep + [59] = ':', // ; convert ; to : + [60] = '_', // < + [61] = ':', // = convert = to : + [62] = '_', // > + [63] = '_', // ? + [64] = '_', // @ + [65] = 'A', // A keep + [66] = 'B', // B keep + [67] = 'C', // C keep + [68] = 'D', // D keep + [69] = 'E', // E keep + [70] = 'F', // F keep + [71] = 'G', // G keep + [72] = 'H', // H keep + [73] = 'I', // I keep + [74] = 'J', // J keep + [75] = 'K', // K keep + [76] = 'L', // L keep + [77] = 'M', // M keep + [78] = 'N', // N keep + [79] = 'O', // O keep + [80] = 'P', // P keep + [81] = 'Q', // Q keep + [82] = 'R', // R keep + [83] = 'S', // S keep + [84] = 'T', // T keep + [85] = 'U', // U keep + [86] = 'V', // V keep + [87] = 'W', // W keep + [88] = 'X', // X keep + [89] = 'Y', // Y keep + [90] = 'Z', // Z keep + [91] = '_', // [ + [92] = '/', // backslash convert \ to / + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ keep + [96] = '_', // ` + [97] = 'a', // a keep + [98] = 'b', // b keep + [99] = 'c', // c keep + [100] = 'd', // d keep + [101] = 'e', // e keep + [102] = 'f', // f keep + [103] = 'g', // g keep + [104] = 'h', // h keep + [105] = 'i', // i keep + [106] = 'j', // j keep + [107] = 'k', // k keep + [108] = 'l', // l keep + [109] = 'm', // m keep + [110] = 'n', // n keep + [111] = 'o', // o keep + [112] = 'p', // p keep + [113] = 'q', // q keep + [114] = 'r', // r keep + [115] = 's', // s keep + [116] = 't', // t keep + [117] = 'u', // u keep + [118] = 'v', // v keep + [119] = 'w', // w keep + [120] = 'x', // x keep + [121] = 'y', // y keep + [122] = 'z', // z keep + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +static inline size_t sanitize_function_text(char *dst, const char *src, size_t dst_len) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len, + functions_allowed_chars, true, "", NULL); +} + +// we keep a dictionary per RRDSET with these functions +// the dictionary is created on demand (only when a function is added to an RRDSET) + +typedef enum { + RRD_FUNCTION_LOCAL = (1 << 0), + RRD_FUNCTION_GLOBAL = (1 << 1), + + // this is 8-bit +} RRD_FUNCTION_OPTIONS; + +struct rrd_collector_function { + bool sync; // when true, the function is called synchronously + uint8_t options; // RRD_FUNCTION_OPTIONS + STRING *help; + int timeout; // the default timeout of the function + + int (*function)(BUFFER *wb, int timeout, const char *function, void *collector_data, + function_data_ready_callback callback, void *callback_data); + + void *collector_data; + struct rrd_collector *collector; +}; + +// Each function points to this collector structure +// so that when the collector exits, all of them will +// be invalidated (running == false) +// The last function that is using this collector +// frees the structure too (or when the collector calls +// rrdset_collector_finished()). + +struct rrd_collector { + int32_t refcount; + pid_t tid; + bool running; +}; + +// Each thread that adds RRDSET functions, has to call +// rrdset_collector_started() and rrdset_collector_finished() +// to create the collector structure. + +static __thread struct rrd_collector *thread_rrd_collector = NULL; + +static void rrd_collector_free(struct rrd_collector *rdc) { + int32_t expected = 0; + if(likely(!__atomic_compare_exchange_n(&rdc->refcount, &expected, -1, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))) { + // the collector is still referenced by charts. + // leave it hanging there, the last chart will actually free it. + return; + } + + // we can free it now + freez(rdc); +} + +// called once per collector +void rrd_collector_started(void) { + if(likely(thread_rrd_collector)) return; + + thread_rrd_collector = callocz(1, sizeof(struct rrd_collector)); + thread_rrd_collector->tid = gettid(); + thread_rrd_collector->running = true; +} + +// called once per collector +void rrd_collector_finished(void) { + if(!thread_rrd_collector) + return; + + thread_rrd_collector->running = false; + rrd_collector_free(thread_rrd_collector); + thread_rrd_collector = NULL; +} + +static struct rrd_collector *rrd_collector_acquire(void) { + __atomic_add_fetch(&thread_rrd_collector->refcount, 1, __ATOMIC_SEQ_CST); + return thread_rrd_collector; +} + +static void rrd_collector_release(struct rrd_collector *rdc) { + if(unlikely(!rdc)) return; + + int32_t refcount = __atomic_sub_fetch(&rdc->refcount, 1, __ATOMIC_SEQ_CST); + if(refcount == 0 && !rdc->running) + rrd_collector_free(rdc); +} + +static void rrd_functions_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + + if(!thread_rrd_collector) + fatal("RRDSET_COLLECTOR: called %s() for function '%s' without calling rrd_collector_started() first.", + __FUNCTION__, dictionary_acquired_item_name(item)); + + rdcf->collector = rrd_collector_acquire(); +} + +static void rrd_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + rrd_collector_release(rdcf->collector); +} + +static bool rrd_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func __maybe_unused, + void *new_func __maybe_unused, void *rrdhost __maybe_unused) { + struct rrd_collector_function *rdcf = func; + struct rrd_collector_function *new_rdcf = new_func; + + if(!thread_rrd_collector) + fatal("RRDSET_COLLECTOR: called %s() for function '%s' without calling rrd_collector_started() first.", + __FUNCTION__, dictionary_acquired_item_name(item)); + + bool changed = false; + + if(rdcf->collector != thread_rrd_collector) { + struct rrd_collector *old_rdc = rdcf->collector; + rdcf->collector = rrd_collector_acquire(); + rrd_collector_release(old_rdc); + changed = true; + } + + if(rdcf->function != new_rdcf->function) { + rdcf->function = new_rdcf->function; + changed = true; + } + + if(rdcf->help != new_rdcf->help) { + STRING *old = rdcf->help; + rdcf->help = new_rdcf->help; + string_freez(old); + changed = true; + } + else + string_freez(new_rdcf->help); + + if(rdcf->timeout != new_rdcf->timeout) { + rdcf->timeout = new_rdcf->timeout; + changed = true; + } + + if(rdcf->sync != new_rdcf->sync) { + rdcf->sync = new_rdcf->sync; + changed = true; + } + + if(rdcf->collector_data != new_rdcf->collector_data) { + rdcf->collector_data = new_rdcf->collector_data; + changed = true; + } + + return changed; +} + + +void rrdfunctions_init(RRDHOST *host) { + if(host->functions) return; + + host->functions = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_insert_callback(host->functions, rrd_functions_insert_callback, host); + dictionary_register_delete_callback(host->functions, rrd_functions_delete_callback, host); + dictionary_register_conflict_callback(host->functions, rrd_functions_conflict_callback, host); +} + +void rrdfunctions_destroy(RRDHOST *host) { + dictionary_destroy(host->functions); +} + +void rrd_collector_add_function(RRDHOST *host, RRDSET *st, const char *name, int timeout, const char *help, + bool sync, function_execute_at_collector function, void *collector_data) { + + // RRDSET *st may be NULL in this function + // to create a GLOBAL function + + if(st && !st->functions_view) + st->functions_view = dictionary_create_view(host->functions); + + char key[PLUGINSD_LINE_MAX + 1]; + sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + + struct rrd_collector_function tmp = { + .sync = sync, + .timeout = timeout, + .options = (st)?RRD_FUNCTION_LOCAL:RRD_FUNCTION_GLOBAL, + .function = function, + .collector_data = collector_data, + .help = string_strdupz(help), + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(host->functions, key, &tmp, sizeof(tmp)); + + if(st) + dictionary_view_set(st->functions_view, key, item); + + dictionary_acquired_item_release(host->functions, item); +} + +void rrd_functions_expose_rrdpush(RRDSET *st, BUFFER *wb) { + if(!st->functions_view) + return; + + struct rrd_collector_function *tmp; + dfe_start_read(st->functions_view, tmp) { + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " \"%s\" %d \"%s\"\n" + , tmp_dfe.name + , tmp->timeout + , string2str(tmp->help) + ); + } + dfe_done(tmp); +} + +struct rrd_function_call_wait { + bool free_with_signal; + bool data_are_ready; + netdata_mutex_t mutex; + pthread_cond_t cond; + int code; +}; + +static void rrd_function_call_wait_free(struct rrd_function_call_wait *tmp) { + pthread_cond_destroy(&tmp->cond); + netdata_mutex_destroy(&tmp->mutex); + freez(tmp); +} + +struct { + const char *format; + uint8_t content_type; +} function_formats[] = { + { .format = "application/json", CT_APPLICATION_JSON }, + { .format = "text/plain", CT_TEXT_PLAIN }, + { .format = "application/xml", CT_APPLICATION_XML }, + { .format = "prometheus", CT_PROMETHEUS }, + { .format = "text", CT_TEXT_PLAIN }, + { .format = "txt", CT_TEXT_PLAIN }, + { .format = "json", CT_APPLICATION_JSON }, + { .format = "html", CT_TEXT_HTML }, + { .format = "text/html", CT_TEXT_HTML }, + { .format = "xml", CT_APPLICATION_XML }, + + // terminator + { .format = NULL, CT_TEXT_PLAIN }, +}; + +uint8_t functions_format_to_content_type(const char *format) { + if(format && *format) { + for (int i = 0; function_formats[i].format; i++) + if (strcmp(function_formats[i].format, format) == 0) + return function_formats[i].content_type; + } + + return CT_TEXT_PLAIN; +} + +const char *functions_content_type_to_format(uint8_t content_type) { + for (int i = 0; function_formats[i].format; i++) + if (function_formats[i].content_type == content_type) + return function_formats[i].format; + + return "text/plain"; +} + +int rrd_call_function_error(BUFFER *wb, const char *msg, int code) { + char buffer[PLUGINSD_LINE_MAX]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + buffer_flush(wb); + buffer_sprintf(wb, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + wb->contenttype = CT_APPLICATION_JSON; + buffer_no_cacheable(wb); + return code; +} + +static int rrd_call_function_find(RRDHOST *host, BUFFER *wb, const char *name, size_t key_length, struct rrd_collector_function **rdcf) { + char buffer[MAX_FUNCTION_LENGTH + 1]; + + strncpyz(buffer, name, MAX_FUNCTION_LENGTH); + char *s = NULL; + + *rdcf = NULL; + while(!(*rdcf) && buffer[0]) { + *rdcf = dictionary_get(host->functions, buffer); + if(*rdcf) break; + + // if s == NULL, set it to the end of the buffer + // this should happen only the first time + if(unlikely(!s)) + s = &buffer[key_length - 1]; + + // skip a word from the end + while(s >= buffer && !isspace(*s)) *s-- = '\0'; + + // skip all spaces + while(s >= buffer && isspace(*s)) *s-- = '\0'; + } + + buffer_flush(wb); + + if(!(*rdcf)) + return rrd_call_function_error(wb, "No collector is supplying this function on this host at this time.", HTTP_RESP_NOT_FOUND); + + if(!(*rdcf)->collector->running) + return rrd_call_function_error(wb, "The collector that registered this function, is not currently running.", HTTP_RESP_BACKEND_FETCH_FAILED); + + return HTTP_RESP_OK; +} + +static void rrd_call_function_signal_when_ready(BUFFER *temp_wb __maybe_unused, int code, void *callback_data) { + struct rrd_function_call_wait *tmp = callback_data; + bool we_should_free = false; + + netdata_mutex_lock(&tmp->mutex); + + // since we got the mutex, + // the waiting thread is either in pthread_cond_timedwait() + // or gave up and left. + + tmp->code = code; + tmp->data_are_ready = true; + + if(tmp->free_with_signal) + we_should_free = true; + + pthread_cond_signal(&tmp->cond); + + netdata_mutex_unlock(&tmp->mutex); + + if(we_should_free) { + buffer_free(temp_wb); + rrd_function_call_wait_free(tmp); + } +} + +int rrd_call_function_and_wait(RRDHOST *host, BUFFER *wb, int timeout, const char *name) { + int code; + + struct rrd_collector_function *rdcf = NULL; + + char key[PLUGINSD_LINE_MAX + 1]; + size_t key_length = sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + code = rrd_call_function_find(host, wb, key, key_length, &rdcf); + if(code != HTTP_RESP_OK) + return code; + + if(timeout <= 0) + timeout = rdcf->timeout; + + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + tp.tv_sec += (time_t)timeout; + + if(rdcf->sync) { + code = rdcf->function(wb, timeout, key, rdcf->collector_data, NULL, NULL); + } + else { + struct rrd_function_call_wait *tmp = mallocz(sizeof(struct rrd_function_call_wait)); + tmp->free_with_signal = false; + tmp->data_are_ready = false; + netdata_mutex_init(&tmp->mutex); + pthread_cond_init(&tmp->cond, NULL); + + bool we_should_free = true; + BUFFER *temp_wb = buffer_create(PLUGINSD_LINE_MAX + 1); // we need it because we may give up on it + temp_wb->contenttype = wb->contenttype; + code = rdcf->function(temp_wb, timeout, key, rdcf->collector_data, rrd_call_function_signal_when_ready, tmp); + if (code == HTTP_RESP_OK) { + netdata_mutex_lock(&tmp->mutex); + + int rc = 0; + while (rc == 0 && !tmp->data_are_ready) { + // the mutex is unlocked within pthread_cond_timedwait() + rc = pthread_cond_timedwait(&tmp->cond, &tmp->mutex, &tp); + // the mutex is again ours + } + + if (tmp->data_are_ready) { + // we have a response + buffer_fast_strcat(wb, buffer_tostring(temp_wb), buffer_strlen(temp_wb)); + wb->contenttype = temp_wb->contenttype; + wb->expires = temp_wb->expires; + + if(wb->expires) + buffer_cacheable(wb); + else + buffer_no_cacheable(wb); + + code = tmp->code; + } + else if (rc == ETIMEDOUT) { + // timeout + // we will go away and let the callback free the structure + tmp->free_with_signal = true; + we_should_free = false; + code = rrd_call_function_error(wb, "Timeout while waiting for a response from the collector.", HTTP_RESP_GATEWAY_TIMEOUT); + } + else + code = rrd_call_function_error(wb, "Failed to get the response from the collector.", HTTP_RESP_INTERNAL_SERVER_ERROR); + + netdata_mutex_unlock(&tmp->mutex); + } + else { + buffer_free(temp_wb); + if(!buffer_strlen(wb)) + rrd_call_function_error(wb, "Failed to send request to the collector.", code); + } + + if (we_should_free) + rrd_function_call_wait_free(tmp); + } + + return code; +} + +int rrd_call_function_async(RRDHOST *host, BUFFER *wb, int timeout, const char *name, + rrd_call_function_async_callback callback, void *callback_data) { + int code; + + struct rrd_collector_function *rdcf = NULL; + char key[PLUGINSD_LINE_MAX + 1]; + size_t key_length = sanitize_function_text(key, name, PLUGINSD_LINE_MAX); + code = rrd_call_function_find(host, wb, key, key_length, &rdcf); + if(code != HTTP_RESP_OK) + return code; + + if(timeout <= 0) + timeout = rdcf->timeout; + + code = rdcf->function(wb, timeout, key, rdcf->collector_data, callback, callback_data); + + if(code != HTTP_RESP_OK) { + if (!buffer_strlen(wb)) + rrd_call_function_error(wb, "Failed to send request to the collector.", code); + } + + return code; +} + +static void functions2json(DICTIONARY *functions, BUFFER *wb, const char *ident, const char *kq, const char *sq) { + struct rrd_collector_function *t; + dfe_start_read(functions, t) { + if(!t->collector->running) continue; + + if(t_dfe.counter) + buffer_strcat(wb, ",\n"); + + buffer_sprintf(wb, "%s%s%s%s: {", ident, kq, t_dfe.name, kq); + buffer_sprintf(wb, "\n\t%s%shelp%s: %s%s%s", ident, kq, kq, sq, string2str(t->help), sq); + buffer_sprintf(wb, ",\n\t%s%stimeout%s: %d", ident, kq, kq, t->timeout); + buffer_sprintf(wb, ",\n\t%s%soptions%s: \"%s%s\"", ident, kq, kq + , (t->options & RRD_FUNCTION_LOCAL)?"LOCAL ":"" + , (t->options & RRD_FUNCTION_GLOBAL)?"GLOBAL ":"" + ); + buffer_sprintf(wb, "\n%s}", ident); + } + dfe_done(t); + buffer_strcat(wb, "\n"); +} + +void chart_functions2json(RRDSET *st, BUFFER *wb, int tabs, const char *kq, const char *sq) { + if(!st || !st->functions_view) return; + + char ident[tabs + 1]; + ident[tabs] = '\0'; + while(tabs) ident[--tabs] = '\t'; + + functions2json(st->functions_view, wb, ident, kq, sq); +} + +void host_functions2json(RRDHOST *host, BUFFER *wb, int tabs, const char *kq, const char *sq) { + if(!host || !host->functions) return; + + char ident[tabs + 1]; + ident[tabs] = '\0'; + while(tabs) ident[--tabs] = '\t'; + + functions2json(host->functions, wb, ident, kq, sq); +} + +void chart_functions_to_dict(DICTIONARY *rrdset_functions_view, DICTIONARY *dst) { + if(!rrdset_functions_view || !dst) return; + + struct rrd_collector_function *t; + dfe_start_read(rrdset_functions_view, t) { + if(!t->collector->running) continue; + + dictionary_set(dst, t_dfe.name, NULL, 0); + } + dfe_done(t); +} diff --git a/database/rrdfunctions.h b/database/rrdfunctions.h new file mode 100644 index 000000000..f031ec34d --- /dev/null +++ b/database/rrdfunctions.h @@ -0,0 +1,35 @@ +#ifndef NETDATA_RRDFUNCTIONS_H +#define NETDATA_RRDFUNCTIONS_H 1 + +#include "rrd.h" + +void rrdfunctions_init(RRDHOST *host); +void rrdfunctions_destroy(RRDHOST *host); + +void rrd_collector_started(void); +void rrd_collector_finished(void); + +typedef void (*function_data_ready_callback)(BUFFER *wb, int code, void *callback_data); + +typedef int (*function_execute_at_collector)(BUFFER *wb, int timeout, const char *function, void *collector_data, + function_data_ready_callback callback, void *callback_data); + +void rrd_collector_add_function(RRDHOST *host, RRDSET *st, const char *name, int timeout, const char *help, + bool sync, function_execute_at_collector function, void *collector_data); + +int rrd_call_function_and_wait(RRDHOST *host, BUFFER *wb, int timeout, const char *name); + +typedef void (*rrd_call_function_async_callback)(BUFFER *wb, int code, void *callback_data); +int rrd_call_function_async(RRDHOST *host, BUFFER *wb, int timeout, const char *name, rrd_call_function_async_callback, void *callback_data); + +void rrd_functions_expose_rrdpush(RRDSET *st, BUFFER *wb); + +void chart_functions2json(RRDSET *st, BUFFER *wb, int tabs, const char *kq, const char *sq); +void chart_functions_to_dict(DICTIONARY *rrdset_functions_view, DICTIONARY *dst); +void host_functions2json(RRDHOST *host, BUFFER *wb, int tabs, const char *kq, const char *sq); + +uint8_t functions_format_to_content_type(const char *format); +const char *functions_content_type_to_format(uint8_t content_type); +int rrd_call_function_error(BUFFER *wb, const char *msg, int code); + +#endif // NETDATA_RRDFUNCTIONS_H diff --git a/database/rrdhost.c b/database/rrdhost.c index 7f4bd95ba..5ba13d47b 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -3,21 +3,21 @@ #define NETDATA_RRD_INTERNALS #include "rrd.h" -int storage_tiers = 1; -int storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 }; +bool dbengine_enabled = false; // will become true if and when dbengine is initialized +size_t storage_tiers = 3; +size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 }; RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW }; #if RRD_STORAGE_TIERS != 5 #error RRD_STORAGE_TIERS is not 5 - you need to update the grouping iterations per tier #endif -int get_tier_grouping(int tier) { +size_t get_tier_grouping(size_t tier) { if(unlikely(tier >= storage_tiers)) tier = storage_tiers - 1; - if(unlikely(tier < 0)) tier = 0; - int grouping = 1; + size_t grouping = 1; // first tier is always 1 iteration of whatever update every the chart has - for(int i = 1; i <= tier ;i++) + for(size_t i = 1; i <= tier ;i++) grouping *= storage_tiers_grouping_iterations[i]; return grouping; @@ -32,7 +32,7 @@ time_t rrdhost_free_orphan_time = 3600; bool is_storage_engine_shared(STORAGE_INSTANCE *engine) { #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ;tier++) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { if (engine == (STORAGE_INSTANCE *)multidb_ctx[tier]) return true; } @@ -43,107 +43,144 @@ bool is_storage_engine_shared(STORAGE_INSTANCE *engine) { // ---------------------------------------------------------------------------- -// RRDHOST index +// RRDHOST indexes management -int rrdhost_compare(void* a, void* b) { - if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1; - else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1; - else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid); +DICTIONARY *rrdhost_root_index = NULL; +static DICTIONARY *rrdhost_root_index_hostname = NULL; + +static inline void rrdhost_init() { + if(unlikely(!rrdhost_root_index)) { + rrdhost_root_index = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + } + + if(unlikely(!rrdhost_root_index_hostname)) { + rrdhost_root_index_hostname = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + } } -avl_tree_lock rrdhost_root_index = { - .avl_tree = { NULL, rrdhost_compare }, - .rwlock = AVL_LOCK_INITIALIZER -}; +// ---------------------------------------------------------------------------- +// RRDHOST index by UUID + +inline long rrdhost_hosts_available(void) { + return dictionary_entries(rrdhost_root_index); +} -RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) { - debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid); +inline RRDHOST *rrdhost_find_by_guid(const char *guid) { + return dictionary_get(rrdhost_root_index, guid); +} - RRDHOST tmp; - strncpyz(tmp.machine_guid, guid, GUID_LEN); - tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid); +static inline RRDHOST *rrdhost_index_add_by_guid(RRDHOST *host) { + RRDHOST *ret_machine_guid = dictionary_set(rrdhost_root_index, host->machine_guid, host, sizeof(RRDHOST)); + if(ret_machine_guid == host) + rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + else { + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + error("RRDHOST: %s() host with machine guid '%s' is already indexed", __FUNCTION__, host->machine_guid); + } - return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl_t *) &tmp); + return host; +} + +static void rrdhost_index_del_by_guid(RRDHOST *host) { + if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID)) { + if(!dictionary_del(rrdhost_root_index, host->machine_guid)) + error("RRDHOST: %s() failed to delete machine guid '%s' from index", __FUNCTION__, host->machine_guid); + + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_MACHINE_GUID); + } } -RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) { +// ---------------------------------------------------------------------------- +// RRDHOST index by hostname + +inline RRDHOST *rrdhost_find_by_hostname(const char *hostname) { if(unlikely(!strcmp(hostname, "localhost"))) return localhost; - if(unlikely(!hash)) hash = simple_hash(hostname); + return dictionary_get(rrdhost_root_index_hostname, hostname); +} - rrd_rdlock(); - RRDHOST *host; - rrdhost_foreach_read(host) { - if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) { - rrd_unlock(); - return host; - } +static inline RRDHOST *rrdhost_index_add_hostname(RRDHOST *host) { + if(!host->hostname) return host; + + RRDHOST *ret_hostname = dictionary_set(rrdhost_root_index_hostname, rrdhost_hostname(host), host, sizeof(RRDHOST)); + if(ret_hostname == host) + rrdhost_option_set(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + else { + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + error("RRDHOST: %s() host with hostname '%s' is already indexed", __FUNCTION__, rrdhost_hostname(host)); } - rrd_unlock(); - return NULL; + return host; } -#define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl_t *)(rrdhost)) -#define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl_t *)(rrdhost)) +static inline void rrdhost_index_del_hostname(RRDHOST *host) { + if(unlikely(!host->hostname)) return; + + if(rrdhost_option_check(host, RRDHOST_OPTION_INDEXED_HOSTNAME)) { + if(!dictionary_del(rrdhost_root_index_hostname, rrdhost_hostname(host))) + error("RRDHOST: %s() failed to delete hostname '%s' from index", __FUNCTION__, rrdhost_hostname(host)); + rrdhost_option_clear(host, RRDHOST_OPTION_INDEXED_HOSTNAME); + } +} // ---------------------------------------------------------------------------- // RRDHOST - internal helpers static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) { - if(host->tags && tags && !strcmp(host->tags, tags)) + if(host->tags && tags && !strcmp(rrdhost_tags(host), tags)) return; - void *old = (void *)host->tags; - host->tags = (tags && *tags)?strdupz(tags):NULL; - freez(old); + STRING *old = host->tags; + host->tags = string_strdupz((tags && *tags)?tags:NULL); + string_freez(old); } static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) { - if(host->hostname && hostname && !strcmp(host->hostname, hostname)) + if(unlikely(hostname && !*hostname)) hostname = NULL; + + if(host->hostname && hostname && !strcmp(rrdhost_hostname(host), hostname)) return; - void *old = host->hostname; - host->hostname = strdupz(hostname?hostname:"localhost"); - host->hash_hostname = simple_hash(host->hostname); - freez(old); + rrdhost_index_del_hostname(host); + + STRING *old = host->hostname; + host->hostname = string_strdupz(hostname?hostname:"localhost"); + string_freez(old); + + rrdhost_index_add_hostname(host); } static inline void rrdhost_init_os(RRDHOST *host, const char *os) { - if(host->os && os && !strcmp(host->os, os)) + if(host->os && os && !strcmp(rrdhost_os(host), os)) return; - void *old = (void *)host->os; - host->os = strdupz(os?os:"unknown"); - freez(old); + STRING *old = host->os; + host->os = string_strdupz(os?os:"unknown"); + string_freez(old); } static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, const char *abbrev_timezone, int32_t utc_offset) { - if (host->timezone && timezone && !strcmp(host->timezone, timezone) && host->abbrev_timezone && abbrev_timezone && - !strcmp(host->abbrev_timezone, abbrev_timezone) && host->utc_offset == utc_offset) + if (host->timezone && timezone && !strcmp(rrdhost_timezone(host), timezone) && host->abbrev_timezone && abbrev_timezone && + !strcmp(rrdhost_abbrev_timezone(host), abbrev_timezone) && host->utc_offset == utc_offset) return; - void *old = (void *)host->timezone; - host->timezone = strdupz((timezone && *timezone)?timezone:"unknown"); - freez(old); + STRING *old = host->timezone; + host->timezone = string_strdupz((timezone && *timezone)?timezone:"unknown"); + string_freez(old); old = (void *)host->abbrev_timezone; - host->abbrev_timezone = strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC"); - freez(old); + host->abbrev_timezone = string_strdupz((abbrev_timezone && *abbrev_timezone) ? abbrev_timezone : "UTC"); + string_freez(old); host->utc_offset = utc_offset; } -static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) { - strncpy(host->machine_guid, machine_guid, GUID_LEN); - host->machine_guid[GUID_LEN] = '\0'; - host->hash_machine_guid = simple_hash(host->machine_guid); -} - -void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *hostname, - const char *registry_hostname, const char *guid, const char *os, const char *tags, +void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, + const char *registry_hostname, const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name, const char *program_version) { @@ -151,23 +188,48 @@ void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory host->rrd_update_every = update_every; host->rrd_memory_mode = memory_mode; - rrdhost_init_hostname(host, hostname); - - rrdhost_init_machine_guid(host, guid); - rrdhost_init_os(host, os); rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset); rrdhost_init_tags(host, tags); - host->program_name = strdupz((program_name && *program_name) ? program_name : "unknown"); - host->program_version = strdupz((program_version && *program_version) ? program_version : "unknown"); - - host->registry_hostname = strdupz((registry_hostname && *registry_hostname) ? registry_hostname : host->hostname); + host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown"); + host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown"); + host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host)); } // ---------------------------------------------------------------------------- // RRDHOST - add a host +static void rrdhost_initialize_rrdpush_sender(RRDHOST *host, + unsigned int rrdpush_enabled, + char *rrdpush_destination, + char *rrdpush_api_key, + char *rrdpush_send_charts_matching +) { + if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED)) return; + + if(rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) { + rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED); + + sender_init(host); + +#ifdef ENABLE_HTTPS + host->sender->ssl.conn = NULL; + host->sender->ssl.flags = NETDATA_SSL_START; +#endif + + host->rrdpush_send_destination = strdupz(rrdpush_destination); + rrdpush_destinations_init(host); + + host->rrdpush_send_api_key = strdupz(rrdpush_api_key); + host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT); + + rrdhost_option_set(host, RRDHOST_OPTION_SENDER_ENABLED); + } + else + rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED); +} + RRDHOST *rrdhost_create(const char *hostname, const char *registry_hostname, const char *guid, @@ -186,188 +248,136 @@ RRDHOST *rrdhost_create(const char *hostname, char *rrdpush_destination, char *rrdpush_api_key, char *rrdpush_send_charts_matching, + bool rrdpush_enable_replication, + time_t rrdpush_seconds_to_replicate, + time_t rrdpush_replication_step, struct rrdhost_system_info *system_info, int is_localhost, bool archived ) { debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid); + rrd_check_wrlock(); + + if(memory_mode == RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled) { + error("memory mode 'dbengine' is not enabled, but host '%s' is configured for it. Falling back to 'alloc'", hostname); + memory_mode = RRD_MEMORY_MODE_ALLOC; + } + #ifdef ENABLE_DBENGINE int is_legacy = (memory_mode == RRD_MEMORY_MODE_DBENGINE) && is_legacy_child(guid); #else - int is_legacy = 1; +int is_legacy = 1; #endif - rrd_check_wrlock(); int is_in_multihost = (memory_mode == RRD_MEMORY_MODE_DBENGINE && !is_legacy); RRDHOST *host = callocz(1, sizeof(RRDHOST)); - set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, hostname, registry_hostname, guid, os, + strncpyz(host->machine_guid, guid, GUID_LEN + 1); + + set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os, tags, timezone, abbrev_timezone, utc_offset, program_name, program_version); + rrdhost_init_hostname(host, hostname); + host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries); host->health_enabled = ((memory_mode == RRD_MEMORY_MODE_NONE)) ? 0 : health_enabled; - sender_init(host); - netdata_mutex_init(&host->receiver_lock); - - host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0; - host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL; - if (host->rrdpush_send_destination) - host->destinations = destinations_init(host->rrdpush_send_destination); - host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL; - host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT); + if (likely(!archived)) { + rrdfunctions_init(host); + host->rrdlabels = rrdlabels_create(); + rrdhost_initialize_rrdpush_sender( + host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching); + } - host->rrdpush_sender_pipe[0] = -1; - host->rrdpush_sender_pipe[1] = -1; - host->rrdpush_sender_socket = -1; + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; + host->rrdpush_replication_step = rrdpush_replication_step; + + switch(memory_mode) { + default: + case RRD_MEMORY_MODE_ALLOC: + case RRD_MEMORY_MODE_MAP: + case RRD_MEMORY_MODE_SAVE: + case RRD_MEMORY_MODE_RAM: + if(host->rrdpush_seconds_to_replicate > host->rrd_history_entries * host->rrd_update_every) + host->rrdpush_seconds_to_replicate = host->rrd_history_entries * host->rrd_update_every; + break; - //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused? -#ifdef ENABLE_HTTPS - host->ssl.conn = NULL; - host->ssl.flags = NETDATA_SSL_START; - host->stream_ssl.conn = NULL; - host->stream_ssl.flags = NETDATA_SSL_START; -#endif + case RRD_MEMORY_MODE_DBENGINE: + break; + } netdata_rwlock_init(&host->rrdhost_rwlock); - host->host_labels = rrdlabels_create(); - netdata_mutex_init(&host->aclk_state_lock); + netdata_mutex_init(&host->receiver_lock); host->system_info = system_info; - avl_init_lock(&(host->rrdset_root_index), rrdset_compare); - avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name); - avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare); - avl_init_lock(&(host->rrdvar_root_index), rrdvar_compare); + rrdset_index_init(host); if(config_get_boolean(CONFIG_SECTION_DB, "delete obsolete charts files", 1)) - rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); + rrdhost_option_set(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); if(config_get_boolean(CONFIG_SECTION_DB, "delete orphan hosts files", 1) && !is_localhost) - rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST); - - host->health_default_warn_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat warning", "never"); - host->health_default_crit_repeat_every = config_get_duration(CONFIG_SECTION_HEALTH, "default repeat critical", "never"); - avl_init_lock(&(host->alarms_idx_health_log), alarm_compare_id); - avl_init_lock(&(host->alarms_idx_name), alarm_compare_name); - - // ------------------------------------------------------------------------ - // initialize health variables - - host->health_log.next_log_id = 1; - host->health_log.next_alarm_id = 1; - host->health_log.max = 1000; - host->health_log.next_log_id = (uint32_t)now_realtime_sec(); - host->health_log.next_alarm_id = 0; - - long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max); - if(n < 10) { - error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max); - config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max); - } - else - host->health_log.max = (unsigned int)n; - - netdata_rwlock_init(&host->health_log.alarm_log_rwlock); + rrdhost_option_set(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST); char filename[FILENAME_MAX + 1]; - if(is_localhost) { - host->cache_dir = strdupz(netdata_configured_cache_dir); host->varlib_dir = strdupz(netdata_configured_varlib_dir); - } else { // this is not localhost - append our GUID to localhost path if (is_in_multihost) { // don't append to cache dir in multihost host->cache_dir = strdupz(netdata_configured_cache_dir); - } else { + } + else { snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid); host->cache_dir = strdupz(filename); } - if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || ( - host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) { + if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || + (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) { int r = mkdir(host->cache_dir, 0775); if(r != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir); + error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), host->cache_dir); } snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid); host->varlib_dir = strdupz(filename); - - if(host->health_enabled) { - int r = mkdir(host->varlib_dir, 0775); - if(r != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir); - } - - } - - if(host->health_enabled) { - snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir); - int r = mkdir(filename, 0775); - if(r != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, filename); - } - - snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir); - host->health_log_filename = strdupz(filename); - - snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_primary_plugins_dir); - host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename)); - host->health_default_recipient = strdupz("root"); - - - // ------------------------------------------------------------------------ - // load health configuration - - if(host->health_enabled) { - rrdhost_wrlock(host); - health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL); - rrdhost_unlock(host); } - RRDHOST *t = rrdhost_index_add(host); + // this is also needed for custom host variables - not only health + if(!host->rrdvars) + host->rrdvars = rrdvariables_create(); + RRDHOST *t = rrdhost_index_add_by_guid(host); if(t != host) { - error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid); + error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", rrdhost_hostname(host), host->machine_guid, rrdhost_hostname(t), t->machine_guid); rrdhost_free(host, 1); return NULL; } if (likely(!uuid_parse(host->machine_guid, host->host_uuid))) { - int rc; - if (!archived) { - rc = sql_store_host_info(host); - if (unlikely(rc)) - error_report("Failed to store machine GUID to the database"); - } + if(!archived) + metaqueue_host_update_info(host->machine_guid); sql_load_node_id(host); - if (host->health_enabled) { - if (!file_is_migrated(host->health_log_filename)) { - rc = sql_create_health_log_table(host); - if (unlikely(rc)) { - error_report("Failed to create health log table in the database"); - health_alarm_log_load(host); - health_alarm_log_open(host); - } - else { - health_alarm_log_load(host); - add_migrated_file(host->health_log_filename, 0); - } - } else { - sql_create_health_log_table(host); - sql_health_alarm_log_load(host); - } - } } else error_report("Host machine GUID %s is not valid", host->machine_guid); + rrdfamily_index_init(host); + rrdcalctemplate_index_init(host); + rrdcalc_rrdhost_index_init(host); + + if (health_enabled) + health_thread_spawn(host); + if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { #ifdef ENABLE_DBENGINE char dbenginepath[FILENAME_MAX + 1]; @@ -376,14 +386,18 @@ RRDHOST *rrdhost_create(const char *hostname, snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir); ret = mkdir(dbenginepath, 0775); if (ret != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath); + error("Host '%s': cannot create directory '%s'", rrdhost_hostname(host), dbenginepath); else ret = 0; // succeed if (is_legacy) { // initialize legacy dbengine instance as needed + host->db[0].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[0].eng = storage_engine_get(host->db[0].mode); + host->db[0].tier_grouping = get_tier_grouping(0); + ret = rrdeng_init( host, - (struct rrdengine_instance **)&host->storage_instance[0], + (struct rrdengine_instance **)&host->db[0].instance, dbenginepath, default_rrdeng_page_cache_mb, default_rrdeng_disk_quota_mb, @@ -392,18 +406,26 @@ RRDHOST *rrdhost_create(const char *hostname, if(ret == 0) { // assign the rest of the shared storage instances to it // to allow them collect its metrics too - for(int tier = 1; tier < storage_tiers ; tier++) - host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier]; + for(size_t tier = 1; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } } } else { - for(int tier = 0; tier < storage_tiers ; tier++) - host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier]; + for(size_t tier = 0; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *)multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } } if (ret) { // check legacy or multihost initialization success error( "Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.", - host->hostname, host->machine_guid, host->cache_dir); + rrdhost_hostname(host), host->machine_guid, host->cache_dir); rrdhost_free(host, 1); host = NULL; //rrd_hosts_available++; //TODO: maybe we want this? @@ -416,27 +438,29 @@ RRDHOST *rrdhost_create(const char *hostname, #endif } else { + host->db[0].mode = host->rrd_memory_mode; + host->db[0].eng = storage_engine_get(host->db[0].mode); + host->db[0].instance = NULL; + host->db[0].tier_grouping = get_tier_grouping(0); + #ifdef ENABLE_DBENGINE // the first tier is reserved for the non-dbengine modes - for(int tier = 1; tier < storage_tiers ; tier++) - host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier]; + for(size_t tier = 1; tier < storage_tiers ; tier++) { + host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; + host->db[tier].eng = storage_engine_get(host->db[tier].mode); + host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].tier_grouping = get_tier_grouping(tier); + } #endif } // ------------------------------------------------------------------------ // link it and add it to the index - if(is_localhost) { - host->next = localhost; - localhost = host; - } - else { - if(localhost) { - host->next = localhost->next; - localhost->next = host; - } - else localhost = host; - } + if(is_localhost) + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(localhost, host, prev, next); + else + DOUBLE_LINKED_LIST_APPEND_UNSAFE(localhost, host, prev, next); // ------------------------------------------------------------------------ // init new ML host and update system_info to let upstreams know @@ -466,28 +490,29 @@ RRDHOST *rrdhost_create(const char *hostname, ", health_log '%s'" ", alarms default handler '%s'" ", alarms default recipient '%s'" - , host->hostname - , host->registry_hostname + , rrdhost_hostname(host) + , rrdhost_registry_hostname(host) , host->machine_guid - , host->os - , host->timezone - , (host->tags)?host->tags:"" - , host->program_name - , host->program_version + , rrdhost_os(host) + , rrdhost_timezone(host) + , rrdhost_tags(host) + , rrdhost_program_name(host) + , rrdhost_program_version(host) , host->rrd_update_every , rrd_memory_mode_name(host->rrd_memory_mode) , host->rrd_history_entries - , host->rrdpush_send_enabled?"enabled":"disabled" + , rrdhost_has_rrdpush_sender_enabled(host)?"enabled":"disabled" , host->rrdpush_send_destination?host->rrdpush_send_destination:"" , host->rrdpush_send_api_key?host->rrdpush_send_api_key:"" , host->health_enabled?"enabled":"disabled" , host->cache_dir , host->varlib_dir , host->health_log_filename - , host->health_default_exec - , host->health_default_recipient + , string2str(host->health_default_exec) + , string2str(host->health_default_recipient) ); - sql_store_host_system_info(&host->host_uuid, system_info); + if(!archived) + metaqueue_host_update_system_info(host); rrd_hosts_available++; @@ -496,6 +521,8 @@ RRDHOST *rrdhost_create(const char *hostname, ml_new_host(host); else rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); + + return host; } @@ -518,113 +545,97 @@ void rrdhost_update(RRDHOST *host , char *rrdpush_destination , char *rrdpush_api_key , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step , struct rrdhost_system_info *system_info ) { UNUSED(guid); - UNUSED(rrdpush_enabled); - UNUSED(rrdpush_destination); - UNUSED(rrdpush_api_key); - UNUSED(rrdpush_send_charts_matching); host->health_enabled = (mode == RRD_MEMORY_MODE_NONE) ? 0 : health_enabled; - //host->stream_version = STREAMING_PROTOCOL_CURRENT_VERSION; Unused? rrdhost_system_info_free(host->system_info); host->system_info = system_info; - sql_store_host_system_info(&host->host_uuid, system_info); + metaqueue_host_update_system_info(host); rrdhost_init_os(host, os); rrdhost_init_timezone(host, timezone, abbrev_timezone, utc_offset); - freez(host->registry_hostname); - host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); + string_freez(host->registry_hostname); + host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); - if(strcmp(host->hostname, hostname) != 0) { - info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname); - char *t = host->hostname; - host->hostname = strdupz(hostname); - host->hash_hostname = simple_hash(host->hostname); - freez(t); + if(strcmp(rrdhost_hostname(host), hostname) != 0) { + info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", rrdhost_hostname(host), hostname); + rrdhost_init_hostname(host, hostname); } - if(strcmp(host->program_name, program_name) != 0) { - info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name); - char *t = host->program_name; - host->program_name = strdupz(program_name); - freez(t); + if(strcmp(rrdhost_program_name(host), program_name) != 0) { + info("Host '%s' switched program name from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_name(host), program_name); + STRING *t = host->program_name; + host->program_name = string_strdupz(program_name); + string_freez(t); } - if(strcmp(host->program_version, program_version) != 0) { - info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version); - char *t = host->program_version; - host->program_version = strdupz(program_version); - freez(t); + if(strcmp(rrdhost_program_version(host), program_version) != 0) { + info("Host '%s' switched program version from '%s' to '%s'", rrdhost_hostname(host), rrdhost_program_version(host), program_version); + STRING *t = host->program_version; + host->program_version = string_strdupz(program_version); + string_freez(t); } if(host->rrd_update_every != update_every) - error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every); - - if(host->rrd_history_entries < history) - error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", host->hostname, host->rrd_history_entries, history); + error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_update_every, update_every); if(host->rrd_memory_mode != mode) - error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + + else if(host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && host->rrd_history_entries < history) + error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", rrdhost_hostname(host), host->rrd_history_entries, history); // update host tags rrdhost_init_tags(host, tags); + if(!host->rrdvars) + host->rrdvars = rrdvariables_create(); + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED); - host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0; - host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL; - if (host->rrdpush_send_destination) - host->destinations = destinations_init(host->rrdpush_send_destination); - host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL; - host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT); + rrdfunctions_init(host); - if(host->health_enabled) { - int r; - char filename[FILENAME_MAX + 1]; + if(!host->rrdlabels) + host->rrdlabels = rrdlabels_create(); + + if (!host->rrdset_root_index) + rrdset_index_init(host); + + rrdhost_initialize_rrdpush_sender(host, + rrdpush_enabled, + rrdpush_destination, + rrdpush_api_key, + rrdpush_send_charts_matching); + + rrdfamily_index_init(host); + rrdcalctemplate_index_init(host); + rrdcalc_rrdhost_index_init(host); + + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; + host->rrdpush_replication_step = rrdpush_replication_step; - if (host != localhost) { - r = mkdir(host->varlib_dir, 0775); - if (r != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir); - } - snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir); - r = mkdir(filename, 0775); - if(r != 0 && errno != EEXIST) - error("Host '%s': cannot create directory '%s'", host->hostname, filename); - - rrdhost_wrlock(host); - health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL); - rrdhost_unlock(host); - - if (!file_is_migrated(host->health_log_filename)) { - int rc = sql_create_health_log_table(host); - if (unlikely(rc)) { - error_report("Failed to create health log table in the database"); - - health_alarm_log_load(host); - health_alarm_log_open(host); - } else { - health_alarm_log_load(host); - add_migrated_file(host->health_log_filename, 0); - } - } else { - sql_create_health_log_table(host); - sql_health_alarm_log_load(host); - } - } rrd_hosts_available++; ml_new_host(host); rrdhost_load_rrdcontext_data(host); - info("Host %s is not in archived mode anymore", host->hostname); + info("Host %s is not in archived mode anymore", rrdhost_hostname(host)); } - return; + if (health_enabled) + health_thread_spawn(host); } RRDHOST *rrdhost_find_or_create( @@ -646,17 +657,20 @@ RRDHOST *rrdhost_find_or_create( , char *rrdpush_destination , char *rrdpush_api_key , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step , struct rrdhost_system_info *system_info , bool archived ) { debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid); rrd_wrlock(); - RRDHOST *host = rrdhost_find_by_guid(guid, 0); - if (unlikely(host && RRD_MEMORY_MODE_DBENGINE != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) { + RRDHOST *host = rrdhost_find_by_guid(guid); + if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) { /* If a legacy memory mode instantiates all dbengine state must be discarded to avoid inconsistencies */ error("Archived host '%s' has memory mode '%s', but the wanted one is '%s'. Discarding archived state.", - host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + rrdhost_hostname(host), rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); rrdhost_free(host, 1); host = NULL; } @@ -680,6 +694,9 @@ RRDHOST *rrdhost_find_or_create( , rrdpush_destination , rrdpush_api_key , rrdpush_send_charts_matching + , rrdpush_enable_replication + , rrdpush_seconds_to_replicate + , rrdpush_replication_step , system_info , 0 , archived @@ -705,6 +722,9 @@ RRDHOST *rrdhost_find_or_create( , rrdpush_destination , rrdpush_api_key , rrdpush_send_charts_matching + , rrdpush_enable_replication + , rrdpush_seconds_to_replicate + , rrdpush_replication_step , system_info); } if (host) { @@ -721,6 +741,8 @@ RRDHOST *rrdhost_find_or_create( inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now) { if(host != protected_host && host != localhost + && rrdhost_receiver_replicating_charts(host) == 0 + && rrdhost_sender_replicating_charts(host) == 0 && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN) && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) && !host->receiver @@ -731,50 +753,10 @@ inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, tim return 0; } -void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected_host) { - time_t now = now_realtime_sec(); - - RRDHOST *host; - -restart_after_removal: - rrdhost_foreach_write(host) { - if(rrdhost_should_be_removed(host, protected_host, now)) { - info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid); - - if (rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST) -#ifdef ENABLE_DBENGINE - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0])) -#endif - ) - rrdhost_delete_charts(host); - else - rrdhost_save_charts(host); - - rrdhost_free(host, 0); - goto restart_after_removal; - } - } -} - // ---------------------------------------------------------------------------- // RRDHOST global / startup initialization -int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { - - if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - fatal("Failed to initialize SQLite"); - info("Skipping SQLITE metadata initialization since memory mode is not dbengine"); - } - - if (unlikely(sql_init_context_database(system_info ? 0 : 1))) { - error_report("Failed to initialize context metadata database"); - } - - if (unlikely(!system_info)) - goto unittest; - +void dbengine_init(char *hostname) { #ifdef ENABLE_DBENGINE storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); if(storage_tiers < 1) { @@ -807,14 +789,15 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { else rrdeng_page_descr_use_malloc(); - int created_tiers = 0; + size_t created_tiers = 0; char dbenginepath[FILENAME_MAX + 1]; char dbengineconfig[200 + 1]; - for(int tier = 0; tier < storage_tiers ;tier++) { + int divisor = 1; + for(size_t tier = 0; tier < storage_tiers ;tier++) { if(tier == 0) snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir); else - snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%d", netdata_configured_cache_dir, tier); + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier); int ret = mkdir(dbenginepath, 0775); if (ret != 0 && errno != EEXIST) { @@ -822,27 +805,30 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { break; } - int page_cache_mb = default_rrdeng_page_cache_mb; - int disk_space_mb = default_multidb_disk_quota_mb; - int grouping_iterations = storage_tiers_grouping_iterations[tier]; + if(tier > 0) + divisor *= 2; + + int page_cache_mb = default_rrdeng_page_cache_mb / divisor; + int disk_space_mb = default_multidb_disk_quota_mb / divisor; + size_t grouping_iterations = storage_tiers_grouping_iterations[tier]; RRD_BACKFILL backfill = storage_tiers_backfill[tier]; if(tier > 0) { - snprintfz(dbengineconfig, 200, "dbengine tier %d page cache size MB", tier); + snprintfz(dbengineconfig, 200, "dbengine tier %zu page cache size MB", tier); page_cache_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, page_cache_mb); - snprintfz(dbengineconfig, 200, "dbengine tier %d multihost disk space MB", tier); + snprintfz(dbengineconfig, 200, "dbengine tier %zu multihost disk space MB", tier); disk_space_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, disk_space_mb); - snprintfz(dbengineconfig, 200, "dbengine tier %d update every iterations", tier); + snprintfz(dbengineconfig, 200, "dbengine tier %zu update every iterations", tier); grouping_iterations = config_get_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations); if(grouping_iterations < 2) { grouping_iterations = 2; config_set_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations); - error("DBENGINE on '%s': 'dbegnine tier %d update every iterations' cannot be less than 2. Assuming 2.", hostname, tier); + error("DBENGINE on '%s': 'dbegnine tier %zu update every iterations' cannot be less than 2. Assuming 2.", hostname, tier); } - snprintfz(dbengineconfig, 200, "dbengine tier %d backfill", tier); + snprintfz(dbengineconfig, 200, "dbengine tier %zu backfill", tier); const char *bf = config_get(CONFIG_SECTION_DB, dbengineconfig, backfill == RRD_BACKFILL_NEW ? "new" : backfill == RRD_BACKFILL_FULL ? "full" : "none"); if(strcmp(bf, "new") == 0) backfill = RRD_BACKFILL_NEW; else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL; @@ -859,14 +845,14 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { if(tier > 0 && get_tier_grouping(tier) > 65535) { storage_tiers_grouping_iterations[tier] = 1; - error("DBENGINE on '%s': dbengine tier %d gives aggregation of more than 65535 points of tier 0. Disabling tiers above %d", hostname, tier, tier); + error("DBENGINE on '%s': dbengine tier %zu gives aggregation of more than 65535 points of tier 0. Disabling tiers above %zu", hostname, tier, tier); break; } - - internal_error(true, "DBENGINE tier %d grouping iterations is set to %d", tier, storage_tiers_grouping_iterations[tier]); + + internal_error(true, "DBENGINE tier %zu grouping iterations is set to %zu", tier, storage_tiers_grouping_iterations[tier]); ret = rrdeng_init(NULL, NULL, dbenginepath, page_cache_mb, disk_space_mb, tier); if(ret != 0) { - error("DBENGINE on '%s': Failed to initialize multi-host database tier %d on path '%s'", + error("DBENGINE on '%s': Failed to initialize multi-host database tier %zu on path '%s'", hostname, tier, dbenginepath); break; } @@ -875,13 +861,14 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { } if(created_tiers && created_tiers < storage_tiers) { - error("DBENGINE on '%s': Managed to create %d tiers instead of %d. Continuing with %d available.", + error("DBENGINE on '%s': Managed to create %zu tiers instead of %zu. Continuing with %zu available.", hostname, created_tiers, storage_tiers, created_tiers); storage_tiers = created_tiers; } else if(!created_tiers) fatal("DBENGINE on '%s', failed to initialize databases at '%s'.", hostname, netdata_configured_cache_dir); + dbengine_enabled = true; #else storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", 1); if(storage_tiers != 1) { @@ -889,12 +876,54 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { storage_tiers = 1; config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); } + dbengine_enabled = false; #endif +} - health_init(); - rrdpush_init(); +int rrd_init(char *hostname, struct rrdhost_system_info *system_info) { + rrdhost_init(); -unittest: + if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + fatal("Failed to initialize SQLite"); + info("Skipping SQLITE metadata initialization since memory mode is not dbengine"); + } + + if (unlikely(sql_init_context_database(system_info ? 0 : 1))) { + error_report("Failed to initialize context metadata database"); + } + + if (unlikely(strcmp(hostname, "unittest") == 0)) { + dbengine_enabled = true; + } + else { + health_init(); + rrdpush_init(); + + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) { + info("Initializing dbengine..."); + dbengine_init(hostname); + } + else { + info("Not initializing dbengine..."); + storage_tiers = 1; + } + + if (!dbengine_enabled) { + if (storage_tiers > 1) { + error("dbengine is not enabled, but %zu tiers have been requested. Resetting tiers to 1", + storage_tiers); + storage_tiers = 1; + } + + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error("dbengine is not enabled, but it has been given as the default db mode. Resetting db mode to alloc"); + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + } + } + } + + metadata_sync_init(); debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname); rrd_wrlock(); localhost = rrdhost_create( @@ -916,6 +945,9 @@ unittest: , default_rrdpush_destination , default_rrdpush_api_key , default_rrdpush_send_charts_matching + , default_rrdpush_enable_replication + , default_rrdpush_seconds_to_replicate + , default_rrdpush_replication_step , system_info , 1 , 0 @@ -940,19 +972,19 @@ unittest: // there are only used when NETDATA_INTERNAL_CHECKS is set void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { - debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname); + debug(D_RRDHOST, "Checking read lock on host '%s'", rrdhost_hostname(host)); int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock); if(ret == 0) - fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file); + fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", rrdhost_hostname(host), function, line, file); } void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { - debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname); + debug(D_RRDHOST, "Checking write lock on host '%s'", rrdhost_hostname(host)); int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock); if(ret == 0) - fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file); + fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", rrdhost_hostname(host), function, line, file); } void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) { @@ -975,8 +1007,6 @@ void __rrd_check_wrlock(const char *file, const char *function, const unsigned l // RRDHOST - free void rrdhost_system_info_free(struct rrdhost_system_info *system_info) { - info("SYSTEM_INFO: free %p", system_info); - if(likely(system_info)) { freez(system_info->cloud_provider_type); freez(system_info->cloud_instance_type); @@ -1016,18 +1046,21 @@ void destroy_receiver_state(struct receiver_state *rpt); void stop_streaming_sender(RRDHOST *host) { + rrdhost_option_clear(host, RRDHOST_OPTION_SENDER_ENABLED); + if (unlikely(!host->sender)) return; rrdpush_sender_thread_stop(host); // stop a possibly running thread cbuffer_free(host->sender->buffer); - buffer_free(host->sender->build); #ifdef ENABLE_COMPRESSION if (host->sender->compressor) host->sender->compressor->destroy(&host->sender->compressor); #endif + replication_cleanup_sender(host->sender); freez(host->sender); host->sender = NULL; + rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_INITIALIZED); } void stop_streaming_receiver(RRDHOST *host) @@ -1051,7 +1084,7 @@ void rrdhost_free(RRDHOST *host, bool force) { if(!host) return; if (netdata_exit || force) - info("Freeing all memory for host '%s'...", host->hostname); + info("Freeing all memory for host '%s'...", rrdhost_hostname(host)); rrd_check_wrlock(); // make sure the RRDs are write locked @@ -1061,65 +1094,53 @@ void rrdhost_free(RRDHOST *host, bool force) { // ------------------------------------------------------------------------ // clean up streaming + stop_streaming_sender(host); if (netdata_exit || force) stop_streaming_receiver(host); + + // ------------------------------------------------------------------------ + // clean up alarms + + rrdcalc_delete_all(host); + + rrdhost_wrlock(host); // lock this RRDHOST + // ------------------------------------------------------------------------ // release its children resources #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ;tier++) { - if(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && - host->storage_instance[tier] && - !is_storage_engine_shared(host->storage_instance[tier])) - rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[tier]); + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE + && host->db[tier].instance + && !is_storage_engine_shared(host->db[tier].instance)) + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].instance); } #endif - while(host->rrdset_root) - rrdset_free(host->rrdset_root); + // delete all the RRDSETs of the host + rrdset_index_destroy(host); + rrdcalc_rrdhost_index_destroy(host); + rrdcalctemplate_index_destroy(host); freez(host->exporting_flags); - while(host->alarms) - rrdcalc_unlink_and_free(host, host->alarms); - - RRDCALC *rc,*nc; - for(rc = host->alarms_with_foreach; rc ; rc = nc) { - nc = rc->next; - rrdcalc_free(rc); - } - host->alarms_with_foreach = NULL; - - while(host->templates) - rrdcalctemplate_unlink_and_free(host, host->templates); - - RRDCALCTEMPLATE *rt,*next; - for(rt = host->alarms_template_with_foreach; rt ; rt = next) { - next = rt->next; - rrdcalctemplate_free(rt); - } - host->alarms_template_with_foreach = NULL; - - debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname); - rrdvar_free_remaining_variables(host, &host->rrdvar_root_index); - health_alarm_log_free(host); #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ;tier++) { - if(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && - host->storage_instance[tier] && - !is_storage_engine_shared(host->storage_instance[tier])) - rrdeng_exit((struct rrdengine_instance *)host->storage_instance[tier]); + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE + && host->db[tier].instance + && !is_storage_engine_shared(host->db[tier].instance)) + rrdeng_exit((struct rrdengine_instance *)host->db[tier].instance); } #endif if (!netdata_exit && !force) { - info("Setting archive mode for host '%s'...", host->hostname); + info("Setting archive mode for host '%s'...", rrdhost_hostname(host)); rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); rrdhost_unlock(host); return; @@ -1143,24 +1164,13 @@ void rrdhost_free(RRDHOST *host, bool force) { // ------------------------------------------------------------------------ // remove it from the indexes - if(rrdhost_index_del(host) != host) - error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname); + rrdhost_index_del_hostname(host); + rrdhost_index_del_by_guid(host); // ------------------------------------------------------------------------ // unlink it from the host - if(host == localhost) { - localhost = host->next; - } - else { - // find the previous one - RRDHOST *h; - for(h = localhost; h && h->next != host ; h = h->next) ; - - // bypass it - if(h) h->next = host->next; - else error("Request to free RRDHOST '%s': cannot find it", host->hostname); - } + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(localhost, host, prev, next); // ------------------------------------------------------------------------ // free it @@ -1168,37 +1178,36 @@ void rrdhost_free(RRDHOST *host, bool force) { pthread_mutex_destroy(&host->aclk_state_lock); freez(host->aclk_state.claimed_id); freez(host->aclk_state.prev_claimed_id); - freez((void *)host->tags); - rrdlabels_destroy(host->host_labels); - freez((void *)host->os); - freez((void *)host->timezone); - freez((void *)host->abbrev_timezone); - freez(host->program_version); - freez(host->program_name); + string_freez(host->tags); + rrdlabels_destroy(host->rrdlabels); + string_freez(host->os); + string_freez(host->timezone); + string_freez(host->abbrev_timezone); + string_freez(host->program_name); + string_freez(host->program_version); rrdhost_system_info_free(host->system_info); freez(host->cache_dir); freez(host->varlib_dir); freez(host->rrdpush_send_api_key); freez(host->rrdpush_send_destination); - struct rrdpush_destinations *tmp_destination; - while (host->destinations) { - tmp_destination = host->destinations->next; - freez(host->destinations); - host->destinations = tmp_destination; - } - freez(host->health_default_exec); - freez(host->health_default_recipient); + rrdpush_destinations_free(host); + string_freez(host->health_default_exec); + string_freez(host->health_default_recipient); freez(host->health_log_filename); - freez(host->hostname); - freez(host->registry_hostname); + string_freez(host->registry_hostname); simple_pattern_free(host->rrdpush_send_charts_matching); rrdhost_unlock(host); netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock); netdata_rwlock_destroy(&host->rrdhost_rwlock); freez(host->node_id); + rrdfamily_index_destroy(host); + rrdfunctions_destroy(host); + rrdvariables_destroy(host->rrdvars); + rrdhost_destroy_rrdcontexts(host); + string_freez(host->hostname); freez(host); #ifdef ENABLE_ACLK if (wc) @@ -1209,9 +1218,14 @@ void rrdhost_free(RRDHOST *host, bool force) { void rrdhost_free_all(void) { rrd_wrlock(); + /* Make sure child-hosts are released before the localhost. */ - while(localhost->next) rrdhost_free(localhost->next, 1); - rrdhost_free(localhost, 1); + while(localhost && localhost->next) + rrdhost_free(localhost->next, 1); + + if(localhost) + rrdhost_free(localhost, 1); + rrd_unlock(); } @@ -1221,25 +1235,20 @@ void rrdhost_free_all(void) { void rrdhost_save_charts(RRDHOST *host) { if(!host) return; - info("Saving/Closing database of host '%s'...", host->hostname); + info("Saving/Closing database of host '%s'...", rrdhost_hostname(host)); RRDSET *st; // we get a write lock // to ensure only one thread is saving the database - rrdhost_wrlock(host); - rrdset_foreach_write(st, host) { - rrdset_rdlock(st); rrdset_save(st); - rrdset_unlock(st); } - - rrdhost_unlock(host); + rrdset_foreach_done(st); } static void rrdhost_load_auto_labels(void) { - DICTIONARY *labels = localhost->host_labels; + DICTIONARY *labels = localhost->rrdlabels; if (localhost->system_info->cloud_provider_type) rrdlabels_add(labels, "_cloud_provider_type", localhost->system_info->cloud_provider_type, RRDLABEL_SRC_AUTO); @@ -1301,13 +1310,31 @@ static void rrdhost_load_auto_labels(void) { add_aclk_host_labels(); + health_add_host_labels(); + rrdlabels_add( - labels, "_is_parent", (localhost->next || configured_as_parent()) ? "true" : "false", RRDLABEL_SRC_AUTO); + labels, "_is_parent", (localhost->senders_count > 0) ? "true" : "false", RRDLABEL_SRC_AUTO); if (localhost->rrdpush_send_destination) rrdlabels_add(labels, "_streams_to", localhost->rrdpush_send_destination, RRDLABEL_SRC_AUTO); } +void rrdhost_set_is_parent_label(int count) { + DICTIONARY *labels = localhost->rrdlabels; + + if (count == 0 || count == 1) { + rrdlabels_add( + labels, "_is_parent", (count) ? "true" : "false", RRDLABEL_SRC_AUTO); + + //queue a node info +#ifdef ENABLE_ACLK + if (netdata_cloud_setting) { + aclk_queue_node_info(localhost); + } +#endif + } +} + static void rrdhost_load_config_labels(void) { int status = config_load(NULL, 1, CONFIG_SECTION_HOST_LABEL); if(!status) { @@ -1320,7 +1347,7 @@ static void rrdhost_load_config_labels(void) { config_section_wrlock(co); struct config_option *cv; for(cv = co->values; cv ; cv = cv->next) { - rrdlabels_add(localhost->host_labels, cv->name, cv->value, RRDLABEL_SRC_CONFIG); + rrdlabels_add(localhost->rrdlabels, cv->name, cv->value, RRDLABEL_SRC_CONFIG); cv->flags |= CONFIG_VALUE_USED; } config_section_unlock(co); @@ -1339,41 +1366,37 @@ static void rrdhost_load_kubernetes_labels(void) { debug(D_RRDHOST, "Attempting to fetch external labels via %s", label_script); pid_t pid; - FILE *fp = mypopen(label_script, &pid); - if(!fp) return; + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input); + if(!fp_child_output) return; char buffer[1000 + 1]; - while (fgets(buffer, 1000, fp) != NULL) - rrdlabels_add_pair(localhost->host_labels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S); + while (fgets(buffer, 1000, fp_child_output) != NULL) + rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S); // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':' // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null - int rc = mypclose(fp, pid); + int rc = netdata_pclose(fp_child_input, fp_child_output, pid); if(rc) error("%s exited abnormally. Failed to get kubernetes labels.", label_script); } void reload_host_labels(void) { - if(!localhost->host_labels) - localhost->host_labels = rrdlabels_create(); + if(!localhost->rrdlabels) + localhost->rrdlabels = rrdlabels_create(); - rrdlabels_unmark_all(localhost->host_labels); + rrdlabels_unmark_all(localhost->rrdlabels); // priority is important here rrdhost_load_config_labels(); rrdhost_load_kubernetes_labels(); rrdhost_load_auto_labels(); - rrdlabels_remove_all_unmarked(localhost->host_labels); - sql_store_host_labels(localhost); + rrdlabels_remove_all_unmarked(localhost->rrdlabels); + metaqueue_store_host_labels(localhost->machine_guid); health_label_log_save(localhost); -/* TODO-GAPS - fix this so that it looks properly at the state and version of the sender - if(localhost->rrdpush_send_enabled && localhost->rrdpush_sender_buffer){ - localhost->labels.labels_flag |= RRDHOST_FLAG_STREAM_LABELS_UPDATE; - rrdpush_send_labels(localhost); - } -*/ + rrdpush_send_host_labels(localhost); health_reload(); } @@ -1383,23 +1406,18 @@ void reload_host_labels(void) { void rrdhost_delete_charts(RRDHOST *host) { if(!host) return; - info("Deleting database of host '%s'...", host->hostname); + info("Deleting database of host '%s'...", rrdhost_hostname(host)); RRDSET *st; // we get a write lock // to ensure only one thread is saving the database - rrdhost_wrlock(host); - rrdset_foreach_write(st, host) { - rrdset_rdlock(st); - rrdset_delete_files(st); - rrdset_unlock(st); + rrdset_delete_files(st); } + rrdset_foreach_done(st); recursively_delete_dir(host->cache_dir, "left over host"); - - rrdhost_unlock(host); } // ---------------------------------------------------------------------------- @@ -1408,29 +1426,26 @@ void rrdhost_delete_charts(RRDHOST *host) { void rrdhost_cleanup_charts(RRDHOST *host) { if(!host) return; - info("Cleaning up database of host '%s'...", host->hostname); + info("Cleaning up database of host '%s'...", rrdhost_hostname(host)); RRDSET *st; - uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); + uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); // we get a write lock // to ensure only one thread is saving the database - rrdhost_wrlock(host); - rrdset_foreach_write(st, host) { - rrdset_rdlock(st); if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)) rrdset_delete_files(st); + else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) rrdset_delete_obsolete_dimensions(st); + else rrdset_save(st); - rrdset_unlock(st); } - - rrdhost_unlock(host); + rrdset_foreach_done(st); } @@ -1459,11 +1474,9 @@ void rrdhost_cleanup_all(void) { RRDHOST *host; rrdhost_foreach_read(host) { - if (host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST) && !host->receiver -#ifdef ENABLE_DBENGINE + if (host != localhost && rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) && !host->receiver /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0])) -#endif + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) ) rrdhost_delete_charts(host); else @@ -1475,157 +1488,6 @@ void rrdhost_cleanup_all(void) { // ---------------------------------------------------------------------------- -// RRDHOST - save or delete all the host charts from disk - -void rrdhost_cleanup_obsolete_charts(RRDHOST *host) { - time_t now = now_realtime_sec(); - - RRDSET *st; - - uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); - -restart_after_removal: - rrdset_foreach_write(st, host) { - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) - && st->last_accessed_time + rrdset_free_obsolete_time < now - && st->last_updated.tv_sec + rrdset_free_obsolete_time < now - && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now - )) { - st->rrdhost->obsolete_charts_count--; -#ifdef ENABLE_DBENGINE - if(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - RRDDIM *rd, *last; - - rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED); - while (st->variables) rrdsetvar_free(st->variables); - while (st->alarms) rrdsetcalc_unlink(st->alarms); - rrdset_wrlock(st); - for (rd = st->dimensions, last = NULL ; likely(rd) ; ) { - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { - last = rd; - rd = rd->next; - continue; - } - - if (rrddim_flag_check(rd, RRDDIM_FLAG_ACLK)) { - last = rd; - rd = rd->next; - continue; - } - rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); - while (rd->variables) - rrddimvar_free(rd->variables); - - if (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { - rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); - - /* only a collector can mark a chart as obsolete, so we must remove the reference */ - - size_t tiers_available = 0, tiers_said_yes = 0; - for(int tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier]) { - tiers_available++; - - if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle)) - tiers_said_yes++; - - rd->tiers[tier]->db_collection_handle = NULL; - } - } - - if (tiers_available == tiers_said_yes && tiers_said_yes) { - /* This metric has no data and no references */ - delete_dimension_uuid(&rd->metric_uuid); - rrddim_free(st, rd); - if (unlikely(!last)) { - rd = st->dimensions; - } - else { - rd = last->next; - } - continue; - } -#ifdef ENABLE_ACLK - else - queue_dimension_to_aclk(rd, rd->last_collected_time.tv_sec); -#endif - } - last = rd; - rd = rd->next; - } - rrdset_unlock(st); - - debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id); - rrdvar_free_remaining_variables(host, &st->rrdvar_root_index); - - rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); - - if (st->dimensions) { - /* If the chart still has dimensions don't delete it from the metadata log */ - continue; - } - } -#endif - rrdset_rdlock(st); - - if(rrdhost_delete_obsolete_charts) - rrdset_delete_files(st); - else - rrdset_save(st); - - rrdset_unlock(st); - - rrdset_free(st); - goto restart_after_removal; - } -#ifdef ENABLE_ACLK - else - sql_check_chart_liveness(st); -#endif - } -} - -void rrdset_check_obsoletion(RRDHOST *host) -{ - RRDSET *st; - time_t last_entry_t; - rrdset_foreach_read(st, host) { - last_entry_t = rrdset_last_entry_t(st); - if (last_entry_t && last_entry_t < host->senders_connect_time) { - rrdset_is_obsolete(st); - } - } -} - -void rrd_cleanup_obsolete_charts() -{ - rrd_rdlock(); - - RRDHOST *host; - rrdhost_foreach_read(host) - { - if (host->obsolete_charts_count) { - rrdhost_wrlock(host); - rrdhost_cleanup_obsolete_charts(host); - rrdhost_unlock(host); - } - - if ( host != localhost && - host->trigger_chart_obsoletion_check && - ((host->senders_last_chart_command && - host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec()) - || (host->senders_connect_time + 300 < now_realtime_sec())) ) { - rrdhost_rdlock(host); - rrdset_check_obsoletion(host); - rrdhost_unlock(host); - host->trigger_chart_obsoletion_check = 0; - } - } - - rrd_unlock(); -} - -// ---------------------------------------------------------------------------- // RRDHOST - set system info from environment variables // system_info fields must be heap allocated or NULL int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) { @@ -1761,57 +1623,18 @@ int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, ch return res; } -/** - * Alarm Compare ID - * - * Callback function used with the binary trees to compare the id of RRDCALC - * - * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree - * @param b the pointer to the binary tree. - * - * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. - */ -int alarm_compare_id(void *a, void *b) { - register uint32_t hash1 = ((RRDCALC *)a)->id; - register uint32_t hash2 = ((RRDCALC *)b)->id; - - if(hash1 < hash2) return -1; - else if(hash1 > hash2) return 1; - - return 0; -} - -/** - * Alarm Compare NAME - * - * Callback function used with the binary trees to compare the name of RRDCALC - * - * @param a a pointer to the RRDCAL item to insert,compare or update the binary tree - * @param b the pointer to the binary tree. - * - * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. - */ -int alarm_compare_name(void *a, void *b) { - RRDCALC *in1 = (RRDCALC *)a; - RRDCALC *in2 = (RRDCALC *)b; - - if(in1->hash < in2->hash) return -1; - else if(in1->hash > in2->hash) return 1; - - return strcmp(in1->name,in2->name); -} - // Added for gap-filling, if this proves to be a bottleneck in large-scale systems then we will need to cache // the last entry times as the metric updates, but let's see if it is a problem first. time_t rrdhost_last_entry_t(RRDHOST *h) { - rrdhost_rdlock(h); RRDSET *st; time_t result = 0; + rrdset_foreach_read(st, h) { time_t st_last = rrdset_last_entry_t(st); + if (st_last > result) result = st_last; } - rrdhost_unlock(h); + rrdset_foreach_done(st); return result; } diff --git a/database/rrdlabels.c b/database/rrdlabels.c index 5198cb4aa..743499ab5 100644 --- a/database/rrdlabels.c +++ b/database/rrdlabels.c @@ -369,12 +369,15 @@ __attribute__((constructor)) void initialize_labels_keys_char_map(void) { } -static size_t rrdlabels_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty) { +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { if(unlikely(!dst_size)) return 0; + if(unlikely(!src || !*src)) { strncpyz((char *)dst, empty, dst_size); dst[dst_size - 1] = '\0'; - return strlen((char *)dst); + size_t len = strlen((char *)dst); + if(multibyte_length) *multibyte_length = len; + return len; } unsigned char *d = dst; @@ -385,7 +388,9 @@ static size_t rrdlabels_sanitize(unsigned char *dst, const unsigned char *src, s // copy while converting, but keep only one white space // we start wil last_is_space = 1 to skip leading spaces int last_is_space = 1; + size_t mblen = 0; + while(*src && d < end) { unsigned char c = *src; @@ -446,28 +451,34 @@ static size_t rrdlabels_sanitize(unsigned char *dst, const unsigned char *src, s *d = '\0'; // check if dst is all underscores and empty it if it is - d = dst; - while(*d == '_') d++; - if(unlikely(*d == '\0')) { - *dst = '\0'; - mblen = 0; + if(*dst == '_') { + unsigned char *t = dst; + while (*t == '_') t++; + if (unlikely(*t == '\0')) { + *dst = '\0'; + mblen = 0; + } } if(unlikely(*dst == '\0')) { strncpyz((char *)dst, empty, dst_size); dst[dst_size - 1] = '\0'; - return strlen((char *)dst); + mblen = strlen((char *)dst); + if(multibyte_length) *multibyte_length = mblen; + return mblen; } - return mblen; + if(multibyte_length) *multibyte_length = mblen; + + return d - dst; } static inline size_t rrdlabels_sanitize_name(char *dst, const char *src, size_t dst_size) { - return rrdlabels_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, ""); + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_names_char_map, 0, "", NULL); } static inline size_t rrdlabels_sanitize_value(char *dst, const char *src, size_t dst_size) { - return rrdlabels_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]"); + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_size, label_values_char_map, 1, "[none]", NULL); } // ---------------------------------------------------------------------------- @@ -478,9 +489,7 @@ typedef struct rrdlabel { RRDLABEL_SRC label_source; } RRDLABEL; -static void rrdlabel_insert_callback(const char *name, void *value, void *data) { - (void)name; - DICTIONARY *dict = (DICTIONARY *)data; (void)dict; +static void rrdlabel_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *dict_ptr __maybe_unused) { RRDLABEL *lb = (RRDLABEL *)value; // label_value is already allocated by the STRING @@ -488,42 +497,43 @@ static void rrdlabel_insert_callback(const char *name, void *value, void *data) lb->label_source &= ~RRDLABEL_FLAG_OLD; } -static void rrdlabel_delete_callback(const char *name, void *value, void *data) { - (void)name; - DICTIONARY *dict = (DICTIONARY *)data; (void)dict; +static void rrdlabel_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *dict_ptr __maybe_unused) { RRDLABEL *lb = (RRDLABEL *)value; string_freez(lb->label_value); lb->label_value = NULL; } -static void rrdlabel_conflict_callback(const char *name, void *oldvalue, void *newvalue, void *data) { - (void)name; - DICTIONARY *dict = (DICTIONARY *)data; (void)dict; +static bool rrdlabel_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *oldvalue, void *newvalue, void *dict_ptr __maybe_unused) { RRDLABEL *lbold = (RRDLABEL *)oldvalue; RRDLABEL *lbnew = (RRDLABEL *)newvalue; - if(lbold->label_value == lbnew->label_value || strcmp(string2str(lbold->label_value), string2str(lbnew->label_value)) == 0) { + if(lbold->label_value == lbnew->label_value) { // they are the same + lbold->label_source |= lbnew->label_source; lbold->label_source |= RRDLABEL_FLAG_OLD; lbold->label_source &= ~RRDLABEL_FLAG_NEW; // free the new one string_freez(lbnew->label_value); + + return false; } - else { - // they are different - string_freez(lbold->label_value); - lbold->label_value = lbnew->label_value; - lbold->label_source = lbnew->label_source; - lbold->label_source |= RRDLABEL_FLAG_NEW; - lbold->label_source &= ~RRDLABEL_FLAG_OLD; - } + + // they are different + + string_freez(lbold->label_value); + lbold->label_value = lbnew->label_value; + lbold->label_source = lbnew->label_source; + lbold->label_source |= RRDLABEL_FLAG_NEW; + lbold->label_source &= ~RRDLABEL_FLAG_OLD; + + return true; } DICTIONARY *rrdlabels_create(void) { - DICTIONARY *dict = dictionary_create(DICTIONARY_FLAG_DONT_OVERWRITE_VALUE); + DICTIONARY *dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); dictionary_register_insert_callback(dict, rrdlabel_insert_callback, dict); dictionary_register_delete_callback(dict, rrdlabel_delete_callback, dict); dictionary_register_conflict_callback(dict, rrdlabel_conflict_callback, dict); @@ -538,6 +548,9 @@ void rrdlabels_destroy(DICTIONARY *labels_dict) { dictionary_destroy(labels_dict); } +void rrdlabels_flush(DICTIONARY *labels_dict) { + dictionary_flush(labels_dict); +} // ---------------------------------------------------------------------------- // rrdlabels_add() @@ -620,10 +633,12 @@ void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls) { } // ---------------------------------------------------------------------------- -// rrdlabels_get_to_buffer_or_null() +// rrdlabels_get_value_to_buffer_or_null() void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null) { - DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key); + if(!labels) return; + + const DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key); RRDLABEL *lb = dictionary_acquired_item_value(acquired_item); if(lb && lb->label_value) @@ -634,15 +649,23 @@ void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const dictionary_acquired_item_release(labels, acquired_item); } +// ---------------------------------------------------------------------------- +// rrdlabels_get_value_to_char_or_null() + +void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key) { + const DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key); + RRDLABEL *lb = dictionary_acquired_item_value(acquired_item); + + *value = (lb && lb->label_value) ? strdupz(string2str(lb->label_value)) : NULL; + + dictionary_acquired_item_release(labels, acquired_item); +} // ---------------------------------------------------------------------------- // rrdlabels_unmark_all() // remove labels RRDLABEL_FLAG_OLD and RRDLABEL_FLAG_NEW from all dictionary items -static int remove_flags_old_new(const char *name, void *value, void *data) { - (void)name; - (void)data; - +static int remove_flags_old_new(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { RRDLABEL *lb = (RRDLABEL *)value; if(lb->label_source & RRDLABEL_FLAG_OLD) lb->label_source &= ~RRDLABEL_FLAG_OLD; @@ -660,12 +683,13 @@ void rrdlabels_unmark_all(DICTIONARY *labels) { // rrdlabels_remove_all_unmarked() // remove dictionary items that are neither old, nor new -static int remove_not_old_not_new_callback(const char *name, void *value, void *data) { +static int remove_not_old_not_new_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); DICTIONARY *dict = (DICTIONARY *)data; RRDLABEL *lb = (RRDLABEL *)value; if(!(lb->label_source & (RRDLABEL_FLAG_OLD | RRDLABEL_FLAG_NEW | RRDLABEL_FLAG_PERMANENT))) { - dictionary_del_having_write_lock(dict, name); + dictionary_del(dict, name); return 1; } @@ -685,7 +709,8 @@ struct labels_walkthrough { void *data; }; -static int labels_walkthrough_callback(const char *name, void *value, void *data) { +static int labels_walkthrough_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); struct labels_walkthrough *d = (struct labels_walkthrough *)data; RRDLABEL *lb = (RRDLABEL *)value; @@ -717,7 +742,8 @@ int rrdlabels_sorted_walkthrough_read(DICTIONARY *labels, int (*callback)(const // rrdlabels_migrate_to_these() // migrate an existing label list to a new list, INPLACE -static int copy_label_to_dictionary_callback(const char *name, void *value, void *data) { +static int copy_label_to_dictionary_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); DICTIONARY *dst = (DICTIONARY *)data; RRDLABEL *lb = (RRDLABEL *)value; labels_add_already_sanitized(dst, name, string2str(lb->label_value), lb->label_source); @@ -754,7 +780,8 @@ struct simple_pattern_match_name_value { char equal; }; -static int simple_pattern_match_name_only_callback(const char *name, void *value, void *data) { +static int simple_pattern_match_name_only_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; (void)value; @@ -764,7 +791,8 @@ static int simple_pattern_match_name_only_callback(const char *name, void *value return 0; } -static int simple_pattern_match_name_and_value_callback(const char *name, void *value, void *data) { +static int simple_pattern_match_name_and_value_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; RRDLABEL *lb = (RRDLABEL *)value; @@ -830,7 +858,9 @@ bool rrdlabels_match_simple_pattern(DICTIONARY *labels, const char *simple_patte // ---------------------------------------------------------------------------- // Log all labels -static int rrdlabels_log_label_to_buffer_callback(const char *name, void *value, void *data) { +static int rrdlabels_log_label_to_buffer_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); + BUFFER *wb = (BUFFER *)data; RRDLABEL *lb = (RRDLABEL *)value; @@ -880,7 +910,8 @@ struct labels_to_buffer { size_t count; }; -static int label_to_buffer_callback(const char *name, void *value, void *data) { +static int label_to_buffer_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name(item); struct labels_to_buffer *t = (struct labels_to_buffer *)data; RRDLABEL *lb = (RRDLABEL *)value; @@ -926,23 +957,17 @@ int rrdlabels_to_buffer(DICTIONARY *labels, BUFFER *wb, const char *before_each, return dictionary_walkthrough_read(labels, label_to_buffer_callback, (void *)&tmp); } -static int chart_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { - RRDSET *st = (RRDSET *)data; - sql_store_chart_label(st->chart_uuid, (int)ls, (char *)name, (char *)value); - return 1; -} - void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels) { - if(!st->state->chart_labels) - st->state->chart_labels = rrdlabels_create(); + if(!st->rrdlabels) + st->rrdlabels = rrdlabels_create(); if (new_rrdlabels) - rrdlabels_migrate_to_these(st->state->chart_labels, new_rrdlabels); + rrdlabels_migrate_to_these(st->rrdlabels, new_rrdlabels); - // TODO - we should also cleanup sqlite from old new_rrdlabels that have been removed - rrdlabels_walkthrough_read(st->state->chart_labels, chart_label_store_to_sql_callback, st); + metaqueue_chart_labels(st); } + // ---------------------------------------------------------------------------- // rrdlabels unit test @@ -1107,12 +1132,14 @@ int rrdlabels_unittest_simple_pattern() { int rrdlabels_unittest_sanitize_value(const char *src, const char *expected) { char buf[RRDLABELS_MAX_VALUE_LENGTH + 1]; - size_t mblen = rrdlabels_sanitize_value(buf, src, RRDLABELS_MAX_VALUE_LENGTH); + size_t len = rrdlabels_sanitize_value(buf, src, RRDLABELS_MAX_VALUE_LENGTH); + size_t expected_len = strlen(expected); int err = 0; if(strcmp(buf, expected) != 0) err = 1; + if(len != expected_len) err = 1; - fprintf(stderr, "%s(%s): %s, expected '%s', got '%s', mblen = %zu, bytes = %zu\n", __FUNCTION__, src, (err==1)?"FAILED":"OK", expected, buf, mblen, strlen(buf)); + fprintf(stderr, "%s(%s): %s, expected '%s', got '%s', expected bytes = %zu, got bytes = %zu\n", __FUNCTION__, src, (err==1)?"FAILED":"OK", expected, buf, expected_len, strlen(buf)); return err; } diff --git a/database/rrdset.c b/database/rrdset.c index 9693ee211..6eb3c7105 100644 --- a/database/rrdset.c +++ b/database/rrdset.c @@ -3,97 +3,452 @@ #define NETDATA_RRD_INTERNALS #include "rrd.h" #include <sched.h> +#include "storage_engine.h" -void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line) { - debug(D_RRD_CALLS, "Checking read lock on chart '%s'", st->id); +// ---------------------------------------------------------------------------- +// RRDSET name index - int ret = netdata_rwlock_trywrlock(&st->rrdset_rwlock); - if(ret == 0) - fatal("RRDSET '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", st->id, function, line, file); +static void rrdset_name_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost __maybe_unused) { + RRDSET *st = rrdset; + rrdset_flag_set(st, RRDSET_FLAG_INDEXED_NAME); +} +static void rrdset_name_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost __maybe_unused) { + RRDSET *st = rrdset; + rrdset_flag_clear(st, RRDSET_FLAG_INDEXED_NAME); } -void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line) { - debug(D_RRD_CALLS, "Checking write lock on chart '%s'", st->id); +static inline void rrdset_index_add_name(RRDHOST *host, RRDSET *st) { + if(!st->name) return; + dictionary_set(host->rrdset_root_index_name, rrdset_name(st), st, sizeof(RRDSET)); +} - int ret = netdata_rwlock_tryrdlock(&st->rrdset_rwlock); - if(ret == 0) - fatal("RRDSET '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", st->id, function, line, file); +static inline void rrdset_index_del_name(RRDHOST *host, RRDSET *st) { + if(rrdset_flag_check(st, RRDSET_FLAG_INDEXED_NAME)) + dictionary_del(host->rrdset_root_index_name, rrdset_name(st)); } +static inline RRDSET *rrdset_index_find_name(RRDHOST *host, const char *name) { + return dictionary_get(host->rrdset_root_index_name, name); +} // ---------------------------------------------------------------------------- // RRDSET index -int rrdset_compare(void* a, void* b) { - if(((RRDSET *)a)->hash < ((RRDSET *)b)->hash) return -1; - else if(((RRDSET *)a)->hash > ((RRDSET *)b)->hash) return 1; - else return strcmp(((RRDSET *)a)->id, ((RRDSET *)b)->id); +static inline void rrdset_update_permanent_labels(RRDSET *st) { + if(!st->rrdlabels) return; + + rrdlabels_add(st->rrdlabels, "_collect_plugin", rrdset_plugin_name(st), RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); + rrdlabels_add(st->rrdlabels, "_collect_module", rrdset_module_name(st), RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); } -static RRDSET *rrdset_index_find(RRDHOST *host, const char *id, uint32_t hash) { - RRDSET tmp; - strncpyz(tmp.id, id, RRD_ID_LENGTH_MAX); - tmp.hash = (hash)?hash:simple_hash(tmp.id); +static STRING *rrdset_fix_name(RRDHOST *host, const char *chart_full_id, const char *type, const char *current_name, const char *name) { + if(!name || !*name) return NULL; + + char full_name[RRD_ID_LENGTH_MAX + 1]; + char sanitized_name[CONFIG_MAX_VALUE + 1]; + char new_name[CONFIG_MAX_VALUE + 1]; + + snprintfz(full_name, RRD_ID_LENGTH_MAX, "%s.%s", type, name); + rrdset_strncpyz_name(sanitized_name, full_name, CONFIG_MAX_VALUE); + strncpyz(new_name, sanitized_name, CONFIG_MAX_VALUE); + + if(rrdset_index_find_name(host, new_name)) { + debug(D_RRD_CALLS, "RRDSET: chart name '%s' on host '%s' already exists.", new_name, rrdhost_hostname(host)); + if(!strcmp(chart_full_id, full_name) && (!current_name || !*current_name)) { + unsigned i = 1; + + do { + snprintfz(new_name, CONFIG_MAX_VALUE, "%s_%u", sanitized_name, i); + i++; + } while (rrdset_index_find_name(host, new_name)); - return (RRDSET *)avl_search_lock(&(host->rrdset_root_index), (avl_t *) &tmp); + info("RRDSET: using name '%s' for chart '%s' on host '%s'.", new_name, full_name, rrdhost_hostname(host)); + } + else + return NULL; + } + + return string_strdupz(new_name); } -// ---------------------------------------------------------------------------- -// RRDSET name index +struct rrdset_constructor { + RRDHOST *host; + const char *type; + const char *id; + const char *name; + const char *family; + const char *context; + const char *title; + const char *units; + const char *plugin; + const char *module; + long priority; + int update_every; + RRDSET_TYPE chart_type; + RRD_MEMORY_MODE memory_mode; + long history_entries; + + enum { + RRDSET_REACT_NONE = 0, + RRDSET_REACT_NEW = (1 << 0), + RRDSET_REACT_UPDATED = (1 << 1), + RRDSET_REACT_PLUGIN_UPDATED = (1 << 2), + RRDSET_REACT_MODULE_UPDATED = (1 << 3), + RRDSET_REACT_CHART_ACTIVATED = (1 << 4), + } react_action; +}; + +// the constructor - the dictionary is write locked while this runs +static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *constructor_data) { + struct rrdset_constructor *ctr = constructor_data; + RRDHOST *host = ctr->host; + RRDSET *st = rrdset; + + const char *chart_full_id = dictionary_acquired_item_name(item); + + st->id = string_strdupz(chart_full_id); + + st->name = rrdset_fix_name(host, chart_full_id, ctr->type, NULL, ctr->name); + if(!st->name) + st->name = rrdset_fix_name(host, chart_full_id, ctr->type, NULL, ctr->id); + rrdset_index_add_name(host, st); + + st->parts.id = string_strdupz(ctr->id); + st->parts.type = string_strdupz(ctr->type); + st->parts.name = string_strdupz(ctr->name); + + st->family = (ctr->family && *ctr->family) ? rrd_string_strdupz(ctr->family) : rrd_string_strdupz(ctr->type); + st->context = (ctr->context && *ctr->context) ? rrd_string_strdupz(ctr->context) : rrd_string_strdupz(chart_full_id); + + st->units = rrd_string_strdupz(ctr->units); + st->title = rrd_string_strdupz(ctr->title); + st->plugin_name = rrd_string_strdupz(ctr->plugin); + st->module_name = rrd_string_strdupz(ctr->module); + st->priority = ctr->priority; -#define rrdset_from_avlname(avlname_ptr) ((RRDSET *)((avlname_ptr) - offsetof(RRDSET, avlname))) + st->cache_dir = rrdset_cache_dir(host, chart_full_id); + st->entries = (ctr->memory_mode != RRD_MEMORY_MODE_DBENGINE) ? align_entries_to_pagesize(ctr->memory_mode, ctr->history_entries) : 5; + st->update_every = ctr->update_every; + st->rrd_memory_mode = ctr->memory_mode; -int rrdset_compare_name(void* a, void* b) { - RRDSET *A = rrdset_from_avlname(a); - RRDSET *B = rrdset_from_avlname(b); + st->chart_type = ctr->chart_type; + st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); + st->rrdhost = host; + + st->flags = RRDSET_FLAG_SYNC_CLOCK + | RRDSET_FLAG_INDEXED_ID + | RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED + | RRDSET_FLAG_SENDER_REPLICATION_FINISHED + ; + + netdata_rwlock_init(&st->alerts.rwlock); + + if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { + if(!rrdset_memory_load_or_create_map_save(st, st->rrd_memory_mode)) { + info("Failed to use db mode %s for chart '%s', falling back to ram mode.", (st->rrd_memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", rrdset_name(st)); + st->rrd_memory_mode = RRD_MEMORY_MODE_RAM; + } + } + + // initialize the db tiers + { + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; + if(!eng) continue; + + st->storage_metrics_groups[tier] = eng->api.collect_ops.metrics_group_get(host->db[tier].instance, &st->chart_uuid); + } + } + + rrddim_index_init(st); + + // chart variables - we need this for data collection to work (collector given chart variables) - not only health + rrdsetvar_index_init(st); + + if (host->health_enabled) { + st->rrdfamily = rrdfamily_add_and_acquire(host, rrdset_family(st)); + st->rrdvars = rrdvariables_create(); + rrddimvar_index_init(st); + } + + st->rrdlabels = rrdlabels_create(); + rrdset_update_permanent_labels(st); - // fprintf(stderr, "COMPARING: %s with %s\n", A->name, B->name); + st->green = NAN; + st->red = NAN; - if(A->hash_name < B->hash_name) return -1; - else if(A->hash_name > B->hash_name) return 1; - else return strcmp(A->name, B->name); + ctr->react_action = RRDSET_REACT_NEW; } -RRDSET *rrdset_index_add_name(RRDHOST *host, RRDSET *st) { - void *result; - // fprintf(stderr, "ADDING: %s (name: %s)\n", st->id, st->name); - result = avl_insert_lock(&host->rrdset_root_index_name, (avl_t *) (&st->avlname)); - if(result) return rrdset_from_avlname(result); - return NULL; +// the destructor - the dictionary is write locked while this runs +static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *rrdhost) { + RRDHOST *host = rrdhost; + RRDSET *st = rrdset; + + rrdset_flag_clear(st, RRDSET_FLAG_INDEXED_ID); + + // cleanup storage engines + { + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; + if(!eng) continue; + + eng->api.collect_ops.metrics_group_release(host->db[tier].instance, st->storage_metrics_groups[tier]); + } + } + + // remove it from the name index + rrdset_index_del_name(host, st); + + // release the collector info + dictionary_destroy(st->functions_view); + + rrdcalc_unlink_all_rrdset_alerts(st); + + // ------------------------------------------------------------------------ + // the order of destruction is important here + + // 1. delete RRDDIMVAR index - this will speed up the destruction of RRDDIMs + // because each dimension loops to find its own variables in this index. + // There are no references to the items on this index from the dimensions. + // To find their own, they have to walk-through the dictionary. + rrddimvar_index_destroy(st); // destroy the rrddimvar index + + // 2. delete RRDSETVAR index + rrdsetvar_index_destroy(st); // destroy the rrdsetvar index + + // 3. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this) + rrdvariables_destroy(st->rrdvars); // free all variables and destroy the rrdvar dictionary + + // 4. delete RRDFAMILY - this has to be last, because RRDDIMVAR and RRDSETVAR need the reference counter + rrdfamily_release(host, st->rrdfamily); // release the acquired rrdfamily -- has to be after all variables + + // 5. delete RRDDIMs, now their variables are not existing, so this is fast + rrddim_index_destroy(st); // free all the dimensions and destroy the dimensions index + + // 6. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels) + rrdcontext_removed_rrdset(st); // let contexts know + + // 7. destroy the chart labels + rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know + + rrdset_memory_file_free(st); // remove files of db mode save and map + + // ------------------------------------------------------------------------ + // free it + + netdata_rwlock_destroy(&st->alerts.rwlock); + + string_freez(st->id); + string_freez(st->name); + string_freez(st->parts.id); + string_freez(st->parts.type); + string_freez(st->parts.name); + string_freez(st->family); + string_freez(st->title); + string_freez(st->units); + string_freez(st->context); + string_freez(st->plugin_name); + string_freez(st->module_name); + + freez(st->exporting_flags); + freez(st->cache_dir); } -RRDSET *rrdset_index_del_name(RRDHOST *host, RRDSET *st) { - void *result; - // fprintf(stderr, "DELETING: %s (name: %s)\n", st->id, st->name); - result = (RRDSET *)avl_remove_lock(&((host)->rrdset_root_index_name), (avl_t *)(&st->avlname)); - if(result) return rrdset_from_avlname(result); - return NULL; +// the item to be inserted, is already in the dictionary +// this callback deals with the situation, migrating the existing object to the new values +// the dictionary is write locked while this runs +static bool rrdset_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *new_rrdset, void *constructor_data) { + (void)new_rrdset; // it is NULL + + struct rrdset_constructor *ctr = constructor_data; + RRDSET *st = rrdset; + + rrdset_isnot_obsolete(st); + + ctr->react_action = RRDSET_REACT_NONE; + + if (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED)) { + rrdset_flag_clear(st, RRDSET_FLAG_ARCHIVED); + ctr->react_action |= RRDSET_REACT_CHART_ACTIVATED; + } + + if (rrdset_reset_name(st, (ctr->name && *ctr->name) ? ctr->name : ctr->id) == 2) + ctr->react_action |= RRDSET_REACT_UPDATED; + + if (unlikely(st->priority != ctr->priority)) { + st->priority = ctr->priority; + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + if (unlikely(st->update_every != ctr->update_every)) { + rrdset_set_update_every(st, ctr->update_every); + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + if(ctr->plugin && *ctr->plugin) { + STRING *old_plugin = st->plugin_name; + st->plugin_name = rrd_string_strdupz(ctr->plugin); + if (old_plugin != st->plugin_name) + ctr->react_action |= RRDSET_REACT_PLUGIN_UPDATED; + string_freez(old_plugin); + } + + if(ctr->module && *ctr->module) { + STRING *old_module = st->module_name; + st->module_name = rrd_string_strdupz(ctr->module); + if (old_module != st->module_name) + ctr->react_action |= RRDSET_REACT_MODULE_UPDATED; + string_freez(old_module); + } + + if(ctr->title && *ctr->title) { + STRING *old_title = st->title; + st->title = rrd_string_strdupz(ctr->title); + if(old_title != st->title) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_title); + } + + if(ctr->units && *ctr->units) { + STRING *old_units = st->units; + st->units = rrd_string_strdupz(ctr->units); + if(old_units != st->units) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_units); + } + + if(ctr->family && *ctr->family) { + STRING *old_family = st->family; + st->family = rrd_string_strdupz(ctr->family); + if(old_family != st->family) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_family); + + // TODO - we should rename RRDFAMILY variables + } + + if(ctr->context && *ctr->context) { + STRING *old_context = st->context; + st->context = rrd_string_strdupz(ctr->context); + if(old_context != st->context) + ctr->react_action |= RRDSET_REACT_UPDATED; + string_freez(old_context); + } + + if(st->chart_type != ctr->chart_type) { + st->chart_type = ctr->chart_type; + ctr->react_action |= RRDSET_REACT_UPDATED; + } + + rrdset_update_permanent_labels(st); + + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + return ctr->react_action != RRDSET_REACT_NONE; } +// this is called after all insertions/conflicts, with the dictionary unlocked, with a reference to RRDSET +// so, any actions requiring locks on other objects, should be placed here +static void rrdset_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdset, void *constructor_data) { + struct rrdset_constructor *ctr = constructor_data; + RRDSET *st = rrdset; + RRDHOST *host = st->rrdhost; -// ---------------------------------------------------------------------------- -// RRDSET - find charts + st->last_accessed_time = now_realtime_sec(); + + if(host->health_enabled && (ctr->react_action & (RRDSET_REACT_NEW | RRDSET_REACT_CHART_ACTIVATED))) { + rrdset_flag_set(st, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + } + + if(ctr->react_action & (RRDSET_REACT_NEW | RRDSET_REACT_PLUGIN_UPDATED | RRDSET_REACT_MODULE_UPDATED)) { + if (ctr->react_action & RRDSET_REACT_NEW) { + if(unlikely(rrdcontext_find_chart_uuid(st, &st->chart_uuid))) { + uuid_generate(st->chart_uuid); + bool found_in_sql = false; (void)found_in_sql; + +// bool found_in_sql = true; +// if(unlikely(sql_find_chart_uuid(host, st, &st->chart_uuid))) { +// uuid_generate(st->chart_uuid); +// found_in_sql = false; +// } + +#ifdef NETDATA_INTERNAL_CHECKS + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(st->chart_uuid, uuid_str); + error_report("Chart UUID for host %s chart [%s] not found in context. It is now set to %s (%s)", + string2str(host->hostname), + string2str(st->name), uuid_str, found_in_sql ? "found in sqlite" : "newly generated"); +#endif + } + } + rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + rrdcontext_updated_rrdset(st); +} + +void rrdset_index_init(RRDHOST *host) { + if(!host->rrdset_root_index) { + host->rrdset_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdset_root_index, rrdset_insert_callback, NULL); + dictionary_register_conflict_callback(host->rrdset_root_index, rrdset_conflict_callback, NULL); + dictionary_register_react_callback(host->rrdset_root_index, rrdset_react_callback, NULL); + dictionary_register_delete_callback(host->rrdset_root_index, rrdset_delete_callback, host); + } + + if(!host->rrdset_root_index_name) { + host->rrdset_root_index_name = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + + dictionary_register_insert_callback(host->rrdset_root_index_name, rrdset_name_insert_callback, host); + dictionary_register_delete_callback(host->rrdset_root_index_name, rrdset_name_delete_callback, host); + } +} -static inline RRDSET *rrdset_index_find_name(RRDHOST *host, const char *name, uint32_t hash) { - void *result = NULL; - RRDSET tmp; - tmp.name = name; - tmp.hash_name = (hash)?hash:simple_hash(tmp.name); +void rrdset_index_destroy(RRDHOST *host) { + // destroy the name index first + dictionary_destroy(host->rrdset_root_index_name); + host->rrdset_root_index_name = NULL; - result = avl_search_lock(&host->rrdset_root_index_name, (avl_t *) (&(tmp.avlname))); - if(result) return rrdset_from_avlname(result); + // destroy the id index last + dictionary_destroy(host->rrdset_root_index); + host->rrdset_root_index = NULL; +} + +static inline RRDSET *rrdset_index_add(RRDHOST *host, const char *id, struct rrdset_constructor *st_ctr) { + return dictionary_set_advanced(host->rrdset_root_index, id, -1, NULL, sizeof(RRDSET), st_ctr); +} + +static inline void rrdset_index_del(RRDHOST *host, RRDSET *st) { + if(rrdset_flag_check(st, RRDSET_FLAG_INDEXED_ID)) + dictionary_del(host->rrdset_root_index, rrdset_id(st)); +} - return NULL; +static RRDSET *rrdset_index_find(RRDHOST *host, const char *id) { + // TODO - the name index should have an acquired dictionary item, not just a pointer to RRDSET + return dictionary_get(host->rrdset_root_index, id); } +// ---------------------------------------------------------------------------- +// RRDSET - find charts + inline RRDSET *rrdset_find(RRDHOST *host, const char *id) { - debug(D_RRD_CALLS, "rrdset_find() for chart '%s' in host '%s'", id, host->hostname); - RRDSET *st = rrdset_index_find(host, id, 0); + debug(D_RRD_CALLS, "rrdset_find() for chart '%s' in host '%s'", id, rrdhost_hostname(host)); + RRDSET *st = rrdset_index_find(host, id); + + if(st) + st->last_accessed_time = now_realtime_sec(); + return(st); } inline RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id) { - debug(D_RRD_CALLS, "rrdset_find_bytype() for chart '%s.%s' in host '%s'", type, id, host->hostname); + debug(D_RRD_CALLS, "rrdset_find_bytype() for chart '%s.%s' in host '%s'", type, id, rrdhost_hostname(host)); char buf[RRD_ID_LENGTH_MAX + 1]; strncpyz(buf, type, RRD_ID_LENGTH_MAX - 1); @@ -105,8 +460,8 @@ inline RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *i } inline RRDSET *rrdset_find_byname(RRDHOST *host, const char *name) { - debug(D_RRD_CALLS, "rrdset_find_byname() for chart '%s' in host '%s'", name, host->hostname); - RRDSET *st = rrdset_index_find_name(host, name, 0); + debug(D_RRD_CALLS, "rrdset_find_byname() for chart '%s' in host '%s'", name, rrdhost_hostname(host)); + RRDSET *st = rrdset_index_find_name(host, name); return(st); } @@ -128,57 +483,32 @@ char *rrdset_strncpyz_name(char *to, const char *from, size_t length) { return to; } -int rrdset_set_name(RRDSET *st, const char *name) { - if(unlikely(st->name && !strcmp(st->name, name))) +int rrdset_reset_name(RRDSET *st, const char *name) { + if(unlikely(!strcmp(rrdset_name(st), name))) return 1; RRDHOST *host = st->rrdhost; - debug(D_RRD_CALLS, "rrdset_set_name() old: '%s', new: '%s'", st->name?st->name:"", name); - - char full_name[RRD_ID_LENGTH_MAX + 1]; - char sanitized_name[CONFIG_MAX_VALUE + 1]; - char new_name[CONFIG_MAX_VALUE + 1]; - - snprintfz(full_name, RRD_ID_LENGTH_MAX, "%s.%s", st->type, name); - rrdset_strncpyz_name(sanitized_name, full_name, CONFIG_MAX_VALUE); - strncpyz(new_name, sanitized_name, CONFIG_MAX_VALUE); - - if(rrdset_index_find_name(host, new_name, 0)) { - debug(D_RRD_CALLS, "RRDSET: chart name '%s' on host '%s' already exists.", new_name, host->hostname); - if(!strcmp(st->id, full_name) && !st->name) { - unsigned i = 1; + debug(D_RRD_CALLS, "rrdset_reset_name() old: '%s', new: '%s'", rrdset_name(st), name); - do { - snprintfz(new_name, CONFIG_MAX_VALUE, "%s_%u", sanitized_name, i); - i++; - } while (rrdset_index_find_name(host, new_name, 0)); - - info("RRDSET: using name '%s' for chart '%s' on host '%s'.", new_name, full_name, host->hostname); - } else { - return 0; - } - } + STRING *name_string = rrdset_fix_name(host, rrdset_id(st), rrdset_parts_type(st), string2str(st->name), name); + if(!name_string) return 0; if(st->name) { rrdset_index_del_name(host, st); - st->name = strdupz(new_name); - st->hash_name = simple_hash(st->name); + string_freez(st->name); + st->name = name_string; rrdsetvar_rename_all(st); } - else { - st->name = strdupz(new_name); - st->hash_name = simple_hash(st->name); - } + else + st->name = name_string; - rrdset_wrlock(st); RRDDIM *rd; - rrddim_foreach_write(rd, st) + rrddim_foreach_read(rd, st) rrddimvar_rename_all(rd); - rrdset_unlock(st); + rrddim_foreach_done(rd); - if(unlikely(rrdset_index_add_name(host, st) != st)) - error("RRDSET: INTERNAL ERROR: attempted to index duplicate chart name '%s'", st->name); + rrdset_index_add_name(host, st); rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_SEND); rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_IGNORE); @@ -190,15 +520,65 @@ int rrdset_set_name(RRDSET *st, const char *name) { return 2; } +// get the timestamp of the last entry in the round-robin database +time_t rrdset_last_entry_t(RRDSET *st) { + RRDDIM *rd; + time_t last_entry_t = 0; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_last_entry_t(rd); + if(t > last_entry_t) last_entry_t = t; + } + rrddim_foreach_done(rd); + + return last_entry_t; +} + +// get the timestamp of first entry in the round-robin database +time_t rrdset_first_entry_t(RRDSET *st) { + RRDDIM *rd; + time_t first_entry_t = LONG_MAX; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_first_entry_t(rd); + if(t < first_entry_t) + first_entry_t = t; + } + rrddim_foreach_done(rd); + + if (unlikely(LONG_MAX == first_entry_t)) return 0; + return first_entry_t; +} + +time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier) { + if(unlikely(tier > storage_tiers)) + return 0; + + RRDDIM *rd; + time_t first_entry_t = LONG_MAX; + + rrddim_foreach_read(rd, st) { + time_t t = rrddim_first_entry_t_of_tier(rd, tier); + if(t && t < first_entry_t) + first_entry_t = t; + } + rrddim_foreach_done(rd); + + if (unlikely(LONG_MAX == first_entry_t)) return 0; + return first_entry_t; +} + inline void rrdset_is_obsolete(RRDSET *st) { if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))) { - info("Cannot obsolete already archived chart %s", st->name); + info("Cannot obsolete already archived chart %s", rrdset_name(st)); return; } if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE); - st->rrdhost->obsolete_charts_count++; + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); + + st->last_accessed_time = now_realtime_sec(); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); @@ -212,7 +592,7 @@ inline void rrdset_is_obsolete(RRDSET *st) { inline void rrdset_isnot_obsolete(RRDSET *st) { if(unlikely((rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); - st->rrdhost->obsolete_charts_count--; + st->last_accessed_time = now_realtime_sec(); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); @@ -230,38 +610,52 @@ inline void rrdset_update_heterogeneous_flag(RRDSET *st) { rrdset_flag_clear(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); - RRD_ALGORITHM algorithm = st->dimensions->algorithm; - collected_number multiplier = ABS(st->dimensions->multiplier); - collected_number divisor = ABS(st->dimensions->divisor); + bool init = false, is_heterogeneous = false; + RRD_ALGORITHM algorithm; + collected_number multiplier; + collected_number divisor; rrddim_foreach_read(rd, st) { + if(!init) { + algorithm = rd->algorithm; + multiplier = rd->multiplier; + divisor = ABS(rd->divisor); + init = true; + continue; + } + if(algorithm != rd->algorithm || multiplier != ABS(rd->multiplier) || divisor != ABS(rd->divisor)) { if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { #ifdef NETDATA_INTERNAL_CHECKS info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", - rd->name, - st->name, - host->hostname, - rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm), - rd->multiplier, multiplier, - rd->divisor, divisor + rrddim_name(rd), + rrdset_name(st), + rrdhost_hostname(host), + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm), + rd->multiplier, multiplier, + rd->divisor, divisor ); #endif rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); } - return; + + is_heterogeneous = true; + break; } } + rrddim_foreach_done(rd); - rrdset_flag_clear(st, RRDSET_FLAG_HETEROGENEOUS); - rrdcontext_updated_rrdset_flags(st); + if(!is_heterogeneous) { + rrdset_flag_clear(st, RRDSET_FLAG_HETEROGENEOUS); + rrdcontext_updated_rrdset_flags(st); + } } // ---------------------------------------------------------------------------- // RRDSET - reset a chart void rrdset_reset(RRDSET *st) { - debug(D_RRD_CALLS, "rrdset_reset() %s", st->name); + debug(D_RRD_CALLS, "rrdset_reset() %s", rrdset_name(st)); st->last_collected_time.tv_sec = 0; st->last_collected_time.tv_usec = 0; @@ -270,7 +664,6 @@ void rrdset_reset(RRDSET *st) { st->current_entry = 0; st->counter = 0; st->counter_done = 0; - st->rrddim_page_alignment = 0; RRDDIM *rd; rrddim_foreach_read(rd, st) { @@ -279,12 +672,13 @@ void rrdset_reset(RRDSET *st) { rd->collections_counter = 0; if(!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { - for(int tier = 0; tier < storage_tiers ;tier++) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { if(rd->tiers[tier]) - rd->tiers[tier]->collect_ops.flush(rd->tiers[tier]->db_collection_handle); + rd->tiers[tier]->collect_ops->flush(rd->tiers[tier]->db_collection_handle); } } } + rrddim_foreach_done(rd); } // ---------------------------------------------------------------------------- @@ -336,101 +730,22 @@ static inline void last_updated_time_align(RRDSET *st) { void rrdset_free(RRDSET *st) { if(unlikely(!st)) return; - - RRDHOST *host = st->rrdhost; - - rrdhost_check_wrlock(host); // make sure we have a write lock on the host - rrdset_wrlock(st); // lock this RRDSET - // info("Removing chart '%s' ('%s')", st->id, st->name); - - // ------------------------------------------------------------------------ - // remove it from the indexes - - if(unlikely(rrdset_index_del(host, st) != st)) - error("RRDSET: INTERNAL ERROR: attempt to remove from index chart '%s', removed a different chart.", st->id); - - rrdset_index_del_name(host, st); - - // ------------------------------------------------------------------------ - // free its children structures - - freez(st->exporting_flags); - - while(st->variables) rrdsetvar_free(st->variables); -// while(st->alarms) rrdsetcalc_unlink(st->alarms); - /* We must free all connected alarms here in case this has been an ephemeral chart whose alarm was - * created by a template. This leads to an effective memory leak, which cannot be detected since the - * alarms will still be connected to the host, and freed during shutdown. */ - while(st->alarms) rrdcalc_unlink_and_free(st->rrdhost, st->alarms); - while(st->dimensions) rrddim_free(st, st->dimensions); - - rrdfamily_free(host, st->rrdfamily); - - debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id); - rrdvar_free_remaining_variables(host, &st->rrdvar_root_index); - - // ------------------------------------------------------------------------ - // unlink it from the host - - if(st == host->rrdset_root) { - host->rrdset_root = st->next; - } - else { - // find the previous one - RRDSET *s; - for(s = host->rrdset_root; s && s->next != st ; s = s->next) ; - - // bypass it - if(s) s->next = st->next; - else error("Request to free RRDSET '%s': cannot find it under host '%s'", st->id, host->hostname); - } - - rrdset_unlock(st); - - // this has to be after the dimensions are freed - rrdcontext_removed_rrdset(st); - - // ------------------------------------------------------------------------ - // free it - - netdata_rwlock_destroy(&st->rrdset_rwlock); - - // free directly allocated members - freez((void *)st->name); - freez(st->type); - freez(st->family); - freez(st->title); - freez(st->units); - freez(st->context); - freez(st->cache_dir); - freez(st->plugin_name); - freez(st->module_name); - freez(st->state->old_title); - freez(st->state->old_units); - freez(st->state->old_context); - rrdlabels_destroy(st->state->chart_labels); - freez(st->state); - freez(st->chart_uuid); - - rrdset_memory_file_free(st); - freez(st); + rrdset_index_del(st->rrdhost, st); } void rrdset_save(RRDSET *st) { - rrdset_check_rdlock(st); - rrdset_memory_file_save(st); RRDDIM *rd; rrddim_foreach_read(rd, st) rrddim_memory_file_save(rd); + rrddim_foreach_done(rd); } void rrdset_delete_files(RRDSET *st) { RRDDIM *rd; - rrdset_check_rdlock(st); - info("Deleting chart '%s' ('%s') from disk...", st->id, st->name); + info("Deleting chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { const char *cache_filename = rrdset_cache_filename(st); @@ -440,7 +755,7 @@ void rrdset_delete_files(RRDSET *st) { error("Cannot delete chart header file '%s'", cache_filename); } else - error("Cannot find the cache filename of chart '%s'", st->id); + error("Cannot find the cache filename of chart '%s'", rrdset_id(st)); } rrddim_foreach_read(rd, st) { @@ -451,6 +766,7 @@ void rrdset_delete_files(RRDSET *st) { if(unlikely(unlink(cache_filename) == -1)) error("Cannot delete dimension file '%s'", cache_filename); } + rrddim_foreach_done(rd); recursively_delete_dir(st->cache_dir, "left-over chart"); } @@ -458,9 +774,7 @@ void rrdset_delete_files(RRDSET *st) { void rrdset_delete_obsolete_dimensions(RRDSET *st) { RRDDIM *rd; - rrdset_check_rdlock(st); - - info("Deleting dimensions of chart '%s' ('%s') from disk...", st->id, st->name); + info("Deleting dimensions of chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); rrddim_foreach_read(rd, st) { if(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { @@ -471,30 +785,12 @@ void rrdset_delete_obsolete_dimensions(RRDSET *st) { error("Cannot delete dimension file '%s'", cache_filename); } } + rrddim_foreach_done(rd); } // ---------------------------------------------------------------------------- // RRDSET - create a chart -static inline RRDSET *rrdset_find_on_create(RRDHOST *host, const char *fullid) { - RRDSET *st = rrdset_find(host, fullid); - if(unlikely(st)) { - rrdset_isnot_obsolete(st); - debug(D_RRD_CALLS, "RRDSET '%s', already exists.", fullid); - return st; - } - - return NULL; -} - -static inline void rrdset_update_permanent_labels(RRDSET *st) { - if(!st->state || !st->state->chart_labels) return; - - rrdlabels_add(st->state->chart_labels, "_collect_plugin", st->plugin_name, RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); - rrdlabels_add(st->state->chart_labels, "_collect_module", st->module_name, RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); - rrdlabels_add(st->state->chart_labels, "_instance_family", st->family, RRDLABEL_SRC_AUTO| RRDLABEL_FLAG_PERMANENT); -} - RRDSET *rrdset_create_custom( RRDHOST *host , const char *type @@ -512,7 +808,10 @@ RRDSET *rrdset_create_custom( , RRD_MEMORY_MODE memory_mode , long history_entries ) { - if(!type || !type[0]) { + if (host != localhost) + host->senders_last_chart_command = now_realtime_sec(); + + if(!type || !type[0]) fatal("Cannot create rrd stats without a type: id '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." , (id && *id)?id:"<unset>" , (name && *name)?name:"<unset>" @@ -523,10 +822,8 @@ RRDSET *rrdset_create_custom( , (plugin && *plugin)?plugin:"<unset>" , (module && *module)?module:"<unset>" ); - return NULL; - } - if(!id || !id[0]) { + if(!id || !id[0]) fatal("Cannot create rrd stats without an id: type '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." , type , (name && *name)?name:"<unset>" @@ -537,311 +834,55 @@ RRDSET *rrdset_create_custom( , (plugin && *plugin)?plugin:"<unset>" , (module && *module)?module:"<unset>" ); - return NULL; - } - - if (host != localhost) { - host->senders_last_chart_command = now_realtime_sec(); - } // ------------------------------------------------------------------------ // check if it already exists - char fullid[RRD_ID_LENGTH_MAX + 1]; - snprintfz(fullid, RRD_ID_LENGTH_MAX, "%s.%s", type, id); - - int changed_from_archived_to_active = 0; - RRDSET *st = rrdset_find_on_create(host, fullid); - if (st) { - int mark_rebuild = 0; - if (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED)) { - rrdset_flag_clear(st, RRDSET_FLAG_ARCHIVED); - changed_from_archived_to_active = 1; - mark_rebuild |= META_CHART_ACTIVATED; - } - char *old_plugin = NULL, *old_module = NULL, *old_title = NULL, *old_context = NULL, - *old_title_v = NULL, *old_context_v = NULL, *old_units_v = NULL, *old_units = NULL; - int rc; - - if(unlikely(name)) - rc = rrdset_set_name(st, name); - else - rc = rrdset_set_name(st, id); - - if (rc == 2) - mark_rebuild |= META_CHART_UPDATED; - - if (unlikely(st->priority != priority)) { - st->priority = priority; - mark_rebuild |= META_CHART_UPDATED; - } - if (unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && st->update_every != update_every)) { - st->update_every = update_every; - mark_rebuild |= META_CHART_UPDATED; - } - - if (plugin && st->plugin_name) { - if (unlikely(strcmp(plugin, st->plugin_name))) { - old_plugin = st->plugin_name; - st->plugin_name = strdupz(plugin); - mark_rebuild |= META_PLUGIN_UPDATED; - } - } else { - if (plugin != st->plugin_name) { // one is NULL? - old_plugin = st->plugin_name; - st->plugin_name = plugin ? strdupz(plugin) : NULL; - mark_rebuild |= META_PLUGIN_UPDATED; - } - } - - if (module && st->module_name) { - if (unlikely(strcmp(module, st->module_name))) { - old_module = st->module_name; - st->module_name = strdupz(module); - mark_rebuild |= META_MODULE_UPDATED; - } - } else { - if (module != st->module_name) { - if (st->module_name && *st->module_name) { - old_module = st->module_name; - st->module_name = module ? strdupz(module) : NULL; - mark_rebuild |= META_MODULE_UPDATED; - } - } - } - - if (unlikely(title && st->state->old_title && strcmp(st->state->old_title, title))) { - char *new_title = strdupz(title); - old_title_v = st->state->old_title; - st->state->old_title = strdupz(title); - json_fix_string(new_title); - old_title = st->title; - st->title = new_title; - mark_rebuild |= META_CHART_UPDATED; - } - - if (unlikely(units && st->state->old_units && strcmp(st->state->old_units, units))) { - char *new_units = strdupz(units); - old_units_v = st->state->old_units; - st->state->old_units = strdupz(units); - json_fix_string(new_units); - old_units= st->units; - st->units = new_units; - mark_rebuild |= META_CHART_UPDATED; - } - - - if (st->chart_type != chart_type) { - st->chart_type = chart_type; - mark_rebuild |= META_CHART_UPDATED; - } - - if (unlikely(context && st->state->old_context && strcmp(st->state->old_context, context))) { - char *new_context = strdupz(context); - old_context_v = st->state->old_context; - st->state->old_context = strdupz(context); - json_fix_string(new_context); - old_context = st->context; - st->context = new_context; - st->hash_context = simple_hash(st->context); - mark_rebuild |= META_CHART_UPDATED; - } - - if (mark_rebuild) { - rrdset_flag_clear(st, RRDSET_FLAG_ACLK); - freez(old_plugin); - freez(old_module); - freez(old_title); - freez(old_units); - freez(old_context); - freez(old_title_v); - freez(old_units_v); - freez(old_context_v); - if (mark_rebuild != META_CHART_ACTIVATED) { - info("Collector updated metadata for chart %s", st->id); - sched_yield(); - } - } - if (mark_rebuild & (META_CHART_UPDATED | META_PLUGIN_UPDATED | META_MODULE_UPDATED)) { - debug(D_METADATALOG, "CHART [%s] metadata updated", st->id); - int rc = update_chart_metadata(st->chart_uuid, st, id, name); - if (unlikely(rc)) - error_report("Failed to update chart metadata in the database"); - - if (!changed_from_archived_to_active) { - rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - } - } - /* Fall-through during switch from archived to active so that the host lock is taken and health is linked */ - if (!changed_from_archived_to_active) { - rrdset_update_permanent_labels(st); - rrdcontext_updated_rrdset(st); - return st; - } - } - - rrdhost_wrlock(host); - - st = rrdset_find_on_create(host, fullid); - if(st) { - if (changed_from_archived_to_active) { - rrdset_flag_clear(st, RRDSET_FLAG_ARCHIVED); - rrdsetvar_create(st, "last_collected_t", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "collected_total_raw", RRDVAR_TYPE_TOTAL, &st->last_collected_total, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "update_every", RRDVAR_TYPE_INT, &st->update_every, RRDVAR_OPTION_DEFAULT); - rrdsetcalc_link_matching(st); - rrdcalctemplate_link_matching(st); - } - rrdhost_unlock(host); - rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - rrdcontext_updated_rrdset(st); - return st; - } + char full_id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(full_id, RRD_ID_LENGTH_MAX, "%s.%s", type, id); // ------------------------------------------------------------------------ - // get the options from the config, we need to create it - - long entries = 5; - if (memory_mode != RRD_MEMORY_MODE_DBENGINE) - entries = align_entries_to_pagesize(memory_mode, history_entries); - - char *cache_dir = rrdset_cache_dir(host, fullid); - - // ------------------------------------------------------------------------ - // load it or allocate it + // allocate it debug(D_RRD_CALLS, "Creating RRD_STATS for '%s.%s'.", type, id); - st = callocz(1, sizeof(RRDSET)); - st->state = callocz(1, sizeof(*st->state)); - - strcpy(st->id, fullid); - st->hash = simple_hash(st->id); - - st->rrdhost = host; - st->cache_dir = cache_dir; - st->entries = entries; - st->update_every = update_every; - - if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP) { - if(!rrdset_memory_load_or_create_map_save(st, memory_mode)) { - info("Failed to use memory mode %s for chart '%s', falling back to ram", (memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", st->name); - memory_mode = RRD_MEMORY_MODE_RAM; - } - } - st->rrd_memory_mode = memory_mode; - - st->plugin_name = plugin?strdupz(plugin):NULL; - st->module_name = module?strdupz(module):NULL; - st->chart_type = chart_type; - st->type = strdupz(type); - st->family = family ? strdupz(family) : strdupz(st->type); - json_fix_string(st->family); - - st->state->is_ar_chart = strcmp(st->id, ML_ANOMALY_RATES_CHART_ID) == 0; - - st->units = units ? strdupz(units) : strdupz(""); - st->state->old_units = strdupz(st->units); - json_fix_string(st->units); - - st->context = context ? strdupz(context) : strdupz(st->id); - st->state->old_context = strdupz(st->context); - json_fix_string(st->context); - st->hash_context = simple_hash(st->context); - - st->priority = priority; - - rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); - - st->green = NAN; - st->red = NAN; - - st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); - - avl_init_lock(&st->dimensions_index, rrddim_compare); - avl_init_lock(&st->rrdvar_root_index, rrdvar_compare); - - netdata_rwlock_init(&st->rrdset_rwlock); - st->state->chart_labels = rrdlabels_create(); - rrdset_update_permanent_labels(st); - - if(name && *name && rrdset_set_name(st, name)) - // we did set the name - ; - else - // could not use the name, use the id - rrdset_set_name(st, id); - - st->title = strdupz(title); - st->state->old_title = strdupz(st->title); - json_fix_string(st->title); - - st->rrdfamily = rrdfamily_create(host, st->family); - - st->next = host->rrdset_root; - host->rrdset_root = st; - - if(host->health_enabled) { - rrdsetvar_create(st, "last_collected_t", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "collected_total_raw", RRDVAR_TYPE_TOTAL, &st->last_collected_total, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, RRDVAR_OPTION_DEFAULT); - rrdsetvar_create(st, "update_every", RRDVAR_TYPE_INT, &st->update_every, RRDVAR_OPTION_DEFAULT); - } - - if(unlikely(rrdset_index_add(host, st) != st)) - error("RRDSET: INTERNAL ERROR: attempt to index duplicate chart '%s'", st->id); - - rrdsetcalc_link_matching(st); - rrdcalctemplate_link_matching(st); - - st->chart_uuid = find_chart_uuid(host, type, id, name); - if (unlikely(!st->chart_uuid)) - st->chart_uuid = create_chart_uuid(st, id, name); - else - update_chart_metadata(st->chart_uuid, st, id, name); - - store_active_chart(st->chart_uuid); - compute_chart_hash(st); - - rrdhost_unlock(host); - rrdcontext_updated_rrdset(st); + struct rrdset_constructor tmp = { + .host = host, + .type = type, + .id = id, + .name = name, + .family = family, + .context = context, + .title = title, + .units = units, + .plugin = plugin, + .module = module, + .priority = priority, + .update_every = update_every, + .chart_type = chart_type, + .memory_mode = memory_mode, + .history_entries = history_entries, + }; + + RRDSET *st = rrdset_index_add(host, full_id, &tmp); return(st); } - // ---------------------------------------------------------------------------- // RRDSET - data collection iteration control -inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds) { - if(unlikely(!st->last_collected_time.tv_sec || !microseconds || (rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK)))) { - // call the full next_usec() function - rrdset_next_usec(st, microseconds); - return; - } - - st->usec_since_last_update = microseconds; -} - -inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { - struct timeval now; - now_realtime_timeval(&now); - +void rrdset_timed_next(RRDSET *st, struct timeval now, usec_t duration_since_last_update) { #ifdef NETDATA_INTERNAL_CHECKS char *discard_reason = NULL; - usec_t discarded = microseconds; + usec_t discarded = duration_since_last_update; #endif if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK))) { // the chart needs to be re-synced to current time rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK); - // discard the microseconds supplied - microseconds = 0; + // discard the duration supplied + duration_since_last_update = 0; #ifdef NETDATA_INTERNAL_CHECKS if(!discard_reason) discard_reason = "SYNC CLOCK FLAG"; @@ -850,14 +891,14 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { if(unlikely(!st->last_collected_time.tv_sec)) { // the first entry - microseconds = st->update_every * USEC_PER_SEC; + duration_since_last_update = st->update_every * USEC_PER_SEC; #ifdef NETDATA_INTERNAL_CHECKS if(!discard_reason) discard_reason = "FIRST DATA COLLECTION"; #endif } - else if(unlikely(!microseconds)) { + else if(unlikely(!duration_since_last_update)) { // no dt given by the plugin - microseconds = dt_usec(&now, &st->last_collected_time); + duration_since_last_update = dt_usec(&now, &st->last_collected_time); #ifdef NETDATA_INTERNAL_CHECKS if(!discard_reason) discard_reason = "NO USEC GIVEN BY COLLECTOR"; #endif @@ -870,7 +911,13 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { // oops! the database is in the future #ifdef NETDATA_INTERNAL_CHECKS info("RRD database for chart '%s' on host '%s' is %0.5" NETDATA_DOUBLE_MODIFIER - " secs in the future (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (NETDATA_DOUBLE)-since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); + " secs in the future (counter #%zu, update #%zu). Adjusting it to current time." + , rrdset_id(st) + , rrdhost_hostname(st->rrdhost) + , (NETDATA_DOUBLE)-since_last_usec / USEC_PER_SEC + , st->counter + , st->counter_done + ); #endif st->last_collected_time.tv_sec = now.tv_sec - st->update_every; @@ -881,7 +928,7 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { st->last_updated.tv_usec = now.tv_usec; last_updated_time_align(st); - microseconds = st->update_every * USEC_PER_SEC; + duration_since_last_update = st->update_every * USEC_PER_SEC; #ifdef NETDATA_INTERNAL_CHECKS if(!discard_reason) discard_reason = "COLLECTION TIME IN FUTURE"; #endif @@ -890,24 +937,24 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { // oops! the database is too far behind #ifdef NETDATA_INTERNAL_CHECKS info("RRD database for chart '%s' on host '%s' is %0.5" NETDATA_DOUBLE_MODIFIER - " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (NETDATA_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); + " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", rrdset_id(st), rrdhost_hostname(st->rrdhost), (NETDATA_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); #endif - microseconds = (usec_t)since_last_usec; + duration_since_last_update = (usec_t)since_last_usec; #ifdef NETDATA_INTERNAL_CHECKS if(!discard_reason) discard_reason = "COLLECTION TIME TOO FAR IN THE PAST"; #endif } #ifdef NETDATA_INTERNAL_CHECKS - if(since_last_usec > 0 && (susec_t)microseconds < since_last_usec) { + if(since_last_usec > 0 && (susec_t) duration_since_last_update < since_last_usec) { static __thread susec_t min_delta = USEC_PER_SEC * 3600, permanent_min_delta = 0; static __thread time_t last_t = 0; // the first time initialize it so that it will make the check later if(last_t == 0) last_t = now.tv_sec + 60; - susec_t delta = since_last_usec - (susec_t)microseconds; + susec_t delta = since_last_usec - (susec_t) duration_since_last_update; if(delta < min_delta) min_delta = delta; if(now.tv_sec >= last_t + 60) { @@ -924,31 +971,49 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { #endif } - #ifdef NETDATA_INTERNAL_CHECKS - debug(D_RRD_CALLS, "rrdset_next_usec() for chart %s with microseconds %llu", st->name, microseconds); - rrdset_debug(st, "NEXT: %llu microseconds", microseconds); + debug(D_RRD_CALLS, "rrdset_timed_next() for chart %s with duration since last update %llu usec", rrdset_name(st), duration_since_last_update); + rrdset_debug(st, "NEXT: %llu microseconds", duration_since_last_update); - if(discarded && discarded != microseconds) - info("host '%s', chart '%s': discarded data collection time of %llu usec, replaced with %llu usec, reason: '%s'", st->rrdhost->hostname, st->id, discarded, microseconds, discard_reason?discard_reason:"UNDEFINED"); + internal_error(discarded && discarded != duration_since_last_update, + "host '%s', chart '%s': discarded data collection time of %llu usec, " + "replaced with %llu usec, reason: '%s'" + , rrdhost_hostname(st->rrdhost) + , rrdset_id(st) + , discarded + , duration_since_last_update + , discard_reason?discard_reason:"UNDEFINED" + ); - #endif + st->usec_since_last_update = duration_since_last_update; +} + +inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t duration_since_last_update) { + if(unlikely(!st->last_collected_time.tv_sec || !duration_since_last_update || (rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK)))) { + // call the full next_usec() function + rrdset_next_usec(st, duration_since_last_update); + return; + } - st->usec_since_last_update = microseconds; + st->usec_since_last_update = duration_since_last_update; } +inline void rrdset_next_usec(RRDSET *st, usec_t duration_since_last_update) { + struct timeval now; + + now_realtime_timeval(&now); + rrdset_timed_next(st, now, duration_since_last_update); +} // ---------------------------------------------------------------------------- // RRDSET - process the collected values for all dimensions of a chart -static inline usec_t rrdset_init_last_collected_time(RRDSET *st) { - now_realtime_timeval(&st->last_collected_time); +static inline usec_t rrdset_init_last_collected_time(RRDSET *st, struct timeval now) { + st->last_collected_time = now; last_collected_time_align(st); usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "initialized last collected time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_collect_ut / USEC_PER_SEC); - #endif return last_collect_ut; } @@ -959,9 +1024,7 @@ static inline usec_t rrdset_update_last_collected_time(RRDSET *st) { st->last_collected_time.tv_sec = (time_t) (ut / USEC_PER_SEC); st->last_collected_time.tv_usec = (suseconds_t) (ut % USEC_PER_SEC); - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "updated last collected time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_collect_ut / USEC_PER_SEC); - #endif return last_collect_ut; } @@ -978,22 +1041,51 @@ static inline usec_t rrdset_init_last_updated_time(RRDSET *st) { usec_t last_updated_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "initialized last updated time to %0.3" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)last_updated_ut / USEC_PER_SEC); - #endif return last_updated_ut; } +static __thread size_t rrdset_done_statistics_points_stored_per_tier[RRD_STORAGE_TIERS]; + static inline time_t tier_next_point_time(RRDDIM *rd, struct rrddim_tier *t, time_t now) { time_t loop = (time_t)rd->update_every * (time_t)t->tier_grouping; return now + loop - ((now + loop) % loop); } -void store_metric_at_tier(RRDDIM *rd, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut) { +void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut __maybe_unused) { if (unlikely(!t->next_point_time)) t->next_point_time = tier_next_point_time(rd, t, sp.end_time); + if(unlikely(sp.start_time > t->next_point_time)) { + if (likely(!storage_point_is_unset(t->virtual_point))) { + + t->collect_ops->store_metric( + t->db_collection_handle, + t->next_point_time * USEC_PER_SEC, + t->virtual_point.sum, + t->virtual_point.min, + t->virtual_point.max, + t->virtual_point.count, + t->virtual_point.anomaly_count, + t->virtual_point.flags); + } + else { + t->collect_ops->store_metric( + t->db_collection_handle, + t->next_point_time * USEC_PER_SEC, + NAN, + NAN, + NAN, + 0, + 0, SN_FLAG_NONE); + } + + rrdset_done_statistics_points_stored_per_tier[tier]++; + t->virtual_point.count = 0; // make the point unset + t->next_point_time = tier_next_point_time(rd, t, sp.end_time); + } + // merge the dates into our virtual point if (unlikely(sp.start_time < t->virtual_point.start_time)) t->virtual_point.start_time = sp.start_time; @@ -1019,72 +1111,105 @@ void store_metric_at_tier(RRDDIM *rd, struct rrddim_tier *t, STORAGE_POINT sp, u t->virtual_point = sp; } } - - if(unlikely(sp.end_time >= t->next_point_time)) { - if (likely(!storage_point_is_unset(t->virtual_point))) { - - t->collect_ops.store_metric( - t->db_collection_handle, - now_ut, - t->virtual_point.sum, - t->virtual_point.min, - t->virtual_point.max, - t->virtual_point.count, - t->virtual_point.anomaly_count, - t->virtual_point.flags); - } - else { - t->collect_ops.store_metric( - t->db_collection_handle, - now_ut, - NAN, - NAN, - NAN, - 0, - 0, SN_FLAG_NONE); +} +#ifdef NETDATA_LOG_COLLECTION_ERRORS +void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags, const char *function) { +#else // !NETDATA_LOG_COLLECTION_ERRORS +void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) { +#endif // !NETDATA_LOG_COLLECTION_ERRORS +#ifdef NETDATA_LOG_COLLECTION_ERRORS + rd->rrddim_store_metric_count++; + + if(likely(rd->rrddim_store_metric_count > 1)) { + usec_t expected = rd->rrddim_store_metric_last_ut + rd->update_every * USEC_PER_SEC; + + if(point_end_time_ut != rd->rrddim_store_metric_last_ut) { + internal_error(true, + "%s COLLECTION: 'host:%s/chart:%s/dim:%s' granularity %d, collection %zu, expected to store at tier 0 a value at %llu, but it gave %llu [%s%llu usec] (called from %s(), previously by %s())", + (point_end_time_ut < rd->rrddim_store_metric_last_ut) ? "**PAST**" : "GAP", + rrdhost_hostname(rd->rrdset->rrdhost), rrdset_id(rd->rrdset), rrddim_id(rd), + rd->update_every, + rd->rrddim_store_metric_count, + expected, point_end_time_ut, + (point_end_time_ut < rd->rrddim_store_metric_last_ut)?"by -" : "gap ", + expected - point_end_time_ut, + function, + rd->rrddim_store_metric_last_caller?rd->rrddim_store_metric_last_caller:"none"); } - - t->virtual_point.count = 0; - t->next_point_time = tier_next_point_time(rd, t, sp.end_time); } -} -static void store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) { + rd->rrddim_store_metric_last_ut = point_end_time_ut; + rd->rrddim_store_metric_last_caller = function; +#endif // NETDATA_LOG_COLLECTION_ERRORS // store the metric on tier 0 - rd->tiers[0]->collect_ops.store_metric(rd->tiers[0]->db_collection_handle, point_end_time_ut, n, 0, 0, 1, 0, flags); - - for(int tier = 1; tier < storage_tiers ;tier++) { + rd->tiers[0]->collect_ops->store_metric(rd->tiers[0]->db_collection_handle, point_end_time_ut, n, 0, 0, 1, 0, flags); + rrdset_done_statistics_points_stored_per_tier[0]++; + + time_t now = (time_t)(point_end_time_ut / USEC_PER_SEC); + + STORAGE_POINT sp = { + .start_time = now - rd->update_every, + .end_time = now, + .min = n, + .max = n, + .sum = n, + .count = 1, + .anomaly_count = (flags & SN_FLAG_NOT_ANOMALOUS) ? 0 : 1, + .flags = flags + }; + + for(size_t tier = 1; tier < storage_tiers ;tier++) { if(unlikely(!rd->tiers[tier])) continue; struct rrddim_tier *t = rd->tiers[tier]; - time_t now = (time_t)(point_end_time_ut / USEC_PER_SEC); - - if(!t->last_collected_ut) { + if(!rrddim_option_check(rd, RRDDIM_OPTION_BACKFILLED_HIGH_TIERS)) { // we have not collected this tier before // let's fill any gap that may exist rrdr_fill_tier_gap_from_smaller_tiers(rd, tier, now); + rrddim_option_set(rd, RRDDIM_OPTION_BACKFILLED_HIGH_TIERS); } - STORAGE_POINT sp = { - .start_time = now - rd->update_every, - .end_time = now, - .min = n, - .max = n, - .sum = n, - .count = 1, - .anomaly_count = (flags & SN_FLAG_NOT_ANOMALOUS) ? 0 : 1, - .flags = flags - }; + store_metric_at_tier(rd, tier, t, sp, point_end_time_ut); + } +} + +void store_metric_collection_completed() { + global_statistics_rrdset_done_chart_collection_completed(rrdset_done_statistics_points_stored_per_tier); +} + +// caching of dimensions rrdset_done() and rrdset_done_interpolate() loop through +struct rda_item { + const DICTIONARY_ITEM *item; + RRDDIM *rd; +}; + +static __thread struct rda_item *thread_rda = NULL; +static __thread size_t thread_rda_entries = 0; - t->last_collected_ut = point_end_time_ut; - store_metric_at_tier(rd, t, sp, point_end_time_ut); +struct rda_item *rrdset_thread_rda(size_t *dimensions) { + + if(unlikely(!thread_rda || (*dimensions) > thread_rda_entries)) { + freez(thread_rda); + thread_rda = mallocz((*dimensions) * sizeof(struct rda_item)); + thread_rda_entries = *dimensions; } + + *dimensions = thread_rda_entries; + return thread_rda; +} + +void rrdset_thread_rda_free(void) { + freez(thread_rda); + thread_rda = NULL; + thread_rda_entries = 0; } static inline size_t rrdset_done_interpolate( RRDSET *st + , struct rda_item *rda_base + , size_t rda_slots , usec_t update_every_ut , usec_t last_stored_ut , usec_t next_store_ut @@ -1113,17 +1238,26 @@ static inline size_t rrdset_done_interpolate( for( ; next_store_ut <= now_collect_ut ; last_collect_ut = next_store_ut, next_store_ut += update_every_ut, iterations-- ) { - #ifdef NETDATA_INTERNAL_CHECKS - if(iterations < 0) { error("INTERNAL CHECK: %s: iterations calculation wrapped! first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu", st->name, first_ut, last_stored_ut, next_store_ut, now_collect_ut); } + internal_error(iterations < 0, + "RRDSET: '%s': iterations calculation wrapped! " + "first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu" + , rrdset_id(st) + , first_ut + , last_stored_ut + , next_store_ut + , now_collect_ut + ); + rrdset_debug(st, "last_stored_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last updated time)", (NETDATA_DOUBLE)last_stored_ut/USEC_PER_SEC); rrdset_debug(st, "next_store_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (next interpolation point)", (NETDATA_DOUBLE)next_store_ut/USEC_PER_SEC); - #endif last_ut = next_store_ut; - rrddim_foreach_read(rd, st) { - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) - continue; + struct rda_item *rda; + size_t dim_id; + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; NETDATA_DOUBLE new_value; @@ -1135,18 +1269,16 @@ static inline size_t rrdset_done_interpolate( / (NETDATA_DOUBLE)(now_collect_ut - last_collect_ut) ); - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC2 INC " NETDATA_DOUBLE_FORMAT " = " NETDATA_DOUBLE_FORMAT " * (%llu - %llu)" " / (%llu - %llu)" - , rd->name + , rrddim_name(rd) , new_value , rd->calculated_value , next_store_ut, last_collect_ut , now_collect_ut, last_collect_ut ); - #endif rd->calculated_value -= new_value; new_value += rd->last_calculated_value; @@ -1155,12 +1287,10 @@ static inline size_t rrdset_done_interpolate( if(unlikely(next_store_ut - last_stored_ut < update_every_ut)) { - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: COLLECTION POINT IS SHORT " NETDATA_DOUBLE_FORMAT " - EXTRAPOLATING", - rd->name + rrddim_name(rd) , (NETDATA_DOUBLE)(next_store_ut - last_stored_ut) ); - #endif new_value = new_value * (NETDATA_DOUBLE)(st->update_every * USEC_PER_SEC) / (NETDATA_DOUBLE)(next_store_ut - last_stored_ut); } @@ -1189,24 +1319,23 @@ static inline size_t rrdset_done_interpolate( + rd->last_calculated_value ); - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC2 DEF " NETDATA_DOUBLE_FORMAT " = (((" "(" NETDATA_DOUBLE_FORMAT " - " NETDATA_DOUBLE_FORMAT ")" " * %llu" - " / %llu) + " NETDATA_DOUBLE_FORMAT, rd->name + " / %llu) + " NETDATA_DOUBLE_FORMAT, rrddim_name(rd) , new_value , rd->calculated_value, rd->last_calculated_value , (next_store_ut - first_ut) , (now_collect_ut - first_ut), rd->last_calculated_value ); - #endif } break; } if(unlikely(!store_this_entry)) { (void) ml_is_anomalous(rd, 0, false); - store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrdcontext_collected_rrddim(rd); continue; } @@ -1218,17 +1347,17 @@ static inline size_t rrdset_done_interpolate( dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS); } - store_metric(rd, next_store_ut, new_value, dim_storage_flags); + rrddim_store_metric(rd, next_store_ut, new_value, dim_storage_flags); + rrdcontext_collected_rrddim(rd); rd->last_stored_value = new_value; } else { (void) ml_is_anomalous(rd, 0, false); - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rd->name, current_entry); - #endif + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry); - store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); + rrdcontext_collected_rrddim(rd); rd->last_stored_value = NAN; } @@ -1274,11 +1403,10 @@ static inline void rrdset_done_fill_the_gap(RRDSET *st) { rd->db[current_entry] = pack_storage_number(NAN, SN_FLAG_NONE); current_entry = ((current_entry + 1) >= entries) ? 0 : current_entry + 1; - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING (FILLED THE GAP)", rd->name, current_entry); - #endif + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING (FILLED THE GAP)", rrddim_name(rd), current_entry); } } + rrddim_foreach_done(rd); if(c > 0) { c--; @@ -1292,10 +1420,19 @@ static inline void rrdset_done_fill_the_gap(RRDSET *st) { } void rrdset_done(RRDSET *st) { + struct timeval now; + + now_realtime_timeval(&now); + rrdset_timed_done(st, now, /* pending_rrdset_next = */ st->counter_done != 0); +} + +void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next) { if(unlikely(netdata_exit)) return; - debug(D_RRD_CALLS, "rrdset_done() for chart %s", st->name); - rrdcontext_collected_rrdset(st); + if (pending_rrdset_next) + rrdset_next(st); + + debug(D_RRD_CALLS, "rrdset_done() for chart '%s'", rrdset_name(st)); RRDDIM *rd; @@ -1312,44 +1449,29 @@ void rrdset_done(RRDSET *st) { netdata_thread_disable_cancelability(); - // a read lock is OK here - rrdset_rdlock(st); - -#ifdef ENABLE_ACLK - if (likely(!st->state->is_ar_chart)) { - if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) { - if (likely(st->dimensions && st->counter_done && !queue_chart_to_aclk(st))) { - rrdset_flag_set(st, RRDSET_FLAG_ACLK); - } - } - } -#endif - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { - error("Chart '%s' has the OBSOLETE flag set, but it is collected.", st->id); + error("Chart '%s' has the OBSOLETE flag set, but it is collected.", rrdset_id(st)); rrdset_isnot_obsolete(st); } // check if the chart has a long time to be updated if(unlikely(st->usec_since_last_update > st->entries * update_every_ut && st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && st->rrd_memory_mode != RRD_MEMORY_MODE_NONE)) { - info("host '%s', chart %s: took too long to be updated (counter #%zu, update #%zu, %0.3" NETDATA_DOUBLE_MODIFIER - " secs). Resetting it.", st->rrdhost->hostname, st->name, st->counter, st->counter_done, (NETDATA_DOUBLE)st->usec_since_last_update / USEC_PER_SEC); + info("host '%s', chart '%s': took too long to be updated (counter #%zu, update #%zu, %0.3" NETDATA_DOUBLE_MODIFIER + " secs). Resetting it.", rrdhost_hostname(st->rrdhost), rrdset_id(st), st->counter, st->counter_done, (NETDATA_DOUBLE)st->usec_since_last_update / USEC_PER_SEC); rrdset_reset(st); st->usec_since_last_update = update_every_ut; store_this_entry = 0; first_entry = 1; } - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "microseconds since last update: %llu", st->usec_since_last_update); - #endif // set last_collected_time if(unlikely(!st->last_collected_time.tv_sec)) { // it is the first entry // set the last_collected_time to now - last_collect_ut = rrdset_init_last_collected_time(st) - update_every_ut; + last_collect_ut = rrdset_init_last_collected_time(st, now) - update_every_ut; // the first entry should not be stored store_this_entry = 0; @@ -1380,9 +1502,9 @@ void rrdset_done(RRDSET *st) { if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut && st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) { info( - "%s: too old data (last updated at %"PRId64".%"PRId64", last collected at %"PRId64".%"PRId64"). " + "'%s': too old data (last updated at %"PRId64".%"PRId64", last collected at %"PRId64".%"PRId64"). " "Resetting it. Will not store the next entry.", - st->name, + rrdset_id(st), (int64_t)st->last_updated.tv_sec, (int64_t)st->last_updated.tv_usec, (int64_t)st->last_collected_time.tv_sec, @@ -1402,9 +1524,9 @@ void rrdset_done(RRDSET *st) { if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && dt_usec(&st->last_collected_time, &st->last_updated) > (RRDENG_BLOCK_SIZE / sizeof(storage_number)) * update_every_ut)) { info( - "%s: too old data (last updated at %" PRId64 ".%" PRId64 ", last collected at %" PRId64 ".%" PRId64 "). " + "'%s': too old data (last updated at %" PRId64 ".%" PRId64 ", last collected at %" PRId64 ".%" PRId64 "). " "Resetting it. Will not store the next entry.", - st->name, + rrdset_id(st), (int64_t)st->last_updated.tv_sec, (int64_t)st->last_updated.tv_usec, (int64_t)st->last_collected_time.tv_sec, @@ -1449,79 +1571,108 @@ void rrdset_done(RRDSET *st) { store_this_entry = 1; last_collect_ut = next_store_ut - update_every_ut; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "Fixed first entry."); - #endif } else { store_this_entry = 0; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "Will not store the next entry."); - #endif } } after_first_database_work: st->counter_done++; - if(unlikely(st->rrdhost->rrdpush_send_enabled)) + if(unlikely(rrdhost_has_rrdpush_sender_enabled(st->rrdhost))) rrdset_done_push(st); - if (unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE)) { - goto after_second_database_work; - } - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "last_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last collection time)", (NETDATA_DOUBLE)last_collect_ut/USEC_PER_SEC); - rrdset_debug(st, "now_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (current collection time)", (NETDATA_DOUBLE)now_collect_ut/USEC_PER_SEC); - rrdset_debug(st, "last_stored_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last updated time)", (NETDATA_DOUBLE)last_stored_ut/USEC_PER_SEC); - rrdset_debug(st, "next_store_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (next interpolation point)", (NETDATA_DOUBLE)next_store_ut/USEC_PER_SEC); - #endif + uint32_t has_reset_value = 0; - // calculate totals and count the dimensions - int dimensions = 0; - st->collected_total = 0; + size_t rda_slots = dictionary_entries(st->rrddim_root_index); + struct rda_item *rda_base = rrdset_thread_rda(&rda_slots); + + size_t dim_id; + size_t dimensions = 0; + struct rda_item *rda = rda_base; + total_number collected_total = 0; + total_number last_collected_total = 0; rrddim_foreach_read(rd, st) { - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) + if(rd_dfe.counter >= rda_slots) + break; + + rda = &rda_base[dimensions++]; + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + rda->item = NULL; + rda->rd = NULL; continue; + } + + // store the dimension in the array + rda->item = dictionary_acquired_item_dup(st->rrddim_root_index, rd_dfe.item); + rda->rd = dictionary_acquired_item_value(rda->item); + + // calculate totals + if(likely(rd->updated)) { + // if the new is smaller than the old (an overflow, or reset), set the old equal to the new + // to reset the calculation (it will give zero as the calculation for this second) + if(unlikely(rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL && rd->last_collected_value > rd->collected_value)) { + debug(D_RRD_STATS, "'%s' / '%s': RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , rrdset_id(st) + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + ); - dimensions++; + if(!(rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS))) + has_reset_value = 1; - if(likely(rd->updated)) - st->collected_total += rd->collected_value; + rd->last_collected_value = rd->collected_value; + } + + last_collected_total += rd->last_collected_value; + collected_total += rd->collected_value; + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))) { + error("Dimension %s in chart '%s' has the OBSOLETE flag set, but it is collected.", rrddim_name(rd), rrdset_id(st)); + rrddim_isnot_obsolete(st, rd); + } + } } + rrddim_foreach_done(rd); + rda_slots = dimensions; - uint32_t has_reset_value = 0; + if (unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE)) + goto after_second_database_work; + + rrdset_debug(st, "last_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last collection time)", (NETDATA_DOUBLE)last_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "now_collect_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (current collection time)", (NETDATA_DOUBLE)now_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "last_stored_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (last updated time)", (NETDATA_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" NETDATA_DOUBLE_MODIFIER " (next interpolation point)", (NETDATA_DOUBLE)next_store_ut/USEC_PER_SEC); // process all dimensions to calculate their values // based on the collected figures only // at this stage we do not interpolate anything - rrddim_foreach_read(rd, st) { - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) - continue; + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; if(unlikely(!rd->updated)) { rd->calculated_value = 0; continue; } - if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))) { - error("Dimension %s in chart '%s' has the OBSOLETE flag set, but it is collected.", rd->name, st->id); - rrddim_isnot_obsolete(st, rd); - } - - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: START " " last_collected_value = " COLLECTED_NUMBER_FORMAT " collected_value = " COLLECTED_NUMBER_FORMAT " last_calculated_value = " NETDATA_DOUBLE_FORMAT - " calculated_value = " NETDATA_DOUBLE_FORMAT, rd->name - , rd->last_collected_value - , rd->collected_value - , rd->last_calculated_value - , rd->calculated_value + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value ); - #endif switch(rd->algorithm) { case RRD_ALGORITHM_ABSOLUTE: @@ -1529,22 +1680,20 @@ after_first_database_work: * (NETDATA_DOUBLE)rd->multiplier / (NETDATA_DOUBLE)rd->divisor; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC ABS/ABS-NO-IN " NETDATA_DOUBLE_FORMAT " = " COLLECTED_NUMBER_FORMAT " * " NETDATA_DOUBLE_FORMAT - " / " NETDATA_DOUBLE_FORMAT, rd->name + " / " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) , rd->calculated_value , rd->collected_value , (NETDATA_DOUBLE)rd->multiplier , (NETDATA_DOUBLE)rd->divisor ); - #endif - break; case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: - if(unlikely(!st->collected_total)) + if(unlikely(!collected_total)) rd->calculated_value = 0; else // the percentage of the current value @@ -1552,19 +1701,16 @@ after_first_database_work: rd->calculated_value = (NETDATA_DOUBLE)100 * (NETDATA_DOUBLE)rd->collected_value - / (NETDATA_DOUBLE)st->collected_total; + / (NETDATA_DOUBLE)collected_total; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC PCENT-ROW " NETDATA_DOUBLE_FORMAT " = 100" " * " COLLECTED_NUMBER_FORMAT " / " COLLECTED_NUMBER_FORMAT - , rd->name + , rrddim_name(rd) , rd->calculated_value , rd->collected_value - , st->collected_total + , collected_total ); - #endif - break; case RRD_ALGORITHM_INCREMENTAL: @@ -1578,12 +1724,13 @@ after_first_database_work: // It is imperative to set the comparison to uint64_t since type collected_number is signed and // produces wrong results as far as incremental counters are concerned. if(unlikely((uint64_t)rd->last_collected_value > (uint64_t)rd->collected_value)) { - debug(D_RRD_STATS, "%s.%s: RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT - , st->name, rd->name + debug(D_RRD_STATS, "'%s' / '%s': RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , rrdset_id(st) + , rrddim_name(rd) , rd->last_collected_value , rd->collected_value); - if(!(rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS))) + if(!(rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS))) has_reset_value = 1; uint64_t last = (uint64_t)rd->last_collected_value; @@ -1622,19 +1769,17 @@ after_first_database_work: / (NETDATA_DOUBLE) rd->divisor; } - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC INC PRE " NETDATA_DOUBLE_FORMAT " = (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" " * " NETDATA_DOUBLE_FORMAT - " / " NETDATA_DOUBLE_FORMAT, rd->name + " / " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) , rd->calculated_value , rd->collected_value, rd->last_collected_value , (NETDATA_DOUBLE)rd->multiplier , (NETDATA_DOUBLE)rd->divisor ); - #endif - break; case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: @@ -1643,42 +1788,24 @@ after_first_database_work: continue; } - // if the new is smaller than the old (an overflow, or reset), set the old equal to the new - // to reset the calculation (it will give zero as the calculation for this second) - if(unlikely(rd->last_collected_value > rd->collected_value)) { - debug(D_RRD_STATS, "%s.%s: RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT - , st->name, rd->name - , rd->last_collected_value - , rd->collected_value - ); - - if(!(rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS))) - has_reset_value = 1; - - rd->last_collected_value = rd->collected_value; - } - // the percentage of the current increment // over the increment of all dimensions together - if(unlikely(st->collected_total == st->last_collected_total)) + if(unlikely(collected_total == last_collected_total)) rd->calculated_value = 0; else rd->calculated_value = (NETDATA_DOUBLE)100 * (NETDATA_DOUBLE)(rd->collected_value - rd->last_collected_value) - / (NETDATA_DOUBLE)(st->collected_total - st->last_collected_total); + / (NETDATA_DOUBLE)(collected_total - last_collected_total); - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC PCENT-DIFF " NETDATA_DOUBLE_FORMAT " = 100" " * (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" " / (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" - , rd->name + , rrddim_name(rd) , rd->calculated_value , rd->collected_value, rd->last_collected_value - , st->collected_total, st->last_collected_total + , collected_total, last_collected_total ); - #endif - break; default: @@ -1686,43 +1813,41 @@ after_first_database_work: // it gets noticed when we add new types rd->calculated_value = 0; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: CALC " NETDATA_DOUBLE_FORMAT " = 0" - , rd->name + , rrddim_name(rd) , rd->calculated_value ); - #endif - break; } - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: PHASE2 " " last_collected_value = " COLLECTED_NUMBER_FORMAT " collected_value = " COLLECTED_NUMBER_FORMAT " last_calculated_value = " NETDATA_DOUBLE_FORMAT - " calculated_value = " NETDATA_DOUBLE_FORMAT, rd->name - , rd->last_collected_value - , rd->collected_value - , rd->last_calculated_value - , rd->calculated_value + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value ); - #endif - } // at this point we have all the calculated values ready // it is now time to interpolate values on a second boundary -#ifdef NETDATA_INTERNAL_CHECKS - if(unlikely(now_collect_ut < next_store_ut && st->counter_done > 1)) { - // this is collected in the same interpolation point - rrdset_debug(st, "THIS IS IN THE SAME INTERPOLATION POINT"); - info("INTERNAL CHECK: host '%s', chart '%s' collection %zu is in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, st->name, st->counter_done, next_store_ut - now_collect_ut); - } -#endif - - rrdset_done_interpolate(st +// #ifdef NETDATA_INTERNAL_CHECKS +// if(unlikely(now_collect_ut < next_store_ut && st->counter_done > 1)) { +// // this is collected in the same interpolation point +// rrdset_debug(st, "THIS IS IN THE SAME INTERPOLATION POINT"); +// info("INTERNAL CHECK: host '%s', chart '%s' collection %zu is in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, rrdset_name(st), st->counter_done, next_store_ut - now_collect_ut); +// } +// #endif + + rrdset_done_interpolate( + st + , rda_base + , rda_slots , update_every_ut , last_stored_ut , next_store_ut @@ -1733,54 +1858,41 @@ after_first_database_work: ); after_second_database_work: - st->last_collected_total = st->collected_total; + for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; -#ifdef ENABLE_ACLK - time_t mark = now_realtime_sec(); -#endif - rrddim_foreach_read(rd, st) { - if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) - continue; - -#ifdef ENABLE_ACLK - if (likely(!st->state->is_ar_chart)) { - if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN) && likely(rrdset_flag_check(st, RRDSET_FLAG_ACLK))) - queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark)); - } -#endif if(unlikely(!rd->updated)) continue; - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", rd->name, rd->last_collected_value, rd->collected_value); - #endif + rrdset_debug(st, "%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", rrddim_name(rd), rd->last_collected_value, rd->collected_value); rd->last_collected_value = rd->collected_value; switch(rd->algorithm) { case RRD_ALGORITHM_INCREMENTAL: if(unlikely(!first_entry)) { - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT - ") to last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")", rd->name, rd->last_calculated_value + rd->calculated_value, rd->calculated_value); - #endif + rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT ") to " + "last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")" + , rrddim_name(rd) + , rd->last_calculated_value + rd->calculated_value + , rd->calculated_value); rd->last_calculated_value += rd->calculated_value; } else { - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "THIS IS THE FIRST POINT"); - #endif } break; case RRD_ALGORITHM_ABSOLUTE: case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: - #ifdef NETDATA_INTERNAL_CHECKS - rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT - ") to last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")", rd->name, rd->last_calculated_value, rd->calculated_value); - #endif + rrdset_debug(st, "%s: setting last_calculated_value (old: " NETDATA_DOUBLE_FORMAT ") to " + "last_calculated_value (new: " NETDATA_DOUBLE_FORMAT ")" + , rrddim_name(rd) + , rd->last_calculated_value + , rd->calculated_value); rd->last_calculated_value = rd->calculated_value; break; @@ -1790,19 +1902,17 @@ after_second_database_work: rd->collected_value = 0; rd->updated = 0; - #ifdef NETDATA_INTERNAL_CHECKS rrdset_debug(st, "%s: END " " last_collected_value = " COLLECTED_NUMBER_FORMAT " collected_value = " COLLECTED_NUMBER_FORMAT " last_calculated_value = " NETDATA_DOUBLE_FORMAT - " calculated_value = " NETDATA_DOUBLE_FORMAT, rd->name - , rd->last_collected_value - , rd->collected_value - , rd->last_calculated_value - , rd->calculated_value + " calculated_value = " NETDATA_DOUBLE_FORMAT + , rrddim_name(rd) + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value ); - #endif - } // ALL DONE ABOUT THE DATA UPDATE @@ -1812,99 +1922,54 @@ after_second_database_work: // update the memory mapped files with the latest values rrdset_memory_file_update(st); - rrddim_foreach_read(rd, st) { + + for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; rrddim_memory_file_update(rd); } } - // find if there are any obsolete dimensions - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))) { - rrddim_foreach_read(rd, st) - if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))) - break; + for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { + rd = rda->rd; + if(unlikely(!rd)) continue; - if(unlikely(rd)) { - time_t now = now_realtime_sec(); - - RRDDIM *last; - // there is a dimension to free - // upgrade our read lock to a write lock - rrdset_unlock(st); - rrdset_wrlock(st); - - for( rd = st->dimensions, last = NULL ; likely(rd) ; ) { - if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && !rrddim_flag_check(rd, RRDDIM_FLAG_ACLK) - && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now))) { - info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rd->name, rd->id, st->name, st->id); - - const char *cache_filename = rrddim_cache_filename(rd); - if(cache_filename) { - info("Deleting dimension file '%s'.", cache_filename); - if (unlikely(unlink(cache_filename) == -1)) - error("Cannot delete dimension file '%s'", cache_filename); - } - -#ifdef ENABLE_DBENGINE - if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); - while(rd->variables) - rrddimvar_free(rd->variables); - - rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); - /* only a collector can mark a chart as obsolete, so we must remove the reference */ - - size_t tiers_available = 0, tiers_said_yes = 0; - for(int tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier]) { - tiers_available++; - - if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle)) - tiers_said_yes++; - - rd->tiers[tier]->db_collection_handle = NULL; - } - } - - if (tiers_available == tiers_said_yes && tiers_said_yes) { - /* This metric has no data and no references */ - delete_dimension_uuid(&rd->metric_uuid); - } else { - /* Do not delete this dimension */ -#ifdef ENABLE_ACLK - queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark)); -#endif - last = rd; - rd = rd->next; - continue; - } - } -#endif - if(unlikely(!last)) { - rrddim_free(st, rd); - rd = st->dimensions; - continue; - } - else { - rrddim_free(st, rd); - rd = last->next; - continue; - } - } - - last = rd; - rd = rd->next; - } - } - else { - rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); - } + dictionary_acquired_item_release(st->rrddim_root_index, rda->item); + rda->item = NULL; + rda->rd = NULL; } - rrdset_unlock(st); + rrdcontext_collected_rrdset(st); netdata_thread_enable_cancelability(); + + store_metric_collection_completed(); } +time_t rrdset_set_update_every(RRDSET *st, time_t update_every) { + + internal_error(true, "RRDSET '%s' switching update every from %d to %d", + rrdset_id(st), (int)st->update_every, (int)update_every); + + time_t prev_update_every = st->update_every; + st->update_every = update_every; + + // switch update every to the storage engine + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (rd->tiers[tier] && rd->tiers[tier]->db_collection_handle) + rd->tiers[tier]->collect_ops->change_collection_frequency(rd->tiers[tier]->db_collection_handle, (int)(st->rrdhost->db[tier].tier_grouping * st->update_every)); + } + + assert(rd->update_every == prev_update_every && + "chart's update every differs from the update every of its dimensions"); + rd->update_every = st->update_every; + } + rrddim_foreach_done(rd); + + return prev_update_every; +} // ---------------------------------------------------------------------------- // compatibility layer for RRDSET files v019 @@ -1966,8 +2031,8 @@ struct rrdset_map_save_v019 { usec_t usec_since_last_update; // NEEDS TO BE UPDATED - maintained on load struct timeval last_updated; // NEEDS TO BE UPDATED - check to reset all - fixed on load struct timeval last_collected_time; // ignored - long long collected_total; // NEEDS TO BE UPDATED - maintained on load - long long last_collected_total; // NEEDS TO BE UPDATED - maintained on load + long long collected_total; // ignored + long long last_collected_total; // ignored void *rrdfamily; // ignored void *rrdhost; // ignored void *next; // ignored @@ -1991,8 +2056,6 @@ void rrdset_memory_file_update(RRDSET *st) { st_on_file->usec_since_last_update = st->usec_since_last_update; st_on_file->last_updated.tv_sec = st->last_updated.tv_sec; st_on_file->last_updated.tv_usec = st->last_updated.tv_usec; - st_on_file->collected_total = st->collected_total; - st_on_file->last_collected_total = st->last_collected_total; } const char *rrdset_cache_filename(RRDSET *st) { @@ -2008,7 +2071,7 @@ void rrdset_memory_file_free(RRDSET *st) { rrdset_memory_file_update(st); struct rrdset_map_save_v019 *st_on_file = st->st_on_file; - munmap(st_on_file, st_on_file->memsize); + netdata_munmap(st_on_file, st_on_file->memsize); // remove the pointers from the RRDDIM st->st_on_file = NULL; @@ -2047,8 +2110,8 @@ bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mo info("Initializing file '%s'.", fullfilename); memset(st_on_file, 0, size); } - else if(strncmp(st_on_file->id, st->id, RRD_ID_LENGTH_MAX_V019) != 0) { - error("File '%s' contents are not for chart '%s'. Clearing it.", fullfilename, st->id); + else if(strncmp(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019) != 0) { + error("File '%s' contents are not for chart '%s'. Clearing it.", fullfilename, rrdset_id(st)); memset(st_on_file, 0, size); } else if(st_on_file->memsize != size || st_on_file->entries != st->entries) { @@ -2084,8 +2147,6 @@ bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mo st->usec_since_last_update = st_on_file->usec_since_last_update; st->last_updated.tv_sec = st_on_file->last_updated.tv_sec; st->last_updated.tv_usec = st_on_file->last_updated.tv_usec; - st->collected_total = st_on_file->collected_total; - st->last_collected_total = st_on_file->last_collected_total; // link it to st st->st_on_file = st_on_file; @@ -2094,7 +2155,7 @@ bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mo memset(st_on_file, 0, size); // set the values we need - strncpyz(st_on_file->id, st->id, RRD_ID_LENGTH_MAX_V019 + 1); + strncpyz(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019 + 1); strcpy(st_on_file->cache_filename, fullfilename); strcpy(st_on_file->magic, RRDSET_MAGIC_V019); st_on_file->memsize = size; diff --git a/database/rrdsetvar.c b/database/rrdsetvar.c index e520764a2..22cf8a1f0 100644 --- a/database/rrdsetvar.c +++ b/database/rrdsetvar.c @@ -1,190 +1,293 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_HEALTH_INTERNALS #include "rrd.h" -// ---------------------------------------------------------------------------- -// RRDSETVAR management -// CHART VARIABLES +typedef struct rrdsetvar { + STRING *name; // variable name + void *value; // we need this to maintain the allocation for custom chart variables -static inline void rrdsetvar_free_variables(RRDSETVAR *rs) { - RRDSET *st = rs->rrdset; + const RRDVAR_ACQUIRED *rrdvar_local; + const RRDVAR_ACQUIRED *rrdvar_family_chart_id; + const RRDVAR_ACQUIRED *rrdvar_family_chart_name; + const RRDVAR_ACQUIRED *rrdvar_host_chart_id; + const RRDVAR_ACQUIRED *rrdvar_host_chart_name; + + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDSETVAR; + +// should only be called while the rrdsetvar dict is write locked +// otherwise, 2+ threads may be setting the same variables at the same time +static inline void rrdsetvar_free_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) { RRDHOST *host = st->rrdhost; // ------------------------------------------------------------------------ // CHART - rrdvar_free(host, &st->rrdvar_root_index, rs->var_local); - rs->var_local = NULL; + + if(st->rrdvars) { + rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local); + rs->rrdvar_local = NULL; + } // ------------------------------------------------------------------------ // FAMILY - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family); - rs->var_family = NULL; - rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); - rs->var_family_name = NULL; + if(st->rrdfamily) { + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_id); + rs->rrdvar_family_chart_id = NULL; + + rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_name); + rs->rrdvar_family_chart_name = NULL; + } // ------------------------------------------------------------------------ // HOST - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host); - rs->var_host = NULL; - rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_name); - rs->var_host_name = NULL; + if(host->rrdvars && host->health_enabled) { + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id); + rs->rrdvar_host_chart_id = NULL; - // ------------------------------------------------------------------------ - // KEYS - freez(rs->key_fullid); - rs->key_fullid = NULL; - - freez(rs->key_fullname); - rs->key_fullname = NULL; + rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name); + rs->rrdvar_host_chart_name = NULL; + } } -static inline void rrdsetvar_create_variables(RRDSETVAR *rs) { - RRDSET *st = rs->rrdset; +// should only be called while the rrdsetvar dict is write locked +// otherwise, 2+ threads may be setting the same variables at the same time +static inline void rrdsetvar_update_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) { RRDHOST *host = st->rrdhost; - RRDVAR_OPTIONS options = rs->options; - if(rs->options & RRDVAR_OPTION_ALLOCATED) - options &= ~ RRDVAR_OPTION_ALLOCATED; + RRDVAR_FLAGS options = rs->flags; + options &= ~RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR; // ------------------------------------------------------------------------ // free the old ones (if any) - rrdsetvar_free_variables(rs); + rrdsetvar_free_rrdvars_unsafe(st, rs); // ------------------------------------------------------------------------ // KEYS char buffer[RRDVAR_MAX_LENGTH + 1]; - snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rs->variable); - rs->key_fullid = strdupz(buffer); + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), string2str(rs->name)); + STRING *key_chart_id = string_strdupz(buffer); - snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rs->variable); - rs->key_fullname = strdupz(buffer); + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), string2str(rs->name)); + STRING *key_chart_name = string_strdupz(buffer); // ------------------------------------------------------------------------ // CHART - rs->var_local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->variable, rs->type, options, rs->value); + + if(st->rrdvars) { + rs->rrdvar_local = rrdvar_add_and_acquire("local", st->rrdvars, rs->name, rs->type, options, rs->value); + } // ------------------------------------------------------------------------ // FAMILY - rs->var_family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_fullid, rs->type, options, rs->value); - rs->var_family_name = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_fullname, rs->type, options, rs->value); + + if(st->rrdfamily) { + rs->rrdvar_family_chart_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_id, rs->type, options, rs->value); + rs->rrdvar_family_chart_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_name, rs->type, options, rs->value); + } // ------------------------------------------------------------------------ // HOST - rs->var_host = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullid, rs->type, options, rs->value); - rs->var_host_name = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullname, rs->type, options, rs->value); -} -RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, RRDVAR_TYPE type, void *value, RRDVAR_OPTIONS options) { - debug(D_VARIABLES, "RRDVARSET create for chart id '%s' name '%s' with variable name '%s'", st->id, st->name, variable); - RRDSETVAR *rs = (RRDSETVAR *)callocz(1, sizeof(RRDSETVAR)); + if(host->rrdvars && host->health_enabled) { + rs->rrdvar_host_chart_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id, rs->type, options, rs->value); + rs->rrdvar_host_chart_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name, rs->type, options, rs->value); + } - rs->variable = strdupz(variable); - rs->hash = simple_hash(rs->variable); - rs->type = type; - rs->value = value; - rs->options = options; - rs->rrdset = st; + // free the keys + string_freez(key_chart_id); + string_freez(key_chart_name); +} - rs->next = st->variables; - st->variables = rs; +static void rrdsetvar_free_value_unsafe(RRDSETVAR *rs) { + if(rs->flags & RRDVAR_FLAG_ALLOCATED) { + void *old = rs->value; + rs->value = NULL; + rs->flags &= ~RRDVAR_FLAG_ALLOCATED; + freez(old); + } +} - rrdsetvar_create_variables(rs); +static void rrdsetvar_set_value_unsafe(RRDSETVAR *rs, void *new_value) { + rrdsetvar_free_value_unsafe(rs); - return rs; + if(new_value) + rs->value = new_value; + else { + NETDATA_DOUBLE *n = mallocz(sizeof(NETDATA_DOUBLE)); + *n = NAN; + rs->value = n; + rs->flags |= RRDVAR_FLAG_ALLOCATED; + } } -void rrdsetvar_rename_all(RRDSET *st) { - debug(D_VARIABLES, "RRDSETVAR rename for chart id '%s' name '%s'", st->id, st->name); +struct rrdsetvar_constructor { + RRDSET *rrdset; + const char *name; + void *value; + RRDVAR_FLAGS flags :16; + RRDVAR_TYPE type:8; +}; - RRDSETVAR *rs; - for(rs = st->variables; rs ; rs = rs->next) - rrdsetvar_create_variables(rs); +static void rrdsetvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *constructor_data) { + RRDSETVAR *rs = rrdsetvar; + struct rrdsetvar_constructor *ctr = constructor_data; + + ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; - rrdsetcalc_link_matching(st); + rs->name = string_strdupz(ctr->name); + rs->type = ctr->type; + rs->flags = ctr->flags; + rrdsetvar_set_value_unsafe(rs, ctr->value); + + // create the rrdvariables while we are having a write lock to the dictionary + rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs); } -void rrdsetvar_free(RRDSETVAR *rs) { - RRDSET *st = rs->rrdset; - debug(D_VARIABLES, "RRDSETVAR free for chart id '%s' name '%s', variable '%s'", st->id, st->name, rs->variable); +static bool rrdsetvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *new_rrdsetvar __maybe_unused, void *constructor_data) { + RRDSETVAR *rs = rrdsetvar; + struct rrdsetvar_constructor *ctr = constructor_data; - if(st->variables == rs) { - st->variables = rs->next; - } - else { - RRDSETVAR *t; - for (t = st->variables; t && t->next != rs; t = t->next); - if(!t) error("RRDSETVAR '%s' not found in chart '%s' variables linked list", rs->key_fullname, st->id); - else t->next = rs->next; + ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + RRDVAR_FLAGS options = rs->flags; + options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; + + if(((ctr->value == NULL && rs->value != NULL && rs->flags & RRDVAR_FLAG_ALLOCATED) || (rs->value == ctr->value)) + && ctr->flags == options && rs->type == ctr->type) { + // don't reset it - everything is the same, or as it should... + return false; } - rrdsetvar_free_variables(rs); + internal_error(true, "RRDSETVAR: resetting variable '%s' of chart '%s' of host '%s', options from 0x%x to 0x%x, type from %d to %d", + string2str(rs->name), rrdset_id(ctr->rrdset), rrdhost_hostname(ctr->rrdset->rrdhost), + options, ctr->flags, rs->type, ctr->type); - freez(rs->variable); + rrdsetvar_free_value_unsafe(rs); // we are going to change the options, so free it before setting it + rs->flags = ctr->flags; + rs->type = ctr->type; + rrdsetvar_set_value_unsafe(rs, ctr->value); - if(rs->options & RRDVAR_OPTION_ALLOCATED) - freez(rs->value); + // recreate the rrdvariables while we are having a write lock to the dictionary + rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs); + return true; +} + +static void rrdsetvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *rrdset __maybe_unused) { + RRDSET *st = rrdset; + RRDSETVAR *rs = rrdsetvar; - freez(rs); + rrdsetvar_free_rrdvars_unsafe(st, rs); + rrdsetvar_free_value_unsafe(rs); + string_freez(rs->name); + rs->name = NULL; } -// -------------------------------------------------------------------------------------------------------------------- -// custom chart variables +void rrdsetvar_index_init(RRDSET *st) { + if(!st->rrdsetvar_root_index) { + st->rrdsetvar_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); -RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) { - RRDHOST *host = st->rrdhost; + dictionary_register_insert_callback(st->rrdsetvar_root_index, rrdsetvar_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrdsetvar_root_index, rrdsetvar_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrdsetvar_root_index, rrdsetvar_delete_callback, st); + } +} - char *n = strdupz(name); - rrdvar_fix_name(n); - uint32_t hash = simple_hash(n); +void rrdsetvar_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrdsetvar_root_index); + st->rrdsetvar_root_index = NULL; +} - rrdset_wrlock(st); +const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) { + struct rrdsetvar_constructor tmp = { + .name = name, + .type = type, + .value = value, + .flags = flags, + .rrdset = st, + }; + + const RRDSETVAR_ACQUIRED *rsa = (const RRDSETVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(st->rrdsetvar_root_index, name, -1, NULL, sizeof(RRDSETVAR), &tmp); + return rsa; +} + +void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) { + const RRDSETVAR_ACQUIRED *rsa = rrdsetvar_add_and_acquire(st, name, type, value, flags); + dictionary_acquired_item_release(st->rrdsetvar_root_index, (const DICTIONARY_ITEM *)rsa); +} + +void rrdsetvar_rename_all(RRDSET *st) { + debug(D_VARIABLES, "RRDSETVAR rename for chart id '%s' name '%s'", rrdset_id(st), rrdset_name(st)); - // find it RRDSETVAR *rs; - for(rs = st->variables; rs ; rs = rs->next) { - if(hash == rs->hash && strcmp(n, rs->variable) == 0) { - rrdset_unlock(st); - if(rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR) { - freez(n); - return rs; - } - else { - error("RRDSETVAR: custom variable '%s' on chart '%s' of host '%s', conflicts with an internal chart variable", n, st->id, host->hostname); - freez(n); - return NULL; - } - } + dfe_start_write(st->rrdsetvar_root_index, rs) { + // should only be called while the rrdsetvar dict is write locked + rrdsetvar_update_rrdvars_unsafe(st, rs); } + dfe_done(rs); - // not found, allocate one + rrdcalc_link_matching_alerts_to_rrdset(st); +} - NETDATA_DOUBLE *v = mallocz(sizeof(NETDATA_DOUBLE)); - *v = NAN; +void rrdsetvar_release_and_delete_all(RRDSET *st) { + RRDSETVAR *rs; + dfe_start_write(st->rrdsetvar_root_index, rs) { + dictionary_del_advanced(st->rrdsetvar_root_index, string2str(rs->name), (ssize_t)string_strlen(rs->name) + 1); + } + dfe_done(rs); +} - rs = rrdsetvar_create(st, n, RRDVAR_TYPE_CALCULATED, v, RRDVAR_OPTION_ALLOCATED|RRDVAR_OPTION_CUSTOM_CHART_VAR); - rrdset_unlock(st); +void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa) { + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rsa); +} + +// -------------------------------------------------------------------------------------------------------------------- +// custom chart variables - freez(n); +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) { + STRING *name_string = rrdvar_name_to_string(name); + const RRDSETVAR_ACQUIRED *rs = rrdsetvar_add_and_acquire(st, string2str(name_string), RRDVAR_TYPE_CALCULATED, NULL, RRDVAR_FLAG_CUSTOM_CHART_VAR); + string_freez(name_string); return rs; } -void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rs, NETDATA_DOUBLE value) { - if(rs->type != RRDVAR_TYPE_CALCULATED || !(rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR) || !(rs->options & RRDVAR_OPTION_ALLOCATED)) { +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) { + if(!rsa) return; + + RRDSETVAR *rs = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rsa); + + if(rs->type != RRDVAR_TYPE_CALCULATED || !(rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR) || !(rs->flags & RRDVAR_FLAG_ALLOCATED)) { error("RRDSETVAR: requested to set variable '%s' of chart '%s' on host '%s' to value " NETDATA_DOUBLE_FORMAT - " but the variable is not a custom chart one.", rs->variable, rs->rrdset->id, rs->rrdset->rrdhost->hostname, value); + " but the variable is not a custom chart one (it has options 0x%x, value pointer %p). Ignoring request.", string2str(rs->name), rrdset_id(st), rrdhost_hostname(st->rrdhost), value, (uint32_t)rs->flags, rs->value); } else { NETDATA_DOUBLE *v = rs->value; if(*v != value) { *v = value; + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES); + } + } +} + +void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES); - // mark the chart to be sent upstream - rrdset_flag_clear(rs->rrdset, RRDSET_FLAG_UPSTREAM_EXPOSED); + // send the chart local custom variables + RRDSETVAR *rs; + dfe_start_read(st->rrdsetvar_root_index, rs) { + if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR)) { + NETDATA_DOUBLE *value = (NETDATA_DOUBLE *) rs->value; + + buffer_sprintf(wb + , "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n" + , string2str(rs->name) + , *value + ); } } + dfe_done(rs); } diff --git a/database/rrdsetvar.h b/database/rrdsetvar.h index 37f4da959..0c2e66712 100644 --- a/database/rrdsetvar.h +++ b/database/rrdsetvar.h @@ -11,34 +11,20 @@ // This means, there will be no speed penalty for using // these variables -struct rrdsetvar { - char *variable; // variable name - uint32_t hash; // variable name hash +void rrdsetvar_index_init(RRDSET *st); +void rrdsetvar_index_destroy(RRDSET *st); +void rrdsetvar_release_and_delete_all(RRDSET *st); - char *key_fullid; // chart type.chart id.variable - char *key_fullname; // chart type.chart name.variable +#define rrdsetvar_custom_chart_variable_release(st, rsa) rrdsetvar_release((st)->rrdsetvar_root_index, rsa) +void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa); - RRDVAR_TYPE type; - void *value; +const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name); +void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value); - RRDVAR_OPTIONS options; +void rrdsetvar_rename_all(RRDSET *st); +const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags); +void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags); - RRDVAR *var_local; - RRDVAR *var_family; - RRDVAR *var_host; - RRDVAR *var_family_name; - RRDVAR *var_host_name; - - struct rrdset *rrdset; - - struct rrdsetvar *next; -}; - -extern RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name); -extern void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rv, NETDATA_DOUBLE value); - -extern void rrdsetvar_rename_all(RRDSET *st); -extern RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, RRDVAR_TYPE type, void *value, RRDVAR_OPTIONS options); -extern void rrdsetvar_free(RRDSETVAR *rs); +void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb); #endif //NETDATA_RRDSETVAR_H diff --git a/database/rrdvar.c b/database/rrdvar.c index d4dda1079..28be4f6a1 100644 --- a/database/rrdvar.c +++ b/database/rrdvar.c @@ -1,8 +1,19 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#define NETDATA_HEALTH_INTERNALS #include "rrd.h" +// the variables as stored in the variables indexes +// there are 3 indexes: +// 1. at each chart (RRDSET.rrdvar_root_index) +// 2. at each context (RRDFAMILY.rrdvar_root_index) +// 3. at each host (RRDHOST.rrdvar_root_index) +typedef struct rrdvar { + STRING *name; + void *value; + RRDVAR_FLAGS flags:24; + RRDVAR_TYPE type:8; +} RRDVAR; + // ---------------------------------------------------------------------------- // RRDVAR management @@ -20,168 +31,153 @@ inline int rrdvar_fix_name(char *variable) { return fixed; } -int rrdvar_compare(void* a, void* b) { - if(((RRDVAR *)a)->hash < ((RRDVAR *)b)->hash) return -1; - else if(((RRDVAR *)a)->hash > ((RRDVAR *)b)->hash) return 1; - else return strcmp(((RRDVAR *)a)->name, ((RRDVAR *)b)->name); +inline STRING *rrdvar_name_to_string(const char *name) { + char *variable = strdupz(name); + rrdvar_fix_name(variable); + STRING *name_string = string_strdupz(variable); + freez(variable); + return name_string; } -static inline RRDVAR *rrdvar_index_add(avl_tree_lock *tree, RRDVAR *rv) { - RRDVAR *ret = (RRDVAR *)avl_insert_lock(tree, (avl_t *)(rv)); - if(ret != rv) - debug(D_VARIABLES, "Request to insert RRDVAR '%s' into index failed. Already exists.", rv->name); +struct rrdvar_constructor { + STRING *name; + void *value; + RRDVAR_FLAGS options:16; + RRDVAR_TYPE type:8; - return ret; -} + enum { + RRDVAR_REACT_NONE = 0, + RRDVAR_REACT_NEW = (1 << 0), + } react_action; +}; + +static void rrdvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *constructor_data) { + RRDVAR *rv = rrdvar; + struct rrdvar_constructor *ctr = constructor_data; -static inline RRDVAR *rrdvar_index_del(avl_tree_lock *tree, RRDVAR *rv) { - RRDVAR *ret = (RRDVAR *)avl_remove_lock(tree, (avl_t *)(rv)); - if(!ret) - error("Request to remove RRDVAR '%s' from index failed. Not Found.", rv->name); + ctr->options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS; - return ret; + rv->name = string_dup(ctr->name); + rv->type = ctr->type; + rv->flags = ctr->options; + + if(!ctr->value) { + NETDATA_DOUBLE *v = mallocz(sizeof(NETDATA_DOUBLE)); + *v = NAN; + rv->value = v; + rv->flags |= RRDVAR_FLAG_ALLOCATED; + } + else + rv->value = ctr->value; + + ctr->react_action = RRDVAR_REACT_NEW; } -static inline RRDVAR *rrdvar_index_find(avl_tree_lock *tree, const char *name, uint32_t hash) { - RRDVAR tmp; - tmp.name = (char *)name; - tmp.hash = (hash)?hash:simple_hash(tmp.name); +static void rrdvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *nothing __maybe_unused) { + RRDVAR *rv = rrdvar; - return (RRDVAR *)avl_search_lock(tree, (avl_t *)&tmp); + if(rv->flags & RRDVAR_FLAG_ALLOCATED) + freez(rv->value); + + string_freez(rv->name); + rv->name = NULL; } -inline void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv) { - (void)host; +DICTIONARY *rrdvariables_create(void) { + DICTIONARY *dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); - if(!rv) return; + dictionary_register_insert_callback(dict, rrdvar_insert_callback, NULL); + dictionary_register_delete_callback(dict, rrdvar_delete_callback, NULL); - if(tree) { - debug(D_VARIABLES, "Deleting variable '%s'", rv->name); - if(unlikely(!rrdvar_index_del(tree, rv))) - error("RRDVAR: Attempted to delete variable '%s' from host '%s', but it is not found.", rv->name, host->hostname); - } + return dict; +} - if(rv->options & RRDVAR_OPTION_ALLOCATED) - freez(rv->value); +void rrdvariables_destroy(DICTIONARY *dict) { + dictionary_destroy(dict); +} - freez(rv->name); - freez(rv); +static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) { + return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(dict, string2str(name), (ssize_t)string_strlen(name) + 1); } -inline RRDVAR *rrdvar_create_and_index(const char *scope __maybe_unused, avl_tree_lock *tree, const char *name, - RRDVAR_TYPE type, RRDVAR_OPTIONS options, void *value) { - char *variable = strdupz(name); - rrdvar_fix_name(variable); - uint32_t hash = simple_hash(variable); - - RRDVAR *rv = rrdvar_index_find(tree, variable, hash); - if(unlikely(!rv)) { - debug(D_VARIABLES, "Variable '%s' not found in scope '%s'. Creating a new one.", variable, scope); - - rv = callocz(1, sizeof(RRDVAR)); - rv->name = variable; - rv->hash = hash; - rv->type = type; - rv->options = options; - rv->value = value; - rv->last_updated = now_realtime_sec(); - - RRDVAR *ret = rrdvar_index_add(tree, rv); - if(unlikely(ret != rv)) { - debug(D_VARIABLES, "Variable '%s' in scope '%s' already exists", variable, scope); - freez(rv); - freez(variable); - rv = NULL; - } - else - debug(D_VARIABLES, "Variable '%s' created in scope '%s'", variable, scope); - } - else { - debug(D_VARIABLES, "Variable '%s' is already found in scope '%s'.", variable, scope); +inline void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) { + if(unlikely(!dict || !rva)) return; - // already exists - freez(variable); + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); - // this is important - // it must return NULL - not the existing variable - or double-free will happen - rv = NULL; - } + dictionary_del_advanced(dict, string2str(rv->name), (ssize_t)string_strlen(rv->name) + 1); - return rv; + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva); } -void rrdvar_free_remaining_variables(RRDHOST *host, avl_tree_lock *tree_lock) { - // This is not bullet proof - avl should support some means to destroy it - // with a callback for each item already in the index +inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope __maybe_unused, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value) { + if(unlikely(!dict || !name)) return NULL; - RRDVAR *rv, *last = NULL; - while((rv = (RRDVAR *)tree_lock->avl_tree.root)) { - if(unlikely(rv == last)) { - error("RRDVAR: INTERNAL ERROR: Cannot cleanup tree of RRDVARs"); - break; - } - last = rv; - rrdvar_free(host, tree_lock, rv); - } + struct rrdvar_constructor tmp = { + .name = name, + .value = value, + .type = type, + .options = options, + .react_action = RRDVAR_REACT_NONE, + }; + return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(dict, string2str(name), (ssize_t)string_strlen(name) + 1, NULL, sizeof(RRDVAR), &tmp); +} + +void rrdvar_delete_all(DICTIONARY *dict) { + dictionary_flush(dict); } + // ---------------------------------------------------------------------------- // CUSTOM HOST VARIABLES -inline int rrdvar_callback_for_all_host_variables(RRDHOST *host, int (*callback)(void * /*rrdvar*/, void * /*data*/), void *data) { - return avl_traverse_lock(&host->rrdvar_root_index, callback, data); +inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) { + if(unlikely(!dict)) return 0; // when health is not enabled + return dictionary_walkthrough_read(dict, callback, data); } -static RRDVAR *rrdvar_custom_variable_create(const char *scope, avl_tree_lock *tree_lock, const char *name) { - NETDATA_DOUBLE *v = callocz(1, sizeof(NETDATA_DOUBLE)); - *v = NAN; - - RRDVAR *rv = rrdvar_create_and_index(scope, tree_lock, name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_CUSTOM_HOST_VAR|RRDVAR_OPTION_ALLOCATED, v); - if(unlikely(!rv)) { - freez(v); - debug(D_VARIABLES, "Requested variable '%s' already exists - possibly 2 plugins are updating it at the same time.", name); +const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name) { + DICTIONARY *dict = host->rrdvars; + if(unlikely(!dict)) return NULL; // when health is not enabled - char *variable = strdupz(name); - rrdvar_fix_name(variable); - uint32_t hash = simple_hash(variable); + STRING *name_string = rrdvar_name_to_string(name); - // find the existing one to return it - rv = rrdvar_index_find(tree_lock, variable, hash); + const RRDVAR_ACQUIRED *rva = rrdvar_add_and_acquire("host", dict, name_string, RRDVAR_TYPE_CALCULATED, RRDVAR_FLAG_CUSTOM_HOST_VAR, NULL); - freez(variable); - } - - return rv; + string_freez(name_string); + return rva; } -RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name) { - return rrdvar_custom_variable_create("host", &host->rrdvar_root_index, name); -} +void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) { + if(unlikely(!host->rrdvars || !rva)) return; // when health is not enabled -void rrdvar_custom_host_variable_set(RRDHOST *host, RRDVAR *rv, NETDATA_DOUBLE value) { - if(rv->type != RRDVAR_TYPE_CALCULATED || !(rv->options & RRDVAR_OPTION_CUSTOM_HOST_VAR) || !(rv->options & RRDVAR_OPTION_ALLOCATED)) - error("requested to set variable '%s' to value " NETDATA_DOUBLE_FORMAT " but the variable is not a custom one.", rv->name, value); + if(rrdvar_type(rva) != RRDVAR_TYPE_CALCULATED || !(rrdvar_flags(rva) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_ALLOCATED))) + error("requested to set variable '%s' to value " NETDATA_DOUBLE_FORMAT " but the variable is not a custom one.", rrdvar_name(rva), value); else { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); NETDATA_DOUBLE *v = rv->value; if(*v != value) { *v = value; - rv->last_updated = now_realtime_sec(); - // if the host is streaming, send this variable upstream immediately - rrdpush_sender_send_this_host_variable_now(host, rv); + rrdpush_sender_send_this_host_variable_now(host, rva); } } } -int foreach_host_variable_callback(RRDHOST *host, int (*callback)(RRDVAR * /*rv*/, void * /*data*/), void *data) { - return avl_traverse_lock(&host->rrdvar_root_index, (int (*)(void *, void *))callback, data); +void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) { + if(unlikely(!dict || !rva)) return; // when health is not enabled + dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva); } // ---------------------------------------------------------------------------- // RRDVAR lookup -NETDATA_DOUBLE rrdvar2number(RRDVAR *rv) { +NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) { + if(unlikely(!rva)) return NAN; + + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + switch(rv->type) { case RRDVAR_TYPE_CALCULATED: { NETDATA_DOUBLE *n = (NETDATA_DOUBLE *)rv->value; @@ -190,17 +186,17 @@ NETDATA_DOUBLE rrdvar2number(RRDVAR *rv) { case RRDVAR_TYPE_TIME_T: { time_t *n = (time_t *)rv->value; - return *n; + return (NETDATA_DOUBLE)*n; } case RRDVAR_TYPE_COLLECTED: { collected_number *n = (collected_number *)rv->value; - return *n; + return (NETDATA_DOUBLE)*n; } case RRDVAR_TYPE_TOTAL: { total_number *n = (total_number *)rv->value; - return *n; + return (NETDATA_DOUBLE)*n; } case RRDVAR_TYPE_INT: { @@ -214,28 +210,31 @@ NETDATA_DOUBLE rrdvar2number(RRDVAR *rv) { } } -int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, NETDATA_DOUBLE *result) { +int health_variable_lookup(STRING *variable, RRDCALC *rc, NETDATA_DOUBLE *result) { RRDSET *st = rc->rrdset; if(!st) return 0; RRDHOST *host = st->rrdhost; - RRDVAR *rv; + const RRDVAR_ACQUIRED *rva; - rv = rrdvar_index_find(&st->rrdvar_root_index, variable, hash); - if(rv) { - *result = rrdvar2number(rv); + rva = rrdvar_get_and_acquire(st->rrdvars, variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(st->rrdvars, (const DICTIONARY_ITEM *)rva); return 1; } - rv = rrdvar_index_find(&st->rrdfamily->rrdvar_root_index, variable, hash); - if(rv) { - *result = rrdvar2number(rv); + rva = rrdvar_get_and_acquire(rrdfamily_rrdvars_dict(st->rrdfamily), variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(rrdfamily_rrdvars_dict(st->rrdfamily), (const DICTIONARY_ITEM *)rva); return 1; } - rv = rrdvar_index_find(&host->rrdvar_root_index, variable, hash); - if(rv) { - *result = rrdvar2number(rv); + rva = rrdvar_get_and_acquire(host->rrdvars, variable); + if(rva) { + *result = rrdvar2number(rva); + dictionary_acquired_item_release(host->rrdvars, (const DICTIONARY_ITEM *)rva); return 1; } @@ -248,19 +247,19 @@ int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, NET struct variable2json_helper { BUFFER *buf; size_t counter; - RRDVAR_OPTIONS options; + RRDVAR_FLAGS options; }; -static int single_variable2json(void *entry, void *data) { - struct variable2json_helper *helper = (struct variable2json_helper *)data; - RRDVAR *rv = (RRDVAR *)entry; - NETDATA_DOUBLE value = rrdvar2number(rv); +static int single_variable2json_callback(const DICTIONARY_ITEM *item __maybe_unused, void *entry __maybe_unused, void *helper_data) { + struct variable2json_helper *helper = (struct variable2json_helper *)helper_data; + const RRDVAR_ACQUIRED *rva = (const RRDVAR_ACQUIRED *)item; + NETDATA_DOUBLE value = rrdvar2number(rva); - if (helper->options == RRDVAR_OPTION_DEFAULT || rv->options & helper->options) { + if (helper->options == RRDVAR_FLAG_NONE || rrdvar_flags(rva) & helper->options) { if(unlikely(isnan(value) || isinf(value))) - buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": null", helper->counter?",":"", rv->name); + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": null", helper->counter?",":"", rrdvar_name(rva)); else - buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5" NETDATA_DOUBLE_MODIFIER, helper->counter?",":"", rv->name, (NETDATA_DOUBLE)value); + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5" NETDATA_DOUBLE_MODIFIER, helper->counter?",":"", rrdvar_name(rva), (NETDATA_DOUBLE)value); helper->counter++; } @@ -272,11 +271,10 @@ void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf) { struct variable2json_helper helper = { .buf = buf, .counter = 0, - .options = RRDVAR_OPTION_CUSTOM_CHART_VAR - }; + .options = RRDVAR_FLAG_CUSTOM_CHART_VAR}; buffer_sprintf(buf, "{"); - avl_traverse_lock(&st->rrdvar_root_index, single_variable2json, (void *)&helper); + rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &helper); buffer_strcat(buf, "\n\t\t\t}"); } @@ -286,20 +284,34 @@ void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) { struct variable2json_helper helper = { .buf = buf, .counter = 0, - .options = RRDVAR_OPTION_DEFAULT - }; + .options = RRDVAR_FLAG_NONE}; - buffer_sprintf(buf, "{\n\t\"chart\": \"%s\",\n\t\"chart_name\": \"%s\",\n\t\"chart_context\": \"%s\",\n\t\"chart_variables\": {", st->id, st->name, st->context); - avl_traverse_lock(&st->rrdvar_root_index, single_variable2json, (void *)&helper); + buffer_sprintf(buf, "{\n\t\"chart\": \"%s\",\n\t\"chart_name\": \"%s\",\n\t\"chart_context\": \"%s\",\n\t\"chart_variables\": {", rrdset_id(st), rrdset_name(st), rrdset_context(st)); + rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &helper); - buffer_sprintf(buf, "\n\t},\n\t\"family\": \"%s\",\n\t\"family_variables\": {", st->family); + buffer_sprintf(buf, "\n\t},\n\t\"family\": \"%s\",\n\t\"family_variables\": {", rrdset_family(st)); helper.counter = 0; - avl_traverse_lock(&st->rrdfamily->rrdvar_root_index, single_variable2json, (void *)&helper); + rrdvar_walkthrough_read(rrdfamily_rrdvars_dict(st->rrdfamily), single_variable2json_callback, &helper); - buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", host->hostname); + buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", rrdhost_hostname(host)); helper.counter = 0; - avl_traverse_lock(&host->rrdvar_root_index, single_variable2json, (void *)&helper); + rrdvar_walkthrough_read(host->rrdvars, single_variable2json_callback, &helper); buffer_strcat(buf, "\n\t}\n}\n"); } +// ---------------------------------------------------------------------------- +// RRDVAR private members examination + +const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) { + return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva); +} + +RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva) { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + return rv->flags; +} +RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva) { + RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva); + return rv->type; +} diff --git a/database/rrdvar.h b/database/rrdvar.h index 9074edcdb..a511c732d 100644 --- a/database/rrdvar.h +++ b/database/rrdvar.h @@ -5,62 +5,67 @@ #include "libnetdata/libnetdata.h" -extern int rrdvar_compare(void *a, void *b); - typedef enum rrdvar_type { RRDVAR_TYPE_CALCULATED = 1, RRDVAR_TYPE_TIME_T = 2, RRDVAR_TYPE_COLLECTED = 3, RRDVAR_TYPE_TOTAL = 4, RRDVAR_TYPE_INT = 5 + + // this is 8 bit + // to increase it you have to set change the bitfield in + // rrdvar, rrdsetvar, rrddimvar } RRDVAR_TYPE; typedef enum rrdvar_options { - RRDVAR_OPTION_DEFAULT = 0, - RRDVAR_OPTION_ALLOCATED = (1 << 0), // the value ptr is allocated (not a reference) - RRDVAR_OPTION_CUSTOM_HOST_VAR = (1 << 1), // this is a custom host variable, not associated with a dimension - RRDVAR_OPTION_CUSTOM_CHART_VAR = (1 << 2), // this is a custom chart variable, not associated with a dimension - RRDVAR_OPTION_RRDCALC_LOCAL_VAR = (1 << 3), // this is a an alarm variable, attached to a chart - RRDVAR_OPTION_RRDCALC_FAMILY_VAR = (1 << 4), // this is a an alarm variable, attached to a family - RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR = (1 << 5), // this is a an alarm variable, attached to the host, using the chart id - RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR = (1 << 6), // this is a an alarm variable, attached to the host, using the chart name -} RRDVAR_OPTIONS; - -// the variables as stored in the variables indexes -// there are 3 indexes: -// 1. at each chart (RRDSET.rrdvar_root_index) -// 2. at each context (RRDFAMILY.rrdvar_root_index) -// 3. at each host (RRDHOST.rrdvar_root_index) -struct rrdvar { - avl_t avl; - - char *name; - uint32_t hash; - - RRDVAR_TYPE type; - RRDVAR_OPTIONS options; - - void *value; - - time_t last_updated; -}; + RRDVAR_FLAG_NONE = 0, + RRDVAR_FLAG_ALLOCATED = (1 << 0), // the value ptr is allocated (not a reference) + RRDVAR_FLAG_CUSTOM_HOST_VAR = (1 << 1), // this is a custom host variable, not associated with a dimension + RRDVAR_FLAG_CUSTOM_CHART_VAR = (1 << 2), // this is a custom chart variable, not associated with a dimension + RRDVAR_FLAG_RRDCALC_LOCAL_VAR = (1 << 3), // this is a an alarm variable, attached to a chart + RRDVAR_FLAG_RRDCALC_FAMILY_VAR = (1 << 4), // this is a an alarm variable, attached to a family + RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR = (1 << 5), // this is a an alarm variable, attached to the host, using the chart id + RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR = (1 << 6), // this is a an alarm variable, attached to the host, using the chart name + + // this is 24 bit + // to increase it you have to set change the bitfield in + // rrdvar, rrdsetvar, rrddimvar +} RRDVAR_FLAGS; + +#define RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS \ + (RRDVAR_FLAG_ALLOCATED) + +#define RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR \ + (RRDVAR_FLAG_ALLOCATED) #define RRDVAR_MAX_LENGTH 1024 -extern int rrdvar_fix_name(char *variable); +int rrdvar_fix_name(char *variable); #include "rrd.h" -extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name); -extern void rrdvar_custom_host_variable_set(RRDHOST *host, RRDVAR *rv, NETDATA_DOUBLE value); -extern int foreach_host_variable_callback(RRDHOST *host, int (*callback)(RRDVAR *rv, void *data), void *data); -extern void rrdvar_free_remaining_variables(RRDHOST *host, avl_tree_lock *tree_lock); +STRING *rrdvar_name_to_string(const char *name); + +const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name); +void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value); + +int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data); + +#define rrdvar_custom_host_variable_release(host, rva) rrdvar_release((host)->rrdvars, rva) +void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva); + +NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva); + +const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value); +void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva); -extern int rrdvar_callback_for_all_host_variables(RRDHOST *host, int (*callback)(void *rrdvar, void *data), void *data); +DICTIONARY *rrdvariables_create(void); +void rrdvariables_destroy(DICTIONARY *dict); -extern NETDATA_DOUBLE rrdvar2number(RRDVAR *rv); +void rrdvar_delete_all(DICTIONARY *dict); -extern RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *tree, const char *name, RRDVAR_TYPE type, RRDVAR_OPTIONS options, void *value); -extern void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv); +const char *rrdvar_name(const RRDVAR_ACQUIRED *rva); +RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva); +RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva); #endif //NETDATA_RRDVAR_H diff --git a/database/sqlite/sqlite_aclk.c b/database/sqlite/sqlite_aclk.c index 43b341097..7e3a9b2eb 100644 --- a/database/sqlite/sqlite_aclk.c +++ b/database/sqlite/sqlite_aclk.c @@ -3,59 +3,20 @@ #include "sqlite_functions.h" #include "sqlite_aclk.h" -#include "sqlite_aclk_chart.h" #include "sqlite_aclk_node.h" -#ifdef ENABLE_ACLK -#include "../../aclk/aclk.h" -#endif - void sanity_check(void) { // make sure the compiler will stop on misconfigurations BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < ACLK_MAX_ENUMERATIONS_DEFINED); } const char *aclk_sync_config[] = { - "CREATE TABLE IF NOT EXISTS dimension_delete (dimension_id blob, dimension_name text, chart_type_id text, " - "dim_id blob, chart_id blob, host_id blob, date_created);", - - "CREATE INDEX IF NOT EXISTS ind_h1 ON dimension_delete (host_id);", - - "CREATE TRIGGER IF NOT EXISTS tr_dim_del AFTER DELETE ON dimension BEGIN INSERT INTO dimension_delete " - "(dimension_id, dimension_name, chart_type_id, dim_id, chart_id, host_id, date_created)" - " select old.id, old.name, c.type||\".\"||c.id, old.dim_id, old.chart_id, c.host_id, unixepoch() FROM" - " chart c WHERE c.chart_id = old.chart_id; END;", - - "DELETE FROM dimension_delete WHERE host_id NOT IN" - " (SELECT host_id FROM host) OR unixepoch() - date_created > 604800;", NULL, }; uv_mutex_t aclk_async_lock; struct aclk_database_worker_config *aclk_thread_head = NULL; -int retention_running = 0; - -#ifdef ENABLE_ACLK -static void stop_retention_run() -{ - uv_mutex_lock(&aclk_async_lock); - retention_running = 0; - uv_mutex_unlock(&aclk_async_lock); -} - -static int request_retention_run() -{ - int rc = 0; - uv_mutex_lock(&aclk_async_lock); - if (unlikely(retention_running)) - rc = 1; - else - retention_running = 1; - uv_mutex_unlock(&aclk_async_lock); - return rc; -} -#endif int claimed() { @@ -197,25 +158,6 @@ struct aclk_database_cmd aclk_database_deq_cmd(struct aclk_database_worker_confi return ret; } -int aclk_worker_enq_cmd(char *node_id, struct aclk_database_cmd *cmd) -{ - if (unlikely(!node_id || !cmd)) - return 0; - - uv_mutex_lock(&aclk_async_lock); - struct aclk_database_worker_config *wc = aclk_thread_head; - - while (wc) { - if (!strcmp(wc->node_id, node_id)) - break; - wc = wc->next; - } - uv_mutex_unlock(&aclk_async_lock); - if (wc) - aclk_database_enq_cmd(wc, cmd); - return (wc == NULL); -} - struct aclk_database_worker_config *find_inactive_wc_by_node_id(char *node_id) { if (unlikely(!node_id)) @@ -237,15 +179,14 @@ struct aclk_database_worker_config *find_inactive_wc_by_node_id(char *node_id) void aclk_sync_exit_all() { rrd_rdlock(); - RRDHOST *host = localhost; - while(host) { + RRDHOST *host; + rrdhost_foreach_read(host) { struct aclk_database_worker_config *wc = host->dbsync_worker; if (wc) { wc->is_shutting_down = 1; (void) aclk_database_deq_cmd(wc); uv_cond_signal(&wc->cmd_cond); } - host = host->next; } rrd_unlock(); @@ -304,23 +245,26 @@ static int create_host_callback(void *data, int argc, char **argv, char **column , (const char *) (argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown") , argv[3] ? str2i(argv[IDX_UPDATE_EVERY]) : 1 , argv[13] ? str2i(argv[IDX_ENTRIES]) : 0 - , RRD_MEMORY_MODE_DBENGINE + , default_rrd_memory_mode , 0 // health , 0 // rrdpush enabled , NULL //destination , NULL // api key , NULL // send charts matching + , false // rrdpush_enable_replication + , 0 // rrdpush_seconds_to_replicate + , 0 // rrdpush_replication_step , system_info , 1 ); if (likely(host)) - host->host_labels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]); + host->rrdlabels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]); #ifdef NETDATA_INTERNAL_CHECKS char node_str[UUID_STR_LEN] = "<none>"; if (likely(host->node_id)) uuid_unparse_lower(*host->node_id, node_str); - internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", host->hostname, host->machine_guid, node_str); + internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", rrdhost_hostname(host), host->machine_guid, node_str); #endif return 0; } @@ -335,7 +279,7 @@ int aclk_start_sync_thread(void *data, int argc, char **argv, char **column) uuid_unparse_lower(*((uuid_t *) argv[0]), uuid_str); - RRDHOST *host = rrdhost_find_by_guid(uuid_str, 0); + RRDHOST *host = rrdhost_find_by_guid(uuid_str); if (host == localhost) return 0; @@ -361,7 +305,7 @@ void sql_aclk_sync_init(void) for (int i = 0; aclk_sync_config[i]; i++) { debug(D_ACLK_SYNC, "Executing %s", aclk_sync_config[i]); - rc = sqlite3_exec(db_meta, aclk_sync_config[i], 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, aclk_sync_config[i], 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error aclk sync initialization setup, rc = %d (%s)", rc, err_msg); error_report("SQLite failed statement %s", aclk_sync_config[i]); @@ -372,18 +316,16 @@ void sql_aclk_sync_init(void) info("SQLite aclk sync initialization completed"); fatal_assert(0 == uv_mutex_init(&aclk_async_lock)); - if (likely(rrdcontext_enabled == CONFIG_BOOLEAN_YES)) { - rc = sqlite3_exec(db_meta, "SELECT host_id, hostname, registry_hostname, update_every, os, " - "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " - "program_version, entries, health_enabled FROM host WHERE hops >0;", - create_host_callback, NULL, &err_msg); - if (rc != SQLITE_OK) { - error_report("SQLite error when loading archived hosts, rc = %d (%s)", rc, err_msg); - sqlite3_free(err_msg); - } + rc = sqlite3_exec_monitored(db_meta, "SELECT host_id, hostname, registry_hostname, update_every, os, " + "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " + "program_version, entries, health_enabled FROM host WHERE hops >0;", + create_host_callback, NULL, &err_msg); + if (rc != SQLITE_OK) { + error_report("SQLite error when loading archived hosts, rc = %d (%s)", rc, err_msg); + sqlite3_free(err_msg); } - rc = sqlite3_exec(db_meta, "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni WHERE " + rc = sqlite3_exec_monitored(db_meta, "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni WHERE " "h.host_id = ni.host_id AND ni.node_id IS NOT NULL;", aclk_start_sync_thread, NULL, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error when starting ACLK sync threads, rc = %d (%s)", rc, err_msg); @@ -423,30 +365,6 @@ static void timer_cb(uv_timer_t* handle) } if (aclk_connected) { - if (wc->rotation_after && wc->rotation_after < now) { - cmd.opcode = ACLK_DATABASE_UPD_RETENTION; - if (!aclk_database_enq_cmd_noblock(wc, &cmd)) - wc->rotation_after += ACLK_DATABASE_ROTATION_INTERVAL; - } - - if (wc->chart_updates && !wc->chart_pending && wc->chart_payload_count) { - cmd.opcode = ACLK_DATABASE_PUSH_CHART; - cmd.count = ACLK_MAX_CHART_BATCH; - cmd.param1 = ACLK_MAX_CHART_BATCH_COUNT; - if (!aclk_database_enq_cmd_noblock(wc, &cmd)) { - if (wc->retry_count) - info("Queued chart/dimension payload command %s, retry count = %u", wc->host_guid, wc->retry_count); - wc->chart_pending = 1; - wc->retry_count = 0; - } else { - wc->retry_count++; - if (wc->retry_count % 100 == 0) - error_report("Failed to queue chart/dimension payload command %s, retry count = %u", - wc->host_guid, - wc->retry_count); - } - } - if (wc->alert_updates && !wc->pause_alert_updates) { cmd.opcode = ACLK_DATABASE_PUSH_ALERT; cmd.count = ACLK_MAX_ALERT_UPDATES; @@ -456,52 +374,12 @@ static void timer_cb(uv_timer_t* handle) #endif } - -#ifdef ENABLE_ACLK -void after_send_retention(uv_work_t *req, int status) -{ - struct aclk_database_worker_config *wc = req->data; - (void)status; - stop_retention_run(); - wc->retention_running = 0; - - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = ACLK_DATABASE_DIM_DELETION; - if (aclk_database_enq_cmd_noblock(wc, &cmd)) - info("Failed to queue a dimension deletion message"); - - cmd.opcode = ACLK_DATABASE_NODE_INFO; - if (aclk_database_enq_cmd_noblock(wc, &cmd)) - info("Failed to queue a node update info message"); -} - - -static void send_retention(uv_work_t *req) -{ - struct aclk_database_worker_config *wc = req->data; - - if (unlikely(wc->is_shutting_down)) - return; - - aclk_update_retention(wc); -} -#endif - #define MAX_CMD_BATCH_SIZE (256) void aclk_database_worker(void *arg) { worker_register("ACLKSYNC"); worker_register_job_name(ACLK_DATABASE_NOOP, "noop"); - worker_register_job_name(ACLK_DATABASE_ADD_CHART, "chart add"); - worker_register_job_name(ACLK_DATABASE_ADD_DIMENSION, "dimension add"); - worker_register_job_name(ACLK_DATABASE_PUSH_CHART, "chart push"); - worker_register_job_name(ACLK_DATABASE_PUSH_CHART_CONFIG, "chart conf push"); - worker_register_job_name(ACLK_DATABASE_RESET_CHART, "chart reset"); - worker_register_job_name(ACLK_DATABASE_CHART_ACK, "chart ack"); - worker_register_job_name(ACLK_DATABASE_UPD_RETENTION, "retention check"); - worker_register_job_name(ACLK_DATABASE_DIM_DELETION, "dimension delete"); worker_register_job_name(ACLK_DATABASE_ORPHAN_HOST, "node orphan"); worker_register_job_name(ACLK_DATABASE_ALARM_HEALTH_LOG, "alert log"); worker_register_job_name(ACLK_DATABASE_CLEANUP, "cleanup"); @@ -526,7 +404,7 @@ void aclk_database_worker(void *arg) char threadname[NETDATA_THREAD_NAME_MAX+1]; if (wc->host) - snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->host->hostname); + snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", rrdhost_hostname(wc->host)); else { snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->uuid_str); threadname[11] = '\0'; @@ -556,23 +434,13 @@ void aclk_database_worker(void *arg) timer_req.data = wc; fatal_assert(0 == uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS)); -// wc->retry_count = 0; wc->node_info_send = 1; -// aclk_add_worker_thread(wc); - info("Starting ACLK sync thread for host %s -- scratch area %lu bytes", wc->host_guid, sizeof(*wc)); + info("Starting ACLK sync thread for host %s -- scratch area %lu bytes", wc->host_guid, (unsigned long int) sizeof(*wc)); memset(&cmd, 0, sizeof(cmd)); -#ifdef ENABLE_ACLK - uv_work_t retention_work; - sql_get_last_chart_sequence(wc); - wc->chart_payload_count = sql_get_pending_count(wc); - if (!wc->chart_payload_count) - info("%s: No pending charts and dimensions detected during startup", wc->host_guid); -#endif wc->startup_time = now_realtime_sec(); wc->cleanup_after = wc->startup_time + ACLK_DATABASE_CLEANUP_FIRST; - wc->rotation_after = wc->startup_time + ACLK_DATABASE_ROTATION_DELAY; debug(D_ACLK_SYNC,"Node %s reports pending message count = %u", wc->node_id, wc->chart_payload_count); @@ -604,6 +472,13 @@ void aclk_database_worker(void *arg) // MAINTENANCE case ACLK_DATABASE_CLEANUP: debug(D_ACLK_SYNC, "Database cleanup for %s", wc->host_guid); + + if (wc->startup_time + ACLK_DATABASE_CLEANUP_FIRST + 2 < now_realtime_sec() && claimed() && aclk_connected) { + cmd.opcode = ACLK_DATABASE_NODE_INFO; + cmd.completion = NULL; + (void) aclk_database_enq_cmd_noblock(wc, &cmd); + } + sql_maint_aclk_sync_database(wc, cmd); if (wc->host == localhost) sql_check_aclk_table_list(wc); @@ -614,33 +489,6 @@ void aclk_database_worker(void *arg) sql_delete_aclk_table_list(wc, cmd); break; -// CHART / DIMENSION OPERATIONS -#ifdef ENABLE_ACLK - case ACLK_DATABASE_ADD_CHART: - debug(D_ACLK_SYNC, "Adding chart event for %s", wc->host_guid); - aclk_add_chart_event(wc, cmd); - break; - case ACLK_DATABASE_ADD_DIMENSION: - debug(D_ACLK_SYNC, "Adding dimension event for %s", wc->host_guid); - aclk_add_dimension_event(wc, cmd); - break; - case ACLK_DATABASE_PUSH_CHART: - debug(D_ACLK_SYNC, "Pushing chart info to the cloud for node %s", wc->host_guid); - aclk_send_chart_event(wc, cmd); - break; - case ACLK_DATABASE_PUSH_CHART_CONFIG: - debug(D_ACLK_SYNC, "Pushing chart config info to the cloud for node %s", wc->host_guid); - aclk_send_chart_config(wc, cmd); - break; - case ACLK_DATABASE_CHART_ACK: - debug(D_ACLK_SYNC, "ACK chart SEQ for %s to %"PRIu64, wc->uuid_str, (uint64_t) cmd.param1); - aclk_receive_chart_ack(wc, cmd); - break; - case ACLK_DATABASE_RESET_CHART: - debug(D_ACLK_SYNC, "RESET chart SEQ for %s to %"PRIu64, wc->uuid_str, (uint64_t) cmd.param1); - aclk_receive_chart_reset(wc, cmd); - break; -#endif // ALERTS case ACLK_DATABASE_PUSH_ALERT_CONFIG: debug(D_ACLK_SYNC,"Pushing chart config info to the cloud for %s", wc->host_guid); @@ -673,27 +521,6 @@ void aclk_database_worker(void *arg) sql_build_node_collectors(wc); break; #ifdef ENABLE_ACLK - case ACLK_DATABASE_DIM_DELETION: - debug(D_ACLK_SYNC,"Sending dimension deletion information %s", wc->uuid_str); - aclk_process_dimension_deletion(wc, cmd); - break; - case ACLK_DATABASE_UPD_RETENTION: - if (unlikely(wc->retention_running)) - break; - - if (unlikely(request_retention_run())) { - wc->rotation_after = now_realtime_sec() + ACLK_DATABASE_RETENTION_RETRY; - break; - } - - debug(D_ACLK_SYNC,"Sending retention info for %s", wc->uuid_str); - retention_work.data = wc; - wc->retention_running = 1; - if (unlikely(uv_queue_work(loop, &retention_work, send_retention, after_send_retention))) { - wc->retention_running = 0; - stop_retention_run(); - } - break; // NODE_INSTANCE DETECTION case ACLK_DATABASE_ORPHAN_HOST: @@ -705,14 +532,14 @@ void aclk_database_worker(void *arg) case ACLK_DATABASE_TIMER: if (unlikely(localhost && !wc->host && !wc->is_orphan)) { if (claimed()) { - wc->host = rrdhost_find_by_guid(wc->host_guid, 0); + wc->host = rrdhost_find_by_guid(wc->host_guid); if (wc->host) { - info("HOST %s (%s) detected as active", wc->host->hostname, wc->host_guid); - snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->host->hostname); + info("HOST %s (%s) detected as active", rrdhost_hostname(wc->host), wc->host_guid); + snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", rrdhost_hostname(wc->host)); uv_thread_set_name_np(wc->thread, threadname); wc->host->dbsync_worker = wc; if (unlikely(!wc->hostname)) - wc->hostname = strdupz(wc->host->hostname); + wc->hostname = strdupz(rrdhost_hostname(wc->host)); aclk_del_worker_thread(wc); wc->node_info_send = 1; } @@ -803,30 +630,6 @@ void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id) BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); - buffer_sprintf(sql, TABLE_ACLK_CHART, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TABLE_ACLK_CHART_PAYLOAD, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TABLE_ACLK_CHART_LATEST, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART_LATEST, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TRIGGER_ACLK_CHART_PAYLOAD, uuid_str, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); db_execute(buffer_tostring(sql)); buffer_flush(sql); @@ -844,16 +647,14 @@ void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id) uuid_unparse_lower(*node_id, wc->node_id); if (likely(host)) { host->dbsync_worker = (void *)wc; - wc->hostname = strdupz(host->hostname); + wc->hostname = strdupz(rrdhost_hostname(host)); } else wc->hostname = get_hostname_by_node_id(wc->node_id); wc->host = host; strcpy(wc->uuid_str, uuid_str); strcpy(wc->host_guid, host_guid); - wc->chart_updates = 0; wc->alert_updates = 0; - wc->retry_count = 0; aclk_database_init_cmd_queue(wc); aclk_add_worker_thread(wc); fatal_assert(0 == uv_thread_create(&(wc->thread), aclk_database_worker, wc)); @@ -873,31 +674,9 @@ void sql_maint_aclk_sync_database(struct aclk_database_worker_config *wc, struct BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); - buffer_sprintf(sql,"DELETE FROM aclk_chart_%s WHERE date_submitted IS NOT NULL AND " - "CAST(date_updated AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_DELETE_ACK_INTERNAL); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"DELETE FROM aclk_chart_payload_%s WHERE unique_id NOT IN " - "(SELECT unique_id FROM aclk_chart_%s) AND unique_id NOT IN (SELECT unique_id FROM aclk_chart_latest_%s);", - wc->uuid_str, wc->uuid_str, wc->uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - buffer_sprintf(sql,"DELETE FROM aclk_alert_%s WHERE date_submitted IS NOT NULL AND " "CAST(date_cloud_ack AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_DELETE_ACK_ALERTS_INTERNAL); db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"UPDATE aclk_chart_%s SET status = NULL, date_submitted=unixepoch() WHERE " - "date_submitted IS NULL AND CAST(date_created AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_AUTO_MARK_SUBMIT_INTERVAL); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"UPDATE aclk_chart_%s SET date_updated = unixepoch() WHERE date_updated IS NULL" - " AND date_submitted IS NOT NULL AND CAST(date_submitted AS INT) < unixepoch()-%d;", - wc->uuid_str, ACLK_AUTO_MARK_UPDATED_INTERVAL); - db_execute(buffer_tostring(sql)); buffer_free(sql); return; @@ -927,7 +706,7 @@ static int is_host_available(uuid_t *host_id) error_report("Failed to bind host_id parameter to select node instance information"); goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); failed: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) @@ -980,7 +759,7 @@ void sql_delete_aclk_table_list(struct aclk_database_worker_config *wc, struct a } buffer_flush(sql); - while (sqlite3_step(res) == SQLITE_ROW) + while (sqlite3_step_monitored(res) == SQLITE_ROW) buffer_strcat(sql, (char *) sqlite3_column_text(res, 0)); rc = sqlite3_finalize(res); @@ -1016,42 +795,10 @@ void sql_check_aclk_table_list(struct aclk_database_worker_config *wc) { char *err_msg = NULL; debug(D_ACLK_SYNC,"Cleaning tables for nodes that do not exist"); - int rc = sqlite3_exec(db_meta, SQL_SELECT_ACLK_ACTIVE_LIST, sql_check_aclk_table, (void *) wc, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, SQL_SELECT_ACLK_ACTIVE_LIST, sql_check_aclk_table, (void *) wc, &err_msg); if (rc != SQLITE_OK) { error_report("Query failed when trying to check for obsolete ACLK sync tables, %s", err_msg); sqlite3_free(err_msg); } - db_execute("DELETE FROM dimension_delete WHERE host_id NOT IN (SELECT host_id FROM host) " - " OR unixepoch() - date_created > 604800;"); - return; -} - -void aclk_data_rotated(void) -{ -#ifdef ENABLE_ACLK - - if (!aclk_connected) - return; - - time_t next_rotation_time = now_realtime_sec()+ACLK_DATABASE_ROTATION_DELAY; - rrd_rdlock(); - RRDHOST *this_host = localhost; - while (this_host) { - struct aclk_database_worker_config *wc = this_host->dbsync_worker; - if (wc) - wc->rotation_after = next_rotation_time; - this_host = this_host->next; - } - rrd_unlock(); - - struct aclk_database_worker_config *tmp = aclk_thread_head; - - uv_mutex_lock(&aclk_async_lock); - while (tmp) { - tmp->rotation_after = next_rotation_time; - tmp = tmp->next; - } - uv_mutex_unlock(&aclk_async_lock); -#endif return; } diff --git a/database/sqlite/sqlite_aclk.h b/database/sqlite/sqlite_aclk.h index b73f422e1..06d5d0270 100644 --- a/database/sqlite/sqlite_aclk.h +++ b/database/sqlite/sqlite_aclk.h @@ -5,8 +5,6 @@ #include "sqlite3.h" -// TODO: To be added -#include "../../aclk/schema-wrappers/chart_stream.h" #ifndef ACLK_MAX_CHART_BATCH #define ACLK_MAX_CHART_BATCH (200) @@ -15,15 +13,9 @@ #define ACLK_MAX_CHART_BATCH_COUNT (10) #endif #define ACLK_MAX_ALERT_UPDATES (5) -#define ACLK_DATABASE_CLEANUP_FIRST (60) -#define ACLK_DATABASE_ROTATION_DELAY (180) -#define ACLK_DATABASE_RETENTION_RETRY (60) +#define ACLK_DATABASE_CLEANUP_FIRST (1200) #define ACLK_DATABASE_CLEANUP_INTERVAL (3600) -#define ACLK_DATABASE_ROTATION_INTERVAL (3600) -#define ACLK_DELETE_ACK_INTERNAL (600) #define ACLK_DELETE_ACK_ALERTS_INTERNAL (86400) -#define ACLK_AUTO_MARK_SUBMIT_INTERVAL (3600) -#define ACLK_AUTO_MARK_UPDATED_INTERVAL (1800) #define ACLK_SYNC_QUERY_SIZE 512 struct aclk_completion { @@ -74,57 +66,14 @@ static inline void uuid_unparse_lower_fix(uuid_t *uuid, char *out) out[23] = '_'; } -static inline char *get_str_from_uuid(uuid_t *uuid) -{ - char uuid_str[GUID_LEN + 1]; - if (unlikely(!uuid)) { - uuid_t zero_uuid; - uuid_clear(zero_uuid); - uuid_unparse_lower(zero_uuid, uuid_str); - } - else - uuid_unparse_lower(*uuid, uuid_str); - return strdupz(uuid_str); -} - -#define TABLE_ACLK_CHART "CREATE TABLE IF NOT EXISTS aclk_chart_%s (sequence_id INTEGER PRIMARY KEY, " \ - "date_created, date_updated, date_submitted, status, uuid, type, unique_id, " \ - "update_count default 1, unique(uuid, status));" - -#define TABLE_ACLK_CHART_PAYLOAD "CREATE TABLE IF NOT EXISTS aclk_chart_payload_%s (unique_id BLOB PRIMARY KEY, " \ - "uuid, claim_id, type, date_created, payload);" - -#define TABLE_ACLK_CHART_LATEST "CREATE TABLE IF NOT EXISTS aclk_chart_latest_%s (uuid BLOB PRIMARY KEY, " \ - "unique_id, date_submitted);" - -#define TRIGGER_ACLK_CHART_PAYLOAD "CREATE TRIGGER IF NOT EXISTS aclk_tr_chart_payload_%s " \ - "after insert on aclk_chart_payload_%s " \ - "begin insert into aclk_chart_%s (uuid, unique_id, type, status, date_created) values " \ - " (new.uuid, new.unique_id, new.type, 'pending', unixepoch()) on conflict(uuid, status) " \ - " do update set unique_id = new.unique_id, update_count = update_count + 1; " \ - "end;" - #define TABLE_ACLK_ALERT "CREATE TABLE IF NOT EXISTS aclk_alert_%s (sequence_id INTEGER PRIMARY KEY, " \ - "alert_unique_id, date_created, date_submitted, date_cloud_ack, " \ + "alert_unique_id, date_created, date_submitted, date_cloud_ack, filtered_alert_unique_id NOT NULL, " \ "unique(alert_unique_id));" -#define INDEX_ACLK_CHART "CREATE INDEX IF NOT EXISTS aclk_chart_index_%s ON aclk_chart_%s (unique_id);" - -#define INDEX_ACLK_CHART_LATEST "CREATE INDEX IF NOT EXISTS aclk_chart_latest_index_%s ON aclk_chart_latest_%s (unique_id);" - #define INDEX_ACLK_ALERT "CREATE INDEX IF NOT EXISTS aclk_alert_index_%s ON aclk_alert_%s (alert_unique_id);" - enum aclk_database_opcode { ACLK_DATABASE_NOOP = 0, - ACLK_DATABASE_ADD_CHART, - ACLK_DATABASE_ADD_DIMENSION, - ACLK_DATABASE_PUSH_CHART, - ACLK_DATABASE_PUSH_CHART_CONFIG, - ACLK_DATABASE_RESET_CHART, - ACLK_DATABASE_CHART_ACK, - ACLK_DATABASE_UPD_RETENTION, - ACLK_DATABASE_DIM_DELETION, ACLK_DATABASE_ORPHAN_HOST, ACLK_DATABASE_ALARM_HEALTH_LOG, ACLK_DATABASE_CLEANUP, @@ -142,20 +91,11 @@ enum aclk_database_opcode { ACLK_MAX_ENUMERATIONS_DEFINED }; -struct aclk_chart_payload_t { - long sequence_id; - long last_sequence_id; - char *payload; - struct aclk_chart_payload_t *next; -}; - - struct aclk_database_cmd { enum aclk_database_opcode opcode; void *data; void *data_param; int count; - uint64_t param1; struct aclk_completion *completion; }; @@ -172,12 +112,8 @@ struct aclk_database_worker_config { char node_id[GUID_LEN + 1]; char host_guid[GUID_LEN + 1]; char *hostname; // hostname to avoid constant lookups - uint64_t chart_sequence_id; // last chart_sequence_id - time_t chart_timestamp; // last chart timestamp time_t cleanup_after; // Start a cleanup after this timestamp time_t startup_time; // When the sync thread started - time_t rotation_after; - uint64_t batch_id; // batch id to use uint64_t alerts_batch_id; // batch id for alerts to use uint64_t alerts_start_seq_id; // cloud has asked to start streaming from uint64_t alert_sequence_id; // last alert sequence_id @@ -193,15 +129,9 @@ struct aclk_database_worker_config { uv_cond_t cmd_cond; volatile unsigned queue_size; struct aclk_database_cmdqueue cmd_queue; - uint32_t retry_count; - int chart_updates; int alert_updates; - time_t batch_created; int node_info_send; time_t node_collectors_send; - int chart_pending; - int chart_reset_count; - int retention_running; volatile unsigned is_shutting_down; volatile unsigned is_orphan; struct aclk_database_worker_config *next; @@ -216,23 +146,25 @@ static inline RRDHOST *find_host_by_node_id(char *node_id) if (uuid_parse(node_id, node_uuid)) return NULL; - RRDHOST *host = localhost; - while(host) { - if (host->node_id && !(uuid_compare(*host->node_id, node_uuid))) - return host; - host = host->next; + rrd_rdlock(); + RRDHOST *host, *ret = NULL; + rrdhost_foreach_read(host) { + if (host->node_id && !(uuid_compare(*host->node_id, node_uuid))) { + ret = host; + break; + } } - return NULL; + rrd_unlock(); + + return ret; } extern sqlite3 *db_meta; -extern int aclk_database_enq_cmd_noblock(struct aclk_database_worker_config *wc, struct aclk_database_cmd *cmd); -extern void aclk_database_enq_cmd(struct aclk_database_worker_config *wc, struct aclk_database_cmd *cmd); -extern void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id); -int aclk_worker_enq_cmd(char *node_id, struct aclk_database_cmd *cmd); -void aclk_data_rotated(void); +int aclk_database_enq_cmd_noblock(struct aclk_database_worker_config *wc, struct aclk_database_cmd *cmd); +void aclk_database_enq_cmd(struct aclk_database_worker_config *wc, struct aclk_database_cmd *cmd); +void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id); void sql_aclk_sync_init(void); void sql_check_aclk_table_list(struct aclk_database_worker_config *wc); void sql_delete_aclk_table_list(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c index ea1cc9fea..47663a8d1 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/database/sqlite/sqlite_aclk_alert.c @@ -24,7 +24,7 @@ time_t removed_when(uint32_t alarm_id, uint32_t before_unique_id, uint32_t after return 0; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW)) { when = (time_t) sqlite3_column_int64(res, 0); } @@ -36,7 +36,14 @@ time_t removed_when(uint32_t alarm_id, uint32_t before_unique_id, uint32_t after return when; } -#define MAX_REMOVED_PERIOD 900 +void update_filtered(ALARM_ENTRY *ae, uint32_t unique_id, char *uuid_str) { + char sql[ACLK_SYNC_QUERY_SIZE]; + snprintfz(sql, ACLK_SYNC_QUERY_SIZE-1, "UPDATE aclk_alert_%s SET filtered_alert_unique_id = %u where filtered_alert_unique_id = %u", uuid_str, ae->unique_id, unique_id); + sqlite3_exec_monitored(db_meta, sql, 0, 0, NULL); + ae->flags |= HEALTH_ENTRY_FLAG_ACLK_QUEUED; +} + +#define MAX_REMOVED_PERIOD 86400 //decide if some events should be sent or not int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) { @@ -56,12 +63,13 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) uuid_t config_hash_id; RRDCALC_STATUS status; uint32_t unique_id; - + //get the previous sent event of this alarm_id + //base the search on the last filtered event snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "select hl.new_status, hl.config_hash_id, hl.unique_id from health_log_%s hl, aclk_alert_%s aa \ - where hl.unique_id = aa.alert_unique_id \ - and hl.alarm_id = %u and hl.unique_id <> %u \ - order by alarm_event_id desc LIMIT 1;", uuid_str, uuid_str, ae->alarm_id, ae->unique_id); + where hl.unique_id = aa.filtered_alert_unique_id \ + and hl.alarm_id = %u \ + order by alarm_event_id desc LIMIT 1;", uuid_str, uuid_str, ae->alarm_id); rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0); if (rc != SQLITE_OK) { @@ -70,7 +78,7 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) return send; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW)) { status = (RRDCALC_STATUS) sqlite3_column_int(res, 0); if (sqlite3_column_type(res, 1) != SQLITE_NULL) @@ -93,8 +101,9 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) } //same status, same config - if (ae->new_status == RRDCALC_STATUS_CLEAR) { + if (ae->new_status == RRDCALC_STATUS_CLEAR || ae->new_status == RRDCALC_STATUS_UNDEFINED) { send = 0; + update_filtered(ae, unique_id, uuid_str); goto done; } @@ -107,6 +116,7 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) goto done; } else { send = 0; + update_filtered(ae, unique_id, uuid_str); goto done; } } @@ -130,6 +140,8 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter) return 0; } + CHECK_SQLITE_CONNECTION(db_meta); + if (!skip_filter) { if (!should_send_to_cloud(host, ae)) { return 0; @@ -137,9 +149,6 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter) } int rc = 0; - - CHECK_SQLITE_CONNECTION(db_meta); - sqlite3_stmt *res_alert = NULL; char uuid_str[GUID_LEN + 1]; uuid_unparse_lower_fix(&host->host_uuid, uuid_str); @@ -148,8 +157,8 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter) buffer_sprintf( sql, - "INSERT INTO aclk_alert_%s (alert_unique_id, date_created) " - "VALUES (@alert_unique_id, unixepoch()) on conflict (alert_unique_id) do nothing; ", + "INSERT INTO aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " + "VALUES (@alert_unique_id, unixepoch(), @alert_unique_id) on conflict (alert_unique_id) do nothing; ", uuid_str); rc = sqlite3_prepare_v2(db_meta, buffer_tostring(sql), -1, &res_alert, 0); @@ -220,7 +229,7 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d int rc; if (unlikely(!wc->alert_updates)) { - log_access("ACLK STA [%s (%s)]: Ignoring alert push event, updates have been turned off for this node.", wc->node_id, wc->host ? wc->host->hostname : "N/A"); + log_access("ACLK STA [%s (%s)]: Ignoring alert push event, updates have been turned off for this node.", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A"); return; } @@ -280,7 +289,7 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d static __thread uint64_t log_first_sequence_id = 0; static __thread uint64_t log_last_sequence_id = 0; - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { struct alarm_log_entry alarm_log; char old_value_string[100 + 1]; char new_value_string[100 + 1]; @@ -300,9 +309,9 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d alarm_log.config_hash = strdupz((char *)uuid_str); alarm_log.utc_offset = wc->host->utc_offset; - alarm_log.timezone = strdupz((char *)wc->host->abbrev_timezone); + alarm_log.timezone = strdupz(rrdhost_abbrev_timezone(wc->host)); alarm_log.exec_path = sqlite3_column_bytes(res, 14) > 0 ? strdupz((char *)sqlite3_column_text(res, 14)) : - strdupz((char *)wc->host->health_default_exec); + strdupz((char *)string2str(wc->host->health_default_exec)); alarm_log.conf_source = strdupz((char *)sqlite3_column_text(res, 16)); char *edit_command = sqlite3_column_bytes(res, 16) > 0 ? @@ -374,7 +383,7 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d log_access( "ACLK RES [%s (%s)]: ALERTS SENT from %" PRIu64 " to %" PRIu64 " batch=%" PRIu64, wc->node_id, - wc->host ? wc->host->hostname : "N/A", + wc->host ? rrdhost_hostname(wc->host) : "N/A", log_first_sequence_id, log_last_sequence_id, wc->alerts_batch_id); @@ -401,8 +410,8 @@ void sql_queue_existing_alerts_to_aclk(RRDHOST *host) BUFFER *sql = buffer_create(1024); buffer_sprintf(sql,"delete from aclk_alert_%s; " \ - "insert into aclk_alert_%s (alert_unique_id, date_created) " \ - "select unique_id alert_unique_id, unixepoch() from health_log_%s " \ + "insert into aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \ + "select unique_id alert_unique_id, unixepoch(), unique_id alert_unique_id from health_log_%s " \ "where new_status <> 0 and new_status <> -2 and config_hash_id is not null and updated_by_id = 0 " \ "order by unique_id asc on conflict (alert_unique_id) do nothing;", uuid_str, uuid_str, uuid_str); @@ -424,9 +433,7 @@ void aclk_send_alarm_health_log(char *node_id) struct aclk_database_worker_config *wc = find_inactive_wc_by_node_id(node_id); if (likely(!wc)) { - rrd_rdlock(); RRDHOST *host = find_host_by_node_id(node_id); - rrd_unlock(); if (likely(host)) wc = (struct aclk_database_worker_config *)host->dbsync_worker; } @@ -460,9 +467,7 @@ void aclk_push_alarm_health_log(struct aclk_database_worker_config *wc, struct a RRDHOST *host = wc->host; if (unlikely(!host)) { - rrd_rdlock(); host = find_host_by_node_id(wc->node_id); - rrd_unlock(); if (unlikely(!host)) { log_access( @@ -500,7 +505,7 @@ void aclk_push_alarm_health_log(struct aclk_database_worker_config *wc, struct a last_timestamp.tv_sec = 0; last_timestamp.tv_usec = 0; - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { first_sequence = sqlite3_column_bytes(res, 0) > 0 ? (uint64_t) sqlite3_column_int64(res, 0) : 0; if (sqlite3_column_bytes(res, 1) > 0) { first_timestamp.tv_sec = sqlite3_column_int64(res, 1); @@ -536,8 +541,6 @@ void aclk_push_alarm_health_log(struct aclk_database_worker_config *wc, struct a freez(claim_id); buffer_free(sql); - - aclk_alert_reloaded = 1; #endif return; @@ -554,7 +557,7 @@ void aclk_send_alarm_configuration(char *config_hash) return; } - log_access("ACLK REQ [%s (%s)]: Request to send alert config %s.", wc->node_id, wc->host ? wc->host->hostname : "N/A", config_hash); + log_access("ACLK REQ [%s (%s)]: Request to send alert config %s.", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A", config_hash); struct aclk_database_cmd cmd; memset(&cmd, 0, sizeof(cmd)); @@ -603,7 +606,7 @@ int aclk_push_alert_config_event(struct aclk_database_worker_config *wc, struct struct provide_alarm_configuration p_alarm_config; p_alarm_config.cfg_hash = NULL; - if (sqlite3_step(res) == SQLITE_ROW) { + if (sqlite3_step_monitored(res) == SQLITE_ROW) { alarm_config.alarm = sqlite3_column_bytes(res, 0) > 0 ? strdupz((char *)sqlite3_column_text(res, 0)) : NULL; alarm_config.tmpl = sqlite3_column_bytes(res, 1) > 0 ? strdupz((char *)sqlite3_column_text(res, 1)) : NULL; @@ -664,14 +667,14 @@ int aclk_push_alert_config_event(struct aclk_database_worker_config *wc, struct } if (likely(p_alarm_config.cfg_hash)) { - log_access("ACLK RES [%s (%s)]: Sent alert config %s.", wc->node_id, wc->host ? wc->host->hostname : "N/A", config_hash); + log_access("ACLK RES [%s (%s)]: Sent alert config %s.", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A", config_hash); aclk_send_provide_alarm_cfg(&p_alarm_config); freez((char *) cmd.data_param); freez(p_alarm_config.cfg_hash); destroy_aclk_alarm_configuration(&alarm_config); } else - log_access("ACLK STA [%s (%s)]: Alert config for %s not found.", wc->node_id, wc->host ? wc->host->hostname : "N/A", config_hash); + log_access("ACLK STA [%s (%s)]: Alert config for %s not found.", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A", config_hash); bind_fail: rc = sqlite3_finalize(res); @@ -697,9 +700,7 @@ void aclk_start_alert_streaming(char *node_id, uint64_t batch_id, uint64_t start return; struct aclk_database_worker_config *wc = NULL; - rrd_rdlock(); RRDHOST *host = find_host_by_node_id(node_id); - rrd_unlock(); if (likely(host)) { wc = (struct aclk_database_worker_config *)host->dbsync_worker ? (struct aclk_database_worker_config *)host->dbsync_worker : @@ -716,7 +717,7 @@ void aclk_start_alert_streaming(char *node_id, uint64_t batch_id, uint64_t start wc = (struct aclk_database_worker_config *)find_inactive_wc_by_node_id(node_id); if (likely(wc)) { - log_access("ACLK REQ [%s (%s)]: ALERTS STREAM from %"PRIu64" batch=%"PRIu64, node_id, wc->host ? wc->host->hostname : "N/A", start_seq_id, batch_id); + log_access("ACLK REQ [%s (%s)]: ALERTS STREAM from %"PRIu64" batch=%"PRIu64, node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A", start_seq_id, batch_id); __sync_synchronize(); wc->alerts_batch_id = batch_id; wc->alerts_start_seq_id = start_seq_id; @@ -736,15 +737,15 @@ void sql_process_queue_removed_alerts_to_aclk(struct aclk_database_worker_config BUFFER *sql = buffer_create(1024); - buffer_sprintf(sql,"insert into aclk_alert_%s (alert_unique_id, date_created) " \ - "select unique_id alert_unique_id, unixepoch() from health_log_%s " \ + buffer_sprintf(sql,"insert into aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \ + "select unique_id alert_unique_id, unixepoch(), unique_id alert_unique_id from health_log_%s " \ "where new_status = -2 and updated_by_id = 0 and unique_id not in " \ "(select alert_unique_id from aclk_alert_%s) order by unique_id asc " \ "on conflict (alert_unique_id) do nothing;", wc->uuid_str, wc->uuid_str, wc->uuid_str); db_execute(buffer_tostring(sql)); - log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS", wc->node_id, wc->host ? wc->host->hostname : "N/A"); + log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A"); buffer_free(sql); @@ -780,17 +781,15 @@ void aclk_process_send_alarm_snapshot(char *node_id, char *claim_id, uint64_t sn return; struct aclk_database_worker_config *wc = NULL; - rrd_rdlock(); RRDHOST *host = find_host_by_node_id(node_id); if (likely(host)) wc = (struct aclk_database_worker_config *)host->dbsync_worker; - rrd_unlock(); if (likely(wc)) { log_access( "IN [%s (%s)]: Request to send alerts snapshot, snapshot_id %" PRIu64 " and ack_sequence_id %" PRIu64, wc->node_id, - wc->host ? wc->host->hostname : "N/A", + wc->host ? rrdhost_hostname(wc->host) : "N/A", snapshot_id, sequence_id); if (wc->alerts_snapshot_id == snapshot_id) @@ -831,13 +830,13 @@ void aclk_mark_alert_cloud_ack(char *uuid_str, uint64_t alerts_ack_sequence_id) #ifdef ENABLE_ACLK void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_ENTRY *ae, RRDHOST *host) { - char *edit_command = ae->source ? health_edit_command_from_source(ae->source) : strdupz("UNKNOWN=0=UNKNOWN"); + char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN"); char config_hash_id[GUID_LEN + 1]; uuid_unparse_lower(ae->config_hash_id, config_hash_id); - alarm_log->chart = strdupz((char *)ae->chart); - alarm_log->name = strdupz((char *)ae->name); - alarm_log->family = strdupz((char *)ae->family); + alarm_log->chart = strdupz(ae_chart_name(ae)); + alarm_log->name = strdupz(ae_name(ae)); + alarm_log->family = strdupz(ae_family(ae)); alarm_log->batch_id = 0; alarm_log->sequence_id = 0; @@ -846,9 +845,9 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN alarm_log->config_hash = strdupz((char *)config_hash_id); alarm_log->utc_offset = host->utc_offset; - alarm_log->timezone = strdupz((char *)host->abbrev_timezone); - alarm_log->exec_path = ae->exec ? strdupz((char *)ae->exec) : strdupz((char *)host->health_default_exec); - alarm_log->conf_source = ae->source ? strdupz((char *)ae->source) : strdupz((char *)""); + alarm_log->timezone = strdupz(rrdhost_abbrev_timezone(host)); + alarm_log->exec_path = ae->exec ? strdupz(ae_exec(ae)) : strdupz((char *)string2str(host->health_default_exec)); + alarm_log->conf_source = ae->source ? strdupz(ae_source(ae)) : strdupz((char *)""); alarm_log->command = strdupz((char *)edit_command); @@ -861,31 +860,31 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN alarm_log->last_repeat = (time_t)ae->last_repeat; alarm_log->silenced = - ((ae->flags & HEALTH_ENTRY_FLAG_SILENCED) || (ae->recipient && !strncmp((char *)ae->recipient, "silent", 6))) ? + ((ae->flags & HEALTH_ENTRY_FLAG_SILENCED) || (ae->recipient && !strncmp(ae_recipient(ae), "silent", 6))) ? 1 : 0; - alarm_log->value_string = strdupz(ae->new_value_string); - alarm_log->old_value_string = strdupz(ae->old_value_string); + alarm_log->value_string = strdupz(ae_new_value_string(ae)); + alarm_log->old_value_string = strdupz(ae_old_value_string(ae)); alarm_log->value = (!isnan(ae->new_value)) ? (NETDATA_DOUBLE)ae->new_value : 0; alarm_log->old_value = (!isnan(ae->old_value)) ? (NETDATA_DOUBLE)ae->old_value : 0; alarm_log->updated = (ae->flags & HEALTH_ENTRY_FLAG_UPDATED) ? 1 : 0; - alarm_log->rendered_info = ae->info ? strdupz(ae->info) : strdupz((char *)""); - alarm_log->chart_context = ae->chart_context ? strdupz(ae->chart_context) : strdupz((char *)""); + alarm_log->rendered_info = strdupz(ae_info(ae)); + alarm_log->chart_context = strdupz(ae_chart_context(ae)); freez(edit_command); } #endif #ifdef ENABLE_ACLK -static int have_recent_alarm(RRDHOST *host, uint32_t alarm_id, time_t mark) +static int have_recent_alarm(RRDHOST *host, uint32_t alarm_id, uint32_t mark) { ALARM_ENTRY *ae = host->health_log.alarms; while (ae) { - if (ae->alarm_id == alarm_id && ae->unique_id > mark && + if (ae->alarm_id == alarm_id && ae->unique_id >mark && (ae->new_status != RRDCALC_STATUS_WARNING && ae->new_status != RRDCALC_STATUS_CRITICAL)) return 1; ae = ae->next; @@ -905,7 +904,7 @@ void aclk_push_alert_snapshot_event(struct aclk_database_worker_config *wc, stru UNUSED(cmd); // we perhaps we don't need this for snapshots if (unlikely(!wc->alert_updates)) { - log_access("ACLK STA [%s (%s)]: Ignoring alert snapshot event, updates have been turned off for this node.", wc->node_id, wc->host ? wc->host->hostname : "N/A"); + log_access("ACLK STA [%s (%s)]: Ignoring alert snapshot event, updates have been turned off for this node.", wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A"); return; } @@ -921,7 +920,7 @@ void aclk_push_alert_snapshot_event(struct aclk_database_worker_config *wc, stru if (unlikely(!claim_id)) return; - log_access("ACLK REQ [%s (%s)]: Sending alerts snapshot, snapshot_id %" PRIu64, wc->node_id, wc->host ? wc->host->hostname : "N/A", wc->alerts_snapshot_id); + log_access("ACLK REQ [%s (%s)]: Sending alerts snapshot, snapshot_id %" PRIu64, wc->node_id, wc->host ? rrdhost_hostname(wc->host) : "N/A", wc->alerts_snapshot_id); aclk_mark_alert_cloud_ack(wc->uuid_str, wc->alerts_ack_sequence_id); @@ -1025,11 +1024,11 @@ void sql_aclk_alert_clean_dead_entries(RRDHOST *host) BUFFER *sql = buffer_create(1024); - buffer_sprintf(sql,"delete from aclk_alert_%s where alert_unique_id not in " + buffer_sprintf(sql,"delete from aclk_alert_%s where filtered_alert_unique_id not in " " (select unique_id from health_log_%s); ", uuid_str, uuid_str); char *err_msg = NULL; - int rc = sqlite3_exec(db_meta, buffer_tostring(sql), NULL, NULL, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), NULL, NULL, &err_msg); if (rc != SQLITE_OK) { error_report("Failed when trying to clean stale ACLK alert entries from aclk_alert_%s, error message \"%s""", uuid_str, err_msg); @@ -1064,7 +1063,7 @@ int get_proto_alert_status(RRDHOST *host, struct proto_alert_status *proto_alert return 1; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { proto_alert_status->pending_min_sequence_id = sqlite3_column_bytes(res, 0) > 0 ? (uint64_t) sqlite3_column_int64(res, 0) : 0; proto_alert_status->pending_max_sequence_id = sqlite3_column_bytes(res, 1) > 0 ? (uint64_t) sqlite3_column_int64(res, 1) : 0; proto_alert_status->last_acked_sequence_id = sqlite3_column_bytes(res, 2) > 0 ? (uint64_t) sqlite3_column_int64(res, 2) : 0; diff --git a/database/sqlite/sqlite_aclk_alert.h b/database/sqlite/sqlite_aclk_alert.h index 0181b4842..88a939e87 100644 --- a/database/sqlite/sqlite_aclk_alert.h +++ b/database/sqlite/sqlite_aclk_alert.h @@ -26,6 +26,6 @@ void sql_process_queue_removed_alerts_to_aclk(struct aclk_database_worker_config void aclk_push_alert_snapshot_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); void aclk_process_send_alarm_snapshot(char *node_id, char *claim_id, uint64_t snapshot_id, uint64_t sequence_id); int get_proto_alert_status(RRDHOST *host, struct proto_alert_status *proto_alert_status); -extern int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter); +int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter); #endif //NETDATA_SQLITE_ACLK_ALERT_H diff --git a/database/sqlite/sqlite_aclk_chart.c b/database/sqlite/sqlite_aclk_chart.c deleted file mode 100644 index c1db60c49..000000000 --- a/database/sqlite/sqlite_aclk_chart.c +++ /dev/null @@ -1,1311 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "sqlite_functions.h" -#include "sqlite_aclk_chart.h" - -#ifdef ENABLE_ACLK -#include "../../aclk/aclk_charts_api.h" -#include "../../aclk/aclk.h" - -static inline int -sql_queue_chart_payload(struct aclk_database_worker_config *wc, void *data, enum aclk_database_opcode opcode) -{ - int rc; - if (unlikely(!wc)) - return 1; - - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = opcode; - cmd.data = data; - rc = aclk_database_enq_cmd_noblock(wc, &cmd); - return rc; -} - -static time_t payload_sent(char *uuid_str, uuid_t *uuid, void *payload, size_t payload_size) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - time_t send_status = 0; - - if (unlikely(!res)) { - char sql[ACLK_SYNC_QUERY_SIZE]; - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "SELECT acl.date_submitted FROM aclk_chart_latest_%s acl, aclk_chart_payload_%s acp " - "WHERE acl.unique_id = acp.unique_id AND acl.uuid = @uuid AND acp.payload = @payload;", - uuid_str, uuid_str); - rc = prepare_statement(db_meta, sql, &res); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to check payload data on %s", sql); - return 0; - } - } - - rc = sqlite3_bind_blob(res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_blob(res, 2, payload, payload_size, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - while (sqlite3_step(res) == SQLITE_ROW) { - send_status = (time_t) sqlite3_column_int64(res, 0); - } - -bind_fail: - if (unlikely(sqlite3_reset(res) != SQLITE_OK)) - error_report("Failed to reset statement in check payload, rc = %d", rc); - return send_status; -} - -static int aclk_add_chart_payload( - struct aclk_database_worker_config *wc, - uuid_t *uuid, - char *claim_id, - ACLK_PAYLOAD_TYPE payload_type, - void *payload, - size_t payload_size, - time_t *send_status, - int check_sent) -{ - static __thread sqlite3_stmt *res_chart = NULL; - int rc; - time_t date_submitted; - - if (unlikely(!payload)) - return 0; - - if (check_sent) { - date_submitted = payload_sent(wc->uuid_str, uuid, payload, payload_size); - if (send_status) - *send_status = date_submitted; - if (date_submitted) - return 0; - } - - if (unlikely(!res_chart)) { - char sql[ACLK_SYNC_QUERY_SIZE]; - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, - "INSERT INTO aclk_chart_payload_%s (unique_id, uuid, claim_id, date_created, type, payload) " \ - "VALUES (@unique_id, @uuid, @claim_id, unixepoch(), @type, @payload);", wc->uuid_str); - rc = prepare_statement(db_meta, sql, &res_chart); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to store chart payload data"); - return 1; - } - } - - uuid_t unique_uuid; - uuid_generate(unique_uuid); - - uuid_t claim_uuid; - if (uuid_parse(claim_id, claim_uuid)) - return 1; - - rc = sqlite3_bind_blob(res_chart, 1, &unique_uuid, sizeof(unique_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_blob(res_chart, 2, uuid, sizeof(*uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_blob(res_chart, 3, &claim_uuid, sizeof(claim_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res_chart, 4, payload_type); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_blob(res_chart, 5, payload, payload_size, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res_chart); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed store chart payload event, rc = %d", rc); - else { - wc->chart_payload_count++; - time_t now = now_realtime_sec(); - if (wc->rotation_after > now && wc->rotation_after < now + ACLK_DATABASE_ROTATION_DELAY) - wc->rotation_after = now + ACLK_DATABASE_ROTATION_DELAY; - } - -bind_fail: - if (unlikely(sqlite3_reset(res_chart) != SQLITE_OK)) - error_report("Failed to reset statement in store chart payload, rc = %d", rc); - return (rc != SQLITE_DONE); -} - -int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - int rc = 0; - CHECK_SQLITE_CONNECTION(db_meta); - - char *claim_id = get_agent_claimid(); - - RRDSET *st = cmd.data; - - if (likely(claim_id)) { - struct chart_instance_updated chart_payload; - memset(&chart_payload, 0, sizeof(chart_payload)); - chart_payload.config_hash = get_str_from_uuid(&st->state->hash_id); - chart_payload.update_every = st->update_every; - chart_payload.memory_mode = st->rrd_memory_mode; - chart_payload.name = (char *)st->name; - chart_payload.node_id = wc->node_id; - chart_payload.claim_id = claim_id; - chart_payload.id = strdupz(st->id); - - chart_payload.chart_labels = rrdlabels_create(); - rrdlabels_copy(chart_payload.chart_labels, st->state->chart_labels); - - size_t size; - char *payload = generate_chart_instance_updated(&size, &chart_payload); - if (likely(payload)) - rc = aclk_add_chart_payload(wc, st->chart_uuid, claim_id, ACLK_PAYLOAD_CHART, (void *) payload, size, NULL, 1); - freez(payload); - chart_instance_updated_destroy(&chart_payload); - } - return rc; -} - -static inline int aclk_upd_dimension_event(struct aclk_database_worker_config *wc, char *claim_id, uuid_t *dim_uuid, - const char *dim_id, const char *dim_name, const char *chart_type_id, time_t first_time, time_t last_time, - time_t *send_status) -{ - int rc = 0; - size_t size; - - if (unlikely(!dim_uuid || !dim_id || !dim_name || !chart_type_id)) - return 0; - - struct chart_dimension_updated dim_payload; - memset(&dim_payload, 0, sizeof(dim_payload)); - -#ifdef NETDATA_INTERNAL_CHECKS - if (!first_time) - info("Host %s (node %s) deleting dimension id=[%s] name=[%s] chart=[%s]", - wc->host_guid, wc->node_id, dim_id, dim_name, chart_type_id); - if (last_time) - info("Host %s (node %s) stopped collecting dimension id=[%s] name=[%s] chart=[%s] %ld seconds ago at %ld", - wc->host_guid, wc->node_id, dim_id, dim_name, chart_type_id, now_realtime_sec() - last_time, last_time); -#endif - - dim_payload.node_id = wc->node_id; - dim_payload.claim_id = claim_id; - dim_payload.name = dim_name; - dim_payload.id = dim_id; - dim_payload.chart_id = chart_type_id; - dim_payload.created_at.tv_sec = first_time; - dim_payload.last_timestamp.tv_sec = last_time; - char *payload = generate_chart_dimension_updated(&size, &dim_payload); - if (likely(payload)) - rc = aclk_add_chart_payload(wc, dim_uuid, claim_id, ACLK_PAYLOAD_DIMENSION, (void *)payload, size, send_status, 1); - freez(payload); - return rc; -} - -void aclk_process_dimension_deletion(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - int rc = 0; - sqlite3_stmt *res = NULL; - - if (!aclk_connected) - return; - - if (unlikely(!db_meta)) - return; - - uuid_t host_id; - if (uuid_parse(wc->host_guid, host_id)) - return; - - char *claim_id = get_agent_claimid(); - if (!claim_id) - return; - - rc = sqlite3_prepare_v2( - db_meta, - "DELETE FROM dimension_delete where host_id = @host_id " - "RETURNING dimension_id, dimension_name, chart_type_id, dim_id LIMIT 10;", - -1, - &res, - 0); - - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement when trying to delete dimension deletes"); - freez(claim_id); - return; - } - - rc = sqlite3_bind_blob(res, 1, &host_id, sizeof(host_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - unsigned count = 0; - while (sqlite3_step(res) == SQLITE_ROW) { - (void) aclk_upd_dimension_event( - wc, - claim_id, - (uuid_t *)sqlite3_column_text(res, 3), - (const char *)sqlite3_column_text(res, 0), - (const char *)sqlite3_column_text(res, 1), - (const char *)sqlite3_column_text(res, 2), - 0, - 0, - NULL); - count++; - } - - if (count) { - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = ACLK_DATABASE_DIM_DELETION; - if (aclk_database_enq_cmd_noblock(wc, &cmd)) - info("Failed to queue a dimension deletion message"); - } - -bind_fail: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize statement when adding dimension deletion events, rc = %d", rc); - freez(claim_id); - return; -} - -int aclk_add_dimension_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - int rc = 1; - CHECK_SQLITE_CONNECTION(db_meta); - - struct aclk_chart_dimension_data *aclk_cd_data = cmd.data; - - char *claim_id = get_agent_claimid(); - if (!claim_id) - goto cleanup; - - rc = aclk_add_chart_payload(wc, &aclk_cd_data->uuid, claim_id, ACLK_PAYLOAD_DIMENSION, - (void *) aclk_cd_data->payload, aclk_cd_data->payload_size, NULL, aclk_cd_data->check_payload); - - freez(claim_id); -cleanup: - freez(aclk_cd_data->payload); - freez(aclk_cd_data); - return rc; -} - -void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - int rc; - - wc->chart_pending = 0; - if (unlikely(!wc->chart_updates)) { - log_access( - "ACLK STA [%s (%s)]: Ignoring chart push event, updates have been turned off for this node.", - wc->node_id, - wc->host ? wc->host->hostname : "N/A"); - return; - } - - char *claim_id = get_agent_claimid(); - if (unlikely(!claim_id)) - return; - - uuid_t claim_uuid; - if (uuid_parse(claim_id, claim_uuid)) - return; - - int limit = cmd.count > 0 ? cmd.count : 1; - - uint64_t first_sequence; - uint64_t last_sequence; - time_t last_timestamp = 0; - - char sql[ACLK_SYNC_QUERY_SIZE]; - static __thread sqlite3_stmt *res = NULL; - - if (unlikely(!res)) { - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1,"SELECT ac.sequence_id, acp.payload, ac.date_created, ac.type, ac.uuid " \ - "FROM aclk_chart_%s ac, aclk_chart_payload_%s acp " \ - "WHERE ac.date_submitted IS NULL AND ac.unique_id = acp.unique_id AND ac.update_count > 0 " \ - "AND acp.claim_id = @claim_id ORDER BY ac.sequence_id ASC LIMIT %d;", wc->uuid_str, wc->uuid_str, limit); - rc = prepare_statement(db_meta, sql, &res); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement when trying to send a chart update via ACLK"); - freez(claim_id); - return; - } - } - - rc = sqlite3_bind_blob(res, 1, claim_uuid, sizeof(claim_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - char **payload_list = callocz(limit + 1, sizeof(char *)); - size_t *payload_list_size = callocz(limit + 1, sizeof(size_t)); - size_t *payload_list_max_size = callocz(limit + 1, sizeof(size_t)); - struct aclk_message_position *position_list = callocz(limit + 1, sizeof(*position_list)); - int *is_dim = callocz(limit + 1, sizeof(*is_dim)); - - int loop = cmd.param1; - - uint64_t start_sequence_id = wc->chart_sequence_id; - - while (loop > 0) { - uint64_t previous_sequence_id = wc->chart_sequence_id; - int count = 0; - first_sequence = 0; - last_sequence = 0; - while (count < limit && sqlite3_step(res) == SQLITE_ROW) { - size_t payload_size = sqlite3_column_bytes(res, 1); - if (payload_list_max_size[count] < payload_size) { - freez(payload_list[count]); - payload_list_max_size[count] = payload_size; - payload_list[count] = mallocz(payload_size); - } - payload_list_size[count] = payload_size; - memcpy(payload_list[count], sqlite3_column_blob(res, 1), payload_size); - position_list[count].sequence_id = (uint64_t)sqlite3_column_int64(res, 0); - position_list[count].previous_sequence_id = previous_sequence_id; - position_list[count].seq_id_creation_time.tv_sec = sqlite3_column_int64(res, 2); - position_list[count].seq_id_creation_time.tv_usec = 0; - if (!first_sequence) - first_sequence = position_list[count].sequence_id; - last_sequence = position_list[count].sequence_id; - last_timestamp = position_list[count].seq_id_creation_time.tv_sec; - previous_sequence_id = last_sequence; - is_dim[count] = sqlite3_column_int(res, 3) > 0; - count++; - if (wc->chart_payload_count) - wc->chart_payload_count--; - } - freez(payload_list[count]); - payload_list_max_size[count] = 0; - payload_list[count] = NULL; - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when pushing chart events, rc = %d", rc); - - if (likely(first_sequence)) { - - db_lock(); - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "UPDATE aclk_chart_%s SET status = NULL, date_submitted=unixepoch() " - "WHERE date_submitted IS NULL AND sequence_id BETWEEN %" PRIu64 " AND %" PRIu64 ";", - wc->uuid_str, first_sequence, last_sequence); - db_execute(sql); - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "INSERT OR REPLACE INTO aclk_chart_latest_%s (uuid, unique_id, date_submitted) " - " SELECT uuid, unique_id, date_submitted FROM aclk_chart_%s s " - " WHERE date_submitted IS NOT NULL AND sequence_id BETWEEN %" PRIu64 " AND %" PRIu64 - " ;", - wc->uuid_str, wc->uuid_str, first_sequence, last_sequence); - db_execute(sql); - db_unlock(); - - aclk_chart_inst_and_dim_update(payload_list, payload_list_size, is_dim, position_list, wc->batch_id); - log_access( - "ACLK RES [%s (%s)]: CHARTS SENT from %" PRIu64 " to %" PRIu64 " batch=%" PRIu64, - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - first_sequence, - last_sequence, - wc->batch_id); - wc->chart_sequence_id = last_sequence; - wc->chart_timestamp = last_timestamp; - } else - break; - --loop; - } - - if (start_sequence_id != wc->chart_sequence_id) { - time_t now = now_realtime_sec(); - if (wc->rotation_after > now && wc->rotation_after < now + ACLK_DATABASE_ROTATION_DELAY) - wc->rotation_after = now + ACLK_DATABASE_ROTATION_DELAY; - } else { - wc->chart_payload_count = sql_get_pending_count(wc); - if (!wc->chart_payload_count) - log_access( - "ACLK STA [%s (%s)]: Sync of charts and dimensions done in %ld seconds.", - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - now_realtime_sec() - wc->startup_time); - } - - for (int i = 0; i <= limit; ++i) - freez(payload_list[i]); - - freez(payload_list); - freez(payload_list_size); - freez(payload_list_max_size); - freez(position_list); - freez(is_dim); - -bind_fail: - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when pushing chart events, rc = %d", rc); - - freez(claim_id); - return; -} - -// Push one chart config to the cloud -int aclk_send_chart_config(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - UNUSED(wc); - - CHECK_SQLITE_CONNECTION(db_meta); - - sqlite3_stmt *res = NULL; - int rc = 0; - - char *hash_id = (char *) cmd.data_param; - - uuid_t hash_uuid; - rc = uuid_parse(hash_id, hash_uuid); - - if (unlikely(rc)) { - freez((char *) cmd.data_param); - return 1; - } - - BUFFER *sql = buffer_create(1024); - buffer_sprintf(sql, "SELECT type, family, context, title, priority, plugin, module, unit, chart_type " \ - "FROM chart_hash WHERE hash_id = @hash_id;"); - - rc = sqlite3_prepare_v2(db_meta, buffer_tostring(sql), -1, &res, 0); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement when trying to fetch a chart hash configuration"); - goto fail; - } - - rc = sqlite3_bind_blob(res, 1, &hash_uuid , sizeof(hash_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - struct chart_config_updated chart_config; - chart_config.config_hash = NULL; - - while (sqlite3_step(res) == SQLITE_ROW) { - chart_config.type = strdupz((char *)sqlite3_column_text(res, 0)); - chart_config.family = strdupz((char *)sqlite3_column_text(res, 1)); - chart_config.context = strdupz((char *)sqlite3_column_text(res, 2)); - chart_config.title = strdupz((char *)sqlite3_column_text(res, 3)); - chart_config.priority = sqlite3_column_int64(res, 4); - chart_config.plugin = strdupz((char *)sqlite3_column_text(res, 5)); - chart_config.module = sqlite3_column_bytes(res, 6) > 0 ? strdupz((char *)sqlite3_column_text(res, 6)) : NULL; - chart_config.chart_type = (RRDSET_TYPE) sqlite3_column_int(res,8); - chart_config.units = strdupz((char *)sqlite3_column_text(res, 7)); - chart_config.config_hash = strdupz(hash_id); - } - - if (likely(chart_config.config_hash)) { - log_access( - "ACLK REQ [%s (%s)]: Sending chart config for %s.", - wc->node_id, - wc->host ? wc->host->hostname : "N/A", - hash_id); - aclk_chart_config_updated(&chart_config, 1); - destroy_chart_config_updated(&chart_config); - } else - log_access( - "ACLK STA [%s (%s)]: Chart config for %s not found.", - wc->node_id, - wc->host ? wc->host->hostname : "N/A", - hash_id); - -bind_fail: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when pushing chart config hash, rc = %d", rc); -fail: - freez((char *)cmd.data_param); - buffer_free(sql); - return rc; -} - -void aclk_receive_chart_ack(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - int rc; - sqlite3_stmt *res = NULL; - - char sql[ACLK_SYNC_QUERY_SIZE]; - - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1,"UPDATE aclk_chart_%s SET date_updated=unixepoch() WHERE sequence_id <= @sequence_id " - "AND date_submitted IS NOT NULL AND date_updated IS NULL;", wc->uuid_str); - - rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to ack chart sequence ids"); - return; - } - - rc = sqlite3_bind_int64(res, 1, (uint64_t) cmd.param1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res); - if (rc != SQLITE_DONE) - error_report("Failed to ACK sequence id, rc = %d", rc); - else - log_access( - "ACLK STA [%s (%s)]: CHARTS ACKNOWLEDGED IN THE DATABASE UP TO %" PRIu64, - wc->node_id, - wc->host ? wc->host->hostname : "N/A", - cmd.param1); - -bind_fail: - if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) - error_report("Failed to finalize statement to ACK older sequence ids, rc = %d", rc); - return; -} - -void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd) -{ - BUFFER *sql = buffer_create(1024); - buffer_sprintf( - sql, - "UPDATE aclk_chart_%s SET status = NULL, date_submitted = NULL WHERE sequence_id >= %" PRIu64 ";", - wc->uuid_str, - cmd.param1); - db_execute(buffer_tostring(sql)); - if (cmd.param1 == 1) { - buffer_flush(sql); - log_access("ACLK REQ [%s (%s)]: Received chart full resync.", wc->node_id, wc->hostname ? wc->hostname: "N/A"); - buffer_sprintf(sql, "DELETE FROM aclk_chart_payload_%s; DELETE FROM aclk_chart_%s; " \ - "DELETE FROM aclk_chart_latest_%s;", wc->uuid_str, wc->uuid_str, wc->uuid_str); - db_lock(); - - db_execute("BEGIN TRANSACTION;"); - db_execute(buffer_tostring(sql)); - db_execute("COMMIT TRANSACTION;"); - - db_unlock(); - wc->chart_sequence_id = 0; - wc->chart_timestamp = 0; - wc->chart_payload_count = 0; - - RRDHOST *host = wc->host; - if (likely(host)) { - rrdhost_rdlock(host); - RRDSET *st; - rrdset_foreach_read(st, host) - { - rrdset_rdlock(st); - rrdset_flag_clear(st, RRDSET_FLAG_ACLK); - RRDDIM *rd; - rrddim_foreach_read(rd, st) - { - rrddim_flag_clear(rd, RRDDIM_FLAG_ACLK); - rd->aclk_live_status = (rd->aclk_live_status == 0); - } - rrdset_unlock(st); - } - rrdhost_unlock(host); - } else - error_report("ACLK synchronization thread for %s is not linked to HOST", wc->host_guid); - } else { - log_access( - "ACLK STA [%s (%s)]: RESTARTING CHART SYNC FROM SEQUENCE %" PRIu64, - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - cmd.param1); - wc->chart_payload_count = sql_get_pending_count(wc); - sql_get_last_chart_sequence(wc); - } - buffer_free(sql); - wc->chart_updates = 1; - return; -} - -// -// Functions called directly from ACLK threads and will queue commands -// -void aclk_get_chart_config(char **hash_id) -{ - struct aclk_database_worker_config *wc = (struct aclk_database_worker_config *)localhost->dbsync_worker; - - if (unlikely(!wc || !hash_id)) - return; - - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = ACLK_DATABASE_PUSH_CHART_CONFIG; - for (int i = 0; hash_id[i]; ++i) { - // TODO: Verify that we have a valid hash_id - log_access( - "ACLK REQ [%s (%s)]: Request %d for chart config with hash %s received.", - wc->node_id, - wc->host ? wc->host->hostname : "N/A", - i, - hash_id[i]); - cmd.data_param = (void *)strdupz(hash_id[i]); - aclk_database_enq_cmd(wc, &cmd); - } - return; -} - -// Send a command to a node_id -// Need to discover the thread that will handle the request -// if thread not in active hosts, then try to find in the queue -static void aclk_submit_param_command(char *node_id, enum aclk_database_opcode aclk_command, uint64_t param) -{ - if (unlikely(!node_id)) - return; - - struct aclk_database_worker_config *wc = NULL; - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = aclk_command; - cmd.param1 = param; - - rrd_rdlock(); - RRDHOST *host = find_host_by_node_id(node_id); - if (likely(host)) - wc = (struct aclk_database_worker_config *)host->dbsync_worker; - rrd_unlock(); - if (wc) - aclk_database_enq_cmd(wc, &cmd); - else { - if (aclk_worker_enq_cmd(node_id, &cmd)) - log_access("ACLK STA [%s (N/A)]: ACLK synchronization thread is not active.", node_id); - } - return; -} - -void aclk_ack_chart_sequence_id(char *node_id, uint64_t last_sequence_id) -{ - if (unlikely(!node_id)) - return; - - char *hostname = get_hostname_by_node_id(node_id); - log_access("ACLK REQ [%s (%s)]: CHARTS ACKNOWLEDGED upto %" PRIu64, node_id, hostname ? hostname : "N/A", - last_sequence_id); - freez(hostname); - aclk_submit_param_command(node_id, ACLK_DATABASE_CHART_ACK, last_sequence_id); - return; -} - -// Start streaming charts / dimensions for node_id -void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at, uint64_t batch_id) -{ - UNUSED(created_at); - if (unlikely(!node_id)) - return; - - uuid_t node_uuid; - if (uuid_parse(node_id, node_uuid)) { - log_access("ACLK REQ [%s (N/A)]: CHARTS STREAM ignored, invalid node id", node_id); - return; - } - - struct aclk_database_worker_config *wc = find_inactive_wc_by_node_id(node_id); - rrd_rdlock(); - RRDHOST *host = localhost; - while(host) { - if (wc || (host->node_id && !(uuid_compare(*host->node_id, node_uuid)))) { - rrd_unlock(); - if (!wc) - wc = (struct aclk_database_worker_config *)host->dbsync_worker ? - (struct aclk_database_worker_config *)host->dbsync_worker : - (struct aclk_database_worker_config *)find_inactive_wc_by_node_id(node_id); - if (likely(wc)) { - wc->chart_reset_count++; - __sync_synchronize(); - wc->chart_updates = 0; - wc->batch_id = batch_id; - __sync_synchronize(); - wc->batch_created = now_realtime_sec(); - log_access( - "ACLK REQ [%s (%s)]: CHARTS STREAM from %"PRIu64" (LOCAL %"PRIu64") t=%ld resets=%d" , - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - sequence_id + 1, - wc->chart_sequence_id, - wc->chart_timestamp, - wc->chart_reset_count); - if (sequence_id > wc->chart_sequence_id || wc->chart_reset_count > 10) { - log_access( - "ACLK RES [%s (%s)]: CHARTS FULL RESYNC REQUEST " - "remote_seq=%" PRIu64 " local_seq=%" PRIu64 " resets=%d ", - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - sequence_id, - wc->chart_sequence_id, - wc->chart_reset_count); - - chart_reset_t chart_reset; - chart_reset.claim_id = get_agent_claimid(); - if (chart_reset.claim_id) { - chart_reset.node_id = node_id; - chart_reset.reason = SEQ_ID_NOT_EXISTS; - aclk_chart_reset(chart_reset); - freez(chart_reset.claim_id); - wc->chart_reset_count = -1; - } - } else { - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - // TODO: handle timestamp - if (sequence_id < wc->chart_sequence_id || - !sequence_id) { // || created_at != wc->chart_timestamp) { - log_access( - "ACLK REQ [%s (%s)]: CHART RESET from %" PRIu64 " t=%ld batch=%" PRIu64, - wc->node_id, - wc->hostname ? wc->hostname : "N/A", - sequence_id + 1, - wc->chart_timestamp, - wc->batch_id); - cmd.opcode = ACLK_DATABASE_RESET_CHART; - cmd.param1 = sequence_id + 1; - cmd.completion = NULL; - aclk_database_enq_cmd(wc, &cmd); - } else { - wc->chart_reset_count = 0; - wc->chart_updates = 1; - } - } - } else { - log_access("ACLK STA [%s (%s)]: ACLK synchronization thread is not active.", node_id, wc->hostname ? wc->hostname : "N/A"); - } - return; - } - host = host->next; - } - rrd_unlock(); - return; -} - -#define SQL_SELECT_HOST_MEMORY_MODE "SELECT memory_mode FROM chart WHERE host_id = @host_id LIMIT 1;" - -static RRD_MEMORY_MODE sql_get_host_memory_mode(uuid_t *host_id) -{ - int rc; - - RRD_MEMORY_MODE memory_mode = RRD_MEMORY_MODE_RAM; - sqlite3_stmt *res = NULL; - - rc = sqlite3_prepare_v2(db_meta, SQL_SELECT_HOST_MEMORY_MODE, -1, &res, 0); - - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to read host memory mode"); - return memory_mode; - } - - rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter to fetch host memory mode"); - goto failed; - } - - while (sqlite3_step(res) == SQLITE_ROW) { - memory_mode = (RRD_MEMORY_MODE)sqlite3_column_int(res, 0); - } - -failed: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when reading host memory mode"); - return memory_mode; -} - -#define SELECT_HOST_DIMENSION_LIST \ - "SELECT d.dim_id, c.update_every, c.type||'.'||c.id, d.id, d.name FROM chart c, dimension d " \ - "WHERE d.chart_id = c.chart_id AND c.host_id = @host_id ORDER BY c.update_every ASC;" - -#define SELECT_HOST_CHART_LIST \ - "SELECT distinct h.host_id, c.update_every, c.type||'.'||c.id FROM chart c, host h " \ - "WHERE c.host_id = h.host_id AND c.host_id = @host_id ORDER BY c.update_every ASC;" - -void aclk_update_retention(struct aclk_database_worker_config *wc) -{ - int rc; - - if (!aclk_connected) - return; - - if (wc->host && rrdhost_flag_check(wc->host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { - internal_error(true, "Skipping aclk_update_retention for host %s because context streaming is enabled", wc->host->hostname); - return; - } - - char *claim_id = get_agent_claimid(); - if (unlikely(!claim_id)) - return; - - sqlite3_stmt *res = NULL; - RRD_MEMORY_MODE memory_mode; - - uuid_t host_uuid; - rc = uuid_parse(wc->host_guid, host_uuid); - if (unlikely(rc)) { - freez(claim_id); - return; - } - - if (wc->host) - memory_mode = wc->host->rrd_memory_mode; - else - memory_mode = sql_get_host_memory_mode(&host_uuid); - - if (memory_mode == RRD_MEMORY_MODE_DBENGINE) - rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_DIMENSION_LIST, -1, &res, 0); - else - rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_CHART_LIST, -1, &res, 0); - - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch host dimensions"); - freez(claim_id); - return; - } - - rc = sqlite3_bind_blob(res, 1, &host_uuid, sizeof(host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter to fetch host dimensions"); - goto failed; - } - - time_t start_time = LONG_MAX; - time_t first_entry_t; - time_t last_entry_t; - uint32_t update_every = 0; - uint32_t dimension_update_count = 0; - uint32_t total_checked = 0; - uint32_t total_deleted= 0; - uint32_t total_stopped= 0; - time_t send_status; - - struct retention_updated rotate_data; - - memset(&rotate_data, 0, sizeof(rotate_data)); - - int max_intervals = 32; - - rotate_data.interval_duration_count = 0; - rotate_data.interval_durations = callocz(max_intervals, sizeof(*rotate_data.interval_durations)); - - now_realtime_timeval(&rotate_data.rotation_timestamp); - rotate_data.memory_mode = memory_mode; - rotate_data.claim_id = claim_id; - rotate_data.node_id = strdupz(wc->node_id); - - time_t now = now_realtime_sec(); - while (sqlite3_step(res) == SQLITE_ROW && dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP) { - if (unlikely(netdata_exit)) - break; - if (!update_every || update_every != (uint32_t)sqlite3_column_int(res, 1)) { - if (update_every) { - debug(D_ACLK_SYNC, "Update %s for %u oldest time = %ld", wc->host_guid, update_every, start_time); - if (start_time == LONG_MAX) - rotate_data.interval_durations[rotate_data.interval_duration_count].retention = 0; - else - rotate_data.interval_durations[rotate_data.interval_duration_count].retention = - rotate_data.rotation_timestamp.tv_sec - start_time; - rotate_data.interval_duration_count++; - } - update_every = (uint32_t)sqlite3_column_int(res, 1); - rotate_data.interval_durations[rotate_data.interval_duration_count].update_every = update_every; - start_time = LONG_MAX; - } -#ifdef ENABLE_DBENGINE - if (memory_mode == RRD_MEMORY_MODE_DBENGINE) - rc = - rrdeng_metric_latest_time_by_uuid((uuid_t *)sqlite3_column_blob(res, 0), &first_entry_t, &last_entry_t, 0); - else -#endif - { - if (wc->host) { - RRDSET *st = NULL; - rc = (st = rrdset_find(wc->host, (const char *)sqlite3_column_text(res, 2))) ? 0 : 1; - if (!rc) { - first_entry_t = rrdset_first_entry_t(st); - last_entry_t = rrdset_last_entry_t(st); - } - } else { - rc = 0; - first_entry_t = rotate_data.rotation_timestamp.tv_sec; - } - } - - if (likely(!rc && first_entry_t)) - start_time = MIN(start_time, first_entry_t); - - if (memory_mode == RRD_MEMORY_MODE_DBENGINE && wc->chart_updates && (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP)) { - int live = ((now - last_entry_t) < (RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * update_every)); - if (rc) { - first_entry_t = 0; - last_entry_t = 0; - live = 0; - } - if (!wc->host || !first_entry_t) { - if (!first_entry_t) { - delete_dimension_uuid((uuid_t *)sqlite3_column_blob(res, 0)); - total_deleted++; - dimension_update_count++; - } - else { - (void)aclk_upd_dimension_event( - wc, - claim_id, - (uuid_t *)sqlite3_column_blob(res, 0), - (const char *)(const char *)sqlite3_column_text(res, 3), - (const char *)(const char *)sqlite3_column_text(res, 4), - (const char *)(const char *)sqlite3_column_text(res, 2), - first_entry_t, - live ? 0 : last_entry_t, - &send_status); - - if (!send_status) { - if (last_entry_t) - total_stopped++; - dimension_update_count++; - } - } - } - } - total_checked++; - } - if (update_every) { - debug(D_ACLK_SYNC, "Update %s for %u oldest time = %ld", wc->host_guid, update_every, start_time); - if (start_time == LONG_MAX) - rotate_data.interval_durations[rotate_data.interval_duration_count].retention = 0; - else - rotate_data.interval_durations[rotate_data.interval_duration_count].retention = - rotate_data.rotation_timestamp.tv_sec - start_time; - rotate_data.interval_duration_count++; - } - - if (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP && !netdata_exit) - log_access("ACLK STA [%s (%s)]: UPDATES %d RETENTION MESSAGE SENT. CHECKED %u DIMENSIONS. %u DELETED, %u STOPPED COLLECTING", - wc->node_id, wc->hostname ? wc->hostname : "N/A", wc->chart_updates, total_checked, total_deleted, total_stopped); - else - log_access("ACLK STA [%s (%s)]: UPDATES %d RETENTION MESSAGE NOT SENT. CHECKED %u DIMENSIONS. %u DELETED, %u STOPPED COLLECTING", - wc->node_id, wc->hostname ? wc->hostname : "N/A", wc->chart_updates, total_checked, total_deleted, total_stopped); - -#ifdef NETDATA_INTERNAL_CHECKS - info("Retention update for %s (chart updates = %d)", wc->host_guid, wc->chart_updates); - for (int i = 0; i < rotate_data.interval_duration_count; ++i) - info( - "Update for host %s (node %s) for %u Retention = %u", - wc->host_guid, - wc->node_id, - rotate_data.interval_durations[i].update_every, - rotate_data.interval_durations[i].retention); -#endif - if (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP && !netdata_exit) - aclk_retention_updated(&rotate_data); - freez(rotate_data.node_id); - freez(rotate_data.interval_durations); - -failed: - freez(claim_id); - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when reading host dimensions"); - return; -} - -uint32_t sql_get_pending_count(struct aclk_database_worker_config *wc) -{ - char sql[ACLK_SYNC_QUERY_SIZE]; - static __thread sqlite3_stmt *res = NULL; - - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "SELECT count(1) FROM aclk_chart_%s ac WHERE ac.date_submitted IS NULL;", wc->uuid_str); - - int rc; - uint32_t chart_payload_count = 0; - if (unlikely(!res)) { - rc = prepare_statement(db_meta, sql, &res); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to count pending messages"); - return 0; - } - } - while (sqlite3_step(res) == SQLITE_ROW) - chart_payload_count = (uint32_t) sqlite3_column_int(res, 0); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when fetching pending messages, rc = %d", rc); - - return chart_payload_count; -} - -void sql_get_last_chart_sequence(struct aclk_database_worker_config *wc) -{ - char sql[ACLK_SYNC_QUERY_SIZE]; - - snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "SELECT ac.sequence_id, ac.date_created FROM aclk_chart_%s ac " \ - "WHERE ac.date_submitted IS NOT NULL ORDER BY ac.sequence_id DESC LIMIT 1;", wc->uuid_str); - - int rc; - sqlite3_stmt *res = NULL; - rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to find last chart sequence id"); - return; - } - - wc->chart_sequence_id = 0; - wc->chart_timestamp = 0; - while (sqlite3_step(res) == SQLITE_ROW) { - wc->chart_sequence_id = (uint64_t)sqlite3_column_int64(res, 0); - wc->chart_timestamp = (time_t)sqlite3_column_int64(res, 1); - } - - debug(D_ACLK_SYNC, "Node %s reports last sequence_id=%" PRIu64, wc->node_id, wc->chart_sequence_id); - - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when fetching chart sequence info, rc = %d", rc); - - return; -} - -void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated) -{ - RRDHOST *host = rd->rrdset->rrdhost; - if (likely(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS))) - return; - - int live = !last_updated; - - if (likely(rd->aclk_live_status == live)) - return; - - time_t created_at = rd->tiers[0]->query_ops.oldest_time(rd->tiers[0]->db_metric_handle); - - if (unlikely(!created_at && rd->updated)) - created_at = rd->last_collected_time.tv_sec; - - rd->aclk_live_status = live; - - struct aclk_database_worker_config *wc = rd->rrdset->rrdhost->dbsync_worker; - if (unlikely(!wc)) - return; - - char *claim_id = get_agent_claimid(); - if (unlikely(!claim_id)) - return; - - struct chart_dimension_updated dim_payload; - memset(&dim_payload, 0, sizeof(dim_payload)); - dim_payload.node_id = wc->node_id; - dim_payload.claim_id = claim_id; - dim_payload.name = rd->name; - dim_payload.id = rd->id; - dim_payload.chart_id = rd->rrdset->id; - dim_payload.created_at.tv_sec = created_at; - dim_payload.last_timestamp.tv_sec = last_updated; - - size_t size = 0; - char *payload = generate_chart_dimension_updated(&size, &dim_payload); - - freez(claim_id); - if (unlikely(!payload)) - return; - - struct aclk_chart_dimension_data *aclk_cd_data = mallocz(sizeof(*aclk_cd_data)); - uuid_copy(aclk_cd_data->uuid, rd->metric_uuid); - aclk_cd_data->payload = payload; - aclk_cd_data->payload_size = size; - aclk_cd_data->check_payload = 1; - - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - - cmd.opcode = ACLK_DATABASE_ADD_DIMENSION; - cmd.data = aclk_cd_data; - int rc = aclk_database_enq_cmd_noblock(wc, &cmd); - - if (unlikely(rc)) { - freez(aclk_cd_data->payload); - freez(aclk_cd_data); - rd->aclk_live_status = !live; - } - return; -} - -void aclk_send_dimension_update(RRDDIM *rd) -{ - char *claim_id = get_agent_claimid(); - if (unlikely(!claim_id)) - return; - - time_t first_entry_t = rrddim_first_entry_t(rd); - time_t last_entry_t = rrddim_last_entry_t(rd); - - time_t now = now_realtime_sec(); - int live = ((now - rd->last_collected_time.tv_sec) < (RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every)); - - if (!live || rd->aclk_live_status != live || !first_entry_t) { - (void)aclk_upd_dimension_event( - rd->rrdset->rrdhost->dbsync_worker, - claim_id, - &rd->metric_uuid, - rd->id, - rd->name, - rd->rrdset->id, - first_entry_t, - live ? 0 : last_entry_t, - NULL); - - if (!first_entry_t) - debug( - D_ACLK_SYNC, - "%s: Update dimension chart=%s dim=%s live=%d (%ld, %ld)", - rd->rrdset->rrdhost->hostname, - rd->rrdset->name, - rd->name, - live, - first_entry_t, - last_entry_t); - else - debug( - D_ACLK_SYNC, - "%s: Update dimension chart=%s dim=%s live=%d (%ld, %ld) collected %ld seconds ago", - rd->rrdset->rrdhost->hostname, - rd->rrdset->name, - rd->name, - live, - first_entry_t, - last_entry_t, - now - last_entry_t); - rd->aclk_live_status = live; - } - - freez(claim_id); - return; -} - -#define SQL_SEQ_NULL(result, n) sqlite3_column_type(result, n) == SQLITE_NULL ? 0 : sqlite3_column_int64(result, n) - -struct aclk_chart_sync_stats *aclk_get_chart_sync_stats(RRDHOST *host) -{ - struct aclk_chart_sync_stats *aclk_statistics = NULL; - - struct aclk_database_worker_config *wc = NULL; - wc = (struct aclk_database_worker_config *)host->dbsync_worker; - if (!wc) - return NULL; - - aclk_statistics = callocz(1, sizeof(struct aclk_chart_sync_stats)); - - aclk_statistics->updates = wc->chart_updates; - aclk_statistics->batch_id = wc->batch_id; - - char host_uuid_fixed[GUID_LEN + 1]; - - strncpy(host_uuid_fixed, host->machine_guid, GUID_LEN); - host_uuid_fixed[GUID_LEN] = 0; - - host_uuid_fixed[8] = '_'; - host_uuid_fixed[13] = '_'; - host_uuid_fixed[18] = '_'; - host_uuid_fixed[23] = '_'; - - sqlite3_stmt *res = NULL; - BUFFER *sql = buffer_create(1024); - buffer_sprintf(sql, "SELECT min(sequence_id), max(sequence_id), 0 FROM aclk_chart_%s;", host_uuid_fixed); - buffer_sprintf(sql, "SELECT min(sequence_id), max(sequence_id), 0 FROM aclk_chart_%s WHERE date_submitted IS NULL;", host_uuid_fixed); - buffer_sprintf(sql, "SELECT min(sequence_id), max(sequence_id), 0 FROM aclk_chart_%s WHERE date_submitted IS NOT NULL;", host_uuid_fixed); - buffer_sprintf(sql, "SELECT min(sequence_id), max(sequence_id), 0 FROM aclk_chart_%s WHERE date_updated IS NOT NULL;", host_uuid_fixed); - buffer_sprintf(sql, "SELECT max(date_created), max(date_submitted), max(date_updated), 0 FROM aclk_chart_%s;", host_uuid_fixed); - - int rc = sqlite3_prepare_v2(db_meta, buffer_tostring(sql), -1, &res, 0); - if (rc != SQLITE_OK) { - buffer_free(sql); - freez(aclk_statistics); - return NULL; - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->min_seqid = SQL_SEQ_NULL(res, 0); - aclk_statistics->max_seqid = SQL_SEQ_NULL(res, 1); - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->min_seqid_pend = SQL_SEQ_NULL(res, 0); - aclk_statistics->max_seqid_pend = SQL_SEQ_NULL(res, 1); - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->min_seqid_sent = SQL_SEQ_NULL(res, 0); - aclk_statistics->max_seqid_sent = SQL_SEQ_NULL(res, 1); - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->min_seqid_ack = SQL_SEQ_NULL(res, 0); - aclk_statistics->max_seqid_ack = SQL_SEQ_NULL(res, 1); - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->min_seqid_ack = SQL_SEQ_NULL(res, 0); - aclk_statistics->max_seqid_ack = SQL_SEQ_NULL(res, 1); - } - - rc = sqlite3_step(res); - if (rc == SQLITE_ROW) { - aclk_statistics->max_date_created = (time_t) SQL_SEQ_NULL(res, 0); - aclk_statistics->max_date_submitted = (time_t) SQL_SEQ_NULL(res, 1); - aclk_statistics->max_date_ack = (time_t) SQL_SEQ_NULL(res, 2); - } - - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize statement when fetching aclk sync statistics, rc = %d", rc); - - buffer_free(sql); - return aclk_statistics; -} - -void sql_check_chart_liveness(RRDSET *st) { - RRDDIM *rd; - - if (unlikely(st->state->is_ar_chart)) - return; - - rrdset_rdlock(st); - - if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) { - rrdset_unlock(st); - return; - } - - if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) { - if (likely(st->dimensions && st->counter_done && !queue_chart_to_aclk(st))) { - debug(D_ACLK_SYNC,"Check chart liveness [%s] submit chart definition", st->name); - rrdset_flag_set(st, RRDSET_FLAG_ACLK); - } - } - else - debug(D_ACLK_SYNC,"Check chart liveness [%s] chart definition already submitted", st->name); - time_t mark = now_realtime_sec(); - - debug(D_ACLK_SYNC,"Check chart liveness [%s] scanning dimensions", st->name); - rrddim_foreach_read(rd, st) { - if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)) - queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark)); - } - rrdset_unlock(st); -} - -// ST is read locked -int queue_chart_to_aclk(RRDSET *st) -{ - RRDHOST *host = st->rrdhost; - - if (likely(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS))) - return 0; - - return sql_queue_chart_payload((struct aclk_database_worker_config *) st->rrdhost->dbsync_worker, - st, ACLK_DATABASE_ADD_CHART); -} - -#endif //ENABLE_ACLK diff --git a/database/sqlite/sqlite_aclk_chart.h b/database/sqlite/sqlite_aclk_chart.h deleted file mode 100644 index 84325bf6c..000000000 --- a/database/sqlite/sqlite_aclk_chart.h +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_SQLITE_ACLK_CHART_H -#define NETDATA_SQLITE_ACLK_CHART_H - - -typedef enum payload_type { - ACLK_PAYLOAD_CHART, - ACLK_PAYLOAD_DIMENSION, - ACLK_PAYLOAD_DIMENSION_ROTATED -} ACLK_PAYLOAD_TYPE; - -extern sqlite3 *db_meta; - -#ifndef RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER -#define RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER (3) -#endif - -#ifndef RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER -#define RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER (30) -#endif - -#ifndef ACLK_MAX_DIMENSION_CLEANUP -#define ACLK_MAX_DIMENSION_CLEANUP (500) -#endif - -struct aclk_chart_dimension_data { - uuid_t uuid; - char *payload; - size_t payload_size; - uint8_t check_payload; -}; - -struct aclk_chart_sync_stats { - int updates; - uint64_t batch_id; - uint64_t min_seqid; - uint64_t max_seqid; - uint64_t min_seqid_pend; - uint64_t max_seqid_pend; - uint64_t min_seqid_sent; - uint64_t max_seqid_sent; - uint64_t min_seqid_ack; - uint64_t max_seqid_ack; - time_t max_date_created; - time_t max_date_submitted; - time_t max_date_ack; -}; - -extern int queue_chart_to_aclk(RRDSET *st); -extern void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated); -extern void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id); -int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -int aclk_add_dimension_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -int aclk_send_chart_config(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void aclk_ack_chart_sequence_id(char *node_id, uint64_t last_sequence_id); -void aclk_get_chart_config(char **hash_id_list); -void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void aclk_start_streaming(char *node_id, uint64_t seq_id, time_t created_at, uint64_t batch_id); -void sql_chart_deduplicate(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void sql_check_rotation_state(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void sql_get_last_chart_sequence(struct aclk_database_worker_config *wc); -void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void aclk_receive_chart_ack(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -void aclk_process_dimension_deletion(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd); -uint32_t sql_get_pending_count(struct aclk_database_worker_config *wc); -void aclk_send_dimension_update(RRDDIM *rd); -struct aclk_chart_sync_stats *aclk_get_chart_sync_stats(RRDHOST *host); -void sql_check_chart_liveness(RRDSET *st); -void aclk_update_retention(struct aclk_database_worker_config *wc); -#endif //NETDATA_SQLITE_ACLK_CHART_H diff --git a/database/sqlite/sqlite_aclk_node.c b/database/sqlite/sqlite_aclk_node.c index 3d11f83aa..afe774997 100644 --- a/database/sqlite/sqlite_aclk_node.c +++ b/database/sqlite/sqlite_aclk_node.c @@ -3,27 +3,25 @@ #include "sqlite_functions.h" #include "sqlite_aclk_node.h" -#ifdef ENABLE_ACLK -#include "../../aclk/aclk_charts_api.h" -#endif +#include "../../aclk/aclk_contexts_api.h" +#include "../../aclk/aclk_capas.h" #ifdef ENABLE_ACLK DICTIONARY *collectors_from_charts(RRDHOST *host, DICTIONARY *dict) { RRDSET *st; char name[500]; - rrdhost_rdlock(host); rrdset_foreach_read(st, host) { if (rrdset_is_available_for_viewers(st)) { struct collector_info col = { - .plugin = st->plugin_name ? st->plugin_name : "", - .module = st->module_name ? st->module_name : "" + .plugin = rrdset_plugin_name(st), + .module = rrdset_module_name(st) }; snprintfz(name, 499, "%s:%s", col.plugin, col.module); dictionary_set(dict, name, &col, sizeof(struct collector_info)); } } - rrdhost_unlock(host); + rrdset_foreach_done(st); return dict; } @@ -36,7 +34,7 @@ void sql_build_node_collectors(struct aclk_database_worker_config *wc) return; struct update_node_collectors upd_node_collectors; - DICTIONARY *dict = dictionary_create(DICTIONARY_FLAG_SINGLE_THREADED); + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); upd_node_collectors.node_id = wc->node_id; upd_node_collectors.claim_id = get_agent_claimid(); @@ -47,7 +45,7 @@ void sql_build_node_collectors(struct aclk_database_worker_config *wc) dictionary_destroy(dict); freez(upd_node_collectors.claim_id); - log_access("ACLK RES [%s (%s)]: NODE COLLECTORS SENT", wc->node_id, wc->host->hostname); + log_access("ACLK RES [%s (%s)]: NODE COLLECTORS SENT", wc->node_id, rrdhost_hostname(wc->host)); #else UNUSED(wc); #endif @@ -74,14 +72,7 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat node_info.ml_info.ml_capable = ml_capable(localhost); node_info.ml_info.ml_enabled = ml_enabled(wc->host); - struct capability instance_caps[] = { - { .name = "proto", .version = 1, .enabled = 1 }, - { .name = "ml", .version = ml_capable(localhost), .enabled = ml_enabled(wc->host) }, - { .name = "mc", .version = enable_metric_correlations ? metric_correlations_version : 0, .enabled = enable_metric_correlations }, - { .name = "ctx", .version = 1, .enabled = rrdcontext_enabled}, - { .name = NULL, .version = 0, .enabled = 0 } - }; - node_info.node_instance_capabilities = instance_caps; + node_info.node_instance_capabilities = aclk_get_node_instance_capas(wc->host); now_realtime_timeval(&node_info.updated_at); @@ -89,13 +80,12 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat char *host_version = NULL; if (host != localhost) { netdata_mutex_lock(&host->receiver_lock); - host_version = - strdupz(host->receiver && host->receiver->program_version ? host->receiver->program_version : "unknown"); + host_version = strdupz(host->receiver && host->receiver->program_version ? host->receiver->program_version : "unknown"); netdata_mutex_unlock(&host->receiver_lock); } - node_info.data.name = host->hostname; - node_info.data.os = (char *) host->os; + node_info.data.name = rrdhost_hostname(host); + node_info.data.os = rrdhost_os(host); node_info.data.os_name = host->system_info->host_os_name; node_info.data.os_version = host->system_info->host_os_version; node_info.data.kernel_name = host->system_info->kernel_name; @@ -106,8 +96,8 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat node_info.data.memory = host->system_info->host_ram_total ? host->system_info->host_ram_total : "0"; node_info.data.disk_space = host->system_info->host_disk_space ? host->system_info->host_disk_space : "0"; node_info.data.version = host_version ? host_version : VERSION; - node_info.data.release_channel = (char *) get_release_channel(); - node_info.data.timezone = (char *) host->abbrev_timezone; + node_info.data.release_channel = get_release_channel(); + node_info.data.timezone = rrdhost_abbrev_timezone(host); node_info.data.virtualization_type = host->system_info->virtualization ? host->system_info->virtualization : "unknown"; node_info.data.container_type = host->system_info->container ? host->system_info->container : "unknown"; node_info.data.custom_info = config_get(CONFIG_SECTION_WEB, "custom dashboard_info.js", ""); @@ -123,13 +113,14 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat node_info.data.ml_info.ml_capable = host->system_info->ml_capable; node_info.data.ml_info.ml_enabled = host->system_info->ml_enabled; - node_info.data.host_labels_ptr = host->host_labels; + node_info.data.host_labels_ptr = host->rrdlabels; aclk_update_node_info(&node_info); - log_access("ACLK RES [%s (%s)]: NODE INFO SENT for guid [%s] (%s)", wc->node_id, wc->host->hostname, wc->host_guid, wc->host == localhost ? "parent" : "child"); + log_access("ACLK RES [%s (%s)]: NODE INFO SENT for guid [%s] (%s)", wc->node_id, rrdhost_hostname(wc->host), wc->host_guid, wc->host == localhost ? "parent" : "child"); rrd_unlock(); freez(node_info.claim_id); + freez(node_info.node_instance_capabilities); freez(host_version); wc->node_collectors_send = now_realtime_sec(); diff --git a/database/sqlite/sqlite_context.c b/database/sqlite/sqlite_context.c index 901ab0031..9c7a61c6e 100644 --- a/database/sqlite/sqlite_context.c +++ b/database/sqlite/sqlite_context.c @@ -21,7 +21,6 @@ const char *database_context_cleanup[] = { }; sqlite3 *db_context_meta = NULL; - /* * Initialize the SQLite database * Return 0 on success @@ -125,22 +124,24 @@ void sql_close_context_database(void) // Fetching data // #define CTX_GET_CHART_LIST "SELECT c.chart_id, c.type||'.'||c.id, c.name, c.context, c.title, c.unit, c.priority, " \ - "c.update_every, c.chart_type, c.family FROM meta.chart c WHERE c.host_id = @host_id; " + "c.update_every, c.chart_type, c.family FROM meta.chart c WHERE c.host_id = @host_id and c.chart_id is not null; " void ctx_get_chart_list(uuid_t *host_uuid, void (*dict_cb)(SQL_CHART_DATA *, void *), void *data) { int rc; - sqlite3_stmt *res = NULL; + static __thread sqlite3_stmt *res = NULL; if (unlikely(!host_uuid)) { internal_error(true, "Requesting context chart list without host_id"); return; } - rc = sqlite3_prepare_v2(db_context_meta, CTX_GET_CHART_LIST, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch chart list"); - return; + if (unlikely(!res)) { + rc = prepare_statement(db_context_meta, CTX_GET_CHART_LIST, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to fetch chart list"); + return; + } } rc = sqlite3_bind_blob(res, 1, host_uuid, sizeof(*host_uuid), SQLITE_STATIC); @@ -150,7 +151,7 @@ void ctx_get_chart_list(uuid_t *host_uuid, void (*dict_cb)(SQL_CHART_DATA *, voi } SQL_CHART_DATA chart_data = { 0 }; - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { uuid_copy(chart_data.chart_id, *((uuid_t *)sqlite3_column_blob(res, 0))); chart_data.id = (char *) sqlite3_column_text(res, 1); chart_data.name = (char *) sqlite3_column_text(res, 2); @@ -165,22 +166,25 @@ void ctx_get_chart_list(uuid_t *host_uuid, void (*dict_cb)(SQL_CHART_DATA *, voi } skip_load: - rc = sqlite3_finalize(res); + rc = sqlite3_reset(res); if (rc != SQLITE_OK) - error_report("Failed to finalize statement that fetches chart label data, rc = %d", rc); + error_report("Failed to reset statement that fetches chart label data, rc = %d", rc); } // Dimension list -#define CTX_GET_DIMENSION_LIST "SELECT d.dim_id, d.id, d.name FROM meta.dimension d WHERE d.chart_id = @id;" +#define CTX_GET_DIMENSION_LIST "SELECT d.dim_id, d.id, d.name, CASE WHEN INSTR(d.options,\"hidden\") > 0 THEN 1 ELSE 0 END " \ + "FROM meta.dimension d WHERE d.chart_id = @id and d.dim_id is not null ORDER BY d.rowid ASC;" void ctx_get_dimension_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_DIMENSION_DATA *, void *), void *data) { int rc; - sqlite3_stmt *res = NULL; - - rc = sqlite3_prepare_v2(db_context_meta, CTX_GET_DIMENSION_LIST, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch chart dimension data"); - return; + static __thread sqlite3_stmt *res = NULL; + + if (unlikely(!res)) { + rc = prepare_statement(db_context_meta, CTX_GET_DIMENSION_LIST, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to fetch chart dimension data"); + return; + } } rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); @@ -191,17 +195,18 @@ void ctx_get_dimension_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_DIMENSION_DA SQL_DIMENSION_DATA dimension_data; - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { uuid_copy(dimension_data.dim_id, *((uuid_t *)sqlite3_column_blob(res, 0))); dimension_data.id = (char *) sqlite3_column_text(res, 1); dimension_data.name = (char *) sqlite3_column_text(res, 2); + dimension_data.hidden = sqlite3_column_int(res, 3); dict_cb(&dimension_data, data); } failed: - rc = sqlite3_finalize(res); + rc = sqlite3_reset(res); if (rc != SQLITE_OK) - error_report("Failed to finalize statement that fetches the chart dimension list, rc = %d", rc); + error_report("Failed to reset statement that fetches the chart dimension list, rc = %d", rc); } // LABEL LIST @@ -209,12 +214,14 @@ failed: void ctx_get_label_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_CLABEL_DATA *, void *), void *data) { int rc; - sqlite3_stmt *res = NULL; - - rc = sqlite3_prepare_v2(db_context_meta, CTX_GET_LABEL_LIST, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch chart lanbels"); - return; + static __thread sqlite3_stmt *res = NULL; + + if (unlikely(!res)) { + rc = prepare_statement(db_context_meta, CTX_GET_LABEL_LIST, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to fetch chart labels"); + return; + } } rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); @@ -225,7 +232,7 @@ void ctx_get_label_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_CLABEL_DATA *, v SQL_CLABEL_DATA label_data; - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { label_data.label_key = (char *) sqlite3_column_text(res, 0); label_data.label_value = (char *) sqlite3_column_text(res, 1); label_data.label_source = sqlite3_column_int(res, 2); @@ -233,9 +240,9 @@ void ctx_get_label_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_CLABEL_DATA *, v } failed: - rc = sqlite3_finalize(res); + rc = sqlite3_reset(res); if (rc != SQLITE_OK) - error_report("Failed to finalize statement that fetches chart label data, rc = %d", rc); + error_report("Failed to reset statement that fetches chart label data, rc = %d", rc); return; } @@ -250,12 +257,14 @@ void ctx_get_context_list(uuid_t *host_uuid, void (*dict_cb)(VERSIONED_CONTEXT_D return; int rc; - sqlite3_stmt *res = NULL; - - rc = sqlite3_prepare_v2(db_context_meta, CTX_GET_CONTEXT_LIST, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch stored context list"); - return; + static __thread sqlite3_stmt *res = NULL; + + if (unlikely(!res)) { + rc = prepare_statement(db_context_meta, CTX_GET_CONTEXT_LIST, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to fetch stored context list"); + return; + } } VERSIONED_CONTEXT_DATA context_data = {0}; @@ -267,7 +276,7 @@ void ctx_get_context_list(uuid_t *host_uuid, void (*dict_cb)(VERSIONED_CONTEXT_D goto failed; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { context_data.id = (char *) sqlite3_column_text(res, 0); context_data.version = sqlite3_column_int64(res, 1); context_data.title = (char *) sqlite3_column_text(res, 2); @@ -282,9 +291,9 @@ void ctx_get_context_list(uuid_t *host_uuid, void (*dict_cb)(VERSIONED_CONTEXT_D } failed: - rc = sqlite3_finalize(res); + rc = sqlite3_reset(res); if (rc != SQLITE_OK) - error_report("Failed to finalize statement that fetches stored context versioned data, rc = %d", rc); + error_report("Failed to reset statement that fetches stored context versioned data, rc = %d", rc); } @@ -437,6 +446,13 @@ skip_delete: return (rc_stored != SQLITE_DONE); } +int sql_context_cache_stats(int op) +{ + int count, dummy; + sqlite3_db_status(db_context_meta, op, &count, &dummy, 0); + return count; +} + // // TESTING FUNCTIONS // diff --git a/database/sqlite/sqlite_context.h b/database/sqlite/sqlite_context.h index 12937fffd..2e52b9bf8 100644 --- a/database/sqlite/sqlite_context.h +++ b/database/sqlite/sqlite_context.h @@ -6,6 +6,7 @@ #include "daemon/common.h" #include "sqlite3.h" +int sql_context_cache_stats(int op); typedef struct ctx_chart { uuid_t chart_id; const char *id; @@ -23,6 +24,7 @@ typedef struct ctx_dimension { uuid_t dim_id; char *id; char *name; + bool hidden; } SQL_DIMENSION_DATA; typedef struct ctx_label { @@ -50,19 +52,19 @@ typedef struct versioned_context_data { } VERSIONED_CONTEXT_DATA; -extern void ctx_get_context_list(uuid_t *host_uuid, void (*dict_cb)(VERSIONED_CONTEXT_DATA *, void *), void *data); +void ctx_get_context_list(uuid_t *host_uuid, void (*dict_cb)(VERSIONED_CONTEXT_DATA *, void *), void *data); -extern void ctx_get_chart_list(uuid_t *host_uuid, void (*dict_cb)(SQL_CHART_DATA *, void *), void *data); -extern void ctx_get_label_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_CLABEL_DATA *, void *), void *data); -extern void ctx_get_dimension_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_DIMENSION_DATA *, void *), void *data); +void ctx_get_chart_list(uuid_t *host_uuid, void (*dict_cb)(SQL_CHART_DATA *, void *), void *data); +void ctx_get_label_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_CLABEL_DATA *, void *), void *data); +void ctx_get_dimension_list(uuid_t *chart_uuid, void (*dict_cb)(SQL_DIMENSION_DATA *, void *), void *data); -extern int ctx_store_context(uuid_t *host_uuid, VERSIONED_CONTEXT_DATA *context_data); +int ctx_store_context(uuid_t *host_uuid, VERSIONED_CONTEXT_DATA *context_data); #define ctx_update_context(host_uuid, context_data) ctx_store_context(host_uuid, context_data) -extern int ctx_delete_context(uuid_t *host_id, VERSIONED_CONTEXT_DATA *context_data); +int ctx_delete_context(uuid_t *host_id, VERSIONED_CONTEXT_DATA *context_data); -extern int sql_init_context_database(int memory); -extern void sql_close_context_database(void); -extern int ctx_unittest(void); +int sql_init_context_database(int memory); +void sql_close_context_database(void); +int ctx_unittest(void); #endif //NETDATA_SQLITE_CONTEXT_H diff --git a/database/sqlite/sqlite_db_migration.c b/database/sqlite/sqlite_db_migration.c index bd4743364..8b1d01594 100644 --- a/database/sqlite/sqlite_db_migration.c +++ b/database/sqlite/sqlite_db_migration.c @@ -21,7 +21,7 @@ static int table_exists_in_database(const char *table) snprintf(sql, 127, "select 1 from sqlite_schema where type = 'table' and name = '%s';", table); - int rc = sqlite3_exec(db_meta, sql, return_int_cb, (void *) &exists, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, sql, return_int_cb, (void *) &exists, &err_msg); if (rc != SQLITE_OK) { info("Error checking table existence; %s", err_msg); sqlite3_free(err_msg); @@ -39,7 +39,7 @@ static int column_exists_in_table(const char *table, const char *column) snprintf(sql, 127, "SELECT 1 FROM pragma_table_info('%s') where name = '%s';", table, column); - int rc = sqlite3_exec(db_meta, sql, return_int_cb, (void *) &exists, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, sql, return_int_cb, (void *) &exists, &err_msg); if (rc != SQLITE_OK) { info("Error checking column existence; %s", err_msg); sqlite3_free(err_msg); @@ -64,6 +64,22 @@ const char *database_migrate_v2_v3[] = { NULL }; +const char *database_migrate_v4_v5[] = { + "DROP TABLE IF EXISTS chart_active;", + "DROP TABLE IF EXISTS dimension_active;", + "DROP TABLE IF EXISTS chart_hash;", + "DROP TABLE IF EXISTS chart_hash_map;", + "DROP VIEW IF EXISTS v_chart_hash;", + NULL +}; + +const char *database_migrate_v5_v6[] = { + "DROP TRIGGER IF EXISTS tr_dim_del;", + "DROP TABLE IF EXISTS dimension_delete;", + NULL +}; + + static int do_migration_v1_v2(sqlite3 *database, const char *name) { UNUSED(name); @@ -100,11 +116,11 @@ static int do_migration_v3_v4(sqlite3 *database, const char *name) return 1; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { char *table = strdupz((char *) sqlite3_column_text(res, 0)); if (!column_exists_in_table(table, "chart_context")) { snprintfz(sql, 255, "ALTER TABLE %s ADD chart_context text", table); - sqlite3_exec(database, sql, 0, 0, NULL); + sqlite3_exec_monitored(database, sql, 0, 0, NULL); } freez(table); } @@ -116,6 +132,57 @@ static int do_migration_v3_v4(sqlite3 *database, const char *name) return 0; } +static int do_migration_v4_v5(sqlite3 *database, const char *name) +{ + UNUSED(name); + info("Running \"%s\" database migration", name); + + return init_database_batch(database, DB_CHECK_NONE, 0, &database_migrate_v4_v5[0]); +} + +static int do_migration_v5_v6(sqlite3 *database, const char *name) +{ + UNUSED(name); + info("Running \"%s\" database migration", name); + + return init_database_batch(database, DB_CHECK_NONE, 0, &database_migrate_v5_v6[0]); +} + +static int do_migration_v6_v7(sqlite3 *database, const char *name) +{ + UNUSED(name); + info("Running \"%s\" database migration", name); + + char sql[256]; + + int rc; + sqlite3_stmt *res = NULL; + snprintfz(sql, 255, "SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'aclk_alert_%%';"); + rc = sqlite3_prepare_v2(database, sql, -1, &res, 0); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to alter aclk_alert tables"); + return 1; + } + + while (sqlite3_step_monitored(res) == SQLITE_ROW) { + char *table = strdupz((char *) sqlite3_column_text(res, 0)); + if (!column_exists_in_table(table, "filtered_alert_unique_id")) { + snprintfz(sql, 255, "ALTER TABLE %s ADD filtered_alert_unique_id", table); + sqlite3_exec_monitored(database, sql, 0, 0, NULL); + snprintfz(sql, 255, "UPDATE %s SET filtered_alert_unique_id = alert_unique_id", table); + sqlite3_exec_monitored(database, sql, 0, 0, NULL); + } + freez(table); + } + + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize statement when altering aclk_alert tables, rc = %d", rc); + + return 0; +} + + static int do_migration_noop(sqlite3 *database, const char *name) { UNUSED(database); @@ -135,7 +202,7 @@ static int migrate_database(sqlite3 *database, int target_version, char *db_name int user_version = 0; char *err_msg = NULL; - int rc = sqlite3_exec(database, "PRAGMA user_version;", return_int_cb, (void *) &user_version, &err_msg); + int rc = sqlite3_exec_monitored(database, "PRAGMA user_version;", return_int_cb, (void *) &user_version, &err_msg); if (rc != SQLITE_OK) { info("Error checking the %s database version; %s", db_name, err_msg); sqlite3_free(err_msg); @@ -163,6 +230,9 @@ DATABASE_FUNC_MIGRATION_LIST migration_action[] = { {.name = "v1 to v2", .func = do_migration_v1_v2}, {.name = "v2 to v3", .func = do_migration_v2_v3}, {.name = "v3 to v4", .func = do_migration_v3_v4}, + {.name = "v4 to v5", .func = do_migration_v4_v5}, + {.name = "v5 to v6", .func = do_migration_v5_v6}, + {.name = "v6 to v7", .func = do_migration_v6_v7}, // the terminator of this array {.name = NULL, .func = NULL} }; diff --git a/database/sqlite/sqlite_functions.c b/database/sqlite/sqlite_functions.c index f46450afa..eeb3c3822 100644 --- a/database/sqlite/sqlite_functions.c +++ b/database/sqlite/sqlite_functions.c @@ -3,7 +3,7 @@ #include "sqlite_functions.h" #include "sqlite_db_migration.h" -#define DB_METADATA_VERSION 4 +#define DB_METADATA_VERSION 7 const char *database_config[] = { "CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, " @@ -21,14 +21,10 @@ const char *database_config[] = { "CREATE TABLE IF NOT EXISTS dimension(dim_id blob PRIMARY KEY, chart_id blob, id text, name text, " "multiplier int, divisor int , algorithm int, options text);", - "DROP TABLE IF EXISTS chart_active;", - "DROP TABLE IF EXISTS dimension_active;", - - "CREATE TABLE IF NOT EXISTS chart_active(chart_id blob PRIMARY KEY, date_created int);", - "CREATE TABLE IF NOT EXISTS dimension_active(dim_id blob primary key, date_created int);", "CREATE TABLE IF NOT EXISTS metadata_migration(filename text, file_size, date_created int);", "CREATE INDEX IF NOT EXISTS ind_d1 on dimension (chart_id, id, name);", "CREATE INDEX IF NOT EXISTS ind_c1 on chart (host_id, id, type, name);", + "CREATE INDEX IF NOT EXISTS ind_c2 on chart (host_id, context);", "CREATE TABLE IF NOT EXISTS chart_label(chart_id blob, source_type int, label_key text, " "label_value text, date_created int, PRIMARY KEY (chart_id, label_key));", "CREATE TABLE IF NOT EXISTS node_instance (host_id blob PRIMARY KEY, claim_id, node_id, date_created);", @@ -45,39 +41,20 @@ const char *database_config[] = { "CREATE TABLE IF NOT EXISTS host_label(host_id blob, source_type int, label_key text NOT NULL, " "label_value text NOT NULL, date_created INT, PRIMARY KEY (host_id, label_key));", - "CREATE TABLE IF NOT EXISTS chart_hash_map(chart_id blob , hash_id blob, UNIQUE (chart_id, hash_id));", - - "CREATE TABLE IF NOT EXISTS chart_hash(hash_id blob PRIMARY KEY,type text, id text, name text, " - "family text, context text, title text, unit text, plugin text, " - "module text, priority integer, chart_type, last_used);", - - "CREATE VIEW IF NOT EXISTS v_chart_hash as SELECT ch.*, chm.chart_id FROM chart_hash ch, chart_hash_map chm " - "WHERE ch.hash_id = chm.hash_id;", - "CREATE TRIGGER IF NOT EXISTS ins_host AFTER INSERT ON host BEGIN INSERT INTO node_instance (host_id, date_created)" " SELECT new.host_id, unixepoch() WHERE new.host_id NOT IN (SELECT host_id FROM node_instance); END;", - "CREATE TRIGGER IF NOT EXISTS tr_v_chart_hash INSTEAD OF INSERT on v_chart_hash BEGIN " - "INSERT INTO chart_hash (hash_id, type, id, name, family, context, title, unit, plugin, " - "module, priority, chart_type, last_used) " - "values (new.hash_id, new.type, new.id, new.name, new.family, new.context, new.title, new.unit, new.plugin, " - "new.module, new.priority, new.chart_type, unixepoch()) " - "ON CONFLICT (hash_id) DO UPDATE SET last_used = unixepoch(); " - "INSERT INTO chart_hash_map (chart_id, hash_id) values (new.chart_id, new.hash_id) " - "on conflict (chart_id, hash_id) do nothing; END; ", - NULL }; const char *database_cleanup[] = { - "delete from chart where chart_id not in (select chart_id from dimension);", - "delete from host where host_id not in (select host_id from chart);", - "delete from chart_label where chart_id not in (select chart_id from chart);", - "DELETE FROM chart_hash_map WHERE chart_id NOT IN (SELECT chart_id FROM chart);", - "DELETE FROM chart_hash WHERE hash_id NOT IN (SELECT hash_id FROM chart_hash_map);", + "DELETE FROM chart WHERE chart_id NOT IN (SELECT chart_id FROM dimension);", + "DELETE FROM host WHERE host_id NOT IN (SELECT host_id FROM chart);", + "DELETE FROM chart_label WHERE chart_id NOT IN (SELECT chart_id FROM chart);", "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host);", "DELETE FROM host_info WHERE host_id NOT IN (SELECT host_id FROM host);", "DELETE FROM host_label WHERE host_id NOT IN (SELECT host_id FROM host);", + "DROP TRIGGER IF EXISTS tr_dim_del;", NULL }; @@ -86,13 +63,49 @@ sqlite3 *db_meta = NULL; #define MAX_PREPARED_STATEMENTS (32) pthread_key_t key_pool[MAX_PREPARED_STATEMENTS]; -static uv_mutex_t sqlite_transaction_lock; +SQLITE_API int sqlite3_exec_monitored( + sqlite3 *db, /* An open database */ + const char *sql, /* SQL to be evaluated */ + int (*callback)(void*,int,char**,char**), /* Callback function */ + void *data, /* 1st argument to callback */ + char **errmsg /* Error msg written here */ +) { + int rc = sqlite3_exec(db, sql, callback, data, errmsg); + global_statistics_sqlite3_query_completed(rc == SQLITE_OK, rc == SQLITE_BUSY, rc == SQLITE_LOCKED); + return rc; +} + +SQLITE_API int sqlite3_step_monitored(sqlite3_stmt *stmt) { + int rc; + int cnt = 0; + + while (cnt++ < SQL_MAX_RETRY) { + rc = sqlite3_step(stmt); + switch (rc) { + case SQLITE_DONE: + global_statistics_sqlite3_query_completed(1, 0, 0); + break; + case SQLITE_ROW: + global_statistics_sqlite3_row_completed(); + break; + case SQLITE_BUSY: + case SQLITE_LOCKED: + global_statistics_sqlite3_query_completed(rc == SQLITE_DONE, rc == SQLITE_BUSY, rc == SQLITE_LOCKED); + usleep(SQLITE_INSERT_DELAY * USEC_PER_MS); + continue; + default: + break; + } + break; + } + return rc; +} int execute_insert(sqlite3_stmt *res) { int rc; int cnt = 0; - while ((rc = sqlite3_step(res)) != SQLITE_DONE && ++cnt < SQL_MAX_RETRY && likely(!netdata_exit)) { + while ((rc = sqlite3_step_monitored(res)) != SQLITE_DONE && ++cnt < SQL_MAX_RETRY && likely(!netdata_exit)) { if (likely(rc == SQLITE_BUSY || rc == SQLITE_LOCKED)) { usleep(SQLITE_INSERT_DELAY * USEC_PER_MS); error_report("Failed to insert/update, rc = %d -- attempt %d", rc, cnt); @@ -134,14 +147,14 @@ static void add_stmt_to_list(sqlite3_stmt *res) static void release_statement(void *statement) { int rc; -#ifdef NETDATA_INTERNAL_CHECKS +#ifdef NETDATA_DEV_MODE info("Thread %d: Cleaning prepared statement on %p", gettid(), statement); #endif if (unlikely(rc = sqlite3_finalize((sqlite3_stmt *) statement) != SQLITE_OK)) error_report("Failed to finalize statement, rc = %d", rc); } -int prepare_statement(sqlite3 *database, char *query, sqlite3_stmt **statement) +int prepare_statement(sqlite3 *database, const char *query, sqlite3_stmt **statement) { static __thread uint32_t keys_used = 0; @@ -155,7 +168,7 @@ int prepare_statement(sqlite3 *database, char *query, sqlite3_stmt **statement) if (likely(rc == SQLITE_OK)) { if (likely(key)) { ret = pthread_setspecific(*key, *statement); -#ifdef NETDATA_INTERNAL_CHECKS +#ifdef NETDATA_DEV_MODE info("Thread %d: Using key %u on statement %p", gettid(), keys_used, *statement); #endif } @@ -165,88 +178,6 @@ int prepare_statement(sqlite3 *database, char *query, sqlite3_stmt **statement) return rc; } -/* - * Store a chart or dimension UUID in chart_active or dimension_active - * The statement that will be prepared determines that - */ - -static int store_active_uuid_object(sqlite3_stmt **res, char *statement, uuid_t *uuid) -{ - int rc; - - // Check if we should need to prepare the statement - if (!*res) { - rc = prepare_statement(db_meta, statement, res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store active object, rc = %d", rc); - return rc; - } - } - - rc = sqlite3_bind_blob(*res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to bind input parameter to store active object, rc = %d", rc); - else - rc = execute_insert(*res); - return rc; -} - -/* - * Marks a chart with UUID as active - * Input: UUID - */ -void store_active_chart(uuid_t *chart_uuid) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("Database has not been initialized"); - return; - } - - if (unlikely(!chart_uuid)) - return; - - rc = store_active_uuid_object(&res, SQL_STORE_ACTIVE_CHART, chart_uuid); - if (rc != SQLITE_DONE) - error_report("Failed to store active chart, rc = %d", rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize statement in store active chart, rc = %d", rc); - return; -} - -/* - * Marks a dimension with UUID as active - * Input: UUID - */ -void store_active_dimension(uuid_t *dimension_uuid) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("Database has not been initialized"); - return; - } - - if (unlikely(!dimension_uuid)) - return; - - rc = store_active_uuid_object(&res, SQL_STORE_ACTIVE_DIMENSION, dimension_uuid); - if (rc != SQLITE_DONE) - error_report("Failed to store active dimension, rc = %d", rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize statement in store active dimension, rc = %d", rc); - return; -} - static int check_table_integrity_cb(void *data, int argc, char **argv, char **column) { int *status = data; @@ -273,7 +204,7 @@ static int check_table_integrity(char *table) strcpy(wstr,"PRAGMA integrity_check;"); } - int rc = sqlite3_exec(db_meta, wstr, check_table_integrity_cb, (void *) &status, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, wstr, check_table_integrity_cb, (void *) &status, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error during database integrity check for %s, rc = %d (%s)", table ? table : "the entire database", rc, err_msg); @@ -306,14 +237,13 @@ static void rebuild_chart() info("Rebuilding chart table"); for (int i = 0; rebuild_chart_commands[i]; i++) { info("Executing %s", rebuild_chart_commands[i]); - rc = sqlite3_exec(db_meta, rebuild_chart_commands[i], 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, rebuild_chart_commands[i], 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error during database setup, rc = %d (%s)", rc, err_msg); error_report("SQLite failed statement %s", rebuild_chart_commands[i]); sqlite3_free(err_msg); } } - return; } const char *rebuild_dimension_commands[] = { @@ -339,14 +269,13 @@ void rebuild_dimension() info("Rebuilding dimension table"); for (int i = 0; rebuild_dimension_commands[i]; i++) { info("Executing %s", rebuild_dimension_commands[i]); - rc = sqlite3_exec(db_meta, rebuild_dimension_commands[i], 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, rebuild_dimension_commands[i], 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error during database setup, rc = %d (%s)", rc, err_msg); error_report("SQLite failed statement %s", rebuild_dimension_commands[i]); sqlite3_free(err_msg); } } - return; } static int attempt_database_fix() @@ -366,7 +295,7 @@ int init_database_batch(sqlite3 *database, int rebuild, int init_type, const cha char *err_msg = NULL; for (int i = 0; batch[i]; i++) { debug(D_METADATALOG, "Executing %s", batch[i]); - rc = sqlite3_exec(database, batch[i], 0, 0, &err_msg); + rc = sqlite3_exec_monitored(database, batch[i], 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error during database %s, rc = %d (%s)", init_type ? "cleanup" : "setup", rc, err_msg); error_report("SQLite failed statement %s", batch[i]); @@ -384,6 +313,24 @@ int init_database_batch(sqlite3 *database, int rebuild, int init_type, const cha return 0; } +static void sqlite_uuid_parse(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + uuid_t uuid; + + if ( argc != 1 ){ + sqlite3_result_null(context); + return ; + } + int rc = uuid_parse((const char *) sqlite3_value_text(argv[0]), uuid); + if (rc == -1) { + sqlite3_result_null(context); + return ; + } + + sqlite3_result_blob(context, &uuid, sizeof(uuid_t), SQLITE_TRANSIENT); +} + + /* * Initialize the SQLite database * Return 0 on success @@ -437,7 +384,7 @@ int sql_init_database(db_check_action_type_t rebuild, int memory) if (rebuild & DB_CHECK_RECLAIM_SPACE) { if (!(rebuild & DB_CHECK_CONT)) info("Reclaiming space of %s", sqlite_database); - rc = sqlite3_exec(db_meta, "VACUUM;", 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, "VACUUM;", 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("Failed to execute VACUUM rc = %d (%s)", rc, err_msg); sqlite3_free(err_msg); @@ -497,12 +444,14 @@ int sql_init_database(db_check_action_type_t rebuild, int memory) if (init_database_batch(db_meta, rebuild, 0, &database_cleanup[0])) return 1; - fatal_assert(0 == uv_mutex_init(&sqlite_transaction_lock)); info("SQLite database initialization completed"); for (int i = 0; i < MAX_PREPARED_STATEMENTS; i++) (void)pthread_key_create(&key_pool[i], release_statement); + rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to register internal u2h function"); return 0; } @@ -523,233 +472,9 @@ void sql_close_database(void) rc = sqlite3_close_v2(db_meta); if (unlikely(rc != SQLITE_OK)) error_report("Error %d while closing the SQLite database, %s", rc, sqlite3_errstr(rc)); - return; -} - -#define FIND_UUID_TYPE "select 1 from host where host_id = @uuid union select 2 from chart where chart_id = @uuid union select 3 from dimension where dim_id = @uuid;" - -int find_uuid_type(uuid_t *uuid) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - int uuid_type = 3; - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, FIND_UUID_TYPE, &res); - if (rc != SQLITE_OK) { - error_report("Failed to bind prepare statement to find UUID type in the database"); - return 0; - } - } - - rc = sqlite3_bind_blob(res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_step(res); - if (likely(rc == SQLITE_ROW)) - uuid_type = sqlite3_column_int(res, 0); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement during find uuid type, rc = %d", rc); - - return uuid_type; - -bind_fail: - return 0; -} - -int find_dimension_uuid(RRDSET *st, RRDDIM *rd, uuid_t *store_uuid) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - int status = 1; - - if (unlikely(!db_meta) && default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 1; - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_FIND_DIMENSION_UUID, &res); - if (rc != SQLITE_OK) { - error_report("Failed to bind prepare statement to lookup dimension UUID in the database"); - return 1; - } - } - - rc = sqlite3_bind_blob(res, 1, st->chart_uuid, sizeof(*st->chart_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 2, rd->id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 3, rd->name, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_step(res); - if (likely(rc == SQLITE_ROW)) { - uuid_copy(*store_uuid, *((uuid_t *) sqlite3_column_blob(res, 0))); - status = 0; - } - else { - uuid_generate(*store_uuid); - status = sql_store_dimension(store_uuid, st->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor, rd->algorithm); - if (unlikely(status)) - error_report("Failed to store dimension metadata in the database"); - } - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement find dimension uuid, rc = %d", rc); - return status; - -bind_fail: - error_report("Failed to bind input parameter to perform dimension UUID database lookup, rc = %d", rc); - return 1; -} - -#define DELETE_DIMENSION_UUID "delete from dimension where dim_id = @uuid;" - -void delete_dimension_uuid(uuid_t *dimension_uuid) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - -#ifdef NETDATA_INTERNAL_CHECKS - char uuid_str[GUID_LEN + 1]; - uuid_unparse_lower(*dimension_uuid, uuid_str); - debug(D_METADATALOG,"Deleting dimension uuid %s", uuid_str); -#endif - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, DELETE_DIMENSION_UUID, &res); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to delete a dimension uuid"); - return; - } - } - - rc = sqlite3_bind_blob(res, 1, dimension_uuid, sizeof(*dimension_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_step(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to delete dimension uuid, rc = %d", rc); - -bind_fail: - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when deleting dimension UUID, rc = %d", rc); - return; -} - -/* - * Do a database lookup to find the UUID of a chart - * - */ -uuid_t *find_chart_uuid(RRDHOST *host, const char *type, const char *id, const char *name) -{ - static __thread sqlite3_stmt *res = NULL; - uuid_t *uuid = NULL; - int rc; - - if (unlikely(!db_meta) && default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return NULL; - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_FIND_CHART_UUID, &res); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement to lookup chart UUID in the database"); - return NULL; - } - } - - rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 2, type, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 3, id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 4, name ? name : id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_step(res); - if (likely(rc == SQLITE_ROW)) { - uuid = mallocz(sizeof(uuid_t)); - uuid_copy(*uuid, sqlite3_column_blob(res, 0)); - } - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when searching for a chart UUID, rc = %d", rc); - -#ifdef NETDATA_INTERNAL_CHECKS - char uuid_str[GUID_LEN + 1]; - if (likely(uuid)) { - uuid_unparse_lower(*uuid, uuid_str); - debug(D_METADATALOG, "Found UUID %s for chart %s.%s", uuid_str, type, name ? name : id); - } - else - debug(D_METADATALOG, "UUID not found for chart %s.%s", type, name ? name : id); -#endif - return uuid; - -bind_fail: - error_report("Failed to bind input parameter to perform chart UUID database lookup, rc = %d", rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement when searching for a chart UUID, rc = %d", rc); - return NULL; -} - -int update_chart_metadata(uuid_t *chart_uuid, RRDSET *st, const char *id, const char *name) -{ - int rc; - - if (unlikely(!db_meta) && default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - - rc = sql_store_chart( - chart_uuid, &st->rrdhost->host_uuid, st->type, id, name, st->family, st->context, st->title, st->units, st->plugin_name, - st->module_name, st->priority, st->update_every, st->chart_type, st->rrd_memory_mode, st->entries); - - return rc; -} - -uuid_t *create_chart_uuid(RRDSET *st, const char *id, const char *name) -{ - uuid_t *uuid = NULL; - int rc; - - uuid = mallocz(sizeof(uuid_t)); - uuid_generate(*uuid); - -#ifdef NETDATA_INTERNAL_CHECKS - char uuid_str[GUID_LEN + 1]; - uuid_unparse_lower(*uuid, uuid_str); - debug(D_METADATALOG,"Generating uuid [%s] for chart %s under host %s", uuid_str, st->id, st->rrdhost->hostname); -#endif - - rc = update_chart_metadata(uuid, st, id, name); - - if (unlikely(rc)) - error_report("Failed to store chart metadata in the database"); - - return uuid; } -static int exec_statement_with_uuid(const char *sql, uuid_t *uuid) +int exec_statement_with_uuid(const char *sql, uuid_t *uuid) { int rc, result = 1; sqlite3_stmt *res = NULL; @@ -763,7 +488,7 @@ static int exec_statement_with_uuid(const char *sql, uuid_t *uuid) rc = sqlite3_bind_blob(res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind host parameter to %s, rc = %d", sql, rc); - goto failed; + goto skip; } rc = execute_insert(res); @@ -772,7 +497,7 @@ static int exec_statement_with_uuid(const char *sql, uuid_t *uuid) else error_report("Failed to execute %s, rc = %d", sql, rc); -failed: +skip: rc = sqlite3_finalize(res); if (unlikely(rc != SQLITE_OK)) error_report("Failed to finalize statement %s, rc = %d", sql, rc); @@ -780,445 +505,13 @@ failed: } -// Migrate all hosts with hops zero to this host_uuid -void migrate_localhost(uuid_t *host_uuid) -{ - int rc; - - rc = exec_statement_with_uuid("UPDATE chart SET host_id = @host_id WHERE host_id in (SELECT host_id FROM host where host_id <> @host_id and hops = 0); ", host_uuid); - if (!rc) - rc = exec_statement_with_uuid("DELETE FROM host WHERE hops = 0 AND host_id <> @host_id; ", host_uuid); - if (!rc) - db_execute("DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host);"); - -} - -int sql_store_host( - uuid_t *host_uuid, const char *hostname, const char *registry_hostname, int update_every, const char *os, - const char *tzone, const char *tags, int hops) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - if (unlikely((!res))) { - rc = prepare_statement(db_meta, SQL_STORE_HOST, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store host, rc = %d", rc); - return 1; - } - } - - rc = sqlite3_bind_blob(res, 1, host_uuid, sizeof(*host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 2, hostname, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 3, registry_hostname, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 4, update_every); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 5, os, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 6, tzone, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 7, tags, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 8, hops); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - int store_rc = sqlite3_step(res); - if (unlikely(store_rc != SQLITE_DONE)) - error_report("Failed to store host %s, rc = %d", hostname, rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement to store host %s, rc = %d", hostname, rc); - - return !(store_rc == SQLITE_DONE); -bind_fail: - error_report("Failed to bind parameter to store host %s, rc = %d", hostname, rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement to store host %s, rc = %d", hostname, rc); - return 1; -} - -// -// Store host and host system info information in the database -#define SQL_STORE_HOST_INFO "INSERT OR REPLACE INTO host " \ - "(host_id, hostname, registry_hostname, update_every, os, timezone," \ - "tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, program_version," \ - "entries, health_enabled) " \ - "values (@host_id, @hostname, @registry_hostname, @update_every, @os, @timezone, @tags, @hops, @memory_mode, " \ - "@abbrev_timezone, @utc_offset, @program_name, @program_version, " \ - "@entries, @health_enabled);" - -int sql_store_host_info(RRDHOST *host) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - if (unlikely((!res))) { - rc = prepare_statement(db_meta, SQL_STORE_HOST_INFO, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store host, rc = %d", rc); - return 1; - } - } - - rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 2, host->hostname, 0); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 3, host->registry_hostname, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 4, host->rrd_update_every); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 5, host->os, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 6, host->timezone, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 7, host->tags, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 8, host->system_info ? host->system_info->hops : 0); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 9, host->rrd_memory_mode); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 10, host->abbrev_timezone, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 11, host->utc_offset); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 12, host->program_name, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = bind_text_null(res, 13, host->program_version, 1); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int64(res, 14, host->rrd_history_entries); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 15, host->health_enabled); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - int store_rc = sqlite3_step(res); - if (unlikely(store_rc != SQLITE_DONE)) - error_report("Failed to store host %s, rc = %d", host->hostname, rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement to store host %s, rc = %d", host->hostname, rc); - - return !(store_rc == SQLITE_DONE); -bind_fail: - error_report("Failed to bind parameter to store host %s, rc = %d", host->hostname, rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement to store host %s, rc = %d", host->hostname, rc); - return 1; -} - -/* - * Store a chart in the database - */ - -int sql_store_chart( - uuid_t *chart_uuid, uuid_t *host_uuid, const char *type, const char *id, const char *name, const char *family, - const char *context, const char *title, const char *units, const char *plugin, const char *module, long priority, - int update_every, int chart_type, int memory_mode, long history_entries) -{ - static __thread sqlite3_stmt *res = NULL; - int rc, param = 0; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_STORE_CHART, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store chart, rc = %d", rc); - return 1; - } - } - - param++; - rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_blob(res, 2, host_uuid, sizeof(*host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 3, type, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 4, id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - if (name && *name) - rc = sqlite3_bind_text(res, 5, name, -1, SQLITE_STATIC); - else - rc = sqlite3_bind_null(res, 5); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 6, family, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 7, context, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 8, title, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 9, units, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 10, plugin, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 11, module, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 12, priority); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 13, update_every); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 14, chart_type); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 15, memory_mode); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 16, history_entries); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to store chart, rc = %d", rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in chart store function, rc = %d", rc); - - return 0; - -bind_fail: - error_report("Failed to bind parameter %d to store chart, rc = %d", param, rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in chart store function, rc = %d", rc); - return 1; -} - -/* - * Store a dimension - */ -int sql_store_dimension( - uuid_t *dim_uuid, uuid_t *chart_uuid, const char *id, const char *name, collected_number multiplier, - collected_number divisor, int algorithm) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_STORE_DIMENSION, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store dimension, rc = %d", rc); - return 1; - } - } - - rc = sqlite3_bind_blob(res, 1, dim_uuid, sizeof(*dim_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_blob(res, 2, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 3, id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_text(res, 4, name, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 5, multiplier); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 6, divisor); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = sqlite3_bind_int(res, 7, algorithm); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to store dimension, rc = %d", rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in store dimension, rc = %d", rc); - return 0; - -bind_fail: - error_report("Failed to bind parameter to store dimension, rc = %d", rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in store dimension, rc = %d", rc); - return 1; -} - -/* - * Store set option for a dimension - */ -int sql_set_dimension_option(uuid_t *dim_uuid, char *option) -{ - sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - rc = sqlite3_prepare_v2(db_meta, "UPDATE dimension SET options = @options WHERE dim_id = @dim_id", -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to update dimension options"); - return 0; - }; - - rc = sqlite3_bind_blob(res, 2, dim_uuid, sizeof(*dim_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - if (!option || !strcmp(option,"unhide")) - rc = sqlite3_bind_null(res, 1); - else - rc = sqlite3_bind_text(res, 1, option, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to update dimension option, rc = %d", rc); - -bind_fail: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize statement in update dimension options, rc = %d", rc); - return 0; -} - // -// Support for archived charts +// Support for archived charts (TO BE REMOVED) // #define SELECT_DIMENSION "select d.id, d.name from dimension d where d.chart_id = @chart_uuid;" -void sql_rrdim2json(sqlite3_stmt *res_dim, uuid_t *chart_uuid, BUFFER *wb, size_t *dimensions_count) +static void sql_rrdim2json(sqlite3_stmt *res_dim, uuid_t *chart_uuid, BUFFER *wb, size_t *dimensions_count) { int rc; @@ -1229,7 +522,7 @@ void sql_rrdim2json(sqlite3_stmt *res_dim, uuid_t *chart_uuid, BUFFER *wb, size_ int dimensions = 0; buffer_sprintf(wb, "\t\t\t\"dimensions\": {\n"); - while (sqlite3_step(res_dim) == SQLITE_ROW) { + while (sqlite3_step_monitored(res_dim) == SQLITE_ROW) { if (dimensions) buffer_strcat(wb, ",\n\t\t\t\t\""); else @@ -1291,11 +584,11 @@ void sql_rrdset2json(RRDHOST *host, BUFFER *wb) ",\n\t\"memory_mode\": \"%s\"" ",\n\t\"custom_info\": \"%s\"" ",\n\t\"charts\": {" - , host->hostname - , host->program_version + , rrdhost_hostname(host) + , rrdhost_program_version(host) , get_release_channel() - , host->os - , host->timezone + , rrdhost_os(host) + , rrdhost_timezone(host) , host->rrd_update_every , host->rrd_history_entries , rrd_memory_mode_name(host->rrd_memory_mode) @@ -1305,7 +598,7 @@ void sql_rrdset2json(RRDHOST *host, BUFFER *wb) size_t c = 0; size_t dimensions = 0; - while (sqlite3_step(res_chart) == SQLITE_ROW) { + while (sqlite3_step_monitored(res_chart) == SQLITE_ROW) { char id[512]; sprintf(id, "%s.%s", sqlite3_column_text(res_chart, 3), sqlite3_column_text(res_chart, 1)); RRDSET *st = rrdset_find(host, id); @@ -1386,7 +679,7 @@ void sql_rrdset2json(RRDHOST *host, BUFFER *wb) "\n\t\t\t\"hostname\": \"%s\"" "\n\t\t}" , (found > 0) ? "," : "" - , h->hostname + , rrdhost_hostname(h) ); found++; @@ -1400,7 +693,7 @@ void sql_rrdset2json(RRDHOST *host, BUFFER *wb) , "\n\t\t{" "\n\t\t\t\"hostname\": \"%s\"" "\n\t\t}" - , host->hostname + , rrdhost_hostname(host) ); } @@ -1414,95 +707,6 @@ failed: rc = sqlite3_finalize(res_chart); if (unlikely(rc != SQLITE_OK)) error_report("Failed to finalize the prepared statement when reading archived charts"); - - return; -} - -void free_temporary_host(RRDHOST *host) -{ - if (host) { - freez(host->hostname); - freez((char *)host->os); - freez((char *)host->tags); - freez((char *)host->timezone); - freez(host->program_name); - freez(host->program_version); - freez(host->registry_hostname); - freez(host->system_info); - freez(host); - } -} - -#define SELECT_HOST "select host_id, registry_hostname, update_every, os, timezone, tags from host where hostname = @hostname order by rowid desc;" -#define SELECT_HOST_BY_UUID "select h.host_id, h.registry_hostname, h.update_every, h.os, h.timezone, h.tags from host h, node_instance ni " \ - "where (ni.host_id = @host_id or ni.node_id = @host_id) AND ni.host_id = h.host_id;" - -RRDHOST *sql_create_host_by_uuid(char *hostname) -{ - int rc; - RRDHOST *host = NULL; - uuid_t host_uuid; - - sqlite3_stmt *res = NULL; - - rc = uuid_parse(hostname, host_uuid); - if (!rc) { - rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_BY_UUID, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch host by uuid"); - return NULL; - } - rc = sqlite3_bind_blob(res, 1, &host_uuid, sizeof(host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host_id parameter to fetch host information"); - goto failed; - } - } - else { - rc = sqlite3_prepare_v2(db_meta, SELECT_HOST, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch host by hostname"); - return NULL; - } - rc = sqlite3_bind_text(res, 1, hostname, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind hostname parameter to fetch host information"); - goto failed; - } - } - - rc = sqlite3_step(res); - if (unlikely(rc != SQLITE_ROW)) { - error_report("Failed to find hostname %s", hostname); - goto failed; - } - - char uuid_str[GUID_LEN + 1]; - uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 0)), uuid_str); - - host = callocz(1, sizeof(RRDHOST)); - - set_host_properties(host, sqlite3_column_int(res, 2), RRD_MEMORY_MODE_DBENGINE, hostname, - (char *) sqlite3_column_text(res, 1), (const char *) uuid_str, - (char *) sqlite3_column_text(res, 3), (char *) sqlite3_column_text(res, 5), - (char *) sqlite3_column_text(res, 4), NULL, 0, NULL, NULL); - - uuid_copy(host->host_uuid, *((uuid_t *) sqlite3_column_blob(res, 0))); - - host->system_info = callocz(1, sizeof(*host->system_info));; - rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); - -#ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ; tier++) - host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier]; -#endif - -failed: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when reading host information"); - - return host; } void db_execute(const char *cmd) @@ -1511,35 +715,23 @@ void db_execute(const char *cmd) int cnt = 0; while (cnt < SQL_MAX_RETRY) { char *err_msg; - rc = sqlite3_exec(db_meta, cmd, 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, cmd, 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("Failed to execute '%s', rc = %d (%s) -- attempt %d", cmd, rc, err_msg, cnt); sqlite3_free(err_msg); if (likely(rc == SQLITE_BUSY || rc == SQLITE_LOCKED)) { usleep(SQLITE_INSERT_DELAY * USEC_PER_MS); } - else break; + else + break; } else break; + ++cnt; } - return; -} - -void db_lock(void) -{ - uv_mutex_lock(&sqlite_transaction_lock); - return; } -void db_unlock(void) -{ - uv_mutex_unlock(&sqlite_transaction_lock); - return; -} - - #define SELECT_MIGRATED_FILE "select 1 from metadata_migration where filename = @path;" int file_is_migrated(char *path) @@ -1559,7 +751,7 @@ int file_is_migrated(char *path) return 0; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) error_report("Failed to finalize the prepared statement when checking if metadata file is migrated"); @@ -1587,7 +779,7 @@ void add_migrated_file(char *path, uint64_t file_size) return; } - rc = sqlite3_bind_int64(res, 2, file_size); + rc = sqlite3_bind_int64(res, 2, (sqlite_int64) file_size); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind size parameter to store migration information"); return; @@ -1599,494 +791,9 @@ void add_migrated_file(char *path, uint64_t file_size) if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) error_report("Failed to finalize the prepared statement when checking if metadata file is migrated"); - - return; -} - -static int sql_store_label(sqlite3_stmt *res, uuid_t *uuid, int source_type, const char *label, const char *value) -{ - int rc; - - rc = sqlite3_bind_blob(res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind UUID parameter to store label information"); - goto skip_store; - } - - rc = sqlite3_bind_int(res, 2, source_type); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind type parameter to store label information"); - goto skip_store; - } - - rc = sqlite3_bind_text(res, 3, label, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind label parameter to store label information"); - goto skip_store; - } - - rc = sqlite3_bind_text(res, 4, value, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind value parameter to store label information"); - goto skip_store; - } - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to store label entry, rc = %d", rc); - -skip_store: - if (unlikely(sqlite3_reset(res) != SQLITE_OK)) - error_report("Failed to reset the prepared statement when storing label information"); - - return rc != SQLITE_DONE; -} - -#define SQL_INS_CHART_LABEL "insert or replace into chart_label " \ - "(chart_id, source_type, label_key, label_value, date_created) " \ - "values (@chart, @source, @label, @value, unixepoch());" - -void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, char *value) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("Database has not been initialized"); - return; - } - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_INS_CHART_LABEL, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement store chart labels"); - return; - } - } - - sql_store_label(res, chart_uuid, source_type, label, value); - - return; -} - -#define SQL_INS_HOST_LABEL "INSERT OR REPLACE INTO host_label " \ - "(host_id, source_type, label_key, label_value, date_created) " \ - "values (@chart, @source, @label, @value, unixepoch());" - -static void sql_store_host_label(uuid_t *host_uuid, int source_type, const char *label, const char *value) -{ - static __thread sqlite3_stmt *res = NULL; - int rc; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("Database has not been initialized"); - return; - } - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_INS_HOST_LABEL, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement store chart labels"); - return; - } - } - - (void) sql_store_label(res, host_uuid, source_type, label, value); -} - -int find_dimension_first_last_t(char *machine_guid, char *chart_id, char *dim_id, - uuid_t *uuid, time_t *first_entry_t, time_t *last_entry_t, uuid_t *rrdeng_uuid, int tier) -{ -#ifdef ENABLE_DBENGINE - int rc; - uuid_t legacy_uuid; - uuid_t multihost_legacy_uuid; - time_t dim_first_entry_t, dim_last_entry_t; - - rc = rrdeng_metric_latest_time_by_uuid(uuid, &dim_first_entry_t, &dim_last_entry_t, tier); - if (unlikely(rc)) { - rrdeng_generate_legacy_uuid(dim_id, chart_id, &legacy_uuid); - rc = rrdeng_metric_latest_time_by_uuid(&legacy_uuid, &dim_first_entry_t, &dim_last_entry_t, tier); - if (likely(rc)) { - rrdeng_convert_legacy_uuid_to_multihost(machine_guid, &legacy_uuid, &multihost_legacy_uuid); - rc = rrdeng_metric_latest_time_by_uuid(&multihost_legacy_uuid, &dim_first_entry_t, &dim_last_entry_t, tier); - if (likely(!rc)) - uuid_copy(*rrdeng_uuid, multihost_legacy_uuid); - } - else - uuid_copy(*rrdeng_uuid, legacy_uuid); - } - else - uuid_copy(*rrdeng_uuid, *uuid); - - if (likely(!rc)) { - *first_entry_t = MIN(*first_entry_t, dim_first_entry_t); - *last_entry_t = MAX(*last_entry_t, dim_last_entry_t); - } - return rc; -#else - UNUSED(machine_guid); - UNUSED(chart_id); - UNUSED(dim_id); - UNUSED(uuid); - UNUSED(first_entry_t); - UNUSED(last_entry_t); - UNUSED(rrdeng_uuid); - return 1; -#endif -} -#include "../storage_engine.h" -#ifdef ENABLE_DBENGINE -static RRDDIM *create_rrdim_entry(ONEWAYALLOC *owa, RRDSET *st, char *id, char *name, uuid_t *metric_uuid) -{ - STORAGE_ENGINE *eng = storage_engine_get(RRD_MEMORY_MODE_DBENGINE); - - if (unlikely(!eng)) - return NULL; - - RRDDIM *rd = onewayalloc_callocz(owa, 1, sizeof(*rd)); - rd->rrdset = st; - rd->update_every = st->update_every; - rd->last_stored_value = NAN; - rrddim_flag_set(rd, RRDDIM_FLAG_NONE); - - uuid_copy(rd->metric_uuid, *metric_uuid); - rd->id = onewayalloc_strdupz(owa, id); - rd->name = onewayalloc_strdupz(owa, name); - - for(int tier = 0; tier < storage_tiers ;tier++) { - rd->tiers[tier] = onewayalloc_callocz(owa, 1, sizeof(*rd->tiers[tier])); - rd->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; - rd->tiers[tier]->tier_grouping = get_tier_grouping(tier); - rd->tiers[tier]->mode = RRD_MEMORY_MODE_DBENGINE; - rd->tiers[tier]->query_ops.init = rrdeng_load_metric_init; - rd->tiers[tier]->query_ops.next_metric = rrdeng_load_metric_next; - rd->tiers[tier]->query_ops.is_finished = rrdeng_load_metric_is_finished; - rd->tiers[tier]->query_ops.finalize = rrdeng_load_metric_finalize; - rd->tiers[tier]->query_ops.latest_time = rrdeng_metric_latest_time; - rd->tiers[tier]->query_ops.oldest_time = rrdeng_metric_oldest_time; - rd->tiers[tier]->db_metric_handle = eng->api.init(rd, st->rrdhost->storage_instance[tier]); - } - - return rd; -} -#endif - -#define SELECT_CHART_CONTEXT "select d.dim_id, d.id, d.name, c.id, c.type, c.name, c.update_every, c.chart_id, " \ - "c.context, CASE WHEN d.options = 'hidden' THEN 1 else 0 END from chart c, " \ - "dimension d, host h " \ - "where d.chart_id = c.chart_id and c.host_id = h.host_id and c.host_id = @host_id and c.context = @context " \ - "order by c.chart_id asc, c.type||c.id desc;" - -#define SELECT_CHART_SINGLE "select d.dim_id, d.id, d.name, c.id, c.type, c.name, c.update_every, c.chart_id, " \ - "c.context, CASE WHEN d.options = 'hidden' THEN 1 else 0 END from chart c, " \ - "dimension d, host h " \ - "where d.chart_id = c.chart_id and c.host_id = h.host_id and c.host_id = @host_id and c.type||'.'||c.id = @chart " \ - "order by c.chart_id asc, c.type||'.'||c.id desc;" - -void sql_build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_list, RRDHOST *host, char *context, char *chart) -{ -#ifdef ENABLE_DBENGINE - int rc; - - if (unlikely(!param_list) || host->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return; - - if (unlikely(!(*param_list))) { - *param_list = onewayalloc_mallocz(owa, sizeof(struct context_param)); - (*param_list)->first_entry_t = LONG_MAX; - (*param_list)->last_entry_t = 0; - (*param_list)->rd = NULL; - (*param_list)->flags = CONTEXT_FLAGS_ARCHIVE; - if (chart) - (*param_list)->flags |= CONTEXT_FLAGS_CHART; - else - (*param_list)->flags |= CONTEXT_FLAGS_CONTEXT; - } - - sqlite3_stmt *res = NULL; - - if (context) - rc = sqlite3_prepare_v2(db_meta, SELECT_CHART_CONTEXT, -1, &res, 0); - else - rc = sqlite3_prepare_v2(db_meta, SELECT_CHART_SINGLE, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to fetch host archived charts"); - return; - } - - rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter to fetch archived charts"); - goto failed; - } - - if (context) - rc = sqlite3_bind_text(res, 2, context, -1, SQLITE_STATIC); - else - rc = sqlite3_bind_text(res, 2, chart, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter to fetch archived charts"); - goto failed; - } - - RRDSET *st = NULL; - char machine_guid[GUID_LEN + 1]; - uuid_unparse_lower(host->host_uuid, machine_guid); - uuid_t rrdeng_uuid; - uuid_t chart_id; - - while (sqlite3_step(res) == SQLITE_ROW) { - char id[512]; - sprintf(id, "%s.%s", sqlite3_column_text(res, 3), sqlite3_column_text(res, 1)); - - if (!st || uuid_compare(*(uuid_t *)sqlite3_column_blob(res, 7), chart_id)) { - if (unlikely(st && !st->counter)) { - onewayalloc_freez(owa, st->context); - onewayalloc_freez(owa, (char *) st->name); - onewayalloc_freez(owa, st); - } - st = onewayalloc_callocz(owa, 1, sizeof(*st)); - char n[RRD_ID_LENGTH_MAX + 1]; - - snprintfz( - n, RRD_ID_LENGTH_MAX, "%s.%s", (char *)sqlite3_column_text(res, 4), - (char *)sqlite3_column_text(res, 3)); - st->name = onewayalloc_strdupz(owa, n); - st->update_every = sqlite3_column_int(res, 6); - st->counter = 0; - if (chart) { - st->context = onewayalloc_strdupz(owa, (char *)sqlite3_column_text(res, 8)); - strncpyz(st->id, chart, RRD_ID_LENGTH_MAX); - } - uuid_copy(chart_id, *(uuid_t *)sqlite3_column_blob(res, 7)); - st->last_entry_t = 0; - st->rrdhost = host; - } - - if (unlikely(find_dimension_first_last_t(machine_guid, (char *)st->name, (char *)sqlite3_column_text(res, 1), - (uuid_t *)sqlite3_column_blob(res, 0), &(*param_list)->first_entry_t, &(*param_list)->last_entry_t, - &rrdeng_uuid, 0))) - continue; - - st->counter++; - st->last_entry_t = MAX(st->last_entry_t, (*param_list)->last_entry_t); - - RRDDIM *rd = create_rrdim_entry(owa, st, (char *)sqlite3_column_text(res, 1), (char *)sqlite3_column_text(res, 2), &rrdeng_uuid); - if (unlikely(!rd)) - continue; - if (sqlite3_column_int(res, 9) == 1) - rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN); - rd->next = (*param_list)->rd; - (*param_list)->rd = rd; - } - if (st) { - if (!st->counter) { - onewayalloc_freez(owa,st->context); - onewayalloc_freez(owa,(char *)st->name); - onewayalloc_freez(owa,st); - } - else - if (!st->context && context) - st->context = onewayalloc_strdupz(owa,context); - } - -failed: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when reading archived charts"); -#else - UNUSED(param_list); - UNUSED(host); - UNUSED(context); - UNUSED(chart); -#endif - return; -} - - -/* - * Store a chart hash in the database - */ - -#define SQL_STORE_CHART_HASH "insert into v_chart_hash (hash_id, type, id, " \ - "name, family, context, title, unit, plugin, module, priority, chart_type, last_used, chart_id) " \ - "values (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11, ?12, unixepoch(), ?13);" - -int sql_store_chart_hash( - uuid_t *hash_id, uuid_t *chart_id, const char *type, const char *id, const char *name, const char *family, - const char *context, const char *title, const char *units, const char *plugin, const char *module, long priority, - RRDSET_TYPE chart_type) -{ - static __thread sqlite3_stmt *res = NULL; - int rc, param = 0; - - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) - return 0; - error_report("Database has not been initialized"); - return 1; - } - - if (unlikely(!res)) { - rc = prepare_statement(db_meta, SQL_STORE_CHART_HASH, &res); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store chart, rc = %d", rc); - return 1; - } - } - - param++; - rc = sqlite3_bind_blob(res, 1, hash_id, sizeof(*hash_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 2, type, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 3, id, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - if (name && *name) - rc = sqlite3_bind_text(res, 4, name, -1, SQLITE_STATIC); - else - rc = sqlite3_bind_null(res, 4); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 5, family, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 6, context, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 7, title, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 8, units, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 9, plugin, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_text(res, 10, module, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 11, (int) priority); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_int(res, 12, chart_type); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - param++; - rc = sqlite3_bind_blob(res, 13, chart_id, sizeof(*chart_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to store chart hash_id, rc = %d", rc); - - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in chart hash_id store function, rc = %d", rc); - - return 0; - - bind_fail: - error_report("Failed to bind parameter %d to store chart hash_id, rc = %d", param, rc); - rc = sqlite3_reset(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement in chart hash_id store function, rc = %d", rc); - return 1; } -/* - chart hashes are used for cloud communication. - if cloud is disabled or openssl is not available (which will prevent cloud connectivity) - skip hash calculations -*/ -void compute_chart_hash(RRDSET *st) -{ -#if !defined DISABLE_CLOUD && defined ENABLE_HTTPS - EVP_MD_CTX *evpctx; - unsigned char hash_value[EVP_MAX_MD_SIZE]; - unsigned int hash_len; - char priority_str[32]; - - if (rrdhost_flag_check(st->rrdhost, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { - internal_error(true, "Skipping compute_chart_hash for host %s because context streaming is enabled", st->rrdhost->hostname); - return; - } - sprintf(priority_str, "%ld", st->priority); - - evpctx = EVP_MD_CTX_create(); - EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL); - //EVP_DigestUpdate(evpctx, st->type, strlen(st->type)); - EVP_DigestUpdate(evpctx, st->id, strlen(st->id)); - EVP_DigestUpdate(evpctx, st->name, strlen(st->name)); - EVP_DigestUpdate(evpctx, st->family, strlen(st->family)); - EVP_DigestUpdate(evpctx, st->context, strlen(st->context)); - EVP_DigestUpdate(evpctx, st->title, strlen(st->title)); - EVP_DigestUpdate(evpctx, st->units, strlen(st->units)); - EVP_DigestUpdate(evpctx, st->plugin_name, strlen(st->plugin_name)); - if (st->module_name) - EVP_DigestUpdate(evpctx, st->module_name, strlen(st->module_name)); -// EVP_DigestUpdate(evpctx, priority_str, strlen(priority_str)); - EVP_DigestUpdate(evpctx, &st->priority, sizeof(st->priority)); - EVP_DigestUpdate(evpctx, &st->chart_type, sizeof(st->chart_type)); - EVP_DigestFinal_ex(evpctx, hash_value, &hash_len); - EVP_MD_CTX_destroy(evpctx); - fatal_assert(hash_len > sizeof(uuid_t)); - - char uuid_str[GUID_LEN + 1]; - uuid_unparse_lower(*((uuid_t *) &hash_value), uuid_str); - //info("Calculating HASH %s for chart %s", uuid_str, st->name); - uuid_copy(st->state->hash_id, *((uuid_t *) &hash_value)); - - (void)sql_store_chart_hash( - (uuid_t *)&hash_value, - st->chart_uuid, - st->type, - st->id, - st->name, - st->family, - st->context, - st->title, - st->units, - st->plugin_name, - st->module_name, - st->priority, - st->chart_type); -#else - UNUSED(st); -#endif - return; -} #define SQL_STORE_CLAIM_ID "insert into node_instance " \ "(host_id, claim_id, date_created) values (@host_id, @claim_id, unixepoch()) " \ @@ -2156,7 +863,6 @@ static inline void set_host_node_id(RRDHOST *host, uuid_t *node_id) sql_create_aclk_table(host, &host->host_uuid, node_id); else uuid_unparse_lower(*node_id, wc->node_id); - return; } #define SQL_UPDATE_NODE_ID "update node_instance set node_id = @node_id where host_id = @host_id;" @@ -2199,7 +905,7 @@ int update_node_id(uuid_t *host_id, uuid_t *node_id) char host_guid[GUID_LEN + 1]; uuid_unparse_lower(*host_id, host_guid); rrd_wrlock(); - host = rrdhost_find_by_guid(host_guid, 0); + host = rrdhost_find_by_guid(host_guid); if (likely(host)) set_host_node_id(host, node_id); rrd_unlock(); @@ -2242,7 +948,7 @@ char *get_hostname_by_node_id(char *node) goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW)) hostname = strdupz((char *)sqlite3_column_text(res, 0)); @@ -2280,7 +986,7 @@ int get_host_id(uuid_t *node_id, uuid_t *host_id) goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW && host_id)) uuid_copy(*host_id, *((uuid_t *) sqlite3_column_blob(res, 0))); @@ -2316,7 +1022,7 @@ int get_node_id(uuid_t *host_id, uuid_t *node_id) goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW && node_id)) uuid_copy(*node_id, *((uuid_t *) sqlite3_column_blob(res, 0))); @@ -2375,9 +1081,9 @@ failed: #define SQL_GET_NODE_INSTANCE_LIST "select ni.node_id, ni.host_id, h.hostname " \ "from node_instance ni, host h where ni.host_id = h.host_id;" -struct node_instance_list *get_node_list(void) +struct node_instance_list *get_node_list(void) { - struct node_instance_list *node_list = NULL; + struct node_instance_list *node_list = NULL; sqlite3_stmt *res = NULL; int rc; @@ -2395,7 +1101,7 @@ struct node_instance_list *get_node_list(void) int row = 0; char host_guid[37]; - while (sqlite3_step(res) == SQLITE_ROW) + while (sqlite3_step_monitored(res) == SQLITE_ROW) row++; if (sqlite3_reset(res) != SQLITE_OK) { @@ -2405,8 +1111,9 @@ struct node_instance_list *get_node_list(void) node_list = callocz(row + 1, sizeof(*node_list)); int max_rows = row; row = 0; + // TODO: Check to remove lock rrd_rdlock(); - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { if (sqlite3_column_bytes(res, 0) == sizeof(uuid_t)) uuid_copy(node_list[row].node_id, *((uuid_t *)sqlite3_column_blob(res, 0))); if (sqlite3_column_bytes(res, 1) == sizeof(uuid_t)) { @@ -2414,7 +1121,7 @@ struct node_instance_list *get_node_list(void) uuid_copy(node_list[row].host_id, *host_id); node_list[row].queryable = 1; uuid_unparse_lower(*host_id, host_guid); - RRDHOST *host = rrdhost_find_by_guid(host_guid, 0); + RRDHOST *host = rrdhost_find_by_guid(host_guid); node_list[row].live = host && (host == localhost || host->receiver) ? 1 : 0; node_list[row].hops = (host && host->system_info) ? host->system_info->hops : uuid_compare(*host_id, localhost->host_uuid) ? 1 : 0; @@ -2461,7 +1168,7 @@ void sql_load_node_id(RRDHOST *host) goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW)) { if (likely(sqlite3_column_bytes(res, 0) == sizeof(uuid_t))) set_host_node_id(host, (uuid_t *)sqlite3_column_blob(res, 0)); @@ -2472,8 +1179,6 @@ void sql_load_node_id(RRDHOST *host) failed: if (unlikely(sqlite3_reset(res) != SQLITE_OK)) error_report("Failed to reset the prepared statement when loading node instance information"); - - return; }; @@ -2494,213 +1199,179 @@ void sql_build_host_system_info(uuid_t *host_id, struct rrdhost_system_info *sys rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind host parameter host information"); - goto skip_loading; + goto skip; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { rrdhost_set_system_info_variable(system_info, (char *) sqlite3_column_text(res, 0), (char *) sqlite3_column_text(res, 1)); } -skip_loading: +skip: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) error_report("Failed to finalize the prepared statement when reading host information"); - return; } +#define SELECT_HOST_LABELS "SELECT label_key, label_value, source_type FROM host_label WHERE host_id = @host_id " \ + "AND label_key IS NOT NULL AND label_value IS NOT NULL;" -#define SQL_INS_HOST_SYSTEM_INFO "INSERT OR REPLACE INTO host_info " \ - "(host_id, system_key, system_value, date_created) " \ - "VALUES (@host, @key, @value, unixepoch());" - -void sql_store_host_system_info_key_value(uuid_t *host_id, const char *name, const char *value) +DICTIONARY *sql_load_host_labels(uuid_t *host_id) { - sqlite3_stmt *res = NULL; int rc; - if (unlikely(!db_meta)) { - if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("Database has not been initialized"); - return; - } + DICTIONARY *labels = NULL; + sqlite3_stmt *res = NULL; - rc = sqlite3_prepare_v2(db_meta, SQL_INS_HOST_SYSTEM_INFO, -1, &res, 0); + rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_LABELS, -1, &res, 0); if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to store system info"); - return; + error_report("Failed to prepare statement to read host information"); + return NULL; } rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter to store system information"); - goto skip_store; + error_report("Failed to bind host parameter host information"); + goto skip; } - rc = sqlite3_bind_text(res, 2, name, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind label parameter to store name information"); - goto skip_store; - } + labels = rrdlabels_create(); - rc = sqlite3_bind_text(res, 3, value, -1, SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind value parameter to store value information"); - goto skip_store; + while (sqlite3_step_monitored(res) == SQLITE_ROW) { + rrdlabels_add( + labels, + (const char *)sqlite3_column_text(res, 0), + (const char *)sqlite3_column_text(res, 1), + sqlite3_column_int(res, 2)); } - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to store host system info, rc = %d", rc); - -skip_store: +skip: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when storing host system information"); - - return; + error_report("Failed to finalize the prepared statement when reading host information"); + return labels; } - -void sql_store_host_system_info(uuid_t *host_id, const struct rrdhost_system_info *system_info) +// Utils +int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null) { - if (unlikely(!system_info)) - return; - - if (system_info->container_os_name) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_NAME", system_info->container_os_name); - - if (system_info->container_os_id) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_ID", system_info->container_os_id); - - if (system_info->container_os_id_like) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_ID_LIKE", system_info->container_os_id_like); - - if (system_info->container_os_version) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_VERSION", system_info->container_os_version); - - if (system_info->container_os_version_id) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_VERSION_ID", system_info->container_os_version_id); - - if (system_info->host_os_detection) - sql_store_host_system_info_key_value(host_id, "NETDATA_CONTAINER_OS_DETECTION", system_info->host_os_detection); - - if (system_info->host_os_name) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_NAME", system_info->host_os_name); - - if (system_info->host_os_id) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_ID", system_info->host_os_id); - - if (system_info->host_os_id_like) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_ID_LIKE", system_info->host_os_id_like); - - if (system_info->host_os_version) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_VERSION", system_info->host_os_version); + if (likely(text)) + return sqlite3_bind_text(res, position, text, -1, SQLITE_STATIC); + if (!can_be_null) + return 1; + return sqlite3_bind_null(res, position); +} - if (system_info->host_os_version_id) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_VERSION_ID", system_info->host_os_version_id); +int sql_metadata_cache_stats(int op) +{ + int count, dummy; + sqlite3_db_status(db_meta, op, &count, &dummy, 0); + return count; +} - if (system_info->host_os_detection) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_OS_DETECTION", system_info->host_os_detection); +#define SQL_FIND_CHART_UUID \ + "SELECT chart_id FROM chart WHERE host_id = @host AND type=@type AND id=@id AND (name IS NULL OR name=@name) AND chart_id IS NOT NULL;" - if (system_info->kernel_name) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_KERNEL_NAME", system_info->kernel_name); +#define SQL_FIND_DIMENSION_UUID \ + "SELECT dim_id FROM dimension WHERE chart_id=@chart AND id=@id AND name=@name AND LENGTH(dim_id)=16;" - if (system_info->host_cores) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", system_info->host_cores); - if (system_info->host_cpu_freq) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_CPU_FREQ", system_info->host_cpu_freq); +//Do a database lookup to find the UUID of a chart +//If found store it in store_uuid and return 0 +int sql_find_chart_uuid(RRDHOST *host, RRDSET *st, uuid_t *store_uuid) +{ + static __thread sqlite3_stmt *res = NULL; + int rc; - if (system_info->host_ram_total) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_TOTAL_RAM", system_info->host_ram_total); + const char *name = string2str(st->parts.name); - if (system_info->host_disk_space) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_TOTAL_DISK_SIZE", system_info->host_disk_space); + if (unlikely(!db_meta) && default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 1; - if (system_info->kernel_version) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_KERNEL_VERSION", system_info->kernel_version); + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_FIND_CHART_UUID, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to lookup chart UUID in the database"); + return 1; + } + } - if (system_info->architecture) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_ARCHITECTURE", system_info->architecture); + rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - if (system_info->virtualization) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_VIRTUALIZATION", system_info->virtualization); + rc = sqlite3_bind_text(res, 2, string2str(st->parts.type), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - if (system_info->virt_detection) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_VIRT_DETECTION", system_info->virt_detection); + rc = sqlite3_bind_text(res, 3, string2str(st->parts.id), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - if (system_info->container) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_CONTAINER", system_info->container); + rc = sqlite3_bind_text(res, 4, name && *name ? name : string2str(st->parts.id), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - if (system_info->container_detection) - sql_store_host_system_info_key_value(host_id, "NETDATA_SYSTEM_CONTAINER_DETECTION", system_info->container_detection); + int status = 1; + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW)) { + uuid_copy(*store_uuid, sqlite3_column_blob(res, 0)); + status = 0; + } - if (system_info->is_k8s_node) - sql_store_host_system_info_key_value(host_id, "NETDATA_HOST_IS_K8S_NODE", system_info->is_k8s_node); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement when searching for a chart UUID, rc = %d", rc); - return; -} + return status; -static int save_host_label_callback(const char *name, const char *value, RRDLABEL_SRC label_source, void *data) -{ - RRDHOST *host = (RRDHOST *)data; - sql_store_host_label(&host->host_uuid, (int)label_source & ~(RRDLABEL_FLAG_INTERNAL), name, value); - return 0; +bind_fail: + error_report("Failed to bind input parameter to perform chart UUID database lookup, rc = %d", rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement when searching for a chart UUID, rc = %d", rc); + return 1; } -#define SQL_DELETE_HOST_LABELS "DELETE FROM host_label WHERE host_id = @uuid;" -void sql_store_host_labels(RRDHOST *host) -{ - int rc = exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_uuid); - if (rc != SQLITE_OK) - error_report("Failed to remove old host labels for host %s", host->hostname); - - rrdlabels_walkthrough_read(host->host_labels, save_host_label_callback, host); -} - -#define SELECT_HOST_LABELS "SELECT label_key, label_value, source_type FROM host_label WHERE host_id = @host_id " \ - "AND label_key IS NOT NULL AND label_value IS NOT NULL;" - -DICTIONARY *sql_load_host_labels(uuid_t *host_id) +int sql_find_dimension_uuid(RRDSET *st, RRDDIM *rd, uuid_t *store_uuid) { + static __thread sqlite3_stmt *res = NULL; int rc; + int status = 1; - DICTIONARY *labels = NULL; - sqlite3_stmt *res = NULL; + if (unlikely(!db_meta) && default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 1; - rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_LABELS, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to prepare statement to read host information"); - return NULL; + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_FIND_DIMENSION_UUID, &res); + if (rc != SQLITE_OK) { + error_report("Failed to bind prepare statement to lookup dimension UUID in the database"); + return 1; + } } - rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host parameter host information"); - goto skip_loading; - } + rc = sqlite3_bind_blob(res, 1, st->chart_uuid, sizeof(*st->chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - labels = rrdlabels_create(); + rc = sqlite3_bind_text(res, 2, rrddim_id(rd), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; - while (sqlite3_step(res) == SQLITE_ROW) { - rrdlabels_add( - labels, - (const char *)sqlite3_column_text(res, 0), - (const char *)sqlite3_column_text(res, 1), - sqlite3_column_int(res, 2)); + rc = sqlite3_bind_text(res, 3, rrddim_name(rd), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW)) { + uuid_copy(*store_uuid, *((uuid_t *) sqlite3_column_blob(res, 0))); + status = 0; } -skip_loading: - if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) - error_report("Failed to finalize the prepared statement when reading host information"); - return labels; -} + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement find dimension uuid, rc = %d", rc); + return status; -// Utils -int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null) -{ - if (likely(text)) - return sqlite3_bind_text(res, position, text, -1, SQLITE_STATIC); - if (!can_be_null) - return 1; - return sqlite3_bind_null(res, position); +bind_fail: + error_report("Failed to bind input parameter to perform dimension UUID database lookup, rc = %d", rc); + return 1; } diff --git a/database/sqlite/sqlite_functions.h b/database/sqlite/sqlite_functions.h index e6808aa81..5731d5c9e 100644 --- a/database/sqlite/sqlite_functions.h +++ b/database/sqlite/sqlite_functions.h @@ -25,29 +25,7 @@ typedef enum db_check_action_type { } db_check_action_type_t; #define SQL_MAX_RETRY (100) -#define SQLITE_INSERT_DELAY (50) // Insert delay in case of lock - -#define SQL_STORE_HOST "insert or replace into host (host_id,hostname,registry_hostname,update_every,os,timezone,tags, hops) " \ - "values (?1,?2,?3,?4,?5,?6,?7,?8);" - -#define SQL_STORE_CHART "insert or replace into chart (chart_id, host_id, type, id, " \ - "name, family, context, title, unit, plugin, module, priority, update_every , chart_type , memory_mode , " \ - "history_entries) values (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16);" - -#define SQL_FIND_CHART_UUID \ - "select chart_id from chart where host_id = @host and type=@type and id=@id and (name is null or name=@name);" - -#define SQL_STORE_ACTIVE_CHART \ - "insert or replace into chart_active (chart_id, date_created) values (@id, unixepoch());" - -#define SQL_STORE_DIMENSION \ - "INSERT OR REPLACE into dimension (dim_id, chart_id, id, name, multiplier, divisor , algorithm) values (?0001,?0002,?0003,?0004,?0005,?0006,?0007);" - -#define SQL_FIND_DIMENSION_UUID \ - "select dim_id from dimension where chart_id=@chart and id=@id and name=@name and length(dim_id)=16;" - -#define SQL_STORE_ACTIVE_DIMENSION \ - "insert or replace into dimension_active (dim_id, date_created) values (@id, unixepoch());" +#define SQLITE_INSERT_DELAY (10) // Insert delay in case of lock #define CHECK_SQLITE_CONNECTION(db_meta) \ if (unlikely(!db_meta)) { \ @@ -58,59 +36,51 @@ typedef enum db_check_action_type { return 1; \ } -extern int sql_init_database(db_check_action_type_t rebuild, int memory); -extern void sql_close_database(void); -extern int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null); -extern int sql_store_host(uuid_t *guid, const char *hostname, const char *registry_hostname, int update_every, const char *os, - const char *timezone, const char *tags, int hops); - -extern int sql_store_host_info(RRDHOST *host); - -extern int sql_store_chart( - uuid_t *chart_uuid, uuid_t *host_uuid, const char *type, const char *id, const char *name, const char *family, - const char *context, const char *title, const char *units, const char *plugin, const char *module, long priority, - int update_every, int chart_type, int memory_mode, long history_entries); -extern int sql_store_dimension(uuid_t *dim_uuid, uuid_t *chart_uuid, const char *id, const char *name, collected_number multiplier, - collected_number divisor, int algorithm); - -extern int find_dimension_uuid(RRDSET *st, RRDDIM *rd, uuid_t *store_uuid); -extern void store_active_dimension(uuid_t *dimension_uuid); - -extern uuid_t *find_chart_uuid(RRDHOST *host, const char *type, const char *id, const char *name); -extern uuid_t *create_chart_uuid(RRDSET *st, const char *id, const char *name); -extern int update_chart_metadata(uuid_t *chart_uuid, RRDSET *st, const char *id, const char *name); -extern void store_active_chart(uuid_t *dimension_uuid); - -extern int find_uuid_type(uuid_t *uuid); - -extern void sql_rrdset2json(RRDHOST *host, BUFFER *wb); - -extern RRDHOST *sql_create_host_by_uuid(char *guid); -extern int prepare_statement(sqlite3 *database, char *query, sqlite3_stmt **statement); -extern int execute_insert(sqlite3_stmt *res); -extern void db_execute(const char *cmd); -extern int file_is_migrated(char *path); -extern void add_migrated_file(char *path, uint64_t file_size); -extern void db_unlock(void); -extern void db_lock(void); -extern void delete_dimension_uuid(uuid_t *dimension_uuid); -extern void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, char *value); -extern void sql_build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_list, RRDHOST *host, char *context, char *chart); -extern void store_claim_id(uuid_t *host_id, uuid_t *claim_id); -extern int update_node_id(uuid_t *host_id, uuid_t *node_id); -extern int get_node_id(uuid_t *host_id, uuid_t *node_id); -extern int get_host_id(uuid_t *node_id, uuid_t *host_id); -extern void invalidate_node_instances(uuid_t *host_id, uuid_t *claim_id); -extern struct node_instance_list *get_node_list(void); -extern void sql_load_node_id(RRDHOST *host); -extern void compute_chart_hash(RRDSET *st); -extern int sql_set_dimension_option(uuid_t *dim_uuid, char *option); -char *get_hostname_by_node_id(char *node_id); -void free_temporary_host(RRDHOST *host); +SQLITE_API int sqlite3_step_monitored(sqlite3_stmt *stmt); +SQLITE_API int sqlite3_exec_monitored( + sqlite3 *db, /* An open database */ + const char *sql, /* SQL to be evaluated */ + int (*callback)(void*,int,char**,char**), /* Callback function */ + void *data, /* 1st argument to callback */ + char **errmsg /* Error msg written here */ + ); + +// Initialization and shutdown int init_database_batch(sqlite3 *database, int rebuild, int init_type, const char *batch[]); -void migrate_localhost(uuid_t *host_uuid); -extern void sql_store_host_system_info(uuid_t *host_id, const struct rrdhost_system_info *system_info); -extern void sql_build_host_system_info(uuid_t *host_id, struct rrdhost_system_info *system_info); -void sql_store_host_labels(RRDHOST *host); +int sql_init_database(db_check_action_type_t rebuild, int memory); +void sql_close_database(void); + +// Helpers +int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null); +int prepare_statement(sqlite3 *database, const char *query, sqlite3_stmt **statement); +int execute_insert(sqlite3_stmt *res); +int file_is_migrated(char *path); +int exec_statement_with_uuid(const char *sql, uuid_t *uuid); +void add_migrated_file(char *path, uint64_t file_size); +void db_execute(const char *cmd); + +// Look up functions +int get_node_id(uuid_t *host_id, uuid_t *node_id); +int get_host_id(uuid_t *node_id, uuid_t *host_id); +struct node_instance_list *get_node_list(void); +void sql_load_node_id(RRDHOST *host); +char *get_hostname_by_node_id(char *node_id); +int sql_find_chart_uuid(RRDHOST *host, RRDSET *st, uuid_t *store_uuid); +int sql_find_dimension_uuid(RRDSET *st, RRDDIM *rd, uuid_t *store_uuid); + +// Help build archived hosts in memory when agent starts +void sql_build_host_system_info(uuid_t *host_id, struct rrdhost_system_info *system_info); DICTIONARY *sql_load_host_labels(uuid_t *host_id); + +// For queries: To be removed when context queries are implemented +void sql_rrdset2json(RRDHOST *host, BUFFER *wb); + +// TODO: move to metadata +int update_node_id(uuid_t *host_id, uuid_t *node_id); + +void invalidate_node_instances(uuid_t *host_id, uuid_t *claim_id); + +// Provide statistics +int sql_metadata_cache_stats(int op); + #endif //NETDATA_SQLITE_FUNCTIONS_H diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c index 8e59cad1e..c189305b8 100644 --- a/database/sqlite/sqlite_health.c +++ b/database/sqlite/sqlite_health.c @@ -4,6 +4,7 @@ #include "sqlite_functions.h" #define MAX_HEALTH_SQL_SIZE 2048 +#define sqlite3_bind_string_or_null(res,key,param) ((key) ? sqlite3_bind_text(res, param, string2str(key), -1, SQLITE_STATIC) : sqlite3_bind_null(res, param)) /* Health related SQL queries Creates a health log table in sqlite, one per host guid @@ -15,7 +16,7 @@ int sql_create_health_log_table(RRDHOST *host) { if (unlikely(!db_meta)) { if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("HEALTH [%s]: Database has not been initialized", host->hostname); + error_report("HEALTH [%s]: Database has not been initialized", rrdhost_hostname(host)); return 1; } @@ -24,9 +25,9 @@ int sql_create_health_log_table(RRDHOST *host) { snprintfz(command, MAX_HEALTH_SQL_SIZE, SQL_CREATE_HEALTH_LOG_TABLE(uuid_str)); - rc = sqlite3_exec(db_meta, command, 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, command, 0, 0, &err_msg); if (rc != SQLITE_OK) { - error_report("HEALTH [%s]: SQLite error during creation of health log table, rc = %d (%s)", host->hostname, rc, err_msg); + error_report("HEALTH [%s]: SQLite error during creation of health log table, rc = %d (%s)", rrdhost_hostname(host), rc, err_msg); sqlite3_free(err_msg); return 1; } @@ -49,7 +50,7 @@ void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae) { if (unlikely(!db_meta)) { if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("HEALTH [%s]: Database has not been initialized", host->hostname); + error_report("HEALTH [%s]: Database has not been initialized", rrdhost_hostname(host)); return; } @@ -60,7 +61,7 @@ void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae) { rc = sqlite3_prepare_v2(db_meta, command, -1, &res, 0); if (unlikely(rc != SQLITE_OK)) { - error_report("HEALTH [%s]: Failed to prepare statement for SQL_UPDATE_HEALTH_LOG", host->hostname); + error_report("HEALTH [%s]: Failed to prepare statement for SQL_UPDATE_HEALTH_LOG", rrdhost_hostname(host)); return; } @@ -96,12 +97,12 @@ void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae) { rc = execute_insert(res); if (unlikely(rc != SQLITE_DONE)) { - error_report("HEALTH [%s]: Failed to update health log, rc = %d", host->hostname, rc); + error_report("HEALTH [%s]: Failed to update health log, rc = %d", rrdhost_hostname(host), rc); } failed: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) - error_report("HEALTH [%s]: Failed to finalize the prepared statement for updating health log.", host->hostname); + error_report("HEALTH [%s]: Failed to finalize the prepared statement for updating health log.", rrdhost_hostname(host)); return; } @@ -122,7 +123,7 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { if (unlikely(!db_meta)) { if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("HEALTH [%s]: Database has not been initialized", host->hostname); + error_report("HEALTH [%s]: Database has not been initialized", rrdhost_hostname(host)); return; } @@ -133,11 +134,11 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { rc = sqlite3_prepare_v2(db_meta, command, -1, &res, 0); if (unlikely(rc != SQLITE_OK)) { - error_report("HEALTH [%s]: Failed to prepare statement for SQL_INSERT_HEALTH_LOG", host->hostname); + error_report("HEALTH [%s]: Failed to prepare statement for SQL_INSERT_HEALTH_LOG", rrdhost_hostname(host)); return; } - rc = sqlite3_bind_text(res, 1, host->hostname, -1, SQLITE_STATIC); + rc = sqlite3_bind_text(res, 1, rrdhost_hostname(host), -1, SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind hostname parameter for SQL_INSERT_HEALTH_LOG"); goto failed; @@ -215,49 +216,49 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { goto failed; } - rc = sqlite3_bind_text(res, 14, ae->name, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->name, 14); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind name parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 15, ae->chart, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->chart, 15); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind chart parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 16, ae->family, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->family, 16); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind family parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 17, ae->exec, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->exec, 17); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind exec parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 18, ae->recipient, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->recipient, 18); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind recipient parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 19, ae->source, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->source, 19); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind source parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 20, ae->units, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->units, 20); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind host_id parameter to store node instance information"); goto failed; } - rc = sqlite3_bind_text(res, 21, ae->info, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->info, 21); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind info parameter for SQL_INSERT_HEALTH_LOG"); goto failed; @@ -305,25 +306,25 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { goto failed; } - rc = sqlite3_bind_text(res, 29, ae->classification, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->classification, 29); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind classification parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 30, ae->component, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->component, 30); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind component parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 31, ae->type, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->type, 31); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind type parameter for SQL_INSERT_HEALTH_LOG"); goto failed; } - rc = sqlite3_bind_text(res, 32, ae->chart_context, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, ae->chart_context, 32); if (unlikely(rc != SQLITE_OK)) { error_report("Failed to bind chart_context parameter for SQL_INSERT_HEALTH_LOG"); goto failed; @@ -331,7 +332,7 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { rc = execute_insert(res); if (unlikely(rc != SQLITE_DONE)) { - error_report("HEALTH [%s]: Failed to execute SQL_INSERT_HEALTH_LOG, rc = %d", host->hostname, rc); + error_report("HEALTH [%s]: Failed to execute SQL_INSERT_HEALTH_LOG, rc = %d", rrdhost_hostname(host), rc); goto failed; } @@ -340,7 +341,7 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) { failed: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) - error_report("HEALTH [%s]: Failed to finalize the prepared statement for inserting to health log.", host->hostname); + error_report("HEALTH [%s]: Failed to finalize the prepared statement for inserting to health log.", rrdhost_hostname(host)); return; } @@ -381,7 +382,7 @@ void sql_health_alarm_log_cleanup(RRDHOST *host) { char uuid_str[GUID_LEN + 1]; uuid_unparse_lower_fix(&host->host_uuid, uuid_str); - snprintfz(command, MAX_HEALTH_SQL_SIZE, SQL_CLEANUP_HEALTH_LOG(uuid_str, uuid_str, host->health_log_entries_written - rotate_every)); + snprintfz(command, MAX_HEALTH_SQL_SIZE, SQL_CLEANUP_HEALTH_LOG(uuid_str, uuid_str, (unsigned long int) (host->health_log_entries_written - rotate_every))); rc = sqlite3_prepare_v2(db_meta, command, -1, &res, 0); if (unlikely(rc != SQLITE_OK)) { @@ -389,7 +390,7 @@ void sql_health_alarm_log_cleanup(RRDHOST *host) { return; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (unlikely(rc != SQLITE_DONE)) error_report("Failed to cleanup health log table, rc = %d", rc); @@ -428,7 +429,7 @@ void sql_health_alarm_log_count(RRDHOST *host) { return; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); if (likely(rc == SQLITE_ROW)) host->health_log_entries_written = (size_t) sqlite3_column_int64(res, 0); @@ -436,7 +437,7 @@ void sql_health_alarm_log_count(RRDHOST *host) { if (unlikely(rc != SQLITE_OK)) error_report("Failed to finalize the prepared statement to count health log entries from db"); - info("HEALTH [%s]: Table health_log_%s, contains %lu entries.", host->hostname, uuid_str, host->health_log_entries_written); + info("HEALTH [%s]: Table health_log_%s, contains %lu entries.", rrdhost_hostname(host), uuid_str, (unsigned long int) host->health_log_entries_written); } #define SQL_INJECT_REMOVED(guid, guid2) "insert into health_log_%s (hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, " \ @@ -556,7 +557,7 @@ uint32_t sql_get_max_unique_id (char *uuid_str) return 0; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { max_unique_id = (uint32_t) sqlite3_column_int64(res, 0); } @@ -584,7 +585,7 @@ void sql_check_removed_alerts_state(char *uuid_str) return; } - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { status = (RRDCALC_STATUS) sqlite3_column_int(res, 0); unique_id = (uint32_t) sqlite3_column_int64(res, 1); alarm_id = (uint32_t) sqlite3_column_int64(res, 2); @@ -607,7 +608,7 @@ void sql_check_removed_alerts_state(char *uuid_str) #define SQL_LOAD_HEALTH_LOG(guid,limit) "SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context FROM (SELECT hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type, chart_context FROM health_log_%s order by unique_id desc limit %u) order by unique_id asc;", guid, limit void sql_health_alarm_log_load(RRDHOST *host) { sqlite3_stmt *res = NULL; - int rc; + int ret; ssize_t errored = 0, loaded = 0; char command[MAX_HEALTH_SQL_SIZE + 1]; @@ -615,7 +616,7 @@ void sql_health_alarm_log_load(RRDHOST *host) { if (unlikely(!db_meta)) { if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) - error_report("HEALTH [%s]: Database has not been initialized", host->hostname); + error_report("HEALTH [%s]: Database has not been initialized", rrdhost_hostname(host)); return; } @@ -626,47 +627,55 @@ void sql_health_alarm_log_load(RRDHOST *host) { snprintfz(command, MAX_HEALTH_SQL_SIZE, SQL_LOAD_HEALTH_LOG(uuid_str, host->health_log.max)); - rc = sqlite3_prepare_v2(db_meta, command, -1, &res, 0); - if (unlikely(rc != SQLITE_OK)) { - error_report("HEALTH [%s]: Failed to prepare sql statement to load health log.", host->hostname); + ret = sqlite3_prepare_v2(db_meta, command, -1, &res, 0); + if (unlikely(ret != SQLITE_OK)) { + error_report("HEALTH [%s]: Failed to prepare sql statement to load health log.", rrdhost_hostname(host)); return; } + DICTIONARY *all_rrdcalcs = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(host, rc) { + dictionary_set(all_rrdcalcs, rrdcalc_name(rc), rc, sizeof(*rc)); + } + foreach_rrdcalc_in_rrdhost_done(rc); + netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); - while (sqlite3_step(res) == SQLITE_ROW) { + while (sqlite3_step_monitored(res) == SQLITE_ROW) { ALARM_ENTRY *ae = NULL; // check that we have valid ids uint32_t unique_id = (uint32_t) sqlite3_column_int64(res, 1); if(!unique_id) { - error_report("HEALTH [%s]: Got invalid unique id. Ignoring it.", host->hostname); + error_report("HEALTH [%s]: Got invalid unique id. Ignoring it.", rrdhost_hostname(host)); errored++; continue; } uint32_t alarm_id = (uint32_t) sqlite3_column_int64(res, 2); if(!alarm_id) { - error_report("HEALTH [%s]: Got invalid alarm id. Ignoring it.", host->hostname); + error_report("HEALTH [%s]: Got invalid alarm id. Ignoring it.", rrdhost_hostname(host)); errored++; continue; } //need name, chart and family if (sqlite3_column_type(res, 13) == SQLITE_NULL) { - error_report("HEALTH [%s]: Got null name field. Ignoring it.", host->hostname); + error_report("HEALTH [%s]: Got null name field. Ignoring it.", rrdhost_hostname(host)); errored++; continue; } if (sqlite3_column_type(res, 14) == SQLITE_NULL) { - error_report("HEALTH [%s]: Got null chart field. Ignoring it.", host->hostname); + error_report("HEALTH [%s]: Got null chart field. Ignoring it.", rrdhost_hostname(host)); errored++; continue; } if (sqlite3_column_type(res, 15) == SQLITE_NULL) { - error_report("HEALTH [%s]: Got null family field. Ignoring it.", host->hostname); + error_report("HEALTH [%s]: Got null family field. Ignoring it.", rrdhost_hostname(host)); errored++; continue; } @@ -675,18 +684,7 @@ void sql_health_alarm_log_load(RRDHOST *host) { time_t last_repeat = 0; last_repeat = (time_t)sqlite3_column_int64(res, 27); - RRDCALC *rc = alarm_max_last_repeat(host, (char *) sqlite3_column_text(res, 14), simple_hash((char *) sqlite3_column_text(res, 14))); - if (!rc) { - for(rc = host->alarms; rc ; rc = rc->next) { - RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_name, (avl_t *)rc); - if(rdcmp != rc) { - error("Cannot insert the alarm index ID using log %s", rc->name); - } - } - - rc = alarm_max_last_repeat(host, (char *) sqlite3_column_text(res, 14), simple_hash((char *) sqlite3_column_text(res, 14))); - } - + rc = dictionary_get(all_rrdcalcs, (char *) sqlite3_column_text(res, 14)); if(unlikely(rc)) { if (rrdcalc_isrepeating(rc)) { rc->last_repeat = last_repeat; @@ -719,36 +717,32 @@ void sql_health_alarm_log_load(RRDHOST *host) { ae->exec_run_timestamp = (time_t) sqlite3_column_int64(res, 11); ae->delay_up_to_timestamp = (time_t) sqlite3_column_int64(res, 12); - ae->name = strdupz((char *) sqlite3_column_text(res, 13)); - ae->hash_name = simple_hash(ae->name); - - ae->chart = strdupz((char *) sqlite3_column_text(res, 14)); - ae->hash_chart = simple_hash(ae->chart); - - ae->family = strdupz((char *) sqlite3_column_text(res, 15)); + ae->name = string_strdupz((char *) sqlite3_column_text(res, 13)); + ae->chart = string_strdupz((char *) sqlite3_column_text(res, 14)); + ae->family = string_strdupz((char *) sqlite3_column_text(res, 15)); if (sqlite3_column_type(res, 16) != SQLITE_NULL) - ae->exec = strdupz((char *) sqlite3_column_text(res, 16)); + ae->exec = string_strdupz((char *) sqlite3_column_text(res, 16)); else ae->exec = NULL; if (sqlite3_column_type(res, 17) != SQLITE_NULL) - ae->recipient = strdupz((char *) sqlite3_column_text(res, 17)); + ae->recipient = string_strdupz((char *) sqlite3_column_text(res, 17)); else ae->recipient = NULL; if (sqlite3_column_type(res, 18) != SQLITE_NULL) - ae->source = strdupz((char *) sqlite3_column_text(res, 18)); + ae->source = string_strdupz((char *) sqlite3_column_text(res, 18)); else ae->source = NULL; if (sqlite3_column_type(res, 19) != SQLITE_NULL) - ae->units = strdupz((char *) sqlite3_column_text(res, 19)); + ae->units = string_strdupz((char *) sqlite3_column_text(res, 19)); else ae->units = NULL; if (sqlite3_column_type(res, 20) != SQLITE_NULL) - ae->info = strdupz((char *) sqlite3_column_text(res, 20)); + ae->info = string_strdupz((char *) sqlite3_column_text(res, 20)); else ae->info = NULL; @@ -763,30 +757,30 @@ void sql_health_alarm_log_load(RRDHOST *host) { ae->last_repeat = last_repeat; if (sqlite3_column_type(res, 28) != SQLITE_NULL) - ae->classification = strdupz((char *) sqlite3_column_text(res, 28)); + ae->classification = string_strdupz((char *) sqlite3_column_text(res, 28)); else ae->classification = NULL; if (sqlite3_column_type(res, 29) != SQLITE_NULL) - ae->component = strdupz((char *) sqlite3_column_text(res, 29)); + ae->component = string_strdupz((char *) sqlite3_column_text(res, 29)); else ae->component = NULL; if (sqlite3_column_type(res, 30) != SQLITE_NULL) - ae->type = strdupz((char *) sqlite3_column_text(res, 30)); + ae->type = string_strdupz((char *) sqlite3_column_text(res, 30)); else ae->type = NULL; if (sqlite3_column_type(res, 31) != SQLITE_NULL) - ae->chart_context = strdupz((char *) sqlite3_column_text(res, 31)); + ae->chart_context = string_strdupz((char *) sqlite3_column_text(res, 31)); else ae->chart_context = NULL; char value_string[100 + 1]; - freez(ae->old_value_string); - freez(ae->new_value_string); - ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1)); - ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1)); + string_freez(ae->old_value_string); + string_freez(ae->new_value_string); + ae->old_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae_units(ae), -1)); + ae->new_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae_units(ae), -1)); ae->next = host->health_log.alarms; host->health_log.alarms = ae; @@ -802,6 +796,9 @@ void sql_health_alarm_log_load(RRDHOST *host) { netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); + dictionary_destroy(all_rrdcalcs); + all_rrdcalcs = NULL; + if(!host->health_max_unique_id) host->health_max_unique_id = (uint32_t)now_realtime_sec(); if(!host->health_max_alarm_id) host->health_max_alarm_id = (uint32_t)now_realtime_sec(); @@ -809,10 +806,10 @@ void sql_health_alarm_log_load(RRDHOST *host) { if (unlikely(!host->health_log.next_alarm_id || host->health_log.next_alarm_id <= host->health_max_alarm_id)) host->health_log.next_alarm_id = host->health_max_alarm_id + 1; - info("HEALTH [%s]: Table health_log_%s, loaded %zd alarm entries, errors in %zd entries.", host->hostname, uuid_str, loaded, errored); + log_health("[%s]: Table health_log_%s, loaded %zd alarm entries, errors in %zd entries.", rrdhost_hostname(host), uuid_str, loaded, errored); - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) + ret = sqlite3_finalize(res); + if (unlikely(ret != SQLITE_OK)) error_report("Failed to finalize the health log read statement"); sql_health_alarm_log_count(host); @@ -849,159 +846,153 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) } param++; - rc = sqlite3_bind_blob(res, 1, hash_id, sizeof(*hash_id), SQLITE_STATIC); + rc = sqlite3_bind_blob(res, param, hash_id, sizeof(*hash_id), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - if (cfg->alarm && *cfg->alarm) - rc = sqlite3_bind_text(res, 2, cfg->alarm, -1, SQLITE_STATIC); - else - rc = sqlite3_bind_null(res, 2); + rc = sqlite3_bind_string_or_null(res, cfg->alarm, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - if (cfg->template_key && *cfg->template_key) - rc = sqlite3_bind_text(res, 3, cfg->template_key, -1, SQLITE_STATIC); - else - rc = sqlite3_bind_null(res, 3); + rc = sqlite3_bind_string_or_null(res, cfg->template_key, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 4, cfg->on, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->on, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 5, cfg->classification, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->classification, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 6, cfg->component, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->component, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 7, cfg->type, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->type, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 8, cfg->os, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->os, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 9, cfg->host, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->host, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 10, cfg->lookup, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->lookup, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 11, cfg->every, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->every, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 12, cfg->units, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->units, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 13, cfg->calc, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->calc, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 14, cfg->families, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->families, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 15, cfg->plugin, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->plugin, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 16, cfg->module, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->module, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 17, cfg->charts, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->charts, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 18, cfg->green, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->green, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 19, cfg->red, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->red, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 20, cfg->warn, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->warn, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 21, cfg->crit, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->crit, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 22, cfg->exec, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->exec, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 23, cfg->to, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->to, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 24, cfg->info, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->info, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 25, cfg->delay, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->delay, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 26, cfg->options, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->options, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 27, cfg->repeat, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->repeat, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 28, cfg->host_labels, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->host_labels, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; if (cfg->p_db_lookup_after) { param++; - rc = sqlite3_bind_text(res, 29, cfg->p_db_lookup_dimensions, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->p_db_lookup_dimensions, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; param++; - rc = sqlite3_bind_text(res, 30, cfg->p_db_lookup_method, -1, SQLITE_STATIC); + rc = sqlite3_bind_string_or_null(res, cfg->p_db_lookup_method, param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; @@ -1071,7 +1062,7 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) skip hash calculations */ #if !defined DISABLE_CLOUD && defined ENABLE_HTTPS -#define DIGEST_ALERT_CONFIG_VAL(v) ((v) ? EVP_DigestUpdate(evpctx, (v), strlen((v))) : EVP_DigestUpdate(evpctx, "", 1)) +#define DIGEST_ALERT_CONFIG_VAL(v) ((v) ? EVP_DigestUpdate(evpctx, (string2str(v)), string_strlen((v))) : EVP_DigestUpdate(evpctx, "", 1)) #endif int alert_hash_and_store_config( uuid_t hash_id, diff --git a/database/sqlite/sqlite_health.h b/database/sqlite/sqlite_health.h index ef837894a..87060dacc 100644 --- a/database/sqlite/sqlite_health.h +++ b/database/sqlite/sqlite_health.h @@ -6,12 +6,12 @@ #include "sqlite3.h" extern sqlite3 *db_meta; -extern void sql_health_alarm_log_load(RRDHOST *host); -extern int sql_create_health_log_table(RRDHOST *host); -extern void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae); -extern void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae); -extern void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae); -extern void sql_health_alarm_log_cleanup(RRDHOST *host); -extern int alert_hash_and_store_config(uuid_t hash_id, struct alert_config *cfg, int store_hash); -extern void sql_aclk_alert_clean_dead_entries(RRDHOST *host); +void sql_health_alarm_log_load(RRDHOST *host); +int sql_create_health_log_table(RRDHOST *host); +void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae); +void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae); +void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae); +void sql_health_alarm_log_cleanup(RRDHOST *host); +int alert_hash_and_store_config(uuid_t hash_id, struct alert_config *cfg, int store_hash); +void sql_aclk_alert_clean_dead_entries(RRDHOST *host); #endif //NETDATA_SQLITE_HEALTH_H diff --git a/database/sqlite/sqlite_metadata.c b/database/sqlite/sqlite_metadata.c new file mode 100644 index 000000000..4eb212152 --- /dev/null +++ b/database/sqlite/sqlite_metadata.c @@ -0,0 +1,1580 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sqlite_metadata.h" + +// SQL statements + +#define SQL_STORE_CLAIM_ID "insert into node_instance " \ + "(host_id, claim_id, date_created) values (@host_id, @claim_id, unixepoch()) " \ + "on conflict(host_id) do update set claim_id = excluded.claim_id;" + +#define SQL_DELETE_HOST_LABELS "DELETE FROM host_label WHERE host_id = @uuid;" + +#define STORE_HOST_LABEL \ + "INSERT OR REPLACE INTO host_label (host_id, source_type, label_key, label_value, date_created) VALUES " + +#define STORE_CHART_LABEL \ + "INSERT OR REPLACE INTO chart_label (chart_id, source_type, label_key, label_value, date_created) VALUES " + +#define STORE_HOST_OR_CHART_LABEL_VALUE "(u2h('%s'), %d,'%s','%s', unixepoch())" + +#define DELETE_DIMENSION_UUID "DELETE FROM dimension WHERE dim_id = @uuid;" + +#define SQL_STORE_HOST_INFO "INSERT OR REPLACE INTO host " \ + "(host_id, hostname, registry_hostname, update_every, os, timezone," \ + "tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, program_version," \ + "entries, health_enabled) " \ + "values (@host_id, @hostname, @registry_hostname, @update_every, @os, @timezone, @tags, @hops, @memory_mode, " \ + "@abbrev_timezone, @utc_offset, @program_name, @program_version, " \ + "@entries, @health_enabled);" + +#define SQL_STORE_CHART "insert or replace into chart (chart_id, host_id, type, id, " \ + "name, family, context, title, unit, plugin, module, priority, update_every , chart_type , memory_mode , " \ + "history_entries) values (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16);" + +#define SQL_STORE_DIMENSION "INSERT OR REPLACE INTO dimension (dim_id, chart_id, id, name, multiplier, divisor , algorithm, options) " \ + "VALUES (@dim_id, @chart_id, @id, @name, @multiplier, @divisor, @algorithm, @options);" + +#define SELECT_DIMENSION_LIST "SELECT dim_id, rowid FROM dimension WHERE rowid > @row_id" + +#define STORE_HOST_INFO "INSERT OR REPLACE INTO host_info (host_id, system_key, system_value, date_created) VALUES " +#define STORE_HOST_INFO_VALUES "(u2h('%s'), '%s','%s', unixepoch())" + +#define MIGRATE_LOCALHOST_TO_NEW_MACHINE_GUID \ + "UPDATE chart SET host_id = @host_id WHERE host_id in (SELECT host_id FROM host where host_id <> @host_id and hops = 0);" +#define DELETE_NON_EXISTING_LOCALHOST "DELETE FROM host WHERE hops = 0 AND host_id <> @host_id;" +#define DELETE_MISSING_NODE_INSTANCES "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host);" + +#define METADATA_CMD_Q_MAX_SIZE (1024) // Max queue size; callers will block until there is room +#define METADATA_MAINTENANCE_FIRST_CHECK (1800) // Maintenance first run after agent startup in seconds +#define METADATA_MAINTENANCE_RETRY (60) // Retry run if already running or last run did actual work +#define METADATA_MAINTENANCE_INTERVAL (3600) // Repeat maintenance after latest successful + +#define METADATA_HOST_CHECK_FIRST_CHECK (5) // First check for pending metadata +#define METADATA_HOST_CHECK_INTERVAL (30) // Repeat check for pending metadata +#define METADATA_HOST_CHECK_IMMEDIATE (5) // Repeat immediate run because we have more metadata to write + +#define MAX_METADATA_CLEANUP (500) // Maximum metadata write operations (e.g deletes before retrying) +#define METADATA_MAX_BATCH_SIZE (512) // Maximum commands to execute before running the event loop +#define METADATA_MAX_TRANSACTION_BATCH (128) // Maximum commands to add in a transaction + +enum metadata_opcode { + METADATA_DATABASE_NOOP = 0, + METADATA_DATABASE_TIMER, + METADATA_ADD_CHART, + METADATA_ADD_CHART_LABEL, + METADATA_ADD_DIMENSION, + METADATA_DEL_DIMENSION, + METADATA_ADD_DIMENSION_OPTION, + METADATA_ADD_HOST_SYSTEM_INFO, + METADATA_ADD_HOST_INFO, + METADATA_STORE_CLAIM_ID, + METADATA_STORE_HOST_LABELS, + METADATA_STORE_BUFFER, + + METADATA_SKIP_TRANSACTION, // Dummy -- OPCODES less than this one can be in a tranasction + + METADATA_SCAN_HOSTS, + METADATA_MAINTENANCE, + METADATA_SYNC_SHUTDOWN, + METADATA_UNITTEST, + // leave this last + // we need it to check for worker utilization + METADATA_MAX_ENUMERATIONS_DEFINED +}; + +#define MAX_PARAM_LIST (2) +struct metadata_cmd { + enum metadata_opcode opcode; + struct completion *completion; + const void *param[MAX_PARAM_LIST]; +}; + +struct metadata_database_cmdqueue { + unsigned head, tail; + struct metadata_cmd cmd_array[METADATA_CMD_Q_MAX_SIZE]; +}; + +typedef enum { + METADATA_FLAG_CLEANUP = (1 << 0), // Cleanup is running + METADATA_FLAG_SCANNING_HOSTS = (1 << 1), // Scanning of hosts in worker thread + METADATA_FLAG_SHUTDOWN = (1 << 2), // Shutting down +} METADATA_FLAG; + +#define METADATA_WORKER_BUSY (METADATA_FLAG_CLEANUP | METADATA_FLAG_SCANNING_HOSTS) + +struct metadata_wc { + uv_thread_t thread; + time_t check_metadata_after; + time_t check_hosts_after; + volatile unsigned queue_size; + uv_loop_t *loop; + uv_async_t async; + METADATA_FLAG flags; + uint64_t row_id; + uv_timer_t timer_req; + struct completion init_complete; + /* FIFO command queue */ + uv_mutex_t cmd_mutex; + uv_cond_t cmd_cond; + struct metadata_database_cmdqueue cmd_queue; +}; + +#define metadata_flag_check(target_flags, flag) (__atomic_load_n(&((target_flags)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define metadata_flag_set(target_flags, flag) __atomic_or_fetch(&((target_flags)->flags), (flag), __ATOMIC_SEQ_CST) +#define metadata_flag_clear(target_flags, flag) __atomic_and_fetch(&((target_flags)->flags), ~(flag), __ATOMIC_SEQ_CST) + +// +// For unittest +// +struct thread_unittest { + int join; + unsigned added; + unsigned processed; + unsigned *done; +}; + + +// Metadata functions + +struct query_build { + BUFFER *sql; + int count; + char uuid_str[UUID_STR_LEN]; +}; + +static int host_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct query_build *lb = data; + if (unlikely(!lb->count)) + buffer_sprintf(lb->sql, STORE_HOST_LABEL); + else + buffer_strcat(lb->sql, ", "); + buffer_sprintf(lb->sql, STORE_HOST_OR_CHART_LABEL_VALUE, lb->uuid_str, (int)ls & ~(RRDLABEL_FLAG_INTERNAL), name, value); + lb->count++; + return 1; +} + +static int chart_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct query_build *lb = data; + if (unlikely(!lb->count)) + buffer_sprintf(lb->sql, STORE_CHART_LABEL); + else + buffer_strcat(lb->sql, ", "); + buffer_sprintf(lb->sql, STORE_HOST_OR_CHART_LABEL_VALUE, lb->uuid_str, ls, name, value); + lb->count++; + return 1; +} + +static void check_and_update_chart_labels(RRDSET *st, BUFFER *work_buffer) +{ + size_t old_version = st->rrdlabels_last_saved_version; + size_t new_version = dictionary_version(st->rrdlabels); + + if(new_version != old_version) { + buffer_flush(work_buffer); + struct query_build tmp = {.sql = work_buffer, .count = 0}; + uuid_unparse_lower(st->chart_uuid, tmp.uuid_str); + rrdlabels_walkthrough_read(st->rrdlabels, chart_label_store_to_sql_callback, &tmp); + st->rrdlabels_last_saved_version = new_version; + db_execute(buffer_tostring(work_buffer)); + } +} + +// Migrate all hosts with hops zero to this host_uuid +void migrate_localhost(uuid_t *host_uuid) +{ + int rc; + + rc = exec_statement_with_uuid(MIGRATE_LOCALHOST_TO_NEW_MACHINE_GUID, host_uuid); + if (!rc) + rc = exec_statement_with_uuid(DELETE_NON_EXISTING_LOCALHOST, host_uuid); + if (!rc) + db_execute(DELETE_MISSING_NODE_INSTANCES); + +} + +static void store_claim_id(uuid_t *host_id, uuid_t *claim_id) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_STORE_CLAIM_ID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement store chart labels"); + return; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to store node instance information"); + goto failed; + } + + if (claim_id) + rc = sqlite3_bind_blob(res, 2, claim_id, sizeof(*claim_id), SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, 2); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind claim_id parameter to store node instance information"); + goto failed; + } + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store node instance information, rc = %d", rc); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when storing node instance information"); +} + +static void delete_dimension_uuid(uuid_t *dimension_uuid) +{ + static __thread sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, DELETE_DIMENSION_UUID, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to delete a dimension uuid"); + return; + } + } + + rc = sqlite3_bind_blob(res, 1, dimension_uuid, sizeof(*dimension_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto skip_execution; + + rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to delete dimension uuid, rc = %d", rc); + +skip_execution: + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement when deleting dimension UUID, rc = %d", rc); +} + +// +// Store host and host system info information in the database +static int sql_store_host_info(RRDHOST *host) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 0; + error_report("Database has not been initialized"); + return 1; + } + + if (unlikely((!res))) { + rc = prepare_statement(db_meta, SQL_STORE_HOST_INFO, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store host, rc = %d", rc); + return 1; + } + } + + rc = sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_hostname(host), 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_registry_hostname(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->rrd_update_every); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_os(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_timezone(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_tags(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->system_info ? host->system_info->hops : 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->rrd_memory_mode); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_abbrev_timezone(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->utc_offset); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_program_name(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_program_version(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int64(res, ++param, host->rrd_history_entries); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int ) host->health_enabled); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + int store_rc = sqlite3_step_monitored(res); + if (unlikely(store_rc != SQLITE_DONE)) + error_report("Failed to store host %s, rc = %d", rrdhost_hostname(host), rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host %s, rc = %d", rrdhost_hostname(host), rc); + + return !(store_rc == SQLITE_DONE); +bind_fail: + error_report("Failed to bind %d parameter to store host %s, rc = %d", param, rrdhost_hostname(host), rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host %s, rc = %d", rrdhost_hostname(host), rc); + return 1; +} + +static void sql_store_host_system_info_key_value(const char *name, const char *value, void *data) +{ + struct query_build *lb = data; + + if (unlikely(!value)) + return; + + if (unlikely(!lb->count)) + buffer_sprintf( + lb->sql, STORE_HOST_INFO); + else + buffer_strcat(lb->sql, ", "); + buffer_sprintf(lb->sql, STORE_HOST_INFO_VALUES, lb->uuid_str, name, value); + lb->count++; +} + +static BUFFER *sql_store_host_system_info(RRDHOST *host) +{ + struct rrdhost_system_info *system_info = host->system_info; + + if (unlikely(!system_info)) + return NULL; + + BUFFER *work_buffer = buffer_create(1024); + + struct query_build key_data = {.sql = work_buffer, .count = 0}; + uuid_unparse_lower(host->host_uuid, key_data.uuid_str); + + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_NAME", system_info->container_os_name, &key_data); + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_ID", system_info->container_os_id, &key_data); + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_ID_LIKE", system_info->container_os_id_like, &key_data); + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_VERSION", system_info->container_os_version, &key_data); + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_VERSION_ID", system_info->container_os_version_id, &key_data); + sql_store_host_system_info_key_value("NETDATA_CONTAINER_OS_DETECTION", system_info->host_os_detection, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_NAME", system_info->host_os_name, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_ID", system_info->host_os_id, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_ID_LIKE", system_info->host_os_id_like, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_VERSION", system_info->host_os_version, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_VERSION_ID", system_info->host_os_version_id, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_OS_DETECTION", system_info->host_os_detection, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_KERNEL_NAME", system_info->kernel_name, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", system_info->host_cores, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_CPU_FREQ", system_info->host_cpu_freq, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_TOTAL_RAM", system_info->host_ram_total, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_TOTAL_DISK_SIZE", system_info->host_disk_space, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_KERNEL_VERSION", system_info->kernel_version, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_ARCHITECTURE", system_info->architecture, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_VIRTUALIZATION", system_info->virtualization, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_VIRT_DETECTION", system_info->virt_detection, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_CONTAINER", system_info->container, &key_data); + sql_store_host_system_info_key_value("NETDATA_SYSTEM_CONTAINER_DETECTION", system_info->container_detection, &key_data); + sql_store_host_system_info_key_value("NETDATA_HOST_IS_K8S_NODE", system_info->is_k8s_node, &key_data); + + return work_buffer; +} + + +/* + * Store set option for a dimension + */ +static int sql_set_dimension_option(uuid_t *dim_uuid, char *option) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 0; + error_report("Database has not been initialized"); + return 1; + } + + rc = sqlite3_prepare_v2(db_meta, "UPDATE dimension SET options = @options WHERE dim_id = @dim_id", -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to update dimension options"); + return 0; + }; + + rc = sqlite3_bind_blob(res, 2, dim_uuid, sizeof(*dim_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + if (!option || !strcmp(option,"unhide")) + rc = sqlite3_bind_null(res, 1); + else + rc = sqlite3_bind_text(res, 1, option, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to update dimension option, rc = %d", rc); + +bind_fail: + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize statement in update dimension options, rc = %d", rc); + return 0; +} + +/* + * Store a chart in the database + */ + +static int sql_store_chart( + uuid_t *chart_uuid, uuid_t *host_uuid, const char *type, const char *id, const char *name, const char *family, + const char *context, const char *title, const char *units, const char *plugin, const char *module, long priority, + int update_every, int chart_type, int memory_mode, long history_entries) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 0; + error_report("Database has not been initialized"); + return 1; + } + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_STORE_CHART, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store chart, rc = %d", rc); + return 1; + } + } + + param++; + rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_blob(res, 2, host_uuid, sizeof(*host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 3, type, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 4, id, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + if (name && *name) + rc = sqlite3_bind_text(res, 5, name, -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, 5); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 6, family, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 7, context, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 8, title, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 9, units, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 10, plugin, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_text(res, 11, module, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_int(res, 12, (int) priority); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_int(res, 13, update_every); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_int(res, 14, chart_type); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_int(res, 15, memory_mode); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + param++; + rc = sqlite3_bind_int(res, 16, (int) history_entries); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store chart, rc = %d", rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in chart store function, rc = %d", rc); + + return 0; + +bind_fail: + error_report("Failed to bind parameter %d to store chart, rc = %d", param, rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in chart store function, rc = %d", rc); + return 1; +} + +/* + * Store a dimension + */ +static int sql_store_dimension( + uuid_t *dim_uuid, uuid_t *chart_uuid, const char *id, const char *name, collected_number multiplier, + collected_number divisor, int algorithm, bool hidden) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 0; + error_report("Database has not been initialized"); + return 1; + } + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_STORE_DIMENSION, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store dimension, rc = %d", rc); + return 1; + } + } + + rc = sqlite3_bind_blob(res, ++param, dim_uuid, sizeof(*dim_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_blob(res, ++param, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, id, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, name, -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int) multiplier); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int ) divisor); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, algorithm); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + if (hidden) + rc = sqlite3_bind_text(res, ++param, "hidden", -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store dimension, rc = %d", rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in store dimension, rc = %d", rc); + return 0; + +bind_fail: + error_report("Failed to bind parameter %d to store dimension, rc = %d", param, rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in store dimension, rc = %d", rc); + return 1; +} + +static bool dimension_can_be_deleted(uuid_t *dim_uuid) +{ +#ifdef ENABLE_DBENGINE + bool no_retention = true; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier]) + continue; + time_t first_time_t = 0, last_time_t = 0; + if (rrdeng_metric_retention_by_uuid((void *) multidb_ctx[tier], dim_uuid, &first_time_t, &last_time_t) == 0) { + if (first_time_t > 0) { + no_retention = false; + break; + } + } + } + return no_retention; +#else + return false; +#endif +} + +static void check_dimension_metadata(struct metadata_wc *wc) +{ + int rc; + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, SELECT_DIMENSION_LIST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to fetch host dimensions"); + return; + } + + rc = sqlite3_bind_int64(res, 1, (sqlite3_int64) wc->row_id); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to row parameter"); + goto skip_run; + } + + uint32_t total_checked = 0; + uint32_t total_deleted= 0; + uint64_t last_row_id = wc->row_id; + + info("METADATA: Checking dimensions starting after row %"PRIu64, wc->row_id); + + while (sqlite3_step_monitored(res) == SQLITE_ROW && total_deleted < MAX_METADATA_CLEANUP) { + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + break; + + last_row_id = sqlite3_column_int64(res, 1); + rc = dimension_can_be_deleted((uuid_t *)sqlite3_column_blob(res, 0)); + if (rc == true) { + delete_dimension_uuid((uuid_t *)sqlite3_column_blob(res, 0)); + total_deleted++; + } + total_checked++; + } + wc->row_id = last_row_id; + time_t now = now_realtime_sec(); + if (total_deleted > 0) { + wc->check_metadata_after = now + METADATA_MAINTENANCE_RETRY; + } else + wc->row_id = 0; + info("METADATA: Checked %u, deleted %u -- will resume after row %"PRIu64" in %lld seconds", total_checked, total_deleted, wc->row_id, + (long long)(wc->check_metadata_after - now)); + +skip_run: + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when reading dimensions"); +} + + +// +// EVENT LOOP STARTS HERE +// +static uv_mutex_t metadata_async_lock; + +static void metadata_init_cmd_queue(struct metadata_wc *wc) +{ + wc->cmd_queue.head = wc->cmd_queue.tail = 0; + wc->queue_size = 0; + fatal_assert(0 == uv_cond_init(&wc->cmd_cond)); + fatal_assert(0 == uv_mutex_init(&wc->cmd_mutex)); +} + +int metadata_enq_cmd_noblock(struct metadata_wc *wc, struct metadata_cmd *cmd) +{ + unsigned queue_size; + + /* wait for free space in queue */ + uv_mutex_lock(&wc->cmd_mutex); + + if (cmd->opcode == METADATA_SYNC_SHUTDOWN) { + metadata_flag_set(wc, METADATA_FLAG_SHUTDOWN); + uv_mutex_unlock(&wc->cmd_mutex); + return 0; + } + + if (unlikely((queue_size = wc->queue_size) == METADATA_CMD_Q_MAX_SIZE || + metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) { + uv_mutex_unlock(&wc->cmd_mutex); + return 1; + } + + fatal_assert(queue_size < METADATA_CMD_Q_MAX_SIZE); + /* enqueue command */ + wc->cmd_queue.cmd_array[wc->cmd_queue.tail] = *cmd; + wc->cmd_queue.tail = wc->cmd_queue.tail != METADATA_CMD_Q_MAX_SIZE - 1 ? + wc->cmd_queue.tail + 1 : 0; + wc->queue_size = queue_size + 1; + uv_mutex_unlock(&wc->cmd_mutex); + return 0; +} + +static void metadata_enq_cmd(struct metadata_wc *wc, struct metadata_cmd *cmd) +{ + unsigned queue_size; + + /* wait for free space in queue */ + uv_mutex_lock(&wc->cmd_mutex); + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) { + uv_mutex_unlock(&wc->cmd_mutex); + (void) uv_async_send(&wc->async); + return; + } + + if (cmd->opcode == METADATA_SYNC_SHUTDOWN) { + metadata_flag_set(wc, METADATA_FLAG_SHUTDOWN); + uv_mutex_unlock(&wc->cmd_mutex); + (void) uv_async_send(&wc->async); + return; + } + + while ((queue_size = wc->queue_size) == METADATA_CMD_Q_MAX_SIZE) { + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) { + uv_mutex_unlock(&wc->cmd_mutex); + return; + } + uv_cond_wait(&wc->cmd_cond, &wc->cmd_mutex); + } + fatal_assert(queue_size < METADATA_CMD_Q_MAX_SIZE); + /* enqueue command */ + wc->cmd_queue.cmd_array[wc->cmd_queue.tail] = *cmd; + wc->cmd_queue.tail = wc->cmd_queue.tail != METADATA_CMD_Q_MAX_SIZE - 1 ? + wc->cmd_queue.tail + 1 : 0; + wc->queue_size = queue_size + 1; + uv_mutex_unlock(&wc->cmd_mutex); + + /* wake up event loop */ + (void) uv_async_send(&wc->async); +} + +static struct metadata_cmd metadata_deq_cmd(struct metadata_wc *wc, enum metadata_opcode *next_opcode) +{ + struct metadata_cmd ret; + unsigned queue_size; + + uv_mutex_lock(&wc->cmd_mutex); + queue_size = wc->queue_size; + if (queue_size == 0) { + memset(&ret, 0, sizeof(ret)); + ret.opcode = METADATA_DATABASE_NOOP; + ret.completion = NULL; + *next_opcode = METADATA_DATABASE_NOOP; + } else { + /* dequeue command */ + ret = wc->cmd_queue.cmd_array[wc->cmd_queue.head]; + + if (queue_size == 1) { + wc->cmd_queue.head = wc->cmd_queue.tail = 0; + } else { + wc->cmd_queue.head = wc->cmd_queue.head != METADATA_CMD_Q_MAX_SIZE - 1 ? + wc->cmd_queue.head + 1 : 0; + } + wc->queue_size = queue_size - 1; + if (wc->queue_size > 0) + *next_opcode = wc->cmd_queue.cmd_array[wc->cmd_queue.head].opcode; + else + *next_opcode = METADATA_DATABASE_NOOP; + /* wake up producers */ + uv_cond_signal(&wc->cmd_cond); + } + uv_mutex_unlock(&wc->cmd_mutex); + + return ret; +} + +static void async_cb(uv_async_t *handle) +{ + uv_stop(handle->loop); + uv_update_time(handle->loop); +} + +#define TIMER_INITIAL_PERIOD_MS (1000) +#define TIMER_REPEAT_PERIOD_MS (1000) + +static void timer_cb(uv_timer_t* handle) +{ + uv_stop(handle->loop); + uv_update_time(handle->loop); + + struct metadata_wc *wc = handle->data; + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + + time_t now = now_realtime_sec(); + + if (wc->check_metadata_after && wc->check_metadata_after < now) { + cmd.opcode = METADATA_MAINTENANCE; + if (!metadata_enq_cmd_noblock(wc, &cmd)) + wc->check_metadata_after = now + METADATA_MAINTENANCE_INTERVAL; + } + + if (wc->check_hosts_after && wc->check_hosts_after < now) { + cmd.opcode = METADATA_SCAN_HOSTS; + if (!metadata_enq_cmd_noblock(wc, &cmd)) + wc->check_hosts_after = now + METADATA_HOST_CHECK_INTERVAL; + } +} + +static void after_metadata_cleanup(uv_work_t *req, int status) +{ + UNUSED(status); + + struct metadata_wc *wc = req->data; + metadata_flag_clear(wc, METADATA_FLAG_CLEANUP); +} +static void start_metadata_cleanup(uv_work_t *req) +{ + struct metadata_wc *wc = req->data; + check_dimension_metadata(wc); +} + +struct scan_metadata_payload { + uv_work_t request; + struct metadata_wc *wc; + struct completion *completion; + uint32_t max_count; +}; + +// Callback after scan of hosts is done +static void after_metadata_hosts(uv_work_t *req, int status __maybe_unused) +{ + struct scan_metadata_payload *data = req->data; + struct metadata_wc *wc = data->wc; + + metadata_flag_clear(wc, METADATA_FLAG_SCANNING_HOSTS); + internal_error(true, "METADATA: scanning hosts complete"); + if (unlikely(data->completion)) { + completion_mark_complete(data->completion); + internal_error(true, "METADATA: Sending completion done"); + } + freez(data); +} + +static bool metadata_scan_host(RRDHOST *host, uint32_t max_count) { + RRDSET *st; + int rc; + + bool more_to_do = false; + uint32_t scan_count = 1; + BUFFER *work_buffer = buffer_create(1024); + + rrdset_foreach_reentrant(st, host) { + if (scan_count == max_count) { + more_to_do = true; + break; + } + if(rrdset_flag_check(st, RRDSET_FLAG_METADATA_UPDATE)) { + rrdset_flag_clear(st, RRDSET_FLAG_METADATA_UPDATE); + scan_count++; + + check_and_update_chart_labels(st, work_buffer); + + rc = sql_store_chart( + &st->chart_uuid, + &st->rrdhost->host_uuid, + string2str(st->parts.type), + string2str(st->parts.id), + string2str(st->parts.name), + rrdset_family(st), + rrdset_context(st), + rrdset_title(st), + rrdset_units(st), + rrdset_plugin_name(st), + rrdset_module_name(st), + st->priority, + st->update_every, + st->chart_type, + st->rrd_memory_mode, + st->entries); + if (unlikely(rc)) + internal_error(true, "METADATA: Failed to store chart metadata %s", string2str(st->id)); + } + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(rrddim_flag_check(rd, RRDDIM_FLAG_METADATA_UPDATE)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_METADATA_UPDATE); + + rc = sql_store_dimension( + &rd->metric_uuid, + &rd->rrdset->chart_uuid, + string2str(rd->id), + string2str(rd->name), + rd->multiplier, + rd->divisor, + rd->algorithm, + rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)); + + if (unlikely(rc)) + error_report("METADATA: Failed to store dimension %s", string2str(rd->id)); + } + } + rrddim_foreach_done(rd); + } + rrdset_foreach_done(st); + + buffer_free(work_buffer); + return more_to_do; +} + +// Worker thread to scan hosts for pending metadata to store +static void start_metadata_hosts(uv_work_t *req __maybe_unused) +{ + RRDHOST *host; + + struct scan_metadata_payload *data = req->data; + struct metadata_wc *wc = data->wc; + + bool run_again = false; + dfe_start_reentrant(rrdhost_root_index, host) { + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) || !rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_UPDATE)) + continue; + internal_error(true, "METADATA: Scanning host %s", rrdhost_hostname(host)); + rrdhost_flag_clear(host,RRDHOST_FLAG_METADATA_UPDATE); + if (unlikely(metadata_scan_host(host, data->max_count))) { + run_again = true; + rrdhost_flag_set(host,RRDHOST_FLAG_METADATA_UPDATE); + internal_error(true,"METADATA: Rescheduling host %s to run; more charts to store", rrdhost_hostname(host)); + } + } + dfe_done(host); + if (unlikely(run_again)) + wc->check_hosts_after = now_realtime_sec() + METADATA_HOST_CHECK_IMMEDIATE; + else + wc->check_hosts_after = now_realtime_sec() + METADATA_HOST_CHECK_INTERVAL; +} + +static void metadata_event_loop(void *arg) +{ + worker_register("METASYNC"); + worker_register_job_name(METADATA_DATABASE_NOOP, "noop"); + worker_register_job_name(METADATA_DATABASE_TIMER, "timer"); + worker_register_job_name(METADATA_ADD_CHART, "add chart"); + worker_register_job_name(METADATA_ADD_CHART_LABEL, "add chart label"); + worker_register_job_name(METADATA_ADD_DIMENSION, "add dimension"); + worker_register_job_name(METADATA_DEL_DIMENSION, "delete dimension"); + worker_register_job_name(METADATA_ADD_DIMENSION_OPTION, "dimension option"); + worker_register_job_name(METADATA_ADD_HOST_SYSTEM_INFO, "host system info"); + worker_register_job_name(METADATA_ADD_HOST_INFO, "host info"); + worker_register_job_name(METADATA_STORE_CLAIM_ID, "add claim id"); + worker_register_job_name(METADATA_STORE_HOST_LABELS, "host labels"); + worker_register_job_name(METADATA_MAINTENANCE, "maintenance"); + + + int ret; + uv_loop_t *loop; + unsigned cmd_batch_size; + struct metadata_wc *wc = arg; + enum metadata_opcode opcode, next_opcode; + uv_work_t metadata_cleanup_worker; + + uv_thread_set_name_np(wc->thread, "METASYNC"); + loop = wc->loop = mallocz(sizeof(uv_loop_t)); + ret = uv_loop_init(loop); + if (ret) { + error("uv_loop_init(): %s", uv_strerror(ret)); + goto error_after_loop_init; + } + loop->data = wc; + + ret = uv_async_init(wc->loop, &wc->async, async_cb); + if (ret) { + error("uv_async_init(): %s", uv_strerror(ret)); + goto error_after_async_init; + } + wc->async.data = wc; + + ret = uv_timer_init(loop, &wc->timer_req); + if (ret) { + error("uv_timer_init(): %s", uv_strerror(ret)); + goto error_after_timer_init; + } + wc->timer_req.data = wc; + fatal_assert(0 == uv_timer_start(&wc->timer_req, timer_cb, TIMER_INITIAL_PERIOD_MS, TIMER_REPEAT_PERIOD_MS)); + + info("Starting metadata sync thread with %d entries command queue", METADATA_CMD_Q_MAX_SIZE); + + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + metadata_flag_clear(wc, METADATA_FLAG_CLEANUP); + metadata_flag_clear(wc, METADATA_FLAG_SCANNING_HOSTS); + + wc->check_metadata_after = now_realtime_sec() + METADATA_MAINTENANCE_FIRST_CHECK; + wc->check_hosts_after = now_realtime_sec() + METADATA_HOST_CHECK_FIRST_CHECK; + + int shutdown = 0; + int in_transaction = 0; + int commands_in_transaction = 0; + // This can be used in the event loop for all opcodes (not workers) + BUFFER *work_buffer = buffer_create(1024); + wc->row_id = 0; + completion_mark_complete(&wc->init_complete); + + while (shutdown == 0 || (wc->flags & METADATA_WORKER_BUSY)) { + RRDDIM *rd = NULL; + RRDSET *st = NULL; + RRDHOST *host = NULL; + DICTIONARY_ITEM *dict_item = NULL; + BUFFER *buffer = NULL; + uuid_t *uuid; + int rc; + + worker_is_idle(); + uv_run(loop, UV_RUN_DEFAULT); + + /* wait for commands */ + cmd_batch_size = 0; + do { + if (unlikely(cmd_batch_size >= METADATA_MAX_BATCH_SIZE)) + break; + + cmd = metadata_deq_cmd(wc, &next_opcode); + opcode = cmd.opcode; + + if (unlikely(opcode == METADATA_DATABASE_NOOP && metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) { + shutdown = 1; + continue; + } + + ++cmd_batch_size; + + // If we are not in transaction and this command is the same with the next ; start a transaction + if (!in_transaction && opcode < METADATA_SKIP_TRANSACTION && opcode == next_opcode) { + if (opcode != METADATA_DATABASE_NOOP) { + in_transaction = 1; + db_execute("BEGIN TRANSACTION;"); + } + } + + if (likely(in_transaction)) { + commands_in_transaction++; + } + + if (likely(opcode != METADATA_DATABASE_NOOP)) + worker_is_busy(opcode); + + switch (opcode) { + case METADATA_DATABASE_NOOP: + case METADATA_DATABASE_TIMER: + break; + case METADATA_ADD_CHART: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + st = (RRDSET *) dictionary_acquired_item_value(dict_item); + + rc = sql_store_chart( + &st->chart_uuid, + &st->rrdhost->host_uuid, + string2str(st->parts.type), + string2str(st->parts.id), + string2str(st->parts.name), + rrdset_family(st), + rrdset_context(st), + rrdset_title(st), + rrdset_units(st), + rrdset_plugin_name(st), + rrdset_module_name(st), + st->priority, + st->update_every, + st->chart_type, + st->rrd_memory_mode, + st->entries); + + if (unlikely(rc)) + error_report("Failed to store chart %s", rrdset_id(st)); + + dictionary_acquired_item_release(st->rrdhost->rrdset_root_index, dict_item); + break; + case METADATA_ADD_CHART_LABEL: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + st = (RRDSET *) dictionary_acquired_item_value(dict_item); + check_and_update_chart_labels(st, work_buffer); + dictionary_acquired_item_release(st->rrdhost->rrdset_root_index, dict_item); + break; + case METADATA_ADD_DIMENSION: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + rd = (RRDDIM *) dictionary_acquired_item_value(dict_item); + + rc = sql_store_dimension( + &rd->metric_uuid, + &rd->rrdset->chart_uuid, + string2str(rd->id), + string2str(rd->name), + rd->multiplier, + rd->divisor, + rd->algorithm, + rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)); + + if (unlikely(rc)) + error_report("Failed to store dimension %s", rrddim_id(rd)); + + dictionary_acquired_item_release(rd->rrdset->rrddim_root_index, dict_item); + break; + case METADATA_DEL_DIMENSION: + uuid = (uuid_t *) cmd.param[0]; + if (likely(dimension_can_be_deleted(uuid))) + delete_dimension_uuid(uuid); + freez(uuid); + break; + case METADATA_ADD_DIMENSION_OPTION: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + rd = (RRDDIM *) dictionary_acquired_item_value(dict_item); + rc = sql_set_dimension_option( + &rd->metric_uuid, rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN) ? "hidden" : NULL); + if (unlikely(rc)) + error_report("Failed to store dimension option for %s", string2str(rd->id)); + dictionary_acquired_item_release(rd->rrdset->rrddim_root_index, dict_item); + break; + case METADATA_ADD_HOST_SYSTEM_INFO: + buffer = (BUFFER *) cmd.param[0]; + db_execute(buffer_tostring(buffer)); + buffer_free(buffer); + break; + case METADATA_ADD_HOST_INFO: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + host = (RRDHOST *) dictionary_acquired_item_value(dict_item); + rc = sql_store_host_info(host); + if (unlikely(rc)) + error_report("Failed to store host info in the database for %s", string2str(host->hostname)); + dictionary_acquired_item_release(rrdhost_root_index, dict_item); + break; + case METADATA_STORE_CLAIM_ID: + store_claim_id((uuid_t *) cmd.param[0], (uuid_t *) cmd.param[1]); + freez((void *) cmd.param[0]); + freez((void *) cmd.param[1]); + break; + case METADATA_STORE_HOST_LABELS: + dict_item = (DICTIONARY_ITEM * ) cmd.param[0]; + host = (RRDHOST *) dictionary_acquired_item_value(dict_item); + rc = exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_uuid); + + if (likely(rc == SQLITE_OK)) { + buffer_flush(work_buffer); + struct query_build tmp = {.sql = work_buffer, .count = 0}; + uuid_unparse_lower(host->host_uuid, tmp.uuid_str); + rrdlabels_walkthrough_read(host->rrdlabels, host_label_store_to_sql_callback, &tmp); + db_execute(buffer_tostring(work_buffer)); + } + + dictionary_acquired_item_release(rrdhost_root_index, dict_item); + break; + + case METADATA_SCAN_HOSTS: + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SCANNING_HOSTS))) + break; + + struct scan_metadata_payload *data = mallocz(sizeof(*data)); + data->request.data = data; + data->wc = wc; + data->completion = cmd.completion; // Completion by the worker + + if (unlikely(cmd.completion)) { + data->max_count = 0; // 0 will process all pending updates + cmd.completion = NULL; // Do not complete after launching worker (worker will do) + } + else + data->max_count = 1000; + + metadata_flag_set(wc, METADATA_FLAG_SCANNING_HOSTS); + if (unlikely( + uv_queue_work(loop,&data->request, + start_metadata_hosts, + after_metadata_hosts))) { + // Failed to launch worker -- let the event loop handle completion + cmd.completion = data->completion; + freez(data); + metadata_flag_clear(wc, METADATA_FLAG_SCANNING_HOSTS); + } + break; + case METADATA_STORE_BUFFER: + buffer = (BUFFER *) cmd.param[0]; + db_execute(buffer_tostring(buffer)); + buffer_free(buffer); + break; + case METADATA_MAINTENANCE: + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_CLEANUP))) + break; + + metadata_cleanup_worker.data = wc; + metadata_flag_set(wc, METADATA_FLAG_CLEANUP); + if (unlikely( + uv_queue_work(loop, &metadata_cleanup_worker, start_metadata_cleanup, after_metadata_cleanup))) { + metadata_flag_clear(wc, METADATA_FLAG_CLEANUP); + } + break; + case METADATA_UNITTEST:; + struct thread_unittest *tu = (struct thread_unittest *) cmd.param[0]; + sleep_usec(1000); // processing takes 1ms + __atomic_fetch_add(&tu->processed, 1, __ATOMIC_SEQ_CST); + break; + default: + break; + } + if (in_transaction && (commands_in_transaction >= METADATA_MAX_TRANSACTION_BATCH || opcode != next_opcode)) { + in_transaction = 0; + db_execute("COMMIT TRANSACTION;"); + commands_in_transaction = 0; + } + + if (cmd.completion) + completion_mark_complete(cmd.completion); + } while (opcode != METADATA_DATABASE_NOOP); + } + + if (!uv_timer_stop(&wc->timer_req)) + uv_close((uv_handle_t *)&wc->timer_req, NULL); + + /* + * uv_async_send after uv_close does not seem to crash in linux at the moment, + * it is however undocumented behaviour we need to be aware if this becomes + * an issue in the future. + */ + uv_close((uv_handle_t *)&wc->async, NULL); + uv_run(loop, UV_RUN_DEFAULT); + + uv_cond_destroy(&wc->cmd_cond); + /* uv_mutex_destroy(&wc->cmd_mutex); */ + //fatal_assert(0 == uv_loop_close(loop)); + int rc; + + do { + rc = uv_loop_close(loop); + } while (rc != UV_EBUSY); + + freez(loop); + worker_unregister(); + + buffer_free(work_buffer); + info("METADATA: Shutting down event loop"); + completion_mark_complete(&wc->init_complete); + return; + +error_after_timer_init: + uv_close((uv_handle_t *)&wc->async, NULL); +error_after_async_init: + fatal_assert(0 == uv_loop_close(loop)); +error_after_loop_init: + freez(loop); + worker_unregister(); +} + +struct metadata_wc metasync_worker = {.loop = NULL}; + +void metadata_sync_shutdown(void) +{ + completion_init(&metasync_worker.init_complete); + + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + info("METADATA: Sending a shutdown command"); + cmd.opcode = METADATA_SYNC_SHUTDOWN; + metadata_enq_cmd(&metasync_worker, &cmd); + + /* wait for metadata thread to shut down */ + info("METADATA: Waiting for shutdown ACK"); + completion_wait_for(&metasync_worker.init_complete); + completion_destroy(&metasync_worker.init_complete); + info("METADATA: Shutdown complete"); +} + +void metadata_sync_shutdown_prepare(void) +{ + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + + struct completion compl; + completion_init(&compl); + + info("METADATA: Sending a scan host command"); + uint32_t max_wait_iterations = 2000; + while (unlikely(metadata_flag_check(&metasync_worker, METADATA_FLAG_SCANNING_HOSTS)) && max_wait_iterations--) { + if (max_wait_iterations == 1999) + info("METADATA: Current worker is running; waiting to finish"); + sleep_usec(1000); + } + + cmd.opcode = METADATA_SCAN_HOSTS; + cmd.completion = &compl; + metadata_enq_cmd(&metasync_worker, &cmd); + + info("METADATA: Waiting for host scan completion"); + completion_wait_for(&compl); + completion_destroy(&compl); + info("METADATA: Host scan complete; can continue with shutdown"); +} + +// ------------------------------------------------------------- +// Init function called on agent startup + +void metadata_sync_init(void) +{ + struct metadata_wc *wc = &metasync_worker; + + fatal_assert(0 == uv_mutex_init(&metadata_async_lock)); + + memset(wc, 0, sizeof(*wc)); + metadata_init_cmd_queue(wc); + completion_init(&wc->init_complete); + + fatal_assert(0 == uv_thread_create(&(wc->thread), metadata_event_loop, wc)); + + completion_wait_for(&wc->init_complete); + completion_destroy(&wc->init_complete); + + info("SQLite metadata sync initialization complete"); +} + + +// Helpers + +static inline void queue_metadata_cmd(enum metadata_opcode opcode, const void *param0, const void *param1) +{ + struct metadata_cmd cmd; + cmd.opcode = opcode; + cmd.param[0] = param0; + cmd.param[1] = param1; + cmd.completion = NULL; + metadata_enq_cmd(&metasync_worker, &cmd); + +} + +// Public +void metaqueue_chart_update(RRDSET *st) +{ + const DICTIONARY_ITEM *acquired_st = dictionary_get_and_acquire_item(st->rrdhost->rrdset_root_index, string2str(st->id)); + queue_metadata_cmd(METADATA_ADD_CHART, acquired_st, NULL); +} + +// +// RD may not be collected, so we may store it needlessly +void metaqueue_dimension_update(RRDDIM *rd) +{ + const DICTIONARY_ITEM *acquired_rd = + dictionary_get_and_acquire_item(rd->rrdset->rrddim_root_index, string2str(rd->id)); + + if (unlikely(rrdset_flag_check(rd->rrdset, RRDSET_FLAG_METADATA_UPDATE))) { + metaqueue_chart_update(rd->rrdset); + rrdset_flag_clear(rd->rrdset, RRDSET_FLAG_METADATA_UPDATE); + } + + queue_metadata_cmd(METADATA_ADD_DIMENSION, acquired_rd, NULL); +} + +void metaqueue_dimension_update_flags(RRDDIM *rd) +{ + const DICTIONARY_ITEM *acquired_rd = + dictionary_get_and_acquire_item(rd->rrdset->rrddim_root_index, string2str(rd->id)); + queue_metadata_cmd(METADATA_ADD_DIMENSION_OPTION, acquired_rd, NULL); +} + +void metaqueue_host_update_system_info(RRDHOST *host) +{ + BUFFER *work_buffer = sql_store_host_system_info(host); + + if (unlikely(!work_buffer)) + return; + + queue_metadata_cmd(METADATA_ADD_HOST_SYSTEM_INFO, work_buffer, NULL); +} + +void metaqueue_host_update_info(const char *machine_guid) +{ + const DICTIONARY_ITEM *acquired_host = dictionary_get_and_acquire_item(rrdhost_root_index, machine_guid); + queue_metadata_cmd(METADATA_ADD_HOST_INFO, acquired_host, NULL); +} + +void metaqueue_delete_dimension_uuid(uuid_t *uuid) +{ + if (unlikely(!metasync_worker.loop)) + return; + uuid_t *use_uuid = mallocz(sizeof(*uuid)); + uuid_copy(*use_uuid, *uuid); + queue_metadata_cmd(METADATA_DEL_DIMENSION, use_uuid, NULL); +} + +void metaqueue_store_claim_id(uuid_t *host_uuid, uuid_t *claim_uuid) +{ + if (unlikely(!host_uuid)) + return; + + uuid_t *local_host_uuid = mallocz(sizeof(*host_uuid)); + uuid_t *local_claim_uuid = NULL; + + uuid_copy(*local_host_uuid, *host_uuid); + if (likely(claim_uuid)) { + local_claim_uuid = mallocz(sizeof(*claim_uuid)); + uuid_copy(*local_claim_uuid, *claim_uuid); + } + queue_metadata_cmd(METADATA_STORE_CLAIM_ID, local_host_uuid, local_claim_uuid); +} + +void metaqueue_store_host_labels(const char *machine_guid) +{ + const DICTIONARY_ITEM *acquired_host = dictionary_get_and_acquire_item(rrdhost_root_index, machine_guid); + queue_metadata_cmd(METADATA_STORE_HOST_LABELS, acquired_host, NULL); +} + +void metaqueue_buffer(BUFFER *buffer) +{ + queue_metadata_cmd(METADATA_STORE_BUFFER, buffer, NULL); +} + +void metaqueue_chart_labels(RRDSET *st) +{ + const DICTIONARY_ITEM *acquired_st = dictionary_get_and_acquire_item(st->rrdhost->rrdset_root_index, string2str(st->id)); + queue_metadata_cmd(METADATA_ADD_CHART_LABEL, acquired_st, NULL); +} + + +// +// unitests +// + +static void *unittest_queue_metadata(void *arg) { + struct thread_unittest *tu = arg; + + struct metadata_cmd cmd; + cmd.opcode = METADATA_UNITTEST; + cmd.param[0] = tu; + cmd.param[1] = NULL; + cmd.completion = NULL; + metadata_enq_cmd(&metasync_worker, &cmd); + + do { + __atomic_fetch_add(&tu->added, 1, __ATOMIC_SEQ_CST); + metadata_enq_cmd(&metasync_worker, &cmd); + sleep_usec(10000); + } while (!__atomic_load_n(&tu->join, __ATOMIC_RELAXED)); + return arg; +} + +static void *metadata_unittest_threads(void) +{ + + unsigned done; + + struct thread_unittest tu = { + .join = 0, + .added = 0, + .processed = 0, + .done = &done, + }; + + // Queue messages / Time it + time_t seconds_to_run = 5; + int threads_to_create = 4; + fprintf( + stderr, + "\nChecking metadata queue using %d threads for %lld seconds...\n", + threads_to_create, + (long long)seconds_to_run); + + netdata_thread_t threads[threads_to_create]; + tu.join = 0; + for (int i = 0; i < threads_to_create; i++) { + char buf[100 + 1]; + snprintf(buf, 100, "meta%d", i); + netdata_thread_create( + &threads[i], + buf, + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_queue_metadata, + &tu); + } + uv_async_send(&metasync_worker.async); + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED); + for (int i = 0; i < threads_to_create; i++) { + void *retval; + netdata_thread_join(threads[i], &retval); + } +// uv_async_send(&metasync_worker.async); + sleep_usec(5 * USEC_PER_SEC); + + fprintf(stderr, "Added %u elements, processed %u\n", tu.added, tu.processed); + + return 0; +} + +int metadata_unittest(void) +{ + metadata_sync_init(); + + // Queue items for a specific period of time + metadata_unittest_threads(); + + fprintf(stderr, "Items still in queue %u\n", metasync_worker.queue_size); + metadata_sync_shutdown(); + + return 0; +} diff --git a/database/sqlite/sqlite_metadata.h b/database/sqlite/sqlite_metadata.h new file mode 100644 index 000000000..9293facf8 --- /dev/null +++ b/database/sqlite/sqlite_metadata.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SQLITE_METADATA_H +#define NETDATA_SQLITE_METADATA_H + +#include "sqlite3.h" +#include "sqlite_functions.h" + +// To initialize and shutdown +void metadata_sync_init(void); +void metadata_sync_shutdown(void); +void metadata_sync_shutdown_prepare(void); + +void metaqueue_dimension_update(RRDDIM *rd); +void metaqueue_chart_update(RRDSET *st); +void metaqueue_dimension_update_flags(RRDDIM *rd); +void metaqueue_host_update_system_info(RRDHOST *host); +void metaqueue_host_update_info(const char *machine_guid); +void metaqueue_delete_dimension_uuid(uuid_t *uuid); +void metaqueue_store_claim_id(uuid_t *host_uuid, uuid_t *claim_uuid); +void metaqueue_store_host_labels(const char *machine_guid); +void metaqueue_chart_labels(RRDSET *st); +void migrate_localhost(uuid_t *host_uuid); +void metaqueue_buffer(BUFFER *buffer); + +// UNIT TEST +int metadata_unittest(void); +#endif //NETDATA_SQLITE_METADATA_H diff --git a/database/storage_engine.c b/database/storage_engine.c index 76597acd5..edf017db4 100644 --- a/database/storage_engine.c +++ b/database/storage_engine.c @@ -10,7 +10,10 @@ .init = rrddim_collect_init,\ .store_metric = rrddim_collect_store_metric,\ .flush = rrddim_store_metric_flush,\ - .finalize = rrddim_collect_finalize\ + .finalize = rrddim_collect_finalize, \ + .change_collection_frequency = rrddim_store_metric_change_collection_frequency, \ + .metrics_group_get = rrddim_metrics_group_get, \ + .metrics_group_release = rrddim_metrics_group_release, \ } #define im_query_ops { \ @@ -27,8 +30,10 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_NONE, .name = RRD_MEMORY_MODE_NONE_NAME, .api = { - .init = rrddim_metric_init, - .free = rrddim_metric_free, + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, .collect_ops = im_collect_ops, .query_ops = im_query_ops } @@ -37,8 +42,10 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_RAM, .name = RRD_MEMORY_MODE_RAM_NAME, .api = { - .init = rrddim_metric_init, - .free = rrddim_metric_free, + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, .collect_ops = im_collect_ops, .query_ops = im_query_ops } @@ -47,8 +54,10 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_MAP, .name = RRD_MEMORY_MODE_MAP_NAME, .api = { - .init = rrddim_metric_init, - .free = rrddim_metric_free, + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, .collect_ops = im_collect_ops, .query_ops = im_query_ops } @@ -57,8 +66,10 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_SAVE, .name = RRD_MEMORY_MODE_SAVE_NAME, .api = { - .init = rrddim_metric_init, - .free = rrddim_metric_free, + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, .collect_ops = im_collect_ops, .query_ops = im_query_ops } @@ -67,8 +78,10 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_ALLOC, .name = RRD_MEMORY_MODE_ALLOC_NAME, .api = { - .init = rrddim_metric_init, - .free = rrddim_metric_free, + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, .collect_ops = im_collect_ops, .query_ops = im_query_ops } @@ -78,13 +91,18 @@ static STORAGE_ENGINE engines[] = { .id = RRD_MEMORY_MODE_DBENGINE, .name = RRD_MEMORY_MODE_DBENGINE_NAME, .api = { - .init = rrdeng_metric_init, - .free = rrdeng_metric_free, + .metric_get = rrdeng_metric_get, + .metric_get_or_create = rrdeng_metric_get_or_create, + .metric_dup = rrdeng_metric_dup, + .metric_release = rrdeng_metric_release, .collect_ops = { .init = rrdeng_store_metric_init, .store_metric = rrdeng_store_metric_next, .flush = rrdeng_store_metric_flush_current_page, - .finalize = rrdeng_store_metric_finalize + .finalize = rrdeng_store_metric_finalize, + .change_collection_frequency = rrdeng_store_metric_change_collection_frequency, + .metrics_group_get = rrdeng_metrics_group_get, + .metrics_group_release = rrdeng_metrics_group_release, }, .query_ops = { .init = rrdeng_load_metric_init, diff --git a/database/storage_engine.h b/database/storage_engine.h index 3ed515e0a..b7fb7383a 100644 --- a/database/storage_engine.h +++ b/database/storage_engine.h @@ -5,28 +5,8 @@ #include "rrd.h" -typedef struct storage_engine STORAGE_ENGINE; - -// ------------------------------------------------------------------------ -// function pointers for all APIs provided by a storge engine -typedef struct storage_engine_api { - STORAGE_METRIC_HANDLE *(*init)(RRDDIM *rd, STORAGE_INSTANCE *instance); - void (*free)(STORAGE_METRIC_HANDLE *); - struct rrddim_collect_ops collect_ops; - struct rrddim_query_ops query_ops; -} STORAGE_ENGINE_API; - -struct storage_engine { - RRD_MEMORY_MODE id; - const char* name; - STORAGE_ENGINE_API api; -}; - -extern STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode); -extern STORAGE_ENGINE* storage_engine_find(const char* name); - // Iterator over existing engines -extern STORAGE_ENGINE* storage_engine_foreach_init(); -extern STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it); +STORAGE_ENGINE* storage_engine_foreach_init(); +STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it); #endif |