diff options
Diffstat (limited to 'src/database/contexts')
-rw-r--r-- | src/database/contexts/README.md | 0 | ||||
-rw-r--r-- | src/database/contexts/api_v1.c | 439 | ||||
-rw-r--r-- | src/database/contexts/api_v2.c | 2454 | ||||
-rw-r--r-- | src/database/contexts/context.c | 342 | ||||
-rw-r--r-- | src/database/contexts/instance.c | 536 | ||||
-rw-r--r-- | src/database/contexts/internal.h | 388 | ||||
-rw-r--r-- | src/database/contexts/metric.c | 327 | ||||
-rw-r--r-- | src/database/contexts/query_scope.c | 126 | ||||
-rw-r--r-- | src/database/contexts/query_target.c | 1233 | ||||
-rw-r--r-- | src/database/contexts/rrdcontext.c | 329 | ||||
-rw-r--r-- | src/database/contexts/rrdcontext.h | 760 | ||||
-rw-r--r-- | src/database/contexts/worker.c | 1147 |
12 files changed, 8081 insertions, 0 deletions
diff --git a/src/database/contexts/README.md b/src/database/contexts/README.md new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/database/contexts/README.md diff --git a/src/database/contexts/api_v1.c b/src/database/contexts/api_v1.c new file mode 100644 index 000000000..355aaf91a --- /dev/null +++ b/src/database/contexts/api_v1.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +static void rrd_flags_to_buffer_json_array_items(RRD_FLAGS flags, BUFFER *wb) { + if(flags & RRD_FLAG_QUEUED_FOR_HUB) + buffer_json_add_array_item_string(wb, "QUEUED"); + + if(flags & RRD_FLAG_DELETED) + buffer_json_add_array_item_string(wb, "DELETED"); + + if(flags & RRD_FLAG_COLLECTED) + buffer_json_add_array_item_string(wb, "COLLECTED"); + + if(flags & RRD_FLAG_UPDATED) + buffer_json_add_array_item_string(wb, "UPDATED"); + + if(flags & RRD_FLAG_ARCHIVED) + buffer_json_add_array_item_string(wb, "ARCHIVED"); + + if(flags & RRD_FLAG_OWN_LABELS) + buffer_json_add_array_item_string(wb, "OWN_LABELS"); + + if(flags & RRD_FLAG_LIVE_RETENTION) + buffer_json_add_array_item_string(wb, "LIVE_RETENTION"); + + if(flags & RRD_FLAG_HIDDEN) + buffer_json_add_array_item_string(wb, "HIDDEN"); + + if(flags & RRD_FLAG_QUEUED_FOR_PP) + buffer_json_add_array_item_string(wb, "PENDING_UPDATES"); +} + +// ---------------------------------------------------------------------------- +// /api/v1/context(s) API + +struct rrdcontext_to_json { + BUFFER *wb; + RRDCONTEXT_TO_JSON_OPTIONS options; + time_t after; + time_t before; + SIMPLE_PATTERN *chart_label_key; + SIMPLE_PATTERN *chart_labels_filter; + SIMPLE_PATTERN *chart_dimensions; + size_t written; + time_t now; + time_t combined_first_time_s; + time_t combined_last_time_s; + RRD_FLAGS combined_flags; +}; + +static inline int rrdmetric_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + struct rrdcontext_to_json * t = data; + RRDMETRIC *rm = value; + BUFFER *wb = t->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t->options; + time_t after = t->after; + time_t before = t->before; + + if(unlikely(rrd_flag_is_deleted(rm) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(after && (!rm->last_time_s || after > rm->last_time_s)) + return 0; + + if(before && (!rm->first_time_s || before < rm->first_time_s)) + return 0; + + if(t->chart_dimensions + && !simple_pattern_matches_string(t->chart_dimensions, rm->id) + && rm->name != rm->id + && !simple_pattern_matches_string(t->chart_dimensions, rm->name)) + return 0; + + if(t->written) { + t->combined_first_time_s = MIN(t->combined_first_time_s, rm->first_time_s); + t->combined_last_time_s = MAX(t->combined_last_time_s, rm->last_time_s); + t->combined_flags |= rrd_flags_get(rm); + } + else { + t->combined_first_time_s = rm->first_time_s; + t->combined_last_time_s = rm->last_time_s; + t->combined_flags = rrd_flags_get(rm); + } + + buffer_json_member_add_object(wb, id); + + if(options & RRDCONTEXT_OPTION_SHOW_UUIDS) { + char uuid[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid); + buffer_json_member_add_string(wb, "uuid", uuid); + } + + buffer_json_member_add_string(wb, "name", string2str(rm->name)); + buffer_json_member_add_time_t(wb, "first_time_t", rm->first_time_s); + buffer_json_member_add_time_t(wb, "last_time_t", rrd_flag_is_collected(rm) ? (long long)t->now : (long long)rm->last_time_s); + buffer_json_member_add_boolean(wb, "collected", rrd_flag_is_collected(rm)); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) + buffer_json_member_add_boolean(wb, "deleted", rrd_flag_is_deleted(rm)); + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_json_member_add_array(wb, "flags"); + rrd_flags_to_buffer_json_array_items(rrd_flags_get(rm), wb); + buffer_json_array_close(wb); + } + + buffer_json_object_close(wb); + t->written++; + return 1; +} + +static inline int rrdinstance_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + + struct rrdcontext_to_json *t_parent = data; + RRDINSTANCE *ri = value; + BUFFER *wb = t_parent->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t_parent->options; + time_t after = t_parent->after; + time_t before = t_parent->before; + bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; + + if(unlikely(rrd_flag_is_deleted(ri) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(after && (!ri->last_time_s || after > ri->last_time_s)) + return 0; + + if(before && (!ri->first_time_s || before < ri->first_time_s)) + return 0; + + if(t_parent->chart_label_key && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_label_key, + '\0', NULL) != SP_MATCHED_POSITIVE) + return 0; + + if(t_parent->chart_labels_filter && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, + t_parent->chart_labels_filter, ':', + NULL) != SP_MATCHED_POSITIVE) + return 0; + + time_t first_time_s = ri->first_time_s; + time_t last_time_s = ri->last_time_s; + RRD_FLAGS flags = rrd_flags_get(ri); + + BUFFER *wb_metrics = NULL; + if(options & RRDCONTEXT_OPTION_SHOW_METRICS || t_parent->chart_dimensions) { + + wb_metrics = buffer_create(4096, &netdata_buffers_statistics.buffers_api); + buffer_json_initialize(wb_metrics, "\"", "\"", wb->json.depth + 2, false, BUFFER_JSON_OPTIONS_DEFAULT); + + struct rrdcontext_to_json t_metrics = { + .wb = wb_metrics, + .options = options, + .chart_label_key = t_parent->chart_label_key, + .chart_labels_filter = t_parent->chart_labels_filter, + .chart_dimensions = t_parent->chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = t_parent->now, + }; + dictionary_walkthrough_read(ri->rrdmetrics, rrdmetric_to_json_callback, &t_metrics); + + if(has_filter && !t_metrics.written) { + buffer_free(wb_metrics); + return 0; + } + + first_time_s = t_metrics.combined_first_time_s; + last_time_s = t_metrics.combined_last_time_s; + flags = t_metrics.combined_flags; + } + + if(t_parent->written) { + t_parent->combined_first_time_s = MIN(t_parent->combined_first_time_s, first_time_s); + t_parent->combined_last_time_s = MAX(t_parent->combined_last_time_s, last_time_s); + t_parent->combined_flags |= flags; + } + else { + t_parent->combined_first_time_s = first_time_s; + t_parent->combined_last_time_s = last_time_s; + t_parent->combined_flags = flags; + } + + buffer_json_member_add_object(wb, id); + + if(options & RRDCONTEXT_OPTION_SHOW_UUIDS) { + char uuid[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid); + buffer_json_member_add_string(wb, "uuid", uuid); + } + + buffer_json_member_add_string(wb, "name", string2str(ri->name)); + buffer_json_member_add_string(wb, "context", string2str(ri->rc->id)); + buffer_json_member_add_string(wb, "title", string2str(ri->title)); + buffer_json_member_add_string(wb, "units", string2str(ri->units)); + buffer_json_member_add_string(wb, "family", string2str(ri->family)); + buffer_json_member_add_string(wb, "chart_type", rrdset_type_name(ri->chart_type)); + buffer_json_member_add_uint64(wb, "priority", ri->priority); + buffer_json_member_add_time_t(wb, "update_every", ri->update_every_s); + buffer_json_member_add_time_t(wb, "first_time_t", first_time_s); + buffer_json_member_add_time_t(wb, "last_time_t", (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_s); + buffer_json_member_add_boolean(wb, "collected", flags & RRD_FLAG_COLLECTED); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) + buffer_json_member_add_boolean(wb, "deleted", rrd_flag_is_deleted(ri)); + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_json_member_add_array(wb, "flags"); + rrd_flags_to_buffer_json_array_items(rrd_flags_get(ri), wb); + buffer_json_array_close(wb); + } + + if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && rrdlabels_entries(ri->rrdlabels)) { + buffer_json_member_add_object(wb, "labels"); + rrdlabels_to_buffer_json_members(ri->rrdlabels, wb); + buffer_json_object_close(wb); + } + + if(wb_metrics) { + buffer_json_member_add_object(wb, "dimensions"); + buffer_fast_strcat(wb, buffer_tostring(wb_metrics), buffer_strlen(wb_metrics)); + buffer_json_object_close(wb); + + buffer_free(wb_metrics); + } + + buffer_json_object_close(wb); + t_parent->written++; + return 1; +} + +static inline int rrdcontext_to_json_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *id = dictionary_acquired_item_name(item); + struct rrdcontext_to_json *t_parent = data; + RRDCONTEXT *rc = value; + BUFFER *wb = t_parent->wb; + RRDCONTEXT_TO_JSON_OPTIONS options = t_parent->options; + time_t after = t_parent->after; + time_t before = t_parent->before; + bool has_filter = t_parent->chart_label_key || t_parent->chart_labels_filter || t_parent->chart_dimensions; + + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN) && !(options & RRDCONTEXT_OPTION_SHOW_HIDDEN))) + return 0; + + if(unlikely(rrd_flag_is_deleted(rc) && !(options & RRDCONTEXT_OPTION_SHOW_DELETED))) + return 0; + + if(options & RRDCONTEXT_OPTION_DEEPSCAN) + rrdcontext_recalculate_context_retention(rc, RRD_FLAG_NONE, false); + + if(after && (!rc->last_time_s || after > rc->last_time_s)) + return 0; + + if(before && (!rc->first_time_s || before < rc->first_time_s)) + return 0; + + time_t first_time_s = rc->first_time_s; + time_t last_time_s = rc->last_time_s; + RRD_FLAGS flags = rrd_flags_get(rc); + + BUFFER *wb_instances = NULL; + if((options & (RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_METRICS)) + || t_parent->chart_label_key + || t_parent->chart_labels_filter + || t_parent->chart_dimensions) { + + wb_instances = buffer_create(4096, &netdata_buffers_statistics.buffers_api); + buffer_json_initialize(wb_instances, "\"", "\"", wb->json.depth + 2, false, BUFFER_JSON_OPTIONS_DEFAULT); + + struct rrdcontext_to_json t_instances = { + .wb = wb_instances, + .options = options, + .chart_label_key = t_parent->chart_label_key, + .chart_labels_filter = t_parent->chart_labels_filter, + .chart_dimensions = t_parent->chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = t_parent->now, + }; + dictionary_walkthrough_read(rc->rrdinstances, rrdinstance_to_json_callback, &t_instances); + + if(has_filter && !t_instances.written) { + buffer_free(wb_instances); + return 0; + } + + first_time_s = t_instances.combined_first_time_s; + last_time_s = t_instances.combined_last_time_s; + flags = t_instances.combined_flags; + } + + if(!(options & RRDCONTEXT_OPTION_SKIP_ID)) + buffer_json_member_add_object(wb, id); + + rrdcontext_lock(rc); + + buffer_json_member_add_string(wb, "title", string2str(rc->title)); + buffer_json_member_add_string(wb, "units", string2str(rc->units)); + buffer_json_member_add_string(wb, "family", string2str(rc->family)); + buffer_json_member_add_string(wb, "chart_type", rrdset_type_name(rc->chart_type)); + buffer_json_member_add_uint64(wb, "priority", rc->priority); + buffer_json_member_add_time_t(wb, "first_time_t", first_time_s); + buffer_json_member_add_time_t(wb, "last_time_t", (flags & RRD_FLAG_COLLECTED) ? (long long)t_parent->now : (long long)last_time_s); + buffer_json_member_add_boolean(wb, "collected", (flags & RRD_FLAG_COLLECTED)); + + if(options & RRDCONTEXT_OPTION_SHOW_DELETED) + buffer_json_member_add_boolean(wb, "deleted", rrd_flag_is_deleted(rc)); + + if(options & RRDCONTEXT_OPTION_SHOW_FLAGS) { + buffer_json_member_add_array(wb, "flags"); + rrd_flags_to_buffer_json_array_items(rrd_flags_get(rc), wb); + buffer_json_array_close(wb); + } + + if(options & RRDCONTEXT_OPTION_SHOW_QUEUED) { + buffer_json_member_add_array(wb, "queued_reasons"); + rrd_reasons_to_buffer_json_array_items(rc->queue.queued_flags, wb); + buffer_json_array_close(wb); + + buffer_json_member_add_time_t(wb, "last_queued", (time_t)(rc->queue.queued_ut / USEC_PER_SEC)); + buffer_json_member_add_time_t(wb, "scheduled_dispatch", (time_t)(rc->queue.scheduled_dispatch_ut / USEC_PER_SEC)); + buffer_json_member_add_time_t(wb, "last_dequeued", (time_t)(rc->queue.dequeued_ut / USEC_PER_SEC)); + buffer_json_member_add_uint64(wb, "dispatches", rc->queue.dispatches); + buffer_json_member_add_uint64(wb, "hub_version", rc->hub.version); + buffer_json_member_add_uint64(wb, "version", rc->version); + + buffer_json_member_add_array(wb, "pp_reasons"); + rrd_reasons_to_buffer_json_array_items(rc->pp.queued_flags, wb); + buffer_json_array_close(wb); + + buffer_json_member_add_time_t(wb, "pp_last_queued", (time_t)(rc->pp.queued_ut / USEC_PER_SEC)); + buffer_json_member_add_time_t(wb, "pp_last_dequeued", (time_t)(rc->pp.dequeued_ut / USEC_PER_SEC)); + buffer_json_member_add_uint64(wb, "pp_executed", rc->pp.executions); + } + + rrdcontext_unlock(rc); + + if(wb_instances) { + buffer_json_member_add_object(wb, "charts"); + buffer_fast_strcat(wb, buffer_tostring(wb_instances), buffer_strlen(wb_instances)); + buffer_json_object_close(wb); + + buffer_free(wb_instances); + } + + if(!(options & RRDCONTEXT_OPTION_SKIP_ID)) + buffer_json_object_close(wb); + + t_parent->written++; + return 1; +} + +int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx.contexts) { + netdata_log_error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item(host->rrdctx.contexts, context); + if(!rca) return HTTP_RESP_NOT_FOUND; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute_query(&after, &before, NULL, false); + + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + struct rrdcontext_to_json t_contexts = { + .wb = wb, + .options = options|RRDCONTEXT_OPTION_SKIP_ID, + .chart_label_key = chart_label_key, + .chart_labels_filter = chart_labels_filter, + .chart_dimensions = chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = now_realtime_sec(), + }; + rrdcontext_to_json_callback((DICTIONARY_ITEM *)rca, rc, &t_contexts); + buffer_json_finalize(wb); + + rrdcontext_release(rca); + + if(!t_contexts.written) + return HTTP_RESP_NOT_FOUND; + + return HTTP_RESP_OK; +} + +int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions) { + if(!host->rrdctx.contexts) { + netdata_log_error("%s(): request for host '%s' that does not have rrdcontexts initialized.", __FUNCTION__, rrdhost_hostname(host)); + return HTTP_RESP_NOT_FOUND; + } + + char node_uuid[UUID_STR_LEN] = ""; + + if(host->node_id) + uuid_unparse(*host->node_id, node_uuid); + + if(after != 0 && before != 0) + rrdr_relative_window_to_absolute_query(&after, &before, NULL, false); + + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(host)); + buffer_json_member_add_string(wb, "machine_guid", host->machine_guid); + buffer_json_member_add_string(wb, "node_id", node_uuid); + buffer_json_member_add_string(wb, "claim_id", host->aclk_state.claimed_id ? host->aclk_state.claimed_id : ""); + + if(options & RRDCONTEXT_OPTION_SHOW_LABELS) { + buffer_json_member_add_object(wb, "host_labels"); + rrdlabels_to_buffer_json_members(host->rrdlabels, wb); + buffer_json_object_close(wb); + } + + buffer_json_member_add_object(wb, "contexts"); + struct rrdcontext_to_json t_contexts = { + .wb = wb, + .options = options, + .chart_label_key = chart_label_key, + .chart_labels_filter = chart_labels_filter, + .chart_dimensions = chart_dimensions, + .after = after, + .before = before, + .written = 0, + .now = now_realtime_sec(), + }; + dictionary_walkthrough_read(host->rrdctx.contexts, rrdcontext_to_json_callback, &t_contexts); + buffer_json_object_close(wb); + + buffer_json_finalize(wb); + + return HTTP_RESP_OK; +} + diff --git a/src/database/contexts/api_v2.c b/src/database/contexts/api_v2.c new file mode 100644 index 000000000..07cd3ac83 --- /dev/null +++ b/src/database/contexts/api_v2.c @@ -0,0 +1,2454 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +#include "aclk/aclk_capas.h" + +// ---------------------------------------------------------------------------- +// /api/v2/contexts API + +struct alert_transitions_facets alert_transition_facets[] = { + [ATF_STATUS] = { + .id = "f_status", + .name = "Alert Status", + .query_param = "f_status", + .order = 1, + }, + [ATF_TYPE] = { + .id = "f_type", + .name = "Alert Type", + .query_param = "f_type", + .order = 2, + }, + [ATF_ROLE] = { + .id = "f_role", + .name = "Recipient Role", + .query_param = "f_role", + .order = 3, + }, + [ATF_CLASS] = { + .id = "f_class", + .name = "Alert Class", + .query_param = "f_class", + .order = 4, + }, + [ATF_COMPONENT] = { + .id = "f_component", + .name = "Alert Component", + .query_param = "f_component", + .order = 5, + }, + [ATF_NODE] = { + .id = "f_node", + .name = "Alert Node", + .query_param = "f_node", + .order = 6, + }, + [ATF_ALERT_NAME] = { + .id = "f_alert", + .name = "Alert Name", + .query_param = "f_alert", + .order = 7, + }, + [ATF_CHART_NAME] = { + .id = "f_instance", + .name = "Instance Name", + .query_param = "f_instance", + .order = 8, + }, + [ATF_CONTEXT] = { + .id = "f_context", + .name = "Context", + .query_param = "f_context", + .order = 9, + }, + + // terminator + [ATF_TOTAL_ENTRIES] = { + .id = NULL, + .name = NULL, + .query_param = NULL, + .order = 9999, + } +}; + +struct facet_entry { + uint32_t count; +}; + +struct alert_transitions_callback_data { + struct rrdcontext_to_json_v2_data *ctl; + BUFFER *wb; + bool debug; + bool only_one_config; + + struct { + SIMPLE_PATTERN *pattern; + DICTIONARY *dict; + } facets[ATF_TOTAL_ENTRIES]; + + uint32_t max_items_to_return; + uint32_t items_to_return; + + uint32_t items_evaluated; + uint32_t items_matched; + + + struct sql_alert_transition_fixed_size *base; // double linked list - last item is base->prev + struct sql_alert_transition_fixed_size *last_added; // the last item added, not the last of the list + + struct { + size_t first; + size_t skips_before; + size_t skips_after; + size_t backwards; + size_t forwards; + size_t prepend; + size_t append; + size_t shifts; + } operations; + + uint32_t configs_added; +}; + +typedef enum __attribute__ ((__packed__)) { + FTS_MATCHED_NONE = 0, + FTS_MATCHED_HOST, + FTS_MATCHED_CONTEXT, + FTS_MATCHED_INSTANCE, + FTS_MATCHED_DIMENSION, + FTS_MATCHED_LABEL, + FTS_MATCHED_ALERT, + FTS_MATCHED_ALERT_INFO, + FTS_MATCHED_FAMILY, + FTS_MATCHED_TITLE, + FTS_MATCHED_UNITS, +} FTS_MATCH; + +static const char *fts_match_to_string(FTS_MATCH match) { + switch(match) { + case FTS_MATCHED_HOST: + return "HOST"; + + case FTS_MATCHED_CONTEXT: + return "CONTEXT"; + + case FTS_MATCHED_INSTANCE: + return "INSTANCE"; + + case FTS_MATCHED_DIMENSION: + return "DIMENSION"; + + case FTS_MATCHED_ALERT: + return "ALERT"; + + case FTS_MATCHED_ALERT_INFO: + return "ALERT_INFO"; + + case FTS_MATCHED_LABEL: + return "LABEL"; + + case FTS_MATCHED_FAMILY: + return "FAMILY"; + + case FTS_MATCHED_TITLE: + return "TITLE"; + + case FTS_MATCHED_UNITS: + return "UNITS"; + + default: + return "NONE"; + } +} + +struct function_v2_entry { + size_t size; + size_t used; + size_t *node_ids; + STRING *help; + STRING *tags; + HTTP_ACCESS access; + int priority; +}; + +struct context_v2_entry { + size_t count; + STRING *id; + STRING *family; + uint32_t priority; + time_t first_time_s; + time_t last_time_s; + RRD_FLAGS flags; + FTS_MATCH match; +}; + +struct alert_counts { + size_t critical; + size_t warning; + size_t clear; + size_t error; +}; + +struct alert_v2_entry { + RRDCALC *tmp; + + STRING *name; + STRING *summary; + RRDLABELS *recipient; + RRDLABELS *classification; + RRDLABELS *context; + RRDLABELS *component; + RRDLABELS *type; + + size_t ati; + + struct alert_counts counts; + + size_t instances; + DICTIONARY *nodes; + DICTIONARY *configs; +}; + +struct alert_by_x_entry { + struct { + struct alert_counts counts; + size_t silent; + size_t total; + } running; + + struct { + size_t available; + } prototypes; +}; + +typedef struct full_text_search_index { + size_t searches; + size_t string_searches; + size_t char_searches; +} FTS_INDEX; + +static inline bool full_text_search_string(FTS_INDEX *fts, SIMPLE_PATTERN *q, STRING *ptr) { + fts->searches++; + fts->string_searches++; + return simple_pattern_matches_string(q, ptr); +} + +static inline bool full_text_search_char(FTS_INDEX *fts, SIMPLE_PATTERN *q, char *ptr) { + fts->searches++; + fts->char_searches++; + return simple_pattern_matches(q, ptr); +} + +struct contexts_v2_node { + size_t ni; + RRDHOST *host; +}; + +struct rrdcontext_to_json_v2_data { + time_t now; + + BUFFER *wb; + struct api_v2_contexts_request *request; + + CONTEXTS_V2_MODE mode; + CONTEXTS_V2_OPTIONS options; + struct query_versions versions; + + struct { + SIMPLE_PATTERN *scope_pattern; + SIMPLE_PATTERN *pattern; + size_t ni; + DICTIONARY *dict; // the result set + } nodes; + + struct { + SIMPLE_PATTERN *scope_pattern; + SIMPLE_PATTERN *pattern; + size_t ci; + DICTIONARY *dict; // the result set + } contexts; + + struct { + SIMPLE_PATTERN *alert_name_pattern; + time_t alarm_id_filter; + + size_t ati; + + DICTIONARY *summary; + DICTIONARY *alert_instances; + + DICTIONARY *by_type; + DICTIONARY *by_component; + DICTIONARY *by_classification; + DICTIONARY *by_recipient; + DICTIONARY *by_module; + } alerts; + + struct { + FTS_MATCH host_match; + char host_node_id_str[UUID_STR_LEN]; + SIMPLE_PATTERN *pattern; + FTS_INDEX fts; + } q; + + struct { + DICTIONARY *dict; // the result set + } functions; + + struct { + bool enabled; + bool relative; + time_t after; + time_t before; + } window; + + struct query_timings timings; +}; + +static void alert_counts_add(struct alert_counts *t, RRDCALC *rc) { + switch(rc->status) { + case RRDCALC_STATUS_CRITICAL: + t->critical++; + break; + + case RRDCALC_STATUS_WARNING: + t->warning++; + break; + + case RRDCALC_STATUS_CLEAR: + t->clear++; + break; + + case RRDCALC_STATUS_REMOVED: + case RRDCALC_STATUS_UNINITIALIZED: + break; + + case RRDCALC_STATUS_UNDEFINED: + default: + if(!netdata_double_isnumber(rc->value)) + t->error++; + + break; + } +} + +static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) { + t->instances++; + + alert_counts_add(&t->counts, rc); + + dictionary_set(t->nodes, rc->rrdset->rrdhost->machine_guid, NULL, 0); + + char key[UUID_STR_LEN + 1]; + uuid_unparse_lower(rc->config.hash_id, key); + dictionary_set(t->configs, key, NULL, 0); +} + +static void alerts_by_x_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + static STRING *silent = NULL; + if(unlikely(!silent)) silent = string_strdupz("silent"); + + struct alert_by_x_entry *b = value; + RRDCALC *rc = data; + if(!rc) { + // prototype + b->prototypes.available++; + } + else { + alert_counts_add(&b->running.counts, rc); + + b->running.total++; + + if (rc->config.recipient == silent) + b->running.silent++; + } +} + +static bool alerts_by_x_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value __maybe_unused, void *data __maybe_unused) { + alerts_by_x_insert_callback(item, old_value, data); + return false; +} + +static void alerts_v2_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + struct rrdcontext_to_json_v2_data *ctl = data; + struct alert_v2_entry *t = value; + RRDCALC *rc = t->tmp; + t->name = rc->config.name; + t->summary = rc->config.summary; // the original summary + t->context = rrdlabels_create(); + t->recipient = rrdlabels_create(); + t->classification = rrdlabels_create(); + t->component = rrdlabels_create(); + t->type = rrdlabels_create(); + if (string_strlen(rc->rrdset->context)) + rrdlabels_add(t->context, string2str(rc->rrdset->context), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.recipient)) + rrdlabels_add(t->recipient, string2str(rc->config.recipient), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.classification)) + rrdlabels_add(t->classification, string2str(rc->config.classification), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.component)) + rrdlabels_add(t->component, string2str(rc->config.component), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.type)) + rrdlabels_add(t->type, string2str(rc->config.type), "yes", RRDLABEL_SRC_AUTO); + t->ati = ctl->alerts.ati++; + + t->nodes = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_VALUE_LINK_DONT_CLONE|DICT_OPTION_NAME_LINK_DONT_CLONE); + t->configs = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_VALUE_LINK_DONT_CLONE|DICT_OPTION_NAME_LINK_DONT_CLONE); + + alerts_v2_add(t, rc); +} + +static bool alerts_v2_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) { + struct alert_v2_entry *t = old_value, *n = new_value; + RRDCALC *rc = n->tmp; + if (string_strlen(rc->rrdset->context)) + rrdlabels_add(t->context, string2str(rc->rrdset->context), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.recipient)) + rrdlabels_add(t->recipient, string2str(rc->config.recipient), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.classification)) + rrdlabels_add(t->classification, string2str(rc->config.classification), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.component)) + rrdlabels_add(t->component, string2str(rc->config.component), "yes", RRDLABEL_SRC_AUTO); + if (string_strlen(rc->config.type)) + rrdlabels_add(t->type, string2str(rc->config.type), "yes", RRDLABEL_SRC_AUTO); + alerts_v2_add(t, rc); + return true; +} + +static void alerts_v2_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct alert_v2_entry *t = value; + + rrdlabels_destroy(t->context); + rrdlabels_destroy(t->recipient); + rrdlabels_destroy(t->classification); + rrdlabels_destroy(t->component); + rrdlabels_destroy(t->type); + + dictionary_destroy(t->nodes); + dictionary_destroy(t->configs); +} + +static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + struct rrdcontext_to_json_v2_data *ctl = data; + struct sql_alert_instance_v2_entry *t = value; + RRDCALC *rc = t->tmp; + + t->context = rc->rrdset->context; + t->chart_id = rc->rrdset->id; + t->chart_name = rc->rrdset->name; + t->family = rc->rrdset->family; + t->units = rc->config.units; + t->classification = rc->config.classification; + t->type = rc->config.type; + t->recipient = rc->config.recipient; + t->component = rc->config.component; + t->name = rc->config.name; + t->source = rc->config.source; + t->status = rc->status; + t->flags = rc->run_flags; + t->info = rc->config.info; + t->summary = rc->summary; + t->value = rc->value; + t->last_updated = rc->last_updated; + t->last_status_change = rc->last_status_change; + t->last_status_change_value = rc->last_status_change_value; + t->host = rc->rrdset->rrdhost; + t->alarm_id = rc->id; + t->ni = ctl->nodes.ni; + + uuid_copy(t->config_hash_id, rc->config.hash_id); + health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(rc, &t->global_id, &t->last_transition_id); +} + +static bool alert_instances_v2_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value __maybe_unused, void *new_value __maybe_unused, void *data __maybe_unused) { + internal_fatal(true, "This should never happen!"); + return true; +} + +static void alert_instances_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + ; +} + +static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_json_v2_data *ctl, RRDCONTEXT *rc, SIMPLE_PATTERN *q) { + if(unlikely(full_text_search_string(&ctl->q.fts, q, rc->id) || + full_text_search_string(&ctl->q.fts, q, rc->family))) + return FTS_MATCHED_CONTEXT; + + if(unlikely(full_text_search_string(&ctl->q.fts, q, rc->title))) + return FTS_MATCHED_TITLE; + + if(unlikely(full_text_search_string(&ctl->q.fts, q, rc->units))) + return FTS_MATCHED_UNITS; + + FTS_MATCH matched = FTS_MATCHED_NONE; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(matched) break; + + if(ctl->window.enabled && !query_matches_retention(ctl->window.after, ctl->window.before, ri->first_time_s, (ri->flags & RRD_FLAG_COLLECTED) ? ctl->now : ri->last_time_s, 0)) + continue; + + if(unlikely(full_text_search_string(&ctl->q.fts, q, ri->id)) || + (ri->name != ri->id && full_text_search_string(&ctl->q.fts, q, ri->name))) { + matched = FTS_MATCHED_INSTANCE; + break; + } + + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + if(ctl->window.enabled && !query_matches_retention(ctl->window.after, ctl->window.before, rm->first_time_s, (rm->flags & RRD_FLAG_COLLECTED) ? ctl->now : rm->last_time_s, 0)) + continue; + + if(unlikely(full_text_search_string(&ctl->q.fts, q, rm->id)) || + (rm->name != rm->id && full_text_search_string(&ctl->q.fts, q, rm->name))) { + matched = FTS_MATCHED_DIMENSION; + break; + } + } + dfe_done(rm); + + size_t label_searches = 0; + if(unlikely(ri->rrdlabels && rrdlabels_entries(ri->rrdlabels) && + rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, q, ':', &label_searches) == SP_MATCHED_POSITIVE)) { + ctl->q.fts.searches += label_searches; + ctl->q.fts.char_searches += label_searches; + matched = FTS_MATCHED_LABEL; + break; + } + ctl->q.fts.searches += label_searches; + ctl->q.fts.char_searches += label_searches; + + if(ri->rrdset) { + RRDSET *st = ri->rrdset; + rw_spinlock_read_lock(&st->alerts.spinlock); + for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) { + if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.name))) { + matched = FTS_MATCHED_ALERT; + break; + } + + if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.info))) { + matched = FTS_MATCHED_ALERT_INFO; + break; + } + } + rw_spinlock_read_unlock(&st->alerts.spinlock); + } + } + dfe_done(ri); + return matched; +} + +static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRDCONTEXT *rc) { + size_t matches = 0; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(ri->rrdset) { + RRDSET *st = ri->rrdset; + rw_spinlock_read_lock(&st->alerts.spinlock); + for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) { + if(ctl->alerts.alert_name_pattern && !simple_pattern_matches_string(ctl->alerts.alert_name_pattern, rcl->config.name)) + continue; + + if(ctl->alerts.alarm_id_filter && ctl->alerts.alarm_id_filter != rcl->id) + continue; + + size_t m = ctl->request->alerts.status & CONTEXTS_V2_ALERT_STATUSES ? 0 : 1; + + if (!m) { + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_UNINITIALIZED) && + rcl->status == RRDCALC_STATUS_UNINITIALIZED) + m++; + + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_UNDEFINED) && + rcl->status == RRDCALC_STATUS_UNDEFINED) + m++; + + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_CLEAR) && + rcl->status == RRDCALC_STATUS_CLEAR) + m++; + + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_RAISED) && + rcl->status >= RRDCALC_STATUS_RAISED) + m++; + + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_WARNING) && + rcl->status == RRDCALC_STATUS_WARNING) + m++; + + if ((ctl->request->alerts.status & CONTEXT_V2_ALERT_CRITICAL) && + rcl->status == RRDCALC_STATUS_CRITICAL) + m++; + + if(!m) + continue; + } + + struct alert_v2_entry t = { + .tmp = rcl, + }; + struct alert_v2_entry *a2e = + dictionary_set(ctl->alerts.summary, string2str(rcl->config.name), + &t, sizeof(struct alert_v2_entry)); + size_t ati = a2e->ati; + matches++; + + dictionary_set_advanced(ctl->alerts.by_type, + string2str(rcl->config.type), + (ssize_t)string_strlen(rcl->config.type), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_component, + string2str(rcl->config.component), + (ssize_t)string_strlen(rcl->config.component), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_classification, + string2str(rcl->config.classification), + (ssize_t)string_strlen(rcl->config.classification), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_recipient, + string2str(rcl->config.recipient), + (ssize_t)string_strlen(rcl->config.recipient), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + char *module = NULL; + rrdlabels_get_value_strdup_or_null(st->rrdlabels, &module, "_collect_module"); + if(!module || !*module) module = "[unset]"; + + dictionary_set_advanced(ctl->alerts.by_module, + module, + -1, + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + if (ctl->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { + char key[20 + 1]; + snprintfz(key, sizeof(key) - 1, "%p", rcl); + + struct sql_alert_instance_v2_entry z = { + .ati = ati, + .tmp = rcl, + }; + dictionary_set(ctl->alerts.alert_instances, key, &z, sizeof(z)); + } + } + rw_spinlock_read_unlock(&st->alerts.spinlock); + } + } + dfe_done(ri); + + return matches != 0; +} + + +static ssize_t rrdcontext_to_json_v2_add_context(void *data, RRDCONTEXT_ACQUIRED *rca, bool queryable_context __maybe_unused) { + struct rrdcontext_to_json_v2_data *ctl = data; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + if(ctl->window.enabled && !query_matches_retention(ctl->window.after, ctl->window.before, rc->first_time_s, (rc->flags & RRD_FLAG_COLLECTED) ? ctl->now : rc->last_time_s, 0)) + return 0; // continue to next context + + FTS_MATCH match = ctl->q.host_match; + if((ctl->mode & CONTEXTS_V2_SEARCH) && ctl->q.pattern) { + match = rrdcontext_to_json_v2_full_text_search(ctl, rc, ctl->q.pattern); + + if(match == FTS_MATCHED_NONE) + return 0; // continue to next context + } + + if(ctl->mode & CONTEXTS_V2_ALERTS) { + if(!rrdcontext_matches_alert(ctl, rc)) + return 0; // continue to next context + } + + if(ctl->contexts.dict) { + struct context_v2_entry t = { + .count = 1, + .id = rc->id, + .family = string_dup(rc->family), + .priority = rc->priority, + .first_time_s = rc->first_time_s, + .last_time_s = rc->last_time_s, + .flags = rc->flags, + .match = match, + }; + + dictionary_set(ctl->contexts.dict, string2str(rc->id), &t, sizeof(struct context_v2_entry)); + } + + return 1; +} + +void buffer_json_agent_status_id(BUFFER *wb, size_t ai, usec_t duration_ut) { + buffer_json_member_add_object(wb, "st"); + { + buffer_json_member_add_uint64(wb, "ai", ai); + buffer_json_member_add_uint64(wb, "code", 200); + buffer_json_member_add_string(wb, "msg", ""); + if (duration_ut) + buffer_json_member_add_double(wb, "ms", (NETDATA_DOUBLE) duration_ut / 1000.0); + } + buffer_json_object_close(wb); +} + +void buffer_json_node_add_v2(BUFFER *wb, RRDHOST *host, size_t ni, usec_t duration_ut, bool status) { + buffer_json_member_add_string(wb, "mg", host->machine_guid); + + if(host->node_id) + buffer_json_member_add_uuid(wb, "nd", host->node_id); + buffer_json_member_add_string(wb, "nm", rrdhost_hostname(host)); + buffer_json_member_add_uint64(wb, "ni", ni); + + if(status) + buffer_json_agent_status_id(wb, 0, duration_ut); +} + +static void rrdhost_receiver_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *key) { + buffer_json_member_add_object(wb, key); + { + buffer_json_member_add_uint64(wb, "id", s->ingest.id); + buffer_json_member_add_uint64(wb, "hops", s->ingest.hops); + buffer_json_member_add_string(wb, "type", rrdhost_ingest_type_to_string(s->ingest.type)); + buffer_json_member_add_string(wb, "status", rrdhost_ingest_status_to_string(s->ingest.status)); + buffer_json_member_add_time_t(wb, "since", s->ingest.since); + buffer_json_member_add_time_t(wb, "age", s->now - s->ingest.since); + + if(s->ingest.type == RRDHOST_INGEST_TYPE_CHILD) { + if(s->ingest.status == RRDHOST_INGEST_STATUS_OFFLINE) + buffer_json_member_add_string(wb, "reason", stream_handshake_error_to_string(s->ingest.reason)); + + if(s->ingest.status == RRDHOST_INGEST_STATUS_REPLICATING) { + buffer_json_member_add_object(wb, "replication"); + { + buffer_json_member_add_boolean(wb, "in_progress", s->ingest.replication.in_progress); + buffer_json_member_add_double(wb, "completion", s->ingest.replication.completion); + buffer_json_member_add_uint64(wb, "instances", s->ingest.replication.instances); + } + buffer_json_object_close(wb); // replication + } + + if(s->ingest.status == RRDHOST_INGEST_STATUS_REPLICATING || s->ingest.status == RRDHOST_INGEST_STATUS_ONLINE) { + buffer_json_member_add_object(wb, "source"); + { + char buf[1024 + 1]; + snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->ingest.peers.local.ip, s->ingest.peers.local.port, s->ingest.ssl ? ":SSL" : ""); + buffer_json_member_add_string(wb, "local", buf); + + snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->ingest.peers.peer.ip, s->ingest.peers.peer.port, s->ingest.ssl ? ":SSL" : ""); + buffer_json_member_add_string(wb, "remote", buf); + + stream_capabilities_to_json_array(wb, s->ingest.capabilities, "capabilities"); + } + buffer_json_object_close(wb); // source + } + } + } + buffer_json_object_close(wb); // collection +} + +static void rrdhost_sender_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *key) { + if(s->stream.status == RRDHOST_STREAM_STATUS_DISABLED) + return; + + buffer_json_member_add_object(wb, key); + { + buffer_json_member_add_uint64(wb, "id", s->stream.id); + buffer_json_member_add_uint64(wb, "hops", s->stream.hops); + buffer_json_member_add_string(wb, "status", rrdhost_streaming_status_to_string(s->stream.status)); + buffer_json_member_add_time_t(wb, "since", s->stream.since); + buffer_json_member_add_time_t(wb, "age", s->now - s->stream.since); + + if (s->stream.status == RRDHOST_STREAM_STATUS_OFFLINE) + buffer_json_member_add_string(wb, "reason", stream_handshake_error_to_string(s->stream.reason)); + + if (s->stream.status == RRDHOST_STREAM_STATUS_REPLICATING) { + buffer_json_member_add_object(wb, "replication"); + { + buffer_json_member_add_boolean(wb, "in_progress", s->stream.replication.in_progress); + buffer_json_member_add_double(wb, "completion", s->stream.replication.completion); + buffer_json_member_add_uint64(wb, "instances", s->stream.replication.instances); + } + buffer_json_object_close(wb); + } + + buffer_json_member_add_object(wb, "destination"); + { + char buf[1024 + 1]; + snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->stream.peers.local.ip, s->stream.peers.local.port, s->stream.ssl ? ":SSL" : ""); + buffer_json_member_add_string(wb, "local", buf); + + snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->stream.peers.peer.ip, s->stream.peers.peer.port, s->stream.ssl ? ":SSL" : ""); + buffer_json_member_add_string(wb, "remote", buf); + + stream_capabilities_to_json_array(wb, s->stream.capabilities, "capabilities"); + + buffer_json_member_add_object(wb, "traffic"); + { + buffer_json_member_add_boolean(wb, "compression", s->stream.compression); + buffer_json_member_add_uint64(wb, "data", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_DATA]); + buffer_json_member_add_uint64(wb, "metadata", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_METADATA]); + buffer_json_member_add_uint64(wb, "functions", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_FUNCTIONS]); + buffer_json_member_add_uint64(wb, "replication", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_REPLICATION]); + buffer_json_member_add_uint64(wb, "dyncfg", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_DYNCFG]); + } + buffer_json_object_close(wb); // traffic + + buffer_json_member_add_array(wb, "candidates"); + struct rrdpush_destinations *d; + for (d = s->host->destinations; d; d = d->next) { + buffer_json_add_array_item_object(wb); + buffer_json_member_add_uint64(wb, "attempts", d->attempts); + { + + if (d->ssl) { + snprintfz(buf, sizeof(buf) - 1, "%s:SSL", string2str(d->destination)); + buffer_json_member_add_string(wb, "destination", buf); + } + else + buffer_json_member_add_string(wb, "destination", string2str(d->destination)); + + buffer_json_member_add_time_t(wb, "since", d->since); + buffer_json_member_add_time_t(wb, "age", s->now - d->since); + buffer_json_member_add_string(wb, "last_handshake", stream_handshake_error_to_string(d->reason)); + if(d->postpone_reconnection_until > s->now) { + buffer_json_member_add_time_t(wb, "next_check", d->postpone_reconnection_until); + buffer_json_member_add_time_t(wb, "next_in", d->postpone_reconnection_until - s->now); + } + } + buffer_json_object_close(wb); // each candidate + } + buffer_json_array_close(wb); // candidates + } + buffer_json_object_close(wb); // destination + } + buffer_json_object_close(wb); // streaming +} + +static void agent_capabilities_to_json(BUFFER *wb, RRDHOST *host, const char *key) { + buffer_json_member_add_array(wb, key); + + struct capability *capas = aclk_get_node_instance_capas(host); + for(struct capability *capa = capas; capa->name ;capa++) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "name", capa->name); + buffer_json_member_add_uint64(wb, "version", capa->version); + buffer_json_member_add_boolean(wb, "enabled", capa->enabled); + } + buffer_json_object_close(wb); + } + buffer_json_array_close(wb); + freez(capas); +} + +static inline void host_dyncfg_to_json_v2(BUFFER *wb, const char *key, RRDHOST_STATUS *s) { + buffer_json_member_add_object(wb, key); + { + buffer_json_member_add_string(wb, "status", rrdhost_dyncfg_status_to_string(s->dyncfg.status)); + } + buffer_json_object_close(wb); // health + +} + +static inline void rrdhost_health_to_json_v2(BUFFER *wb, const char *key, RRDHOST_STATUS *s) { + buffer_json_member_add_object(wb, key); + { + buffer_json_member_add_string(wb, "status", rrdhost_health_status_to_string(s->health.status)); + if (s->health.status == RRDHOST_HEALTH_STATUS_RUNNING) { + buffer_json_member_add_object(wb, "alerts"); + { + buffer_json_member_add_uint64(wb, "critical", s->health.alerts.critical); + buffer_json_member_add_uint64(wb, "warning", s->health.alerts.warning); + buffer_json_member_add_uint64(wb, "clear", s->health.alerts.clear); + buffer_json_member_add_uint64(wb, "undefined", s->health.alerts.undefined); + buffer_json_member_add_uint64(wb, "uninitialized", s->health.alerts.uninitialized); + } + buffer_json_object_close(wb); // alerts + } + } + buffer_json_object_close(wb); // health +} + +static void rrdcontext_to_json_v2_rrdhost(BUFFER *wb, RRDHOST *host, struct rrdcontext_to_json_v2_data *ctl, size_t node_id) { + buffer_json_add_array_item_object(wb); // this node + buffer_json_node_add_v2(wb, host, node_id, 0, + (ctl->mode & CONTEXTS_V2_AGENTS) && !(ctl->mode & CONTEXTS_V2_NODE_INSTANCES)); + + if(ctl->mode & (CONTEXTS_V2_NODES_INFO | CONTEXTS_V2_NODE_INSTANCES)) { + RRDHOST_STATUS s; + rrdhost_status(host, ctl->now, &s); + + if (ctl->mode & (CONTEXTS_V2_NODES_INFO)) { + buffer_json_member_add_string(wb, "v", rrdhost_program_version(host)); + + host_labels2json(host, wb, "labels"); + + if (host->system_info) { + buffer_json_member_add_object(wb, "hw"); + { + buffer_json_member_add_string_or_empty(wb, "architecture", host->system_info->architecture); + buffer_json_member_add_string_or_empty(wb, "cpu_frequency", host->system_info->host_cpu_freq); + buffer_json_member_add_string_or_empty(wb, "cpus", host->system_info->host_cores); + buffer_json_member_add_string_or_empty(wb, "memory", host->system_info->host_ram_total); + buffer_json_member_add_string_or_empty(wb, "disk_space", host->system_info->host_disk_space); + buffer_json_member_add_string_or_empty(wb, "virtualization", host->system_info->virtualization); + buffer_json_member_add_string_or_empty(wb, "container", host->system_info->container); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "os"); + { + buffer_json_member_add_string_or_empty(wb, "id", host->system_info->host_os_id); + buffer_json_member_add_string_or_empty(wb, "nm", host->system_info->host_os_name); + buffer_json_member_add_string_or_empty(wb, "v", host->system_info->host_os_version); + buffer_json_member_add_object(wb, "kernel"); + buffer_json_member_add_string_or_empty(wb, "nm", host->system_info->kernel_name); + buffer_json_member_add_string_or_empty(wb, "v", host->system_info->kernel_version); + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); + } + + // created - the node is created but never connected to cloud + // unreachable - not currently connected + // stale - connected but not having live data + // reachable - connected with live data + // pruned - not connected for some time and has been removed + buffer_json_member_add_string(wb, "state", rrdhost_state_cloud_emulation(host) ? "reachable" : "stale"); + + rrdhost_health_to_json_v2(wb, "health", &s); + agent_capabilities_to_json(wb, host, "capabilities"); + } + + if (ctl->mode & (CONTEXTS_V2_NODE_INSTANCES)) { + buffer_json_member_add_array(wb, "instances"); + buffer_json_add_array_item_object(wb); // this instance + { + buffer_json_agent_status_id(wb, 0, 0); + + buffer_json_member_add_object(wb, "db"); + { + buffer_json_member_add_string(wb, "status", rrdhost_db_status_to_string(s.db.status)); + buffer_json_member_add_string(wb, "liveness", rrdhost_db_liveness_to_string(s.db.liveness)); + buffer_json_member_add_string(wb, "mode", rrd_memory_mode_name(s.db.mode)); + buffer_json_member_add_time_t(wb, "first_time", s.db.first_time_s); + buffer_json_member_add_time_t(wb, "last_time", s.db.last_time_s); + buffer_json_member_add_uint64(wb, "metrics", s.db.metrics); + buffer_json_member_add_uint64(wb, "instances", s.db.instances); + buffer_json_member_add_uint64(wb, "contexts", s.db.contexts); + } + buffer_json_object_close(wb); + + rrdhost_receiver_to_json(wb, &s, "ingest"); + rrdhost_sender_to_json(wb, &s, "stream"); + + buffer_json_member_add_object(wb, "ml"); + buffer_json_member_add_string(wb, "status", rrdhost_ml_status_to_string(s.ml.status)); + buffer_json_member_add_string(wb, "type", rrdhost_ml_type_to_string(s.ml.type)); + if (s.ml.status == RRDHOST_ML_STATUS_RUNNING) { + buffer_json_member_add_object(wb, "metrics"); + { + buffer_json_member_add_uint64(wb, "anomalous", s.ml.metrics.anomalous); + buffer_json_member_add_uint64(wb, "normal", s.ml.metrics.normal); + buffer_json_member_add_uint64(wb, "trained", s.ml.metrics.trained); + buffer_json_member_add_uint64(wb, "pending", s.ml.metrics.pending); + buffer_json_member_add_uint64(wb, "silenced", s.ml.metrics.silenced); + } + buffer_json_object_close(wb); // metrics + } + buffer_json_object_close(wb); // ml + + rrdhost_health_to_json_v2(wb, "health", &s); + + host_functions2json(host, wb); // functions + agent_capabilities_to_json(wb, host, "capabilities"); + + host_dyncfg_to_json_v2(wb, "dyncfg", &s); + } + buffer_json_object_close(wb); // this instance + buffer_json_array_close(wb); // instances + } + } + buffer_json_object_close(wb); // this node +} + +static ssize_t rrdcontext_to_json_v2_add_host(void *data, RRDHOST *host, bool queryable_host) { + if(!queryable_host || !host->rrdctx.contexts) + // the host matches the 'scope_host' but does not match the 'host' patterns + // or the host does not have any contexts + return 0; // continue to next host + + struct rrdcontext_to_json_v2_data *ctl = data; + + if(ctl->window.enabled && !rrdhost_matches_window(host, ctl->window.after, ctl->window.before, ctl->now)) + // the host does not have data in the requested window + return 0; // continue to next host + + if(ctl->request->timeout_ms && now_monotonic_usec() > ctl->timings.received_ut + ctl->request->timeout_ms * USEC_PER_MS) + // timed out + return -2; // stop the query + + if(ctl->request->interrupt_callback && ctl->request->interrupt_callback(ctl->request->interrupt_callback_data)) + // interrupted + return -1; // stop the query + + bool host_matched = (ctl->mode & CONTEXTS_V2_NODES); + bool do_contexts = (ctl->mode & (CONTEXTS_V2_CONTEXTS | CONTEXTS_V2_ALERTS)); + + ctl->q.host_match = FTS_MATCHED_NONE; + if((ctl->mode & CONTEXTS_V2_SEARCH)) { + // check if we match the host itself + if(ctl->q.pattern && ( + full_text_search_string(&ctl->q.fts, ctl->q.pattern, host->hostname) || + full_text_search_char(&ctl->q.fts, ctl->q.pattern, host->machine_guid) || + (ctl->q.pattern && full_text_search_char(&ctl->q.fts, ctl->q.pattern, ctl->q.host_node_id_str)))) { + ctl->q.host_match = FTS_MATCHED_HOST; + do_contexts = true; + } + } + + if(do_contexts) { + // save it + SIMPLE_PATTERN *old_q = ctl->q.pattern; + + if(ctl->q.host_match == FTS_MATCHED_HOST) + // do not do pattern matching on contexts - we matched the host itself + ctl->q.pattern = NULL; + + ssize_t added = query_scope_foreach_context( + host, ctl->request->scope_contexts, + ctl->contexts.scope_pattern, ctl->contexts.pattern, + rrdcontext_to_json_v2_add_context, queryable_host, ctl); + + // restore it + ctl->q.pattern = old_q; + + if(unlikely(added < 0)) + return -1; // stop the query + + if(added) + host_matched = true; + } + + if(!host_matched) + return 0; + + if(ctl->mode & CONTEXTS_V2_FUNCTIONS) { + struct function_v2_entry t = { + .used = 1, + .size = 1, + .node_ids = &ctl->nodes.ni, + .help = NULL, + .tags = NULL, + .access = HTTP_ACCESS_ALL, + .priority = RRDFUNCTIONS_PRIORITY_DEFAULT, + }; + host_functions_to_dict(host, ctl->functions.dict, &t, sizeof(t), &t.help, &t.tags, &t.access, &t.priority); + } + + if(ctl->mode & CONTEXTS_V2_NODES) { + struct contexts_v2_node t = { + .ni = ctl->nodes.ni++, + .host = host, + }; + + dictionary_set(ctl->nodes.dict, host->machine_guid, &t, sizeof(struct contexts_v2_node)); + } + + return 1; +} + +static void buffer_json_contexts_v2_mode_to_array(BUFFER *wb, const char *key, CONTEXTS_V2_MODE mode) { + buffer_json_member_add_array(wb, key); + + if(mode & CONTEXTS_V2_VERSIONS) + buffer_json_add_array_item_string(wb, "versions"); + + if(mode & CONTEXTS_V2_AGENTS) + buffer_json_add_array_item_string(wb, "agents"); + + if(mode & CONTEXTS_V2_AGENTS_INFO) + buffer_json_add_array_item_string(wb, "agents-info"); + + if(mode & CONTEXTS_V2_NODES) + buffer_json_add_array_item_string(wb, "nodes"); + + if(mode & CONTEXTS_V2_NODES_INFO) + buffer_json_add_array_item_string(wb, "nodes-info"); + + if(mode & CONTEXTS_V2_NODE_INSTANCES) + buffer_json_add_array_item_string(wb, "nodes-instances"); + + if(mode & CONTEXTS_V2_CONTEXTS) + buffer_json_add_array_item_string(wb, "contexts"); + + if(mode & CONTEXTS_V2_SEARCH) + buffer_json_add_array_item_string(wb, "search"); + + if(mode & CONTEXTS_V2_ALERTS) + buffer_json_add_array_item_string(wb, "alerts"); + + if(mode & CONTEXTS_V2_ALERT_TRANSITIONS) + buffer_json_add_array_item_string(wb, "alert_transitions"); + + buffer_json_array_close(wb); +} + +void buffer_json_query_timings(BUFFER *wb, const char *key, struct query_timings *timings) { + timings->finished_ut = now_monotonic_usec(); + if(!timings->executed_ut) + timings->executed_ut = timings->finished_ut; + if(!timings->preprocessed_ut) + timings->preprocessed_ut = timings->received_ut; + buffer_json_member_add_object(wb, key); + buffer_json_member_add_double(wb, "prep_ms", (NETDATA_DOUBLE)(timings->preprocessed_ut - timings->received_ut) / USEC_PER_MS); + buffer_json_member_add_double(wb, "query_ms", (NETDATA_DOUBLE)(timings->executed_ut - timings->preprocessed_ut) / USEC_PER_MS); + buffer_json_member_add_double(wb, "output_ms", (NETDATA_DOUBLE)(timings->finished_ut - timings->executed_ut) / USEC_PER_MS); + buffer_json_member_add_double(wb, "total_ms", (NETDATA_DOUBLE)(timings->finished_ut - timings->received_ut) / USEC_PER_MS); + buffer_json_member_add_double(wb, "cloud_ms", (NETDATA_DOUBLE)(timings->finished_ut - timings->received_ut) / USEC_PER_MS); + buffer_json_object_close(wb); +} + +void build_info_to_json_object(BUFFER *b); + +static void convert_seconds_to_dhms(time_t seconds, char *result, int result_size) { + int days, hours, minutes; + + days = (int) (seconds / (24 * 3600)); + seconds = (int) (seconds % (24 * 3600)); + hours = (int) (seconds / 3600); + seconds %= 3600; + minutes = (int) (seconds / 60); + seconds %= 60; + + // Format the result into the provided string buffer + BUFFER *buf = buffer_create(128, NULL); + if (days) + buffer_sprintf(buf,"%d day%s%s", days, days==1 ? "" : "s", hours || minutes ? ", " : ""); + if (hours) + buffer_sprintf(buf,"%d hour%s%s", hours, hours==1 ? "" : "s", minutes ? ", " : ""); + if (minutes) + buffer_sprintf(buf,"%d minute%s%s", minutes, minutes==1 ? "" : "s", seconds ? ", " : ""); + if (seconds) + buffer_sprintf(buf,"%d second%s", (int) seconds, seconds==1 ? "" : "s"); + strncpyz(result, buffer_tostring(buf), result_size); + buffer_free(buf); +} + +void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now_s, bool info, bool array) { + if(!now_s) + now_s = now_realtime_sec(); + + if(array) { + buffer_json_member_add_array(wb, "agents"); + buffer_json_add_array_item_object(wb); + } + else + buffer_json_member_add_object(wb, "agent"); + + buffer_json_member_add_string(wb, "mg", localhost->machine_guid); + buffer_json_member_add_uuid(wb, "nd", localhost->node_id); + buffer_json_member_add_string(wb, "nm", rrdhost_hostname(localhost)); + buffer_json_member_add_time_t(wb, "now", now_s); + + if(array) + buffer_json_member_add_uint64(wb, "ai", 0); + + if(info) { + buffer_json_member_add_object(wb, "application"); + build_info_to_json_object(wb); + buffer_json_object_close(wb); // netdata + + buffer_json_cloud_status(wb, now_s); + + buffer_json_member_add_array(wb, "db_size"); + size_t group_seconds = localhost->rrd_update_every; + for (size_t tier = 0; tier < storage_tiers; tier++) { + STORAGE_ENGINE *eng = localhost->db[tier].eng; + if (!eng) continue; + + group_seconds *= storage_tiers_grouping_iterations[tier]; + uint64_t max = storage_engine_disk_space_max(eng->seb, localhost->db[tier].si); + uint64_t used = storage_engine_disk_space_used(eng->seb, localhost->db[tier].si); +#ifdef ENABLE_DBENGINE + if (!max && eng->seb == STORAGE_ENGINE_BACKEND_DBENGINE) { + max = get_directory_free_bytes_space(multidb_ctx[tier]); + max += used; + } +#endif + time_t first_time_s = storage_engine_global_first_time_s(eng->seb, localhost->db[tier].si); + size_t currently_collected_metrics = storage_engine_collected_metrics(eng->seb, localhost->db[tier].si); + + NETDATA_DOUBLE percent; + if (used && max) + percent = (NETDATA_DOUBLE) used * 100.0 / (NETDATA_DOUBLE) max; + else + percent = 0.0; + + buffer_json_add_array_item_object(wb); + buffer_json_member_add_uint64(wb, "tier", tier); + char human_retention[128]; + convert_seconds_to_dhms((time_t) group_seconds, human_retention, sizeof(human_retention) - 1); + buffer_json_member_add_string(wb, "point_every", human_retention); + + buffer_json_member_add_uint64(wb, "metrics", storage_engine_metrics(eng->seb, localhost->db[tier].si)); + buffer_json_member_add_uint64(wb, "samples", storage_engine_samples(eng->seb, localhost->db[tier].si)); + + if(used || max) { + buffer_json_member_add_uint64(wb, "disk_used", used); + buffer_json_member_add_uint64(wb, "disk_max", max); + buffer_json_member_add_double(wb, "disk_percent", percent); + } + + if(first_time_s) { + time_t retention = now_s - first_time_s; + + buffer_json_member_add_time_t(wb, "from", first_time_s); + buffer_json_member_add_time_t(wb, "to", now_s); + buffer_json_member_add_time_t(wb, "retention", retention); + + convert_seconds_to_dhms(retention, human_retention, sizeof(human_retention) - 1); + buffer_json_member_add_string(wb, "retention_human", human_retention); + + if(used || max) { // we have disk space information + time_t time_retention = 0; +#ifdef ENABLE_DBENGINE + time_retention = multidb_ctx[tier]->config.max_retention_s; +#endif + time_t space_retention = (time_t)((NETDATA_DOUBLE)(now_s - first_time_s) * 100.0 / percent); + time_t actual_retention = MIN(space_retention, time_retention ? time_retention : space_retention); + + if (time_retention) { + convert_seconds_to_dhms(time_retention, human_retention, sizeof(human_retention) - 1); + buffer_json_member_add_time_t(wb, "requested_retention", time_retention); + buffer_json_member_add_string(wb, "requested_retention_human", human_retention); + } + + convert_seconds_to_dhms(actual_retention, human_retention, sizeof(human_retention) - 1); + buffer_json_member_add_time_t(wb, "expected_retention", actual_retention); + buffer_json_member_add_string(wb, "expected_retention_human", human_retention); + } + } + + if(currently_collected_metrics) + buffer_json_member_add_uint64(wb, "currently_collected_metrics", currently_collected_metrics); + + buffer_json_object_close(wb); + } + buffer_json_array_close(wb); // db_size + } + + if(timings) + buffer_json_query_timings(wb, "timings", timings); + + buffer_json_object_close(wb); + + if(array) + buffer_json_array_close(wb); +} + +void buffer_json_cloud_timings(BUFFER *wb, const char *key, struct query_timings *timings) { + if(!timings->finished_ut) + timings->finished_ut = now_monotonic_usec(); + + buffer_json_member_add_object(wb, key); + buffer_json_member_add_double(wb, "routing_ms", 0.0); + buffer_json_member_add_double(wb, "node_max_ms", 0.0); + buffer_json_member_add_double(wb, "total_ms", (NETDATA_DOUBLE)(timings->finished_ut - timings->received_ut) / USEC_PER_MS); + buffer_json_object_close(wb); +} + +static void functions_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct function_v2_entry *t = value; + + // it is initialized with a static reference - we need to mallocz() the array + size_t *v = t->node_ids; + t->node_ids = mallocz(sizeof(size_t)); + *t->node_ids = *v; + t->size = 1; + t->used = 1; +} + +static bool functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) { + struct function_v2_entry *t = old_value, *n = new_value; + size_t *v = n->node_ids; + + if(t->used >= t->size) { + t->node_ids = reallocz(t->node_ids, t->size * 2 * sizeof(size_t)); + t->size *= 2; + } + + t->node_ids[t->used++] = *v; + + return true; +} + +static void functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct function_v2_entry *t = value; + freez(t->node_ids); +} + +static bool contexts_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) { + struct context_v2_entry *o = old_value; + struct context_v2_entry *n = new_value; + + o->count++; + + if(o->family != n->family) { + if((o->flags & RRD_FLAG_COLLECTED) && !(n->flags & RRD_FLAG_COLLECTED)) + // keep old + ; + else if(!(o->flags & RRD_FLAG_COLLECTED) && (n->flags & RRD_FLAG_COLLECTED)) { + // keep new + string_freez(o->family); + o->family = string_dup(n->family); + } + else { + // merge + STRING *old_family = o->family; + o->family = string_2way_merge(o->family, n->family); + string_freez(old_family); + } + } + + if(o->priority != n->priority) { + if((o->flags & RRD_FLAG_COLLECTED) && !(n->flags & RRD_FLAG_COLLECTED)) + // keep o + ; + else if(!(o->flags & RRD_FLAG_COLLECTED) && (n->flags & RRD_FLAG_COLLECTED)) + // keep n + o->priority = n->priority; + else + // keep the min + o->priority = MIN(o->priority, n->priority); + } + + if(o->first_time_s && n->first_time_s) + o->first_time_s = MIN(o->first_time_s, n->first_time_s); + else if(!o->first_time_s) + o->first_time_s = n->first_time_s; + + if(o->last_time_s && n->last_time_s) + o->last_time_s = MAX(o->last_time_s, n->last_time_s); + else if(!o->last_time_s) + o->last_time_s = n->last_time_s; + + o->flags |= n->flags; + o->match = MIN(o->match, n->match); + + string_freez(n->family); + + return true; +} + +static void contexts_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct context_v2_entry *z = value; + string_freez(z->family); +} + +static void rrdcontext_v2_set_transition_filter(const char *machine_guid, const char *context, time_t alarm_id, void *data) { + struct rrdcontext_to_json_v2_data *ctl = data; + + if(machine_guid && *machine_guid) { + if(ctl->nodes.scope_pattern) + simple_pattern_free(ctl->nodes.scope_pattern); + + if(ctl->nodes.pattern) + simple_pattern_free(ctl->nodes.pattern); + + ctl->nodes.scope_pattern = string_to_simple_pattern(machine_guid); + ctl->nodes.pattern = NULL; + } + + if(context && *context) { + if(ctl->contexts.scope_pattern) + simple_pattern_free(ctl->contexts.scope_pattern); + + if(ctl->contexts.pattern) + simple_pattern_free(ctl->contexts.pattern); + + ctl->contexts.scope_pattern = string_to_simple_pattern(context); + ctl->contexts.pattern = NULL; + } + + ctl->alerts.alarm_id_filter = alarm_id; +} + +struct alert_instances_callback_data { + BUFFER *wb; + struct rrdcontext_to_json_v2_data *ctl; + bool debug; +}; + +static void contexts_v2_alert_config_to_json_from_sql_alert_config_data(struct sql_alert_config_data *t, void *data) { + struct alert_transitions_callback_data *d = data; + BUFFER *wb = d->wb; + bool debug = d->debug; + d->configs_added++; + + if(d->only_one_config) + buffer_json_add_array_item_object(wb); // alert config + + { + buffer_json_member_add_string(wb, "name", t->name); + buffer_json_member_add_uuid(wb, "config_hash_id", t->config_hash_id); + + buffer_json_member_add_object(wb, "selectors"); + { + bool is_template = t->selectors.on_template && *t->selectors.on_template ? true : false; + buffer_json_member_add_string(wb, "type", is_template ? "template" : "alarm"); + buffer_json_member_add_string(wb, "on", is_template ? t->selectors.on_template : t->selectors.on_key); + + buffer_json_member_add_string(wb, "families", t->selectors.families); + buffer_json_member_add_string(wb, "host_labels", t->selectors.host_labels); + buffer_json_member_add_string(wb, "chart_labels", t->selectors.chart_labels); + } + buffer_json_object_close(wb); // selectors + + buffer_json_member_add_object(wb, "value"); // value + { + // buffer_json_member_add_string(wb, "every", t->value.every); // does not exist in Netdata Cloud + buffer_json_member_add_string(wb, "units", t->value.units); + buffer_json_member_add_uint64(wb, "update_every", t->value.update_every); + + if (t->value.db.after || debug) { + buffer_json_member_add_object(wb, "db"); + { + // buffer_json_member_add_string(wb, "lookup", t->value.db.lookup); // does not exist in Netdata Cloud + + buffer_json_member_add_time_t(wb, "after", t->value.db.after); + buffer_json_member_add_time_t(wb, "before", t->value.db.before); + buffer_json_member_add_string(wb, "time_group_condition", alerts_group_conditions_id2txt(t->value.db.time_group_condition)); + buffer_json_member_add_double(wb, "time_group_value", t->value.db.time_group_value); + buffer_json_member_add_string(wb, "dims_group", alerts_dims_grouping_id2group(t->value.db.dims_group)); + buffer_json_member_add_string(wb, "data_source", alerts_data_source_id2source(t->value.db.data_source)); + buffer_json_member_add_string(wb, "method", t->value.db.method); + buffer_json_member_add_string(wb, "dimensions", t->value.db.dimensions); + rrdr_options_to_buffer_json_array(wb, "options", (RRDR_OPTIONS)t->value.db.options); + } + buffer_json_object_close(wb); // db + } + + if (t->value.calc || debug) + buffer_json_member_add_string(wb, "calc", t->value.calc); + } + buffer_json_object_close(wb); // value + + if (t->status.warn || t->status.crit || debug) { + buffer_json_member_add_object(wb, "status"); // status + { + NETDATA_DOUBLE green = t->status.green ? str2ndd(t->status.green, NULL) : NAN; + NETDATA_DOUBLE red = t->status.red ? str2ndd(t->status.red, NULL) : NAN; + + if (!isnan(green) || debug) + buffer_json_member_add_double(wb, "green", green); + + if (!isnan(red) || debug) + buffer_json_member_add_double(wb, "red", red); + + if (t->status.warn || debug) + buffer_json_member_add_string(wb, "warn", t->status.warn); + + if (t->status.crit || debug) + buffer_json_member_add_string(wb, "crit", t->status.crit); + } + buffer_json_object_close(wb); // status + } + + buffer_json_member_add_object(wb, "notification"); + { + buffer_json_member_add_string(wb, "type", "agent"); + buffer_json_member_add_string(wb, "exec", t->notification.exec ? t->notification.exec : NULL); + buffer_json_member_add_string(wb, "to", t->notification.to_key ? t->notification.to_key : string2str(localhost->health.health_default_recipient)); + buffer_json_member_add_string(wb, "delay", t->notification.delay); + buffer_json_member_add_string(wb, "repeat", t->notification.repeat); + buffer_json_member_add_string(wb, "options", t->notification.options); + } + buffer_json_object_close(wb); // notification + + buffer_json_member_add_string(wb, "class", t->classification); + buffer_json_member_add_string(wb, "component", t->component); + buffer_json_member_add_string(wb, "type", t->type); + buffer_json_member_add_string(wb, "info", t->info); + buffer_json_member_add_string(wb, "summary", t->summary); + // buffer_json_member_add_string(wb, "source", t->source); // moved to alert instance + } + + if(d->only_one_config) + buffer_json_object_close(wb); +} + +int contexts_v2_alert_config_to_json(struct web_client *w, const char *config_hash_id) { + struct alert_transitions_callback_data data = { + .wb = w->response.data, + .debug = false, + .only_one_config = false, + }; + DICTIONARY *configs = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_set(configs, config_hash_id, NULL, 0); + + buffer_flush(w->response.data); + + buffer_json_initialize(w->response.data, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + int added = sql_get_alert_configuration(configs, contexts_v2_alert_config_to_json_from_sql_alert_config_data, &data, false); + buffer_json_finalize(w->response.data); + + int ret = HTTP_RESP_OK; + + if(added <= 0) { + buffer_flush(w->response.data); + w->response.data->content_type = CT_TEXT_PLAIN; + if(added < 0) { + buffer_strcat(w->response.data, "Failed to execute SQL query."); + ret = HTTP_RESP_INTERNAL_SERVER_ERROR; + } + else { + buffer_strcat(w->response.data, "Config is not found."); + ret = HTTP_RESP_NOT_FOUND; + } + } + + return ret; +} + +static int contexts_v2_alert_instance_to_json_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + struct sql_alert_instance_v2_entry *t = value; + struct alert_instances_callback_data *d = data; + struct rrdcontext_to_json_v2_data *ctl = d->ctl; (void)ctl; + bool debug = d->debug; (void)debug; + BUFFER *wb = d->wb; + + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_uint64(wb, "ni", t->ni); + + buffer_json_member_add_string(wb, "nm", string2str(t->name)); + buffer_json_member_add_string(wb, "ch", string2str(t->chart_id)); + buffer_json_member_add_string(wb, "ch_n", string2str(t->chart_name)); + + if(ctl->request->options & CONTEXT_V2_OPTION_ALERTS_WITH_SUMMARY) + buffer_json_member_add_uint64(wb, "ati", t->ati); + + if(ctl->request->options & CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES) { + buffer_json_member_add_string(wb, "units", string2str(t->units)); + buffer_json_member_add_string(wb, "fami", string2str(t->family)); + buffer_json_member_add_string(wb, "info", string2str(t->info)); + buffer_json_member_add_string(wb, "sum", string2str(t->summary)); + buffer_json_member_add_string(wb, "ctx", string2str(t->context)); + buffer_json_member_add_string(wb, "st", rrdcalc_status2string(t->status)); + buffer_json_member_add_uuid(wb, "tr_i", &t->last_transition_id); + buffer_json_member_add_double(wb, "tr_v", t->last_status_change_value); + buffer_json_member_add_time_t(wb, "tr_t", t->last_status_change); + buffer_json_member_add_uuid(wb, "cfg", &t->config_hash_id); + buffer_json_member_add_string(wb, "src", string2str(t->source)); + + buffer_json_member_add_string(wb, "to", string2str(t->recipient)); + buffer_json_member_add_string(wb, "tp", string2str(t->type)); + buffer_json_member_add_string(wb, "cm", string2str(t->component)); + buffer_json_member_add_string(wb, "cl", string2str(t->classification)); + + // Agent specific fields + buffer_json_member_add_uint64(wb, "gi", t->global_id); + // rrdcalc_flags_to_json_array (wb, "flags", t->flags); + } + + if(ctl->request->options & CONTEXT_V2_OPTION_ALERTS_WITH_VALUES) { + // Netdata Cloud fetched these by querying the agents + buffer_json_member_add_double(wb, "v", t->value); + buffer_json_member_add_time_t(wb, "t", t->last_updated); + } + } + buffer_json_object_close(wb); // alert instance + + return 1; +} + +static void contexts_v2_alerts_by_x_update_prototypes(void *data, STRING *type, STRING *component, STRING *classification, STRING *recipient) { + struct rrdcontext_to_json_v2_data *ctl = data; + + dictionary_set_advanced(ctl->alerts.by_type, string2str(type), (ssize_t)string_strlen(type), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_component, string2str(component), (ssize_t)string_strlen(component), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_classification, string2str(classification), (ssize_t)string_strlen(classification), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_recipient, string2str(recipient), (ssize_t)string_strlen(recipient), NULL, sizeof(struct alert_by_x_entry), NULL); +} + +static void contexts_v2_alerts_by_x_to_json(BUFFER *wb, DICTIONARY *dict, const char *key) { + buffer_json_member_add_array(wb, key); + { + struct alert_by_x_entry *b; + dfe_start_read(dict, b) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "name", b_dfe.name); + buffer_json_member_add_uint64(wb, "cr", b->running.counts.critical); + buffer_json_member_add_uint64(wb, "wr", b->running.counts.warning); + buffer_json_member_add_uint64(wb, "cl", b->running.counts.clear); + buffer_json_member_add_uint64(wb, "er", b->running.counts.error); + buffer_json_member_add_uint64(wb, "running", b->running.total); + + buffer_json_member_add_uint64(wb, "running_silent", b->running.silent); + + if(b->prototypes.available) + buffer_json_member_add_uint64(wb, "available", b->prototypes.available); + } + buffer_json_object_close(wb); + } + dfe_done(b); + } + buffer_json_array_close(wb); +} + +static void contexts_v2_alert_instances_to_json(BUFFER *wb, const char *key, struct rrdcontext_to_json_v2_data *ctl, bool debug) { + buffer_json_member_add_array(wb, key); + { + struct alert_instances_callback_data data = { + .wb = wb, + .ctl = ctl, + .debug = debug, + }; + dictionary_walkthrough_rw(ctl->alerts.alert_instances, DICTIONARY_LOCK_READ, + contexts_v2_alert_instance_to_json_callback, &data); + } + buffer_json_array_close(wb); // alerts_instances +} + +static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_data *ctl, bool debug) { + if(ctl->request->options & CONTEXT_V2_OPTION_ALERTS_WITH_SUMMARY) { + buffer_json_member_add_array(wb, "alerts"); + { + struct alert_v2_entry *t; + dfe_start_read(ctl->alerts.summary, t) + { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_uint64(wb, "ati", t->ati); + + buffer_json_member_add_array(wb, "ni"); + void *host_guid; + dfe_start_read(t->nodes, host_guid) { + struct contexts_v2_node *cn = dictionary_get(ctl->nodes.dict,host_guid_dfe.name); + buffer_json_add_array_item_int64(wb, (int64_t) cn->ni); + } + dfe_done(host_guid); + buffer_json_array_close(wb); + + buffer_json_member_add_string(wb, "nm", string2str(t->name)); + buffer_json_member_add_string(wb, "sum", string2str(t->summary)); + + buffer_json_member_add_uint64(wb, "cr", t->counts.critical); + buffer_json_member_add_uint64(wb, "wr", t->counts.warning); + buffer_json_member_add_uint64(wb, "cl", t->counts.clear); + buffer_json_member_add_uint64(wb, "er", t->counts.error); + + buffer_json_member_add_uint64(wb, "in", t->instances); + buffer_json_member_add_uint64(wb, "nd", dictionary_entries(t->nodes)); + buffer_json_member_add_uint64(wb, "cfg", dictionary_entries(t->configs)); + + buffer_json_member_add_array(wb, "ctx"); + rrdlabels_key_to_buffer_array_item(t->context, wb); + buffer_json_array_close(wb); // ctx + + buffer_json_member_add_array(wb, "cls"); + rrdlabels_key_to_buffer_array_item(t->classification, wb); + buffer_json_array_close(wb); // classification + + + buffer_json_member_add_array(wb, "cp"); + rrdlabels_key_to_buffer_array_item(t->component, wb); + buffer_json_array_close(wb); // component + + buffer_json_member_add_array(wb, "ty"); + rrdlabels_key_to_buffer_array_item(t->type, wb); + buffer_json_array_close(wb); // type + + buffer_json_member_add_array(wb, "to"); + rrdlabels_key_to_buffer_array_item(t->recipient, wb); + buffer_json_array_close(wb); // recipient + } + buffer_json_object_close(wb); // alert name + } + dfe_done(t); + } + buffer_json_array_close(wb); // alerts + + health_prototype_metadata_foreach(ctl, contexts_v2_alerts_by_x_update_prototypes); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_type, "alerts_by_type"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_component, "alerts_by_component"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_classification, "alerts_by_classification"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_recipient, "alerts_by_recipient"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_module, "alerts_by_module"); + } + + if(ctl->request->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES|CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { + contexts_v2_alert_instances_to_json(wb, "alert_instances", ctl, debug); + } +} + +#define SQL_TRANSITION_DATA_SMALL_STRING (6 * 8) +#define SQL_TRANSITION_DATA_MEDIUM_STRING (12 * 8) +#define SQL_TRANSITION_DATA_BIG_STRING 512 + +struct sql_alert_transition_fixed_size { + usec_t global_id; + nd_uuid_t transition_id; + nd_uuid_t host_id; + nd_uuid_t config_hash_id; + uint32_t alarm_id; + char alert_name[SQL_TRANSITION_DATA_SMALL_STRING]; + char chart[RRD_ID_LENGTH_MAX]; + char chart_name[RRD_ID_LENGTH_MAX]; + char chart_context[SQL_TRANSITION_DATA_MEDIUM_STRING]; + char family[SQL_TRANSITION_DATA_SMALL_STRING]; + char recipient[SQL_TRANSITION_DATA_MEDIUM_STRING]; + char units[SQL_TRANSITION_DATA_SMALL_STRING]; + char exec[SQL_TRANSITION_DATA_BIG_STRING]; + char info[SQL_TRANSITION_DATA_BIG_STRING]; + char summary[SQL_TRANSITION_DATA_BIG_STRING]; + char classification[SQL_TRANSITION_DATA_SMALL_STRING]; + char type[SQL_TRANSITION_DATA_SMALL_STRING]; + char component[SQL_TRANSITION_DATA_SMALL_STRING]; + time_t when_key; + time_t duration; + time_t non_clear_duration; + uint64_t flags; + time_t delay_up_to_timestamp; + time_t exec_run_timestamp; + int exec_code; + int new_status; + int old_status; + int delay; + time_t last_repeat; + NETDATA_DOUBLE new_value; + NETDATA_DOUBLE old_value; + + char machine_guid[UUID_STR_LEN]; + struct sql_alert_transition_fixed_size *next; + struct sql_alert_transition_fixed_size *prev; +}; + +static struct sql_alert_transition_fixed_size *contexts_v2_alert_transition_dup(struct sql_alert_transition_data *t, const char *machine_guid, struct sql_alert_transition_fixed_size *dst) { + struct sql_alert_transition_fixed_size *n = dst ? dst : mallocz(sizeof(*n)); + + n->global_id = t->global_id; + uuid_copy(n->transition_id, *t->transition_id); + uuid_copy(n->host_id, *t->host_id); + uuid_copy(n->config_hash_id, *t->config_hash_id); + n->alarm_id = t->alarm_id; + strncpyz(n->alert_name, t->alert_name ? t->alert_name : "", sizeof(n->alert_name) - 1); + strncpyz(n->chart, t->chart ? t->chart : "", sizeof(n->chart) - 1); + strncpyz(n->chart_name, t->chart_name ? t->chart_name : n->chart, sizeof(n->chart_name) - 1); + strncpyz(n->chart_context, t->chart_context ? t->chart_context : "", sizeof(n->chart_context) - 1); + strncpyz(n->family, t->family ? t->family : "", sizeof(n->family) - 1); + strncpyz(n->recipient, t->recipient ? t->recipient : "", sizeof(n->recipient) - 1); + strncpyz(n->units, t->units ? t->units : "", sizeof(n->units) - 1); + strncpyz(n->exec, t->exec ? t->exec : "", sizeof(n->exec) - 1); + strncpyz(n->info, t->info ? t->info : "", sizeof(n->info) - 1); + strncpyz(n->summary, t->summary ? t->summary : "", sizeof(n->summary) - 1); + strncpyz(n->classification, t->classification ? t->classification : "", sizeof(n->classification) - 1); + strncpyz(n->type, t->type ? t->type : "", sizeof(n->type) - 1); + strncpyz(n->component, t->component ? t->component : "", sizeof(n->component) - 1); + n->when_key = t->when_key; + n->duration = t->duration; + n->non_clear_duration = t->non_clear_duration; + n->flags = t->flags; + n->delay_up_to_timestamp = t->delay_up_to_timestamp; + n->exec_run_timestamp = t->exec_run_timestamp; + n->exec_code = t->exec_code; + n->new_status = t->new_status; + n->old_status = t->old_status; + n->delay = t->delay; + n->last_repeat = t->last_repeat; + n->new_value = t->new_value; + n->old_value = t->old_value; + + memcpy(n->machine_guid, machine_guid, sizeof(n->machine_guid)); + n->next = n->prev = NULL; + + return n; +} + +static void contexts_v2_alert_transition_free(struct sql_alert_transition_fixed_size *t) { + freez(t); +} + +static inline void contexts_v2_alert_transition_keep(struct alert_transitions_callback_data *d, struct sql_alert_transition_data *t, const char *machine_guid) { + d->items_matched++; + + if(unlikely(t->global_id <= d->ctl->request->alerts.global_id_anchor)) { + // this is in our past, we are not interested + d->operations.skips_before++; + return; + } + + if(unlikely(!d->base)) { + d->last_added = contexts_v2_alert_transition_dup(t, machine_guid, NULL); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(d->base, d->last_added, prev, next); + d->items_to_return++; + d->operations.first++; + return; + } + + struct sql_alert_transition_fixed_size *last = d->last_added; + while(last->prev != d->base->prev && t->global_id > last->prev->global_id) { + last = last->prev; + d->operations.backwards++; + } + + while(last->next && t->global_id < last->next->global_id) { + last = last->next; + d->operations.forwards++; + } + + if(d->items_to_return >= d->max_items_to_return) { + if(last == d->base->prev && t->global_id < last->global_id) { + d->operations.skips_after++; + return; + } + } + + d->items_to_return++; + + if(t->global_id > last->global_id) { + if(d->items_to_return > d->max_items_to_return) { + d->items_to_return--; + d->operations.shifts++; + d->last_added = d->base->prev; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(d->base, d->last_added, prev, next); + d->last_added = contexts_v2_alert_transition_dup(t, machine_guid, d->last_added); + } + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(d->base, d->last_added, prev, next); + d->operations.prepend++; + } + else { + d->last_added = contexts_v2_alert_transition_dup(t, machine_guid, NULL); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(d->base, d->last_added, prev, next); + d->operations.append++; + } + + while(d->items_to_return > d->max_items_to_return) { + // we have to remove something + + struct sql_alert_transition_fixed_size *tmp = d->base->prev; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(d->base, tmp, prev, next); + d->items_to_return--; + + if(unlikely(d->last_added == tmp)) + d->last_added = d->base; + + contexts_v2_alert_transition_free(tmp); + + d->operations.shifts++; + } +} + +static void contexts_v2_alert_transition_callback(struct sql_alert_transition_data *t, void *data) { + struct alert_transitions_callback_data *d = data; + d->items_evaluated++; + + char machine_guid[UUID_STR_LEN] = ""; + uuid_unparse_lower(*t->host_id, machine_guid); + + const char *facets[ATF_TOTAL_ENTRIES] = { + [ATF_STATUS] = rrdcalc_status2string(t->new_status), + [ATF_CLASS] = t->classification, + [ATF_TYPE] = t->type, + [ATF_COMPONENT] = t->component, + [ATF_ROLE] = t->recipient && *t->recipient ? t->recipient : string2str(localhost->health.health_default_recipient), + [ATF_NODE] = machine_guid, + [ATF_ALERT_NAME] = t->alert_name, + [ATF_CHART_NAME] = t->chart_name, + [ATF_CONTEXT] = t->chart_context, + }; + + for(size_t i = 0; i < ATF_TOTAL_ENTRIES ;i++) { + if (!facets[i] || !*facets[i]) facets[i] = "unknown"; + + struct facet_entry tmp = { + .count = 0, + }; + dictionary_set(d->facets[i].dict, facets[i], &tmp, sizeof(tmp)); + } + + bool selected[ATF_TOTAL_ENTRIES] = { 0 }; + + uint32_t selected_by = 0; + for(size_t i = 0; i < ATF_TOTAL_ENTRIES ;i++) { + selected[i] = !d->facets[i].pattern || simple_pattern_matches(d->facets[i].pattern, facets[i]); + if(selected[i]) + selected_by++; + } + + if(selected_by == ATF_TOTAL_ENTRIES) { + // this item is selected by all facets + // put it in our result (if it fits) + contexts_v2_alert_transition_keep(d, t, machine_guid); + } + + if(selected_by >= ATF_TOTAL_ENTRIES - 1) { + // this item is selected by all, or all except one facet + // in both cases we need to add it to our counters + + for (size_t i = 0; i < ATF_TOTAL_ENTRIES; i++) { + uint32_t counted_by = selected_by; + + if (counted_by != ATF_TOTAL_ENTRIES) { + counted_by = 0; + for (size_t j = 0; j < ATF_TOTAL_ENTRIES; j++) { + if (i == j || selected[j]) + counted_by++; + } + } + + if (counted_by == ATF_TOTAL_ENTRIES) { + // we need to count it on this facet + struct facet_entry *x = dictionary_get(d->facets[i].dict, facets[i]); + internal_fatal(!x, "facet is not found"); + if(x) + x->count++; + } + } + } +} + +static void contexts_v2_alert_transitions_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_data *ctl, bool debug) { + struct alert_transitions_callback_data data = { + .wb = wb, + .ctl = ctl, + .debug = debug, + .only_one_config = true, + .max_items_to_return = ctl->request->alerts.last, + .items_to_return = 0, + .base = NULL, + }; + + for(size_t i = 0; i < ATF_TOTAL_ENTRIES ;i++) { + data.facets[i].dict = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(struct facet_entry)); + if(ctl->request->alerts.facets[i]) + data.facets[i].pattern = simple_pattern_create(ctl->request->alerts.facets[i], ",|", SIMPLE_PATTERN_EXACT, false); + } + + sql_alert_transitions( + ctl->nodes.dict, + ctl->window.after, + ctl->window.before, + ctl->request->contexts, + ctl->request->alerts.alert, + ctl->request->alerts.transition, + contexts_v2_alert_transition_callback, + &data, + debug); + + buffer_json_member_add_array(wb, "facets"); + for (size_t i = 0; i < ATF_TOTAL_ENTRIES; i++) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "id", alert_transition_facets[i].id); + buffer_json_member_add_string(wb, "name", alert_transition_facets[i].name); + buffer_json_member_add_uint64(wb, "order", alert_transition_facets[i].order); + buffer_json_member_add_array(wb, "options"); + { + struct facet_entry *x; + dfe_start_read(data.facets[i].dict, x) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "id", x_dfe.name); + if (i == ATF_NODE) { + RRDHOST *host = rrdhost_find_by_guid(x_dfe.name); + if (host) + buffer_json_member_add_string(wb, "name", rrdhost_hostname(host)); + else + buffer_json_member_add_string(wb, "name", x_dfe.name); + } else + buffer_json_member_add_string(wb, "name", x_dfe.name); + buffer_json_member_add_uint64(wb, "count", x->count); + } + buffer_json_object_close(wb); + } + dfe_done(x); + } + buffer_json_array_close(wb); // options + } + buffer_json_object_close(wb); // facet + } + buffer_json_array_close(wb); // facets + + buffer_json_member_add_array(wb, "transitions"); + for(struct sql_alert_transition_fixed_size *t = data.base; t ; t = t->next) { + buffer_json_add_array_item_object(wb); + { + RRDHOST *host = rrdhost_find_by_guid(t->machine_guid); + + buffer_json_member_add_uint64(wb, "gi", t->global_id); + buffer_json_member_add_uuid(wb, "transition_id", &t->transition_id); + buffer_json_member_add_uuid(wb, "config_hash_id", &t->config_hash_id); + buffer_json_member_add_string(wb, "machine_guid", t->machine_guid); + + if(host) { + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(host)); + + if(host->node_id) + buffer_json_member_add_uuid(wb, "node_id", host->node_id); + } + + buffer_json_member_add_string(wb, "alert", *t->alert_name ? t->alert_name : NULL); + buffer_json_member_add_string(wb, "instance", *t->chart ? t->chart : NULL); + buffer_json_member_add_string(wb, "instance_n", *t->chart_name ? t->chart_name : NULL); + buffer_json_member_add_string(wb, "context", *t->chart_context ? t->chart_context : NULL); + // buffer_json_member_add_string(wb, "family", *t->family ? t->family : NULL); + buffer_json_member_add_string(wb, "component", *t->component ? t->component : NULL); + buffer_json_member_add_string(wb, "classification", *t->classification ? t->classification : NULL); + buffer_json_member_add_string(wb, "type", *t->type ? t->type : NULL); + + buffer_json_member_add_time_t(wb, "when", t->when_key); + buffer_json_member_add_string(wb, "info", *t->info ? t->info : ""); + buffer_json_member_add_string(wb, "summary", *t->summary ? t->summary : ""); + buffer_json_member_add_string(wb, "units", *t->units ? t->units : NULL); + buffer_json_member_add_object(wb, "new"); + { + buffer_json_member_add_string(wb, "status", rrdcalc_status2string(t->new_status)); + buffer_json_member_add_double(wb, "value", t->new_value); + } + buffer_json_object_close(wb); // new + buffer_json_member_add_object(wb, "old"); + { + buffer_json_member_add_string(wb, "status", rrdcalc_status2string(t->old_status)); + buffer_json_member_add_double(wb, "value", t->old_value); + buffer_json_member_add_time_t(wb, "duration", t->duration); + buffer_json_member_add_time_t(wb, "raised_duration", t->non_clear_duration); + } + buffer_json_object_close(wb); // old + + buffer_json_member_add_object(wb, "notification"); + { + buffer_json_member_add_time_t(wb, "when", t->exec_run_timestamp); + buffer_json_member_add_time_t(wb, "delay", t->delay); + buffer_json_member_add_time_t(wb, "delay_up_to_time", t->delay_up_to_timestamp); + health_entry_flags_to_json_array(wb, "flags", t->flags); + buffer_json_member_add_string(wb, "exec", *t->exec ? t->exec : string2str(localhost->health.health_default_exec)); + buffer_json_member_add_uint64(wb, "exec_code", t->exec_code); + buffer_json_member_add_string(wb, "to", *t->recipient ? t->recipient : string2str(localhost->health.health_default_recipient)); + } + buffer_json_object_close(wb); // notification + } + buffer_json_object_close(wb); // a transition + } + buffer_json_array_close(wb); // all transitions + + if(ctl->options & CONTEXT_V2_OPTION_ALERTS_WITH_CONFIGURATIONS) { + DICTIONARY *configs = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE); + + for(struct sql_alert_transition_fixed_size *t = data.base; t ; t = t->next) { + char guid[UUID_STR_LEN]; + uuid_unparse_lower(t->config_hash_id, guid); + dictionary_set(configs, guid, NULL, 0); + } + + buffer_json_member_add_array(wb, "configurations"); + sql_get_alert_configuration(configs, contexts_v2_alert_config_to_json_from_sql_alert_config_data, &data, debug); + buffer_json_array_close(wb); + + dictionary_destroy(configs); + } + + while(data.base) { + struct sql_alert_transition_fixed_size *t = data.base; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(data.base, t, prev, next); + contexts_v2_alert_transition_free(t); + } + + for(size_t i = 0; i < ATF_TOTAL_ENTRIES ;i++) { + dictionary_destroy(data.facets[i].dict); + simple_pattern_free(data.facets[i].pattern); + } + + buffer_json_member_add_object(wb, "items"); + { + // all the items in the window, under the scope_nodes, ignoring the facets (filters) + buffer_json_member_add_uint64(wb, "evaluated", data.items_evaluated); + + // all the items matching the query (if you didn't put anchor_gi and last, these are all the items you would get back) + buffer_json_member_add_uint64(wb, "matched", data.items_matched); + + // the items included in this response + buffer_json_member_add_uint64(wb, "returned", data.items_to_return); + + // same as last=X parameter + buffer_json_member_add_uint64(wb, "max_to_return", data.max_items_to_return); + + // items before the first returned, this should be 0 if anchor_gi is not set + buffer_json_member_add_uint64(wb, "before", data.operations.skips_before); + + // items after the last returned, when this is zero there aren't any items after the current list + buffer_json_member_add_uint64(wb, "after", data.operations.skips_after + data.operations.shifts); + } + buffer_json_object_close(wb); // items + + if(debug) { + buffer_json_member_add_object(wb, "stats"); + { + buffer_json_member_add_uint64(wb, "first", data.operations.first); + buffer_json_member_add_uint64(wb, "prepend", data.operations.prepend); + buffer_json_member_add_uint64(wb, "append", data.operations.append); + buffer_json_member_add_uint64(wb, "backwards", data.operations.backwards); + buffer_json_member_add_uint64(wb, "forwards", data.operations.forwards); + buffer_json_member_add_uint64(wb, "shifts", data.operations.shifts); + buffer_json_member_add_uint64(wb, "skips_before", data.operations.skips_before); + buffer_json_member_add_uint64(wb, "skips_after", data.operations.skips_after); + } + buffer_json_object_close(wb); + } +} + +int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTEXTS_V2_MODE mode) { + int resp = HTTP_RESP_OK; + bool run = true; + + if(mode & CONTEXTS_V2_SEARCH) + mode |= CONTEXTS_V2_CONTEXTS; + + if(mode & (CONTEXTS_V2_AGENTS_INFO)) + mode |= CONTEXTS_V2_AGENTS; + + if(mode & (CONTEXTS_V2_FUNCTIONS | CONTEXTS_V2_CONTEXTS | CONTEXTS_V2_SEARCH | CONTEXTS_V2_NODES_INFO | CONTEXTS_V2_NODE_INSTANCES)) + mode |= CONTEXTS_V2_NODES; + + if(mode & CONTEXTS_V2_ALERTS) { + mode |= CONTEXTS_V2_NODES; + req->options &= ~CONTEXT_V2_OPTION_ALERTS_WITH_CONFIGURATIONS; + + if(!(req->options & (CONTEXT_V2_OPTION_ALERTS_WITH_SUMMARY|CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES|CONTEXT_V2_OPTION_ALERTS_WITH_VALUES))) + req->options |= CONTEXT_V2_OPTION_ALERTS_WITH_SUMMARY; + } + + if(mode & CONTEXTS_V2_ALERT_TRANSITIONS) { + mode |= CONTEXTS_V2_NODES; + req->options &= ~CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES; + } + + struct rrdcontext_to_json_v2_data ctl = { + .wb = wb, + .request = req, + .mode = mode, + .options = req->options, + .versions = { 0 }, + .nodes.scope_pattern = string_to_simple_pattern(req->scope_nodes), + .nodes.pattern = string_to_simple_pattern(req->nodes), + .contexts.pattern = string_to_simple_pattern(req->contexts), + .contexts.scope_pattern = string_to_simple_pattern(req->scope_contexts), + .q.pattern = string_to_simple_pattern_nocase(req->q), + .alerts.alert_name_pattern = string_to_simple_pattern(req->alerts.alert), + .window = { + .enabled = false, + .relative = false, + .after = req->after, + .before = req->before, + }, + .timings = { + .received_ut = now_monotonic_usec(), + } + }; + + bool debug = ctl.options & CONTEXT_V2_OPTION_DEBUG; + + if(mode & CONTEXTS_V2_NODES) { + ctl.nodes.dict = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct contexts_v2_node)); + } + + if(mode & CONTEXTS_V2_CONTEXTS) { + ctl.contexts.dict = dictionary_create_advanced( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL, + sizeof(struct context_v2_entry)); + + dictionary_register_conflict_callback(ctl.contexts.dict, contexts_conflict_callback, &ctl); + dictionary_register_delete_callback(ctl.contexts.dict, contexts_delete_callback, &ctl); + } + + if(mode & CONTEXTS_V2_FUNCTIONS) { + ctl.functions.dict = dictionary_create_advanced( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL, + sizeof(struct function_v2_entry)); + + dictionary_register_insert_callback(ctl.functions.dict, functions_insert_callback, &ctl); + dictionary_register_conflict_callback(ctl.functions.dict, functions_conflict_callback, &ctl); + dictionary_register_delete_callback(ctl.functions.dict, functions_delete_callback, &ctl); + } + + if(mode & CONTEXTS_V2_ALERTS) { + if(req->alerts.transition) { + ctl.options |= CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES|CONTEXT_V2_OPTION_ALERTS_WITH_VALUES; + run = sql_find_alert_transition(req->alerts.transition, rrdcontext_v2_set_transition_filter, &ctl); + if(!run) { + resp = HTTP_RESP_NOT_FOUND; + goto cleanup; + } + } + + ctl.alerts.summary = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_v2_entry)); + + dictionary_register_insert_callback(ctl.alerts.summary, alerts_v2_insert_callback, &ctl); + dictionary_register_conflict_callback(ctl.alerts.summary, alerts_v2_conflict_callback, &ctl); + dictionary_register_delete_callback(ctl.alerts.summary, alerts_v2_delete_callback, &ctl); + + ctl.alerts.by_type = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_type, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_type, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_component = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_component, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_component, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_classification = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_classification, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_classification, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_recipient = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_recipient, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_recipient, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_module = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_module, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_module, alerts_by_x_conflict_callback, NULL); + + if(ctl.options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { + ctl.alerts.alert_instances = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct sql_alert_instance_v2_entry)); + + dictionary_register_insert_callback(ctl.alerts.alert_instances, alert_instances_v2_insert_callback, &ctl); + dictionary_register_conflict_callback(ctl.alerts.alert_instances, alert_instances_v2_conflict_callback, &ctl); + dictionary_register_delete_callback(ctl.alerts.alert_instances, alert_instances_delete_callback, &ctl); + } + } + + if(req->after || req->before) { + ctl.window.relative = rrdr_relative_window_to_absolute_query(&ctl.window.after, &ctl.window.before, &ctl.now + , false + ); + ctl.window.enabled = !(mode & CONTEXTS_V2_ALERT_TRANSITIONS); + } + else + ctl.now = now_realtime_sec(); + + buffer_json_initialize(wb, "\"", "\"", 0, true, + ((req->options & CONTEXT_V2_OPTION_MINIFY) && !(req->options & CONTEXT_V2_OPTION_DEBUG)) ? BUFFER_JSON_OPTIONS_MINIFY : BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_uint64(wb, "api", 2); + + if(req->options & CONTEXT_V2_OPTION_DEBUG) { + buffer_json_member_add_object(wb, "request"); + { + buffer_json_contexts_v2_mode_to_array(wb, "mode", mode); + web_client_api_request_v2_contexts_options_to_buffer_json_array(wb, "options", req->options); + + buffer_json_member_add_object(wb, "scope"); + { + buffer_json_member_add_string(wb, "scope_nodes", req->scope_nodes); + if (mode & (CONTEXTS_V2_CONTEXTS | CONTEXTS_V2_SEARCH | CONTEXTS_V2_ALERTS)) + buffer_json_member_add_string(wb, "scope_contexts", req->scope_contexts); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "selectors"); + { + buffer_json_member_add_string(wb, "nodes", req->nodes); + + if (mode & (CONTEXTS_V2_CONTEXTS | CONTEXTS_V2_SEARCH | CONTEXTS_V2_ALERTS)) + buffer_json_member_add_string(wb, "contexts", req->contexts); + + if(mode & (CONTEXTS_V2_ALERTS | CONTEXTS_V2_ALERT_TRANSITIONS)) { + buffer_json_member_add_object(wb, "alerts"); + + if(mode & CONTEXTS_V2_ALERTS) + web_client_api_request_v2_contexts_alerts_status_to_buffer_json_array(wb, "status", req->alerts.status); + + if(mode & CONTEXTS_V2_ALERT_TRANSITIONS) { + buffer_json_member_add_string(wb, "context", req->contexts); + buffer_json_member_add_uint64(wb, "anchor_gi", req->alerts.global_id_anchor); + buffer_json_member_add_uint64(wb, "last", req->alerts.last); + } + + buffer_json_member_add_string(wb, "alert", req->alerts.alert); + buffer_json_member_add_string(wb, "transition", req->alerts.transition); + buffer_json_object_close(wb); // alerts + } + } + buffer_json_object_close(wb); // selectors + + buffer_json_member_add_object(wb, "filters"); + { + if (mode & CONTEXTS_V2_SEARCH) + buffer_json_member_add_string(wb, "q", req->q); + + buffer_json_member_add_time_t(wb, "after", req->after); + buffer_json_member_add_time_t(wb, "before", req->before); + } + buffer_json_object_close(wb); // filters + + if(mode & CONTEXTS_V2_ALERT_TRANSITIONS) { + buffer_json_member_add_object(wb, "facets"); + { + for (int i = 0; i < ATF_TOTAL_ENTRIES; i++) { + buffer_json_member_add_string(wb, alert_transition_facets[i].query_param, req->alerts.facets[i]); + } + } + buffer_json_object_close(wb); // facets + } + } + buffer_json_object_close(wb); + } + + ssize_t ret = 0; + if(run) + ret = query_scope_foreach_host(ctl.nodes.scope_pattern, ctl.nodes.pattern, + rrdcontext_to_json_v2_add_host, &ctl, + &ctl.versions, ctl.q.host_node_id_str); + + if(unlikely(ret < 0)) { + buffer_flush(wb); + + if(ret == -2) { + buffer_strcat(wb, "query timeout"); + resp = HTTP_RESP_GATEWAY_TIMEOUT; + } + else { + buffer_strcat(wb, "query interrupted"); + resp = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + goto cleanup; + } + + ctl.timings.executed_ut = now_monotonic_usec(); + + if(mode & CONTEXTS_V2_ALERT_TRANSITIONS) { + contexts_v2_alert_transitions_to_json(wb, &ctl, debug); + } + else { + if (mode & CONTEXTS_V2_NODES) { + buffer_json_member_add_array(wb, "nodes"); + struct contexts_v2_node *t; + dfe_start_read(ctl.nodes.dict, t) { + rrdcontext_to_json_v2_rrdhost(wb, t->host, &ctl, t->ni); + } + dfe_done(t); + buffer_json_array_close(wb); + } + + if (mode & CONTEXTS_V2_FUNCTIONS) { + buffer_json_member_add_array(wb, "functions"); + { + struct function_v2_entry *t; + dfe_start_read(ctl.functions.dict, t) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "name", t_dfe.name); + buffer_json_member_add_string(wb, "help", string2str(t->help)); + buffer_json_member_add_array(wb, "ni"); + { + for (size_t i = 0; i < t->used; i++) + buffer_json_add_array_item_uint64(wb, t->node_ids[i]); + } + buffer_json_array_close(wb); + buffer_json_member_add_string(wb, "tags", string2str(t->tags)); + http_access2buffer_json_array(wb, "access", t->access); + buffer_json_member_add_uint64(wb, "priority", t->priority); + } + buffer_json_object_close(wb); + } + dfe_done(t); + } + buffer_json_array_close(wb); + } + + if (mode & CONTEXTS_V2_CONTEXTS) { + buffer_json_member_add_object(wb, "contexts"); + { + struct context_v2_entry *z; + dfe_start_read(ctl.contexts.dict, z) { + bool collected = z->flags & RRD_FLAG_COLLECTED; + + buffer_json_member_add_object(wb, string2str(z->id)); + { + buffer_json_member_add_string(wb, "family", string2str(z->family)); + buffer_json_member_add_uint64(wb, "priority", z->priority); + buffer_json_member_add_time_t(wb, "first_entry", z->first_time_s); + buffer_json_member_add_time_t(wb, "last_entry", collected ? ctl.now : z->last_time_s); + buffer_json_member_add_boolean(wb, "live", collected); + if (mode & CONTEXTS_V2_SEARCH) + buffer_json_member_add_string(wb, "match", fts_match_to_string(z->match)); + } + buffer_json_object_close(wb); + } + dfe_done(z); + } + buffer_json_object_close(wb); // contexts + } + + if (mode & CONTEXTS_V2_ALERTS) + contexts_v2_alerts_to_json(wb, &ctl, debug); + + if (mode & CONTEXTS_V2_SEARCH) { + buffer_json_member_add_object(wb, "searches"); + { + buffer_json_member_add_uint64(wb, "strings", ctl.q.fts.string_searches); + buffer_json_member_add_uint64(wb, "char", ctl.q.fts.char_searches); + buffer_json_member_add_uint64(wb, "total", ctl.q.fts.searches); + } + buffer_json_object_close(wb); + } + + if (mode & (CONTEXTS_V2_VERSIONS)) + version_hashes_api_v2(wb, &ctl.versions); + + if (mode & CONTEXTS_V2_AGENTS) + buffer_json_agents_v2(wb, &ctl.timings, ctl.now, mode & (CONTEXTS_V2_AGENTS_INFO), true); + } + + buffer_json_cloud_timings(wb, "timings", &ctl.timings); + + buffer_json_finalize(wb); + +cleanup: + dictionary_destroy(ctl.nodes.dict); + dictionary_destroy(ctl.contexts.dict); + dictionary_destroy(ctl.functions.dict); + dictionary_destroy(ctl.alerts.summary); + dictionary_destroy(ctl.alerts.alert_instances); + dictionary_destroy(ctl.alerts.by_type); + dictionary_destroy(ctl.alerts.by_component); + dictionary_destroy(ctl.alerts.by_classification); + dictionary_destroy(ctl.alerts.by_recipient); + dictionary_destroy(ctl.alerts.by_module); + simple_pattern_free(ctl.nodes.scope_pattern); + simple_pattern_free(ctl.nodes.pattern); + simple_pattern_free(ctl.contexts.pattern); + simple_pattern_free(ctl.contexts.scope_pattern); + simple_pattern_free(ctl.q.pattern); + simple_pattern_free(ctl.alerts.alert_name_pattern); + + return resp; +} diff --git a/src/database/contexts/context.c b/src/database/contexts/context.c new file mode 100644 index 000000000..33037eb93 --- /dev/null +++ b/src/database/contexts/context.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +inline const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return string2str(rc->id); +} + +inline bool rrdcontext_acquired_belongs_to_host(RRDCONTEXT_ACQUIRED *rca, RRDHOST *host) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return rc->rrdhost == host; +} + +// ---------------------------------------------------------------------------- +// RRDCONTEXT + +static void rrdcontext_freez(RRDCONTEXT *rc) { + string_freez(rc->id); + string_freez(rc->title); + string_freez(rc->units); + string_freez(rc->family); +} + +static void rrdcontext_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost) { + RRDHOST *host = (RRDHOST *)rrdhost; + RRDCONTEXT *rc = (RRDCONTEXT *)value; + + rc->rrdhost = host; + rc->flags = rc->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics at constructor + + if(rc->hub.version) { + // we are loading data from the SQL database + + if(rc->version) + netdata_log_error("RRDCONTEXT: context '%s' is already initialized with version %"PRIu64", but it is loaded again from SQL with version %"PRIu64"", string2str(rc->id), rc->version, rc->hub.version); + + // IMPORTANT + // replace all string pointers in rc->hub with our own versions + // the originals are coming from a tmp allocation of sqlite + + string_freez(rc->id); + rc->id = string_strdupz(rc->hub.id); + rc->hub.id = string2str(rc->id); + + string_freez(rc->title); + rc->title = string_strdupz(rc->hub.title); + rc->hub.title = string2str(rc->title); + + string_freez(rc->units); + rc->units = string_strdupz(rc->hub.units); + rc->hub.units = string2str(rc->units); + + string_freez(rc->family); + rc->family = string_strdupz(rc->hub.family); + rc->hub.family = string2str(rc->family); + + rc->chart_type = rrdset_type_id(rc->hub.chart_type); + rc->hub.chart_type = rrdset_type_name(rc->chart_type); + + rc->version = rc->hub.version; + rc->priority = rc->hub.priority; + rc->first_time_s = (time_t)rc->hub.first_time_s; + rc->last_time_s = (time_t)rc->hub.last_time_s; + + if(rc->hub.deleted || !rc->hub.first_time_s) + rrd_flag_set_deleted(rc, RRD_FLAG_NONE); + else { + if (rc->last_time_s == 0) + rrd_flag_set_collected(rc); + else + rrd_flag_set_archived(rc); + } + + rc->flags |= RRD_FLAG_UPDATE_REASON_LOAD_SQL; // no need for atomics at constructor + } + else { + // we are adding this context now for the first time + rc->version = now_realtime_sec(); + } + + rrdinstances_create_in_rrdcontext(rc); + spinlock_init(&rc->spinlock); + + // signal the react callback to do the job + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +static void rrdcontext_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { + + RRDCONTEXT *rc = (RRDCONTEXT *)value; + + rrdinstances_destroy_from_rrdcontext(rc); + rrdcontext_freez(rc); +} + +typedef enum __attribute__((packed)) { + OLDNEW_KEEP_OLD, + OLDNEW_USE_NEW, + OLDNEW_MERGE, +} OLDNEW; + +static inline OLDNEW oldnew_decide(bool archived, bool new_archived) { + if(archived && !new_archived) + return OLDNEW_USE_NEW; + + if(!archived && new_archived) + return OLDNEW_KEEP_OLD; + + return OLDNEW_MERGE; +} + +static inline void string_replace(STRING **stringpp, STRING *new_string) { + STRING *old = *stringpp; + *stringpp = string_dup(new_string); + string_freez(old); +} + +static inline void string_merge(STRING **stringpp, STRING *new_string) { + STRING *old = *stringpp; + *stringpp = string_2way_merge(*stringpp, new_string); + string_freez(old); +} + +static void rrdcontext_merge_with(RRDCONTEXT *rc, bool archived, STRING *title, STRING *family, STRING *units, RRDSET_TYPE chart_type, uint32_t priority) { + OLDNEW oldnew = oldnew_decide(rrd_flag_is_archived(rc), archived); + + switch(oldnew) { + case OLDNEW_KEEP_OLD: + break; + + case OLDNEW_USE_NEW: + if(rc->title != title) { + string_replace(&rc->title, title); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + if(rc->family != family) { + string_replace(&rc->family, family); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + break; + + case OLDNEW_MERGE: + if(rc->title != title) { + string_merge(&rc->title, title); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + if(rc->family != family) { + string_merge(&rc->family, family); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + break; + } + + switch(oldnew) { + case OLDNEW_KEEP_OLD: + break; + + case OLDNEW_USE_NEW: + case OLDNEW_MERGE: + if(rc->units != units) { + string_replace(&rc->units, units); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->chart_type != chart_type) { + rc->chart_type = chart_type; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rc->priority != priority) { + rc->priority = priority; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + break; + } +} + +void rrdcontext_update_from_collected_rrdinstance(RRDINSTANCE *ri) { + rrdcontext_merge_with(ri->rc, rrd_flag_is_archived(ri), + ri->title, ri->family, ri->units, ri->chart_type, ri->priority); +} + +static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdhost __maybe_unused) { + RRDCONTEXT *rc = (RRDCONTEXT *)old_value; + RRDCONTEXT *rc_new = (RRDCONTEXT *)new_value; + + //current rc is not archived, new_rc is archived, don't merge + if (!rrd_flag_is_archived(rc) && rrd_flag_is_archived(rc_new)) { + rrdcontext_freez(rc_new); + return false; + } + + rrdcontext_lock(rc); + + rrdcontext_merge_with(rc, rrd_flag_is_archived(rc_new), + rc_new->title, rc_new->family, rc_new->units, rc_new->chart_type, rc_new->priority); + + rrd_flag_set(rc, rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on rc_new + + if(rrd_flag_is_collected(rc) && rrd_flag_is_archived(rc)) + rrd_flag_set_collected(rc); + + if(rrd_flag_is_updated(rc)) + rrd_flag_set(rc, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + rrdcontext_unlock(rc); + + // free the resources of the new one + rrdcontext_freez(rc_new); + + // the react callback will continue from here + return rrd_flag_is_updated(rc); +} + +static void rrdcontext_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdhost __maybe_unused) { + RRDCONTEXT *rc = (RRDCONTEXT *)value; + rrdcontext_trigger_updates(rc, __FUNCTION__ ); +} + +void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function) { + if(rrd_flag_is_updated(rc) || !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrdcontext_queue_for_post_processing(rc, function, rc->flags); +} + +static void rrdcontext_hub_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags = rrd_flags_get(rc); +} + +static void rrdcontext_hub_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_HUB); +} + +static bool rrdcontext_hub_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + // context and new_context are the same + // we just need to update the timings + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_HUB); + rc->queue.queued_ut = now_realtime_usec(); + rc->queue.queued_flags |= rrd_flags_get(rc); + + return true; +} + +static void rrdcontext_post_processing_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.queued_flags = rc->flags; + rc->pp.queued_ut = now_realtime_usec(); +} + +static void rrdcontext_post_processing_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + + // IMPORTANT: + // Do not rely on this flag being absent, because the dictionaries have delayed deletions (garbage collect) + // so, this flag may not be deleted immediately from the context. + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_PP); + rc->pp.dequeued_ut = now_realtime_usec(); +} + +static bool rrdcontext_post_processing_queue_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *new_context __maybe_unused, void *nothing __maybe_unused) { + RRDCONTEXT *rc = context; + bool changed = false; + + if(!(rc->flags & RRD_FLAG_QUEUED_FOR_PP)) { + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); + changed = true; + } + + if(rc->pp.queued_flags != rc->flags) { + rc->pp.queued_flags |= rc->flags; + changed = true; + } + + return changed; +} + + +void rrdhost_create_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(likely(host->rrdctx.contexts)) return; + + host->rrdctx.contexts = dictionary_create_advanced( + DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + &dictionary_stats_category_rrdcontext, sizeof(RRDCONTEXT)); + + dictionary_register_insert_callback(host->rrdctx.contexts, rrdcontext_insert_callback, host); + dictionary_register_delete_callback(host->rrdctx.contexts, rrdcontext_delete_callback, host); + dictionary_register_conflict_callback(host->rrdctx.contexts, rrdcontext_conflict_callback, host); + dictionary_register_react_callback(host->rrdctx.contexts, rrdcontext_react_callback, host); + + host->rrdctx.hub_queue = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE, &dictionary_stats_category_rrdcontext, 0); + dictionary_register_insert_callback(host->rrdctx.hub_queue, rrdcontext_hub_queue_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdctx.hub_queue, rrdcontext_hub_queue_delete_callback, NULL); + dictionary_register_conflict_callback(host->rrdctx.hub_queue, rrdcontext_hub_queue_conflict_callback, NULL); + + host->rrdctx.pp_queue = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_VALUE_LINK_DONT_CLONE, &dictionary_stats_category_rrdcontext, 0); + dictionary_register_insert_callback(host->rrdctx.pp_queue, rrdcontext_post_processing_queue_insert_callback, NULL); + dictionary_register_delete_callback(host->rrdctx.pp_queue, rrdcontext_post_processing_queue_delete_callback, NULL); + dictionary_register_conflict_callback(host->rrdctx.pp_queue, rrdcontext_post_processing_queue_conflict_callback, NULL); +} + +void rrdhost_destroy_rrdcontexts(RRDHOST *host) { + if(unlikely(!host)) return; + if(unlikely(!host->rrdctx.contexts)) return; + + DICTIONARY *old; + + if(host->rrdctx.hub_queue) { + old = host->rrdctx.hub_queue; + host->rrdctx.hub_queue = NULL; + + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); + } + dfe_done(rc); + dictionary_destroy(old); + } + + if(host->rrdctx.pp_queue) { + old = host->rrdctx.pp_queue; + host->rrdctx.pp_queue = NULL; + + RRDCONTEXT *rc; + dfe_start_write(old, rc) { + dictionary_del(old, string2str(rc->id)); + } + dfe_done(rc); + dictionary_destroy(old); + } + + old = host->rrdctx.contexts; + host->rrdctx.contexts = NULL; + dictionary_destroy(old); +} + diff --git a/src/database/contexts/instance.c b/src/database/contexts/instance.c new file mode 100644 index 000000000..5d841bc82 --- /dev/null +++ b/src/database/contexts/instance.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDINSTANCE + +bool rrdinstance_acquired_id_and_name_are_same(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->id == ri->name; +} + +inline const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->id); +} + +inline const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->name); +} + +inline bool rrdinstance_acquired_has_name(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return (ri->name && ri->name != ri->id); +} + +inline const char *rrdinstance_acquired_units(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string2str(ri->units); +} + +inline STRING *rrdinstance_acquired_units_dup(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string_dup(ri->units); +} + +inline RRDLABELS *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->rrdlabels; +} + +inline DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(!ri->rrdset) return NULL; + return ri->rrdset->functions_view; +} + +inline RRDHOST *rrdinstance_acquired_rrdhost(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->rc->rrdhost; +} + +inline bool rrdinstance_acquired_belongs_to_context(RRDINSTANCE_ACQUIRED *ria, RRDCONTEXT_ACQUIRED *rca) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return ri->rc == rc; +} + +inline time_t rrdinstance_acquired_update_every(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->update_every_s; +} + +// ---------------------------------------------------------------------------- +// RRDINSTANCE + +static void rrdinstance_free(RRDINSTANCE *ri) { + + if(rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) + rrdlabels_destroy(ri->rrdlabels); + + rrdmetrics_destroy_from_rrdinstance(ri); + string_freez(ri->id); + string_freez(ri->name); + string_freez(ri->title); + string_freez(ri->units); + string_freez(ri->family); + + ri->id = NULL; + ri->name = NULL; + ri->title = NULL; + ri->units = NULL; + ri->family = NULL; + ri->rc = NULL; + ri->rrdlabels = NULL; + ri->rrdmetrics = NULL; + ri->rrdset = NULL; +} + +static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext) { + RRDINSTANCE *ri = value; + + // link it to its parent + ri->rc = rrdcontext; + + ri->flags = ri->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics + + if(!ri->name) + ri->name = string_dup(ri->id); + + if(ri->rrdset) { + ri->rrdlabels = ri->rrdset->rrdlabels; + ri->flags &= ~RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor + } + else { + ri->rrdlabels = rrdlabels_create(); + ri->flags |= RRD_FLAG_OWN_LABELS; // no need of atomics at the constructor + } + + if(ri->rrdset) { + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor + else + ri->flags &= ~RRD_FLAG_HIDDEN; // no need of atomics at the constructor + } + + rrdmetrics_create_in_rrdinstance(ri); + + // signal the react callback to do the job + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +static void rrdinstance_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = (RRDINSTANCE *)value; + + internal_error(ri->rrdset, "RRDINSTANCE: '%s' is freed but there is a RRDSET linked to it.", string2str(ri->id)); + + rrdinstance_free(ri); +} + +static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = (RRDINSTANCE *)old_value; + RRDINSTANCE *ri_new = (RRDINSTANCE *)new_value; + + internal_error(ri->id != ri_new->id, + "RRDINSTANCE: '%s' cannot change id to '%s'", + string2str(ri->id), string2str(ri_new->id)); + + if(!uuid_eq(ri->uuid, ri_new->uuid)) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri_new->uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' of host '%s' changed UUID from '%s' to '%s'", + string2str(ri->id), rrdhost_hostname(ri->rc->rrdhost), uuid1, uuid2); +#endif + + uuid_copy(ri->uuid, ri_new->uuid); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->rrdset && ri_new->rrdset && ri->rrdset != ri_new->rrdset) { + ri->rrdset = ri_new->rrdset; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(ri->rrdset && !uuid_eq(ri->uuid, ri->rrdset->chart_uuid)) { + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri->rrdset->chart_uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), rrdset_id(ri->rrdset), uuid1, uuid2); + } +#endif + + if(ri->name != ri_new->name) { + STRING *old = ri->name; + ri->name = string_dup(ri_new->name); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->title != ri_new->title) { + STRING *old = ri->title; + ri->title = string_dup(ri_new->title); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->units != ri_new->units) { + STRING *old = ri->units; + ri->units = string_dup(ri_new->units); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->family != ri_new->family) { + STRING *old = ri->family; + ri->family = string_dup(ri_new->family); + string_freez(old); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->chart_type != ri_new->chart_type) { + ri->chart_type = ri_new->chart_type; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->priority != ri_new->priority) { + ri->priority = ri_new->priority; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->update_every_s != ri_new->update_every_s) { + ri->update_every_s = ri_new->update_every_s; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(ri->rrdset != ri_new->rrdset) { + ri->rrdset = ri_new->rrdset; + + if(ri->rrdset && rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + RRDLABELS *old = ri->rrdlabels; + ri->rrdlabels = ri->rrdset->rrdlabels; + rrd_flag_clear(ri, RRD_FLAG_OWN_LABELS); + rrdlabels_destroy(old); + } + else if(!ri->rrdset && !rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + ri->rrdlabels = rrdlabels_create(); + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); + } + } + + if(ri->rrdset) { + if(unlikely(rrdset_flag_check(ri->rrdset, RRDSET_FLAG_HIDDEN))) + rrd_flag_set(ri, RRD_FLAG_HIDDEN); + else + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); + } + + rrd_flag_set(ri, ri_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on ri_new + + if(rrd_flag_is_collected(ri) && rrd_flag_is_archived(ri)) + rrd_flag_set_collected(ri); + + if(rrd_flag_is_updated(ri)) + rrd_flag_set(ri, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + // free the new one + rrdinstance_free(ri_new); + + // the react callback will continue from here + return rrd_flag_is_updated(ri); +} + +static void rrdinstance_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext __maybe_unused) { + RRDINSTANCE *ri = value; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +void rrdinstances_create_in_rrdcontext(RRDCONTEXT *rc) { + if(unlikely(!rc || rc->rrdinstances)) return; + + rc->rrdinstances = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + &dictionary_stats_category_rrdcontext, sizeof(RRDINSTANCE)); + + dictionary_register_insert_callback(rc->rrdinstances, rrdinstance_insert_callback, rc); + dictionary_register_delete_callback(rc->rrdinstances, rrdinstance_delete_callback, rc); + dictionary_register_conflict_callback(rc->rrdinstances, rrdinstance_conflict_callback, rc); + dictionary_register_react_callback(rc->rrdinstances, rrdinstance_react_callback, rc); +} + +void rrdinstances_destroy_from_rrdcontext(RRDCONTEXT *rc) { + if(unlikely(!rc || !rc->rrdinstances)) return; + + dictionary_destroy(rc->rrdinstances); + rc->rrdinstances = NULL; +} + +void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function) { + RRDSET *st = ri->rrdset; + + if(likely(st)) { + if(unlikely((unsigned int) st->priority != ri->priority)) { + ri->priority = st->priority; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + if(unlikely(st->update_every != ri->update_every_s)) { + ri->update_every_s = st->update_every; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } + else if(unlikely(rrd_flag_is_collected(ri))) { + // there is no rrdset, but we have it as collected! + + rrd_flag_set_archived(ri); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + + if(rrd_flag_is_updated(ri) || !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(ri->rc, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(ri->rc, function, ri->flags); + } +} + +// ---------------------------------------------------------------------------- +// RRDINSTANCE HOOKS ON RRDSET + +inline void rrdinstance_from_rrdset(RRDSET *st) { + RRDCONTEXT trc = { + .id = string_dup(st->context), + .title = string_dup(st->title), + .units = string_dup(st->units), + .family = string_dup(st->family), + .priority = st->priority, + .chart_type = st->chart_type, + .flags = RRD_FLAG_NONE, // no need for atomics + .rrdhost = st->rrdhost, + }; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_set_and_acquire_item(st->rrdhost->rrdctx.contexts, string2str(trc.id), &trc, sizeof(trc)); + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE tri = { + .id = string_dup(st->id), + .name = string_dup(st->name), + .units = string_dup(st->units), + .family = string_dup(st->family), + .title = string_dup(st->title), + .chart_type = st->chart_type, + .priority = st->priority, + .update_every_s = st->update_every, + .flags = RRD_FLAG_NONE, // no need for atomics + .rrdset = st, + }; + uuid_copy(tri.uuid, st->chart_uuid); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, string2str(tri.id), &tri, sizeof(tri)); + + RRDCONTEXT_ACQUIRED *rca_old = st->rrdcontexts.rrdcontext; + RRDINSTANCE_ACQUIRED *ria_old = st->rrdcontexts.rrdinstance; + + st->rrdcontexts.rrdcontext = rca; + st->rrdcontexts.rrdinstance = ria; + + if(rca == rca_old) { + rrdcontext_release(rca_old); + rca_old = NULL; + } + + if(ria == ria_old) { + rrdinstance_release(ria_old); + ria_old = NULL; + } + + if(rca_old && ria_old) { + // Oops! The chart changed context! + + // RRDCONTEXT *rc_old = rrdcontext_acquired_value(rca_old); + RRDINSTANCE *ri_old = rrdinstance_acquired_value(ria_old); + + // migrate all dimensions to the new metrics + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (!rd->rrdcontexts.rrdmetric) continue; + + RRDMETRIC *rm_old = rrdmetric_acquired_value(rd->rrdcontexts.rrdmetric); + rrd_flags_replace(rm_old, RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + rm_old->rrddim = NULL; + rm_old->first_time_s = 0; + rm_old->last_time_s = 0; + + rrdmetric_release(rd->rrdcontexts.rrdmetric); + rd->rrdcontexts.rrdmetric = NULL; + + rrdmetric_from_rrddim(rd); + } + rrddim_foreach_done(rd); + + // mark the old instance, ready to be deleted + if(!rrd_flag_check(ri_old, RRD_FLAG_OWN_LABELS)) + ri_old->rrdlabels = rrdlabels_create(); + + rrd_flags_replace(ri_old, RRD_FLAG_OWN_LABELS|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + ri_old->rrdset = NULL; + ri_old->first_time_s = 0; + ri_old->last_time_s = 0; + + rrdinstance_trigger_updates(ri_old, __FUNCTION__ ); + rrdinstance_release(ria_old); + + /* + // trigger updates on the old context + if(!dictionary_entries(rc_old->rrdinstances) && !dictionary_stats_referenced_items(rc_old->rrdinstances)) { + rrdcontext_lock(rc_old); + rc_old->flags = ((rc_old->flags & RRD_FLAG_QUEUED)?RRD_FLAG_QUEUED:RRD_FLAG_NONE)|RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION; + rc_old->first_time_s = 0; + rc_old->last_time_s = 0; + rrdcontext_unlock(rc_old); + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); + } + else + rrdcontext_trigger_updates(rc_old, __FUNCTION__ ); + */ + + rrdcontext_release(rca_old); + rca_old = NULL; + ria_old = NULL; + } + + if(rca_old || ria_old) + fatal("RRDCONTEXT: cannot switch rrdcontext without switching rrdinstance too"); +} + +#define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__) +static inline RRDINSTANCE *rrdset_get_rrdinstance_with_trace(RRDSET *st, const char *function) { + if(unlikely(!st->rrdcontexts.rrdinstance)) { + netdata_log_error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", rrdset_id(st), function); + return NULL; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(st->rrdcontexts.rrdinstance); + if(unlikely(!ri)) { + netdata_log_error("RRDINSTANCE: RRDSET '%s' lost its link to an RRDINSTANCE at %s()", rrdset_id(st), function); + return NULL; + } + + if(unlikely(ri->rrdset != st)) + fatal("RRDINSTANCE: '%s' is not linked to RRDSET '%s' at %s()", string2str(ri->id), rrdset_id(st), function); + + return ri; +} + +inline void rrdinstance_rrdset_is_freed(RRDSET *st) { + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrd_flag_set_archived(ri); + + if(!rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) { + ri->rrdlabels = rrdlabels_create(); + rrdlabels_copy(ri->rrdlabels, st->rrdlabels); + rrd_flag_set(ri, RRD_FLAG_OWN_LABELS); + } + + ri->rrdset = NULL; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + + rrdinstance_release(st->rrdcontexts.rrdinstance); + st->rrdcontexts.rrdinstance = NULL; + + rrdcontext_release(st->rrdcontexts.rrdcontext); + st->rrdcontexts.rrdcontext = NULL; + st->rrdcontexts.collected = false; +} + +inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) { + st->rrdcontexts.collected = false; + + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +inline void rrdinstance_updated_rrdset_name(RRDSET *st) { + st->rrdcontexts.collected = false; + + // the chart may not be initialized when this is called + if(unlikely(!st->rrdcontexts.rrdinstance)) return; + + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + if(st->name != ri->name) { + STRING *old = ri->name; + ri->name = string_dup(st->name); + string_freez(old); + + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + } +} + +inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, RRDSET *st) { + if(unlikely(ri->rrdset != st)) + fatal("RRDCONTEXT: instance '%s' is not linked to chart '%s' on host '%s'", + string2str(ri->id), rrdset_id(st), rrdhost_hostname(st->rrdhost)); + + bool st_is_hidden = rrdset_flag_check(st, RRDSET_FLAG_HIDDEN); + bool ri_is_hidden = rrd_flag_check(ri, RRD_FLAG_HIDDEN); + + if(unlikely(st_is_hidden != ri_is_hidden)) { + if (unlikely(st_is_hidden && !ri_is_hidden)) + rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + + else if (unlikely(!st_is_hidden && ri_is_hidden)) { + rrd_flag_clear(ri, RRD_FLAG_HIDDEN); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } +} + +inline void rrdinstance_updated_rrdset_flags(RRDSET *st) { + st->rrdcontexts.collected = false; + + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) return; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) + rrd_flag_set_archived(ri); + + rrdinstance_updated_rrdset_flags_no_action(ri, st); + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + +inline void rrdinstance_collected_rrdset(RRDSET *st) { + if(st->rrdcontexts.collected) + return; + + st->rrdcontexts.collected = true; + + RRDINSTANCE *ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) { + rrdcontext_updated_rrdset(st); + ri = rrdset_get_rrdinstance(st); + if(unlikely(!ri)) + return; + } + + rrdinstance_updated_rrdset_flags_no_action(ri, st); + + if(unlikely(ri->internal.collected_metrics_count && !rrd_flag_is_collected(ri))) + rrd_flag_set_collected(ri); + + // we use this variable to detect BEGIN/END without SET + ri->internal.collected_metrics_count = 0; + + rrdinstance_trigger_updates(ri, __FUNCTION__ ); +} + diff --git a/src/database/contexts/internal.h b/src/database/contexts/internal.h new file mode 100644 index 000000000..270c59649 --- /dev/null +++ b/src/database/contexts/internal.h @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCONTEXT_INTERNAL_H +#define NETDATA_RRDCONTEXT_INTERNAL_H 1 + +#include "rrdcontext.h" +#include "../sqlite/sqlite_context.h" +#include "../../aclk/schema-wrappers/context.h" +#include "../../aclk/aclk_contexts_api.h" +#include "../../aclk/aclk.h" +#include "../storage_engine.h" + +#define MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST 5000 +#define FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS 120 +#define RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC (1000 * USEC_PER_MS) +#define RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY 10 + +#define LOG_TRANSITIONS false + +#define WORKER_JOB_HOSTS 1 +#define WORKER_JOB_CHECK 2 +#define WORKER_JOB_SEND 3 +#define WORKER_JOB_DEQUEUE 4 +#define WORKER_JOB_RETENTION 5 +#define WORKER_JOB_QUEUED 6 +#define WORKER_JOB_CLEANUP 7 +#define WORKER_JOB_CLEANUP_DELETE 8 +#define WORKER_JOB_PP_METRIC 9 // post-processing metrics +#define WORKER_JOB_PP_INSTANCE 10 // post-processing instances +#define WORKER_JOB_PP_CONTEXT 11 // post-processing contexts +#define WORKER_JOB_HUB_QUEUE_SIZE 12 +#define WORKER_JOB_PP_QUEUE_SIZE 13 + + +typedef enum __attribute__ ((__packed__)) { + RRD_FLAG_NONE = 0, + RRD_FLAG_DELETED = (1 << 0), // this is a deleted object (metrics, instances, contexts) + RRD_FLAG_COLLECTED = (1 << 1), // this object is currently being collected + RRD_FLAG_UPDATED = (1 << 2), // this object has updates to propagate + RRD_FLAG_ARCHIVED = (1 << 3), // this object is not currently being collected + RRD_FLAG_OWN_LABELS = (1 << 4), // this instance has its own labels - not linked to an RRDSET + RRD_FLAG_LIVE_RETENTION = (1 << 5), // we have got live retention from the database + RRD_FLAG_QUEUED_FOR_HUB = (1 << 6), // this context is currently queued to be dispatched to hub + RRD_FLAG_QUEUED_FOR_PP = (1 << 7), // this context is currently queued to be post-processed + RRD_FLAG_HIDDEN = (1 << 8), // don't expose this to the hub or the API + + RRD_FLAG_UPDATE_REASON_TRIGGERED = (1 << 9), // the update was triggered by the child object + RRD_FLAG_UPDATE_REASON_LOAD_SQL = (1 << 10), // this object has just been loaded from SQL + RRD_FLAG_UPDATE_REASON_NEW_OBJECT = (1 << 11), // this object has just been created + RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT = (1 << 12), // we received an update on this object + RRD_FLAG_UPDATE_REASON_CHANGED_LINKING = (1 << 13), // an instance or a metric switched RRDSET or RRDDIM + RRD_FLAG_UPDATE_REASON_CHANGED_METADATA = (1 << 14), // this context or instance changed uuid, name, units, title, family, chart type, priority, update every, rrd changed flags + RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 15), // this object has no retention + RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 16), // this object changed its oldest time in the db + RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 17), // this object change its latest time in the db + RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 18), // this object has stopped being collected + RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 19), // this object has started being collected + RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 20), // this context belongs to a host that just disconnected + RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 21), // this context is not used anymore + RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 22), // this context changed because of a db rotation + + RRD_FLAG_MERGED_COLLECTED_RI_TO_RC = (1 << 29), + + // action to perform on an object + RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION = (1 << 30), // this object has to update its retention from the db +} RRD_FLAGS; + +struct rrdcontext_reason { + RRD_FLAGS flag; + const char *name; + usec_t delay_ut; +}; + +extern struct rrdcontext_reason rrdcontext_reasons[]; + +#define RRD_FLAG_ALL_UPDATE_REASONS ( \ + RRD_FLAG_UPDATE_REASON_TRIGGERED \ + |RRD_FLAG_UPDATE_REASON_LOAD_SQL \ + |RRD_FLAG_UPDATE_REASON_NEW_OBJECT \ + |RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \ + |RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \ + |RRD_FLAG_UPDATE_REASON_CHANGED_METADATA \ + |RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + |RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T \ + |RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T \ + |RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED \ + |RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED \ + |RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ + |RRD_FLAG_UPDATE_REASON_DB_ROTATION \ + |RRD_FLAG_UPDATE_REASON_UNUSED \ + ) + +#define RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS ( \ + RRD_FLAG_ARCHIVED \ + |RRD_FLAG_HIDDEN \ + |RRD_FLAG_ALL_UPDATE_REASONS \ + ) + +#define RRD_FLAGS_REQUIRED_FOR_DELETIONS ( \ + RRD_FLAG_DELETED \ + |RRD_FLAG_LIVE_RETENTION \ +) + +#define RRD_FLAGS_PREVENTING_DELETIONS ( \ + RRD_FLAG_QUEUED_FOR_HUB \ + |RRD_FLAG_COLLECTED \ + |RRD_FLAG_QUEUED_FOR_PP \ +) + +// get all the flags of an object +#define rrd_flags_get(obj) __atomic_load_n(&((obj)->flags), __ATOMIC_SEQ_CST) + +// check if ANY of the given flags (bits) is set +#define rrd_flag_check(obj, flag) (rrd_flags_get(obj) & (flag)) + +// check if ALL the given flags (bits) are set +#define rrd_flag_check_all(obj, flag) (rrd_flag_check(obj, flag) == (flag)) + +// set one or more flags (bits) +#define rrd_flag_set(obj, flag) __atomic_or_fetch(&((obj)->flags), flag, __ATOMIC_SEQ_CST) + +// clear one or more flags (bits) +#define rrd_flag_clear(obj, flag) __atomic_and_fetch(&((obj)->flags), ~(flag), __ATOMIC_SEQ_CST) + +// replace the flags of an object, with the supplied ones +#define rrd_flags_replace(obj, all_flags) __atomic_store_n(&((obj)->flags), all_flags, __ATOMIC_SEQ_CST) + +static inline void +rrd_flag_add_remove_atomic(RRD_FLAGS *flags, RRD_FLAGS check, RRD_FLAGS conditionally_add, RRD_FLAGS always_remove) { + RRD_FLAGS expected, desired; + + do { + expected = *flags; + + desired = expected; + desired &= ~(always_remove); + + if(!(expected & check)) + desired |= (check | conditionally_add); + + } while(!__atomic_compare_exchange_n(flags, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); +} + +#define rrd_flag_set_collected(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_COLLECTED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + | RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ + ) + +#define rrd_flag_set_archived(obj) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_ARCHIVED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED | RRD_FLAG_UPDATED \ + \ + /* always remove these flags */ \ + , RRD_FLAG_COLLECTED \ + | RRD_FLAG_DELETED \ + | RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED \ + | RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ + ) + +#define rrd_flag_set_deleted(obj, reason) \ + rrd_flag_add_remove_atomic(&((obj)->flags) \ + /* check this flag */ \ + , RRD_FLAG_DELETED \ + \ + /* add these flags together with the above, if the above is not already set */ \ + , RRD_FLAG_UPDATE_REASON_ZERO_RETENTION | RRD_FLAG_UPDATED | (reason) \ + \ + /* always remove these flags */ \ + , RRD_FLAG_ARCHIVED \ + | RRD_FLAG_COLLECTED \ + ) + +#define rrd_flag_is_collected(obj) rrd_flag_check(obj, RRD_FLAG_COLLECTED) +#define rrd_flag_is_archived(obj) rrd_flag_check(obj, RRD_FLAG_ARCHIVED) +#define rrd_flag_is_deleted(obj) rrd_flag_check(obj, RRD_FLAG_DELETED) +#define rrd_flag_is_updated(obj) rrd_flag_check(obj, RRD_FLAG_UPDATED) + +// mark an object as updated, providing reasons (additional bits) +#define rrd_flag_set_updated(obj, reason) rrd_flag_set(obj, RRD_FLAG_UPDATED | (reason)) + +// clear an object as being updated, clearing also all the reasons +#define rrd_flag_unset_updated(obj) rrd_flag_clear(obj, RRD_FLAG_UPDATED | RRD_FLAG_ALL_UPDATE_REASONS) + + +typedef struct rrdmetric { + nd_uuid_t uuid; + + STRING *id; + STRING *name; + + RRDDIM *rrddim; + + time_t first_time_s; + time_t last_time_s; + RRD_FLAGS flags; + + struct rrdinstance *ri; +} RRDMETRIC; + +typedef struct rrdinstance { + nd_uuid_t uuid; + + STRING *id; + STRING *name; + STRING *title; + STRING *units; + STRING *family; + uint32_t priority:24; + RRDSET_TYPE chart_type; + + RRD_FLAGS flags; // flags related to this instance + time_t first_time_s; + time_t last_time_s; + + time_t update_every_s; // data collection frequency + RRDSET *rrdset; // pointer to RRDSET when collected, or NULL + + RRDLABELS *rrdlabels; // linked to RRDSET->chart_labels or own version + + struct rrdcontext *rc; + DICTIONARY *rrdmetrics; + + struct { + uint32_t collected_metrics_count; // a temporary variable to detect BEGIN/END without SET + // don't use it for other purposes + // it goes up and then resets to zero, on every iteration + } internal; +} RRDINSTANCE; + +typedef struct rrdcontext { + uint64_t version; + + STRING *id; + STRING *title; + STRING *units; + STRING *family; + uint32_t priority; + RRDSET_TYPE chart_type; + + SPINLOCK spinlock; + + RRD_FLAGS flags; + time_t first_time_s; + time_t last_time_s; + + VERSIONED_CONTEXT_DATA hub; + + DICTIONARY *rrdinstances; + RRDHOST *rrdhost; + + struct { + RRD_FLAGS queued_flags; // the last flags that triggered the post-processing + usec_t queued_ut; // the last time this was queued + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t executions; // how many times this context has been processed + } pp; + + struct { + RRD_FLAGS queued_flags; // the last flags that triggered the queueing + usec_t queued_ut; // the last time this was queued + usec_t delay_calc_ut; // the last time we calculated the scheduled_dispatched_ut + usec_t scheduled_dispatch_ut; // the time it was/is scheduled to be sent + usec_t dequeued_ut; // the last time we sent (or deduplicated) this context + size_t dispatches; // the number of times this has been dispatched to hub + } queue; + + struct { + uint32_t metrics; // the number of metrics in this context + } stats; +} RRDCONTEXT; + + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDMETRIC + +bool rrdmetric_update_retention(RRDMETRIC *rm); + +static inline RRDMETRIC *rrdmetric_acquired_value(RRDMETRIC_ACQUIRED *rma) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)rma); +} + +static inline RRDMETRIC_ACQUIRED *rrdmetric_acquired_dup(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return (RRDMETRIC_ACQUIRED *)dictionary_acquired_item_dup(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); +} + +static inline void rrdmetric_release(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + dictionary_acquired_item_release(rm->ri->rrdmetrics, (DICTIONARY_ITEM *)rma); +} + +void rrdmetric_rrddim_is_freed(RRDDIM *rd); +void rrdmetric_updated_rrddim_flags(RRDDIM *rd); +void rrdmetric_collected_rrddim(RRDDIM *rd); + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDINSTANCE + +static inline RRDINSTANCE *rrdinstance_acquired_value(RRDINSTANCE_ACQUIRED *ria) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)ria); +} + +static inline RRDINSTANCE_ACQUIRED *rrdinstance_acquired_dup(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return (RRDINSTANCE_ACQUIRED *)dictionary_acquired_item_dup(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); +} + +static inline void rrdinstance_release(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + dictionary_acquired_item_release(ri->rc->rrdinstances, (DICTIONARY_ITEM *)ria); +} + +void rrdinstance_from_rrdset(RRDSET *st); +void rrdinstance_rrdset_is_freed(RRDSET *st); +void rrdinstance_rrdset_has_updated_retention(RRDSET *st); +void rrdinstance_updated_rrdset_name(RRDSET *st); +void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, RRDSET *st); +void rrdinstance_updated_rrdset_flags(RRDSET *st); +void rrdinstance_collected_rrdset(RRDSET *st); + +void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function, RRD_FLAGS flags); + +// ---------------------------------------------------------------------------- +// helper one-liners for RRDCONTEXT + +static inline RRDCONTEXT *rrdcontext_acquired_value(RRDCONTEXT_ACQUIRED *rca) { + return dictionary_acquired_item_value((DICTIONARY_ITEM *)rca); +} + +static inline RRDCONTEXT_ACQUIRED *rrdcontext_acquired_dup(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return (RRDCONTEXT_ACQUIRED *)dictionary_acquired_item_dup(rc->rrdhost->rrdctx.contexts, (DICTIONARY_ITEM *)rca); +} + +static inline void rrdcontext_release(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + dictionary_acquired_item_release(rc->rrdhost->rrdctx.contexts, (DICTIONARY_ITEM *)rca); +} + +// ---------------------------------------------------------------------------- +// Forward definitions + +void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs); +void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs); + +#define rrdcontext_lock(rc) spinlock_lock(&((rc)->spinlock)) +#define rrdcontext_unlock(rc) spinlock_unlock(&((rc)->spinlock)) + +void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function); +void rrdcontext_trigger_updates(RRDCONTEXT *rc, const char *function); + +void rrdinstances_create_in_rrdcontext(RRDCONTEXT *rc); +void rrdinstances_destroy_from_rrdcontext(RRDCONTEXT *rc); + +void rrdmetrics_destroy_from_rrdinstance(RRDINSTANCE *ri); +void rrdmetrics_create_in_rrdinstance(RRDINSTANCE *ri); + +void rrdmetric_from_rrddim(RRDDIM *rd); + +void rrd_reasons_to_buffer_json_array_items(RRD_FLAGS flags, BUFFER *wb); + +#define rrdcontext_version_hash(host) rrdcontext_version_hash_with_callback(host, NULL, false, NULL) +uint64_t rrdcontext_version_hash_with_callback( + RRDHOST *host, + void (*callback)(RRDCONTEXT *, bool, void *), + bool snapshot, + void *bundle); + +void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused); + +void rrdcontext_update_from_collected_rrdinstance(RRDINSTANCE *ri); + +#endif //NETDATA_RRDCONTEXT_INTERNAL_H diff --git a/src/database/contexts/metric.c b/src/database/contexts/metric.c new file mode 100644 index 000000000..be29f33ea --- /dev/null +++ b/src/database/contexts/metric.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function); + +inline const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->id); +} + +inline const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string2str(rm->name); +} + +inline bool rrdmetric_acquired_has_name(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return (rm->name && rm->name != rm->id); +} + +inline STRING *rrdmetric_acquired_id_dup(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string_dup(rm->id); +} + +inline STRING *rrdmetric_acquired_name_dup(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return string_dup(rm->name); +} + +inline NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + if(rm->rrddim) + return rm->rrddim->collector.last_stored_value; + + return NAN; +} + +inline bool rrdmetric_acquired_belongs_to_instance(RRDMETRIC_ACQUIRED *rma, RRDINSTANCE_ACQUIRED *ria) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return rm->ri == ri; +} + +inline time_t rrdmetric_acquired_first_entry(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + return rm->first_time_s; +} + +inline time_t rrdmetric_acquired_last_entry(RRDMETRIC_ACQUIRED *rma) { + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + if(rrd_flag_check(rm, RRD_FLAG_COLLECTED)) + return 0; + + return rm->last_time_s; +} + +// ---------------------------------------------------------------------------- +// RRDMETRIC + +// free the contents of RRDMETRIC. +// RRDMETRIC itself is managed by DICTIONARY - no need to free it here. +static void rrdmetric_free(RRDMETRIC *rm) { + string_freez(rm->id); + string_freez(rm->name); + + rm->id = NULL; + rm->name = NULL; + rm->ri = NULL; +} + +// called when this rrdmetric is inserted to the rrdmetrics dictionary of a rrdinstance +// the constructor of the rrdmetric object +static void rrdmetric_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance) { + RRDMETRIC *rm = value; + + // link it to its parent + rm->ri = rrdinstance; + + // remove flags that we need to figure out at runtime + rm->flags = rm->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS; // no need for atomics + + // signal the react callback to do the job + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_NEW_OBJECT); +} + +// called when this rrdmetric is deleted from the rrdmetrics dictionary of a rrdinstance +// the destructor of the rrdmetric object +static void rrdmetric_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = value; + + internal_error(rm->rrddim, "RRDMETRIC: '%s' is freed but there is a RRDDIM linked to it.", string2str(rm->id)); + + // free the resources + rrdmetric_free(rm); +} + +// called when the same rrdmetric is inserted again to the rrdmetrics dictionary of a rrdinstance +// while this is called, the dictionary is write locked, but there may be other users of the object +static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = old_value; + RRDMETRIC *rm_new = new_value; + + internal_error(rm->id != rm_new->id, + "RRDMETRIC: '%s' cannot change id to '%s'", + string2str(rm->id), string2str(rm_new->id)); + + if(!uuid_eq(rm->uuid, rm_new->uuid)) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid1); + uuid_unparse(rm_new->uuid, uuid2); + + time_t old_first_time_s = 0; + time_t old_last_time_s = 0; + if(rrdmetric_update_retention(rm)) { + old_first_time_s = rm->first_time_s; + old_last_time_s = rm->last_time_s; + } + + uuid_copy(rm->uuid, rm_new->uuid); + + time_t new_first_time_s = 0; + time_t new_last_time_s = 0; + if(rrdmetric_update_retention(rm)) { + new_first_time_s = rm->first_time_s; + new_last_time_s = rm->last_time_s; + } + + internal_error(true, + "RRDMETRIC: '%s' of instance '%s' of host '%s' changed UUID from '%s' (retention %ld to %ld, %ld secs) to '%s' (retention %ld to %ld, %ld secs)" + , string2str(rm->id) + , string2str(rm->ri->id) + , rrdhost_hostname(rm->ri->rc->rrdhost) + , uuid1, old_first_time_s, old_last_time_s, old_last_time_s - old_first_time_s + , uuid2, new_first_time_s, new_last_time_s, new_last_time_s - new_first_time_s + ); +#else + uuid_copy(rm->uuid, rm_new->uuid); +#endif + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(rm->rrddim && rm_new->rrddim && rm->rrddim != rm_new->rrddim) { + rm->rrddim = rm_new->rrddim; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(rm->rrddim && !uuid_eq(rm->uuid, rm->rrddim->metric_uuid)) { + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(rm->uuid, uuid1); + uuid_unparse(rm_new->uuid, uuid2); + internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rrddim_id(rm->rrddim), uuid1, uuid2); + } +#endif + + if(rm->rrddim != rm_new->rrddim) + rm->rrddim = rm_new->rrddim; + + if(rm->name != rm_new->name) { + STRING *old = rm->name; + rm->name = string_dup(rm_new->name); + string_freez(old); + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + + if(!rm->first_time_s || (rm_new->first_time_s && rm_new->first_time_s < rm->first_time_s)) { + rm->first_time_s = rm_new->first_time_s; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(!rm->last_time_s || (rm_new->last_time_s && rm_new->last_time_s > rm->last_time_s)) { + rm->last_time_s = rm_new->last_time_s; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set(rm, rm_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no needs for atomics on rm_new + + if(rrd_flag_is_collected(rm) && rrd_flag_is_archived(rm)) + rrd_flag_set_collected(rm); + + if(rrd_flag_check(rm, RRD_FLAG_UPDATED)) + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT); + + rrdmetric_free(rm_new); + + // the react callback will continue from here + return rrd_flag_is_updated(rm); +} + +// this is called after the insert or the conflict callbacks, +// but the dictionary is now unlocked +static void rrdmetric_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdinstance __maybe_unused) { + RRDMETRIC *rm = value; + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} + +void rrdmetrics_create_in_rrdinstance(RRDINSTANCE *ri) { + if(unlikely(!ri)) return; + if(likely(ri->rrdmetrics)) return; + + ri->rrdmetrics = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + &dictionary_stats_category_rrdcontext, sizeof(RRDMETRIC)); + + dictionary_register_insert_callback(ri->rrdmetrics, rrdmetric_insert_callback, ri); + dictionary_register_delete_callback(ri->rrdmetrics, rrdmetric_delete_callback, ri); + dictionary_register_conflict_callback(ri->rrdmetrics, rrdmetric_conflict_callback, ri); + dictionary_register_react_callback(ri->rrdmetrics, rrdmetric_react_callback, ri); +} + +void rrdmetrics_destroy_from_rrdinstance(RRDINSTANCE *ri) { + if(unlikely(!ri || !ri->rrdmetrics)) return; + dictionary_destroy(ri->rrdmetrics); + ri->rrdmetrics = NULL; +} + +// trigger post-processing of the rrdmetric, escalating changes to the rrdinstance it belongs +static void rrdmetric_trigger_updates(RRDMETRIC *rm, const char *function) { + if(unlikely(rrd_flag_is_collected(rm)) && (!rm->rrddim || rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD))) + rrd_flag_set_archived(rm); + + if(rrd_flag_is_updated(rm) || !rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION)) { + rrd_flag_set_updated(rm->ri, RRD_FLAG_UPDATE_REASON_TRIGGERED); + rrdcontext_queue_for_post_processing(rm->ri->rc, function, rm->flags); + } +} + +// ---------------------------------------------------------------------------- +// RRDMETRIC HOOKS ON RRDDIM + +void rrdmetric_from_rrddim(RRDDIM *rd) { + if(unlikely(!rd->rrdset)) + fatal("RRDMETRIC: rrddim '%s' does not have a rrdset.", rrddim_id(rd)); + + if(unlikely(!rd->rrdset->rrdhost)) + fatal("RRDMETRIC: rrdset '%s' does not have a rrdhost", rrdset_id(rd->rrdset)); + + if(unlikely(!rd->rrdset->rrdcontexts.rrdinstance)) + fatal("RRDMETRIC: rrdset '%s' does not have a rrdinstance", rrdset_id(rd->rrdset)); + + RRDINSTANCE *ri = rrdinstance_acquired_value(rd->rrdset->rrdcontexts.rrdinstance); + + RRDMETRIC trm = { + .id = string_dup(rd->id), + .name = string_dup(rd->name), + .flags = RRD_FLAG_NONE, // no need for atomics + .rrddim = rd, + }; + uuid_copy(trm.uuid, rd->metric_uuid); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_set_and_acquire_item(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm)); + + if(rd->rrdcontexts.rrdmetric) + rrdmetric_release(rd->rrdcontexts.rrdmetric); + + rd->rrdcontexts.rrdmetric = rma; + rd->rrdcontexts.collected = false; +} + +#define rrddim_get_rrdmetric(rd) rrddim_get_rrdmetric_with_trace(rd, __FUNCTION__) +static inline RRDMETRIC *rrddim_get_rrdmetric_with_trace(RRDDIM *rd, const char *function) { + if(unlikely(!rd->rrdcontexts.rrdmetric)) { + netdata_log_error("RRDMETRIC: RRDDIM '%s' is not linked to an RRDMETRIC at %s()", rrddim_id(rd), function); + return NULL; + } + + RRDMETRIC *rm = rrdmetric_acquired_value(rd->rrdcontexts.rrdmetric); + if(unlikely(!rm)) { + netdata_log_error("RRDMETRIC: RRDDIM '%s' lost the link to its RRDMETRIC at %s()", rrddim_id(rd), function); + return NULL; + } + + if(unlikely(rm->rrddim != rd)) + fatal("RRDMETRIC: '%s' is not linked to RRDDIM '%s' at %s()", string2str(rm->id), rrddim_id(rd), function); + + return rm; +} + +inline void rrdmetric_rrddim_is_freed(RRDDIM *rd) { + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(rrd_flag_is_collected(rm))) + rrd_flag_set_archived(rm); + + rm->rrddim = NULL; + rrdmetric_trigger_updates(rm, __FUNCTION__ ); + rrdmetric_release(rd->rrdcontexts.rrdmetric); + rd->rrdcontexts.rrdmetric = NULL; + rd->rrdcontexts.collected = false; +} + +inline void rrdmetric_updated_rrddim_flags(RRDDIM *rd) { + rd->rrdcontexts.collected = false; + + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED|RRDDIM_FLAG_OBSOLETE))) { + if(unlikely(rrd_flag_is_collected(rm))) + rrd_flag_set_archived(rm); + } + + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} + +inline void rrdmetric_collected_rrddim(RRDDIM *rd) { + if(rd->rrdcontexts.collected) + return; + + rd->rrdcontexts.collected = true; + + RRDMETRIC *rm = rrddim_get_rrdmetric(rd); + if(unlikely(!rm)) return; + + if(unlikely(!rrd_flag_is_collected(rm))) + rrd_flag_set_collected(rm); + + // we use this variable to detect BEGIN/END without SET + rm->ri->internal.collected_metrics_count++; + + rrdmetric_trigger_updates(rm, __FUNCTION__ ); +} diff --git a/src/database/contexts/query_scope.c b/src/database/contexts/query_scope.c new file mode 100644 index 000000000..f3bcd0b3f --- /dev/null +++ b/src/database/contexts/query_scope.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +ssize_t query_scope_foreach_host(SIMPLE_PATTERN *scope_hosts_sp, SIMPLE_PATTERN *hosts_sp, + foreach_host_cb_t cb, void *data, + struct query_versions *versions, + char *host_node_id_str) { + char uuid[UUID_STR_LEN]; + if(!host_node_id_str) host_node_id_str = uuid; + host_node_id_str[0] = '\0'; + + RRDHOST *host; + ssize_t added = 0; + uint64_t v_hash = 0; + uint64_t h_hash = 0; + uint64_t a_hash = 0; + uint64_t t_hash = 0; + + dfe_start_read(rrdhost_root_index, host) { + if(host->node_id) + uuid_unparse_lower(*host->node_id, host_node_id_str); + else + host_node_id_str[0] = '\0'; + + SIMPLE_PATTERN_RESULT match = SP_MATCHED_POSITIVE; + if(scope_hosts_sp) { + match = simple_pattern_matches_string_extract(scope_hosts_sp, host->hostname, NULL, 0); + if(match == SP_NOT_MATCHED) { + match = simple_pattern_matches_extract(scope_hosts_sp, host->machine_guid, NULL, 0); + if(match == SP_NOT_MATCHED && *host_node_id_str) + match = simple_pattern_matches_extract(scope_hosts_sp, host_node_id_str, NULL, 0); + } + } + + if(match != SP_MATCHED_POSITIVE) + continue; + + dfe_unlock(host); + + if(hosts_sp) { + match = simple_pattern_matches_string_extract(hosts_sp, host->hostname, NULL, 0); + if(match == SP_NOT_MATCHED) { + match = simple_pattern_matches_extract(hosts_sp, host->machine_guid, NULL, 0); + if(match == SP_NOT_MATCHED && *host_node_id_str) + match = simple_pattern_matches_extract(hosts_sp, host_node_id_str, NULL, 0); + } + } + + bool queryable_host = (match == SP_MATCHED_POSITIVE); + + v_hash += dictionary_version(host->rrdctx.contexts); + h_hash += dictionary_version(host->rrdctx.hub_queue); + a_hash += dictionary_version(host->rrdcalc_root_index); + t_hash += __atomic_load_n(&host->health_transitions, __ATOMIC_RELAXED); + ssize_t ret = cb(data, host, queryable_host); + if(ret < 0) { + added = ret; + break; + } + added += ret; + } + dfe_done(host); + + if(versions) { + versions->contexts_hard_hash = v_hash; + versions->contexts_soft_hash = h_hash; + versions->alerts_hard_hash = a_hash; + versions->alerts_soft_hash = t_hash; + } + + return added; +} + +ssize_t query_scope_foreach_context(RRDHOST *host, const char *scope_contexts, SIMPLE_PATTERN *scope_contexts_sp, + SIMPLE_PATTERN *contexts_sp, foreach_context_cb_t cb, bool queryable_host, void *data) { + if(unlikely(!host->rrdctx.contexts)) + return 0; + + ssize_t added = 0; + + RRDCONTEXT_ACQUIRED *rca = NULL; + + if(scope_contexts) + rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item(host->rrdctx.contexts, scope_contexts); + + if(likely(rca)) { + // we found it! + + bool queryable_context = queryable_host; + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(queryable_context && contexts_sp && !simple_pattern_matches_string(contexts_sp, rc->id)) + queryable_context = false; + + added = cb(data, rca, queryable_context); + + rrdcontext_release(rca); + } + else { + // Probably it is a pattern, we need to search for it... + RRDCONTEXT *rc; + dfe_start_read(host->rrdctx.contexts, rc) { + if(scope_contexts_sp && !simple_pattern_matches_string(scope_contexts_sp, rc->id)) + continue; + + dfe_unlock(rc); + + bool queryable_context = queryable_host; + if(queryable_context && contexts_sp && !simple_pattern_matches_string(contexts_sp, rc->id)) + queryable_context = false; + + ssize_t ret = cb(data, (RRDCONTEXT_ACQUIRED *)rc_dfe.item, queryable_context); + + if(ret < 0) { + added = ret; + break; + } + + added += ret; + } + dfe_done(rc); + } + + return added; +} + diff --git a/src/database/contexts/query_target.c b/src/database/contexts/query_target.c new file mode 100644 index 000000000..29a9c3e59 --- /dev/null +++ b/src/database/contexts/query_target.c @@ -0,0 +1,1233 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +#define QUERY_TARGET_MAX_REALLOC_INCREASE 500 +#define query_target_realloc_size(size, start) \ + (size) ? ((size) < QUERY_TARGET_MAX_REALLOC_INCREASE ? (size) * 2 : (size) + QUERY_TARGET_MAX_REALLOC_INCREASE) : (start) + +static void query_metric_release(QUERY_TARGET *qt, QUERY_METRIC *qm); +static void query_dimension_release(QUERY_DIMENSION *qd); +static void query_instance_release(QUERY_INSTANCE *qi); +static void query_context_release(QUERY_CONTEXT *qc); +static void query_node_release(QUERY_NODE *qn); + +static __thread QUERY_TARGET *thread_qt = NULL; +static struct { + struct { + SPINLOCK spinlock; + size_t count; + QUERY_TARGET *base; + } available; + + struct { + SPINLOCK spinlock; + size_t count; + QUERY_TARGET *base; + } used; +} query_target_base = { + .available = { + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .base = NULL, + .count = 0, + }, + .used = { + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .base = NULL, + .count = 0, + }, +}; + +static void query_target_destroy(QUERY_TARGET *qt) { + __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->query.size * sizeof(*qt->query.array), __ATOMIC_RELAXED); + freez(qt->query.array); + + __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->dimensions.size * sizeof(*qt->dimensions.array), __ATOMIC_RELAXED); + freez(qt->dimensions.array); + + __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->instances.size * sizeof(*qt->instances.array), __ATOMIC_RELAXED); + freez(qt->instances.array); + + __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->contexts.size * sizeof(*qt->contexts.array), __ATOMIC_RELAXED); + freez(qt->contexts.array); + + __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->nodes.size * sizeof(*qt->nodes.array), __ATOMIC_RELAXED); + freez(qt->nodes.array); + + freez(qt); +} + +void query_target_release(QUERY_TARGET *qt) { + if(unlikely(!qt)) return; + + internal_fatal(!qt->internal.used, "QUERY TARGET: qt to be released is not used"); + + simple_pattern_free(qt->nodes.scope_pattern); + qt->nodes.scope_pattern = NULL; + + simple_pattern_free(qt->nodes.pattern); + qt->nodes.pattern = NULL; + + simple_pattern_free(qt->contexts.scope_pattern); + qt->contexts.scope_pattern = NULL; + + simple_pattern_free(qt->contexts.pattern); + qt->contexts.pattern = NULL; + + simple_pattern_free(qt->instances.pattern); + qt->instances.pattern = NULL; + + simple_pattern_free(qt->instances.chart_label_key_pattern); + qt->instances.chart_label_key_pattern = NULL; + + simple_pattern_free(qt->instances.labels_pattern); + qt->instances.labels_pattern = NULL; + simple_pattern_free(qt->query.pattern); + qt->query.pattern = NULL; + + // release the query + for(size_t i = 0, used = qt->query.used; i < used ;i++) { + QUERY_METRIC *qm = query_metric(qt, i); + query_metric_release(qt, qm); + } + qt->query.used = 0; + + // release the dimensions + for(size_t i = 0, used = qt->dimensions.used; i < used ; i++) { + QUERY_DIMENSION *qd = query_dimension(qt, i); + query_dimension_release(qd); + } + qt->dimensions.used = 0; + + // release the instances + for(size_t i = 0, used = qt->instances.used; i < used ;i++) { + QUERY_INSTANCE *qi = query_instance(qt, i); + query_instance_release(qi); + } + qt->instances.used = 0; + + // release the contexts + for(size_t i = 0, used = qt->contexts.used; i < used ;i++) { + QUERY_CONTEXT *qc = query_context(qt, i); + rrdcontext_release(qc->rca); + qc->rca = NULL; + } + qt->contexts.used = 0; + + // release the nodes + for(size_t i = 0, used = qt->nodes.used; i < used ; i++) { + QUERY_NODE *qn = query_node(qt, i); + query_node_release(qn); + } + qt->nodes.used = 0; + + qt->db.minimum_latest_update_every_s = 0; + qt->db.first_time_s = 0; + qt->db.last_time_s = 0; + + for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) + qt->group_by[g].used = 0; + + qt->id[0] = '\0'; + + spinlock_lock(&query_target_base.used.spinlock); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(query_target_base.used.base, qt, internal.prev, internal.next); + query_target_base.used.count--; + spinlock_unlock(&query_target_base.used.spinlock); + + qt->internal.used = false; + thread_qt = NULL; + + if (qt->internal.queries > 1000) { + query_target_destroy(qt); + } + else { + spinlock_lock(&query_target_base.available.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(query_target_base.available.base, qt, internal.prev, internal.next); + query_target_base.available.count++; + spinlock_unlock(&query_target_base.available.spinlock); + } +} + +static QUERY_TARGET *query_target_get(void) { + spinlock_lock(&query_target_base.available.spinlock); + QUERY_TARGET *qt = query_target_base.available.base; + if (qt) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(query_target_base.available.base, qt, internal.prev, internal.next); + query_target_base.available.count--; + } + spinlock_unlock(&query_target_base.available.spinlock); + + if(unlikely(!qt)) + qt = callocz(1, sizeof(*qt)); + + spinlock_lock(&query_target_base.used.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(query_target_base.used.base, qt, internal.prev, internal.next); + query_target_base.used.count++; + spinlock_unlock(&query_target_base.used.spinlock); + + qt->internal.used = true; + qt->internal.queries++; + thread_qt = qt; + + return qt; +} + +// this is used to release a query target from a cancelled thread +void query_target_free(void) { + query_target_release(thread_qt); +} + +// ---------------------------------------------------------------------------- +// query API + +typedef struct query_target_locals { + time_t start_s; + + QUERY_TARGET *qt; + + RRDSET *st; + + const char *scope_nodes; + const char *scope_contexts; + + const char *nodes; + const char *contexts; + const char *instances; + const char *dimensions; + const char *chart_label_key; + const char *labels; + const char *alerts; + + long long after; + long long before; + bool match_ids; + bool match_names; + + size_t metrics_skipped_due_to_not_matching_timeframe; + + char host_node_id_str[UUID_STR_LEN]; + QUERY_NODE *qn; // temp to pass on callbacks, ignore otherwise - no need to free +} QUERY_TARGET_LOCALS; + +struct storage_engine *query_metric_storage_engine(QUERY_TARGET *qt, QUERY_METRIC *qm, size_t tier) { + QUERY_NODE *qn = query_node(qt, qm->link.query_node_id); + return qn->rrdhost->db[tier].eng; +} + +static inline void query_metric_release(QUERY_TARGET *qt, QUERY_METRIC *qm) { + qm->plan.used = 0; + + // reset the tiers + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(qm->tiers[tier].smh) { + STORAGE_ENGINE *eng = query_metric_storage_engine(qt, qm, tier); + eng->api.metric_release(qm->tiers[tier].smh); + qm->tiers[tier].smh = NULL; + } + } +} + +static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CONTEXT *qc, + QUERY_INSTANCE *qi, size_t qd_slot, RRDMETRIC *rm, RRDR_DIMENSION_FLAGS options) { + QUERY_TARGET *qt = qtl->qt; + RRDINSTANCE *ri = rm->ri; + + time_t common_first_time_s = 0; + time_t common_last_time_s = 0; + time_t common_update_every_s = 0; + size_t tiers_added = 0; + + struct { + STORAGE_ENGINE *eng; + STORAGE_METRIC_HANDLE *smh; + time_t db_first_time_s; + time_t db_last_time_s; + time_t db_update_every_s; + } tier_retention[storage_tiers]; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + STORAGE_ENGINE *eng = qn->rrdhost->db[tier].eng; + tier_retention[tier].eng = eng; + tier_retention[tier].db_update_every_s = (time_t) (qn->rrdhost->db[tier].tier_grouping * ri->update_every_s); + + if(rm->rrddim && rm->rrddim->tiers[tier].smh) + tier_retention[tier].smh = eng->api.metric_dup(rm->rrddim->tiers[tier].smh); + else + tier_retention[tier].smh = eng->api.metric_get(qn->rrdhost->db[tier].si, &rm->uuid); + + if(tier_retention[tier].smh) { + tier_retention[tier].db_first_time_s = storage_engine_oldest_time_s(tier_retention[tier].eng->seb, tier_retention[tier].smh); + tier_retention[tier].db_last_time_s = storage_engine_latest_time_s(tier_retention[tier].eng->seb, tier_retention[tier].smh); + + if(!common_first_time_s) + common_first_time_s = tier_retention[tier].db_first_time_s; + else if(tier_retention[tier].db_first_time_s) + common_first_time_s = MIN(common_first_time_s, tier_retention[tier].db_first_time_s); + + if(!common_last_time_s) + common_last_time_s = tier_retention[tier].db_last_time_s; + else + common_last_time_s = MAX(common_last_time_s, tier_retention[tier].db_last_time_s); + + if(!common_update_every_s) + common_update_every_s = tier_retention[tier].db_update_every_s; + else if(tier_retention[tier].db_update_every_s) + common_update_every_s = MIN(common_update_every_s, tier_retention[tier].db_update_every_s); + + tiers_added++; + } + else { + tier_retention[tier].db_first_time_s = 0; + tier_retention[tier].db_last_time_s = 0; + tier_retention[tier].db_update_every_s = 0; + } + } + + for (size_t tier = 0; tier < storage_tiers; tier++) { + if(!qt->db.tiers[tier].update_every || (tier_retention[tier].db_update_every_s && tier_retention[tier].db_update_every_s < qt->db.tiers[tier].update_every)) + qt->db.tiers[tier].update_every = tier_retention[tier].db_update_every_s; + + if(!qt->db.tiers[tier].retention.first_time_s || (tier_retention[tier].db_first_time_s && tier_retention[tier].db_first_time_s < qt->db.tiers[tier].retention.first_time_s)) + qt->db.tiers[tier].retention.first_time_s = tier_retention[tier].db_first_time_s; + + if(!qt->db.tiers[tier].retention.last_time_s || (tier_retention[tier].db_last_time_s && tier_retention[tier].db_last_time_s > qt->db.tiers[tier].retention.last_time_s)) + qt->db.tiers[tier].retention.last_time_s = tier_retention[tier].db_last_time_s; + } + + bool timeframe_matches = + (tiers_added && + query_matches_retention(qt->window.after, qt->window.before, common_first_time_s, common_last_time_s, common_update_every_s)) + ? true : false; + + if(timeframe_matches) { + if(ri->rrdset) + ri->rrdset->last_accessed_time_s = qtl->start_s; + + if (qt->query.used == qt->query.size) { + size_t old_mem = qt->query.size * sizeof(*qt->query.array); + qt->query.size = query_target_realloc_size(qt->query.size, 4); + size_t new_mem = qt->query.size * sizeof(*qt->query.array); + qt->query.array = reallocz(qt->query.array, new_mem); + + __atomic_add_fetch(&netdata_buffers_statistics.query_targets_size, new_mem - old_mem, __ATOMIC_RELAXED); + } + QUERY_METRIC *qm = &qt->query.array[qt->query.used++]; + memset(qm, 0, sizeof(*qm)); + + qm->status = options; + + qm->link.query_node_id = qn->slot; + qm->link.query_context_id = qc->slot; + qm->link.query_instance_id = qi->slot; + qm->link.query_dimension_id = qd_slot; + + if (!qt->db.first_time_s || common_first_time_s < qt->db.first_time_s) + qt->db.first_time_s = common_first_time_s; + + if (!qt->db.last_time_s || common_last_time_s > qt->db.last_time_s) + qt->db.last_time_s = common_last_time_s; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + internal_fatal(tier_retention[tier].eng != query_metric_storage_engine(qt, qm, tier), "QUERY TARGET: storage engine mismatch"); + qm->tiers[tier].smh = tier_retention[tier].smh; + qm->tiers[tier].db_first_time_s = tier_retention[tier].db_first_time_s; + qm->tiers[tier].db_last_time_s = tier_retention[tier].db_last_time_s; + qm->tiers[tier].db_update_every_s = tier_retention[tier].db_update_every_s; + } + + return true; + } + + // cleanup anything we allocated to the retention we will not use + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (tier_retention[tier].smh) { + tier_retention[tier].eng->api.metric_release(tier_retention[tier].smh); + tier_retention[tier].smh = NULL; + } + } + + return false; +} + +static inline bool rrdmetric_retention_matches_query(QUERY_TARGET *qt, RRDMETRIC *rm, time_t now_s) { + time_t first_time_s = rm->first_time_s; + time_t last_time_s = rrd_flag_is_collected(rm) ? now_s : rm->last_time_s; + time_t update_every_s = rm->ri->update_every_s; + return query_matches_retention(qt->window.after, qt->window.before, first_time_s, last_time_s, update_every_s); +} + +static inline void query_dimension_release(QUERY_DIMENSION *qd) { + rrdmetric_release(qd->rma); + qd->rma = NULL; +} + +static QUERY_DIMENSION *query_dimension_allocate(QUERY_TARGET *qt, RRDMETRIC_ACQUIRED *rma, QUERY_STATUS status, size_t priority) { + if(qt->dimensions.used == qt->dimensions.size) { + size_t old_mem = qt->dimensions.size * sizeof(*qt->dimensions.array); + qt->dimensions.size = query_target_realloc_size(qt->dimensions.size, 4); + size_t new_mem = qt->dimensions.size * sizeof(*qt->dimensions.array); + qt->dimensions.array = reallocz(qt->dimensions.array, new_mem); + + __atomic_add_fetch(&netdata_buffers_statistics.query_targets_size, new_mem - old_mem, __ATOMIC_RELAXED); + } + QUERY_DIMENSION *qd = &qt->dimensions.array[qt->dimensions.used]; + memset(qd, 0, sizeof(*qd)); + + qd->slot = qt->dimensions.used++; + qd->rma = rrdmetric_acquired_dup(rma); + qd->status = status; + qd->priority = priority; + + return qd; +} + +static bool query_dimension_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi, + RRDMETRIC_ACQUIRED *rma, bool queryable_instance, size_t *metrics_added, size_t priority) { + QUERY_TARGET *qt = qtl->qt; + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + if(rrd_flag_is_deleted(rm)) + return false; + + QUERY_STATUS status = QUERY_STATUS_NONE; + + bool undo = false; + if(!queryable_instance) { + if(rrdmetric_retention_matches_query(qt, rm, qtl->start_s)) { + qi->metrics.excluded++; + qc->metrics.excluded++; + qn->metrics.excluded++; + status |= QUERY_STATUS_EXCLUDED; + } + else + undo = true; + } + else { + RRDR_DIMENSION_FLAGS options = RRDR_DIMENSION_DEFAULT; + bool needed = false; + + if (qt->query.pattern) { + // the user asked for specific dimensions + + SIMPLE_PATTERN_RESULT ret = SP_NOT_MATCHED; + + if(qtl->match_ids) + ret = simple_pattern_matches_string_extract(qt->query.pattern, rm->id, NULL, 0); + + if(ret == SP_NOT_MATCHED && qtl->match_names && (rm->name != rm->id || !qtl->match_ids)) + ret = simple_pattern_matches_string_extract(qt->query.pattern, rm->name, NULL, 0); + + if(ret == SP_MATCHED_POSITIVE) { + needed = true; + options |= RRDR_DIMENSION_SELECTED | RRDR_DIMENSION_NONZERO; + } + else { + // the user selection does not match this dimension + // but, we may still need to query it + + if (query_target_needs_all_dimensions(qt)) { + // this is percentage calculation + // so, we need this dimension to calculate the percentage + needed = true; + options |= RRDR_DIMENSION_HIDDEN; + } + else { + // the user did not select this dimension + // and the calculation is not percentage + // so, no need to query it + ; + } + } + } + else { + // we don't have a dimensions pattern + // so this is a selected dimension + // if it is not hidden + + if(rrd_flag_check(rm, RRD_FLAG_HIDDEN) || (rm->rrddim && rrddim_option_check(rm->rrddim, RRDDIM_OPTION_HIDDEN))) { + // this is a hidden dimension + // we don't need to query it + status |= QUERY_STATUS_DIMENSION_HIDDEN; + options |= RRDR_DIMENSION_HIDDEN; + + if (query_target_needs_all_dimensions(qt)) { + // this is percentage calculation + // so, we need this dimension to calculate the percentage + needed = true; + } + } + else { + // this is a not hidden dimension + // and the user did not provide any selection for dimensions + // so, we need to query it + needed = true; + options |= RRDR_DIMENSION_SELECTED; + } + } + + if (needed) { + if(query_metric_add(qtl, qn, qc, qi, qt->dimensions.used, rm, options)) { + (*metrics_added)++; + + qi->metrics.selected++; + qc->metrics.selected++; + qn->metrics.selected++; + } + else { + undo = true; + qtl->metrics_skipped_due_to_not_matching_timeframe++; + } + } + else if(rrdmetric_retention_matches_query(qt, rm, qtl->start_s)) { + qi->metrics.excluded++; + qc->metrics.excluded++; + qn->metrics.excluded++; + status |= QUERY_STATUS_EXCLUDED; + } + else + undo = true; + } + + if(undo) + return false; + + query_dimension_allocate(qt, rma, status, priority); + return true; +} + +static inline STRING *rrdinstance_create_id_fqdn_v1(RRDINSTANCE_ACQUIRED *ria) { + if(unlikely(!ria)) + return NULL; + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string_dup(ri->id); +} + +static inline STRING *rrdinstance_create_name_fqdn_v1(RRDINSTANCE_ACQUIRED *ria) { + if(unlikely(!ria)) + return NULL; + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return string_dup(ri->name); +} + +static inline STRING *rrdinstance_create_id_fqdn_v2(RRDINSTANCE_ACQUIRED *ria) { + if(unlikely(!ria)) + return NULL; + + char buffer[RRD_ID_LENGTH_MAX + 1]; + + RRDHOST *host = rrdinstance_acquired_rrdhost(ria); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s@%s", rrdinstance_acquired_id(ria), host->machine_guid); + return string_strdupz(buffer); +} + +static inline STRING *rrdinstance_create_name_fqdn_v2(RRDINSTANCE_ACQUIRED *ria) { + if(unlikely(!ria)) + return NULL; + + char buffer[RRD_ID_LENGTH_MAX + 1]; + + RRDHOST *host = rrdinstance_acquired_rrdhost(ria); + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s@%s", rrdinstance_acquired_name(ria), rrdhost_hostname(host)); + return string_strdupz(buffer); +} + +inline STRING *query_instance_id_fqdn(QUERY_INSTANCE *qi, size_t version) { + if(!qi->id_fqdn) { + if (version <= 1) + qi->id_fqdn = rrdinstance_create_id_fqdn_v1(qi->ria); + else + qi->id_fqdn = rrdinstance_create_id_fqdn_v2(qi->ria); + } + + return qi->id_fqdn; +} + +inline STRING *query_instance_name_fqdn(QUERY_INSTANCE *qi, size_t version) { + if(!qi->name_fqdn) { + if (version <= 1) + qi->name_fqdn = rrdinstance_create_name_fqdn_v1(qi->ria); + else + qi->name_fqdn = rrdinstance_create_name_fqdn_v2(qi->ria); + } + + return qi->name_fqdn; +} + +RRDSET *rrdinstance_acquired_rrdset(RRDINSTANCE_ACQUIRED *ria) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + return ri->rrdset; +} + +const char *rrdcontext_acquired_units(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return string2str(rc->units); +} + +RRDSET_TYPE rrdcontext_acquired_chart_type(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return rc->chart_type; +} + +const char *rrdcontext_acquired_title(RRDCONTEXT_ACQUIRED *rca) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + return string2str(rc->title); +} + +static void query_target_eval_instance_rrdcalc(QUERY_TARGET_LOCALS *qtl __maybe_unused, + QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi) { + RRDSET *st = rrdinstance_acquired_rrdset(qi->ria); + if (st) { + rw_spinlock_read_lock(&st->alerts.spinlock); + for (RRDCALC *rc = st->alerts.base; rc; rc = rc->next) { + switch(rc->status) { + case RRDCALC_STATUS_CLEAR: + qi->alerts.clear++; + qc->alerts.clear++; + qn->alerts.clear++; + break; + + case RRDCALC_STATUS_WARNING: + qi->alerts.warning++; + qc->alerts.warning++; + qn->alerts.warning++; + break; + + case RRDCALC_STATUS_CRITICAL: + qi->alerts.critical++; + qc->alerts.critical++; + qn->alerts.critical++; + break; + + default: + case RRDCALC_STATUS_UNINITIALIZED: + case RRDCALC_STATUS_UNDEFINED: + case RRDCALC_STATUS_REMOVED: + qi->alerts.other++; + qc->alerts.other++; + qn->alerts.other++; + break; + } + } + rw_spinlock_read_unlock(&st->alerts.spinlock); + } +} + +static bool query_target_match_alert_pattern(RRDINSTANCE_ACQUIRED *ria, SIMPLE_PATTERN *pattern) { + if(!pattern) + return true; + + RRDSET *st = rrdinstance_acquired_rrdset(ria); + if (!st) + return false; + + BUFFER *wb = NULL; + bool matched = false; + rw_spinlock_read_lock(&st->alerts.spinlock); + if (st->alerts.base) { + for (RRDCALC *rc = st->alerts.base; rc; rc = rc->next) { + SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_string_extract(pattern, rc->config.name, NULL, 0); + + if(ret == SP_MATCHED_POSITIVE) { + matched = true; + break; + } + else if(ret == SP_MATCHED_NEGATIVE) + break; + + if (!wb) + wb = buffer_create(0, NULL); + else + buffer_flush(wb); + + buffer_fast_strcat(wb, string2str(rc->config.name), string_strlen(rc->config.name)); + buffer_fast_strcat(wb, ":", 1); + buffer_strcat(wb, rrdcalc_status2string(rc->status)); + + ret = simple_pattern_matches_buffer_extract(pattern, wb, NULL, 0); + + if(ret == SP_MATCHED_POSITIVE) { + matched = true; + break; + } + else if(ret == SP_MATCHED_NEGATIVE) + break; + } + } + rw_spinlock_read_unlock(&st->alerts.spinlock); + + buffer_free(wb); + return matched; +} + +static inline void query_instance_release(QUERY_INSTANCE *qi) { + if(qi->ria) { + rrdinstance_release(qi->ria); + qi->ria = NULL; + } + + string_freez(qi->id_fqdn); + qi->id_fqdn = NULL; + + string_freez(qi->name_fqdn); + qi->name_fqdn = NULL; +} + +static inline QUERY_INSTANCE *query_instance_allocate(QUERY_TARGET *qt, RRDINSTANCE_ACQUIRED *ria, size_t qn_slot) { + if(qt->instances.used == qt->instances.size) { + size_t old_mem = qt->instances.size * sizeof(*qt->instances.array); + qt->instances.size = query_target_realloc_size(qt->instances.size, 2); + size_t new_mem = qt->instances.size * sizeof(*qt->instances.array); + qt->instances.array = reallocz(qt->instances.array, new_mem); + + __atomic_add_fetch(&netdata_buffers_statistics.query_targets_size, new_mem - old_mem, __ATOMIC_RELAXED); + } + QUERY_INSTANCE *qi = &qt->instances.array[qt->instances.used]; + memset(qi, 0, sizeof(*qi)); + + qi->slot = qt->instances.used; + qt->instances.used++; + qi->ria = rrdinstance_acquired_dup(ria); + qi->query_host_id = qn_slot; + + return qi; +} + +static inline SIMPLE_PATTERN_RESULT query_instance_matches(QUERY_INSTANCE *qi, + RRDINSTANCE *ri, + SIMPLE_PATTERN *instances_sp, + bool match_ids, + bool match_names, + size_t version, + char *host_node_id_str) { + SIMPLE_PATTERN_RESULT ret = SP_MATCHED_POSITIVE; + + if(instances_sp) { + ret = SP_NOT_MATCHED; + + if(match_ids) + ret = simple_pattern_matches_string_extract(instances_sp, ri->id, NULL, 0); + if (ret == SP_NOT_MATCHED && match_names && (ri->name != ri->id || !match_ids)) + ret = simple_pattern_matches_string_extract(instances_sp, ri->name, NULL, 0); + if (ret == SP_NOT_MATCHED && match_ids) + ret = simple_pattern_matches_string_extract(instances_sp, query_instance_id_fqdn(qi, version), NULL, 0); + if (ret == SP_NOT_MATCHED && match_names) + ret = simple_pattern_matches_string_extract(instances_sp, query_instance_name_fqdn(qi, version), NULL, 0); + + if (ret == SP_NOT_MATCHED && match_ids && host_node_id_str[0]) { + char buffer[RRD_ID_LENGTH_MAX + 1]; + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s@%s", rrdinstance_acquired_id(qi->ria), host_node_id_str); + ret = simple_pattern_matches_extract(instances_sp, buffer, NULL, 0); + } + } + + return ret; +} + +static inline bool query_instance_matches_labels( + RRDINSTANCE *ri, + SIMPLE_PATTERN *chart_label_key_sp, + SIMPLE_PATTERN *labels_sp) +{ + + if (chart_label_key_sp && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, chart_label_key_sp, '\0', NULL) != SP_MATCHED_POSITIVE) + return false; + + if (labels_sp) { + struct pattern_array *pa = pattern_array_add_simple_pattern(NULL, labels_sp, ':'); + bool found = pattern_array_label_match(pa, ri->rrdlabels, ':', NULL); + pattern_array_free(pa); + return found; + } + + return true; +} + +static bool query_instance_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CONTEXT *qc, + RRDINSTANCE_ACQUIRED *ria, bool queryable_instance, bool filter_instances) { + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + if(rrd_flag_is_deleted(ri)) + return false; + + QUERY_TARGET *qt = qtl->qt; + QUERY_INSTANCE *qi = query_instance_allocate(qt, ria, qn->slot); + + if(qt->db.minimum_latest_update_every_s == 0 || ri->update_every_s < qt->db.minimum_latest_update_every_s) + qt->db.minimum_latest_update_every_s = ri->update_every_s; + + if(queryable_instance && filter_instances) + queryable_instance = (SP_MATCHED_POSITIVE == query_instance_matches( + qi, ri, qt->instances.pattern, qtl->match_ids, qtl->match_names, qt->request.version, qtl->host_node_id_str)); + + if(queryable_instance) + queryable_instance = query_instance_matches_labels( + ri, + qt->instances.chart_label_key_pattern, + qt->instances.labels_pattern); + + if(queryable_instance) { + if(qt->instances.alerts_pattern && !query_target_match_alert_pattern(ria, qt->instances.alerts_pattern)) + queryable_instance = false; + } + + if(queryable_instance && qt->request.version >= 2) + query_target_eval_instance_rrdcalc(qtl, qn, qc, qi); + + size_t dimensions_added = 0, metrics_added = 0, priority = 0; + + if(unlikely(qt->request.rma)) { + if(query_dimension_add(qtl, qn, qc, qi, qt->request.rma, queryable_instance, &metrics_added, priority++)) + dimensions_added++; + } + else { + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + if(query_dimension_add(qtl, qn, qc, qi, (RRDMETRIC_ACQUIRED *) rm_dfe.item, + queryable_instance, &metrics_added, priority++)) + dimensions_added++; + } + dfe_done(rm); + } + + if(!dimensions_added) { + qt->instances.used--; + query_instance_release(qi); + return false; + } + else { + if(metrics_added) { + qc->instances.selected++; + qn->instances.selected++; + } + else { + qc->instances.excluded++; + qn->instances.excluded++; + } + } + + return true; +} + +static inline void query_context_release(QUERY_CONTEXT *qc) { + rrdcontext_release(qc->rca); + qc->rca = NULL; +} + +static inline QUERY_CONTEXT *query_context_allocate(QUERY_TARGET *qt, RRDCONTEXT_ACQUIRED *rca) { + if(qt->contexts.used == qt->contexts.size) { + size_t old_mem = qt->contexts.size * sizeof(*qt->contexts.array); + qt->contexts.size = query_target_realloc_size(qt->contexts.size, 2); + size_t new_mem = qt->contexts.size * sizeof(*qt->contexts.array); + qt->contexts.array = reallocz(qt->contexts.array, new_mem); + + __atomic_add_fetch(&netdata_buffers_statistics.query_targets_size, new_mem - old_mem, __ATOMIC_RELAXED); + } + QUERY_CONTEXT *qc = &qt->contexts.array[qt->contexts.used]; + memset(qc, 0, sizeof(*qc)); + qc->slot = qt->contexts.used++; + qc->rca = rrdcontext_acquired_dup(rca); + + return qc; +} + +static ssize_t query_context_add(void *data, RRDCONTEXT_ACQUIRED *rca, bool queryable_context) { + QUERY_TARGET_LOCALS *qtl = data; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(rrd_flag_is_deleted(rc)) + return 0; + + QUERY_NODE *qn = qtl->qn; + QUERY_TARGET *qt = qtl->qt; + QUERY_CONTEXT *qc = query_context_allocate(qt, rca); + + ssize_t added = 0; + if(unlikely(qt->request.ria)) { + if(query_instance_add(qtl, qn, qc, qt->request.ria, queryable_context, false)) + added++; + } + else if(unlikely(qtl->st && qtl->st->rrdcontexts.rrdcontext == rca && qtl->st->rrdcontexts.rrdinstance)) { + if(query_instance_add(qtl, qn, qc, qtl->st->rrdcontexts.rrdinstance, queryable_context, false)) + added++; + } + else { + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(query_instance_add(qtl, qn, qc, (RRDINSTANCE_ACQUIRED *) ri_dfe.item, queryable_context, true)) + added++; + } + dfe_done(ri); + } + + if(!added) { + query_context_release(qc); + qt->contexts.used--; + return 0; + } + + return added; +} + +static inline void query_node_release(QUERY_NODE *qn) { + qn->rrdhost = NULL; +} + +static inline QUERY_NODE *query_node_allocate(QUERY_TARGET *qt, RRDHOST *host) { + if(qt->nodes.used == qt->nodes.size) { + size_t old_mem = qt->nodes.size * sizeof(*qt->nodes.array); + qt->nodes.size = query_target_realloc_size(qt->nodes.size, 2); + size_t new_mem = qt->nodes.size * sizeof(*qt->nodes.array); + qt->nodes.array = reallocz(qt->nodes.array, new_mem); + + __atomic_add_fetch(&netdata_buffers_statistics.query_targets_size, new_mem - old_mem, __ATOMIC_RELAXED); + } + QUERY_NODE *qn = &qt->nodes.array[qt->nodes.used]; + memset(qn, 0, sizeof(*qn)); + + qn->slot = qt->nodes.used++; + qn->rrdhost = host; + + return qn; +} + +static ssize_t query_node_add(void *data, RRDHOST *host, bool queryable_host) { + QUERY_TARGET_LOCALS *qtl = data; + QUERY_TARGET *qt = qtl->qt; + QUERY_NODE *qn = query_node_allocate(qt, host); + + if(host->node_id) { + if(!qtl->host_node_id_str[0]) + uuid_unparse_lower(*host->node_id, qn->node_id); + else + memcpy(qn->node_id, qtl->host_node_id_str, sizeof(qn->node_id)); + } + else + qn->node_id[0] = '\0'; + + // is the chart given valid? + if(unlikely(qtl->st && (!qtl->st->rrdcontexts.rrdinstance || !qtl->st->rrdcontexts.rrdcontext))) { + netdata_log_error("QUERY TARGET: RRDSET '%s' given, but it is not linked to rrdcontext structures. Linking it now.", rrdset_name(qtl->st)); + rrdinstance_from_rrdset(qtl->st); + + if(unlikely(qtl->st && (!qtl->st->rrdcontexts.rrdinstance || !qtl->st->rrdcontexts.rrdcontext))) { + netdata_log_error("QUERY TARGET: RRDSET '%s' given, but failed to be linked to rrdcontext structures. Switching to context query.", + rrdset_name(qtl->st)); + + if (!is_valid_sp(qtl->instances)) + qtl->instances = rrdset_name(qtl->st); + + qtl->st = NULL; + } + } + + qtl->qn = qn; + + ssize_t added = 0; + if(unlikely(qt->request.rca)) { + if(query_context_add(qtl, qt->request.rca, true)) + added++; + } + else if(unlikely(qtl->st)) { + // single chart data queries + if(query_context_add(qtl, qtl->st->rrdcontexts.rrdcontext, true)) + added++; + } + else { + // context pattern queries + added = query_scope_foreach_context( + host, qtl->scope_contexts, + qt->contexts.scope_pattern, qt->contexts.pattern, + query_context_add, queryable_host, qtl); + + if(added < 0) + added = 0; + } + + qtl->qn = NULL; + + if(!added) { + query_node_release(qn); + qt->nodes.used--; + return false; + } + + return true; +} + +void query_target_generate_name(QUERY_TARGET *qt) { + char options_buffer[100 + 1]; + web_client_api_request_v1_data_options_to_string(options_buffer, 100, qt->request.options); + + char resampling_buffer[20 + 1] = ""; + if(qt->request.resampling_time > 1) + snprintfz(resampling_buffer, 20, "/resampling:%lld", (long long)qt->request.resampling_time); + + char tier_buffer[20 + 1] = ""; + if(qt->request.options & RRDR_OPTION_SELECTED_TIER) + snprintfz(tier_buffer, 20, "/tier:%zu", qt->request.tier); + + if(qt->request.st) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "chart://hosts:%s/instance:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.st->rrdhost) + , rrdset_name(qt->request.st) + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , time_grouping_tostring(qt->request.time_group_method) + , qt->request.time_group_options ? qt->request.time_group_options : "" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else if(qt->request.host && qt->request.rca && qt->request.ria && qt->request.rma) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "metric://hosts:%s/context:%s/instance:%s/dimension:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , rrdhost_hostname(qt->request.host) + , rrdcontext_acquired_id(qt->request.rca) + , rrdinstance_acquired_id(qt->request.ria) + , rrdmetric_acquired_id(qt->request.rma) + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , time_grouping_tostring(qt->request.time_group_method) + , qt->request.time_group_options ? qt->request.time_group_options : "" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else if(qt->request.version >= 2) + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "data_v2://scope_nodes:%s/scope_contexts:%s/nodes:%s/contexts:%s/instances:%s/labels:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/time_group:%s%s/options:%s%s%s" + , qt->request.scope_nodes ? qt->request.scope_nodes : "*" + , qt->request.scope_contexts ? qt->request.scope_contexts : "*" + , qt->request.nodes ? qt->request.nodes : "*" + , (qt->request.contexts) ? qt->request.contexts : "*" + , (qt->request.instances) ? qt->request.instances : "*" + , (qt->request.labels) ? qt->request.labels : "*" + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , time_grouping_tostring(qt->request.time_group_method) + , qt->request.time_group_options ? qt->request.time_group_options : "" + , options_buffer + , resampling_buffer + , tier_buffer + ); + else + snprintfz(qt->id, MAX_QUERY_TARGET_ID_LENGTH, "context://hosts:%s/contexts:%s/instances:%s/dimensions:%s/after:%lld/before:%lld/points:%zu/group:%s%s/options:%s%s%s" + , (qt->request.host) ? rrdhost_hostname(qt->request.host) : ((qt->request.nodes) ? qt->request.nodes : "*") + , (qt->request.contexts) ? qt->request.contexts : "*" + , (qt->request.instances) ? qt->request.instances : "*" + , (qt->request.dimensions) ? qt->request.dimensions : "*" + , (long long)qt->request.after + , (long long)qt->request.before + , qt->request.points + , time_grouping_tostring(qt->request.time_group_method) + , qt->request.time_group_options ? qt->request.time_group_options : "" + , options_buffer + , resampling_buffer + , tier_buffer + ); + + json_fix_string(qt->id); +} + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) { + if(!service_running(ABILITY_DATA_QUERIES)) + return NULL; + + QUERY_TARGET *qt = query_target_get(); + + if(!qtr->received_ut) + qtr->received_ut = now_monotonic_usec(); + + qt->timings.received_ut = qtr->received_ut; + + if(qtr->nodes && !qtr->scope_nodes) + qtr->scope_nodes = qtr->nodes; + + if(qtr->contexts && !qtr->scope_contexts) + qtr->scope_contexts = qtr->contexts; + + memset(&qt->db, 0, sizeof(qt->db)); + qt->query_points = STORAGE_POINT_UNSET; + + // copy the request into query_thread_target + qt->request = *qtr; + + query_target_generate_name(qt); + qt->window.after = qt->request.after; + qt->window.before = qt->request.before; + + qt->window.options = qt->request.options; + if(query_target_has_percentage_of_group(qt)) + qt->window.options &= ~RRDR_OPTION_PERCENTAGE; + + qt->internal.relative = rrdr_relative_window_to_absolute_query(&qt->window.after, &qt->window.before + , &qt->window.now, unittest_running + ); + + // prepare our local variables - we need these across all these functions + QUERY_TARGET_LOCALS qtl = { + .qt = qt, + .start_s = now_realtime_sec(), + .st = qt->request.st, + .scope_nodes = qt->request.scope_nodes, + .scope_contexts = qt->request.scope_contexts, + .nodes = qt->request.nodes, + .contexts = qt->request.contexts, + .instances = qt->request.instances, + .dimensions = qt->request.dimensions, + .chart_label_key = qt->request.chart_label_key, + .labels = qt->request.labels, + .alerts = qt->request.alerts, + }; + + RRDHOST *host = qt->request.host; + + // prepare all the patterns + qt->nodes.scope_pattern = string_to_simple_pattern(qtl.scope_nodes); + qt->nodes.pattern = string_to_simple_pattern(qtl.nodes); + + qt->contexts.pattern = string_to_simple_pattern(qtl.contexts); + qt->contexts.scope_pattern = string_to_simple_pattern(qtl.scope_contexts); + + qt->instances.pattern = string_to_simple_pattern(qtl.instances); + qt->query.pattern = string_to_simple_pattern(qtl.dimensions); + qt->instances.chart_label_key_pattern = string_to_simple_pattern(qtl.chart_label_key); + qt->instances.labels_pattern = string_to_simple_pattern(qtl.labels); + qt->instances.alerts_pattern = string_to_simple_pattern(qtl.alerts); + + qtl.match_ids = qt->request.options & RRDR_OPTION_MATCH_IDS; + qtl.match_names = qt->request.options & RRDR_OPTION_MATCH_NAMES; + if(likely(!qtl.match_ids && !qtl.match_names)) + qtl.match_ids = qtl.match_names = true; + + // verify that the chart belongs to the host we are interested + if(qtl.st) { + if (!host) { + // It is NULL, set it ourselves. + host = qtl.st->rrdhost; + } + else if (unlikely(host != qtl.st->rrdhost)) { + // Oops! A different host! + netdata_log_error("QUERY TARGET: RRDSET '%s' given does not belong to host '%s'. Switching query host to '%s'", + rrdset_name(qtl.st), rrdhost_hostname(host), rrdhost_hostname(qtl.st->rrdhost)); + host = qtl.st->rrdhost; + } + } + + if(host) { + if(host->node_id) + uuid_unparse_lower(*host->node_id, qtl.host_node_id_str); + else + qtl.host_node_id_str[0] = '\0'; + + // single host query + qt->versions.contexts_hard_hash = dictionary_version(host->rrdctx.contexts); + qt->versions.contexts_soft_hash = dictionary_version(host->rrdctx.hub_queue); + qt->versions.alerts_hard_hash = dictionary_version(host->rrdcalc_root_index); + qt->versions.alerts_soft_hash = __atomic_load_n(&host->health_transitions, __ATOMIC_RELAXED); + query_node_add(&qtl, host, true); + qtl.nodes = rrdhost_hostname(host); + } + else + query_scope_foreach_host(qt->nodes.scope_pattern, qt->nodes.pattern, + query_node_add, &qtl, + &qt->versions, + qtl.host_node_id_str); + + // we need the available db retention for this call + // so it has to be done last + query_target_calculate_window(qt); + + qt->timings.preprocessed_ut = now_monotonic_usec(); + + return qt; +} + +ssize_t weights_foreach_rrdmetric_in_context(RRDCONTEXT_ACQUIRED *rca, + SIMPLE_PATTERN *instances_sp, + SIMPLE_PATTERN *chart_label_key_sp, + SIMPLE_PATTERN *labels_sp, + SIMPLE_PATTERN *alerts_sp, + SIMPLE_PATTERN *dimensions_sp, + bool match_ids, bool match_names, + size_t version, + weights_add_metric_t cb, + void *data + ) { + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(!rc || rrd_flag_is_deleted(rc)) + return 0; + + char host_node_id_str[UUID_STR_LEN] = ""; + + bool proceed = true; + + ssize_t count = 0; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(rrd_flag_is_deleted(ri)) + continue; + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *) ri_dfe.item; + + if(instances_sp) { + QUERY_INSTANCE qi = { .ria = ria, }; + SIMPLE_PATTERN_RESULT ret = query_instance_matches(&qi, ri, instances_sp, match_ids, match_names, version, host_node_id_str); + qi.ria = NULL; + query_instance_release(&qi); + + if (ret != SP_MATCHED_POSITIVE) + continue; + } + + if(!query_instance_matches_labels(ri, chart_label_key_sp, labels_sp)) + continue; + + if(alerts_sp && !query_target_match_alert_pattern(ria, alerts_sp)) + continue; + + dfe_unlock(ri); + + RRDMETRIC *rm; + dfe_start_read(ri->rrdmetrics, rm) { + if(rrd_flag_is_deleted(rm)) + continue; + + if(dimensions_sp) { + SIMPLE_PATTERN_RESULT ret = SP_NOT_MATCHED; + + if (match_ids) + ret = simple_pattern_matches_string_extract(dimensions_sp, rm->id, NULL, 0); + + if (ret == SP_NOT_MATCHED && match_names && (rm->name != rm->id || !match_ids)) + ret = simple_pattern_matches_string_extract(dimensions_sp, rm->name, NULL, 0); + + if(ret != SP_MATCHED_POSITIVE) + continue; + } + + dfe_unlock(rm); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)rm_dfe.item; + ssize_t ret = cb(data, rc->rrdhost, rca, ria, rma); + + if(ret < 0) { + proceed = false; + break; + } + + count += ret; + } + dfe_done(rm); + + if(unlikely(!proceed)) + break; + } + dfe_done(ri); + return count; +} diff --git a/src/database/contexts/rrdcontext.c b/src/database/contexts/rrdcontext.c new file mode 100644 index 000000000..f755e1f7e --- /dev/null +++ b/src/database/contexts/rrdcontext.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +// ---------------------------------------------------------------------------- +// visualizing flags + +struct rrdcontext_reason rrdcontext_reasons[] = { + // context related + {RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_METADATA, "changed metadata", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC }, + + // not context related + {RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION, "updated retention", 65 * USEC_PER_SEC }, + + // terminator + {0, NULL, 0 }, +}; + +void rrd_reasons_to_buffer_json_array_items(RRD_FLAGS flags, BUFFER *wb) { + for(int i = 0, added = 0; rrdcontext_reasons[i].name ; i++) { + if (flags & rrdcontext_reasons[i].flag) { + buffer_json_add_array_item_string(wb, rrdcontext_reasons[i].name); + added++; + } + } +} +// ---------------------------------------------------------------------------- +// public API + +void rrdcontext_updated_rrddim(RRDDIM *rd) { + rrdmetric_from_rrddim(rd); +} + +void rrdcontext_removed_rrddim(RRDDIM *rd) { + rrdmetric_rrddim_is_freed(rd); +} + +void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_divisor(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_updated_rrddim_flags(RRDDIM *rd) { + rrdmetric_updated_rrddim_flags(rd); +} + +void rrdcontext_collected_rrddim(RRDDIM *rd) { + rrdmetric_collected_rrddim(rd); +} + +void rrdcontext_updated_rrdset(RRDSET *st) { + rrdinstance_from_rrdset(st); +} + +void rrdcontext_removed_rrdset(RRDSET *st) { + rrdinstance_rrdset_is_freed(st); +} + +void rrdcontext_updated_retention_rrdset(RRDSET *st) { + rrdinstance_rrdset_has_updated_retention(st); +} + +void rrdcontext_updated_rrdset_name(RRDSET *st) { + rrdinstance_updated_rrdset_name(st); +} + +void rrdcontext_updated_rrdset_flags(RRDSET *st) { + rrdinstance_updated_rrdset_flags(st); +} + +void rrdcontext_collected_rrdset(RRDSET *st) { + rrdinstance_collected_rrdset(st); +} + +void rrdcontext_host_child_connected(RRDHOST *host) { + (void)host; + + // no need to do anything here + ; +} + +usec_t rrdcontext_next_db_rotation_ut = 0; +void rrdcontext_db_rotation(void) { + // called when the db rotates its database + rrdcontext_next_db_rotation_ut = now_realtime_usec() + FULL_RETENTION_SCAN_DELAY_AFTER_DB_ROTATION_SECS * USEC_PER_SEC; +} + +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, nd_uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item(st->rrdhost->rrdctx.contexts, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + + RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_get_and_acquire_item(ri->rrdmetrics, id); + if(!rma) { + rrdinstance_release(ria); + rrdcontext_release(rca); + return 5; + } + + RRDMETRIC *rm = rrdmetric_acquired_value(rma); + + uuid_copy(*store_uuid, rm->uuid); + + rrdmetric_release(rma); + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + +int rrdcontext_find_chart_uuid(RRDSET *st, nd_uuid_t *store_uuid) { + if(!st->rrdhost) return 1; + if(!st->context) return 2; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item(st->rrdhost->rrdctx.contexts, string2str(st->context)); + if(!rca) return 3; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_get_and_acquire_item(rc->rrdinstances, string2str(st->id)); + if(!ria) { + rrdcontext_release(rca); + return 4; + } + + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + uuid_copy(*store_uuid, ri->uuid); + + rrdinstance_release(ria); + rrdcontext_release(rca); + return 0; +} + +void rrdcontext_host_child_disconnected(RRDHOST *host) { + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, false); +} + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data) { + if(unlikely(!host || !context || !*context || !callback)) + return -1; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_get_and_acquire_item(host->rrdctx.contexts, context); + if(unlikely(!rca)) return -1; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + if(unlikely(!rc)) return -1; + + int ret = 0; + RRDINSTANCE *ri; + dfe_start_read(rc->rrdinstances, ri) { + if(ri->rrdset) { + int r = callback(ri->rrdset, data); + if(r >= 0) ret += r; + else { + ret = r; + break; + } + } + } + dfe_done(ri); + + rrdcontext_release(rca); + + return ret; +} + +// ---------------------------------------------------------------------------- +// ACLK interface + +static bool rrdhost_check_our_claim_id(const char *claim_id) { + if(!localhost->aclk_state.claimed_id) return false; + return (strcasecmp(claim_id, localhost->aclk_state.claimed_id) == 0) ? true : false; +} + +void rrdcontext_hub_checkpoint_command(void *ptr) { + struct ctxs_checkpoint *cmd = ptr; + + if(!rrdhost_check_our_claim_id(cmd->claim_id)) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "RRDCONTEXT: received checkpoint command for claim_id '%s', node id '%s', " + "but this is not our claim id. Ours '%s', received '%s'. Ignoring command.", + cmd->claim_id, cmd->node_id, + localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET", + cmd->claim_id); + + return; + } + + RRDHOST *host = find_host_by_node_id(cmd->node_id); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', " + "but there is no node with such node id here. Ignoring command.", + cmd->claim_id, cmd->node_id); + + return; + } + + if(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', " + "while node '%s' has an active context streaming.", + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); + + // disable it temporarily, so that our worker will not attempt to send messages in parallel + rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); + } + + uint64_t our_version_hash = rrdcontext_version_hash(host); + + if(cmd->version_hash != our_version_hash) { + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "RRDCONTEXT: received version hash %"PRIu64" for host '%s', does not match our version hash %"PRIu64". " + "Sending snapshot of all contexts.", + cmd->version_hash, rrdhost_hostname(host), our_version_hash); + +#ifdef ENABLE_ACLK + // prepare the snapshot + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, uuid); + contexts_snapshot_t bundle = contexts_snapshot_new(cmd->claim_id, uuid, our_version_hash); + + // do a deep scan on every metric of the host to make sure all our data are updated + rrdcontext_recalculate_host_retention(host, RRD_FLAG_NONE, false); + + // calculate version hash and pack all the messages together in one go + our_version_hash = rrdcontext_version_hash_with_callback(host, rrdcontext_message_send_unsafe, true, bundle); + + // update the version + contexts_snapshot_set_version(bundle, our_version_hash); + + // send it + aclk_send_contexts_snapshot(bundle); +#endif + } + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "RRDCONTEXT: host '%s' enabling streaming of contexts", + rrdhost_hostname(host)); + + rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); + char node_str[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, node_str); + nd_log(NDLS_ACCESS, NDLP_DEBUG, + "ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", + node_str, rrdhost_hostname(host)); +} + +void rrdcontext_hub_stop_streaming_command(void *ptr) { + struct stop_streaming_ctxs *cmd = ptr; + + if(!rrdhost_check_our_claim_id(cmd->claim_id)) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "RRDCONTEXT: received stop streaming command for claim_id '%s', node id '%s', " + "but this is not our claim id. Ours '%s', received '%s'. Ignoring command.", + cmd->claim_id, cmd->node_id, + localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET", + cmd->claim_id); + + return; + } + + RRDHOST *host = find_host_by_node_id(cmd->node_id); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', " + "but there is no node with such node id here. Ignoring command.", + cmd->claim_id, cmd->node_id); + + return; + } + + if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) { + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', " + "but node '%s' does not have active context streaming. Ignoring command.", + cmd->claim_id, cmd->node_id, rrdhost_hostname(host)); + + return; + } + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "RRDCONTEXT: host '%s' disabling streaming of contexts", + rrdhost_hostname(host)); + + rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS); +} + +bool rrdcontext_retention_match(RRDCONTEXT_ACQUIRED *rca, time_t after, time_t before) { + if(unlikely(!rca)) return false; + + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + if(rrd_flag_is_collected(rc)) + return query_matches_retention(after, before, rc->first_time_s, before > rc->last_time_s ? before : rc->last_time_s, 1); + else + return query_matches_retention(after, before, rc->first_time_s, rc->last_time_s, 1); +} diff --git a/src/database/contexts/rrdcontext.h b/src/database/contexts/rrdcontext.h new file mode 100644 index 000000000..9fea55d38 --- /dev/null +++ b/src/database/contexts/rrdcontext.h @@ -0,0 +1,760 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCONTEXT_H +#define NETDATA_RRDCONTEXT_H 1 + +// ---------------------------------------------------------------------------- +// RRDMETRIC + +typedef struct rrdmetric_acquired RRDMETRIC_ACQUIRED; + +// ---------------------------------------------------------------------------- +// RRDINSTANCE + +typedef struct rrdinstance_acquired RRDINSTANCE_ACQUIRED; + +// ---------------------------------------------------------------------------- +// RRDCONTEXT + +typedef struct rrdcontext_acquired RRDCONTEXT_ACQUIRED; + +// ---------------------------------------------------------------------------- + +#include "../rrd.h" + +bool rrdinstance_acquired_id_and_name_are_same(RRDINSTANCE_ACQUIRED *ria); +const char *rrdmetric_acquired_id(RRDMETRIC_ACQUIRED *rma); +const char *rrdmetric_acquired_name(RRDMETRIC_ACQUIRED *rma); +bool rrdmetric_acquired_has_name(RRDMETRIC_ACQUIRED *rma); + +STRING *rrdmetric_acquired_id_dup(RRDMETRIC_ACQUIRED *rma); +STRING *rrdmetric_acquired_name_dup(RRDMETRIC_ACQUIRED *rma); + +NETDATA_DOUBLE rrdmetric_acquired_last_stored_value(RRDMETRIC_ACQUIRED *rma); +time_t rrdmetric_acquired_first_entry(RRDMETRIC_ACQUIRED *rma); +time_t rrdmetric_acquired_last_entry(RRDMETRIC_ACQUIRED *rma); +bool rrdmetric_acquired_belongs_to_instance(RRDMETRIC_ACQUIRED *rma, RRDINSTANCE_ACQUIRED *ria); + +const char *rrdinstance_acquired_id(RRDINSTANCE_ACQUIRED *ria); +const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria); +bool rrdinstance_acquired_has_name(RRDINSTANCE_ACQUIRED *ria); +const char *rrdinstance_acquired_units(RRDINSTANCE_ACQUIRED *ria); +STRING *rrdinstance_acquired_units_dup(RRDINSTANCE_ACQUIRED *ria); +RRDLABELS *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria); +DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria); +RRDHOST *rrdinstance_acquired_rrdhost(RRDINSTANCE_ACQUIRED *ria); +RRDSET *rrdinstance_acquired_rrdset(RRDINSTANCE_ACQUIRED *ria); + +bool rrdinstance_acquired_belongs_to_context(RRDINSTANCE_ACQUIRED *ria, RRDCONTEXT_ACQUIRED *rca); +time_t rrdinstance_acquired_update_every(RRDINSTANCE_ACQUIRED *ria); + +const char *rrdcontext_acquired_units(RRDCONTEXT_ACQUIRED *rca); +const char *rrdcontext_acquired_title(RRDCONTEXT_ACQUIRED *rca); +RRDSET_TYPE rrdcontext_acquired_chart_type(RRDCONTEXT_ACQUIRED *rca); + +// ---------------------------------------------------------------------------- +// public API for rrdhost + +void rrdhost_load_rrdcontext_data(RRDHOST *host); +void rrdhost_create_rrdcontexts(RRDHOST *host); +void rrdhost_destroy_rrdcontexts(RRDHOST *host); + +void rrdcontext_host_child_connected(RRDHOST *host); +void rrdcontext_host_child_disconnected(RRDHOST *host); + +int rrdcontext_foreach_instance_with_rrdset_in_context(RRDHOST *host, const char *context, int (*callback)(RRDSET *st, void *data), void *data); + +typedef enum { + RRDCONTEXT_OPTION_NONE = 0, + RRDCONTEXT_OPTION_SHOW_METRICS = (1 << 0), + RRDCONTEXT_OPTION_SHOW_INSTANCES = (1 << 1), + RRDCONTEXT_OPTION_SHOW_LABELS = (1 << 2), + RRDCONTEXT_OPTION_SHOW_QUEUED = (1 << 3), + RRDCONTEXT_OPTION_SHOW_FLAGS = (1 << 4), + RRDCONTEXT_OPTION_SHOW_DELETED = (1 << 5), + RRDCONTEXT_OPTION_DEEPSCAN = (1 << 6), + RRDCONTEXT_OPTION_SHOW_UUIDS = (1 << 7), + RRDCONTEXT_OPTION_SHOW_HIDDEN = (1 << 8), + RRDCONTEXT_OPTION_SKIP_ID = (1 << 31), // internal use +} RRDCONTEXT_TO_JSON_OPTIONS; + +#define RRDCONTEXT_OPTIONS_ALL (RRDCONTEXT_OPTION_SHOW_METRICS|RRDCONTEXT_OPTION_SHOW_INSTANCES|RRDCONTEXT_OPTION_SHOW_LABELS|RRDCONTEXT_OPTION_SHOW_QUEUED|RRDCONTEXT_OPTION_SHOW_FLAGS|RRDCONTEXT_OPTION_SHOW_DELETED|RRDCONTEXT_OPTION_SHOW_UUIDS|RRDCONTEXT_OPTION_SHOW_HIDDEN) + +int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, const char *context, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); +int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, RRDCONTEXT_TO_JSON_OPTIONS options, SIMPLE_PATTERN *chart_label_key, SIMPLE_PATTERN *chart_labels_filter, SIMPLE_PATTERN *chart_dimensions); + +// ---------------------------------------------------------------------------- +// public API for rrdcontexts + +const char *rrdcontext_acquired_id(RRDCONTEXT_ACQUIRED *rca); +bool rrdcontext_acquired_belongs_to_host(RRDCONTEXT_ACQUIRED *rca, RRDHOST *host); + +// ---------------------------------------------------------------------------- +// public API for rrddims + +void rrdcontext_updated_rrddim(RRDDIM *rd); +void rrdcontext_removed_rrddim(RRDDIM *rd); +void rrdcontext_updated_rrddim_algorithm(RRDDIM *rd); +void rrdcontext_updated_rrddim_multiplier(RRDDIM *rd); +void rrdcontext_updated_rrddim_divisor(RRDDIM *rd); +void rrdcontext_updated_rrddim_flags(RRDDIM *rd); +void rrdcontext_collected_rrddim(RRDDIM *rd); +int rrdcontext_find_dimension_uuid(RRDSET *st, const char *id, nd_uuid_t *store_uuid); + +// ---------------------------------------------------------------------------- +// public API for rrdsets + +void rrdcontext_updated_rrdset(RRDSET *st); +void rrdcontext_removed_rrdset(RRDSET *st); +void rrdcontext_updated_rrdset_name(RRDSET *st); +void rrdcontext_updated_rrdset_flags(RRDSET *st); +void rrdcontext_updated_retention_rrdset(RRDSET *st); +void rrdcontext_collected_rrdset(RRDSET *st); +int rrdcontext_find_chart_uuid(RRDSET *st, nd_uuid_t *store_uuid); + +// ---------------------------------------------------------------------------- +// public API for ACLK + +void rrdcontext_hub_checkpoint_command(void *cmd); +void rrdcontext_hub_stop_streaming_command(void *cmd); + + +// ---------------------------------------------------------------------------- +// public API for threads + +void rrdcontext_db_rotation(void); +void *rrdcontext_main(void *); + +// ---------------------------------------------------------------------------- +// public API for queries + +typedef enum __attribute__ ((__packed__)) { + QUERY_STATUS_NONE = 0, + QUERY_STATUS_QUERIED = (1 << 0), + QUERY_STATUS_DIMENSION_HIDDEN = (1 << 1), + QUERY_STATUS_EXCLUDED = (1 << 2), + QUERY_STATUS_FAILED = (1 << 3), +} QUERY_STATUS; + +typedef struct query_plan_entry { + size_t tier; + time_t after; + time_t before; +} QUERY_PLAN_ENTRY; + +#define QUERY_PLANS_MAX (RRD_STORAGE_TIERS) + +typedef struct query_metrics_counts { // counts the number of metrics related to an object + size_t selected; // selected to be queried + size_t excluded; // not selected to be queried + size_t queried; // successfully queried + size_t failed; // failed to be queried +} QUERY_METRICS_COUNTS; + +typedef struct query_instances_counts { // counts the number of instances related to an object + size_t selected; // selected to be queried + size_t excluded; // not selected to be queried + size_t queried; // successfully queried + size_t failed; // failed to be queried +} QUERY_INSTANCES_COUNTS; + +typedef struct query_alerts_counts { // counts the number of alerts related to an object + size_t clear; // number of alerts in clear state + size_t warning; // number of alerts in warning state + size_t critical; // number of alerts in critical state + size_t other; // number of alerts in any other state +} QUERY_ALERTS_COUNTS; + +typedef struct _query_node { + uint32_t slot; + RRDHOST *rrdhost; + char node_id[UUID_STR_LEN]; + usec_t duration_ut; + + STORAGE_POINT query_points; + QUERY_INSTANCES_COUNTS instances; + QUERY_METRICS_COUNTS metrics; + QUERY_ALERTS_COUNTS alerts; +} QUERY_NODE; + +typedef struct _query_context { + uint32_t slot; + RRDCONTEXT_ACQUIRED *rca; + + STORAGE_POINT query_points; + QUERY_INSTANCES_COUNTS instances; + QUERY_METRICS_COUNTS metrics; + QUERY_ALERTS_COUNTS alerts; +} QUERY_CONTEXT; + +typedef struct _query_instance { + uint32_t slot; + uint32_t query_host_id; + RRDINSTANCE_ACQUIRED *ria; + STRING *id_fqdn; // never access this directly - it is created on demand via query_instance_id_fqdn() + STRING *name_fqdn; // never access this directly - it is created on demand via query_instance_name_fqdn() + + STORAGE_POINT query_points; + QUERY_METRICS_COUNTS metrics; + QUERY_ALERTS_COUNTS alerts; +} QUERY_INSTANCE; + +typedef struct _query_dimension { + uint32_t slot; + uint32_t priority; + RRDMETRIC_ACQUIRED *rma; + QUERY_STATUS status; +} QUERY_DIMENSION; + +typedef struct _query_metric { + RRDR_DIMENSION_FLAGS status; + + struct query_metric_tier { + STORAGE_METRIC_HANDLE *smh; + time_t db_first_time_s; // the oldest timestamp available for this tier + time_t db_last_time_s; // the latest timestamp available for this tier + time_t db_update_every_s; // latest update every for this tier + long weight; + } tiers[RRD_STORAGE_TIERS]; + + struct { + size_t used; + QUERY_PLAN_ENTRY array[QUERY_PLANS_MAX]; + } plan; + + struct { + uint32_t query_node_id; + uint32_t query_context_id; + uint32_t query_instance_id; + uint32_t query_dimension_id; + } link; + + STORAGE_POINT query_points; + + struct { + uint32_t slot; + uint32_t first_slot; + STRING *id; + STRING *name; + STRING *units; + } grouped_as; + + usec_t duration_ut; +} QUERY_METRIC; + +#define MAX_QUERY_TARGET_ID_LENGTH 255 +#define MAX_QUERY_GROUP_BY_PASSES 2 + +typedef bool (*qt_interrupt_callback_t)(void *data); + +struct group_by_pass { + RRDR_GROUP_BY group_by; + char *group_by_label; + RRDR_GROUP_BY_FUNCTION aggregation; +}; + +typedef struct query_target_request { + size_t version; + + const char *scope_nodes; + const char *scope_contexts; + + // selecting / filtering metrics to be queried + RRDHOST *host; // the host to be queried (can be NULL, hosts will be used) + RRDCONTEXT_ACQUIRED *rca; // the context to be queried (can be NULL) + RRDINSTANCE_ACQUIRED *ria; // the instance to be queried (can be NULL) + RRDMETRIC_ACQUIRED *rma; // the metric to be queried (can be NULL) + RRDSET *st; // the chart to be queried (NULL, for context queries) + const char *nodes; // hosts simple pattern + const char *contexts; // contexts simple pattern (context queries) + const char *instances; // charts simple pattern (for context queries) + const char *dimensions; // dimensions simple pattern + const char *chart_label_key; // select only the chart having this label key + const char *labels; // select only the charts having this combo of label key:value + const char *alerts; // select only the charts having this combo of alert name:status + + time_t after; // the requested timeframe + time_t before; // the requested timeframe + size_t points; // the requested number of points to be returned + + uint32_t format; // DATASOURCE_FORMAT + RRDR_OPTIONS options; + time_t timeout_ms; // the timeout of the query in milliseconds + + size_t tier; + QUERY_SOURCE query_source; + STORAGE_PRIORITY priority; + + // resampling metric values across time + time_t resampling_time; + + // grouping metric values across time + RRDR_TIME_GROUPING time_group_method; + const char *time_group_options; + + // group by across multiple time-series + struct group_by_pass group_by[MAX_QUERY_GROUP_BY_PASSES]; + + usec_t received_ut; + + qt_interrupt_callback_t interrupt_callback; + void *interrupt_callback_data; + + nd_uuid_t *transaction; +} QUERY_TARGET_REQUEST; + +#define GROUP_BY_MAX_LABEL_KEYS 10 + +struct query_tier_statistics { + size_t queries; + size_t points; + time_t update_every; + struct { + time_t first_time_s; + time_t last_time_s; + } retention; +}; + +struct query_versions { + uint64_t contexts_hard_hash; + uint64_t contexts_soft_hash; + uint64_t alerts_hard_hash; + uint64_t alerts_soft_hash; +}; + +struct query_timings { + usec_t received_ut; + usec_t preprocessed_ut; + usec_t executed_ut; + usec_t finished_ut; +}; + +#define query_view_update_every(qt) ((qt)->window.group * (qt)->window.query_granularity) + +typedef struct query_target { + char id[MAX_QUERY_TARGET_ID_LENGTH + 1]; // query identifier (for logging) + QUERY_TARGET_REQUEST request; + + struct { + time_t now; // the current timestamp, the absolute max for any query timestamp + bool relative; // true when the request made with relative timestamps, true if it was absolute + bool aligned; + time_t after; // the absolute timestamp this query is about + time_t before; // the absolute timestamp this query is about + time_t query_granularity; + size_t points; // the number of points the query will return (maybe different from the request) + size_t group; + RRDR_TIME_GROUPING time_group_method; + const char *time_group_options; + size_t resampling_group; + NETDATA_DOUBLE resampling_divisor; + RRDR_OPTIONS options; + size_t tier; + } window; + + struct { + size_t queries[RRD_STORAGE_TIERS]; + time_t first_time_s; // the combined first_time_t of all metrics in the query, across all tiers + time_t last_time_s; // the combined last_time_T of all metrics in the query, across all tiers + time_t minimum_latest_update_every_s; // the min update every of the metrics in the query + struct query_tier_statistics tiers[RRD_STORAGE_TIERS]; + } db; + + struct { + QUERY_METRIC *array; // the metrics to be queried (all of them should be queried, no exceptions) + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + } query; + + struct { + QUERY_DIMENSION *array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + } dimensions; + + struct { + QUERY_INSTANCE *array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + SIMPLE_PATTERN *labels_pattern; + SIMPLE_PATTERN *alerts_pattern; + SIMPLE_PATTERN *chart_label_key_pattern; + } instances; + + struct { + QUERY_CONTEXT *array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + SIMPLE_PATTERN *scope_pattern; + } contexts; + + struct { + QUERY_NODE *array; + uint32_t used; // how many items of the array are used + uint32_t size; // the size of the array + SIMPLE_PATTERN *pattern; + SIMPLE_PATTERN *scope_pattern; + } nodes; + + struct { + size_t used; + char *label_keys[GROUP_BY_MAX_LABEL_KEYS * MAX_QUERY_GROUP_BY_PASSES]; + } group_by[MAX_QUERY_GROUP_BY_PASSES]; + + STORAGE_POINT query_points; + struct query_versions versions; + struct query_timings timings; + + struct { + SPINLOCK spinlock; + bool used; // when true, this query is currently being used + bool relative; // when true, this query uses relative timestamps + size_t queries; // how many query we have done so far with this QUERY_TARGET - not related to database queries + struct query_target *prev; + struct query_target *next; + } internal; +} QUERY_TARGET; + + +struct sql_alert_transition_data { + usec_t global_id; + nd_uuid_t *transition_id; + nd_uuid_t *host_id; + nd_uuid_t *config_hash_id; + uint32_t alarm_id; + const char *alert_name; + const char *chart; + const char *chart_name; + const char *chart_context; + const char *family; + const char *recipient; + const char *units; + const char *exec; + const char *info; + const char *summary; + const char *classification; + const char *type; + const char *component; + time_t when_key; + time_t duration; + time_t non_clear_duration; + uint64_t flags; + time_t delay_up_to_timestamp; + time_t exec_run_timestamp; + int exec_code; + int new_status; + int old_status; + int delay; + time_t last_repeat; + NETDATA_DOUBLE new_value; + NETDATA_DOUBLE old_value; +}; + +struct sql_alert_config_data { + nd_uuid_t *config_hash_id; + const char *name; + + struct { + const char *on_template; + const char *on_key; + + const char *families; + const char *host_labels; + const char *chart_labels; + } selectors; + + const char *info; + const char *classification; + const char *component; + const char *type; + const char *summary; + + struct { + struct { + const char *dimensions; + const char *method; + ALERT_LOOKUP_TIME_GROUP_CONDITION time_group_condition; + NETDATA_DOUBLE time_group_value; + ALERT_LOOKUP_DIMS_GROUPING dims_group; + ALERT_LOOKUP_DATA_SOURCE data_source; + uint32_t options; + + int32_t after; + int32_t before; + + const char *lookup; // the lookup line, unparsed + } db; + + const char *calc; // the calculation expression, unparsed + const char *units; + + int32_t update_every; // the update frequency of the alert, in seconds + const char *every; // the every line, unparsed + } value; + + struct { + const char *green; // the green threshold, unparsed + const char *red; // the red threshold, unparsed + const char *warn; // the warning expression, unparsed + const char *crit; // the critical expression, unparsed + } status; + + struct { + const char *exec; // the script to execute, or NULL to execute the default script + const char *to_key; // the recipient, or NULL for the default recipient + const char *delay; // the delay line, unparsed + const char *repeat; // the repeat line, unparsed + const char *options; // FIXME what is this? + } notification; + + const char *source; // the configuration file and line this alert came from +}; + +int contexts_v2_alert_config_to_json(struct web_client *w, const char *config_hash_id); + +struct sql_alert_instance_v2_entry { + RRDCALC *tmp; + + size_t ati; + + STRING *context; + STRING *chart_id; + STRING *chart_name; + STRING *name; + STRING *family; + STRING *units; + STRING *source; + STRING *classification; + STRING *type; + STRING *component; + STRING *recipient; + RRDCALC_STATUS status; + RRDCALC_FLAGS flags; + STRING *info; + STRING *summary; + NETDATA_DOUBLE value; + time_t last_updated; + time_t last_status_change; + NETDATA_DOUBLE last_status_change_value; + nd_uuid_t config_hash_id; + usec_t global_id; + nd_uuid_t last_transition_id; + uint32_t alarm_id; + RRDHOST *host; + size_t ni; +}; + +static inline NEVERNULL QUERY_NODE *query_node(QUERY_TARGET *qt, size_t id) { + internal_fatal(id >= qt->nodes.used, "QUERY: invalid query host id"); + return &qt->nodes.array[id]; +} + +static inline NEVERNULL QUERY_CONTEXT *query_context(QUERY_TARGET *qt, size_t query_context_id) { + internal_fatal(query_context_id >= qt->contexts.used, "QUERY: invalid query context id"); + return &qt->contexts.array[query_context_id]; +} + +static inline NEVERNULL QUERY_INSTANCE *query_instance(QUERY_TARGET *qt, size_t query_instance_id) { + internal_fatal(query_instance_id >= qt->instances.used, "QUERY: invalid query instance id"); + return &qt->instances.array[query_instance_id]; +} + +static inline NEVERNULL QUERY_DIMENSION *query_dimension(QUERY_TARGET *qt, size_t query_dimension_id) { + internal_fatal(query_dimension_id >= qt->dimensions.used, "QUERY: invalid query dimension id"); + return &qt->dimensions.array[query_dimension_id]; +} + +static inline NEVERNULL QUERY_METRIC *query_metric(QUERY_TARGET *qt, size_t id) { + internal_fatal(id >= qt->query.used, "QUERY: invalid query metric id"); + return &qt->query.array[id]; +} + +static inline const char *query_metric_id(QUERY_TARGET *qt, QUERY_METRIC *qm) { + QUERY_DIMENSION *qd = query_dimension(qt, qm->link.query_dimension_id); + return rrdmetric_acquired_id(qd->rma); +} + +static inline const char *query_metric_name(QUERY_TARGET *qt, QUERY_METRIC *qm) { + QUERY_DIMENSION *qd = query_dimension(qt, qm->link.query_dimension_id); + return rrdmetric_acquired_name(qd->rma); +} + +struct storage_engine *query_metric_storage_engine(QUERY_TARGET *qt, QUERY_METRIC *qm, size_t tier); + +STRING *query_instance_id_fqdn(QUERY_INSTANCE *qi, size_t version); +STRING *query_instance_name_fqdn(QUERY_INSTANCE *qi, size_t version); + +void query_target_free(void); +void query_target_release(QUERY_TARGET *qt); + +QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr); + +typedef enum __attribute__((packed)) { + ATF_STATUS = 0, + ATF_CLASS, + ATF_TYPE, + ATF_COMPONENT, + ATF_ROLE, + ATF_NODE, + ATF_ALERT_NAME, + ATF_CHART_NAME, + ATF_CONTEXT, + + // total + ATF_TOTAL_ENTRIES, +} ALERT_TRANSITION_FACET; + +struct alert_transitions_facets { + const char *name; + const char *query_param; + const char *id; + size_t order; +}; + +extern struct alert_transitions_facets alert_transition_facets[]; + +struct api_v2_contexts_request { + char *scope_nodes; + char *scope_contexts; + char *nodes; + char *contexts; + char *q; + + CONTEXTS_V2_OPTIONS options; + + struct { + CONTEXTS_V2_ALERT_STATUS status; + char *alert; + char *transition; + uint32_t last; + + const char *facets[ATF_TOTAL_ENTRIES]; + usec_t global_id_anchor; + } alerts; + + time_t after; + time_t before; + time_t timeout_ms; + + qt_interrupt_callback_t interrupt_callback; + void *interrupt_callback_data; +}; + +typedef enum __attribute__ ((__packed__)) { + CONTEXTS_V2_SEARCH = (1 << 1), + CONTEXTS_V2_NODES = (1 << 2), + CONTEXTS_V2_NODES_INFO = (1 << 3), + CONTEXTS_V2_NODE_INSTANCES = (1 << 4), + CONTEXTS_V2_CONTEXTS = (1 << 5), + CONTEXTS_V2_AGENTS = (1 << 6), + CONTEXTS_V2_AGENTS_INFO = (1 << 7), + CONTEXTS_V2_VERSIONS = (1 << 8), + CONTEXTS_V2_FUNCTIONS = (1 << 9), + CONTEXTS_V2_ALERTS = (1 << 10), + CONTEXTS_V2_ALERT_TRANSITIONS = (1 << 11), +} CONTEXTS_V2_MODE; + +int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTEXTS_V2_MODE mode); + +RRDCONTEXT_TO_JSON_OPTIONS rrdcontext_to_json_parse_options(char *o); +void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now_s, bool info, bool array); +void buffer_json_node_add_v2(BUFFER *wb, RRDHOST *host, size_t ni, usec_t duration_ut, bool status); +void buffer_json_query_timings(BUFFER *wb, const char *key, struct query_timings *timings); +void buffer_json_cloud_timings(BUFFER *wb, const char *key, struct query_timings *timings); + +// ---------------------------------------------------------------------------- +// scope + +typedef ssize_t (*foreach_host_cb_t)(void *data, RRDHOST *host, bool queryable); +ssize_t query_scope_foreach_host(SIMPLE_PATTERN *scope_hosts_sp, SIMPLE_PATTERN *hosts_sp, + foreach_host_cb_t cb, void *data, + struct query_versions *versions, + char *host_node_id_str); + +typedef ssize_t (*foreach_context_cb_t)(void *data, RRDCONTEXT_ACQUIRED *rca, bool queryable_context); +ssize_t query_scope_foreach_context(RRDHOST *host, const char *scope_contexts, SIMPLE_PATTERN *scope_contexts_sp, SIMPLE_PATTERN *contexts_sp, foreach_context_cb_t cb, bool queryable_host, void *data); + +// ---------------------------------------------------------------------------- +// public API for weights + +typedef ssize_t (*weights_add_metric_t)(void *data, RRDHOST *host, RRDCONTEXT_ACQUIRED *rca, RRDINSTANCE_ACQUIRED *ria, RRDMETRIC_ACQUIRED *rma); +ssize_t weights_foreach_rrdmetric_in_context(RRDCONTEXT_ACQUIRED *rca, + SIMPLE_PATTERN *instances_sp, + SIMPLE_PATTERN *chart_label_key_sp, + SIMPLE_PATTERN *labels_sp, + SIMPLE_PATTERN *alerts_sp, + SIMPLE_PATTERN *dimensions_sp, + bool match_ids, bool match_names, + size_t version, + weights_add_metric_t cb, + void *data); + +bool rrdcontext_retention_match(RRDCONTEXT_ACQUIRED *rca, time_t after, time_t before); + +#define query_matches_retention(after, before, first_entry_s, last_entry_s, update_every_s) \ + (((first_entry_s) - ((update_every_s) * 2) <= (before)) && \ + ((last_entry_s) + ((update_every_s) * 2) >= (after))) + +#define query_target_aggregatable(qt) ((qt)->window.options & RRDR_OPTION_RETURN_RAW) + +static inline bool query_has_group_by_aggregation_percentage(QUERY_TARGET *qt) { + + // backwards compatibility + // If the request was made with group_by = "percentage-of-instance" + // we need to send back "raw" output with "count" + // otherwise, we need to send back "raw" output with "hidden" + + bool last_is_percentage = false; + + for(int g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) { + if(qt->request.group_by[g].group_by == RRDR_GROUP_BY_NONE) + break; + + if(qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) + // backwards compatibility + return false; + + if(qt->request.group_by[g].aggregation == RRDR_GROUP_BY_FUNCTION_PERCENTAGE) + last_is_percentage = true; + + else + last_is_percentage = false; + } + + return last_is_percentage; +} + +static inline bool query_target_has_percentage_of_group(QUERY_TARGET *qt) { + for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++) { + if (qt->request.group_by[g].group_by & RRDR_GROUP_BY_PERCENTAGE_OF_INSTANCE) + return true; + + if (qt->request.group_by[g].aggregation == RRDR_GROUP_BY_FUNCTION_PERCENTAGE) + return true; + } + + return false; +} + +static inline bool query_target_needs_all_dimensions(QUERY_TARGET *qt) { + if(qt->request.options & RRDR_OPTION_PERCENTAGE) + return true; + + return query_target_has_percentage_of_group(qt); +} + +static inline bool query_target_has_percentage_units(QUERY_TARGET *qt) { + if(qt->window.time_group_method == RRDR_GROUPING_CV) + return true; + + if((qt->request.options & RRDR_OPTION_PERCENTAGE) && !(qt->window.options & RRDR_OPTION_RETURN_RAW)) + return true; + + return query_target_has_percentage_of_group(qt); +} + +#endif // NETDATA_RRDCONTEXT_H + diff --git a/src/database/contexts/worker.c b/src/database/contexts/worker.c new file mode 100644 index 000000000..6012c14f5 --- /dev/null +++ b/src/database/contexts/worker.c @@ -0,0 +1,1147 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "internal.h" + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc); + +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused); + +static void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc); + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc); +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs); + +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs); +static void rrdcontext_garbage_collect_for_all_hosts(void); + +extern usec_t rrdcontext_next_db_rotation_ut; + +// ---------------------------------------------------------------------------- +// load from SQL + +static void rrdinstance_load_clabel(SQL_CLABEL_DATA *sld, void *data) { + RRDINSTANCE *ri = data; + rrdlabels_add(ri->rrdlabels, sld->label_key, sld->label_value, sld->label_source); +} + +static void rrdinstance_load_dimension(SQL_DIMENSION_DATA *sd, void *data) { + RRDINSTANCE *ri = data; + + RRDMETRIC trm = { + .id = string_strdupz(sd->id), + .name = string_strdupz(sd->name), + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomic + }; + if(sd->hidden) trm.flags |= RRD_FLAG_HIDDEN; + + uuid_copy(trm.uuid, sd->dim_id); + + dictionary_set(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm)); +} + +static void rrdinstance_load_chart_callback(SQL_CHART_DATA *sc, void *data) { + RRDHOST *host = data; + + RRDCONTEXT tc = { + .id = string_strdupz(sc->context), + .title = string_strdupz(sc->title), + .units = string_strdupz(sc->units), + .family = string_strdupz(sc->family), + .priority = sc->priority, + .chart_type = sc->chart_type, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + .rrdhost = host, + }; + + RRDCONTEXT_ACQUIRED *rca = (RRDCONTEXT_ACQUIRED *)dictionary_set_and_acquire_item(host->rrdctx.contexts, string2str(tc.id), &tc, sizeof(tc)); + RRDCONTEXT *rc = rrdcontext_acquired_value(rca); + + RRDINSTANCE tri = { + .id = string_strdupz(sc->id), + .name = string_strdupz(sc->name), + .title = string_strdupz(sc->title), + .units = string_strdupz(sc->units), + .family = string_strdupz(sc->family), + .chart_type = sc->chart_type, + .priority = sc->priority, + .update_every_s = sc->update_every, + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + }; + uuid_copy(tri.uuid, sc->chart_id); + + RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, sc->id, &tri, sizeof(tri)); + RRDINSTANCE *ri = rrdinstance_acquired_value(ria); + + ctx_get_dimension_list(&ri->uuid, rrdinstance_load_dimension, ri); + ctx_get_label_list(&ri->uuid, rrdinstance_load_clabel, ri); + rrdinstance_trigger_updates(ri, __FUNCTION__ ); + rrdinstance_release(ria); + rrdcontext_release(rca); +} + +static void rrdcontext_load_context_callback(VERSIONED_CONTEXT_DATA *ctx_data, void *data) { + RRDHOST *host = data; + (void)host; + + RRDCONTEXT trc = { + .id = string_strdupz(ctx_data->id), + .flags = RRD_FLAG_ARCHIVED | RRD_FLAG_UPDATE_REASON_LOAD_SQL, // no need for atomics + + // no need to set more data here + // we only need the hub data + + .hub = *ctx_data, + }; + dictionary_set(host->rrdctx.contexts, string2str(trc.id), &trc, sizeof(trc)); +} + +void rrdhost_load_rrdcontext_data(RRDHOST *host) { + if(host->rrdctx.contexts) return; + + rrdhost_create_rrdcontexts(host); + ctx_get_context_list(&host->host_uuid, rrdcontext_load_context_callback, host); + ctx_get_chart_list(&host->host_uuid, rrdinstance_load_chart_callback, host); + + RRDCONTEXT *rc; + dfe_start_read(host->rrdctx.contexts, rc) { + rrdcontext_trigger_updates(rc, __FUNCTION__ ); + } + dfe_done(rc); + + rrdcontext_garbage_collect_single_host(host, false); +} + +// ---------------------------------------------------------------------------- +// version hash calculation + +uint64_t rrdcontext_version_hash_with_callback( + RRDHOST *host, + void (*callback)(RRDCONTEXT *, bool, void *), + bool snapshot, + void *bundle) { + + if(unlikely(!host || !host->rrdctx.contexts)) return 0; + + RRDCONTEXT *rc; + uint64_t hash = 0; + + // loop through all contexts of the host + dfe_start_read(host->rrdctx.contexts, rc) { + + rrdcontext_lock(rc); + + if(unlikely(rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { + rrdcontext_unlock(rc); + continue; + } + + if(unlikely(callback)) + callback(rc, snapshot, bundle); + + // skip any deleted contexts + if(unlikely(rrd_flag_is_deleted(rc))) { + rrdcontext_unlock(rc); + continue; + } + + // we use rc->hub.* which has the latest + // metadata we have sent to the hub + + // if a context is currently queued, rc->hub.* does NOT + // reflect the queued changes. rc->hub.* is updated with + // their metadata, after messages are dispatched to hub. + + // when the context is being collected, + // rc->hub.last_time_t is already zero + + hash += rc->hub.version + rc->hub.last_time_s - rc->hub.first_time_s; + + rrdcontext_unlock(rc); + + } + dfe_done(rc); + + return hash; +} + +// ---------------------------------------------------------------------------- +// retention recalculation + +static void rrdhost_update_cached_retention(RRDHOST *host, time_t first_time_s, time_t last_time_s, bool global) { + if(unlikely(!host)) + return; + + spinlock_lock(&host->retention.spinlock); + + if(global) { + host->retention.first_time_s = first_time_s; + host->retention.last_time_s = last_time_s; + } + else { + if(!host->retention.first_time_s || first_time_s < host->retention.first_time_s) + host->retention.first_time_s = first_time_s; + + if(!host->retention.last_time_s || last_time_s > host->retention.last_time_s) + host->retention.last_time_s = last_time_s; + } + + spinlock_unlock(&host->retention.spinlock); +} + +void rrdcontext_recalculate_context_retention(RRDCONTEXT *rc, RRD_FLAGS reason, bool worker_jobs) { + rrdcontext_post_process_updates(rc, true, reason, worker_jobs); +} + +void rrdcontext_recalculate_host_retention(RRDHOST *host, RRD_FLAGS reason, bool worker_jobs) { + if(unlikely(!host || !host->rrdctx.contexts)) return; + + time_t first_time_s = 0; + time_t last_time_s = 0; + + RRDCONTEXT *rc; + dfe_start_read(host->rrdctx.contexts, rc) { + rrdcontext_recalculate_context_retention(rc, reason, worker_jobs); + + if(!first_time_s || rc->first_time_s < first_time_s) + first_time_s = rc->first_time_s; + + if(!last_time_s || rc->last_time_s > last_time_s) + last_time_s = rc->last_time_s; + } + dfe_done(rc); + + rrdhost_update_cached_retention(host, first_time_s, last_time_s, true); +} + +static void rrdcontext_recalculate_retention_all_hosts(void) { + rrdcontext_next_db_rotation_ut = 0; + RRDHOST *host; + dfe_start_reentrant(rrdhost_root_index, host) { + worker_is_busy(WORKER_JOB_RETENTION); + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DB_ROTATION, true); + } + dfe_done(host); +} + +// ---------------------------------------------------------------------------- +// garbage collector + +bool rrdmetric_update_retention(RRDMETRIC *rm) { + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + + if(rm->rrddim) { + min_first_time_t = rrddim_first_entry_s(rm->rrddim); + max_last_time_t = rrddim_last_entry_s(rm->rrddim); + } + else { + RRDHOST *rrdhost = rm->ri->rc->rrdhost; + for (size_t tier = 0; tier < storage_tiers; tier++) { + STORAGE_ENGINE *eng = rrdhost->db[tier].eng; + + time_t first_time_t, last_time_t; + if (eng->api.metric_retention_by_uuid(rrdhost->db[tier].si, &rm->uuid, &first_time_t, &last_time_t)) { + if (first_time_t < min_first_time_t) + min_first_time_t = first_time_t; + + if (last_time_t > max_last_time_t) + max_last_time_t = last_time_t; + } + } + } + + if((min_first_time_t == LONG_MAX || min_first_time_t == 0) && max_last_time_t == 0) + return false; + + if(min_first_time_t == LONG_MAX) + min_first_time_t = 0; + + if(min_first_time_t > max_last_time_t) { + internal_error(true, "RRDMETRIC: retention of '%s' is flipped, first_time_t = %ld, last_time_t = %ld", string2str(rm->id), min_first_time_t, max_last_time_t); + time_t tmp = min_first_time_t; + min_first_time_t = max_last_time_t; + max_last_time_t = tmp; + } + + // check if retention changed + + if (min_first_time_t != rm->first_time_s) { + rm->first_time_s = min_first_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (max_last_time_t != rm->last_time_s) { + rm->last_time_s = max_last_time_t; + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(unlikely(!rm->first_time_s && !rm->last_time_s)) + rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + rrd_flag_set(rm, RRD_FLAG_LIVE_RETENTION); + + return true; +} + +static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) { + if(likely(!rrd_flag_check(rm, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rm, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(rm->rrddim)) + return false; + + rrdmetric_update_retention(rm); + if(rm->first_time_s || rm->last_time_s) + return false; + + return true; +} + +static inline bool rrdinstance_should_be_deleted(RRDINSTANCE *ri) { + if(likely(!rrd_flag_check(ri, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(ri, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(likely(ri->rrdset)) + return false; + + if(unlikely(dictionary_referenced_items(ri->rrdmetrics) != 0)) + return false; + + if(unlikely(dictionary_entries(ri->rrdmetrics) != 0)) + return false; + + if(ri->first_time_s || ri->last_time_s) + return false; + + return true; +} + +static inline bool rrdcontext_should_be_deleted(RRDCONTEXT *rc) { + if(likely(!rrd_flag_check(rc, RRD_FLAGS_REQUIRED_FOR_DELETIONS))) + return false; + + if(likely(rrd_flag_check(rc, RRD_FLAGS_PREVENTING_DELETIONS))) + return false; + + if(unlikely(dictionary_referenced_items(rc->rrdinstances) != 0)) + return false; + + if(unlikely(dictionary_entries(rc->rrdinstances) != 0)) + return false; + + if(unlikely(rc->first_time_s || rc->last_time_s)) + return false; + + return true; +} + +void rrdcontext_delete_from_sql_unsafe(RRDCONTEXT *rc) { + // we need to refresh the string pointers in rc->hub + // in case the context changed values + rc->hub.id = string2str(rc->id); + rc->hub.title = string2str(rc->title); + rc->hub.units = string2str(rc->units); + rc->hub.family = string2str(rc->family); + + // delete it from SQL + if(ctx_delete_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + netdata_log_error("RRDCONTEXT: failed to delete context '%s' version %"PRIu64" from SQL.", + rc->hub.id, rc->hub.version); +} + +static void rrdcontext_garbage_collect_single_host(RRDHOST *host, bool worker_jobs) { + + internal_error(true, "RRDCONTEXT: garbage collecting context structures of host '%s'", rrdhost_hostname(host)); + + RRDCONTEXT *rc; + dfe_start_reentrant(host->rrdctx.contexts, rc) { + if(unlikely(worker_jobs && !service_running(SERVICE_CONTEXT))) break; + + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP); + + rrdcontext_lock(rc); + + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(worker_jobs && !service_running(SERVICE_CONTEXT))) break; + + RRDMETRIC *rm; + dfe_start_write(ri->rrdmetrics, rm) { + if(rrdmetric_should_be_deleted(rm)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(ri->rrdmetrics, string2str(rm->id))) + netdata_log_error("RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: metric '%s' of instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rm->id), + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(rm); + + if(rrdinstance_should_be_deleted(ri)) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + if(!dictionary_del(rc->rrdinstances, string2str(ri->id))) + netdata_log_error("RRDCONTEXT: instance '%s' of context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: instance '%s' of context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(ri->id), + string2str(rc->id), + rrdhost_hostname(host)); + } + } + dfe_done(ri); + + if(unlikely(rrdcontext_should_be_deleted(rc))) { + if(worker_jobs) worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_delete_from_sql_unsafe(rc); + + if(!dictionary_del(host->rrdctx.contexts, string2str(rc->id))) + netdata_log_error("RRDCONTEXT: context '%s' of host '%s', failed to be deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + else + internal_error( + true, + "RRDCONTEXT: context '%s' of host '%s', deleted from rrdmetrics dictionary.", + string2str(rc->id), + rrdhost_hostname(host)); + } + + // the item is referenced in the dictionary + // so, it is still here to unlock, even if we have deleted it + rrdcontext_unlock(rc); + } + dfe_done(rc); +} + +static void rrdcontext_garbage_collect_for_all_hosts(void) { + RRDHOST *host; + dfe_start_reentrant(rrdhost_root_index, host) { + rrdcontext_garbage_collect_single_host(host, true); + } + dfe_done(host); +} + +// ---------------------------------------------------------------------------- +// post processing + +static void rrdmetric_process_updates(RRDMETRIC *rm, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rm, reason); + + if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_METRIC); + + if(reason & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) { + rrd_flag_set_archived(rm); + rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD); + } + if(rrd_flag_is_deleted(rm) && (reason & RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + rrd_flag_set_archived(rm); + + rrdmetric_update_retention(rm); + + rrd_flag_unset_updated(rm); +} + +static void rrdinstance_post_process_updates(RRDINSTANCE *ri, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(ri, reason); + + if(!force && !rrd_flag_is_updated(ri) && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION)) + return; + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_INSTANCE); + + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t metrics_active = 0, metrics_deleted = 0; + bool live_retention = true, currently_collected = false; + if(dictionary_entries(ri->rrdmetrics) > 0) { + RRDMETRIC *rm; + dfe_start_read((DICTIONARY *)ri->rrdmetrics, rm) { + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdmetric_process_updates(rm, force, reason_to_pass, worker_jobs); + + if(unlikely(!rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely((rrdmetric_should_be_deleted(rm)))) { + metrics_deleted++; + continue; + } + + if(!currently_collected && rrd_flag_check(rm, RRD_FLAG_COLLECTED) && rm->first_time_s) + currently_collected = true; + + metrics_active++; + + if (rm->first_time_s && rm->first_time_s < min_first_time_t) + min_first_time_t = rm->first_time_s; + + if (rm->last_time_s && rm->last_time_s > max_last_time_t) + max_last_time_t = rm->last_time_s; + } + dfe_done(rm); + } + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_set(ri, RRD_FLAG_LIVE_RETENTION); + else if(unlikely(!live_retention && rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + rrd_flag_clear(ri, RRD_FLAG_LIVE_RETENTION); + + if(unlikely(!metrics_active)) { + // no metrics available + + if(ri->first_time_s) { + ri->first_time_s = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_s) { + ri->last_time_s = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have active metrics... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 || max_last_time_t == 0)) { + if(ri->first_time_s) { + ri->first_time_s = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(ri->last_time_s) { + ri->last_time_s = 0; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(live_retention)) + rrd_flag_set_deleted(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(ri, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(ri->first_time_s != min_first_time_t)) { + ri->first_time_s = min_first_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (unlikely(ri->last_time_s != max_last_time_t)) { + ri->last_time_s = max_last_time_t; + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(ri); + else + rrd_flag_set_archived(ri); + } + } + + rrd_flag_unset_updated(ri); +} + +static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAGS reason, bool worker_jobs) { + if(reason != RRD_FLAG_NONE) + rrd_flag_set_updated(rc, reason); + + if(worker_jobs) + worker_is_busy(WORKER_JOB_PP_CONTEXT); + + size_t min_priority_collected = LONG_MAX; + size_t min_priority_not_collected = LONG_MAX; + size_t min_priority = LONG_MAX; + time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; + size_t instances_active = 0, instances_deleted = 0, metrics = 0; + bool live_retention = true, currently_collected = false, hidden = true; + if(dictionary_entries(rc->rrdinstances) > 0) { + RRDINSTANCE *ri; + dfe_start_reentrant(rc->rrdinstances, ri) { + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + RRD_FLAGS reason_to_pass = reason; + if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; + + rrdinstance_post_process_updates(ri, force, reason_to_pass, worker_jobs); + + if(unlikely(hidden && !rrd_flag_check(ri, RRD_FLAG_HIDDEN))) + hidden = false; + + if(unlikely(live_retention && !rrd_flag_check(ri, RRD_FLAG_LIVE_RETENTION))) + live_retention = false; + + if (unlikely(rrdinstance_should_be_deleted(ri))) { + instances_deleted++; + continue; + } + + bool ri_collected = rrd_flag_is_collected(ri); + + if(ri_collected && !rrd_flag_check(ri, RRD_FLAG_MERGED_COLLECTED_RI_TO_RC)) { + rrdcontext_update_from_collected_rrdinstance(ri); + rrd_flag_set(ri, RRD_FLAG_MERGED_COLLECTED_RI_TO_RC); + } + + if(unlikely(!currently_collected && rrd_flag_is_collected(ri) && ri->first_time_s)) + currently_collected = true; + + internal_error(rc->units != ri->units, + "RRDCONTEXT: '%s' rrdinstance '%s' has different units, context '%s', instance '%s'", + string2str(rc->id), string2str(ri->id), + string2str(rc->units), string2str(ri->units)); + + instances_active++; + metrics += dictionary_entries(ri->rrdmetrics); + + if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY) { + if(rrd_flag_check(ri, RRD_FLAG_COLLECTED)) { + if(ri->priority < min_priority_collected) + min_priority_collected = ri->priority; + } + else { + if(ri->priority < min_priority_not_collected) + min_priority_not_collected = ri->priority; + } + } + + if (ri->first_time_s && ri->first_time_s < min_first_time_t) + min_first_time_t = ri->first_time_s; + + if (ri->last_time_s && ri->last_time_s > max_last_time_t) + max_last_time_t = ri->last_time_s; + } + dfe_done(ri); + + rc->stats.metrics = metrics; + + if(min_priority_collected != LONG_MAX) + // use the collected priority + min_priority = min_priority_collected; + else + // use the non-collected priority + min_priority = min_priority_not_collected; + } + + { + bool previous_hidden = rrd_flag_check(rc, RRD_FLAG_HIDDEN); + if (hidden != previous_hidden) { + if (hidden && !rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_set(rc, RRD_FLAG_HIDDEN); + else if (!hidden && rrd_flag_check(rc, RRD_FLAG_HIDDEN)) + rrd_flag_clear(rc, RRD_FLAG_HIDDEN); + } + + bool previous_live_retention = rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION); + if (live_retention != previous_live_retention) { + if (live_retention && !rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_set(rc, RRD_FLAG_LIVE_RETENTION); + else if (!live_retention && rrd_flag_check(rc, RRD_FLAG_LIVE_RETENTION)) + rrd_flag_clear(rc, RRD_FLAG_LIVE_RETENTION); + } + } + + rrdcontext_lock(rc); + rc->pp.executions++; + + if(unlikely(!instances_active)) { + // we had some instances, but they are gone now... + + if(rc->first_time_s) { + rc->first_time_s = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_s) { + rc->last_time_s = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + // we have some active instances... + + if (unlikely(min_first_time_t == LONG_MAX)) + min_first_time_t = 0; + + if (unlikely(min_first_time_t == 0 && max_last_time_t == 0)) { + if(rc->first_time_s) { + rc->first_time_s = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if(rc->last_time_s) { + rc->last_time_s = 0; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + rrd_flag_set_deleted(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + } + else { + rrd_flag_clear(rc, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); + + if (unlikely(rc->first_time_s != min_first_time_t)) { + rc->first_time_s = min_first_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T); + } + + if (rc->last_time_s != max_last_time_t) { + rc->last_time_s = max_last_time_t; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T); + } + + if(likely(currently_collected)) + rrd_flag_set_collected(rc); + else + rrd_flag_set_archived(rc); + } + + if (min_priority != LONG_MAX && rc->priority != min_priority) { + rc->priority = min_priority; + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); + } + } + + if(unlikely(rrd_flag_is_updated(rc) && rc->rrdhost->rrdctx.hub_queue)) { + if(check_if_cloud_version_changed_unsafe(rc, false)) { + rc->version = rrdcontext_get_next_version(rc); + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx.hub_queue, + string2str(rc->id), rc, sizeof(*rc)); + } + } + + rrd_flag_unset_updated(rc); + rrdcontext_unlock(rc); +} + +void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function __maybe_unused, RRD_FLAGS flags __maybe_unused) { + if(unlikely(!rc->rrdhost->rrdctx.pp_queue)) return; + +#if 0 + if(string_strcmp(rc->id, "system.cpu") == 0) { + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + buffer_json_member_add_array(wb, "flags"); + rrd_flags_to_buffer_json_array_items(rc->flags, wb); + buffer_json_array_close(wb); + buffer_json_member_add_array(wb, "reasons"); + rrd_reasons_to_buffer_json_array_items(rc->flags, wb); + buffer_json_array_close(wb); + buffer_json_finalize(wb); + nd_log(NDLS_DAEMON, NDLP_EMERG, "%s() context '%s', triggered: %s", + function, string2str(rc->id), buffer_tostring(wb)); + } +#endif + + dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx.pp_queue, + string2str(rc->id), + rc, + sizeof(*rc)); +} + +static void rrdcontext_dequeue_from_post_processing(RRDCONTEXT *rc) { + if(unlikely(!rc->rrdhost->rrdctx.pp_queue)) return; + dictionary_del(rc->rrdhost->rrdctx.pp_queue, string2str(rc->id)); +} + +static void rrdcontext_post_process_queued_contexts(RRDHOST *host) { + if(unlikely(!host->rrdctx.pp_queue)) return; + + RRDCONTEXT *rc; + dfe_start_reentrant(host->rrdctx.pp_queue, rc) { + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_post_process_updates(rc, false, RRD_FLAG_NONE, true); + } + dfe_done(rc); +} + +// ---------------------------------------------------------------------------- +// dispatching contexts to cloud + +static uint64_t rrdcontext_get_next_version(RRDCONTEXT *rc) { + time_t now = now_realtime_sec(); + uint64_t version = MAX(rc->version, rc->hub.version); + version = MAX((uint64_t)now, version); + version++; + return version; +} + +void rrdcontext_message_send_unsafe(RRDCONTEXT *rc, bool snapshot __maybe_unused, void *bundle __maybe_unused) { + + // save it, so that we know the last version we sent to hub + rc->version = rc->hub.version = rrdcontext_get_next_version(rc); + rc->hub.id = string2str(rc->id); + rc->hub.title = string2str(rc->title); + rc->hub.units = string2str(rc->units); + rc->hub.family = string2str(rc->family); + rc->hub.chart_type = rrdset_type_name(rc->chart_type); + rc->hub.priority = rc->priority; + rc->hub.first_time_s = rc->first_time_s; + rc->hub.last_time_s = rrd_flag_is_collected(rc) ? 0 : rc->last_time_s; + rc->hub.deleted = rrd_flag_is_deleted(rc) ? true : false; + +#ifdef ENABLE_ACLK + struct context_updated message = { + .id = rc->hub.id, + .version = rc->hub.version, + .title = rc->hub.title, + .units = rc->hub.units, + .family = rc->hub.family, + .chart_type = rc->hub.chart_type, + .priority = rc->hub.priority, + .first_entry = rc->hub.first_time_s, + .last_entry = rc->hub.last_time_s, + .deleted = rc->hub.deleted, + }; + + if(likely(!rrd_flag_check(rc, RRD_FLAG_HIDDEN))) { + if (snapshot) { + if (!rc->hub.deleted) + contexts_snapshot_add_ctx_update(bundle, &message); + } + else + contexts_updated_add_ctx_update(bundle, &message); + } +#endif + + // store it to SQL + + if(rrd_flag_is_deleted(rc)) + rrdcontext_delete_from_sql_unsafe(rc); + + else if (ctx_store_context(&rc->rrdhost->host_uuid, &rc->hub) != 0) + netdata_log_error("RRDCONTEXT: failed to save context '%s' version %"PRIu64" to SQL.", rc->hub.id, rc->hub.version); +} + +static bool check_if_cloud_version_changed_unsafe(RRDCONTEXT *rc, bool sending __maybe_unused) { + bool id_changed = false, + title_changed = false, + units_changed = false, + family_changed = false, + chart_type_changed = false, + priority_changed = false, + first_time_changed = false, + last_time_changed = false, + deleted_changed = false; + + RRD_FLAGS flags = rrd_flags_get(rc); + + if(unlikely(string2str(rc->id) != rc->hub.id)) + id_changed = true; + + if(unlikely(string2str(rc->title) != rc->hub.title)) + title_changed = true; + + if(unlikely(string2str(rc->units) != rc->hub.units)) + units_changed = true; + + if(unlikely(string2str(rc->family) != rc->hub.family)) + family_changed = true; + + if(unlikely(rrdset_type_name(rc->chart_type) != rc->hub.chart_type)) + chart_type_changed = true; + + if(unlikely(rc->priority != rc->hub.priority)) + priority_changed = true; + + if(unlikely((uint64_t)rc->first_time_s != rc->hub.first_time_s)) + first_time_changed = true; + + if(unlikely((uint64_t)((flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_s) != rc->hub.last_time_s)) + last_time_changed = true; + + if(unlikely(((flags & RRD_FLAG_DELETED) ? true : false) != rc->hub.deleted)) + deleted_changed = true; + + if(unlikely(id_changed || title_changed || units_changed || family_changed || chart_type_changed || priority_changed || first_time_changed || last_time_changed || deleted_changed)) { + + internal_error(LOG_TRANSITIONS, + "RRDCONTEXT: %s NEW VERSION '%s'%s of host '%s', version %"PRIu64", title '%s'%s, units '%s'%s, family '%s'%s, chart type '%s'%s, priority %u%s, first_time_t %ld%s, last_time_t %ld%s, deleted '%s'%s, (queued for %llu ms, expected %llu ms)", + sending?"SENDING":"QUEUE", + string2str(rc->id), id_changed ? " (CHANGED)" : "", + rrdhost_hostname(rc->rrdhost), + rc->version, + string2str(rc->title), title_changed ? " (CHANGED)" : "", + string2str(rc->units), units_changed ? " (CHANGED)" : "", + string2str(rc->family), family_changed ? " (CHANGED)" : "", + rrdset_type_name(rc->chart_type), chart_type_changed ? " (CHANGED)" : "", + rc->priority, priority_changed ? " (CHANGED)" : "", + rc->first_time_s, first_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_COLLECTED) ? 0 : rc->last_time_s, last_time_changed ? " (CHANGED)" : "", + (flags & RRD_FLAG_DELETED) ? "true" : "false", deleted_changed ? " (CHANGED)" : "", + sending ? (now_realtime_usec() - rc->queue.queued_ut) / USEC_PER_MS : 0, + sending ? (rc->queue.scheduled_dispatch_ut - rc->queue.queued_ut) / USEC_PER_MS : 0 + ); + + rrdhost_update_cached_retention(rc->rrdhost, rc->first_time_s, rc->last_time_s, false); + + return true; + } + + if(!(flags & RRD_FLAG_COLLECTED)) + rrdhost_update_cached_retention(rc->rrdhost, rc->first_time_s, rc->last_time_s, false); + + return false; +} + +static inline usec_t rrdcontext_calculate_queued_dispatch_time_ut(RRDCONTEXT *rc, usec_t now_ut) { + + if(likely(rc->queue.delay_calc_ut >= rc->queue.queued_ut)) + return rc->queue.scheduled_dispatch_ut; + + RRD_FLAGS flags = rc->queue.queued_flags; + + usec_t delay = LONG_MAX; + int i; + struct rrdcontext_reason *reason; + for(i = 0, reason = &rrdcontext_reasons[i]; reason->name ; reason = &rrdcontext_reasons[++i]) { + if(unlikely(flags & reason->flag)) { + if(reason->delay_ut < delay) + delay = reason->delay_ut; + } + } + + if(unlikely(delay == LONG_MAX)) { + internal_error(true, "RRDCONTEXT: '%s', cannot find minimum delay of flags %x", string2str(rc->id), (unsigned int)flags); + delay = 60 * USEC_PER_SEC; + } + + rc->queue.delay_calc_ut = now_ut; + usec_t dispatch_ut = rc->queue.scheduled_dispatch_ut = rc->queue.queued_ut + delay; + return dispatch_ut; +} + +static void rrdcontext_dequeue_from_hub_queue(RRDCONTEXT *rc) { + dictionary_del(rc->rrdhost->rrdctx.hub_queue, string2str(rc->id)); +} + +static void rrdcontext_dispatch_queued_contexts_to_hub(RRDHOST *host, usec_t now_ut) { + + // check if we have received a streaming command for this host + if(!host->node_id || !rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS) || !aclk_connected || !host->rrdctx.hub_queue) + return; + + // check if there are queued items to send + if(!dictionary_entries(host->rrdctx.hub_queue)) + return; + + size_t messages_added = 0; + contexts_updated_t bundle = NULL; + + RRDCONTEXT *rc; + dfe_start_reentrant(host->rrdctx.hub_queue, rc) { + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + if(unlikely(messages_added >= MESSAGES_PER_BUNDLE_TO_SEND_TO_HUB_PER_HOST)) + break; + + worker_is_busy(WORKER_JOB_QUEUED); + usec_t dispatch_ut = rrdcontext_calculate_queued_dispatch_time_ut(rc, now_ut); + char *claim_id = get_agent_claimid(); + + if(unlikely(now_ut >= dispatch_ut) && claim_id) { + worker_is_busy(WORKER_JOB_CHECK); + + rrdcontext_lock(rc); + + if(check_if_cloud_version_changed_unsafe(rc, true)) { + worker_is_busy(WORKER_JOB_SEND); + +#ifdef ENABLE_ACLK + if(!bundle) { + // prepare the bundle to send the messages + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(*host->node_id, uuid); + + bundle = contexts_updated_new(claim_id, uuid, 0, now_ut); + } +#endif + // update the hub data of the context, give a new version, pack the message + // and save an update to SQL + rrdcontext_message_send_unsafe(rc, false, bundle); + messages_added++; + + rc->queue.dispatches++; + rc->queue.dequeued_ut = now_ut; + } + else + rc->version = rc->hub.version; + + // remove it from the queue + worker_is_busy(WORKER_JOB_DEQUEUE); + rrdcontext_dequeue_from_hub_queue(rc); + + if(unlikely(rrdcontext_should_be_deleted(rc))) { + // this is a deleted context - delete it forever... + + worker_is_busy(WORKER_JOB_CLEANUP_DELETE); + + rrdcontext_dequeue_from_post_processing(rc); + rrdcontext_delete_from_sql_unsafe(rc); + + STRING *id = string_dup(rc->id); + rrdcontext_unlock(rc); + + // delete it from the master dictionary + if(!dictionary_del(host->rrdctx.contexts, string2str(rc->id))) + netdata_log_error("RRDCONTEXT: '%s' of host '%s' failed to be deleted from rrdcontext dictionary.", + string2str(id), rrdhost_hostname(host)); + + string_freez(id); + } + else + rrdcontext_unlock(rc); + } + freez(claim_id); + } + dfe_done(rc); + +#ifdef ENABLE_ACLK + if(service_running(SERVICE_CONTEXT) && bundle) { + // we have a bundle to send messages + + // update the version hash + contexts_updated_update_version_hash(bundle, rrdcontext_version_hash(host)); + + // send it + aclk_send_contexts_updated(bundle); + } + else if(bundle) + contexts_updated_delete(bundle); +#endif + +} + +// ---------------------------------------------------------------------------- +// worker thread + +static void rrdcontext_main_cleanup(void *pptr) { + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + // custom code + worker_unregister(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *rrdcontext_main(void *ptr) { + CLEANUP_FUNCTION_REGISTER(rrdcontext_main_cleanup) cleanup_ptr = ptr; + + worker_register("RRDCONTEXT"); + worker_register_job_name(WORKER_JOB_HOSTS, "hosts"); + worker_register_job_name(WORKER_JOB_CHECK, "dedup checks"); + worker_register_job_name(WORKER_JOB_SEND, "sent contexts"); + worker_register_job_name(WORKER_JOB_DEQUEUE, "deduplicated contexts"); + worker_register_job_name(WORKER_JOB_RETENTION, "metrics retention"); + worker_register_job_name(WORKER_JOB_QUEUED, "queued contexts"); + worker_register_job_name(WORKER_JOB_CLEANUP, "cleanups"); + worker_register_job_name(WORKER_JOB_CLEANUP_DELETE, "deletes"); + worker_register_job_name(WORKER_JOB_PP_METRIC, "check metrics"); + worker_register_job_name(WORKER_JOB_PP_INSTANCE, "check instances"); + worker_register_job_name(WORKER_JOB_PP_CONTEXT, "check contexts"); + + worker_register_job_custom_metric(WORKER_JOB_HUB_QUEUE_SIZE, "hub queue size", "contexts", WORKER_METRIC_ABSOLUTE); + worker_register_job_custom_metric(WORKER_JOB_PP_QUEUE_SIZE, "post processing queue size", "contexts", WORKER_METRIC_ABSOLUTE); + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = RRDCONTEXT_WORKER_THREAD_HEARTBEAT_USEC; + + while (service_running(SERVICE_CONTEXT)) { + worker_is_idle(); + heartbeat_next(&hb, step); + + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + usec_t now_ut = now_realtime_usec(); + + if(rrdcontext_next_db_rotation_ut && now_ut > rrdcontext_next_db_rotation_ut) { + rrdcontext_recalculate_retention_all_hosts(); + rrdcontext_garbage_collect_for_all_hosts(); + rrdcontext_next_db_rotation_ut = 0; + } + + size_t hub_queued_contexts_for_all_hosts = 0; + size_t pp_queued_contexts_for_all_hosts = 0; + + RRDHOST *host; + dfe_start_reentrant(rrdhost_root_index, host) { + if(unlikely(!service_running(SERVICE_CONTEXT))) break; + + worker_is_busy(WORKER_JOB_HOSTS); + + if(host->rrdctx.pp_queue) { + pp_queued_contexts_for_all_hosts += dictionary_entries(host->rrdctx.pp_queue); + rrdcontext_post_process_queued_contexts(host); + dictionary_garbage_collect(host->rrdctx.pp_queue); + } + + if(host->rrdctx.hub_queue) { + hub_queued_contexts_for_all_hosts += dictionary_entries(host->rrdctx.hub_queue); + rrdcontext_dispatch_queued_contexts_to_hub(host, now_ut); + dictionary_garbage_collect(host->rrdctx.hub_queue); + } + + if (host->rrdctx.contexts) + dictionary_garbage_collect(host->rrdctx.contexts); + + // calculate the number of metrics and instances in the host + RRDCONTEXT *rc; + uint32_t metrics = 0, instances = 0; + dfe_start_read(host->rrdctx.contexts, rc) { + metrics += rc->stats.metrics; + instances += dictionary_entries(rc->rrdinstances); + } + dfe_done(rc); + host->rrdctx.metrics = metrics; + host->rrdctx.instances = instances; + } + dfe_done(host); + + worker_set_metric(WORKER_JOB_HUB_QUEUE_SIZE, (NETDATA_DOUBLE)hub_queued_contexts_for_all_hosts); + worker_set_metric(WORKER_JOB_PP_QUEUE_SIZE, (NETDATA_DOUBLE)pp_queued_contexts_for_all_hosts); + } + + return NULL; +} |