summaryrefslogtreecommitdiffstats
path: root/src/plugins.d/pluginsd_replication.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins.d/pluginsd_replication.c')
-rw-r--r--src/plugins.d/pluginsd_replication.c371
1 files changed, 371 insertions, 0 deletions
diff --git a/src/plugins.d/pluginsd_replication.c b/src/plugins.d/pluginsd_replication.c
new file mode 100644
index 000000000..8d0975210
--- /dev/null
+++ b/src/plugins.d/pluginsd_replication.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "pluginsd_replication.h"
+
+PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PARSER *parser) {
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *id = get_word(words, num_words, idx++);
+ char *start_time_str = get_word(words, num_words, idx++);
+ char *end_time_str = get_word(words, num_words, idx++);
+ char *child_now_str = get_word(words, num_words, idx++);
+
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ RRDSET *st;
+ if (likely(!id || !*id))
+ st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ else
+ st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+
+ if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_REPLAY_BEGIN))
+ return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ if(start_time_str && end_time_str) {
+ time_t start_time = (time_t) str2ull_encoded(start_time_str);
+ time_t end_time = (time_t) str2ull_encoded(end_time_str);
+
+ time_t wall_clock_time = 0, tolerance;
+ bool wall_clock_comes_from_child; (void)wall_clock_comes_from_child;
+ if(child_now_str) {
+ wall_clock_time = (time_t) str2ull_encoded(child_now_str);
+ tolerance = st->update_every + 1;
+ wall_clock_comes_from_child = true;
+ }
+
+ if(wall_clock_time <= 0) {
+ wall_clock_time = now_realtime_sec();
+ tolerance = st->update_every + 5;
+ wall_clock_comes_from_child = false;
+ }
+
+#ifdef NETDATA_LOG_REPLICATION_REQUESTS
+ internal_error(
+ (!st->replay.start_streaming && (end_time < st->replay.after || start_time > st->replay.before)),
+ "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, which does not match our request (%ld to %ld).",
+ rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time, st->replay.after, st->replay.before);
+
+ internal_error(
+ true,
+ "REPLAY: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN " from %ld to %ld, child wall clock is %ld (%s), had requested %ld to %ld",
+ rrdhost_hostname(st->rrdhost), rrdset_id(st),
+ start_time, end_time, wall_clock_time, wall_clock_comes_from_child ? "from child" : "parent time",
+ st->replay.after, st->replay.before);
+#endif
+
+ if(start_time && end_time && start_time < wall_clock_time + tolerance && end_time < wall_clock_time + tolerance && start_time < end_time) {
+ if (unlikely(end_time - start_time != st->update_every))
+ rrdset_set_update_every_s(st, end_time - start_time);
+
+ st->last_collected_time.tv_sec = end_time;
+ st->last_collected_time.tv_usec = 0;
+
+ st->last_updated.tv_sec = end_time;
+ st->last_updated.tv_usec = 0;
+
+ st->counter++;
+ st->counter_done++;
+
+ // these are only needed for db mode RAM, ALLOC
+ st->db.current_entry++;
+ if(st->db.current_entry >= st->db.entries)
+ st->db.current_entry -= st->db.entries;
+
+ parser->user.replay.start_time = start_time;
+ parser->user.replay.end_time = end_time;
+ parser->user.replay.start_time_ut = (usec_t) start_time * USEC_PER_SEC;
+ parser->user.replay.end_time_ut = (usec_t) end_time * USEC_PER_SEC;
+ parser->user.replay.wall_clock_time = wall_clock_time;
+ parser->user.replay.rset_enabled = true;
+
+ return PARSER_RC_OK;
+ }
+
+ netdata_log_error("PLUGINSD REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_BEGIN
+ " from %ld to %ld, but timestamps are invalid "
+ "(now is %ld [%s], tolerance %ld). Ignoring " PLUGINSD_KEYWORD_REPLAY_SET,
+ rrdhost_hostname(st->rrdhost), rrdset_id(st), start_time, end_time,
+ wall_clock_time, wall_clock_comes_from_child ? "child wall clock" : "parent wall clock",
+ tolerance);
+ }
+
+ // the child sends an RBEGIN without any parameters initially
+ // setting rset_enabled to false, means the RSET should not store any metrics
+ // to store metrics, the RBEGIN needs to have timestamps
+ parser->user.replay.start_time = 0;
+ parser->user.replay.end_time = 0;
+ parser->user.replay.start_time_ut = 0;
+ parser->user.replay.end_time_ut = 0;
+ parser->user.replay.wall_clock_time = 0;
+ parser->user.replay.rset_enabled = false;
+ return PARSER_RC_OK;
+}
+
+PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARSER *parser) {
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *value_str = get_word(words, num_words, idx++);
+ char *flags_str = get_word(words, num_words, idx++);
+
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_SET);
+ if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ if(!parser->user.replay.rset_enabled) {
+ nd_log_limit_static_thread_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_ERR,
+ "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors",
+ rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+
+ // we have to return OK here
+ return PARSER_RC_OK;
+ }
+
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_SET);
+ if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ st->pluginsd.set = true;
+
+ if (unlikely(!parser->user.replay.start_time || !parser->user.replay.end_time)) {
+ netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s with invalid timestamps %ld to %ld from a %s. Disabling it.",
+ rrdhost_hostname(host),
+ rrdset_id(st),
+ dimension,
+ PLUGINSD_KEYWORD_REPLAY_SET,
+ parser->user.replay.start_time,
+ parser->user.replay.end_time,
+ PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+ }
+
+ if (unlikely(!value_str || !*value_str))
+ value_str = "NAN";
+
+ if(unlikely(!flags_str))
+ flags_str = "";
+
+ if (likely(value_str)) {
+ RRDDIM_FLAGS rd_flags = rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED);
+
+ if(!(rd_flags & RRDDIM_FLAG_ARCHIVED)) {
+ NETDATA_DOUBLE value = str2ndd_encoded(value_str, NULL);
+ SN_FLAGS flags = pluginsd_parse_storage_number_flags(flags_str);
+
+ if (!netdata_double_isnumber(value) || (flags == SN_EMPTY_SLOT)) {
+ value = NAN;
+ flags = SN_EMPTY_SLOT;
+ }
+
+ rrddim_store_metric(rd, parser->user.replay.end_time_ut, value, flags);
+ rd->collector.last_collected_time.tv_sec = parser->user.replay.end_time;
+ rd->collector.last_collected_time.tv_usec = 0;
+ rd->collector.counter++;
+ }
+ else {
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING,
+ "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. "
+ "Ignoring data.",
+ rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_name(rd));
+ }
+ }
+
+ return PARSER_RC_OK;
+}
+
+PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, size_t num_words, PARSER *parser) {
+ if(parser->user.replay.rset_enabled == false)
+ return PARSER_RC_OK;
+
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *last_collected_ut_str = get_word(words, num_words, idx++);
+ char *last_collected_value_str = get_word(words, num_words, idx++);
+ char *last_calculated_value_str = get_word(words, num_words, idx++);
+ char *last_stored_value_str = get_word(words, num_words, idx++);
+
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
+ if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ if(st->pluginsd.set) {
+ // reset pos to reuse the same RDAs
+ st->pluginsd.pos = 0;
+ st->pluginsd.set = false;
+ }
+
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
+ if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ usec_t dim_last_collected_ut = (usec_t)rd->collector.last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->collector.last_collected_time.tv_usec;
+ usec_t last_collected_ut = last_collected_ut_str ? str2ull_encoded(last_collected_ut_str) : 0;
+ if(last_collected_ut > dim_last_collected_ut) {
+ rd->collector.last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC);
+ rd->collector.last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC);
+ }
+
+ rd->collector.last_collected_value = last_collected_value_str ? str2ll_encoded(last_collected_value_str) : 0;
+ rd->collector.last_calculated_value = last_calculated_value_str ? str2ndd_encoded(last_calculated_value_str, NULL) : 0;
+ rd->collector.last_stored_value = last_stored_value_str ? str2ndd_encoded(last_stored_value_str, NULL) : 0.0;
+
+ return PARSER_RC_OK;
+}
+
+PARSER_RC pluginsd_replay_rrdset_collection_state(char **words, size_t num_words, PARSER *parser) {
+ if(parser->user.replay.rset_enabled == false)
+ return PARSER_RC_OK;
+
+ char *last_collected_ut_str = get_word(words, num_words, 1);
+ char *last_updated_ut_str = get_word(words, num_words, 2);
+
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE);
+ if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE,
+ PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ usec_t chart_last_collected_ut = (usec_t)st->last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)st->last_collected_time.tv_usec;
+ usec_t last_collected_ut = last_collected_ut_str ? str2ull_encoded(last_collected_ut_str) : 0;
+ if(last_collected_ut > chart_last_collected_ut) {
+ st->last_collected_time.tv_sec = (time_t)(last_collected_ut / USEC_PER_SEC);
+ st->last_collected_time.tv_usec = (last_collected_ut % USEC_PER_SEC);
+ }
+
+ usec_t chart_last_updated_ut = (usec_t)st->last_updated.tv_sec * USEC_PER_SEC + (usec_t)st->last_updated.tv_usec;
+ usec_t last_updated_ut = last_updated_ut_str ? str2ull_encoded(last_updated_ut_str) : 0;
+ if(last_updated_ut > chart_last_updated_ut) {
+ st->last_updated.tv_sec = (time_t)(last_updated_ut / USEC_PER_SEC);
+ st->last_updated.tv_usec = (last_updated_ut % USEC_PER_SEC);
+ }
+
+ st->counter++;
+ st->counter_done++;
+
+ return PARSER_RC_OK;
+}
+
+PARSER_RC pluginsd_replay_end(char **words, size_t num_words, PARSER *parser) {
+ if (num_words < 7) { // accepts 7, but the 7th is optional
+ netdata_log_error("REPLAY: malformed " PLUGINSD_KEYWORD_REPLAY_END " command");
+ return PARSER_RC_ERROR;
+ }
+
+ const char *update_every_child_txt = get_word(words, num_words, 1);
+ const char *first_entry_child_txt = get_word(words, num_words, 2);
+ const char *last_entry_child_txt = get_word(words, num_words, 3);
+ const char *start_streaming_txt = get_word(words, num_words, 4);
+ const char *first_entry_requested_txt = get_word(words, num_words, 5);
+ const char *last_entry_requested_txt = get_word(words, num_words, 6);
+ const char *child_world_time_txt = get_word(words, num_words, 7); // optional
+
+ time_t update_every_child = (time_t) str2ull_encoded(update_every_child_txt);
+ time_t first_entry_child = (time_t) str2ull_encoded(first_entry_child_txt);
+ time_t last_entry_child = (time_t) str2ull_encoded(last_entry_child_txt);
+
+ bool start_streaming = (strcmp(start_streaming_txt, "true") == 0);
+ time_t first_entry_requested = (time_t) str2ull_encoded(first_entry_requested_txt);
+ time_t last_entry_requested = (time_t) str2ull_encoded(last_entry_requested_txt);
+
+ // the optional child world time
+ time_t child_world_time = (child_world_time_txt && *child_world_time_txt) ? (time_t) str2ull_encoded(
+ child_world_time_txt) : now_realtime_sec();
+
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_END);
+ if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+#ifdef NETDATA_LOG_REPLICATION_REQUESTS
+ internal_error(true,
+ "PLUGINSD REPLAY: 'host:%s/chart:%s': got a " PLUGINSD_KEYWORD_REPLAY_END " child db from %llu to %llu, start_streaming %s, had requested from %llu to %llu, wall clock %llu",
+ rrdhost_hostname(host), rrdset_id(st),
+ (unsigned long long)first_entry_child, (unsigned long long)last_entry_child,
+ start_streaming?"true":"false",
+ (unsigned long long)first_entry_requested, (unsigned long long)last_entry_requested,
+ (unsigned long long)child_world_time
+ );
+#endif
+
+ parser->user.data_collections_count++;
+
+ if(parser->user.replay.rset_enabled && st->rrdhost->receiver) {
+ time_t now = now_realtime_sec();
+ time_t started = st->rrdhost->receiver->replication_first_time_t;
+ time_t current = parser->user.replay.end_time;
+
+ if(started && current > started) {
+ host->rrdpush_receiver_replication_percent = (NETDATA_DOUBLE) (current - started) * 100.0 / (NETDATA_DOUBLE) (now - started);
+ worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION,
+ host->rrdpush_receiver_replication_percent);
+ }
+ }
+
+ parser->user.replay.start_time = 0;
+ parser->user.replay.end_time = 0;
+ parser->user.replay.start_time_ut = 0;
+ parser->user.replay.end_time_ut = 0;
+ parser->user.replay.wall_clock_time = 0;
+ parser->user.replay.rset_enabled = false;
+
+ st->counter++;
+ st->counter_done++;
+ store_metric_collection_completed();
+
+#ifdef NETDATA_LOG_REPLICATION_REQUESTS
+ st->replay.start_streaming = false;
+ st->replay.after = 0;
+ st->replay.before = 0;
+ if(start_streaming)
+ st->replay.log_next_data_collection = true;
+#endif
+
+ if (start_streaming) {
+ if (st->update_every != update_every_child)
+ rrdset_set_update_every_s(st, update_every_child);
+
+ if(rrdset_flag_check(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS)) {
+ rrdset_flag_set(st, RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED);
+ rrdset_flag_clear(st, RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS);
+ rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK);
+ rrdhost_receiver_replicating_charts_minus_one(st->rrdhost);
+ }
+#ifdef NETDATA_LOG_REPLICATION_REQUESTS
+ else
+ internal_error(true, "REPLAY ERROR: 'host:%s/chart:%s' got a " PLUGINSD_KEYWORD_REPLAY_END " with enable_streaming = true, but there is no replication in progress for this chart.",
+ rrdhost_hostname(host), rrdset_id(st));
+#endif
+
+ pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END);
+
+ host->rrdpush_receiver_replication_percent = 100.0;
+ worker_set_metric(WORKER_RECEIVER_JOB_REPLICATION_COMPLETION, host->rrdpush_receiver_replication_percent);
+
+ return PARSER_RC_OK;
+ }
+
+ pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_END);
+
+ rrdcontext_updated_retention_rrdset(st);
+
+ bool ok = replicate_chart_request(send_to_plugin, parser, host, st,
+ first_entry_child, last_entry_child, child_world_time,
+ first_entry_requested, last_entry_requested);
+ return ok ? PARSER_RC_OK : PARSER_RC_ERROR;
+}