diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-11-30 18:47:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-11-30 18:47:00 +0000 |
commit | 03bf87dcb06f7021bfb2df2fa8691593c6148aff (patch) | |
tree | e16b06711a2ed77cafb4b7754be0220c3d14a9d7 /streaming/rrdpush.c | |
parent | Adding upstream version 1.36.1. (diff) | |
download | netdata-03bf87dcb06f7021bfb2df2fa8691593c6148aff.tar.xz netdata-03bf87dcb06f7021bfb2df2fa8691593c6148aff.zip |
Adding upstream version 1.37.0. (ref: upstream/1.37.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'streaming/rrdpush.c')
-rw-r--r-- | streaming/rrdpush.c | 613 |
1 files changed, 400 insertions, 213 deletions
diff --git a/streaming/rrdpush.c b/streaming/rrdpush.c index b73f24633..a57f1b080 100644 --- a/streaming/rrdpush.c +++ b/streaming/rrdpush.c @@ -11,8 +11,8 @@ * 1. a random data collection thread, calling rrdset_done_push() * this is called for each chart. * - * the output of this work is kept in a BUFFER in RRDHOST - * the sender thread is signalled via a pipe (also in RRDHOST) + * the output of this work is kept in a thread BUFFER + * the sender thread is signalled via a pipe (in RRDHOST) * * 2. a sender thread running at the sending netdata * this is spawned automatically on the first chart to be pushed @@ -46,6 +46,9 @@ unsigned int default_compression_enabled = 1; char *default_rrdpush_destination = NULL; char *default_rrdpush_api_key = NULL; char *default_rrdpush_send_charts_matching = NULL; +bool default_rrdpush_enable_replication = true; +time_t default_rrdpush_seconds_to_replicate = 86400; +time_t default_rrdpush_replication_step = 600; #ifdef ENABLE_HTTPS int netdata_use_ssl_on_stream = NETDATA_SSL_OPTIONAL; char *netdata_ssl_ca_path = NULL; @@ -66,6 +69,31 @@ static void load_stream_conf() { freez(filename); } +bool rrdpush_receiver_needs_dbengine() { + struct section *co; + + for(co = stream_config.first_section; co; co = co->next) { + if(strcmp(co->name, "stream") == 0) + continue; // the first section is not relevant + + char *s; + + s = appconfig_get_by_section(co, "enabled", NULL); + if(!s || !appconfig_test_boolean_value(s)) + continue; + + s = appconfig_get_by_section(co, "default memory mode", NULL); + if(s && strcmp(s, "dbengine") == 0) + return true; + + s = appconfig_get_by_section(co, "memory mode", NULL); + if(s && strcmp(s, "dbengine") == 0) + return true; + } + + return false; +} + int rrdpush_init() { // -------------------------------------------------------------------- // load stream.conf @@ -75,6 +103,11 @@ int rrdpush_init() { default_rrdpush_destination = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", ""); 
default_rrdpush_api_key = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", ""); default_rrdpush_send_charts_matching = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", "*"); + + default_rrdpush_enable_replication = config_get_boolean(CONFIG_SECTION_DB, "enable replication", default_rrdpush_enable_replication); + default_rrdpush_seconds_to_replicate = config_get_number(CONFIG_SECTION_DB, "seconds to replicate", default_rrdpush_seconds_to_replicate); + default_rrdpush_replication_step = config_get_number(CONFIG_SECTION_DB, "seconds per replication step", default_rrdpush_replication_step); + rrdhost_free_orphan_time = config_get_number(CONFIG_SECTION_DB, "cleanup orphan hosts after secs", rrdhost_free_orphan_time); #ifdef ENABLE_COMPRESSION @@ -101,14 +134,14 @@ int rrdpush_init() { bool invalid_certificate = appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "ssl skip certificate verification", CONFIG_BOOLEAN_NO); if(invalid_certificate == CONFIG_BOOLEAN_YES){ - if(netdata_validate_server == NETDATA_SSL_VALID_CERTIFICATE){ + if(netdata_ssl_validate_server == NETDATA_SSL_VALID_CERTIFICATE){ info("Netdata is configured to accept invalid SSL certificate."); - netdata_validate_server = NETDATA_SSL_INVALID_CERTIFICATE; + netdata_ssl_validate_server = NETDATA_SSL_INVALID_CERTIFICATE; } } - netdata_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", "/etc/ssl/certs/"); - netdata_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", "/etc/ssl/certs/certs.pem"); + netdata_ssl_ca_path = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CApath", NULL); + netdata_ssl_ca_file = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "CAfile", NULL); #endif return default_rrdpush_enabled; @@ -128,30 +161,31 @@ int rrdpush_init() { // this is for the first iterations of each chart unsigned int remote_clock_resync_iterations = 60; - -static inline int 
should_send_chart_matching(RRDSET *st) { - // Do not stream anomaly rates charts. - if (unlikely(st->state->is_ar_chart)) +static inline bool should_send_chart_matching(RRDSET *st, RRDSET_FLAGS flags) { + if(!(flags & RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED)) return false; - if (rrdset_flag_check(st, RRDSET_FLAG_ANOMALY_DETECTION)) - return ml_streaming_enabled(); - - if(!rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE)) { + if(unlikely(!(flags & (RRDSET_FLAG_UPSTREAM_SEND | RRDSET_FLAG_UPSTREAM_IGNORE)))) { RRDHOST *host = st->rrdhost; - if(simple_pattern_matches(host->rrdpush_send_charts_matching, st->id) || - simple_pattern_matches(host->rrdpush_send_charts_matching, st->name)) { - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE); - rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); + if (flags & RRDSET_FLAG_ANOMALY_DETECTION) { + if(ml_streaming_enabled()) + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); + else + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); } - else { - rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + else if(simple_pattern_matches(host->rrdpush_send_charts_matching, rrdset_id(st)) || + simple_pattern_matches(host->rrdpush_send_charts_matching, rrdset_name(st))) + + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); + else rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); - } + + // get the flags again, to know how to respond + flags = rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE); } - return(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND)); + return flags & RRDSET_FLAG_UPSTREAM_SEND; } int configured_as_parent() { @@ -173,42 +207,25 @@ int configured_as_parent() { return is_parent; } -// checks if the current chart definition has been sent -static inline int need_to_send_chart_definition(RRDSET *st) { - rrdset_check_rdlock(st); - - if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_EXPOSED)))) - return 1; - - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - 
if(unlikely(!rd->exposed)) { - #ifdef NETDATA_INTERNAL_CHECKS - info("host '%s', chart '%s', dimension '%s' flag 'exposed' triggered chart refresh to upstream", st->rrdhost->hostname, st->id, rd->id); - #endif - return 1; - } - } - - return 0; -} - // chart labels static int send_clabels_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { BUFFER *wb = (BUFFER *)data; buffer_sprintf(wb, "CLABEL \"%s\" \"%s\" %d\n", name, value, ls); return 1; } -void rrdpush_send_clabels(RRDHOST *host, RRDSET *st) { - if (st->state && st->state->chart_labels) { - if(rrdlabels_walkthrough_read(st->state->chart_labels, send_clabels_callback, host->sender->build) > 0) - buffer_sprintf(host->sender->build,"CLABEL_COMMIT\n"); + +static void rrdpush_send_clabels(BUFFER *wb, RRDSET *st) { + if (st->rrdlabels) { + if(rrdlabels_walkthrough_read(st->rrdlabels, send_clabels_callback, wb) > 0) + buffer_sprintf(wb, "CLABEL_COMMIT\n"); } } // Send the current chart definition. // Assumes that collector thread has already called sender_start for mutex / buffer state. 
-static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { +static inline bool rrdpush_send_chart_definition(BUFFER *wb, RRDSET *st) { + bool replication_progress = false; + RRDHOST *host = st->rrdhost; rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_EXPOSED); @@ -216,9 +233,9 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { // properly set the name for the remote end to parse it char *name = ""; if(likely(st->name)) { - if(unlikely(strcmp(st->id, st->name))) { + if(unlikely(st->id != st->name)) { // they differ - name = strchr(st->name, '.'); + name = strchr(rrdset_name(st), '.'); if(name) name++; else @@ -228,14 +245,14 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { // send the chart buffer_sprintf( - host->sender->build + wb , "CHART \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %ld %d \"%s %s %s %s\" \"%s\" \"%s\"\n" - , st->id + , rrdset_id(st) , name - , st->title - , st->units - , st->family - , st->context + , rrdset_title(st) + , rrdset_units(st) + , rrdset_family(st) + , rrdset_context(st) , rrdset_type_name(st->chart_type) , st->priority , st->update_every @@ -243,120 +260,190 @@ static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { , rrdset_flag_check(st, RRDSET_FLAG_DETAIL)?"detail":"" , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":"" , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":"" - , (st->plugin_name)?st->plugin_name:"" - , (st->module_name)?st->module_name:"" + , rrdset_plugin_name(st) + , rrdset_module_name(st) ); // send the chart labels - if (host->sender->version >= STREAM_VERSION_CLABELS) - rrdpush_send_clabels(host, st); + if (stream_has_capability(host->sender, STREAM_CAP_CLABELS)) + rrdpush_send_clabels(wb, st); // send the dimensions RRDDIM *rd; rrddim_foreach_read(rd, st) { buffer_sprintf( - host->sender->build + wb , "DIMENSION \"%s\" \"%s\" \"%s\" " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " \"%s %s %s\"\n" - , rd->id - , rd->name 
+ , rrddim_id(rd) + , rrddim_name(rd) , rrd_algorithm_name(rd->algorithm) , rd->multiplier , rd->divisor , rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)?"obsolete":"" - , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":"" - , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":"" + , rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)?"hidden":"" + , rrddim_option_check(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":"" ); rd->exposed = 1; } + rrddim_foreach_done(rd); + + // send the chart functions + if(stream_has_capability(host->sender, STREAM_CAP_FUNCTIONS)) + rrd_functions_expose_rrdpush(st, wb); // send the chart local custom variables - RRDSETVAR *rs; - for(rs = st->variables; rs ;rs = rs->next) { - if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR)) { - NETDATA_DOUBLE *value = (NETDATA_DOUBLE *) rs->value; - - buffer_sprintf( - host->sender->build - , "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n" - , rs->variable - , *value - ); + rrdsetvar_print_to_streaming_custom_chart_variables(st, wb); + + if (stream_has_capability(host->sender, STREAM_CAP_REPLICATION)) { + time_t first_entry_local = rrdset_first_entry_t_of_tier(st, 0); + time_t last_entry_local = st->last_updated.tv_sec; + + if(unlikely(!last_entry_local)) + last_entry_local = rrdset_last_entry_t(st); + + time_t now = now_realtime_sec(); + if(unlikely(last_entry_local > now)) { + internal_error(true, + "RRDSET REPLAY ERROR: 'host:%s/chart:%s' last updated time %ld is in the future, adjusting it to now %ld", + rrdhost_hostname(st->rrdhost), rrdset_id(st), + last_entry_local, now); + last_entry_local = now; + } + + if(unlikely(first_entry_local && last_entry_local && first_entry_local >= last_entry_local)) { + internal_error(true, + "RRDSET REPLAY ERROR: 'host:%s/chart:%s' first updated time %ld is equal or bigger than last updated time %ld, adjusting it last updated time - update every", + rrdhost_hostname(st->rrdhost), 
rrdset_id(st), + first_entry_local, last_entry_local); + first_entry_local = last_entry_local - st->update_every; + } + + if(unlikely(!first_entry_local && last_entry_local)) { + internal_error(true, + "RRDSET REPLAY ERROR: 'host:%s/chart:%s' first time %ld, last time %ld, setting both to last time", + rrdhost_hostname(st->rrdhost), rrdset_id(st), + first_entry_local, last_entry_local); + first_entry_local = last_entry_local; } + + buffer_sprintf(wb, PLUGINSD_KEYWORD_CHART_DEFINITION_END " %llu %llu %llu\n", + (unsigned long long)first_entry_local, + (unsigned long long)last_entry_local, + (unsigned long long)now); + + rrdset_flag_set(st, RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS); + rrdset_flag_clear(st, RRDSET_FLAG_SENDER_REPLICATION_FINISHED); + rrdhost_sender_replicating_charts_plus_one(st->rrdhost); + replication_progress = true; + +#ifdef NETDATA_LOG_REPLICATION_REQUESTS + internal_error(true, "REPLAY: 'host:%s/chart:%s' replication starts", + rrdhost_hostname(st->rrdhost), rrdset_id(st)); +#endif } st->upstream_resync_time = st->last_collected_time.tv_sec + (remote_clock_resync_iterations * st->update_every); + return replication_progress; } // sends the current chart dimensions -static inline void rrdpush_send_chart_metrics_nolock(RRDSET *st, struct sender_state *s) { - RRDHOST *host = st->rrdhost; - buffer_sprintf(host->sender->build, "BEGIN \"%s\" %llu", st->id, (st->last_collected_time.tv_sec > st->upstream_resync_time)?st->usec_since_last_update:0); - if (s->version >= VERSION_GAP_FILLING) - buffer_sprintf(host->sender->build, " %"PRId64"\n", (int64_t)st->last_collected_time.tv_sec); +static void rrdpush_send_chart_metrics(BUFFER *wb, RRDSET *st, struct sender_state *s, RRDSET_FLAGS flags) { + buffer_fast_strcat(wb, "BEGIN \"", 7); + buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id)); + buffer_fast_strcat(wb, "\" ", 2); + + if(stream_has_capability(s, STREAM_CAP_REPLICATION) || st->last_collected_time.tv_sec > st->upstream_resync_time) + 
buffer_print_llu(wb, st->usec_since_last_update); else - buffer_strcat(host->sender->build, "\n"); + buffer_fast_strcat(wb, "0", 1); + + buffer_fast_strcat(wb, "\n", 1); RRDDIM *rd; rrddim_foreach_read(rd, st) { - if(rd->updated && rd->exposed) - buffer_sprintf(host->sender->build - , "SET \"%s\" = " COLLECTED_NUMBER_FORMAT "\n" - , rd->id - , rd->collected_value - ); + if(unlikely(!rd->updated)) + continue; + + if(likely(rd->exposed)) { + buffer_fast_strcat(wb, "SET \"", 5); + buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id)); + buffer_fast_strcat(wb, "\" = ", 4); + buffer_print_ll(wb, rd->collected_value); + buffer_fast_strcat(wb, "\n", 1); + } + else { + internal_error(true, "STREAM: 'host:%s/chart:%s/dim:%s' flag 'exposed' is updated but not exposed", + rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + // we will include it in the next iteration + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + } } - buffer_strcat(host->sender->build, "END\n"); + rrddim_foreach_done(rd); + + if(unlikely(flags & RRDSET_FLAG_UPSTREAM_SEND_VARIABLES)) + rrdsetvar_print_to_streaming_custom_chart_variables(st, wb); + + buffer_fast_strcat(wb, "END\n", 4); } static void rrdpush_sender_thread_spawn(RRDHOST *host); // Called from the internal collectors to mark a chart obsolete. 
-void rrdset_push_chart_definition_now(RRDSET *st) { +bool rrdset_push_chart_definition_now(RRDSET *st) { RRDHOST *host = st->rrdhost; - if(unlikely(!host->rrdpush_send_enabled || !should_send_chart_matching(st))) - return; + if(unlikely(!rrdhost_can_send_definitions_to_parent(host) + || !should_send_chart_matching(st, __atomic_load_n(&st->flags, __ATOMIC_SEQ_CST)))) + return false; + + BUFFER *wb = sender_start(host->sender); + rrdpush_send_chart_definition(wb, st); + sender_commit(host->sender, wb); - rrdset_rdlock(st); - sender_start(host->sender); - rrdpush_send_chart_definition_nolock(st); - sender_commit(host->sender); - rrdset_unlock(st); + return true; } void rrdset_done_push(RRDSET *st) { - if(unlikely(!should_send_chart_matching(st))) - return; - RRDHOST *host = st->rrdhost; - if(unlikely(host->rrdpush_send_enabled && !host->rrdpush_sender_spawn)) - rrdpush_sender_thread_spawn(host); + // fetch the flags we need to check with one atomic operation + RRDHOST_FLAGS host_flags = __atomic_load_n(&host->flags, __ATOMIC_SEQ_CST); + + // check if we are not connected + if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_READY_4_METRICS))) { + + if(unlikely(!(host_flags & (RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN | RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED)))) + rrdpush_sender_thread_spawn(host); + + if(unlikely(!(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS))) { + rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS); + error("STREAM %s [send]: not ready - collected metrics are not sent to parent.", rrdhost_hostname(host)); + } - // Handle non-connected case - if(unlikely(!__atomic_load_n(&host->rrdpush_sender_connected, __ATOMIC_SEQ_CST))) { - if(unlikely(!host->rrdpush_sender_error_shown)) - error("STREAM %s [send]: not ready - discarding collected metrics.", host->hostname); - host->rrdpush_sender_error_shown = 1; return; } - else if(unlikely(host->rrdpush_sender_error_shown)) { - info("STREAM %s [send]: sending metrics...", host->hostname); 
- host->rrdpush_sender_error_shown = 0; + else if(unlikely(host_flags & RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS)) { + info("STREAM %s [send]: sending metrics to parent...", rrdhost_hostname(host)); + rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_LOGGED_STATUS); } - sender_start(host->sender); + RRDSET_FLAGS rrdset_flags = __atomic_load_n(&st->flags, __ATOMIC_SEQ_CST); + bool exposed_upstream = (rrdset_flags & RRDSET_FLAG_UPSTREAM_EXPOSED); + bool replication_in_progress = !(rrdset_flags & RRDSET_FLAG_SENDER_REPLICATION_FINISHED); - if(need_to_send_chart_definition(st)) - rrdpush_send_chart_definition_nolock(st); + if(unlikely((exposed_upstream && replication_in_progress) || + !should_send_chart_matching(st, rrdset_flags))) + return; + + BUFFER *wb = sender_start(host->sender); - rrdpush_send_chart_metrics_nolock(st, host->sender); + if(unlikely(!exposed_upstream)) + replication_in_progress = rrdpush_send_chart_definition(wb, st); - // signal the sender there are more data - if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1) - error("STREAM %s [send]: cannot write to internal pipe", host->hostname); + if (likely(!replication_in_progress)) + rrdpush_send_chart_metrics(wb, st, host->sender, rrdset_flags); - sender_commit(host->sender); + sender_commit(host->sender, wb); } // labels @@ -365,45 +452,38 @@ static int send_labels_callback(const char *name, const char *value, RRDLABEL_SR buffer_sprintf(wb, "LABEL \"%s\" = %d \"%s\"\n", name, ls, value); return 1; } -void rrdpush_send_labels(RRDHOST *host) { - if (!host->host_labels || !rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_LABELS_UPDATE) || (rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_LABELS_STOP))) +void rrdpush_send_host_labels(RRDHOST *host) { + if(unlikely(!rrdhost_can_send_definitions_to_parent(host) + || !stream_has_capability(host->sender, STREAM_CAP_HLABELS))) return; - sender_start(host->sender); - - 
rrdlabels_walkthrough_read(host->host_labels, send_labels_callback, host->sender->build); - buffer_sprintf(host->sender->build, "OVERWRITE %s\n", "labels"); - sender_commit(host->sender); + BUFFER *wb = sender_start(host->sender); - if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1) - error("STREAM %s [send]: cannot write to internal pipe", host->hostname); + rrdlabels_walkthrough_read(host->rrdlabels, send_labels_callback, wb); + buffer_sprintf(wb, "OVERWRITE %s\n", "labels"); - rrdhost_flag_clear(host, RRDHOST_FLAG_STREAM_LABELS_UPDATE); + sender_commit(host->sender, wb); } void rrdpush_claimed_id(RRDHOST *host) { - if(unlikely(!host->rrdpush_send_enabled || !__atomic_load_n(&host->rrdpush_sender_connected, __ATOMIC_SEQ_CST))) - return; - - if(host->sender->version < STREAM_VERSION_CLAIM) + if(!stream_has_capability(host->sender, STREAM_CAP_CLAIM)) return; - sender_start(host->sender); + if(unlikely(!rrdhost_can_send_definitions_to_parent(host))) + return; + + BUFFER *wb = sender_start(host->sender); rrdhost_aclk_state_lock(host); - buffer_sprintf(host->sender->build, "CLAIMED_ID %s %s\n", host->machine_guid, (host->aclk_state.claimed_id ? host->aclk_state.claimed_id : "NULL") ); + buffer_sprintf(wb, "CLAIMED_ID %s %s\n", host->machine_guid, (host->aclk_state.claimed_id ? 
host->aclk_state.claimed_id : "NULL") ); rrdhost_aclk_state_unlock(host); - sender_commit(host->sender); - - // signal the sender there are more data - if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1) - error("STREAM %s [send]: cannot write to internal pipe", host->hostname); + sender_commit(host->sender, wb); } int connect_to_one_of_destinations( - struct rrdpush_destinations *destinations, + RRDHOST *host, int default_port, struct timeval *timeout, size_t *reconnects_counter, @@ -413,28 +493,44 @@ int connect_to_one_of_destinations( { int sock = -1; - for (struct rrdpush_destinations *d = destinations; d; d = d->next) { - if (d->disabled_no_proper_reply) { - d->disabled_no_proper_reply = 0; - continue; - } else if (d->disabled_because_of_localhost) { - continue; - } else if (d->disabled_already_streaming && (d->disabled_already_streaming + 30 > now_realtime_sec())) { - continue; - } else if (d->disabled_because_of_denied_access) { - d->disabled_because_of_denied_access = 0; + for (struct rrdpush_destinations *d = host->destinations; d; d = d->next) { + time_t now = now_realtime_sec(); + + if(d->postpone_reconnection_until > now) { + info( + "STREAM %s: skipping destination '%s' (default port: %d) due to last error (code: %d, %s), will retry it in %d seconds", + rrdhost_hostname(host), + string2str(d->destination), + default_port, + d->last_handshake, d->last_error?d->last_error:"unset reason description", + (int)(d->postpone_reconnection_until - now)); + continue; } + info( + "STREAM %s: attempting to connect to '%s' (default port: %d)...", + rrdhost_hostname(host), + string2str(d->destination), + default_port); + if (reconnects_counter) *reconnects_counter += 1; - sock = connect_to_this(d->destination, default_port, timeout); + + sock = connect_to_this(string2str(d->destination), default_port, timeout); + if (sock != -1) { - if (connected_to && connected_to_size) { - strncpy(connected_to, 
d->destination, connected_to_size); - connected_to[connected_to_size - 1] = '\0'; - } + if (connected_to && connected_to_size) + strncpyz(connected_to, string2str(d->destination), connected_to_size); + *destination = d; + + // move the current item to the end of the list + // without this, this destination will break the loop again and again + // not advancing the destinations to find one that may work + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->destinations, d, prev, next); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(host->destinations, d, prev, next); + break; } } @@ -442,44 +538,51 @@ int connect_to_one_of_destinations( return sock; } -struct rrdpush_destinations *destinations_init(const char *dests) { - const char *s = dests; - struct rrdpush_destinations *destinations = NULL, *prev = NULL; - while(*s) { - const char *e = s; - - // skip path, moving both s(tart) and e(nd) - if(*e == '/') - while(!isspace(*e) && *e != ',') s = ++e; - - // skip separators, moving both s(tart) and e(nd) - while(isspace(*e) || *e == ',') s = ++e; - - // move e(nd) to the first separator - while(*e && !isspace(*e) && *e != ',' && *e != '/') e++; - - // is there anything? 
- if(!*s || s == e) break; - - char buf[e - s + 1]; - strncpyz(buf, s, e - s); - struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations)); - strncpyz(d->destination, buf, sizeof(d->destination)-1); - d->disabled_no_proper_reply = 0; - d->disabled_because_of_localhost = 0; - d->disabled_already_streaming = 0; - d->disabled_because_of_denied_access = 0; - d->next = NULL; - if (!destinations) { - destinations = d; - } else { - prev->next = d; - } - prev = d; +struct destinations_init_tmp { + RRDHOST *host; + struct rrdpush_destinations *list; + int count; +}; + +bool destinations_init_add_one(char *entry, void *data) { + struct destinations_init_tmp *t = data; + + struct rrdpush_destinations *d = callocz(1, sizeof(struct rrdpush_destinations)); + d->destination = string_strdupz(entry); + + DOUBLE_LINKED_LIST_APPEND_UNSAFE(t->list, d, prev, next); + + t->count++; + info("STREAM: added streaming destination No %d: '%s' to host '%s'", t->count, string2str(d->destination), rrdhost_hostname(t->host)); + + return false; // we return false, so that we will get all defined destinations +} + +void rrdpush_destinations_init(RRDHOST *host) { + if(!host->rrdpush_send_destination) return; + + rrdpush_destinations_free(host); + + struct destinations_init_tmp t = { + .host = host, + .list = NULL, + .count = 0, + }; + + foreach_entry_in_connection_string(host->rrdpush_send_destination, destinations_init_add_one, &t); - s = e; + host->destinations = t.list; +} + +void rrdpush_destinations_free(RRDHOST *host) { + while (host->destinations) { + struct rrdpush_destinations *tmp = host->destinations; + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->destinations, tmp, prev, next); + string_freez(tmp->destination); + freez(tmp); } - return destinations; + + host->destinations = NULL; } // ---------------------------------------------------------------------------- @@ -495,11 +598,13 @@ void rrdpush_sender_thread_stop(RRDHOST *host) { netdata_mutex_lock(&host->sender->mutex); 
netdata_thread_t thr = 0; - if(host->rrdpush_sender_spawn) { - info("STREAM %s [send]: signaling sending thread to stop...", host->hostname); + if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { + rrdhost_flag_clear(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN); + + info("STREAM %s [send]: signaling sending thread to stop...", rrdhost_hostname(host)); // signal the thread that we want to join it - host->rrdpush_sender_join = 1; + rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_JOIN); // copy the thread id, so that we will be waiting for the right one // even if a new one has been spawn @@ -512,10 +617,10 @@ void rrdpush_sender_thread_stop(RRDHOST *host) { netdata_mutex_unlock(&host->sender->mutex); if(thr != 0) { - info("STREAM %s [send]: waiting for the sending thread to stop...", host->hostname); + info("STREAM %s [send]: waiting for the sending thread to stop...", rrdhost_hostname(host)); void *result; netdata_thread_join(thr, &result); - info("STREAM %s [send]: sending thread has exited.", host->hostname); + info("STREAM %s [send]: sending thread has exited.", rrdhost_hostname(host)); } } @@ -531,15 +636,16 @@ void log_stream_connection(const char *client_ip, const char *client_port, const static void rrdpush_sender_thread_spawn(RRDHOST *host) { netdata_mutex_lock(&host->sender->mutex); - if(!host->rrdpush_sender_spawn) { + if(!rrdhost_flag_check(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN)) { char tag[NETDATA_THREAD_TAG_MAX + 1]; - snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STREAM_SENDER[%s]", host->hostname); + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STREAM_SENDER[%s]", rrdhost_hostname(host)); if(netdata_thread_create(&host->rrdpush_sender_thread, tag, NETDATA_THREAD_OPTION_JOINABLE, rrdpush_sender_thread, (void *) host->sender)) - error("STREAM %s [send]: failed to create new thread for client.", host->hostname); + error("STREAM %s [send]: failed to create new thread for client.", rrdhost_hostname(host)); else - host->rrdpush_sender_spawn = 1; + 
rrdhost_flag_set(host, RRDHOST_FLAG_RRDPUSH_SENDER_SPAWN); } + netdata_mutex_unlock(&host->sender->mutex); } @@ -608,7 +714,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { else if(!strcmp(name, "tags")) tags = value; else if(!strcmp(name, "ver")) - stream_version = MIN((uint32_t) strtoul(value, NULL, 0), STREAMING_PROTOCOL_CURRENT_VERSION); + stream_version = convert_stream_version_to_capabilities(strtoul(value, NULL, 0)); else { // An old Netdata child does not have a compatible streaming protocol, map to something sane. if (!strcmp(name, "NETDATA_SYSTEM_OS_NAME")) @@ -624,7 +730,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { else if (!strcmp(name, "NETDATA_SYSTEM_OS_DETECTION")) name = "NETDATA_HOST_OS_DETECTION"; else if(!strcmp(name, "NETDATA_PROTOCOL_VERSION") && stream_version == UINT_MAX) { - stream_version = 1; + stream_version = convert_stream_version_to_capabilities(1); } if (unlikely(rrdhost_set_system_info_variable(system_info, name, value))) { @@ -635,7 +741,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { } if (stream_version == UINT_MAX) - stream_version = 0; + stream_version = convert_stream_version_to_capabilities(0); if(!key || !*key) { rrdhost_system_info_free(system_info); @@ -660,21 +766,30 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { if(regenerate_guid(key, buf) == -1) { rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - INVALID KEY"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - INVALID KEY"); error("STREAM [receive from [%s]:%s]: API key '%s' is not valid GUID (use the command uuidgen to generate one). 
Forbidding access.", w->client_ip, w->client_port, key); return rrdpush_receiver_permission_denied(w); } if(regenerate_guid(machine_guid, buf) == -1) { rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - INVALID MACHINE GUID"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - INVALID MACHINE GUID"); error("STREAM [receive from [%s]:%s]: machine GUID '%s' is not GUID. Forbidding access.", w->client_ip, w->client_port, machine_guid); return rrdpush_receiver_permission_denied(w); } + const char *api_key_type = appconfig_get(&stream_config, key, "type", "api"); + if(!api_key_type || !*api_key_type) api_key_type = "unknown"; + if(strcmp(api_key_type, "api") != 0) { + rrdhost_system_info_free(system_info); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - API KEY GIVEN IS NOT API KEY"); + error("STREAM [receive from [%s]:%s]: API key '%s' is a %s GUID. Forbidding access.", w->client_ip, w->client_port, key, api_key_type); + return rrdpush_receiver_permission_denied(w); + } + if(!appconfig_get_boolean(&stream_config, key, "enabled", 0)) { rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - KEY NOT ENABLED"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - KEY NOT ENABLED"); error("STREAM [receive from [%s]:%s]: API key '%s' is not allowed. 
Forbidding access.", w->client_ip, w->client_port, key); return rrdpush_receiver_permission_denied(w); } @@ -685,7 +800,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { if(!simple_pattern_matches(key_allow_from, w->client_ip)) { simple_pattern_free(key_allow_from); rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname) ? hostname : "-", "ACCESS DENIED - KEY NOT ALLOWED FROM THIS IP"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - KEY NOT ALLOWED FROM THIS IP"); error("STREAM [receive from [%s]:%s]: API key '%s' is not permitted from this IP. Forbidding access.", w->client_ip, w->client_port, key); return rrdpush_receiver_permission_denied(w); } @@ -693,9 +808,18 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { } } + const char *machine_guid_type = appconfig_get(&stream_config, machine_guid, "type", "machine"); + if(!machine_guid_type || !*machine_guid_type) machine_guid_type = "unknown"; + if(strcmp(machine_guid_type, "machine") != 0) { + rrdhost_system_info_free(system_info); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - MACHINE GUID GIVEN IS NOT A MACHINE GUID"); + error("STREAM [receive from [%s]:%s]: machine GUID '%s' is a %s GUID. 
Forbidding access.", w->client_ip, w->client_port, machine_guid, machine_guid_type); + return rrdpush_receiver_permission_denied(w); + } + if(!appconfig_get_boolean(&stream_config, machine_guid, "enabled", 1)) { rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname)?hostname:"-", "ACCESS DENIED - MACHINE GUID NOT ENABLED"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - MACHINE GUID NOT ENABLED"); error("STREAM [receive from [%s]:%s]: machine GUID '%s' is not allowed. Forbidding access.", w->client_ip, w->client_port, machine_guid); return rrdpush_receiver_permission_denied(w); } @@ -706,7 +830,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { if(!simple_pattern_matches(machine_allow_from, w->client_ip)) { simple_pattern_free(machine_allow_from); rrdhost_system_info_free(system_info); - log_stream_connection(w->client_ip, w->client_port, (key && *key)?key:"-", (machine_guid && *machine_guid)?machine_guid:"-", (hostname && *hostname) ? hostname : "-", "ACCESS DENIED - MACHINE GUID NOT ALLOWED FROM THIS IP"); + log_stream_connection(w->client_ip, w->client_port, key, machine_guid, hostname, "ACCESS DENIED - MACHINE GUID NOT ALLOWED FROM THIS IP"); error("STREAM [receive from [%s]:%s]: Machine GUID '%s' is not permitted from this IP. Forbidding access.", w->client_ip, w->client_port, machine_guid); return rrdpush_receiver_permission_denied(w); } @@ -746,7 +870,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { struct receiver_state *rpt = callocz(1, sizeof(*rpt)); rrd_rdlock(); - RRDHOST *host = rrdhost_find_by_guid(machine_guid, 0); + RRDHOST *host = rrdhost_find_by_guid(machine_guid); if (unlikely(host && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) /* Ignore archived hosts. 
*/ host = NULL; if (host) { @@ -763,7 +887,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { info( "STREAM %s [receive from [%s]:%s]: multiple connections for same host detected - " "existing connection is dead (%"PRId64" sec), accepting new connection.", - host->hostname, + rrdhost_hostname(host), w->client_ip, w->client_port, (int64_t)age); @@ -772,12 +896,12 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { netdata_mutex_unlock(&host->receiver_lock); rrdhost_unlock(host); rrd_unlock(); - log_stream_connection(w->client_ip, w->client_port, key, host->machine_guid, host->hostname, + log_stream_connection(w->client_ip, w->client_port, key, host->machine_guid, rrdhost_hostname(host), "REJECTED - ALREADY CONNECTED"); info( "STREAM %s [receive from [%s]:%s]: multiple connections for same host detected - " "existing connection is active (within last %"PRId64" sec), rejecting new connection.", - host->hostname, + rrdhost_hostname(host), w->client_ip, w->client_port, (int64_t)age); @@ -811,7 +935,7 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { rpt->client_port = strdupz(w->client_port); rpt->update_every = update_every; rpt->system_info = system_info; - rpt->stream_version = stream_version; + rpt->capabilities = stream_version; #ifdef ENABLE_HTTPS rpt->ssl.conn = w->ssl.conn; rpt->ssl.flags = w->ssl.flags; @@ -855,3 +979,66 @@ int rrdpush_receiver_thread_spawn(struct web_client *w, char *url) { buffer_flush(w->response.data); return 200; } + +static void stream_capabilities_to_string(BUFFER *wb, STREAM_CAPABILITIES caps) { + if(caps & STREAM_CAP_V1) buffer_strcat(wb, "V1 "); + if(caps & STREAM_CAP_V2) buffer_strcat(wb, "V2 "); + if(caps & STREAM_CAP_VN) buffer_strcat(wb, "VN "); + if(caps & STREAM_CAP_VCAPS) buffer_strcat(wb, "VCAPS "); + if(caps & STREAM_CAP_HLABELS) buffer_strcat(wb, "HLABELS "); + if(caps & STREAM_CAP_CLAIM) buffer_strcat(wb, "CLAIM "); + if(caps & STREAM_CAP_CLABELS) 
buffer_strcat(wb, "CLABELS "); + if(caps & STREAM_CAP_COMPRESSION) buffer_strcat(wb, "COMPRESSION "); + if(caps & STREAM_CAP_FUNCTIONS) buffer_strcat(wb, "FUNCTIONS "); + if(caps & STREAM_CAP_REPLICATION) buffer_strcat(wb, "REPLICATION "); + if(caps & STREAM_CAP_BINARY) buffer_strcat(wb, "BINARY "); +} + +void log_receiver_capabilities(struct receiver_state *rpt) { + BUFFER *wb = buffer_create(100); + stream_capabilities_to_string(wb, rpt->capabilities); + + info("STREAM %s [receive from [%s]:%s]: established link with negotiated capabilities: %s", + rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, buffer_tostring(wb)); + + buffer_free(wb); +} + +void log_sender_capabilities(struct sender_state *s) { + BUFFER *wb = buffer_create(100); + stream_capabilities_to_string(wb, s->capabilities); + + info("STREAM %s [send to %s]: established link with negotiated capabilities: %s", + rrdhost_hostname(s->host), s->connected_to, buffer_tostring(wb)); + + buffer_free(wb); +} + +STREAM_CAPABILITIES convert_stream_version_to_capabilities(int32_t version) { + STREAM_CAPABILITIES caps = 0; + + if(version <= 1) caps = STREAM_CAP_V1; + else if(version < STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_V2 | STREAM_CAP_HLABELS; + else if(version <= STREAM_OLD_VERSION_CLAIM) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM; + else if(version <= STREAM_OLD_VERSION_CLABELS) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS; + else if(version <= STREAM_OLD_VERSION_COMPRESSION) caps = STREAM_CAP_VN | STREAM_CAP_HLABELS | STREAM_CAP_CLAIM | STREAM_CAP_CLABELS | STREAM_HAS_COMPRESSION; + else caps = version; + + if(caps & STREAM_CAP_VCAPS) + caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2|STREAM_CAP_VN); + + if(caps & STREAM_CAP_VN) + caps &= ~(STREAM_CAP_V1|STREAM_CAP_V2); + + if(caps & STREAM_CAP_V2) + caps &= ~(STREAM_CAP_V1); + + return caps & STREAM_OUR_CAPABILITIES; +} + +int32_t stream_capabilities_to_vn(uint32_t caps) { + if(caps & 
STREAM_CAP_COMPRESSION) return STREAM_OLD_VERSION_COMPRESSION; + if(caps & STREAM_CAP_CLABELS) return STREAM_OLD_VERSION_CLABELS; + return STREAM_OLD_VERSION_CLAIM; // if(caps & STREAM_CAP_CLAIM) +} + |