diff options
Diffstat (limited to 'daemon/service.c')
-rw-r--r-- | daemon/service.c | 184 |
1 files changed, 125 insertions, 59 deletions
diff --git a/daemon/service.c b/daemon/service.c index f7fe86e04..8a65de66c 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -76,33 +76,48 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { rrddim_free(st, rd); } -static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { +static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { + if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) + return true; + worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + RRDDIM *rd; time_t now = now_realtime_sec(); - bool done_all_dimensions = true; + size_t dim_candidates = 0; + size_t dim_archives = 0; dfe_start_write(st->rrddim_root_index, rd) { - if(unlikely( - all_dimensions || - (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now)) - )) { - - if(dictionary_acquired_item_references(rd_dfe.item) == 1) { - netdata_log_info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st)); - svc_rrddim_obsolete_to_archive(rd); + bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)); + + if(candidate) { + dim_candidates++; + + if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) { + size_t references = dictionary_acquired_item_references(rd_dfe.item); + if(references == 1) { +// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + svc_rrddim_obsolete_to_archive(rd); + dim_archives++; + } +// else +// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); } - else - done_all_dimensions = false; } - else - done_all_dimensions = false; } dfe_done(rd); - return done_all_dimensions; + if(dim_archives != dim_candidates) { + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + return false; + } + + return true; } static void svc_rrdset_obsolete_to_free(RRDSET *st) { @@ -132,50 +147,78 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) { rrdset_free(st); } -static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { +static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) { + if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) + return; + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + size_t full_candidates = 0; + size_t full_archives = 0; + size_t partial_candidates = 0; + size_t partial_archives = 0; + time_t now = now_realtime_sec(); RRDSET *st; rrdset_foreach_reentrant(st, host) { if(rrdset_is_replicating(st)) continue; - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) - && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now - && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now - && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now - )) { - svc_rrdset_obsolete_to_free(st); - } - else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { - rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); - svc_rrdset_archive_obsolete_dimensions(st, false); + RRDSET_FLAGS flags = rrdset_flag_get(st); + bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE; + bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS; + + if(obsolete_dims) { + partial_candidates++; + + if(svc_rrdset_archive_obsolete_dimensions(st, false)) + partial_archives++; } - else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { - rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); + + if(obsolete_chart) { + full_candidates++; + + if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now + )) { + svc_rrdset_obsolete_to_free(st); + full_archives++; + } } } rrdset_foreach_done(st); + + if(partial_archives != partial_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + if(full_archives != full_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); } -static void svc_rrdset_check_obsoletion(RRDHOST *host) { +static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) { worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); time_t now = now_realtime_sec(); time_t last_entry_t; RRDSET *st; + + time_t child_connect_time = host->child_connect_time; + rrdset_foreach_read(st, host) { if(rrdset_is_replicating(st)) continue; last_entry_t = rrdset_last_entry_s(st); - if(last_entry_t && last_entry_t < host->child_connect_time && - host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every - < now) + if (last_entry_t && last_entry_t < child_connect_time && + child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + + (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) < + now) - rrdset_is_obsolete(st); + rrdset_is_obsolete___safe_from_collector_thread(st); } rrdset_foreach_done(st); } @@ -190,24 +233,24 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) continue; - if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) { - rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); - svc_rrdhost_cleanup_obsolete_charts(host); - } + svc_rrdhost_cleanup_charts_marked_obsolete(host); - if(host != localhost - && host->trigger_chart_obsoletion_check - && ( - ( - host->child_last_chart_command - && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec() - ) - || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) - ) - ) { - svc_rrdset_check_obsoletion(host); + if (host == localhost) + continue; + + netdata_mutex_lock(&host->receiver_lock); + + time_t now = now_realtime_sec(); + + if (host->trigger_chart_obsoletion_check && + ((host->child_last_chart_command && + host->child_last_chart_command + host->health.health_delay_up_to < now) || + (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) { + svc_rrdhost_detect_obsolete_charts(host); host->trigger_chart_obsoletion_check = 0; } + + netdata_mutex_unlock(&host->receiver_lock); } rrd_unlock(); @@ -226,22 +269,45 @@ restart_after_removal: if(!rrdhost_should_be_removed(host, protected_host, now)) continue; - netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); + bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); + if (!is_archived) { + netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); - if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) - ) { - worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); - rrdhost_delete_charts(host); + if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) { + worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); + rrdhost_delete_charts(host); + } + else { + worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); + rrdhost_save_charts(host); + } } - else { - worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); - rrdhost_save_charts(host); + + bool force = false; + + if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s) + force = true; + + if (!force && is_archived) + continue; + + if (force) { + netdata_log_info("Host '%s' with machine guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid); } worker_is_busy(WORKER_JOB_FREE_HOST); - rrdhost_free___while_having_rrd_wrlock(host, false); +#ifdef ENABLE_ACLK + // in case we have cloud connection we inform cloud + // a child disconnected + if (netdata_cloud_enabled && force) { + aclk_host_state_update(host, 0, 0); + unregister_node(host->machine_guid); + } +#endif + rrdhost_free___while_having_rrd_wrlock(host, force); goto restart_after_removal; } |