summaryrefslogtreecommitdiffstats
path: root/daemon/service.c
diff options
context:
space:
mode:
Diffstat (limited to 'daemon/service.c')
-rw-r--r--daemon/service.c184
1 files changed, 125 insertions, 59 deletions
diff --git a/daemon/service.c b/daemon/service.c
index f7fe86e04..8a65de66c 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -76,33 +76,48 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
rrddim_free(st, rd);
}
-static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+ if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
+ return true;
+
worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS);
+ rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+
RRDDIM *rd;
time_t now = now_realtime_sec();
- bool done_all_dimensions = true;
+ size_t dim_candidates = 0;
+ size_t dim_archives = 0;
dfe_start_write(st->rrddim_root_index, rd) {
- if(unlikely(
- all_dimensions ||
- (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now))
- )) {
-
- if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
- netdata_log_info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
- svc_rrddim_obsolete_to_archive(rd);
+ bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE));
+
+ if(candidate) {
+ dim_candidates++;
+
+ if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) {
+ size_t references = dictionary_acquired_item_references(rd_dfe.item);
+ if(references == 1) {
+// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
+ svc_rrddim_obsolete_to_archive(rd);
+ dim_archives++;
+ }
+// else
+// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
}
- else
- done_all_dimensions = false;
}
- else
- done_all_dimensions = false;
}
dfe_done(rd);
- return done_all_dimensions;
+ if(dim_archives != dim_candidates) {
+ rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+ return false;
+ }
+
+ return true;
}
static void svc_rrdset_obsolete_to_free(RRDSET *st) {
@@ -132,50 +147,78 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) {
rrdset_free(st);
}
-static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
+static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) {
+ if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS))
+ return;
+
worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS);
+ rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ size_t full_candidates = 0;
+ size_t full_archives = 0;
+ size_t partial_candidates = 0;
+ size_t partial_archives = 0;
+
time_t now = now_realtime_sec();
RRDSET *st;
rrdset_foreach_reentrant(st, host) {
if(rrdset_is_replicating(st))
continue;
- if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
- && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
- && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
- && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
- )) {
- svc_rrdset_obsolete_to_free(st);
- }
- else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) {
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
- svc_rrdset_archive_obsolete_dimensions(st, false);
+ RRDSET_FLAGS flags = rrdset_flag_get(st);
+ bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE;
+ bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS;
+
+ if(obsolete_dims) {
+ partial_candidates++;
+
+ if(svc_rrdset_archive_obsolete_dimensions(st, false))
+ partial_archives++;
}
- else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) {
- rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
+
+ if(obsolete_chart) {
+ full_candidates++;
+
+ if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
+ && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
+ && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
+ )) {
+ svc_rrdset_obsolete_to_free(st);
+ full_archives++;
+ }
}
}
rrdset_foreach_done(st);
+
+ if(partial_archives != partial_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ if(full_archives != full_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
}
-static void svc_rrdset_check_obsoletion(RRDHOST *host) {
+static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) {
worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK);
time_t now = now_realtime_sec();
time_t last_entry_t;
RRDSET *st;
+
+ time_t child_connect_time = host->child_connect_time;
+
rrdset_foreach_read(st, host) {
if(rrdset_is_replicating(st))
continue;
last_entry_t = rrdset_last_entry_s(st);
- if(last_entry_t && last_entry_t < host->child_connect_time &&
- host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
- < now)
+ if (last_entry_t && last_entry_t < child_connect_time &&
+ child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT +
+ (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) <
+ now)
- rrdset_is_obsolete(st);
+ rrdset_is_obsolete___safe_from_collector_thread(st);
}
rrdset_foreach_done(st);
}
@@ -190,24 +233,24 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() {
if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host))
continue;
- if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) {
- rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
- svc_rrdhost_cleanup_obsolete_charts(host);
- }
+ svc_rrdhost_cleanup_charts_marked_obsolete(host);
- if(host != localhost
- && host->trigger_chart_obsoletion_check
- && (
- (
- host->child_last_chart_command
- && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec()
- )
- || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
- )
- ) {
- svc_rrdset_check_obsoletion(host);
+ if (host == localhost)
+ continue;
+
+ netdata_mutex_lock(&host->receiver_lock);
+
+ time_t now = now_realtime_sec();
+
+ if (host->trigger_chart_obsoletion_check &&
+ ((host->child_last_chart_command &&
+ host->child_last_chart_command + host->health.health_delay_up_to < now) ||
+ (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) {
+ svc_rrdhost_detect_obsolete_charts(host);
host->trigger_chart_obsoletion_check = 0;
}
+
+ netdata_mutex_unlock(&host->receiver_lock);
}
rrd_unlock();
@@ -226,22 +269,45 @@ restart_after_removal:
if(!rrdhost_should_be_removed(host, protected_host, now))
continue;
- netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
+ bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED);
+ if (!is_archived) {
+ netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
- if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
- /* don't delete multi-host DB host files */
- && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
- ) {
- worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
- rrdhost_delete_charts(host);
+ if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
+ /* don't delete multi-host DB host files */
+ && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
+ ) {
+ worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
+ rrdhost_delete_charts(host);
+ }
+ else {
+ worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
+ rrdhost_save_charts(host);
+ }
}
- else {
- worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
- rrdhost_save_charts(host);
+
+ bool force = false;
+
+ if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s)
+ force = true;
+
+ if (!force && is_archived)
+ continue;
+
+ if (force) {
+ netdata_log_info("Host '%s' with machine guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid);
}
worker_is_busy(WORKER_JOB_FREE_HOST);
- rrdhost_free___while_having_rrd_wrlock(host, false);
+#ifdef ENABLE_ACLK
+ // in case we have cloud connection we inform cloud
+ // a child disconnected
+ if (netdata_cloud_enabled && force) {
+ aclk_host_state_update(host, 0, 0);
+ unregister_node(host->machine_guid);
+ }
+#endif
+ rrdhost_free___while_having_rrd_wrlock(host, force);
goto restart_after_removal;
}