summary | refs | log | tree | commit | diff | stats
path: root/daemon
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:19:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:20:02 +0000
commit: 58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree: 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /daemon
parent: Releasing debian version 1.43.2-1. (diff)
download: netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
          netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/analytics.c 117
-rw-r--r-- daemon/analytics.h 1
-rw-r--r-- daemon/buildinfo.c 75
-rw-r--r-- daemon/commands.c 17
-rw-r--r-- daemon/common.h 1
-rw-r--r-- daemon/daemon.c 31
-rw-r--r-- daemon/global_statistics.c 101
-rw-r--r-- daemon/global_statistics.h 5
-rw-r--r-- daemon/main.c 238
-rw-r--r-- daemon/service.c 184
-rw-r--r-- daemon/signals.c 16
-rw-r--r-- daemon/static_threads.c 10
-rwxr-xr-x daemon/system-info.sh 6
-rw-r--r-- daemon/unit_test.c 95
14 files changed, 620 insertions, 277 deletions
diff --git a/daemon/analytics.c b/daemon/analytics.c
index c149e258..353ebd13 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -117,12 +117,14 @@ void analytics_free_data(void)
*/
void analytics_set_data(char **name, char *value)
{
+ spinlock_lock(&analytics_data.spinlock);
if (*name) {
analytics_data.data_length -= strlen(*name);
freez(*name);
}
*name = strdupz(value);
analytics_data.data_length += strlen(*name);
+ spinlock_unlock(&analytics_data.spinlock);
}
/*
@@ -131,6 +133,7 @@ void analytics_set_data(char **name, char *value)
void analytics_set_data_str(char **name, const char *value)
{
size_t value_string_len;
+ spinlock_lock(&analytics_data.spinlock);
if (*name) {
analytics_data.data_length -= strlen(*name);
freez(*name);
@@ -139,6 +142,7 @@ void analytics_set_data_str(char **name, const char *value)
*name = mallocz(sizeof(char) * value_string_len);
snprintfz(*name, value_string_len - 1, "\"%s\"", value);
analytics_data.data_length += strlen(*name);
+ spinlock_unlock(&analytics_data.spinlock);
}
/*
@@ -149,7 +153,7 @@ void analytics_log_prometheus(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) {
analytics_data.prometheus_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.prometheus_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b);
}
}
@@ -162,7 +166,7 @@ void analytics_log_shell(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) {
analytics_data.shell_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.shell_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b);
}
}
@@ -175,7 +179,7 @@ void analytics_log_json(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) {
analytics_data.json_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.json_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b);
}
}
@@ -188,7 +192,7 @@ void analytics_log_dashboard(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) {
analytics_data.dashboard_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.dashboard_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits);
analytics_set_data(&analytics_data.netdata_dashboard_used, b);
}
}
@@ -198,7 +202,7 @@ void analytics_log_dashboard(void)
*/
void analytics_report_oom_score(long long int score){
char b[21];
- snprintfz(b, 20, "%lld", score);
+ snprintfz(b, sizeof(b) - 1, "%lld", score);
analytics_set_data(&analytics_data.netdata_config_oom_score, b);
}
@@ -222,11 +226,11 @@ void analytics_mirrored_hosts(void)
}
rrd_unlock();
- snprintfz(b, 20, "%zu", count);
+ snprintfz(b, sizeof(b) - 1, "%zu", count);
analytics_set_data(&analytics_data.netdata_mirrored_host_count, b);
- snprintfz(b, 20, "%zu", reachable);
+ snprintfz(b, sizeof(b) - 1, "%zu", reachable);
analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b);
- snprintfz(b, 20, "%zu", unreachable);
+ snprintfz(b, sizeof(b) - 1, "%zu", unreachable);
analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b);
}
@@ -281,7 +285,7 @@ void analytics_collectors(void)
.plugin = rrdset_plugin_name(st),
.module = rrdset_module_name(st)
};
- snprintfz(name, 499, "%s:%s", col.plugin, col.module);
+ snprintfz(name, sizeof(name) - 1, "%s:%s", col.plugin, col.module);
dictionary_set(dict, name, &col, sizeof(struct collector));
}
rrdset_foreach_done(st);
@@ -297,7 +301,7 @@ void analytics_collectors(void)
{
char b[21];
- snprintfz(b, 20, "%d", ap.c);
+ snprintfz(b, sizeof(b) - 1, "%d", ap.c);
analytics_set_data(&analytics_data.netdata_collectors_count, b);
}
@@ -402,7 +406,7 @@ void analytics_charts(void)
analytics_data.charts_count = c;
{
char b[21];
- snprintfz(b, 20, "%zu", c);
+ snprintfz(b, sizeof(b) - 1, "%zu", c);
analytics_set_data(&analytics_data.netdata_charts_count, b);
}
}
@@ -427,7 +431,7 @@ void analytics_metrics(void)
analytics_data.metrics_count = dimensions;
{
char b[21];
- snprintfz(b, 20, "%zu", dimensions);
+ snprintfz(b, sizeof(b) - 1, "%zu", dimensions);
analytics_set_data(&analytics_data.netdata_metrics_count, b);
}
}
@@ -454,11 +458,11 @@ void analytics_alarms(void)
}
foreach_rrdcalc_in_rrdhost_done(rc);
- snprintfz(b, 20, "%zu", alarm_normal);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_normal);
analytics_set_data(&analytics_data.netdata_alarms_normal, b);
- snprintfz(b, 20, "%zu", alarm_warn);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_warn);
analytics_set_data(&analytics_data.netdata_alarms_warning, b);
- snprintfz(b, 20, "%zu", alarm_crit);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_crit);
analytics_set_data(&analytics_data.netdata_alarms_critical, b);
}
@@ -467,6 +471,8 @@ void analytics_alarms(void)
*/
void analytics_misc(void)
{
+ analytics_data.spinlock.locked = false;
+
#ifdef ENABLE_ACLK
analytics_set_data(&analytics_data.netdata_host_cloud_available, "true");
analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "Next Generation");
@@ -540,19 +546,19 @@ void analytics_gather_mutable_meta_data(void)
{
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.prometheus_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b);
- snprintfz(b, 20, "%zu", analytics_data.shell_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b);
- snprintfz(b, 20, "%zu", analytics_data.json_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b);
- snprintfz(b, 20, "%zu", analytics_data.dashboard_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits);
analytics_set_data(&analytics_data.netdata_dashboard_used, b);
- snprintfz(b, 20, "%zu", rrdhost_hosts_available());
+ snprintfz(b, sizeof(b) - 1, "%zu", rrdhost_hosts_available());
analytics_set_data(&analytics_data.netdata_config_hosts_available, b);
}
}
@@ -664,10 +670,10 @@ void set_late_global_environment(struct rrdhost_system_info *system_info)
#ifdef ENABLE_DBENGINE
{
char b[16];
- snprintfz(b, 15, "%d", default_rrdeng_page_cache_mb);
+ snprintfz(b, sizeof(b) - 1, "%d", default_rrdeng_page_cache_mb);
analytics_set_data(&analytics_data.netdata_config_page_cache_size, b);
- snprintfz(b, 15, "%d", default_multidb_disk_quota_mb);
+ snprintfz(b, sizeof(b) - 1, "%d", default_multidb_disk_quota_mb);
analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b);
}
#endif
@@ -824,11 +830,10 @@ void get_system_timezone(void)
}
}
-void set_global_environment()
-{
+void set_global_environment() {
{
char b[16];
- snprintfz(b, 15, "%d", default_rrd_update_every);
+ snprintfz(b, sizeof(b) - 1, "%d", default_rrd_update_every);
setenv("NETDATA_UPDATE_EVERY", b, 1);
}
@@ -843,7 +848,6 @@ void set_global_environment()
setenv("NETDATA_LIB_DIR", verify_or_create_required_directory(netdata_configured_varlib_dir), 1);
setenv("NETDATA_LOCK_DIR", verify_or_create_required_directory(netdata_configured_lock_dir), 1);
setenv("NETDATA_LOG_DIR", verify_or_create_required_directory(netdata_configured_log_dir), 1);
- setenv("HOME", verify_or_create_required_directory(netdata_configured_home_dir), 1);
setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1);
{
@@ -922,16 +926,14 @@ void set_global_environment()
freez(default_port);
// set the path we need
- char path[1024 + 1], *p = getenv("PATH");
- if (!p)
- p = "/bin:/usr/bin";
- snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
+ char path[4096], *p = getenv("PATH");
+ if (!p) p = "/bin:/usr/bin";
+ snprintfz(path, sizeof(path), "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1);
// python options
p = getenv("PYTHONPATH");
- if (!p)
- p = "";
+ if (!p) p = "";
setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1);
// disable buffering for python plugins
@@ -941,37 +943,46 @@ void set_global_environment()
setenv("LC_ALL", "C", 1);
}
-void send_statistics(const char *action, const char *action_result, const char *action_data)
-{
+void send_statistics(const char *action, const char *action_result, const char *action_data) {
static char *as_script;
if (netdata_anonymous_statistics_enabled == -1) {
char *optout_file = mallocz(
sizeof(char) *
(strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2));
+
sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics");
+
if (likely(access(optout_file, R_OK) != 0)) {
as_script = mallocz(
sizeof(char) *
(strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2));
+
sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh");
+
if (unlikely(access(as_script, R_OK) != 0)) {
netdata_anonymous_statistics_enabled = 0;
- netdata_log_info("Anonymous statistics script %s not found.", as_script);
+
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Statistics script '%s' not found.",
+ as_script);
+
freez(as_script);
- } else {
- netdata_anonymous_statistics_enabled = 1;
}
- } else {
+ else
+ netdata_anonymous_statistics_enabled = 1;
+ }
+ else {
netdata_anonymous_statistics_enabled = 0;
as_script = NULL;
}
+
freez(optout_file);
}
- if (!netdata_anonymous_statistics_enabled)
- return;
- if (!action)
+
+ if (!netdata_anonymous_statistics_enabled || !action)
return;
+
if (!action_result)
action_result = "";
if (!action_data)
@@ -1030,7 +1041,9 @@ void send_statistics(const char *action, const char *action_result, const char *
analytics_data.netdata_prebuilt_distro,
analytics_data.netdata_fail_reason);
- netdata_log_info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "%s '%s' '%s' '%s'",
+ as_script, action, action_result, action_data);
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
@@ -1039,11 +1052,21 @@ void send_statistics(const char *action, const char *action_result, const char *
char *s = fgets(buffer, 4, fp_child_output);
int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
if (exit_code)
- netdata_log_error("Execution of anonymous statistics script returned %d.", exit_code);
- if (s && strncmp(buffer, "200", 3))
- netdata_log_error("Execution of anonymous statistics script returned http code %s.", buffer);
- } else {
- netdata_log_error("Failed to run anonymous statistics script %s.", as_script);
+
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Statistics script returned error: %d",
+ exit_code);
+
+ if (s && strncmp(buffer, "200", 3) != 0)
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Statistics script returned http code: %s",
+ buffer);
+
}
+ else
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Failed to run statistics script: %s.",
+ as_script);
+
freez(command_to_run);
}
diff --git a/daemon/analytics.h b/daemon/analytics.h
index 0a5cc458..de8d569f 100644
--- a/daemon/analytics.h
+++ b/daemon/analytics.h
@@ -71,6 +71,7 @@ struct analytics_data {
size_t charts_count;
size_t metrics_count;
+ SPINLOCK spinlock;
bool exporting_enabled;
};
diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c
index 4bc1e72a..41af56af 100644
--- a/daemon/buildinfo.c
+++ b/daemon/buildinfo.c
@@ -48,6 +48,7 @@ typedef enum __attribute__((packed)) {
BIB_FEATURE_CLOUD,
BIB_FEATURE_HEALTH,
BIB_FEATURE_STREAMING,
+ BIB_FEATURE_BACKFILLING,
BIB_FEATURE_REPLICATION,
BIB_FEATURE_STREAMING_COMPRESSION,
BIB_FEATURE_CONTEXTS,
@@ -66,6 +67,7 @@ typedef enum __attribute__((packed)) {
BIB_CONNECTIVITY_NATIVE_HTTPS,
BIB_CONNECTIVITY_TLS_HOST_VERIFY,
BIB_LIB_LZ4,
+ BIB_LIB_ZSTD,
BIB_LIB_ZLIB,
BIB_LIB_JUDY,
BIB_LIB_DLIB,
@@ -99,6 +101,7 @@ typedef enum __attribute__((packed)) {
BIB_PLUGIN_SLABINFO,
BIB_PLUGIN_XEN,
BIB_PLUGIN_XEN_VBD_ERROR,
+ BIB_PLUGIN_LOGS_MANAGEMENT,
BIB_EXPORT_AWS_KINESIS,
BIB_EXPORT_GCP_PUBSUB,
BIB_EXPORT_MONGOC,
@@ -340,7 +343,7 @@ static struct {
.json = "cpu_frequency",
.value = "unknown",
},
- [BIB_HW_RAM_SIZE] = {
+ [BIB_HW_ARCHITECTURE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -348,7 +351,7 @@ static struct {
.json = "cpu_architecture",
.value = "unknown",
},
- [BIB_HW_DISK_SPACE] = {
+ [BIB_HW_RAM_SIZE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -356,7 +359,7 @@ static struct {
.json = "ram",
.value = "unknown",
},
- [BIB_HW_ARCHITECTURE] = {
+ [BIB_HW_DISK_SPACE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -484,6 +487,14 @@ static struct {
.json = "streaming",
.value = NULL,
},
+ [BIB_FEATURE_BACKFILLING] = {
+ .category = BIC_FEATURE,
+ .type = BIT_BOOLEAN,
+ .analytics = NULL,
+ .print = "Back-filling (of higher database tiers)",
+ .json = "back-filling",
+ .value = NULL,
+ },
[BIB_FEATURE_REPLICATION] = {
.category = BIC_FEATURE,
.type = BIT_BOOLEAN,
@@ -498,7 +509,7 @@ static struct {
.analytics = "Stream Compression",
.print = "Streaming and Replication Compression",
.json = "stream-compression",
- .value = "none",
+ .value = NULL,
},
[BIB_FEATURE_CONTEXTS] = {
.category = BIC_FEATURE,
@@ -628,6 +639,14 @@ static struct {
.json = "lz4",
.value = NULL,
},
+ [BIB_LIB_ZSTD] = {
+ .category = BIC_LIBS,
+ .type = BIT_BOOLEAN,
+ .analytics = NULL,
+ .print = "ZSTD (fast, lossless compression algorithm)",
+ .json = "zstd",
+ .value = NULL,
+ },
[BIB_LIB_ZLIB] = {
.category = BIC_LIBS,
.type = BIT_BOOLEAN,
@@ -893,6 +912,14 @@ static struct {
.json = "xen-vbd-error",
.value = NULL,
},
+ [BIB_PLUGIN_LOGS_MANAGEMENT] = {
+ .category = BIC_PLUGINS,
+ .type = BIT_BOOLEAN,
+ .analytics = "Logs Management",
+ .print = "Logs Management",
+ .json = "logs-management",
+ .value = NULL,
+ },
[BIB_EXPORT_MONGOC] = {
.category = BIC_EXPORTERS,
.type = BIT_BOOLEAN,
@@ -1029,6 +1056,23 @@ static void build_info_set_value(BUILD_INFO_SLOT slot, const char *value) {
BUILD_INFO[slot].value = value;
}
+static void build_info_append_value(BUILD_INFO_SLOT slot, const char *value) {
+ size_t size = BUILD_INFO[slot].value ? strlen(BUILD_INFO[slot].value) + 1 : 0;
+ size += strlen(value);
+ char buf[size + 1];
+
+ if(BUILD_INFO[slot].value) {
+ strcpy(buf, BUILD_INFO[slot].value);
+ strcat(buf, " ");
+ strcat(buf, value);
+ }
+ else
+ strcpy(buf, value);
+
+ freez((void *)BUILD_INFO[slot].value);
+ BUILD_INFO[slot].value = strdupz(buf);
+}
+
static void build_info_set_value_strdupz(BUILD_INFO_SLOT slot, const char *value) {
if(!value) value = "";
build_info_set_value(slot, strdupz(value));
@@ -1075,14 +1119,21 @@ __attribute__((constructor)) void initialize_build_info(void) {
build_info_set_status(BIB_FEATURE_HEALTH, true);
build_info_set_status(BIB_FEATURE_STREAMING, true);
+ build_info_set_status(BIB_FEATURE_BACKFILLING, true);
build_info_set_status(BIB_FEATURE_REPLICATION, true);
-#ifdef ENABLE_RRDPUSH_COMPRESSION
build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true);
-#ifdef ENABLE_LZ4
- build_info_set_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4");
+
+#ifdef ENABLE_BROTLI
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli");
#endif
+#ifdef ENABLE_ZSTD
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd");
#endif
+#ifdef ENABLE_LZ4
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4");
+#endif
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip");
build_info_set_status(BIB_FEATURE_CONTEXTS, true);
build_info_set_status(BIB_FEATURE_TIERING, true);
@@ -1117,6 +1168,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
#ifdef ENABLE_LZ4
build_info_set_status(BIB_LIB_LZ4, true);
#endif
+#ifdef ENABLE_ZSTD
+ build_info_set_status(BIB_LIB_ZSTD, true);
+#endif
build_info_set_status(BIB_LIB_ZLIB, true);
@@ -1198,6 +1252,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
#ifdef HAVE_XENSTAT_VBD_ERROR
build_info_set_status(BIB_PLUGIN_XEN_VBD_ERROR, true);
#endif
+#ifdef ENABLE_LOGSMANAGEMENT
+ build_info_set_status(BIB_PLUGIN_LOGS_MANAGEMENT, true);
+#endif
build_info_set_status(BIB_EXPORT_PROMETHEUS_EXPORTER, true);
build_info_set_status(BIB_EXPORT_GRAPHITE, true);
@@ -1234,7 +1291,7 @@ __attribute__((constructor)) void initialize_build_info(void) {
// ----------------------------------------------------------------------------
// system info
-int get_system_info(struct rrdhost_system_info *system_info, bool log);
+int get_system_info(struct rrdhost_system_info *system_info);
static void populate_system_info(void) {
static bool populated = false;
static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
@@ -1257,7 +1314,7 @@ static void populate_system_info(void) {
}
else {
system_info = callocz(1, sizeof(struct rrdhost_system_info));
- get_system_info(system_info, false);
+ get_system_info(system_info);
free_system_info = true;
}
diff --git a/daemon/commands.c b/daemon/commands.c
index a8afb5a0..ed544224 100644
--- a/daemon/commands.c
+++ b/daemon/commands.c
@@ -142,10 +142,10 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Reloading HEALTH configuration.");
health_reload();
- error_log_limit_reset();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -155,11 +155,11 @@ static cmd_status_t cmd_save_database_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Saving databases.");
rrdhost_save_all();
netdata_log_info("COMMAND: Databases saved.");
- error_log_limit_reset();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -169,10 +169,9 @@ static cmd_status_t cmd_reopen_logs_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
- netdata_log_info("COMMAND: Reopening all log files.");
- reopen_all_log_files();
- error_log_limit_reset();
+ nd_log_limits_unlimited();
+ nd_log_reopen_log_files();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -182,7 +181,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Cleaning up to exit.");
netdata_cleanup_and_exit(0);
exit(0);
diff --git a/daemon/common.h b/daemon/common.h
index 4a390592..b1739879 100644
--- a/daemon/common.h
+++ b/daemon/common.h
@@ -28,6 +28,7 @@
#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed)
+#define config_section_destroy(section) appconfig_section_destroy_non_loaded(&netdata_config, section)
#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name)
// ----------------------------------------------------------------------------
diff --git a/daemon/daemon.c b/daemon/daemon.c
index c7f0b51c..433fa037 100644
--- a/daemon/daemon.c
+++ b/daemon/daemon.c
@@ -27,24 +27,10 @@ void get_netdata_execution_path(void) {
netdata_exe_file[exepath_size] = '\0';
- strcpy(netdata_exe_path, netdata_exe_file);
- dirname(netdata_exe_path);
-}
-
-static void chown_open_file(int fd, uid_t uid, gid_t gid) {
- if(fd == -1) return;
-
- struct stat buf;
-
- if(fstat(fd, &buf) == -1) {
- netdata_log_error("Cannot fstat() fd %d", fd);
- return;
- }
-
- if((buf.st_uid != uid || buf.st_gid != gid) && S_ISREG(buf.st_mode)) {
- if(fchown(fd, uid, gid) == -1)
- netdata_log_error("Cannot fchown() fd %d.", fd);
- }
+ // macOS's dirname(3) does not modify passed string
+ char *tmpdir = strdupz(netdata_exe_file);
+ strcpy(netdata_exe_path, dirname(tmpdir));
+ freez(tmpdir);
}
static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t gid, bool recursive)
@@ -124,9 +110,6 @@ int become_user(const char *username, int pid_fd) {
uid_t uid = pw->pw_uid;
gid_t gid = pw->pw_gid;
- if (am_i_root)
- netdata_log_info("I am root, so checking permissions");
-
prepare_required_directories(uid, gid);
if(pidfile[0]) {
@@ -150,9 +133,9 @@ int become_user(const char *username, int pid_fd) {
}
}
+ nd_log_chown_log_files(uid, gid);
chown_open_file(STDOUT_FILENO, uid, gid);
chown_open_file(STDERR_FILENO, uid, gid);
- chown_open_file(stdaccess_fd, uid, gid);
chown_open_file(pid_fd, uid, gid);
if(supplementary_groups && ngroups > 0) {
@@ -229,7 +212,7 @@ static void oom_score_adj(void) {
// check the environment
char *s = getenv("OOMScoreAdjust");
if(!s || !*s) {
- snprintfz(buf, 30, "%d", (int)wanted_score);
+ snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score);
s = buf;
}
@@ -264,7 +247,7 @@ static void oom_score_adj(void) {
int written = 0;
int fd = open("/proc/self/oom_score_adj", O_WRONLY);
if(fd != -1) {
- snprintfz(buf, 30, "%d", (int)wanted_score);
+ snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score);
ssize_t len = strlen(buf);
if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1;
close(fd);
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index ab910e18..9fb1df5f 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -65,6 +65,11 @@ static struct global_statistics {
uint64_t backfill_queries_made;
uint64_t backfill_db_points_read;
+ uint64_t tier0_hot_gorilla_buffers;
+
+ uint64_t tier0_disk_compressed_bytes;
+ uint64_t tier0_disk_uncompressed_bytes;
+
uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS];
} global_statistics = {
@@ -80,6 +85,10 @@ static struct global_statistics {
.api_data_queries_made = 0,
.api_data_db_points_read = 0,
.api_data_result_points_generated = 0,
+
+ .tier0_hot_gorilla_buffers = 0,
+ .tier0_disk_compressed_bytes = 0,
+ .tier0_disk_uncompressed_bytes = 0,
};
void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) {
@@ -108,6 +117,18 @@ void global_statistics_backfill_query_completed(size_t points_read) {
__atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED);
}
+void global_statistics_gorilla_buffer_add_hot() {
+ __atomic_fetch_add(&global_statistics.tier0_hot_gorilla_buffers, 1, __ATOMIC_RELAXED);
+}
+
+void global_statistics_tier0_disk_compressed_bytes(uint32_t size) {
+ __atomic_fetch_add(&global_statistics.tier0_disk_compressed_bytes, size, __ATOMIC_RELAXED);
+}
+
+void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size) {
+ __atomic_fetch_add(&global_statistics.tier0_disk_uncompressed_bytes, size, __ATOMIC_RELAXED);
+}
+
void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) {
switch(query_source) {
case QUERY_SOURCE_API_DATA:
@@ -210,6 +231,11 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t
gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED);
gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED);
+ gs->tier0_hot_gorilla_buffers = __atomic_load_n(&global_statistics.tier0_hot_gorilla_buffers, __ATOMIC_RELAXED);
+
+ gs->tier0_disk_compressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_compressed_bytes, __ATOMIC_RELAXED);
+ gs->tier0_disk_uncompressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_uncompressed_bytes, __ATOMIC_RELAXED);
+
for(size_t tier = 0; tier < storage_tiers ;tier++)
gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED);
@@ -816,7 +842,7 @@ static void global_statistics_charts(void) {
for(size_t tier = 0; tier < storage_tiers ;tier++) {
char buf[30 + 1];
- snprintfz(buf, 30, "tier%zu", tier);
+ snprintfz(buf, sizeof(buf) - 1, "tier%zu", tier);
rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
}
@@ -828,6 +854,72 @@ static void global_statistics_charts(void) {
}
ml_update_global_statistics_charts(gs.ml_models_consulted);
+
+ // ----------------------------------------------------------------
+
+#ifdef ENABLE_DBENGINE
+ if (tier_page_type[0] == PAGE_GORILLA_METRICS)
+ {
+ static RRDSET *st_tier0_gorilla_pages = NULL;
+ static RRDDIM *rd_num_gorilla_pages = NULL;
+
+ if (unlikely(!st_tier0_gorilla_pages)) {
+ st_tier0_gorilla_pages = rrdset_create_localhost(
+ "netdata"
+ , "tier0_gorilla_pages"
+ , NULL
+ , "tier0_gorilla_pages"
+ , NULL
+ , "Number of gorilla_pages"
+ , "count"
+ , "netdata"
+ , "stats"
+ , 131004
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rd_num_gorilla_pages = rrddim_add(st_tier0_gorilla_pages, "count", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ rrddim_set_by_pointer(st_tier0_gorilla_pages, rd_num_gorilla_pages, (collected_number)gs.tier0_hot_gorilla_buffers);
+
+ rrdset_done(st_tier0_gorilla_pages);
+ }
+
+ if (tier_page_type[0] == PAGE_GORILLA_METRICS)
+ {
+ static RRDSET *st_tier0_compression_info = NULL;
+
+ static RRDDIM *rd_compressed_bytes = NULL;
+ static RRDDIM *rd_uncompressed_bytes = NULL;
+
+ if (unlikely(!st_tier0_compression_info)) {
+ st_tier0_compression_info = rrdset_create_localhost(
+ "netdata"
+ , "tier0_compression_info"
+ , NULL
+ , "tier0_compression_info"
+ , NULL
+ , "Tier 0 compression info"
+ , "bytes"
+ , "netdata"
+ , "stats"
+ , 131005
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rd_compressed_bytes = rrddim_add(st_tier0_compression_info, "compressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_uncompressed_bytes = rrddim_add(st_tier0_compression_info, "uncompressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_tier0_compression_info, rd_compressed_bytes, (collected_number)gs.tier0_disk_compressed_bytes);
+ rrddim_set_by_pointer(st_tier0_compression_info, rd_uncompressed_bytes, (collected_number)gs.tier0_disk_uncompressed_bytes);
+
+ rrdset_done(st_tier0_compression_info);
+ }
+#endif
}
// ----------------------------------------------------------------------------
@@ -1881,8 +1973,6 @@ static void dbengine2_statistics_charts(void) {
static RRDDIM *rd_mrg_metrics = NULL;
static RRDDIM *rd_mrg_acquired = NULL;
static RRDDIM *rd_mrg_collected = NULL;
- static RRDDIM *rd_mrg_with_retention = NULL;
- static RRDDIM *rd_mrg_without_retention = NULL;
static RRDDIM *rd_mrg_multiple_writers = NULL;
if (unlikely(!st_mrg_metrics)) {
@@ -1903,8 +1993,6 @@ static void dbengine2_statistics_charts(void) {
rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
priority++;
@@ -1912,8 +2000,6 @@ static void dbengine2_statistics_charts(void) {
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers);
- rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention);
- rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts);
rrdset_done(st_mrg_metrics);
@@ -3445,6 +3531,7 @@ static struct worker_utilization all_workers_utilization[] = {
{ .name = "TC", .family = "workers plugin tc", .priority = 1000000 },
{ .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 },
{ .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 },
+ { .name = "LOGSMANAGPLG",.family = "workers plugin logs management", .priority = 1000000 },
{ .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 },
{ .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 },
{ .name = "SERVICE", .family = "workers service", .priority = 1000000 },
diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h
index 7bdb153d..44717c6c 100644
--- a/daemon/global_statistics.h
+++ b/daemon/global_statistics.h
@@ -45,6 +45,11 @@ void global_statistics_sqlite3_query_completed(bool success, bool busy, bool loc
void global_statistics_sqlite3_row_completed(void);
void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array);
+void global_statistics_gorilla_buffer_add_hot();
+
+void global_statistics_tier0_disk_compressed_bytes(uint32_t size);
+void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size);
+
void global_statistics_web_request_completed(uint64_t dt,
uint64_t bytes_received,
uint64_t bytes_sent,
diff --git a/daemon/main.c b/daemon/main.c
index 5d25f88b..3e1fda96 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -4,6 +4,8 @@
#include "buildinfo.h"
#include "static_threads.h"
+#include "database/engine/page_test.h"
+
#if defined(ENV32BIT)
#warning COMPILING 32BIT NETDATA
#endif
@@ -313,7 +315,7 @@ void netdata_cleanup_and_exit(int ret) {
const char *prev_msg = NULL;
bool timeout = false;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
send_statistics("EXIT", ret?"ERROR":"OK","-");
@@ -371,6 +373,10 @@ void netdata_cleanup_and_exit(int ret) {
SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL
, 3 * USEC_PER_SEC);
+ delta_shutdown_time("prepare metasync shutdown");
+
+ metadata_sync_shutdown_prepare();
+
delta_shutdown_time("disable ML detection and training threads");
ml_stop_threads();
@@ -396,10 +402,6 @@ void netdata_cleanup_and_exit(int ret) {
rrdhost_cleanup_all();
- delta_shutdown_time("prepare metasync shutdown");
-
- metadata_sync_shutdown_prepare();
-
delta_shutdown_time("stop aclk threads");
timeout = !service_wait_exit(
@@ -422,6 +424,13 @@ void netdata_cleanup_and_exit(int ret) {
delta_shutdown_time("flush dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_prepare_exit(multidb_ctx[tier]);
+
+ for (size_t tier = 0; tier < storage_tiers; tier++) {
+ if (!multidb_ctx[tier])
+ continue;
+ completion_wait_for(&multidb_ctx[tier]->quiesce.completion);
+ completion_destroy(&multidb_ctx[tier]->quiesce.completion);
+ }
}
#endif
@@ -440,17 +449,20 @@ void netdata_cleanup_and_exit(int ret) {
delta_shutdown_time("wait for dbengine collectors to finish");
size_t running = 1;
- while(running) {
+ size_t count = 10;
+ while(running && count) {
running = 0;
for (size_t tier = 0; tier < storage_tiers; tier++)
running += rrdeng_collectors_running(multidb_ctx[tier]);
if(running) {
- error_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
- error_limit(&erl, "waiting for %zu collectors to finish", running);
+ nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
+ "waiting for %zu collectors to finish", running);
// sleep_usec(100 * USEC_PER_MS);
cleanup_destroyed_dictionaries();
}
+ count--;
}
delta_shutdown_time("wait for dbengine main cache to finish flushing");
@@ -463,6 +475,8 @@ void netdata_cleanup_and_exit(int ret) {
delta_shutdown_time("stop dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_exit(multidb_ctx[tier]);
+
+ rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
}
#endif
}
@@ -614,8 +628,14 @@ int killpid(pid_t pid) {
int ret;
netdata_log_debug(D_EXIT, "Request to kill pid %d", pid);
+ int signal = SIGTERM;
+//#ifdef NETDATA_INTERNAL_CHECKS
+// if(service_running(SERVICE_COLLECTORS))
+// signal = SIGABRT;
+//#endif
+
errno = 0;
- ret = kill(pid, SIGTERM);
+ ret = kill(pid, signal);
if (ret == -1) {
switch(errno) {
case ESRCH:
@@ -662,7 +682,7 @@ static void set_nofile_limit(struct rlimit *rl) {
}
void cancel_main_threads() {
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
int i, found = 0;
usec_t max = 5 * USEC_PER_SEC, step = 100000;
@@ -752,7 +772,7 @@ int help(int exitcode) {
" | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
" +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
"\n"
- " Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>\n"
+ " Copyright (C) 2016-2023, Netdata, Inc. <info@netdata.cloud>\n"
" Released under GNU General Public License v3 or later.\n"
" All rights reserved.\n"
"\n"
@@ -790,6 +810,7 @@ int help(int exitcode) {
" -W unittest Run internal unittests and exit.\n\n"
" -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n"
" -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
+ " -W sqlite-analyze Run update statistics and exit.\n\n"
#ifdef ENABLE_DBENGINE
" -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
" -W stresstest=A,B,C,D,E,F,G\n"
@@ -841,44 +862,52 @@ static void security_init(){
#endif
static void log_init(void) {
+ nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon"));
+
+ time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD;
+ size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS;
+ period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period);
+ logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs);
+ nd_log_set_flood_protection(logs, period);
+
+ const char *netdata_log_level = getenv("NETDATA_LOG_LEVEL");
+ netdata_log_level = netdata_log_level ? nd_log_id2priority(nd_log_priority2id(netdata_log_level)) : NDLP_INFO_STR;
+
+ nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", netdata_log_level));
+
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
- stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename);
+ nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename));
- snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir);
- stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename);
+ bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */;
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename));
- snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
- stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename);
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename));
snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
- stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename);
+ nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename));
- snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
- stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename);
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename));
#ifdef ENABLE_ACLK
aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
if (aclklog_enabled) {
snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir);
- aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename);
+ nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename));
}
#endif
-
- char deffacility[8];
- snprintfz(deffacility,7,"%s","daemon");
- facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility);
-
- error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period);
- error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period);
- error_log_errors_per_period_backup = error_log_errors_per_period;
-
- setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
- setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
-
- char *selected_level = config_get(CONFIG_SECTION_LOGS, "severity level", NETDATA_LOG_LEVEL_INFO_STR);
- global_log_severity_level = log_severity_string_to_severity_level(selected_level);
- setenv("NETDATA_LOG_SEVERITY_LEVEL", selected_level , 1);
}
char *initialize_lock_directory_path(char *prefix)
@@ -1050,6 +1079,36 @@ static void backwards_compatible_config() {
config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
CONFIG_SECTION_DB, "enable zero metrics");
+ config_move(CONFIG_SECTION_LOGS, "error",
+ CONFIG_SECTION_LOGS, "daemon");
+
+ config_move(CONFIG_SECTION_LOGS, "severity level",
+ CONFIG_SECTION_LOGS, "level");
+
+ config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection",
+ CONFIG_SECTION_LOGS, "logs to trigger flood protection");
+
+ config_move(CONFIG_SECTION_LOGS, "errors flood protection period",
+ CONFIG_SECTION_LOGS, "logs flood protection period");
+ config_move(CONFIG_SECTION_HEALTH, "is ephemeral",
+ CONFIG_SECTION_GLOBAL, "is ephemeral node");
+
+ config_move(CONFIG_SECTION_HEALTH, "has unstable connection",
+ CONFIG_SECTION_GLOBAL, "has unstable connection");
+}
+
+static int get_hostname(char *buf, size_t buf_size) {
+ if (netdata_configured_host_prefix && *netdata_configured_host_prefix) {
+ char filename[FILENAME_MAX + 1];
+ snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix);
+
+ if (!read_file(filename, buf, buf_size)) {
+ trim(buf);
+ return 0;
+ }
+ }
+
+ return gethostname(buf, buf_size);
}
static void get_netdata_configured_variables() {
@@ -1058,10 +1117,12 @@ static void get_netdata_configured_variables() {
// ------------------------------------------------------------------------
// get the hostname
+ netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
+ verify_netdata_host_prefix(true);
+
char buf[HOSTNAME_MAX + 1];
- if(gethostname(buf, HOSTNAME_MAX) == -1){
+ if (get_hostname(buf, HOSTNAME_MAX))
netdata_log_error("Cannot get machine hostname.");
- }
netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
@@ -1112,8 +1173,6 @@ static void get_netdata_configured_variables() {
netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
- char *env_home=getenv("HOME");
- netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir);
netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
@@ -1124,6 +1183,16 @@ static void get_netdata_configured_variables() {
#ifdef ENABLE_DBENGINE
// ------------------------------------------------------------------------
+ // get default Database Engine page type
+
+ const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw");
+ if (strcmp(page_type, "gorilla") == 0) {
+ tier_page_type[0] = PAGE_GORILLA_METRICS;
+ } else if (strcmp(page_type, "raw") != 0) {
+ netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type);
+ }
+
+ // ------------------------------------------------------------------------
// get default Database Engine page cache size in MiB
default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
@@ -1161,10 +1230,6 @@ static void get_netdata_configured_variables() {
default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
}
#endif
- // ------------------------------------------------------------------------
-
- netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
- verify_netdata_host_prefix();
// --------------------------------------------------------------------
// get KSM settings
@@ -1184,6 +1249,7 @@ static void get_netdata_configured_variables() {
// --------------------------------------------------------------------
rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
+ rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s);
// Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
// cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
// https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
@@ -1262,7 +1328,7 @@ static inline void coverity_remove_taint(char *s)
(void)s;
}
-int get_system_info(struct rrdhost_system_info *system_info, bool log) {
+int get_system_info(struct rrdhost_system_info *system_info) {
char *script;
script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
@@ -1294,11 +1360,7 @@ int get_system_info(struct rrdhost_system_info *system_info, bool log) {
if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
netdata_log_error("Unexpected environment variable %s=%s", line, value);
- }
- else {
- if(log)
- netdata_log_info("%s=%s", line, value);
-
+ } else {
setenv(line, value, 1);
}
}
@@ -1337,6 +1399,8 @@ int julytest(void);
int pluginsd_parser_unittest(void);
void replication_initialize(void);
void bearer_tokens_init(void);
+int unittest_rrdpush_compressions(void);
+int uuid_unittest(void);
int main(int argc, char **argv) {
// initialize the system clocks
@@ -1346,8 +1410,6 @@ int main(int argc, char **argv) {
usec_t started_ut = now_monotonic_usec();
usec_t last_ut = started_ut;
const char *prev_msg = NULL;
- // Initialize stderror avoiding coredump when netdata_log_info() or netdata_log_error() is called
- stderror = stderr;
int i;
int config_loaded = 0;
@@ -1439,6 +1501,10 @@ int main(int argc, char **argv) {
#ifdef ENABLE_DBENGINE
char* createdataset_string = "createdataset=";
char* stresstest_string = "stresstest=";
+
+ if(strcmp(optarg, "pgd-tests") == 0) {
+ return pgd_test(argc, argv);
+ }
#endif
if(strcmp(optarg, "sqlite-meta-recover") == 0) {
@@ -1451,6 +1517,11 @@ int main(int argc, char **argv) {
return 0;
}
+ if(strcmp(optarg, "sqlite-analyze") == 0) {
+ sql_init_database(DB_CHECK_ANALYZE, 0);
+ return 0;
+ }
+
if(strcmp(optarg, "unittest") == 0) {
unittest_running = true;
@@ -1495,6 +1566,8 @@ int main(int argc, char **argv) {
return 1;
if (ctx_unittest())
return 1;
+ if (uuid_unittest())
+ return 1;
fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
return 0;
}
@@ -1521,6 +1594,10 @@ int main(int argc, char **argv) {
unittest_running = true;
return buffer_unittest();
}
+ else if(strcmp(optarg, "uuidtest") == 0) {
+ unittest_running = true;
+ return uuid_unittest();
+ }
#ifdef ENABLE_DBENGINE
else if(strcmp(optarg, "mctest") == 0) {
unittest_running = true;
@@ -1550,6 +1627,10 @@ int main(int argc, char **argv) {
unittest_running = true;
return pluginsd_parser_unittest();
}
+ else if(strcmp(optarg, "rrdpush_compressions_test") == 0) {
+ unittest_running = true;
+ return unittest_rrdpush_compressions();
+ }
else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
optarg += strlen(createdataset_string);
unsigned history_seconds = strtoul(optarg, NULL, 0);
@@ -1851,7 +1932,7 @@ int main(int argc, char **argv) {
{
char buf[20 + 1];
- snprintfz(buf, 20, "%d", libuv_worker_threads);
+ snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads);
setenv("UV_THREADPOOL_SIZE", buf, 1);
}
@@ -1894,13 +1975,15 @@ int main(int argc, char **argv) {
// get log filenames and settings
log_init();
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
// initialize the log files
- open_all_log_files();
+ nd_log_initialize();
netdata_log_info("Netdata agent version \""VERSION"\" is starting");
ieee754_doubles = is_system_ieee754_double();
+ if(!ieee754_doubles)
+ globally_disabled_capabilities |= STREAM_CAP_IEEE754;
aral_judy_init();
@@ -1925,11 +2008,11 @@ int main(int argc, char **argv) {
set_silencers_filename();
health_initialize_global_silencers();
- // --------------------------------------------------------------------
- // Initialize ML configuration
-
- delta_startup_time("initialize ML");
- ml_init();
+// // --------------------------------------------------------------------
+// // Initialize ML configuration
+//
+// delta_startup_time("initialize ML");
+// ml_init();
// --------------------------------------------------------------------
// setup process signals
@@ -1949,6 +2032,15 @@ int main(int argc, char **argv) {
// setup threads configs
default_stacksize = netdata_threads_init();
+#ifdef NETDATA_INTERNAL_CHECKS
+ config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true);
+ config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true);
+#endif
+
+ if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false))
+ // this has to run before starting any other threads that use workers
+ workers_utilization_enable();
+
for (i = 0; static_threads[i].name != NULL ; i++) {
struct netdata_static_thread *st = &static_threads[i];
@@ -1973,8 +2065,18 @@ int main(int argc, char **argv) {
web_client_api_v1_init();
web_server_threading_selection();
- if(web_server_mode != WEB_SERVER_MODE_NONE)
- api_listen_sockets_setup();
+ if(web_server_mode != WEB_SERVER_MODE_NONE) {
+ if (!api_listen_sockets_setup()) {
+ netdata_log_error("Cannot setup listen port(s). Is Netdata already running?");
+ exit(1);
+ }
+ }
+
+ // --------------------------------------------------------------------
+ // Initialize ML configuration
+
+ delta_startup_time("initialize ML");
+ ml_init();
#ifdef ENABLE_H2O
delta_startup_time("initialize h2o server");
@@ -2006,6 +2108,16 @@ int main(int argc, char **argv) {
if(become_daemon(dont_fork, user) == -1)
fatal("Cannot daemonize myself.");
+ // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME".
+ struct passwd *pw = getpwuid(getuid());
+ if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) {
+ netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir);
+ } else {
+ netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir);
+ }
+
+ setenv("HOME", netdata_configured_home_dir, 1);
+
dyn_conf_init();
netdata_log_info("netdata started on pid %d.", getpid());
@@ -2039,7 +2151,7 @@ int main(int argc, char **argv) {
netdata_anonymous_statistics_enabled=-1;
struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
__atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
- get_system_info(system_info, true);
+ get_system_info(system_info);
(void) registry_get_this_machine_guid();
system_info->hops = 0;
get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
@@ -2076,7 +2188,7 @@ int main(int argc, char **argv) {
// ------------------------------------------------------------------------
// enable log flood protection
- error_log_limit_reset();
+ nd_log_limits_reset();
// Load host labels
delta_startup_time("collect host labels");
diff --git a/daemon/service.c b/daemon/service.c
index f7fe86e0..8a65de66 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -76,33 +76,48 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
rrddim_free(st, rd);
}
-static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+ if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
+ return true;
+
worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS);
+ rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+
RRDDIM *rd;
time_t now = now_realtime_sec();
- bool done_all_dimensions = true;
+ size_t dim_candidates = 0;
+ size_t dim_archives = 0;
dfe_start_write(st->rrddim_root_index, rd) {
- if(unlikely(
- all_dimensions ||
- (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now))
- )) {
-
- if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
- netdata_log_info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
- svc_rrddim_obsolete_to_archive(rd);
+ bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE));
+
+ if(candidate) {
+ dim_candidates++;
+
+ if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) {
+ size_t references = dictionary_acquired_item_references(rd_dfe.item);
+ if(references == 1) {
+// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
+ svc_rrddim_obsolete_to_archive(rd);
+ dim_archives++;
+ }
+// else
+// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
}
- else
- done_all_dimensions = false;
}
- else
- done_all_dimensions = false;
}
dfe_done(rd);
- return done_all_dimensions;
+ if(dim_archives != dim_candidates) {
+ rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+ return false;
+ }
+
+ return true;
}
static void svc_rrdset_obsolete_to_free(RRDSET *st) {
@@ -132,50 +147,78 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) {
rrdset_free(st);
}
-static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
+static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) {
+ if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS))
+ return;
+
worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS);
+ rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ size_t full_candidates = 0;
+ size_t full_archives = 0;
+ size_t partial_candidates = 0;
+ size_t partial_archives = 0;
+
time_t now = now_realtime_sec();
RRDSET *st;
rrdset_foreach_reentrant(st, host) {
if(rrdset_is_replicating(st))
continue;
- if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
- && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
- && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
- && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
- )) {
- svc_rrdset_obsolete_to_free(st);
- }
- else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) {
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
- svc_rrdset_archive_obsolete_dimensions(st, false);
+ RRDSET_FLAGS flags = rrdset_flag_get(st);
+ bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE;
+ bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS;
+
+ if(obsolete_dims) {
+ partial_candidates++;
+
+ if(svc_rrdset_archive_obsolete_dimensions(st, false))
+ partial_archives++;
}
- else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) {
- rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
+
+ if(obsolete_chart) {
+ full_candidates++;
+
+ if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
+ && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
+ && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
+ )) {
+ svc_rrdset_obsolete_to_free(st);
+ full_archives++;
+ }
}
}
rrdset_foreach_done(st);
+
+ if(partial_archives != partial_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ if(full_archives != full_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
}
-static void svc_rrdset_check_obsoletion(RRDHOST *host) {
+static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) {
worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK);
time_t now = now_realtime_sec();
time_t last_entry_t;
RRDSET *st;
+
+ time_t child_connect_time = host->child_connect_time;
+
rrdset_foreach_read(st, host) {
if(rrdset_is_replicating(st))
continue;
last_entry_t = rrdset_last_entry_s(st);
- if(last_entry_t && last_entry_t < host->child_connect_time &&
- host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
- < now)
+ if (last_entry_t && last_entry_t < child_connect_time &&
+ child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT +
+ (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) <
+ now)
- rrdset_is_obsolete(st);
+ rrdset_is_obsolete___safe_from_collector_thread(st);
}
rrdset_foreach_done(st);
}
@@ -190,24 +233,24 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() {
if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host))
continue;
- if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) {
- rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
- svc_rrdhost_cleanup_obsolete_charts(host);
- }
+ svc_rrdhost_cleanup_charts_marked_obsolete(host);
- if(host != localhost
- && host->trigger_chart_obsoletion_check
- && (
- (
- host->child_last_chart_command
- && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec()
- )
- || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
- )
- ) {
- svc_rrdset_check_obsoletion(host);
+ if (host == localhost)
+ continue;
+
+ netdata_mutex_lock(&host->receiver_lock);
+
+ time_t now = now_realtime_sec();
+
+ if (host->trigger_chart_obsoletion_check &&
+ ((host->child_last_chart_command &&
+ host->child_last_chart_command + host->health.health_delay_up_to < now) ||
+ (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) {
+ svc_rrdhost_detect_obsolete_charts(host);
host->trigger_chart_obsoletion_check = 0;
}
+
+ netdata_mutex_unlock(&host->receiver_lock);
}
rrd_unlock();
@@ -226,22 +269,45 @@ restart_after_removal:
if(!rrdhost_should_be_removed(host, protected_host, now))
continue;
- netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
+ bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED);
+ if (!is_archived) {
+ netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
- if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
- /* don't delete multi-host DB host files */
- && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
- ) {
- worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
- rrdhost_delete_charts(host);
+ if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
+ /* don't delete multi-host DB host files */
+ && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
+ ) {
+ worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
+ rrdhost_delete_charts(host);
+ }
+ else {
+ worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
+ rrdhost_save_charts(host);
+ }
}
- else {
- worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
- rrdhost_save_charts(host);
+
+ bool force = false;
+
+ if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s)
+ force = true;
+
+ if (!force && is_archived)
+ continue;
+
+ if (force) {
+ netdata_log_info("Host '%s' with machine guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid);
}
worker_is_busy(WORKER_JOB_FREE_HOST);
- rrdhost_free___while_having_rrd_wrlock(host, false);
+#ifdef ENABLE_ACLK
+ // in case we have cloud connection we inform cloud
+ // a child disconnected
+ if (netdata_cloud_enabled && force) {
+ aclk_host_state_update(host, 0, 0);
+ unregister_node(host->machine_guid);
+ }
+#endif
+ rrdhost_free___while_having_rrd_wrlock(host, force);
goto restart_after_removal;
}
diff --git a/daemon/signals.c b/daemon/signals.c
index ae28874c..4f225433 100644
--- a/daemon/signals.c
+++ b/daemon/signals.c
@@ -42,7 +42,7 @@ static void signal_handler(int signo) {
if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
char buffer[200 + 1];
- snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
+ snprintfz(buffer, sizeof(buffer) - 1, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
// nothing to do - we cannot write but there is no way to complain about it
;
@@ -203,28 +203,28 @@ void signals_handle(void) {
switch (signals_waiting[i].action) {
case NETDATA_SIGNAL_RELOAD_HEALTH:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
break;
case NETDATA_SIGNAL_SAVE_DATABASE:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Saving databases...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_SAVE_DATABASE, NULL, NULL);
break;
case NETDATA_SIGNAL_REOPEN_LOGS:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_REOPEN_LOGS, NULL, NULL);
break;
case NETDATA_SIGNAL_EXIT_CLEANLY:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name);
commands_exit();
netdata_cleanup_and_exit(0);
diff --git a/daemon/static_threads.c b/daemon/static_threads.c
index 830b854e..b70373d7 100644
--- a/daemon/static_threads.c
+++ b/daemon/static_threads.c
@@ -61,7 +61,7 @@ const struct netdata_static_thread static_threads_common[] = {
.config_name = "netdata monitoring",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0,
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_main
@@ -69,10 +69,10 @@ const struct netdata_static_thread static_threads_common[] = {
{
.name = "STATS_WORKERS",
.config_section = CONFIG_SECTION_PLUGINS,
- .config_name = "netdata monitoring",
+ .config_name = "netdata monitoring extended",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0, // this is ignored - check main() for "netdata monitoring extended"
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_workers_main
@@ -80,10 +80,10 @@ const struct netdata_static_thread static_threads_common[] = {
{
.name = "STATS_SQLITE3",
.config_section = CONFIG_SECTION_PLUGINS,
- .config_name = "netdata monitoring",
+ .config_name = "netdata monitoring extended",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0, // this is ignored - check main() for "netdata monitoring extended"
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_sqlite3_main
diff --git a/daemon/system-info.sh b/daemon/system-info.sh
index 2a3ba7d7..aaca7fd4 100755
--- a/daemon/system-info.sh
+++ b/daemon/system-info.sh
@@ -57,7 +57,7 @@ if [ -z "${VIRTUALIZATION}" ]; then
VIRTUALIZATION="unknown"
VIRT_DETECTION="none"
elif [ "$VIRTUALIZATION" != "none" ] && [ "$VIRTUALIZATION" != "unknown" ]; then
- VIRTUALIZATION=$(virtualization_normalize_name $VIRTUALIZATION)
+ VIRTUALIZATION=$(virtualization_normalize_name "$VIRTUALIZATION")
fi
else
# Passed from outside - probably in docker run
@@ -101,6 +101,10 @@ if [ "${CONTAINER}" = "unknown" ]; then
CONT_DETECTION="kubernetes"
fi
+ if [ "${KERNEL_NAME}" = FreeBSD ] && command -v sysctl && sysctl security.jail.jailed 2>/dev/null | grep -q "1$"; then
+ CONTAINER="jail"
+ CONT_DETECTION="sysctl"
+ fi
fi
# -------------------------------------------------------------------------------------------------
diff --git a/daemon/unit_test.c b/daemon/unit_test.c
index b8d22931..149bbec0 100644
--- a/daemon/unit_test.c
+++ b/daemon/unit_test.c
@@ -97,7 +97,7 @@ static int check_number_printing(void) {
int i, failed = 0;
for(i = 0; values[i].correct ; i++) {
print_netdata_double(netdata, values[i].n);
- snprintfz(system, 512, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n);
+ snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n);
int ok = 1;
if(strcmp(netdata, values[i].correct) != 0) {
@@ -319,7 +319,7 @@ void benchmark_storage_number(int loop, int multiplier) {
for(i = 0; i < loop ;i++) {
n *= multiplier;
if(n > storage_number_positive_max) n = storage_number_positive_min;
- snprintfz(buffer, 100, NETDATA_DOUBLE_FORMAT, n);
+ snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, n);
}
}
@@ -507,7 +507,7 @@ int unit_test_buffer() {
const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s";
buffer_sprintf(wb, fmt, string, string, string, string);
- snprintfz(final, 9000, fmt, string, string, string, string);
+ snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string);
const char *s = buffer_tostring(wb);
@@ -1272,7 +1272,7 @@ int run_test(struct test *test)
default_rrd_update_every = test->update_every;
char name[101];
- snprintfz(name, 100, "unittest-%s", test->name);
+ snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name);
// create the chart
RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1
@@ -1534,7 +1534,7 @@ int unit_test(long delay, long shift)
repeat++;
char name[101];
- snprintfz(name, 100, "unittest-%d-%ld-%ld", repeat, delay, shift);
+ snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift);
//debug_flags = 0xffffffff;
default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
@@ -1681,13 +1681,6 @@ int test_sqlite(void) {
rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL);
if (rc != SQLITE_OK)
goto error;
- buffer_flush(sql);
-
- buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str);
- rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL);
- if (rc != SQLITE_OK)
- goto error;
- buffer_flush(sql);
buffer_free(sql);
fprintf(stderr,"SQLite is OK\n");
@@ -1831,30 +1824,29 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name)
/* We don't want to drop metrics when generating load, we prefer to block data generation itself */
return rrdhost_find_or_create(
- name
- , name
- , name
- , os_type
- , netdata_configured_timezone
- , netdata_configured_abbrev_timezone
- , netdata_configured_utc_offset
- , ""
- , program_name
- , program_version
- , default_rrd_update_every
- , default_rrd_history_entries
- , RRD_MEMORY_MODE_DBENGINE
- , default_health_enabled
- , default_rrdpush_enabled
- , default_rrdpush_destination
- , default_rrdpush_api_key
- , default_rrdpush_send_charts_matching
- , default_rrdpush_enable_replication
- , default_rrdpush_seconds_to_replicate
- , default_rrdpush_replication_step
- , NULL
- , 0
- );
+ name,
+ name,
+ name,
+ os_type,
+ netdata_configured_timezone,
+ netdata_configured_abbrev_timezone,
+ netdata_configured_utc_offset,
+ "",
+ program_name,
+ program_version,
+ default_rrd_update_every,
+ default_rrd_history_entries,
+ RRD_MEMORY_MODE_DBENGINE,
+ default_health_enabled,
+ default_rrdpush_enabled,
+ default_rrdpush_destination,
+ default_rrdpush_api_key,
+ default_rrdpush_send_charts_matching,
+ default_rrdpush_enable_replication,
+ default_rrdpush_seconds_to_replicate,
+ default_rrdpush_replication_step,
+ NULL,
+ 0);
}
// constants for test_dbengine
@@ -1878,7 +1870,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
char name[101];
for (i = 0 ; i < CHARTS ; ++i) {
- snprintfz(name, 100, "dbengine-chart-%d", i);
+ snprintfz(name, sizeof(name) - 1, "dbengine-chart-%d", i);
// create the chart
st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest",
@@ -1886,7 +1878,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG);
rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST);
for (j = 0 ; j < DIMS ; ++j) {
- snprintfz(name, 100, "dim-%d", j);
+ snprintfz(name, sizeof(name) - 1, "dim-%d", j);
rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
@@ -2108,6 +2100,14 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
return errors + value_errors + time_errors;
}
+void test_dbengine_charts_and_dims_are_not_collected(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]) { // test helper: clears the rrdcontexts.collected flag on all CHARTS charts in st[] and on all DIMS dimensions of each chart in rd[][]
+ for(int c = 0; c < CHARTS ; c++) { // visit every test chart
+ st[c]->rrdcontexts.collected = false; // chart-level flag; NOTE(review): presumably tells rrdcontexts the chart is no longer being collected - confirm
+ for(int d = 0; d < DIMS ; d++) // visit every dimension of chart c
+ rd[c][d]->rrdcontexts.collected = false; // dimension-level flag, same treatment as the chart
+ }
+}
+
int test_dbengine(void)
{
fprintf(stderr, "%s() running...\n", __FUNCTION__ );
@@ -2117,7 +2117,7 @@ int test_dbengine(void)
RRDDIM *rd[CHARTS][DIMS];
time_t time_start[REGIONS], time_end[REGIONS];
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
fprintf(stderr, "\nRunning DB-engine test\n");
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
@@ -2135,6 +2135,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
current_region = 1; //this is the second region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds
@@ -2152,6 +2153,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
current_region = 2; //this is the third region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds
@@ -2169,6 +2171,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
for (current_region = 0 ; current_region < REGIONS ; ++current_region) {
errors += test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]);
@@ -2237,6 +2240,7 @@ int test_dbengine(void)
rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance);
rrdhost_delete_charts(host);
rrdeng_exit((struct rrdengine_instance *)host->db[0].instance);
+ rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
rrd_unlock();
return errors + value_errors + time_errors;
@@ -2343,7 +2347,7 @@ void generate_dbengine_dataset(unsigned history_seconds)
(1024 * 1024);
default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'");
host = dbengine_rrdhost_find_or_create("dbengine-dataset");
@@ -2368,12 +2372,12 @@ void generate_dbengine_dataset(unsigned history_seconds)
thread_info[i]->time_max = 0;
thread_info[i]->done = 0;
completion_init(&thread_info[i]->charts_initialized);
- assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i]));
+ fatal_assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i]));
completion_wait_for(&thread_info[i]->charts_initialized);
completion_destroy(&thread_info[i]->charts_initialized);
}
for (i = 0 ; i < DSET_CHARTS ; ++i) {
- assert(0 == uv_thread_join(&thread_info[i]->thread));
+ fatal_assert(0 == uv_thread_join(&thread_info[i]->thread));
}
for (i = 0 ; i < DSET_CHARTS ; ++i) {
@@ -2518,7 +2522,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
unsigned i, j;
time_t time_start, test_duration;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
if (!TEST_DURATION_SEC)
TEST_DURATION_SEC = 10;
@@ -2575,7 +2579,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
chart_threads[i]->done = 0;
chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0;
completion_init(&chart_threads[i]->charts_initialized);
- assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i]));
+ fatal_assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i]));
}
/* barrier so that subsequent queries can access valid chart data */
for (i = 0 ; i < DSET_CHARTS ; ++i) {
@@ -2597,7 +2601,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
query_threads[i]->chart_threads[j] = chart_threads[j];
}
query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0;
- assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
+ fatal_assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
}
sleep(TEST_DURATION_SEC);
/* stop workload */
@@ -2646,6 +2650,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance);
rrdhost_delete_charts(host);
rrdeng_exit((struct rrdengine_instance *)host->db[0].instance);
+ rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL);
rrd_unlock();
}