diff options
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/analytics.c | 117 | ||||
-rw-r--r-- | daemon/analytics.h | 1 | ||||
-rw-r--r-- | daemon/buildinfo.c | 75 | ||||
-rw-r--r-- | daemon/commands.c | 17 | ||||
-rw-r--r-- | daemon/common.h | 1 | ||||
-rw-r--r-- | daemon/daemon.c | 31 | ||||
-rw-r--r-- | daemon/global_statistics.c | 101 | ||||
-rw-r--r-- | daemon/global_statistics.h | 5 | ||||
-rw-r--r-- | daemon/main.c | 238 | ||||
-rw-r--r-- | daemon/service.c | 184 | ||||
-rw-r--r-- | daemon/signals.c | 16 | ||||
-rw-r--r-- | daemon/static_threads.c | 10 | ||||
-rwxr-xr-x | daemon/system-info.sh | 6 | ||||
-rw-r--r-- | daemon/unit_test.c | 95 |
14 files changed, 620 insertions, 277 deletions
diff --git a/daemon/analytics.c b/daemon/analytics.c index c149e2583..353ebd136 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -117,12 +117,14 @@ void analytics_free_data(void) */ void analytics_set_data(char **name, char *value) { + spinlock_lock(&analytics_data.spinlock); if (*name) { analytics_data.data_length -= strlen(*name); freez(*name); } *name = strdupz(value); analytics_data.data_length += strlen(*name); + spinlock_unlock(&analytics_data.spinlock); } /* @@ -131,6 +133,7 @@ void analytics_set_data(char **name, char *value) void analytics_set_data_str(char **name, const char *value) { size_t value_string_len; + spinlock_lock(&analytics_data.spinlock); if (*name) { analytics_data.data_length -= strlen(*name); freez(*name); @@ -139,6 +142,7 @@ void analytics_set_data_str(char **name, const char *value) *name = mallocz(sizeof(char) * value_string_len); snprintfz(*name, value_string_len - 1, "\"%s\"", value); analytics_data.data_length += strlen(*name); + spinlock_unlock(&analytics_data.spinlock); } /* @@ -149,7 +153,7 @@ void analytics_log_prometheus(void) if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) { analytics_data.prometheus_hits++; char b[21]; - snprintfz(b, 20, "%zu", analytics_data.prometheus_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits); analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); } } @@ -162,7 +166,7 @@ void analytics_log_shell(void) if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) { analytics_data.shell_hits++; char b[21]; - snprintfz(b, 20, "%zu", analytics_data.shell_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits); analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); } } @@ -175,7 +179,7 @@ void analytics_log_json(void) if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) { analytics_data.json_hits++; char b[21]; - snprintfz(b, 20, "%zu", analytics_data.json_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits); analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); } } @@ -188,7 +192,7 @@ void analytics_log_dashboard(void) if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) { analytics_data.dashboard_hits++; char b[21]; - snprintfz(b, 20, "%zu", analytics_data.dashboard_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits); analytics_set_data(&analytics_data.netdata_dashboard_used, b); } } @@ -198,7 +202,7 @@ void analytics_log_dashboard(void) */ void analytics_report_oom_score(long long int score){ char b[21]; - snprintfz(b, 20, "%lld", score); + snprintfz(b, sizeof(b) - 1, "%lld", score); analytics_set_data(&analytics_data.netdata_config_oom_score, b); } @@ -222,11 +226,11 @@ void analytics_mirrored_hosts(void) } rrd_unlock(); - snprintfz(b, 20, "%zu", count); + snprintfz(b, sizeof(b) - 1, "%zu", count); analytics_set_data(&analytics_data.netdata_mirrored_host_count, b); - snprintfz(b, 20, "%zu", reachable); + snprintfz(b, sizeof(b) - 1, "%zu", reachable); analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b); - snprintfz(b, 20, "%zu", unreachable); + snprintfz(b, sizeof(b) - 1, "%zu", unreachable); analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b); } @@ -281,7 +285,7 @@ void analytics_collectors(void) .plugin = rrdset_plugin_name(st), .module = rrdset_module_name(st) }; - snprintfz(name, 499, "%s:%s", col.plugin, col.module); + snprintfz(name, sizeof(name) - 1, "%s:%s", col.plugin, col.module); dictionary_set(dict, name, &col, sizeof(struct collector)); } rrdset_foreach_done(st); @@ -297,7 +301,7 @@ void analytics_collectors(void) { char b[21]; - snprintfz(b, 20, "%d", ap.c); + snprintfz(b, sizeof(b) - 1, "%d", ap.c); analytics_set_data(&analytics_data.netdata_collectors_count, b); } @@ -402,7 +406,7 @@ void analytics_charts(void) analytics_data.charts_count = c; { char b[21]; - snprintfz(b, 20, "%zu", c); + snprintfz(b, sizeof(b) - 1, "%zu", c); analytics_set_data(&analytics_data.netdata_charts_count, b); } } @@ -427,7 +431,7 @@ void analytics_metrics(void) analytics_data.metrics_count = dimensions; { char b[21]; - snprintfz(b, 20, "%zu", dimensions); + snprintfz(b, sizeof(b) - 1, "%zu", dimensions); analytics_set_data(&analytics_data.netdata_metrics_count, b); } } @@ -454,11 +458,11 @@ void analytics_alarms(void) } foreach_rrdcalc_in_rrdhost_done(rc); - snprintfz(b, 20, "%zu", alarm_normal); + snprintfz(b, sizeof(b) - 1, "%zu", alarm_normal); analytics_set_data(&analytics_data.netdata_alarms_normal, b); - snprintfz(b, 20, "%zu", alarm_warn); + snprintfz(b, sizeof(b) - 1, "%zu", alarm_warn); analytics_set_data(&analytics_data.netdata_alarms_warning, b); - snprintfz(b, 20, "%zu", alarm_crit); + snprintfz(b, sizeof(b) - 1, "%zu", alarm_crit); analytics_set_data(&analytics_data.netdata_alarms_critical, b); } @@ -467,6 +471,8 @@ void analytics_alarms(void) */ void analytics_misc(void) { + analytics_data.spinlock.locked = false; + #ifdef ENABLE_ACLK analytics_set_data(&analytics_data.netdata_host_cloud_available, "true"); analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "Next Generation"); @@ -540,19 +546,19 @@ void analytics_gather_mutable_meta_data(void) { char b[21]; - snprintfz(b, 20, "%zu", analytics_data.prometheus_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits); analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); - snprintfz(b, 20, "%zu", analytics_data.shell_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits); analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); - snprintfz(b, 20, "%zu", analytics_data.json_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits); analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); - snprintfz(b, 20, "%zu", analytics_data.dashboard_hits); + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits); analytics_set_data(&analytics_data.netdata_dashboard_used, b); - snprintfz(b, 20, "%zu", rrdhost_hosts_available()); + snprintfz(b, sizeof(b) - 1, "%zu", rrdhost_hosts_available()); analytics_set_data(&analytics_data.netdata_config_hosts_available, b); } } @@ -664,10 +670,10 @@ void set_late_global_environment(struct rrdhost_system_info *system_info) #ifdef ENABLE_DBENGINE { char b[16]; - snprintfz(b, 15, "%d", default_rrdeng_page_cache_mb); + snprintfz(b, sizeof(b) - 1, "%d", default_rrdeng_page_cache_mb); analytics_set_data(&analytics_data.netdata_config_page_cache_size, b); - snprintfz(b, 15, "%d", default_multidb_disk_quota_mb); + snprintfz(b, sizeof(b) - 1, "%d", default_multidb_disk_quota_mb); analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b); } #endif @@ -824,11 +830,10 @@ void get_system_timezone(void) } } -void set_global_environment() -{ +void set_global_environment() { { char b[16]; - snprintfz(b, 15, "%d", default_rrd_update_every); + snprintfz(b, sizeof(b) - 1, "%d", default_rrd_update_every); setenv("NETDATA_UPDATE_EVERY", b, 1); } @@ -843,7 +848,6 @@ void set_global_environment() setenv("NETDATA_LIB_DIR", verify_or_create_required_directory(netdata_configured_varlib_dir), 1); setenv("NETDATA_LOCK_DIR", verify_or_create_required_directory(netdata_configured_lock_dir), 1); setenv("NETDATA_LOG_DIR", verify_or_create_required_directory(netdata_configured_log_dir), 1); - setenv("HOME", verify_or_create_required_directory(netdata_configured_home_dir), 1); setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1); { @@ -922,16 +926,14 @@ void set_global_environment() freez(default_port); // set the path we need - char path[1024 + 1], *p = getenv("PATH"); - if (!p) - p = "/bin:/usr/bin"; - snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"); + char path[4096], *p = getenv("PATH"); + if (!p) p = "/bin:/usr/bin"; + snprintfz(path, sizeof(path), "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"); setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1); // python options p = getenv("PYTHONPATH"); - if (!p) - p = ""; + if (!p) p = ""; setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1); // disable buffering for python plugins @@ -941,37 +943,46 @@ void set_global_environment() setenv("LC_ALL", "C", 1); } -void send_statistics(const char *action, const char *action_result, const char *action_data) -{ +void send_statistics(const char *action, const char *action_result, const char *action_data) { static char *as_script; if (netdata_anonymous_statistics_enabled == -1) { char *optout_file = mallocz( sizeof(char) * (strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2)); + sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + if (likely(access(optout_file, R_OK) != 0)) { as_script = mallocz( sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2)); + sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh"); + if (unlikely(access(as_script, R_OK) != 0)) { netdata_anonymous_statistics_enabled = 0; - netdata_log_info("Anonymous statistics script %s not found.", as_script); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "Statistics script '%s' not found.", + as_script); + freez(as_script); - } else { - netdata_anonymous_statistics_enabled = 1; } - } else { + else + netdata_anonymous_statistics_enabled = 1; + } + else { netdata_anonymous_statistics_enabled = 0; as_script = NULL; } + freez(optout_file); } - if (!netdata_anonymous_statistics_enabled) - return; - if (!action) + + if (!netdata_anonymous_statistics_enabled || !action) return; + if (!action_result) action_result = ""; if (!action_data) @@ -1030,7 +1041,9 @@ void send_statistics(const char *action, const char *action_result, const char * analytics_data.netdata_prebuilt_distro, analytics_data.netdata_fail_reason); - netdata_log_info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data); + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "%s '%s' '%s' '%s'", + as_script, action, action_result, action_data); FILE *fp_child_input; FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input); @@ -1039,11 +1052,21 @@ void send_statistics(const char *action, const char *action_result, const char * char *s = fgets(buffer, 4, fp_child_output); int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); if (exit_code) - netdata_log_error("Execution of anonymous statistics script returned %d.", exit_code); - if (s && strncmp(buffer, "200", 3)) - netdata_log_error("Execution of anonymous statistics script returned http code %s.", buffer); - } else { - netdata_log_error("Failed to run anonymous statistics script %s.", as_script); + + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Statistics script returned error: %d", + exit_code); + + if (s && strncmp(buffer, "200", 3) != 0) + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Statistics script returned http code: %s", + buffer); + } + else + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Failed to run statistics script: %s.", + as_script); + freez(command_to_run); } diff --git a/daemon/analytics.h b/daemon/analytics.h index 0a5cc458d..de8d569f9 100644 --- a/daemon/analytics.h +++ b/daemon/analytics.h @@ -71,6 +71,7 @@ struct analytics_data { size_t charts_count; size_t metrics_count; + SPINLOCK spinlock; bool exporting_enabled; }; diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c index 4bc1e72a4..41af56af8 100644 --- a/daemon/buildinfo.c +++ b/daemon/buildinfo.c @@ -48,6 +48,7 @@ typedef enum __attribute__((packed)) { BIB_FEATURE_CLOUD, BIB_FEATURE_HEALTH, BIB_FEATURE_STREAMING, + BIB_FEATURE_BACKFILLING, BIB_FEATURE_REPLICATION, BIB_FEATURE_STREAMING_COMPRESSION, BIB_FEATURE_CONTEXTS, @@ -66,6 +67,7 @@ typedef enum __attribute__((packed)) { BIB_CONNECTIVITY_NATIVE_HTTPS, BIB_CONNECTIVITY_TLS_HOST_VERIFY, BIB_LIB_LZ4, + BIB_LIB_ZSTD, BIB_LIB_ZLIB, BIB_LIB_JUDY, BIB_LIB_DLIB, @@ -99,6 +101,7 @@ typedef enum __attribute__((packed)) { BIB_PLUGIN_SLABINFO, BIB_PLUGIN_XEN, BIB_PLUGIN_XEN_VBD_ERROR, + BIB_PLUGIN_LOGS_MANAGEMENT, BIB_EXPORT_AWS_KINESIS, BIB_EXPORT_GCP_PUBSUB, BIB_EXPORT_MONGOC, @@ -340,7 +343,7 @@ static struct { .json = "cpu_frequency", .value = "unknown", }, - [BIB_HW_RAM_SIZE] = { + [BIB_HW_ARCHITECTURE] = { .category = BIC_HARDWARE, .type = BIT_STRING, .analytics = NULL, @@ -348,7 +351,7 @@ static struct { .json = "cpu_architecture", .value = "unknown", }, - [BIB_HW_DISK_SPACE] = { + [BIB_HW_RAM_SIZE] = { .category = BIC_HARDWARE, .type = BIT_STRING, .analytics = NULL, @@ -356,7 +359,7 @@ static struct { .json = "ram", .value = "unknown", }, - [BIB_HW_ARCHITECTURE] = { + [BIB_HW_DISK_SPACE] = { .category = BIC_HARDWARE, .type = BIT_STRING, .analytics = NULL, @@ -484,6 +487,14 @@ static struct { .json = "streaming", .value = NULL, }, + [BIB_FEATURE_BACKFILLING] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Back-filling (of higher database tiers)", + .json = "back-filling", + .value = NULL, + }, [BIB_FEATURE_REPLICATION] = { .category = BIC_FEATURE, .type = BIT_BOOLEAN, @@ -498,7 +509,7 @@ static struct { .analytics = "Stream Compression", .print = "Streaming and Replication Compression", .json = "stream-compression", - .value = "none", + .value = NULL, }, [BIB_FEATURE_CONTEXTS] = { .category = BIC_FEATURE, @@ -628,6 +639,14 @@ static struct { .json = "lz4", .value = NULL, }, + [BIB_LIB_ZSTD] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "ZSTD (fast, lossless compression algorithm)", + .json = "zstd", + .value = NULL, + }, [BIB_LIB_ZLIB] = { .category = BIC_LIBS, .type = BIT_BOOLEAN, @@ -893,6 +912,14 @@ static struct { .json = "xen-vbd-error", .value = NULL, }, + [BIB_PLUGIN_LOGS_MANAGEMENT] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "Logs Management", + .print = "Logs Management", + .json = "logs-management", + .value = NULL, + }, [BIB_EXPORT_MONGOC] = { .category = BIC_EXPORTERS, .type = BIT_BOOLEAN, @@ -1029,6 +1056,23 @@ static void build_info_set_value(BUILD_INFO_SLOT slot, const char *value) { BUILD_INFO[slot].value = value; } +static void build_info_append_value(BUILD_INFO_SLOT slot, const char *value) { + size_t size = BUILD_INFO[slot].value ? strlen(BUILD_INFO[slot].value) + 1 : 0; + size += strlen(value); + char buf[size + 1]; + + if(BUILD_INFO[slot].value) { + strcpy(buf, BUILD_INFO[slot].value); + strcat(buf, " "); + strcat(buf, value); + } + else + strcpy(buf, value); + + freez((void *)BUILD_INFO[slot].value); + BUILD_INFO[slot].value = strdupz(buf); +} + static void build_info_set_value_strdupz(BUILD_INFO_SLOT slot, const char *value) { if(!value) value = ""; build_info_set_value(slot, strdupz(value)); @@ -1075,14 +1119,21 @@ __attribute__((constructor)) void initialize_build_info(void) { build_info_set_status(BIB_FEATURE_HEALTH, true); build_info_set_status(BIB_FEATURE_STREAMING, true); + build_info_set_status(BIB_FEATURE_BACKFILLING, true); build_info_set_status(BIB_FEATURE_REPLICATION, true); -#ifdef ENABLE_RRDPUSH_COMPRESSION build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true); -#ifdef ENABLE_LZ4 - build_info_set_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4"); + +#ifdef ENABLE_BROTLI + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli"); #endif +#ifdef ENABLE_ZSTD + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd"); #endif +#ifdef ENABLE_LZ4 + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4"); +#endif + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip"); build_info_set_status(BIB_FEATURE_CONTEXTS, true); build_info_set_status(BIB_FEATURE_TIERING, true); @@ -1117,6 +1168,9 @@ __attribute__((constructor)) void initialize_build_info(void) { #ifdef ENABLE_LZ4 build_info_set_status(BIB_LIB_LZ4, true); #endif +#ifdef ENABLE_ZSTD + build_info_set_status(BIB_LIB_ZSTD, true); +#endif build_info_set_status(BIB_LIB_ZLIB, true); @@ -1198,6 +1252,9 @@ __attribute__((constructor)) void initialize_build_info(void) { #ifdef HAVE_XENSTAT_VBD_ERROR build_info_set_status(BIB_PLUGIN_XEN_VBD_ERROR, true); #endif +#ifdef ENABLE_LOGSMANAGEMENT + build_info_set_status(BIB_PLUGIN_LOGS_MANAGEMENT, true); +#endif build_info_set_status(BIB_EXPORT_PROMETHEUS_EXPORTER, true); build_info_set_status(BIB_EXPORT_GRAPHITE, true); @@ -1234,7 +1291,7 @@ __attribute__((constructor)) void initialize_build_info(void) { // ---------------------------------------------------------------------------- // system info -int get_system_info(struct rrdhost_system_info *system_info, bool log); +int get_system_info(struct rrdhost_system_info *system_info); static void populate_system_info(void) { static bool populated = false; static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; @@ -1257,7 +1314,7 @@ static void populate_system_info(void) { } else { system_info = callocz(1, sizeof(struct rrdhost_system_info)); - get_system_info(system_info, false); + get_system_info(system_info); free_system_info = true; } diff --git a/daemon/commands.c b/daemon/commands.c index a8afb5a00..ed544224e 100644 --- a/daemon/commands.c +++ b/daemon/commands.c @@ -142,10 +142,10 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message) (void)args; (void)message; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("COMMAND: Reloading HEALTH configuration."); health_reload(); - error_log_limit_reset(); + nd_log_limits_reset(); return CMD_STATUS_SUCCESS; } @@ -155,11 +155,11 @@ static cmd_status_t cmd_save_database_execute(char *args, char **message) (void)args; (void)message; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("COMMAND: Saving databases."); rrdhost_save_all(); netdata_log_info("COMMAND: Databases saved."); - error_log_limit_reset(); + nd_log_limits_reset(); return CMD_STATUS_SUCCESS; } @@ -169,10 +169,9 @@ static cmd_status_t cmd_reopen_logs_execute(char *args, char **message) (void)args; (void)message; - error_log_limit_unlimited(); - netdata_log_info("COMMAND: Reopening all log files."); - reopen_all_log_files(); - error_log_limit_reset(); + nd_log_limits_unlimited(); + nd_log_reopen_log_files(); + nd_log_limits_reset(); return CMD_STATUS_SUCCESS; } @@ -182,7 +181,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message) (void)args; (void)message; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("COMMAND: Cleaning up to exit."); netdata_cleanup_and_exit(0); exit(0); diff --git a/daemon/common.h b/daemon/common.h index 4a3905924..b1739879f 100644 --- a/daemon/common.h +++ b/daemon/common.h @@ -28,6 +28,7 @@ #define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed) +#define config_section_destroy(section) appconfig_section_destroy_non_loaded(&netdata_config, section) #define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name) // ---------------------------------------------------------------------------- diff --git a/daemon/daemon.c b/daemon/daemon.c index c7f0b51c6..433fa0373 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -27,24 +27,10 @@ void get_netdata_execution_path(void) { netdata_exe_file[exepath_size] = '\0'; - strcpy(netdata_exe_path, netdata_exe_file); - dirname(netdata_exe_path); -} - -static void chown_open_file(int fd, uid_t uid, gid_t gid) { - if(fd == -1) return; - - struct stat buf; - - if(fstat(fd, &buf) == -1) { - netdata_log_error("Cannot fstat() fd %d", fd); - return; - } - - if((buf.st_uid != uid || buf.st_gid != gid) && S_ISREG(buf.st_mode)) { - if(fchown(fd, uid, gid) == -1) - netdata_log_error("Cannot fchown() fd %d.", fd); - } + // macOS's dirname(3) does not modify passed string + char *tmpdir = strdupz(netdata_exe_file); + strcpy(netdata_exe_path, dirname(tmpdir)); + freez(tmpdir); } static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t gid, bool recursive) @@ -124,9 +110,6 @@ int become_user(const char *username, int pid_fd) { uid_t uid = pw->pw_uid; gid_t gid = pw->pw_gid; - if (am_i_root) - netdata_log_info("I am root, so checking permissions"); - prepare_required_directories(uid, gid); if(pidfile[0]) { @@ -150,9 +133,9 @@ int become_user(const char *username, int pid_fd) { } } + nd_log_chown_log_files(uid, gid); chown_open_file(STDOUT_FILENO, uid, gid); chown_open_file(STDERR_FILENO, uid, gid); - chown_open_file(stdaccess_fd, uid, gid); chown_open_file(pid_fd, uid, gid); if(supplementary_groups && ngroups > 0) { @@ -229,7 +212,7 @@ static void oom_score_adj(void) { // check the environment char *s = getenv("OOMScoreAdjust"); if(!s || !*s) { - snprintfz(buf, 30, "%d", (int)wanted_score); + snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); s = buf; } @@ -264,7 +247,7 @@ static void oom_score_adj(void) { int written = 0; int fd = open("/proc/self/oom_score_adj", O_WRONLY); if(fd != -1) { - snprintfz(buf, 30, "%d", (int)wanted_score); + snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); ssize_t len = strlen(buf); if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1; close(fd); diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index ab910e189..9fb1df5f8 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -65,6 +65,11 @@ static struct global_statistics { uint64_t backfill_queries_made; uint64_t backfill_db_points_read; + uint64_t tier0_hot_gorilla_buffers; + + uint64_t tier0_disk_compressed_bytes; + uint64_t tier0_disk_uncompressed_bytes; + uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; } global_statistics = { @@ -80,6 +85,10 @@ static struct global_statistics { .api_data_queries_made = 0, .api_data_db_points_read = 0, .api_data_result_points_generated = 0, + + .tier0_hot_gorilla_buffers = 0, + .tier0_disk_compressed_bytes = 0, + .tier0_disk_uncompressed_bytes = 0, }; void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) { @@ -108,6 +117,18 @@ void global_statistics_backfill_query_completed(size_t points_read) { __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); } +void global_statistics_gorilla_buffer_add_hot() { + __atomic_fetch_add(&global_statistics.tier0_hot_gorilla_buffers, 1, __ATOMIC_RELAXED); +} + +void global_statistics_tier0_disk_compressed_bytes(uint32_t size) { + __atomic_fetch_add(&global_statistics.tier0_disk_compressed_bytes, size, __ATOMIC_RELAXED); +} + +void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size) { + __atomic_fetch_add(&global_statistics.tier0_disk_uncompressed_bytes, size, __ATOMIC_RELAXED); +} + void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { switch(query_source) { case QUERY_SOURCE_API_DATA: @@ -210,6 +231,11 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED); gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED); + gs->tier0_hot_gorilla_buffers = __atomic_load_n(&global_statistics.tier0_hot_gorilla_buffers, __ATOMIC_RELAXED); + + gs->tier0_disk_compressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_compressed_bytes, __ATOMIC_RELAXED); + gs->tier0_disk_uncompressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_uncompressed_bytes, __ATOMIC_RELAXED); + for(size_t tier = 0; tier < storage_tiers ;tier++) gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED); @@ -816,7 +842,7 @@ static void global_statistics_charts(void) { for(size_t tier = 0; tier < storage_tiers ;tier++) { char buf[30 + 1]; - snprintfz(buf, 30, "tier%zu", tier); + snprintfz(buf, sizeof(buf) - 1, "tier%zu", tier); rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } } @@ -828,6 +854,72 @@ static void global_statistics_charts(void) { } ml_update_global_statistics_charts(gs.ml_models_consulted); + + // ---------------------------------------------------------------- + +#ifdef ENABLE_DBENGINE + if (tier_page_type[0] == PAGE_GORILLA_METRICS) + { + static RRDSET *st_tier0_gorilla_pages = NULL; + static RRDDIM *rd_num_gorilla_pages = NULL; + + if (unlikely(!st_tier0_gorilla_pages)) { + st_tier0_gorilla_pages = rrdset_create_localhost( + "netdata" + , "tier0_gorilla_pages" + , NULL + , "tier0_gorilla_pages" + , NULL + , "Number of gorilla_pages" + , "count" + , "netdata" + , "stats" + , 131004 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_num_gorilla_pages = rrddim_add(st_tier0_gorilla_pages, "count", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_tier0_gorilla_pages, rd_num_gorilla_pages, (collected_number)gs.tier0_hot_gorilla_buffers); + + rrdset_done(st_tier0_gorilla_pages); + } + + if (tier_page_type[0] == PAGE_GORILLA_METRICS) + { + static RRDSET *st_tier0_compression_info = NULL; + + static RRDDIM *rd_compressed_bytes = NULL; + static RRDDIM *rd_uncompressed_bytes = NULL; + + if (unlikely(!st_tier0_compression_info)) { + st_tier0_compression_info = rrdset_create_localhost( + "netdata" + , "tier0_compression_info" + , NULL + , "tier0_compression_info" + , NULL + , "Tier 0 compression info" + , "bytes" + , "netdata" + , "stats" + , 131005 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_compressed_bytes = rrddim_add(st_tier0_compression_info, "compressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_uncompressed_bytes = rrddim_add(st_tier0_compression_info, "uncompressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_tier0_compression_info, rd_compressed_bytes, (collected_number)gs.tier0_disk_compressed_bytes); + rrddim_set_by_pointer(st_tier0_compression_info, rd_uncompressed_bytes, (collected_number)gs.tier0_disk_uncompressed_bytes); + + rrdset_done(st_tier0_compression_info); + } +#endif } // ---------------------------------------------------------------------------- @@ -1881,8 +1973,6 @@ static void dbengine2_statistics_charts(void) { static RRDDIM *rd_mrg_metrics = NULL; static RRDDIM *rd_mrg_acquired = NULL; static RRDDIM *rd_mrg_collected = NULL; - static RRDDIM *rd_mrg_with_retention = NULL; - static RRDDIM *rd_mrg_without_retention = NULL; static RRDDIM *rd_mrg_multiple_writers = NULL; if (unlikely(!st_mrg_metrics)) { @@ -1903,8 +1993,6 @@ static void dbengine2_statistics_charts(void) { rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } priority++; @@ -1912,8 +2000,6 @@ static void dbengine2_statistics_charts(void) { rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries); rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced); rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers); - rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention); - rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention); rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts); rrdset_done(st_mrg_metrics); @@ -3445,6 +3531,7 @@ static struct worker_utilization all_workers_utilization[] = { { .name = "TC", .family = "workers plugin tc", .priority = 1000000 }, { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, { .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 }, + { .name = "LOGSMANAGPLG",.family = "workers plugin logs management", .priority = 1000000 }, { .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 }, { .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 }, { .name = "SERVICE", .family = "workers service", .priority = 1000000 }, diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h index 7bdb153dd..44717c6cf 100644 --- a/daemon/global_statistics.h +++ b/daemon/global_statistics.h @@ -45,6 +45,11 @@ void global_statistics_sqlite3_query_completed(bool success, bool busy, bool loc void global_statistics_sqlite3_row_completed(void); void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array); +void global_statistics_gorilla_buffer_add_hot(); + +void global_statistics_tier0_disk_compressed_bytes(uint32_t size); +void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size); + void global_statistics_web_request_completed(uint64_t dt, uint64_t bytes_received, uint64_t bytes_sent, diff --git a/daemon/main.c b/daemon/main.c index 5d25f88b5..3e1fda963 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -4,6 +4,8 @@ #include "buildinfo.h" #include "static_threads.h" +#include "database/engine/page_test.h" + #if defined(ENV32BIT) #warning COMPILING 32BIT NETDATA #endif @@ -313,7 +315,7 @@ void netdata_cleanup_and_exit(int ret) { const char *prev_msg = NULL; bool timeout = false; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); send_statistics("EXIT", ret?"ERROR":"OK","-"); @@ -371,6 +373,10 @@ void netdata_cleanup_and_exit(int ret) { SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL , 3 * USEC_PER_SEC); + delta_shutdown_time("prepare metasync shutdown"); + + metadata_sync_shutdown_prepare(); + delta_shutdown_time("disable ML detection and training threads"); ml_stop_threads(); @@ -396,10 +402,6 @@ void netdata_cleanup_and_exit(int ret) { rrdhost_cleanup_all(); - delta_shutdown_time("prepare metasync shutdown"); - - metadata_sync_shutdown_prepare(); - delta_shutdown_time("stop aclk threads"); timeout = !service_wait_exit( @@ -422,6 +424,13 @@ void netdata_cleanup_and_exit(int ret) { delta_shutdown_time("flush dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_prepare_exit(multidb_ctx[tier]); + + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier]) + continue; + completion_wait_for(&multidb_ctx[tier]->quiesce.completion); + completion_destroy(&multidb_ctx[tier]->quiesce.completion); + } } #endif @@ -440,17 +449,20 @@ void netdata_cleanup_and_exit(int ret) { delta_shutdown_time("wait for dbengine collectors to finish"); size_t running = 1; - while(running) { + size_t count = 10; + while(running && count) { running = 0; for (size_t tier = 0; tier < storage_tiers; tier++) running += rrdeng_collectors_running(multidb_ctx[tier]); if(running) { - error_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); - error_limit(&erl, "waiting for %zu collectors to finish", running); + nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, + "waiting for %zu collectors to finish", running); // sleep_usec(100 * USEC_PER_MS); cleanup_destroyed_dictionaries(); } + count--; } delta_shutdown_time("wait for dbengine main cache to finish flushing"); @@ -463,6 +475,8 @@ void netdata_cleanup_and_exit(int ret) { delta_shutdown_time("stop dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit(multidb_ctx[tier]); + + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); } #endif } @@ -614,8 +628,14 @@ int killpid(pid_t pid) { int ret; netdata_log_debug(D_EXIT, "Request to kill pid %d", pid); + int signal = SIGTERM; +//#ifdef NETDATA_INTERNAL_CHECKS +// if(service_running(SERVICE_COLLECTORS)) +// signal = SIGABRT; +//#endif + errno = 0; - ret = kill(pid, SIGTERM); + ret = kill(pid, signal); if (ret == -1) { switch(errno) { case ESRCH: @@ -662,7 +682,7 @@ static void set_nofile_limit(struct rlimit *rl) { } void cancel_main_threads() { - error_log_limit_unlimited(); + nd_log_limits_unlimited(); int i, found = 0; usec_t max = 5 * USEC_PER_SEC, step = 100000; @@ -752,7 +772,7 @@ int help(int exitcode) { " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" "\n" - " Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>\n" + " Copyright (C) 2016-2023, Netdata, Inc. <info@netdata.cloud>\n" " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" @@ -790,6 +810,7 @@ int help(int exitcode) { " -W unittest Run internal unittests and exit.\n\n" " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n" " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" + " -W sqlite-analyze Run update statistics and exit.\n\n" #ifdef ENABLE_DBENGINE " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" " -W stresstest=A,B,C,D,E,F,G\n" @@ -841,44 +862,52 @@ static void security_init(){ #endif static void log_init(void) { + nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon")); + + time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD; + size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS; + period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period); + logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs); + nd_log_set_flood_protection(logs, period); + + const char *netdata_log_level = getenv("NETDATA_LOG_LEVEL"); + netdata_log_level = netdata_log_level ? nd_log_id2priority(nd_log_priority2id(netdata_log_level)) : NDLP_INFO_STR; + + nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", netdata_log_level)); + char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); - stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename); + nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename)); - snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir); - stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename); + bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */; + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename)); - snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir); - stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename); + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename)); snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); - stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); + nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename)); - snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); - stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename); + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename)); #ifdef ENABLE_ACLK aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); if (aclklog_enabled) { snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir); - aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename); + nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename)); } #endif - - char deffacility[8]; - snprintfz(deffacility,7,"%s","daemon"); - facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility); - - error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period); - error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period); - error_log_errors_per_period_backup = error_log_errors_per_period; - - setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1); - setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1); - - char *selected_level = config_get(CONFIG_SECTION_LOGS, "severity level", NETDATA_LOG_LEVEL_INFO_STR); - global_log_severity_level = log_severity_string_to_severity_level(selected_level); - setenv("NETDATA_LOG_SEVERITY_LEVEL", selected_level , 1); } char *initialize_lock_directory_path(char *prefix) @@ -1050,6 +1079,36 @@ static void backwards_compatible_config() { config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", CONFIG_SECTION_DB, "enable zero metrics"); + config_move(CONFIG_SECTION_LOGS, "error", + CONFIG_SECTION_LOGS, "daemon"); + + config_move(CONFIG_SECTION_LOGS, "severity level", + CONFIG_SECTION_LOGS, "level"); + + config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "logs to trigger flood protection"); + + config_move(CONFIG_SECTION_LOGS, "errors flood protection period", + CONFIG_SECTION_LOGS, "logs flood protection period"); + config_move(CONFIG_SECTION_HEALTH, "is ephemeral", + CONFIG_SECTION_GLOBAL, "is ephemeral node"); + + config_move(CONFIG_SECTION_HEALTH, "has unstable connection", + CONFIG_SECTION_GLOBAL, "has unstable connection"); +} + +static int get_hostname(char *buf, size_t buf_size) { + if (netdata_configured_host_prefix && *netdata_configured_host_prefix) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix); + + if (!read_file(filename, buf, buf_size)) { + trim(buf); + return 0; + } + } + + return gethostname(buf, buf_size); } static void get_netdata_configured_variables() { @@ -1058,10 +1117,12 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get the hostname + netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); + verify_netdata_host_prefix(true); + char buf[HOSTNAME_MAX + 1]; - if(gethostname(buf, HOSTNAME_MAX) == -1){ + if (get_hostname(buf, HOSTNAME_MAX)) netdata_log_error("Cannot get machine hostname."); - } netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf); netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); @@ -1112,8 +1173,6 @@ static void get_netdata_configured_variables() { netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); - char *env_home=getenv("HOME"); - netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir); netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); @@ -1124,6 +1183,16 @@ static void get_netdata_configured_variables() { #ifdef ENABLE_DBENGINE // ------------------------------------------------------------------------ + // get default Database Engine page type + + const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw"); + if (strcmp(page_type, "gorilla") == 0) { + tier_page_type[0] = PAGE_GORILLA_METRICS; + } else if (strcmp(page_type, "raw") != 0) { + netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type); + } + + // ------------------------------------------------------------------------ // get default Database Engine page cache size in MiB default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); @@ -1161,10 +1230,6 @@ static void get_netdata_configured_variables() { default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; } #endif - // ------------------------------------------------------------------------ - - netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); - verify_netdata_host_prefix(); // -------------------------------------------------------------------- // get KSM settings @@ -1184,6 +1249,7 @@ static void get_netdata_configured_variables() { // -------------------------------------------------------------------- rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); + rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s); // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. @@ -1262,7 +1328,7 @@ static inline void coverity_remove_taint(char *s) (void)s; } -int get_system_info(struct rrdhost_system_info *system_info, bool log) { +int get_system_info(struct rrdhost_system_info *system_info) { char *script; script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2)); sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh"); @@ -1294,11 +1360,7 @@ int get_system_info(struct rrdhost_system_info *system_info, bool log) { if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) { netdata_log_error("Unexpected environment variable %s=%s", line, value); - } - else { - if(log) - netdata_log_info("%s=%s", line, value); - + } else { setenv(line, value, 1); } } @@ -1337,6 +1399,8 @@ int julytest(void); int pluginsd_parser_unittest(void); void replication_initialize(void); void bearer_tokens_init(void); +int unittest_rrdpush_compressions(void); +int uuid_unittest(void); int main(int argc, char **argv) { // initialize the system clocks @@ -1346,8 +1410,6 @@ int main(int argc, char **argv) { usec_t started_ut = now_monotonic_usec(); usec_t last_ut = started_ut; const char *prev_msg = NULL; - // Initialize stderror avoiding coredump when netdata_log_info() or netdata_log_error() is called - stderror = stderr; int i; int config_loaded = 0; @@ -1439,6 +1501,10 @@ int main(int argc, char **argv) { #ifdef ENABLE_DBENGINE char* createdataset_string = "createdataset="; char* stresstest_string = "stresstest="; + + if(strcmp(optarg, "pgd-tests") == 0) { + return pgd_test(argc, argv); + } #endif if(strcmp(optarg, "sqlite-meta-recover") == 0) { @@ -1451,6 +1517,11 @@ int main(int argc, char **argv) { return 0; } + if(strcmp(optarg, "sqlite-analyze") == 0) { + sql_init_database(DB_CHECK_ANALYZE, 0); + return 0; + } + if(strcmp(optarg, "unittest") == 0) { unittest_running = true; @@ -1495,6 +1566,8 @@ int main(int argc, char **argv) { return 1; if (ctx_unittest()) return 1; + if (uuid_unittest()) + return 1; fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); return 0; } @@ -1521,6 +1594,10 @@ int main(int argc, char **argv) { unittest_running = true; return buffer_unittest(); } + else if(strcmp(optarg, "uuidtest") == 0) { + unittest_running = true; + return uuid_unittest(); + } #ifdef ENABLE_DBENGINE else if(strcmp(optarg, "mctest") == 0) { unittest_running = true; @@ -1550,6 +1627,10 @@ int main(int argc, char **argv) { unittest_running = true; return pluginsd_parser_unittest(); } + else if(strcmp(optarg, "rrdpush_compressions_test") == 0) { + unittest_running = true; + return unittest_rrdpush_compressions(); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -1851,7 +1932,7 @@ int main(int argc, char **argv) { { char buf[20 + 1]; - snprintfz(buf, 20, "%d", libuv_worker_threads); + snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads); setenv("UV_THREADPOOL_SIZE", buf, 1); } @@ -1894,13 +1975,15 @@ int main(int argc, char **argv) { // get log filenames and settings log_init(); - error_log_limit_unlimited(); + nd_log_limits_unlimited(); // initialize the log files - open_all_log_files(); + nd_log_initialize(); netdata_log_info("Netdata agent version \""VERSION"\" is starting"); ieee754_doubles = is_system_ieee754_double(); + if(!ieee754_doubles) + globally_disabled_capabilities |= STREAM_CAP_IEEE754; aral_judy_init(); @@ -1925,11 +2008,11 @@ int main(int argc, char **argv) { set_silencers_filename(); health_initialize_global_silencers(); - // -------------------------------------------------------------------- - // Initialize ML configuration - - delta_startup_time("initialize ML"); - ml_init(); +// // -------------------------------------------------------------------- +// // Initialize ML configuration +// +// delta_startup_time("initialize ML"); +// ml_init(); // -------------------------------------------------------------------- // setup process signals @@ -1949,6 +2032,15 @@ int main(int argc, char **argv) { // setup threads configs default_stacksize = netdata_threads_init(); +#ifdef NETDATA_INTERNAL_CHECKS + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true); + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true); +#endif + + if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false)) + // this has to run before starting any other threads that use workers + workers_utilization_enable(); + for (i = 0; static_threads[i].name != NULL ; i++) { struct netdata_static_thread *st = &static_threads[i]; @@ -1973,8 +2065,18 @@ int main(int argc, char **argv) { web_client_api_v1_init(); web_server_threading_selection(); - if(web_server_mode != WEB_SERVER_MODE_NONE) - api_listen_sockets_setup(); + if(web_server_mode != WEB_SERVER_MODE_NONE) { + if (!api_listen_sockets_setup()) { + netdata_log_error("Cannot setup listen port(s). Is Netdata already running?"); + exit(1); + } + } + + // -------------------------------------------------------------------- + // Initialize ML configuration + + delta_startup_time("initialize ML"); + ml_init(); #ifdef ENABLE_H2O delta_startup_time("initialize h2o server"); @@ -2006,6 +2108,16 @@ int main(int argc, char **argv) { if(become_daemon(dont_fork, user) == -1) fatal("Cannot daemonize myself."); + // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME". + struct passwd *pw = getpwuid(getuid()); + if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir); + } else { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir); + } + + setenv("HOME", netdata_configured_home_dir, 1); + dyn_conf_init(); netdata_log_info("netdata started on pid %d.", getpid()); @@ -2039,7 +2151,7 @@ int main(int argc, char **argv) { netdata_anonymous_statistics_enabled=-1; struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); - get_system_info(system_info, true); + get_system_info(system_info); (void) registry_get_this_machine_guid(); system_info->hops = 0; get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); @@ -2076,7 +2188,7 @@ int main(int argc, char **argv) { // ------------------------------------------------------------------------ // enable log flood protection - error_log_limit_reset(); + nd_log_limits_reset(); // Load host labels delta_startup_time("collect host labels"); diff --git a/daemon/service.c b/daemon/service.c index f7fe86e04..8a65de66c 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -76,33 +76,48 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { rrddim_free(st, rd); } -static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { +static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { + if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) + return true; + worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + RRDDIM *rd; time_t now = now_realtime_sec(); - bool done_all_dimensions = true; + size_t dim_candidates = 0; + size_t dim_archives = 0; dfe_start_write(st->rrddim_root_index, rd) { - if(unlikely( - all_dimensions || - (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now)) - )) { - - if(dictionary_acquired_item_references(rd_dfe.item) == 1) { - netdata_log_info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st)); - svc_rrddim_obsolete_to_archive(rd); + bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)); + + if(candidate) { + dim_candidates++; + + if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) { + size_t references = dictionary_acquired_item_references(rd_dfe.item); + if(references == 1) { +// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + svc_rrddim_obsolete_to_archive(rd); + dim_archives++; + } +// else +// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); } - else - done_all_dimensions = false; } - else - done_all_dimensions = false; } dfe_done(rd); - return done_all_dimensions; + if(dim_archives != dim_candidates) { + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + return false; + } + + return true; } static void svc_rrdset_obsolete_to_free(RRDSET *st) { @@ -132,50 +147,78 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) { rrdset_free(st); } -static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { +static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) { + if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) + return; + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + size_t full_candidates = 0; + size_t full_archives = 0; + size_t partial_candidates = 0; + size_t partial_archives = 0; + time_t now = now_realtime_sec(); RRDSET *st; rrdset_foreach_reentrant(st, host) { if(rrdset_is_replicating(st)) continue; - if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) - && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now - && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now - && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now - )) { - svc_rrdset_obsolete_to_free(st); - } - else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { - rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); - svc_rrdset_archive_obsolete_dimensions(st, false); + RRDSET_FLAGS flags = rrdset_flag_get(st); + bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE; + bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS; + + if(obsolete_dims) { + partial_candidates++; + + if(svc_rrdset_archive_obsolete_dimensions(st, false)) + partial_archives++; } - else if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { - rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); + + if(obsolete_chart) { + full_candidates++; + + if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now + )) { + svc_rrdset_obsolete_to_free(st); + full_archives++; + } } } rrdset_foreach_done(st); + + if(partial_archives != partial_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + if(full_archives != full_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); } -static void svc_rrdset_check_obsoletion(RRDHOST *host) { +static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) { worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); time_t now = now_realtime_sec(); time_t last_entry_t; RRDSET *st; + + time_t child_connect_time = host->child_connect_time; + rrdset_foreach_read(st, host) { if(rrdset_is_replicating(st)) continue; last_entry_t = rrdset_last_entry_s(st); - if(last_entry_t && last_entry_t < host->child_connect_time && - host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every - < now) + if (last_entry_t && last_entry_t < child_connect_time && + child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + + (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) < + now) - rrdset_is_obsolete(st); + rrdset_is_obsolete___safe_from_collector_thread(st); } rrdset_foreach_done(st); } @@ -190,24 +233,24 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) continue; - if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) { - rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); - svc_rrdhost_cleanup_obsolete_charts(host); - } + svc_rrdhost_cleanup_charts_marked_obsolete(host); - if(host != localhost - && host->trigger_chart_obsoletion_check - && ( - ( - host->child_last_chart_command - && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec() - ) - || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) - ) - ) { - svc_rrdset_check_obsoletion(host); + if (host == localhost) + continue; + + netdata_mutex_lock(&host->receiver_lock); + + time_t now = now_realtime_sec(); + + if (host->trigger_chart_obsoletion_check && + ((host->child_last_chart_command && + host->child_last_chart_command + host->health.health_delay_up_to < now) || + (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) { + svc_rrdhost_detect_obsolete_charts(host); host->trigger_chart_obsoletion_check = 0; } + + netdata_mutex_unlock(&host->receiver_lock); } rrd_unlock(); @@ -226,22 +269,45 @@ restart_after_removal: if(!rrdhost_should_be_removed(host, protected_host, now)) continue; - netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); + bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); + if (!is_archived) { + netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); - if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) - ) { - worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); - rrdhost_delete_charts(host); + if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) { + worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); + rrdhost_delete_charts(host); + } + else { + worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); + rrdhost_save_charts(host); + } } - else { - worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); - rrdhost_save_charts(host); + + bool force = false; + + if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s) + force = true; + + if (!force && is_archived) + continue; + + if (force) { + netdata_log_info("Host '%s' with machine guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid); } worker_is_busy(WORKER_JOB_FREE_HOST); - rrdhost_free___while_having_rrd_wrlock(host, false); +#ifdef ENABLE_ACLK + // in case we have cloud connection we inform cloud + // a child disconnected + if (netdata_cloud_enabled && force) { + aclk_host_state_update(host, 0, 0); + unregister_node(host->machine_guid); + } +#endif + rrdhost_free___while_having_rrd_wrlock(host, force); goto restart_after_removal; } diff --git a/daemon/signals.c b/daemon/signals.c index ae28874cc..4f2254334 100644 --- a/daemon/signals.c +++ b/daemon/signals.c @@ -42,7 +42,7 @@ static void signal_handler(int signo) { if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) { char buffer[200 + 1]; - snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); + snprintfz(buffer, sizeof(buffer) - 1, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) { // nothing to do - we cannot write but there is no way to complain about it ; @@ -203,28 +203,28 @@ void signals_handle(void) { switch (signals_waiting[i].action) { case NETDATA_SIGNAL_RELOAD_HEALTH: - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name); - error_log_limit_reset(); + nd_log_limits_reset(); execute_command(CMD_RELOAD_HEALTH, NULL, NULL); break; case NETDATA_SIGNAL_SAVE_DATABASE: - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Saving databases...", name); - error_log_limit_reset(); + nd_log_limits_reset(); execute_command(CMD_SAVE_DATABASE, NULL, NULL); break; case NETDATA_SIGNAL_REOPEN_LOGS: - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name); - error_log_limit_reset(); + nd_log_limits_reset(); execute_command(CMD_REOPEN_LOGS, NULL, NULL); break; case NETDATA_SIGNAL_EXIT_CLEANLY: - error_log_limit_unlimited(); + nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name); commands_exit(); netdata_cleanup_and_exit(0); diff --git a/daemon/static_threads.c b/daemon/static_threads.c index 830b854e6..b70373d74 100644 --- a/daemon/static_threads.c +++ b/daemon/static_threads.c @@ -61,7 +61,7 @@ const struct netdata_static_thread static_threads_common[] = { .config_name = "netdata monitoring", .env_name = "NETDATA_INTERNALS_MONITORING", .global_variable = &global_statistics_enabled, - .enabled = 1, + .enabled = 0, .thread = NULL, .init_routine = NULL, .start_routine = global_statistics_main @@ -69,10 +69,10 @@ const struct netdata_static_thread static_threads_common[] = { { .name = "STATS_WORKERS", .config_section = CONFIG_SECTION_PLUGINS, - .config_name = "netdata monitoring", + .config_name = "netdata monitoring extended", .env_name = "NETDATA_INTERNALS_MONITORING", .global_variable = &global_statistics_enabled, - .enabled = 1, + .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" .thread = NULL, .init_routine = NULL, .start_routine = global_statistics_workers_main @@ -80,10 +80,10 @@ const struct netdata_static_thread static_threads_common[] = { { .name = "STATS_SQLITE3", .config_section = CONFIG_SECTION_PLUGINS, - .config_name = "netdata monitoring", + .config_name = "netdata monitoring extended", .env_name = "NETDATA_INTERNALS_MONITORING", .global_variable = &global_statistics_enabled, - .enabled = 1, + .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" .thread = NULL, .init_routine = NULL, .start_routine = global_statistics_sqlite3_main diff --git a/daemon/system-info.sh b/daemon/system-info.sh index 2a3ba7d76..aaca7fd4b 100755 --- a/daemon/system-info.sh +++ b/daemon/system-info.sh @@ -57,7 +57,7 @@ if [ -z "${VIRTUALIZATION}" ]; then VIRTUALIZATION="unknown" VIRT_DETECTION="none" elif [ "$VIRTUALIZATION" != "none" ] && [ "$VIRTUALIZATION" != "unknown" ]; then - VIRTUALIZATION=$(virtualization_normalize_name $VIRTUALIZATION) + VIRTUALIZATION=$(virtualization_normalize_name "$VIRTUALIZATION") fi else # Passed from outside - probably in docker run @@ -101,6 +101,10 @@ if [ "${CONTAINER}" = "unknown" ]; then CONT_DETECTION="kubernetes" fi + if [ "${KERNEL_NAME}" = FreeBSD ] && command -v sysctl && sysctl security.jail.jailed 2>/dev/null | grep -q "1$"; then + CONTAINER="jail" + CONT_DETECTION="sysctl" + fi fi # ------------------------------------------------------------------------------------------------- diff --git a/daemon/unit_test.c b/daemon/unit_test.c index b8d229316..149bbec0c 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -97,7 +97,7 @@ static int check_number_printing(void) { int i, failed = 0; for(i = 0; values[i].correct ; i++) { print_netdata_double(netdata, values[i].n); - snprintfz(system, 512, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); int ok = 1; if(strcmp(netdata, values[i].correct) != 0) { @@ -319,7 +319,7 @@ void benchmark_storage_number(int loop, int multiplier) { for(i = 0; i < loop ;i++) { n *= multiplier; if(n > storage_number_positive_max) n = storage_number_positive_min; - snprintfz(buffer, 100, NETDATA_DOUBLE_FORMAT, n); + snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, n); } } @@ -507,7 +507,7 @@ int unit_test_buffer() { const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s"; buffer_sprintf(wb, fmt, string, string, string, string); - snprintfz(final, 9000, fmt, string, string, string, string); + snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string); const char *s = buffer_tostring(wb); @@ -1272,7 +1272,7 @@ int run_test(struct test *test) default_rrd_update_every = test->update_every; char name[101]; - snprintfz(name, 100, "unittest-%s", test->name); + snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name); // create the chart RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1 @@ -1534,7 +1534,7 @@ int unit_test(long delay, long shift) repeat++; char name[101]; - snprintfz(name, 100, "unittest-%d-%ld-%ld", repeat, delay, shift); + snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift); //debug_flags = 0xffffffff; default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; @@ -1681,13 +1681,6 @@ int test_sqlite(void) { rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); if (rc != SQLITE_OK) goto error; - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str); - rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); - if (rc != SQLITE_OK) - goto error; - buffer_flush(sql); buffer_free(sql); fprintf(stderr,"SQLite is OK\n"); @@ -1831,30 +1824,29 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name) /* We don't want to drop metrics when generating load, we prefer to block data generation itself */ return rrdhost_find_or_create( - name - , name - , name - , os_type - , netdata_configured_timezone - , netdata_configured_abbrev_timezone - , netdata_configured_utc_offset - , "" - , program_name - , program_version - , default_rrd_update_every - , default_rrd_history_entries - , RRD_MEMORY_MODE_DBENGINE - , default_health_enabled - , default_rrdpush_enabled - , default_rrdpush_destination - , default_rrdpush_api_key - , default_rrdpush_send_charts_matching - , default_rrdpush_enable_replication - , default_rrdpush_seconds_to_replicate - , default_rrdpush_replication_step - , NULL - , 0 - ); + name, + name, + name, + os_type, + netdata_configured_timezone, + netdata_configured_abbrev_timezone, + netdata_configured_utc_offset, + "", + program_name, + program_version, + default_rrd_update_every, + default_rrd_history_entries, + RRD_MEMORY_MODE_DBENGINE, + default_health_enabled, + default_rrdpush_enabled, + default_rrdpush_destination, + default_rrdpush_api_key, + default_rrdpush_send_charts_matching, + default_rrdpush_enable_replication, + default_rrdpush_seconds_to_replicate, + default_rrdpush_replication_step, + NULL, + 0); } // constants for test_dbengine @@ -1878,7 +1870,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI char name[101]; for (i = 0 ; i < CHARTS ; ++i) { - snprintfz(name, 100, "dbengine-chart-%d", i); + snprintfz(name, sizeof(name) - 1, "dbengine-chart-%d", i); // create the chart st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", @@ -1886,7 +1878,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG); rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST); for (j = 0 ; j < DIMS ; ++j) { - snprintfz(name, 100, "dim-%d", j); + snprintfz(name, sizeof(name) - 1, "dim-%d", j); rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } @@ -2108,6 +2100,14 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] return errors + value_errors + time_errors; } +void test_dbengine_charts_and_dims_are_not_collected(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]) { + for(int c = 0; c < CHARTS ; c++) { + st[c]->rrdcontexts.collected = false; + for(int d = 0; d < DIMS ; d++) + rd[c][d]->rrdcontexts.collected = false; + } +} + int test_dbengine(void) { fprintf(stderr, "%s() running...\n", __FUNCTION__ ); @@ -2117,7 +2117,7 @@ int test_dbengine(void) RRDDIM *rd[CHARTS][DIMS]; time_t time_start[REGIONS], time_end[REGIONS]; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); fprintf(stderr, "\nRunning DB-engine test\n"); default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; @@ -2135,6 +2135,7 @@ int test_dbengine(void) time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + test_dbengine_charts_and_dims_are_not_collected(st, rd); current_region = 1; //this is the second region of data update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds @@ -2152,6 +2153,7 @@ int test_dbengine(void) time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + test_dbengine_charts_and_dims_are_not_collected(st, rd); current_region = 2; //this is the third region of data update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds @@ -2169,6 +2171,7 @@ int test_dbengine(void) time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + test_dbengine_charts_and_dims_are_not_collected(st, rd); for (current_region = 0 ; current_region < REGIONS ; ++current_region) { errors += test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]); @@ -2237,6 +2240,7 @@ int test_dbengine(void) rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); rrdhost_delete_charts(host); rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); rrd_unlock(); return errors + value_errors + time_errors; @@ -2343,7 +2347,7 @@ void generate_dbengine_dataset(unsigned history_seconds) (1024 * 1024); default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'"); host = dbengine_rrdhost_find_or_create("dbengine-dataset"); @@ -2368,12 +2372,12 @@ void generate_dbengine_dataset(unsigned history_seconds) thread_info[i]->time_max = 0; thread_info[i]->done = 0; completion_init(&thread_info[i]->charts_initialized); - assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i])); + fatal_assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i])); completion_wait_for(&thread_info[i]->charts_initialized); completion_destroy(&thread_info[i]->charts_initialized); } for (i = 0 ; i < DSET_CHARTS ; ++i) { - assert(0 == uv_thread_join(&thread_info[i]->thread)); + fatal_assert(0 == uv_thread_join(&thread_info[i]->thread)); } for (i = 0 ; i < DSET_CHARTS ; ++i) { @@ -2518,7 +2522,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi unsigned i, j; time_t time_start, test_duration; - error_log_limit_unlimited(); + nd_log_limits_unlimited(); if (!TEST_DURATION_SEC) TEST_DURATION_SEC = 10; @@ -2575,7 +2579,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi chart_threads[i]->done = 0; chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0; completion_init(&chart_threads[i]->charts_initialized); - assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i])); + fatal_assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i])); } /* barrier so that subsequent queries can access valid chart data */ for (i = 0 ; i < DSET_CHARTS ; ++i) { @@ -2597,7 +2601,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi query_threads[i]->chart_threads[j] = chart_threads[j]; } query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0; - assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i])); + fatal_assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i])); } sleep(TEST_DURATION_SEC); /* stop workload */ @@ -2646,6 +2650,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); rrdhost_delete_charts(host); rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); rrd_unlock(); } |