diff options
Diffstat (limited to '')
-rw-r--r-- | src/daemon/main.c (renamed from daemon/main.c) | 421 |
1 files changed, 215 insertions, 206 deletions
diff --git a/daemon/main.c b/src/daemon/main.c index 3e1fda963..c9602432a 100644 --- a/daemon/main.c +++ b/src/daemon/main.c @@ -2,10 +2,15 @@ #include "common.h" #include "buildinfo.h" +#include "daemon/watcher.h" #include "static_threads.h" #include "database/engine/page_test.h" +#ifdef ENABLE_SENTRY +#include "sentry-native/sentry-native.h" +#endif + #if defined(ENV32BIT) #warning COMPILING 32BIT NETDATA #endif @@ -295,133 +300,94 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { return (running == 0); } -#define delta_shutdown_time(msg) \ - { \ - usec_t now_ut = now_monotonic_usec(); \ - if(prev_msg) \ - netdata_log_info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, msg); \ - else \ - netdata_log_info("NETDATA SHUTDOWN: next: %s", msg); \ - last_ut = now_ut; \ - prev_msg = msg; \ - timeout = false; \ - } - void web_client_cache_destroy(void); -void netdata_cleanup_and_exit(int ret) { - usec_t started_ut = now_monotonic_usec(); - usec_t last_ut = started_ut; - const char *prev_msg = NULL; - bool timeout = false; +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { + watcher_shutdown_begin(); nd_log_limits_unlimited(); netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); - send_statistics("EXIT", ret?"ERROR":"OK","-"); + // send the stat from our caller + analytics_statistic_t statistic = { action, action_result, action_data }; + analytics_statistic_send(&statistic); - delta_shutdown_time("create shutdown file"); + // notify we are exiting + statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"}; + analytics_statistic_send(&statistic); char agent_crash_file[FILENAME_MAX + 1]; char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("dbengine exit mode"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit_mode(multidb_ctx[tier]); } #endif - - delta_shutdown_time("close webrtc connections"); + watcher_step_complete(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); webrtc_close_all_connections(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); - delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk"); - - service_signal_exit( - SERVICE_MAINTENANCE - | ABILITY_DATA_QUERIES - | ABILITY_WEB_REQUESTS - | ABILITY_STREAMING_CONNECTIONS - | SERVICE_ACLK - | SERVICE_ACLKSYNC - ); - - delta_shutdown_time("stop replication, exporters, health and web servers threads"); - - timeout = !service_wait_exit( - SERVICE_EXPORTERS - | SERVICE_HEALTH - | SERVICE_WEB_SERVER - | SERVICE_HTTPD - , 3 * USEC_PER_SEC); + service_signal_exit(SERVICE_MAINTENANCE | ABILITY_DATA_QUERIES | ABILITY_WEB_REQUESTS | + ABILITY_STREAMING_CONNECTIONS | SERVICE_ACLK | SERVICE_ACLKSYNC); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); - delta_shutdown_time("stop collectors and streaming threads"); + service_wait_exit(SERVICE_MAINTENANCE, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); - timeout = !service_wait_exit( - SERVICE_COLLECTORS - | SERVICE_STREAMING - , 3 * USEC_PER_SEC); + service_wait_exit(SERVICE_EXPORTERS | SERVICE_HEALTH | SERVICE_WEB_SERVER | SERVICE_HTTPD, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); - delta_shutdown_time("stop replication threads"); + service_wait_exit(SERVICE_COLLECTORS | SERVICE_STREAMING, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); - timeout = !service_wait_exit( - SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL - , 3 * USEC_PER_SEC); - - delta_shutdown_time("prepare metasync shutdown"); + service_wait_exit(SERVICE_REPLICATION, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); metadata_sync_shutdown_prepare(); - - delta_shutdown_time("disable ML detection and training threads"); + watcher_step_complete(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); ml_stop_threads(); ml_fini(); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); - delta_shutdown_time("stop context thread"); - - timeout = !service_wait_exit( - SERVICE_CONTEXT - , 3 * USEC_PER_SEC); - - delta_shutdown_time("stop maintenance thread"); - - timeout = !service_wait_exit( - SERVICE_MAINTENANCE - , 3 * USEC_PER_SEC); - - delta_shutdown_time("clear web client cache"); + service_wait_exit(SERVICE_CONTEXT, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); web_client_cache_destroy(); + watcher_step_complete(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); - delta_shutdown_time("clean rrdhost database"); - - rrdhost_cleanup_all(); - - delta_shutdown_time("stop aclk threads"); - - timeout = !service_wait_exit( - SERVICE_ACLK - , 3 * USEC_PER_SEC); + service_wait_exit(SERVICE_ACLK, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ACLK_THREADS); - delta_shutdown_time("stop all remaining worker threads"); - - timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC); - - delta_shutdown_time("cancel main threads"); + service_wait_exit(~0, 10 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); cancel_main_threads(); + watcher_step_complete(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + + if (ret) + { + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); - if(!ret) { + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } + else + { // exit cleanly #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("flush dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_prepare_exit(multidb_ctx[tier]); @@ -433,21 +399,16 @@ void netdata_cleanup_and_exit(int ret) { } } #endif + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); - // free the database - delta_shutdown_time("stop collection for all hosts"); - - // rrdhost_free_all(); rrd_finalize_collection_for_all_hosts(); - - delta_shutdown_time("stop metasync threads"); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); metadata_sync_shutdown(); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("wait for dbengine collectors to finish"); - size_t running = 1; size_t count = 10; while(running && count) { @@ -455,62 +416,71 @@ void netdata_cleanup_and_exit(int ret) { for (size_t tier = 0; tier < storage_tiers; tier++) running += rrdeng_collectors_running(multidb_ctx[tier]); - if(running) { + if (running) { nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); - nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, - "waiting for %zu collectors to finish", running); - // sleep_usec(100 * USEC_PER_MS); - cleanup_destroyed_dictionaries(); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "waiting for %zu collectors to finish", running); } count--; } - - delta_shutdown_time("wait for dbengine main cache to finish flushing"); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); while (pgc_hot_and_dirty_entries(main_cache)) { pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); sleep_usec(100 * USEC_PER_MS); } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); - delta_shutdown_time("stop dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit(multidb_ctx[tier]); - rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } else { + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); } +#else + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); #endif } - delta_shutdown_time("close SQL context db"); - - sql_close_context_database(); - - delta_shutdown_time("closed SQL main db"); + sqlite_close_databases(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + sqlite_library_shutdown(); - sql_close_database(); // unlink the pid if(pidfile[0]) { - delta_shutdown_time("remove pid file"); - if(unlink(pidfile) != 0) netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile); } + watcher_step_complete(WATCHER_STEP_ID_REMOVE_PID_FILE); #ifdef ENABLE_HTTPS - delta_shutdown_time("free openssl structures"); netdata_ssl_cleanup(); #endif - - delta_shutdown_time("remove incomplete shutdown file"); + watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); (void) unlink(agent_incomplete_shutdown_file); - - delta_shutdown_time("exit"); - - usec_t ended_ut = now_monotonic_usec(); - netdata_log_info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS); + watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + watcher_shutdown_end(); + watcher_thread_stop(); + +#ifdef ENABLE_SENTRY + if (ret) + abort(); + else { + sentry_native_fini(); + exit(ret); + } +#else exit(ret); +#endif } void web_server_threading_selection(void) { @@ -684,6 +654,9 @@ static void set_nofile_limit(struct rlimit *rl) { void cancel_main_threads() { nd_log_limits_unlimited(); + if (!static_threads) + return; + int i, found = 0; usec_t max = 5 * USEC_PER_SEC, step = 100000; for (i = 0; static_threads[i].name != NULL ; i++) { @@ -707,8 +680,14 @@ void cancel_main_threads() { sleep_usec(step); found = 0; for (i = 0; static_threads[i].name != NULL ; i++) { - if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) - found++; + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_EXITED) + continue; + + // Don't wait ourselves. + if (static_threads[i].thread && (*static_threads[i].thread == pthread_self())) + continue; + + found++; } } @@ -725,23 +704,28 @@ void cancel_main_threads() { freez(static_threads[i].thread); freez(static_threads); + static_threads = NULL; } -struct option_def option_definitions[] = { - // opt description arg name default value - { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, - { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, - { 'd', "Fork. Run in the background.", NULL, "run in the background"}, - { 'h', "Display this help message.", NULL, NULL}, - { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, - { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, - { 'p', "API/Web port to use.", "port", "19999"}, - { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, - { 't', "The internal clock of netdata.", "seconds", "1"}, - { 'u', "Run as user.", "username", "netdata"}, - { 'v', "Print netdata version and exit.", NULL, NULL}, - { 'V', "Print netdata version and exit.", NULL, NULL}, - { 'W', "See Advanced options below.", "options", NULL}, +static const struct option_def { + const char val; + const char *description; + const char *arg_name; + const char *default_value; +} option_definitions[] = { + {'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + {'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + {'d', "Fork. Run in the background.", NULL, "run in the background"}, + {'h', "Display this help message.", NULL, NULL}, + {'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + {'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + {'p', "API/Web port to use.", "port", "19999"}, + {'s', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + {'t', "The internal clock of netdata.", "seconds", "1"}, + {'u', "Run as user.", "username", "netdata"}, + {'v', "Print netdata version and exit.", NULL, NULL}, + {'V', "Print netdata version and exit.", NULL, NULL}, + {'W', "See Advanced options below.", "options", NULL}, }; int help(int exitcode) { @@ -836,7 +820,6 @@ int help(int exitcode) { fprintf(stream, "\n Signals netdata handles:\n\n" " - HUP Close and reopen log files.\n" - " - USR1 Save internal DB to disk.\n" " - USR2 Reload health configuration.\n" "\n" ); @@ -1070,12 +1053,6 @@ static void backwards_compatible_config() { config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); - config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", - CONFIG_SECTION_DB, "delete obsolete charts files"); - - config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", - CONFIG_SECTION_DB, "delete orphan hosts files"); - config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", CONFIG_SECTION_DB, "enable zero metrics"); @@ -1102,7 +1079,7 @@ static int get_hostname(char *buf, size_t buf_size) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix); - if (!read_file(filename, buf, buf_size)) { + if (!read_txt_file(filename, buf, buf_size)) { trim(buf); return 0; } @@ -1185,10 +1162,13 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get default Database Engine page type - const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw"); - if (strcmp(page_type, "gorilla") == 0) { - tier_page_type[0] = PAGE_GORILLA_METRICS; - } else if (strcmp(page_type, "raw") != 0) { + const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + if (strcmp(page_type, "gorilla") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_GORILLA_32BIT; + else if (strcmp(page_type, "raw") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + else { + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type); } @@ -1227,7 +1207,7 @@ static void get_netdata_configured_variables() { #else if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead."); - default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; } #endif @@ -1371,12 +1351,6 @@ int get_system_info(struct rrdhost_system_info *system_info) { return 0; } -void set_silencers_filename() { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir); - silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename); -} - /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST* be set in this procedure to be called in all the relevant code paths. */ @@ -1401,6 +1375,25 @@ void replication_initialize(void); void bearer_tokens_init(void); int unittest_rrdpush_compressions(void); int uuid_unittest(void); +int progress_unittest(void); +int dyncfg_unittest(void); + +int unittest_prepare_rrd(char **user) { + post_conf_load(user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + health_plugin_disable(); + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + default_rrdpush_enabled = 0; + + return 0; +} int main(int argc, char **argv) { // initialize the system clocks @@ -1508,66 +1501,53 @@ int main(int argc, char **argv) { #endif if(strcmp(optarg, "sqlite-meta-recover") == 0) { - sql_init_database(DB_CHECK_RECOVER, 0); + sql_init_meta_database(DB_CHECK_RECOVER, 0); return 0; } if(strcmp(optarg, "sqlite-compact") == 0) { - sql_init_database(DB_CHECK_RECLAIM_SPACE, 0); + sql_init_meta_database(DB_CHECK_RECLAIM_SPACE, 0); return 0; } if(strcmp(optarg, "sqlite-analyze") == 0) { - sql_init_database(DB_CHECK_ANALYZE, 0); + sql_init_meta_database(DB_CHECK_ANALYZE, 0); return 0; } if(strcmp(optarg, "unittest") == 0) { unittest_running = true; - if (pluginsd_parser_unittest()) - return 1; + // set defaults for dbegnine unittest + config_set(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + default_rrdeng_disk_quota_mb = default_multidb_disk_quota_mb = 256; - if (unit_test_static_threads()) - return 1; - if (unit_test_buffer()) - return 1; - if (unit_test_str2ld()) - return 1; - if (buffer_unittest()) - return 1; - if (unit_test_bitmaps()) + if (sqlite_library_init()) return 1; + + if (pluginsd_parser_unittest()) return 1; + if (unit_test_static_threads()) return 1; + if (unit_test_buffer()) return 1; + if (unit_test_str2ld()) return 1; + if (buffer_unittest()) return 1; + if (unit_test_bitmaps()) return 1; + // No call to load the config file on this code-path - post_conf_load(&user); - get_netdata_configured_variables(); - default_rrd_update_every = 1; - default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; - default_health_enabled = 0; - storage_tiers = 1; - registry_init(); - if(rrd_init("unittest", NULL, true)) { - fprintf(stderr, "rrd_init failed for unittest\n"); - return 1; - } - default_rrdpush_enabled = 0; - if(run_all_mockup_tests()) return 1; - if(unit_test_storage()) return 1; + if (unittest_prepare_rrd(&user)) return 1; + if (run_all_mockup_tests()) return 1; + if (unit_test_storage()) return 1; #ifdef ENABLE_DBENGINE - if(test_dbengine()) return 1; + if (test_dbengine()) return 1; #endif - if(test_sqlite()) return 1; - if(string_unittest(10000)) return 1; - if (dictionary_unittest(10000)) - return 1; - if(aral_unittest(10000)) - return 1; - if (rrdlabels_unittest()) - return 1; - if (ctx_unittest()) - return 1; - if (uuid_unittest()) - return 1; + if (test_sqlite()) return 1; + if (string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) return 1; + if (aral_unittest(10000)) return 1; + if (rrdlabels_unittest()) return 1; + if (ctx_unittest()) return 1; + if (uuid_unittest()) return 1; + if (dyncfg_unittest()) return 1; + sqlite_library_shutdown(); fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); return 0; } @@ -1631,6 +1611,16 @@ int main(int argc, char **argv) { unittest_running = true; return unittest_rrdpush_compressions(); } + else if(strcmp(optarg, "progresstest") == 0) { + unittest_running = true; + return progress_unittest(); + } + else if(strcmp(optarg, "dyncfgtest") == 0) { + unittest_running = true; + if(unittest_prepare_rrd(&user)) + return 1; + return dyncfg_unittest(); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -1889,12 +1879,14 @@ int main(int argc, char **argv) { for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); } - if(!config_loaded) { load_netdata_conf(NULL, 0, &user); load_cloud_conf(0); } + // @stelfrag: Where is the right place to call this? + watcher_thread_start(); + // ------------------------------------------------------------------------ // initialize netdata { @@ -2005,7 +1997,7 @@ int main(int argc, char **argv) { // -------------------------------------------------------------------- // This is the safest place to start the SILENCERS structure - set_silencers_filename(); + health_set_silencers_filename(); health_initialize_global_silencers(); // // -------------------------------------------------------------------- @@ -2031,6 +2023,9 @@ int main(int argc, char **argv) { // setup threads configs default_stacksize = netdata_threads_init(); + // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes + if (default_stacksize < 1 * 1024 * 1024) + default_stacksize = 1 * 1024 * 1024; #ifdef NETDATA_INTERNAL_CHECKS config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true); @@ -2071,6 +2066,8 @@ int main(int argc, char **argv) { exit(1); } } + if (sqlite_library_init()) + fatal("Failed to initialize sqlite library"); // -------------------------------------------------------------------- // Initialize ML configuration @@ -2080,9 +2077,9 @@ int main(int argc, char **argv) { #ifdef ENABLE_H2O delta_startup_time("initialize h2o server"); - for (int i = 0; static_threads[i].name; i++) { - if (static_threads[i].start_routine == h2o_main) - static_threads[i].enabled = httpd_is_enabled(); + for (int t = 0; static_threads[t].name; t++) { + if (static_threads[t].start_routine == h2o_main) + static_threads[t].enabled = httpd_is_enabled(); } #endif } @@ -2108,6 +2105,11 @@ int main(int argc, char **argv) { if(become_daemon(dont_fork, user) == -1) fatal("Cannot daemonize myself."); + // init sentry +#ifdef ENABLE_SENTRY + sentry_native_init(); +#endif + // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME". struct passwd *pw = getpwuid(getuid()); if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) { @@ -2118,7 +2120,7 @@ int main(int argc, char **argv) { setenv("HOME", netdata_configured_home_dir, 1); - dyn_conf_init(); + dyncfg_init(true); netdata_log_info("netdata started on pid %d.", getpid()); @@ -2171,7 +2173,7 @@ int main(int argc, char **argv) { int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); int crash_detected = (unlink(agent_crash_file) == 0); - int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444); + int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 444); if (fd >= 0) close(fd); @@ -2230,11 +2232,16 @@ int main(int argc, char **argv) { netdata_log_info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); netdata_ready = true; - send_statistics("START", "-", "-"); - if (crash_detected) - send_statistics("CRASH", "-", "-"); - if (incomplete_shutdown_detected) - send_statistics("INCOMPLETE_SHUTDOWN", "-", "-"); + analytics_statistic_t start_statistic = { "START", "-", "-" }; + analytics_statistic_send(&start_statistic); + if (crash_detected) { + analytics_statistic_t crash_statistic = { "CRASH", "-", "-" }; + analytics_statistic_send(&crash_statistic); + } + if (incomplete_shutdown_detected) { + analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" }; + analytics_statistic_send(&incomplete_shutdown_statistic); + } //check if ANALYTICS needs to start if (netdata_anonymous_statistics_enabled == 1) { @@ -2256,7 +2263,9 @@ int main(int argc, char **argv) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized - send_statistics("ACLK_DISABLED", "-", "-"); + analytics_statistic_t statistic = { "ACLK_DISABLED", "-", "-" }; + analytics_statistic_send(&statistic); + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444); if (fd == -1) netdata_log_error("Cannot create file '%s'. Please fix this.", filename); |