diff options
Diffstat (limited to 'libnetdata/log')
-rw-r--r-- | libnetdata/log/log.c | 166 | ||||
-rw-r--r-- | libnetdata/log/log.h | 45 |
2 files changed, 184 insertions, 27 deletions
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c index d6793b69..fb3b2d03 100644 --- a/libnetdata/log/log.c +++ b/libnetdata/log/log.c @@ -3,6 +3,10 @@ #include <daemon/main.h> #include "../libnetdata.h" +#ifdef HAVE_BACKTRACE +#include <execinfo.h> +#endif + int web_server_is_multithreaded = 1; const char *program_name = ""; @@ -11,14 +15,19 @@ uint64_t debug_flags = 0; int access_log_syslog = 1; int error_log_syslog = 1; int output_log_syslog = 1; // debug log +int health_log_syslog = 1; int stdaccess_fd = -1; FILE *stdaccess = NULL; +int stdhealth_fd = -1; +FILE *stdhealth = NULL; + const char *stdaccess_filename = NULL; const char *stderr_filename = NULL; const char *stdout_filename = NULL; const char *facility_log = NULL; +const char *stdhealth_filename = NULL; #ifdef ENABLE_ACLK const char *aclklog_filename = NULL; @@ -451,16 +460,13 @@ void syslog_init() { } } -#define LOG_DATE_LENGTH 26 - -static inline void log_date(char *buffer, size_t len) { +void log_date(char *buffer, size_t len, time_t now) { if(unlikely(!buffer || !len)) return; - time_t t; + time_t t = now; struct tm *tmp, tmbuf; - t = now_realtime_sec(); tmp = localtime_r(&t, &tmbuf); if (tmp == NULL) { @@ -576,7 +582,10 @@ void reopen_all_log_files() { #endif if(stdaccess_filename) - stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd); + stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd); + + if(stdhealth_filename) + stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd); } void open_all_log_files() { @@ -592,6 +601,8 @@ void open_all_log_files() { #endif stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd); + + stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd); } // ---------------------------------------------------------------------------- @@ -625,7 +636,7 @@ int error_log_limit(int reset) { if(reset) { if(prevented) { char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( stderr, "%s: %s LOG FLOOD PROTECTION reset for process '%s' " @@ -648,7 +659,7 @@ int error_log_limit(int reset) { if(now - start > error_log_throttle_period) { if(prevented) { char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( stderr, "%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' " @@ -672,7 +683,7 @@ int error_log_limit(int reset) { if(counter > error_log_errors_per_period) { if(!prevented) { char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( stderr, "%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs " @@ -727,7 +738,7 @@ void debug_int( const char *file, const char *function, const unsigned long line va_list args; char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); va_start( args, fmt ); printf("%s: %s DEBUG : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); @@ -747,7 +758,7 @@ void debug_int( const char *file, const char *function, const unsigned long line // ---------------------------------------------------------------------------- // info log -void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) +void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { va_list args; @@ -766,7 +777,7 @@ void info_int( const char *file, const char *function, const unsigned long line, } char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS @@ -807,7 +818,10 @@ static const char *strerror_result_string(const char *a, const char *b) { (void) #error "cannot detect the format of function strerror_r()" #endif -void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + if(erl->sleep_ut) + sleep_usec(erl->sleep_ut); + // save a copy of errno - just in case this function generates a new error int __errno = errno; @@ -815,6 +829,13 @@ void error_int( const char *prefix, const char *file, const char *function, cons log_lock(); + erl->count++; + time_t now = now_boottime_sec(); + if(now - erl->last_logged < erl->log_every) { + log_unlock(); + return; + } + // prevent logging too much if (error_log_limit(0)) { log_unlock(); @@ -828,7 +849,7 @@ void error_int( const char *prefix, const char *file, const char *function, cons } char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS @@ -839,6 +860,12 @@ void error_int( const char *prefix, const char *file, const char *function, cons vfprintf( stderr, fmt, args ); va_end( args ); + if(erl->count > 1) + fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + + if(erl->sleep_ut) + fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + if(__errno) { char buf[1024]; fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); @@ -847,9 +874,74 @@ void error_int( const char *prefix, const char *file, const char *function, cons else fputc('\n', stderr); + erl->last_logged = now; + erl->count = 0; + log_unlock(); } +void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + // save a copy of errno - just in case this function generates a new error + int __errno = errno; + + va_list args; + + log_lock(); + + // prevent logging too much + if (error_log_limit(0)) { + log_unlock(); + return; + } + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_ERR, fmt, args ); + va_end( args ); + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); +#else + fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + if(__errno) { + char buf[1024]; + fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + errno = 0; + } + else + fputc('\n', stderr); + + log_unlock(); +} + +#ifdef NETDATA_INTERNAL_CHECKS +static void crash_netdata(void) { + // make Netdata core dump + abort(); +} +#endif + +#ifdef HAVE_BACKTRACE +#define BT_BUF_SIZE 100 +static void print_call_stack(void) { + int nptrs; + void *buffer[BT_BUF_SIZE]; + + nptrs = backtrace(buffer, BT_BUF_SIZE); + if(nptrs) + backtrace_symbols_fd(buffer, nptrs, fileno(stderr)); +} +#endif + void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { // save a copy of errno - just in case this function generates a new error int __errno = errno; @@ -872,7 +964,7 @@ void fatal_int( const char *file, const char *function, const unsigned long line } char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); log_lock(); @@ -897,6 +989,14 @@ void fatal_int( const char *file, const char *function, const unsigned long line snprintfz(action_result, 60, "%s:%s", program_name, strncmp(thread_tag, "STREAM_RECEIVER", strlen("STREAM_RECEIVER")) ? thread_tag : "[x]"); send_statistics("FATAL", action_result, action_data); +#ifdef HAVE_BACKTRACE + print_call_stack(); +#endif + +#ifdef NETDATA_INTERNAL_CHECKS + crash_netdata(); +#endif + netdata_cleanup_and_exit(1); } @@ -919,7 +1019,7 @@ void log_access( const char *fmt, ... ) { netdata_mutex_lock(&access_mutex); char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf(stdaccess, "%s: ", date); va_start( args, fmt ); @@ -932,6 +1032,38 @@ void log_access( const char *fmt, ... ) { } } +// ---------------------------------------------------------------------------- +// health log + +void log_health( const char *fmt, ... ) { + va_list args; + + if(health_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_INFO, fmt, args ); + va_end( args ); + } + + if(stdhealth) { + static netdata_mutex_t health_mutex = NETDATA_MUTEX_INITIALIZER; + + if(web_server_is_multithreaded) + netdata_mutex_lock(&health_mutex); + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf(stdhealth, "%s: ", date); + + va_start( args, fmt ); + vfprintf( stdhealth, fmt, args ); + va_end( args ); + fputc('\n', stdhealth); + + if(web_server_is_multithreaded) + netdata_mutex_unlock(&health_mutex); + } +} + #ifdef ENABLE_ACLK void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name) { if (aclklog) { @@ -939,7 +1071,7 @@ void log_aclk_message_bin( const char *data, const size_t data_len, int tx, cons netdata_mutex_lock(&aclklog_mutex); char date[LOG_DATE_LENGTH]; - log_date(date, LOG_DATE_LENGTH); + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf(aclklog, "%s: %s Msg:\"%s\", MQTT-topic:\"%s\": ", date, tx ? "OUTGOING" : "INCOMING", message_name, mqtt_topic); fwrite(data, data_len, 1, aclklog); diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h index ae86720c..11dab4c1 100644 --- a/libnetdata/log/log.h +++ b/libnetdata/log/log.h @@ -45,6 +45,8 @@ extern "C" { #define D_ACLK 0x0000000200000000 #define D_METADATALOG 0x0000000400000000 #define D_ACLK_SYNC 0x0000000800000000 +#define D_META_SYNC 0x0000001000000000 +#define D_REPLICATION 0x0000002000000000 #define D_SYSTEM 0x8000000000000000 extern int web_server_is_multithreaded; @@ -56,9 +58,13 @@ extern const char *program_name; extern int stdaccess_fd; extern FILE *stdaccess; +extern int stdhealth_fd; +extern FILE *stdhealth; + extern const char *stdaccess_filename; extern const char *stderr_filename; extern const char *stdout_filename; +extern const char *stdhealth_filename; extern const char *facility_log; #ifdef ENABLE_ACLK @@ -71,42 +77,61 @@ extern int aclklog_enabled; extern int access_log_syslog; extern int error_log_syslog; extern int output_log_syslog; +extern int health_log_syslog; extern time_t error_log_throttle_period; extern unsigned long error_log_errors_per_period, error_log_errors_per_period_backup; -extern int error_log_limit(int reset); +int error_log_limit(int reset); + +void open_all_log_files(); +void reopen_all_log_files(); -extern void open_all_log_files(); -extern void reopen_all_log_files(); +#define LOG_DATE_LENGTH 26 +void log_date(char *buffer, size_t len, time_t now); static inline void debug_dummy(void) {} void error_log_limit_reset(void); void error_log_limit_unlimited(void); +typedef struct error_with_limit { + time_t log_every; + size_t count; + time_t last_logged; + usec_t sleep_ut; +} ERROR_LIMIT; + +#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } +#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } + #ifdef NETDATA_INTERNAL_CHECKS #define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #else #define debug(type, args...) debug_dummy() #define internal_error(args...) debug_dummy() +#define internal_fatal(args...) debug_dummy() #endif #define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) #define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) -extern void send_statistics(const char *action, const char *action_result, const char *action_data); -extern void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -extern void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -extern void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); -extern void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); -extern void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); +void send_statistics(const char *action, const char *action_result, const char *action_data); +void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); +void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); +void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);; +void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); +void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); +void log_health( const char *fmt, ... ) PRINTFLIKE(1, 2); #ifdef ENABLE_ACLK -extern void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name); +void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name); #endif # ifdef __cplusplus |