diff options
Diffstat (limited to 'libnetdata/log')
-rw-r--r-- libnetdata/log/README.md | 10
-rw-r--r-- libnetdata/log/log.c | 110
-rw-r--r-- libnetdata/log/log.h | 19
3 files changed, 98 insertions, 41 deletions
diff --git a/libnetdata/log/README.md b/libnetdata/log/README.md index a767dd44..5f9e5bc7 100644 --- a/libnetdata/log/README.md +++ b/libnetdata/log/README.md @@ -1,5 +1,15 @@ <!-- +title "Log" custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/log/README.md +sidebar_label: "Log" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata libraries" --> +# Log + +The netdata log library supports debug, info, error and fatal error logging. +By default we have an access log, an error log and a collectors log. + diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c index fb3b2d03..1dcdba9c 100644 --- a/libnetdata/log/log.c +++ b/libnetdata/log/log.c @@ -14,6 +14,7 @@ uint64_t debug_flags = 0; int access_log_syslog = 1; int error_log_syslog = 1; +int collector_log_syslog = 1; int output_log_syslog = 1; // debug log int health_log_syslog = 1; @@ -23,11 +24,15 @@ FILE *stdaccess = NULL; int stdhealth_fd = -1; FILE *stdhealth = NULL; +int stdcollector_fd = -1; +FILE *stderror = NULL; + const char *stdaccess_filename = NULL; const char *stderr_filename = NULL; const char *stdout_filename = NULL; const char *facility_log = NULL; const char *stdhealth_filename = NULL; +const char *stdcollector_filename = NULL; #ifdef ENABLE_ACLK const char *aclklog_filename = NULL; @@ -573,8 +578,14 @@ void reopen_all_log_files() { if(stdout_filename) open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - if(stderr_filename) - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + if(stdcollector_filename) + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + if(stderr_filename) { + log_lock(); + stderror = open_log_file(stdcollector_fd, stderror, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); + } #ifdef ENABLE_ACLK if (aclklog_enabled) @@ -593,7 +604,11 @@ void open_all_log_files() { 
open_log_file(STDIN_FILENO, stdin, "/dev/null", NULL, 0, NULL); open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + log_lock(); + stderror = open_log_file(stdcollector_fd, NULL, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); #ifdef ENABLE_ACLK if(aclklog_enabled) @@ -616,7 +631,9 @@ int error_log_limit(int reset) { static time_t start = 0; static unsigned long counter = 0, prevented = 0; - // fprintf(stderr, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); + FILE *fp = (!stderror) ? stderr : stderror; + + // fprintf(fp, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); // do not throttle if the period is 0 if(error_log_throttle_period == 0) @@ -638,7 +655,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION reset for process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -661,7 +678,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -685,7 +702,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs " "in 
%"PRId64" seconds). Preventing more logs from process '%s' for %"PRId64" seconds.\n", date, @@ -758,9 +775,10 @@ void debug_int( const char *file, const char *function, const unsigned long line // ---------------------------------------------------------------------------- // info log -void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) +void info_int( int is_collector, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { va_list args; + FILE *fp = (is_collector || !stderror) ? stderr : stderror; log_lock(); @@ -770,7 +788,7 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_INFO, fmt, args ); va_end( args ); @@ -781,14 +799,15 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); + fprintf(fp, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -819,6 +838,8 @@ static const char *strerror_result_string(const char *a, const char *b) { (void) #endif void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + FILE *fp = (!stderror) ? 
stderr : stderror; + if(erl->sleep_ut) sleep_usec(erl->sleep_ut); @@ -842,7 +863,7 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -853,26 +874,29 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(fp, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); if(erl->count > 1) - fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + fprintf(fp, " (similar messages repeated %zu times in the last %llu secs)", + erl->count, (unsigned long long)(erl->last_logged ? 
now - erl->last_logged : 0)); if(erl->sleep_ut) - fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + fprintf(fp, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(fp, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', fp); erl->last_logged = now; erl->count = 0; @@ -880,9 +904,10 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma log_unlock(); } -void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { +void error_int(int is_collector, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { // save a copy of errno - just in case this function generates a new error int __errno = errno; + FILE *fp = (is_collector || !stderror) ? 
stderr : stderror; va_list args; @@ -894,7 +919,7 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -905,20 +930,22 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(fp, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(fp, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -933,23 +960,27 @@ static void crash_netdata(void) { #ifdef HAVE_BACKTRACE #define BT_BUF_SIZE 100 static void print_call_stack(void) { + FILE *fp = (!stderror) ? stderr : stderror; + int nptrs; void *buffer[BT_BUF_SIZE]; nptrs = backtrace(buffer, BT_BUF_SIZE); if(nptrs) - backtrace_symbols_fd(buffer, nptrs, fileno(stderr)); + backtrace_symbols_fd(buffer, nptrs, fileno(fp)); } #endif void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { + FILE *fp = (!stderror) ? 
stderr : stderror; + // save a copy of errno - just in case this function generates a new error int __errno = errno; va_list args; const char *thread_tag; char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_CRIT, fmt, args ); va_end( args ); @@ -970,15 +1001,16 @@ void fatal_int( const char *file, const char *function, const unsigned long line va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); + fprintf(fp, + "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); #else - fprintf(stderr, "%s: %s FATAL : %s : ", date, program_name, thread_tag); + fprintf(fp, "%s: %s FATAL : %s : ", date, program_name, thread_tag); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); perror(" # "); - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); @@ -986,7 +1018,15 @@ void fatal_int( const char *file, const char *function, const unsigned long line snprintfz(action_data, 70, "%04lu@%-10.10s:%-15.15s/%d", line, file, function, __errno); char action_result[60+1]; - snprintfz(action_result, 60, "%s:%s", program_name, strncmp(thread_tag, "STREAM_RECEIVER", strlen("STREAM_RECEIVER")) ? 
thread_tag : "[x]"); + const char *tag_to_send = thread_tag; + + // anonymize thread names + if(strncmp(thread_tag, THREAD_TAG_STREAM_RECEIVER, strlen(THREAD_TAG_STREAM_RECEIVER)) == 0) + tag_to_send = THREAD_TAG_STREAM_RECEIVER; + if(strncmp(thread_tag, THREAD_TAG_STREAM_SENDER, strlen(THREAD_TAG_STREAM_SENDER)) == 0) + tag_to_send = THREAD_TAG_STREAM_SENDER; + + snprintfz(action_result, 60, "%s:%s", program_name, tag_to_send); send_statistics("FATAL", action_result, action_data); #ifdef HAVE_BACKTRACE diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h index 11dab4c1..3d9f0927 100644 --- a/libnetdata/log/log.h +++ b/libnetdata/log/log.h @@ -61,10 +61,14 @@ extern FILE *stdaccess; extern int stdhealth_fd; extern FILE *stdhealth; +extern int stdcollector_fd; +extern FILE *stderror; + extern const char *stdaccess_filename; extern const char *stderr_filename; extern const char *stdout_filename; extern const char *stdhealth_filename; +extern const char *stdcollector_filename; extern const char *facility_log; #ifdef ENABLE_ACLK @@ -106,7 +110,7 @@ typedef struct error_with_limit { #ifdef NETDATA_INTERNAL_CHECKS #define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) -#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_error(condition, args...) do { if(unlikely(condition)) error_int(0, "IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #else #define debug(type, args...) debug_dummy() @@ -114,17 +118,20 @@ typedef struct error_with_limit { #define internal_fatal(args...) debug_dummy() #endif -#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) -#define infoerr(args...) 
error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) -#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define info(args...) info_int(0, __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_info(args...) info_int(1, __FILE__, __FUNCTION__, __LINE__, ##args) +#define infoerr(args...) error_int(0, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error(args...) error_int(0, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_infoerr(args...) error_int(1, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_error(args...) error_int(1, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) void send_statistics(const char *action, const char *action_result, const char *action_data); void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void info_int( int is_collector, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_int( int is_collector, const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(6, 7); void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... 
) PRINTFLIKE(6, 7);; void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); |