summaryrefslogtreecommitdiffstats
path: root/libnetdata/log
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2022-11-30 18:47:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2022-11-30 18:47:05 +0000
commit97e01009d69b8fbebfebf68f51e3d126d0ed43fc (patch)
tree02e8b836c3a9d89806f3e67d4a5fe9f52dbb0061 /libnetdata/log
parentReleasing debian version 1.36.1-1. (diff)
downloadnetdata-97e01009d69b8fbebfebf68f51e3d126d0ed43fc.tar.xz
netdata-97e01009d69b8fbebfebf68f51e3d126d0ed43fc.zip
Merging upstream version 1.37.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'libnetdata/log')
-rw-r--r--libnetdata/log/log.c166
-rw-r--r--libnetdata/log/log.h45
2 files changed, 184 insertions, 27 deletions
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c
index d6793b69b..fb3b2d034 100644
--- a/libnetdata/log/log.c
+++ b/libnetdata/log/log.c
@@ -3,6 +3,10 @@
#include <daemon/main.h>
#include "../libnetdata.h"
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#endif
+
int web_server_is_multithreaded = 1;
const char *program_name = "";
@@ -11,14 +15,19 @@ uint64_t debug_flags = 0;
int access_log_syslog = 1;
int error_log_syslog = 1;
int output_log_syslog = 1; // debug log
+int health_log_syslog = 1;
int stdaccess_fd = -1;
FILE *stdaccess = NULL;
+int stdhealth_fd = -1;
+FILE *stdhealth = NULL;
+
const char *stdaccess_filename = NULL;
const char *stderr_filename = NULL;
const char *stdout_filename = NULL;
const char *facility_log = NULL;
+const char *stdhealth_filename = NULL;
#ifdef ENABLE_ACLK
const char *aclklog_filename = NULL;
@@ -451,16 +460,13 @@ void syslog_init() {
}
}
-#define LOG_DATE_LENGTH 26
-
-static inline void log_date(char *buffer, size_t len) {
+void log_date(char *buffer, size_t len, time_t now) {
if(unlikely(!buffer || !len))
return;
- time_t t;
+ time_t t = now;
struct tm *tmp, tmbuf;
- t = now_realtime_sec();
tmp = localtime_r(&t, &tmbuf);
if (tmp == NULL) {
@@ -576,7 +582,10 @@ void reopen_all_log_files() {
#endif
if(stdaccess_filename)
- stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd);
+ stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd);
+
+ if(stdhealth_filename)
+ stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd);
}
void open_all_log_files() {
@@ -592,6 +601,8 @@ void open_all_log_files() {
#endif
stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd);
+
+ stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd);
}
// ----------------------------------------------------------------------------
@@ -625,7 +636,7 @@ int error_log_limit(int reset) {
if(reset) {
if(prevented) {
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
fprintf(
stderr,
"%s: %s LOG FLOOD PROTECTION reset for process '%s' "
@@ -648,7 +659,7 @@ int error_log_limit(int reset) {
if(now - start > error_log_throttle_period) {
if(prevented) {
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
fprintf(
stderr,
"%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' "
@@ -672,7 +683,7 @@ int error_log_limit(int reset) {
if(counter > error_log_errors_per_period) {
if(!prevented) {
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
fprintf(
stderr,
"%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs "
@@ -727,7 +738,7 @@ void debug_int( const char *file, const char *function, const unsigned long line
va_list args;
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
va_start( args, fmt );
printf("%s: %s DEBUG : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function);
@@ -747,7 +758,7 @@ void debug_int( const char *file, const char *function, const unsigned long line
// ----------------------------------------------------------------------------
// info log
-void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... )
+void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... )
{
va_list args;
@@ -766,7 +777,7 @@ void info_int( const char *file, const char *function, const unsigned long line,
}
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
va_start( args, fmt );
#ifdef NETDATA_INTERNAL_CHECKS
@@ -807,7 +818,10 @@ static const char *strerror_result_string(const char *a, const char *b) { (void)
#error "cannot detect the format of function strerror_r()"
#endif
-void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) {
+void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
+ if(erl->sleep_ut)
+ sleep_usec(erl->sleep_ut);
+
// save a copy of errno - just in case this function generates a new error
int __errno = errno;
@@ -815,6 +829,13 @@ void error_int( const char *prefix, const char *file, const char *function, cons
log_lock();
+ erl->count++;
+ time_t now = now_boottime_sec();
+ if(now - erl->last_logged < erl->log_every) {
+ log_unlock();
+ return;
+ }
+
// prevent logging too much
if (error_log_limit(0)) {
log_unlock();
@@ -828,7 +849,7 @@ void error_int( const char *prefix, const char *file, const char *function, cons
}
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
va_start( args, fmt );
#ifdef NETDATA_INTERNAL_CHECKS
@@ -839,6 +860,12 @@ void error_int( const char *prefix, const char *file, const char *function, cons
vfprintf( stderr, fmt, args );
va_end( args );
+ if(erl->count > 1)
+ fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0));
+
+ if(erl->sleep_ut)
+ fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut);
+
if(__errno) {
char buf[1024];
fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf));
@@ -847,9 +874,74 @@ void error_int( const char *prefix, const char *file, const char *function, cons
else
fputc('\n', stderr);
+ erl->last_logged = now;
+ erl->count = 0;
+
log_unlock();
}
+void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
+ // save a copy of errno - just in case this function generates a new error
+ int __errno = errno;
+
+ va_list args;
+
+ log_lock();
+
+ // prevent logging too much
+ if (error_log_limit(0)) {
+ log_unlock();
+ return;
+ }
+
+ if(error_log_syslog) {
+ va_start( args, fmt );
+ vsyslog(LOG_ERR, fmt, args );
+ va_end( args );
+ }
+
+ char date[LOG_DATE_LENGTH];
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
+
+ va_start( args, fmt );
+#ifdef NETDATA_INTERNAL_CHECKS
+ fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function);
+#else
+ fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag());
+#endif
+ vfprintf( stderr, fmt, args );
+ va_end( args );
+
+ if(__errno) {
+ char buf[1024];
+ fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf));
+ errno = 0;
+ }
+ else
+ fputc('\n', stderr);
+
+ log_unlock();
+}
+
+#ifdef NETDATA_INTERNAL_CHECKS
+static void crash_netdata(void) {
+ // make Netdata core dump
+ abort();
+}
+#endif
+
+#ifdef HAVE_BACKTRACE
+#define BT_BUF_SIZE 100
+static void print_call_stack(void) {
+ int nptrs;
+ void *buffer[BT_BUF_SIZE];
+
+ nptrs = backtrace(buffer, BT_BUF_SIZE);
+ if(nptrs)
+ backtrace_symbols_fd(buffer, nptrs, fileno(stderr));
+}
+#endif
+
void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) {
// save a copy of errno - just in case this function generates a new error
int __errno = errno;
@@ -872,7 +964,7 @@ void fatal_int( const char *file, const char *function, const unsigned long line
}
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
log_lock();
@@ -897,6 +989,14 @@ void fatal_int( const char *file, const char *function, const unsigned long line
snprintfz(action_result, 60, "%s:%s", program_name, strncmp(thread_tag, "STREAM_RECEIVER", strlen("STREAM_RECEIVER")) ? thread_tag : "[x]");
send_statistics("FATAL", action_result, action_data);
+#ifdef HAVE_BACKTRACE
+ print_call_stack();
+#endif
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ crash_netdata();
+#endif
+
netdata_cleanup_and_exit(1);
}
@@ -919,7 +1019,7 @@ void log_access( const char *fmt, ... ) {
netdata_mutex_lock(&access_mutex);
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
fprintf(stdaccess, "%s: ", date);
va_start( args, fmt );
@@ -932,6 +1032,38 @@ void log_access( const char *fmt, ... ) {
}
}
+// ----------------------------------------------------------------------------
+// health log
+
+void log_health( const char *fmt, ... ) {
+ va_list args;
+
+ if(health_log_syslog) {
+ va_start( args, fmt );
+ vsyslog(LOG_INFO, fmt, args );
+ va_end( args );
+ }
+
+ if(stdhealth) {
+ static netdata_mutex_t health_mutex = NETDATA_MUTEX_INITIALIZER;
+
+ if(web_server_is_multithreaded)
+ netdata_mutex_lock(&health_mutex);
+
+ char date[LOG_DATE_LENGTH];
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
+ fprintf(stdhealth, "%s: ", date);
+
+ va_start( args, fmt );
+ vfprintf( stdhealth, fmt, args );
+ va_end( args );
+ fputc('\n', stdhealth);
+
+ if(web_server_is_multithreaded)
+ netdata_mutex_unlock(&health_mutex);
+ }
+}
+
#ifdef ENABLE_ACLK
void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name) {
if (aclklog) {
@@ -939,7 +1071,7 @@ void log_aclk_message_bin( const char *data, const size_t data_len, int tx, cons
netdata_mutex_lock(&aclklog_mutex);
char date[LOG_DATE_LENGTH];
- log_date(date, LOG_DATE_LENGTH);
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
fprintf(aclklog, "%s: %s Msg:\"%s\", MQTT-topic:\"%s\": ", date, tx ? "OUTGOING" : "INCOMING", message_name, mqtt_topic);
fwrite(data, data_len, 1, aclklog);
diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h
index ae86720cb..11dab4c1d 100644
--- a/libnetdata/log/log.h
+++ b/libnetdata/log/log.h
@@ -45,6 +45,8 @@ extern "C" {
#define D_ACLK 0x0000000200000000
#define D_METADATALOG 0x0000000400000000
#define D_ACLK_SYNC 0x0000000800000000
+#define D_META_SYNC 0x0000001000000000
+#define D_REPLICATION 0x0000002000000000
#define D_SYSTEM 0x8000000000000000
extern int web_server_is_multithreaded;
@@ -56,9 +58,13 @@ extern const char *program_name;
extern int stdaccess_fd;
extern FILE *stdaccess;
+extern int stdhealth_fd;
+extern FILE *stdhealth;
+
extern const char *stdaccess_filename;
extern const char *stderr_filename;
extern const char *stdout_filename;
+extern const char *stdhealth_filename;
extern const char *facility_log;
#ifdef ENABLE_ACLK
@@ -71,42 +77,61 @@ extern int aclklog_enabled;
extern int access_log_syslog;
extern int error_log_syslog;
extern int output_log_syslog;
+extern int health_log_syslog;
extern time_t error_log_throttle_period;
extern unsigned long error_log_errors_per_period, error_log_errors_per_period_backup;
-extern int error_log_limit(int reset);
+int error_log_limit(int reset);
+
+void open_all_log_files();
+void reopen_all_log_files();
-extern void open_all_log_files();
-extern void reopen_all_log_files();
+#define LOG_DATE_LENGTH 26
+void log_date(char *buffer, size_t len, time_t now);
static inline void debug_dummy(void) {}
void error_log_limit_reset(void);
void error_log_limit_unlimited(void);
+typedef struct error_with_limit {
+ time_t log_every;
+ size_t count;
+ time_t last_logged;
+ usec_t sleep_ut;
+} ERROR_LIMIT;
+
+#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }
+#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }
+
#ifdef NETDATA_INTERNAL_CHECKS
#define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
+#define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#else
#define debug(type, args...) debug_dummy()
#define internal_error(args...) debug_dummy()
+#define internal_fatal(args...) debug_dummy()
#endif
#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args)
#define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args)
#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args)
+#define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args)
#define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args)
#define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr))
-extern void send_statistics(const char *action, const char *action_result, const char *action_data);
-extern void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
-extern void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
-extern void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6);
-extern void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5);
-extern void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2);
+void send_statistics(const char *action, const char *action_result, const char *action_data);
+void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
+void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
+void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6);
+void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);;
+void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5);
+void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2);
+void log_health( const char *fmt, ... ) PRINTFLIKE(1, 2);
#ifdef ENABLE_ACLK
-extern void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name);
+void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name);
#endif
# ifdef __cplusplus