diff options
Diffstat (limited to 'src/util/watchdog.c')
-rw-r--r-- | src/util/watchdog.c | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/src/util/watchdog.c b/src/util/watchdog.c new file mode 100644 index 0000000..3ec1fbc --- /dev/null +++ b/src/util/watchdog.c @@ -0,0 +1,314 @@ +/*++ +/* NAME +/* watchdog 3 +/* SUMMARY +/* watchdog timer +/* SYNOPSIS +/* #include <watchdog.h> +/* +/* WATCHDOG *watchdog_create(timeout, action, context) +/* unsigned timeout; +/* void (*action)(WATCHDOG *watchdog, char *context); +/* char *context; +/* +/* void watchdog_start(watchdog) +/* WATCHDOG *watchdog; +/* +/* void watchdog_stop(watchdog) +/* WATCHDOG *watchdog; +/* +/* void watchdog_destroy(watchdog) +/* WATCHDOG *watchdog; +/* +/* void watchdog_pat() +/* DESCRIPTION +/* This module implements watchdog timers that are based on ugly +/* UNIX alarm timers. The module is designed to survive systems +/* with clocks that jump occasionally. +/* +/* Watchdog timers can be stacked. Only one watchdog timer can be +/* active at a time. Only the last created watchdog timer can be +/* manipulated. Watchdog timers must be destroyed in reverse order +/* of creation. +/* +/* watchdog_create() suspends the current watchdog timer, if any, +/* and instantiates a new watchdog timer. +/* +/* watchdog_start() starts or restarts the watchdog timer. +/* +/* watchdog_stop() stops the watchdog timer. +/* +/* watchdog_destroy() stops the watchdog timer, and resumes the +/* watchdog timer instance that was suspended by watchdog_create(). +/* +/* watchdog_pat() pats the watchdog, so it stays quiet. +/* +/* Arguments: +/* .IP timeout +/* The watchdog time limit. When the watchdog timer runs, the +/* process must invoke watchdog_start(), watchdog_stop() or +/* watchdog_destroy() before the time limit is reached. +/* .IP action +/* A null pointer, or pointer to function that is called when the +/* watchdog alarm goes off. The default action is to terminate +/* the process with a fatal error. +/* .IP context +/* Application context that is passed to the action routine. +/* .IP watchdog +/* Must be a pointer to the most recently created watchdog instance. +/* This argument is checked upon each call. +/* BUGS +/* UNIX alarm timers are not stackable, so there can be at most one +/* watchdog instance active at any given time. +/* SEE ALSO +/* msg(3) diagnostics interface +/* DIAGNOSTICS +/* Fatal errors: memory allocation problem, system call failure. +/* Panics: interface violations. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + +/* System library. */ + +#include <sys_defs.h> +#include <unistd.h> +#include <signal.h> +#include <posix_signals.h> + +/* Utility library. */ + +#include <msg.h> +#include <mymalloc.h> +#include <killme_after.h> +#include <watchdog.h> + +/* Application-specific. */ + + /* + * Rather than having one timer that goes off when it is too late, we break + * up the time limit into smaller intervals so that we can deal with clocks + * that jump occasionally. + */ +#define WATCHDOG_STEPS 3 + + /* + * UNIX alarms are not stackable, but we can save and restore state, so that + * watchdogs can at least be nested, sort of. + */ +struct WATCHDOG { + unsigned timeout; /* our time resolution */ + WATCHDOG_FN action; /* application routine */ + char *context; /* application context */ + int trip_run; /* number of successive timeouts */ + WATCHDOG *saved_watchdog; /* saved state */ + struct sigaction saved_action; /* saved state */ + unsigned saved_time; /* saved state */ +}; + + /* + * However, only one watchdog instance can be current, and the caller has to + * restore state before a prior watchdog instance can be manipulated. + */ +static WATCHDOG *watchdog_curr; + + /* + * Workaround for systems where the alarm signal does not wakeup the event + * machinery, and therefore does not restart the watchdog timer in the + * single_server etc. skeletons. The symptom is that programs abort when the + * watchdog timeout is less than the max_idle time. + */ +#ifdef USE_WATCHDOG_PIPE +#include <errno.h> +#include <iostuff.h> +#include <events.h> + +static int watchdog_pipe[2]; + +/* watchdog_read - read event pipe */ + +static void watchdog_read(int unused_event, void *unused_context) +{ + char ch; + + while (read(watchdog_pipe[0], &ch, 1) > 0) + /* void */ ; +} + +#endif /* USE_WATCHDOG_PIPE */ + +/* watchdog_event - handle timeout event */ + +static void watchdog_event(int unused_sig) +{ + const char *myname = "watchdog_event"; + WATCHDOG *wp; + + /* + * This routine runs as a signal handler. We should not do anything that + * could involve memory allocation/deallocation, but exiting without + * proper explanation would be unacceptable. For this reason, msg(3) was + * made safe for usage by signal handlers that terminate the process. + */ + if ((wp = watchdog_curr) == 0) + msg_panic("%s: no instance", myname); + if (msg_verbose > 1) + msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run); + if (++(wp->trip_run) < WATCHDOG_STEPS) { +#ifdef USE_WATCHDOG_PIPE + int saved_errno = errno; + + /* Wake up the events(3) engine. */ + if (write(watchdog_pipe[1], "", 1) != 1) + msg_warn("%s: write watchdog_pipe: %m", myname); + errno = saved_errno; +#endif + alarm(wp->timeout); + } else { + if (wp->action) + wp->action(wp, wp->context); + else { + killme_after(5); +#ifdef TEST + pause(); +#endif + msg_fatal("watchdog timeout"); + } + } +} + +/* watchdog_create - create watchdog instance */ + +WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context) +{ + const char *myname = "watchdog_create"; + struct sigaction sig_action; + WATCHDOG *wp; + + wp = (WATCHDOG *) mymalloc(sizeof(*wp)); + if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0) + msg_panic("%s: timeout %d is too small", myname, timeout); + wp->action = action; + wp->context = context; + wp->saved_watchdog = watchdog_curr; + wp->saved_time = alarm(0); + sigemptyset(&sig_action.sa_mask); +#ifdef SA_RESTART + sig_action.sa_flags = SA_RESTART; +#else + sig_action.sa_flags = 0; +#endif + sig_action.sa_handler = watchdog_event; + if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0) + msg_fatal("%s: sigaction(SIGALRM): %m", myname); + if (msg_verbose > 1) + msg_info("%s: %p %d", myname, (void *) wp, timeout); +#ifdef USE_WATCHDOG_PIPE + if (watchdog_curr == 0) { + if (pipe(watchdog_pipe) < 0) + msg_fatal("%s: pipe: %m", myname); + non_blocking(watchdog_pipe[0], NON_BLOCKING); + non_blocking(watchdog_pipe[1], NON_BLOCKING); + close_on_exec(watchdog_pipe[0], CLOSE_ON_EXEC); /* Fix 20190126 */ + close_on_exec(watchdog_pipe[1], CLOSE_ON_EXEC); /* Fix 20190126 */ + event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0); + } +#endif + return (watchdog_curr = wp); +} + +/* watchdog_destroy - destroy watchdog instance, restore state */ + +void watchdog_destroy(WATCHDOG *wp) +{ + const char *myname = "watchdog_destroy"; + + watchdog_stop(wp); + watchdog_curr = wp->saved_watchdog; + if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0) + msg_fatal("%s: sigaction(SIGALRM): %m", myname); + if (wp->saved_time) + alarm(wp->saved_time); + myfree((void *) wp); +#ifdef USE_WATCHDOG_PIPE + if (watchdog_curr == 0) { + event_disable_readwrite(watchdog_pipe[0]); + (void) close(watchdog_pipe[0]); + (void) close(watchdog_pipe[1]); + } +#endif + if (msg_verbose > 1) + msg_info("%s: %p", myname, (void *) wp); +} + +/* watchdog_start - enable watchdog timer */ + +void watchdog_start(WATCHDOG *wp) +{ + const char *myname = "watchdog_start"; + + if (wp != watchdog_curr) + msg_panic("%s: wrong watchdog instance", myname); + wp->trip_run = 0; + alarm(wp->timeout); + if (msg_verbose > 1) + msg_info("%s: %p", myname, (void *) wp); +} + +/* watchdog_stop - disable watchdog timer */ + +void watchdog_stop(WATCHDOG *wp) +{ + const char *myname = "watchdog_stop"; + + if (wp != watchdog_curr) + msg_panic("%s: wrong watchdog instance", myname); + alarm(0); + if (msg_verbose > 1) + msg_info("%s: %p", myname, (void *) wp); +} + +/* watchdog_pat - pat the dog so it stays quiet */ + +void watchdog_pat(void) +{ + const char *myname = "watchdog_pat"; + + if (watchdog_curr) + watchdog_curr->trip_run = 0; + if (msg_verbose > 1) + msg_info("%s: %p", myname, (void *) watchdog_curr); +} + +#ifdef TEST + +#include <vstream.h> + +int main(int unused_argc, char **unused_argv) +{ + WATCHDOG *wp; + + msg_verbose = 2; + + wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0); + watchdog_start(wp); + do { + watchdog_pat(); + } while (VSTREAM_GETCHAR() != VSTREAM_EOF); + watchdog_destroy(wp); + return (0); +} + +#endif |