summaryrefslogtreecommitdiffstats
path: root/src/util/watchdog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/watchdog.c')
-rw-r--r--src/util/watchdog.c314
1 files changed, 314 insertions, 0 deletions
diff --git a/src/util/watchdog.c b/src/util/watchdog.c
new file mode 100644
index 0000000..3ec1fbc
--- /dev/null
+++ b/src/util/watchdog.c
@@ -0,0 +1,314 @@
+/*++
+/* NAME
+/* watchdog 3
+/* SUMMARY
+/* watchdog timer
+/* SYNOPSIS
+/* #include <watchdog.h>
+/*
+/* WATCHDOG *watchdog_create(timeout, action, context)
+/* unsigned timeout;
+/* void (*action)(WATCHDOG *watchdog, char *context);
+/* char *context;
+/*
+/* void watchdog_start(watchdog)
+/* WATCHDOG *watchdog;
+/*
+/* void watchdog_stop(watchdog)
+/* WATCHDOG *watchdog;
+/*
+/* void watchdog_destroy(watchdog)
+/* WATCHDOG *watchdog;
+/*
+/* void watchdog_pat()
+/* DESCRIPTION
+/* This module implements watchdog timers that are based on ugly
+/* UNIX alarm timers. The module is designed to survive systems
+/* with clocks that jump occasionally.
+/*
+/* Watchdog timers can be stacked. Only one watchdog timer can be
+/* active at a time. Only the last created watchdog timer can be
+/* manipulated. Watchdog timers must be destroyed in reverse order
+/* of creation.
+/*
+/* watchdog_create() suspends the current watchdog timer, if any,
+/* and instantiates a new watchdog timer.
+/*
+/* watchdog_start() starts or restarts the watchdog timer.
+/*
+/* watchdog_stop() stops the watchdog timer.
+/*
+/* watchdog_destroy() stops the watchdog timer, and resumes the
+/* watchdog timer instance that was suspended by watchdog_create().
+/*
+/* watchdog_pat() pats the watchdog, so it stays quiet.
+/*
+/* Arguments:
+/* .IP timeout
+/* The watchdog time limit. When the watchdog timer runs, the
+/* process must invoke watchdog_start(), watchdog_stop() or
+/* watchdog_destroy() before the time limit is reached.
+/* .IP action
+/* A null pointer, or pointer to function that is called when the
+/* watchdog alarm goes off. The default action is to terminate
+/* the process with a fatal error.
+/* .IP context
+/* Application context that is passed to the action routine.
+/* .IP watchdog
+/* Must be a pointer to the most recently created watchdog instance.
+/* This argument is checked upon each call.
+/* BUGS
+/* UNIX alarm timers are not stackable, so there can be at most one
+/* watchdog instance active at any given time.
+/* SEE ALSO
+/* msg(3) diagnostics interface
+/* DIAGNOSTICS
+/* Fatal errors: memory allocation problem, system call failure.
+/* Panics: interface violations.
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* IBM T.J. Watson Research
+/* P.O. Box 704
+/* Yorktown Heights, NY 10598, USA
+/*
+/* Wietse Venema
+/* Google, Inc.
+/* 111 8th Avenue
+/* New York, NY 10011, USA
+/*--*/
+
+/* System library. */
+
+#include <sys_defs.h>
+#include <unistd.h>
+#include <signal.h>
+#include <posix_signals.h>
+
+/* Utility library. */
+
+#include <msg.h>
+#include <mymalloc.h>
+#include <killme_after.h>
+#include <watchdog.h>
+
+/* Application-specific. */
+
+ /*
+ * Rather than having one timer that goes off when it is too late, we break
+ * up the time limit into smaller intervals so that we can deal with clocks
+ * that jump occasionally.
+ */
+#define WATCHDOG_STEPS 3
+
+ /*
+ * UNIX alarms are not stackable, but we can save and restore state, so that
+ * watchdogs can at least be nested, sort of.
+ */
+struct WATCHDOG {
+ unsigned timeout; /* our time resolution */
+ WATCHDOG_FN action; /* application routine */
+ char *context; /* application context */
+ int trip_run; /* number of successive timeouts */
+ WATCHDOG *saved_watchdog; /* saved state */
+ struct sigaction saved_action; /* saved state */
+ unsigned saved_time; /* saved state */
+};
+
+ /*
+ * However, only one watchdog instance can be current, and the caller has to
+ * restore state before a prior watchdog instance can be manipulated.
+ */
+static WATCHDOG *watchdog_curr;
+
+ /*
+ * Workaround for systems where the alarm signal does not wakeup the event
+ * machinery, and therefore does not restart the watchdog timer in the
+ * single_server etc. skeletons. The symptom is that programs abort when the
+ * watchdog timeout is less than the max_idle time.
+ */
+#ifdef USE_WATCHDOG_PIPE
+#include <errno.h>
+#include <iostuff.h>
+#include <events.h>
+
+static int watchdog_pipe[2];
+
+/* watchdog_read - read event pipe */
+
+static void watchdog_read(int unused_event, void *unused_context)
+{
+ char ch;
+
+ while (read(watchdog_pipe[0], &ch, 1) > 0)
+ /* void */ ;
+}
+
+#endif /* USE_WATCHDOG_PIPE */
+
+/* watchdog_event - handle timeout event */
+
+static void watchdog_event(int unused_sig)
+{
+ const char *myname = "watchdog_event";
+ WATCHDOG *wp;
+
+ /*
+ * This routine runs as a signal handler. We should not do anything that
+ * could involve memory allocation/deallocation, but exiting without
+ * proper explanation would be unacceptable. For this reason, msg(3) was
+ * made safe for usage by signal handlers that terminate the process.
+ */
+ if ((wp = watchdog_curr) == 0)
+ msg_panic("%s: no instance", myname);
+ if (msg_verbose > 1)
+ msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
+ if (++(wp->trip_run) < WATCHDOG_STEPS) {
+#ifdef USE_WATCHDOG_PIPE
+ int saved_errno = errno;
+
+ /* Wake up the events(3) engine. */
+ if (write(watchdog_pipe[1], "", 1) != 1)
+ msg_warn("%s: write watchdog_pipe: %m", myname);
+ errno = saved_errno;
+#endif
+ alarm(wp->timeout);
+ } else {
+ if (wp->action)
+ wp->action(wp, wp->context);
+ else {
+ killme_after(5);
+#ifdef TEST
+ pause();
+#endif
+ msg_fatal("watchdog timeout");
+ }
+ }
+}
+
+/* watchdog_create - create watchdog instance */
+
+WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
+{
+ const char *myname = "watchdog_create";
+ struct sigaction sig_action;
+ WATCHDOG *wp;
+
+ wp = (WATCHDOG *) mymalloc(sizeof(*wp));
+ if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
+ msg_panic("%s: timeout %d is too small", myname, timeout);
+ wp->action = action;
+ wp->context = context;
+ wp->saved_watchdog = watchdog_curr;
+ wp->saved_time = alarm(0);
+ sigemptyset(&sig_action.sa_mask);
+#ifdef SA_RESTART
+ sig_action.sa_flags = SA_RESTART;
+#else
+ sig_action.sa_flags = 0;
+#endif
+ sig_action.sa_handler = watchdog_event;
+ if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
+ msg_fatal("%s: sigaction(SIGALRM): %m", myname);
+ if (msg_verbose > 1)
+ msg_info("%s: %p %d", myname, (void *) wp, timeout);
+#ifdef USE_WATCHDOG_PIPE
+ if (watchdog_curr == 0) {
+ if (pipe(watchdog_pipe) < 0)
+ msg_fatal("%s: pipe: %m", myname);
+ non_blocking(watchdog_pipe[0], NON_BLOCKING);
+ non_blocking(watchdog_pipe[1], NON_BLOCKING);
+ close_on_exec(watchdog_pipe[0], CLOSE_ON_EXEC); /* Fix 20190126 */
+ close_on_exec(watchdog_pipe[1], CLOSE_ON_EXEC); /* Fix 20190126 */
+ event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0);
+ }
+#endif
+ return (watchdog_curr = wp);
+}
+
+/* watchdog_destroy - destroy watchdog instance, restore state */
+
+void watchdog_destroy(WATCHDOG *wp)
+{
+ const char *myname = "watchdog_destroy";
+
+ watchdog_stop(wp);
+ watchdog_curr = wp->saved_watchdog;
+ if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
+ msg_fatal("%s: sigaction(SIGALRM): %m", myname);
+ if (wp->saved_time)
+ alarm(wp->saved_time);
+ myfree((void *) wp);
+#ifdef USE_WATCHDOG_PIPE
+ if (watchdog_curr == 0) {
+ event_disable_readwrite(watchdog_pipe[0]);
+ (void) close(watchdog_pipe[0]);
+ (void) close(watchdog_pipe[1]);
+ }
+#endif
+ if (msg_verbose > 1)
+ msg_info("%s: %p", myname, (void *) wp);
+}
+
+/* watchdog_start - enable watchdog timer */
+
+void watchdog_start(WATCHDOG *wp)
+{
+ const char *myname = "watchdog_start";
+
+ if (wp != watchdog_curr)
+ msg_panic("%s: wrong watchdog instance", myname);
+ wp->trip_run = 0;
+ alarm(wp->timeout);
+ if (msg_verbose > 1)
+ msg_info("%s: %p", myname, (void *) wp);
+}
+
+/* watchdog_stop - disable watchdog timer */
+
+void watchdog_stop(WATCHDOG *wp)
+{
+ const char *myname = "watchdog_stop";
+
+ if (wp != watchdog_curr)
+ msg_panic("%s: wrong watchdog instance", myname);
+ alarm(0);
+ if (msg_verbose > 1)
+ msg_info("%s: %p", myname, (void *) wp);
+}
+
+/* watchdog_pat - pat the dog so it stays quiet */
+
+void watchdog_pat(void)
+{
+ const char *myname = "watchdog_pat";
+
+ if (watchdog_curr)
+ watchdog_curr->trip_run = 0;
+ if (msg_verbose > 1)
+ msg_info("%s: %p", myname, (void *) watchdog_curr);
+}
+
+#ifdef TEST
+
+#include <vstream.h>
+
+int main(int unused_argc, char **unused_argv)
+{
+ WATCHDOG *wp;
+
+ msg_verbose = 2;
+
+ wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0);
+ watchdog_start(wp);
+ do {
+ watchdog_pat();
+ } while (VSTREAM_GETCHAR() != VSTREAM_EOF);
+ watchdog_destroy(wp);
+ return (0);
+}
+
+#endif