diff options
Diffstat (limited to '')
-rw-r--r-- | daemon/signals.c | 98 |
1 files changed, 33 insertions, 65 deletions
diff --git a/daemon/signals.c b/daemon/signals.c index c857a9b57..3699010ce 100644 --- a/daemon/signals.c +++ b/daemon/signals.c @@ -2,8 +2,6 @@ #include "common.h" -static int reaper_enabled = 0; - typedef enum signal_action { NETDATA_SIGNAL_END_OF_LIST, NETDATA_SIGNAL_IGNORE, @@ -78,16 +76,6 @@ void signals_init(void) { struct sigaction sa; sa.sa_flags = 0; - // Enable process tracking / reaper if running as init (pid == 1). - // This prevents zombie processes when running in a container. - if (getpid() == 1) { - info("SIGNAL: Enabling reaper"); - netdata_popen_tracking_init(); - reaper_enabled = 1; - } else { - info("SIGNAL: Not enabling reaper"); - } - // ignore all signals while we run in a signal handler sigfillset(&sa.sa_mask); @@ -97,10 +85,6 @@ void signals_init(void) { case NETDATA_SIGNAL_IGNORE: sa.sa_handler = SIG_IGN; break; - case NETDATA_SIGNAL_CHILD: - if (reaper_enabled == 0) - continue; - // FALLTHROUGH default: sa.sa_handler = signal_handler; break; @@ -115,9 +99,6 @@ void signals_restore_SIGCHLD(void) { struct sigaction sa; - if (reaper_enabled == 0) - return; - sa.sa_flags = 0; sigfillset(&sa.sa_mask); sa.sa_handler = signal_handler; @@ -137,9 +118,6 @@ void signals_reset(void) { if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name); } - - if (reaper_enabled == 1) - netdata_popen_tracking_cleanup(); } // reap_child reaps the child identified by pid. @@ -147,39 +125,42 @@ static void reap_child(pid_t pid) { siginfo_t i; errno = 0; - debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid); - if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { + debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid); + if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { if (errno != ECHILD) - error("SIGNAL: Failed to wait for: %d", pid); + error("SIGNAL: waitid(%d): failed to wait for child", pid); else - debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid); + info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid); return; - } else if (i.si_pid == 0) { + } + else if (i.si_pid == 0) { // Process didn't exit, this shouldn't happen. + error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid); return; } switch (i.si_code) { - case CLD_EXITED: - debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status); - break; - case CLD_KILLED: - debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status); - break; - case CLD_DUMPED: - debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status); - break; - case CLD_STOPPED: - debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status); - break; - case CLD_TRAPPED: - debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status); - break; - case CLD_CONTINUED: - debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status); - break; - default: - debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + case CLD_EXITED: + info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status); + break; + case CLD_KILLED: + info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status); + break; + case CLD_DUMPED: + info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status); + break; + case CLD_STOPPED: + info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status); + break; + case CLD_TRAPPED: + info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status); + break; + case CLD_CONTINUED: + info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status); + break; + default: + info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + break; } } @@ -187,25 +168,13 @@ static void reap_child(pid_t pid) { static void reap_children() { siginfo_t i; - while (1 == 1) { - // Identify which process caused the signal so we can determine - // if we need to reap a re-parented process. + while(1) { i.si_pid = 0; - if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) { - if (errno != ECHILD) // This shouldn't happen with WNOHANG but does. - error("SIGNAL: Failed to wait"); - return; - } else if (i.si_pid == 0) { - // No child exited. + if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0) + // nothing to do return; - } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) { - // myp managed, sleep for a short time to avoid busy wait while - // this is handled by myp. - usleep(10000); - } else { - // Unknown process, likely a re-parented child, reap it. - reap_child(i.si_pid); - } + + reap_child(i.si_pid); } } @@ -267,7 +236,6 @@ void signals_handle(void) { break; case NETDATA_SIGNAL_CHILD: - debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name); reap_children(); break; |