summaryrefslogtreecommitdiffstats
path: root/daemon/signals.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--daemon/signals.c98
1 files changed, 33 insertions, 65 deletions
diff --git a/daemon/signals.c b/daemon/signals.c
index c857a9b5..3699010c 100644
--- a/daemon/signals.c
+++ b/daemon/signals.c
@@ -2,8 +2,6 @@
#include "common.h"
-static int reaper_enabled = 0;
-
typedef enum signal_action {
NETDATA_SIGNAL_END_OF_LIST,
NETDATA_SIGNAL_IGNORE,
@@ -78,16 +76,6 @@ void signals_init(void) {
struct sigaction sa;
sa.sa_flags = 0;
- // Enable process tracking / reaper if running as init (pid == 1).
- // This prevents zombie processes when running in a container.
- if (getpid() == 1) {
- info("SIGNAL: Enabling reaper");
- netdata_popen_tracking_init();
- reaper_enabled = 1;
- } else {
- info("SIGNAL: Not enabling reaper");
- }
-
// ignore all signals while we run in a signal handler
sigfillset(&sa.sa_mask);
@@ -97,10 +85,6 @@ void signals_init(void) {
case NETDATA_SIGNAL_IGNORE:
sa.sa_handler = SIG_IGN;
break;
- case NETDATA_SIGNAL_CHILD:
- if (reaper_enabled == 0)
- continue;
- // FALLTHROUGH
default:
sa.sa_handler = signal_handler;
break;
@@ -115,9 +99,6 @@ void signals_restore_SIGCHLD(void)
{
struct sigaction sa;
- if (reaper_enabled == 0)
- return;
-
sa.sa_flags = 0;
sigfillset(&sa.sa_mask);
sa.sa_handler = signal_handler;
@@ -137,9 +118,6 @@ void signals_reset(void) {
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
}
-
- if (reaper_enabled == 1)
- netdata_popen_tracking_cleanup();
}
// reap_child reaps the child identified by pid.
@@ -147,39 +125,42 @@ static void reap_child(pid_t pid) {
siginfo_t i;
errno = 0;
- debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid);
- if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
+ debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid);
+ if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
if (errno != ECHILD)
- error("SIGNAL: Failed to wait for: %d", pid);
+ error("SIGNAL: waitid(%d): failed to wait for child", pid);
else
- debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid);
+ info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid);
return;
- } else if (i.si_pid == 0) {
+ }
+ else if (i.si_pid == 0) {
// Process didn't exit, this shouldn't happen.
+ error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid);
return;
}
switch (i.si_code) {
- case CLD_EXITED:
- debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status);
- break;
- case CLD_KILLED:
- debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status);
- break;
- case CLD_DUMPED:
- debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status);
- break;
- case CLD_STOPPED:
- debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status);
- break;
- case CLD_TRAPPED:
- debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status);
- break;
- case CLD_CONTINUED:
- debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status);
- break;
- default:
- debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
+ case CLD_EXITED:
+ info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status);
+ break;
+ case CLD_KILLED:
+ info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status);
+ break;
+ case CLD_DUMPED:
+ info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status);
+ break;
+ case CLD_STOPPED:
+ info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status);
+ break;
+ case CLD_TRAPPED:
+ info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status);
+ break;
+ case CLD_CONTINUED:
+ info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status);
+ break;
+ default:
+ info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
+ break;
}
}
@@ -187,25 +168,13 @@ static void reap_child(pid_t pid) {
static void reap_children() {
siginfo_t i;
- while (1 == 1) {
- // Identify which process caused the signal so we can determine
- // if we need to reap a re-parented process.
+ while(1) {
i.si_pid = 0;
- if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) {
- if (errno != ECHILD) // This shouldn't happen with WNOHANG but does.
- error("SIGNAL: Failed to wait");
- return;
- } else if (i.si_pid == 0) {
- // No child exited.
+ if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0)
+ // nothing to do
return;
- } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) {
- // myp managed, sleep for a short time to avoid busy wait while
- // this is handled by myp.
- usleep(10000);
- } else {
- // Unknown process, likely a re-parented child, reap it.
- reap_child(i.si_pid);
- }
+
+ reap_child(i.si_pid);
}
}
@@ -267,7 +236,6 @@ void signals_handle(void) {
break;
case NETDATA_SIGNAL_CHILD:
- debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name);
reap_children();
break;