1 files changed, 64 insertions, 52 deletions
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
index 21e432e..5bd3512 100644
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010-2023 the Pacemaker project contributors
+ * Copyright 2010-2024 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -10,6 +10,10 @@
 #include <crm_internal.h>
 #include "pacemakerd.h"
 
+#if SUPPORT_COROSYNC
+#include "pcmkd_corosync.h"
+#endif
+
 #include <errno.h>
 #include <grp.h>
 #include <signal.h>
@@ -21,22 +25,25 @@
 #include <unistd.h>
 
 #include <crm/cluster.h>
-#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+
+enum child_daemon_flags {
+    child_none                  = 0,
+    child_respawn               = 1 << 0,
+    child_needs_cluster         = 1 << 1,
+    child_needs_retry           = 1 << 2,
+    child_active_before_startup = 1 << 3,
+};
 
 typedef struct pcmk_child_s {
     pid_t pid;
     int respawn_count;
-    bool respawn;
     const char *name;
     const char *uid;
     const char *command;
     const char *endpoint;  /* IPC server name */
-    bool needs_cluster;
     int check_count;
-
-    /* Anything below here will be dynamically initialized */
-    bool needs_retry;
-    bool active_before_startup;
+    uint32_t flags;
 } pcmk_child_t;
 
 #define PCMK_PROCESS_CHECK_INTERVAL 1
@@ -48,34 +55,34 @@ typedef struct pcmk_child_s {
 
 static pcmk_child_t pcmk_children[] = {
     {
-        0, 0, true,  "pacemaker-based", CRM_DAEMON_USER,
+        0, 0, "pacemaker-based", CRM_DAEMON_USER,
         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
-        true
+        0, child_respawn | child_needs_cluster
     },
     {
-        0, 0, true, "pacemaker-fenced", NULL,
+        0, 0, "pacemaker-fenced", NULL,
         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
-        true
+        0, child_respawn | child_needs_cluster
     },
     {
-        0, 0, true,  "pacemaker-execd", NULL,
+        0, 0, "pacemaker-execd", NULL,
         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
-        false
+        0, child_respawn
     },
     {
-        0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
-        CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
-        true
+        0, 0, "pacemaker-attrd", CRM_DAEMON_USER,
+        CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD,
+        0, child_respawn | child_needs_cluster
     },
     {
-        0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
+        0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER,
         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
-        false
+        0, child_respawn
     },
     {
-        0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
+        0, 0, "pacemaker-controld", CRM_DAEMON_USER,
         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
-        true
+        0, child_respawn | child_needs_cluster
     },
 };
 
@@ -103,7 +110,7 @@ unsigned int shutdown_complete_state_reported_to = 0;
 gboolean shutdown_complete_state_reported_client_closed = FALSE;
 
 /* state we report when asked via pacemakerd-api status-ping */
-const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
+const char *pacemakerd_state = PCMK__VALUE_INIT;
 gboolean running_with_sbd = FALSE; /* local copy */
 
 GMainLoop *mainloop = NULL;
@@ -154,7 +161,7 @@ check_next_subdaemon(gpointer user_data)
                             pcmk_children[next_child].pid),
                         pcmk_children[next_child].check_count);
                 stop_child(&pcmk_children[next_child], SIGKILL);
-                if (pcmk_children[next_child].respawn) {
+                if (pcmk_is_set(pcmk_children[next_child].flags, child_respawn)) {
                     /* as long as the respawn-limit isn't reached
                        give it another round of check retries
                      */
@@ -166,7 +173,7 @@ check_next_subdaemon(gpointer user_data)
                         (long long) PCMK__SPECIAL_PID_AS_0(
                             pcmk_children[next_child].pid),
                         pcmk_children[next_child].check_count);
-                if (pcmk_children[next_child].respawn) {
+                if (pcmk_is_set(pcmk_children[next_child].flags, child_respawn)) {
                     /* as long as the respawn-limit isn't reached
                        and we haven't run out of connect retries
                        we account this as progress we are willing
@@ -180,7 +187,7 @@ check_next_subdaemon(gpointer user_data)
              */
             break;
         case pcmk_rc_ipc_unresponsive:
-            if (!pcmk_children[next_child].respawn) {
+            if (!pcmk_is_set(pcmk_children[next_child].flags, child_respawn)) {
                 /* if a subdaemon is down and we don't want it
                    to be restarted this is a success during
                    shutdown. if it isn't restarted anymore
@@ -191,7 +198,7 @@ check_next_subdaemon(gpointer user_data)
                     subdaemon_check_progress = time(NULL);
                 }
             }
-            if (!pcmk_children[next_child].active_before_startup) {
+            if (!pcmk_is_set(pcmk_children[next_child].flags, child_active_before_startup)) {
                 crm_trace("found %s[%lld] missing - signal-handler "
                           "will take care of it",
                            pcmk_children[next_child].name,
@@ -199,7 +206,7 @@ check_next_subdaemon(gpointer user_data)
                             pcmk_children[next_child].pid));
                 break;
             }
-            if (pcmk_children[next_child].respawn) {
+            if (pcmk_is_set(pcmk_children[next_child].flags, child_respawn)) {
                 crm_err("%s[%lld] terminated",
                         pcmk_children[next_child].name,
                         (long long) PCMK__SPECIAL_PID_AS_0(
@@ -264,14 +271,14 @@ pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitco
             case CRM_EX_FATAL:
                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
                          name, pid);
-                child->respawn = false;
+                child->flags &= ~child_respawn;
                 fatal_error = TRUE;
                 pcmk_shutdown(SIGTERM);
                 break;
 
             case CRM_EX_PANIC:
                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
-                child->respawn = false;
+                child->flags &= ~child_respawn;
                 fatal_error = TRUE;
                 pcmk__panic(__func__);
                 pcmk_shutdown(SIGTERM);
@@ -291,20 +298,20 @@ static void
 pcmk_process_exit(pcmk_child_t * child)
 {
     child->pid = 0;
-    child->active_before_startup = false;
+    child->flags &= ~child_active_before_startup;
     child->check_count = 0;
 
     child->respawn_count += 1;
     if (child->respawn_count > MAX_RESPAWN) {
         crm_err("Child respawn count exceeded by %s", child->name);
-        child->respawn = false;
+        child->flags &= ~child_respawn;
     }
 
     if (shutdown_trigger) {
         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
         mainloop_set_trigger(shutdown_trigger);
 
-    } else if (!child->respawn) {
+    } else if (!pcmk_is_set(child->flags, child_respawn)) {
         /* nothing to do */
 
     } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
@@ -316,10 +323,10 @@ pcmk_process_exit(pcmk_child_t * child)
                  " appears alright per %s IPC end-point",
                  child->name, child->endpoint);
 
-    } else if (child->needs_cluster && !pcmkd_cluster_connected()) {
+    } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
         crm_notice("Not respawning %s subdaemon until cluster returns",
                    child->name);
-        child->needs_retry = true;
+        child->flags |= child_needs_retry;
 
     } else {
         crm_notice("Respawning %s subdaemon after unexpected exit",
@@ -336,7 +343,7 @@ pcmk_shutdown_worker(gpointer user_data)
 
     if (phase == PCMK__NELEM(pcmk_children) - 1) {
         crm_notice("Shutting down Pacemaker");
-        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
+        pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
     }
 
     for (; phase >= 0; phase--) {
@@ -345,7 +352,7 @@ pcmk_shutdown_worker(gpointer user_data)
         if (child->pid != 0) {
             time_t now = time(NULL);
 
-            if (child->respawn) {
+            if (pcmk_is_set(child->flags, child_respawn)) {
                 if (child->pid == PCMK__SPECIAL_PID) {
                     crm_warn("The process behind %s IPC cannot be"
                              " terminated, so either wait the graceful"
@@ -359,7 +366,7 @@ pcmk_shutdown_worker(gpointer user_data)
                              child->command);
                 }
                 next_log = now + 30;
-                child->respawn = false;
+                child->flags &= ~child_respawn;
                 stop_child(child, SIGTERM);
                 if (phase < PCMK_CHILD_CONTROLD) {
                     g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
@@ -381,7 +388,7 @@ pcmk_shutdown_worker(gpointer user_data)
     }
 
     crm_notice("Shutdown complete");
-    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
+    pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
     if (!fatal_error && running_with_sbd &&
         pcmk__get_sbd_sync_resource_startup() &&
         !shutdown_complete_state_reported_client_closed) {
@@ -393,8 +400,12 @@ pcmk_shutdown_worker(gpointer user_data)
     {
         const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
         if(delay) {
+            long long delay_ms = crm_get_msec(delay);
+
             sync();
-            pcmk__sleep_ms(crm_get_msec(delay));
+            if (delay_ms > 0) {
+                pcmk__sleep_ms((unsigned int) QB_MIN(delay_ms, UINT_MAX));
+            }
         }
     }
 
@@ -427,7 +438,7 @@ start_child(pcmk_child_t * child)
     const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
     const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
 
-    child->active_before_startup = false;
+    child->flags &= ~child_active_before_startup;
     child->check_count = 0;
 
     if (child->command == NULL) {
@@ -481,19 +492,20 @@ start_child(pcmk_child_t * child)
         (void)setsid();
 
         /* Setup the two alternate arg arrays */
-        opts_vgrind[0] = strdup(VALGRIND_BIN);
+        opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
         if (use_callgrind) {
-            opts_vgrind[1] = strdup("--tool=callgrind");
-            opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
-            opts_vgrind[3] = strdup(child->command);
+            opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
+            opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
+                                            CRM_STATE_DIR "/callgrind.out.%p");
+            opts_vgrind[3] = pcmk__str_copy(child->command);
             opts_vgrind[4] = NULL;
         } else {
-            opts_vgrind[1] = strdup(child->command);
+            opts_vgrind[1] = pcmk__str_copy(child->command);
             opts_vgrind[2] = NULL;
             opts_vgrind[3] = NULL;
             opts_vgrind[4] = NULL;
         }
-        opts_default[0] = strdup(child->command);
+        opts_default[0] = pcmk__str_copy(child->command);
 
         if(gid) {
             // Drop root group access if not needed
@@ -759,7 +771,7 @@ find_and_track_existing_processes(void)
                                (long long) PCMK__SPECIAL_PID_AS_0(
                                                pcmk_children[i].pid));
                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
-                    pcmk_children[i].active_before_startup = true;
+                    pcmk_children[i].flags |= child_active_before_startup;
                     break;
                 case pcmk_rc_ipc_pid_only:
                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
@@ -802,7 +814,7 @@ find_and_track_existing_processes(void)
 gboolean
 init_children_processes(void *user_data)
 {
-    if (is_corosync_cluster()) {
+    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
         /* Corosync clusters can drop root group access, because we set
          * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
          * to corosync.
@@ -825,8 +837,8 @@ init_children_processes(void *user_data)
      *
      * This may be useful for the daemons to know
      */
-    pcmk__set_env_option(PCMK__ENV_RESPAWNED, "true", false);
-    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
+    pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
+    pacemakerd_state = PCMK__VALUE_RUNNING;
     return TRUE;
 }
 
@@ -843,13 +855,13 @@ void
 restart_cluster_subdaemons(void)
 {
     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
-        if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
+        if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
             continue;
         }
 
         crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
         if (start_child(&pcmk_children[i])) {
-            pcmk_children[i].needs_retry = false;
+            pcmk_children[i].flags &= ~child_needs_retry;
         }
     }
 }